Mirror of https://github.com/weyne85/PentestGPT.git, synced 2025-10-29 16:58:59 +00:00
fix: 🐛 minor API bug fix
Delete the chat session from class
@@ -1,13 +0,0 @@
-from utils.chatgpt import ChatGPT
-from config.chatgpt_config import ChatGPTConfig
-
-import loguru
-
-logger = loguru.logger
-
-# format: {name: {description: str, sample_curl: str, sample_response: str}}
-API_description = {}
-
-
-if __name__ == "__main__":
-    chatGPTAgent = ChatGPT()
@@ -2,39 +2,62 @@ import requests
 from bs4 import BeautifulSoup
 import json
 
-def parse_markdown_page(url):
+
+def crawl_dotCMS_description_page(
+    url="https://www.dotcms.com/docs/latest/container-api", output_dir="outputs"
+):
     page = requests.get(url)
-    soup = BeautifulSoup(page.content, 'html.parser')
+    soup = BeautifulSoup(page.content, "html.parser")
 
     # Extract the title of the page
-    title = soup.find('h1').get_text()
+    title = soup.find("h1").get_text()
 
     # Extract the subtitles and their descriptions and code chunks
-    subtitles = soup.find_all('h2')
+    subtitles = soup.find_all("h2")
     parsed_subtitles = []
     for subtitle in subtitles:
         subtitle_title = subtitle.get_text()
-        subtitle_contents = subtitle.find_next_siblings(['p', 'pre'])
+        subtitle_contents = subtitle.find_next_siblings(["p", "pre"])
         subtitle_parsed_contents = []
-        for i in range(0, len(subtitle_contents), 2):
-            description = subtitle_contents[i].get_text()
-            if len(subtitle_contents) > i+1:
-                code = subtitle_contents[i+1].get_text()
+        description = ""
+        for content in subtitle_contents:
+            # Check if the content is a code block
+            if content.name == "pre" and content.code:
+                code = content.get_text()
+                # Add the previous description and code chunk to the list
+                if len(description) != 0:  # If there is no description, don't add it
+                    parsed_description = description.strip().replace("\n", " ")
+                    parsed_code = code.strip().replace("\n", " ")
+                    subtitle_parsed_contents.append([parsed_description, parsed_code])
+
             else:
-                code = ''
-            subtitle_parsed_contents.append([description, code])
+                # Concatenate the non-code content into a single description string
+                description += (
+                    "\n" + content.get_text() if description else content.get_text()
+                )
         parsed_subtitles.append([subtitle_title, subtitle_parsed_contents])
 
     # Save the results as a structured JSON object
-    output = {'title': title}
+    title = title.strip().replace(" ", "_").lower()
+    output = {"title": title}
     for i in range(len(parsed_subtitles)):
-        output[f'subtitle{i+1}'] = parsed_subtitles[i][1]
+        output[parsed_subtitles[i][0]] = parsed_subtitles[i][1]
 
-    with open('output.json', 'w') as outfile:
-        json.dump(output, outfile, indent=4)
+    with open(f"{output_dir}/{title}.json", "w") as f:
+        json.dump(output, f)
     return output
 
 
-url = 'https://www.dotcms.com/docs/latest/container-api'
-output = parse_markdown_page(url)
-print(output)
+def crawl_strapi_documentation(url, output_dir="outputs"):
+    pass
+
+
+if __name__ == "__main__":
+    output_dir = "outputs"
+
+    # example 1: crawl the description page of dotCMS container API
+    # dotCMS_url = 'https://www.dotcms.com/docs/latest/container-api'
+    # output = crawl_dotCMS_description_page(url=dotCMS_url, output_dir=output_dir)
+
+    # example 2: crawl the documentation page of
+    pass
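For context, the rewritten crawler walks an h2 heading's following <p>/<pre> siblings, accumulating plain text into a running description and flushing one [description, code] pair whenever it reaches a <pre> that contains a <code> child. Below is a minimal, self-contained sketch of that strategy, not part of the commit; the HTML snippet and the curl URL in it are invented for illustration.

from bs4 import BeautifulSoup

html = """
<h2>Get a container</h2>
<p>Retrieve a container by its identifier.</p>
<pre><code>curl -X GET https://demo.dotcms.com/api/v1/containers/working</code></pre>
"""

soup = BeautifulSoup(html, "html.parser")
subtitle = soup.find("h2")
pairs = []
description = ""
for content in subtitle.find_next_siblings(["p", "pre"]):
    # A <pre> with a <code> child closes the current description/code pair
    if content.name == "pre" and content.code:
        if description:  # code blocks with no preceding text are skipped
            pairs.append([
                description.strip().replace("\n", " "),
                content.get_text().strip().replace("\n", " "),
            ])
    else:
        # Everything else is prose; fold it into the running description
        description += (
            "\n" + content.get_text() if description else content.get_text()
        )

print(pairs)
# [['Retrieve a container by its identifier.',
#   'curl -X GET https://demo.dotcms.com/api/v1/containers/working']]

The committed function additionally lower-cases and underscore-joins the page title and dumps the full structure to {output_dir}/{title}.json, keyed by subtitle text.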