fix: 🐛 minor API bug fix

Delete the chat session from the ChatGPT class when a conversation is deleted
Grey_D
2023-03-21 13:16:14 +08:00
parent de9010d25e
commit 2166113902
6 changed files with 131 additions and 44 deletions

NLP_RESTAPI_Generation.py Normal file

@@ -0,0 +1,66 @@
from utils.chatgpt import ChatGPT
from config.chatgpt_config import ChatGPTConfig
import json
import loguru
logger = loguru.logger
# format: {name: {description: str, sample_curl: str, sample_response: str}}
task_prompt_0 = """
I need your help to convert natural-language REST API documentation to the OpenAPI 3.0 standard.
Here are the detailed requirements:
(1) The converted OpenAPI documentation should be in JSON format. I will give you the description for one request at a time, and you return the corresponding JSON. Handle the output with proper indentation, so that I can paste all your outputs together to form a complete document.
(2) For each request, I'll give you a sample curl and a request description. You should formulate the documentation based on them, and in particular fill in the "example" field of the request.
"""
task_prompt_1 = """
Now we start with a service called dotCMS. Please generate a header for OpenAPI 3.0 first. Take care of the indentation so that I can directly put it together with later outputs to form one API document.
It supports an authorization token for each request. A sample curl looks like this:
```
curl --location --request GET 'https://demo.dotcms.com/api/v1/containers/working?containerId=REPLACE_THIS_UUID' \
--header 'Content-Type: application/json' \
--header 'Authorization: Basic YWRtaW5AZG90Y21zLmNvbTphZG1pbg=='
```
"""
task_prompt_2 = """
Let's start now. In the following, I'll give you a sample curl, and a request description.
"""
if __name__ == "__main__":
    code_fragments = []
    chatGPTAgent = ChatGPT(ChatGPTConfig())
    text, conversation_id = chatGPTAgent.send_new_message(task_prompt_0)
    text = chatGPTAgent.send_message(task_prompt_1, conversation_id)
    text = chatGPTAgent.send_message(task_prompt_2, conversation_id)
    # load the documentation
    with open("outputs/container_api.json", "r") as f:
        container_api = json.load(f)
    for key, value in container_api.items():
        if key == "title":
            # TODO: get title
            pass
        elif len(value) != 0:  # is not an empty list
            title_name = key
            for item_list in value:
                description = item_list[0]
                sample_curl = item_list[1]
                # concat description and sample_curl
                ask_text = (
                    "The meta function is "
                    + title_name
                    + "\nThe request description is:"
                    + description
                    + "\nThe sample curl is below: \n"
                    + sample_curl
                    + "\n"
                )
                # send description and curl
                response = chatGPTAgent.send_message(ask_text, conversation_id)
                # extract code fragments
                code_fragments.append(chatGPTAgent.extract_code_fragments(response))
        else:
            logger.info("No request to process.")


@@ -1,13 +0,0 @@
from utils.chatgpt import ChatGPT
from config.chatgpt_config import ChatGPTConfig
import loguru
logger = loguru.logger
# format: {name: {description: str, sample_curl: str, sample_response: str}}
API_description = {}
if __name__ == "__main__":
    chatGPTAgent = ChatGPT()


@@ -2,39 +2,62 @@ import requests
from bs4 import BeautifulSoup
import json
-def parse_markdown_page(url):
+def crawl_dotCMS_description_page(
+    url="https://www.dotcms.com/docs/latest/container-api", output_dir="outputs"
+):
    page = requests.get(url)
-    soup = BeautifulSoup(page.content, 'html.parser')
+    soup = BeautifulSoup(page.content, "html.parser")
    # Extract the title of the page
-    title = soup.find('h1').get_text()
+    title = soup.find("h1").get_text()
    # Extract the subtitles and their descriptions and code chunks
-    subtitles = soup.find_all('h2')
+    subtitles = soup.find_all("h2")
    parsed_subtitles = []
    for subtitle in subtitles:
        subtitle_title = subtitle.get_text()
-        subtitle_contents = subtitle.find_next_siblings(['p', 'pre'])
+        subtitle_contents = subtitle.find_next_siblings(["p", "pre"])
        subtitle_parsed_contents = []
-        for i in range(0, len(subtitle_contents), 2):
-            description = subtitle_contents[i].get_text()
-            if len(subtitle_contents) > i+1:
-                code = subtitle_contents[i+1].get_text()
+        description = ""
+        for content in subtitle_contents:
+            # Check if the content is a code block
+            if content.name == "pre" and content.code:
+                code = content.get_text()
+                # Add the previous description and code chunk to the list
+                if len(description) != 0:  # If there is no description, don't add it
+                    parsed_description = description.strip().replace("\n", " ")
+                    parsed_code = code.strip().replace("\n", " ")
+                    subtitle_parsed_contents.append([parsed_description, parsed_code])
            else:
-                code = ''
-            subtitle_parsed_contents.append([description, code])
+                # Concatenate the non-code content into a single description string
+                description += (
+                    "\n" + content.get_text() if description else content.get_text()
+                )
        parsed_subtitles.append([subtitle_title, subtitle_parsed_contents])
    # Save the results as a structured JSON object
-    output = {'title': title}
+    title = title.strip().replace(" ", "_").lower()
+    output = {"title": title}
    for i in range(len(parsed_subtitles)):
-        output[f'subtitle{i+1}'] = parsed_subtitles[i][1]
-    with open('output.json', 'w') as outfile:
-        json.dump(output, outfile, indent=4)
+        output[parsed_subtitles[i][0]] = parsed_subtitles[i][1]
+    with open(f"{output_dir}/{title}.json", "w") as f:
+        json.dump(output, f)
    return output
-url = 'https://www.dotcms.com/docs/latest/container-api'
-output = parse_markdown_page(url)
-print(output)
+def crawl_strapi_documentation(url, output_dir="outputs"):
+    pass
+if __name__ == "__main__":
+    output_dir = "outputs"
+    # example 1: crawl the description page of dotCMS container API
+    # dotCMS_url = 'https://www.dotcms.com/docs/latest/container-api'
+    # output = crawl_dotCMS_description_page(url=dotCMS_url, output_dir=output_dir)
+    # example 2: crawl the documentation page of
+    pass
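As a rough usage sketch (run from inside the crawler module itself, since its file name is not shown in this diff), the new function writes outputs/<title>.json, e.g. outputs/container_api.json when the page's h1 is "Container API":
```
import os

os.makedirs("outputs", exist_ok=True)
# Uses the function's default URL (the dotCMS container API description page).
output = crawl_dotCMS_description_page(output_dir="outputs")
print(list(output.keys()))  # "title" plus one key per <h2> subtitle on the page
```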

utils/__init__.py Normal file


@@ -13,6 +13,18 @@ from config.chatgpt_config import ChatGPTConfig
logger = loguru.logger
# A sample ChatGPTConfig class has the following structure. All fields can be obtained from the browser's cookies.
# In particular, cf_clearance, __Secure-next-auth.session-token, and _puid are required.
# @dataclasses.dataclass
# class ChatGPTConfig:
#     model: str = "text-davinci-002-render-sha"
#     _puid: str = ""
#     cf_clearance: str = ""
#     session_token: str = ""
#     error_wait_time: float = 20
#     is_debugging: bool = False
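# For example, the scripts in this commit build the client roughly as follows
# (the cookie values here are placeholders, not real credentials):
#     config = ChatGPTConfig(cf_clearance="cf_...", session_token="eyJ...", _puid="user-...")
#     bot = ChatGPT(config)
#     text, conversation_id = bot.send_new_message("Hello")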
class ChatGPT:
    def __init__(self, config: ChatGPTConfig):
@@ -42,13 +54,11 @@ class ChatGPT:
return "Bearer " + authorization
def get_latest_message_id(self, conversation_id):
# 获取会话窗口最新消息id连续对话必须
url = f"https://chat.openai.com/backend-api/conversation/{conversation_id}"
r = requests.get(url, headers=self.headers, proxies=self.proxies)
return r.json()["current_node"]
def send_new_message(self, message):
# 发送新会话窗口消息返回会话id
logger.info(f"send_new_message")
url = "https://chat.openai.com/backend-api/conversation"
message_id = str(uuid1())
@@ -67,7 +77,6 @@ class ChatGPT:
        r = requests.post(url, headers=self.headers, json=data, proxies=self.proxies)
        if r.status_code != 200:
            # If sending the message is blocked, wait 20 seconds and resend it
            logger.error(r.json()["detail"])
            time.sleep(self.config.error_wait_time)
            return self.send_new_message(message)
@@ -89,10 +98,8 @@ class ChatGPT:
        return text, conversation_id
    def send_message(self, message, conversation_id):
        # Send a follow-up message to the specified conversation window (continuous dialogue)
        logger.info(f"send_message")
        url = "https://chat.openai.com/backend-api/conversation"
        # Get the latest message id of the conversation window
        if conversation_id not in self.latest_message_id_dict:
            logger.info(f"conversation_id: {conversation_id}")
            message_id = self.get_latest_message_id(conversation_id)
@@ -115,7 +122,6 @@ class ChatGPT:
        }
        r = requests.post(url, headers=self.headers, json=data, proxies=self.proxies)
        if r.status_code != 200:
            # If sending the message is blocked, wait 20 seconds and resend it
            logger.warning(r.json()["detail"])
            time.sleep(self.config.error_wait_time)
            return self.send_message(message, conversation_id)
@@ -158,6 +164,11 @@ class ChatGPT:
"is_visible": False,
}
r = requests.patch(url, headers=self.headers, json=data, proxies=self.proxies)
# delete conversation id locally
if conversation_id in self.latest_message_id_dict:
del self.latest_message_id_dict[conversation_id]
if r.status_code == 200:
return True
else:


@@ -20,20 +20,21 @@ class ChatGPTBrowser:
    The ChatGPT Wrapper based on browser (playwright).
    It keeps the same interface as ChatGPT.
    """
    def __init__(self, model=None):
        config = Config()
        if model is not None:
-            config.set('chat.model', model)
+            config.set("chat.model", model)
        self.bot = ChatGPT(config)
    def get_authorization(self):
        # TODO: get authorization from browser
        return
    def get_latest_message_id(self, conversation_id):
        # TODO: get latest message id from browser
        return
    def get_conversation_history(self, limit=20, offset=0):
        # Get the conversation id in the history
        return self.bot.get_history(limit, offset)
@@ -52,16 +53,15 @@ class ChatGPTBrowser:
    def extract_code_fragments(self, text):
        code_fragments = re.findall(r"```(.*?)```", text, re.DOTALL)
        return code_fragments
    def delete_conversation(self, conversation_id=None):
        # delete conversation with its uuid
        if conversation_id is not None:
            self.bot.delete_conversation(conversation_id)
if __name__ == "__main__":
    chatgptBrowser_session = ChatGPTBrowser()
    text, conversation_id = chatgptBrowser_session.send_new_message(
        "I am a new tester for RESTful APIs."
    )
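For reference, extract_code_fragments (used by the ChatGPT wrappers above) simply pulls out everything between triple backticks. A minimal sketch of its behaviour on a made-up reply, using the same regex but built by concatenation so the backticks don't collide with this example's own code fence:
```
import re

FENCE = "`" * 3  # literal triple backticks, spelled out to keep this example readable
pattern = FENCE + r"(.*?)" + FENCE
reply = "Here is the spec:\n" + FENCE + 'json\n{"openapi": "3.0.0"}\n' + FENCE + "\nDone."
fragments = re.findall(pattern, reply, re.DOTALL)
# fragments == ['json\n{"openapi": "3.0.0"}\n'] -- the language tag stays in the captured text
```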