From 2166113902a491ae2f4e4e5ed8b8c60da217281a Mon Sep 17 00:00:00 2001 From: Grey_D Date: Tue, 21 Mar 2023 13:16:14 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=F0=9F=90=9B=20minor=20API=20bug=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete the chat session from class --- NLP_RESTAPI_Generation.py | 66 +++++++++++++++++++++++++++++++++ tasks/NLP_RESTAPI_Generation.py | 13 ------- tasks/crawler.py | 61 ++++++++++++++++++++---------- utils/__init__.py | 0 utils/chatgpt.py | 23 +++++++++--- utils/chatgpt_browser.py | 12 +++--- 6 files changed, 131 insertions(+), 44 deletions(-) create mode 100644 NLP_RESTAPI_Generation.py delete mode 100644 tasks/NLP_RESTAPI_Generation.py create mode 100644 utils/__init__.py diff --git a/NLP_RESTAPI_Generation.py b/NLP_RESTAPI_Generation.py new file mode 100644 index 0000000..bd014b7 --- /dev/null +++ b/NLP_RESTAPI_Generation.py @@ -0,0 +1,66 @@ +from utils.chatgpt import ChatGPT +from config.chatgpt_config import ChatGPTConfig +import json + +import loguru + +logger = loguru.logger + +# format: {name: {description: str, sample_curl: str, sample_response: str}} + +task_prompt_0 = """ +I need your help to convert natural language REST API documentation to OpenAPI 3.0 standard. +Here are the detailed requirements: +(1) I hope that the converted openapi documentation is in json format. I will give you the description for one request at a time, and you return me the corresponding json. You should handle the output with proper indent, so that I could paste all your outputs together to form a complete documentation. +(2) For each request, I'll give you a sample curl, and a request description. You should formulate the documentation based on them, especially to fill the "example" field of the request. +""" + +task_prompt_1 = """ +Now we start with a service called dotCMS. Please generate a header for OpenAPI 3.0 first. Take care of the indentation so that I can directly put it together with later outputs to form one API documentation. +It supports authorization token for each request. A sample curl looks like this: +``` +curl --location --request GET 'https://demo.dotcms.com/api/v1/containers/working?containerId=REPLACE_THIS_UUID' \ +--header 'Content-Type: application/json' \ +--header 'Authorization: Basic YWRtaW5AZG90Y21zLmNvbTphZG1pbg==' +``` +""" + +task_prompt_2 = """ +Let's start now. In the following, I'll give you a sample curl, and a request description. +""" + +if __name__ == "__main__": + code_fragments = [] + chatGPTAgent = ChatGPT(ChatGPTConfig()) + text, conversation_id = chatGPTAgent.send_new_message(task_prompt_0) + text = chatGPTAgent.send_message(task_prompt_1, conversation_id) + text = chatGPTAgent.send_message(task_prompt_2, conversation_id) + + # load the documentation + with open("outputs/container_api.json", "r") as f: + container_api = json.load(f) + for key, value in container_api.items(): + if key == "title": + # TODO: get title + pass + elif len(value) != 0: # is not an empty list + title_name = key + for item_list in value: + description = item_list[0] + sample_curl = item_list[1] + # concat description and sample_curl + ask_text = ( + "The meta function is " + + title_name + + "\nThe request description is:" + + description + + "\nThe sample curl is below: \n" + + sample_curl + + "\n" + ) + # send description and curl + response = chatGPTAgent.send_message(ask_text, conversation_id) + # extract code fragments + code_fragments.append(chatGPTAgent.extract_code_fragments(response)) + else: + logger.info("No request to process.") diff --git a/tasks/NLP_RESTAPI_Generation.py b/tasks/NLP_RESTAPI_Generation.py deleted file mode 100644 index 3833c97..0000000 --- a/tasks/NLP_RESTAPI_Generation.py +++ /dev/null @@ -1,13 +0,0 @@ -from utils.chatgpt import ChatGPT -from config.chatgpt_config import ChatGPTConfig - -import loguru - -logger = loguru.logger - -# format: {name: {description: str, sample_curl: str, sample_response: str}} -API_description = {} - - -if __name__ == "__main__": - chatGPTAgent = ChatGPT() diff --git a/tasks/crawler.py b/tasks/crawler.py index 0962877..acd86ae 100644 --- a/tasks/crawler.py +++ b/tasks/crawler.py @@ -2,39 +2,62 @@ import requests from bs4 import BeautifulSoup import json -def parse_markdown_page(url): + +def crawl_dotCMS_description_page( + url="https://www.dotcms.com/docs/latest/container-api", output_dir="outputs" +): page = requests.get(url) - soup = BeautifulSoup(page.content, 'html.parser') + soup = BeautifulSoup(page.content, "html.parser") # Extract the title of the page - title = soup.find('h1').get_text() + title = soup.find("h1").get_text() # Extract the subtitles and their descriptions and code chunks - subtitles = soup.find_all('h2') + subtitles = soup.find_all("h2") parsed_subtitles = [] for subtitle in subtitles: subtitle_title = subtitle.get_text() - subtitle_contents = subtitle.find_next_siblings(['p', 'pre']) + subtitle_contents = subtitle.find_next_siblings(["p", "pre"]) subtitle_parsed_contents = [] - for i in range(0, len(subtitle_contents), 2): - description = subtitle_contents[i].get_text() - if len(subtitle_contents) > i+1: - code = subtitle_contents[i+1].get_text() + description = "" + for content in subtitle_contents: + # Check if the content is a code block + if content.name == "pre" and content.code: + code = content.get_text() + # Add the previous description and code chunk to the list + if len(description) != 0: # If there is no description, don't add it + parsed_description = description.strip().replace("\n", " ") + parsed_code = code.strip().replace("\n", " ") + subtitle_parsed_contents.append([parsed_description, parsed_code]) + else: - code = '' - subtitle_parsed_contents.append([description, code]) + # Concatenate the non-code content into a single description string + description += ( + "\n" + content.get_text() if description else content.get_text() + ) parsed_subtitles.append([subtitle_title, subtitle_parsed_contents]) # Save the results as a structured JSON object - output = {'title': title} + title = title.strip().replace(" ", "_").lower() + output = {"title": title} for i in range(len(parsed_subtitles)): - output[f'subtitle{i+1}'] = parsed_subtitles[i][1] - - with open('output.json', 'w') as outfile: - json.dump(output, outfile, indent=4) + output[parsed_subtitles[i][0]] = parsed_subtitles[i][1] + with open(f"{output_dir}/{title}.json", "w") as f: + json.dump(output, f) return output -url = 'https://www.dotcms.com/docs/latest/container-api' -output = parse_markdown_page(url) -print(output) \ No newline at end of file + +def crawl_strapi_documentation(url, output_dir="outputs"): + pass + + +if __name__ == "__main__": + output_dir = "outputs" + + # example 1: crawl the description page of dotCMS container API + # dotCMS_url = 'https://www.dotcms.com/docs/latest/container-api' + # output = crawl_dotCMS_description_page(url=dotCMS_url, output_dir=output_dir) + + # example 2: crawl the documentation page of + pass diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/chatgpt.py b/utils/chatgpt.py index d3daf1e..96658e2 100644 --- a/utils/chatgpt.py +++ b/utils/chatgpt.py @@ -13,6 +13,18 @@ from config.chatgpt_config import ChatGPTConfig logger = loguru.logger +# A sample ChatGPTConfig class has the following structure. All fields can be obtained from the browser's cookie. +# In particular, cf_clearance、__Secure-next-auth.session-token、_puid are required. + +# @dataclasses.dataclass +# class ChatGPTConfig: +# model: str = "text-davinci-002-render-sha" +# _puid: str = "" +# cf_clearance: str = "" +# session_token: str = "" +# error_wait_time: float = 20 +# is_debugging: bool = False + class ChatGPT: def __init__(self, config: ChatGPTConfig): @@ -42,13 +54,11 @@ class ChatGPT: return "Bearer " + authorization def get_latest_message_id(self, conversation_id): - # 获取会话窗口最新消息id,连续对话必须 url = f"https://chat.openai.com/backend-api/conversation/{conversation_id}" r = requests.get(url, headers=self.headers, proxies=self.proxies) return r.json()["current_node"] def send_new_message(self, message): - # 发送新会话窗口消息,返回会话id logger.info(f"send_new_message") url = "https://chat.openai.com/backend-api/conversation" message_id = str(uuid1()) @@ -67,7 +77,6 @@ class ChatGPT: r = requests.post(url, headers=self.headers, json=data, proxies=self.proxies) if r.status_code != 200: - # 发送消息阻塞时等待20秒从新发送 logger.error(r.json()["detail"]) time.sleep(self.config.error_wait_time) return self.send_new_message(message) @@ -89,10 +98,8 @@ class ChatGPT: return text, conversation_id def send_message(self, message, conversation_id): - # 指定会话窗口发送连续对话消息 logger.info(f"send_message") url = "https://chat.openai.com/backend-api/conversation" - # 获取会话窗口最新消息id if conversation_id not in self.latest_message_id_dict: logger.info(f"conversation_id: {conversation_id}") message_id = self.get_latest_message_id(conversation_id) @@ -115,7 +122,6 @@ class ChatGPT: } r = requests.post(url, headers=self.headers, json=data, proxies=self.proxies) if r.status_code != 200: - # 发送消息阻塞时等待20秒从新发送 logger.warning(r.json()["detail"]) time.sleep(self.config.error_wait_time) return self.send_message(message, conversation_id) @@ -158,6 +164,11 @@ class ChatGPT: "is_visible": False, } r = requests.patch(url, headers=self.headers, json=data, proxies=self.proxies) + + # delete conversation id locally + if conversation_id in self.latest_message_id_dict: + del self.latest_message_id_dict[conversation_id] + if r.status_code == 200: return True else: diff --git a/utils/chatgpt_browser.py b/utils/chatgpt_browser.py index 44893bd..c796f85 100644 --- a/utils/chatgpt_browser.py +++ b/utils/chatgpt_browser.py @@ -20,20 +20,21 @@ class ChatGPTBrowser: The ChatGPT Wrapper based on browser (playwright). It keeps the same interface as ChatGPT. """ + def __init__(self, model=None): config = Config() if model is not None: - config.set('chat.model', model) + config.set("chat.model", model) self.bot = ChatGPT(config) def get_authorization(self): # TODO: get authorization from browser - return + return def get_latest_message_id(self, conversation_id): # TODO: get latest message id from browser - return - + return + def get_conversation_history(self, limit=20, offset=0): # Get the conversation id in the history return self.bot.get_history(limit, offset) @@ -52,16 +53,15 @@ class ChatGPTBrowser: def extract_code_fragments(self, text): code_fragments = re.findall(r"```(.*?)```", text, re.DOTALL) return code_fragments - def delete_conversation(self, conversation_id=None): # delete conversation with its uuid if conversation_id is not None: self.bot.delete_conversation(conversation_id) + if __name__ == "__main__": chatgptBrowser_session = ChatGPTBrowser() text, conversation_id = chatgptBrowser_session.send_new_message( "I am a new tester for RESTful APIs." ) -