From 780187683a98b763c0b8ef9ddfbc4fa4d6672c35 Mon Sep 17 00:00:00 2001
From: Grey_D <GDENG003@e.ntu.edu.sg>
Date: Mon, 17 Apr 2023 00:46:43 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=F0=9F=90=9B=20fix=20chatgpt=20access=20?=
 =?UTF-8?q?issue?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 utils/chatgpt.py      | 13 +++++++++----
 utils/pentest_gpt.py  | 13 +++++++++++++
 utils/task_handler.py |  3 +++
 utils/web_parser.py   | 29 +++++++++++++++++++++++++++--
 4 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/utils/chatgpt.py b/utils/chatgpt.py
index 5201de4..df02dec 100644
--- a/utils/chatgpt.py
+++ b/utils/chatgpt.py
@@ -19,6 +19,7 @@ logger.add(level="WARNING", sink="logs/chatgpt.log")
 
 # A sample ChatGPTConfig class has the following structure. All fields can be obtained from the browser's cookie.
 # In particular, cf_clearance、__Secure-next-auth.session-token、_puid are required.
+# Update: the login is currently not available. The current solution is to paste in the full cookie.
 
 # @dataclasses.dataclass
 # class ChatGPTConfig:
@@ -67,10 +68,11 @@ class ChatGPT:
         self.conversation_dict: Dict[str, Conversation] = {}
         self.headers = dict(
             {
-                "cookie": f"cf_clearance={self.cf_clearance}; _puid={self._puid}; "
-                f"__Secure-next-auth.session-token={self.session_token}",
-                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
+                # "cookie": f"cf_clearance={self.cf_clearance}; _puid={self._puid}; __cf_bm=<redacted>; __Secure-next-auth.callback-url=https%3A%2F%2Fchat.openai.com%2F; __Host-next-auth.csrf-token=<redacted>; _cfuvid=<redacted>"
+                # f"__Secure-next-auth.session-token={self.session_token}",
+                "cookie": self.config.cookie,
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
+                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
                 # 'Content-Type': 'text/event-stream; charset=utf-8',
             }
         )
@@ -80,12 +82,15 @@ class ChatGPT:
         url = "https://chat.openai.com/api/auth/session"
         r = requests.get(url, headers=self.headers)
         authorization = r.json()["accessToken"]
+        # authorization = self.config.accessToken
         return "Bearer " + authorization
 
     def get_latest_message_id(self, conversation_id):
         # Get continuous conversation message id
         url = f"https://chat.openai.com/backend-api/conversation/{conversation_id}"
+        # Never print/log self.headers here: they carry the full session cookie.
         r = requests.get(url, headers=self.headers, proxies=self.proxies)
+
         return r.json()["current_node"]
 
     def _parse_message_raw_output(self, response: requests.Response):
diff --git a/utils/pentest_gpt.py b/utils/pentest_gpt.py
index 22c0343..bbf8930 100644
--- a/utils/pentest_gpt.py
+++ b/utils/pentest_gpt.py
@@ -7,6 +7,7 @@ from prompts.prompt_class import PentestGPTPrompt
 from utils.prompt_select import prompt_select, prompt_ask
 from prompt_toolkit.formatted_text import HTML
 from utils.task_handler import main_task_entry, mainTaskCompleter
+from utils.web_parser import google_search, parse_web
 
 import loguru
 import time, os, textwrap
@@ -230,6 +231,18 @@ class pentestGPT:
             self.console.print("PentestGPT:\n", style="bold green")
             self.console.print(response + "\n", style="yellow")
 
+        # Google
+        elif request_option == "google":
+            # get the users input
+            self.console.print("Please enter your search query. PentestGPT will summarize the info from google.", style="bold green")
+            user_input = prompt_ask(
+                "(End with <shift + right-arrow>) Your input: ", multiline=False
+            )
+            with self.console.status("[bold green] PentestGPT Thinking...") as status:
+                # query the question
+                result = self.google_search(user_input, 5) # 5 results by default
+
+
         # end
         elif request_option == "quit":
             response = False
diff --git a/utils/task_handler.py b/utils/task_handler.py
index 62e3f5e..a40b27e 100644
--- a/utils/task_handler.py
+++ b/utils/task_handler.py
@@ -16,6 +16,7 @@ class mainTaskCompleter(Completer):
         "more",
         "todo",
         "discuss",
+        "google",
         "help",
         "quit",
     ]
@@ -25,6 +26,7 @@ class mainTaskCompleter(Completer):
         "more": HTML("Explain the task with more details."),
         "todo": HTML("Ask <b>PentestGPT</b> for todos."),
         "discuss": HTML("Discuss with <b>PentestGPT</b>."),
+        "google": HTML("Search on Google."),
         "help": HTML("Show the help page."),
         "quit": HTML("End the current session."),
     }
@@ -35,6 +37,7 @@ Below are the available tasks:
  - more: Explain the previous given task with more details.
  - todo: Ask PentestGPT for the task list and what to do next.
  - discuss: Discuss with PentestGPT. You can ask for help, discuss the task, or give any feedbacks.
+ - google: Search your question on Google. The results are automatically parsed by Google.
  - help: Show this help page.
  - quit: End the current session."""
 
diff --git a/utils/web_parser.py b/utils/web_parser.py
index ec12808..983768e 100644
--- a/utils/web_parser.py
+++ b/utils/web_parser.py
@@ -5,6 +5,13 @@ from urllib.parse import urlparse, urljoin
 from googlesearch import search
 
 
+######### Quick documentation #########
+## Use get_response to get the original response from the URL
+## Use parse_web to get the text from the URL (bs4 handled)
+## Use google_search to get the search results from Google. Results are already parsed.
+#######################################
+
+
 # Function to check if the URL is valid
 def is_valid_url(url):
     try:
@@ -29,7 +36,7 @@ def check_local_file_access(url):
     return any(url.startswith(prefix) for prefix in local_prefixes)
 
 
-def get_response(url, timeout=10):
+def get_response(url, timeout=10) -> tuple:
     """
     Get the response from the URL.
 
@@ -76,7 +83,7 @@ def get_response(url, timeout=10):
         return None, "Error: " + str(re)
 
 
-def parse_web(url):
+def parse_web(url) -> str:
     # create a user agent header
     response, potential_error = get_response(url)
     if response is None:
@@ -98,6 +105,24 @@ def parse_web(url):
 
     return text
 
+def google_search(keyword, num_results=5) -> dict:
+    """
+    Search on Google and return the parsed content of each result URL.
+
+    Parameters:
+    ----------
+        keyword (str): The keyword to search on Google.
+        num_results (int): The number of results to return.
+
+    Returns:
+    -------
+        result (dict): Format: {"keyword": keyword, "search_result": {url: content}}
+
+    """
+    search_result = {}
+    for url in search(keyword, tld="com", num=num_results, stop=num_results, pause=2):
+        search_result[url] = parse_web(url)
+    return {"keyword": keyword, "search_result": search_result}
 
 if __name__ == "__main__":
     # test to query google search on "what is penetration testing?"