Merge pull request #11 from GreyDGL/browse

Browse
Grey_D 2023-04-17 10:22:24 +08:00 committed by GitHub
commit 05bb7ece79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 161 additions and 11 deletions

View File: README.md

@@ -1,5 +1,10 @@
# PentestGPT
17/04/2023: Due to additional verification by OpenAI, you now need to pass the full `cookie` variable into the session.
You can obtain it by visiting `https://chat.openai.com/api/auth/session` and examining the cookie of the HTTP request in your browser's developer tools. Check the updated chatgpt_config_sample, and see the sanity-check sketch below.
I'll try to fix this properly later.
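A quick way to sanity-check the pasted value (an illustrative sketch, not part of the repo; substitute your own cookie string):

```python
import requests

cookie = "<paste the full cookie string from your browser here>"
r = requests.get(
    "https://chat.openai.com/api/auth/session",
    headers={
        "cookie": cookie,
        # any recent desktop browser user-agent works here
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    },
)
# a valid cookie returns JSON containing an "accessToken" field
print(r.status_code, "accessToken" in r.text)
```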
## Introduction
**PentestGPT** is a penetration testing tool empowered by **ChatGPT**. It is designed to automate the penetration testing process. Built on top of ChatGPT, it operates in an interactive mode to guide penetration testers through both overall progress and specific operations.
A sample test run of **PentestGPT** on a target VulnHub machine (Hackable II) is available [here](./resources/PentestGPT_Hackable2.pdf).

View File: chatgpt_config_sample

@@ -7,5 +7,8 @@ class ChatGPTConfig:
_puid: str = ""
cf_clearance: str = ""
session_token: str = ""
accessToken: str = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1UaEVOVUpHTkVNMVFURTRNMEZCTWpkQ05UZzVNRFUxUlRVd1FVSkRNRU13UmtGRVFrRXpSZyJ9.eyJodHRwczovL2FwaS5vcGVuYWkuY29tL3Byb2ZpbGUiOnsiZW1haWwiOiJnZWxlaUBxdWFudHN0YW1wLmNvbSIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlfSwiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS9hdXRoIjp7InVzZXJfaWQiOiJ1c2VyLW53ZmxBZzJ0aGxTVkh6cEJnd0dGUmdxRSJ9LCJpc3MiOiJodHRwczovL2F1dGgwLm9wZW5haS5jb20vIiwic3ViIjoiZ29vZ2xlLW9hdXRoMnwxMTM3MDI0Nzk2MzI2NTQ3NTk3NjIiLCJhdWQiOlsiaHR0cHM6Ly9hcGkub3BlbmFpLmNvbS92MSIsImh0dHBzOi8vb3BlbmFpLm9wZW5haS5hdXRoMGFwcC5jb20vdXNlcmluZm8iXSwiaWF0IjoxNjgxNjM3Mjc1LCJleHAiOjE2ODI4NDY4NzUsImF6cCI6IlRkSkljYmUxNldvVEh0Tjk1bnl5d2g1RTR5T282SXRHIiwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCBtb2RlbC5yZWFkIG1vZGVsLnJlcXVlc3Qgb3JnYW5pemF0aW9uLnJlYWQgb2ZmbGluZV9hY2Nlc3MifQ.n1FPeYK6Bu2VCJf5NtY2yro9qzpaA8OfUcscXbUoHkIeuth__LHdbjjnwFbO9midXiaP5nY2wi86LMIWZUU99P-_A6CodAT-Qs26Hef3-6daSFfYuL_tgMJ6jcPGq9KGlGIhytcJ7v4v90XSV5M9JjWMHMsTbMGhz1MuHplQGLZdjDG23mH_qxRUWduhEDExnEkBMNgiFT7POs30fNU5YzLz15w7UBnBTEctH60IfUFlni6C4ibybVZLcbY4IOoAIifW-okKhBpazlSIPDcLAq3CG7nzELbML69omdEM0qbEUCEOiB-E6Z0ICbWJFJGmACGHjycuN2d8F2oDbyGTeQ"
# pass the complete cookie string
cookie: str = "intercom-device-id-dgkjq2bp=0b79bf97-190f-4146-90b1-8e5ee76889a9; __Host-next-auth.csrf-token=0b7e3bb24cc2f1d21030a03269484f928527e4aab16c9b4d344529ee46ca9fe8%7C2a6e7e38eaac7ca8cbcae40912bade72150d8aa18317e3db74f49b125957613a; oai-asdf-ugss=user-nwflAg2thlSVHzpBgwGFRgqE; oai-asdf-gsspc=user-nwflAg2thlSVHzpBgwGFRgqE; intercom-id-dgkjq2bp=73b81fc6-1a89-4778-8602-938e95bb1c8f; cf_clearance=_z12D.4Y9J04S58iQVBXt_SuylQgcf_.9ZhUa6PdQgg-1680495871-0-1-b0e12fb5.3143ca51.e7e07055-160; _ga=GA1.1.251554109.1679673782; _ga_9YTZJE58M9=GS1.1.1681351156.9.1.1681351217.0.0.0; _cfuvid=FpwoyzyYPrG0a0NqtkhvayIWPZmzOQc4B9g3pEunvo8-1681470057686-0-604800000; cf_clearance=Abi_.usI_LK3Eoensyvi9UgaMHxP8uRhgqHgEgwYNS8-1681634935-0-1-30d8c658.7e8ece0b.32f642bc-160; __Secure-next-auth.callback-url=https%3A%2F%2Fchat.openai.com%2F; _puid=user-nwflAg2thlSVHzpBgwGFRgqE:1681637789-PBgn031oMYpyBkSzmaJs5%2F9gCN%2BWMUl6%2B4eJKNKXBHk%3D; intercom-session-dgkjq2bp=TjZpVEU3S0RDa0dZVkJuS3QxNkJpNE44L2VKNGswMnA0ZTJCYi9ud3B5MlN3ZXVQN1Q0Y2htcThUcGR1V3N1Vi0tbGJLd2ViR1hZL0oxek9Wc2NYbjBOdz09--237d1264dc1f1bb1e8751c867154000e0febe08f; __cf_bm=Nl7zy2rM7a8Ix1MB64EyiG5vePkLZ0HX2RtJuj1SYT4-1681638176-0-AUS+5CeavTt8Xs/aw07CxlVVfvtjjfcLCxru0byb1OdTmb5UpP6kbVhesib0j8vJblhaO19VTr7wVEtr46iiA7D+4zVNYD0b4Lh6gZ5wBXlSmf0lrOl/vDhtCn4WOiY92Uu2+6JqWAk6gtHYfSx+waQqzaKIPQnQMNti2IrpyZZd; __Secure-next-auth.session-token=eyJhbGciOiJkaXIiLCJlbmMiOiJBMjU2R0NNIn0..AWS-LTB6eFjfk-oM.7MNUUFAU1yRzRnRzwNlefn-076MjjtFxN2gylg6TN6IJGk19QJ5EkAeLwqgc1ce0YDhbkKCSOf0Y3D5C_ww--qFNfq5gGszIDjcjT7AKx_FMwjfRqVFjAJ2EUvnyZxTGRRXNXUQY21zjYndcETy66mpbVI1v2tZin7ge_9oqqNh_ikedVql0Gn9awBL5qS1hI8NHaAa9EjuJCJjtw4I4cCQ7AlYc52Ze3__cGQxR01cjPZq0HFrVlu89oIOqetexubO1OSYqaDc351Lk16C_dSLQz7XxTv5P_kDv2tUmTExx36z9DuuJY8InwrpHTPa6xH9vqy9HmDj9Yu_56fsNeVhArdbE4ulKzkZo-N1sZfTu2TzJVdOJWlIQSR6Vbdp5BEdVm_a8iYrAC4sCyswSYhnUm-EjxA4UGMFB-K7eDMwbimy1rYKk9hCwLgX2LWSPcRuFNvXvv4mZFPwIO3ZmbJ-U6vE__IaMwq-6fHqvQiQEjjInU5LyIGp4EFrXvlWI3iP7vGRXDneMsRhc84mL-iDYlOddfqWvhko-RLW3X-soMWH1ARpiKyEZCYAPSRZ_HnYjWWeFfEgvEy-lFb5MPPGPvpb9DC3PZty9NoQwQIIxKvWLxzkH4kEiIfQ7Wz4_tXadvgPddqkeBTHAUwvzThPDP_zL6KGy_i09xWAmAwgWU221AXQMc-RD1T0X8tq6ivQBSjVvtgpx4wP0tltG81hiAlTXZMjqG5hWGlWkPDg-uT2SafB5t-8rPnaj_ZIhej3wa78jjgfcJCairEtlFerNtZsCRnoslx9yRIAFuPs4ogIkCJdzNb5vfkJM1djZDaiFBRYIocjjE-AXNJY-bngjDQsofyWxFF_tX3hSwkF4KovMcRyvhp7Jn4QIpioL-n8D3uf11AqSINXY3jXkAXRumBxVtNBgZEEKuaBEqmGckQW5r7UTYai2f9Izoml9d-ccdJdpAS4BigY5_QTZc4fVZ8UJ8wr5rPS-hnx-L-Fp2rhwwqNCDUINKX9v5X3LATibQJkWovxw2HCJK-KVrNlsrh8kRpzqZpjVQ-G7niq8sc5ckE-8IrJuUwNSiDIvi9xHgCl8XQGP0qEivis-cHX_rcNZL7giZvm0jIxra9V1PABxrDpjLuvaLBTquhdvE296Tn7qesKKeVlo4TmxSg3kO6Tro4SFi_-bpNPuS1xXMkGdkP63fFfRSK6k4qPn87MQLu7DDvi_Ccdh_gE8mDhfMKcW6UE9IBf4AMQ3fUbWuQ7hCDtPT-ZLEPR6BkUkyxlkuzImNWWWFK0Q2m0lHsr3WSijm8_gQbWZ1Z1Nw9QNUPWzQVAP2sY6LTvCzq94bR5uwIjjcul7aDSZGG0NWnCuj6tN9jhdDKL3Ble8zJG7v_MBz0ZU64EFCsmM6oplfAPXXofNjYeaViccngn2q4ecosbUCrdw4x6cwBU1l2B_P40qTUGg4Pe301A6qe-7qnuFFe6rYp_ylPB7ADCxYpxHOjeR_bUx-cPd15_FyeBhY7Gly7CETli-LwJaQu51nPccxPj90H49b4NLWzo83Gq4f2Ah4Li-QH-iK_AwZPqhA40-0awZzq9PwGAfZKoEFppwvxBRQ-nRxCz6G_BCLPPZg1bKrLxF-uQWQl3pk1eFmdUaPtqvMEHgrT-28QTSxpzyu_Lh1aoJRuA-P8BeEnanIPJurAxFhspZCzX-37tY7AlQKcMScouASpNP7obQ4DR1nhAmhzj_LpV1lef9hx7EHfknKcxnsyJzg0QNnTpNL_rjMagM5Ni25A8fy5WGXlcjAmp8mlYYuyPQ1XQOxHVy4CrgyyS9t6R-t8s70x3jwFc000mnouU5exF8oijkR41dS-r0A6_Wc4dge6yEktodQVpM2Gqzkdl3pmhH_uIcvfDXiHqu-6mbGhaZrH6yk-L8VAtRut8iSKj0MUX8eBztTPYEaBFuYrH8F36aolFMBuOhBU9FxjXOx2xthOVg-UQttbZA4V5QaiJTIuQMOMis-0V9ErL1Z1Y5fh3vqS408guOheb8fhSobAJD9Bq9hMaJVf-rnWBtjMmnss-jU9VC7fFn1tgk-TAaUauKIyxd-RaWrUfwWoc
WBmtSO_Y0kwPG5kp5aXv7BUd_SyF_9AipuoGc8z7tfZTlPhKuogL9zZJ7DBz4BlidOtSuzeQQS1yx4GM4TJGBgdfaESQlifYyV4Xp21keyCBFf8xWA_HCKGdyXKmO80YtMoOa_WoCIf6KYo_w2-h5BbHe0GhrObIDWD99-TJB-k0Gh17b6TLH9L2J1MO1_wiI3HlKPERYPthE18OQhkktu8iGQ2SeQj1g-Yuh7wrwcFjOpzfM-xZoL2SfXc1Q54fWtglzkophWhRgX9mObEamV97aGrZH-SIT8HswDaH4gzAi094PaxqSQ47lT5iTZerffYWCzL86Y2JTXX7Hoqk84TyhRa6GtDkPu3PLjSYYjbRK7F4iCJ2Vk7UJ3OZ4Hx659gxZ9OO6az4Ymz1TDcpDXgucCpeF86pTsaYrhhe5YP6K6q_C18l7_iViDii41jUbcZpcAFLNWg8clni8Q1X5kF2lM0g3C3ezLcWxjk16bLpK11HPmk5IEEY6DK1TLKoHR6ZT.xzlZwEwM0beQe45RQIeSCw"
error_wait_time: float = 20
is_debugging: bool = False

View File: requirements.txt

@@ -8,4 +8,5 @@ loguru
beautifulsoup4~=4.11.2
colorama
rich
prompt-toolkit
google

tests/testBrowsing.py Normal file
View File

@@ -0,0 +1,9 @@
import unittest
class TestBrowsing(unittest.TestCase):
pass
if __name__ == "__main__":
unittest.main()
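# Illustrative sketch, not part of this commit: the placeholder above could grow
# into offline tests for the URL helpers this PR adds in utils/web_parser.py
# (the import path assumes tests run from the repo root):
from utils.web_parser import check_local_file_access, is_valid_url, sanitize_url

class TestBrowsingHelpers(unittest.TestCase):
    def test_is_valid_url(self):
        self.assertTrue(is_valid_url("https://example.com/page"))
        self.assertFalse(is_valid_url("not a url"))

    def test_sanitize_url_strips_query(self):
        self.assertEqual(
            sanitize_url("http://example.com/path?q=1"), "http://example.com/path"
        )

    def test_local_file_access_is_flagged(self):
        self.assertTrue(check_local_file_access("file:///etc/passwd"))
        self.assertFalse(check_local_file_access("https://example.com"))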

View File

@@ -19,6 +19,7 @@ logger.add(level="WARNING", sink="logs/chatgpt.log")
# A sample ChatGPTConfig class has the following structure. All fields can be obtained from the browser's cookie.
# In particular, cf_clearance, __Secure-next-auth.session-token, and _puid are required.
# Update: login via these individual fields is currently unavailable. The current workaround is to paste in the full cookie string.
# @dataclasses.dataclass
# class ChatGPTConfig:
@@ -67,10 +68,11 @@ class ChatGPT:
self.conversation_dict: Dict[str, Conversation] = {}
self.headers = dict(
{
"cookie": f"cf_clearance={self.cf_clearance}; _puid={self._puid}; "
f"__Secure-next-auth.session-token={self.session_token}",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
# "cookie": f"cf_clearance={self.cf_clearance}; _puid={self._puid}; __cf_bm=Nl7zy2rM7a8Ix1MB64EyiG5vePkLZ0HX2RtJuj1SYT4-1681638176-0-AUS+5CeavTt8Xs/aw07CxlVVfvtjjfcLCxru0byb1OdTmb5UpP6kbVhesib0j8vJblhaO19VTr7wVEtr46iiA7D+4zVNYD0b4Lh6gZ5wBXlSmf0lrOl/vDhtCn4WOiY92Uu2+6JqWAk6gtHYfSx+waQqzaKIPQnQMNti2IrpyZZd; __Secure-next-auth.callback-url=https%3A%2F%2Fchat.openai.com%2F; __Host-next-auth.csrf-token=0b7e3bb24cc2f1d21030a03269484f928527e4aab16c9b4d344529ee46ca9fe8%7C2a6e7e38eaac7ca8cbcae40912bade72150d8aa18317e3db74f49b125957613a;_cfuvid=FpwoyzyYPrG0a0NqtkhvayIWPZmzOQc4B9g3pEunvo8-1681470057686-0-604800000"
# f"__Secure-next-auth.session-token={self.session_token}",
"cookie": self.config.cookie,
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
# 'Content-Type': 'text/event-stream; charset=utf-8',
}
)
@@ -80,12 +82,15 @@ class ChatGPT:
url = "https://chat.openai.com/api/auth/session"
r = requests.get(url, headers=self.headers)
authorization = r.json()["accessToken"]
# authorization = self.config.accessToken
return "Bearer " + authorization
def get_latest_message_id(self, conversation_id):
# Get continuous conversation message id
url = f"https://chat.openai.com/backend-api/conversation/{conversation_id}"
print(self.headers)
r = requests.get(url, headers=self.headers, proxies=self.proxies)
return r.json()["current_node"]
def _parse_message_raw_output(self, response: requests.Response):

View File

@@ -7,6 +7,7 @@ from prompts.prompt_class import PentestGPTPrompt
from utils.prompt_select import prompt_select, prompt_ask
from prompt_toolkit.formatted_text import HTML
from utils.task_handler import main_task_entry, mainTaskCompleter
from utils.web_parser import google_search, parse_web
import loguru
import time, os, textwrap
@@ -230,6 +231,20 @@ class pentestGPT:
self.console.print("PentestGPT:\n", style="bold green")
self.console.print(response + "\n", style="yellow")
# Google
elif request_option == "google":
# get the users input
self.console.print(
"Please enter your search query. PentestGPT will summarize the info from google.",
style="bold green",
)
user_input = prompt_ask(
"(End with <shift + right-arrow>) Your input: ", multiline=False
)
with self.console.status("[bold green] PentestGPT Thinking...") as status:
# query the question
result = self.google_search(user_input, 5) # 5 results by default
# end
elif request_option == "quit":
response = False
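# Illustrative sketch, not part of this commit: `result` above is computed but not
# yet displayed or fed back to the model. A hypothetical helper that renders it with
# the rich console already used here, truncating each page to keep output readable
# (textwrap is already imported in this module):
def _render_search_result(console, result):
    for url, content in result["search_result"].items():
        console.print(f"[bold blue]{url}[/bold blue]")
        console.print(textwrap.shorten(content, width=500, placeholder=" ..."), style="yellow")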

View File: utils/task_handler.py

@@ -16,6 +16,7 @@ class mainTaskCompleter(Completer):
"more",
"todo",
"discuss",
"google",
"help",
"quit",
]
@@ -25,6 +26,7 @@
"more": HTML("Explain the task with more details."),
"todo": HTML("Ask <b>PentestGPT</b> for todos."),
"discuss": HTML("Discuss with <b>PentestGPT</b>."),
"google": HTML("Search on Google."),
"help": HTML("Show the help page."),
"quit": HTML("End the current session."),
}
@@ -35,6 +37,7 @@ Below are the available tasks:
- more: Explain the previously given task in more detail.
- todo: Ask PentestGPT for the task list and what to do next.
- discuss: Discuss with PentestGPT. You can ask for help, discuss the task, or give any feedback.
- google: Search your question on Google. The results are automatically parsed and summarized by PentestGPT.
- help: Show this help page.
- quit: End the current session."""
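# Illustrative sketch, not part of this commit: a completer like mainTaskCompleter
# is typically wired into a prompt_toolkit session along these lines; the repo's
# actual prompt plumbing lives in utils/prompt_select.py and may differ.
from prompt_toolkit import PromptSession

def demo_prompt() -> str:
    session = PromptSession()
    # offers tab completion over the tasks defined above ("next", "more", "google", ...)
    return session.prompt("> ", completer=mainTaskCompleter())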

View File: utils/web_parser.py

@@ -1,14 +1,94 @@
# TODO: parse the web contents with bs4.
# Use functions from Auto-GPT: https://github.com/Torantulino/Auto-GPT/blob/master/scripts/browse.py
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
from googlesearch import search
######### Quick documentation #########
## Use get_response to get the raw response from the URL
## Use parse_web to get the text from the URL (bs4 handled)
## Use google_search to get the search results from Google. Results are already parsed.
#######################################
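## Example usage (illustrative sketch, not part of this commit):
##   response, error = get_response("https://example.com")
##   page_text = parse_web("https://example.com")
##   results = google_search("what is penetration testing?", num_results=3)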
# Function to check if the URL is valid
def is_valid_url(url):
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except ValueError:
return False
# Function to sanitize the URL
def sanitize_url(url):
return urljoin(url, urlparse(url).path)
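# Function to check whether the URL points at a local file or localhost (added comment; blocked in get_response below)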
def check_local_file_access(url):
local_prefixes = [
"file:///",
"file://localhost",
"http://localhost",
"https://localhost",
]
return any(url.startswith(prefix) for prefix in local_prefixes)
def get_response(url, timeout=10) -> tuple:
"""
Get the response from the URL.
Parameters:
----------
url (str): The URL to get the response from.
timeout (int): The timeout for the HTTP request.
Returns:
-------
response (requests.models.Response): The response from the URL.
error (str): The error message if any.
"""
try:
# Restrict access to local files
if check_local_file_access(url):
raise ValueError("Access to local files is restricted")
# Most basic check if the URL is valid:
if not url.startswith("http://") and not url.startswith("https://"):
raise ValueError("Invalid URL format")
sanitized_url = sanitize_url(url)
user_agent_header = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}
response = requests.get(
sanitized_url, headers=user_agent_header, timeout=timeout
)
# Check if the response contains an HTTP error
if response.status_code >= 400:
return None, "Error: HTTP " + str(response.status_code) + " error"
return response, None
except ValueError as ve:
# Handle invalid URL format
return None, "Error: " + str(ve)
except requests.exceptions.RequestException as re:
# Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.)
return None, "Error: " + str(re)
def parse_web(url) -> str:
response, potential_error = get_response(url)
if response is None:
return potential_error
# Check if the response contains an HTTP error
if response.status_code >= 400:
return "Error: HTTP " + str(response.status_code) + " error"
@@ -24,3 +104,32 @@ def parse_web(url):
text = "\n".join(chunk for chunk in chunks if chunk)
return text
def google_search(keyword, num_results=5) -> dict:
"""
Search on Google and return the results.
Parameters:
----------
keyword (str): The keyword to search on Google.
num_results (int): The number of results to return.
Returns:
-------
result (dict): The search results. Format: {"keyword": keyword, "search_result": {url: content}}
"""
search_result = {}
for url in search(keyword, tld="com", num=num_results, stop=num_results, pause=2):
search_result[url] = parse_web(url)
result = {"keyword": keyword, "search_result": search_result}
return result
if __name__ == "__main__":
# test to query google search on "what is penetration testing?"
query = "what is penetration testing?"
for url in search(query, tld="com", num=5, stop=5, pause=2):
print(url)
web_content = parse_web(url)
print(web_content)