# TODO: parse the web contents with bs4.
import requests
from bs4 import BeautifulSoup


def parse_web(url: str, timeout: float = 10) -> str:
    """Fetch *url* and return its visible text, one non-empty chunk per line.

    The page is downloaded with a desktop-browser User-Agent header,
    ``<script>`` and ``<style>`` elements are removed, and the remaining
    text is split into whitespace-trimmed chunks joined by newlines.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The extracted text, or the string ``"Error: HTTP <code> error"``
        when the response status code is 400 or greater.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, invalid URL...).
    """
    # Send a browser-like User-Agent header with the request.
    user_agent_header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
    }

    response = requests.get(url, headers=user_agent_header, timeout=timeout)

    # HTTP errors are reported as a string rather than raised.
    if response.status_code >= 400:
        return f"Error: HTTP {response.status_code} error"

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop script and style elements so their contents do not leak into
    # the extracted text.
    for tag in soup(["script", "style"]):
        tag.decompose()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # NOTE(review): splitting on a single space puts every word on its own
    # output line; the widespread form of this idiom splits on a double
    # space ("  ") to keep phrases intact -- confirm which is intended.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    return "\n".join(chunk for chunk in chunks if chunk)