import json

import requests
from bs4 import BeautifulSoup


def _section_elements(heading):
    """Return the <p>/<pre> siblings after *heading*, stopping at the next <h2>."""
    elements = []
    # Ask for <h2> as well so we can tell where this section ends; without
    # that boundary every later section's content would be attributed to
    # this heading too.
    for sibling in heading.find_next_siblings(['h2', 'p', 'pre']):
        if sibling.name == 'h2':
            break
        elements.append(sibling)
    return elements


def _pair_descriptions_with_code(elements):
    """Group <p>/<pre> elements into ``[description, code]`` pairs.

    Each <p> opens a new pair with an empty code string. A <pre> fills the
    code slot of the pair opened by the immediately preceding <p>; a <pre>
    with no such open pair gets its own pair with an empty description.
    """
    pairs = []
    open_entry = False  # True while the last pair still lacks its code chunk
    for element in elements:
        text = element.get_text()
        if element.name == 'pre':
            if open_entry:
                pairs[-1][1] = text
            else:
                pairs.append(['', text])
            open_entry = False
        else:
            pairs.append([text, ''])
            open_entry = True
    return pairs


def parse_markdown_page(url, *, output_path='output.json', timeout=30):
    """Fetch an HTML page and extract its title and per-section content.

    The page title is the text of the first <h1> (empty string if the page
    has none). For every <h2>, the <p> and <pre> siblings that follow it,
    up to the next <h2>, are grouped into ``[description, code]`` pairs.
    The result is written to *output_path* as indented JSON and returned.

    Args:
        url: Address of the page to download.
        output_path: File the JSON result is written to.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        A dict with key ``'title'`` and one ``'subtitle<N>'`` key per <h2>
        (numbered from 1 in document order), each mapping to a list of
        ``[description, code]`` pairs.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For other request failures, as raised
            by ``requests.get``.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the document.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Extract the title of the page
    title_tag = soup.find('h1')
    title = title_tag.get_text() if title_tag is not None else ''

    # Extract each section's descriptions and code chunks.
    # NOTE: the <h2> text itself is not stored; sections are identified only
    # by their position, which keeps the existing output format.
    output = {'title': title}
    for index, heading in enumerate(soup.find_all('h2'), start=1):
        section = _section_elements(heading)
        output[f'subtitle{index}'] = _pair_descriptions_with_code(section)

    # Save the results as a structured JSON object
    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(output, outfile, indent=4)

    return output


if __name__ == '__main__':
    url = 'https://www.dotcms.com/docs/latest/container-api'
    output = parse_markdown_page(url)
    print(output)