32 lines
		
	
	
		
			1000 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			32 lines
		
	
	
		
			1000 B
		
	
	
	
		
			Python
		
	
	
	
	
	
import requests
from bs4 import BeautifulSoup
import os
import ast

# Base URL of the U.S. House of Representatives financial-disclosure site.
url = 'https://disclosures-clerk.house.gov'

# Search the member-disclosure database by last name.
data = {"LastName": "pelosi"}
response = requests.post(f'{url}/FinancialDisclosure/ViewMemberSearchResult', data=data)

# Load previously saved document URLs, if the file exists.
# NOTE(review): createdDocumentUrls is loaded but never consulted below —
# presumably intended for de-duplicating already-seen documents; confirm
# and either use it or drop the load.
createdDocumentUrls = {}
if 'documentUrls.txt' in os.listdir():
    with open('documentUrls.txt', 'r') as f:
        # literal_eval safely parses the str(dict) written at the bottom of
        # this script; eval() would execute arbitrary code from the file.
        createdDocumentUrls = ast.literal_eval(f.read())

parsed_html = BeautifulSoup(response.text, 'html.parser')
fillings = parsed_html.find_all('tr', attrs={'role': 'row'})
fillings.pop(0)  # drop the table header row

# Sort fillings by year (ascending).
fillings.sort(key=lambda x: int(x.find_all('td', attrs={"data-label": "Filing Year"})[0].text))

# Group the absolute document URLs by filing year.
documentUrls = {}
for filling in fillings:
    key = filling.find_all('td', attrs={"data-label": "Filing Year"})[0].text
    # BUG FIX: the original reassigned `url` here, so from the second
    # iteration on each link was appended to the PREVIOUS document URL
    # instead of the site base. Use a separate variable for the per-row URL.
    doc_url = f'{url}/{filling.a.get("href")}'
    documentUrls.setdefault(key, []).append(doc_url)

print(len(documentUrls))

# Persist the grouped URLs as a dict literal (readable by literal_eval above).
with open('documentUrls.txt', 'w') as f:
    f.write(str(documentUrls))