feat(disclosures): basic get and save in file
This commit is contained in:
		| @@ -0,0 +1,31 @@ | |||||||
|  | import requests | ||||||
|  | from bs4 import BeautifulSoup | ||||||
|  | import os | ||||||
|  |  | ||||||
|  | url = 'https://disclosures-clerk.house.gov' | ||||||
|  | data = {"LastName": "pelosi"} | ||||||
|  | response = requests.post(f'{url}/FinancialDisclosure/ViewMemberSearchResult', data=data) | ||||||
|  |  | ||||||
|  | createdDocumentUrls = {} | ||||||
|  | if 'documentUrls.txt' in os.listdir(): | ||||||
|  |     with open('documentUrls.txt', 'r') as f: | ||||||
|  |         createdDocumentUrls = eval(f.read()) | ||||||
|  |  | ||||||
|  | parsed_html = BeautifulSoup(response.text, 'html.parser') | ||||||
|  | fillings = parsed_html.find_all('tr', attrs={'role':'row'}) | ||||||
|  | fillings.pop(0) | ||||||
|  |  | ||||||
|  | # sort fillings by year | ||||||
|  | fillings.sort(key=lambda x: int(x.find_all('td', attrs={"data-label": "Filing Year"})[0].text)) | ||||||
|  | documentUrls = {} | ||||||
|  | for filling in fillings: | ||||||
|  |     key = filling.find_all('td', attrs={"data-label": "Filing Year"})[0].text | ||||||
|  |     url = f'{url}/{filling.a.get("href")}' | ||||||
|  |     arr = documentUrls.get(key, []) | ||||||
|  |     documentUrls[key] = arr + [url] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | print(len(documentUrls)) | ||||||
|  | # save the documentUrls to a file | ||||||
|  | with open('documentUrls.txt', 'w') as f: | ||||||
|  |     f.write(str(documentUrls)) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user