feat(disclosures): basic get and save in file
This commit is contained in:
		| @@ -0,0 +1,31 @@ | ||||
| import requests | ||||
| from bs4 import BeautifulSoup | ||||
| import os | ||||
|  | ||||
# Scrape the House Clerk financial-disclosure search for one member and save
# the resulting document URLs, grouped by filing year, to a local text file.

BASE_URL = 'https://disclosures-clerk.house.gov'
SEARCH_PATH = 'FinancialDisclosure/ViewMemberSearchResult'
SAVED_URLS_FILE = 'documentUrls.txt'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever


def load_saved_urls(path=SAVED_URLS_FILE):
    """Return the mapping written by a previous run, or ``{}`` if there is none.

    Args:
        path: File previously written by ``save_urls``.

    Returns:
        The saved ``{year_text: [url, ...]}`` dict; ``{}`` when the file is
        missing or empty.

    Raises:
        ValueError, SyntaxError: If the file is not a plain Python literal.
    """
    # Local import so the script's top-level import block needs no change.
    import ast

    if not os.path.isfile(path):
        return {}
    with open(path, 'r') as f:
        content = f.read()
    if not content.strip():
        return {}
    # literal_eval parses the str(dict) format that save_urls writes but,
    # unlike eval, never executes code found in the file.
    return ast.literal_eval(content)


def extract_filings(html):
    """Pull ``(year_text, href)`` pairs out of the search-result HTML.

    Args:
        html: Response body of the member search request.

    Returns:
        A list of ``(year_text, href)`` tuples in document order.
    """
    rows = BeautifulSoup(html, 'html.parser').find_all('tr', attrs={'role': 'row'})
    filings = []
    # The first matching row is skipped, presumably because it is the table
    # header. Slicing (instead of pop(0)) is also safe when nothing matched.
    for row in rows[1:]:
        year_cells = row.find_all('td', attrs={'data-label': 'Filing Year'})
        link = row.a
        href = link.get('href') if link is not None else None
        if not year_cells or not href:
            # A row without a year cell or a document link cannot be filed
            # under any key, so skip it rather than abort the whole run.
            continue
        filings.append((year_cells[0].text, href))
    return filings


def group_urls_by_year(filings, base_url=BASE_URL):
    """Group absolute document URLs by filing year.

    Args:
        filings: Iterable of ``(year_text, href)`` pairs.
        base_url: Prefix joined to every ``href`` with a single ``/``.

    Returns:
        ``{year_text: [url, ...]}`` with keys inserted in ascending numeric
        year order; URLs within a year keep their input order.

    Raises:
        ValueError: If a year text cannot be converted to ``int``.
    """
    grouped = {}
    # sorted() is stable, so filings of the same year stay in page order.
    for year, href in sorted(filings, key=lambda filing: int(filing[0])):
        # Always build from base_url: prefixing the previously built URL
        # would chain every document path onto the one before it.
        grouped.setdefault(year, []).append(f'{base_url}/{href}')
    return grouped


def save_urls(document_urls, path=SAVED_URLS_FILE):
    """Write ``document_urls`` to ``path`` as its ``str()`` representation."""
    with open(path, 'w') as f:
        f.write(str(document_urls))


def main():
    """Fetch the search results, group the document URLs by year and save them."""
    response = requests.post(
        f'{BASE_URL}/{SEARCH_PATH}',
        data={"LastName": "pelosi"},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()

    # NOTE(review): the previous run's result is loaded but not used yet;
    # kept because the script already read it before this change.
    previously_saved = load_saved_urls()

    document_urls = group_urls_by_year(extract_filings(response.text))

    print(len(document_urls))
    # save the document URLs to a file
    save_urls(document_urls)


if __name__ == '__main__':
    main()
|   | ||||
		Reference in New Issue
	
	Block a user