From 80ab4508e2feabc06b1ee7ac5b39340c2ce5ff6d Mon Sep 17 00:00:00 2001
From: oliverhnat <oliver.hnat@gmail.com>
Date: Mon, 6 Jan 2025 11:39:46 +0100
Subject: [PATCH] feat(disclosures): basic get and save in file

---
 financial_disclosure.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/financial_disclosure.py b/financial_disclosure.py
index e69de29..fdd5ccb 100644
--- a/financial_disclosure.py
+++ b/financial_disclosure.py
@@ -0,0 +1,31 @@
+"""Fetch a member's House financial-disclosure filings and save their URLs grouped by year."""
+import ast
+import os
+
+import requests
+from bs4 import BeautifulSoup
+
+url = 'https://disclosures-clerk.house.gov'
+data = {"LastName": "pelosi"}
+response = requests.post(f'{url}/FinancialDisclosure/ViewMemberSearchResult', data=data, timeout=30)
+response.raise_for_status()  # stop before an error page overwrites the saved file with {}
+
+# Mapping saved by an earlier run (not used further yet); literal_eval cannot run code, unlike eval.
+createdDocumentUrls = {}
+if os.path.exists('documentUrls.txt'):
+    with open('documentUrls.txt', 'r') as f:
+        createdDocumentUrls = ast.literal_eval(f.read())
+
+# Skip the first role="row" <tr> (presumably the header row), then sort by ascending filing year.
+fillings = BeautifulSoup(response.text, 'html.parser').find_all('tr', attrs={'role': 'row'})[1:]
+fillings.sort(key=lambda row: int(row.find('td', attrs={"data-label": "Filing Year"}).text))
+
+# Group URLs by year; `url` must stay the site root (rebinding it in the loop chained the URLs).
+documentUrls = {}
+for filling in fillings:
+    key = filling.find('td', attrs={"data-label": "Filing Year"}).text
+    documentUrls.setdefault(key, []).append(f'{url}/{filling.a.get("href")}')
+
+print(len(documentUrls))
+with open('documentUrls.txt', 'w') as f:  # saved as a dict literal, read back by the load above
+    f.write(str(documentUrls))