aboutsummaryrefslogtreecommitdiff
path: root/works/life/gpcp/GpcpScrawler.py
diff options
context:
space:
mode:
author Yuqian Yang <crupest@crupest.life> 2025-02-28 23:13:39 +0800
committer Yuqian Yang <crupest@crupest.life> 2025-02-28 23:13:39 +0800
commit 99e2e923d0c77b02f3fb4ff648ea916954868606 (patch)
tree ec8e03f6f2cd1ce43990fb4fe6cd631967d0237e /works/life/gpcp/GpcpScrawler.py
parent 1cee979f5d36b311a03cc7397a036ba11caf3d42 (diff)
downloadcrupest-99e2e923d0c77b02f3fb4ff648ea916954868606.tar.gz
crupest-99e2e923d0c77b02f3fb4ff648ea916954868606.tar.bz2
crupest-99e2e923d0c77b02f3fb4ff648ea916954868606.zip
chore(store): move everything to store.
Diffstat (limited to 'works/life/gpcp/GpcpScrawler.py')
-rw-r--r-- works/life/gpcp/GpcpScrawler.py 38
1 files changed, 0 insertions, 38 deletions
diff --git a/works/life/gpcp/GpcpScrawler.py b/works/life/gpcp/GpcpScrawler.py
deleted file mode 100644
index 7cf67ec..0000000
--- a/works/life/gpcp/GpcpScrawler.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import threading
-
-# Root URL of the NOAA NCEI GPCP daily listing; worker() appends the year to it.
-base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"
-
-# First year to download; the thread loop below covers ten years starting here.
-start_year = 2011
-
-
-def get_with_retry(url, retries=None, delay=1.0):
-    """Fetch ``url`` with ``requests.get``, retrying when the request fails.
-
-    Only ``requests.RequestException`` (connection errors, timeouts, ...) is
-    retried. Any other exception propagates immediately instead of being
-    swallowed by an endless loop. The HTTP status code is not inspected, so
-    an error response is returned like any other response.
-
-    Args:
-        url: Address passed to ``requests.get`` (30 second timeout).
-        retries: Maximum number of retries after the first attempt, or
-            ``None`` (the default) to keep retrying indefinitely.
-        delay: Seconds to sleep between attempts so a failing server is not
-            hit in a tight loop.
-
-    Returns:
-        The ``requests.Response`` of the first attempt that succeeds.
-
-    Raises:
-        requests.RequestException: The last failure, once ``retries`` is
-            exhausted (never raised when ``retries`` is ``None``).
-    """
-    import time  # local import: the file header does not import ``time``
-
-    attempt = 0
-    while True:
-        try:
-            return requests.get(url, timeout=30)
-        except requests.RequestException:
-            attempt += 1
-            if retries is not None and attempt > retries:
-                raise
-            time.sleep(delay)
-
-
-def worker(year):
-    """Download every ``gpcp*`` file linked from one year's index page.
-
-    Fetches ``base_url + str(year)``, parses its anchors with BeautifulSoup,
-    and saves each link whose href starts with ``"gpcp"`` under that same
-    name, relative to the current working directory.
-
-    Args:
-        year: Year whose listing is fetched; it is stringified and appended
-            to ``base_url``.
-    """
-    html_url = base_url + str(year)
-    html = get_with_retry(html_url).text
-    soup = BeautifulSoup(html, 'html.parser')
-    # .get() rather than attrs['href']: an anchor without an href would
-    # otherwise raise KeyError and abort this year's downloads.
-    names = [a.get('href') for a in soup.find_all('a')]
-    for name in names:
-        if not name or not name.startswith("gpcp"):
-            continue
-        url = html_url + '/' + name
-        print("Downloading " + name + "...")
-        # Download before opening the file, so the output file is only
-        # created or truncated once the data has actually arrived.
-        content = get_with_retry(url).content
-        with open(name, 'wb') as out_file:
-            out_file.write(content)
-
-
-# One downloader thread per year, for ten consecutive years from start_year.
-threads = []
-
-for year in range(start_year, start_year + 10):
-    t = threading.Thread(target=worker, args=(year,))
-    t.start()
-    threads.append(t)
-
-# Block until every year's downloads have finished.
-for t in threads:
-    t.join()