import(life): ...

author: crupest <crupest@outlook.com> 2022-05-26 13:45:32 +0800
committer: crupest <crupest@outlook.com> 2022-05-26 13:45:32 +0800
commit: fcc7095d0eeca2d8df5f6af053e8c9c006600bb1 (patch)
tree: bb008cedd4fd3b42ea75263a155cd1e7b636922a /works/life/gpcp/GpcpScrawler.py
parent: c2a9cd1caff816a5215bbfd4f3edfa50f048a7b3 (diff)
download: crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.gz
crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.bz2
crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.zip
1 files changed, 38 insertions, 0 deletions
diff --git a/works/life/gpcp/GpcpScrawler.py b/works/life/gpcp/GpcpScrawler.py
new file mode 100644
index 0000000..7cf67ec
--- /dev/null
+++ b/works/life/gpcp/GpcpScrawler.py
@@ -0,0 +1,38 @@
+import requests
+from bs4 import BeautifulSoup
+import threading
+
+base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"  # listing root; worker() appends the year to it
+
+start_year = 2011  # first year fetched; the script covers the ten years starting here
+
+
+def get_with_retry(url):
+    while True:
+        try:
+            return requests.get(url, timeout=30)
+        except:
+            pass
+
+
+def worker(year):
+    html_url = base_url + str(year)
+    html = get_with_retry(html_url).text
+    soup = BeautifulSoup(html, 'html.parser')
+    names = [a.attrs['href'] for a in soup.find_all('a')]
+    for name in names:
+        if name.startswith("gpcp"):
+            url = html_url + '/' + name
+            print("Downloading " + name + "...")
+            open(name, 'wb').write(get_with_retry(url).content)
+
+
+threads = []
+
+for year in range(start_year, start_year+10):
+    t = threading.Thread(target=worker, args=(year,))
+    threads.append(t)
+    t.start()
+
+for t in threads:
+    t.join()
author	crupest <crupest@outlook.com>	2022-05-26 13:45:32 +0800
committer	crupest <crupest@outlook.com>	2022-05-26 13:45:32 +0800
commit	fcc7095d0eeca2d8df5f6af053e8c9c006600bb1 (patch)
tree	bb008cedd4fd3b42ea75263a155cd1e7b636922a /works/life/gpcp/GpcpScrawler.py
parent	c2a9cd1caff816a5215bbfd4f3edfa50f048a7b3 (diff)
download	crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.gz crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.bz2 crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.zip