diff options
author | Yuqian Yang <crupest@crupest.life> | 2025-02-28 23:13:39 +0800 |
---|---|---|
committer | Yuqian Yang <crupest@crupest.life> | 2025-02-28 23:13:39 +0800 |
commit | 99e2e923d0c77b02f3fb4ff648ea916954868606 (patch) | |
tree | ec8e03f6f2cd1ce43990fb4fe6cd631967d0237e /works/life/gpcp/GpcpScrawler.py | |
parent | 1cee979f5d36b311a03cc7397a036ba11caf3d42 (diff) | |
download | crupest-99e2e923d0c77b02f3fb4ff648ea916954868606.tar.gz crupest-99e2e923d0c77b02f3fb4ff648ea916954868606.tar.bz2 crupest-99e2e923d0c77b02f3fb4ff648ea916954868606.zip |
chore(store): move everything to store.
Diffstat (limited to 'works/life/gpcp/GpcpScrawler.py')
-rw-r--r-- | works/life/gpcp/GpcpScrawler.py | 38 |
1 files changed, 0 insertions, 38 deletions
"""Download GPCP daily precipitation files from the NOAA NCEI archive.

One thread is started per year for the ten years beginning at ``start_year``.
Each thread reads that year's index page and saves every linked file whose
name starts with ``gpcp`` into the current working directory.
"""

import threading
import time

import requests
from bs4 import BeautifulSoup

base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"

start_year = 2011

# Seconds to wait between failed attempts so an unreachable server is not
# hit in a tight loop.
retry_delay = 1


def get_with_retry(url):
    """Fetch *url*, retrying until the request completes.

    Only transport-level failures (connection errors, timeouts, and the other
    ``requests.RequestException`` subclasses) are retried. The response is
    returned whatever its HTTP status, so callers must check it themselves.
    Retries are unbounded.
    """
    while True:
        try:
            return requests.get(url, timeout=30)
        except requests.RequestException as exc:
            # Catching only RequestException lets KeyboardInterrupt and
            # SystemExit propagate, so the script can still be stopped.
            print("Retrying " + url + ": " + str(exc))
            time.sleep(retry_delay)


def worker(year):
    """Download every ``gpcp*`` file linked from the index page for *year*.

    Files are written to the current working directory under the name given
    in the link. Pages or files answered with an HTTP error status are
    skipped with a message rather than saved.
    """
    html_url = base_url + str(year)
    listing = get_with_retry(html_url)
    if not listing.ok:
        print("Skipping " + str(year) + ": HTTP " + str(listing.status_code))
        return

    soup = BeautifulSoup(listing.text, 'html.parser')
    # href=True filters out anchors that carry no href attribute, which
    # would otherwise raise KeyError on lookup.
    names = [a['href'] for a in soup.find_all('a', href=True)]
    for name in names:
        if not name.startswith("gpcp"):
            continue
        url = html_url + '/' + name
        print("Downloading " + name + "...")
        response = get_with_retry(url)
        if not response.ok:
            print("Skipping " + name + ": HTTP " + str(response.status_code))
            continue
        # Open the target only after the download succeeded, so a failed or
        # retried request never leaves a truncated file behind.
        with open(name, 'wb') as f:
            f.write(response.content)


def main():
    """Start one download thread per year and wait for all of them."""
    threads = []

    for year in range(start_year, start_year + 10):
        t = threading.Thread(target=worker, args=(year,))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()


if __name__ == "__main__":
    main()