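"""Download daily GPCP precipitation files for 2011-2020 from NOAA NCEI.

Each year's directory listing is scraped for file links, and one thread
per year downloads the matching "gpcp*" files into the current directory.
"""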
import threading
import time

import requests
from bs4 import BeautifulSoup

# Directory listing for the GPCP daily precipitation archive on NOAA NCEI.
base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"
start_year = 2011


def get_with_retry(url):
    """Fetch a URL, retrying indefinitely on network errors."""
    while True:
        try:
            return requests.get(url, timeout=30)
        except requests.exceptions.RequestException:
            # Back off briefly before retrying instead of hammering the server.
            time.sleep(5)


def worker(year):
    """Download every GPCP file listed under a single year's directory."""
    html_url = base_url + str(year)
    html = get_with_retry(html_url).text
    soup = BeautifulSoup(html, "html.parser")
    # Collect the link targets from the directory listing page.
    names = [a.get("href") for a in soup.find_all("a") if a.get("href")]
    for name in names:
        # Only fetch the data files themselves (their names start with "gpcp"),
        # not the parent-directory or sorting links.
        if name.startswith("gpcp"):
            url = html_url + "/" + name
            print("Downloading " + name + "...")
            with open(name, "wb") as f:
                f.write(get_with_retry(url).content)


# Download ten years of data (start_year through start_year + 9),
# using one thread per year.
threads = []
for year in range(start_year, start_year + 10):
    t = threading.Thread(target=worker, args=(year,))
    threads.append(t)
    t.start()

for t in threads:
    t.join()