Diffstat (limited to 'works/life/gpcp')
-rw-r--r--  works/life/gpcp/.gitignore       |  3 ---
-rw-r--r--  works/life/gpcp/GpcpConverter.py | 29 --------------
-rw-r--r--  works/life/gpcp/GpcpScrawler.py  | 38 ------------------
3 files changed, 0 insertions, 70 deletions
diff --git a/works/life/gpcp/.gitignore b/works/life/gpcp/.gitignore
deleted file mode 100644
index 4da1cb4..0000000
--- a/works/life/gpcp/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.nc
-GpcpData.zip
-out.csv
diff --git a/works/life/gpcp/GpcpConverter.py b/works/life/gpcp/GpcpConverter.py
deleted file mode 100644
index 7aadd44..0000000
--- a/works/life/gpcp/GpcpConverter.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import pandas
-from pandas import DataFrame
-import xarray as xr
-import os
-import os.path
-
-latitude = 30
-longitude = 114
-
-data_dir = os.path.join(os.path.dirname(__file__), "GpcpData")
-files = os.listdir(data_dir)
-files = [os.path.join(data_dir, f) for f in files if f.endswith(".nc")]
-files.sort()
-
-result = DataFrame([], columns=["date", "prec"], dtype="object")
-
-for file in files:
-    data_set = xr.open_dataset(file)
-    df = data_set.to_dataframe()
-    data_set.close()
-    df = df.query(
-        f"latitude == {latitude} & longitude == {longitude} & nv == 1")
-    df = df.reset_index()
-    df = df.drop(columns=["latitude", "longitude", "nv",
-                          "lat_bounds", "lon_bounds", "time_bounds"])
-    df = df.rename(columns={"time": "date", "precip": "prec"})
-    result = pandas.concat([result, df], ignore_index=True)
-
-result.to_csv("./out.csv")
diff --git a/works/life/gpcp/GpcpScrawler.py b/works/life/gpcp/GpcpScrawler.py
deleted file mode 100644
index 7cf67ec..0000000
--- a/works/life/gpcp/GpcpScrawler.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import threading
-
-base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"
-
-start_year = 2011
-
-
-def get_with_retry(url):
-    while True:
-        try:
-            return requests.get(url, timeout=30)
-        except:
-            pass
-
-
-def worker(year):
-    html_url = base_url + str(year)
-    html = get_with_retry(html_url).text
-    soup = BeautifulSoup(html, 'html.parser')
-    names = [a.attrs['href'] for a in soup.find_all('a')]
-    for name in names:
-        if name.startswith("gpcp"):
-            url = html_url + '/' + name
-            print("Downloading " + name + "...")
-            open(name, 'wb').write(get_with_retry(url).content)
-
-
-threads = []
-
-for year in range(start_year, start_year+10):
-    t = threading.Thread(target=worker, args=(year,))
-    threads.append(t)
-    t.start()
-
-for t in threads:
-    t.join()
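
Note on the deleted converter: the same single-cell extraction can be written more directly with xarray's coordinate selection instead of flattening each dataset to a DataFrame and filtering it. A minimal sketch, assuming the same GpcpData directory layout and the precip/latitude/longitude names used by the deleted GpcpConverter.py:

# Sketch: pull one grid cell from GPCP daily NetCDF files via xarray .sel().
# Assumes the layout of the deleted GpcpConverter.py: a GpcpData directory of
# *.nc files with a "precip" variable on (time, latitude, longitude).
import glob
import os.path

import pandas
import xarray as xr

latitude = 30
longitude = 114

data_dir = os.path.join(os.path.dirname(__file__), "GpcpData")
files = sorted(glob.glob(os.path.join(data_dir, "*.nc")))

frames = []
for file in files:
    with xr.open_dataset(file) as data_set:
        # Selecting the "precip" DataArray sidesteps the nv/bounds variables
        # the original script had to filter out of the joint DataFrame.
        cell = data_set["precip"].sel(latitude=latitude, longitude=longitude)
        df = cell.to_dataframe().reset_index()[["time", "precip"]]
    frames.append(df.rename(columns={"time": "date", "precip": "prec"}))

result = pandas.concat(frames, ignore_index=True)
result.to_csv("out.csv", index=False)

The with block also guarantees each file handle is closed even if the selection raises, which the explicit data_set.close() in the deleted script did not.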
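
Note on the deleted scraper: its get_with_retry retried forever and swallowed every exception with a bare except, so a persistent failure (bad URL, DNS outage) would spin indefinitely. A bounded-retry sketch; the retry count and backoff are illustrative choices, not from the original script:

# Sketch: bounded retry with exponential backoff in place of the deleted
# get_with_retry. Retry count and backoff values are illustrative.
import time

import requests


def get_with_retry(url, retries=5, timeout=30):
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # treat HTTP errors as failures too
            return response
        except requests.RequestException:
            # Back off a little longer after each failed attempt.
            time.sleep(2 ** attempt)
    raise RuntimeError(f"giving up on {url} after {retries} attempts")

Catching requests.RequestException rather than everything means bugs such as a TypeError still surface immediately instead of being retried.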