aboutsummaryrefslogtreecommitdiff
path: root/works/life
diff options
context:
space:
mode:
authorcrupest <crupest@outlook.com>2022-05-26 13:45:32 +0800
committercrupest <crupest@outlook.com>2022-05-26 13:45:32 +0800
commitfcc7095d0eeca2d8df5f6af053e8c9c006600bb1 (patch)
treebb008cedd4fd3b42ea75263a155cd1e7b636922a /works/life
parentc2a9cd1caff816a5215bbfd4f3edfa50f048a7b3 (diff)
downloadcrupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.gz
crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.bz2
crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.zip
import(life): ...
Diffstat (limited to 'works/life')
-rw-r--r--works/life/gpcp/.gitignore3
-rw-r--r--works/life/gpcp/GpcpConverter.py29
-rw-r--r--works/life/gpcp/GpcpScrawler.py38
3 files changed, 70 insertions, 0 deletions
diff --git a/works/life/gpcp/.gitignore b/works/life/gpcp/.gitignore
new file mode 100644
index 0000000..4da1cb4
--- /dev/null
+++ b/works/life/gpcp/.gitignore
@@ -0,0 +1,3 @@
+*.nc
+GpcpData.zip
+out.csv
diff --git a/works/life/gpcp/GpcpConverter.py b/works/life/gpcp/GpcpConverter.py
new file mode 100644
index 0000000..7aadd44
--- /dev/null
+++ b/works/life/gpcp/GpcpConverter.py
@@ -0,0 +1,29 @@
import os
import os.path

import pandas
from pandas import DataFrame
import xarray as xr

# Grid point of interest (degrees north / east) to extract from the
# GPCP daily precipitation files.
latitude = 30
longitude = 114

# All .nc files under GpcpData/ next to this script, in name order
# (file names embed the date, so name order is chronological order).
data_dir = os.path.join(os.path.dirname(__file__), "GpcpData")
files = sorted(
    os.path.join(data_dir, f)
    for f in os.listdir(data_dir)
    if f.endswith(".nc")
)

# Collect one small frame per file and concatenate ONCE at the end:
# calling pandas.concat inside the loop re-copies the accumulated rows
# every iteration (quadratic in the number of files).
frames = []
for file in files:
    # Context manager guarantees the dataset is closed even if the
    # conversion below raises (the original closed it unconditionally
    # only on the success path).
    with xr.open_dataset(file) as data_set:
        df = data_set.to_dataframe()
    # Keep only the single grid cell; nv == 1 drops the duplicate row
    # produced by the bounds dimension.
    df = df.query(
        f"latitude == {latitude} & longitude == {longitude} & nv == 1")
    df = df.reset_index()
    df = df.drop(columns=["latitude", "longitude", "nv",
                          "lat_bounds", "lon_bounds", "time_bounds"])
    df = df.rename(columns={"time": "date", "precip": "prec"})
    frames.append(df)

# Fall back to an empty (date, prec) frame when no .nc files exist so
# the CSV is still written with the expected header.
result = (pandas.concat(frames, ignore_index=True)
          if frames
          else DataFrame([], columns=["date", "prec"], dtype="object"))

result.to_csv("./out.csv")
diff --git a/works/life/gpcp/GpcpScrawler.py b/works/life/gpcp/GpcpScrawler.py
new file mode 100644
index 0000000..7cf67ec
--- /dev/null
+++ b/works/life/gpcp/GpcpScrawler.py
@@ -0,0 +1,38 @@
+import requests
+from bs4 import BeautifulSoup
+import threading
+
+base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"
+
+start_year = 2011
+
+
def get_with_retry(url):
    """GET *url*, retrying forever until a request completes.

    Returns the ``requests.Response`` of the first attempt that does
    not raise. Retries only on ``requests.RequestException`` (timeouts,
    connection errors, ...) — the original bare ``except:`` also
    swallowed ``KeyboardInterrupt``/``SystemExit``, making the loop
    impossible to interrupt from the keyboard.
    """
    while True:
        try:
            # 30 s timeout so a dead connection cannot hang forever.
            return requests.get(url, timeout=30)
        except requests.RequestException:
            pass
+
+
def worker(year):
    """Download every GPCP daily file listed for *year*.

    Fetches the directory index page for the year, scrapes its anchor
    hrefs, and downloads each entry whose name starts with "gpcp" into
    the current working directory.
    """
    html_url = base_url + str(year)
    html = get_with_retry(html_url).text
    soup = BeautifulSoup(html, 'html.parser')
    names = [a.attrs['href'] for a in soup.find_all('a')]
    for name in names:
        if name.startswith("gpcp"):
            url = html_url + '/' + name
            print("Downloading " + name + "...")
            # Context manager so the file handle is closed (and data
            # flushed) even if the write raises; the original leaked
            # the handle from open(...).write(...).
            with open(name, 'wb') as f:
                f.write(get_with_retry(url).content)
+
+
# Fan out one downloader thread per year in the decade starting at
# start_year, then wait for all of them to finish.
threads = [
    threading.Thread(target=worker, args=(year,))
    for year in range(start_year, start_year + 10)
]

for t in threads:
    t.start()

for t in threads:
    t.join()