Diffstat (limited to 'works/life/gpcp')
-rw-r--r--  works/life/gpcp/.gitignore        |  3
-rw-r--r--  works/life/gpcp/GpcpConverter.py  | 29
-rw-r--r--  works/life/gpcp/GpcpScrawler.py   | 38
3 files changed, 0 insertions(+), 70 deletions(-)
diff --git a/works/life/gpcp/.gitignore b/works/life/gpcp/.gitignore
deleted file mode 100644
index 4da1cb4..0000000
--- a/works/life/gpcp/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.nc
-GpcpData.zip
-out.csv
diff --git a/works/life/gpcp/GpcpConverter.py b/works/life/gpcp/GpcpConverter.py
deleted file mode 100644
index 7aadd44..0000000
--- a/works/life/gpcp/GpcpConverter.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import pandas
-from pandas import DataFrame
-import xarray as xr
-import os
-import os.path
-
-latitude = 30
-longitude = 114
-
-data_dir = os.path.join(os.path.dirname(__file__), "GpcpData")
-files = os.listdir(data_dir)
-files = [os.path.join(data_dir, f) for f in files if f.endswith(".nc")]
-files.sort()
-
-result = DataFrame([], columns=["date", "prec"], dtype="object")
-
-for file in files:
- data_set = xr.open_dataset(file)
- df = data_set.to_dataframe()
- data_set.close()
- df = df.query(
- f"latitude == {latitude} & longitude == {longitude} & nv == 1")
- df = df.reset_index()
- df = df.drop(columns=["latitude", "longitude", "nv",
- "lat_bounds", "lon_bounds", "time_bounds"])
- df = df.rename(columns={"time": "date", "precip": "prec"})
- result = pandas.concat([result, df], ignore_index=True)
-
-result.to_csv("./out.csv")
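
The deleted GpcpConverter.py above flattened every daily GPCP netCDF file into a DataFrame and then filtered for a single grid cell by exact latitude/longitude match. A minimal sketch of the same extraction, assuming the files keep the latitude/longitude/precip names used above, can lean on xarray's nearest-neighbour selection instead of filtering the flattened grid:

# A minimal sketch of what the deleted GpcpConverter.py did, assuming the
# GPCP daily files use the "latitude"/"longitude"/"precip" names seen above.
# Selecting the grid cell with .sel() avoids flattening the whole grid into
# a DataFrame before filtering.
import glob
import os.path

import pandas as pd
import xarray as xr

latitude, longitude = 30, 114
data_dir = os.path.join(os.path.dirname(__file__), "GpcpData")

frames = []
for path in sorted(glob.glob(os.path.join(data_dir, "*.nc"))):
    with xr.open_dataset(path) as ds:
        # Pick the single grid cell nearest the requested coordinates.
        cell = ds["precip"].sel(latitude=latitude, longitude=longitude,
                                method="nearest")
        df = cell.to_dataframe().reset_index()[["time", "precip"]]
        frames.append(df.rename(columns={"time": "date", "precip": "prec"}))

result = pd.concat(frames, ignore_index=True)
result.to_csv("./out.csv", index=False)

The nearest-neighbour selection also avoids building the full per-file grid DataFrame and is less brittle if the requested point does not sit exactly on a grid centre, which the exact-match query above required.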
diff --git a/works/life/gpcp/GpcpScrawler.py b/works/life/gpcp/GpcpScrawler.py
deleted file mode 100644
index 7cf67ec..0000000
--- a/works/life/gpcp/GpcpScrawler.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import threading
-
-base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"
-
-start_year = 2011
-
-
-def get_with_retry(url):
- while True:
- try:
- return requests.get(url, timeout=30)
- except:
- pass
-
-
-def worker(year):
- html_url = base_url + str(year)
- html = get_with_retry(html_url).text
- soup = BeautifulSoup(html, 'html.parser')
- names = [a.attrs['href'] for a in soup.find_all('a')]
- for name in names:
- if name.startswith("gpcp"):
- url = html_url + '/' + name
- print("Downloading " + name + "...")
- open(name, 'wb').write(get_with_retry(url).content)
-
-
-threads = []
-
-for year in range(start_year, start_year+10):
- t = threading.Thread(target=worker, args=(year,))
- threads.append(t)
- t.start()
-
-for t in threads:
- t.join()
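
The deleted GpcpScrawler.py retried failed requests indefinitely through a bare except and wrote downloads through file handles it never closed. Below is a hedged sketch of the same per-year download loop with bounded retries and context-managed writes; the retry count, back-off and timeout are illustrative choices, not taken from the original.

# A sketch of the per-year downloader with bounded retries and explicit
# file handling; retry count, timeout and back-off are illustrative.
import threading
import time

import requests
from bs4 import BeautifulSoup

base_url = ("https://www.ncei.noaa.gov/data/"
            "global-precipitation-climatology-project-gpcp-daily/access/")
start_year = 2011


def get_with_retry(url, retries=5, timeout=30):
    # Give up after a fixed number of attempts instead of looping forever.
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.RequestException:
            time.sleep(2 ** attempt)
    raise RuntimeError(f"Failed to fetch {url} after {retries} attempts")


def worker(year):
    html_url = base_url + str(year)
    soup = BeautifulSoup(get_with_retry(html_url).text, "html.parser")
    names = [a.attrs["href"] for a in soup.find_all("a")]
    for name in names:
        if name.startswith("gpcp"):
            print("Downloading " + name + "...")
            # Write through a context manager so the handle is closed.
            with open(name, "wb") as f:
                f.write(get_with_retry(html_url + "/" + name).content)


threads = [threading.Thread(target=worker, args=(year,))
           for year in range(start_year, start_year + 10)]
for t in threads:
    t.start()
for t in threads:
    t.join()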