diff options
| author | crupest <crupest@outlook.com> | 2022-05-26 13:45:32 +0800 | 
|---|---|---|
| committer | crupest <crupest@outlook.com> | 2022-05-26 13:45:32 +0800 | 
| commit | fcc7095d0eeca2d8df5f6af053e8c9c006600bb1 (patch) | |
| tree | bb008cedd4fd3b42ea75263a155cd1e7b636922a /works | |
| parent | c2a9cd1caff816a5215bbfd4f3edfa50f048a7b3 (diff) | |
| download | crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.gz crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.tar.bz2 crupest-fcc7095d0eeca2d8df5f6af053e8c9c006600bb1.zip | |
import(life): ...
Diffstat (limited to 'works')
| -rw-r--r-- | works/life/gpcp/.gitignore | 3 | ||||
| -rw-r--r-- | works/life/gpcp/GpcpConverter.py | 29 | ||||
| -rw-r--r-- | works/life/gpcp/GpcpScrawler.py | 38 | 
3 files changed, 70 insertions, 0 deletions
```diff
diff --git a/works/life/gpcp/.gitignore b/works/life/gpcp/.gitignore
new file mode 100644
index 0000000..4da1cb4
--- /dev/null
+++ b/works/life/gpcp/.gitignore
@@ -0,0 +1,3 @@
+*.nc
+GpcpData.zip
+out.csv
diff --git a/works/life/gpcp/GpcpConverter.py b/works/life/gpcp/GpcpConverter.py
new file mode 100644
index 0000000..7aadd44
--- /dev/null
+++ b/works/life/gpcp/GpcpConverter.py
@@ -0,0 +1,29 @@
+import pandas
+from pandas import DataFrame
+import xarray as xr
+import os
+import os.path
+
+latitude = 30
+longitude = 114
+
+data_dir = os.path.join(os.path.dirname(__file__), "GpcpData")
+files = os.listdir(data_dir)
+files = [os.path.join(data_dir, f) for f in files if f.endswith(".nc")]
+files.sort()
+
+result = DataFrame([], columns=["date", "prec"], dtype="object")
+
+for file in files:
+    data_set = xr.open_dataset(file)
+    df = data_set.to_dataframe()
+    data_set.close()
+    df = df.query(
+        f"latitude == {latitude} & longitude == {longitude} & nv == 1")
+    df = df.reset_index()
+    df = df.drop(columns=["latitude", "longitude", "nv",
+                 "lat_bounds", "lon_bounds", "time_bounds"])
+    df = df.rename(columns={"time": "date", "precip": "prec"})
+    result = pandas.concat([result, df], ignore_index=True)
+
+result.to_csv("./out.csv")
diff --git a/works/life/gpcp/GpcpScrawler.py b/works/life/gpcp/GpcpScrawler.py
new file mode 100644
index 0000000..7cf67ec
--- /dev/null
+++ b/works/life/gpcp/GpcpScrawler.py
@@ -0,0 +1,38 @@
+import requests
+from bs4 import BeautifulSoup
+import threading
+
+base_url = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"
+
+start_year = 2011
+
+
+def get_with_retry(url):
+    while True:
+        try:
+            return requests.get(url, timeout=30)
+        except:
+            pass
+
+
+def worker(year):
+    html_url = base_url + str(year)
+    html = get_with_retry(html_url).text
+    soup = BeautifulSoup(html, 'html.parser')
+    names = [a.attrs['href'] for a in soup.find_all('a')]
+    for name in names:
+        if name.startswith("gpcp"):
+            url = html_url + '/' + name
+            print("Downloading " + name + "...")
+            open(name, 'wb').write(get_with_retry(url).content)
+
+
+threads = []
+
+for year in range(start_year, start_year+10):
+    t = threading.Thread(target=worker, args=(year,))
+    threads.append(t)
+    t.start()
+
+for t in threads:
+    t.join()
```
