示例#1
0
# --- Q-Q plot ---------------------------------------------------------------
# Probability plot of the log-transformed daily discharge against a normal
# distribution; points close to the reference line suggest log-normality.
data_points = data.Daily_Discharge
plt.figure(figsize=(15, 7))
si.stats.probplot(np.log(data_points), plot=plt, dist='norm')
plt.show()

# --- Box plot ---------------------------------------------------------------
# Same series, untransformed. whis=3 widens the whiskers relative to
# matplotlib's default of 1.5, so fewer points are drawn as outliers.
plt.figure(figsize=(15, 7))
plt.boxplot(data_points, whis=3)
plt.show()


# 2. Extract rainfall data
# The API token is a credential: it is requested at run time and must not be
# written into this file (not even inside a comment).
token = input('Enter into the token to access to the data: \t')
# The Client object gives access to the NCDC database using the token.
my_client = Client(token, default_units='None', default_limit=1000)

# The extent is the lat/long bounding box of the target region, entered as
# four comma-separated numbers in the order given by Dirs.
Dirs = ['north', 'south', 'east', 'west']
# The raw text is kept in its own variable so that the `data` object used for
# the discharge plots earlier in the script is not overwritten.
extent_input = input('Enter the extent, format:"N,S,E,W":')
temp = extent_input.split(',')
if len(temp) != len(Dirs):
    # Fail with a clear message instead of an IndexError (too few values) or
    # silently ignoring the surplus (too many values).
    raise ValueError(
        'Expected {} comma-separated values (N,S,E,W) for the extent, got {}'
        .format(len(Dirs), len(temp))
    )
# float() raises ValueError for a non-numeric entry; surrounding blanks are ok.
extent = {direction: float(raw) for direction, raw in zip(Dirs, temp)}

# Displaying the dictionary, one "direction:value" pair per line
for key, value in extent.items():
    print(str(key) + ':' + str(value))  # e.g. input 41.53,41.21,-84.90,-85.33

# input of start date, end date, type of dataset, and name of gauge
start_date = input('Enter begin date (format:yyyy-mm-dd) \t')  # e.g. 2019-01-01
示例#2
0
    def filter(self):
        """Append to ``self.values`` the IDs of GHCND stations that qualify.

        A station qualifies when it lies inside ``self.extent`` and its
        reported ``mindate``/``maxdate`` span the whole requested period.

        Reads:
            self.extent: string of four comma-separated floats, ``"N,S,E,W"``.
            self.start_date, self.end_date: strings in ``mm/dd/YYYY`` format.
            self.token: token handed to the NCDC API ``Client``.

        Raises:
            ValueError: if a component of the extent is not a valid float.
            GeoEDFError: if the extent does not hold exactly four values, the
                north/south or east/west values are in the wrong order, the
                dates cannot be parsed, or the station query/filtering fails.
        """
        # semantic checks on params
        # Check (1) exactly four values need to be provided in extent
        extent_vals = [float(val) for val in self.extent.split(',')]

        if len(extent_vals) != 4:
            raise GeoEDFError('NOAAStationFilter requires a N,S,E,W string of floating point numbers as the extent')

        # Check (2) that lat and lon pairs are in the right order
        north, south, east, west = extent_vals

        if south > north:
            raise GeoEDFError('please check the ordering of the south and north extents')

        if west > east:
            raise GeoEDFError('please check the ordering of the east and west extents')

        # passed semantic checks, prepare dict of extents for API
        extent_dict = {"north": north, "south": south, "east": east, "west": west}

        # process dates
        try:
            startdate = pd.to_datetime(self.start_date, format='%m/%d/%Y')
            enddate = pd.to_datetime(self.end_date, format='%m/%d/%Y')
        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate; `from err` keeps the original cause.
            raise GeoEDFError("Error parsing dates provided to NOAAStationFilter, please ensure format is mm/dd/YYYY") from err

        # param checks complete
        try:
            # get a client for NCDC API usage
            cdo_client = Client(self.token, default_units="None", default_limit=1000)

            # we are looking for stations with GHCND data; the result is
            # requested as a dataframe of the stations within the extent
            stations = cdo_client.find_stations(
                datasetid="GHCND",
                extent=extent_dict,
                startdate=startdate,
                enddate=enddate,
                return_dataframe=True)

            # Parse every station's coverage dates in one pass.
            station_maxdates = pd.to_datetime(stations.maxdate, format='%Y-%m-%d')
            station_mindates = pd.to_datetime(stations.mindate, format='%Y-%m-%d')

            # A station lacks coverage when its record ends before the
            # requested end date or starts after the requested start date.
            # The boolean mask is aligned on the dataframe's own index, so the
            # result does not depend on the index being 0..n-1.
            insufficient = (station_maxdates < enddate) | (station_mindates > startdate)
            valid_stations = stations[~insufficient]

            # add station IDs to values array
            self.values += list(valid_stations.id)

        except Exception as err:
            raise GeoEDFError('Error occurred when querying NCDC API for stations in NOAAStationFilter') from err
示例#3
0
Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Ck3iB3C0gqZWSQb3VA4glYm_wgM7Gx-T
"""

#!pip install cdo-api-py
import getpass
import os

from cdo_api_py import Client
import pandas as pd
from datetime import datetime
from pprint import pprint

# The NOAA CDO API token is a credential -- be sure not to share it publicly.
# It is therefore never written in this file: it is taken from the
# NOAA_CDO_TOKEN environment variable, or typed at a hidden prompt when that
# variable is unset or empty.
api_token = os.environ.get("NOAA_CDO_TOKEN") or getpass.getpass("NOAA CDO API token: ")
my_client = Client(api_token, default_units=None, default_limit=1000)

# Bounding box of the region of interest, in decimal degrees.
extent = {
    "north": 37.18,
    "south": 25.51,
    "east": -92.27,
    "west": -111.97,
}

# Period of interest: 1 January 1919 through 31 December 2019.
startdate = datetime(1919, 1, 1)
enddate = datetime(2019, 12, 31)

# Dataset and datatype identifiers selected for the queries.
datasetid = 'GSOM'
datatypeid = ['TMIN', 'TMAX', 'PRCP', 'WSF5', 'WSF2']

# Show the datasets exposed by the API.
pprint(my_client.list_datasets())
示例#4
0
from cdo_api_py import Client
import pandas as pd
from datetime import datetime
from pprint import pprint

# Build the API client from a developer token.
# Each token is limited to 5 calls per second and 1000 calls per day.
token = "my token here!"
# default_units may alternatively be 'standard'; default_limit tops out at 1000.
my_client = Client(token, default_limit=1000, default_units=None)

# Pretty-print, in this order:
#   1. the endpoints associated with the API (background reading at
#      https://www.ncdc.noaa.gov/cdo-web/webservices/v2#gettingStarted),
#   2. the available datasets (about 11),
#   3. every datatype (there are more than 1000 of them).
for list_call in (
    my_client.list_endpoints,
    my_client.list_datasets,
    my_client.list_datatypes,
):
    pprint(list_call())

# Extent we are interested in -- in this case the DC metro area.
extent = dict(north=39.14, south=38.68, east=-76.65, west=-77.35)

# lets define the date range we're interested in as well,  December 2016
            stationid=station['id'],
            startdate=startdate,
            enddate=enddate,
            return_dataframe=True
            # include_station_meta=True
        )
        # Create new DataFrame limiting columns (we don't want ALL of the station data)
        columns = ['station', 'date', 'PRCP', 'SNOW']
        single = pd.DataFrame(station_data, columns=columns)
        # Send to database
        single.to_sql(con=db_engine, name='raw_weather',if_exists='append', index=False)


# Connect to DB
conn = connect()
try:
    # Get list of stations: one entry per distinct station_id in `team`
    cursor = conn.cursor()
    cursor.execute('SELECT DISTINCT station_id FROM team')
    station_list = [row[0] for row in cursor.fetchall()]

    # Set params for API call
    token = '############'
    client = Client(token, default_units='metric', default_limit=1000)

    # One call per year, 2008 through 2012 (the range end is exclusive).
    for years in range(2008, 2013):
        get_year_of_data(client, station_list, years)
finally:
    # Release the connection even when a query or a download raises.
    conn.close()