## QQplot
# Visual check of whether log-transformed daily discharge is ~normally distributed.
plt.figure(figsize=(15, 7))
data_points = data.Daily_Discharge
si.stats.probplot(np.log(data_points), dist='norm', plot=plt)
plt.show()

## Boxplot
# whis=3 stretches the whiskers to 3x IQR so only extreme values plot as outliers.
plt.figure(figsize=(15, 7))
plt.boxplot(data.Daily_Discharge, whis=3)
plt.show()

# 2.
# Extract rainfall data
token = input('Enter into the token to access to the data: \t')
# The Client object helps you access the NCDC database with your token.
# NOTE(review): default_units is the string 'None' here but the literal None in the
# companion setup script — confirm which form cdo_api_py actually expects.
my_client = Client(token, default_units='None', default_limit=1000)

# The extent is the lat, long bounding box of the target region.
Dirs = ['north', 'south', 'east', 'west']
# Read the extent into its own variable: the original code reassigned `data`,
# silently clobbering the discharge DataFrame plotted above.
extent_str = input('Enter the extent, format:"N,S,E,W":')
extent = {direction: float(value)
          for direction, value in zip(Dirs, extent_str.split(','))}

# Displaying the dictionary
for key, value in extent.items():
    print(f'{key}:{value}')
# example extent: 41.53,41.21,-84.90,-85.33

# Input of start date, end date, type of dataset, and name of gauge.
start_date = input('Enter begin date (format:yyyy-mm-dd) \t')  # e.g. 2019-01-01
# Automatically generated by Colaboratory.
# Original file is located at
#   https://colab.research.google.com/drive/1Ck3iB3C0gqZWSQb3VA4glYm_wgM7Gx-T

# !pip install cdo-api-py

import os
from datetime import datetime
from pprint import pprint

import pandas as pd
from cdo_api_py import Client

# SECURITY: an NCDC/CDO API token is a credential and should not be committed to
# source control. Prefer supplying it through the NOAA_CDO_TOKEN environment
# variable; the hard-coded value is kept only as a backward-compatible fallback
# and should be rotated.
api_token = os.environ.get("NOAA_CDO_TOKEN", "aIILvsLDvkAbPPvZagdxBFTVeemdZEVq")

# Be sure not to share your token publicly.
my_client = Client(api_token, default_units=None, default_limit=1000)

# Bounding box (decimal degrees) of the target region.
extent = {
    "north": 37.18,
    "south": 25.51,
    "east": -92.27,
    "west": -111.97,
}

# Century-long query window.
startdate = datetime(1919, 1, 1)
enddate = datetime(2019, 12, 31)

datasetid = 'GSOM'  # Global Summary of the Month
# Monthly min/max temperature, precipitation, and fastest 5-/2-second wind speeds.
datatypeid = ['TMIN', 'TMAX', 'PRCP', 'WSF5', 'WSF2']

pprint(my_client.list_datasets())
def filter(self):
    """Find GHCND stations inside ``self.extent`` whose records fully cover
    [``self.start_date``, ``self.end_date``] and append their IDs to
    ``self.values``.

    Params come from instance attributes: ``extent`` ("N,S,E,W" floats),
    ``start_date``/``end_date`` (mm/dd/YYYY strings) and ``token`` (NCDC API
    token).

    Raises:
        GeoEDFError: on a malformed extent, unparseable dates, or any
            failure while querying the NCDC API.
    """
    # --- semantic checks on params ---

    # Check (1): exactly four comma-separated floats must be provided.
    # The float() conversion itself can fail, so it is guarded too (the
    # original let a bare ValueError escape for non-numeric input).
    try:
        extent_vals = [float(val) for val in self.extent.split(',')]
    except ValueError as err:
        raise GeoEDFError('NOAAStationFilter requires a N,S,E,W string of floating point numbers as the extent') from err
    if len(extent_vals) != 4:
        raise GeoEDFError('NOAAStationFilter requires a N,S,E,W string of floating point numbers as the extent')

    # Check (2): latitude and longitude pairs must be in the right order.
    north, south, east, west = extent_vals
    if south > north:
        raise GeoEDFError('please check the ordering of the south and north extents')
    if west > east:
        raise GeoEDFError('please check the ordering of the east and west extents')

    # Passed semantic checks; prepare dict of extents for the API.
    extent_dict = {"north": north, "south": south, "east": east, "west": west}

    # Process dates (strict mm/dd/YYYY).
    try:
        startdate = pd.to_datetime(self.start_date, format='%m/%d/%Y')
        enddate = pd.to_datetime(self.end_date, format='%m/%d/%Y')
    except (ValueError, TypeError) as err:
        raise GeoEDFError("Error parsing dates provided to NOAAStationFiler, please ensure format is mm/dd/YYYY") from err

    # --- param checks complete; query the API ---
    try:
        # Client for NCDC API usage.
        cdo_client = Client(self.token, default_units="None", default_limit=1000)

        # find_stations returns a dataframe of stations within the extent
        # that carry GHCND data.
        stations = cdo_client.find_stations(
            datasetid="GHCND",
            extent=extent_dict,
            startdate=startdate,
            enddate=enddate,
            return_dataframe=True)

        # Drop stations whose record does not span the full requested range.
        # A set avoids the original bug of appending the same positional
        # index twice when a station fails both date checks.
        # NOTE(review): positional ints are used against stations.maxdate[i],
        # which assumes the dataframe has a default RangeIndex — confirm.
        stations_to_drop = set()
        for i in range(len(stations.maxdate)):
            station_maxdate = pd.to_datetime(stations.maxdate[i], format='%Y-%m-%d')
            station_mindate = pd.to_datetime(stations.mindate[i], format='%Y-%m-%d')
            # Record ends before the requested window, or starts after it.
            if station_maxdate < enddate or station_mindate > startdate:
                stations_to_drop.add(i)

        # Delete stations without enough time coverage.
        valid_stations = stations.drop(stations.index[list(stations_to_drop)])

        # Add the surviving station IDs to the values array.
        self.values += list(valid_stations.id)
    except GeoEDFError:
        raise
    except Exception as err:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; the original cause is chained for debugging.
        raise GeoEDFError('Error occurred when querying NCDC API for stations in NOAAStationFiler') from err
# NOTE(review): this line is a mangled paste. It begins mid-call — these are
# keyword arguments to an unseen client data request inside a function whose
# `def` lies outside this view — and then runs on into top-level script code
# (DB connection, station list query, per-year fetch loop, token '############'
# placeholder). It cannot be safely reformatted without the missing opening
# call, so it is left byte-identical; needs a manual re-split against the
# original notebook/source.
stationid=station['id'], startdate=startdate, enddate=enddate, return_dataframe=True # include_station_meta=True ) # Create new DataFrame limiting columns (we don't want ALL of the station data) columns = ['station', 'date', 'PRCP', 'SNOW'] single = pd.DataFrame(station_data, columns=columns) # Send to database single.to_sql(con=db_engine, name='raw_weather',if_exists='append', index=False) # Connect to DB conn = connect() cursor = conn.cursor() # Get list of stations cursor.execute('SELECT DISTINCT station_id FROM team') station_list = [ row[0] for row in cursor.fetchall()] # Set params for API call token = '############' client = Client(token, default_units='metric', default_limit=1000) for years in range(2008,2013): get_year_of_data(client, station_list, years) conn.close()