def get_scoring(con):
    scoring = psql.read_sql("""
        select *
        from backend.scoring_result sr
        where sr.dwh_country_id=1 and sr.created_at>'2015-07-07'
        """, con)
    return scoring


def get_loan_requests(con):
    # name assumed; the original snippet only kept the query and the return statement
    loan_requests = psql.read_sql("""
        select lr.id_loan_request, lr.loan_request_nr, sr.fk_user, sr.id,
               lr.principal_amount, ua.net_income, uh.postcheck_data
        from base.loan_request lr
        join base.user_account ua
          on lr.dwh_country_id=ua.dwh_country_id and lr.fk_user=ua.id_user
        join backend.loan_request_user_history uh
          on lr.dwh_country_id=uh.dwh_country_id and lr.id_loan_request=uh.fk_loan_request
        join backend.scoring_result sr
          on lr.dwh_country_id=sr.dwh_country_id and lr.fk_user=sr.fk_user
        where sr.dwh_country_id=1 and sr.created_at>'2015-07-07' and sr.is_latest=1
        """, con)
    return loan_requests
def test_to_sql_index_label(self):
    temp_frame = DataFrame({'col1': range(4)})

    # no index name, defaults to 'index'
    sql.to_sql(temp_frame, 'test_index_label', self.conn)
    frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'index')

    # specifying index_label
    sql.to_sql(temp_frame, 'test_index_label', self.conn,
               if_exists='replace', index_label='other_label')
    frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'other_label',
                     "Specified index_label not written to database")

    # using the index name
    temp_frame.index.name = 'index_name'
    sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace')
    frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'index_name',
                     "Index name not written to database")

    # has index name, but specifying index_label
    sql.to_sql(temp_frame, 'test_index_label', self.conn,
               if_exists='replace', index_label='other_label')
    frame = sql.read_sql('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'other_label',
                     "Specified index_label not written to database")
def Read_data_by_selection_index(self, con="*", tb_name=None, start_index=None,
                                 end_index=None, index_name="Default"):
    """
    Access the database, restricting the result to a range of the given
    index column (e.g. a datetime or an integer index).
    :param con: string listing the columns to retrieve, e.g. "attr1, attr2"
    :param tb_name: name of the table to query
    :param start_index: str (datetime such as "2014-02-01") or int index
    :param end_index: str (datetime such as "2014-02-01") or int index
    :param index_name: name of the index column used in the BETWEEN clause
    :return: DataFrame with the selected rows, or -1 on error
    """
    assert isinstance(index_name, basestring), "please input a valid index name"
    dict_rec = pd.DataFrame({})
    if not con:
        print "nothing to read from database"
        return -1
    else:
        # make sure the database is set and the table exists
        if self.database_name is not None and self.if_table_exist(tb_name):
            # string indexes (datetimes) must be quoted inside the SQL statement
            if isinstance(start_index, basestring) and isinstance(end_index, basestring):
                dict_rec = sql.read_sql(
                    """select %s from %s where %s between '%s' and '%s' order by %s ASC """
                    % (con, tb_name, index_name, start_index, end_index, index_name),
                    con=self.db)
            else:
                dict_rec = sql.read_sql(
                    """select %s from %s where %s between %s and %s order by %s ASC"""
                    % (con, tb_name, index_name, start_index, end_index, index_name),
                    con=self.db)
            return dict_rec
        else:
            print "database should be firstly set and table name %s should exist..." % (tb_name)
            return -1
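# A minimal usage sketch of Read_data_by_selection_index. The class instance,
# table and column names below are hypothetical, used only for illustration.
db_handler = MyDBReader()                          # assumed class exposing the method above
df = db_handler.Read_data_by_selection_index(
    con="temperature, humidity",                   # columns to retrieve
    tb_name="sensor_log",                          # table to query
    start_index="2014-02-01",                      # string bounds are treated as datetimes
    end_index="2014-02-28",
    index_name="timestamp")                        # column used in the BETWEEN clause
if isinstance(df, pd.DataFrame):
    print df.head()                                # Python 2 print, matching the snippet above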
def load_DB_table(self, table_name):
    """
    Loads the table table_name from the SQL database associated with the class
    instance on initialization. If a pickle version of the table exists and is
    newer than the database file, the table is loaded from the pickle file instead.
    Also checks whether the table is already loaded.
    """
    if table_name not in self.loaded:
        file_pickle = self.DB_path + self.name + '/' + table_name + '.pkl'
        file_sql = self.DB_path + self.name + '/' + self.name + '.sqlite'
        if ((not os.path.isfile(file_pickle)) or
                (os.path.getmtime(file_sql) > os.path.getmtime(file_pickle))):
            print('Loading table ' + table_name + ' from database ' + self.name + '...')
            if table_name in ('vertices', 'bonds', 'frames'):
                self.DB_table[table_name] = psql.read_sql(
                    'SELECT * FROM ' + table_name + ';', self.con)
            else:
                self.DB_table[table_name] = psql.read_sql(
                    'SELECT * FROM ' + table_name + ' WHERE cell_id > 10000;', self.con)
            print('Writing table ' + table_name + ' to pickle file ' + self.name)
            self.DB_table[table_name].to_pickle(file_pickle)
        else:
            print('Loading table ' + table_name + ' from pickle file...')
            self.DB_table[table_name] = pd.read_pickle(file_pickle)
        if table_name == 'cells':
            self.DB_table[table_name] = self.DB_table[table_name][
                ['frame', 'cell_id', 'center_x', 'center_y', 'area', 'elong_xx', 'elong_xy']]
        self.loaded.add(table_name)
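# The mtime comparison above is the core of the caching idea: re-query SQL only when
# the SQLite file is newer than the pickle cache. A minimal standalone sketch of the
# same pattern (function and file names are placeholders, not part of the original class):
import os
import pandas as pd
import pandas.io.sql as psql


def load_cached_table(con, table_name, sqlite_path, pickle_path):
    """Reload from SQL only when the SQLite file is newer than the pickle cache."""
    if (os.path.isfile(pickle_path) and
            os.path.getmtime(sqlite_path) <= os.path.getmtime(pickle_path)):
        return pd.read_pickle(pickle_path)       # cache is still up to date
    df = psql.read_sql('SELECT * FROM ' + table_name + ';', con)
    df.to_pickle(pickle_path)                    # refresh the cache for the next call
    return df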
def roadtmpc(grids, valid, iarchive): """ Do the RWIS Road times grid""" if iarchive: nt = NetworkTable(['IA_RWIS', 'MN_RWIS', 'WI_RWIS', 'IL_RWIS', 'MO_RWIS', 'KS_RWIS', 'NE_RWIS', 'SD_RWIS']) pgconn = psycopg2.connect(database='rwis', host='iemdb', user='******') df = read_sql(""" SELECT station, tfs0 as tsf0 from alldata WHERE valid >= %s and valid < %s and tfs0 >= -50 and tfs0 < 150 """, pgconn, params=((valid - datetime.timedelta(minutes=30)), (valid + datetime.timedelta(minutes=30))), index_col=None) df['lat'] = df['station'].apply(lambda x: nt.sts.get(x, {}).get('lat', 0)) df['lon'] = df['station'].apply(lambda x: nt.sts.get(x, {}).get('lon', 0)) else: pgconn = psycopg2.connect(database='iem', host='iemdb', user='******') df = read_sql(""" SELECT ST_x(geom) as lon, ST_y(geom) as lat, tsf0 from current c JOIN stations t on (c.iemid = t.iemid) WHERE c.valid > now() - '2 hours'::interval and t.network in ('IA_RWIS', 'MN_RWIS', 'WI_RWIS', 'IL_RWIS', 'MO_RWIS', 'KS_RWIS', 'NE_RWIS', 'SD_RWIS') and tsf0 >= -50 and tsf0 < 150 """, pgconn, index_col=None) nn = NearestNDInterpolator((df['lon'].values, df['lat'].values), temperature(df['tsf0'].values, 'F').value('C')) grids['roadtmpc'] = nn(XI, YI)
def GetSensorList(site=''):
    if site == '':
        try:
            db, cur = SenslopeDBConnect(Namedb)
            cur.execute("use " + Namedb)
            query = 'SELECT name, num_nodes, seg_length, col_length FROM site_column_props'
            df = psql.read_sql(query, db)
            # build a list of columnArray objects, skipping known bad columns
            sensors = []
            for s in range(len(df)):
                if df.name[s] == 'mcatb' or df.name[s] == 'messb':
                    continue
                s = columnArray(df.name[s], df.num_nodes[s], df.seg_length[s], df.col_length[s])
                sensors.append(s)
            db.close()
            return sensors
        except:
            raise ValueError('Could not get sensor list from database')
    else:
        db, cur = SenslopeDBConnect(Namedb)
        cur.execute("use " + Namedb)
        query = ("SELECT name, num_nodes, seg_length, col_length FROM site_column_props "
                 "WHERE name LIKE '%" + str(site) + "%'")
        df = psql.read_sql(query, db)
        sensors = []
        s = columnArray(df.name[0], df.num_nodes[0], df.seg_length[0], df.col_length[0])
        sensors.append(s)
        db.close()
        return sensors
def GetJoinedDf(self): run_id = 12332 #dstore.Get("current_run") # DB df #[u'state', u'id', u'run_id', u'user_id', u'cell', u'time_raw', u'time1', u'lap1', u'time2', u'lap2', u'time3', u'lap3', u'un1', u'un2', u'un3', u'us1'] self.joinedDf = psql.read_sql(\ "SELECT * FROM times" +\ " WHERE (times.run_id = "+ str(run_id ) +")"\ , self.db) #set index = id self.joinedDf.set_index('id', drop=False, inplace = True) #replace nan with None self.joinedDf = self.joinedDf.where(pd.notnull(self.joinedDf), None) if(self.dstore.GetItem("racesettings-app", ['rfid']) == 2): tDf = psql.read_sql("SELECT * FROM tags", self.db, index_col = "id") tDf = tDf[["user_nr", "tag_id"]] self.joinedDf = pd.merge(self.joinedDf, tDf, left_on='user_id', right_on='tag_id', how="left") self.joinedDf = pd.merge(self.joinedDf, self.ucDf, left_on='user_nr', right_on='nr', how="left") self.joinedDf.set_index('id', drop=False, inplace = True) else: self.joinedDf = pd.merge(self.joinedDf, self.ucDf, left_on='user_id', right_index=True, how="left") self.joinedDf.sort("time_raw", inplace=True) #replace nan with None self.joinedDf = self.joinedDf.where(pd.notnull(self.joinedDf), None) return self.joinedDf
def fetch_query(self, query, index=None):
    """
    ------------------------------------------------------------------------------------------
    Fetch records from the MySQL database based on a custom query.
    @param query - the query used to fetch selected records from the MySQL DB
    @param index - the column to be used as the df index
    @ret df      - pandas df with the data retrieved from the MySQL DB; empty if no data found
    ------------------------------------------------------------------------------------------
    """
    con = self.connect_db()
    if con is False:
        exit("\nTerminating Class module %s" % __version__)
    if index:
        df = psql.read_sql(query, con=con, index_col=index)
    else:
        df = psql.read_sql(query, con=con)
    try:
        con.close()
    except:
        print("couldn't close mysql DB")
    return df.sort_index(axis=0)  # return DF sorted with ascending index
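# A short usage sketch of fetch_query. The class name and the table queried are
# placeholders for illustration; only the method signature comes from the code above.
reader = MySQLReader()                                   # assumed class defining fetch_query
df = reader.fetch_query("SELECT id, name, created_at FROM customers", index="id")
print(df.head())                                         # rows come back sorted by the id index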
def main(argv): """Go Main Go""" mlra_id = int(argv[1]) pgconn = get_dbconn('idep') mlraxref = read_sql(""" select distinct mlra_id, mlra_name from mlra """, pgconn, index_col='mlra_id') print("%s," % (mlraxref.at[mlra_id, 'mlra_name'], ), end='') for col, scenario in enumerate(range(36, 39)): df = read_sql(""" with myhucs as ( SELECT huc_12 from huc12 where scenario = 0 and mlra_id = %s ) select fpath, f.huc_12 from flowpaths f, myhucs h WHERE f.scenario = 0 and f.huc_12 = h.huc_12 """, pgconn, params=(mlra_id, ), index_col=None) if df.empty: print() continue hits = 0 for _, row in df.iterrows(): prj = ( "/prj/%s/%s/%s_%s.prj" ) % (row['huc_12'][:8], row['huc_12'][8:], row['huc_12'], row['fpath']) prj2 = "/i/%s/%s" % (scenario, prj) if open(prj2).read().find("SWITCHGRASS.rot") > 0: hits += 1 print("%.2f," % (hits / float(len(df.index)) * 100., ), end="") print()
def main(): """Go Main Go.""" pgconn = get_dbconn('postgis') base = read_sql(""" select segid, archive_begin, archive_end from roads_base """, pgconn, index_col='segid') for year in range(2003, 2018): table = "roads_%s_%s_log" % (year, year + 1) df = read_sql(""" SELECT segid, min(valid), max(valid) from """ + table + """ WHERE valid is not null GROUP by segid """, pgconn, index_col='segid') for segid, row in df.iterrows(): curmin = base.at[segid, 'archive_begin'] curmax = base.at[segid, 'archive_end'] if curmin is None or row['min'] < curmin: base.at[segid, 'archive_begin'] = row['min'] if curmax is None or row['max'] > curmax: base.at[segid, 'archive_end'] = row['max'] cursor = pgconn.cursor() for segid, row in base.iterrows(): if pd.isnull(row['archive_begin']) or pd.isnull(row['archive_end']): continue print("%s %s -> %s" % ( segid, row['archive_begin'], row['archive_end'])) cursor.execute(""" update roads_base SET archive_begin = %s, archive_end = %s where segid = %s """, (row['archive_begin'], row['archive_end'], segid)) cursor.close() pgconn.commit()
def main(): """Go!""" nt = NetworkTable("IA_ASOS") pgconn = get_dbconn('iem') df = read_sql(""" SELECT id, valid, tmpf::int as tmpf, dwpf::int as dwpf, sknt from current_log c JOIN stations t on (c.iemid = t.iemid) WHERE t.network = 'IA_ASOS' and c.valid > 'TODAY' and c.tmpf > 70 ORDER by id ASC """, pgconn, index_col=None) pgconn = get_dbconn('asos') for _, row in df.iterrows(): df2 = read_sql(""" SELECT valid, tmpf, dwpf, sknt from alldata WHERE station = %s and valid < '2018-10-03' and tmpf::int >= %s and dwpf::int >= %s and sknt >= %s ORDER by valid DESC """, pgconn, params=(row['id'], row['tmpf'], row['dwpf'], row['sknt']), index_col=None) if len(df2.index) > 5: continue lastdate = None if not df2.empty: lastdate = df2.iloc[0]['valid'].date() print( ("%s,%s,%s,%s,%.0f,%s,%s,%s" ) % (row['id'], row['valid'], row['tmpf'], row['dwpf'], row['sknt'], len(df2.index), lastdate, nt.sts[row['id']]['archive_begin'].year) )
def test_date_parsing(self):
    """ Test date parsing in read_sql """
    # No Parsing
    df = sql.read_sql(
        "SELECT * FROM types_test_data", self.conn, flavor='sqlite')
    self.assertFalse(
        issubclass(df.DateCol.dtype.type, np.datetime64),
        "DateCol loaded with incorrect type")

    df = sql.read_sql("SELECT * FROM types_test_data", self.conn,
                      flavor='sqlite', parse_dates=['DateCol'])
    self.assertTrue(
        issubclass(df.DateCol.dtype.type, np.datetime64),
        "DateCol loaded with incorrect type")

    df = sql.read_sql("SELECT * FROM types_test_data", self.conn,
                      flavor='sqlite',
                      parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
    self.assertTrue(
        issubclass(df.DateCol.dtype.type, np.datetime64),
        "DateCol loaded with incorrect type")

    df = sql.read_sql("SELECT * FROM types_test_data", self.conn,
                      flavor='sqlite', parse_dates=['IntDateCol'])
    self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                    "IntDateCol loaded with incorrect type")

    df = sql.read_sql("SELECT * FROM types_test_data", self.conn,
                      flavor='sqlite', parse_dates={'IntDateCol': 's'})
    self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                    "IntDateCol loaded with incorrect type")
def main(argv): """Go Main Go""" mlra_id = int(argv[1]) pgconn = get_dbconn('idep') fig, ax = plt.subplots(3, 3, figsize=(12, 6)) mlraxref = read_sql(""" select distinct mlra_id, mlra_name from mlra """, pgconn, index_col='mlra_id') fig.text(0.5, 0.96, ("CSCAP Yearly Scenario Changes for MLRA: %s [%s]" ) % (mlraxref.at[mlra_id, 'mlra_name'], mlra_id), ha='center', fontsize=16) for col, scenario in enumerate(range(36, 39)): df = read_sql(""" with myhucs as ( SELECT huc_12 from huc12 where scenario = 0 and mlra_id = %s ) select r.huc_12, scenario, extract(year from valid)::int as year, sum(avg_loss) * 10. as loss, sum(avg_runoff) as runoff, sum(avg_delivery) * 10. as delivery from results_by_huc12 r JOIN myhucs h on (r.huc_12 = h.huc_12) where r.valid >= '2008-01-01' and r.valid < '2017-01-01' and (scenario = 0 or scenario = %s) GROUP by r.huc_12, year, scenario """, pgconn, params=(mlra_id, scenario), index_col=None) gdf = df.groupby(('scenario', 'year')).mean() delta = gdf.loc[(scenario, )] - gdf.loc[(0, )] for row, varname in enumerate(['runoff', 'loss', 'delivery']): ax[row, col].bar(delta.index.values, delta[varname], color=['b' if x > 0 else 'r' for x in delta[varname].values]) if row == 0: ax[row, col].set_title("Scenario %s\n%s" % (scenario, LABELS[scenario])) ylabels = ['Runoff [mm]', 'Detachment\n[tonnes/ha]', 'Delivery [tonnes/ha]'] for row in range(3): ymin = 99 ymax = -99 for col in range(3): ylim = ax[row, col].get_ylim() ymin = min([ylim[0], ymin]) ymax = max([ylim[1], ymax]) for col in range(3): ax[row, col].set_ylim(ymin, ymax) ax[row, col].grid(True) if col > 0: ax[row, col].set_yticklabels([]) else: ax[row, col].set_ylabel(ylabels[row]) fig.savefig('mlra%s.png' % (mlra_id, )) fig.savefig('mlra%s.pdf' % (mlra_id, )) plt.close()
def get_context(fdict): pgconn = psycopg2.connect(database='hads', host='iemdb-hads', user='******') cursor = pgconn.cursor() ctx = get_autoplot_context(fdict, get_description()) ctx['station'] = ctx['station'].upper() station = ctx['station'] dt = ctx['dt'] # Attempt to get station information cursor.execute(""" SELECT name from stations where id = %s and network ~* 'DCP' """, (station,)) ctx['name'] = "" if cursor.rowcount > 0: row = cursor.fetchone() ctx['name'] = row[0] ctx['df'] = read_sql("""with fx as ( select id, issued, primaryname, primaryunits, secondaryname, secondaryunits from hml_forecast where station = %s and generationtime between %s and %s) SELECT f.id, f.issued, d.valid, d.primary_value, f.primaryname, f.primaryunits, d.secondary_value, f.secondaryname, f.secondaryunits from hml_forecast_data_""" + str(dt.year) + """ d JOIN fx f on (d.hml_forecast_id = f.id) ORDER by f.id ASC, d.valid ASC """, pgconn, params=(station, dt - datetime.timedelta(days=3), dt + datetime.timedelta(days=1)), index_col=None) if len(ctx['df'].index) > 0: ctx['primary'] = "%s[%s]" % (ctx['df'].iloc[0]['primaryname'], ctx['df'].iloc[0]['primaryunits']) ctx['secondary'] = "%s[%s]" % (ctx['df'].iloc[0]['secondaryname'], ctx['df'].iloc[0]['secondaryunits']) # get obs mints = ctx['df']['valid'].min() maxts = ctx['df']['valid'].max() else: mints = dt - datetime.timedelta(days=3) maxts = dt + datetime.timedelta(days=3) df = read_sql(""" SELECT valid, h.label, value from hml_observed_data_""" + str(dt.year) + """ d JOIN hml_observed_keys h on (d.key = h.id) WHERE station = %s and valid between %s and %s ORDER by valid """, pgconn, params=(station, mints, maxts), index_col=None) ctx['odf'] = df.pivot('valid', 'label', 'value') if len(ctx['df'].index) > 0: ctx['df'] = pd.merge(ctx['df'], ctx['odf'], left_on='valid', right_index=True, how='left', sort=False) ctx['title'] = "[%s] %s" % (ctx['station'], ctx['name']) ctx['subtitle'] = ctx['dt'].strftime("%d %b %Y %H:%M UTC") if len(ctx['df'].index) == 0 and len(ctx['odf'].index) > 0: ctx['primary'] = ctx['odf'].columns[0] ctx['secondary'] = ctx['odf'].columns[1] return ctx
def madlibLS(sConn, schema=None):
    if schema is None:
        return psql.read_sql("select * from information_schema.schemata;", sConn)
    else:
        stmt = """SELECT * FROM information_schema.tables
                  WHERE table_schema = '{schema}';""".format(schema=schema)
        return psql.read_sql(stmt, sConn)
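# Hypothetical usage of madlibLS with an existing DB-API connection sConn; the schema
# name 'madlib' is only an example. information_schema.tables exposes table_schema
# and table_name columns in PostgreSQL.
schemata = madlibLS(sConn)                  # list all schemas in the database
tables = madlibLS(sConn, schema='madlib')   # list the tables of one schema
print(tables[['table_schema', 'table_name']].head())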
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt ctx = get_autoplot_context(fdict, get_description()) station = ctx['station'] network = ctx['network'] varname = ctx['var'] thedate = ctx['thedate'] date = ctx['date'] nt = NetworkTable(network) pgconn = psycopg2.connect(database='coop', host='iemdb', user='******') table = "alldata_%s" % (station[:2], ) if date == 'exact': df = read_sql(""" SELECT year, high, day, precip from """ + table + """ WHERE station = %s and sday = %s ORDER by year ASC """, pgconn, params=(station, thedate.strftime("%m%d")), index_col='year') subtitle = thedate.strftime("%B %-d") else: if date == 'memorial': days = memorial_days() elif date == 'thanksgiving': days = thanksgiving() else: days = labor_days() df = read_sql(""" SELECT year, high, day, precip from """ + table + """ WHERE station = %s and day in %s ORDER by year ASC """, pgconn, params=(station, tuple(days)), index_col='year') subtitle = PDICT[date] (fig, ax) = plt.subplots(1, 1) ax.bar(df.index.values, df[varname], fc='r', ec='r', align='center') mean = df[varname].mean() ax.axhline(mean) ax.text(df.index.values[-1] + 1, mean, '%.2f' % (mean,), ha='left', va='center') ax.grid(True) ax.set_title(("%s [%s] Daily %s\non %s" ) % (nt.sts[station]['name'], station, PDICT2[varname], subtitle)) ax.set_xlim(df.index.values.min() - 1, df.index.values.max() + 1) ax.set_ylabel(PDICT2[varname]) if varname != 'precip': ax.set_ylim(df[varname].min() - 5, df[varname].max() + 5) return fig, df
def GetDataframe(self):
    uDf = psql.read_sql("SELECT * FROM " + str(self.name), db.getDb())

    # CATEGORY df
    cDf = psql.read_sql("SELECT id, name FROM categories", db.getDb())
    cDf.columns = ['cdf_id', 'category']

    uDf = pd.merge(uDf, cDf, left_on='category_id', right_on='cdf_id', how="left")
    uDf = uDf[["id", "nr", "status", "name", "first_name", "category", "club",
               "year", "sex", "email", "o1", "o2", "o3", "o4"]]
    return uDf
def fetch_pandas(self, select_statement, tb_name, query=''):
    """
    Same data format as PSQLPython.fetch(select_statement, tb_name, query=''),
    but returns a pandas dataframe.
    """
    if query == '':
        pandas_output = pdsql.read_sql(
            """SELECT %(select_statement)s FROM %(tb_name)s""", self.con,
            params={"select_statement": AsIs(select_statement),
                    "tb_name": AsIs(tb_name)})
    else:
        pandas_output = pdsql.read_sql(
            """SELECT %(select_statement)s FROM %(tb_name)s WHERE %(query)s""", self.con,
            params={"select_statement": AsIs(select_statement),
                    "tb_name": AsIs(tb_name),
                    "query": AsIs(query)})
    return pandas_output
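# With psycopg2, AsIs injects the value verbatim rather than as a quoted literal, which
# is what lets column and table names be parameterized above. A hedged usage sketch;
# the table and column names are placeholders, only the class name comes from the docstring.
client = PSQLPython()                                    # class that defines fetch_pandas
df_all = client.fetch_pandas("*", "measurements")        # SELECT * FROM measurements
df_sub = client.fetch_pandas("station, valid, tmpf",     # selected columns ...
                             "measurements",
                             query="tmpf > 32")          # ... with a WHERE clause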
def test_read_sql_delegate(self):
    iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
    iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
    tm.assert_frame_equal(iris_frame1, iris_frame2,
                          "read_sql and read_sql_query have not the same"
                          " result with a query")

    iris_frame1 = sql.read_sql_table('iris', self.conn)
    iris_frame2 = sql.read_sql('iris', self.conn)
    tm.assert_frame_equal(iris_frame1, iris_frame2)
def read_in_data():
    '''Reads the cleaned tables from MySQL.'''
    con = mdb.connect('localhost', 'root', '***', 'HomewardBound')
    with con:
        dogs = sql.read_sql('select * from Pets_cleaned;', con=con)
        weather = sql.read_sql('select * from PetWeather;', con=con)
        econ = sql.read_sql('select * from PetEcon;', con=con)
    return dogs, weather, econ
def Find_Next_Fraudster(table, possible_fraudsters_index):
    global max_fraudsters_in_DB
    engine = create_engine('postgresql://*****:*****@localhost:5432/fraud_detector')
    cnx = engine.raw_connection()
    possible_fraudsters_df = sql.read_sql("SELECT * FROM " + table, cnx)
    print "max_fraudsters_in_DB before", max_fraudsters_in_DB
    max_fraudsters_in_DB = sql.read_sql("SELECT count(*) FROM " + table, cnx)['count'][0]
    print "max_fraudsters_in_DB after", max_fraudsters_in_DB
    cnx.close()
    print "fraudster index is", possible_fraudsters_index
    fraudster_id = possible_fraudsters_df.iloc[possible_fraudsters_index, 1].encode('utf-8')
    return fraudster_id
def GetUserCategoryDf(self):
    # USER df
    uDf = psql.read_sql("SELECT * FROM users", self.db, index_col="id")
    uDf["name"] = uDf['name'].str.upper() + ' ' + uDf['first_name']

    # CATEGORY df
    cDf = psql.read_sql("SELECT id, name, start_nr FROM categories", self.db)
    cDf.columns = ['id', 'category', "start_nr"]

    uDf = pd.merge(uDf, cDf, left_on='category_id', right_on='id', how="left")
    uDf = uDf[["nr", "status", "name", "category", "start_nr", "year", "club",
               "sex", "o1", "o2", "o3", "o4"]]
    return uDf
def load_data(self, stock):
    '''Load the stock's historical data, falling back to tushare if the table is missing.'''
    search_sql = "select * from {0}".format('day_' + stock)
    try:
        raw_data = SQL.read_sql(search_sql, self.engine)
        return raw_data
    except Exception:
        # table not present yet: fetch from tushare, store it, then re-read
        data = ts.get_h_data(stock)
        data = data.sort_index(ascending=True)
        data.to_sql('day_' + stock, self.engine, if_exists='append')
        raw_data = SQL.read_sql(search_sql, self.engine)
        return raw_data
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt pgconn = psycopg2.connect(database='postgis', host='iemdb', user='******') station = fdict.get('station', 'DMX')[:4] nt = NetworkTable('WFO') nt.sts['_ALL'] = {'name': 'All Offices'} (fig, ax) = plt.subplots(1, 1, sharex=True) if station == '_ALL': df = read_sql(""" with obs as ( SELECT distinct extract(year from issue) as yr, phenomena, significance from warnings WHERE phenomena is not null and significance is not null and issue > '2005-01-01' and issue is not null ) SELECT yr as year, count(*) from obs GROUP by yr ORDER by yr ASC """, pgconn, index_col=None) else: df = read_sql(""" with obs as ( SELECT distinct extract(year from issue) as yr, phenomena, significance from warnings WHERE wfo = %s and phenomena is not null and significance is not null and issue > '2005-01-01' and issue is not null ) SELECT yr as year, count(*) from obs GROUP by yr ORDER by yr ASC """, pgconn, params=(station, ), index_col=None) df['wfo'] = station df['year'] = df['year'].astype('i') ax.bar(df['year']-0.4, df['count'], width=0.8, fc='b', ec='b') for yr, val in zip(df['year'], df['count']): ax.text(yr, val+1, "%s" % (val,), ha='center') ax.set_title(("[%s] NWS %s\nCount of Distinct VTEC Phenomena/" "Significance - %i to %i" ) % (station, nt.sts[station]['name'], df['year'].min(), df['year'].max())) ax.grid() ax.set_ylabel("Count") return fig, df
def create_email_content_database():
    """
    Query the SQLite database and read the email content table, returning it as a
    pandas dataframe. Since this table is very large and querying is slow, we only
    read it once and save the data in a csv for repeated use.
    """
    saved_file = data_dir + email_content_saved_fname
    if path.isfile(saved_file):
        print 'Email content was previously read, loading from save...'
        email_content = pd.read_csv(saved_file)
    else:
        print 'Reading email content for the first time. This may take a while...'
        conn = sqlite3.connect(data_dir + database_name)
        email_content_query = "SELECT * FROM email_content;"
        email_content = sql.read_sql(email_content_query, con=conn)
        # Convert the datetime string to datetime objects
        email_content = email_content.rename(columns={'send_time': 'datetime'})
        email_content.to_csv(saved_file)
        conn.close()

    # Fix the data types
    email_content['article_id'] = email_content['article_id'].astype('int')
    email_content['user_id'] = email_content['user_id'].astype('int')
    email_content['email_id'] = email_content['email_id'].astype('int')
    email_content['datetime'] = pd.to_datetime(email_content['datetime'])
    print 'Email database loaded.'
    return email_content
def make_vsm_histogram_plot(station, sts, ets): """Option 6""" df = read_sql(""" SELECT CASE WHEN t12_c_avg_qc > 1 then vwc_12_avg_qc else null end as v12, CASE WHEN t24_c_avg_qc > 1 then vwc_24_avg_qc else null end as v24, CASE WHEN t50_c_avg_qc > 1 then vwc_50_avg_qc else null end as v50 from sm_hourly where station = %s and valid >= %s and valid < %s """, ISUAG, params=(station, sts, ets), index_col=None) (_, ax) = plt.subplots(3, 1, sharex=True) ax[0].set_title(("ISUSM Station: %s VWC Histogram\n" "For un-frozen condition between %s and %s" ) % (nt.sts[station]['name'], sts.strftime("%-d %b %Y"), ets.strftime("%-d %b %Y"))) for i, col in enumerate(['v12', 'v24', 'v50']): ax[i].hist(df[col] * 100., bins=50, range=(0, 50), normed=True) ax[i].set_ylabel("Frequency") ax[i].grid(True) ax[i].text(0.99, 0.99, "%s inches" % (col[1:],), transform=ax[i].transAxes, ha='right', va='top') ax[i].set_yscale("log") ax[2].set_xlabel("Volumetric Water Content [%]") sys.stdout.write("Content-Type: image/png\n\n") plt.savefig(sys.stdout, format='png')
def getQuestionData(file_name):
    from pandas.io.sql import read_sql

    conn = engine.connect()
    table_exists = engine.dialect.has_table(conn, the_table, schema=the_schema)
    conn.close()

    # It may be better to explicitly create the table elsewhere.
    # Checking like this might be slower.
    if table_exists:
        sql = "DELETE FROM %s.%s WHERE file_name='%s'" % \
            (the_schema, the_table, file_name)
        engine.execute(sql)

    sql = """
        SELECT file_name, last_update, context, speaker_number, speaker_text
        FROM streetevents.speaker_data
        WHERE file_name='%s'
    """ % (file_name)
    df = read_sql(sql, engine)
    return df
def load_data(self, stock):
    '''Load the stock's historical data.'''
    search_sql = "select * from {0}".format('day_' + stock)
    raw_data = SQL.read_sql(search_sql, self.engine)
    return raw_data
def main(): """Go Main Go""" pgconn = get_dbconn('postgis') sql = get_polygon() df = read_sql(sql, pgconn, index_col='year') print(df) (fig, ax) = plt.subplots(1, 1) ax.bar(df.index.values - 0.2, df['count'].values, width=0.4, fc='r') ax.set_ylabel("Warning Count", color='r') y2 = ax.twinx() y2.bar(df.index.values + 0.2, df['area'].values, width=0.4, fc='b') y2.set_ylabel("Size (Continental United States)", color='b') p1 = plt.Rectangle((0, 0), 1, 1, fc="r") p3 = plt.Rectangle((0, 0), 1, 1, fc="b") ax.legend([p1, p3], ["Counts", "Size"], loc=2) ax.grid(True) ax.set_title("NWS *Storm Based* Tornado + Severe Thunderstorm Warnings") ax.set_ylim(0, 90000) y2.set_ylim(0, 25) ax.set_xlim(1985.5, 2017.5) fig.text(0.01, 0.01, 'Generated %s' % (datetime.date.today(), )) fig.savefig('test.png')
def do_site(site): """Print out a simple listing of trouble""" df = read_sql(""" with ag as ( select year, varname, value, count(*) from agronomic_data where uniqueid = %s and (value is null or value in ('', '.')) GROUP by year, varname, value), soil as ( select year, varname, value, count(*) from soil_data where uniqueid = %s and (value is null or value in ('', '.')) GROUP by year, varname, value) SELECT * from ag UNION select * from soil ORDER by year ASC, varname ASC """, DBCONN, params=(site, site), index_col=None) ssw("Content-type: text/plain\n\n") ssw("CSCAP Variable Progress Report\n") ssw("Site: %s\n" % (site,)) ssw("Generated: %s\n" % ( datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), )) ssw("Total Missing: %s\n" % (df['count'].sum(),)) ssw("%4s %-10s %-10s %-6s\n" % ('YEAR', 'VARNAME', 'VALUE', 'COUNT')) def nice(val): if val is None: return 'Empty' if val == '': return 'Empty' if val == '.': return "Period" return val for _, row in df.iterrows(): ssw("%s %-10s %-10s %-6s\n" % (row['year'], row['varname'], nice(row['value']), row['count']))
def plotter(fdict): """ Go """ pgconn = get_dbconn("postgis") ctx = get_autoplot_context(fdict, get_description()) sts = ctx["sdate"] ets = ctx["edate"] if (ets - sts).days > 366: raise NoDataFound("Chart duration needs to be less than 1 year.") wfo = ctx["wfo"] p1 = ctx["phenomenav1"] p2 = ctx["phenomenav2"] p3 = ctx["phenomenav3"] p4 = ctx["phenomenav4"] phenomena = [] for p in [p1, p2, p3, p4]: if p is not None: phenomena.append(p[:2]) s1 = ctx["significancev1"] s2 = ctx["significancev2"] s3 = ctx["significancev3"] s4 = ctx["significancev4"] significance = [] for s in [s1, s2, s3, s4]: if s is not None: significance.append(s[0]) pstr = [] title = "" for i, (p, s) in enumerate(zip(phenomena, significance)): pstr.append("(phenomena = '%s' and significance = '%s')" % (p, s)) if i == 2: title += "\n" title += "%s %s.%s, " % (vtec.get_ps_string(p, s), p, s) pstr = " or ".join(pstr) pstr = "(%s)" % (pstr, ) if ctx["w"] == "wfo": ctx["_nt"].sts["_ALL"] = { "name": "All Offices", "tzname": "America/Chicago", } if wfo not in ctx["_nt"].sts: raise NoDataFound("No Data Found.") wfo_limiter = (" and wfo = '%s' ") % (wfo if len(wfo) == 3 else wfo[1:], ) if wfo == "_ALL": wfo_limiter = "" tzname = ctx["_nt"].sts[wfo]["tzname"] else: wfo_limiter = " and substr(ugc, 1, 2) = '%s' " % (ctx["state"], ) tzname = "America/Chicago" df = read_sql( f""" with events as ( select wfo, min(issue at time zone %s) as localissue, extract(year from issue) as year, phenomena, significance, eventid from warnings where {pstr} {wfo_limiter} and issue >= %s and issue < %s GROUP by wfo, year, phenomena, significance, eventid ) SELECT date(localissue), count(*) from events GROUP by date(localissue) """, pgconn, params=( tzname, sts - datetime.timedelta(days=2), ets + datetime.timedelta(days=2), ), index_col="date", ) data = {} now = sts while now <= ets: data[now] = {"val": 0} now += datetime.timedelta(days=1) for date, row in df.iterrows(): data[date] = {"val": row["count"]} if ctx["w"] == "wfo": title2 = "NWS %s [%s]" % (ctx["_nt"].sts[wfo]["name"], wfo) if wfo == "_ALL": title2 = "All NWS Offices" else: title2 = state_names[ctx["state"]] fig = calendar_plot( sts, ets, data, heatmap=(ctx["heatmap"] == "yes"), title=("Number of VTEC Events for %s by Local Calendar Date") % (title2, ), subtitle="Valid %s - %s for %s" % (sts.strftime("%d %b %Y"), ets.strftime("%d %b %Y"), title), ) return fig, df
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt ctx = get_autoplot_context(fdict, get_description()) station = ctx['station'] varname = ctx['var'] network = 'RAOB' ts = ctx['date'] hour = int(ctx['hour']) ts = datetime.datetime(ts.year, ts.month, ts.day, hour) ts = ts.replace(tzinfo=pytz.timezone("UTC")) which = ctx['which'] vlimit = '' if which == 'month': vlimit = (" and extract(month from f.valid) = %s ") % (ts.month, ) nt = NetworkTable(network) name = nt.sts[station]['name'] stations = [ station, ] if station.startswith("_"): name = nt.sts[station]['name'].split("--")[0] stations = nt.sts[station]['name'].split("--")[1].strip().split(",") pgconn = psycopg2.connect(database='postgis', host='iemdb', user='******') df = read_sql(""" with data as ( select f.valid, p.pressure, count(*) OVER (PARTITION by p.pressure), min(valid) OVER () as min_valid, max(valid) OVER () as max_valid, p.tmpc, rank() OVER (PARTITION by p.pressure ORDER by p.tmpc ASC) as tmpc_rank, min(p.tmpc) OVER (PARTITION by p.pressure) as tmpc_min, max(p.tmpc) OVER (PARTITION by p.pressure) as tmpc_max, p.dwpc, rank() OVER (PARTITION by p.pressure ORDER by p.dwpc ASC) as dwpc_rank, min(p.dwpc) OVER (PARTITION by p.pressure) as dwpc_min, max(p.dwpc) OVER (PARTITION by p.pressure) as dwpc_max, p.height as hght, rank() OVER ( PARTITION by p.pressure ORDER by p.height ASC) as hght_rank, min(p.height) OVER (PARTITION by p.pressure) as hght_min, max(p.height) OVER (PARTITION by p.pressure) as hght_max, p.smps, rank() OVER (PARTITION by p.pressure ORDER by p.smps ASC) as smps_rank, min(p.smps) OVER (PARTITION by p.pressure) as smps_min, max(p.smps) OVER (PARTITION by p.pressure) as smps_max from raob_flights f JOIN raob_profile p on (f.fid = p.fid) WHERE f.station in %s and extract(hour from f.valid at time zone 'UTC') = %s """ + vlimit + """ and p.pressure in (925, 850, 700, 500, 400, 300, 250, 200, 150, 100, 70, 50, 10)) select * from data where valid = %s ORDER by pressure DESC """, pgconn, params=(tuple(stations), hour, ts), index_col='pressure') if len(df.index) == 0: raise Exception("No data found for query") for key in PDICT3.keys(): df[key + '_percentile'] = df[key + '_rank'] / df['count'] * 100. ax = plt.axes([0.1, 0.12, 0.65, 0.75]) bars = ax.barh(range(len(df.index)), df[varname + '_percentile'], align='center') y2labels = [] fmt = '%.1f' if varname not in [ 'hght', ] else '%.0f' for i, bar in enumerate(bars): ax.text(bar.get_width() + 1, i, '%.1f' % (bar.get_width(), ), va='center', bbox=dict(color='white')) y2labels.append((fmt + ' (' + fmt + ' ' + fmt + ')') % (df.iloc[i][varname], df.iloc[i][varname + "_min"], df.iloc[i][varname + "_max"])) ax.set_yticks(range(len(df.index))) ax.set_yticklabels(['%.0f' % (a, ) for a in df.index.values]) ax.set_ylim(-0.5, len(df.index) - 0.5) ax.set_xlabel("Percentile [100 = highest]") ax.set_ylabel("Mandatory Pressure Level (hPa)") plt.gcf().text( 0.5, 0.9, ("%s %s %s Sounding\n" "(%s-%s) Percentile Ranks (%s) for %s") % (station, name, ts.strftime("%Y/%m/%d %H UTC"), df.iloc[0]['min_valid'].year, df.iloc[0]['max_valid'].year, ("All Year" if which == 'none' else calendar.month_name[ts.month]), PDICT3[varname]), ha='center', va='bottom') ax.grid(True) ax.set_xticks([0, 5, 10, 25, 50, 75, 90, 95, 100]) ax.set_xlim(0, 110) ax.text(1.02, 1, 'Ob (Min Max)', transform=ax.transAxes) ax2 = ax.twinx() ax2.set_ylim(-0.5, len(df.index) - 0.5) ax2.set_yticks(range(len(df.index))) ax2.set_yticklabels(y2labels) return plt.gcf(), df
def make_plot(form): """Make the plot""" uniqueid = form.getfirst('site', 'ISUAG').split("::")[0] pgconn = get_dbconn('sustainablecorn') viewopt = form.getfirst('view', 'plot') varname = form.getfirst('varname', 'WAT2') df = read_sql(""" SELECT uniqueid, plotid, valid at time zone 'UTC' as v, value from waterquality_data WHERE uniqueid = %s and varname = %s ORDER by valid ASC """, pgconn, params=(uniqueid, varname)) if viewopt not in ['plot', 'js']: newcolname = "%s, %s" % (VARDICT[varname]['title'], VARDICT[varname]['units']) df.rename(columns=dict(v='timestamp', value=newcolname), inplace=True) df = add_bling(pgconn, df, 'Water') if viewopt == 'html': ssw("Content-type: text/html\n\n") ssw(df.to_html(index=False)) return if viewopt == 'csv': ssw('Content-type: application/octet-stream\n') ssw(('Content-Disposition: attachment; ' 'filename=%s.csv\n\n') % (uniqueid, )) ssw(df.to_csv(index=False)) return if viewopt == 'excel': ssw('Content-type: application/octet-stream\n') ssw(('Content-Disposition: attachment; ' 'filename=%s.xlsx\n\n') % (uniqueid, )) writer = pd.ExcelWriter('/tmp/ss.xlsx') df.to_excel(writer, 'Data', index=False) worksheet = writer.sheets['Data'] worksheet.freeze_panes(3, 0) writer.save() ssw(open('/tmp/ss.xlsx', 'rb').read()) os.unlink('/tmp/ss.xlsx') return # Begin highcharts output ssw("Content-type: application/javascript\n\n") title = ("Water Quality for Site: %s") % (uniqueid, ) splots = [] plot_ids = df['plotid'].unique() plot_ids.sort() df['ticks'] = df['v'].astype(np.int64) // 10**6 for plotid in plot_ids: df2 = df[df['plotid'] == plotid] splots.append( ("""{type: 'scatter', name: '""" + plotid + """', data: """ + str([[a, b] for a, b in zip(df2['ticks'].values, df2['value'].values)]) + """ }""").replace("None", "null").replace("nan", "null")) series = ",".join(splots) ssw(""" $("#hc").highcharts({ title: {text: '""" + title + """'}, chart: {zoomType: 'x'}, yAxis: {title: {text: '""" + VARDICT[varname]["title"] + """ """ + VARDICT[varname]["units"] + """'} }, plotOptions: {line: {turboThreshold: 0} }, xAxis: { type: 'datetime' }, tooltip: { pointFormat: 'date: <b>{point.x:%b %e %Y, %H:%M}</b><br/>value: <b>{point.y}</b><br/>', shared: true, valueDecimals: 2, valueSuffix: '""" + VARDICT[varname]["units"] + """' }, series: [""" + series + """] }); """)
get_ipython().magic('matplotlib inline')
plt.plot(res[:, 0], res[:, 1], 'ro')
plt.grid(True); plt.xlim(-0.5, 4.5); plt.ylim(-4.5, 0.5)

"""
### 7.2.2 From SQL to pandas
Reading a whole table or a query result with pandas is usually more efficient.
When the whole table fits into memory, analytical queries generally run much faster
in memory than with the disk-based SQL approach.
In summary, for the 50 MB simulated data set:
  - writing the data into SQLite3 takes several seconds, while pandas needs less than 1 s
  - reading the data back from the SQL database takes several seconds, while pandas needs less than 1 s
"""
# the pandas.io.sql sub-library contains functions for handling data stored in SQL databases
import pandas.io.sql as pds
%time data = pds.read_sql('SELECT * FROM numbers', con)
data.head()

# once in memory, the data can be analysed much faster
%time data[(data['No1'] > 0) & (data['No2'] < 0)].head()

# pandas can handle more complex queries as well, but it is no replacement for
# structurally complex relational data models
%time res = data[['No1', 'No2']][((data['No1'] > 0.5) | (data['No1'] < -0.5)) \
                                 & ((data['No2'] < -1) | (data['No2'] > 1))]
plt.plot(res.No1, res.No2, 'ro')
plt.grid(True); plt.axis('tight')

"""
As expected, the in-memory analytics of pandas give a large speed-up, as long as pandas
can replicate the corresponding SQL statement. This is not an advantage of pandas alone,
but one achieved by pandas in close integration with PyTables.
"""
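# A minimal sketch for the case where the table does NOT fit into memory: read_sql can
# also iterate over chunks via the chunksize argument (assumes a pandas version that
# supports it); the filter applied per chunk mirrors the in-memory example above.
import pandas.io.sql as pds

total = 0
for chunk in pds.read_sql('SELECT * FROM numbers', con, chunksize=100000):
    # each chunk is a regular DataFrame, so the same boolean filters apply
    total += len(chunk[(chunk['No1'] > 0) & (chunk['No2'] < 0)])
print(total)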
def get_df(ctx): """Figure out what data we need to fetch here""" date = ctx['date'] # Rectify to Sunday if date.isoweekday() < 7: date = date - datetime.timedelta(days=date.isoweekday()) varname = ctx['var'] pgconn = get_dbconn('coop') params = LOOKUP[varname] if isinstance(params[2], list): dlimit = " unit_desc in %s" % (str(tuple(params[2]), )) else: dlimit = " unit_desc = '%s' " % (params[2], ) df = read_sql(""" select year, week_ending, sum(num_value) as value, state_alpha from nass_quickstats where commodity_desc = %s and statisticcat_desc = %s and """ + dlimit + """ and util_practice_desc = %s and num_value is not null GROUP by year, week_ending, state_alpha ORDER by state_alpha, week_ending """, pgconn, params=(params[0], params[1], params[3]), index_col=None) if df.empty: raise NoDataFound("No NASS Data was found for query, sorry.") df['week_ending'] = pd.to_datetime(df['week_ending']) data = {} # Average atleast ten years syear = max([1981, date.year - 10]) eyear = syear + 10 for state, gdf in df.groupby('state_alpha'): sdf = gdf.copy() sdf.set_index('week_ending', inplace=True) # TOO DIFFICULT to know what to do in this case. if date.strftime("%Y-%m-%d") not in sdf.index: continue thisval = sdf.loc[date.strftime("%Y-%m-%d")]['value'] # linear interpolate data to get comparables newdf = sdf.resample('D').interpolate(method='linear') # get doy averages y10 = newdf[(newdf['year'] >= syear) & (newdf['year'] < eyear)] if y10.empty: avgval = None else: doyavgs = y10.groupby(y10.index.strftime("%m%d")).mean() if date.strftime("%m%d") in doyavgs.index: avgval = doyavgs.at[date.strftime("%m%d"), 'value'] else: avgval = None data[state] = {'avg': avgval, 'thisval': thisval} ctx['df'] = pd.DataFrame.from_dict(data, orient='index') if ctx['df'].empty: raise NoDataFound("No Data Found.") ctx['df'].dropna(how='all', inplace=True) ctx['df'].index.name = 'state' ctx['df']['departure'] = ctx['df']['thisval'] - ctx['df']['avg'] ctx['title'] = ("%s USDA NASS %s" % (date.strftime("%-d %B %Y"), PDICT[varname])) ctx['subtitle'] = ('Top value is %i percentage, bottom value is ' 'departure from %i-%i avg' % (date.year, syear, eyear - 1))
def plotter(fdict): """ Go """ pgconn = get_dbconn('coop') station = fdict.get('station', 'IA0200') table = "alldata_%s" % (station[:2], ) nt = NetworkTable("%sCLIMATE" % (station[:2], )) df = read_sql(""" with events as ( SELECT c.climoweek, a.precip, a.year from """ + table + """ a JOIN climoweek c on (c.sday = a.sday) WHERE a.station = %s and precip >= 0.01), ranks as ( SELECT climoweek, year, rank() OVER (PARTITION by climoweek ORDER by precip DESC) from events), stats as ( SELECT climoweek, max(precip), avg(precip), sum(case when precip >= 0.01 and precip < 0.26 then 1 else 0 end) as cat1, sum(case when precip >= 0.26 and precip < 0.51 then 1 else 0 end) as cat2, sum(case when precip >= 0.51 and precip < 1.01 then 1 else 0 end) as cat3, sum(case when precip >= 1.01 and precip < 2.01 then 1 else 0 end) as cat4, sum(case when precip >= 2.01 then 1 else 0 end) as cat5, count(*) from events GROUP by climoweek) SELECT e.climoweek, e.max, r.year, e.avg, e.cat1, e.cat2, e.cat3, e.cat4, e.cat5 from stats e JOIN ranks r on (r.climoweek = e.climoweek) WHERE r.rank = 1 ORDER by e.climoweek ASC """, pgconn, params=(station, ), index_col=None) res = """\ # IEM Climodat https://mesonet.agron.iastate.edu/climodat/ # Report Generated: %s # Climate Record: %s -> %s # Site Information: [%s] %s # Contact Information: Daryl Herzmann [email protected] 515.294.5978 # Based on climoweek periods, this report summarizes liquid precipitation. # Number of precip events - (%% of total) CL MAX MEAN 0.01- 0.26- 0.51- 1.01- TOTAL WK TIME PERIOD VAL YR RAIN 0.25 0.50 1.00 2.00 >2.01 DAYS """ % (datetime.date.today().strftime("%d %b %Y"), nt.sts[station]['archive_begin'].date(), datetime.date.today(), station, nt.sts[station]['name']) annEvents = 0 cat1t = 0 cat2t = 0 cat3t = 0 cat4t = 0 cat5t = 0 maxRain = 0 totRain = 0 lastcw = 0 for _, row in df.iterrows(): cw = int(row["climoweek"]) # Skip ties if cw == lastcw: continue lastcw = cw cat1 = row["cat1"] cat2 = row["cat2"] cat3 = row["cat3"] cat4 = row["cat4"] cat5 = row["cat5"] cat1t += cat1 cat2t += cat2 cat3t += cat3 cat4t += cat4 cat5t += cat5 maxval = row["max"] if maxval > maxRain: maxRain = maxval meanval = row["avg"] totEvents = cat1 + cat2 + cat3 + cat4 + cat5 annEvents += totEvents totRain += (totEvents * meanval) res += ("%3s %-13s %5.2f %i %4.2f %4i(%2i) %4i(%2i) " "%4i(%2i) %4i(%2i) %4i(%2i) %4i\n") % ( cw, CWEEK[cw], maxval, row['year'], meanval, cat1, round((float(cat1) / float(totEvents)) * 100.0), cat2, round((float(cat2) / float(totEvents)) * 100.0), cat3, round((float(cat3) / float(totEvents)) * 100.0), cat4, round((float(cat4) / float(totEvents)) * 100.0), cat5, round((float(cat5) / float(totEvents)) * 100.0), totEvents) res += ("%-17s %5.2f %4.2f %4i(%2i) %4i(%2i) " "%4i(%2i) %4i(%2i) %4i(%2i) %5i\n") % ( "ANNUAL TOTALS", maxRain, totRain / annEvents, cat1t, (float(cat1t) / float(annEvents)) * 100, cat2t, (float(cat2t) / float(annEvents)) * 100, cat3t, (float(cat3t) / float(annEvents)) * 100, cat4t, (float(cat4t) / float(annEvents)) * 100, cat5t, (float(cat5t) / float(annEvents)) * 100, annEvents) return None, df, res
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt import matplotlib.dates as mdates ctx = get_autoplot_context(fdict, get_description()) station = ctx['station'] network = ctx['network'] year = ctx['year'] season = ctx['season'] nt = NetworkTable(network) table = "alldata_%s" % (station[:2], ) pgconn = psycopg2.connect(database='coop', host='iemdb', user='******') # Have to do a redundant query to get the running values obs = read_sql(""" WITH trail as ( SELECT day, year, avg((high+low)/2.) OVER (ORDER by day ASC ROWS 91 PRECEDING) as avgt from """ + table + """ WHERE station = %s) SELECT day, avgt from trail WHERE year between %s and %s ORDER by day ASC """, pgconn, params=(station, year, year + 2), index_col='day') df = read_sql(""" WITH trail as ( SELECT day, year, avg((high+low)/2.) OVER (ORDER by day ASC ROWS 91 PRECEDING) as avgt from """ + table + """ WHERE station = %s), extremes as ( SELECT day, year, avgt, rank() OVER (PARTITION by year ORDER by avgt ASC) as minrank, rank() OVER (PARTITION by year ORDER by avgt DESC) as maxrank from trail), yearmax as ( SELECT year, min(day) as summer_end, min(avgt) as summer from extremes where maxrank = 1 GROUP by year), yearmin as ( SELECT year, min(day) as winter_end, min(avgt) as winter from extremes where minrank = 1 GROUP by year) SELECT x.year, winter_end, winter, summer_end, summer, extract(doy from winter_end)::int as winter_end_doy, extract(doy from summer_end)::int as summer_end_doy from yearmax x JOIN yearmin n on (x.year = n.year) ORDER by x.year ASC """, pgconn, params=(station, ), index_col='year') # Throw out spring of the first year for col in ['winter', 'winter_end_doy', 'winter_end']: df.at[df.index.min(), col] = None # Need to cull current year if datetime.date.today().month < 8: for col in ['summer', 'summer_end_doy', 'summer_end']: df.at[datetime.date.today().year, col] = None if datetime.date.today().month < 2: for col in ['winter', 'winter_end_doy', 'winter_end']: df.at[datetime.date.today().year, col] = None df['spring_length'] = df['summer_end_doy'] - 91 - df['winter_end_doy'] # fall is a bit tricker df['fall_length'] = None df['fall_length'].values[:-1] = ((df['winter_end_doy'].values[1:] + 365) - 91 - df['summer_end_doy'].values[:-1]) (fig, ax) = plt.subplots(3, 1, figsize=(8, 9)) ax[0].plot(obs.index.values, obs['avgt'].values) ax[0].set_title(("%s-%s [%s] %s\n91 Day Average Temperatures") % (nt.sts[station]['archive_begin'].year, year + 3, station, nt.sts[station]['name'])) ax[0].set_ylabel(r"Trailing 91 Day Avg T $^{\circ}$F") ax[0].xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y')) ax[0].grid(True) # Label the maxes and mins for yr in range(year, year + 3): if yr not in df.index: continue date = df.at[yr, 'winter_end'] val = df.at[yr, 'winter'] if date is not None: ax[0].text(date, val - 5, r"%s %.1f$^\circ$F" % (date.strftime("%-d %b"), val), ha='center') date = df.at[yr, 'summer_end'] val = df.at[yr, 'summer'] if date is not None: ax[0].text(date, val + 2, r"%s %.1f$^\circ$F" % (date.strftime("%-d %b"), val), ha='center') df2 = df.dropna() p2col = 'winter_end_doy' if season == 'spring' else 'summer_end_doy' slp, intercept, r, _, _ = stats.linregress(df2.index.values, df2[p2col].values) ax[1].scatter(df.index.values, df[p2col].values) ax[1].grid(True) # Do labelling yticks = [] yticklabels = [] for doy in range(int(df[p2col].min()), int(df[p2col].max())): date = datetime.date(2000, 1, 1) + datetime.timedelta(days=(doy - 1)) if date.day in [1, 15]: 
yticks.append(doy) yticklabels.append(date.strftime("%-d %b")) ax[1].set_yticks(yticks) ax[1].set_yticklabels(yticklabels) lbl = ("Date of Minimum (Spring Start)" if season == 'spring' else "Date of Maximum (Fall Start)") ax[1].set_ylabel(lbl) ax[1].set_xlim(df.index.min() - 1, df.index.max() + 1) avgv = df[p2col].mean() ax[1].axhline(avgv, color='r') ax[1].plot(df.index.values, intercept + (df.index.values * slp)) d = (datetime.date(2000, 1, 1) + datetime.timedelta(days=int(avgv))).strftime("%-d %b") ax[1].text(0.02, 0.02, r"$\frac{\Delta days}{decade} = %.2f,R^2=%.2f, avg = %s$" % (slp * 10.0, r**2, d), va='bottom', transform=ax[1].transAxes) ax[1].set_ylim(bottom=(ax[1].get_ylim()[0] - 10)) p3col = 'spring_length' if season == 'spring' else 'fall_length' slp, intercept, r, _, _ = stats.linregress(df2.index.values, df2[p3col]) ax[2].scatter(df.index.values, df[p3col]) ax[2].set_xlim(df.index.min() - 1, df.index.max() + 1) ax[2].set_ylabel("Length of '%s' [days]" % (season.capitalize(), )) ax[2].grid(True) avgv = df[p3col].mean() ax[2].axhline(avgv, color='r') ax[2].plot(df.index.values, intercept + (df.index.values * slp)) ax[2].text(0.02, 0.02, r"$\frac{\Delta days}{decade} = %.2f,R^2=%.2f, avg = %.1fd$" % (slp * 10.0, r**2, avgv), va='bottom', transform=ax[2].transAxes) ax[2].set_ylim(bottom=(ax[2].get_ylim()[0] - 15)) return fig, df
def plotter(fdict): """ Go """ pgconn = get_dbconn('coop') ctx = get_autoplot_context(fdict, get_description()) station = ctx['station'] delta = ctx['delta'] year = ctx['year'] nt = network.Table("%sCLIMATE" % (station[:2], )) table = "alldata_%s" % (station[:2], ) df = read_sql(""" WITH days as ( select generate_series('%s-01-01'::date, '%s-12-31'::date, '1 day'::interval)::date as day, to_char(generate_series('%s-01-01'::date, '%s-12-31'::date, '1 day'::interval)::date, 'mmdd') as sday ), climo as ( SELECT sday, avg(high) as avg_high, stddev(high) as stddev_high, avg(low) as avg_low, stddev(low) as stddev_low from """ + table + """ WHERE station = %s GROUP by sday ), thisyear as ( SELECT day, sday, high, low from """ + table + """ WHERE station = %s and year = %s ), thisyear2 as ( SELECT d.day, d.sday, t.high, t.low from days d LEFT JOIN thisyear t on (d.sday = t.sday) ) SELECT t.day, t.sday, t.high, t.low, c.avg_high, c.avg_low, c.stddev_high, c.stddev_low from thisyear2 t JOIN climo c on (t.sday = c.sday) ORDER by t.day ASC """, pgconn, params=(year, year, year, year, station, station, year), index_col='day') df.index.name = 'Date' df['high_sigma'] = (df['high'] - df['avg_high']) / df['stddev_high'] df['low_sigma'] = (df['low'] - df['avg_low']) / df['stddev_low'] (fig, ax) = plt.subplots(2, 1, sharex=True, figsize=(8, 6)) ax[0].plot(df.index, df.avg_high, color='r', linestyle='-', label='Climate High') ax[0].plot(df.index, df.avg_low, color='b', label='Climate Low') ax[0].set_ylabel(r"Temperature $^\circ\mathrm{F}$") ax[0].set_title("[%s] %s Climatology & %s Observations" % (station, nt.sts[station]['name'], year)) ax[0].plot(df.index, df.high, color='brown', label='%s High' % (year, )) ax[0].plot(df.index, df.low, color='green', label='%s Low' % (year, )) if delta == 'abs': ax[1].plot(df.index, df.high - df.avg_high, color='r', label='High Diff %s - Climate' % (year)) ax[1].plot(df.index, df.low - df.avg_low, color='b', label='Low Diff') ax[1].set_ylabel(r"Temp Difference $^\circ\mathrm{F}$") else: ax[1].plot(df.index, df.high_sigma, color='r', label='High Diff %s - Climate' % (year)) ax[1].plot(df.index, df.low_sigma, color='b', label='Low Diff') ax[1].set_ylabel(r"Temp Difference $\sigma$") ymax = max([df.high_sigma.abs().max(), df.low_sigma.abs().max()]) + 1 ax[1].set_ylim(0 - ymax, ymax) ax[1].legend(fontsize=10, ncol=2, loc='best') ax[1].grid(True) ax[0].legend(fontsize=10, ncol=2, loc=8) ax[0].grid() ax[0].xaxis.set_major_locator(mdates.MonthLocator(interval=1)) ax[0].xaxis.set_major_formatter(mdates.DateFormatter('%-d\n%b')) ax[0].set_xlim(df.index.min() - datetime.timedelta(days=3), df.index.max() + datetime.timedelta(days=3)) return fig, df
def grid_hour(ts): """ I proctor the gridding of data on an hourly basis @param ts Timestamp of the analysis, we'll consider a 20 minute window """ pprint("grid_hour called...") nc = ncopen(iemre.get_hourly_ncname(ts.year), "a", timeout=300) domain = nc.variables["hasdata"][:, :] nc.close() ts0 = ts - datetime.timedelta(minutes=10) ts1 = ts + datetime.timedelta(minutes=10) utcnow = datetime.datetime.utcnow() utcnow = utcnow.replace(tzinfo=pytz.utc) - datetime.timedelta(hours=36) # If we are near realtime, look in IEMAccess instead of ASOS database mybuf = 2.0 params = ( iemre.WEST - mybuf, iemre.SOUTH - mybuf, iemre.WEST - mybuf, iemre.NORTH + mybuf, iemre.EAST + mybuf, iemre.NORTH + mybuf, iemre.EAST + mybuf, iemre.SOUTH - mybuf, iemre.WEST - mybuf, iemre.SOUTH - mybuf, ts0, ts1, ) if utcnow < ts: dbconn = get_dbconn("iem", user="******") sql = """SELECT t.id as station, ST_x(geom) as lon, ST_y(geom) as lat, max(case when tmpf > -60 and tmpf < 130 THEN tmpf else null end) as max_tmpf, max(case when sknt > 0 and sknt < 100 then sknt else 0 end) as max_sknt, max(getskyc(skyc1)) as max_skyc1, max(getskyc(skyc2)) as max_skyc2, max(getskyc(skyc3)) as max_skyc3, max(case when phour > 0 and phour < 1000 then phour else 0 end) as phour, max(case when dwpf > -60 and dwpf < 100 THEN dwpf else null end) as max_dwpf, max(case when sknt >= 0 then sknt else 0 end) as sknt, max(case when sknt >= 0 then drct else 0 end) as drct from current_log s JOIN stations t on (s.iemid = t.iemid) WHERE ST_Contains( ST_GeomFromEWKT('SRID=4326;POLYGON((%s %s, %s %s, %s %s, %s %s, %s %s))'), geom) and valid >= %s and valid < %s GROUP by station, lon, lat """ else: dbconn = get_dbconn("asos", user="******") sql = """SELECT station, ST_x(geom) as lon, st_y(geom) as lat, max(case when tmpf > -60 and tmpf < 130 THEN tmpf else null end) as max_tmpf, max(case when sknt > 0 and sknt < 100 then sknt else 0 end) as max_sknt, max(getskyc(skyc1)) as max_skyc1, max(getskyc(skyc2)) as max_skyc2, max(getskyc(skyc3)) as max_skyc3, max(case when p01i > 0 and p01i < 1000 then p01i else 0 end) as phour, max(case when dwpf > -60 and dwpf < 100 THEN dwpf else null end) as max_dwpf, max(case when sknt >= 0 then sknt else 0 end) as sknt, max(case when sknt >= 0 then drct else 0 end) as drct from alldata a JOIN stations t on (a.station = t.id) WHERE ST_Contains( ST_GeomFromEWKT('SRID=4326;POLYGON((%s %s, %s %s, %s %s, %s %s, %s %s))'), geom) and (t.network ~* 'ASOS' or t.network = 'AWOS') and valid >= %s and valid < %s GROUP by station, lon, lat""" df = read_sql(sql, dbconn, params=params, index_col="station") pprint("got database results") if df.empty: print(("%s has no entries, FAIL") % (ts.strftime("%Y-%m-%d %H:%M"), )) return ures, vres = grid_wind(df, domain) pprint("grid_wind is done") if ures is None: print("iemre.hourly_analysis failure for uwnd at %s" % (ts, )) else: write_grid(ts, "uwnd", ures) write_grid(ts, "vwnd", vres) tmpf = generic_gridder(df, "max_tmpf", domain) pprint("grid tmpf is done") if tmpf is None: print("iemre.hourly_analysis failure for tmpk at %s" % (ts, )) else: dwpf = generic_gridder(df, "max_dwpf", domain) pprint("grid dwpf is done") # require that dwpk <= tmpk mask = ~np.isnan(dwpf) mask[mask] &= dwpf[mask] > tmpf[mask] dwpf = np.where(mask, tmpf, dwpf) write_grid(ts, "tmpk", masked_array(tmpf, data_units="degF").to("degK")) write_grid(ts, "dwpk", masked_array(dwpf, data_units="degF").to("degK")) res = grid_skyc(df, domain) pprint("grid skyc is done") if res is None: print("iemre.hourly_analysis failure for 
skyc at %s" % (ts, )) else: write_grid(ts, "skyc", res)
def make_daily_water_change_plot(ctx): """Option 7""" # Get daily precip pdf = read_sql(""" SELECT valid, rain_mm_tot_qc from sm_daily where station = %s and valid >= %s and valid <= %s ORDER by valid ASC """, ctx['pgconn'], params=(ctx['station'], ctx['sts'].date(), ctx['ets'].date()), index_col='valid') df = read_sql(""" WITH obs as ( SELECT valid, CASE WHEN t12_c_avg_qc > 1 then calc_vwc_12_avg_qc else null end as v12, CASE WHEN t24_c_avg_qc > 1 then calc_vwc_24_avg_qc else null end as v24, CASE WHEN t50_c_avg_qc > 1 then calc_vwc_50_avg_qc else null end as v50 from sm_daily where station = %s and valid >= %s and valid < %s) SELECT valid, v12, v12 - lag(v12) OVER (ORDER by valid ASC) as v12_delta, v24, v24 - lag(v24) OVER (ORDER by valid ASC) as v24_delta, v50, v50 - lag(v50) OVER (ORDER by valid ASC) as v50_delta from obs ORDER by valid ASC """, ctx['pgconn'], params=(ctx['station'], ctx['sts'], ctx['ets']), index_col=None) # df.interpolate(inplace=True, axis=1, method='nearest') l1 = 12. l2 = 12. l3 = 0. df['change'] = (df['v12_delta'] * l1 + df['v24_delta'] * l2 + df['v50_delta'] * l3) df['depth'] = df['v12'] * l1 + df['v24'] * l2 + df['v50'] * l3 (fig, ax) = plt.subplots(2, 1, sharex=True) if not df['depth'].isnull().all(): ax[0].plot(df['valid'].values, df['depth'], color='b', lw=2) oneday = datetime.timedelta(days=1) for level in [0.15, 0.25, 0.35, 0.45]: ax[0].axhline((l1 + l2 + l3) * level, c='k') ax[0].text(df['valid'].values[-1] + oneday, (l1 + l2 + l3) * level, " %.0f%%" % (level * 100., ), va='center') ax[0].grid(True) ax[0].set_ylabel("Water Depth [inch]") ax[0].set_title( ("ISUSM Station: %s Daily Soil (6-30\") Water\n" "For un-frozen condition between %s and %s") % (ctx['nt'].sts[ctx['station']]['name'], ctx['sts'].strftime("%-d %b %Y"), ctx['ets'].strftime("%-d %b %Y"))) bars = ax[1].bar(df['valid'].values, df['change'].values, fc='b', ec='b') for mybar in bars: if mybar.get_y() < 0: mybar.set_facecolor('r') mybar.set_edgecolor('r') ax[1].set_ylabel("Soil Water Change [inch]") ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%-d %b\n%Y')) interval = int(len(df.index) / 7 + 1) ax[1].xaxis.set_major_locator(mdates.DayLocator(interval=interval)) ax[1].grid(True) if (ctx['ets'] - ctx['sts']).total_seconds() < (60 * 86400): ylim = ax[1].get_ylim()[1] # Attempt to place precip text above this plot pdf['pday'] = distance(pdf['rain_mm_tot_qc'].values, 'MM').value('IN') for valid, row in pdf.iterrows(): if row['pday'] > 0: ax[1].text(valid, ylim, "%.2f" % (row['pday'], ), rotation=90, va='bottom', color='b') ax[1].text(-0.01, 1.05, "Rain ->", ha='right', transform=ax[1].transAxes, color='b') ax[0].set_xlim(df['valid'].min() - oneday, df['valid'].max() + oneday) return fig, df
def plot2(ctx): """Just soil temps""" df = read_sql("""SELECT * from sm_hourly WHERE station = %s and valid BETWEEN %s and %s ORDER by valid ASC """, ctx['pgconn'], params=(ctx['station'], ctx['sts'], ctx['ets']), index_col='valid') d12t = df['t12_c_avg_qc'] d24t = df['t24_c_avg_qc'] d50t = df['t50_c_avg_qc'] tsoil = df['tsoil_c_avg_qc'] valid = df.index.values # maxy = max([np.max(d12sm), np.max(d24sm), np.max(d50sm)]) # miny = min([np.min(d12sm), np.min(d24sm), np.min(d50sm)]) (fig, ax) = plt.subplots(1, 1) ax.grid(True) ax.set_title(("ISUSM Station: %s Timeseries\n" "Soil Temperature at Depth\n ") % (ctx['nt'].sts[ctx['station']]['name'], )) ax.plot(valid, temperature(tsoil, 'C').value('F'), linewidth=2, color='brown', label='4 inch') if not d12t.isnull().any(): ax.plot(valid, temperature(d12t, 'C').value('F'), linewidth=2, color='r', label='12 inch') if not d24t.isnull().any(): ax.plot(valid, temperature(d24t, 'C').value('F'), linewidth=2, color='purple', label='24 inch') if not d50t.isnull().any(): ax.plot(valid, temperature(d50t, 'C').value('F'), linewidth=2, color='black', label='50 inch') box = ax.get_position() ax.set_position([box.x0, box.y0, box.width, box.height * 0.9]) ax.legend(bbox_to_anchor=(0.5, 1.02), ncol=4, loc='center', fontsize=12) days = (ctx['ets'] - ctx['sts']).days if days >= 3: interval = max(int(days / 7), 1) ax.xaxis.set_major_locator( mdates.DayLocator(interval=interval, tz=pytz.timezone("America/Chicago"))) ax.xaxis.set_major_formatter( mdates.DateFormatter('%-d %b\n%Y', tz=pytz.timezone("America/Chicago"))) else: ax.xaxis.set_major_locator( mdates.AutoDateLocator(maxticks=10, tz=pytz.timezone("America/Chicago"))) ax.xaxis.set_major_formatter( mdates.DateFormatter('%-I %p\n%d %b', tz=pytz.timezone("America/Chicago"))) if ax.get_ylim()[0] < 40: ax.axhline(32, linestyle='--', lw=2, color='tan') ax.set_ylabel(r"Temperature $^\circ$F") return fig, df
def plot1(ctx): """Do main plotting logic""" df = read_sql(""" SELECT * from sm_hourly WHERE station = %s and valid BETWEEN %s and %s ORDER by valid ASC """, ctx['pgconn'], params=(ctx['station'], ctx['sts'], ctx['ets']), index_col='valid') if df.empty: raise ValueError("No Data Found for This Plot.") slrkw = df['slrkw_avg_qc'] d12sm = df['calc_vwc_12_avg_qc'] d12t = df['t12_c_avg_qc'] d24t = df['t24_c_avg_qc'] d50t = df['t50_c_avg_qc'] d24sm = df['calc_vwc_24_avg_qc'] d50sm = df['calc_vwc_50_avg_qc'] rain = df['rain_mm_tot_qc'] tair = df['tair_c_avg_qc'] tsoil = df['tsoil_c_avg_qc'] valid = df.index.values (fig, ax) = plt.subplots(3, 1, sharex=True, figsize=(8, 8)) ax[0].grid(True) ax2 = ax[0].twinx() ax[0].set_zorder(ax2.get_zorder() + 1) ax[0].patch.set_visible(False) # arange leads to funky values ax2.set_yticks([-0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0]) ax2.set_yticklabels([0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0]) ax2.set_ylim(-0.6, 0) ax2.set_ylabel("Hourly Precipitation [inch]") b1 = ax2.bar(valid, 0 - rain / 25.4, width=0.04, fc='b', ec='b', zorder=4) l1 = None l2 = None l3 = None if not d12sm.isnull().all(): l1, = ax[0].plot(valid, d12sm * 100.0, linewidth=2, color='r', zorder=5) if not d24sm.isnull().all(): l2, = ax[0].plot(valid, d24sm * 100.0, linewidth=2, color='purple', zorder=5) if not d50sm.isnull().all(): l3, = ax[0].plot(valid, d50sm * 100.0, linewidth=2, color='black', zorder=5) ax[0].set_ylabel("Volumetric Soil Water Content [%]", fontsize=10) days = (ctx['ets'] - ctx['sts']).days if days >= 3: interval = max(int(days / 7), 1) ax[0].xaxis.set_major_locator( mdates.DayLocator(interval=interval, tz=pytz.timezone("America/Chicago"))) ax[0].xaxis.set_major_formatter( mdates.DateFormatter('%-d %b\n%Y', tz=pytz.timezone("America/Chicago"))) else: ax[0].xaxis.set_major_locator( mdates.AutoDateLocator(maxticks=10, tz=pytz.timezone("America/Chicago"))) ax[0].xaxis.set_major_formatter( mdates.DateFormatter('%-I %p\n%d %b', tz=pytz.timezone("America/Chicago"))) ax[0].set_title(("ISUSM Station: %s Timeseries") % (ctx['nt'].sts[ctx['station']]['name'], )) box = ax[0].get_position() ax[0].set_position( [box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95]) box = ax2.get_position() ax2.set_position( [box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95]) if None not in [l1, l2, l3]: ax[0].legend([l1, l2, l3, b1], ['12 inch', '24 inch', '50 inch', 'Hourly Precip'], bbox_to_anchor=(0.5, -0.15), ncol=4, loc='center', fontsize=12) # ---------------------------------------- if not d12t.isnull().all(): ax[1].plot(valid, temperature(d12t, 'C').value('F'), linewidth=2, color='r', label='12in') if not d24t.isnull().all(): ax[1].plot(valid, temperature(d24t, 'C').value('F'), linewidth=2, color='purple', label='24in') if not d50t.isnull().all(): ax[1].plot(valid, temperature(d50t, 'C').value('F'), linewidth=2, color='black', label='50in') ax[1].grid(True) ax[1].set_ylabel(r"Temperature $^\circ$F") box = ax[1].get_position() ax[1].set_position( [box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95]) # ------------------------------------------------------ ax2 = ax[2].twinx() l3, = ax2.plot(valid, slrkw, color='g', zorder=1, lw=2) ax2.set_ylabel("Solar Radiation [W/m^2]", color='g') l1, = ax[2].plot(valid, temperature(tair, 'C').value('F'), linewidth=2, color='blue', zorder=2) l2, = ax[2].plot(valid, temperature(tsoil, 'C').value('F'), linewidth=2, color='brown', zorder=2) ax[2].grid(True) ax[2].legend([l1, l2, l3], ['Air', '4" Soil', 'Solar Radiation'], 
bbox_to_anchor=(0.5, 1.1), loc='center', ncol=3) ax[2].set_ylabel(r"Temperature $^\circ$F") ax[2].set_zorder(ax2.get_zorder() + 1) ax[2].patch.set_visible(False) ax[0].set_xlim(df.index.min(), df.index.max()) return fig, df
def plotter(fdict): """ Go """ pgconn = util.get_dbconn('coop') ctx = util.get_autoplot_context(fdict, get_description()) station = ctx['station'] table = "alldata_%s" % (station[:2], ) df = read_sql(""" with data as ( select day, high, year, rank() OVER (PARTITION by high ORDER by sday DESC) from """ + table + """ where station = %s) SELECT day, year, high, rank from data WHERE rank = 1 ORDER by high DESC, day DESC """, pgconn, params=(station, ), index_col=None) if df.empty: raise NoDataFound("No data found!") (fig, ax) = plt.subplots(1, 1, figsize=(6, 8)) current = { 'd2000': datetime.date(2000, 1, 1), 'date': datetime.date(2000, 1, 1), 'ties': False } x = [] y = [] for level in np.arange(df['high'].max(), 0, -1): if level not in df['high'].values: continue df2 = df[df['high'] == level] row = df2.iloc[0] if row['day'].replace(year=2000) > current['d2000']: current['d2000'] = row['day'].replace(year=2000) current['date'] = row['day'] current['ties'] = (len(df2.index) > 1) if current['date'].month == 12 and current['date'].day > 20: break y.append(level) x.append(int(current['d2000'].strftime("%j"))) ax.text(x[-1] + 3, level, "%s -- %s %s%s" % (level, current['d2000'].strftime("%-d %b"), current['date'].year, " **" if current['ties'] else ""), va='center') ax.barh(y, x, align='center') ax.set_xticks((1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 365)) ax.set_xticklabels(calendar.month_abbr[1:]) ax.set_xlim(min(x) - 5, 400) ax.set_ylim(y[-1] - 1, y[0] + 1) ax.grid(True) ab = ctx['_nt'].sts[station]['archive_begin'] if ab is None: raise NoDataFound("Unknown station metadata.") ax.set_title( ("Most Recent & Latest Date of High Temperature\n" "[%s] %s (%s-%s)") % (station, ctx['_nt'].sts[station]['name'], ab.year, datetime.date.today().year)) ax.set_ylabel(r"High Temperature $^\circ$F") ax.set_xlabel("** denotes ties") return fig, df
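One detail worth calling out in the loop above: the membership operator on a pandas Series checks the index labels, not the stored values, which is why the level lookup goes through .values. A small self-contained illustration:

import pandas as pd

s = pd.Series([98, 99, 101])
print(98 in s)             # False: "in" checks the index labels (0, 1, 2)
print(98 in s.values)      # True: checks the stored values
print(s.isin([98]).any())  # True: vectorized membership test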
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt pgconn = psycopg2.connect(database='coop', host='iemdb', user='******') station = fdict.get('station', 'IA2203') season = fdict.get('season', 'fall') table = "alldata_%s" % (station[:2],) nt = network.Table("%sCLIMATE" % (station[:2],)) df = read_sql(""" WITH obs as ( SELECT day, year, month, high, low, case when month > 6 then 'fall' else 'spring' end as season from """ + table + """ WHERE station = %s), data as ( SELECT year, day, season, max(high) OVER (PARTITION by year, season ORDER by day ASC ROWS BETWEEN 183 PRECEDING and CURRENT ROW) as mh, min(low) OVER (PARTITION by year, season ORDER by day ASC ROWS BETWEEN 183 PRECEDING and CURRENT ROW) as ml from obs), lows as ( SELECT year, day, ml as level, season, rank() OVER (PARTITION by year, ml ORDER by day ASC) from data WHERE season = 'fall'), highs as ( SELECT year, day, mh as level, season, rank() OVER (PARTITION by year, mh ORDER by day ASC) from data WHERE season = 'spring') (SELECT year, day, extract(doy from day) as doy, level, season from lows WHERE rank = 1) UNION (SELECT year, day, extract(doy from day) as doy, level, season from highs WHERE rank = 1) """, pgconn, params=[station]) df2 = df[df['season'] == season] (fig, ax) = plt.subplots(3, 1, figsize=(7, 10)) dyear = df2.groupby(['year']).count() ax[0].bar(dyear.index, dyear['level'], facecolor='tan', edgecolor='tan') ax[0].axhline(dyear['level'].mean(), lw=2) ax[0].set_ylabel("Yearly Events Avg: %.1f" % (dyear['level'].mean(), )) ax[0].set_xlim(dyear.index.min()-1, dyear.index.max()+1) title = "%s Steps %s" % (PDICT[season], "Down" if season == 'fall' else 'Up') ax[0].set_title("%s [%s]\n%s in Temperature" % (nt.sts[station]['name'], station, title)) ax[0].grid(True) ax[1].hist(np.array(df2['level'], 'f'), bins=np.arange(df2['level'].min(), df2['level'].max()+1, 2), normed=True, facecolor='tan') ax[1].set_ylabel("Probability Density") ax[1].axvline(32, lw=2) ax[1].grid(True) ax[1].set_xlabel("Temperature $^\circ$F, 32 degrees highlighted") ax[2].hist(np.array(df2['doy'], 'f'), bins=np.arange(df2['doy'].min(), df2['doy'].max()+1, 3), normed=True, facecolor='tan') ax[2].set_xticks((1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 365)) ax[2].set_xticklabels(calendar.month_abbr[1:]) ax[2].set_xlim(df2['doy'].min() - 3, df2['doy'].max() + 3) ax[2].set_ylabel("Probability Density") ax[2].grid(True) ax[2].set_xlabel("Day of Year, 3 Day Bins") return fig, df
def plotter(fdict): """ Go """ pgconn = get_dbconn('coop') ctx = get_autoplot_context(fdict, get_description()) station = ctx['station'] network = ctx['network'] year1 = ctx.get('year1') year2 = ctx.get('year2') year3 = ctx.get('year3') nt = NetworkTable(network) table = "alldata_%s" % (station[:2], ) df = read_sql(""" WITH years as (SELECT distinct year from """ + table + """ WHERE station = %s and sday = '0101') SELECT day, sday, year, precip, sum(precip) OVER (PARTITION by year ORDER by day ASC) as accum from """ + table + """ WHERE station = %s and year in (select year from years) ORDER by day ASC """, pgconn, params=(station, station), index_col='day') if df.empty: raise ValueError("No data found!") (fig, ax) = plt.subplots(1, 1) # Average jday = df[['sday', 'accum']].groupby('sday').mean() ax.plot(range(1, len(jday.index) + 1), jday['accum'], lw=2, zorder=5, color='k', label='Average - %.2f' % (jday['accum'].iloc[-1], )) # Min and Max jmin = df[['sday', 'accum']].groupby('sday').min() jmax = df[['sday', 'accum']].groupby('sday').max() ax.fill_between(range(1, len(jday.index) + 1), jmin['accum'], jmax['accum'], zorder=2, color='tan') # find max year plotted = [] for year, color in zip([ df['accum'].idxmax().year, df[df['sday'] == '1231']['accum'].idxmin().year, year1, year2, year3 ], ['b', 'brown', 'r', 'g', 'purple']): if year is None or year in plotted: continue plotted.append(year) df2 = df[df['year'] == year] ax.plot(range(1, len(df2.index) + 1), df2['accum'], label='%s - %.2f' % (year, df2['accum'].iloc[-1]), color=color, lw=2) ax.set_title( ("Year to Date Accumulated Precipitation\n" "[%s] %s (%s-%s)") % (station, nt.sts[station]['name'], nt.sts[station]['archive_begin'].year, datetime.date.today().year)) ax.set_ylabel("Precipitation [inch]") ax.grid(True) ax.legend(loc=2) ax.set_xlim(1, 366) ax.set_xticks((1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 365)) ax.set_xticklabels(calendar.month_abbr[1:]) return fig, df
captured = """ select a.column::date as "Date" ,b.Column2 as "RefNumber" ,round(a.column::numeric, 2) as "Amount" from schema.table a inner join schema2.table2 b on a.column1 = b.column2 where a.column = 'keyword' and (a.datecolumn::date >= date_trunc('month', current_date - interval '1' month) and a.datecolumna::date < date_trunc('month', current_date)) order by 1 asc, 2 asc limit 5000 """ #create variable DF from the SQL results df = read_sql(captured, conn, coerce_float=True) #show what's in DF print(df) #write sheet LastMonth Captured in the above created excel file and add results stored in DF df.to_excel(writer, sheet_name=capturedsheet, index=False) #Sales sheet SQL code sales = """ select date_column::date as "Date" ,column_Ref_number as "RefNumber" ,Column_amount::numeric as "Amount" from schema.table where column_ID = 'ID number' and date_column::date >= date_trunc('month', current_date - interval '1' month)
import sys import psycopg2 as pg import pandas.io.sql as psql import matplotlib as mpl mpl.use('Agg') mpl.rcParams['figure.dpi'] = 150 import matplotlib.pyplot as plt import numpy as np import tool.acqconv from tqdm import tqdm if len(sys.argv) < 3: sys.exit(1) con = pg.connect("dbname=eusospb_data user=eusospb password=1e20eVjemeuso host=localhost") dataframe_9ec = psql.read_sql( "SELECT " \ "event_id, global_gtu, gtu_in_packet, packet_id, num_triggered_pixels, source_file_acquisition_full, x_y_active_pixels_num "\ "FROM spb_processing_event_ver2 " \ "WHERE source_file_acquisition LIKE 'allpackets-SPBEUSO-ACQUISITION-2017%' AND source_file_acquisition NOT LIKE '%sqz.root' AND " \ " x_y_active_pixels_num > 2048 ORDER BY x_y_active_pixels_num DESC LIMIT {limit:d} OFFSET {offset:d}".format(limit=int(sys.argv[1]),offset=int(sys.argv[2])),con) #dataframe_9ec.head(100) i=0 background_template_event_ids = [] plot_displayed = False with open('bg_tmp_analysis_log_limit_{}_offset_{}.txt'.format(sys.argv[1],sys.argv[2]),"w") as logfile: i=0 ax_i = 0 for idx, row in tqdm(dataframe_9ec.iterrows()):
def recuperaDocumentos(regionais): for sigla_trt in regionais: # sigla_trt='07' print( "----------------------------------------------------------------------------" ) print('Retrieving ALL non-confidential documents for TRT ' + sigla_trt) porta = '5' + sigla_trt + '2' nome_arquivo_documentos_selecionados = 'TRT_' + sigla_trt + '_2G_2010-2019_documentosSelecionados.csv' try: # ----------------------------------------------------------------------------------------------------------------------- # Retrieve the documents conn = psycopg2.connect(dbname=dbname_2g, user=userbugfix, password=senhabugfix, host=ipbugfix, port=porta) # conn = psycopg2.connect(dbname='pje_2grau_consulta', user=userbugfix, password=senhabugfix, host=ipbugfix, port=porta) start_time = time.time() sql_count = """select max(id_processo_documento) from tb_processo_documento""" total_registros = (psql.read_sql(sql_count, conn)) total_registros = total_registros['max'][0] print( 'Found ' + str(total_registros) + ' documents in total in table tb_processo_documento of TRT ' + sigla_trt) if os.path.isfile(path + nome_arquivo_documentos_selecionados): os.remove(path + nome_arquivo_documentos_selecionados) chunk_size = 1000 offset = 1000 dfs = [] while True: # for i in range(1,5): sql = sql_original + " and doc.id_processo_documento >= %d and doc.id_processo_documento < %d limit %d " % ( offset - chunk_size, offset, chunk_size) dfs.append(psql.read_sql(sql, conn)) if offset == 1000: print('First rows retrieved ...' + sql[-100:]) dfs[-1].to_csv(path + nome_arquivo_documentos_selecionados, mode='a', header=True, quoting=csv.QUOTE_ALL) a = dfs[-1] else: dfs[-1].to_csv(path + nome_arquivo_documentos_selecionados, mode='a', header=False, quoting=csv.QUOTE_ALL) offset += chunk_size if offset > total_registros + chunk_size: print('Last sql executed ...' + sql[-100:]) print('Data retrieved successfully.') break total_time = time.time() - start_time print('\nTime to retrieve data: ' + str(timedelta(seconds=(total_time)))) except Exception as e: print("\033[91mCould not connect to the database of TRT " + sigla_trt + "\033[0m") print(e) continue
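As an alternative to the manual id-based pagination in recuperaDocumentos, pandas can stream a result set itself through the chunksize argument to read_sql, which yields DataFrames incrementally from the cursor. A minimal sketch, assuming an open DBAPI connection conn and the same source table; the output file name is illustrative only:

import csv
import pandas.io.sql as psql

first = True
# Each iteration yields a DataFrame of up to 1000 rows fetched lazily from the cursor.
for chunk in psql.read_sql("select * from tb_processo_documento", conn, chunksize=1000):
    chunk.to_csv("documentos.csv", mode="a", header=first, index=False, quoting=csv.QUOTE_ALL)
    first = False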
def get_data(ctx): """ Get the data""" days = ctx['days'] varname = ctx['var'] offset = 6 if varname.startswith('coldest') else 0 station = ctx['station'] if ctx['network'].endswith("CLIMATE"): table = "alldata_%s" % (station[:2], ) pgconn = get_dbconn('coop') highcol = "high" lowcol = "low" precipcol = "precip" stationcol = "station" else: station = ctx['_nt'].sts[station]['iemid'] pgconn = get_dbconn('iem') highcol = "max_tmpf" lowcol = "min_tmpf" precipcol = "pday" table = "summary" stationcol = "iemid" df = read_sql(""" WITH data as ( SELECT day, extract(year from day + '%s months'::interval) as season, avg((""" + highcol + """ + """ + lowcol + """)/2.) OVER (ORDER by day ASC ROWS %s preceding) as avg_temp, avg(""" + highcol + """) OVER (ORDER by day ASC ROWS %s preceding) as avg_hitemp, avg(""" + lowcol + """) OVER (ORDER by day ASC ROWS %s preceding) as avg_lotemp, sum(""" + precipcol + """) OVER (ORDER by day ASC ROWS %s preceding) as sum_precip from """+table+""" WHERE """ + stationcol + """ = %s), agg1 as ( SELECT season, day, avg_temp, avg_hitemp, avg_lotemp, sum_precip, rank() OVER (PARTITION by season ORDER by avg_temp ASC) as coldest_temp_rank, rank() OVER (PARTITION by season ORDER by avg_hitemp ASC) as coldest_hitemp_rank, rank() OVER (PARTITION by season ORDER by avg_lotemp ASC) as coldest_lotemp_rank, rank() OVER (PARTITION by season ORDER by avg_temp DESC) as warmest_temp_rank, rank() OVER (PARTITION by season ORDER by avg_hitemp DESC) as warmest_hitemp_rank, rank() OVER (PARTITION by season ORDER by avg_lotemp DESC) as warmest_lotemp_rank, rank() OVER (PARTITION by season ORDER by sum_precip DESC) as wettest_rank, count(*) OVER (PARTITION by season) from data) SELECT season, day, extract(doy from day - '%s days'::interval)::int as doy, avg_temp, avg_hitemp, avg_lotemp, sum_precip from agg1 where """+varname+"""_rank = 1 and count > 240 """, pgconn, params=(offset, days - 1, days - 1, days - 1, days - 1, station, days - 1), index_col='season') if varname.startswith('coldest'): df.loc[df['doy'] < 183, 'doy'] += 365. return df
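get_data above splices column and table names that are fixed in code into the SQL string and passes only data values through params, which is the pattern throughout these scripts. When identifiers genuinely have to vary, psycopg2's sql module can compose them safely instead; a sketch with hypothetical table and column names, assuming an open psycopg2 connection pgconn:

from psycopg2 import sql
from pandas.io.sql import read_sql

# Quote identifiers through sql.Identifier and keep data values as bound parameters.
query = sql.SQL("SELECT day, {col} FROM {tbl} WHERE station = %s").format(
    col=sql.Identifier("high"),
    tbl=sql.Identifier("alldata_ia"),
)
df = read_sql(query.as_string(pgconn), pgconn, params=("IA0200",))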
import psycopg2 import sys from pandas.io.sql import read_sql pgconn = psycopg2.connect(database='idep', host='iemdb', user='******') df = read_sql(""" with yearly as ( SELECT huc_12, extract(year from valid) as yr, sum(avg_loss) from results_by_huc12 where scenario = 0 GROUP by huc_12, yr), agg as ( SELECT huc_12, avg(sum) from yearly GROUP by huc_12) SELECT st_x(st_centroid(st_transform(geom, 4326))), st_y(st_centroid(st_transform(geom, 4326))), a.huc_12, avg from huc12 h JOIN agg a on (h.huc_12 = a.huc_12) WHERE h.states ~* 'IA' ORDER by avg DESC """, pgconn, index_col='huc_12') pgconn = psycopg2.connect(database='postgis', host='iemdb', user='******') cursor = pgconn.cursor() DONE = [] for i, row in df.iterrows(): cursor.execute("""SELECT name from ugcs where end_ts is null and state = 'IA' and substr(ugc, 3, 1) = 'C' and ST_Contains(geom, St_SetSRID(ST_Point(%s, %s), 4326)) """, (row['st_x'], row['st_y'])) name = cursor.fetchone()[0] if name not in DONE: print name
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt pgconn = psycopg2.connect(database='iem', host='iemdb', user='******') ctx = get_autoplot_context(fdict, get_description()) sts = ctx['sts'] ets = ctx['ets'] varname = ctx['var'] df = read_sql(""" WITH data as ( SELECT valid, high - high_normal as high_delta, low - low_normal as low_delta, precip, snow from cli_data where valid >= %s and valid <= %s and substr(station, 1, 1) = 'K' ) SELECT valid, sum(case when high_delta > 0 then 1 else 0 end) as high_above, sum(case when high_delta = 0 then 1 else 0 end) as high_equal, sum(case when high_delta < 0 then 1 else 0 end) as high_below, sum(case when low_delta > 0 then 1 else 0 end) as low_above, sum(case when low_delta = 0 then 1 else 0 end) as low_equal, sum(case when low_delta < 0 then 1 else 0 end) as low_below, sum(case when precip > 0 then 1 else 0 end) as precip_above, sum(case when precip = 0 then 1 else 0 end) as precip_below, sum(case when snow > 0 then 1 else 0 end) as snow_above, sum(case when snow = 0 then 1 else 0 end) as snow_below from data GROUP by valid ORDER by valid ASC """, pgconn, params=(sts, ets), index_col='valid') if len(df.index) == 0: return 'Error, no results returned!' for v in ['precip', 'snow']: if varname == v: xlabel = "<-- No %s %% | %s %% -->" % (v.capitalize(), v.capitalize()) df[v + '_count'] = df[v + '_above'] + df[v + '_below'] colors = ['r', 'b'] for v in ['high', 'low']: df[v + '_count'] = (df[v + '_above'] + df[v + '_below'] + df[v + '_equal']) if varname == v: xlabel = "<-- Below Average % | Above Average % -->" colors = ['b', 'r'] (fig, ax) = plt.subplots(1, 1) ax.barh(df.index.values, 0 - (df[varname + '_below'] / df[varname + '_count'] * 100.), fc=colors[0], ec=colors[0], align='center') ax.barh(df.index.values, df[varname + '_above'] / df[varname + '_count'] * 100., fc=colors[1], ec=colors[1], align='center') ax.set_xlim(-100, 100) ax.grid(True) ax.set_title( ("Percentage of CONUS NWS First Order CLImate Sites\n" "(%s - %s) %s") % (sts.strftime("%-d %b %Y"), ets.strftime("%-d %b %Y"), MDICT.get(varname))) ax.set_xlabel(xlabel) ticks = [-100, -90, -75, -50, -25, -10, 0, 10, 25, 50, 75, 90, 100] ax.set_xticks(ticks) ax.set_xticklabels([abs(x) for x in ticks]) plt.setp(ax.get_xticklabels(), rotation=30) ax.set_position([0.2, 0.15, 0.75, 0.75]) return fig, df
def plotter(fdict): """ Go """ pgconn = get_dbconn('postgis') ctx = get_autoplot_context(fdict, get_description()) sts = ctx['sdate'] sts = sts.replace(tzinfo=pytz.UTC) ets = ctx['edate'] ets = ets.replace(tzinfo=pytz.UTC) p1 = ctx['phenomenav1'] p2 = ctx['phenomenav2'] p3 = ctx['phenomenav3'] p4 = ctx['phenomenav4'] phenomena = [] for p in [p1, p2, p3, p4]: if p is not None: phenomena.append(p[:2]) s1 = ctx['significancev1'] s2 = ctx['significancev2'] s3 = ctx['significancev3'] s4 = ctx['significancev4'] significance = [] for s in [s1, s2, s3, s4]: if s is not None: significance.append(s[0]) pstr = [] subtitle = "" title = "" for p, s in zip(phenomena, significance): pstr.append("(phenomena = '%s' and significance = '%s')" % (p, s)) subtitle += "%s.%s " % (p, s) title += vtec.get_ps_string(p, s) if len(phenomena) > 1: title = "VTEC Unique Event" pstr = " or ".join(pstr) pstr = "(%s)" % (pstr, ) df = read_sql(""" with total as ( select distinct wfo, extract(year from issue at time zone 'UTC') as year, phenomena, significance, eventid from warnings where """ + pstr + """ and issue >= %s and issue < %s ) SELECT wfo, phenomena, significance, year, count(*) from total GROUP by wfo, phenomena, significance, year """, pgconn, params=(sts, ets)) df2 = df.groupby('wfo')['count'].sum() nt = NetworkTable("WFO") for sid in nt.sts: sid = sid[-3:] if sid not in df2: df2[sid] = 0 maxv = df2.max() bins = [0, 1, 2, 3, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100, 200] if maxv > 1000: bins = [ 0, 1, 5, 10, 50, 100, 150, 200, 250, 500, 750, 1000, 1250, 1500, 2000 ] elif maxv > 200: bins = [ 0, 1, 3, 5, 10, 20, 35, 50, 75, 100, 150, 200, 250, 500, 750, 1000 ] mp = MapPlot(sector='nws', axisbg='white', title='%s Counts by NWS Office' % (title, ), subtitle=('Valid %s - %s UTC, based on VTEC: %s') % (sts.strftime("%d %b %Y %H:%M"), ets.strftime("%d %b %Y %H:%M"), subtitle)) mp.fill_cwas(df2, bins=bins, ilabel=True) return mp.fig, df
def plotter(fdict): """ Go """ pgconn = util.get_dbconn('coop') today = datetime.date.today() ctx = util.get_autoplot_context(fdict, get_description()) station = ctx['station'] threshold = ctx['threshold'] month1 = ctx['month1'] varname1 = ctx['var1'] num1 = min([12, ctx['num1']]) month2 = ctx['month2'] varname2 = ctx['var2'] num2 = min([12, ctx['num2']]) months1, offsets1 = compute_months_and_offsets(month1, num1) months2, offsets2 = compute_months_and_offsets(month2, num2) table = "alldata_%s" % (station[:2],) nt = network.Table("%sCLIMATE" % (station[:2],)) # Compute the monthly totals df = read_sql(""" SELECT year, month, avg((high+low)/2.) as avg_temp, sum(precip) as total_precip, max(high) as max_high, min(low) as min_low, sum(case when high >= %s then 1 else 0 end) as days_high_aoa, sum(cdd(high, low, 65)) as cdd65, sum(hdd(high, low, 65)) as hdd65, sum(gddxx(32, 86, high, low)) as gdd32, sum(gddxx(41, 86, high, low)) as gdd41, sum(gddxx(46, 86, high, low)) as gdd46, sum(gddxx(48, 86, high, low)) as gdd48, sum(gddxx(50, 86, high, low)) as gdd50, sum(gddxx(51, 86, high, low)) as gdd51, sum(gddxx(52, 86, high, low)) as gdd52 from """+table+""" WHERE station = %s and day < %s GROUP by year, month """, pgconn, params=(threshold, station, today.replace(day=1)), index_col='year') xdf = combine(df, months1, offsets1) ydf = combine(df, months2, offsets2) if xdf.empty or ydf.empty: raise NoDataFound("Sorry, could not find data.") resdf = pd.DataFrame({"%s_1" % (varname1, ): xdf[varname1], "%s_2" % (varname2, ): ydf[varname2]}) resdf.dropna(inplace=True) (fig, ax) = plt.subplots(1, 1, figsize=(8, 6)) ax.scatter(resdf[varname1+"_1"], resdf[varname2+"_2"], marker='s', facecolor='b', edgecolor='b', label=None, zorder=3) ax.set_title(("%s-%s %s [%s]\n" "Comparison of Monthly Periods, Quadrant Frequency Labelled" ) % (resdf.index.min(), resdf.index.max(), nt.sts[station]['name'], station)) ax.grid(True) h_slope, intercept, r_value, _, _ = stats.linregress(resdf[varname1+"_1"], resdf[varname2+"_2"]) y = h_slope * np.arange(resdf[varname1+"_1"].min(), resdf[varname1+"_1"].max()) + intercept ax.plot(np.arange(resdf[varname1+"_1"].min(), resdf[varname1+"_1"].max()), y, lw=2, color='r', label="Slope=%.2f R$^2$=%.2f" % (h_slope, r_value ** 2)) ax.legend(fontsize=10) xmonths = ", ".join([calendar.month_abbr[x] for x in months1]) ymonths = ", ".join([calendar.month_abbr[x] for x in months2]) t1 = "" if varname1 not in ['days_high_aoa', ] else " %.0f" % (threshold,) t2 = "" if varname2 not in ['days_high_aoa', ] else " %.0f" % (threshold,) x = resdf["%s_1" % (varname1, )].mean() y = resdf["%s_2" % (varname2, )].mean() ax.set_xlabel("%s\n%s%s [%s], Avg: %.1f" % (xmonths, PDICT[varname1], t1, UNITS[varname1], x), fontsize=12) ax.set_ylabel("%s\n%s%s [%s], Avg: %.1f" % (ymonths, PDICT[varname2], t2, UNITS[varname2], y), fontsize=12) box = ax.get_position() ax.set_position([box.x0, box.y0 + box.height * 0.05, box.width, box.height * 0.95]) ax.axhline(y, linestyle='--', color='g') ax.axvline(x, linestyle='--', color='g') ur = len(resdf[(resdf["%s_1" % (varname1, )] >= x) & (resdf["%s_2" % (varname2, )] >= y)].index) ax.text(0.95, 0.75, "%s (%.1f%%)" % (ur, ur / float(len(resdf.index)) * 100.), color='tan', fontsize=24, transform=ax.transAxes, ha='right', zorder=2) lr = len(resdf[(resdf["%s_1" % (varname1, )] >= x) & (resdf["%s_2" % (varname2, )] < y)].index) ax.text(0.95, 0.25, "%s (%.1f%%)" % (lr, lr / float(len(resdf.index)) * 100.), color='tan', fontsize=24, transform=ax.transAxes, ha='right', zorder=2) ll = 
len(resdf[(resdf["%s_1" % (varname1, )] < x) & (resdf["%s_2" % (varname2, )] < y)].index) ax.text(0.05, 0.25, "%s (%.1f%%)" % (ll, ll / float(len(resdf.index)) * 100.), color='tan', fontsize=24, transform=ax.transAxes, ha='left', zorder=2) ul = len(resdf[(resdf["%s_1" % (varname1, )] < x) & (resdf["%s_2" % (varname2, )] >= y)].index) ax.text(0.05, 0.75, "%s (%.1f%%)" % (ul, ul / float(len(resdf.index)) * 100.), color='tan', fontsize=24, transform=ax.transAxes, ha='left', zorder=2) return fig, resdf
def Plot(self): """Plots image and saves image""" try: os.remove('/tmp/plot.png') except: pass if self.stype == 'plot': try: conn = connectDB() cursor = conn.cursor() cursor.execute(self.sql) data = pd.DataFrame( cursor.fetchall(), columns=['gb', 'date_trunc', 'workflow', 'type']) except TypeError as e: LOGGER.error(str(e)) return {'error': str(e)} if data.empty is not True: data.sort_values(["date_trunc", "workflow"], ascending=[False, True]) data.date_trunc = data.date_trunc.dt.date data['gb'] = data['gb'].astype(float) gdata = data.groupby(['date_trunc', 'type', 'workflow'])['gb'].sum() subsets = gdata.unstack(level=0).\ unstack(level=1).unstack(level=2) cleansubset = subsets.dropna() x_offset = -0.01 y_offset = -0.06 height = len(gdata) * 3 ax = cleansubset.plot(legend=False, kind='barh', stacked=True, subplots=False, figsize=(32, height), width=0.89, sharey=False) for p in ax.patches: b = p.get_bbox() val = "{:.0f}".format(b.x1 - b.x0) ax.annotate(val, ((b.x0 + b.x1) / 2 + x_offset, (b.y1) / (1) + y_offset), verticalalignment='top', horizontalalignment='left') fig = ax.get_figure() fig.savefig('/tmp/plot.png') LOGGER.info('saved image') plot.close(fig) else: LOGGER.info('no results, empty DataFrame') return {'error': 'emptydata, no results'} if self.stype == 'workflowplot': try: data = None conn = connectDB() cursor = conn.cursor() cursor.execute(self.sql) data = pd.DataFrame( cursor.fetchall(), columns=['gb', 'date_trunc', 'workflow', 'type']) except TypeError as e: LOGGER.error(str(e)) return {'error': str(e)} if data.empty is not True: height = 8 width = 16 mean = 5 if self.days >= 30: width = self.days height = 10 mean = 8 if self.days >= 100: width = 72 height = 18 mean = 20 if self.days >= 250: width = 160 height = 24 mean = 60 data.sort_values(["date_trunc", "workflow"], ascending=[False, True]) data.date_trunc = data.date_trunc.dt.date data['gb'] = data['gb'].astype(float) gdata = data.groupby(['date_trunc', 'workflow'])['gb'].sum() # avergae rolling mean over X days gdata_mean = gdata.rolling(mean).mean().fillna(value=0) d = gdata.unstack(level=-1).fillna(value=0) d2 = gdata_mean.unstack(level=-1) plot.style.use('ggplot') fig = plot.figure(figsize=(width, height)) ax = fig.add_subplot() ax2 = ax.twiny() ax2.set_title('Ingest workflow') ax2.set_ylabel("GB", loc='center') d2.plot(legend=True, kind='area', ax=ax, subplots=False, stacked=True, figsize=(width, height), colormap='summer') ax.legend(loc='upper left') d.plot(legend=True, kind='line', ax=ax2, subplots=False, linewidth=5.0, stacked=True, sharey=True, figsize=(width, height)) fig.get_figure() #plot.show() fig.savefig('/tmp/plot.png') LOGGER.info('saved image') plot.close(fig) else: LOGGER.error('no results, empty DataFrame') return {'error': 'emptydata, no results'} if self.stype == 'cpplot': conn = connectDB() df = None df = psql.read_sql(self.sqlcp, conn) LOGGER.info('ran dataFrame SQL read') if df.empty is not True: o = df.groupby(['date_trunc', 'organisation'], sort=False)['gb'].sum() u = o.unstack(level=-1) n = len(u.columns) height = int(n) * 6 ax = u.plot(figsize=(32, height), kind='area', subplots=True, stacked=False, colormap='Accent') #plot.show() plot.savefig('/tmp/plot.png') LOGGER.info('saved image') #plot.close(plot) else: LOGGER.error('no results, empty DataFrame') return {'error': 'emptydata, no results'} try: conn.close() except Exception as e: LOGGER.error(str(e)) pass
def do(date): """Do the necessary work for this date""" pgconn = psycopg2.connect(database='hads', host='iemdb-hads', user='******') iem_pgconn = psycopg2.connect(database='iem', host='iemdb') icursor = iem_pgconn.cursor() # load up the current obs df = read_sql(""" WITH dcp as ( SELECT id, iemid, tzname from stations where network ~* 'DCP' ), obs as ( SELECT iemid, pday from summary_""" + str(date.year) + """ WHERE day = %s) SELECT d.id, d.iemid, d.tzname, o.pday from dcp d LEFT JOIN obs o on (d.iemid = o.iemid) """, iem_pgconn, params=(date, ), index_col='id') bases = {} for tzname in df['tzname'].unique(): ts = datetime.datetime(date.year, date.month, date.day, 12) ts = ts.replace(tzinfo=pytz.timezone("UTC")) base = ts.astimezone(pytz.timezone(tzname)) bases[tzname] = base.replace(hour=0) # retrieve data that is within 12 hours of our bounds sts = datetime.datetime(date.year, date.month, date.day) - datetime.timedelta(hours=12) ets = sts + datetime.timedelta(hours=48) obsdf = read_sql(""" SELECT distinct station, valid at time zone 'UTC' as utc_valid, value from raw""" + str(date.year) + """ WHERE valid between %s and %s and substr(key, 1, 3) = 'PPH' and value >= 0 """, pgconn, params=(sts, ets), index_col=None) obsdf['utc_valid'] = obsdf['utc_valid'].dt.tz_localize('utc') precip = np.zeros((24 * 60)) grouped = obsdf.groupby('station') for station in obsdf['station'].unique(): if station not in df.index: continue precip[:] = 0 tz = df.loc[station, 'tzname'] current_pday = df.loc[station, 'pday'] for _, row in grouped.get_group(station).iterrows(): ts = row['utc_valid'].to_pydatetime() if ts <= bases[tz]: continue t1 = (ts - bases[tz]).total_seconds() / 60. t0 = max([0, t1 - 60.]) precip[int(t0):int(t1)] = row['value'] / 60. pday = np.sum(precip) if pday > 50 or np.allclose([ pday, ], [ current_pday, ]): # print("Skipping %s %s==%s" % (station, current_pday, # pday)) continue # print("Updating %s old: %s new: %s" % (station, current_pday, pday)) iemid = df.loc[station, 'iemid'] icursor.execute( """UPDATE summary_""" + str(date.year) + """ SET pday = %s WHERE iemid = %s and day = %s """, (pday, iemid, date)) if icursor.rowcount == 0: print("Adding record %s[%s] for day %s" % (station, iemid, date)) icursor.execute( """INSERT into summary_""" + str(date.year) + """ (iemid, day) VALUES (%s, %s) """, (iemid, date)) icursor.execute( """UPDATE summary_""" + str(date.year) + """ SET pday = %s WHERE iemid = %s and day = %s and %s > coalesce(pday, 0) """, (pday, iemid, date, pday)) icursor.close() iem_pgconn.commit()
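The int casts on the slice bounds in do() matter because NumPy only accepts integer slice indices; the minute-bucket fill itself is simple enough to sanity check in isolation (the values below are made up):

import numpy as np

precip = np.zeros(24 * 60)            # one bucket per minute of the local day
t1 = 125.0                            # minutes since local midnight, a float from total_seconds() / 60.
t0 = max([0, t1 - 60.0])
precip[int(t0):int(t1)] = 2.4 / 60.   # spread a 2.4 mm hourly report evenly over 60 one-minute buckets
print(round(precip.sum(), 2))         # 2.4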
def plotter(fdict): """ Go """ pgconn = get_dbconn("coop") ctx = get_autoplot_context(fdict, get_description()) station = ctx["station"] ab = ctx["_nt"].sts[station]["archive_begin"] if ab is None: raise NoDataFound("Unknown station metadata.") syear = max([ctx["syear"], ab.year]) eyear = ctx["eyear"] sts = datetime.date(syear, 11, 1) ets = datetime.date(eyear + 1, 6, 1) table = "alldata_%s" % (station[:2], ) eyear = datetime.datetime.now().year obs = np.ma.ones((eyear - syear + 1, 183), "f") * -1 df = read_sql( """ SELECT year, extract(doy from day) as doy, snowd, day, case when month < 6 then year - 1 else year end as winter_year from """ + table + """ WHERE station = %s and month in (11, 12, 1, 2, 3, 4) and snowd >= 0 and day between %s and %s """, pgconn, params=(station, sts, ets), index_col="day", ) if df.empty: raise NoDataFound("No Data Found.") minyear = df["year"].min() maxyear = df["year"].max() for _, row in df.iterrows(): doy = row["doy"] if row["doy"] < 180 else (row["doy"] - 365) obs[int(row["winter_year"]) - syear, int(doy) + 61] = row["snowd"] obs.mask = np.where(obs < 0, True, False) # obs[obs == 0] = -1 fig = plt.figure(figsize=(8, 8)) ax = fig.add_axes([0.1, 0.1, 0.93, 0.8]) ax.set_xticks((0, 29, 60, 91, 120, 151, 181)) ax.set_xticklabels( ["Nov 1", "Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1", "May 1"]) ax.set_ylabel("Year of Nov,Dec of Season Labeled") ax.set_xlabel("Date of Winter Season") ax.set_title(("[%s] %s\nDaily Snow Depth (%s-%s) [inches]") % (station, ctx["_nt"].sts[station]["name"], minyear, eyear)) cmap = copy.copy(nwssnow()) norm = mpcolors.BoundaryNorm(LEVELS, cmap.N) cmap.set_bad("#EEEEEE") cmap.set_under("white") res = ax.imshow( obs, aspect="auto", rasterized=True, norm=norm, interpolation="nearest", cmap=cmap, extent=[0, 182, eyear + 1 - 0.5, syear - 0.5], ) fig.colorbar(res, spacing="proportional", ticks=LEVELS, extend="max") ax.grid(True) ax.set_ylim(maxyear + 0.5, minyear - 0.5) return fig, df
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt pgconn = get_dbconn('coop') ctx = get_autoplot_context(fdict, get_description()) st1 = ctx['st1'][:2] st2 = ctx['st2'][:2] st3 = ctx['st3'][:2] st4 = ctx['st4'][:2] st5 = ctx['st5'][:2] st6 = ctx['st6'][:2] y1 = ctx['y1'] y2 = ctx.get('y2') y3 = ctx.get('y3') y4 = ctx.get('y4') years = [y1, y2, y3, y4] states = [st1, st2, st3, st4, st5, st6] commodity_desc = ctx['commodity_desc'] df = read_sql(""" select week_ending, state_alpha, num_value, unit_desc, year, extract(doy from week_ending) as doy from nass_quickstats where commodity_desc = %s and statisticcat_desc = 'CONDITION' and state_alpha in %s ORDER by week_ending ASC """, pgconn, params=( commodity_desc, tuple(states), ), index_col=None) if df.empty: return "ERROR: No data found!" prop = matplotlib.font_manager.FontProperties(size=10) fig, ax = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(8, 6)) i = 0 for row in range(3): for col in range(2): state = states[i] df2 = df[df['state_alpha'] == state] _years = df2['year'].unique() colors = ['black', 'green', 'blue', 'red'] for year in _years: s = df2[(df2['year'] == year) & ((df2['unit_desc'] == 'PCT POOR') | (df2['unit_desc'] == 'PCT VERY POOR'))] s2 = s[['doy', 'num_value']].groupby(by=['doy']).sum() if year in years: ax[row, col].plot(s2.index.values, s2['num_value'], c=colors.pop(), lw=3, zorder=5, label='%s' % (year, )) else: ax[row, col].plot(s2.index.values, s2['num_value'].values, c='tan') if row == 0 and col == 0: ax[row, col].legend(ncol=5, loc=(0.4, -0.19), prop=prop) ax[row, col].set_xticks( (121, 152, 182, 213, 244, 274, 305, 335, 365)) ax[row, col].set_xticklabels(calendar.month_abbr[5:]) ax[row, col].set_xlim(120, 310) ax[row, col].grid(True) ax[row, col].set_ylim(0, 100) if col == 0: ax[row, col].set_ylabel("Coverage [%]") ax[row, col].text(1., 0.5, "%s" % (state_names[state], ), ha='left', va='center', rotation=-90, size=16, transform=ax[row, col].transAxes) ax[row, col].text(0.02, 0.97, state, ha='left', va='top', size=14, transform=ax[row, col].transAxes) i += 1 fig.text(0.5, .91, ("USDA Weekly %s Crop Condition Report (%.0f-%.0f)\n" "Areal %% of State in Poor & Very Poor Condition " "(thru %s)") % (PDICT2[commodity_desc], df['year'].min(), df['year'].max(), df['week_ending'].max()), ha='center') return fig, df
def plotter(fdict): """ Go """ import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt pgconn = psycopg2.connect(database='isuag', host='iemdb', user='******') threshold = int(fdict.get('threshold', 50)) threshold_c = temperature(threshold, 'F').value('C') hours1 = int(fdict.get('hours1', 48)) hours2 = int(fdict.get('hours2', 24)) station = fdict.get('station', 'BOOI4') oldstation = XREF[station] df = read_sql(""" with obs as ( select valid, c300, lag(c300) OVER (ORDER by valid ASC) from hourly where station = %s), agg1 as ( select valid, case when c300 > %s and lag < %s then 1 when c300 < %s and lag > %s then -1 else 0 end as t from obs), agg2 as ( SELECT valid, t from agg1 where t != 0), agg3 as ( select valid, lead(valid) OVER (ORDER by valid ASC), t from agg2), agg4 as ( select extract(year from valid) as yr, valid, lead, rank() OVER (PARTITION by extract(year from valid) ORDER by valid ASC) from agg3 where t = 1 and (lead - valid) >= '%s hours'::interval), agg5 as ( select extract(year from valid) as yr, valid, lead from agg3 where t = -1) select f.yr, f.valid as fup, f.lead as flead, d.valid as dup, d.lead as dlead from agg4 f JOIN agg5 d ON (f.yr = d.yr) where f.rank = 1 and d.valid > f.valid ORDER by fup ASC """, pgconn, params=(oldstation, threshold, threshold, threshold, threshold, hours1), index_col=None) df2 = read_sql(""" with obs as ( select valid, tsoil_c_avg, lag(tsoil_c_avg) OVER (ORDER by valid ASC) from sm_hourly where station = %s), agg1 as ( select valid, case when tsoil_c_avg > %s and lag < %s then 1 when tsoil_c_avg < %s and lag > %s then -1 else 0 end as t from obs), agg2 as ( SELECT valid, t from agg1 where t != 0), agg3 as ( select valid, lead(valid) OVER (ORDER by valid ASC), t from agg2), agg4 as ( select extract(year from valid) as yr, valid, lead, rank() OVER (PARTITION by extract(year from valid) ORDER by valid ASC) from agg3 where t = 1 and (lead - valid) >= '%s hours'::interval), agg5 as ( select extract(year from valid) as yr, valid, lead from agg3 where t = -1) select f.yr, f.valid as fup, f.lead as flead, d.valid as dup, d.lead as dlead from agg4 f JOIN agg5 d ON (f.yr = d.yr) where f.rank = 1 and d.valid > f.valid ORDER by fup ASC """, pgconn, params=(station, threshold_c, threshold_c, threshold_c, threshold_c, hours1), index_col=None) (fig, ax) = plt.subplots(1, 1) d2000 = datetime.datetime(2000, 1, 1, 6) d2000 = d2000.replace(tzinfo=pytz.timezone("UTC")) for d in [df, df2]: for _, row in d.iterrows(): if row['dlead'] is None: continue f0 = (row['fup'].replace(year=2000) - d2000).total_seconds() f1 = (row['flead'].replace(year=2000) - d2000).total_seconds() d0 = (row['dup'].replace(year=2000) - d2000).total_seconds() d1 = (row['dlead'].replace(year=2000) - d2000).total_seconds() if d1 < d0: continue ax.barh(row['fup'].year, (f1 - f0), left=f0, facecolor='r', align='center', edgecolor='r') c = 'lightblue' if (d1 - d0) < (hours2 * 3600) else 'b' ax.barh(row['fup'].year, (d1 - d0), left=d0, facecolor=c, align='center', edgecolor=c) xticks = [] xticklabels = [] for i in range(1, 13): d2 = d2000.replace(month=i) xticks.append((d2 - d2000).total_seconds()) xticklabels.append(d2.strftime("%-d %b")) ax.set_xticks(xticks) ax.set_xticklabels(xticklabels) ax.set_xlim(xticks[2], xticks[6]) ax.grid(True) nt = NetworkTable("ISUSM") nt2 = NetworkTable("ISUAG") ax.set_title( ("[%s] %s 4 Inch Soil Temps\n[%s] %s used for pre-%s dates") % (station, nt.sts[station]['name'], oldstation, nt2.sts[oldstation]['name'], nt.sts[station]['archive_begin'].year)) 
ax.set_ylim(df['yr'].min() - 1, df2['yr'].max() + 1) p0 = plt.Rectangle((0, 0), 1, 1, fc="r") p1 = plt.Rectangle((0, 0), 1, 1, fc="lightblue") p2 = plt.Rectangle((0, 0), 1, 1, fc="b") ax.legend((p0, p1, p2), ('First Period Above %s for %s+ Hours' % (threshold, hours1), 'Below %s for 1+ Hours' % (threshold, ), 'Below %s for %s+ Hours' % (threshold, hours2)), ncol=2, fontsize=11, loc=(0., -0.2)) box = ax.get_position() ax.set_position( [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9]) return fig, df
def plotter(fdict): """ Go """ font0 = FontProperties() font0.set_family('monospace') font0.set_size(16) font1 = FontProperties() font1.set_size(16) pgconn = get_dbconn('asos') ctx = get_autoplot_context(fdict, get_description()) varname = ctx['var'] varname2 = varname.split("_")[1] if varname2 in ['dwpf', 'tmpf']: varname2 = "i" + varname2 month = ctx['month'] network = ctx['network'] station = ctx['zstation'] nt = NetworkTable(network) if month == 'all': months = range(1, 13) elif month == 'fall': months = [9, 10, 11] elif month == 'winter': months = [12, 1, 2] elif month == 'spring': months = [3, 4, 5] elif month == 'summer': months = [6, 7, 8] elif month == 'gs': months = [5, 6, 7, 8, 9] else: ts = datetime.datetime.strptime("2000-" + month + "-01", '%Y-%b-%d') # make sure it is length two for the trick below in SQL months = [ts.month] df = read_sql(""" WITH obs as ( SELECT (valid + '10 minutes'::interval) at time zone %s as ts, tmpf::int as itmpf, dwpf::int as idwpf, mslp, alti from alldata where station = %s and extract(month from valid at time zone %s) in %s), agg1 as ( SELECT extract(hour from ts) as hr, max(idwpf) as max_dwpf, max(itmpf) as max_tmpf, min(idwpf) as min_dwpf, min(itmpf) as min_tmpf, max(alti) as max_alti, min(alti) as min_alti, max(mslp) as max_mslp, min(mslp) as min_mslp from obs GROUP by hr) SELECT o.ts, a.hr::int as hr, a.""" + varname + """ from agg1 a JOIN obs o on (a.hr = extract(hour from o.ts) and a.""" + varname + """ = o.""" + varname2 + """) ORDER by a.hr ASC, o.ts DESC """, pgconn, params=(nt.sts[station]['tzname'], station, nt.sts[station]['tzname'], tuple(months)), index_col=None) y0 = 0.1 yheight = 0.8 dy = (yheight / 24.) (fig, ax) = plt.subplots(1, 1, figsize=(8, 8)) ax.set_position([0.12, y0, 0.57, yheight]) ax.barh(df['hr'], df[varname], align='center') ax.set_ylim(-0.5, 23.5) ax.set_yticks([0, 4, 8, 12, 16, 20]) ax.set_yticklabels(['Mid', '4 AM', '8 AM', 'Noon', '4 PM', '8 PM']) ax.grid(True) ax.set_xlim([df[varname].min() - 5, df[varname].max() + 5]) ax.set_ylabel("Local Time %s" % (nt.sts[station]['tzname'], ), fontproperties=font1) fig.text(0.5, 0.93, ("%s [%s] %s-%s\n" "%s [%s]") % (nt.sts[station]['name'], station, nt.sts[station]['archive_begin'].year, datetime.date.today().year, PDICT[varname], MDICT[month]), ha='center', fontproperties=font1) ypos = y0 + (dy / 2.) for hr in range(24): sdf = df[df['hr'] == hr] if sdf.empty: continue row = sdf.iloc[0] fig.text(0.7, ypos, "%3.0f: %s%s" % (row[varname], row['ts'].strftime("%d %b %Y"), ("*" if len(sdf.index) > 1 else '')), fontproperties=font0, va='center') ypos += dy ax.set_xlabel("%s %s, * denotes ties" % (PDICT[varname], UNITS[varname]), fontproperties=font1) return plt.gcf(), df
def plotter(fdict): """ Go """ pgconn = get_dbconn('postgis') ctx = get_autoplot_context(fdict, get_description()) opt = ctx['opt'] state = ctx['state'][:2] phenomena = ctx['phenomena'] significance = ctx['significance'] station = ctx['station'][:4] nt = NetworkTable('WFO') sts = datetime.datetime(2012, 1, 1) xticks = [] for i in range(1, 13): ts = sts.replace(month=i) xticks.append(int(ts.strftime("%j"))) (fig, ax) = plt.subplots(2, 1, sharex=True) limiter = " wfo = '%s' " % (station, ) title = "[%s] NWS %s" % (station, nt.sts[station]['name']) if opt == 'state': title = "State of %s" % (reference.state_names[state], ) limiter = " substr(ugc, 1, 2) = '%s' " % (state, ) df = read_sql(""" with obs as ( SELECT distinct extract(year from issue) as yr, extract(week from issue) as week, wfo, eventid from warnings WHERE """ + limiter + """ and phenomena = %s and significance = %s ) SELECT yr, week, count(*) from obs GROUP by yr, week ORDER by yr ASC """, pgconn, params=(phenomena, significance), index_col=None) if df.empty: raise ValueError("ERROR: No Results Found!") # Top Panel: count gdf = df.groupby('week').count() ax[0].bar((gdf.index.values - 1) * 7, gdf['yr'], width=7) ax[0].set_title(("%s\n%s (%s.%s) Events - %i to %i") % (title, vtec.get_ps_string(phenomena, significance), phenomena, significance, df['yr'].min(), df['yr'].max())) ax[0].grid() ax[0].set_ylabel("Years with 1+ Event") # Bottom Panel: events gdf = df.groupby('week').sum() ax[1].bar((gdf.index.values - 1) * 7, gdf['count'], width=7) ax[1].set_ylabel("Total Event Count") ax[1].grid() ax[1].set_xlabel("Partitioned by Week of the Year") ax[1].set_xticks(xticks) ax[1].set_xticklabels(calendar.month_abbr[1:]) ax[1].set_xlim(0, 366) return fig, df