def test_to_sql_index_label(self):
    temp_frame = DataFrame({'col1': range(4)})

    # no index name, defaults to 'index'
    sql.to_sql(temp_frame, 'test_index_label', self.conn)
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'index')

    # specifying index_label
    sql.to_sql(temp_frame, 'test_index_label', self.conn,
               if_exists='replace', index_label='other_label')
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'other_label',
                     "Specified index_label not written to database")

    # using the index name
    temp_frame.index.name = 'index_name'
    sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace')
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'index_name',
                     "Index name not written to database")

    # has index name, but specifying index_label
    sql.to_sql(temp_frame, 'test_index_label', self.conn,
               if_exists='replace', index_label='other_label')
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'other_label',
                     "Specified index_label not written to database")
def test_date_parsing(self):
    # Test date parsing in read_sql_query
    # No parsing
    df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                            flavor='sqlite')
    self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64),
                     "DateCol loaded with incorrect type")

    df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                            flavor='sqlite', parse_dates=['DateCol'])
    self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                    "DateCol loaded with incorrect type")

    df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                            flavor='sqlite',
                            parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
    self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
                    "DateCol loaded with incorrect type")

    df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                            flavor='sqlite', parse_dates=['IntDateCol'])
    self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                    "IntDateCol loaded with incorrect type")

    df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                            flavor='sqlite', parse_dates={'IntDateCol': 's'})
    self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                    "IntDateCol loaded with incorrect type")
def read():
    pd.set_option('display.encoding', 'utf-8')
    engine = create_engine(
        'mysql+mysqldb://scrapyuser:[email protected]:3306/testdb?charset=utf8',
        echo=True)
    df = sql.read_sql_query(
        "SELECT url, rooms, floor, totfloors, m2, kitchenm2, restm2, "
        "wc, walls, ceilings, rennovation, builtdate, heating, "
        "water, balcony, security, "
        "x(location) as lat, y(location) as lon, price "
        "FROM realestate "
        # "WHERE security IS NOT NULL "
        # "LIMIT 10"
        , engine)
    print df.shape

    # map balcony text values ('есть'/'да' = yes, 'нет' = no) to numeric flags
    df.loc[(df.balcony == u'есть') | (df.balcony == u'да'), 'balcony'] = 1
    df.loc[df.balcony == u'нет', 'balcony'] = -1
    df.loc[df.balcony.isnull(), 'balcony'] = 0
    # df.loc[(df.security == u'есть') | (df.security == u'да'), 'security'] = 1
    # df.loc[df.security.isnull(), 'security'] = 0
    # walls: panel - 1; brick - 2; monolithic - 3
    # df.loc[(df.walls == u'панельный') | (df.security == u'да'), 'security'] = 1
    print df

    df.to_csv('data.csv', sep=',', encoding='utf-8', index=True)
def test_sql_open_close(self):
    """
    Test if the IO in the database still works if the connection is closed
    between the writing and reading (as in many real situations).
    """
    self._load_test2_data()

    with tm.ensure_clean() as name:
        conn = self.connect(name)
        sql.to_sql(self.test_frame2, "test_frame2_legacy", conn,
                   flavor="sqlite", index=False)
        conn.close()

        conn = self.connect(name)
        result = sql.read_sql_query("SELECT * FROM test_frame2_legacy;",
                                    conn, flavor="sqlite")
        conn.close()

    tm.assert_frame_equal(self.test_frame2, result)
def get_stock_info():
    '''
    Extract stock information from companyclassified. The table is updated
    daily so the latest data is available, including stock code, listing
    date, P/E ratio and related fields.
    '''
    sql_str = "SELECT * FROM stock_company.`company_basic_info`"
    rs = sql.read_sql_query(sql=sql_str, con=conn, index_col='code',
                            coerce_float=True)
    return rs
def getStockCodeListForStockHolder(reportDate):
    df_ap = ts.get_stock_basics()
    mysql_conn = pyodbc.connect(conn_info, charset='utf8')
    sql = ("select distinct code from stock_holder_info t "
           "where date(t.report_date) = '" + reportDate + "';")
    df_exist = psql.read_sql_query(sql, mysql_conn)
    df_result = df_ap[~df_ap.index.isin(df_exist.code)]
    mysql_conn.close()
    return list(df_result.index)
def get_case_data(db):
    return sql.read_sql_query(
        select([Case.name, Case.winning_side, Case.facts, Case.dec_type,
                Case.dec_date, Case.id, Case.scdb_id]),
        con=db.engine, index_col='id')
def getStockCodeListForHistTran(start_date):
    df_ap = ts.get_stock_basics()
    mysql_conn = pyodbc.connect(conn_info, charset='utf8')
    sql = ("select distinct code from his_trans t "
           "where t.tran_date > '" + start_date + "'")
    df_exist = psql.read_sql_query(sql, mysql_conn)
    df_result = df_ap[~df_ap.index.isin(df_exist.code)]
    mysql_conn.close()
    return list(df_result.index)
def dataFrame(self):
    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    query = "select * from Equities where Code = '%s'" % (self._tickerCode)
    equityData = read_sql_query(query, connection, 'Date')
    connection.close()
    return equityData
def calculateExitValues(self):
    Logger.log(logging.INFO, "Calculating Exit Values",
               {"scope": __name__, "Rule": self._tableName, "code": self._tickerCode})

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)

    self._selectBuyQuery = ("select e.Date as Date, e.Date as TradeDate, e.Code, e.Open, e.Close, e.High, e.Low, "
                            "x.Type, x.ExitValue, x.NumberOfDays, x.ExitDetail from Equities e "
                            "left join '%s' x on e.Date = x.MatchDate and e.Code = x.Code and x.Type = 'Buy' "
                            "where e.Code = '%s' and x.ExitValue is NULL" % (self._tableName, self._tickerCode))
    self._buyExitValueDataFrame = read_sql_query(self._selectBuyQuery, connection, "Date")

    numberOfRows = self._buyExitValueDataFrame.shape[0]
    for i in range(0, numberOfRows):
        self.calculateExitValueForBuy(i, numberOfRows - i)

    self._buyExitValueDataFrame.drop('Open', axis=1, inplace=True)
    self._buyExitValueDataFrame.drop('Close', axis=1, inplace=True)
    self._buyExitValueDataFrame.drop('High', axis=1, inplace=True)
    self._buyExitValueDataFrame.drop('Low', axis=1, inplace=True)
    self._buyExitValueDataFrame['MatchDate'] = self._buyExitValueDataFrame['TradeDate'].shift(1)
    self._buyExitValueDataFrame.drop('TradeDate', axis=1, inplace=True)

    newRecords = self._buyExitValueDataFrame.query("Type=='Buy'")
    connection.executemany(self._insertQuery, newRecords.to_records(index=True))
    connection.commit()

    self._selectSellQuery = ("select e.Date as Date, e.Date as TradeDate, e.Code, e.Open, e.Close, e.High, e.Low, "
                             "x.Type, x.ExitValue, x.NumberOfDays, x.ExitDetail from Equities e "
                             "left join '%s' x on e.Date = x.MatchDate and e.Code = x.Code and x.Type = 'Sell' "
                             "where e.Code = '%s' and x.ExitValue is NULL" % (self._tableName, self._tickerCode))
    self._sellExitValueDataFrame = read_sql_query(self._selectSellQuery, connection, "Date")

    numberOfRows = self._sellExitValueDataFrame.shape[0]
    for i in range(0, numberOfRows):
        self.calculateExitValueForSell(i, numberOfRows - i)

    self._sellExitValueDataFrame.drop('Open', axis=1, inplace=True)
    self._sellExitValueDataFrame.drop('Close', axis=1, inplace=True)
    self._sellExitValueDataFrame.drop('High', axis=1, inplace=True)
    self._sellExitValueDataFrame.drop('Low', axis=1, inplace=True)
    self._sellExitValueDataFrame['MatchDate'] = self._sellExitValueDataFrame['TradeDate'].shift(1)
    self._sellExitValueDataFrame.drop('TradeDate', axis=1, inplace=True)

    newRecords = self._sellExitValueDataFrame.query("Type=='Sell'")
    connection.executemany(self._insertQuery, newRecords.to_records(index=True))
    connection.commit()

    connection.close()
def test_read_sql_delegate(self):
    iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn,
                                     flavor=self.flavor)
    iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn,
                               flavor=self.flavor)
    tm.assert_frame_equal(iris_frame1, iris_frame2,
                          "read_sql and read_sql_query have not the same"
                          " result with a query")

    self.assertRaises(ValueError, sql.read_sql, 'iris', self.conn,
                      flavor=self.flavor)
def test_read_sql_delegate(self):
    iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
    iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
    tm.assert_frame_equal(iris_frame1, iris_frame2,
                          "read_sql and read_sql_query have not the same"
                          " result with a query")

    iris_frame1 = sql.read_sql_table('iris', self.conn)
    iris_frame2 = sql.read_sql('iris', self.conn)
    tm.assert_frame_equal(iris_frame1, iris_frame2)
def test_date_and_index(self):
    # Test case where the same column appears in parse_dates and index_col
    df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                            flavor='sqlite', index_col='DateCol',
                            parse_dates=['DateCol', 'IntDateCol'])

    self.assertTrue(issubclass(df.index.dtype.type, np.datetime64),
                    "DateCol loaded with incorrect type")
    self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                    "IntDateCol loaded with incorrect type")
def to_df(queryset):
    """
    :param queryset: django.db.models.query.QuerySet
    :return: pandas.core.frame.DataFrame
    """
    try:
        query, params = queryset.query.sql_with_params()
    except EmptyResultSet:
        # Occurs when Django tries to create an expression for a
        # query which will certainly be empty
        # e.g. Book.objects.filter(author__in=[])
        return pd.DataFrame()
    return read_sql_query(query, connection, params=params)
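# Hedged usage sketch for to_df above, reusing the Book model mentioned in its
# docstring; the import path and the filter field are illustrative assumptions,
# not part of the original source.
def books_to_dataframe():
    from myapp.models import Book  # hypothetical app/model path
    # Any queryset works, including ones that resolve to an empty result set.
    return to_df(Book.objects.exclude(author__isnull=True))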
def get_data(code, excel):
    conn = pymysql.connect(**config)
    sql_st = ("select Date as date,Open as open,High as high,Close as close,Low as low,Amount as volume, "
              "AmountPrice as amount,Resumption as factor from StockPrice where Date>'2005-01-03' and "
              "Date<'2016-07-15' and ShortID={} order by Date desc;").format(code)
    df1 = ts.get_h_data(code=code, start='2005-01-04', end='2016-07-04',
                        autype='hfq', drop_factor=False)
    df2 = sql.read_sql_query(sql_st, conn, index_col='date')
    df3 = df1 - df2
    df = df3[(abs(df3.open) >= 0.01) | (abs(df3.close) >= 0.01) |
             (abs(df3.low) >= 0.01) | (abs(df3.volume) >= 0.01) |
             (abs(df3.amount) >= 0.01)]
    if not df.empty:
        # add a column to df holding the stock code
        df.insert(len(df.columns), 'code_name', value=int(code))
        df.to_excel(excel, sheet_name=code)
def test_roundtrip(self):
    sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
               con=self.conn, flavor='sqlite')
    result = sql.read_sql_query('SELECT * FROM test_frame_roundtrip',
                                con=self.conn, flavor='sqlite')

    # HACK!
    result.index = self.test_frame1.index
    result.set_index('level_0', inplace=True)
    result.index.astype(int)
    result.index.name = None
    tm.assert_frame_equal(result, self.test_frame1)
def test_to_sql_index_label_multiindex(self):
    temp_frame = DataFrame({'col1': range(4)},
                           index=MultiIndex.from_product([('A0', 'A1'),
                                                          ('B0', 'B1')]))

    # no index name, defaults to 'level_0' and 'level_1'
    sql.to_sql(temp_frame, 'test_index_label', self.conn)
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[0], 'level_0')
    self.assertEqual(frame.columns[1], 'level_1')

    # specifying index_label
    sql.to_sql(temp_frame, 'test_index_label', self.conn,
               if_exists='replace', index_label=['A', 'B'])
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'],
                     "Specified index_labels not written to database")

    # using the index name
    temp_frame.index.names = ['A', 'B']
    sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace')
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'],
                     "Index names not written to database")

    # has index name, but specifying index_label
    sql.to_sql(temp_frame, 'test_index_label', self.conn,
               if_exists='replace', index_label=['C', 'D'])
    frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
    self.assertEqual(frame.columns[:2].tolist(), ['C', 'D'],
                     "Specified index_labels not written to database")

    # wrong length of index_label
    self.assertRaises(ValueError, sql.to_sql, temp_frame,
                      'test_index_label', self.conn, if_exists='replace',
                      index_label='C')
def analyse(self):
    # Logger.log(logging.INFO, "Analyse Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Type":self._type})

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    query = self.analyseStrategySql % (self._rule1, self._rule2, self._rule3,
                                       self._exit, self._type)
    self._strategyData = read_sql_query(query, connection, 'Date')
    self._strategyData['ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2
    connection.close()

    exitValueDataFrame = self._strategyData.ix[:, 'ExitValueAfterCosts']

    mean = exitValueDataFrame.mean()
    median = exitValueDataFrame.median()
    sum = exitValueDataFrame.sum()
    count = exitValueDataFrame.count()
    tradesPerYear = count / 10
    sharpeRatio = sqrt(tradesPerYear) * exitValueDataFrame.mean() / exitValueDataFrame.std()

    self._strategyData["Sum"] = expanding_sum(exitValueDataFrame)
    self._strategyData["Max"] = expanding_max(self._strategyData["Sum"])
    self._strategyData["Min"] = expanding_min(self._strategyData["Sum"])
    self._strategyData["DD"] = self._strategyData["Max"] - self._strategyData["Min"]

    runningSum = expanding_sum(exitValueDataFrame)
    max2here = expanding_max(runningSum)
    dd2here = runningSum - max2here
    drawDown = dd2here.min()

    Logger.log(logging.INFO, "Analysing Strategy",
               {"scope": __name__, "Rule 1": self._rule1, "Rule 2": self._rule2,
                "Rule 3": self._rule3, "Exit": self._exit, "Type": self._type,
                "Mean": str(mean), "Median": str(median), "Sum": str(sum),
                "Count": str(count), "SharpeRatio": str(sharpeRatio),
                "DrawDown": str(drawDown)})

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    c = connection.cursor()

    deleteSql = self.deleteStrategySql % (pyswing.globals.pySwingStrategy,
                                          self._rule1, self._rule2, self._rule3,
                                          self._exit, self._type)
    c.executescript(deleteSql)
    connection.commit()

    insertSql = self.insertStrategySql % (pyswing.globals.pySwingStrategy,
                                          self._rule1, self._rule2, self._rule3,
                                          self._exit, self._type, str(mean),
                                          str(median), str(sum), str(count),
                                          str(sharpeRatio), str(drawDown))
    c.executescript(insertSql)
    connection.commit()

    c.close()
    connection.close()
def get_dataframe_query_cmp_day(query, user, interval, start_date, end_date, switch_id):
    """
    build the sql query and return the dataframe
    """
    upd_query = sqlquery
    upd_query = upd_query.replace("#SECOND_INDEX#", "switch_id")
    upd_query = upd_query.replace("#USER_CONDITION#", condition_user(user))
    upd_query = upd_query.replace("#DATEDAY_FORMAT#",
                                  "extract(hour from dateday) as dateday")
    upd_query = upd_query.replace("#SWITCH_CONDITION#", condition_switch_id(switch_id))
    upd_query = upd_query.replace("#INTERVAL#", interval)
    upd_query = upd_query.replace("#COUNTRY_CONDITION#", "")

    params = {
        'start_date': start_date,
        'end_date': end_date,
    }
    # df = sql.read_sql_query(upd_query, connection, params=params, index_col=["dateday", "switch_id"])
    df = sql.read_sql_query(upd_query, connection, params=params)
    return df
def evaluateRule(self, tickerCode):
    """
    ?

    :param tickerCode:
    """
    self._tickerCode = tickerCode
    start = self._getLatestDate()

    Logger.log(logging.INFO, "Evaluating Rule",
               {"scope": __name__, "Rule": self._ruleTableName,
                "code": self._tickerCode, "start": str(start)})

    self._restrictedSelectQuery = "%s where r.Code = '%s' and r.Date >= '%s'" % (
        self._selectQuery, self._tickerCode, start)

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    self._ruleData = read_sql_query(self._restrictedSelectQuery, connection, 'Date')

    self._ruleData['LastCrosser'] = self._ruleData['Crosser'].shift(1)
    self._ruleData['LastCrossee'] = self._ruleData['Crossee'].shift(1)

    try:
        self._ruleData['Match'] = ((self._ruleData['Crosser'] > self._ruleData['Crossee']) &
                                   (self._ruleData['LastCrossee'] > self._ruleData['LastCrosser']))
        self._ruleData['Match'] = self._ruleData['Match'].astype(float)
    except TypeError as e:
        # NOTE: Throws "TypeError: unorderable types: float() > NoneType()" if there
        # is no data (e.g. SMA_200 for datasets smaller than 200)
        Logger.log(logging.ERROR, "Error Evaluating Rule",
                   {"scope": __name__, "Rule": self._ruleTableName, "exception": str(e)})
        self._ruleData['Match'] = 0.0
        self._ruleData['Match'] = self._ruleData['Match'].astype(float)

    self._ruleData.drop('Crosser', axis=1, inplace=True)
    self._ruleData.drop('Crossee', axis=1, inplace=True)
    self._ruleData.drop('LastCrosser', axis=1, inplace=True)
    self._ruleData.drop('LastCrossee', axis=1, inplace=True)

    newRecords = self._ruleData.query("Date > '%s'" % (str(start)))
    connection.executemany(self._insertQuery, newRecords.to_records(index=True))
    connection.commit()
    connection.close()
def __init__(self):
    """
    Class Constructor.
    """
    tableName = "Indicator_ADI"
    self._insertQuery = ("insert or replace into %s (Date, ADI, ADI_SUM, ADI_ROC, ADI_EMA) "
                         "values (?,?,?,?,?)" % (tableName))
    self._selectQuery = ("SELECT Date, SUM(CASE WHEN Close > Open THEN 1 ELSE 0 END) - "
                         "SUM(CASE WHEN Close < Open THEN 1 ELSE 0 END) as ADI "
                         "FROM Equities group by Date")

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    self._indicatorDataFrame = read_sql_query(self._selectQuery, connection, 'Date')

    self._indicatorDataFrame['ADI'] = self._indicatorDataFrame['ADI'].astype(float)
    self._indicatorDataFrame['ADI_SUM'] = self._indicatorDataFrame['ADI'].cumsum()
    self._indicatorDataFrame['ADI_ROC'] = abstract.ROC(self._indicatorDataFrame,
                                                       timeperiod=5, price='ADI_SUM')
    self._indicatorDataFrame['ADI_EMA'] = abstract.EMA(self._indicatorDataFrame,
                                                       timeperiod=5, price='ADI')

    self._tableName = "Indicator_ADI"
def executeQuery(self, query=None, throughReload=0, DK=None, DC=None):
    self.connectDB()
    query = self.queryTextEdit.toPlainText()
    if query is None:
        query = str(self.queryTextEdit.toPlainText())

    # try:
    self.pandas = psql.read_sql_query(query, self.cnxn)
    # except Exception:
    #     self.setInfo(('Query failed:', str('')))
    #     df = pd.DataFrame()

    self.data = convert_dataframe_to_orange(self.pandas)
    self.send("Data", self.data)
    self.send("Pandas", self.pandas)
    self.setInfo(("Query returned", "Read " + str(len(self.data)) + " examples!"))
    self.send("Feature Definitions", self.data.domain)
    self.setMeta()
    self.lastQuery = query
def get_transcript_data(db):
    turn_query = select([Turn, Advocacy.side, Argument.date, Argument.case_id]).where(
        and_(Section.id == Turn.section_id,
             Advocacy.id == Section.advocacy_id,
             Advocate.id == Advocacy.advocate_id,
             Argument.id == Section.argument_id))
    turn_data = sql.read_sql_query(turn_query, con=db.engine, index_col='id')  # parse_dates=['date'])
    turn_data.drop(['section_id', 'advocate_id'], inplace=True, axis=1)
    turn_data.columns = ['kind', 'turn_number', 'text', 'time_start', 'time_end',
                         'justice_id', 'side', 'date', 'case_id']
    turn_data['length'] = np.abs(turn_data['time_end'] - turn_data['time_start'])
    turn_data.drop(['time_start', 'time_end', 'turn_number'], inplace=True, axis=1)
    turn_data['interrupted'] = turn_data['text'].str.endswith('--').astype(int)
    turn_data['interruption'] = turn_data['interrupted'].shift(1).fillna(False)
    # turn_data['gender'] = turn_data['gender'].apply(gender_encode)
    turn_data['choppiness'] = (turn_data['text'].str.count('--') > 1).astype(int)
    turn_data['humor'] = turn_data['text'].str.contains(r'\[Laughter\]').astype(int)
    turn_data['question'] = turn_data['text'].str.contains(r'[?]').astype(int)
    return turn_data
def executeQuery(self, query=None, throughReload=0, DK=None, DC=None):
    self.connectDB()
    query = self.queryTextEdit.toPlainText()
    if query is None:
        query = str(self.queryTextEdit.toPlainText())

    # try:
    self.pandas = psql.read_sql_query(query, self.cnxn)
    # except Exception:
    #     self.setInfo(('Query failed:', str('')))
    #     df = pd.DataFrame()

    self.data = convert_dataframe_to_orange(self.pandas)
    self.send("Data", self.data)
    self.send("Pandas", self.pandas)
    self.setInfo(('Query returned',
                  'Read ' + str(len(self.data)) + ' examples!'))
    self.send("Feature Definitions", self.data.domain)
    self.setMeta()
    self.lastQuery = query
def sql_one(limit_num=50):
    """
    This version of the sql API allows us to set a number that we want to
    limit our sample query to. It also naively converts the table result to
    json (note that you'd probably want some more logic in a real situation,
    but turning a pandas table into json so it can be used as part of an API
    can be a powerful tool if you use it properly).

    This also demonstrates connection info. I may also show a twist on this
    where we would pass in the dbname as well.
    """
    conn = ps2.connect(dbname=config.dbname,
                       host=config.host,
                       port=config.port,
                       user=config.user,
                       password=config.password)
    q1 = f'''SELECT * FROM film ORDER BY film_id DESC LIMIT {limit_num}'''
    data = sqlio.read_sql_query(q1, conn)
    print(data)
    return (data.to_json())
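# Minimal, hedged usage sketch for sql_one above (not part of the original
# source): fetch a smaller sample and turn the returned JSON string back into
# Python objects for further handling.
import json

def sample_films(n=5):
    payload = sql_one(limit_num=n)  # sql_one prints the frame and returns JSON
    return json.loads(payload)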
def sql_total_activeppl_in_postcode(conn):
    sql_get_totalactiveppl_in_postcode = """
    with activepeople_activityaddress as (
        SELECT work_address, count(individual_id) as number_pplactive
        FROM """ + table_individual_by_id + """
        WHERE age_category_id != 0 --- young children, 0-3 yr old
        GROUP BY work_address
    ),
    activepeople_homeaddress as (
        SELECT home_address, count(individual_id) as number_pplliving
        FROM """ + table_individual_by_id + """
        WHERE age_category_id != 0 --- young children, 0-3 yr old
        GROUP BY home_address
    ),
    ppl_counting as (
        SELECT
            a.address_id, a.sla_postcode,
            CASE WHEN number_pplactive is NULL THEN 0 ELSE number_pplactive END AS number_pplactive,
            CASE WHEN number_pplliving is NULL THEN 0 ELSE number_pplliving END AS number_pplliving
        FROM """ + table_sla_addresses + """ a
        LEFT JOIN activepeople_activityaddress b ON a.address_id = b.work_address
        LEFT JOIN activepeople_homeaddress c ON a.address_id = c.home_address
    ),
    address_total_counting as (
        SELECT address_id, sla_postcode, number_pplactive, number_pplliving,
               number_pplactive + number_pplliving as total_ppl
        FROM ppl_counting
    )
    SELECT sla_postcode,
           sum(number_pplactive) as number_pplactive,
           sum(number_pplliving) as number_pplliving,
           sum(total_ppl) as total_ppl
    FROM address_total_counting
    GROUP BY sla_postcode
    """
    df_pplcount_inpostcode = sqlio.read_sql_query(
        sql_get_totalactiveppl_in_postcode, conn)
    return df_pplcount_inpostcode
def correlation_funda_data_sellingprice_sellingtime():
    # start connection with database
    with open('db_login.txt', 'r') as myfile:
        data = myfile.read()
    conn = psycopg2.connect(data)
    cur = conn.cursor()

    # Create dataframe to select columns of housing_data
    housinginfo_sellingpricetime_table = ("SELECT sellingPrice, fullDescription, houseType, "
                                          "categoryObject, yearOfBuilding, garden, parcelSurface, "
                                          "numberRooms, numberBathrooms, energylabelClass, surface, "
                                          "sellingtime FROM funda;")
    housinginfo_sellingpricetime = sqlio.read_sql_query(
        housinginfo_sellingpricetime_table, conn)

    # Look for correlations between columns of housing_data and sellingprice and sellingtime
    print(housinginfo_sellingpricetime.corr(method='pearson'))

    '''
    Conclusions with regard to sellingprice:
    1) garden + sellingprice = -0.258484
    2) parcelSurface + sellingprice = 0.076516
    3) numberrooms + sellingprice = 0.100043
    4) numberbathrooms + sellingprice = 0.069725
    5) surface + sellingprice = 0.580748
    6) sellingtime + sellingprice = 0.145279

    Conclusions with regard to sellingtime:
    1) sellingprice + sellingtime = 0.145279
    2) garden + sellingtime = -0.085790
    3) parcelsurface + sellingtime = 0.002927
    4) numberrooms + sellingtime = 0.136939
    5) numberbathrooms + sellingtime = -0.073602
    6) surface + sellingtime = 0.153849
    '''
    return print('Analysis successfully done')
def correlation_crime_info():
    # Start connection with database
    with open('db_login.txt', 'r') as myfile:
        data = myfile.read()
    conn = psycopg2.connect(data)
    cur = conn.cursor()

    # Select sellingprice, municipality code, sellingtime and number of registered crimes
    crime_info_sellingtime_and_price_table = ("SELECT sellingPrice, MunicipalityCode, sellingtime, "
                                              "Number_of_registered_crimes FROM funda "
                                              "NATURAL LEFT JOIN zipcodes "
                                              "NATURAL LEFT JOIN crime_info;")
    crime_info_sellingtime_and_price = sqlio.read_sql_query(
        crime_info_sellingtime_and_price_table, conn)

    # Look for correlations between number of registered crimes and sellingprice and sellingtime
    print(crime_info_sellingtime_and_price.corr(method='pearson'))

    # Make changes to db persistent
    conn.commit()

    # End connection
    cur.close()
    conn.close()
    return print('Crime info analysis successfully done')
def getDelayedTrains(self, f, t):
    # Open connection
    self.openConnection()

    query = """
    SELECT rid, tpl, ptd, dep_at, tpl_to, pta, arr_at
    FROM (SELECT rid, tpl, ptd, dep_at
          FROM nrch_livst_a51
          WHERE tpl = '{1}' AND dep_at IS NOT NULL) AS x
    JOIN (SELECT rid AS rid_to, tpl AS tpl_to, pta, arr_at
          FROM nrch_livst_a51
          WHERE tpl = '{2}' AND arr_at IS NOT NULL) AS y
        ON x.rid = y.rid_to
    WHERE ptd < dep_at
    ORDER BY rid
    """.format(norwich_to_london, f, t)

    # Execute query and get results
    return sqlio.read_sql_query(query, self.connection)
def _query(self, sql, return_df=None):
    """
    Execute a query and return the results.

    sql : str
        SQL statement to execute.
    return_df : bool, optional
        If True, return a (Geo)DataFrame; otherwise return the raw
        fetchall() rows.
    """
    if return_df is not None:
        return_df = return_df
    else:
        return_df = self.return_df
    self._connect()
    with self.conn as conn:
        if return_df:
            return self._df_to_geodf(sqlio.read_sql_query(sql, conn))
        else:
            with conn.cursor() as curs:
                curs.execute(sql)
                results = curs.fetchall()
            return results
def fetch_data(anno):
    engine = create_engine('postgresql://*****:*****@localhost/geonode-imports')
    connection = engine.connect()
    from sqlalchemy.orm import sessionmaker, scoped_session

    conta_sql = "SELECT COUNT (*) FROM conflicts.gd_" + str(anno) + ";"
    Session = scoped_session(sessionmaker(bind=engine))
    s = Session()
    num_records = list(s.execute(conta_sql))[0][0]
    # print num_records

    stringa_sql = 'SELECT "SQLDATE","Actor1Code","GoldsteinScale" FROM conflicts.gd_' + str(anno) + ';'
    # stringa_sql = "SELECT * FROM sparc_wfp_areas;"
    # print stringa_sql
    df = psql.read_sql_query(stringa_sql, con=engine)
    # print df.columns.values
    # print df.describe()
    connection.close()
    return df
def get_dataframe_query(query, user, interval, start_date, end_date,
                        switch_id, country_id_list, second_index):
    """
    build the sql query and return the dataframe
    """
    upd_query = sqlquery
    upd_query = upd_query.replace("#SECOND_INDEX#", second_index)
    upd_query = upd_query.replace("#USER_CONDITION#", condition_user(user))
    upd_query = upd_query.replace("#DATEDAY_FORMAT#", "dateday AS dateday")
    upd_query = upd_query.replace("#SWITCH_CONDITION#", condition_switch_id(switch_id))
    upd_query = upd_query.replace("#INTERVAL#", interval)

    if country_id_list and len(country_id_list) > 0:
        select_country = ", ".join(str(int(l)) for l in country_id_list)
        upd_query = upd_query.replace("#COUNTRY_CONDITION#",
                                      "AND country_id IN (" + select_country + ")")
    else:
        upd_query = upd_query.replace("#COUNTRY_CONDITION#", "")

    params = {
        'start_date': start_date,
        'end_date': end_date,
    }
    df = sql.read_sql_query(upd_query, connection, params=params)
    return df
def evaluateRule(self, tickerCode):
    """
    ?.
    """
    self._tickerCode = tickerCode
    start = self._getLatestDate()

    Logger.log(logging.INFO, "Evaluating Rule",
               {"scope": __name__, "Rule": self._ruleTableName,
                "code": self._tickerCode, "start": str(start)})

    self._restrictedSelectQuery = "%s where t1.Code = '%s' and t1.Date > '%s'" % (
        self._selectQuery, self._tickerCode, start)

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    self._ruleData = read_sql_query(self._restrictedSelectQuery, connection, 'Date')

    self._ruleData['Match'] = self._ruleData['Match'].astype(float)

    connection.executemany(self._insertQuery, self._ruleData.to_records(index=True))
    connection.commit()
    connection.close()
def get_surficial_markers(host=None, from_memory=True):
    """
    Fetch the surficial marker list, either from the memory cache or from the
    database.

    Args:
        host (str): Database host; defaults to datadb.
        from_memory (bool): Return the cached copy when True.

    Returns:
        DataFrame of marker_id, marker_name and site_id.

    Raises:
        MySQLdb.OperationalError: Error in database connection.
    """
    mc = memory.get_handle()
    sc = memory.server_config()

    if from_memory:
        return mc.get("surficial_markers")

    if not host:
        print("Host defaults to datadb")
        host = sc["resource"]["datadb"]

    query = ("select m2.marker_id, m3.marker_name, m4.site_id from "
             "(select max(history_id) as history_id, "
             "marker_id from marker_history as m1 "
             "group by m1.marker_id "
             ") as m2 "
             "inner join marker_names as m3 "
             "on m2.history_id = m3.history_id "
             "inner join markers as m4 "
             "on m2.marker_id = m4.marker_id ")

    engine = dbio.connect(resource="sensor_data", conn_type=0)
    surficial_markers = psql.read_sql_query(query, engine)
    mc.set("surficial_markers", surficial_markers)
    return surficial_markers
def evaluateRule(self, tickerCode):
    """
    ?

    :param tickerCode:
    """
    self._tickerCode = tickerCode
    start = self._getLatestDate()

    Logger.log(logging.INFO, "Evaluating Rule",
               {"scope": __name__, "Rule": self._ruleTableName,
                "code": self._tickerCode, "start": str(start)})

    # We can't use self._getLatestDate() because we need data from before that date...
    self._restrictedSelectQuery = "%s where Code = '%s'" % (self._selectQuery, self._tickerCode)

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    self._ruleData = read_sql_query(self._restrictedSelectQuery, connection, 'Date')

    self._ruleData['Relative'] = self._ruleData[self._indicatorColumn].shift(self._relativeIndex * -1)

    if self._comparison == Comparison.GreaterThan:
        self._ruleData['Match'] = self._ruleData[self._indicatorColumn] > self._multiplier * self._ruleData['Relative']
    else:
        self._ruleData['Match'] = self._ruleData[self._indicatorColumn] < self._multiplier * self._ruleData['Relative']

    self._ruleData['Match'] = self._ruleData['Match'].astype(float)

    self._ruleData.drop('Relative', axis=1, inplace=True)
    self._ruleData.drop(self._indicatorColumn, axis=1, inplace=True)

    newRecords = self._ruleData.query("Date > '%s'" % (str(start)))
    connection.executemany(self._insertQuery, newRecords.to_records(index=True))
    connection.commit()
    connection.close()
def calculate_lead_changes():
    # Query the database
    the_data = sql.read_sql_query(
        "SELECT game_id, current_score_home, current_score_away "
        "FROM mikes_db.ncaa_pxp_detail_2015 "
        "where bool_non_play_event not in ('1');", db)

    # Get a unique list of the games
    unique_game_list = the_data.loc[:, 'game_id'].unique()

    all_lead_chg_summaries = []
    for game in unique_game_list:
        the_game_id = str(game)

        # Subset the data
        the_data_subset = the_data[the_data.game_id == str(the_game_id)]

        # If positive, the home team is ahead
        the_data_subset['current_score_diff'] = (the_data_subset['current_score_home'].astype(int) -
                                                 the_data_subset['current_score_away'].astype(int))

        # If positive, the home team is ahead
        the_data_subset['current_score_sign'] = np.sign(the_data_subset['current_score_diff'])

        # Get the sign of the previous play
        the_data_subset['prev_score_sign'] = np.sign(the_data_subset['current_score_diff'].shift())

        # There will be an NaN at the beginning, give it a value of 0
        the_data_subset['prev_score_sign'] = the_data_subset['prev_score_sign'].fillna(0)

        # If the sign of the current play and the last play are the same, then there was
        # no lead change, otherwise there was
        the_data_subset['lead_change_bool'] = np.where(
            the_data_subset['prev_score_sign'] == the_data_subset['current_score_sign'], 0, 1)

        nLeadChanges = the_data_subset['lead_change_bool'].sum()
        print [the_game_id, nLeadChanges]
        all_lead_chg_summaries.append([the_game_id, nLeadChanges])

    all_lead_chg_summaries = pandas.DataFrame(all_lead_chg_summaries)
    all_lead_chg_summaries.to_csv('/home/mrhodes/Documents/Code/Eclipse_Workspaces/NCAABasketballAnalysis/Sample_score_Diff.csv')
    return the_data_subset
def etl_data(db_data, pull_table, target_table, chunk_size):
    standardize = StandardScaler()
    offset = 0          # For counting the chunk
    chunk = chunk_size  # Set chunk variable

    # Connect to database for count of rows in table
    connection = pymysql.connect(**db_data)
    schema = db_data['db']

    # Connection string for sqlalchemy -- mysql as a generic example
    engine_string = ("""mysql+pymysql://{}:{}@{}:{}/{}""").format(
        db_data['user'], db_data['password'], db_data['host'],
        db_data['port'], db_data['db'])

    with connection.cursor() as cursor:
        cursor.execute(("""SELECT COUNT(1) FROM {}.{}""").format(schema, pull_table))
        count = cursor.fetchone()
        row_count = count['COUNT(1)']
    print("Starting smoothing of " + str(row_count))

    try:
        print('Starting chunks...')
        while offset < row_count:
            # read the next chunk over the existing pymysql connection
            data = sql.read_sql_query(
                ("""SELECT * FROM {}.{} LIMIT {} OFFSET {}""").format(
                    schema, pull_table, chunk, offset), connection)
            data = data.fillna(data.median())  # Fill NaN to avoid analysis issues - Median as example
            data.loc[:, data.dtypes != object] = standardize.fit_transform(
                data.loc[:, data.dtypes != object])
            try:
                engine = create_engine(engine_string, echo=False)
                # Warning: 'replace' will drop and recreate the table - read to_sql documentation
                data.to_sql(name=target_table, con=engine, if_exists='replace', index=False)
            except Exception as ex:
                print(ex)
            offset += chunk
            print("Up to " + str(offset) + "\tchunked rows transformed.")
            if offset >= row_count:
                print("Done:\n Offset: " + str(offset) + "\nRow Count: " + str(row_count))
                break
    except Exception as ex:
        print(ex)

    connection.close()
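# Hedged usage sketch for etl_data above; the connection settings and table
# names are illustrative placeholders, not values from the original source.
# A DictCursor is assumed because the row-count lookup indexes the fetched
# row by column name (count['COUNT(1)']).
def run_etl_example():
    db_settings = {
        "host": "localhost",
        "port": 3306,
        "user": "etl_user",
        "password": "etl_password",
        "db": "analytics",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    etl_data(db_settings, pull_table="raw_events",
             target_table="events_scaled", chunk_size=50000)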
def _load_table_config_file(executer_instance, cube_obj):
    """
    Load tables from config file.

    :param cube_obj: cubes object
    :return: tables dict with table name as key and DataFrame as value
    """
    tables = {}
    # just one facts table right now
    executer_instance.facts = cube_obj.facts[0].table_name

    db = MyDB(db_config_file_path=os.path.dirname(executer_instance.cube_path),
              db=executer_instance.cube)

    for dimension in cube_obj.dimensions:

        df = psql.read_sql_query("SELECT * FROM {0}".format(dimension.name), db.engine)

        # only certain columns
        if dimension.columns.keys():
            df = df[dimension.columns.keys()]

        # change table display name
        if dimension.displayName:
            table_name = dimension.displayName
        else:
            table_name = dimension.name

        # rename columns if value not None
        df.rename(columns=(dict((k, v) for k, v in dimension.columns.items() if v)),
                  inplace=True)

        tables[table_name] = df[[col for col in df.columns if col.lower()[-2:] != 'id']]

    return tables
def get_feature_info(connection):
    query = '''
    select a.id, name, description, category, b.is_categorical,
           b.lower_bound, b.upper_bound
    FROM features a, data_ranges b
    WHERE a.id = b.id;
    '''
    df = sqlio.read_sql_query(query, connection)

    feature_ids = df['id'].values
    feature_names = df['name'].values
    feature_desc = df['description'].values
    feature_categ = df['category'].values
    feature_type = df['is_categorical'].values
    feature_lb = df['lower_bound'].values
    feature_ub = df['upper_bound'].values

    feature_info = {}
    for i in range(len(feature_ids)):
        feature_info[feature_ids[i]] = [
            feature_names[i], feature_desc[i], feature_categ[i],
            feature_type[i], feature_lb[i], feature_ub[i]
        ]
    return feature_info
def get_glue_jobs_from_db(self):
    """
    Get all glue jobs from the 'jobs' table.
    This function returns a pandas sql data frame.
    """
    conn, cur = self.create_db_conn()
    sql_stmt = sq.select_from_jobs
    df, err = None, None
    try:
        cur.execute(sq.use_schema)
        df = sqlio.read_sql_query(sql_stmt, conn)
    except Exception as e:
        log.info("Error: select *")
        log.error(e)
        err = e
        # raise
    finally:
        if conn:
            cur.close()
            conn.close()
            log.info("successfully closed the db connection")
    # log.info("successfully executed function 'get_glue_jobs_from_db'")
    return df, err
def recommend(movie_user_likes, cosine_sim):
    with psycopg2.connect(user="******", password="******",
                          database="Movie Rec") as connection:
        cmd = '''SELECT * FROM "Everything";'''
        df = sqlio.read_sql_query(cmd, connection)
        # cosine_sim = np.load('D:\\Documents\\Python\\movieRec\\similarity.npy')

        def get_index_from_title(title):
            return df[df.Title == title].index.values[0]

        movie_index = get_index_from_title(movie_user_likes)
        similar_movies = list(enumerate(cosine_sim[movie_index]))
        sorted_similar_movies = sorted(similar_movies, key=lambda x: x[1], reverse=True)

        recommendations = [(df[df.index == i[0]]["Title"].values[0],
                            df[df.index == i[0]]["Image URL"].values[0],
                            df[df.index == i[0]]["IMDB ID"].values[0])
                           for i in sorted_similar_movies[1:6]]
    return recommendations
def test_sql_open_close(self):
    # Test if the IO in the database still works if the connection is closed
    # between the writing and reading (as in many real situations).
    self._load_test2_data()

    with tm.ensure_clean() as name:
        conn = self.connect(name)
        sql.to_sql(self.test_frame2, "test_frame2_legacy", conn,
                   flavor="sqlite", index=False)
        conn.close()

        conn = self.connect(name)
        result = sql.read_sql_query("SELECT * FROM test_frame2_legacy;",
                                    conn, flavor="sqlite")
        conn.close()

    tm.assert_frame_equal(self.test_frame2, result)
def ListDrugdruginteractions(dbcon, fout=None):
    sql = """\
SELECT
    ddi.id AS ddi_id,
    ddi.drug_class1,
    ddi.drug_class2,
    ddi.source_id,
    drug_class1.id drug_class_id1,
    drug_class1.source source1,
    drug_class1.is_group is_group1,
    drug_class2.id drug_class_id2,
    drug_class2.source source2,
    drug_class2.is_group is_group2
FROM ddi
JOIN drug_class drug_class1 ON drug_class1.name = ddi.drug_class1
JOIN drug_class drug_class2 ON drug_class2.name = ddi.drug_class2
"""
    logging.debug(f"SQL: {sql}")
    df = read_sql_query(sql, dbcon)
    if fout:
        df.to_csv(fout, "\t", index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df
def fit_anom_sql(min_price=.15, min_quant=30, days_released=45):
    """
    A single SQL query to skip the filtering step when creating the dataframe.

    :param min_price, min_quant, days_released: filter parameters
    :return: filtered dataframe
    """
    conn = pg2.connect(dbname='steam_capstone', host='localhost')
    query = ("""
        select t_days_released.item_name, t_days_released.date as timestamp,
               t_days_released.price as median_sell_price
        from (select *, count(*) over (partition by item_name order by date asc) as days_released
              from sales) as t_days_released
        inner join (select item_name
                    from (select *, count(*) over (partition by item_name order by date asc) as days_released
                          from sales) as t
                    where days_released > %(days_released)s
                    group by item_name
                    having min(price) > %(min_price)s and min(quantity) > %(min_quant)s) as t_keep_items
            on t_days_released.item_name = t_keep_items.item_name
        where days_released > %(days_released)s
        order by t_days_released.item_name, timestamp;
        """)
    df = sqlio.read_sql_query(query, conn, parse_dates=['timestamp'],
                              params={'min_price': min_price,
                                      'min_quant': min_quant + 1,
                                      'days_released': days_released})
    print_top(anom_consensus(df), n=10)
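# Design note (hedged, not from the original source): the params dict above
# relies on psycopg2-style named placeholders (%(name)s), so values are bound
# by the driver rather than formatted into the SQL string. A minimal sketch of
# the same pattern, using the sales table from the query above and an
# illustrative price filter:
def query_items_above_price(conn, min_price):
    return sqlio.read_sql_query(
        "select item_name, price from sales where price > %(min_price)s",
        conn,
        params={'min_price': min_price},
    )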
def checkCobra(con, frameID, loadPref):
    """
    compare matched fibres
    """
    # read from database
    comm = ("""select * from cobra_status where mcs_frame_id =""" +
            str(int(frameID)) + """ order by cobra_id""")
    data = sqlio.read_sql_query(comm, con)

    # read from file (which was used to populate database)
    fps, sz = db.readFPS(frameID, loadPref)

    # all the entries here are numbers
    for key in fps:
        for i in range(sz[0]):
            # turn None into NaN for comparison
            if (fps[key][i] == None):
                fps[key][i] = np.nan
            # equal_nan so that unmatched cobras are compared correctly
            if (not np.isclose(np.float(fps[key][i]), data[key][i], equal_nan=True)):
                print(key, i, fps[key][i], data[key][i])
def Greenblatt(날짜='2011-12-31', 기간구분='년간'):
    # Parameters: 날짜 = as-of date, 기간구분 = reporting period ('년간' = annual)
    result = DataFrame()
    query = """
        SELECT A.날짜, A.기간구분, A.종목코드, C.종목명, B.종가,
               A.매출액, A.영업이익, A.당기순이익,
               A.자산총계, A.부채총계, A.자본총계, A.자본금,
               A.부채비율, A.유보율, A.영업이익률, A.순이익률,
               A.ROA, A.ROE, A.EPS, A.BPS, A.DPS,
               A.PER, 1/A.PER as RPER, A.PBR, A.발행주식수, A.배당수익률,
               C.종목상태
        FROM 재무정보 A,
             (select 종목코드, 종가 from 일별주가
              where 일자 = (select max(일자) from 일별주가 where 일자 <= '%s')) B,
             종목코드 C
        WHERE 날짜='%s' and 기간구분='%s' and A.종목코드=B.종목코드 and A.종목코드=C.종목코드
        """ % (날짜, 날짜, 기간구분)

    conn = mysqlconn()
    df = pdsql.read_sql_query(query, con=conn)
    conn.close()

    # rank by ROA and by earnings yield (1/PER), then combine the two ranks
    df['rank1'] = df['ROA'].rank(ascending=False)
    df['rank2'] = df['RPER'].rank(ascending=False)
    df['ranksum'] = df['rank1'] + df['rank2']
    df['rank'] = df['ranksum'].rank(ascending=True)
    result = df.sort_values(['rank', 'rank1', 'rank2'], ascending=[True, True, True])
    return result
def get_data(start, end, stock_name):
    postgre_db = psycopg2.connect(dbname=postgresql_db_config.NAME,
                                  user=postgresql_db_config.USER,
                                  password=postgresql_db_config.PASSWORD,
                                  host=postgresql_db_config.HOST,
                                  port=postgresql_db_config.PORT)
    sql = f'''
    select * from public.stock_data_full
    where stock_name = '{stock_name}'
    order by time_stamp asc
    '''
    dat = sqlio.read_sql_query(sql, postgre_db)
    dat = dat.dropna()
    dat = dat.reset_index(drop=True)
    print(f"Now we are processing stock : {dat['stock_name'][0]}")

    features = dat[['open', 'volume', 'volume_obv', 'trend_macd', 'trend_macd_signal',
                    'trend_macd_diff', 'momentum_rsi', 'volume_vpt']]
    dataset = features.values
    data_mean = dataset.mean(axis=0)
    data_std = dataset.std(axis=0)
    dataset = (dataset - data_mean) / data_std

    if end == None:
        end = dataset.shape[0]
    if start == None:
        start = dataset.shape[0] - 140
    return dataset[start:end]
def get_held_shares(self, sid):
    try:
        sql = ("select trade_type,price,SUM(volume) from public.trade where session_id = '" +
               str(sid) + "' GROUP BY trade_type,price")
        db = self.get_connection()
        data = sqlio.read_sql_query(sql, db)
    except:
        print("failed to query held_stocks for sid: " + str(sid))
        raise
    finally:
        db.close()

    # 1. Sum BUY's
    # 2. Sum SELL's
    # 3. Compute and return diff
    held_stocks = 0
    for trade in data.itertuples():
        _, t_type, price, vol = trade
        if t_type == "BUY":
            held_stocks += vol
        elif t_type == "SELL":
            held_stocks -= vol
    return held_stocks
def checkTableExists(self, table_name, connection):
    if not isinstance(table_name, str):
        raise Exception('Input(table_name) : not string')
    # ---------------------------------------------------------------------
    if not isinstance(connection, psycopg2.extensions.connection):
        raise Exception('Input(connection) : not valid psycopg2 connection')
    # ---------------------------------------------------------------------
    table_name = table_name.upper()
    table_name = table_name.strip()
    # ---------------------------------------------------------------------
    sql = \
        '''
        SELECT
            *
        FROM
            INFORMATION_SCHEMA.TABLES
        WHERE
            UPPER(TABLE_NAME) = '{}'
        '''.format(table_name)
    data = sqlio.read_sql_query(sql, connection)
    # ---------------------------------------------------------------------
    if data.empty:
        return False
    else:
        return True
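# Hedged usage sketch for checkTableExists above; the DSN and the owning class
# name (PgHelper) are illustrative assumptions, not part of the original source.
def table_exists(dsn, table_name):
    conn = psycopg2.connect(dsn)
    try:
        return PgHelper().checkTableExists(table_name, conn)  # hypothetical class
    finally:
        conn.close()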
def paper_rank():
    print("start page ranking .....")
    dg = digraph()
    conn = sqlite3.connect(PM.db)
    qry = 'select p_citer,p_cited from reference'
    p_id = sql.read_sql_query(qry, conn)
    print(str(p_id.shape) + '<---------p_id')

    citer = p_id.p_citer.unique()
    p_id = p_id.dropna(axis=0)
    cited = p_id.p_cited.unique()
    nd = set(citer).union(set(cited))
    nd = list(nd)
    print('node is created .....')

    # add nodes
    nodes = np.array(nd).astype(np.int64)
    dg.add_nodes(nodes)
    print("add nodes finished .... ")

    # add edges
    edges = [x for x in zip(p_id['p_citer'].astype(np.int64),
                            p_id['p_cited'].astype(np.int64))]
    for ed in edges:
        dg.add_edge(ed)
    print('add edges finished ....')

    pg = pagerank(dg, damping_factor=0.85, max_iterations=100, min_delta=1e-06)
    pprk = pd.DataFrame(pd.Series(pg))
    pprk.columns = ['pp_ranking']
    pprk.index.name = 'paper_index'
    pprk.to_csv(PM.paper_rank, sep=u'|', header=1, index=True)
    print(pprk[:2])
def rankVideos():
    cursor, connection = connect()
    try:
        # retrieve video stats
        sql = "SELECT v_id, likes, dislikes, views FROM video;"
        df = sqlio.read_sql_query(sql, connection)
        connection = None

        # compute average views
        total_views = 0
        for i in range(0, len(df)):
            total_views += df.iat[i, 3]
        avg_views = total_views / len(df)

        # video ranking = [(likes-dislikes) / views]*log(views/avg_views)
        video_rankings = {}
        for i in range(0, len(df)):
            v_id = df.iat[i, 0]
            likes = df.iat[i, 1]
            dislikes = df.iat[i, 2]
            views = df.iat[i, 3]
            if views == 0:
                rank = 0
            else:
                rank = ((likes - dislikes) / views) * math.log(views / avg_views)
            video_rankings[v_id] = rank
        return video_rankings
    except Exception as e:
        print("Exception in rank videos: ", e)
    finally:
        closeConnection(connection, cursor)
def ListDiseases(dbcon, fout):
    sql = """
SELECT
    d.dtype,
    dt.description dtype_description,
    d.name diseaseName,
    d.ncats_name ncatsDiseaseName,
    d.did diseaseId,
    d.description diseaseDescription,
    d.reference,
    d.drug_name,
    d.source,
    COUNT(d.protein_id) n_target_associations
FROM disease d
LEFT OUTER JOIN disease_type dt ON dt.name = d.dtype
GROUP BY d.dtype, dt.description, d.name, d.ncats_name, d.did, d.description,
         d.reference, d.drug_name, d.source
"""
    df = read_sql_query(sql, dbcon)
    if fout:
        df.to_csv(fout, "\t", index=False)
    logging.info(f"rows: {df.shape[0]}")
    logging.info(f"diseaseIDs: {df.diseaseId.nunique()}")
    logging.info(f"diseaseNames: {df.diseaseName.nunique()}")
    logging.info(f"ncatsDiseaseNames: {df.ncatsDiseaseName.nunique()}")
    for dtype in df.dtype.unique().tolist():
        logging.info(f"[{dtype}] diseaseIDs: {df[df.dtype==dtype].diseaseId.nunique()}")
    return df
def __cut_duplicates(dataframe, table_name):
    """
    pd.DataFrame -> pd.DataFrame
    Tests the data retrieved against the existing sql table, and drops
    duplicate keys.
    """
    print('<<<Removing duplicates from data retrieved ...>>>')

    # Connect to the database
    conn = pg2.connect(database='news_summary', user="******", host='localhost',
                       password=config.passwords['postgresql'])

    # Pull table data for checking
    select_call = '''SELECT * FROM {}'''.format(table_name)
    existing_df = sqlio.read_sql_query(select_call, conn)

    # Combine to look for dupes
    full_df = existing_df.append(dataframe)
    full_df['duplicated'] = full_df.duplicated(['headline', 'newssource', 'weblink'],
                                               keep=False)

    # Cut back to retrieved data, split into dupes & dedupes
    n = len(full_df)
    original_df = full_df[n - len(dataframe):n]
    unique_df = original_df.loc[original_df['duplicated'] == False]
    unique_df.drop(columns=['duplicated'], inplace=True)
    duplicates_df = original_df.loc[original_df['duplicated'] == True]
    duplicates_df.drop(columns=['duplicated'], inplace=True)

    new_stories_num = len(unique_df)
    print('Of {} stories retrieved in this call, {} were unique'.format(
        len(dataframe), new_stories_num))
    print('<<<Removing duplicates from data retrieved ... COMPLETE>>>')
    return (unique_df, duplicates_df)
def tcrd_fetchdata_iter():
    sql = '''
SELECT
    target.id tcrdTargetId,
    target.name tcrdTargetName,
    target.fam tcrdTargetFamily,
    target.tdl TDL,
    target.ttype tcrdTargetType,
    target.idg idgList,
    protein.id tcrdProteinId,
    protein.sym tcrdGeneSymbol,
    protein.family tcrdProteinFamily,
    protein.geneid ncbiGeneId,
    protein.uniprot uniprotId,
    protein.up_version uniprotVersion,
    protein.chr,
    protein.description tcrdProteinDescription,
    protein.dtoid dtoId,
    protein.dtoclass dtoClass,
    protein.stringid ensemblProteinId
FROM target
JOIN t2tc ON t2tc.target_id = target.id
JOIN protein ON protein.id = t2tc.protein_id
'''
    dbcon = tcrd_Connect()
    df = read_sql_query(sql, dbcon)
    total = df.shape[0]
    logging.info("Targets: {}".format(total))
    NMAX = 10
    for i in range(total):
        # if i > NMAX: break
        target = df.iloc[i].to_dict()
        yield target
def generate_table(chart_dropdown, year_slider):
    max_rows = 12
    year_filter = " WHERE date_part('year', agg_date) = " + str(year_slider)

    # Open a cursor to perform database operations
    cur = conn.cursor()

    if chart_dropdown == "select_T":
        sql = ("select agg_date AS date, state, county_name as county, " +
               "avg_value as avg_temperature, sum_mort as mortality_count from combined_mo " +
               year_filter + " order by state, county, date limit 100;")
    if chart_dropdown == "select_stacoun":
        sql = "select * from stacoun limit 100;"

    dataframe = sqlio.read_sql_query(sql, conn)

    return html.Table([
        html.Thead(html.Tr([html.Th(col) for col in dataframe.columns])),
        html.Tbody([
            html.Tr([html.Td(dataframe.iloc[i][col]) for col in dataframe.columns])
            for i in range(min(len(dataframe), max_rows))
        ])
    ])
def construct_star_schema(self, facts):
    """
    Construct star schema DataFrame from configuration file for excel client.

    :param facts: Facts table name
    :return: star schema DataFrame
    """
    fusion = self.load_one_table(facts)
    for fact_key, dimension_and_key in self.cube_config["facts"]["keys"].items():
        if self.cube_config["source"].upper() == "CSV":
            file = os.path.join(self.cube_path,
                                dimension_and_key.split(".")[0] + ".csv")
            # with extension or not
            if not os.path.isfile(file):
                file = file.replace(".csv", "")
            df = pd.read_csv(file, sep=self.sep)
        else:
            df = psql.read_sql_query(
                f"SELECT * FROM {dimension_and_key.split('.')[0]}",
                self.sqla_engine,
            )

        for dimension in self.cube_config["dimensions"]:
            if dimension_and_key.split(".")[0] == dimension["name"]:
                df.rename(columns=dimension["columns"], inplace=True)

        fusion = fusion.merge(
            df,
            left_on=fact_key,
            right_on=dimension_and_key.split(".")[1],
            how="left",
            # remove suffix from dimension and keep the same column name for facts
            suffixes=("", "_y"),
        )
    return fusion
def getDataAtDB(select_mun, select_dp, select_crime):
    conn = db.connect(host='localhost', database='crimes', user='******',
                      password='******', port='5432')

    if select_dp == "Todos":
        sql_command = """
        SELECT o.datas, SUM(o.ocorrencia)
        FROM crime_ocorrencia o, crime_localizacao l
        WHERE o.id = l.id and l.municipio = '{}' and l.tipo = '{}'
        GROUP BY o.datas
        ORDER BY o.datas;
        """.format(select_mun, select_crime)
    else:
        sql_command = """
        SELECT o.datas, o.ocorrencia
        FROM crime_ocorrencia o, crime_localizacao l
        WHERE o.id = l.id and l.municipio = '{}' and l.delegacia = '{}' and l.tipo = '{}'
        """.format(select_mun, select_dp, select_crime)

    dat = sqlio.read_sql_query(sql_command, conn)
    return dat
import psycopg2 as pg
import numpy
import pandas.io.sql as psql
from sklearn.feature_extraction.text import CountVectorizer

# get connected to the database
connection = pg.connect(database="housing_data_server_development", host="localhost")

dataframe = psql.read_sql_query("SELECT id, description FROM listings", connection)

count_vectorizer = CountVectorizer()
counts = count_vectorizer.fit_transform(dataframe['description'].values)

print(dataframe.columns.values)
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

engine = create_engine('postgresql://*****:*****@10.0.0.56:5433/sharesprod')

import pandas.io.sql as psql

sql = "SELECT isin, TO_TIMESTAMP(min(tradetime)/1000) min,TO_TIMESTAMP(max(tradetime)/1000) max, count(*) FROM trade group by 1;"
data = psql.read_sql_query(sql, engine)
print data