def down_fin(year, season):
    """Download quarterly profit data via tushare and store it as table zf<year><season>.

    Retries forever on ValueError (tushare raises it on transient fetch
    failures) until a DataFrame is fetched and written to the database.

    :param year:   report year, e.g. 2018
    :param season: report quarter, 1-4
    :return: None
    """
    t_name = 'zf' + str(year) + str(season)
    print("down_fin.........", t_name)
    print(t_name)
    while True:
        try:
            df = ts.get_profit_data(year, season)  # renamed: original shadowed `pd`
            print(type(df))
            # isinstance() replaces the fragile str(type(...)) text comparison.
            if isinstance(df, pd.DataFrame):
                df.to_sql(t_name, G_DBengine, if_exists='replace')
                return
        except ValueError as e:
            # Python 3 syntax (was `except ValueError, e`), consistent with
            # the Python 3 print calls used elsewhere in this file.
            print('ValueError:', e)
def insert_data(df, export):
    """Export Citi Bike station data to SQL, BigQuery, or CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe derived from the bike-sharing API.
    export : str
        Destination format; one of 'sql', 'bigquery' or 'csv'.

    Returns
    -------
    None.
        Writes the data to the selected destination as a side effect; an
        unrecognized format only prints the list of supported options.
    """
    if export == 'sql':
        # to_sql is a DataFrame method, not a pandas module function;
        # the original pd.to_sql(df, db_engine) raised AttributeError.
        # TODO confirm destination table name with the schema owner.
        df.to_sql('citibike_stations', db_engine, if_exists='append')
        print("\nSQL Data Insertion Time: " + str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    elif export == 'bigquery':
        df.to_gbq(full_table_id, project_id=project_id, if_exists='append')
        print("\nBigQuery Data Insertion Time: " + str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    elif export == 'csv':
        df.to_csv("citibike_stations_data.csv")
        print("\nCSV Data Insertion Time: " + str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    else:
        print(
            "The format you requested is not available, please select one of the 3 below: \nsql \nbigquery \ncsv"
        )
    return
def load_df_into_dwh(film_df, tablename, schema, db_engine):
    """Replace the warehouse table *tablename* with the contents of *film_df*.

    Fixes two bugs in the original: it called pd.to_sql (pandas has no such
    module-level function -- to_sql is a DataFrame method, so the dataframe
    argument was silently dropped) and misspelled the keyword as `if_exist`,
    which would raise TypeError.

    :param film_df: pandas.DataFrame to persist
    :param tablename: destination table name
    :param schema: target schema (None for engines without schema support)
    :param db_engine: SQLAlchemy engine/connection or DBAPI connection
    :return: number of rows written (pandas >= 1.4) or None on older pandas
    """
    return film_df.to_sql(tablename, db_engine, schema=schema, if_exists="replace")
# NOTE(review): the four statements below are the tail of a training
# function (presumably train_rf, called in __main__ below) whose `def`
# line is outside this chunk; `test`, `X` and `train` are bound in the
# missing part.
    test.reset_index(inplace=True)
    model_RF = RandomForestRegressor(n_estimators=150, oob_score=True, n_jobs=-1)
    mod_RF = model_RF.fit(X, train[[1]])
    return mod_RF

def oord_json(mod_RF, c_ini):
    # NOTE(review): RandomForestRegressor has no fit_transform method, so
    # this line raises AttributeError as written -- confirm whether mod_RF
    # was meant to be a transformer (e.g. an encoder) instead.
    bbb = mod_RF.fit_transform(c_ini[[4]])
    return bbb

def get_postgres_connection():
    # Build a PostgreSQL connection from module-level PG_* settings.
    conn = psycopg2.connect(
        "dbname={} user={} password={} host={} port = {}".format(
            PG_DB, PG_USR, PG_PASS, PG_HOST, PG_PORT))
    return conn

if __name__ == '__main__':
    con = get_postgres_connection()
    # NOTE(review): pandas has no pd.to_sql function, and `name`/`index`
    # are not read arguments -- this was presumably meant to be a read,
    # e.g. pd.read_sql_table('table_names', con); verify intent before
    # relying on this script.
    c_ini = pd.to_sql(name=['table_names'], con=con, index=False)
    c_ini = califica(c_ini, 10)
    mod = train_rf(c_ini)
    orden = oord_json(mod, c_ini)
# Pandas cheat sheet: file I/O and basic frame inspection.
# save & read files
pd.read_csv('file.csv', header=None, nrows=5)
pd.read_excel('file.xlsx')
# SQL round-trips go through a SQLAlchemy engine.
from sqlalchemy import create_engine  # fixed typo: was `creat_engine`
engine = create_engine('sqlite:///:memory:')
pd.read_sql('SELECT * FROM my_table;', engine)
pd.read_sql_table('my_table', engine)  # fixed: table name must not carry a ';'
pd.read_sql_query('SELECT * FROM my_table;', engine)
xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')
# Writers are DataFrame methods, not pandas module functions
# (pd.to_csv / pd.to_excel / pd.to_sql do not exist).
df.to_csv('file.csv')
df.to_excel('file.xlsx', sheet_name='Sheet1')
df.to_sql('file', engine)
# frame feature
df.shape
df.index
df.columns
df.info()
df.count()
df.sum()
df.cumsum()
df.min() / df.max()
df.idxmin() / df.idxmax()  # fixed: methods are idxmin/idxmax, not idmin/idmax
df.describe()
df.mean()
df.median()
# NOTE(review): the docstring fragment below belongs to a function whose
# `def` line is outside this chunk (it exports diff cases to Excel).
    :return:None
    '''
    res=pd.DataFrame(diff_cases,columns=['request_url','api_purpose','city'])
    res.to_excel(diffCaseFile)

def city_case_to_excel(all_cases,city_CaseFile):
    '''
    Export the parameterized swagger cases to Excel.
    :return: None
    '''
    df1=pd.DataFrame(all_cases,columns=['request_url','api_purpose','city'])
    df1.to_excel(city_CaseFile)

def case_to_db(handleCaseFile):
    # Append the verified cases into the `testcase` table.
    # NOTE(review): credentials are hard-coded in the DSN -- move to config.
    from sqlalchemy import create_engine
    engine = create_engine('mysql+mysqlconnector://root:[email protected]/autotest?charset=utf8')
    df = pd.read_excel(handleCaseFile)
    df.to_sql('testcase', engine, index=False, if_exists='append')

if __name__ =='__main__':
    from sqlalchemy import create_engine
    engine = create_engine('mysql+mysqlconnector://root:[email protected]/autotest?charset=utf8')
    from settings import dbCaseFile
    # NOTE(review): pandas has no pd.to_sql function -- this line raises
    # AttributeError as written; presumably a read such as
    # pd.read_sql_table('testcase', engine) was intended. Verify before
    # relying on this script.
    df = pd.to_sql('testcase',engine)
    df.to_sql('testcase', engine, index=False, if_exists='append')
# NOTE(review): this chunk is a fragment -- it opens mid-argument-list
# (index_col='Date' belongs to a read call outside this view), the
# `else:` pairs with an `if` outside this view, and the final string
# literal is truncated. Indentation below is a best guess; confirm
# against the full file.
        index_col='Date')
    # Merge previously stored prices with newly fetched ones, back-fill
    # gaps, turn the DatetimeIndex back into a 'Date' column, and replace
    # this stock's table in the database.
    concat_data = pd.concat([origin_data, new_data], axis=1)
    concat_data.index = pd.to_datetime(concat_data.index)
    concat_data.fillna(method='backfill', inplace=True)
    concat_data.reset_index(inplace=True)
    concat_data.rename(columns={'index': 'Date'}, inplace=True)
    concat_data.to_sql(stock[0], engine, if_exists='replace', index=False)
    print(stock[0] + ' mission complete')
else:
    # Presumably the failure branch of a per-stock validity check.
    error_stock.append(stock[0])
    print(stock[0] + ' error founded')
# The bare string literals below act as block comments (Chinese). First:
# to_sql may emit columns whose default is an empty string, which newer
# MySQL versions reject when dumps move between databases; workaround is
# ALTER TABLE ... ALTER COLUMN ... DROP DEFAULT.
'''
pd.to_sql()该方法导入的数据在进行不同数据库间传输时会报错invalid default value for 报错原因:导入过程中自动设置默认值为empty string,高版本mysql禁止使用该默认值 解决方法: 1.对默认值为empty string的列删除其默认值 alter table table_name alter column column_name drop default 2.高版本mysql须修改配置文件,未实现
'''
# Second: known problem tickers (non-stock codes, delisted, or not yet
# listed as of 2018-04-27); the list is truncated in this chunk.
'''
数据库内存在问题的股票 现已发现的问题: 1.非股票代码 2.退市股票 3.截止至2018-04-27未上市股票 error_stock = ['stock_300361','stock_300646','stock_300728','stock_600002', 'stock_600349','stock_601206','stock_603302','stock_603587','stock_603897',
# Pandas cheat sheet: selection, SQL I/O, dropping, sorting and ranking.
# (`df` and `s` are the demo DataFrame/Series defined earlier in the sheet.)
df[df['Country'] == 'India']
#Setting
#Set index a of Series s to 6
s['a'] = 6
#Read and Write to SQL Query or Database Table
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
pd.read_sql("SELECT * FROM my_table;", engine)
pd.read_sql_table('my_table', engine)
pd.read_sql_query("SELECT * FROM my_table;", engine)
#read_sql()is a convenience wrapper around read_sql_table() and read_sql_query()
# to_sql is a DataFrame method, not a pandas module function:
df.to_sql('myDf', engine)  # fixed: was pd.to_sql('myDf', engine) -> AttributeError
#Dropping
#Drop values from rows (axis=0)
s.drop(['a', 'c'])
#Drop values from columns(axis=1)
df.drop('Country', axis=1)
#Sort & Rank
#Sort by labels along an axis
df.sort_index()
#Sort by the values along an axis
df.sort_values(by='Country')
#Assign ranks to entries
df.rank()