def get_link(url):
    link_exr = re.compile(r'<a.*?\s*href=\"(.*?)\".*?>(.*?)</a>')
    links = []

    # open web content
    f = urllib.request.urlopen(url)
    content = f.read().decode('utf-8', errors='ignore')

    # HTML-tag version: find all urls and save to links
    # soup = BeautifulSoup(content, "lxml")
    # for a in soup.find_all('a', href=True):
    #     if "detik.com" in a['href']:
    #         if "http:" not in a['href']:
    #             a['href'] = "http:" + a['href']
    #         print("Found the URL:", a['href'])
    #         links.append(a['href'])

    # regex version: find all urls and save to links
    for link in link_exr.findall(content):
        if "detik.com" in link[0]:
            link_detik = link[0]
            if "http:" not in link_detik:
                link_detik = "http:" + link_detik
            links.append(link_detik)

    # save to DataFrame and drop duplicate urls
    # (drop_duplicates returns a copy, so it has to be reassigned)
    df = DataFrame(links, columns=['detik url'])
    df = df.drop_duplicates()
    print(df.head())

    # create and save to the database
    detik_db = create_engine("mysql://*****:*****@localhost/data_detik")
    df.to_sql('url_detik', detik_db, if_exists='replace')
class WriteSQLDtypes(object): goal_time = 0.2 params = (['sqlalchemy', 'sqlite'], ['float', 'float_with_nan', 'string', 'bool', 'int', 'datetime']) param_names = ['connection', 'dtype'] def setup(self, connection, dtype): N = 10000 con = {'sqlalchemy': create_engine('sqlite:///:memory:'), 'sqlite': sqlite3.connect(':memory:')} self.table_name = 'test_type' self.query_col = 'SELECT {} FROM {}'.format(dtype, self.table_name) self.con = con[connection] self.df = DataFrame({'float': np.random.randn(N), 'float_with_nan': np.random.randn(N), 'string': ['foo'] * N, 'bool': [True] * N, 'int': np.random.randint(0, N, size=N), 'datetime': date_range('2000-01-01', periods=N, freq='s')}, index=tm.makeStringIndex(N)) self.df.loc[1000:3000, 'float_with_nan'] = np.nan self.df['datetime_string'] = self.df['datetime'].astype(str) self.df.to_sql(self.table_name, self.con, if_exists='replace') def time_to_sql_dataframe_column(self, connection, dtype): self.df[[dtype]].to_sql('test1', self.con, if_exists='replace') def time_read_sql_query_select_column(self, connection, dtype): read_sql_query(self.query_col, self.con)
class ReadSQLTable(object): goal_time = 0.2 def setup(self): N = 10000 self.table_name = 'test' self.con = create_engine('sqlite:///:memory:') self.df = DataFrame({'float': np.random.randn(N), 'float_with_nan': np.random.randn(N), 'string': ['foo'] * N, 'bool': [True] * N, 'int': np.random.randint(0, N, size=N), 'datetime': date_range('2000-01-01', periods=N, freq='s')}, index=tm.makeStringIndex(N)) self.df.loc[1000:3000, 'float_with_nan'] = np.nan self.df['datetime_string'] = self.df['datetime'].astype(str) self.df.to_sql(self.table_name, self.con, if_exists='replace') def time_read_sql_table_all(self): read_sql_table(self.table_name, self.con) def time_read_sql_table_parse_dates(self): read_sql_table(self.table_name, self.con, columns=['datetime_string'], parse_dates=['datetime_string'])
def send_to_db(self):
    conn = sqlite3.connect('data2.sqlite', timeout=30)
    c = conn.cursor()
    # build a one-row frame: the attribute names become the columns
    df = DataFrame(list(self.__dict__.items()), index=self.__dict__.keys())
    df = df.drop(columns=0)      # drop the duplicated key column
    df = df.transpose()
    df = df.sort_index(axis=1)   # sort columns by name (DataFrame.sort was removed)
    df.to_sql('earnings_calendar', conn, if_exists='append', index=False)
def save_to_database(df: pd.DataFrame, table_name: str): con = mysql.connector.connect( host='ec2-34-245-208-245.eu-west-1.compute.amazonaws.com', database='pregnaware', user='******', password=os.environ['DB_PREGNAWARE_PWD']) df.to_sql(con=con, name=table_name, flavor='mysql', if_exists='append', index=False)
def to_mysql(self):
    try:
        df = DataFrame({'user_id': [self.user_id],
                        'user_name': [self.user_name],
                        'title': [self.title],
                        'detail': [self.detail],
                        'publish_time': [self.publish_time],
                        'device': [self.device],
                        'href': [self.href],
                        'repost_count': [self.repost_count],
                        'donate_count': [self.donate_count],
                        'comment_count': [self.comment_count]},
                       columns=['user_id', 'user_name', 'title', 'detail',
                                'publish_time', 'repost_count', 'donate_count',
                                'comment_count', 'device', 'href'])
        print(df)
        try:
            # remove any earlier copy of this article before appending it again
            sql_del = ("delete from {table} where user_id='{user_id}' "
                       "and detail='{detail}' and publish_time='{publish_time}'").format(
                table=mysql_table_xueqiu_article,
                user_id=self.user_id,
                detail=self.detail,
                publish_time=self.publish_time)
            engine.execute(sql_del)
        except Exception as e:
            print('delete error! ', str(e))
        df.to_sql(mysql_table_xueqiu_article, engine, if_exists='append', index=False)
        return True
    except Exception as e:
        print(e)
        return False
def to_mysql(self):
    try:
        df = DataFrame({'user_id': [self.user_id],
                        'name': [self.name],
                        'sex': [self.sex],
                        'area': [self.area],
                        'stock_count': [self.stock_count],
                        'talk_count': [self.talk_count],
                        'fans_count': [self.fans_count],
                        'big_v_in_fans_count': 0,
                        'follows_count': 0,
                        'capacitys': [self.capacitys],
                        'summary': [self.summary],
                        'follow_search_time': '',
                        'update_time': [self.update_time]},
                       columns=['user_id', 'name', 'sex', 'area', 'stock_count',
                                'talk_count', 'fans_count', 'big_v_in_fans_count',
                                'follows_count', 'capacitys', 'summary',
                                'follow_search_time', 'update_time'])
        print(df)
        df.to_sql(big_v_table_mysql, engine, if_exists='append', index=False)
        return True
    except Exception as e:
        print(e)
        return False
def insert_co_exp_ids(profile_ids, modalities, db_con_1, db_con_2):
    """
    Scan high-frequency modalities, extract the exp ids that exist in all of
    them, and insert the ones not yet recorded into the co_exp_ids table.
    """
    print('\twriting co_exp_ids for sensor data')
    for profile_id in profile_ids:
        high_interval_mods = [x for x in modalities
                              if info.MOD_FREQ_TYPE[x] == info.FREQ_HIGH]
        co_exp_ids = []
        for mod in high_interval_mods:
            exp_ids = loader.load_exp_ids(profile_id, mod, filtered=False,
                                          server_index=1, db_con=db_con_1, close=False)
            if len(exp_ids) > 0:
                co_exp_ids.append(pd.DataFrame([0] * len(exp_ids), index=exp_ids, columns=[mod]))
        co_exp_ids = pd.concat(co_exp_ids, axis=1)
        co_exp_ids = co_exp_ids.dropna()
        co_exp_ids = list(co_exp_ids.index)
        co_exp_ids.sort()
        done_ids = loader.load_co_exp_ids(profile_id, db_con=db_con_2, close=False)
        co_exp_ids = [x for x in co_exp_ids if x not in done_ids]
        if len(co_exp_ids) == 0:
            print(profile_id, "all co_exp_ids are already inserted!")
            continue
        df = DataFrame(co_exp_ids, columns=['expId'])
        df['profile_id'] = profile_id
        df.to_sql("co_exp_ids", db_con_2, flavor='mysql', if_exists='append', index=False)
        print('\t\t%s exp ids of user %s were successfully inserted!' % (len(df), profile_id))
def get_publish_articles(self):
    t1 = time.time()
    print('begin query...')
    # sql = 'select distinct user_id from %s where user_id not in (select distinct user_id from %s)' % (big_v_table_mysql, archive_table_mysql)
    # df = pd.read_sql_query(sql, engine)
    # user_ids = df['user_id'].get_values()
    sql1 = 'select distinct user_id from %s where fans_count > 1000 and fans_count < 10001 ' % (big_v_table_mysql)
    sql2 = 'select distinct user_id from %s' % archive_table_mysql
    df1 = pd.read_sql_query(sql1, engine)
    df2 = pd.read_sql_query(sql2, engine)
    user_ids1 = df1['user_id'].values
    user_ids2 = df2['user_id'].values
    user_ids = [id for id in set(user_ids1).difference(user_ids2)]
    t2 = time.time()
    print('query mysql by join cost:', t2 - t1, 's')
    for user_id in user_ids:
        try:
            self.get_publish_articles_by_id(user_id)
        except Exception as e:
            # record the failed user so the crawl can be resumed later
            se = Series([user_id, GetNowTime(), str(e)],
                        index=['user_id', 'fail_time', 'fail_reason'])
            df = DataFrame(se).T
            df.to_sql(unfinish_arcticle_table_mysql, engine, if_exists='append', index=False)
            print(e)
def _big_v_in_fans_to_sql(self, followList, id):
    try:
        df = DataFrame({'user_id': followList,  # the users being followed
                        'fans_id': id           # the follower
                        }, columns=['user_id', 'fans_id'])
        print(df[:10])
        df.to_sql(fans_in_big_v_table_mysql, engine, if_exists='append', index=False)
    except Exception as e:
        print(e)
def test_mixed_dtype_insert(self):
    # see GH6509
    s1 = Series(2**25 + 1, dtype=np.int32)
    s2 = Series(0.0, dtype=np.float32)
    df = DataFrame({'s1': s1, 's2': s2})

    # write and read again
    df.to_sql("test_read_write", self.conn, index=False)
    df2 = sql.read_sql_table("test_read_write", self.conn)

    tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
def insert_stat_types(): mysql_con = mdb.connect(info.HOST_2, info.ID, info.PWD, info.DB_NAME_2) df = pd.read_sql("select * from info_statistics where statType = 'basic'", mysql_con, index_col='id') new_dict = {'statistics': [], 'statType': [], 'valueType': []} for i in range(10): for index, st_series in df.iterrows(): new_dict['statistics'].append("cat%sdur%s" % (i, str(st_series['statistics'][0]).capitalize() + st_series['statistics'][1:])) new_dict['statType'].append('valueDuration') new_dict['valueType'].append(st_series['valueType']) new_df = DataFrame(new_dict) new_df.to_sql("info_statistics", mysql_con, flavor='mysql', if_exists='append', index=False)
def df2db(self, df: pd.DataFrame, tab_name): """ Upload a df to db :param df: df to upload :param tab_name: table name :return: None """ self.execute("set hive.execution.engine = tez") self.execute("set tez.queue.name = sephora_internal") self.execute( "drop table if exists {table_name}".format(table_name=tab_name)) df.to_sql(tab_name, self.engine, method='multi', index=False)
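# Hypothetical call for df2db above. It assumes `hive` is an instance of the
# class that defines df2db and that self.engine already points at a working
# SQLAlchemy connectable; the DataFrame and table name are made up.
import pandas as pd

daily_sales = pd.DataFrame({'sku': ['A1', 'B2'], 'units': [3, 7]})
hive.df2db(daily_sales, 'tmp_daily_sales')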
def save_data(df: pd.DataFrame, database_filename: str) -> None: """ export the result to a db input: df: a cleaned df database_filename: the file path of db output: None """ engine = create_engine('sqlite:///'+database_filename) df.to_sql('message_table', engine, index=False, if_exists='replace') engine.dispose()
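# A short usage sketch for save_data above; the DataFrame contents and the
# 'messages.db' file name are illustrative, not taken from the original project.
import pandas as pd
from sqlalchemy import create_engine

sample = pd.DataFrame({'id': [1, 2], 'message': ['help needed', 'all clear']})
save_data(sample, 'messages.db')

# read the table back to confirm the write ('message_table' is the name used above)
engine = create_engine('sqlite:///messages.db')
print(pd.read_sql_table('message_table', engine).head())
engine.dispose()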
def export_table( self, df: pd.DataFrame, table, schema=None, if_exists="replace", method="multi", show_confirmation=True, ): with PostgresConnection(database_var=self.database_var) as connection: schema = schema or self.schema df = self.get_filtered_export(df, table, schema, connection, if_exists=if_exists) if df.shape[0] == 0: return df = pre_convert_data(df) df_dtype_dict = get_dataframe_dtypes(df) df = convert_dataframe_columns(df, df_dtype_dict) table_already_exists = check_table_exists(table, schema, connection) dtype_param = convert_dtypes( dtype_dict=df_dtype_dict, from_dtype="dataframe_dtype", to_dtype="postgres_dtype", ) # Attempting to overwrite mismatched data results in error if if_exists == "replace": connection.connection.execute( f"drop table if exists {schema}.{table} cascade") if show_confirmation: print(f"Exporting {table} {df.shape} to {schema}", end="") start = time.time() df.to_sql( table, method=method, if_exists=if_exists, dtype=dtype_param, schema=schema, con=connection.connection, index=False, ) end = time.time() elapsed_time = end - start if not table_already_exists: declare_primary_key(df, table, schema, connection) if show_confirmation: print(f" in {elapsed_time} seconds")
def write_mysql(table_name: str, data: pd.DataFrame, dtype: dict = None):
    """Write to the MySQL database; if the table already exists, append the rows."""
    try:
        data.to_sql(name=f'o_{table_name}', con=RemoteMySQLConfig.engine,
                    if_exists='append', index=False, dtype=dtype)
        LOG.logger_font.debug(f"mysql write table {table_name} succeeded!")
    except Exception as exc:
        LOG.logger_font.error(
            f"mysql write table {table_name} failed, error: {exc}.")
        raise
def cached_table_push(df: pd.DataFrame, tablename: str, **kwargs): """ Save df to local cache file and replace the table in the database. Parameters ---------- df : pd.DataFrame DataFrame to push tablename : str Name of the table in the database """ cache_path = CACHE_PATH + '/' + tablename + '.pkl' df.to_pickle(cache_path) df.to_sql(tablename, DB_CONNECT_STRING, if_exists='replace', method='multi', **kwargs)
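# Rough usage sketch for cached_table_push. CACHE_PATH and DB_CONNECT_STRING are
# module-level settings in the original code, so the values below are placeholders
# that only make the sketch self-contained.
import os
import pandas as pd

CACHE_PATH = './table_cache'                   # assumed cache directory
DB_CONNECT_STRING = 'sqlite:///cache_demo.db'  # assumed SQLAlchemy URL
os.makedirs(CACHE_PATH, exist_ok=True)

prices = pd.DataFrame({'symbol': ['AAA', 'BBB'], 'close': [10.5, 20.1]})
cached_table_push(prices, 'prices', index=False)  # extra kwargs go straight to to_sql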
def store_table( data_frame: pd.DataFrame, table_name: str, dtype: Optional[Mapping] = None, ): """Store a data frame in the DB. dtype is a dictionary of (column_name, column_type) column type can be: - 'boolean', - 'datetime', - 'double', - 'integer', - 'string' The function will use these to translate into (respectively) - sqlalchemy.Boolean() - sqlalchemy.DateTime() - sqlalchemy.Float() - sqlalchemy.BigInteger() - sqlalchemy.UnicodeText() :param data_frame: The data frame to store :param table_name: The name of the table in the DB :param dtype: dictionary with (column_name, data type) to force the storage of certain data types :return: Nothing. Side effect in the DB """ # Check the length of the column names if any(len(cname) > sql.COLUMN_NAME_SIZE for cname in data_frame.columns): raise Exception( _('Column name is longer than {0} characters').format( sql.COLUMN_NAME_SIZE)) if dtype is None: dtype = {} with cache.lock(table_name): # We ovewrite the content and do not create an index data_frame.to_sql( table_name, OnTaskSharedState.engine, if_exists='replace', index=False, dtype={ key: ontask_to_sqlalchemy[tvalue] for key, tvalue in dtype.items() }, )
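# Illustrative call for store_table above, using the documented dtype strings.
# The column names and table name are made up, and the call only works inside
# OnTask, where OnTaskSharedState.engine and the cache lock are configured.
import pandas as pd

frame = pd.DataFrame({
    'age': [23, 31],
    'registered': [True, False],
    'signup': pd.to_datetime(['2020-01-01', '2020-02-15']),
    'name': ['ada', 'grace'],
})
store_table(
    frame,
    'workflow_table_1',
    dtype={'age': 'integer', 'registered': 'boolean',
           'signup': 'datetime', 'name': 'string'},
)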
def aggregated_prod(self):
    print('Starting Aggregation......')
    try:
        data = self.cnx.execute(
            "Select count(*),name from prod group by name").fetchall()
        df = DataFrame(data, columns=['count', 'name'])
        df.to_sql('aggregated_data', self.cnx, if_exists='replace', index=False)
        print('Aggregation completed!!')
    except Exception as e:
        print(e)
        print('Aggregation failed!!')
def process_item(self, item, spider):
    global MYSQL_TableName
    poly = item['poly']
    if item['results']:
        results = item['results']
        rows = []
        for result in results:
            row = []
            keys1 = [
                'name', 'province', 'city', 'area', 'address', 'telephone',
                'uid', 'street_id', 'detail', 'detail_info', 'location'
            ]
            for key in keys1:
                # d[key] = result.get(key)
                row.append(result.get(key))
            keys2 = ['detail_url', 'tag', 'type']
            for key in keys2:
                detail_info = result.get('detail_info')
                if detail_info is None:
                    row.append(None)
                else:
                    row.append(detail_info.get(key))
            keys3 = ['search_word', 'region', 'requests_url']
            for key in keys3:
                row.append(item[key])
            rows.append([str(x) for x in row])
            print('Fetched poi: %s' % row[0])
        df = DataFrame(rows, columns=keys1 + keys2 + keys3)
        # region_pinyin = ''.join(lazy_pinyin(item['region']))
        region_pinyin = str(item['region'])
        # check whether each point lies inside the given poly region,
        # using shapely's polygon.contains
        try:
            df['isin_region'] = df['location'].apply(
                lambda x: poly.contains(
                    Point(float(eval(x)['lng']), float(eval(x)['lat']))))
        except Exception as e:
            logging.info(e)
            df['isin_region'] = 999
        if MYSQL_TableName == "":
            MYSQL_TableName = '{region}_bd_map_pois'.format(
                region=region_pinyin)
        else:
            pass
        df.to_sql(MYSQL_TableName, engine, if_exists='append', index=False)
def update_table(self, table_name: str, dataframe: pd.DataFrame, schema: str = None, **kwargs): """ This method updates an existing table based on an action For reference https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html Args: table_name: Name of the table to update dataframe: A pandas dataframe schema: defaults to current schema Kwargs: **if_exists**: {‘fail’, ‘replace’, ‘append’}, default ‘fail’ How to behave if the table already exists. fail: Raise a ValueError. replace: Drop the table before inserting new values. append: Insert new values to the existing table. **index**: bool, default True Write DataFrame index as a column. Uses index_label as the column name in the table. **index_label**: str or sequence, default None Column label for index column(s). If None is given (default) and index is True, then the index names are used.A sequence should be given if the DataFrame uses MultiIndex. **chunksize**: int, optional Specify the number of rows in each batch to be written at a time. By default, all rows will be written at once. **dtype**: dict or scalar, optional Specifying the datatype for columns. If a dictionary is used, the keys should be the column names and the values should be the SQLAlchemy types or strings for the sqlite3 legacy mode. If a scalar is provided, it will be applied to all columns. **method**: {None, ‘multi’, callable}, optional Controls the SQL insertion clause used: None : Uses standard SQL INSERT clause (one per row). ‘multi’: Pass multiple values in a single INSERT clause. callable with signature (pd_table, conn, keys, data_iter). Details and a sample callable implementation can be found in the section insert method of pandas documentation. Returns: None Raises: ValueError if if_exists is 'fail' which is default """ dataframe.to_sql(table_name, self.engine, schema=schema, **kwargs)
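# Sketch of how update_table might be called, exercising the documented kwargs;
# `db` stands for an instance of the class above, and the schema and table
# names are placeholders.
import pandas as pd

sales = pd.DataFrame({'day': pd.date_range('2021-01-01', periods=3),
                      'units': [10, 12, 9]})
db.update_table(
    'daily_sales',
    sales,
    schema='analytics',
    if_exists='append',  # keep existing rows and add these
    index=False,         # do not write the DataFrame index as a column
    chunksize=500,       # write in batches of 500 rows
    method='multi',      # multi-row INSERT statements
)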
def table_write(table_name: str, df: pd.DataFrame): # Rename cols to snake_case df.columns = (pd.Series( df.columns).map(lambda col: re.sub("([A-Z])", lambda x: "_" + x.group( 1).lower(), col).lstrip("_")).tolist()) engine = create_engine("postgresql://{}:{}@{}:5432/{}".format( os.environ["POSTGRES_USER"], os.environ["POSTGRES_PASSWORD"], os.environ["POSTGRES_HOST"], os.environ["PROJECT_NAME"], )) df.to_sql(name=table_name, con=engine, if_exists="replace", index=True)
def dfToTable( df: pd.DataFrame, table: str, db: str, ifExists: str = "replace", indexCols: list[str] | None = None, ) -> None: """Saves dataframe as table in sqlite3 database Args: df (pd.DataFrame) -- data to save table (str) -- table name db (str) -- database name (ending in .db) ifExists (str) -- pass-thru for pandas arg. "replace" (default), "append", "fail" indexCols (list of str) -- cols to be used as index. Defaults to None (no index). Returns nothing. """ # Handle dtypes df = df.convert_dtypes() assert ifExists in ["replace", "append", "fail"], f"Invalid ifExists: {ifExists}" # Handle index var if indexCols is not None: index_label = indexCols df.set_index(indexCols, drop=True, inplace=True, verify_integrity=True) index = True else: index_label = None index = False # Load table with sq.connect(db) as con: df.to_sql( name=table, con=con, if_exists=ifExists, method="multi", index=index, index_label=index_label, chunksize=1000, )
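# Example call for dfToTable; 'example.db' and the column names are placeholders.
import pandas as pd

games = pd.DataFrame({
    "game_id": [1, 2, 3],
    "home_score": [101, 95, 110],
    "away_score": [99, 97, 104],
})
dfToTable(games, table="games", db="example.db",
          ifExists="replace", indexCols=["game_id"])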
def to_mysql(self): try: df = DataFrame( { "user_id": [self.user_id], "user_name": [self.user_name], "title": [self.title], "detail": [self.detail], "publish_time": [self.publish_time], "href": [self.href], "watch_count": [self.watch_count], "repost_count": [self.repost_count], "donate_count": [self.donate_count], "comment_count": [self.comment_count], #'is_top':[self.is_top], #'is_repost':[self.is_repost], #'repost_reason':[self.repost_reason "device": [self.device], # ] }, columns=[ "user_id", "user_name", "title", "detail", "publish_time", "href", "watch_count", "repost_count", "donate_count", "comment_count", "device", ], ) print df try: sql_del = "delete from {table} where user_id='{user_id}' and title='{title}' and publish_time='{publish_time}'".format( table=mysql_table_licaishi_viewpoint, user_id=self.user_id, title=self.title, publish_time=self.publish_time, ) engine.execute(sql_del) except Exception, e: print "delete error! ", str(e) df.to_sql(mysql_table_licaishi_viewpoint, engine, if_exists="append", index=False) return True
def insert_cols(table_name: str, df: pd.DataFrame): # Rename cols to snake_case df.columns = (pd.Series( df.columns).map(lambda col: re.sub("([A-Z])", lambda x: "_" + x.group( 1).lower(), col).lstrip("_")).tolist()) engine = create_engine("postgresql://{}:{}@{}:5432/{}".format( os.environ["POSTGRES_USER"], os.environ["POSTGRES_PASSWORD"], os.environ["POSTGRES_HOST"], os.environ["PROJECT_NAME"], )) df.to_sql(name=table_name + "_tmp", con=engine, if_exists="replace", index=True) dtype_mapper = { np.dtype("object"): "TEXT", np.dtype("int32"): "BIGINT", np.dtype("int64"): "BIGINT", np.dtype("uint8"): "BIGINT", np.dtype("float64"): "DOUBLE PRECISION", } queries = [ """ ALTER TABLE {0} DROP COLUMN IF EXISTS {1}; ALTER TABLE {0} ADD COLUMN IF NOT EXISTS {1} {2}; UPDATE {0} t1 SET {1} = t2.{1} FROM {0}_tmp t2 WHERE t1.index = t2.index """.format(table_name, col_name, dtype) for col_name, dtype in df.dtypes.map(dtype_mapper).iteritems() ] with psycopg2.connect( user=os.environ["POSTGRES_USER"], password=os.environ["POSTGRES_PASSWORD"], host=os.environ["POSTGRES_HOST"], port=5432, database=os.environ["PROJECT_NAME"], ) as conn, conn.cursor() as cur: for query in queries: cur.execute(query) conn.commit() cur.execute("DROP TABLE {}_tmp;".format(table_name)) conn.commit()
def save_data(df: pd.DataFrame, database_filename: str) -> None:
    '''
    Saves the dataframe to an sqlite database so the results can be queried later.

    Parameters
    ----------
    df : pd.DataFrame
        records to save
    database_filename : str
        database file name
    '''
    with sqlite3.connect(database_filename) as conn:
        df.to_sql("disturbing_tweets", conn, if_exists='replace')
        conn.commit()
def writeToDB(self, ticker: str, df: pd.DataFrame): ### 'date','open','high','low','close' to 'id', 'time','open','high','low','close' conn = self.engine.connect() ticker_id = self._getTickerId(ticker) df.rename(columns={'date': 'time'}, inplace=True) df['id'] = ticker_id df.to_sql(DBAccessor.DATA_TABLE, index=False, con=conn, if_exists='append')
def save_data(df: list, table: str): engine = get_engine(password='', host='localhost', port=5432, database=constants.DATABASE) dfl = DataFrame(df) dfl.to_sql(table, con=engine, index=False, if_exists='append', chunksize=100) return True
def save_to_sql(self, df: pd.DataFrame) -> None:
    """Dump the DataFrame to the coinmarketcap table in coinmarketcap.db"""
    database_name = config.database_name.split('.')[0]
    try:
        with self.connection as conn:
            logger.info('Successfully connected to SQLite')
            self.cursor.execute(f'DROP TABLE IF EXISTS {database_name};')
            df.to_sql(database_name, conn)
    except sqlite3.Error as error:
        logger.error('Error with connection to sqlite: %s', error)
    finally:
        if self.connection:
            self.connection.close()
            logger.info('The SQLite connection is closed')
def __insert(self, df: pd.DataFrame, table_name: str, dtype: dict, chunksize: int = 1000, mode: str = 'replace') -> None: df.to_sql(name=table_name, con=self.__engine, index=False, schema=self.SCHEMA, if_exists=mode, dtype=dtype, chunksize=chunksize, method='multi')
def upload_portfolio_valuation(df: pd.DataFrame): conn = sqlite3.connect(db) c = conn.cursor() checker = check_if_table_exists('PortfolioValuation') if not checker: df.to_sql('PortfolioValuation', conn) elif checker: c.execute('''drop table main.PortfolioValuation''') df.to_sql('PortfolioValuation', conn) conn.commit() conn.close()
def write_table(table: str, df: pd.DataFrame, mode='replace'): if len(df) > 0: engine = MysqlDAO.create_engine() df.to_sql(table, con=engine, if_exists=mode, index=False, chunksize=1000) with engine.begin() as conn: conn.execute(f'ALTER TABLE {table} ENGINE = MYISAM') engine.dispose()
def write_mysql(
        table_name: str,
        data: pd.DataFrame,
):
    """Write to the MySQL database; if the table already exists, append the rows."""
    try:
        data.to_sql(name=f'o_{table_name}', con=RemoteMySQLConfig.engine,
                    if_exists='append', index=False)
        logging.info(f"mysql write table {table_name} succeeded!")
    except Exception as exc:
        logging.error(f"mysql write table {table_name} failed, error: {exc}.")
        raise
def sqlselect(table: pd.DataFrame, sql): if len(table.columns) == 0: return (pd.DataFrame(), []) with sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES) as conn: table.to_sql("input", conn, index=False) with _deleting_cursor(conn.cursor()) as c: try: c.execute(sql) except sqlite3.DatabaseError as err: return None, _database_error_to_messages(err) except sqlite3.Warning as err: return None, _database_warning_to_messages(err) if c.description is None: return ( None, [ i18n.trans( "badValue.sql.commentedQuery", "Your query did nothing. Did you accidentally comment it out?", ) ], ) colnames = [d[0] for d in c.description] dupdetect = set() for colname in colnames: if colname in dupdetect: return ( None, [ i18n.trans( "badValue.sql.duplicateColumnName", 'Your query would produce two columns named {colname}. Please delete one or alias it with "AS".', {"colname": colname}, ) ], ) dupdetect.add(colname) # Memory-inefficient: creates a Python object per value data = c.fetchall( ) # TODO benchmark c.arraysize=1000, =100000, etc. return pd.DataFrame.from_records(data, columns=colnames), []
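# Quick illustration of sqlselect above: the DataFrame is registered as the
# table "input" (as the function does internally), so the query selects from
# "input"; the sample data is made up, and the module's private helpers used
# by sqlselect are assumed to be importable alongside it.
import pandas as pd

orders = pd.DataFrame({"customer": ["a", "a", "b"], "total": [10, 20, 5]})
result, messages = sqlselect(
    orders, 'SELECT customer, SUM(total) AS total FROM input GROUP BY customer')
print(messages)  # an empty list when the query succeeds
print(result)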
def set_events(events_df: pd.DataFrame, table_name: str): events_df = events_df.drop(['chip_plays', 'top_element_info'], axis=1) try: events_df.to_sql(name=table_name, con=database.cnx, if_exists='replace', index=True) except ValueError as vx: print(vx) except Exception as ex: print(ex) else: print(table_name + " table created successfully.")
def fetch_ohlcv(since): ohlcv = bitmex.fetch_ohlcv(symbol, timeframe='1m', since=since, limit=LIMIT, params={'partial': False}) df = DataFrame(ohlcv, columns=cols) df['date'] = to_datetime(df['date'], unit='ms', utc=True, infer_datetime_format=True) print(df) df.to_sql('ticks', con, if_exists='append', index=None) print("fetch done")
def save_data(df: pd.DataFrame, database_filename: str) -> None: """ Save the dataframe to a Sql-lite Database Parameters df: The pandas.Dataframe to be written database_filename: The filename path for the database Returns None """ engine = create_engine( 'sqlite:///' + database_filename ) # https://docs.sqlalchemy.org/en/13/dialects/sqlite.html df.to_sql( 'DisasterMessages', engine, if_exists="replace", index=False ) # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
def writePandastoTable(self, df: pd.DataFrame, schema, tablename): """ Writes the Pandas Dataframe to DB Table. Data write operation is only append. If table doesn't exists in the DB, it creates new table and load the data :param df: Source Data :param tablename: Target DB Table :param schema: Target DB Schema :return: """ logging.info("Storing Dataframe data to " + schema + "." + tablename) print("Storing Dataframe data to " + schema + "." + tablename) df.to_sql(name=tablename, schema=schema, con=self._dbEngine, if_exists="append", index=False)
def get_one_stock_data_toDb_byEngine(self, stockId):
    '''Store one stock's K-line data in the database via pandas' to_sql.'''
    try:
        df = ts.get_k_data(stockId)
        # SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://.....'
        engine = create_engine(
            'mysql+pymysql://root:Root@[email protected]/stocksdb?charset=utf8'
        )
        # write into the database
        df.to_sql(stockId + '_KDay', engine, if_exists='replace')
    except Exception:
        logging.error('one stock toDb:\n%s' % traceback.format_exc())
        pass
def write_df(table: DeclarativeMeta, df: pd.DataFrame) -> None: """ Writes the |df| to the |table|. The column headers on |df| must match the column names in |table|. All rows in |df| will be appended to |table|. If a row in |df| already exists in |table|, then that row will be skipped. """ try: df.to_sql(table.__tablename__, SQLAlchemyEngineManager.get_engine_for_schema_base(JailsBase), if_exists='append', index=False) except IntegrityError: _write_df_only_successful_rows(table, df)
def create_mock_bq_table( self, dataset_id: str, table_id: str, mock_schema: MockTableSchema, mock_data: pd.DataFrame, ) -> None: postgres_table_name = self.register_bq_address( address=BigQueryAddress(dataset_id=dataset_id, table_id=table_id)) mock_data.to_sql( name=postgres_table_name, con=self.postgres_engine, dtype=mock_schema.data_types, index=False, )
def probe(cls, conn: sqlite3.Connection, checkList: pd.DataFrame = None, **keyargs):
    '''
    Probe function: queries the database for the primary keys given in the
    arguments to see whether that record exists; used by renew to decide
    whether there is data that needs updating.

    Usage:
    (1) Use the keyword arguments as the query condition and check whether the
        corresponding content field exists in the database; returns a bool.
        e.g. probe(conn, ts_code='000001.SZ', trade_date='20191010') -> True
    (2) Pass a DataFrame; each row is used as a query condition, and the rows
        whose content field is missing in the database are returned as a new
        DataFrame.
        (Currently, if a row's condition matches several records, the data is
        treated as missing as long as one of them is null. But some APIs)
        todo: add an option so that, when a row's condition matches several
        records, the row is only reported as missing if all of them are null.
    '''
    tablename = list(cls.mapper.keys())[0]
    reverse_mapper = {
        value: key
        for key, value in cls.mapper[tablename].items()
    }
    if checkList is None:
        sql = "select 1 from {targetTable} where {clause} limit 1"
        clause = " "
        for field in keyargs.keys():
            db_field = reverse_mapper[field]
            clause += "{db_field}='{param}' and ".format(
                db_field=db_field, param=keyargs[field])
        a1 = time.time()
        clause += "{nonkf} notnull".format(nonkf=cls.GET_NotKeyField())
        probesql = sql.format(targetTable=tablename, clause=clause)
        rst = conn.execute(probesql)
        return True if len(rst.fetchall()) > 0 else False
    else:
        if not isinstance(checkList, pd.DataFrame):
            raise TypeError("checkList must be a pandas.DataFrame!")
        checkList.to_sql(name='temp_table', con=conn, if_exists='replace', index=False)
        joinCondition = ''
        sql = ('select a.* from temp_table a left join {tablename} b '
               'on {joinCondition} where b.{contentField} isnull')
        for col in list(checkList.columns):
            # note the trailing space so consecutive join conditions stay separated
            joinCondition += 'a.{df_col} = b.{db_col} and '.format(
                df_col=col, db_col=reverse_mapper[col])
        joinCondition = joinCondition[0:-4]
        probesql = sql.format(tablename=tablename,
                              joinCondition=joinCondition,
                              contentField=cls.GET_NotKeyField())
        rst = pd.read_sql_query(probesql, conn)
        return rst
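# Usage sketch for probe, following the two modes described in its docstring;
# `QuotesTable` is a hypothetical subclass that defines cls.mapper and
# GET_NotKeyField(), and 'quotes.db' is a placeholder database file.
import sqlite3
import pandas as pd

conn = sqlite3.connect('quotes.db')

# mode 1: check a single key combination, returns True/False
exists = QuotesTable.probe(conn, ts_code='000001.SZ', trade_date='20191010')

# mode 2: pass a DataFrame of key combinations; rows whose data is missing
# come back as a new DataFrame
candidates = pd.DataFrame({'ts_code': ['000001.SZ', '000002.SZ'],
                           'trade_date': ['20191010', '20191010']})
missing = QuotesTable.probe(conn, checkList=candidates)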
def load_data( self, table_name: str, data: pd.DataFrame, database: str | None = None, if_exists: Literal['fail', 'replace', 'append'] = 'fail', ) -> None: """Load data from a dataframe to the backend. Parameters ---------- table_name Name of the table in which to load data data Pandas DataFrame database Database in which the table exists if_exists What to do when data in `name` already exists Raises ------ NotImplementedError Loading data to a table from a different database is not yet implemented """ if database == self.current_database: # avoid fully qualified name database = None if database is not None: raise NotImplementedError( 'Loading data to a table from a different database is not ' 'yet implemented') params = {} if self.has_attachment: # for database with attachment # see: https://github.com/ibis-project/ibis/issues/1930 params['schema'] = self.current_database data.to_sql( table_name, con=self.con, index=False, if_exists=if_exists, **params, )
def _downloadFqFactor(codes): factorDF = DataFrame() for code in codes: logging.info("Downloading %s fq factor."%code) df = ts.stock.trading._parase_fq_factor(code,'','') df.insert(0,"code",code,True) df = df.drop_duplicates('date').set_index('date') factorDF = pd.concat([factorDF, df]) if conf.DEBUG: break logging.info("Deleting fq factor.") utils.executeSQL("delete from t_daily_fqFactor") logging.info("Saving fq factor.") factorDF.to_sql(name='t_daily_fqFactor',con=utils.getEngine(), if_exists="append",chunksize=20000) logging.info("Saved fq factor.")
def test_aliased_views_with_computation():
    engine = sa.create_engine('sqlite:///:memory:')
    df_aaa = DataFrame({'x': [1, 2, 3, 2, 3],
                        'y': [2, 1, 2, 3, 1],
                        'z': [3, 3, 3, 1, 2]})
    df_bbb = DataFrame({'w': [1, 2, 3, 2, 3],
                        'x': [2, 1, 2, 3, 1],
                        'y': [3, 3, 3, 1, 2]})

    df_aaa.to_sql('aaa', engine)
    df_bbb.to_sql('bbb', engine)

    metadata = sa.MetaData(engine)
    metadata.reflect()

    sql_aaa = metadata.tables['aaa']
    sql_bbb = metadata.tables['bbb']

    L = Symbol('aaa', discover(df_aaa))
    R = Symbol('bbb', discover(df_bbb))
    expr = join(by(L.x, y_total=L.y.sum()), R)

    a = compute(expr, {L: df_aaa, R: df_bbb})
    b = compute(expr, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())
    a = compute(expr2, {L: df_aaa, R: df_bbb})
    b = compute(expr2, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr3 = by(expr.x, count=expr.y_total.count())
    a = compute(expr3, {L: df_aaa, R: df_bbb})
    b = compute(expr3, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr4 = join(expr2, R)
    a = compute(expr4, {L: df_aaa, R: df_bbb})
    b = compute(expr4, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)
def insert_data_types(): mysql_con = mdb.connect(info.HOST_2, info.ID, info.PWD, info.DB_NAME_2) df = {'modality': [], 'field': [], 'variableType': [], 'permissionFree': [], 'category': [], 'collectionFrequency': [], 'collectionDuration': [], 'sensitiveSensor': []} for mod in info.FREE_MODE_LIST: fields = info.MOD_FIELD_LIST[mod] for f in fields: df['modality'] += [mod] df['field'] += [f] df['variableType'] += [info.MOD_FIELD_TYPE[mod][f].split("_type")[0]] df['permissionFree'] += ['yes'] if f in info.PERMISSION_FREE[mod] else ['no'] df['category'] += [info.MOD_CATEGORY[mod].split("Probe")[0]] df['collectionFrequency'] += [info.MOD_FREQ_TYPE[mod].split("_frequency")[0]] df['collectionDuration'] += [info.MOD_COL_TYPE[mod].split("_type")[0]] df['sensitiveSensor'] += ['yes'] if mod in info.SENSITIVE_MOD else ['no'] df = DataFrame(df, columns=['modality', 'field', 'variableType', 'permissionFree', 'category', 'collectionFrequency', 'collectionDuration', 'sensitiveSensor']) df.to_sql("dataTypes", mysql_con, flavor='mysql', if_exists='append', index=False)
def insert_from_cp(con): instCpStockCode = win32com.client.Dispatch("CpUtil.CpStockCode") instStockMst = win32com.client.Dispatch("dscbo1.StockMst") instCpCodeMgr = win32com.client.Dispatch("CpUtil.CpCodeMgr") code_data = {'CODE':[], 'NAME':[], 'TYPE':[], 'PER':[], 'BPS':[], #PRICE / BPS 'INDUSTRY_CODE':[], 'INDUSTRY':[]} total = instCpStockCode.GetCount() for i in range(0, total): code = instCpStockCode.GetData(CPSTOCKCODE_CODE, i) name = instCpStockCode.GetData(CPSTOCKCODE_NAME, i) code_data['CODE'].append(code) code_data['NAME'].append(name) instStockMst.SetInputValue(0, code) instStockMst.BlockRequest() type = instStockMst.GetHeaderValue(CPSTOCKMST_CATEGORY) per = instStockMst.GetHeaderValue(CPSTOCKMST_PER) bps = instStockMst.GetHeaderValue(CPSTOCKMST_BPS) industry_code = instStockMst.GetHeaderValue(CPSTOCKMST_INDUSTRY_CODE) industry_name = instCpCodeMgr.GetIndustryName(industry_code) code_data['TYPE'].append(type) code_data['PER'].append(per) code_data['BPS'].append(bps) code_data['INDUSTRY_CODE'].append(industry_code) code_data['INDUSTRY'].append(industry_name) get_logger().debug("{}/{} {} {} {} {} {}".format(i, total,code, name, type, per, bps)) data = DataFrame(code_data) data.to_sql("CODE", con, if_exists='replace', chunksize=1000) get_logger().debug("{} 주식 종목 코드를 저장 하였습니다.".format(len(data)))
def _write(self, tablename, expected_dtypes, frame): if frame is None or frame.empty: # keeping the dtypes correct for empty frames is not easy frame = DataFrame( np.array([], dtype=list(expected_dtypes.items())), ) else: if tablename == 'shares': pass else: if frozenset(frame.columns) != frozenset(expected_dtypes): raise ValueError( "Unexpected frame columns:\n" "Expected Columns: %s\n" "Received Columns: %s" % ( set(expected_dtypes), frame.columns.tolist(), ) ) actual_dtypes = frame.dtypes for colname, expected in iteritems(expected_dtypes): actual = actual_dtypes[colname] if not issubdtype(actual, expected): raise TypeError( "Expected data of type {expected} for column" " '{colname}', but got '{actual}'.".format( expected=expected, colname=colname, actual=actual, ), ) frame.to_sql( tablename, self.conn, if_exists='append', chunksize=50000, )
class SQLDFTest(unittest.TestCase): def setUp(self): self.default_df = DataFrame( [["l1", 1, 2], ["l2", 3, 4], ["l3", 4, 5]], columns=["label", "c1", "c2"]) self.default_env = {"a": 1, "df": self.default_df} self.default_udfs = {"udf1": lambda x: x} class udaf1(object): def __init__(self): self.count = 0 def step(self, x): self.count += 1 def finalize(self): return self.count self.default_udafs = {"udaf1": udaf1} def tearDown(self): pass def test_constructor_with_default(self): sqldf = SQLDF(self.default_env) self.assertEqual(isinstance(sqldf, SQLDF), True) self.assertEqual(sqldf.env, self.default_env) self.assertEqual(sqldf.inmemory, True) self.assertEqual(sqldf._dbname, ":memory:") self.assertEqual(sqldf.udfs, {}) self.assertEqual(sqldf.udafs, {}) self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True) def test_constructor_with_assign(self): sqldf = SQLDF( self.default_env, inmemory=False, udfs=self.default_udfs, udafs=self.default_udafs) self.assertEqual(isinstance(sqldf, SQLDF), True) self.assertEqual(sqldf.env, self.default_env) self.assertEqual(sqldf.inmemory, False) self.assertEqual(sqldf._dbname, ".pysqldf.db") self.assertEqual(sqldf.udfs, self.default_udfs) self.assertEqual(sqldf.udafs, self.default_udafs) self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True) def test_destructor_with_inmemory_db(self): sqldf = SQLDF(self.default_env) conn = sqldf.conn self.assertRaises( sqlite3.OperationalError, lambda: conn.execute("select * from tbl;")) sqldf = None # destruct self.assertRaises( sqlite3.ProgrammingError, lambda: conn.execute("select * from tbl;")) def test_destructor_with_fs_db(self): sqldf = SQLDF(self.default_env, inmemory=False) conn = sqldf.conn self.assertRaises( sqlite3.OperationalError, lambda: conn.execute("select * from tbl;")) self.assertEqual(os.path.exists(".pysqldf.db"), True) sqldf = None # destruct self.assertRaises( sqlite3.ProgrammingError, lambda: conn.execute("select * from tbl;")) self.assertEqual(os.path.exists(".pysqldf.db"), False) def test_execute_method(self): sqldf = SQLDF(self.default_env) query = "select * from df;" result = sqldf.execute(query) assert_frame_equal(result, self.default_df) # table deleted self.assertRaises( sqlite3.OperationalError, lambda: sqldf.conn.execute(query)) def test_execute_method_returning_none(self): births = load_births() result = SQLDF(locals()).execute( "select a from births limit 10;") # col a not exists self.assertEqual(result, None) def test_execute_method_with_table_not_found(self): sqldf = SQLDF(self.default_env) self.assertRaises( Exception, lambda: sqldf.execute("select * from notable")) # table deleted self.assertRaises(sqlite3.OperationalError, lambda: sqldf.conn.execute("select * from df;")) def test_execute_method_with_query_error(self): sqldf = SQLDF(self.default_env) self.assertEqual(sqldf.execute("select a from df uuuuuu;"), None) # table deleted self.assertRaises(sqlite3.OperationalError, lambda: sqldf.conn.execute("select * from df;")) def test_extract_table_names_method(self): sqldf = SQLDF(self.default_env) tablenames = { "select * from df;": ["df"], "select * from df": ["df"], "select * from _": ["_"], "select * from 11;": [], "select * from 1ab;": [], "select * from a-b;": [], "select * from a.b;": [], "select a;": [], "select * from (select * from subq_df) f;": ["subq_df"], "select * from df d1 inner join df2 d2 on d1.id = d2.id;": ["df", "df2"], "select a, b c from df where c in (select foo from df2 inner join df3 on df2.id = df3.id);": ["df", "df2", "df3"], "select * from df 
where a in (select a from (select c from df2 where c in (select a from df3 inner join df4 on df3.id = df4.id)));": ["df", "df2", "df3", "df4"] } for query, tablename in tablenames.items(): self.assertEqual( set(sqldf._extract_table_names(query)), set(tablename)) def test_ensure_data_frame_method_nested_list(self): data = [[1, 2, 3], [4, 5, 6]] result = SQLDF(locals())._ensure_data_frame(data, "df") self.assertEqual(len(result), 2) self.assertEqual(list(result.columns), ["c0", "c1", "c2"]) self.assertEqual(list(result.index), [0, 1]) def test_ensure_data_frame_method_list_of_tuple(self): data = [(1, 2, 3), (4, 5, 6)] result = SQLDF(locals())._ensure_data_frame(data, "df") self.assertEqual(len(result), 2) self.assertEqual(list(result.columns), ["c0", "c1", "c2"]) self.assertEqual(list(result.index), [0, 1]) def test_ensure_data_frame_method_nested_tuple(self): data = ((1, 2, 3), (4, 5, 6)) sqldf = SQLDF(locals()) self.assertRaises( Exception, lambda: sqldf._ensure_data_frame( data, "df")) def test_ensure_data_frame_method_tuple_of_list(self): data = ([1, 2, 3], [4, 5, 6]) sqldf = SQLDF(locals()) self.assertRaises( Exception, lambda: sqldf._ensure_data_frame( data, "df")) def test_ensure_data_frame_method_list_of_dict(self): data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}] result = SQLDF(locals())._ensure_data_frame(data, "df") self.assertEqual(len(result), 2) self.assertEqual(list(result.columns), ["a", "b", "c"]) self.assertEqual(list(result.index), [0, 1]) def test_write_table_method(self): df = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) sqldf = SQLDF(locals()) sqldf._write_table("tbl", df) # table created cursor = sqldf.conn.cursor() sq_type, name, tbl_name, rootpage, sql = list( cursor.execute("select * from sqlite_master where type='table';"))[0] self.assertEqual(name, "tbl") def test_write_table_method_col_with_left_bracket(self): df = DataFrame([[1]], columns=["col("]) sqldf = SQLDF(locals()) self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df)) def test_write_table_method_col_with_right_bracket(self): df = DataFrame([[1]], columns=["co)l"]) sqldf = SQLDF(locals()) self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df)) def test_write_table_method_garbage_table(self): df = [[1, 2], [3, [4]]] sqldf = SQLDF(locals()) self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df)) # table destroyed cursor = sqldf.conn.cursor() tablemaster = list(cursor.execute("select * from sqlite_master where type='table';")) self.assertEqual(tablemaster, []) def test_del_table_method(self): sqldf = SQLDF(locals()) cursor = sqldf.conn.cursor() # create table cursor.execute("create table deltbl(col);") sqldf._del_table(["deltbl"]) self.assertEqual( list( cursor.execute("select * from sqlite_master where type='table';")), []) def test_del_table_method_not_exist_table(self): sqldf = SQLDF(locals()) self.assertRaises( sqlite3.OperationalError, lambda: sqldf._del_table( ["deltblaaaaaaa"])) def test_set_udf_method(self): sqldf = SQLDF(locals()) conn = sqldf.conn self.default_df.to_sql("df", conn) sqldf._set_udf(self.default_udfs) self.assertEqual( list( conn.execute("select udf1(label) from df;")), [ ("l1",), ("l2",), ("l3",)]) def test_set_udaf_method_with_agg_class(self): sqldf = SQLDF(locals()) conn = sqldf.conn self.default_df.to_sql("df", conn) sqldf._set_udaf(self.default_udafs) self.assertEqual( list( conn.execute("select udaf1(label) from df;")), [ (3,)]) def test_set_udaf_method_with_agg_function(self): sqldf = SQLDF(locals()) conn = sqldf.conn 
self.default_df.to_sql("df", conn) def agg_func(values): return len(values) sqldf._set_udaf({"mycount": agg_func}) self.assertEqual( list( conn.execute("select mycount(label) from df;")), [ (3,)]) def test_udf(self): data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}] def ten(x): return 10 result = SQLDF(locals(), udfs={"ten": ten}).execute( "SELECT ten(a) AS ten FROM data;") self.assertEqual(len(result), 2) self.assertEqual(list(result.columns), ["ten"]) self.assertEqual(list(result.index), [0, 1]) self.assertEqual(list(result["ten"]), [10, 10]) def test_udaf(self): data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}] class mycount(object): def __init__(self): super(mycount, self).__init__() self.count = 0 def step(self, x): self.count += x def finalize(self): return self.count result = SQLDF(locals(), udafs={"mycount": mycount}).execute( "select mycount(a) as mycount from data;") self.assertEqual(len(result), 1) self.assertEqual(list(result.columns), ["mycount"]) self.assertEqual(list(result.index), [0]) self.assertEqual(list(result["mycount"]), [1 + 4]) def test_no_table(self): self.assertRaises( Exception, lambda: SQLDF( locals()).execute("select * from notable;")) def test_invalid_colname(self): data = [{"a": "valid", "(b)": "invalid"}] sqldf = SQLDF(locals()) self.assertRaises( Exception, lambda: sqldf.execute("select * from data;")) def test_db_in_fs(self): data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}] sqldf = SQLDF(locals(), inmemory=False) self.assertEqual(os.path.exists(".pysqldf.db"), True) sqldf = None # run GC self.assertEqual(os.path.exists(".pysqldf.db"), False)
# In[163]:

df


# In[151]:

conn = lite.connect('UN_education.db')
cur = conn.cursor()


# In[164]:

cur.execute('DROP TABLE IF EXISTS school_years')
df.to_sql('school_years', conn)


# In[156]:

cur.execute('SELECT * FROM school_years')
for r in cur:
    print(r)


# In[169]:

print(df.describe())
# coding:UTF-8
import dota2api from pandas import DataFrame, Series import pandas as pd import numpy as np import sqlalchemy from settings import API_KEY import settings ACCOUNT_ID = 172282397 mysql_engine = sqlalchemy.create_engine("postgresql://{}@localhost/{}".format(settings.PSQL_USER,settings.PSQL_DB)) #mysql_engine = sqlalchemy.create_engine('mysql+pymysql://root@localhost/bp_helper_db?charset=utf8') connection = mysql_engine.connect() api = dota2api.Initialise(API_KEY) #hero_info heroes = api.get_heroes(language='zh')["heroes"] heroes_df = DataFrame(heroes) heroes_df.to_sql('dota2_hero',mysql_engine,if_exists='replace') #item_info items = api.get_game_items(language='zh')["items"] items_df = DataFrame(items) items_df.to_sql('dota2_item',mysql_engine,if_exists='replace')
def run(): with sqlite3.connect("analyze.db") as con: cursor = con.cursor() code_magc = {'CODE':[], 'NAME':[]} code_bband= {'CODE':[], 'NAME':[]} code_macd = {'CODE':[], 'NAME':[]} code_arrange = {'CODE':[], 'NAME':[]} per_bps_pbr = {'CODE':[], 'NAME':[], 'OPEN':[], 'HIGH':[], 'LOW':[], 'CLOSE':[], 'VOLUME':[], 'PER':[], 'BPS':[], 'PBR':[], 'INDUSTRY_CODE':[], 'INDUSTRY':[]} for code, name in get_code_list(): df = makeDataFrame( code ) if len(df) == 0: continue # per,bps, pbr table per , bps = get_per_bps_with_code(code) open, high, low, close, volume = get_last_data_with_code(code) if bps != 0: pbr = close / bps else: pbr = 0.0 industry_code, industry_name = get_industry_with_code(code) per_bps_pbr['CODE'].append(code) per_bps_pbr['NAME'].append(name) per_bps_pbr['OPEN'].append(open) per_bps_pbr['HIGH'].append(high) per_bps_pbr['LOW'].append(low) per_bps_pbr['CLOSE'].append(close) per_bps_pbr['VOLUME'].append(volume) per_bps_pbr['PER'].append(per) per_bps_pbr['BPS'].append(bps) per_bps_pbr['PBR'].append(pbr) per_bps_pbr['INDUSTRY_CODE'].append(industry_code) per_bps_pbr['INDUSTRY'].append(industry_name) get_logger().debug("{} {} {} {} {} {} {} {} {} {}".format(code,name,open,high,low, close,volume,per,bps,pbr)) arranged = isArrange( df, 60, 120 ) if arranged == True and pbr <= 1: code_arrange['CODE'].append(code) code_arrange['NAME'].append(name) get_logger().debug("ARRANGE {}{}".format(code, name)) res = isBBandSignal( df, 20 ) if res == True and pbr <= 1: code_bband['CODE'].append(code) code_bband['NAME'].append(name) get_logger().debug("BBnad lower after up {}{}".format(code,name)) res = isMAGoldCross( df, 20, 60 ) if res == True and arranged == True and pbr <= 1: code_magc['CODE'].append(code) code_magc['NAME'].append(name) get_logger().debug("MA20, MA60 Golden Cross {}{}".format(code,name)) res = isMACDSignal( df, 12, 26, 9) if res == True and arranged == True and pbr <= 1: code_macd['CODE'].append(code) code_macd['NAME'].append(name) get_logger().debug("MACD sig {}{}".format(code,name)) magc = DataFrame(code_magc) bband = DataFrame(code_bband) macd = DataFrame(code_macd) magc.to_sql("MAGC", con, if_exists='replace', chunksize=1000) get_logger().debug("MAGC {} saved.".format(len(magc))) bband.to_sql("BBAND", con, if_exists='replace', chunksize=1000) get_logger().debug("BBAND {} saved.".format(len(bband))) macd.to_sql("MACD", con, if_exists='replace', chunksize=1000) get_logger().debug("MACD {} saved.".format(len(macd))) arrange = DataFrame(code_arrange) arrange.to_sql("ARRANGE", con, if_exists='replace', chunksize=1000) get_logger().debug("ARRANGE {} saved.".format(len(arrange))) per_bps_pbr_df = DataFrame(per_bps_pbr) per_bps_pbr_df.to_sql("BPS", con, if_exists='replace', chunksize=1000) get_logger().debug("BPS {} saved.".format(len(per_bps_pbr_df)))
import sqlite3 import pandas as pd import pandas.io.sql as sql from pandas import Series, DataFrame df = DataFrame({'A': [1,2,3]}) #df["id"] = df.index con = sqlite3.connect("test.db") df.to_sql("test", con)
if not (args.apps or args.tests or args.extra or args.conv):
    benches = apps
else:
    if args.apps:
        benches.extend(apps)
    if args.tests:
        benches.extend(tests)
    if args.conv:
        benches.extend(conv)
    if args.extra:
        benches.extend(args.extra)

benches = [a for a in benches if a not in disabled]
print('Loading:\n ' + '\n '.join(benches))

res = DataFrame()
for app in benches:
    try:
        res = res.append(ingest(app))
    except Exception:  # IOError, e:
        print('Skipping missing or malformed: ' + app)
    # except:
    #     print('\n\nFailed on', app, '\n')
    #     raise

db = create_engine('sqlite:///benchmarks.db')
res.to_sql('benchmarks', db)
res.to_csv('benchmarks.csv')
if __name__ == '__main__': db_name = sys.argv[1] db_user = sys.argv[2] db_pass = sys.argv[3] # Load data for each page con = db.connect('localhost',db_user,db_pass,db_name) query = 'SELECT rev_user, rev_ip, page_id, page_namespace, name FROM revision ' + \ 'INNER JOIN page ON page.page_id=revision.rev_page ' + \ 'INNER JOIN namespaces ON page.page_namespace=namespaces.code' data = read_sql(query, con) con.close() # Map IP addresses to country codes data['country'] = data['rev_ip'].apply(lambda ip: getCountryCode(ip)) data['country'] = data['country'].fillna('Unknown') # Create country contributions for each page con = db.connect('localhost',db_user,db_pass,db_name) for page_id, page_revs in data.groupby('page_id'): nRevs = len(page_revs) cRevs = page_revs.groupby('country').size() / nRevs # Insert into country_contrib # Values: page_id, country (cRevs.keys()), contributions(cRevs.keys()) df = DataFrame(cRevs, columns=['contribution'] ) df['page_id'] = page_id df.to_sql(con=con, name='country_contrib', if_exists='append', flavor='mysql') con.close()
def run(): global code global sell_point with sqlite3.connect("backtesting.db") as con: cursor = con.cursor() backtesting_save_data = { 'CODE':[], 'NAME':[], 'STRATEGY':[], 'SELL_PRICE_RATIO':[], 'PORTFOLIO_VALUE':[], 'OPEN':[], 'HIGH':[], 'LOW':[], 'CLOSE':[], 'VOLUME':[], 'PER':[], 'BPS':[], 'PBR':[]} for strategys in STRATEGY: for code, name in get_code_list_from_analyze(strategys): per , bps = get_per_bps_with_code(code) open, high, low, close, volume = get_last_data_with_code(code) if bps != 0: pbr = close / bps else: pbr = 0.0 get_logger().debug("code : {}. name:{} strategy:{} start".format(code,name,strategys)) last_portfolio = 0 data = makeBacktestingDataFrame(code) for point in SELL_PRICE_RATIO: sell_point = point if strategys == 'MAGC': algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_magc, handle_data=handle_data_magc, identifiers=[code] ) results = algo.run(data) elif strategys == 'MACD': algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_macd, handle_data=handle_data_macd, identifiers=[code] ) results = algo.run(data) elif strategys == 'BBAND': algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_bband, handle_data=handle_data_bband, identifiers=[code] ) results = algo.run(data) portfolio = results['portfolio_value'][-1] if last_portfolio < portfolio: if last_portfolio != 0 : backtesting_save_data['CODE'].pop() backtesting_save_data['NAME'].pop() backtesting_save_data['STRATEGY'].pop() backtesting_save_data['SELL_PRICE_RATIO'].pop() backtesting_save_data['PORTFOLIO_VALUE'].pop() backtesting_save_data['OPEN'].pop() backtesting_save_data['HIGH'].pop() backtesting_save_data['LOW'].pop() backtesting_save_data['CLOSE'].pop() backtesting_save_data['VOLUME'].pop() backtesting_save_data['PER'].pop() backtesting_save_data['BPS'].pop() backtesting_save_data['PBR'].pop() backtesting_save_data['CODE'].append(code) backtesting_save_data['NAME'].append(name) backtesting_save_data['STRATEGY'].append(strategys) backtesting_save_data['SELL_PRICE_RATIO'].append('{}'.format(point)) backtesting_save_data['PORTFOLIO_VALUE'].append(portfolio) backtesting_save_data['OPEN'].append(open) backtesting_save_data['HIGH'].append(high) backtesting_save_data['LOW'].append(low) backtesting_save_data['CLOSE'].append(close) backtesting_save_data['VOLUME'].append(volume) backtesting_save_data['PER'].append(per) backtesting_save_data['BPS'].append(bps) backtesting_save_data['PBR'].append(pbr) last_portfolio = portfolio backtesting_save_df = DataFrame(backtesting_save_data) backtesting_save_df.to_sql('BACK', con, if_exists='replace', chunksize=1000) get_logger().debug("code : {}. name:{} strategy:{} end".format(code,name,strategys))
def load_audit_logs_into_postgres(options): session = ValkfleetConnector().db engine = WarehouseConnector().db log.info('Loading audit points for {start} to {stop} (batches of {n})'.format( start=options.start, stop=NOW, n=options.batch_size) ) # Avoid loading data twice: grab the latest timestamp in the database result = engine.execute(sql.statements[1]) resume_from = args.start if result.rowcount: last_timestamp = list(result)[0][0] + timedelta(milliseconds=1) if last_timestamp > args.start: resume_from = last_timestamp log.info('Resuming at %s', resume_from) cursor = None more = True batch_counter = 0 batch_max_timestamp = None while more: url = ENDPOINT + '?filter=timestamp gt {start}&orderby=timestamp&batch_size={batch_size}'.format( start=resume_from.isoformat(), batch_size=options.batch_size ) if cursor: url += '&cursor=%s' % cursor log.info('Requesting %s', url) response = session.get(url) json = response.json() if 'error' not in json.keys(): records = json['items'] more = json['more'] cursor = json['cursor'] def add_foreign_key(records_): for record in records_: if record['event'] != 'acceptRoute-clicked': record.update({'delivery_uuid': record['metadata']['delivery']}) del record['metadata'] yield record fc_records = list(add_foreign_key(records)) batch = DataFrame().from_records(fc_records) if batch.empty: log.warning('Batch %s is empty', batch_counter) batch.to_sql(TABLE, engine, schema=SCHEMA, if_exists='append', index=False) # The time columns are actually strings batch_min_timestamp = batch['timestamp'].min()[:19] batch_max_timestamp = batch['timestamp'].max()[:19] batch_counter += 1 kwargs = dict(schema=SCHEMA, table=TABLE, n=batch_counter, records=batch.shape[0], fields=batch.shape[1], min=batch_min_timestamp, max=batch_max_timestamp) log.info('Loaded batch {n} into table {schema}.{table} ' '({records} records, {fields} fields):' '{min} to {max}'.format(**kwargs)) else: message = 'Lost the cursor on batch {n} {time}): {status} {error}'.format( n=batch_counter, time=batch_max_timestamp, status=response.status_code, error=response.json() ) log.error(message, exc_info=True) exit(message) log.info('Finished loading tracks for %s to %s', options.start, NOW)
def run(): with sqlite3.connect("price.db") as con: cursor = con.cursor() def _make_long_date(date): return date.year * 10000 + date.month * 100 + date.day for code, name in get_code_list(): table_name = code recent_date = get_last_update_date(table_name) if recent_date is None: recent_date = START_DATE if recent_date.date() == datetime.now().date(): continue if recent_date.weekday() == 4 and datetime.now() - recent_date < timedelta(hours=64): continue start_date = (recent_date + timedelta(days=1)) end_date = datetime.now() if datetime.now().hour < 16: end_date = datetime.now() - timedelta(days=1) if start_date > end_date: continue instStockChart = win32com.client.Dispatch("CpSysDib.StockChart") instStockChart.SetInputValue(CPSTOCKCHART_REQ_CODE, code) instStockChart.SetInputValue(CPSTOCKCHART_REQ_DATE_OR_COUNT, CPSTOCKCHART_REQ_PARAM_DATE) instStockChart.SetInputValue(CPSTOCKCHART_REQ_END_DATE,_make_long_date(end_date)) instStockChart.SetInputValue(CPSTOCKCHART_REQ_START_DATE,_make_long_date(start_date)) instStockChart.SetInputValue(CPSTOCKCHART_REQ_FIELD, [CPSTOCKCHART_REQ_PARAM_FIELD_DATE, CPSTOCKCHART_REQ_PARAM_FIELD_OPEN, CPSTOCKCHART_REQ_PARAM_FIELD_HIGH, CPSTOCKCHART_REQ_PARAM_FIELD_LOW, CPSTOCKCHART_REQ_PARAM_FIELD_CLOSE, CPSTOCKCHART_REQ_PARAM_FIELD_VOLUME]) instStockChart.SetInputValue(CPSTOCKCHART_REQ_TYPE, CPSTOCKCHART_REQ_TYPE_PARAM_DAY) instStockChart.SetInputValue(CPSTOCKCHART_REQ_ADJ, CPSTOCKCHART_REQ_ADJ_PARAM_ADJUST) instStockChart.BlockRequest() numData = instStockChart.GetHeaderValue(CPSTOCKCHART_RES_DATA_COUNT) price_data = {'DATE':[], 'OPEN':[], 'HIGH':[], 'LOW':[], 'CLOSE':[], 'VOLUME':[]} # cybos plus 최근데이터 부터 온다 for i in reversed(range(numData)): long_date = instStockChart.GetDataValue(0, i) year = int(long_date / 10000) month = int(long_date / 100) % 100 day = long_date % 100 dateval = datetime(year, month, day, 0, 0, 0) open = instStockChart.GetDataValue(1, i) high = instStockChart.GetDataValue(2, i) low = instStockChart.GetDataValue(3, i) close = instStockChart.GetDataValue(4, i) volume = instStockChart.GetDataValue(5, i) price_data['DATE'].append(dateval) price_data['OPEN'].append(open) price_data['HIGH'].append(high) price_data['LOW'].append(low) price_data['CLOSE'].append(close) price_data['VOLUME'].append(volume) price = DataFrame(price_data) price.to_sql(table_name, con, if_exists='append', chunksize=1000) get_logger().debug("{} 종목의 {}데이터를 저장 하였습니다.".format(code, len(price))) #remove old data row = cursor.execute("SELECT COUNT(*) FROM '{}'".format(table_name)).fetchone() if row[0] > 600: row = cursor.execute("DELETE FROM '{}' WHERE DATE = (SELECT MIN(DATE) FROM '{}')".format(table_name, table_name))
for name, params in tables.items(): new_df = DataFrame() print(' -> Clean up {} dataframe'.format(name)) for year in Years: # clean up the table clean_func = params[0] tmp_df = clean_func(year) if new_df.empty: new_df = tmp_df else: new_df = new_df.append(tmp_df, (params[1] == 'index')) print(' -> Load {} dataframe to SQL database'.format(name)) if options.replace: if_exists = 'replace' else: if_exists = 'append' sql_options = {'con':sqlEngine, 'if_exists':if_exists, 'index':True, 'chunksize':100} new_df.to_sql(name=name, **sql_options) # We want the index to be a primary key to speed things up among other reasons. sqlEngine.execute('''ALTER TABLE {} ADD PRIMARY KEY (`{}`)'''.format(name,params[1]))