def exec_mask(self): df = self.__result # total count total = len(df) r = None for k, mask in self.stat.items(): self.stat[k] = mask.sum() / total if r is None: r = mask else: r = r & mask print('{}\t:{}'.format(k, r.sum())) df = df[r] self.stat['total'] = total self.stat['all'] = len(df) self.stat['all_ratio'] = len(df) / total df_stat = pd.DataFrame(self.stat, index=pd.Series([self.__y])) df_stat.insert(0, 'm', self.__m) df_stat.insert(0, 'y', self.__y) df.to_sql(self.standard_table_name, get_engine(), index=False, if_exists='append') df_stat.to_sql(self.standard_start_table_name, get_engine(), index=False, if_exists='append') self.stat = df_stat self.__result = df
def delete(self): sqls = """delete from his where TABLE_name in ('balancesheet','income','cashflow','fina_indicator','dividend','one_fina','one_index','all'); truncate table balancesheet; truncate table income; truncate table cashflow; truncate table fina_indicator; truncate table dividend; truncate table dividend_stat;""".split('\n') for sql in sqls: get_engine().execute(sql)
def loop_codes(): param = get_calc_param() # codes = pd.read_sql_query("select DISTINCT ts_code from i_data where debt_to_assets < 50 ts_code = '600177.SH' ;", get_engine()) codes = pd.read_sql_query("""select DISTINCT ts_code from i_data ;""", get_engine()) mps = [] for i, row in codes.iterrows(): t = my_calc(row['ts_code'], param) mps.append(t) df = pd.DataFrame(mps) df.to_sql('i_calc', get_engine(), index=False, if_exists='replace')
def get_index_distinct_codes(index_code): sql = "select DISTINCT(con_code) from index_weight where index_code = '{}'" sql = sql.format(index_code) df = pd.read_sql_query(sql, get_engine()) if not len(df): print('error index_code:', index_code) return df
def get_nincome_roe_pe_meta(ts_code): sql_abc = """select s.ts_code,f.y,f.m,f.end_date, i.n_income_attr_p , f.roe, m.close ,m.pe,m.pe_ttm, d.close as c2 ,d.pe p2 ,d.pe_ttm pt2 from stock_basic s,fina_indicator f, daily_basic_month m ,daily_basic d,income i where s.ts_code = f.ts_code and f.ts_code = m.ts_code and m.ts_code = d.ts_code and d.ts_code = i.ts_code and f.y = m.y and m.y = i.y and f.m = m.m and m.m = i.m and s.ts_code = '{}' and f.y > 2010 order by f.y ,f.m ;""".format(ts_code) all_abc = pd.read_sql_query(sql_abc, get_engine()) df = all_abc[all_abc.m == 3] df['yl'] = df['y'] df = df.set_index(['y']) for i in range(1, 5): t = all_abc[all_abc.m == 3 * i] t = t.set_index(['y']) df['n{}'.format(i * 3)] = t['n_income_attr_p'] df['r{}'.format(i * 3)] = t['roe'] df['pe{}'.format(i * 3)] = t['pe'] df['pe_ttm{}'.format(i * 3)] = t['pe_ttm'] return df
def get_analyse_collection(y, m, pe_thresthold, year_start): sql = """select # l.name, l.ts_code,l.list_status,l.list_date,l.delist_date, b.y,b.m, b.total_share,b.total_liab,b.total_cur_assets,b.total_assets,b.total_ncl,b.total_hldr_eqy_inc_min_int, b.intan_assets,b.r_and_d,b.goodwill,b.lt_amor_exp,b.defer_tax_assets,b.total_hldr_eqy_exc_min_int , f.dt_eps,f.eps,f.current_ratio,f.quick_ratio,f.tangible_asset, m.close,m.pe,1/m.pe as ep,m.dv_ratio,m.total_mv, d.stk_div,d.cash_div from stock_list l, stock_balancesheet b, stock_fina_indicator f, stock_month_matrix_basic m, stock_dividend d where l.ts_code = b.ts_code and l.list_date <= b.end_date and l.list_status = 'L' and b.ts_code = f.ts_code and b.y = f.y and b.m = f.m and b.ts_code = d.ts_code and b.y = d.y and b.ts_code = m.ts_code and b.y = m.y and b.m =m.m and m.pe > 0 and # b.total_hldr_eqy_inc_min_int is not null and b.y ={} and b.m ={} and m.pe <= {} and l.list_date <= '{}-12-31' ;""" sql = sql.format(y, m, pe_thresthold, year_start) df = pd.read_sql_query(sql, get_engine()) if not len(df): print('error sql:') return df
def _update_ts(self): cal_date = self._get_nearest_cal_date() if cal_date is None: return trade_date = cal_date.strftime('%Y%m%d') df = get_pro().daily_basic(ts_code='', trade_date=trade_date) dtype = { 'ts_code': VARCHAR(length=10), 'trade_date': DATE(), 'close': FLOAT(), 'y': INT(), 'm': INT(), 'turnover_rate': FLOAT(), 'turnover_rate_f': FLOAT(), 'volume_ratio': FLOAT(), 'pe': FLOAT(), 'pe_ttm': FLOAT(), 'pb': FLOAT(), 'ps': FLOAT(), 'ps_ttm': FLOAT(), 'dv_ratio': FLOAT(), 'dv_ttm': FLOAT(), 'total_share': FLOAT(), 'float_share': FLOAT(), 'free_share': FLOAT(), 'total_mv': FLOAT(), 'circ_mv': FLOAT() } df.to_sql(self.get_table_name(), get_engine(), dtype=dtype, index=False, if_exists='append')
def init_stock_price_monthly(ts_code, force=None): table_name = 'stock_price_monthly' if not need_pull_check(ts_code, table_name, force): print('need not 2 pull {} -> {}'.format(table_name, ts_code)) return else: print('start 2 pull {} -> {} .'.format(table_name, ts_code)) df = get_pro().monthly( ts_code=ts_code, fields='ts_code,trade_date,open,high,low,close,vol,amount') if not len(df): return df_add_y_m(df, 'trade_date') dtype = { 'ts_code': VARCHAR(length=10), 'trade_date': DATE(), 'y': INT, 'm': INT, 'open': DECIMAL(precision=8, scale=2), 'high': DECIMAL(precision=8, scale=2), 'low': DECIMAL(precision=8, scale=2), 'close': DECIMAL(precision=8, scale=2), 'vol': BIGINT(), 'amount': BIGINT() } df.to_sql(table_name, get_engine(), dtype=dtype, index=False, if_exists='append')
def get_trade_date(y, m): sql = '''select * from trade_cal where y ={} and m = {};''' sql = sql.format(y, m) df = pd.read_sql_query(sql, get_engine()) first = None if len(df) == 0 else df['first'][0] last = None if len(df) == 0 else df['last'][0] return first, last
def _init_ts(self): y_start = 1990 __pool = ThreadPoolExecutor(max_workers=MULTIPLE, thread_name_prefix="test_") fs = [] i = 0 for y_i in range(31)[::-1]: y = y_start + y_i first, last = dao.get_trade_date(y, 0) if not first: continue print("{}-{}".format(y, 0)) first = first.strftime('%Y%m%d') last = last.strftime('%Y%m%d') f1 = __pool.submit(get_pro().index_weight, index_code=self.biz_code, start_date=first, end_date=first) f2 = __pool.submit(get_pro().index_weight, index_code=self.biz_code, start_date=last, end_date=last) fs.append(f1) fs.append(f2) i += 2 if i > 197: print('198次后休息60秒') time.sleep(60) i = 0 df = None for f2 in fs: temp_df = f2.result() if len(temp_df): if df is None: df = temp_df else: df = df.append(temp_df, ignore_index=True) df_add_y_m(df, 'trade_date') dtype = { 'index_code': VARCHAR(length=10), 'con_code': VARCHAR(length=10), 'y': INT, 'm': INT, 'trade_date': DATE(), 'weight': DECIMAL(precision=10, scale=6) } df = df.reindex( columns='index_code,con_code,y,m,trade_date,weight'.split(',')) df.to_sql(self.get_table_name(), get_engine(), dtype=dtype, index=False, if_exists='append')
def get_list_all(limit=None, sql=None): sql = ("select * from stock_list " + ('' if limit is None else 'limit {}'.format(limit)) + ';') if sql is None else sql df = pd.read_sql_query(sql, get_engine()) if not len(df): print('error sql:') return df
def init_month_matrix_basic(): table_name = 'stock_month_matrix_basic' sql = 'select * from trade_date where m != 0 ;' yms = pd.read_sql_query(sql, get_engine()) df = None for i, row in yms.iterrows(): first_trade_date_str = row['first'].strftime('%Y%m%d') last_last_date_str = row['last'].strftime('%Y%m%d') data = __pro.daily_basic(ts_code='', trade_date=last_last_date_str) print(last_last_date_str) if df is None: df = data else: df = df.append(data) df_add_y_m(df, 'trade_date') df.reset_index(drop=True) df = df.iloc[::-1] dtype = { 'ts_code': VARCHAR(length=10), 'trade_date': DATE(), 'close': FLOAT(), 'y': INT(), 'm': INT(), 'turnover_rate': FLOAT(), 'turnover_rate_f': FLOAT(), 'volume_ratio': FLOAT(), 'pe': FLOAT(), 'pe_ttm': FLOAT(), 'pb': FLOAT(), 'ps': FLOAT(), 'ps_ttm': FLOAT(), 'dv_ratio': FLOAT(), 'dv_ttm': FLOAT(), 'total_share': FLOAT(), 'float_share': FLOAT(), 'free_share': FLOAT(), 'total_mv': FLOAT(), 'circ_mv': FLOAT() } df.to_sql(table_name, get_engine(), dtype=dtype, index=False, if_exists='append')
def load(self): # load market load_market = f"""select ts_code,y,m,close from daily_basic_month where ts_code = '{self.ts_code}' and {self.end} >= y and y>= {self.start} and m = 6 order by y ,m;""" load_premium = f"""select * from dividend_stat where ts_code = '{self.ts_code}' and {self.end} >= y and y>= {self.start} """ market_df = pd.read_sql_query(load_market, get_engine()) premium_df = pd.read_sql_query(load_premium, get_engine()) if len(market_df) < (self.end - self.start + 1): raise LookupError('Not A Good Data Sample.') premium_dict = {} for i, row in premium_df.iterrows(): premium_dict[row['y']] = row self._market = market_df self._premium = premium_dict
def _delete_ts(self): if self.operate == CTL_OPERATE_TRUNCATE: sql = 'drop table if exists {}'.format(self.get_table_name()) elif self.operate == CTL_OPERATE_APPEND: sql = "delete from {} where {} = '{}'".format( self.get_table_name(), self.biz_col_name, self.biz_code) get_engine().execute(sql) logging.info('==delete_ts ' + sql) if self.calc_table: if self.operate == CTL_OPERATE_TRUNCATE: sql = 'drop table if exists {}'.format(self.get_table_name() + self.calc_table) elif self.operate == CTL_OPERATE_APPEND: sql = "delete from {} where {} = '{}'".format( self.get_table_name() + self.calc_table, self.biz_col_name, self.biz_code) get_engine().execute(sql) logging.info('==delete_ts ' + sql)
def get_fina(ts_code, start, end, m): if ts_code is str: sql = """select * from stock_fina_indicator where ts_code = '{}' and y between {} and {} and m = {} order by y desc""" sql = sql.format(ts_code, start, end, m) else: sql = """select ts_code,end_date,y,m,eps,dt_eps from stock_fina_indicator where ts_code in ({}) and y between {} and {} and m = {} order by y desc""" sql = sql.format(','.join(["'%s'" % item for item in ts_code]), start, end, m) df = pd.read_sql_query(sql, get_engine()) return df
def _init_ts(self): pass if self.biz_code == 'tangchao': con_codes = '600519.SH,002304.SZ,002415.SZ,002027.SZ,000596.SZ'.split( ',') elif self.biz_code == 'manxl': con_codes = '600519.SH,002304.SZ,002415.SZ,002027.SZ,000596.SZ'.split( ',') sql_template = "INSERT INTO index_weight VALUES ('{}', '{}', {}, {}, '{}-{}-31', 0 );" sqls = [] for con_code in con_codes: sqls.append( sql_template.format(self.biz_code, con_code, 2020, 12, 2020, 12)) for sql in sqls: get_engine().execute(sql)
def _get_con_codes(self): sql = """select * from index_weight where index_code = '{}' and trade_date = (select max(trade_date) from index_weight where index_code = '{}')""".format( self.biz_code, self.biz_code, ) df = pd.read_sql_query(sql, get_engine()) if len(df) == 0: raise Exception('index not initialed.') return df['con_code']
def need_pull_check(code, table_name, force=None, condition_column='ts_code'): if force is None: sql = "select count(*) from {} where {} = '{}';".format( table_name, condition_column, code) # con = get_engine().connect() try: # size = get_engine().execute(sql).fetchone()[0] df = pd.read_sql_query(sql, get_engine()) size = df.iloc[0, 0] except Exception as e: if 'f405' == e.code: return True else: print(e) exit(4) # finally: # con.close() return False if size > 0 else True else: if 'delete' == force: sql = "delete from {} where {} = '{}';".format( table_name, condition_column, code) elif 'drop' == force: sql = "drop table {};".format(table_name) else: print('need_pull_check {} force flag error:{}'.format( table_name, force)) exit(4) try: r = pd.read_sql_query(sql, get_engine()).iloc[0, 0] print('force clean {} rows 4 {}'.format(r.rowcount, code)) except Exception as e: if 'f405' == e.code or 'e3q8' == e.code: return True else: print(e) exit(4) return True
def _get_nearest_cal_date(self): if time.localtime().tm_hour < 19: di = 1 else: di = 0 sql = f'select cal_date from trade_cal_detail where cal_date <= curdate() - {di} order by cal_date desc limit 1;' df = pd.read_sql_query(sql, get_engine()) cal_date = df.iloc[0]['cal_date'] if self.his is not None and cal_date <= self.his.end_date: return None return cal_date
def get_standard_stat(y, m): try: sql = 'select * from standard_stat where y = {} and m = {};' sql = sql.format(y, m) df = pd.read_sql_query(sql, get_engine()) except Exception as e: if 'f405' == e.code: return None else: print(e) return None return df
def get_balancesheet_df(ts_code, flag=None, y=None): if flag == 'assert': if not y: sql = balancesheet_get_stock_info_his_by_ts_code.format(**locals()) else: sql = balancesheet_get_stock_industry_info.format(**locals()) elif flag == 'liab': if not y: sql = balancesheet_get_stock_info_his_by_ts_code_2.format( **locals()) else: sql = balancesheet_get_stock_industry_info_2.format(**locals()) else: raise AttributeError('flag neither assert nor liab.') df = pd.read_sql_query(sql, get_engine()) if 'y' in df.columns: df = df.set_index(['y']) elif 'ts_code' in df.columns: print('mark') df.loc[df[df['ts_code'] == ts_code].index[0], ['ts_code']] = ts_code + '(*)' df = df.set_index(['ts_code']) else: raise AttributeError('Nether y nor ts_code must in dataframe!') column_list = df.columns.values.tolist() last_col = column_list[len(column_list) - 1] for col in df.columns.values.tolist(): # delete nan df[col] = df[col].apply(lambda x: 0 if x != x or x is None else x) # calc last_col if last_col != col: df[last_col] = df[last_col] - df[col] sm = df.sum(axis=1) flag1 = sm != 0 flag2 = df[df.columns[len(df.columns) - 1]] != 0 sm = sm[flag1 & flag2] df = df[flag1 & flag2] if len(df) == 0: return None for col in df.columns.values.tolist(): df[col] = round(df[col] * 100 / sm, 2) return df
def init_table_indexes(): sql = "SELECT TABLE_NAME FROM information_schema.TABLES where table_schema = '{}' and table_type = 'BASE TABLE';" sql = sql.format(SCHEMA) df = pd.read_sql_query(sql, get_engine()) target_list = set([ 'end_date', 'index_code', 'con_code', 'ts_code', 'trade_date', 'y', 'm', 't', 'list_status', 'list_date', 'delist_date', 'total_share', 'total_liab', 'total_cur_assets', 'total_assets', 'dt_eps', 'current_ratio', 'quick_ratio', 'close', 'stk_div', 'cash_div' ]) sql_tpl_create_index = 'create index idx_{}_{} on {} ({});' e = get_engine() for table_name in df.iloc[:, 0]: sql_index = 'show index from {};'.format(table_name) sql_columns = 'show columns from {};'.format(table_name) df_index = pd.read_sql_query(sql_index, get_engine()) df_columns = pd.read_sql_query(sql_columns, get_engine()) df_index = set(df_index['Column_name'].values.tolist()) df_columns = set(df_columns['Field'].values.tolist()) target_columns = target_list & (df_columns - df_index) for column in target_columns: sql = sql_tpl_create_index.format(table_name, column, table_name, column) print(sql) e.execute(sql) print('-' * 32, 'add index') df = df[df.TABLE_NAME != 'trade_date_detail'] df = df[df.TABLE_NAME != 'stock_dividend_detail'] key_list = ['index_code', 'con_code', 'ts_code', 'cal_date', 'y', 'm'] prim_keys = set(key_list) sql_prim = "select * from information_schema.TABLE_CONSTRAINTS where CONSTRAINT_SCHEMA = '{}' and table_name = '{}';" sql_add_prim = 'alter table {}.{} add primary key({});' for table_name in df.iloc[:, 0]: sql = sql_prim.format(SCHEMA, table_name) prim = pd.read_sql_query(sql, get_engine()) if len(prim) > 0: continue sql_columns = 'show columns from {};'.format(table_name) df_columns = pd.read_sql_query(sql_columns, get_engine()) df_columns = set(df_columns['Field'].values.tolist()) p = prim_keys & df_columns key_temp = key_list[:] for key in key_list: if not key in p: key_temp.remove(key) if key_temp: sql = sql_add_prim.format(SCHEMA, table_name, ','.join(key_temp)) print(sql) # print(table_name, key_temp) e.execute(sql)
def init_fund_nav(ts_code, force=None): table_name = 'fund_nav' if not need_pull_check(ts_code, table_name, force): print('need not 2 pull {} -> {}'.format(table_name, ts_code)) return else: print('start 2 pull {} -> {} .'.format(table_name, ts_code)) df = get_pro().fund_nav(ts_code=ts_code) if not len(df): return df_add_y_m(df, 'end_date') df.to_sql(table_name, get_engine(), index=False, if_exists='append')
def get_pe_low(y, m, percent): sql = """select *from ( select @row_num:=@row_num+1 as row_num , a.* from (select CASE when pe is null then 9999 else pe end as pea from {}.stock_month_matrix_basic where y = {} and m ={} order by pea asc) a, (select @row_num:=0) b order by pea ) base where base.row_num <= (@row_num*{}) order by pea desc limit 1""" sql = sql.format(SCHEMA, y, m, percent) df = pd.read_sql_query(sql, get_engine()) return df.iloc[0, 1]
def _init_ts(self): print('start init list...') fileds = 'ts_code,symbol,name,area,industry,fullname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs' d_l = get_pro().query(self.interface, exchange='', list_status='L', fields=fileds) print('L', len(d_l)) d_d = get_pro().query(self.interface, exchange='', list_status='D', fields=fileds) print('D', len(d_d)) d_p = get_pro().query(self.interface, exchange='', list_status='P', fields=fileds) print('P', len(d_p)) df = d_l.append(d_d).append(d_p) print('all size:', len(df)) dtype = { 'ts_code': VARCHAR(length=10), 'symbol': VARCHAR(length=8), 'name': VARCHAR(length=20), 'area': VARCHAR(length=10), 'industry': VARCHAR(length=32), 'fullname': VARCHAR(length=32), 'market': VARCHAR(length=10), 'exchange': VARCHAR(length=10), 'curr_type': VARCHAR(length=5), 'list_status': VARCHAR(length=1), 'list_date': DATE(), 'delist_date': DATE(), 'is_hs': VARCHAR(length=1) } df.to_sql(self.get_table_name(), get_engine(), dtype=dtype, index=False, if_exists='replace')
def kkk(): sql = """ select b.y,b.total_hldr_eqy_inc_min_int as e, i.n_income as r, i.n_income /total_hldr_eqy_exc_min_int as roe, d.cash_div_tax*d.base_share as s from balancesheet b,income i , fina_indicator f,dividend d where 1=1 and b.ts_code = i.ts_code and i.ts_code = f.ts_code and f.ts_code = d.ts_code and b.y = i.y and i.y = f.y and f.y = d.y and b.m = i.m and i.m = f.m and b.ts_code = '{}' and b.y between {} and {} and b.m = 12 order by b.y asc; """.format(TEST_TS_CODE_GSYH, 2006, 2010) df = pd.read_sql_query(sql, get_engine()) return df
def init_stock_list_all(): print('start init list...') fileds = 'ts_code,symbol,name,area,industry,fullname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs' d_l = __pro.stock_basic(exchange='', list_status='L', fields=fileds) print('L', len(d_l)) d_d = __pro.stock_basic(exchange='', list_status='D', fields=fileds) print('D', len(d_d)) d_p = __pro.stock_basic(exchange='', list_status='P', fields=fileds) print('P', len(d_p)) df = d_l.append(d_d).append(d_p) print('all size:', len(df)) dtype = { 'ts_code': VARCHAR(length=10), 'symbol': VARCHAR(length=8), 'name': VARCHAR(length=20), 'area': VARCHAR(length=10), 'industry': VARCHAR(length=32), 'fullname': VARCHAR(length=32), 'market': VARCHAR(length=10), 'exchange': VARCHAR(length=10), 'curr_type': VARCHAR(length=5), 'list_status': VARCHAR(length=1), 'list_date': DATE(), 'delist_date': DATE(), 'is_hs': VARCHAR(length=1) } # df.reset_index(drop=True) # df = df.reindex(columns='ts_code,end_date,ex_date,div_proc,stk_div,cash_div'.split(',')) df.to_sql('stock_list', get_engine(), dtype=dtype, index=False, if_exists='replace') print('finished init list.')
def init_fina_indicator(ts_code, force=None): table_name = 'stock_fina_indicator' if not need_pull_check(ts_code, table_name, force): print('need not 2 pull {} -> {}'.format(table_name, ts_code)) return else: print('start 2 pull {} -> {} .'.format(table_name, ts_code)) dtype = { 'ts_code': VARCHAR(length=10), 'ann_date': DATE(), 'end_date': DATE(), 'y': INT(), 'm': INT(), 'eps': FLOAT(), 'dt_eps': FLOAT(), 'total_revenue_ps': FLOAT(), 'revenue_ps': FLOAT(), 'capital_rese_ps': FLOAT(), 'surplus_rese_ps': FLOAT(), 'undist_profit_ps': FLOAT(), 'extra_item': FLOAT(), 'profit_dedt': FLOAT(), 'gross_margin': FLOAT(), 'current_ratio': FLOAT(), 'quick_ratio': FLOAT(), 'cash_ratio': FLOAT(), 'invturn_days': FLOAT(), 'arturn_days': FLOAT(), 'inv_turn': FLOAT(), 'ar_turn': FLOAT(), 'ca_turn': FLOAT(), 'fa_turn': FLOAT(), 'assets_turn': FLOAT(), 'op_income': FLOAT(), 'valuechange_income': FLOAT(), 'interst_income': FLOAT(), 'daa': FLOAT(), 'ebit': FLOAT(), 'ebitda': FLOAT(), 'fcff': FLOAT(), 'fcfe': FLOAT(), 'current_exint': FLOAT(), 'noncurrent_exint': FLOAT(), 'interestdebt': FLOAT(), 'netdebt': FLOAT(), 'tangible_asset': FLOAT(), 'working_capital': FLOAT(), 'networking_capital': FLOAT(), 'invest_capital': FLOAT(), 'retained_earnings': FLOAT(), 'diluted2_eps': FLOAT(), 'bps': FLOAT(), 'ocfps': FLOAT(), 'retainedps': FLOAT(), 'cfps': FLOAT(), 'ebit_ps': FLOAT(), 'fcff_ps': FLOAT(), 'fcfe_ps': FLOAT(), 'netprofit_margin': FLOAT(), 'grossprofit_margin': FLOAT(), 'cogs_of_sales': FLOAT(), 'expense_of_sales': FLOAT(), 'profit_to_gr': FLOAT(), 'saleexp_to_gr': FLOAT(), 'adminexp_of_gr': FLOAT(), 'finaexp_of_gr': FLOAT(), 'impai_ttm': FLOAT(), 'gc_of_gr': FLOAT(), 'op_of_gr': FLOAT(), 'ebit_of_gr': FLOAT(), 'roe': FLOAT(), 'roe_waa': FLOAT(), 'roe_dt': FLOAT(), 'roa': FLOAT(), 'npta': FLOAT(), 'roic': FLOAT(), 'roe_yearly': FLOAT(), 'roa2_yearly': FLOAT(), 'roe_avg': FLOAT(), 'opincome_of_ebt': FLOAT(), 'investincome_of_ebt': FLOAT(), 'n_op_profit_of_ebt': FLOAT(), 'tax_to_ebt': FLOAT(), 'dtprofit_to_profit': FLOAT(), 'salescash_to_or': FLOAT(), 'ocf_to_or': FLOAT(), 'ocf_to_opincome': FLOAT(), 'capitalized_to_da': FLOAT(), 'debt_to_assets': FLOAT(), 'assets_to_eqt': FLOAT(), 'dp_assets_to_eqt': FLOAT(), 'ca_to_assets': FLOAT(), 'nca_to_assets': FLOAT(), 'tbassets_to_totalassets': FLOAT(), 'int_to_talcap': FLOAT(), 'eqt_to_talcapital': FLOAT(), 'currentdebt_to_debt': FLOAT(), 'longdeb_to_debt': FLOAT(), 'ocf_to_shortdebt': FLOAT(), 'debt_to_eqt': FLOAT(), 'eqt_to_debt': FLOAT(), 'eqt_to_interestdebt': FLOAT(), 'tangibleasset_to_debt': FLOAT(), 'tangasset_to_intdebt': FLOAT(), 'tangibleasset_to_netdebt': FLOAT(), 'ocf_to_debt': FLOAT(), 'ocf_to_interestdebt': FLOAT(), 'ocf_to_netdebt': FLOAT(), 'ebit_to_interest': FLOAT(), 'longdebt_to_workingcapital': FLOAT(), 'ebitda_to_debt': FLOAT(), 'turn_days': FLOAT(), 'roa_yearly': FLOAT(), 'roa_dp': FLOAT(), 'fixed_assets': FLOAT(), 'profit_prefin_exp': FLOAT(), 'non_op_profit': FLOAT(), 'op_to_ebt': FLOAT(), 'nop_to_ebt': FLOAT(), 'ocf_to_profit': FLOAT(), 'cash_to_liqdebt': FLOAT(), 'cash_to_liqdebt_withinterest': FLOAT(), 'op_to_liqdebt': FLOAT(), 'op_to_debt': FLOAT(), 'roic_yearly': FLOAT(), 'total_fa_trun': FLOAT(), 'profit_to_op': FLOAT(), 'q_opincome': FLOAT(), 'q_investincome': FLOAT(), 'q_dtprofit': FLOAT(), 'q_eps': FLOAT(), 'q_netprofit_margin': FLOAT(), 'q_gsprofit_margin': FLOAT(), 'q_exp_to_sales': FLOAT(), 'q_profit_to_gr': FLOAT(), 'q_saleexp_to_gr': FLOAT(), 'q_adminexp_to_gr': FLOAT(), 'q_finaexp_to_gr': FLOAT(), 'q_impair_to_gr_ttm': FLOAT(), 'q_gc_to_gr': FLOAT(), 'q_op_to_gr': FLOAT(), 'q_roe': FLOAT(), 'q_dt_roe': FLOAT(), 'q_npta': FLOAT(), 'q_opincome_to_ebt': FLOAT(), 'q_investincome_to_ebt': FLOAT(), 'q_dtprofit_to_profit': FLOAT(), 'q_salescash_to_or': FLOAT(), 'q_ocf_to_sales': FLOAT(), 'q_ocf_to_or': FLOAT(), 'basic_eps_yoy': FLOAT(), 'dt_eps_yoy': FLOAT(), 'cfps_yoy': FLOAT(), 'op_yoy': FLOAT(), 'ebt_yoy': FLOAT(), 'netprofit_yoy': FLOAT(), 'dt_netprofit_yoy': FLOAT(), 'ocf_yoy': FLOAT(), 'roe_yoy': FLOAT(), 'bps_yoy': FLOAT(), 'assets_yoy': FLOAT(), 'eqt_yoy': FLOAT(), 'tr_yoy': FLOAT(), 'or_yoy': FLOAT(), 'q_gr_yoy': FLOAT(), 'q_gr_qoq': FLOAT(), 'q_sales_yoy': FLOAT(), 'q_sales_qoq': FLOAT(), 'q_op_yoy': FLOAT(), 'q_op_qoq': FLOAT(), 'q_profit_yoy': FLOAT(), 'q_profit_qoq': FLOAT(), 'q_netprofit_yoy': FLOAT(), 'q_netprofit_qoq': FLOAT(), 'equity_yoy': FLOAT(), 'rd_exp': FLOAT(), 'update_flag': VARCHAR(length=1) } columns = 'ts_code,ann_date,end_date,eps,dt_eps,total_revenue_ps,revenue_ps,capital_rese_ps,surplus_rese_ps,undist_profit_ps,extra_item,profit_dedt,gross_margin,current_ratio,quick_ratio,cash_ratio,invturn_days,arturn_days,inv_turn,ar_turn,ca_turn,fa_turn,assets_turn,op_income,valuechange_income,interst_income,daa,ebit,ebitda,fcff,fcfe,current_exint,noncurrent_exint,interestdebt,netdebt,tangible_asset,working_capital,networking_capital,invest_capital,retained_earnings,diluted2_eps,bps,ocfps,retainedps,cfps,ebit_ps,fcff_ps,fcfe_ps,netprofit_margin,grossprofit_margin,cogs_of_sales,expense_of_sales,profit_to_gr,saleexp_to_gr,adminexp_of_gr,finaexp_of_gr,impai_ttm,gc_of_gr,op_of_gr,ebit_of_gr,roe,roe_waa,roe_dt,roa,npta,roic,roe_yearly,roa2_yearly,roe_avg,opincome_of_ebt,investincome_of_ebt,n_op_profit_of_ebt,tax_to_ebt,dtprofit_to_profit,salescash_to_or,ocf_to_or,ocf_to_opincome,capitalized_to_da,debt_to_assets,assets_to_eqt,dp_assets_to_eqt,ca_to_assets,nca_to_assets,tbassets_to_totalassets,int_to_talcap,eqt_to_talcapital,currentdebt_to_debt,longdeb_to_debt,ocf_to_shortdebt,debt_to_eqt,eqt_to_debt,eqt_to_interestdebt,tangibleasset_to_debt,tangasset_to_intdebt,tangibleasset_to_netdebt,ocf_to_debt,ocf_to_interestdebt,ocf_to_netdebt,ebit_to_interest,longdebt_to_workingcapital,ebitda_to_debt,turn_days,roa_yearly,roa_dp,fixed_assets,profit_prefin_exp,non_op_profit,op_to_ebt,nop_to_ebt,ocf_to_profit,cash_to_liqdebt,cash_to_liqdebt_withinterest,op_to_liqdebt,op_to_debt,roic_yearly,total_fa_trun,profit_to_op,q_opincome,q_investincome,q_dtprofit,q_eps,q_netprofit_margin,q_gsprofit_margin,q_exp_to_sales,q_profit_to_gr,q_saleexp_to_gr,q_adminexp_to_gr,q_finaexp_to_gr,q_impair_to_gr_ttm,q_gc_to_gr,q_op_to_gr,q_roe,q_dt_roe,q_npta,q_opincome_to_ebt,q_investincome_to_ebt,q_dtprofit_to_profit,q_salescash_to_or,q_ocf_to_sales,q_ocf_to_or,basic_eps_yoy,dt_eps_yoy,cfps_yoy,op_yoy,ebt_yoy,netprofit_yoy,dt_netprofit_yoy,ocf_yoy,roe_yoy,bps_yoy,assets_yoy,eqt_yoy,tr_yoy,or_yoy,q_gr_yoy,q_gr_qoq,q_sales_yoy,q_sales_qoq,q_op_yoy,q_op_qoq,q_profit_yoy,q_profit_qoq,q_netprofit_yoy,q_netprofit_qoq,equity_yoy,rd_exp,update_flag' df = __pro.fina_indicator(ts_code=ts_code, start_date='19901201', end_date='20210101', columns=columns) if len(df) == 0: print('=' * 32, 'code:{}'.format(ts_code)) print('error exit middle') exit(4) # clean # df = df.drop_duplicates(["end_date"], keep="first") df = drop_more_nan_row(df, 'end_date') df_add_y_m(df, 'end_date') df.reset_index(drop=True) df.to_sql(table_name, get_engine(), dtype=dtype, index=False, if_exists='append')
def init_cashflow(ts_code, force=None): table_name = 'stock_cashflow' if not need_pull_check(ts_code, table_name, force): print('need not 2 pull {} -> {}'.format(table_name, ts_code)) return else: print('start 2 pull {} -> {} .'.format(table_name, ts_code)) dtype = { 'ts_code': VARCHAR(length=10), 'ann_date': DATE(), 'f_ann_date': DATE(), 'y': INT(), 'm': INT(), 'end_date': DATE(), 'comp_type': VARCHAR(length=1), 'report_type': VARCHAR(length=1), 'net_profit': BIGINT(), 'finan_exp': BIGINT(), 'c_fr_sale_sg': BIGINT(), 'recp_tax_rends': BIGINT(), 'n_depos_incr_fi': BIGINT(), 'n_incr_loans_cb': BIGINT(), 'n_inc_borr_oth_fi': BIGINT(), 'prem_fr_orig_contr': BIGINT(), 'n_incr_insured_dep': BIGINT(), 'n_reinsur_prem': BIGINT(), 'n_incr_disp_tfa': BIGINT(), 'ifc_cash_incr': BIGINT(), 'n_incr_disp_faas': BIGINT(), 'n_incr_loans_oth_bank': BIGINT(), 'n_cap_incr_repur': BIGINT(), 'c_fr_oth_operate_a': BIGINT(), 'c_inf_fr_operate_a': BIGINT(), 'c_paid_goods_s': BIGINT(), 'c_paid_to_for_empl': BIGINT(), 'c_paid_for_taxes': BIGINT(), 'n_incr_clt_loan_adv': BIGINT(), 'n_incr_dep_cbob': BIGINT(), 'c_pay_claims_orig_inco': BIGINT(), 'pay_handling_chrg': BIGINT(), 'pay_comm_insur_plcy': BIGINT(), 'oth_cash_pay_oper_act': BIGINT(), 'st_cash_out_act': BIGINT(), 'n_cashflow_act': BIGINT(), 'oth_recp_ral_inv_act': BIGINT(), 'c_disp_withdrwl_invest': BIGINT(), 'c_recp_return_invest': BIGINT(), 'n_recp_disp_fiolta': BIGINT(), 'n_recp_disp_sobu': BIGINT(), 'stot_inflows_inv_act': BIGINT(), 'c_pay_acq_const_fiolta': BIGINT(), 'c_paid_invest': BIGINT(), 'n_disp_subs_oth_biz': BIGINT(), 'oth_pay_ral_inv_act': BIGINT(), 'n_incr_pledge_loan': BIGINT(), 'stot_out_inv_act': BIGINT(), 'n_cashflow_inv_act': BIGINT(), 'c_recp_borrow': BIGINT(), 'proc_issue_bonds': BIGINT(), 'oth_cash_recp_ral_fnc_act': BIGINT(), 'stot_cash_in_fnc_act': BIGINT(), 'free_cashflow': BIGINT(), 'c_prepay_amt_borr': BIGINT(), 'c_pay_dist_dpcp_int_exp': BIGINT(), 'incl_dvd_profit_paid_sc_ms': BIGINT(), 'oth_cashpay_ral_fnc_act': BIGINT(), 'stot_cashout_fnc_act': BIGINT(), 'n_cash_flows_fnc_act': BIGINT(), 'eff_fx_flu_cash': BIGINT(), 'n_incr_cash_cash_equ': BIGINT(), 'c_cash_equ_beg_period': BIGINT(), 'c_cash_equ_end_period': BIGINT(), 'c_recp_cap_contrib': BIGINT(), 'incl_cash_rec_saims': BIGINT(), 'uncon_invest_loss': BIGINT(), 'prov_depr_assets': BIGINT(), 'depr_fa_coga_dpba': BIGINT(), 'amort_intang_assets': BIGINT(), 'lt_amort_deferred_exp': BIGINT(), 'decr_deferred_exp': BIGINT(), 'incr_acc_exp': BIGINT(), 'loss_disp_fiolta': BIGINT(), 'loss_scr_fa': BIGINT(), 'loss_fv_chg': BIGINT(), 'invest_loss': BIGINT(), 'decr_def_inc_tax_assets': BIGINT(), 'incr_def_inc_tax_liab': BIGINT(), 'decr_inventories': BIGINT(), 'decr_oper_payable': BIGINT(), 'incr_oper_payable': BIGINT(), 'others': BIGINT(), 'im_net_cashflow_oper_act': BIGINT(), 'conv_debt_into_cap': BIGINT(), 'conv_copbonds_due_within_1y': BIGINT(), 'fa_fnc_leases': BIGINT(), 'end_bal_cash': BIGINT(), 'beg_bal_cash': BIGINT(), 'end_bal_cash_equ': BIGINT(), 'beg_bal_cash_equ': BIGINT(), 'im_n_incr_cash_equ': BIGINT() } df = __pro.cashflow(ts_code=ts_code, start_date='19901201', end_date='20210101') # clean # df = df.drop_duplicates(["end_date"], keep="first") df = drop_more_nan_row(df, 'end_date') df_add_y_m(df, 'end_date') df.reset_index(drop=True) df.to_sql(table_name, get_engine(), dtype=dtype, index=False, if_exists='append')
def init_income(ts_code, force=None): table_name = 'stock_income' if not need_pull_check(ts_code, table_name, force): print('need not 2 pull {} -> {}'.format(table_name, ts_code)) return else: print('start 2 pull {} -> {} '.format(table_name, ts_code)) dtype = { 'ts_code': VARCHAR(length=10), 'ann_date': DATE(), 'f_ann_date': DATE(), 'y': INT(), 'm': INT(), 'end_date': DATE(), 'report_type': VARCHAR(length=1), 'comp_type': VARCHAR(length=1), 'basic_eps': FLOAT(), 'diluted_eps': FLOAT(), 'total_revenue': FLOAT(), 'revenue': FLOAT(), 'int_income': FLOAT(), 'prem_earned': FLOAT(), 'comm_income': FLOAT(), 'n_commis_income': FLOAT(), 'n_oth_income': FLOAT(), 'n_oth_b_income': FLOAT(), 'prem_income': FLOAT(), 'out_prem': FLOAT(), 'une_prem_reser': FLOAT(), 'reins_income': FLOAT(), 'n_sec_tb_income': FLOAT(), 'n_sec_uw_income': FLOAT(), 'n_asset_mg_income': FLOAT(), 'oth_b_income': FLOAT(), 'fv_value_chg_gain': FLOAT(), 'invest_income': FLOAT(), 'ass_invest_income': FLOAT(), 'forex_gain': FLOAT(), 'total_cogs': FLOAT(), 'oper_cost': FLOAT(), 'int_exp': FLOAT(), 'comm_exp': FLOAT(), 'biz_tax_surchg': FLOAT(), 'sell_exp': FLOAT(), 'admin_exp': FLOAT(), 'fin_exp': FLOAT(), 'assets_impair_loss': FLOAT(), 'prem_refund': FLOAT(), 'compens_payout': FLOAT(), 'reser_insur_liab': FLOAT(), 'div_payt': FLOAT(), 'reins_exp': FLOAT(), 'oper_exp': FLOAT(), 'compens_payout_refu': FLOAT(), 'insur_reser_refu': FLOAT(), 'reins_cost_refund': FLOAT(), 'other_bus_cost': FLOAT(), 'operate_profit': FLOAT(), 'non_oper_income': FLOAT(), 'non_oper_exp': FLOAT(), 'nca_disploss': FLOAT(), 'total_profit': FLOAT(), 'income_tax': FLOAT(), 'n_income': FLOAT(), 'n_income_attr_p': FLOAT(), 'minority_gain': FLOAT(), 'oth_compr_income': FLOAT(), 't_compr_income': FLOAT(), 'compr_inc_attr_p': FLOAT(), 'compr_inc_attr_m_s': FLOAT(), 'ebit': FLOAT(), 'ebitda': FLOAT(), 'insurance_exp': FLOAT(), 'undist_profit': FLOAT(), 'distable_profit': FLOAT(), 'update_flag': VARCHAR(length=1) } df = __pro.income(ts_code=ts_code, start_date='19901201', end_date='20210101') # clean df = drop_more_nan_row(df, 'end_date') # format df_add_y_m(df, 'end_date') # df.reset_index(drop=True) df.to_sql(table_name, get_engine(), dtype=dtype, index=False, if_exists='append')