def financial_factors_rank(path_root, filename='rank_result', dates=None, factors=None):
    """Rank stocks on selected financial factors for the given report dates.

    Delegates to the project's ``financial_factor_rank`` helper; results are
    written under ``<path_root>/rank``.

    Args:
        path_root: root data directory.
        filename: base name of the rank-result file.
        dates: report dates to rank on; defaults to ``['2018-09-30']``.
        factors: factor ids (values of ``FID``); defaults to ROE and ROA.
    """
    # None-defaults replace the original mutable list defaults and defer the
    # FID lookups from import time to call time (backward-compatible).
    if dates is None:
        dates = ['2018-09-30']
    if factors is None:
        factors = [FID['roe'], FID['roa']]
    path = os.path.join(path_root)
    path_rank = os.path.join(path_root, 'rank')
    path_factor = os.path.join(path_root, 'factor_io')
    # NOTE(review): the original also built an unused '<path_root>/score'
    # path here; dropped as dead code.
    scu = SCU(path=path_root)
    stocks = scu.stock_codes_remove_no_stock_basic()
    ffr = financial_factor_rank(path=path, path_factor=path_factor,
                                path_cluster=path_rank, stocks=stocks,
                                dates=dates, indexs=factors,
                                file_name=filename)
    # Candidate factors kept from the original source for reference:
    #   FID['debt_incr_rate'],
    #   # asset struct
    #   FID['debt_asset_ratio'], FID['debt_equality_ratio'],
    #   FID['debt_net_asset_ratio'], FID['revenue_asset_ratio'],
    #   FID['goodwell_equality_ratio'], FID['dev_rev_ratio']
    ffr.assess_selected_financial_factor(dates)
    print('rank all the stock successfully')
def download_all_stocks_basic(path_root='../../../data/'):
    """Download the basic-info dataset for every listed stock under path_root."""
    #stock_codes = ['000001']
    codes = SCU(path_root).stock_codes()
    #get_dates = ['2018/6/30']
    downloader = stock_basic(codes, path_root)
    #SB.get_stocks_basic()
    downloader.get_all_stocks_basic()
    print('downloaded successfully')
def processed_all_stocks_basic(path_root='../../../data/'):
    """Process raw stock-basic data, resuming from the DR checkpoint index."""
    scu = SCU(path_root)
    codes = scu.stock_codes_remove_no_stock_basic()
    checkpoint = DR(path_root, 'stock_basic_proc.json')
    done = checkpoint.read_index()
    if done >= len(codes):
        # Every stock already processed on a previous run.
        print("finished to proc all the stock, number is ", len(codes))
    else:
        # Resume with the stocks not yet processed.
        sb = stock_basic(codes[done:], path_root)
        sb.processed_stocks_basic()
    print('processed successfully')
def fetch_selected_factors(self, factors, dates):
    """Fetch the given factors for the given dates, caching to CSV.

    If ``self.path_factor_io`` already exists, the cached CSV is loaded and
    returned instead of recomputing.

    Args:
        factors: factor ids to fetch (passed to ``self.fetch_one_factor``).
        dates: report dates to fetch for.

    Returns:
        DataFrame of factor values, indexed by exchange-suffixed stock codes.
    """
    if os.path.exists(self.path_factor_io):
        # Cache hit: reuse the previously written factor table.
        return pd.read_csv(self.path_factor_io, index_col=0)
    # Collect all frames first and concatenate once — calling pd.concat
    # inside the loop (as the original did) copies the accumulated frame
    # on every iteration.
    frames = []
    list_columns = []
    for factor in factors:
        print('processing factor is', factor)
        factor_value, column = self.fetch_one_factor(factor, dates)
        list_columns.extend(column)
        frames.append(factor_value)
    if frames:
        pd_factor_values = pd.concat(frames, axis=1)
    else:
        # Guard: pd.concat raises on an empty list of frames.
        pd_factor_values = pd.DataFrame(dtype=float)
    pd_factor_values.columns = list_columns
    scu = SCU(path=self.path)
    pd_factor_values.index = scu.add_allstock_sh_sz(self.stock_codes)
    pd_factor_values.to_csv(self.path_factor_io, encoding='gbk')
    return pd_factor_values
def main_financial_data_process(path):
    """Compute and store financial factors per stock, resuming from checkpoint."""
    scu = SCU(path=path)
    codes = scu.stock_codes_remove_no_stock_basic()
    calc = financail_factor_calc(path=path)
    checkpoint = DR(path, 'stock_finance_factor_calc.json')
    #stock_codes = ['000001']
    done = checkpoint.read_index()
    for code in codes[done:]:
        print("stock:", code)
        calc.FLS.load_all_financial_one_stock(code)
        calc.FLS.load_all_processed_stock_basic_one_stock([code])
        processed = calc.financial_index_calc(code)
        calc.FLS.store_process_financical_data(processed, code)
        # Record progress only after this stock is fully stored.
        done = done + 1
        checkpoint.write_index(done)
def main_financial_statistic_process(path):
    """Aggregate financial factors across the 房地产 (real-estate) industry.

    Sums each stock's processed factor table over the shortest common span,
    then writes the sum and its 4-period (year-over-year for quarterly data)
    percent change as CSVs under ``<path>/statistic/``.
    """
    store_path = os.path.join(path, 'statistic/')
    if not os.path.exists(store_path):
        os.makedirs(store_path)
    FFC = financail_factor_statistic(path=path)
    # NOTE(review): the original also fetched the full code list and the
    # SZ50 list here, but both results were immediately overwritten by the
    # industry filter below — dropped as dead (network) work.
    stock_industry = ts.get_industry_classified()
    # list() so the [0] / [1:] accesses below are positional; the filtered
    # Series keeps its original non-contiguous index, on which
    # stock_codes[0] would be a label lookup and could raise KeyError.
    stock_codes = list(
        stock_industry[stock_industry['c_name'].isin(['房地产'])]['code'])
    #stock_codes = stock_codes.pop('000527')
    #stock_codes = ['000001','000002','000004']
    statistic_stock_data = {}
    statistic_stock_min_len = 100  # upper bound on rows kept per stock
    for stock_code in stock_codes:
        print("stock:", stock_code)
        FFC.FLS.load_all_financial_one_stock(stock_code)
        FFC.FLS.load_all_processed_stock_basic_one_stock([stock_code])
        data_processed = FFC.financial_index_calc(stock_code)
        (row, colum) = data_processed.shape
        if row < statistic_stock_min_len:
            statistic_stock_min_len = row
        statistic_stock_data[stock_code] = data_processed
    # Sum all stocks over the shortest common trailing window.
    sum_data = statistic_stock_data[stock_codes[0]].iloc[
        -statistic_stock_min_len:, :]
    for stock_code in stock_codes[1:]:
        sum_data = sum_data + statistic_stock_data[stock_code].iloc[
            -statistic_stock_min_len:, :]
    pct_data = sum_data.pct_change(periods=4)
    pct_data.to_csv(store_path + 'statistic_pct.csv')
    sum_data.to_csv(store_path + 'statistic_sum.csv')
def download_finance(path_root='../../../data/'):
    """Download finance data for every stock, resuming from the DR checkpoint.

    The DR index counts fully-downloaded stocks (same convention as the
    other checkpointed loops in this project).
    """
    scu = SCU(path_root)
    stock_codes = scu.stock_codes()
    path = os.path.join(path_root, 'finance')
    if not os.path.exists(path):
        os.makedirs(path)
    dr = DR(path_root, 'finance_download_record.json')
    stock_index = dr.read_index()
    for stock in stock_codes[stock_index:]:
        print("fetching stock", stock)
        # fetch_stock_finance_data returns 1 while more pages remain.
        continue_download_this_stock = 1
        while continue_download_this_stock == 1:
            continue_download_this_stock = fetch_stock_finance_data(path, stock)
        # BUG FIX: the original wrote the index BEFORE incrementing, so a
        # restart re-downloaded the last completed stock. Increment first,
        # matching the write-after-increment order used elsewhere.
        stock_index = stock_index + 1
        dr.write_index(stock_index)
        print("fetched stock", stock)
def __init__(self, path='../../../data/', stock_codes=['000001']):
    """Set up path templates for raw/processed finance and stock-basic CSVs.

    Args:
        path: root data directory.
        stock_codes: default stock codes held on the instance.
            NOTE(review): mutable default list — shared across calls; safe
            only while never mutated in place.
    """
    self.path = path
    # Raw finance CSVs live under <path>/finance.
    self.path_finance = os.path.join(path, 'finance')
    self.path_finance_processed = os.path.join(path, 'finance_processed')
    if not os.path.exists(self.path_finance_processed):
        os.makedirs(self.path_finance_processed)
    # Re-bound from directory to a per-stock filename TEMPLATE
    # ('{}' is filled with the stock code later).
    self.path_finance_processed = os.path.join(self.path_finance_processed,
                                               '{}_processed_finance.csv')
    self.path_stock_basic = os.path.join(path, 'stock_basic', '{}_basic.csv')
    self.path_processed_stock_basic = os.path.join(
        path, 'processed_stock_basic', '{}_basic.csv')
    self.stock_codes = stock_codes
    self.dr = DR(path)       # download/progress record helper
    self.scu = SCU(path)     # stock-code utilities
# Tail of a factor-writing method whose start is outside this view:
# append the OHLCVT columns and persist the merged factor table.
data = pd.concat([data, dataframe_ohlcvt], axis=1)
data.to_csv(factor_csv, index=False)

def is_number(self, s):
    """Return True if *s* parses as a number, else False.

    Tries float() first, then unicodedata.numeric() to also accept
    single-character unicode numerals (e.g. '½', full-width digits)
    that float() rejects.
    """
    try:
        float(s)
        return True
    except ValueError:
        pass
    try:
        import unicodedata
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    return False

if __name__ == '__main__':
    # Script entry: compute TA-Lib factors for every stock code.
    # NOTE(review): the meaning of the talib_factor constructor args
    # (date(2019, 9, 5), 1000) is not visible here — presumably an end
    # date and a lookback length; confirm against the class definition.
    scu = SCU('../../../data/')
    talib_f = talib_factor('~/', '../../../data/', date(2019, 9, 5), 1000)
    stock_codes = scu.stock_codes()
    stock_codes = scu.add_allstock_xshg_xshe(stock_codes)
    # stock_codes= ['002975.XSHE', '000002.XSHE']
    for stock_code in stock_codes:
        print("calculated stock:", stock_code)
        talib_f.factor_calc(stock_code)
list_columns = list_columns + column pd_factor_values = pd.concat([pd_factor_values, factor_value], axis=1) pd_factor_values.columns = list_columns scu = SCU(path=self.path) pd_factor_values.index = scu.add_allstock_sh_sz(self.stock_codes) pd_factor_values.to_csv(self.path_factor_io, encoding='gbk') else: pd_factor_values = pd.read_csv(self.path_factor_io,index_col=0) return pd_factor_values if __name__ == '__main__': path = '../../../data/' path_factor = '../../../data/factor_io' print(FID['roe']) scu = SCU(path=path) stocks = scu.stock_codes_remove_no_stock_basic() stocks = ['000001','000002','000004','000005','000006'] dates = ['2018-06-30','2017-12-31']#,'2017-12-31' ffio = financial_factor_io(path=path, path_factor=path_factor, \ stocks = stocks, dates = dates,file_name = '1806_1712_1') indexs = [ #earning capacity FID['roe'],\ FID['roa'],\ FID['profit_revenue'],\ FID['profit_cost'],\ FID['equlity_incr_rate'],\ ###grow capacity FID['revenue_incr_rate'],\ FID['profit_incr_rate'],\
from stock_deeplearning.ultility.stock_codes_utility import stock_codes_utility as SCU
from rqalpha.data.base_data_source import BaseDataSource
from rqalpha.data.instrument_mixin import InstrumentMixin
from rqalpha.core.bar_dict_price_board import BarDictPriceBoard
from rqalpha.data.data_proxy import DataProxy
import numpy as np
import pandas as pd
#from rqalpha.data.instrument_store import instrument_store

# Script: read daily bars from a local rqalpha data bundle for every stock
# with basic data, for export under <path>/trade_market.
# NOTE(review): relies on `os` being imported earlier in this file.
path = '../../../data/'
path_market = os.path.join(path, 'trade_market')
if not os.path.exists(path_market):
    os.makedirs(path_market)
scu = SCU(path=path)
stocks = scu.stock_codes_remove_no_stock_basic()
# Convert to the .XSHG/.XSHE code format rqalpha instruments use.
stocks = scu.add_allstock_xshg_xshe(stocks)
#rqa.update_bundle()
rqalpha_path = r"~/.rqalpha"
data_bundle_path = os.path.join(os.path.expanduser(rqalpha_path), "bundle")
if not os.path.isdir(data_bundle_path):
    # The bundle must have been downloaded beforehand (see update_bundle).
    print("not exist this file", data_bundle_path)
    exit(-1)
data_source = BaseDataSource(data_bundle_path)
# Wrap the bundle's instrument table for code -> instrument lookups.
Instru = InstrumentMixin(data_source._instruments._instruments)
# for stock_code in stocks:
#     print('procesing...',stock_code)
#     stock_data = pd.DataFrame(data_source._all_day_bars_of(Instru.instruments(stock_code)))
class financial_load_store:
    """Load/store helper for raw and processed per-stock financial CSV data.

    Raw finance CSVs live under ``<path>/finance``, processed results under
    ``<path>/finance_processed``, and per-stock basic data under
    ``<path>/stock_basic`` / ``<path>/processed_stock_basic``.
    """

    def __init__(self, path='../../../data/', stock_codes=None):
        """Set up path templates; create the processed-finance directory.

        Args:
            path: root data directory.
            stock_codes: default stock codes; defaults to ``['000001']``
                (None-default replaces the original shared mutable list).
        """
        if stock_codes is None:
            stock_codes = ['000001']
        self.path = path
        self.path_finance = os.path.join(path, 'finance')
        self.path_finance_processed = os.path.join(path, 'finance_processed')
        if not os.path.exists(self.path_finance_processed):
            os.makedirs(self.path_finance_processed)
        # Re-bound from directory to a per-stock filename template
        # ('{}' is filled with the stock code).
        self.path_finance_processed = os.path.join(
            self.path_finance_processed, '{}_processed_finance.csv')
        self.path_stock_basic = os.path.join(path, 'stock_basic',
                                             '{}_basic.csv')
        self.path_processed_stock_basic = os.path.join(
            path, 'processed_stock_basic', '{}_basic.csv')
        self.stock_codes = stock_codes
        self.dr = DR(path)    # download/progress record helper
        self.scu = SCU(path)  # stock-code utilities
        #self.load_stock_basic()

    '''financial data'''

    def load_financical_data(self, stock_code, file_list):
        """Load the raw finance tables named in *file_list* for one stock.

        Missing files yield an empty DataFrame and the stock is recorded as
        skipped via ``self.dr``. All loaded frames are truncated to the
        shortest common column count minus ``TAIL_MARGIN`` so every table
        covers the same reporting periods.

        Returns:
            dict mapping table name -> DataFrame.
        """
        if not os.path.exists(self.path_finance):
            print('this folder not exist!!!')
            # BUG FIX: the original called exec(-1), which raises a
            # TypeError instead of terminating the process.
            exit(-1)
        data_file = {}
        min_column = 3000  # sentinel larger than any real column count
        for ite in file_list:
            ite = ite.format(stock_code)
            csv_file_path = os.path.join(self.path_finance,
                                         FILE_LIST[ite].format(stock_code))
            if os.path.exists(csv_file_path):
                # NOTE(review): error_bad_lines was removed in pandas 2.0;
                # use on_bad_lines='skip' there.
                data = pd.read_csv(csv_file_path, encoding='gbk',
                                   error_bad_lines=False)
                if data.shape[1] < min_column:
                    min_column = data.shape[1]
                # '--' and '_' are the source's "no value" placeholders.
                data = data.replace('--', 0)
                data = data.replace('_', 0)
                data = data.fillna(0)
            else:
                print('stock :', stock_code, 'this file is not exist',
                      FILE_LIST[ite])
                self.dr.write_skip_stock(self.scu.add_stock_sh_sz(stock_code))
                data = pd.DataFrame()
            data_file[ite] = data
        self.min_column = min_column
        # Second pass: align every non-empty table to the common span.
        for ite in file_list:
            if data_file[ite].empty == False:
                data_file[ite] = data_file[ite].iloc[
                    :, :self.min_column - TAIL_MARGIN]
            else:
                data_file[ite] = pd.DataFrame()
        return data_file

    def load_all_financial_one_stock(self, stock_code):
        """Load every known finance table for one stock and cache the dict."""
        file_list = ['main', 'abstract', 'profit', 'cash', 'loans']
        data = self.load_financical_data(stock_code, file_list)
        self.all_financial_one_stock = data
        return data

    def load_one_financial_one_stock(self, stock_code, file_list):
        """Load only the finance tables named in *file_list* for one stock."""
        data = self.load_financical_data(stock_code, file_list)
        return data

    def fetch_one_financial_factor_in_stock(self, table, factor):
        """Return the numeric row labeled *factor* from a cached table.

        Requires ``load_all_financial_one_stock`` to have run first. Some
        source files name the date column '报告日期' and others ' 报告日期'
        (leading space); fall back to the latter on KeyError.
        """
        data = self.all_financial_one_stock[table]
        try:
            data1 = data[data['报告日期'].isin([factor])]
        except KeyError:  # narrowed from the original bare except
            data1 = data[data[' 报告日期'].isin([factor])]
        data1 = data1.values.squeeze()
        data1 = np.float32(data1[1:])  # drop the label column, cast to float32
        return data1

    '''processed financial data'''

    def store_process_financical_data(self, data, stock_code):
        """Write a stock's processed finance DataFrame to its CSV slot."""
        csv_file_path = self.path_finance_processed.format(stock_code)
        data.to_csv(csv_file_path, encoding='gbk')

    def load_process_financical_data(self, stock_code):
        """Load a stock's processed finance CSV; exits if it is missing."""
        csv_file_path = self.path_finance_processed.format(stock_code)
        if not os.path.exists(csv_file_path):
            print('load_process_financical_data not exsit this file',
                  stock_code)
            exit(-1)
        # (The original re-checked existence here; unreachable else removed.)
        data_pd = pd.read_csv(csv_file_path, encoding='gbk')
        data_pd.index = data_pd.iloc[:, 0]
        return data_pd

    '''basic stock data'''

    def load_all_stock_basic_one_stock(self, stock_codes):
        """Load raw stock-basic CSVs for the given codes; cache the dict."""
        data_basic = {}
        for stock in stock_codes:
            path_csv = os.path.join(self.path_stock_basic.format(stock))
            pd_basic = pd.read_csv(path_csv, index_col=0)
            data_basic[stock] = pd_basic
        self.stock_basic = data_basic
        return data_basic

    def load_all_processed_stock_basic_one_stock(self, stock_codes):
        """Load processed stock-basic CSVs for the given codes; cache the dict."""
        data_basic = {}
        for stock in stock_codes:
            path_csv = os.path.join(
                self.path_processed_stock_basic.format(stock))
            pd_basic = pd.read_csv(path_csv, index_col=0)
            data_basic[stock] = pd_basic
        self.stock_basic = data_basic
        return data_basic

    def fecth_one_stock_basic_in_stock(self, stock, factor):
        """Return one basic-data factor for one cached stock as float32.

        (Name keeps the original 'fecth' spelling — callers depend on it.)
        """
        data = self.stock_basic[stock]
        data = data.T
        data1 = data.loc[factor].values.squeeze()
        data1 = np.float32(data1)
        return data1