def fileCompare(src_fullpath, tar_fullpath):
    """Compare two CSV files cell-by-cell and return True when they match.

    Prints a report of the comparison; exits the program when either
    file does not exist.
    """
    # Both files must exist before any comparison is attempted.
    for label, path in (('Source', src_fullpath), ('Target', tar_fullpath)):
        if not u.hasFile(path):
            print('%s File %s Does Not Exist' % (label, path))
            raise SystemExit
    # Load both data files.
    source = u.read_csv(src_fullpath)
    target = u.read_csv(tar_fullpath)
    src_rows, tar_rows = len(source), len(target)
    src_cols, tar_cols = len(source.columns), len(target.columns)
    matched = True
    print('File Compare Start: %s vs %s' % (src_fullpath, tar_fullpath))
    # Shape must agree before an element-wise comparison makes sense.
    if src_rows != tar_rows:
        matched = False
        print('Row Number Un-matched')
    elif src_cols != tar_cols:
        matched = False
        print('Col Number Un-matched')
    else:
        # Report every mismatched cell, not just the first one.
        for r in range(src_rows):
            for col in range(src_cols):
                if source.iloc[r, col] != target.iloc[r, col]:
                    matched = False
                    print('Element(%s,%s) Un-matched' % (r, col))
    print('File Compare End: %s' % ('Matched' if matched else 'Un-Matched'))
    return matched
def analyzePriceFollow(target_date, stock_id, is_index, threshold):
    """Report the price-follow trend of a stock/index as of target_date.

    Scans the timing file for the latest timing entry dated on or before
    target_date and prints whether the trend changed on that exact date,
    is unchanged, or is unavailable.

    target_date : datetime.date used as the reference date
    stock_id    : stock/index code
    is_index    : whether stock_id refers to an index
    threshold   : threshold suffix used in the timing file name
    """
    file_postfix = 'Timing_%s_%s' % (u.stockFileName(stock_id, is_index), threshold)
    timing = u.read_csv(c.path_dict['strategy'] + file_postfix + '.csv', encoding='gbk')
    timing_number = len(timing)
    # Find the last timing row whose date is on or before target_date.
    # FIX: DataFrame.ix was removed in pandas >= 1.0; the frame keeps its
    # default RangeIndex after read_csv, so .loc with the integer label
    # is equivalent to the old .ix access.
    timing_index = -1
    for i in range(timing_number):
        date = dt.datetime.strptime(timing.loc[i, 'date'], '%Y-%m-%d').date()
        if date <= target_date:
            timing_index = i
        else:
            break  # timing file is date-ordered; nothing later can match
    # Report results
    if timing_index != -1:
        date = dt.datetime.strptime(timing.loc[timing_index, 'date'], '%Y-%m-%d').date()
        trend = timing.loc[timing_index, 'trend']
        if date == target_date:
            # Given target_date is itself a timing date
            print('Date', target_date, ': Trend of', u.stockFileName(stock_id, is_index), 'Goes', trend)
        else:
            print('Date', target_date, ': Trend of', u.stockFileName(stock_id, is_index), 'Does Not Change, Still', trend)
    else:
        print('Date', target_date, ': Trend of', u.stockFileName(stock_id, is_index), 'Not Available, No Timing Data')
def extractRollingBeta(postfix):
    """Extract the rolling 'beta*' columns from a coefficient file and
    store them, plus their per-row NaN-ignoring average (column 'beta'),
    as '<postfix>_Beta'.

    Returns False when the coefficient file is missing; otherwise None.
    """
    # Load Rolling Coefficient
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % postfix
    coef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(coef):
        print('Require Coefficient File: %s!' % fullpath)
        return False
    # Copy the date column and every beta column into a fresh frame.
    row_number = len(coef)
    beta = u.createDataFrame(row_number, ['date', 'beta'])
    beta['date'] = coef['date']
    for column in coef.columns:
        if column.startswith('beta'):  # idiomatic form of column[0:4] == 'beta'
            beta[column] = coef[column]
    # Average the rolling betas row by row, skipping NaN entries.
    # FIX: DataFrame.ix was removed in pandas >= 1.0; use purely positional
    # .iloc instead (column 1 is 'beta', columns 2.. are the rolling betas).
    beta_number = len(beta.columns) - 2
    for i in range(row_number):
        values = [beta.iloc[i, j + 2] for j in range(beta_number)]
        valid = [v for v in values if not np.isnan(v)]
        if valid:
            beta.iloc[i, 1] = sum(valid) / float(len(valid))
    beta.set_index('date', inplace=True)
    postfix = '_'.join([postfix, 'Beta'])
    u.to_csv(beta, c.path_dict['strategy'], c.file_dict['strategy'] % postfix)
def loadAllIndex():
    """Load the list of all index codes from the local cache.

    Returns a pandas.Series of 6-digit index codes on success, or None
    when the cache file cannot be loaded.
    """
    # Load Local Cache
    file_postfix = '_'.join(['Common', 'AllIndex'])
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix
    allindex = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allindex):
        print('Failed to Load File: %s!' % fullpath)
        return None
    # Zero-pad codes back to six digits (CSV loading drops leading zeros).
    allindex['code'] = allindex['code'].map(lambda x: str(x).zfill(6))
    return allindex['code']
def getRZRQDetailsSH():
    """Download per-date RZRQ (margin trading) stock details for the SH
    market, save one CSV per date, and save the merged 'Details_SH' file.

    Reads the list of dates from the already-downloaded 'Market_SH' file.
    """
    # Download RZRQ Stock Data of SH Market
    rzrq_sh_details = pd.DataFrame()
    rzrq_sh = u.read_csv(c.fullpath_dict['rzrq'] % 'Market_SH')
    date_number = len(rzrq_sh)
    for i in range(date_number):
        # NOTE(review): DataFrame.ix was removed in pandas >= 1.0; this
        # code requires an older pandas (or migration to .loc/.iloc).
        date = rzrq_sh.ix[i,'date']
        # One remote fetch per date; start date == end date.
        rzrq = get_rzrq_sh_details(date, date)
        print(rzrq.head(10))
        if i == 0:
            rzrq_sh_details = pd.DataFrame.copy(rzrq)
        else:
            rzrq_sh_details = pd.concat([rzrq_sh_details, rzrq])
        print(rzrq_sh_details.head(10))
        # Persist the single-date details file as well.
        rzrq.set_index('date', inplace=True)
        if not u.isNoneOrEmpty(rzrq):
            u.to_csv(rzrq, c.path_dict['rzrq'], c.file_dict['rzrq'] % ('Details_SH_%s'%date))
    # Process RZRQ Stock Data of SH Market
    rzrq_sh_details.set_index('date', inplace=True)
    rzrq_sh_details.sort_index(ascending=True,inplace=True)
    if gs.is_debug:
        print(rzrq_sh_details.head(10))
    # Save to CSV File
    if not u.isNoneOrEmpty(rzrq_sh_details):
        u.to_csv(rzrq_sh_details, c.path_dict['rzrq'], c.file_dict['rzrq'] % 'Details_SH')
def loadSamplePriceAllIndex(benchmark_id, period):
    """Load the sampled close prices of all indexes.

    benchmark_id : benchmark index code e.g. '000300'; implies the
                   start/end dates of the sample.
    period       : sampling period, one of 'D', 'W', 'M'.

    Returns the price DataFrame on success, or None when the sample
    file cannot be loaded.
    """
    file_postfix = '_'.join(
        ['Common', 'AllPrice', benchmark_id, period, 'AllIndex'])
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix
    allprice = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allprice):
        print('Failed to Load File: %s!' % fullpath)
        return None
    return allprice
def loadHPE(stock_id, period, ratio):
    """Load the historical P/E ('PE') or E/P ('EP') file for one stock.

    The two ratio kinds live under different path/file keys.
    """
    key = 'hpe' if ratio == 'PE' else 'hep'
    fullpath = (c.path_dict[key] % period) + (c.file_dict[key] % (period, stock_id))
    return u.read_csv(fullpath)
def mergeRZRQMarket():
    """Merge SH and SZ RZRQ market data into a single 'Market_Total' file.

    Joins the two per-market files on 'date' and sums each RZRQ measure
    across the two markets (the source files carry '_sh'/'_sz' suffixed
    columns).
    """
    rzrq_sh = u.read_csv(c.fullpath_dict['rzrq'] % 'Market_SH')
    rzrq_sz = u.read_csv(c.fullpath_dict['rzrq'] % 'Market_SZ')
    rzrq = pd.merge(rzrq_sh, rzrq_sz, how='inner', on='date')
    # Combine data from both markets.
    # FIX: the original per-cell loop used DataFrame.ix, which was removed
    # in pandas >= 1.0; a vectorized column addition is equivalent and O(n).
    rzrq_columns = ['rzye', 'rzmre', 'rqyl', 'rqylje', 'rqmcl', 'rzrqye']
    for column in rzrq_columns:
        rzrq[column] = rzrq[column + '_sh'] + rzrq[column + '_sz']
    rzrq.set_index('date', inplace=True)
    rzrq.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(rzrq.head(10))
    if not u.isNoneOrEmpty(rzrq):
        u.to_csv(rzrq, c.path_dict['rzrq'], c.file_dict['rzrq'] % 'Market_Total')
def loadStockBasics():
    """Load the stock basics table; None when the data file is missing."""
    fullpath = c.fullpath_dict['basics']
    if not u.hasFile(fullpath):
        print('Require Stock Basics: %s!' % fullpath)
        return None
    return u.read_csv(fullpath)
def loadFinanceSummary(stock_id):
    """Load the finance summary of one stock; None when unavailable."""
    fullpath = c.fullpath_dict['finsum'] % stock_id
    if not u.hasFile(fullpath):
        print('Require Finance Summary of %s!' % fullpath)
        return None
    return u.read_csv(fullpath)
def loadCoefficient(postfix, completeness_threshold):
    """Load the coefficient file filtered at completeness_threshold.

    Returns the DataFrame on success, or None when the file is missing.
    """
    file_postfix = '_'.join([postfix, completeness_threshold])
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix
    allcoef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allcoef):
        print('Require Coefficient File: %s!' % fullpath)
        return None
    return allcoef
def plot_index(index_name, benchmark_name):
    """Plot an index against its benchmark in three stacked sub-figures
    (ratio comparison, index close, benchmark close) and save the figure.
    """
    # Load Index Data File
    index_path = c.path_dict['index']
    index_file = c.file_dict['index_r'] % index_name
    df = u.read_csv(index_path + index_file)
    # Plot Figure: three rows sharing the x (date) axis.
    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
    fig.set_size_inches(32, 18)
    # Define Font
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }
    # Plot Sub-figure 1: ratio of index vs ratio of benchmark.
    title = '%s vs. %s' % (index_name, benchmark_name)
    ax1.set_title(title, fontdict=font)
    ax1.set_xlabel('', fontdict=font)
    ax1.set_ylabel('Ratio', fontdict=font)
    for column in ['ratio', 'b_ratio']:
        df.plot(x='date', y=column, ax=ax1)
    # Plot Sub-figure 2: index close price.
    title = 'Index %s' % index_name
    ax2.set_title(title, fontdict=font)
    ax2.set_xlabel('', fontdict=font)
    ax2.set_ylabel('Close Price', fontdict=font)
    df.plot(x='date', y='index', ax=ax2)
    # Plot Sub-figure 3: benchmark close price.
    title = 'Index %s' % benchmark_name
    ax3.set_title(title, fontdict=font)
    ax3.set_xlabel('', fontdict=font)
    ax3.set_ylabel('Close Price', fontdict=font)
    df.plot(x='date', y='b_index', ax=ax3)
    # Common Format for all sub-figures.
    for ax in [ax1, ax2, ax3]:
        ax.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()
    # Save Figure, file name stamped with today's date.
    fig_key = 'fig_index'
    fig_path = c.path_dict[fig_key]
    fig_file = c.file_dict[fig_key] % (index_name + '_' + u.dateToStr(u.today()))
    u.saveFigure(fig, fig_path, fig_file)
def loadDailyHFQ(stock_id, is_index):
    """Load daily HFQ (adjusted) quotes for a stock or index; None when
    the data file is missing.
    """
    fullpath = c.fullpath_dict['lshq'] % u.stockFileName(stock_id, is_index)
    if not u.hasFile(fullpath):
        print('Require LSHQ of %s!' % u.stockFileName(stock_id, is_index))
        return None
    return u.read_csv(fullpath)
def load_component(index_name):
    """Load the component list of an index; None when the file is missing."""
    fullpath = c.path_dict['index'] + c.file_dict['index_c'] % index_name
    if not u.hasFile(fullpath):
        print('Require Index Component of %s!' % index_name)
        return None
    return u.read_csv(fullpath)
def mergeRZRQMarketSZ(files_number):
    """Merge the numbered 'Market_SZ_<i>' RZRQ part files into one
    date-indexed 'Market_SZ' file.

    files_number : count of part files to merge (indices 0..files_number-1).
    """
    # FIX: collect all parts first and concatenate once -- calling
    # pd.concat inside the loop re-copies the accumulated frame every
    # iteration (quadratic in the total number of rows).
    frames = [u.read_csv(c.fullpath_dict['rzrq'] % ('Market_SZ_%s' % index))
              for index in range(files_number)]
    rzrq_sz = pd.concat(frames) if frames else pd.DataFrame()
    rzrq_sz.set_index('date', inplace=True)
    rzrq_sz.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(rzrq_sz.head(10))
    if not u.isNoneOrEmpty(rzrq_sz):
        u.to_csv(rzrq_sz, c.path_dict['rzrq'], c.file_dict['rzrq'] % 'Market_SZ')
def plot_index_series(index_names, series_name, benchmark_name):
    """Plot the ratio curves of several indexes against a benchmark in a
    single figure and save it.

    index_names    : list of index codes whose 'ratio_<name>' columns
                     exist in the series file
    series_name    : name of the combined series data file and figure
    benchmark_name : benchmark label used in the figure title
    """
    # Load Index Data Files
    series_path = c.path_dict['index']
    series_file = c.file_dict['index_r'] % series_name
    df = u.read_csv(series_path + series_file)
    # Plot Figure
    fig = plt.figure(figsize=(32, 18), dpi=72, facecolor="white")
    axes = plt.subplot(111)
    axes.cla()  # Clear Axes
    # Define Font
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }
    # Benchmark curve drawn first, dashed grey, so index curves overlay it.
    title = '%s vs. %s' % (series_name, benchmark_name)
    plt.title(title, fontdict=font)
    axes.set_xlabel('', fontdict=font)
    axes.set_ylabel('Ratio', fontdict=font)
    df.plot(x='date', y='ratio_benchmark', ax=axes, color='grey', lw=2.0, ls='--')
    # One ratio curve per requested index.
    index_number = len(index_names)
    for i in range(index_number):
        index_name = index_names[i]
        column = 'ratio_' + index_name
        df.plot(x='date', y=column, ax=axes)
    # Common Format
    axes.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()
    # Save Figure, file name stamped with today's date.
    fig_key = 'fig_index'
    fig_path = c.path_dict[fig_key]
    fig_file = c.file_dict[fig_key] % (series_name + '_' + u.dateToStr(u.today()))
    u.saveFigure(fig, fig_path, fig_file)
def extractRZRQDetails(market = 'SH'):
    """Split the merged per-market RZRQ details file into one CSV per
    stock code, filling derivable missing columns for the SH market.
    """
    rzrq_details = u.read_csv(c.fullpath_dict['rzrq'] % ('Details_%s' % market))
    # Unique stock codes present in the details file.
    stocks = pd.DataFrame({'code':rzrq_details['code']})
    stocks.drop_duplicates(inplace=True)
    stocks.set_index('code',inplace=True)
    stocks_number = len(stocks)
    print('RZRQ Stock Number:', stocks_number)
    for i in range(stocks_number):
        stock_id = stocks.index[i]
        # NOTE(review): rzrq is a slice of rzrq_details; the assignments
        # below may raise SettingWithCopyWarning under modern pandas --
        # consider an explicit .copy().
        rzrq = rzrq_details[rzrq_details['code'] == stock_id]
        if not u.isNoneOrEmpty(rzrq):
            rzrq.set_index('date',inplace=True)
            rzrq.sort_index(ascending=True,inplace=True)
            # Zero-pad stock codes back to six digits.
            rzrq['code'] = rzrq['code'].map(lambda x:str(x).zfill(6))
            # Handle Missing Columns
            if market == 'SH':
                # rzrq['rqylje'] =
                # Total balance = financing balance + securities-lending balance.
                rzrq['rzrqye'] = rzrq['rzye'] + rzrq['rqylje']
            # elif market == 'SZ':
                # rzrq['rzche'] =
                # rzrq['rqchl'] =
            # NOTE(review): '%06d' assumes stock_id is an int here -- verify
            # against the dtype produced by read_csv.
            u.to_csv(rzrq, c.path_dict['rzrq'], c.file_dict['rzrq'] % ('Details_%s_%06d' % (market, stock_id)))
def filterCoefficient(postfix, completeness_threshold):
    """Filter a coefficient file by data completeness and save the result.

    Keeps only rows whose completeness percentage is at or above
    completeness_threshold (e.g. '80%'), then writes them to a file
    whose name is suffixed with the threshold.

    Returns True on success, False when the input file is missing.
    """
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % postfix
    allcoef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allcoef):
        print('Require Coefficient AllCoef File: %s!' % fullpath)
        return False
    # Strip the '%' sign so completeness compares numerically.
    threshold = float(completeness_threshold.replace('%', ''))
    numeric = allcoef['completeness'].map(lambda v: v.replace('%', '')).astype(float)
    allcoef['completeness'] = numeric
    allcoef = allcoef[allcoef.completeness >= threshold]
    # Restore the '%.2f%' presentation before saving.
    allcoef['completeness'] = allcoef['completeness'].map(lambda v: ('%.2f' % (v)) + '%')
    allcoef.set_index('code', inplace=True)
    file_postfix = '_'.join([postfix, completeness_threshold])
    u.to_csv(allcoef, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
    return True
def mergePriceFollow(stock_list, is_index, threshold_list):
    """Merge per-threshold PriceFollow result files into one '_All' file
    per stock, add threshold-weighted predict/trend columns, and save a
    one-row-per-stock statistics summary.

    NOTE(review): this function uses DataFrame.ix throughout, which was
    removed in pandas >= 1.0; it requires an older pandas (or migration
    to .loc/.iloc).
    """
    # Both input lists must be non-empty.
    stock_number = len(stock_list)
    if stock_number < 1:
        print('Stock Number:', stock_number)
        raise SystemExit
    threshold_number = len(threshold_list)
    if threshold_number < 1:
        print('Threshold Number:', threshold_number)
        raise SystemExit
    # Init Price Follow Statistics for All Indexes
    # Weighted columns exist only for interior thresholds (each needs a
    # previous and next neighbour).
    stats_columns = ['date', 'index']
    for i in range(1, threshold_number - 1):
        stats_columns.append('wpredict_%s' % threshold_list[i])
        stats_columns.append('wtrend_%s' % threshold_list[i])
    stats = u.createDataFrame(stock_number, stats_columns)
    for s in range(stock_number):
        stock_id = stock_list[s]
        # Load Results from Different Threshold
        dfs = []
        for i in range(threshold_number):
            threshold = threshold_list[i]
            file_postfix = 'PriceFollow_%s_%s' % (u.stockFileName(
                stock_id, is_index), threshold)
            fullpath = c.path_dict[
                'strategy'] + c.file_dict['strategy'] % file_postfix
            df = u.read_csv(fullpath)
            dfs.append(df)
        # Compose Final Results: start from the first frame without its
        # per-threshold columns, then append each threshold's columns.
        drop_columns = [
            'trend', 'trend_high', 'trend_low', 'trend_ref', 'trend_price',
            'predict', 'confirm'
        ]
        df = dfs[0].drop(drop_columns, axis=1)
        for i in range(threshold_number):
            threshold = threshold_list[i]
            column = 'trend'
            df[column + '_%s' % threshold] = dfs[i][column]
        for i in range(threshold_number):
            threshold = threshold_list[i]
            column = 'trend_price'
            df[column + '_%s' % threshold] = dfs[i][column]
        for i in range(threshold_number):
            threshold = threshold_list[i]
            column = 'predict'
            df[column + '_%s' % threshold] = dfs[i][column]
        for i in range(threshold_number):
            threshold = threshold_list[i]
            column = 'confirm'
            df[column + '_%s' % threshold] = dfs[i][column]
        # Weighted Predict Columns: average the predicts of each interior
        # threshold with its two neighbours, weighted by threshold value.
        cutoff = 0.0  # Optimized cutoff for weighted predict
        for i in range(1, threshold_number - 1):
            t_prev = threshold_list[i - 1]
            t_curr = threshold_list[i]
            t_next = threshold_list[i + 1]
            t_total = t_prev + t_curr + t_next
            column_postfix = '_%s' % t_curr
            df['wpredict' + column_postfix] = np.nan
            df['wtrend' + column_postfix] = np.nan
            row_number = len(df)
            # Row 0 has no prior data to predict from; start at row 1.
            for j in range(1, row_number):
                wpredict = 0.0
                for t in [t_prev, t_curr, t_next]:
                    wpredict = wpredict + t * df.ix[j, 'predict' + '_%s' % t]
                wpredict = wpredict / t_total
                df.ix[j, 'wpredict' + column_postfix] = wpredict
                df.ix[j, 'wtrend' + column_postfix] = 'Up' if wpredict >= cutoff else 'Down'
        # Fill One Row of Statistics with the most recent row's values.
        last_index = len(df) - 1
        stats.ix[s, 'date'] = df.ix[last_index, 'date']
        stats.ix[s, 'index'] = stock_id
        for i in range(1, threshold_number - 1):
            column_postfix = '_%s' % threshold_list[i]
            stats.ix[s, 'wpredict' + column_postfix] = df.ix[last_index, 'wpredict' + column_postfix]
            stats.ix[s, 'wtrend' + column_postfix] = df.ix[last_index, 'wtrend' + column_postfix]
        # Format Columns
        df.set_index('date', inplace=True)
        # Save to CSV File
        file_postfix = 'PriceFollow_%s_All' % u.stockFileName(
            stock_id, is_index)
        u.to_csv(df, c.path_dict['strategy'],
                 c.file_dict['strategy'] % file_postfix)
    # Format Columns
    stats.set_index('date', inplace=True)
    # Save to CSV File
    file_postfix = 'PriceFollow_Statistics'
    u.to_csv(stats, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
def loadCXG():
    """Load the CXG (newly listed stocks) table."""
    return u.read_csv(c.fullpath_dict['cxg'])
def loadStockList(cutoff_date):
    """Load the stock list snapshot taken at cutoff_date."""
    return u.read_csv(c.fullpath_dict['stock_list'] % cutoff_date)
def calc_hpe(stock_id, period, ratio):
    '''
    Calculate the historical P/E (or E/P) ratio per sampling period.

    Assumes the per-period QFQ (forward-adjusted) price data and the
    Finance Summary data have already been downloaded/derived and stored
    as CSV files.

    Parameters
    ----------
    stock_id : string, stock code e.g. '600036'
    period : string, sampling period, one of 'W', 'M', 'Q'
    ratio : string, 'PE' or 'EP'

    Returns
    -------
    DataFrame with columns:
        date         period end date (last day of period) e.g. 2005-03-31
        high         period high price
        close        period close price
        low          period low price
        eps          EPS at period end (may contain gaps)
        eps_filled   EPS with gaps filled from neighbouring quarters
        eps_rolling  annualized EPS estimated from eps_filled
        pe_high / pe_close / pe_low  (or ep_* when ratio == 'EP')
                     ratio derived from the corresponding price
    '''
    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit
    # Check Period
    period_types = ['W', 'M', 'Q']
    if not period in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit
    # Check Ratio
    ratio_types = ['PE', 'EP']
    if not ratio in ratio_types:
        print('Un-supported ratio type - should be one of:', ratio_types)
        raise SystemExit
    # Ensure Stock QFQ Data File is Available
    qfq_path = c.path_dict['qfq'] % period
    qfq_file = c.file_dict['qfq'] % (period, stock_id)
    qfq_fullpath = qfq_path + qfq_file
    if not u.hasFile(qfq_fullpath):
        print('Require stock QFQ file:', (qfq_fullpath))
        raise SystemExit
    # Ensure Stock Finance Summary Data File is Available
    fs_fullpath = c.fullpath_dict['finsum'] % stock_id
    if not u.hasFile(fs_fullpath):
        print('Require stock finance summary file:', (fs_fullpath))
        raise SystemExit
    #
    # Load QFQ Data
    #
    qfq = u.read_csv(qfq_fullpath)
    qfq.set_index('date', inplace=True)
    qfq.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(qfq.head(10))
    # Check empty QFQ data
    qfq_number = len(qfq)
    if qfq_number == 0:
        print('Stock QFQ data length is 0!')
        raise SystemExit
    # Handle stop-trading period (by filling with previous period data)
    # Assume: qfq data has been sorted ascendingly by date.
    for i in range(qfq_number):
        if i > 0 and np.isnan(qfq.iloc[i]['close']):
            if gs.is_debug:
                print('close = ', qfq.iloc[i]['close'])
            if np.isnan(qfq.iloc[i - 1]
                        ['close']):  # Ignore leading stop-trading periods
                continue
            else:  # Regular internal stop-trading periods
                # NOTE(review): chained indexing (.iloc[i][column] = ...)
                # may write to a temporary copy under modern pandas --
                # verify the fill actually takes effect.
                for column in qfq.columns:
                    qfq.iloc[i][column] = qfq.iloc[i - 1][column]
    #
    # Load Finance Summary Data
    #
    fs = u.read_csv(fs_fullpath)
    fs.set_index('date', inplace=True)
    fs.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(fs.head(10))
    # Check empty Finance Summary data
    fs_number = len(fs)
    if fs_number == 0:
        print('Stock finance summary data length is 0!')
        raise SystemExit
    #
    # Generate Rolling EPS for Each Quarter
    #
    # Build a quarter-end date index covering the whole QFQ date range.
    eps_index = []
    date_start = u.dateFromStr(qfq.index[0])  # First element
    date_end = u.dateFromStr(qfq.index[-1])  # Last element
    year_start = date_start.year
    year_end = date_end.year
    for year in range(year_start, year_end + 1):
        for quarter in range(1, 5):
            date = u.quarterDateStr(year, quarter)
            eps_index.append(date)
    if gs.is_debug:
        print(eps_index)
    eps_columns = ['eps', 'eps_filled', 'eps_rolling']
    eps_columns_number = len(eps_columns)
    eps_index_number = len(eps_index)
    # Init all elements to NaN (the randn values are only used to
    # allocate the array; every cell is overwritten with NaN below).
    data_init = np.random.randn(eps_index_number * eps_columns_number)
    for i in range(eps_index_number * eps_columns_number):
        data_init[i] = np.nan
    eps = pd.DataFrame(data_init.reshape(eps_index_number, eps_columns_number),
                       index=eps_index, columns=eps_columns)
    # Inherite EPS from finance summary
    for i in range(eps_index_number):
        index = eps.index[i]
        if index in fs.index:  # Has EPS data
            eps.iloc[i]['eps'] = fs.loc[index, 'eps']
        else:  # Missing EPS data
            eps.iloc[i]['eps'] = np.nan
    # Fill the Missing EPS Data
    # EPS is cumulative within a year, so a missing quarter is estimated
    # by scaling a known quarter of the same year.
    for year in range(year_start, year_end + 1):
        index_q1 = u.quarterDateStr(year, 1)
        index_q2 = u.quarterDateStr(year, 2)
        index_q3 = u.quarterDateStr(year, 3)
        index_q4 = u.quarterDateStr(year, 4)
        eps_q1 = eps.loc[index_q1, 'eps']
        eps_q2 = eps.loc[index_q2, 'eps']
        eps_q3 = eps.loc[index_q3, 'eps']
        eps_q4 = eps.loc[index_q4, 'eps']
        if gs.is_debug:
            print('eps_q1 =', eps_q1, 'eps_q2 =', eps_q2, 'eps_q3 =', eps_q3,
                  'eps_q4 =', eps_q4)
        eps_q1_filled = eps_q1
        eps_q2_filled = eps_q2
        eps_q3_filled = eps_q3
        eps_q4_filled = eps_q4
        if (np.isnan(eps_q1)):
            if (not np.isnan(eps_q2)):
                eps_q1_filled = eps_q2 * 0.5
            elif (not np.isnan(eps_q3)):
                eps_q1_filled = eps_q3 * 0.3333333333333333
            elif (not np.isnan(eps_q4)):
                eps_q1_filled = eps_q4 * 0.25
        if (np.isnan(eps_q2)):
            if (not np.isnan(eps_q1)):
                eps_q2_filled = eps_q1 * 2.0
            elif (not np.isnan(eps_q3)):
                eps_q2_filled = eps_q3 * 0.6666666666666667
            elif (not np.isnan(eps_q4)):
                eps_q2_filled = eps_q4 * 0.5
        if (np.isnan(eps_q3)):
            if (not np.isnan(eps_q2)):
                eps_q3_filled = eps_q2 * 1.5
            elif (not np.isnan(eps_q1)):
                eps_q3_filled = eps_q1 * 3.0
            elif (not np.isnan(eps_q4)):
                eps_q3_filled = eps_q4 * 0.75
        if (np.isnan(eps_q4)):
            if (not np.isnan(eps_q3)):
                eps_q4_filled = eps_q3 * 1.333333333333333
            elif (not np.isnan(eps_q2)):
                eps_q4_filled = eps_q2 * 2.0
            elif (not np.isnan(eps_q1)):
                eps_q4_filled = eps_q1 * 4.0
        if gs.is_debug:
            print('eps_q1_filled =', eps_q1_filled, 'eps_q2_filled =',
                  eps_q2_filled, 'eps_q3_filled =', eps_q3_filled,
                  'eps_q4_filled =', eps_q4_filled)
        eps.loc[index_q1, 'eps_filled'] = eps_q1_filled
        eps.loc[index_q2, 'eps_filled'] = eps_q2_filled
        eps.loc[index_q3, 'eps_filled'] = eps_q3_filled
        eps.loc[index_q4, 'eps_filled'] = eps_q4_filled
    # Calculate Rolling EPS: annualize the cumulative EPS by quarter.
    rolling_ratio = [4.0, 2.0, 1.333333333333333, 1.0]
    for year in range(year_start, year_end + 1):
        for quarter in range(1, 5):
            index = u.quarterDateStr(year, quarter)
            eps_filled = eps.loc[index, 'eps_filled']
            eps.loc[index, 'eps_rolling'] = eps_filled * rolling_ratio[quarter - 1]
    if gs.is_debug:
        print(eps.head(10))
    #
    # Calculate HPE based on given period
    #
    # Drop un-used columns
    hpe = qfq.drop(['open', 'volume', 'amount'], axis=1)
    # Add columns to hpe
    if ratio == 'PE':
        for column in [
                'eps', 'eps_filled', 'eps_rolling', 'pe_high', 'pe_close',
                'pe_low'
        ]:
            hpe[column] = np.nan
    else:
        for column in [
                'eps', 'eps_filled', 'eps_rolling', 'ep_high', 'ep_close',
                'ep_low'
        ]:
            hpe[column] = np.nan
    # Calculate Historical P/E or E/P Ratio
    # Map each period-end date to its quarter and copy the EPS columns in.
    hpe_number = len(hpe)
    for i in range(hpe_number):
        index = hpe.index[i]  # 'YYYY-mm-dd'
        index_date = u.dateFromStr(index)  # datetime.date(YYYY-mm-dd)
        index_quarter = u.quarterDateStr(
            index_date.year, u.quarterOfDate(index_date))  # 'YYYY-mm-dd'
        for column in ['eps', 'eps_filled', 'eps_rolling']:
            hpe.loc[index, column] = eps.loc[index_quarter, column]
    if ratio == 'PE':
        # Calculate Historical P/E Ratio: price / annualized EPS.
        price = {'pe_close': 'close', 'pe_high': 'high', 'pe_low': 'low'}
        for i in range(hpe_number):
            index = hpe.index[i]  # 'YYYY-mm-dd'
            eps_rolling = hpe.iloc[i]['eps_rolling']
            for column in ['pe_close', 'pe_high', 'pe_low']:
                hpe.loc[index, column] = hpe.loc[index, price[column]] / eps_rolling
    else:
        # Calculate Historical E/P Ratio: annualized EPS / price.
        price = {'ep_close': 'close', 'ep_high': 'high', 'ep_low': 'low'}
        for i in range(hpe_number):
            index = hpe.index[i]  # 'YYYY-mm-dd'
            eps_rolling = hpe.iloc[i]['eps_rolling']
            for column in ['ep_close', 'ep_high', 'ep_low']:
                hpe.loc[index, column] = eps_rolling / hpe.loc[index, price[column]]
    # Format columns: round to two decimals via string formatting.
    for column in hpe.columns:
        hpe[column] = hpe[column].map(lambda x: '%.2f' % x)
        hpe[column] = hpe[column].astype(float)
    return hpe
def calc_hpe_quarterly(stock_id, year_start, year_end):
    '''
    Calculate the historical P/E ratio quarter by quarter.

    Assumes the quarterly forward-adjusted price data and the quarterly
    EPS data have already been fetched and stored as CSV files.

    Parameters
    ----------
    stock_id : string, stock code e.g. '600036'
    year_start : int, first year of the analysis e.g. 2005
    year_end : int, last year of the analysis e.g. 2016

    Returns
    -------
    DataFrame with columns:
        date         quarter end date e.g. 2005-03-31
        close        quarter close price
        high         quarter high price
        low          quarter low price
        eps          EPS at quarter end (may contain gaps)
        eps_filled   EPS with gaps filled from neighbouring quarters
        eps_rolling  annualized EPS estimated from eps_filled
        pe_close / pe_high / pe_low  P/E from the corresponding price
    '''
    # Check Input Parameters
    if not isinstance(stock_id, str) \
            or not isinstance(year_start, int) or not isinstance(year_end, int):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit
    if not (year_start <= year_end):
        print('Start year should be no later than end year!')
        raise SystemExit
    # Fetch Stock Data; it must cover exactly four quarters per year.
    stock_data = u.read_csv(c.fullpath_dict['qfq_q'] % stock_id)
    stock_data_number = len(stock_data)
    if stock_data_number != (year_end - year_start + 1) * 4:
        print(
            'The duration of tock data does not match the duration of analysis!'
        )
        raise SystemExit
    stock_data.set_index('date', inplace=True)
    stock_data.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(stock_data.head(10))
    # Handle stop-trading quarter (by filling with previous quarter data)
    # Assume: stock data has been sorted ascendingly by date.
    for i in range(stock_data_number):
        if i > 0 and np.isnan(stock_data.iloc[i]['close']):
            if gs.is_debug:
                print('close = ', stock_data.iloc[i]['close'])
            if np.isnan(stock_data.iloc[i - 1]
                        ['close']):  # Ignore leading stop-trading quarters
                continue
            else:  # Regular internal stop-trading quarters
                # NOTE(review): chained indexing (.iloc[i][column] = ...)
                # may write to a temporary copy under modern pandas --
                # verify the fill actually takes effect.
                for column in stock_data.columns:
                    stock_data.iloc[i][column] = stock_data.iloc[i - 1][column]
    # Fetch Report Data
    report_data = u.read_csv(c.fullpath_dict['finsum'] % stock_id)
    report_data.set_index('date', inplace=True)
    report_data.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(report_data.head(10))
    # Join Stock Data and Report Data (Assume Stock Data is the reference)
    hpe_columns = [
        'close', 'high', 'low', 'eps', 'eps_filled', 'eps_rolling',
        'pe_close', 'pe_high', 'pe_low'
    ]
    hpe_columns_number = len(hpe_columns)
    hpe_index_number = stock_data_number
    # Init all elements to NaN (the randn values are only used to
    # allocate the array; every cell is overwritten with NaN below).
    data_init = np.random.randn(hpe_index_number * hpe_columns_number)
    for i in range(hpe_index_number * hpe_columns_number):
        data_init[i] = np.nan
    hpe = pd.DataFrame(data_init.reshape(hpe_index_number, hpe_columns_number),
                       index=stock_data.index, columns=hpe_columns)
    # Inherite close/high/low from stock data, and eps from report data
    for i in range(hpe_index_number):
        for column in ['close', 'high', 'low']:
            hpe.iloc[i][column] = stock_data.iloc[i][column]
        index = hpe.index[i]
        if index in report_data.index:  # Has EPS data
            hpe.iloc[i]['eps'] = report_data.loc[index, 'eps']
        else:  # Missing EPS data
            hpe.iloc[i]['eps'] = np.nan
    # Fill the Missing EPS Data
    # EPS is cumulative within a year, so a missing quarter is estimated
    # by scaling a known quarter of the same year.
    for year in range(year_start, year_end + 1):
        index_q1 = u.quarterDateStr(year, 1)
        index_q2 = u.quarterDateStr(year, 2)
        index_q3 = u.quarterDateStr(year, 3)
        index_q4 = u.quarterDateStr(year, 4)
        eps_q1 = hpe.loc[index_q1, 'eps']
        eps_q2 = hpe.loc[index_q2, 'eps']
        eps_q3 = hpe.loc[index_q3, 'eps']
        eps_q4 = hpe.loc[index_q4, 'eps']
        if gs.is_debug:
            print('eps_q1 =', eps_q1, 'eps_q2 =', eps_q2, 'eps_q3 =', eps_q3,
                  'eps_q4 =', eps_q4)
        eps_q1_filled = eps_q1
        eps_q2_filled = eps_q2
        eps_q3_filled = eps_q3
        eps_q4_filled = eps_q4
        if (np.isnan(eps_q1)):
            if (not np.isnan(eps_q2)):
                eps_q1_filled = eps_q2 * 0.5
            elif (not np.isnan(eps_q3)):
                eps_q1_filled = eps_q3 * 0.3333333333333333
            elif (not np.isnan(eps_q4)):
                eps_q1_filled = eps_q4 * 0.25
        if (np.isnan(eps_q2)):
            if (not np.isnan(eps_q1)):
                eps_q2_filled = eps_q1 * 2.0
            elif (not np.isnan(eps_q3)):
                eps_q2_filled = eps_q3 * 0.6666666666666667
            elif (not np.isnan(eps_q4)):
                eps_q2_filled = eps_q4 * 0.5
        if (np.isnan(eps_q3)):
            if (not np.isnan(eps_q2)):
                eps_q3_filled = eps_q2 * 1.5
            elif (not np.isnan(eps_q1)):
                eps_q3_filled = eps_q1 * 3.0
            elif (not np.isnan(eps_q4)):
                eps_q3_filled = eps_q4 * 0.75
        if (np.isnan(eps_q4)):
            if (not np.isnan(eps_q3)):
                eps_q4_filled = eps_q3 * 1.333333333333333
            elif (not np.isnan(eps_q2)):
                eps_q4_filled = eps_q2 * 2.0
            elif (not np.isnan(eps_q1)):
                eps_q4_filled = eps_q1 * 4.0
        if gs.is_debug:
            print('eps_q1_filled =', eps_q1_filled, 'eps_q2_filled =',
                  eps_q2_filled, 'eps_q3_filled =', eps_q3_filled,
                  'eps_q4_filled =', eps_q4_filled)
        hpe.loc[index_q1, 'eps_filled'] = eps_q1_filled
        hpe.loc[index_q2, 'eps_filled'] = eps_q2_filled
        hpe.loc[index_q3, 'eps_filled'] = eps_q3_filled
        hpe.loc[index_q4, 'eps_filled'] = eps_q4_filled
    # Calculate Rolling EPS: annualize the cumulative EPS by quarter.
    rolling_ratio = [4.0, 2.0, 1.333333333333333, 1.0]
    for year in range(year_start, year_end + 1):
        for quarter in range(1, 5):
            index = u.quarterDateStr(year, quarter)
            eps_filled = hpe.loc[index, 'eps_filled']
            hpe.loc[index, 'eps_rolling'] = eps_filled * rolling_ratio[quarter - 1]
    # Calculate Historical P/E Ratio: price / annualized EPS.
    price = {'pe_close': 'close', 'pe_high': 'high', 'pe_low': 'low'}
    for i in range(hpe_index_number):
        index = hpe.index[i]  # 'YYYY-mm-dd'
        eps_rolling = hpe.iloc[i]['eps_rolling']
        for column in ['pe_close', 'pe_high', 'pe_low']:
            hpe.loc[index, column] = hpe.loc[index, price[column]] / eps_rolling
    # Format columns: round to two decimals via string formatting.
    for column in hpe_columns:
        hpe[column] = hpe[column].map(lambda x: '%.2f' % x)
        hpe[column] = hpe[column].astype(float)
    return hpe
def loadIndustrySina():
    """Load the Sina industry classification table."""
    return u.read_csv(c.fullpath_dict['indu_sina'])
# strategyCoefficient(benchmark_id, date_start, date_end, period, ratio_method, loadAllIndex(), True, 'AllIndex') # Analyze Strategy Results analyze_strategy = False target_name = 'AllIndex' #target_name = 'AllStock' common_postfix = '_'.join(['Coefficient', date_start, date_end, period, ratio_method, target_name, 'vs', benchmark_id]) if analyze_strategy: analyzeCoefficient(common_postfix, completeness_threshold, top_number) # Plot Strategy Results plot_strategy = False if plot_strategy: path = c.path_dict['strategy'] file = c.file_dict['strategy'] % '_'.join(['Common', 'AllPrice', benchmark_id, period, 'AllStock']) price_allstock = u.read_csv(path+file) file = c.file_dict['strategy'] % '_'.join(['Common', 'AllPrice', benchmark_id, period, 'AllIndex']) price_allindex = u.read_csv(path+file) # Generate Statistics List statistics = [] for coefficient in ['Correlation', 'Beta', 'Alpha']: for classification in ['Positive', 'Zero', 'Negative']: statistics.append(classification+coefficient) print(statistics) # Plot Statistics List for stats in statistics: # Plot statistics file = c.file_dict['strategy'] % '_'.join([common_postfix, completeness_threshold, stats]) data = u.read_csv(path+file)
def loadSuspended():
    """Load the table of suspended (trading-halted) stocks."""
    return u.read_csv(c.fullpath_dict['suspended'])
def optimizePriceFollow(stock_list, is_index, threshold_list, method):
    """Search for the best prediction cutoff per interior threshold.

    For every stock, builds a weighted predict series (per 'method'),
    evaluates candidate cutoffs on a grid over [1, -1], saves the
    per-cutoff deltas to CSV, and prints the cutoff whose mean absolute
    delta against 'confirm' is smallest.

    method : 'Threshold Weighted', 'Equally Weighted', or anything else
             for the single-threshold predict.

    NOTE(review): this function uses DataFrame.ix throughout, which was
    removed in pandas >= 1.0; it requires an older pandas (or migration
    to .loc/.iloc).
    """
    # Both input lists must be non-empty.
    stock_number = len(stock_list)
    if stock_number < 1:
        print('Stock Number:', stock_number)
        raise SystemExit
    threshold_number = len(threshold_list)
    if threshold_number < 1:
        print('Threshold Number:', threshold_number)
        raise SystemExit
    for stock_id in stock_list:
        # Load Results from Different Threshold (the merged '_All' file).
        file_postfix = 'PriceFollow_%s_All' % u.stockFileName(
            stock_id, is_index)
        df = u.read_csv(c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix)
        row_number = len(df)
        # Only interior thresholds have both neighbours available.
        for i in range(1, threshold_number - 1):
            t_prev = threshold_list[i - 1]
            t_curr = threshold_list[i]
            t_next = threshold_list[i + 1]
            t_total = t_prev + t_curr + t_next
            column_postfix = '_%s' % t_curr
            # Slice Interested Columns
            select_columns = []
            for t in [t_prev, t_curr, t_next]:
                select_columns.append('predict' + '_%s' % t)
            select_columns.append('confirm' + column_postfix)
            df2 = pd.DataFrame.copy(df.loc[:, select_columns])
            # Calculate Weighted Predict (row 0 has no prior data).
            df2['weighted_predict' + column_postfix] = np.nan
            for j in range(1, row_number):
                # Method 1: Threshold Weighted Predict
                if method == 'Threshold Weighted':
                    predict = 0.0
                    for t in [t_prev, t_curr, t_next]:
                        predict = predict + t * df2.ix[j, 'predict' + '_%s' % t]
                    predict = predict / t_total
                    df2.ix[j, 'weighted_predict' + column_postfix] = predict
                # Method 2: Equally Weighted Predict
                elif method == 'Equally Weighted':
                    predict = 0.0
                    for t in [t_prev, t_curr, t_next]:
                        predict = predict + 1.0 * df2.ix[j, 'predict' + '_%s' % t]
                    predict = predict / 3.0
                    df2.ix[j, 'weighted_predict' + column_postfix] = predict
                # Method 3: Single Predict
                else:
                    predict = df2.ix[j, 'predict' + column_postfix]
                    df2.ix[j, 'weighted_predict' + column_postfix] = predict
            # Optimize for Given Segments within Range [1, -1] to Find Best Cutoff
            print('Optimization Starts for Threshold:', t_curr)
            segments = 10
            delta_mean = []
            delta_stddev = []
            for j in range(1, row_number):
                weighted_predict = df2.ix[j, 'weighted_predict' + column_postfix]
                confirm = df2.ix[j, 'confirm' + column_postfix]
                # For each candidate cutoff, record the signed error of
                # the thresholded prediction against the confirmation.
                for k in range(1, segments):
                    ratio = float(k) / float(segments)
                    cutoff = 1.0 * (1.0 - ratio) + (-1.0) * ratio
                    predict_cutoff = 1 if weighted_predict > cutoff else -1
                    delta_cutoff = predict_cutoff - confirm if not np.isnan(
                        confirm) else np.nan
                    df2.ix[j, 'delta' + '_%.2f' % cutoff] = delta_cutoff
            # Save to CSV File
            file_postfix = 'PriceFollow_%s_Cutoff_%s' % (u.stockFileName(
                stock_id, is_index), t_curr)
            u.to_csv(df2, c.path_dict['strategy'],
                     c.file_dict['strategy'] % file_postfix)
            # Gather Mean of Delta_Cutoffs
            for k in range(1, segments):
                ratio = float(k) / float(segments)
                cutoff = 1.0 * (1.0 - ratio) + (-1.0) * ratio
                describe = df2['delta' + '_%.2f' % cutoff].describe()
                delta_mean.append(np.abs(describe['mean']))
                delta_stddev.append(describe['std'])
            # Find Best Cutoff: smallest mean absolute delta wins.
            delta_mean_min = min(delta_mean)
            delta_mean_index = delta_mean.index(delta_mean_min)
            print('Delta Mean:', delta_mean)
            print('Delta Stddev:', delta_stddev)
            print('Delta Mean Min:', delta_mean_min)
            print('Delta Mean Index:', delta_mean_index)
            ratio = float(delta_mean_index + 1) / float(segments)
            best_cutoff = 1.0 * (1.0 - ratio) + (-1.0) * ratio
            print('Best Cutoff:', best_cutoff)
            print('Optimization Ends for Threshold:', t_curr)
def loadConceptSina():
    """Load the Sina concept classification table."""
    return u.read_csv(c.fullpath_dict['conc_sina'])
def loadQFQ(stock_id, period):
    """Load QFQ (forward-adjusted) quotes for one stock and period."""
    fullpath = (c.path_dict['qfq'] % period) + (c.file_dict['qfq'] % (period, stock_id))
    return u.read_csv(fullpath)
def loadTerminated():
    """Load the table of terminated (delisted) stocks."""
    return u.read_csv(c.fullpath_dict['terminated'])