def update(self, end_date = None, num = 10):
    """Store k-data for every trading day in a trailing window.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'. ``None`` means
            today. If the resulting value equals today's date, it is replaced
            by ``get_day_nday_ago(end_date, num=1)``.
        num: window length passed to ``get_day_nday_ago`` to derive the
            first day of the window.

    Returns:
        True if ``set_k_data`` succeeded for every processed day, else False.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    if end_date is None:
        end_date = today
    if end_date == today:
        # Today's date is never used as the window end; step back by one.
        end_date = get_day_nday_ago(end_date, num = 1, dformat = "%Y-%m-%d")
    start_date = get_day_nday_ago(end_date, num = num, dformat = "%Y-%m-%d")

    all_ok = True
    for day in get_dates_array(start_date, end_date):
        # Blacklisted days are skipped before the calendar is consulted.
        if day in self.balcklist:
            continue
        if not CCalendar.is_trading_day(day, redis = self.redis):
            continue
        if self.set_k_data(day):
            continue
        all_ok = False
        self.logger.info("market %s for %s set failed" % (self.market, day))
    return all_ok
def download(output_directory, cdate, num=50):
    """Fetch the daily ``<YYYYMMDD>.zip`` archives for a range of days.

    The range starts at ``get_day_nday_ago(cdate, num)`` and spans
    ``num + 1`` consecutive calendar days; files are visited newest first.
    Days whose archive already exists locally, or whose URL is reported as
    missing by ``exists``, are skipped.

    Args:
        output_directory: directory the archives are downloaded into.
        cdate: reference date as 'YYYY-MM-DD'.
        num: number of days to look back from ``cdate``.

    Returns:
        False as soon as any step raises (the error is logged), True otherwise.
    """
    compact_cdate = time.strftime("%Y%m%d", time.strptime(cdate, "%Y-%m-%d"))
    first_day = get_day_nday_ago(compact_cdate, num=num)
    range_start = time.strftime("%m/%d/%Y", time.strptime(first_day, "%Y%m%d"))
    day_index = pd.date_range(range_start, periods=num + 1, freq='D')

    as_compact = np.vectorize(lambda s: s.strftime('%Y%m%d'))
    newest_first = as_compact(day_index.to_pydatetime())[::-1]

    for day in newest_first:
        filename = "%s.zip" % day
        url = "http://www.tdx.com.cn/products/data/data/2ktic/%s" % filename
        filepath = "%s/%s" % (output_directory, filename)
        try:
            if os.path.exists(filepath):
                logger.debug("%s existed" % filepath)
            elif not exists(url):
                logger.debug("%s not exists" % filename)
            else:
                wget.download(url, out=output_directory)
        except Exception as e:
            logger.error(e)
            return False
    return True
def choose_plate(edate = '2016-10-11', ndays = 90, redis_host = '127.0.0.1'):
    """Pick the industries leading on ``edate`` and screen their stocks.

    An industry is "interesting" when it is in the top three by ``pchange``
    AND in the top three by ``mchange`` on ``edate``. For each such industry
    the last ``ndays`` of data are compared against index '000001' and its
    member stocks are passed to ``choose_stock``.

    Args:
        edate: evaluation date as 'YYYY-MM-DD'.
        ndays: look-back length handed to ``get_day_nday_ago``.
        redis_host: host used for every redis-backed client created here.

    Returns:
        An empty list when no industry qualifies; otherwise the stock list
        returned by ``choose_stock`` for the last industry processed.
        NOTE(review): only the last industry's list is returned, as before;
        confirm whether the lists were meant to be merged.
    """
    rindustry_info_client = RIndexIndustryInfo(redis_host=redis_host)
    today_industry_df = rindustry_info_client.get_k_data(edate)
    top_by_pchange = today_industry_df.sort_values(by = 'pchange', ascending = False).head(3)
    top_by_mchange = today_industry_df.sort_values(by = 'mchange', ascending = False).head(3)
    # Intersect the two rankings (previously pchange was intersected with itself).
    plate_code_list = list(set(top_by_pchange.code.tolist()).intersection(top_by_mchange.code.tolist()))
    if not plate_code_list:
        logger.info("no interested plate for date:%s" % edate)
        return list()

    sdate = get_day_nday_ago(edate, ndays, '%Y-%m-%d')

    # Reference index data for the same window.
    sh_index_info = CIndex('000001', redis_host=redis_host).get_k_data_in_range(sdate, edate)

    # Industry data restricted to the interesting industries.
    all_industry_df = rindustry_info_client.get_k_data_in_range(sdate, edate)
    all_industry_df = all_industry_df.loc[all_industry_df.code.isin(plate_code_list)]

    redisobj = create_redis_obj(redis_host)
    today_industry_info = IndustryInfo.get(redisobj)

    stat_columns = ['code', 'sai', 'pchange', ct.KL, ct.QL, ct.JL, ct.FL]
    stat_rows = []
    # Defined up front so an empty groupby returns [] instead of raising NameError.
    good_code_list = list()
    for code, industry in all_industry_df.groupby('code'):
        industry = industry.reset_index(drop = True)
        # sri: industry change relative to the index, row by row.
        industry['sri'] = industry['pchange'] - sh_index_info['pchange']
        # sai: sri on rows where the industry rose while the index fell, else 0.
        industry['sai'] = 0.0
        rose_while_index_fell = (industry.pchange > 0) & (sh_index_info.pchange < 0)
        # Align to this industry's rows so .loc accepts the mask even if the
        # index frame has a different number of rows.
        rose_while_index_fell = rose_while_index_fell.reindex(industry.index, fill_value = False).astype(bool)
        industry.loc[rose_while_index_fell, 'sai'] = industry.loc[rose_while_index_fell, 'sri']
        industry_sai = len(industry.loc[industry.sai > 0])
        first_preclose = industry.loc[0, 'preclose']
        industry_pchange = 100 * (industry.loc[len(industry) - 1, 'close'] - first_preclose) / first_preclose
        code_list = json.loads(today_industry_info.loc[today_industry_info.code == code, 'content'].values[0])
        info_dict, good_code_list = choose_stock(code_list, sdate, edate)
        stat_rows.append([code, industry_sai, industry_pchange, info_dict[ct.KL], info_dict[ct.QL], info_dict[ct.JL], info_dict[ct.FL]])

    # Build the summary once instead of growing it with DataFrame.append.
    industry_static_info = DataFrame(stat_rows, columns = stat_columns)
    industry_static_info = industry_static_info.sort_values(by=['pchange'], ascending=False).reset_index(drop = True)
    logger.debug("industry statistics for %s:\n%s", edate, industry_static_info)
    return good_code_list
def init_tdx_index_info(self, cdate=None, num=10):
    """Store k-data for every index code listed in ``ct.TDX_INDEX_DICT``.

    Args:
        cdate: when None, a single pass is run with ``None`` as the date and
            the result of ``concurrent_run`` is returned directly. Otherwise
            one pass is run per trading day in the window ending at ``cdate``.
        num: window length passed to ``get_day_nday_ago``.

    Returns:
        The ``concurrent_run`` result when ``cdate`` is None; otherwise True
        only if every per-day pass returned a truthy result.
    """
    # Alternative source once considered: self.get_concerned_index_codes()
    index_code_list = list(ct.TDX_INDEX_DICT.keys())

    def _set_index_info(cdate, code_id):
        # Worker: returns (code_id, success flag); any exception counts as failure.
        try:
            if code_id in self.index_objs:
                return (code_id, self.index_objs[code_id].set_k_data(cdate))
            index_cls = CIndex if code_id in list(ct.TDX_INDEX_DICT.keys()) else TdxFgIndex
            return (code_id, index_cls(code_id).set_k_data(cdate))
        except Exception as e:
            self.logger.error(e)
            return (code_id, False)

    if cdate is None:
        return concurrent_run(partial(_set_index_info, cdate), index_code_list, num=5)

    succeed = True
    start_date = get_day_nday_ago(cdate, num=num, dformat="%Y-%m-%d")
    for mdate in get_dates_array(start_date, cdate, asending=True):
        if not self.cal_client.is_trading_day(mdate):
            continue
        day_worker = partial(_set_index_info, mdate)
        if not concurrent_run(day_worker, index_code_list, num=5):
            succeed = False
    return succeed
def gen_animation(self, end_date, days, sfile='/code/panimation.mp4'):
    """Render one scatter frame per date (pday vs. profit) into a video file.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'.
        days: window length passed to ``get_day_nday_ago``.
        sfile: path of the video file to write.

    The figure is closed even if rendering or saving fails.
    """
    import matplotlib.animation as animation
    matplotlib.use('Agg')
    start_date = get_day_nday_ago(end_date, num=days, dformat="%Y-%m-%d")
    df = self.ris.get_k_data_in_range(start_date, end_date)
    fig, ax = plt.subplots()
    try:
        # Symmetric axis limits so the origin stays centred in every frame.
        xmax = max(abs(df.pday.values.min()), df.pday.values.max())
        ymax = max(abs(df.profit.values.min()), df.profit.values.max())
        groups = df.groupby(df.date)
        dates = sorted(set(df.date.tolist()))
        Writer = animation.writers['ffmpeg']
        writer = Writer(fps=2, metadata=dict(artist='biek'), bitrate=-1)

        def init():
            # Reset the axes and move the spines so they cross at (0, 0).
            ax.clear()
            ax.set_xlim(-xmax, xmax)
            ax.set_ylim(-ymax, ymax)
            ax.spines['top'].set_color('none')
            ax.spines['right'].set_color('none')
            ax.xaxis.set_ticks_position('bottom')
            ax.spines['bottom'].set_position(('data', 0))
            ax.yaxis.set_ticks_position('left')
            ax.spines['left'].set_position(('data', 0))

        def animate(i):
            cdate = dates[i]
            day_df = groups.get_group(cdate)
            init()
            print(cdate, len(day_df))
            bull_stock_num = len(day_df[day_df.profit >= 0])
            # One call draws every point; no per-code re-filtering of the frame.
            ax.scatter(day_df.pday, day_df.profit, color='black', s=1)
            ax.set_title("日期:%s 股票总数:%s 牛熊股比:%s" % (cdate, len(day_df), 100 * bull_stock_num / len(day_df)), fontproperties=get_chinese_font())

        ani = animation.FuncAnimation(fig, animate, frames=len(dates), init_func=init, interval=1000, repeat=False)
        ani.save(sfile, writer)
    finally:
        plt.close(fig)
def update(self, end_date=None, num=10):
    """Call ``set_data`` for the trading days in a trailing window.

    The window's last day (``end_date``) and any day in ``self.balcklist``
    are skipped.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'; None means today.
        num: window length passed to ``get_day_nday_ago``.

    Returns:
        True if ``set_data`` succeeded for every processed day, else False.
    """
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")

    all_ok = True
    for day in get_dates_array(start_date, end_date):
        # The calendar is consulted first, then the skip rules are applied.
        if not CCalendar.is_trading_day(day, redis=self.redis):
            continue
        if day == end_date or day in self.balcklist:
            continue
        if not self.set_data(day):
            all_ok = False
    return all_ok
def update(self, end_date = None, num = 3):
    """Call ``set_score`` for every trading day in a trailing window.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'; None means today.
        num: window length passed to ``get_day_nday_ago``.

    Returns:
        True if ``set_score`` succeeded for every trading day, else False.
        Each failing day is logged.
    """
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = get_day_nday_ago(end_date, num = num, dformat = "%Y-%m-%d")

    all_ok = True
    for day in get_dates_array(start_date, end_date):
        if not CCalendar.is_trading_day(day, redis = self.redis):
            continue
        if self.set_score(day):
            continue
        all_ok = False
        self.logger.info("set score for %s set failed" % day)
    return all_ok
def update(self, end_date = None, num = 19):
    """Call ``set_day_data`` for every trading day in a trailing window.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'; None means today,
            resolved at call time.
        num: window length passed to ``get_day_nday_ago``.

    Returns:
        True if ``set_day_data`` succeeded for every trading day, else False.
        Each failing day is logged as an error.
    """
    # Resolve the default per call: a default computed in the signature is
    # evaluated once at definition time and goes stale in a long-lived process.
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = get_day_nday_ago(end_date, num = num, dformat = "%Y-%m-%d")
    date_array = get_dates_array(start_date, end_date)
    succeed = True
    for mdate in date_array:
        if not CCalendar.is_trading_day(mdate, redis = self.redis):
            continue
        if not self.set_day_data(mdate):
            self.logger.error("set %s data for rstock failed" % mdate)
            succeed = False
    return succeed
def update(self, end_date=None, num=205):
    """Call ``crawl_data`` for every trading day in a trailing window.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'; None means today.
        num: window length passed to ``get_day_nday_ago``. Defaults to the
            205 that was previously hard-coded.

    Returns:
        True if ``crawl_data`` succeeded for every trading day, else False.
        Each failing day is logged as an error.
    """
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = get_day_nday_ago(end_date, num=num, dformat="%Y-%m-%d")
    date_array = get_dates_array(start_date, end_date)
    succeed = True
    for mdate in date_array:
        if not CCalendar.is_trading_day(mdate, redis=self.redis):
            continue
        # end_date itself is processed too (a skip for it was disabled earlier).
        if not self.crawl_data(mdate):
            self.logger.error("%s set failed" % mdate)
            succeed = False
    return succeed
def update(self, cdate=None):
    """Collect the data sets for ``cdate`` and generate the review document.

    Gathers market, rzrq, index '880003', limit, stock, index and industry
    data for a window ending at ``cdate`` and passes them to
    ``self.doc.generate``.

    Args:
        cdate: review date as 'YYYY-MM-DD'; None means today, resolved at
            call time.

    Returns:
        True on success; False if any step raises (the error is logged).
    """
    # Resolve the default per call: a default computed in the signature is
    # evaluated once at definition time and goes stale in a long-lived process.
    if cdate is None:
        cdate = datetime.now().strftime('%Y-%m-%d')

    def _keep_common_dates(left_df, right_df):
        # Restrict both frames to the dates present in both, with fresh indexes.
        common = list(set(left_df.date.tolist()).intersection(set(right_df.date.tolist())))
        left_df = left_df[left_df.date.isin(common)].reset_index(drop=True)
        right_df = right_df[right_df.date.isin(common)].reset_index(drop=True)
        return left_df, right_df

    start_date = get_day_nday_ago(cdate, 200, dformat="%Y-%m-%d")
    end_date = cdate
    try:
        # market info
        sh_df, sz_df = _keep_common_dates(
            self.get_market_data(ct.SH_MARKET_SYMBOL, start_date, end_date),
            self.get_market_data(ct.SZ_MARKET_SYMBOL, start_date, end_date))
        # rzrq info
        sh_rzrq_df, sz_rzrq_df = _keep_common_dates(
            self.get_rzrq_info(ct.SH_MARKET_SYMBOL, start_date, end_date),
            self.get_rzrq_info(ct.SZ_MARKET_SYMBOL, start_date, end_date))
        # average price info
        av_df = self.get_index_df('880003', start_date, end_date)
        # limit up and down info
        limit_info = CLimit(self.dbinfo).get_data(cdate)
        # only stocks with volume > 0 on cdate
        stock_info = self.rstock_client.get_data(cdate)
        stock_info = stock_info[stock_info.volume > 0]
        stock_info = stock_info.reset_index(drop=True)
        # index info
        index_info = self.get_index_data(end_date)
        # industry analysis
        industry_info = self.get_industry_data(cdate)
        # all stock info
        all_stock_info = self.rstock_client.get_k_data_in_range(start_date, end_date)
        # generate the review file
        self.doc.generate(cdate, sh_df, sz_df, sh_rzrq_df, sz_rzrq_df, av_df,
                          limit_info, stock_info, industry_info, index_info,
                          all_stock_info)
        return True
    except Exception as e:
        self.logger.error(e)
        return False
def update(self, end_date = None, num = 30):
    """Call ``set_ratio`` for every trading day in a trailing window.

    The component list is fetched once for ``end_date`` and reused for every
    day in the window.

    Args:
        end_date: last day of the window as 'YYYY-MM-DD'; None means today.
        num: window length passed to ``get_day_nday_ago``.

    Returns:
        False if the component list is empty or any ``set_ratio`` call
        fails; True otherwise.
    """
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = get_day_nday_ago(end_date, num = num, dformat = "%Y-%m-%d")

    components = self.get_components(end_date)
    if len(components) == 0:
        self.logger.error("%s code_list for %s is empty" % (end_date, self.index_code))
        return False

    all_ok = True
    for day in get_dates_array(start_date, end_date):
        if not CCalendar.is_trading_day(day, redis = self.redis):
            continue
        if self.set_ratio(components, day):
            continue
        self.logger.error("set %s score for %s set failed" % (self.index_code, day))
        all_ok = False
    return all_ok
def init_industry_info(self, cdate, num):
    """Store k-data for every code returned by ``self.industry_info_client``.

    Args:
        cdate: when None, a single pass is run with ``None`` as the date and
            the result of ``concurrent_run`` is returned directly. Otherwise
            one pass is run per trading day in the window ending at ``cdate``.
        num: window length passed to ``get_day_nday_ago``.

    Returns:
        The ``concurrent_run`` result when ``cdate`` is None; otherwise True
        only if every per-day pass returned a truthy result.
    """
    def _set_industry_info(cdate, code_id):
        # Worker: returns (code_id, result of set_k_data).
        return (code_id, CIndex(code_id).set_k_data(cdate))

    industry_df = self.industry_info_client.get()
    if cdate is None:
        return concurrent_run(partial(_set_industry_info, cdate), industry_df.code.tolist(), num=5)

    all_ok = True
    start_date = get_day_nday_ago(cdate, num=num, dformat="%Y-%m-%d")
    for day in get_dates_array(start_date, cdate, asending=True):
        if not self.cal_client.is_trading_day(day):
            continue
        day_worker = partial(_set_industry_info, day)
        if not concurrent_run(day_worker, industry_df.code.tolist(), num=5):
            all_ok = False
    return all_ok
def init_stock_info(self, cdate=None):
    """Store k-data for every code returned by ``self.stock_info_client``.

    Bonus data is read from ``/data/tdx/base/bonus.csv`` and the k-data of
    index '000001' is loaded first; both are handed to every worker.

    Args:
        cdate: when None, a single pass is run with ``None`` as the date and
            the result of ``process_concurrent_run`` is returned directly.
            Otherwise one pass is run per trading day in the 10-day window
            ending at ``cdate``.

    Returns:
        False if the index data is missing or empty; the
        ``process_concurrent_run`` result when ``cdate`` is None; otherwise
        True only if every per-day pass returned a truthy result.
    """
    def _set_stock_info(_date, bonus_info, index_info, code_id):
        # Worker: returns (code_id, success flag); any exception counts as failure.
        try:
            stored = CStock(code_id).set_k_data(bonus_info, index_info, _date)
        except Exception as e:
            self.logger.error("%s set k data exception:%s" % (code_id, e))
            return (code_id, False)
        if stored:
            self.logger.info("%s set k data success" % code_id)
            return (code_id, True)
        self.logger.error("%s set k data failed" % code_id)
        return (code_id, False)

    # Column types are fixed explicitly when loading the bonus table.
    bonus_dtypes = {
        'code': str,
        'market': int,
        'type': int,
        'money': float,
        'price': float,
        'count': float,
        'rate': float,
        'date': int
    }
    bonus_info = pd.read_csv("/data/tdx/base/bonus.csv", sep=',', dtype=bonus_dtypes)

    index_info = CIndex('000001').get_k_data()
    if index_info is None or index_info.empty:
        return False

    stock_codes = self.stock_info_client.get().code.tolist()
    if cdate is None:
        worker = partial(_set_stock_info, cdate, bonus_info, index_info)
        return process_concurrent_run(worker, stock_codes, num=5)

    all_ok = True
    start_date = get_day_nday_ago(cdate, num=10, dformat="%Y-%m-%d")
    for day in get_dates_array(start_date, cdate, asending=True):
        if not self.cal_client.is_trading_day(day):
            continue
        worker = partial(_set_stock_info, day, bonus_info, index_info)
        # NOTE(review): num is 500 here but 5 in the cdate-is-None path — confirm intent.
        if not process_concurrent_run(worker, stock_codes, num=500):
            all_ok = False
    return all_ok
df = df.reset_index(drop = True) df = df.sort_values(by = 'amount', ascending= False) df['money_change'] = (df['amount'] - df['preamount'])/1e8 industry_info = IndustryInfo.get() df = pd.merge(df, industry_info, how='left', on=['code']) return df def get_index_df(code, start_date, end_date): df = CIndex(code).get_k_data_in_range(start_date, end_date) df['time'] = df.index.tolist() df = df[['time', 'open', 'high', 'low', 'close', 'volume', 'amount', 'date']] return df if __name__ == '__main__': cdate = '2019-01-07' start_date = get_day_nday_ago(cdate, 100, dformat = "%Y-%m-%d") end_date = cdate #market info sh_df = get_market_data(ct.SH_MARKET_SYMBOL, start_date, end_date) sz_df = get_market_data(ct.SZ_MARKET_SYMBOL, start_date, end_date) date_list = list(set(sh_df.date.tolist()).intersection(set(sz_df.date.tolist()))) sh_df = sh_df[sh_df.date.isin(date_list)] sz_df = sz_df[sz_df.date.isin(date_list)] #rzrq info sh_rzrq_df = get_rzrq_info(ct.SH_MARKET_SYMBOL, start_date, end_date) sz_rzrq_df = get_rzrq_info(ct.SZ_MARKET_SYMBOL, start_date, end_date) date_list = list(set(sh_rzrq_df.date.tolist()).intersection(set(sz_rzrq_df.date.tolist()))) sh_rzrq_df = sh_rzrq_df[sh_rzrq_df.date.isin(date_list)] sz_rzrq_df = sz_rzrq_df[sz_rzrq_df.date.isin(date_list)] #average price info av_df = get_index_df('880003', start_date, end_date)