def get_sd(id, label_start_date, fea_delta=240):
    """Build per-day "largest ticks" feature rows for one stock and save them.

    For each date returned by ``dt_tool.dt_range(label_start_date, -fea_delta)``
    the tick data is fetched with ``ts.get_tick_data``.  The ten ticks with the
    largest ``amount`` are flattened into a single row whose columns are named
    ``<rank>_<column>`` (rank 0 is the largest amount), a ``date`` column is
    added, and the rows of all days are written to ``data/ticks.csv``.

    Args:
        id: Stock code, passed through unchanged to ``ts.get_tick_data``.
        label_start_date: Anchor date handed to ``dt_tool.dt_range``.
        fea_delta: How much history to prepare as features.  It is passed
            negated to ``dt_tool.dt_range``; presumably a number of days
            counted backwards from ``label_start_date`` -- TODO confirm
            against ``dt_tool``.

    Returns:
        None.  The only output is the CSV file; nothing is written when no
        date yields tick data.

    NOTE(review): a second ``get_sd(id)`` is defined later in this file; if
    both live in the same module the later definition rebinds this name.
    """
    # (Original author's note: a separate "delta" would decide whether the
    # label is e.g. a one-week price change; only fea_delta is used here.)
    daily_rows = []
    # Walk every date in the feature window and fetch that day's ticks.
    for date in dt_tool.dt_range(label_start_date, -fea_delta):
        try:
            tick = ts.get_tick_data(id, date)
        except Exception as e:
            # Best effort: report the failure and move on to the next date.
            print(e)
            continue
        if tick is None:
            continue
        tick['type'] = tick['type'].apply(type2id)
        # DataFrame.sort was replaced by sort_values in later pandas
        # releases; use whichever this pandas version provides.
        sort_by = getattr(tick, 'sort_values', None) or tick.sort
        top = sort_by('amount', ascending=False).head(10).reset_index()
        ft = top.drop(['index', 'time', 'change'], axis=1).stack().reset_index()
        # level_0 is the rank of the tick, level_1 the original column name.
        ft.index = ft.level_0.map(str) + '_' + ft.level_1
        fT = ft.drop(['level_0', 'level_1'], axis=1).T
        fT['date'] = dt_tool.format(date)
        daily_rows.append(fT)
    if not daily_rows:
        # Previously this crashed with AttributeError on None.to_csv.
        return
    # One concat instead of repeated DataFrame.append inside the loop.
    pd.concat(daily_rows).to_csv('data/ticks.csv', index=False)
def get_tick_feature(id, end_str, delta):
    """Return per-day "largest ticks" feature rows for the 90 days up to an end date.

    For each of the 90 calendar days ending at (and including) the end date,
    tick data is fetched with ``ts.get_tick_data``.  The ten ticks with the
    largest ``amount`` are flattened so that every value gets a column named
    ``<rank>_<column>``, and a ``date`` column is added.

    Args:
        id: Stock code, passed through unchanged to ``ts.get_tick_data``.
        end_str: Last day of the window, either a ``'YYYY-MM-DD'`` string or a
            ``datetime.date`` / ``datetime.datetime`` object.
        delta: Accepted for interface compatibility but currently unused; the
            look-back window is fixed at 90 days.

    Returns:
        A DataFrame with the rows of all days that produced data (most recent
        day first), or None when no day produced any.

    Raises:
        ValueError: If ``end_str`` is a string that is not in ``%Y-%m-%d`` form.
    """
    # Previously `end` was left unbound whenever end_str was not a plain str.
    if isinstance(end_str, dt.date):
        end = end_str
    else:
        end = dt.datetime.strptime(end_str, '%Y-%m-%d')

    daily_rows = []
    for offset in range(90):
        date_str = (end - dt.timedelta(days=offset)).strftime('%Y-%m-%d')
        try:
            tick = ts.get_tick_data(id, date_str)
        except Exception:
            # Best effort: a day whose fetch fails is skipped silently.
            continue
        if tick is None:
            continue
        # DataFrame.sort was replaced by sort_values in later pandas
        # releases; use whichever this pandas version provides.
        sort_by = getattr(tick, 'sort_values', None) or tick.sort
        top = sort_by('amount', ascending=False).head(10).reset_index()
        ft = top.drop(['index', 'time'], axis=1).stack().reset_index()
        # level_0 is the rank of the tick, level_1 the original column name.
        ft['fea'] = ft.level_0.map(str) + '_' + ft.level_1
        fea_pd = ft.drop(['level_0', 'level_1'], axis=1)
        fea_pd.index = fea_pd.fea
        # NOTE(review): the 'fea' column is still present when transposing, so
        # each day contributes a second row that merely repeats the feature
        # names -- confirm whether that is intended.
        fT = fea_pd.T
        fT['date'] = dt_tool.format(date_str)
        daily_rows.append(fT)

    if not daily_rows:
        return None
    # One concat instead of repeated DataFrame.append inside the loop.
    return pd.concat(daily_rows)
def get_sd(id):
    """Merge daily history with tick features and return it in long format.

    Fetches history with ``ts.get_h_data(id, autype='qfq', start='2005-06-10')``,
    re-labels its index with ``dt_tool.format``, left-joins the result of
    ``get_tick_feature(id, '2015-06-10', 1500)`` on the ``date`` column, and
    stacks the merged table into three columns named ``date``, ``type`` and
    ``value``.

    Side effects: prints the column labels of both inputs and writes
    ``hist.csv``, ``tick.csv`` and ``merged.csv`` to the current directory.

    Args:
        id: Stock code, passed through unchanged to the data fetchers.

    Returns:
        The stacked DataFrame, or None when the history cannot be fetched or
        either data source yields nothing.

    NOTE(review): an earlier ``get_sd(id, label_start_date, fea_delta)`` exists
    in this file; if both live in the same module this definition replaces it.
    """
    try:
        hist = ts.get_h_data(id, autype='qfq', start='2005-06-10')
    except Exception:
        return None
    if hist is None:
        # Previously this crashed on hist.index below.
        return None
    hist.index = [dt_tool.format(x) for x in hist.index.format()]

    fea_tick = get_tick_feature(id, '2015-06-10', 1500)
    if fea_tick is None:
        # get_tick_feature returns None when no day had tick data; previously
        # this crashed on fea_tick.columns.
        return None

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print('fea %s' % (fea_tick.columns,))
    print('hist %s' % (hist.columns,))
    hist.to_csv('hist.csv')
    fea_tick.to_csv('tick.csv')

    h = hist.merge(fea_tick, how='left', left_index=True, right_on='date')
    h.to_csv('merged.csv')

    # Stack into a Series so every (row, column) cell becomes one record.
    sd = pd.DataFrame(h.stack()).reset_index()
    sd.columns = ('date', 'type', 'value')
    return sd
fT['date'] = dt_tool.format(date) if ticks is None: ticks = fT else: ticks = ticks.append(fT) ticks.to_csv('data/ticks.csv', index=None) # 获取历史数据矩阵 # 将数据stack为series, 方便处理 try: #hist = ts.get_h_data(id, autype='qfq', start='2005-06-10') hist = ts.get_hist_data(id) except Exception, e: print e ls = hist.index.format() hist['date'] = [dt_tool.format(x) for x in ls] hist.to_csv('data/hist.csv', index=None) if hist is None or ticks is None: return None # 聚合两份数据 h = hist.merge(ticks, how='left', on='date') h.to_csv('data/merged.csv', index=None) h.index = h.date h = h.drop('date', 1) s = h.stack() spd = pd.DataFrame(s) sd = spd.reset_index() sd.columns = ('date', 'type', 'value') sd.to_csv('data/sd.csv', index=None)