def get_days_data(days=1, df=None):
    """Return the row of ``df`` for the nearest previous trading day it contains.

    Candidate dates come from ``cct.last_tddate(offset)`` for ``offset`` in
    ``range(days, 20)``; the first one found in ``df.index`` wins.

    Args:
        days: first look-back offset handed to ``cct.last_tddate``.
        df: object exposing ``.index`` and ``.loc`` keyed by the values
            ``cct.last_tddate`` returns.

    Returns:
        ``df.loc[date]`` for the first matching date, or ``''`` when ``df``
        is None, the offset range is empty, or no candidate date is present.
    """
    # The default df=None used to fail on ``df.index``; treat it as "no data".
    if df is None:
        return ''
    for offset in range(days, 20):
        yestoday = cct.last_tddate(offset)
        if yestoday in df.index:
            return df.loc[yestoday]
        # One error line per missing date, as before.
        log.error("%s:None" % (yestoday))
    return ''
def get_days_data(days=1):
    """Fetch data for the nearest previous trading day that has any.

    For each ``offset`` in ``range(days, 20)`` the date from
    ``cct.last_tddate(offset)`` is rewritten from ``-`` to ``/`` separators
    and passed to ``get_tzrq(url, date)``. ``url`` and ``get_tzrq`` are taken
    from the surrounding scope. The first non-empty result stops the search.

    Args:
        days: first look-back offset handed to ``cct.last_tddate``.

    Returns:
        The first result whose ``len`` is positive; otherwise the last
        (empty) result fetched, or ``''`` when the offset range is empty.
    """
    # Initialised so that days >= 20 (empty range) no longer raises
    # UnboundLocalError at the return statement.
    data2 = ''
    for offset in range(days, 20):
        yestoday = cct.last_tddate(offset).replace('-', '/')
        data2 = get_tzrq(url, yestoday)
        log.info("yestoday:%s data:%s" % (yestoday, data2))
        if len(data2) > 0:
            break
        log.info("%s:%s" % (yestoday, data2))
    return data2
# cut_num=10000 success = 0 top_all = pd.DataFrame() time_s = time.time() # delay_time = 3600 delay_time = cct.get_delay_time() First = True # base_path = tdd.get_tdx_dir() # block_path = tdd.get_tdx_dir_blocknew() + '067.blk' # blkname = '067.blk' blkname = '063.blk' block_path = tdd.get_tdx_dir_blocknew() + blkname lastpTDX_DF = pd.DataFrame() duration_date = ct.duration_date_l resample = ct.resample_dtype end_date = cct.last_tddate(days=3) # all_diffpath = tdd.get_tdx_dir_blocknew() + '062.blk' # market_sort_value, market_sort_value_key = ct.get_market_sort_value_key(ct.sort_value_key_perd) # st_key_sort = '2' # st_key_sort = '7' st_key_sort = '4' st = None # st_key_sort = ct.sort_value_key_perd while 1: try: # top_now = tdd.getSinaAlldf(market='sh', vol=ct.json_countVol, vtype=ct.json_countType) time_Rt = time.time() if st is None and st_key_sort in ['2', '3']: st_key_sort = '%s %s' % (st_key_sort.split()[0], cct.get_index_fibl())
def get_linear_model_histogramDouble(code, ptype='low', dtype='d', start=None, end=None, vtype='f', filter='n',
                                     df=None, dl=None):
    """Draw a six-panel regression/histogram figure for ``code`` and return its data.

    The ``ptype`` column of the price frame is fitted with an OLS line over
    the bar index; the fitted line, its shifted upper/lower envelopes, a
    comparison against a reference index (``code2``), a second regression
    over LIS-selected lows, and two histograms of the residuals are plotted
    into one 3x2 matplotlib figure.

    Args:
        code: security code; prefixes '999'/'399' are handled as indices.
        ptype: price column used for the fit (e.g. 'low').
        dtype: period type; anything other than 'd' is resampled through
            ``tdd.get_tdx_stock_period_to_type``.
        start: start date; rewritten when ``filter == 'y'``.
        end: end date; when not None the last bar is dropped before plotting.
        vtype: not referenced in the live code of this function.
        filter: 'y' enables the start-date derivation step.
        df: optional pre-loaded price frame; loaded via ``tdd`` when None.
        dl: look-back day count used to derive ``start`` when both ``start``
            and ``df`` are None.

    Returns:
        The price frame ``df`` (possibly trimmed to the reference index range).

    Raises:
        Exception: when the price frame is empty.

    NOTE(review): this block was laid out again from whitespace-collapsed
    source; places where the nesting could not be determined from the text
    alone are marked with NOTE(review) below and should be confirmed.
    """
    if start is not None and filter == 'y':
        if code not in ['999999', '399006', '399001']:
            index_d, dl = tdd.get_duration_Index_date(dt=start)
            log.debug("index_d:%s dl:%s" % (str(index_d), dl))
        else:
            index_d = cct.day8_to_day10(start)
            log.debug("index_d:%s" % (index_d))
        # NOTE(review): assumed to run for both branches above (index_d is set in each).
        start = tdd.get_duration_price_date(code, ptype='low', dt=index_d)
        log.debug("start:%s" % (start))

    if start is None and df is None and dl is not None:
        start = cct.last_tddate(dl)
        df = tdd.get_tdx_append_now_df_api(code, start=start, end=end).sort_index(ascending=True)

    if df is None:
        df = tdd.get_tdx_append_now_df_api(code, start, end).sort_index(ascending=True)
    # NOTE(review): assumed to apply to caller-supplied frames too, not only freshly loaded ones.
    if not dtype == 'd':
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    if len(df) == 0:
        raise Exception("Code:%s error, df is None" % (code))
    asset = df[ptype].round(2)
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()
    dates = asset.index

    if not code.startswith('999') and not code.startswith('399'):
        # Ordinary security: pick the reference index from the code prefix.
        if code[:1] in ['5', '6', '9']:
            code2 = '999999'
        elif code[:2] in ['30']:
            code2 = '399006'
        else:
            code2 = '399001'
        df1 = tdd.get_tdx_append_now_df_api(code2, start, end).sort_index(ascending=True)
        if not dtype == 'd':
            df1 = tdd.get_tdx_stock_period_to_type(
                df1, dtype).sort_index(ascending=True)
        if asset[:1].index[0] > df1[:1].index[0]:
            # Security starts later than the index: align the index to the security's dates.
            asset1 = df1.loc[asset.index, ptype]
            startv = asset1[:1]
            # Normalise the reference series by its first value.
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
        else:
            # Otherwise trim the security to the index's first date.
            df = df[df.index >= df1.index[0]]
            asset = df[ptype]
            asset = asset.dropna()
            dates = asset.index
            asset1 = df1.loc[df.index, ptype]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
    else:
        # Index code: compare against another index.
        if code.startswith('399001'):
            code2 = '399006'
        elif code.startswith('399006'):
            code2 = '399005'
        else:
            code2 = '399006'
        if code2.startswith('3990'):
            df1 = tdd.get_tdx_append_now_df_api(code2, start, end).sort_index(ascending=True)
            # Fall back to 399001 when the reference has under a quarter of the bars.
            if len(df1) < int(len(df) / 4):
                code2 = '399001'
                df1 = tdd.get_tdx_append_now_df_api(
                    code2, start, end).sort_index(ascending=True)
        if not dtype == 'd':
            df1 = tdd.get_tdx_stock_period_to_type(
                df1, dtype).sort_index(ascending=True)
        if len(asset) < len(df1):
            asset1 = df1.loc[asset.index, ptype]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
        else:
            df = df[df.index >= df1.index[0]]
            asset = df[ptype]
            asset = asset.dropna()
            dates = asset.index
            asset1 = df1.loc[df.index, ptype]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))

    if end is not None:
        # Drop the last bar of both series and re-normalise the reference.
        asset = asset[:-1]
        dates = asset.index
        asset1 = asset1[:-1]
        asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))

    # Plot price against time.
    fig = plt.figure(figsize=(16, 10))
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    ax1 = fig.add_subplot(321)
    ax1.plot(asset)
    ticks = ax1.get_xticks()
    # Label x-axis with dates; the final tick is replaced by the last bar index.
    ax1.set_xticklabels(
        [dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)

    # Fit: OLS of price on the bar index.
    X = np.arange(len(asset))
    x = sm.add_constant(X)
    model = regression.linear_model.OLS(asset, x).fit()
    a = model.params[0]
    b = model.params[1]
    log.info("X:%s a:%s b:%s" % (len(asset), a, b))
    Y_hat = X * b + a

    # Actual minus fitted: the largest and smallest residuals bound the value band.
    # Shift the line downward so it passes through the lowest residual.
    i = (asset.values.T - Y_hat).argmin()
    c_low = X[i] * b + a - asset.values[i]
    Y_hatlow = X * b + a - c_low

    # Shift the line upward so it passes through the highest residual.
    i = (asset.values.T - Y_hat).argmax()
    c_high = X[i] * b + a - asset.values[i]
    Y_hathigh = X * b + a - c_high
    plt.plot(X, Y_hat, 'k', alpha=0.9)
    plt.plot(X, Y_hatlow, 'r', alpha=0.9)
    plt.plot(X, Y_hathigh, 'r', alpha=0.9)
    plt.ylabel('Price', fontsize=12)
    plt.title(code + " | " + str(dates[-1])[:11], fontsize=14)
    plt.legend([asset.iat[-1]], fontsize=12, loc=4)
    plt.grid(True)

    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax1, base_scale=scale)
    figPan = zp.pan_factory(ax1)

    # Plot the distance of the price from the centre line (Y - Y_hat) on its
    # own chart and divide the area between the boundary lines evenly; above 0
    # is overvalued, below 0 is undervalued, 0 is the value centre line.
    ax3 = fig.add_subplot(322)
    # NOTE(review): [0] keeps only the first residual, so plt.plot(distance)
    # below receives a single value — confirm this is intended.
    distance = (asset.values.T - Y_hat)[0]
    if len(asset) > len(df1):
        ax3.plot(asset)
        plt.plot(distance)
        ticks = ax3.get_xticks()
        ax3.set_xticklabels(
            [dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
        n = 5
        d = (-c_high + c_low) / n
        c = c_high
        # Step from the upper to the lower envelope in n equal offsets.
        while c <= c_low:
            Y = X * b + a - c
            plt.plot(X, Y - Y_hat, 'r', alpha=0.9)
            c = c + d
        # NOTE(review): assumed to sit after the loop, not inside it.
        ax3.plot(asset)
        plt.ylabel('Price-center price', fontsize=14)
        plt.grid(True)
    else:
        # Overlay the security and the reference index, both normalised to their first value.
        as3 = asset.apply(lambda x: round(x / asset[:1], 2))
        ax3.plot(as3)
        ticks = ax3.get_xticks()
        ax3.plot(asset1, '-r', linewidth=2)
        ax3.set_xticklabels(
            [dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
        plt.grid(True)
        # NOTE(review): zoom/pan wiring assumed to belong to this branch only.
        zp3 = zoompan.ZoomPan()
        figZoom = zp3.zoom_factory(ax3, base_scale=scale)
        figPan = zp3.pan_factory(ax3)

    if 'name' in df.columns:
        plt.legend([df.name.values[-1:][0], df1.name.values[-1:][0]], loc=0)
    else:
        if code not in ['999999', '399006', '399001']:
            indexIdx = False
        else:
            indexIdx = True
        dm = tdd.get_sina_data_df(code, index=indexIdx)
        if 'name' in dm.columns:
            cname = dm.name[0]
        else:
            cname = '-'
        plt.legend([cname, code2], loc=0)

    ax2 = fig.add_subplot(323)
    # Reuses ``ticks`` from the previous panel.
    ax2.set_xticklabels(
        [dates[int(i)] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
    n = 5
    d = (-c_high + c_low) / n
    c = c_high
    # Draw the equally spaced parallel lines between the two envelopes.
    while c <= c_low:
        Y = X * b + a - c
        plt.plot(X, Y, 'r', alpha=0.9)
        c = c + d
    ax2.plot(asset)
    # plt.xlabel('Date', fontsize=12)
    plt.ylabel('Price', fontsize=12)
    plt.grid(True)

    if len(df) > 10:
        ax6 = fig.add_subplot(324)
        h = df.loc[:, ['open', 'close', 'high', 'low']]
        highp = h['high'].values
        lowp = h['low'].values
        openp = h['open'].values
        closep = h['close'].values
        # Regression of close on bar position, extended 200 points past the data.
        lr = LinearRegression()
        x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
        lr.fit(x, closep)
        # The next statement builds an estimator and discards it.
        LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
        xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
        yt = lr.predict(xt)
        bV = []
        bP = []
        for i in range(1, len(highp) - 1):
            # Bar whose high and low are both below the neighbours': record its low,
            # otherwise record the previous bar's low.
            # NOTE(review): the else is assumed to pair with this if (not the for).
            if highp[i] <= highp[i - 1] and highp[i] < highp[
                    i + 1] and lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]:
                bV.append(lowp[i])
                bP.append(i)
            else:
                bV.append(lowp[i - 1])
                bP.append(i - 1)
        if len(bV) > 0:
            # LIS is defined elsewhere; presumably a longest-increasing-subsequence
            # helper returning (values, positions) — confirm.
            d, p = LIS(bV)
            idx = []
            for i in range(len(p)):
                idx.append(bP[p[i]])
            lr = LinearRegression()
            # X and Y are rebound here; the OLS X above is not used afterwards.
            X = np.atleast_2d(np.array(idx)).T
            Y = np.array(d)
            lr.fit(X, Y)
            estV = lr.predict(xt)
            ax6.plot(closep, linewidth=2)
            ax6.plot(idx, d, 'ko')
            ax6.plot(xt, estV, '-r', linewidth=3)
            ax6.plot(xt, yt, '-g', linewidth=3)
            plt.grid(True)
            zp2 = zoompan.ZoomPan()
            figZoom = zp2.zoom_factory(ax6, base_scale=scale)
            figPan = zp2.pan_factory(ax6)

    # Count how often the price falls in each zone to get a histogram; the
    # whole range is split into 100 bins for finer detail.
    ax4 = fig.add_subplot(325)
    log.info("assert:len:%s %s" %
             (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    # Absolute residuals.
    distance = (asset.values.T - Y_hat)
    now_distanse = asset.iat[-1] - Y_hat[-1]
    pd.Series(distance).plot(kind='hist', stacked=True, bins=100)
    # NOTE(review): the `hold` keyword is not accepted by newer matplotlib
    # releases — confirm the pinned version before upgrading.
    plt.axvline(now_distanse, hold=None, label="1", color='red')
    plt.xlabel(
        'Undervalue ------------------------------------------> Overvalue', fontsize=12)
    plt.ylabel('Frequency', fontsize=14)
    plt.legend([code, asset.iat[-1], str(dates[-1])[5:11]], fontsize=12)
    plt.grid(True)

    ax5 = fig.add_subplot(326)
    log.info("assert:len:%s %s" %
             (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    # Residuals as a percentage of the fitted value.
    distance = (asset.values.T - Y_hat) / Y_hat * 100
    now_distanse = ((asset.iat[-1] - Y_hat[-1]) / Y_hat[-1] * 100)
    log.debug("dis:%s now:%s" % (distance[:2], now_distanse))
    log.debug("now_distanse:%s" % now_distanse)
    pd.Series(distance).plot(kind='hist', stacked=True, bins=100)
    plt.axvline(now_distanse, hold=None, label="1", color='red')
    plt.xlabel(
        'Undervalue ------------------------------------------> Overvalue', fontsize=14)
    plt.ylabel('Frequency', fontsize=12)
    plt.legend([code, asset.iat[-1]], fontsize=12)
    plt.grid(True)
    # Render without blocking the caller.
    plt.draw()
    plt.pause(0.001)
    return df
def get_roll_mean_all(single=True, tdx=False, app=True, duration=100, ma_250_l=1.02, ma_250_h=1.11):
    """Screen all codes by rolling means of close and optionally write a block file.

    Loads the multi-code history through ``tdd.search_Tdx_multi_data_duration``,
    adds ``ma<N>`` rolling-mean columns plus ``upper``/``lower``/``ene`` bands
    derived from ``ma10``, keeps rows passing the mask below, and stores the
    result with ``h5a.write_hdf_db``. With ``tdx`` set, the surviving codes are
    also written to the '060.blk' block file.

    Args:
        single: True keeps only the last row per code; False keeps the rows
            since a date derived from index '999999'.
        tdx: when true, write the selected codes to the block file.
        app: interactive mode: prints timings and asks whether to rewrite or
            append the block file. When false, an up-to-date block file
            short-circuits the run.
        duration: extra rolling window; also selects the 300 vs 900 HDF table.
        ma_250_l: only referenced in commented-out masks; unused in live code.
        ma_250_h: multiplier applied to the longest rolling mean in the mask.

    Returns:
        True when ``app`` is false and the block file is reported as created
        today; otherwise the filtered DataFrame.

    NOTE(review): relies on the free name ``time_s`` and on Python 2 syntax
    (print statements, list-returning ``map``). The block was laid out again
    from whitespace-collapsed source; ambiguous nesting is marked NOTE(review).
    """
    block_path = tdd.get_tdx_dir_blocknew() + '060.blk'
    # Skip the whole run when the block file is non-trivial and was created today.
    if not app and cct.get_file_size(
            block_path) > 100 and cct.creation_date_duration(block_path) == 0:
        print "It's Today Update"
        return True
    code_list = sina_data.Sina().market('all').index.tolist()
    print "all code:", len(code_list)
    if duration < 300:
        h5_fname = 'tdx_all_df' + '_' + str(300)
        h5_table = 'all' + '_' + str(300)
    else:
        h5_fname = 'tdx_all_df' + '_' + str(900)
        h5_table = 'all' + '_' + str(900)
    df = tdd.search_Tdx_multi_data_duration(h5_fname, h5_table, df=None, code_l=code_list, start=None, end=None,
                                            freq=None, col=None, index='date')
    code_uniquelist = df.index.get_level_values('code').unique()
    # Random sample code, used only in the progress print-outs.
    code_select = code_uniquelist[random.randint(0, len(code_uniquelist) - 1)]
    print round(time.time() - time_s, 2), df.index.get_level_values(
        'code').unique().shape, code_select, df.loc[code_select].shape
    dfs = df

    def get_groupby_mean_median_close(dfs):
        # Per code: keep those whose mean close exceeds the median close and
        # whose last close exceeds the mean.
        groupd = dfs.groupby(level=[0])
        # NOTE(review): dict-renaming in SeriesGroupBy.agg is not supported by
        # newer pandas releases — confirm the pinned version.
        df = groupd['close'].agg({'median': 'median', 'mean': 'mean'})
        df['close'] = groupd.tail(1).reset_index().set_index(['code'])['close']
        df = df[(df['mean'] > df['median']) & (df['close'] > df['mean'])]
        return df

    groupd = dfs.groupby(level=[0])
    if duration < 300:
        rollma = ['10']
    else:
        rollma = ['10', '250']
    # NOTE(review): assumed to run for both branches, making len(rollma) > 1 always.
    rollma.extend([str(duration)])
    for da in rollma:
        cumdays = int(da)
        # NOTE(review): pd.rolling_mean is not available in newer pandas releases.
        dfs['ma%d' % cumdays] = groupd['close'].apply(pd.rolling_mean, cumdays)
        if cumdays == 10:
            # Bands at +11% / -9% of ma10, and their midpoint.
            dfs['upper'] = dfs['ma%d' % cumdays].apply(lambda x: round(
                (1 + 11.0 / 100) * x, 1))
            dfs['lower'] = dfs['ma%d' % cumdays].apply(lambda x: round(
                (1 - 9.0 / 100) * x, 1))
            dfs['ene'] = map(lambda x, y: round((x + y) / 2, 1), dfs['upper'],
                             dfs['lower'])

    dt_low = None
    df_idx = None
    if single:
        dfs = groupd.tail(1)
        print("dfs tail1")
    else:
        dl = 30
        dindex = tdd.get_tdx_Exp_day_to_df('999999', dl=dl).sort_index(ascending=False)
        dt = tdd.get_duration_price_date('999999', df=dindex)
        # Dates of index '999999' on or after the date returned above.
        dt = dindex[dindex.index >= dt].index.values
        dt_low = dt[-1]
        dtlen = len(dt) if len(dt) > 0 else 1
        dfs = groupd.tail(dtlen)
        df_idx = get_groupby_mean_median_close(dfs)
        print("dfs tail:%s dt:%s" % (dtlen, dt))
        dfs = get_multi_date_duration(dfs, dt[-1])
        # Forces the `if single:` section further down to run for this path too.
        single = True
    dfs = dfs.fillna(0)
    idx = pd.IndexSlice
    if len(rollma) > 1:
        # Both means positive, short mean above long mean, close above the short
        # mean and above long mean * ma_250_h, and close above ene or upper.
        mask = ((dfs[('ma%s') % (rollma[0])] > 0) & (dfs[('ma%s') % (rollma[-1])] > 0)
                & (dfs[('ma%s') % (rollma[0])] > dfs[('ma%s') % (rollma[-1])])
                & (dfs[('close')] > dfs[('ma%s') % (rollma[0])])
                & (dfs[('close')] > dfs[('ma%s') % (rollma[-1])] * ma_250_h)
                & ((dfs[('close')] > dfs['ene']) | (dfs[('close')] > dfs['upper'])))
    else:
        mask = ((dfs[('ma%s') % (rollma[0])] > 0)
                & (dfs[('close')] > dfs[('ma%s') % (rollma[0])])
                & ((dfs[('close')] > dfs['ene']) | (dfs[('close')] > dfs['upper'])))
    df = dfs.loc[idx[mask, :]]
    # get_multi_code_count is defined elsewhere; the code below reads a 'couts'
    # column from its result.
    df = get_multi_code_count(df)
    print(df.couts[:5])
    code_uniquelist = df.index.get_level_values('code').unique()
    code_select = code_uniquelist[random.randint(0, len(code_uniquelist) - 1)]
    if app:
        print round(time.time() - time_s, 2), 's', df.index.get_level_values(
            'code').unique().shape, code_select, df.loc[code_select][-1:]

    if single:
        if tdx:
            if dt_low is not None:
                # Multi-day path: last row per code, re-indexed by code.
                groupd2 = df.groupby(level=[0])
                df = groupd2.tail(1)
                df = df.reset_index().set_index('code')
                dd = df[(df.date == dt_low)]
                df = df[(df.date >= cct.last_tddate(1))]
                print("df:%s df_idx:%s" % (len(df), len(df_idx)))
                if df_idx is not None and len(df_idx) > 0:
                    # Restrict to codes that passed the mean/median screen.
                    df = df.loc[df_idx.index, :].dropna()
                print("Main Down dd :%s MainUP df:%s couts std:%0.1f " %
                      (len(dd), len(df), df.couts.std()))
                df = df.sort_values(by='couts', ascending=1)
                # Keep codes whose count exceeds the standard deviation of counts.
                df = df[df.couts > df.couts.std()]
                codew = df.index.tolist()
                if app:
                    print round(time.time() - time_s, 2), 'groupd2', len(df)
            # NOTE(review): this else is assumed to pair with `if dt_low is not None`.
            else:
                df = df.reset_index().set_index('code')
                df = df[(df.date >= cct.last_tddate(days=10))
                        & (df.date <= cct.get_today())]
                codew = df.index.tolist()

            top_temp = tdd.get_sina_datadf_cnamedf(codew, df)
            # Drop codes containing '688' and names containing 'ST'.
            top_temp = top_temp[(~top_temp.index.str.contains('688')) & (~top_temp.name.str.contains('ST'))]
            codew = top_temp.index.tolist()
            if app:
                hdf5_wri = cct.cct_raw_input("rewrite code [Y] or append [N]:")
                if hdf5_wri == 'y' or hdf5_wri == 'Y':
                    append_status = False
                else:
                    append_status = True
            else:
                append_status = False
            # Only write when more than 10 codes survived.
            if len(codew) > 10:
                cct.write_to_blocknew(block_path, codew, append_status, doubleFile=False, keep_last=0)
                print "write:%s block_path:%s" % (len(codew), block_path)
            else:
                print "write error:%s block_path:%s" % (len(codew), block_path)
        MultiIndex = False
    # NOTE(review): this else is assumed to pair with `if single:`; since single
    # is forced to True above it would then be unreachable. It may instead pair
    # with `if tdx:` — confirm against the original layout.
    else:
        MultiIndex = True
    h5a.write_hdf_db('all300', df, table='roll200', index=False, baseCount=500, append=False, MultiIndex=MultiIndex)
    return df