def work2(df, f):
    """Apply the phase-2 and phase-3 feature splits of "sp500"/"base1" to *df*.

    Loads the "base1" features for the "sp500" set, splits them by date into
    three phases, applies phases 2 and 3 in turn against the "label5" label
    (with column suffixes "_p2" and "_p3"), and hands the result to
    ``feat_select.ana2``.

    Args:
        df: Input passed to ``feat_select.apply``
            (presumably a pandas DataFrame -- confirm against the caller).
        f: Forwarded unchanged to ``feat_select.ana2``
            (presumably an open report file -- confirm against the caller).
    """
    setname = "sp500"
    taname = "base1"
    # split_dates returns three phases; the first one is not needed here.
    _, phase2, phase3 = feat_select.split_dates(
        feat_select.load_feat(taname, setname))
    df2 = feat_select.apply(df, phase2, "label5", "_p2")
    # The phase-3 pass runs on the output of the phase-2 pass, not on df.
    df2 = feat_select.apply(df2, phase3, "label5", "_p3")
    feat_select.ana2(df2, f, setname)
def phase1_dump(taname, setname):
    """Pickle the flattened phase-1 metas for one (setname, taname) pair.

    Loads the features, takes the first of the three date phases, flattens
    its metas via ``feat_select.flat_metas(feat_select.get_metas(...))`` and
    writes the result to
    ``<root>/data/feat_select/phase1_dump/<setname>_<taname>.pkl``.

    Args:
        taname: Feature-set name passed to ``feat_select.load_feat``.
        setname: Data-set name passed to ``feat_select.load_feat``.

    Raises:
        OSError: If the output directory cannot be created and does not
            already exist as a directory.
    """
    dfTa = feat_select.load_feat(taname, setname)
    # Only the first phase is dumped; the other two are ignored.
    phase1, _, _ = feat_select.split_dates(dfTa)
    dfmetas = feat_select.flat_metas(feat_select.get_metas(phase1))

    outdir = os.path.join(root, "data", "feat_select", "phase1_dump")
    # Create first and inspect on failure: an exists()-then-makedirs() check
    # can fail if another process creates the directory in between.
    try:
        os.makedirs(outdir)
    except OSError:
        if not os.path.isdir(outdir):
            raise

    dfmetas.to_pickle(os.path.join(outdir, "%s_%s.pkl" % (setname, taname)))
def work(df, f):
    """Run the phase-2/phase-3 apply-and-analyse pass over ten "sp500R*T*" sets.

    For each of the ten consecutive 50-wide ranges (0-50, 50-100, ...,
    450-500) the set name "sp500R<frm>T<to>" is built, its "base1" features
    are loaded and split by date, phases 2 and 3 are applied to *df* against
    the "label5" label (suffixes "_p2" and "_p3"), and the result is passed
    to ``feat_select.ana2``.

    Args:
        df: Input passed to ``feat_select.apply`` on every iteration
            (presumably a pandas DataFrame -- confirm against the caller).
        f: Forwarded unchanged to ``feat_select.ana2``
            (presumably an open report file -- confirm against the caller).
    """
    taname = "base1"
    # Start offsets 0, 50, ..., 450 -- same values as 50 * i for i in range(10).
    for frm in range(0, 500, 50):
        to = frm + 50
        setname = "sp500R%dT%d" % (frm, to)
        # split_dates returns three phases; the first one is not needed here.
        _, phase2, phase3 = feat_select.split_dates(
            feat_select.load_feat(taname, setname))
        # Each iteration starts again from the caller's df.
        df2 = feat_select.apply(df, phase2, "label5", "_p2")
        df2 = feat_select.apply(df2, phase3, "label5", "_p3")
        feat_select.ana2(df2, f, setname)
# NOTE(review): this is a fragment -- the enclosing definition and the setup of
# abs_direct_p_set, abs_direct_n_set, df, f, args and dataroot are not visible
# here, and the original line breaks/indentation were lost, so which trailing
# statements sit inside the `for` loop cannot be determined from this text.
# Python 2 syntax (`print x`, `print >> f`).
#
# What the visible statements do:
#   * copy abs_direct_n_set into orig_direct_n_set, print the sizes of both
#     direction sets, and write a "========" separator to f;
#   * for each of ten 50-wide ranges, build setname "sp500R<frm>T<to>" and a
#     cache path <dataroot>/phase1_dump/
#     sp500_base1_apply_phase1_<setname>_<taname>_<args.depth>.pkl;
#     when that file is missing, compute feat_select.apply(df, <first element
#     of split_dates(...)>, "label5", "_p1") and pickle it there;
#   * load the pickle, call feat_select.ana_apply on it, and intersect the
#     running sets with the names whose direct_p1 is 1 and -1 respectively;
#   * print the remaining negative set, set df["istable"] to 1 for rows whose
#     "name" is in either set (0 otherwise), and zero df["direct"] wherever
#     "istable" is 0.
# NOTE(review): pd.read_pickle must only be used on trusted files.
orig_direct_n_set = abs_direct_n_set.copy() print len(abs_direct_p_set) print len(abs_direct_n_set) print >> f, "=" * 8 for i in range(10): frm = 50 * i to = frm + 50 setname = "sp500R%dT%d" % (frm, to) taname = "base1" filename = os.path.join( dataroot, "phase1_dump", "sp500_base1_apply_phase1_%s_%s_%d.pkl" % (setname, taname, args.depth)) if not os.path.exists(filename): df2 = feat_select.apply( df, feat_select.split_dates(feat_select.load_feat(taname, setname))[0], "label5", "_p1") df2.to_pickle(filename) df2 = pd.read_pickle(filename) feat_select.ana_apply(df2, "_p1", setname, f) cur_p_set = set(df2[df2.direct_p1 == 1].name.unique()) cur_n_set = set(df2[df2.direct_p1 == -1].name.unique()) abs_direct_p_set = abs_direct_p_set.intersection(cur_p_set) abs_direct_n_set = abs_direct_n_set.intersection(cur_n_set) print list(abs_direct_n_set) df.loc[:,"istable"] = df.apply(lambda row: 1 if row["name"] in abs_direct_p_set else \ (1 if row["name"] in abs_direct_n_set else 0), axis = 1) df.loc[:, "direct"] = df.apply(lambda row: 0 if row["istable"] == 0 else row["direct"], axis=1)