def phase1_dump(taname, setname):
    """Pickle the flattened phase-1 metas for one feature table and set.

    Loads the feature frame with ``feat_select.load_feat(taname, setname)``,
    splits it with ``feat_select.split_dates`` and keeps only the first of
    the three parts. The metas of that part (``feat_select.get_metas``) are
    flattened with ``feat_select.flat_metas`` and written to
    ``<root>/data/feat_select/phase1_dump/<setname>_<taname>.pkl``.

    Args:
        taname: Feature table name; passed to ``load_feat`` and used as the
            second component of the output file name.
        setname: Set name; passed to ``load_feat`` and used as the first
            component of the output file name.

    Returns:
        None. The result is only written to disk.

    Raises:
        OSError: If the output directory cannot be created for any reason
            other than it already existing as a directory.
    """
    import errno  # local import: only needed for the directory guard below

    dfTa = feat_select.load_feat(taname, setname)
    # split_dates is unpacked into three parts; only the first is dumped.
    phase1, _, _ = feat_select.split_dates(dfTa)
    dfmetas = feat_select.flat_metas(feat_select.get_metas(phase1))

    outdir = os.path.join(root, "data", "feat_select", "phase1_dump")
    # Create-then-handle instead of exists()-then-create: a concurrent run
    # may create the directory between the check and the makedirs call.
    try:
        os.makedirs(outdir)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(outdir):
            raise

    dfmetas.to_pickle(os.path.join(outdir, "%s_%s.pkl" % (setname, taname)))
def work(setname, start, end, depth, thresh, scorename):
    """Build, filter and pickle the flattened metas for one set and range.

    Steps, in order:

    1. Fetch the merged "base1" frame via ``base.get_merged`` for whatever
       ``yeod.get_<setname>()`` returns, between ``start`` and ``end``, and
       print its shape.
    2. Reset the frame's index in place, then apply
       ``score.agn_rank_score`` twice (once with defaults, once with
       ``interval=5, threshold=0.55``) and ``score.agn_label_score`` with
       ``interval=5, threshold=1.0``.
    3. Flatten with ``feat_select.flat_metas(frame, depth, 100000,
       scorename)`` and print the ``fname`` and ``c_p`` columns.
    4. Filter with ``extract_meta(meta, thresh)``, reset the index, and
       pickle the result under ``./data/model/``.

    Args:
        setname: Suffix selecting the ``yeod.get_<setname>`` accessor; also
            part of the output file name.
        start: Range start, forwarded to ``base.get_merged``.
        end: Range end, forwarded to ``base.get_merged``.
        depth: Forwarded to ``feat_select.flat_metas``; formatted with
            ``%d`` in the output file name.
        thresh: Forwarded to ``extract_meta``.
        scorename: Forwarded to ``feat_select.flat_metas``; also part of the
            output file name.

    Returns:
        The filtered meta frame, with its index reset.
    """
    # NOTE(review): the literal 100000 is passed to flat_metas and also
    # appears in the file name; its meaning is not visible here -- confirm.
    set_getter = getattr(yeod, "get_%s" % setname)
    merged = base.get_merged("base1", set_getter(), start, end)
    print(merged.shape)
    merged.reset_index(drop=True, inplace=True)

    # Each scoring call takes the frame and returns the frame to continue with.
    merged = score.agn_rank_score(merged)
    merged = score.agn_rank_score(merged, interval=5, threshold=0.55)
    merged = score.agn_label_score(merged, interval=5, threshold=1.0)

    flat = feat_select.flat_metas(merged, depth, 100000, scorename)
    print(flat[["fname", "c_p"]])

    selected = extract_meta(flat, thresh)
    selected.reset_index(drop=True, inplace=True)

    out_path = "./data/model/meta_base1_%s_%s_%s_%s_%d_100000.pkl" % (
        setname, scorename, start, end, depth)
    selected.to_pickle(out_path)
    return selected