Exemplo n.º 1
0
def prepare_synthetic_seq(patts_dets, bck_dets):
    """Assemble a synthetic data sequence together with its pattern collection.

    Each entry of ``patts_dets`` is a dict of keyword arguments forwarded to
    ``prepare_pattern``, which yields a pattern and its occurrences. Each
    entry of ``bck_dets`` is a dict of keyword arguments forwarded to
    ``prepare_bck`` to produce additional occurrences.

    Returns a pair ``(PatternCollection, DataSequence)``.

    Raises IndexError when the patterns produce no occurrence at all, since
    the first and last occurrences are needed to delimit the background.
    """
    patterns = []
    occurrences = set()
    for details in patts_dets:
        pattern, pattern_occs = prepare_pattern(**details)
        patterns.append(pattern)
        occurrences.update(pattern_occs)

    # Bounds and count are taken from the pattern occurrences only, before
    # any background is mixed in; they stay fixed for every background spec.
    # NOTE(review): occurrences look like tuples starting with a timestamp
    # (first component of the smallest/largest entry) -- confirm.
    ordered = sorted(occurrences)
    nb_pattern_occs = len(ordered)
    t_first = ordered[0][0]
    t_last = ordered[-1][0]

    for details in bck_dets:
        occurrences.update(
            prepare_bck(t_first, t_last, nb_pattern_occs, **details))

    ds = DataSequence(sorted(occurrences))

    # Re-express the events of every pattern tree through the sequence's
    # event-to-number mapping.
    ev_to_num = ds.getEvToNum()
    for tree, _t0, _E in patterns:
        tree.mapEvents(ev_to_num)

    return PatternCollection(patterns), ds
Exemplo n.º 2
0
def run_combine(setts, fn_b, i, counts_cmp, pool):
    """Combine randomly chosen pool sequences, mine the result and compare.

    A number ``k`` of pool entries is drawn uniformly in
    ``[setts["k_low"], setts["k_up"]]`` (entries are drawn with
    ``numpy.random.choice``, so the same entry may be picked several times).
    For each entry, the data sequence (``<basis>_ds.txt``) and its pattern
    collection (``<basis>_pcH.txt``) are loaded, shifted by a random time
    offset and merged into a single sequence / pattern list.

    The combined sequence is written with ``writeSYNTHin``, mined with
    ``mine_seqs``, and the mined collection is compared to the combined
    reference collection with ``compare_pcs``; the outcome is written with
    ``writeSYNTHout``.

    Parameters:
        setts: dict of settings; reads "k_low", "k_up", "t0_low", "t0_up".
        fn_b: base of the output filename; the run basis is "<fn_b>-<i>".
        i: run identifier.
        counts_cmp: dict updated in place; the count stored under the
            comparison outcome code is incremented.
        pool: indexable collection of filenames, expected to end in
            "_ds.txt".

    Returns:
        ``(stats[0]["cl"], stats[1]["cl"], ds.codeLengthResiduals())`` where
        ``stats`` comes from ``compare_pcs`` (presumably index 0 is the
        reference collection and 1 the mined one -- confirm) and ``ds`` is
        the combined sequence.
    """
    fn_basis = "%s-%s" % (fn_b, i)

    k = numpy.random.randint(setts["k_low"], setts["k_up"]+1)
    patt_fn = []
    combine_seqs = {}
    patterns_list = []
    offset_t0 = 0
    prev_span = 0
    t0_list = []
    for ii in numpy.random.choice(len(pool), size=k):
        # Strip only a literal, trailing "_ds.txt" (the dot is escaped and the
        # match anchored so nothing else in the path can be removed).
        fn_sub_basis = re.sub(r"_ds\.txt$", "", pool[ii])
        seqs = readSequence({"filename": fn_sub_basis+"_ds.txt", "SEP": " "})
        sub_ds = DataSequence(seqs)
        pcsH = load_pc(fn_sub_basis+"_pcH.txt", sub_ds)
        patt_fn.append(fn_sub_basis)

        # The shift relative to the previous start is drawn as a fraction of
        # the previous sequence's span; for the first entry the span is 0, so
        # the shift is always 0.
        shift_low = int(numpy.ceil(prev_span*setts["t0_low"]))
        shift_up = int(numpy.ceil(prev_span*setts["t0_up"]))
        next_t0 = offset_t0 + numpy.random.randint(shift_low, shift_up+1)
        t0_list.append(next_t0)

        # seq+next_t0 relies on seq supporting element-wise addition
        # (e.g. a numpy array).
        for ev, seq in seqs.items():
            combine_seqs.setdefault(ev, set()).update(seq+next_t0)

        for (p, pt0, pE) in pcsH.getPatterns():
            patterns_list.append((p, pt0+next_t0, pE))

        offset_t0 = next_t0
        prev_span = sub_ds.getTend()

    comb_setts = {"t0s": t0_list, "patt_fn": patt_fn, "k": k}
    combine_ss = {ev: numpy.array(sorted(s)) for (ev, s) in combine_seqs.items()}
    ds = DataSequence(combine_ss)
    pcH = PatternCollection(patterns_list)
    writeSYNTHin(ds, pcH, fn_basis)

    #### mine data sequence
    SXPS = SyntheXPS()
    mine_seqs(ds, fn_basis, writePCout_fun=SXPS.addPC)
    pcF = SXPS.getPC()
    #####
    out_v, stats, _ = compare_pcs(ds, pcH, pcF)
    writeSYNTHout(setts, ds, pcH, pcF, out_v, fn_basis, save_pc=True, comb_setts=comb_setts)
    counts_cmp[out_v] = counts_cmp.get(out_v, 0)+1

    # Only report runs whose comparison outcome code is non-zero.
    if out_v != 0:
        print("RUN %s\tcl: %f vs. %f\t%d >> %s" % (i, stats[0]["cl"], stats[1]["cl"], out_v, CMP_OUT_CODES[out_v]))
    return stats[0]["cl"], stats[1]["cl"], ds.codeLengthResiduals()
Exemplo n.º 3
0
def _draw_repetitions(setts):
    """Draw the list of repetition counts, one per level (``setts["level"]``)."""
    k = setts["level"]
    if k == 1:
        return [numpy.random.randint(int(.66*setts["nb_occs"]), setts["nb_occs"])]
    if not k > 1:
        return []

    # Largest mm such that the product of the upper bounds mm, 2*mm, ..., k*mm
    # (i.e. k! * mm**k) does not exceed the requested number of occurrences.
    mm = int(numpy.floor((setts["nb_occs"]/(1.*numpy.prod(range(1, k+1))))**(1./k)))
    if mm >= 3:
        return [numpy.random.randint(max(3, numpy.ceil((kk+.33)*mm)), (kk+1)*mm+1)
                for kk in range(k)][::-1]

    # Too few occurrences for increasing ranges: use the same range at every
    # level, or the minimum of 3 repetitions when even that does not fit.
    nn = int(numpy.floor(setts["nb_occs"]**(1./k)))
    if nn < 3:
        return [3 for kk in range(k)]
    return [numpy.random.randint(max(3, numpy.ceil(.33*nn)), nn+1)
            for kk in range(k)][::-1]


def _draw_periods(setts, Rs):
    """Draw the list of periods, one per level, given the repetition counts."""
    k = setts["level"]
    Ps = [numpy.random.randint(setts["p_down"], setts["p_up"])]
    for kk in range(1, k):
        # Lower bound of the next period: the full extent of the previous
        # level, or half of it when "overlap" is enabled.
        if setts.get("overlap", False):
            prev = Ps[-1]*.5*Rs[kk-1]
        else:
            prev = Ps[-1]*Rs[kk-1]
        tt = numpy.random.randint(prev, prev+100)
        # Redraw (at most 100 times) while the candidate is a multiple of one
        # of the periods already chosen.
        ik = 0
        while any(tt % p == 0 for p in Ps) and ik < 100:
            ik += 1
            tt = numpy.random.randint(prev, prev+100)
        Ps.append(tt)
    return Ps


def run_one(setts, fn_b, i, counts_cmp):
    """Run one synthetic experiment: generate (or reload), mine and compare.

    Intermediate results are cached on disk under the basis "<fn_b>-<i>":
      * if "<basis>_pcH.txt" exists, the data sequence ("<basis>_ds.txt") and
        the reference pattern collection are loaded; otherwise they are
        generated from ``setts`` and written with ``writeSYNTHin``;
      * if "<basis>_pcF.txt" exists, the mined collection is loaded;
        otherwise the sequence is mined with ``mine_seqs``;
      * if "<basis>_summary.txt" exists, the outcome code is read from the
        first token of its first line and code lengths are recomputed;
        otherwise ``compare_pcs`` is run and the result is written with
        ``writeSYNTHout``.

    Parameters:
        setts: dict of settings; generation reads "level", "nb_occs",
            "p_down", "p_up", "inner", "noise_lvl", "noise_dens" and the
            optional "overlap" and "add_noise" (iterable of
            ``(event, nb_occs)`` pairs).
        fn_b: base of the filenames.
        i: run identifier.
        counts_cmp: dict updated in place; the count stored under the
            comparison outcome code is incremented.

    Returns:
        ``(stats[0]["cl"], stats[1]["cl"], ds.codeLengthResiduals())``, i.e.
        the code lengths for the reference and mined collections followed by
        the residual code length of the sequence.
    """
    fn_basis = "%s-%s" % (fn_b, i)
    if os.path.isfile(fn_basis+"_pcH.txt"):
        ds = DataSequence(readSequence({"filename": fn_basis+"_ds.txt", "SEP": " "}))
        pcH = load_pc(fn_basis+"_pcH.txt", ds)
    else:
        #### generate data sequence
        Rs = _draw_repetitions(setts)
        Ps = _draw_periods(setts, Rs)
        patts_dets = [{"inner": setts["inner"], "t0": 0, "Rs": Rs, "Ps": Ps,
                       "noise_lvl": setts["noise_lvl"], "noise_dens": setts["noise_dens"]}]
        bck_dets = [{"event": e, "nb_occs": c} for (e, c) in setts.get("add_noise", [])]

        print("%s %s %s" % (patts_dets, bck_dets, numpy.prod(Rs)))
        pcH, ds = prepare_synthetic_seq(patts_dets, bck_dets)
        writeSYNTHin(ds, pcH, fn_basis)
        #####

    save_pcF = True
    if os.path.isfile(fn_basis+"_pcF.txt"):
        pcF = load_pc(fn_basis+"_pcF.txt", ds)
        # Already on disk, no need to write it again.
        save_pcF = False
    else:
        #### mine data sequence
        SXPS = SyntheXPS()
        mine_seqs(ds, fn_basis, writePCout_fun=SXPS.addPC)
        pcF = SXPS.getPC()
        #####

    if os.path.isfile(fn_basis+"_summary.txt"):
        with open(fn_basis+"_summary.txt") as fp:
            out_v = int(fp.readline().split()[0])
        stats = {0: {"cl": pcH.codeLength(ds)}, 1: {"cl": pcF.codeLength(ds)}}
    else:
        out_v, stats, _ = compare_pcs(ds, pcH, pcF)
        writeSYNTHout(setts, ds, pcH, pcF, out_v, fn_basis, save_pc=save_pcF)
    counts_cmp[out_v] = counts_cmp.get(out_v, 0)+1

    # Only report runs whose comparison outcome code is non-zero.
    if out_v != 0:
        print("RUN %s\tcl: %f vs. %f\t%d >> %s" % (i, stats[0]["cl"], stats[1]["cl"], out_v, CMP_OUT_CODES[out_v]))
    return stats[0]["cl"], stats[1]["cl"], ds.codeLengthResiduals()
Exemplo n.º 4
0
        "max_len": 3000
    }
    # fparams = {"filename": "/home/egalbrun/TKTL/misc/itrami/per-pat/data/traces/prepared/trace_bugzilla_1_data.dat",
    #                "timestamp": False, "events": ["*"], "min_len": 30, "max_len": 3000}

    # seqs = readSequence(fparams)

    seq = []
    for i in range(15):
        seq.append(i * 400 + numpy.arange(0, 20, 2))
    seqs = {0: numpy.hstack(seq)}

    for ci, seq in seqs.items():
        dets = None

        ds = DataSequence({"a": seq})
        print "------------"
        if len(seq) < 30:
            print "SEQUENCE %s: (%d)\t %s" % (ci, len(seq), seq)
        else:
            print "SEQUENCE %s: (%d)" % (ci, len(seq))
        data_details = ds.getDetails()

        results = run_test(seq, "a", data_details, dets)
        for result in results:
            nb_cycl = len([c for c in result["cycles"] if c.get("cp") > 0])
            nb_res = numpy.sum(
                [len(c["occs"]) for c in result["cycles"] if c.get("cp") <= 0])

            print "%s:\tCL=%f nC=%d nR=%d RT=%f" % (
                result["meth"], result["CL"], nb_cycl, nb_res, result["RT"])