Exemplo n.º 1
0
def START_AUTO(filename):
    """Run a MAR labelling loop on ``filename`` until positives stop growing.

    Each round reads ``(pos, neg, total)`` from ``get_numbers()``, prints
    ``pos`` and ``pos + neg``, then labels a batch of ids with the values
    already stored in ``body["label"]``: the ids from ``random()`` while
    ``pos`` is 0, otherwise the third element returned by ``train()``.

    Once ``pos`` is at least 10, every round in which ``pos`` equals the
    previous round's value costs one of three lives, and any change restores
    all three. The loop ends when no lives remain.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    max_lives = 3
    lives = max_lives
    previous_pos = 0
    reader = MAR().create(filename)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        if pos >= 10:
            stalled = pos == previous_pos
            lives = lives - 1 if stalled else max_lives
            if stalled and lives == 0:
                break
        # Pick the batch first so the labelling loop is written only once.
        if pos == 0:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train()
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
        previous_pos = pos
    return reader
Exemplo n.º 2
0
def START(filename, cl="linear"):
    """Run a MAR labelling loop on ``filename`` until the target is reached.

    The target is ``int(get_allbugs() * 1)``. Each round unpacks
    ``get_numbers()`` as ``(found, cost, total)`` and prints the first two.
    While ``found`` is 0 or ``cost`` is below 40 the ids come from
    ``loc_sort()``; afterwards they are the first element returned by
    ``train(cl=cl)``. Every selected id is labelled with the value already
    stored in ``body["label"]``.

    After the loop ``plot()`` is called and then ``set_trace()``.

    Args:
        filename: Passed to ``MAR.create``.
        cl: Forwarded to ``train`` as the ``cl`` keyword.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop = 1
    thres = 40

    read = MAR()
    read = read.create(filename)
    target = int(read.get_allbugs() * stop)
    while True:
        found, cost, total = read.get_numbers()
        try:
            print("%d, %d" % (found, cost))
        except Exception:
            # Progress output stays best-effort, but a bare ``except`` would
            # also swallow KeyboardInterrupt/SystemExit and make this
            # ``while True`` loop impossible to interrupt at this point.
            pass
        if found >= target:
            break

        if found == 0 or cost < thres:
            batch = read.loc_sort()
        else:
            batch, _ = read.train(cl=cl)
        for doc_id in batch:
            read.code(doc_id, read.body["label"][doc_id])
    read.plot()
    # NOTE(review): this looks like a leftover debugger breakpoint; it is kept
    # so the function's behaviour is unchanged - confirm whether it is wanted.
    set_trace()
    return read
Exemplo n.º 3
0
def START_est(filename):
    """Run a MAR labelling loop until 90% of ``get_allpos()`` is found.

    A first MAR object is created only to call ``restart()`` on it; a second
    one, created afterwards, is used for the loop and returned.

    While ``pos`` is 0 or fewer than 40 ids are coded (``pos + neg``), ids
    come from ``random()``. Afterwards they are the third element returned
    by ``train(pne=True)``. The first time a training round sees
    ``pos >= 60``, ``cache_est()`` is called once. Every selected id is
    labelled with the value already stored in ``body["label"]``.

    Returns:
        The second MAR object produced by ``create(filename)``.
    """
    stop_ratio = 0.90
    min_coded = 40
    estimate_pending = True

    # Throw-away instance: it exists only so restart() runs before the real
    # reader is created.
    MAR().create(filename).restart()
    reader = MAR().create(filename)
    goal = int(reader.get_allpos() * stop_ratio)
    while True:
        pos, neg, _ = reader.get_numbers()
        if pos >= goal:
            break
        if pos == 0 or pos + neg < min_coded:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train(pne=True)
            if pos >= 60 and estimate_pending:
                reader.cache_est()
                estimate_pending = False
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
Exemplo n.º 4
0
def LINEAR(filename):
    """Label ids from ``random()`` until fewer than 10 remain uncoded.

    "Uncoded" is ``total - (pos + neg)`` as reported by ``get_numbers()``.
    Every selected id is labelled with the value already stored in
    ``body["label"]``.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    reader = MAR().create(filename)
    pos, neg, total = reader.get_numbers()
    while total - (pos + neg) >= 10:
        for doc_id in reader.random():
            reader.code(doc_id, reader.body["label"][doc_id])
        pos, neg, total = reader.get_numbers()
    return reader
Exemplo n.º 5
0
def LOC(filename):
    """Label ids in ``loc_sort()`` order until ``get_allpos()`` is reached.

    Each round prints ``pos`` and ``pos + neg`` before checking the stop
    condition. Every selected id is labelled with the value already stored
    in ``body["label"]``.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 1

    reader = MAR().create(filename)
    goal = int(reader.get_allpos() * stop_ratio)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d, %d" % (pos, pos + neg))
        if pos >= goal:
            return reader
        for doc_id in reader.loc_sort():
            reader.code(doc_id, reader.body["label"][doc_id])
Exemplo n.º 6
0
def START_ERROR(filename):
    """Run the ``START_AUTO``-style loop with randomly corrupted labels.

    Each round reads ``(pos, neg, total)`` from ``get_numbers()``, prints
    ``pos`` and ``pos + neg``, then codes a batch of ids: the ids from
    ``random()`` while ``pos`` is 0, otherwise the third element returned by
    ``train()``. The label passed to ``code()`` is the stored
    ``body["label"]`` value after the noise below is applied
    (``human_error`` is 0.2):

    * ``"no"`` becomes ``"yes"`` with probability ``human_error ** 2``;
    * ``"yes"`` becomes ``"no"`` with probability
      ``2 * (human_error - human_error ** 2)``;
    * any other value is passed through unchanged.

    Once ``pos`` is at least 10, every round in which ``pos`` equals the
    previous round's value costs one of three lives, and any change restores
    all three. When no lives remain the loop ends and ``export()`` is called.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    read = MAR()
    read = read.create(filename)
    pos_last = 0
    full_life = 3
    human_error = 0.2
    life = full_life

    false_positive_rate = human_error ** 2
    false_negative_rate = 2 * (human_error - human_error ** 2)

    def noisy_label(true_label):
        """Return ``true_label`` after applying the simulated coding error."""
        if true_label == "no":
            return "yes" if random.random() < false_positive_rate else "no"
        if true_label == "yes":
            return "no" if random.random() < false_negative_rate else "yes"
        # Previously a label other than "yes"/"no" left the result variable
        # unassigned: UnboundLocalError on the first id, or a silent reuse of
        # the previous id's label afterwards. Pass such labels through.
        return true_label

    while True:
        pos, neg, total = read.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        if pos >= 10:
            if pos == pos_last:
                life = life - 1
                if life == 0:
                    break
            else:
                life = full_life
        if pos == 0:
            batch = read.random()
        else:
            _, _, batch, _ = read.train()
        for doc_id in batch:
            read.code(doc_id, noisy_label(read.body["label"][doc_id]))
        pos_last = pos
    read.export()
    return read
Exemplo n.º 7
0
def REUSE_RANDOM(filename, old):
    """Label ids from ``train_reuse_random()`` until 90% of positives are found.

    The reader is created from ``filename`` and then ``create_old(old)`` is
    called on it. The target is ``int(get_allpos() * 0.9)``. Each round takes
    the third element returned by ``train_reuse_random()`` and labels those
    ids with the values already stored in ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        old: Passed to ``create_old``.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 0.9

    reader = MAR().create(filename)
    reader.create_old(old)
    all_pos = reader.get_allpos()
    goal = int(all_pos * stop_ratio)
    while True:
        pos, _, _ = reader.get_numbers()
        if pos >= goal:
            return reader
        _, _, batch, _ = reader.train_reuse_random()
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
Exemplo n.º 8
0
def UPDATE(filename, old, pne=False, cl="RF"):
    """Label ids from ``train()`` until ``get_allpos()`` positives are found.

    The reader is created from ``filename`` and then ``create_old(old)`` is
    called on it. Each round prints ``pos`` and ``pos + neg``, takes the
    third element returned by ``train(pne=pne, cl=cl)`` and labels those ids
    with the values already stored in ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        old: Passed to ``create_old``.
        pne: Forwarded to ``train`` as the ``pne`` keyword.
        cl: Forwarded to ``train`` as the ``cl`` keyword.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 1

    reader = MAR().create(filename)
    reader.create_old(old)
    all_pos = reader.get_allpos()
    goal = int(all_pos * stop_ratio)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        if pos >= goal:
            return reader
        _, _, batch, _ = reader.train(pne=pne, cl=cl)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
Exemplo n.º 9
0
def START_LOC(filename, cl="SVM-linear"):
    """Seed with ``loc_sort()`` ids, then follow ``train()`` until done.

    The target is ``int(get_allpos() * 1)``. Each round prints ``pos`` and
    ``pos + neg``. While ``pos`` is 0 or fewer than 40 ids are coded, ids
    come from ``loc_sort()``; afterwards they are the third element returned
    by ``train(cl=cl)``. Every selected id is labelled with the value already
    stored in ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        cl: Forwarded to ``train`` as the ``cl`` keyword.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 1

    reader = MAR().create(filename)
    goal = int(reader.get_allpos() * stop_ratio)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d, %d" % (pos, pos + neg))
        if pos >= goal:
            return reader
        if pos == 0 or pos + neg < 40:
            batch = reader.loc_sort()
        else:
            _, _, batch, _ = reader.train(cl=cl)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
Exemplo n.º 10
0
def TIME_START(filename):
    """Label ids until 90% of ``get_allpos()`` is found, using ``train_kept()``.

    While ``pos`` is 0, ids come from ``random()``; afterwards they are the
    third element returned by ``train_kept()``. Every selected id is labelled
    with the value already stored in ``body["label"]``.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 0.9

    reader = MAR().create(filename)
    all_pos = reader.get_allpos()
    goal = int(all_pos * stop_ratio)
    while True:
        pos, _, _ = reader.get_numbers()
        if pos >= goal:
            return reader
        if pos == 0:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train_kept()
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
Exemplo n.º 11
0
def UPDATE_AUTO(filename, old, pne=True):
    """Label ids from ``train()`` until positives stall for five rounds.

    The reader is created from ``filename`` and then ``create_old(old)`` is
    called on it. Each round prints ``pos`` and ``pos + neg``. A round in
    which ``pos`` equals the previous round's value (initially -1) costs one
    of five lives, and any change restores all five. The loop ends when no
    lives remain. Otherwise the third element returned by ``train(pne)`` is
    labelled with the values already stored in ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        old: Passed to ``create_old``.
        pne: Passed to ``train`` as its first positional argument.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    max_lives = 5
    lives = max_lives
    previous_pos = -1

    reader = MAR().create(filename)
    reader.create_old(old)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        stalled = pos == previous_pos
        lives = lives - 1 if stalled else max_lives
        if stalled and lives == 0:
            break
        _, _, batch, _ = reader.train(pne)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
        previous_pos = pos
    return reader
Exemplo n.º 12
0
def START_DOC2VEC(filename):
    """Run a MAR labelling loop until 95% of ``get_allpos()`` is found.

    A first MAR object is created only to call ``restart()`` on it; a second
    one, created afterwards, is used for the loop and returned.

    Each round prints ``pos`` and ``pos + neg``. While ``pos`` is 0 or fewer
    than 40 ids are coded, ids come from ``random()``; afterwards they are
    the third of the five values returned by ``train(weighting=True)``.
    Every selected id is labelled with the value already stored in
    ``body["label"]``.

    Returns:
        The second MAR object produced by ``create(filename)``.
    """
    stop_ratio = 0.95
    min_coded = 40

    # Throw-away instance: it exists only so restart() runs before the real
    # reader is created.
    MAR().create(filename).restart()
    reader = MAR().create(filename)
    goal = int(reader.get_allpos() * stop_ratio)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d, %d" % (pos, pos + neg))
        if pos >= goal:
            break
        if pos == 0 or pos + neg < min_coded:
            batch = reader.random()
        else:
            _, _, batch, _, _ = reader.train(weighting=True)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
Exemplo n.º 13
0
def REUSE(filename, old, pne=True):
    """Label ids until 90% of positives are found, switching trainers at 5.

    The reader is created from ``filename`` and then ``create_old(old)`` is
    called on it. Each round prints ``pos`` and ``pos + neg``. While ``pos``
    is below 5 the batch is the third element returned by ``train(pne)``;
    from then on it is the third element returned by ``train_reuse(pne)``.
    Every selected id is labelled with the value already stored in
    ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        old: Passed to ``create_old``.
        pne: Passed to the trainer as its first positional argument.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 0.9
    min_pos = 5

    reader = MAR().create(filename)
    reader.create_old(old)
    all_pos = reader.get_allpos()
    goal = int(all_pos * stop_ratio)
    while True:
        pos, neg, _ = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        if pos >= goal:
            break
        # Only the trainer differs between the two phases.
        trainer = reader.train if pos < min_pos else reader.train_reuse
        _, _, batch, _ = trainer(pne)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
Exemplo n.º 14
0
def UPDATE_REUSE(filename, old):
    """Label ids with ``train()``, moving to ``train_reuse()`` after a stall.

    The reader is created from ``filename`` and then ``create_old(old)`` is
    called on it. The target is ``int(get_allpos() * 0.9)``.

    A patience counter starts at 2. Each round it is refilled when ``pos``
    differs from the previous round's value and decremented otherwise. This
    update happens before the stop check. When ``pos`` is at least 5 and the
    counter is below 1, the batch comes from ``train_reuse()`` and the refill
    value is set to 0, so later refills leave the counter below 1 as well.
    In every other round the batch comes from ``train()``. Every selected id
    is labelled with the value already stored in ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        old: Passed to ``create_old``.

    Returns:
        The MAR object produced by ``create(filename)``.
    """
    stop_ratio = 0.9
    refill = 2
    patience = refill
    previous_pos = 0
    min_pos = 5

    reader = MAR().create(filename)
    reader.create_old(old)
    all_pos = reader.get_allpos()
    goal = int(all_pos * stop_ratio)
    while True:
        pos, _, _ = reader.get_numbers()

        patience = refill if pos != previous_pos else patience - 1
        previous_pos = pos

        if pos >= goal:
            break
        if pos >= min_pos and patience < 1:
            # From here on a refill yields 0, keeping patience below 1.
            refill = 0
            _, _, batch, _ = reader.train_reuse()
        else:
            _, _, batch, _ = reader.train()
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
Exemplo n.º 15
0
def Codes(filename, code):
    """Run a MAR labelling loop configured by the flag letters in ``code``.

    Flags (each tested with ``in``, so ``code`` may be any container of
    letters):

    * ``"P"``: random sampling continues until ``pos`` reaches 5 instead of 1.
    * ``"W"`` or ``"M"``: ``train`` is called with ``weighting=True``.
    * ``"U"``: while ``pos`` is below 30, the first list returned by
      ``train`` is labelled instead of the third.
    * ``"S"``: the fifth list returned by ``train`` is labelled in chunks of
      ``read.step`` until the target is reached.
    * neither ``"A"`` nor ``"M"``: ``read.enough`` is set to 100000.

    A first MAR object is created only to call ``restart()`` on it; a second
    one, created afterwards, is used for the loop and returned. The target
    is ``int(get_allpos() * 0.95)``. Every selected id is labelled with the
    value already stored in ``body["label"]``.

    Args:
        filename: Passed to ``MAR.create``.
        code: Container of flag letters, see above.

    Returns:
        The second MAR object produced by ``create(filename)``.
    """
    stop = 0.95
    thres = 0
    starting = 5 if "P" in code else 1

    weighting = "W" in code or "M" in code
    uncertain = "U" in code
    stopping = "S" in code

    read = MAR()
    read = read.create(filename)
    read.restart()
    read = MAR()
    read = read.create(filename)
    if not ("A" in code or "M" in code):
        read.enough = 100000
    target = int(read.get_allpos() * stop)
    while True:
        pos, neg, total = read.get_numbers()
        if pos >= target:
            break
        if pos < starting or pos + neg < thres:
            for doc_id in read.random():
                read.code(doc_id, read.body["label"][doc_id])
            continue

        a, b, c, d, e = read.train(weighting=weighting)
        if pos < 30 and uncertain:
            for doc_id in a:
                read.code(doc_id, read.body["label"][doc_id])
        elif stopping:
            now = 0
            while pos < target:
                chunk = e[now:now + read.step]
                if len(chunk) == 0:
                    # The ranked list is exhausted. Without this guard
                    # nothing would be coded, ``pos`` would never change and
                    # this loop would spin forever. Fall back to the outer
                    # loop, which retrains.
                    break
                for doc_id in chunk:
                    read.code(doc_id, read.body["label"][doc_id])
                pos, neg, total = read.get_numbers()
                now = now + read.step
        else:
            for doc_id in c:
                read.code(doc_id, read.body["label"][doc_id])
    return read
Exemplo n.º 16
0
        read.BM25(query.split())
    while True:
        pos, neg, total = read.get_numbers()
        try:
            print("%d, %d, %d" % (pos, pos + neg, read.est_num))
        except:
            print("%d, %d" % (pos, pos + neg))

        if pos + neg >= total:
            break

        if pos < 1:
            if query:
                ids, scores = read.BM25_get()
                for id in ids:
                    read.code(id, read.body["label"][id])
            else:
                for id in read.random():
                    read.code(id, read.body["label"][id])
        else:
            uncertain, uncertain_proba, certain, certain_proba, _ = read.train(
                weighting=True, pne=True)
            if target_recall * read.est_num <= pos:
                break
            if pos <= thres:
                for id in uncertain:
                    read.code(id, read.body["label"][id])
            else:
                for id in certain:
                    read.code(id, read.body["label"][id])