def START_AUTO(filename):
    """Simulate a review of *filename* that stops when no new positives appear.

    Documents are coded with the label stored in ``body["label"]``.  While
    no positive has been found the batch comes from ``random()``; afterwards
    it is the third item returned by ``train()``.  Once at least 10 positives
    are known, three consecutive rounds without a change in the positive
    count end the run.

    Returns the ``MAR`` object in its final state.
    """
    reader = MAR().create(filename)
    max_patience = 3
    patience = max_patience
    previous_pos = 0

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))

        # Stagnation is only tracked after the first 10 positives.
        if pos >= 10:
            if pos != previous_pos:
                patience = max_patience
            else:
                patience -= 1
                if patience == 0:
                    break

        if pos == 0:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train()

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
        previous_pos = pos

    return reader
def START(filename, cl="linear"):
    """Simulate a review of *filename* until every target item is found.

    The target is ``int(read.get_allbugs() * stop)`` with ``stop = 1``.
    While nothing has been found, or while the second value reported by
    ``get_numbers()`` is below ``thres`` (40), items are taken in
    ``loc_sort()`` order; after that they come from ``train(cl=cl)``.
    Each selected item is coded with the label stored in ``body["label"]``.
    When the loop ends ``read.plot()`` is called.

    Args:
        filename: Passed to ``MAR.create``.
        cl: Classifier name forwarded to ``MAR.train``.

    Returns:
        The ``MAR`` object in its final state.
    """
    stop = 1
    thres = 40

    read = MAR().create(filename)
    target = int(read.get_allbugs() * stop)

    while True:
        found, cost, _total = read.get_numbers()
        try:
            print("%d, %d" % (found, cost))
        except (TypeError, ValueError, OverflowError):
            # The progress line is best effort: counters that cannot be
            # formatted as integers must not abort the run.  Unlike a bare
            # ``except`` this no longer swallows KeyboardInterrupt/SystemExit.
            pass

        if found >= target:
            break

        if found == 0 or cost < thres:
            batch = read.loc_sort()
        else:
            batch, _ = read.train(cl=cl)

        for doc_id in batch:
            read.code(doc_id, read.body["label"][doc_id])

    read.plot()
    # A leftover ``set_trace()`` debugger breakpoint used to sit here and
    # blocked every non-interactive caller; it has been removed.
    return read
def START_est(filename):
    """Simulate a review of *filename* up to 90% of its positives.

    A first ``MAR`` object is created only to call ``restart()`` on it; the
    run then uses a freshly created one.  Batches come from ``random()``
    while no positive is known or fewer than 40 documents are coded, and
    from ``train(pne=True)`` afterwards.  The first time a training round
    happens with at least 60 positives, ``cache_est()`` is called once.

    Returns the ``MAR`` object in its final state.
    """
    stop = 0.90
    thres = 40

    # Create, restart, then create again so the run starts from a fresh object.
    MAR().create(filename).restart()
    reader = MAR().create(filename)
    target = int(reader.get_allpos() * stop)

    estimate_pending = True
    while True:
        pos, neg, _total = reader.get_numbers()
        if pos >= target:
            break

        if pos == 0 or pos + neg < thres:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train(pne=True)
            if pos >= 60 and estimate_pending:
                reader.cache_est()
                estimate_pending = False

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def LINEAR(filename):
    """Code *filename* in ``random()`` batches until fewer than 10 remain.

    Every round codes the documents returned by ``random()`` with the label
    stored in ``body["label"]``.  The run ends as soon as the number of
    documents not yet coded (total minus coded) drops below 10.

    Returns the ``MAR`` object in its final state.
    """
    reader = MAR().create(filename)
    while True:
        pos, neg, total = reader.get_numbers()
        remaining = total - (pos + neg)
        if remaining < 10:
            break
        for doc_id in reader.random():
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
def LOC(filename):
    """Code *filename* in ``loc_sort()`` order until all positives are found.

    The target is ``int(get_allpos() * stop)`` with ``stop = 1``.  Each round
    prints the positive and coded counts, then codes the documents returned
    by ``loc_sort()`` with the label stored in ``body["label"]``.

    Returns the ``MAR`` object in its final state.
    """
    stop = 1
    reader = MAR().create(filename)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d, %d" % (pos, pos + neg))
        if pos >= target:
            break
        for doc_id in reader.loc_sort():
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def _simulated_human_label(true_label, human_error):
    """Return *true_label* after applying the simulated reviewer error model.

    A stored "no" becomes "yes" with probability ``human_error ** 2``; a
    stored "yes" becomes "no" with probability
    ``2 * (human_error - human_error ** 2)``.  Any other stored value is
    returned unchanged and consumes no random number.
    """
    if true_label == "no":
        flipped = random.random() < human_error ** 2
        return "yes" if flipped else "no"
    if true_label == "yes":
        flipped = random.random() < 2 * (human_error - human_error ** 2)
        return "no" if flipped else "yes"
    # Previously this case left the label variable unbound (NameError on the
    # first document) or silently reused the previous document's label.
    return true_label


def START_ERROR(filename):
    """Simulate a review of *filename* with a noisy human labeller.

    Works like a stagnation-stopped run: batches come from ``random()``
    while no positive is known and from ``train()`` afterwards; once at
    least 10 positives are known, three consecutive rounds without a change
    in the positive count end the run.  Instead of coding the stored label
    directly, each label is first passed through the error model in
    ``_simulated_human_label`` with ``human_error = 0.2``.  After the loop
    ``read.export()`` is called.

    Returns:
        The ``MAR`` object in its final state.
    """
    read = MAR().create(filename)
    human_error = 0.2
    full_life = 3
    life = full_life
    pos_last = 0

    while True:
        pos, neg, _total = read.get_numbers()
        print("%d/ %d" % (pos, pos + neg))

        # Stagnation is only tracked after the first 10 positives.
        if pos >= 10:
            if pos == pos_last:
                life -= 1
                if life == 0:
                    break
            else:
                life = full_life

        if pos == 0:
            batch = read.random()
        else:
            _, _, batch, _ = read.train()

        for doc_id in batch:
            noisy = _simulated_human_label(read.body["label"][doc_id],
                                           human_error)
            read.code(doc_id, noisy)
        pos_last = pos

    read.export()
    return read
def REUSE_RANDOM(filename, old):
    """Simulate a review of *filename* using ``train_reuse_random()``.

    After creating the ``MAR`` object for *filename*, ``create_old(old)`` is
    called on it.  Each round codes the third item returned by
    ``train_reuse_random()`` with the labels stored in ``body["label"]``
    until 90% (truncated to an int) of ``get_allpos()`` is reached.

    Returns the ``MAR`` object in its final state.
    """
    stop = 0.9
    reader = MAR().create(filename)
    reader.create_old(old)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, _neg, _total = reader.get_numbers()
        if pos >= target:
            break
        _, _, batch, _ = reader.train_reuse_random()
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def UPDATE(filename, old, pne=False, cl="RF"):
    """Simulate a review of *filename* after loading *old* via ``create_old``.

    Every round prints the positive and coded counts, then codes the third
    item returned by ``train(pne=pne, cl=cl)`` with the labels stored in
    ``body["label"]``.  The run ends when the positive count reaches
    ``int(get_allpos() * stop)`` with ``stop = 1``.

    Returns the ``MAR`` object in its final state.
    """
    stop = 1
    reader = MAR().create(filename)
    reader.create_old(old)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        if pos >= target:
            break
        _, _, batch, _ = reader.train(pne=pne, cl=cl)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def START_LOC(filename, cl="SVM-linear"):
    """Simulate a review of *filename* seeded by ``loc_sort()`` order.

    While no positive is known or fewer than 40 documents are coded, the
    batch comes from ``loc_sort()``; afterwards it is the third item
    returned by ``train(cl=cl)``.  Documents are coded with the label stored
    in ``body["label"]`` until the positive count reaches
    ``int(get_allpos() * stop)`` with ``stop = 1``.

    Returns the ``MAR`` object in its final state.
    """
    stop = 1
    reader = MAR().create(filename)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d, %d" % (pos, pos + neg))
        if pos >= target:
            break

        if pos == 0 or pos + neg < 40:
            batch = reader.loc_sort()
        else:
            _, _, batch, _ = reader.train(cl=cl)

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def TIME_START(filename):
    """Simulate a review of *filename* using ``train_kept()``.

    Batches come from ``random()`` until the first positive is found and
    from the third item returned by ``train_kept()`` afterwards.  Documents
    are coded with the label stored in ``body["label"]`` until 90%
    (truncated to an int) of ``get_allpos()`` is reached.

    Returns the ``MAR`` object in its final state.
    """
    stop = 0.9
    reader = MAR().create(filename)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, _neg, _total = reader.get_numbers()
        if pos >= target:
            break

        if pos == 0:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train_kept()

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def UPDATE_AUTO(filename, old, pne=True):
    """Simulate a review of *filename* that stops when positives stagnate.

    After ``create_old(old)``, every round codes the third item returned by
    ``train(pne)`` with the labels stored in ``body["label"]``.  Five
    consecutive rounds without a change in the positive count end the run.

    Returns the ``MAR`` object in its final state.
    """
    reader = MAR().create(filename)
    reader.create_old(old)

    max_patience = 5
    patience = max_patience
    previous_pos = -1  # differs from any count, so round one never stagnates

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))

        if pos != previous_pos:
            patience = max_patience
        else:
            patience -= 1
            if patience == 0:
                break

        _, _, batch, _ = reader.train(pne)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
        previous_pos = pos

    return reader
def START_DOC2VEC(filename):
    """Simulate a review of *filename* up to 95% of its positives.

    A first ``MAR`` object is created only to call ``restart()`` on it; the
    run then uses a freshly created one.  Batches come from ``random()``
    while no positive is known or fewer than 40 documents are coded;
    afterwards the third of the five items returned by
    ``train(weighting=True)`` is used.

    Returns the ``MAR`` object in its final state.
    """
    stop = 0.95
    thres = 40

    # Create, restart, then create again so the run starts from a fresh object.
    MAR().create(filename).restart()
    reader = MAR().create(filename)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d, %d" % (pos, pos + neg))
        if pos >= target:
            break

        if pos == 0 or pos + neg < thres:
            batch = reader.random()
        else:
            _, _, batch, _, _ = reader.train(weighting=True)

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def REUSE(filename, old, pne=True):
    """Simulate a review of *filename* that switches to ``train_reuse``.

    After ``create_old(old)``, rounds with fewer than 5 known positives use
    ``train(pne)`` and later rounds use ``train_reuse(pne)``; in both cases
    the third returned item is coded with the labels stored in
    ``body["label"]``.  The run ends at 90% (truncated to an int) of
    ``get_allpos()``.

    Returns the ``MAR`` object in its final state.
    """
    stop = 0.9
    thres = 5

    reader = MAR().create(filename)
    reader.create_old(old)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, neg, _total = reader.get_numbers()
        print("%d/ %d" % (pos, pos + neg))
        if pos >= target:
            break

        # Pick the trainer first; the coding loop is the same for both.
        trainer = reader.train if pos < thres else reader.train_reuse
        _, _, batch, _ = trainer(pne)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def UPDATE_REUSE(filename, old):
    """Simulate a review that falls back to ``train_reuse()`` on stagnation.

    After ``create_old(old)``, each round normally uses ``train()``.  A life
    counter starts at 2, is reset whenever the positive count changed since
    the previous round and is decremented otherwise.  When at least 5
    positives are known and the counter is below 1, the round uses
    ``train_reuse()`` instead and the reset value is set to 0, so the
    counter can never rise above 0 again.  The run ends at 90% (truncated
    to an int) of ``get_allpos()``.

    Returns the ``MAR`` object in its final state.
    """
    stop = 0.9
    thres = 5
    life_reset = 2
    life = life_reset
    previous_pos = 0

    reader = MAR().create(filename)
    reader.create_old(old)
    target = int(reader.get_allpos() * stop)

    while True:
        pos, _neg, _total = reader.get_numbers()

        if pos != previous_pos:
            life = life_reset
        else:
            life -= 1
        previous_pos = pos

        if pos >= target:
            break

        if pos >= thres and life < 1:
            # Latch: from now on a reset leaves the counter at 0.
            life_reset = 0
            _, _, batch, _ = reader.train_reuse()
        else:
            _, _, batch, _ = reader.train()

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])

    return reader
def Codes(filename, code):
    """Simulate a review of *filename* configured by the letters in *code*.

    Letters recognised in *code* (as read by this function):
        "P": require 5 positives before training starts (otherwise 1).
        "W" or "M": pass ``weighting=True`` to ``train``.
        "U": while fewer than 30 positives are known, code the first item
            returned by ``train`` instead of the third.
        "S": after a training round, keep coding the fifth returned item in
            slices of ``read.step`` without retraining.
        "A" or "M": leave ``read.enough`` untouched; without either letter
            it is set to 100000.

    A first ``MAR`` object is created only to call ``restart()`` on it; the
    run then uses a freshly created one.  Documents are coded with the
    label stored in ``body["label"]`` until 95% (truncated to an int) of
    ``get_allpos()`` is reached.

    Args:
        filename: Passed to ``MAR.create``.
        code: String (or other container) of option letters.

    Returns:
        The ``MAR`` object in its final state.
    """
    stop = 0.95
    thres = 0
    starting = 5 if "P" in code else 1
    weighting = "W" in code or "M" in code
    uncertain = "U" in code
    stopping = "S" in code

    # Create, restart, then create again so the run starts from a fresh object.
    MAR().create(filename).restart()
    read = MAR().create(filename)

    if not ("A" in code or "M" in code):
        read.enough = 100000

    target = int(read.get_allpos() * stop)

    while True:
        pos, neg, total = read.get_numbers()
        if pos >= target:
            break

        if pos < starting or pos + neg < thres:
            for doc_id in read.random():
                read.code(doc_id, read.body["label"][doc_id])
            continue

        a, _b, c, _d, e = read.train(weighting=weighting)

        if pos < 30 and uncertain:
            for doc_id in a:
                read.code(doc_id, read.body["label"][doc_id])
        elif stopping:
            now = 0
            while pos < target:
                chunk = e[now:now + read.step]
                # len() rather than truthiness: ``e`` may be an array type.
                if len(chunk) == 0:
                    # Nothing left in this ordering; previously this loop
                    # would spin forever.  Fall back to the outer loop,
                    # which re-checks the target and trains again.
                    break
                for doc_id in chunk:
                    read.code(doc_id, read.body["label"][doc_id])
                pos, neg, total = read.get_numbers()
                now += read.step
        else:
            for doc_id in c:
                read.code(doc_id, read.body["label"][doc_id])

    return read
read.BM25(query.split()) while True: pos, neg, total = read.get_numbers() try: print("%d, %d, %d" % (pos, pos + neg, read.est_num)) except: print("%d, %d" % (pos, pos + neg)) if pos + neg >= total: break if pos < 1: if query: ids, scores = read.BM25_get() for id in ids: read.code(id, read.body["label"][id]) else: for id in read.random(): read.code(id, read.body["label"][id]) else: uncertain, uncertain_proba, certain, certain_proba, _ = read.train( weighting=True, pne=True) if target_recall * read.est_num <= pos: break if pos <= thres: for id in uncertain: read.code(id, read.body["label"][id]) else: for id in certain: read.code(id, read.body["label"][id])