def START_est(filename):
    """Simulate coding ``filename`` until a 90% positive-count target is hit.

    A ``MAR`` object is created, ``restart()`` is called on it, and a second
    ``MAR`` object is then created for the actual run.  Every item is coded
    with the value stored for it in ``body["label"]``.

    Each round either codes the items returned by ``random()`` (while no
    positive has been coded yet, or fewer than 40 items have been coded) or
    codes the third element of ``train(pne=True)``.  ``cache_est()`` is called
    once, on the first training round that starts with 60 or more positives.

    Args:
        filename: Passed unchanged to ``MAR.create``.

    Returns:
        The ``MAR`` object once the positive count reported by
        ``get_numbers()`` reaches ``int(get_allpos() * 0.90)``.
    """
    target_fraction = 0.90
    min_coded = 40
    est_cached = False

    # Create, restart, then create again -- the order is kept as-is.
    reader = MAR().create(filename)
    reader.restart()
    reader = MAR().create(filename)

    target = int(reader.get_allpos() * target_fraction)
    while True:
        n_pos, n_neg, _ = reader.get_numbers()
        if n_pos >= target:
            break

        if n_pos == 0 or n_pos + n_neg < min_coded:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train(pne=True)
            if n_pos >= 60 and not est_cached:
                reader.cache_est()
                est_cached = True

        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
def START_AUTO(filename):
    """Simulate coding ``filename`` until the positive count stops growing.

    Every item is coded with the value stored for it in ``body["label"]``.
    While no positive has been coded, the items returned by ``random()`` are
    coded; afterwards the third element of ``train()`` is coded each round.

    Stopping rule: once 10 or more positives have been coded, the run ends
    after three consecutive rounds in which the positive count is unchanged.
    The run also ends as soon as ``pos + neg >= total`` (the same test
    ``TEST_AL`` uses), so a data set that never reaches 10 positives can no
    longer keep the loop running with nothing left to code.

    Args:
        filename: Passed unchanged to ``MAR.create``.

    Returns:
        The ``MAR`` object after the simulation.
    """
    read = MAR().create(filename)
    full_life = 3
    life = full_life
    pos_last = 0
    while True:
        pos, neg, total = read.get_numbers()
        print("%d/ %d" % (pos, pos + neg))

        # Everything has been coded: further rounds cannot change anything.
        if pos + neg >= total:
            break

        if pos >= 10:
            if pos == pos_last:
                life -= 1
                if life == 0:
                    break
            else:
                # Progress was made, so the patience counter starts over.
                life = full_life

        if pos == 0:
            batch = read.random()
        else:
            _, _, batch, _ = read.train()
        for doc_id in batch:
            read.code(doc_id, read.body["label"][doc_id])
        pos_last = pos
    return read
def LINEAR(filename):
    """Code items returned by ``random()`` until fewer than 10 are left.

    Each round reads ``pos, neg, total`` from ``get_numbers()`` and stops when
    ``total - (pos + neg)`` drops below 10.  Every item is coded with the
    value stored for it in ``body["label"]``.

    Args:
        filename: Passed unchanged to ``MAR.create``.

    Returns:
        The ``MAR`` object after the simulation.
    """
    reader = MAR().create(filename)
    while True:
        n_pos, n_neg, n_total = reader.get_numbers()
        n_left = n_total - (n_pos + n_neg)
        if n_left < 10:
            return reader
        for doc_id in reader.random():
            reader.code(doc_id, reader.body["label"][doc_id])
def TEST_AL(filename, old_files=None, stop='est', stopat=1, error='none',
            interval=100000, starting=1, seed=0, step=10):
    """Run a seeded active-learning simulation using ``code_error``.

    Items returned by ``random()`` are coded while fewer than ``starting``
    positives have been coded; afterwards each round calls
    ``train(weighting=True, pne=True)`` and codes the third element of its
    result.  All coding goes through ``code_error(id, error=error)``.

    The run ends when ``pos + neg >= total``, or after a training round when
    both ``pos >= int(get_allpos() * stopat)`` and
    ``est_num * stopat <= pos`` hold.

    Args:
        filename: Passed unchanged to ``MAR.create``.
        old_files: Second argument to ``MAR.create``.  ``None`` (the default)
            is replaced by a fresh empty list on every call.
        stop: ``'est'`` sets ``enable_est`` to True; any other value sets it
            to False.
        stopat: Target fraction; converted with ``float()``.
        error: Forwarded to ``code_error`` as ``error=``.
        interval: Assigned to the ``interval`` attribute of the ``MAR`` object.
        starting: Number of coded positives required before training starts.
        seed: Seed passed to ``np.random.seed``.
        step: Assigned to the ``step`` attribute of the ``MAR`` object.

    Returns:
        The ``MAR`` object after the simulation.
    """
    # A list literal as default would be one shared object across all calls.
    if old_files is None:
        old_files = []

    stopat = float(stopat)
    thres = 0
    np.random.seed(seed)

    read = MAR().create(filename, old_files)
    read.step = step
    read.interval = interval

    target = int(read.get_allpos() * stopat)
    read.enable_est = stop == 'est'

    while True:
        pos, neg, total = read.get_numbers()
        # Best-effort progress line: fall back to the short form when est_num
        # is missing or cannot be formatted.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        try:
            print("%d, %d, %d" % (pos, pos + neg, read.est_num))
        except Exception:
            print("%d, %d" % (pos, pos + neg))

        if pos + neg >= total:
            break

        if pos < starting or pos + neg < thres:
            for doc_id in read.random():
                read.code_error(doc_id, error=error)
        else:
            _, _, batch, _ = read.train(weighting=True, pne=True)
            # NOTE(review): est_num is read here even when stop != 'est';
            # confirm MAR always defines it.
            if pos >= target and read.est_num * stopat <= pos:
                break
            for doc_id in batch:
                read.code_error(doc_id, error=error)
    return read
def _noisy_label(true_label, human_error):
    """Return the label a simulated error-prone reviewer assigns.

    A ``"no"`` becomes ``"yes"`` with probability ``human_error ** 2``; a
    ``"yes"`` becomes ``"no"`` with probability
    ``2 * (human_error - human_error ** 2)``.  Any other label is returned
    unchanged and consumes no random number.
    """
    if true_label == "no":
        return "yes" if random.random() < human_error ** 2 else "no"
    if true_label == "yes":
        flip = random.random() < 2 * (human_error - human_error ** 2)
        return "no" if flip else "yes"
    return true_label


def START_ERROR(filename, human_error=0.2):
    """Simulate coding ``filename`` with randomly injected labelling errors.

    Same loop as ``START_AUTO`` except that each item is coded with a possibly
    flipped version of the value stored in ``body["label"]`` (see
    ``_noisy_label``), and ``export()`` is called when the loop ends.

    While no positive has been coded, the items returned by ``random()`` are
    coded; afterwards the third element of ``train()`` is coded each round.
    Once 10 or more positives have been coded, the run ends after three
    consecutive rounds with an unchanged positive count.  The run also ends as
    soon as ``pos + neg >= total`` (the same test ``TEST_AL`` uses), so it can
    no longer keep looping with nothing left to code.

    Previously a label other than ``"yes"``/``"no"`` left the coded value
    unbound (first item) or silently reused the previous item's value; such
    labels are now coded unchanged.

    Args:
        filename: Passed unchanged to ``MAR.create``.
        human_error: Error-rate parameter of the flip probabilities.

    Returns:
        The ``MAR`` object after the simulation.
    """
    read = MAR().create(filename)
    full_life = 3
    life = full_life
    pos_last = 0
    while True:
        pos, neg, total = read.get_numbers()
        print("%d/ %d" % (pos, pos + neg))

        # Everything has been coded: further rounds cannot change anything.
        if pos + neg >= total:
            break

        if pos >= 10:
            if pos == pos_last:
                life -= 1
                if life == 0:
                    break
            else:
                # Progress was made, so the patience counter starts over.
                life = full_life

        if pos == 0:
            batch = read.random()
        else:
            _, _, batch, _ = read.train()
        for doc_id in batch:
            read.code(doc_id, _noisy_label(read.body["label"][doc_id], human_error))
        pos_last = pos
    read.export()
    return read
def Codes(filename, code):
    """Simulate coding ``filename`` with behaviour switched by letters in ``code``.

    Membership tests on ``code`` select the options:

    * ``"P"`` -- require 5 coded positives (otherwise 1) before training.
    * ``"W"`` or ``"M"`` -- pass ``weighting=True`` to ``train``.
    * ``"U"`` -- while fewer than 30 positives are coded, code the first
      element of the ``train`` result.
    * ``"S"`` -- walk the fifth element of the ``train`` result in slices of
      ``step`` items until the target is reached.
    * neither ``"A"`` nor ``"M"`` -- set the ``enough`` attribute to 100000.

    When neither the ``"U"`` nor the ``"S"`` branch applies, the third element
    of the ``train`` result is coded.  Every item is coded with the value
    stored for it in ``body["label"]``.  A ``MAR`` object is created and
    ``restart()`` is called on it before a second one is created for the run.

    Args:
        filename: Passed unchanged to ``MAR.create``.
        code: Container of option letters (only ``in`` tests are applied).

    Returns:
        The ``MAR`` object once the positive count reported by
        ``get_numbers()`` reaches ``int(get_allpos() * 0.95)``.
    """
    target_fraction = 0.95
    min_coded = 0
    min_pos = 5 if "P" in code else 1

    use_weighting = "W" in code or "M" in code
    use_uncertain = "U" in code
    use_stopping = "S" in code

    # Create, restart, then create again -- the order is kept as-is.
    reader = MAR().create(filename)
    reader.restart()
    reader = MAR().create(filename)

    if "A" not in code and "M" not in code:
        reader.enough = 100000
    target = int(reader.get_allpos() * target_fraction)

    def code_all(doc_ids):
        # Code every given item with its stored label.
        for doc_id in doc_ids:
            reader.code(doc_id, reader.body["label"][doc_id])

    while True:
        n_pos, n_neg, _ = reader.get_numbers()
        if n_pos >= target:
            break

        if n_pos < min_pos or n_pos + n_neg < min_coded:
            code_all(reader.random())
            continue

        # NOTE(review): five values are unpacked here, while other runners in
        # this file unpack four from train(); confirm against MAR.train.
        first, _, third, _, fifth = reader.train(weighting=use_weighting)
        if n_pos < 30 and use_uncertain:
            code_all(first)
        elif use_stopping:
            offset = 0
            while n_pos < target:
                code_all(fifth[offset:offset + reader.step])
                n_pos, n_neg, _ = reader.get_numbers()
                offset += reader.step
        else:
            code_all(third)
    return reader
def TIME_START(filename):
    """Simulate coding ``filename`` using ``train_kept`` until a 90% target.

    While no positive has been coded, the items returned by ``random()`` are
    coded; afterwards each round codes the third element of ``train_kept()``.
    Every item is coded with the value stored for it in ``body["label"]``.

    Args:
        filename: Passed unchanged to ``MAR.create``.

    Returns:
        The ``MAR`` object once the positive count reported by
        ``get_numbers()`` reaches ``int(get_allpos() * 0.9)``.
    """
    target_fraction = 0.9
    reader = MAR().create(filename)
    all_pos = reader.get_allpos()
    target = int(all_pos * target_fraction)

    while True:
        n_pos, _, _ = reader.get_numbers()
        if n_pos >= target:
            return reader

        if n_pos == 0:
            batch = reader.random()
        else:
            _, _, batch, _ = reader.train_kept()
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
def START_DOC2VEC(filename):
    """Simulate coding ``filename`` with weighted training until a 95% target.

    A ``MAR`` object is created, ``restart()`` is called on it, and a second
    ``MAR`` object is then created for the actual run.  Each round prints the
    positive count and the number of coded items.  The items returned by
    ``random()`` are coded while no positive has been coded yet or fewer than
    40 items have been coded; afterwards the third element of
    ``train(weighting=True)`` is coded.  Every item is coded with the value
    stored for it in ``body["label"]``.

    Args:
        filename: Passed unchanged to ``MAR.create``.

    Returns:
        The ``MAR`` object once the positive count reported by
        ``get_numbers()`` reaches ``int(get_allpos() * 0.95)``.
    """
    target_fraction = 0.95
    min_coded = 40

    # Create, restart, then create again -- the order is kept as-is.
    reader = MAR().create(filename)
    reader.restart()
    reader = MAR().create(filename)

    target = int(reader.get_allpos() * target_fraction)
    while True:
        n_pos, n_neg, _ = reader.get_numbers()
        n_coded = n_pos + n_neg
        print("%d, %d" % (n_pos, n_coded))
        if n_pos >= target:
            break

        if n_pos == 0 or n_coded < min_coded:
            batch = reader.random()
        else:
            # NOTE(review): five values are unpacked here, while other runners
            # in this file unpack four from train(); confirm against MAR.train.
            _, _, batch, _, _ = reader.train(weighting=True)
        for doc_id in batch:
            reader.code(doc_id, reader.body["label"][doc_id])
    return reader
read.BM25(query.split()) while True: pos, neg, total = read.get_numbers() try: print("%d, %d, %d" % (pos, pos + neg, read.est_num)) except: print("%d, %d" % (pos, pos + neg)) if pos + neg >= total: break if pos < 1: if query: ids, scores = read.BM25_get() for id in ids: read.code(id, read.body["label"][id]) else: for id in read.random(): read.code(id, read.body["label"][id]) else: uncertain, uncertain_proba, certain, certain_proba, _ = read.train( weighting=True, pne=True) if target_recall * read.est_num <= pos: break if pos <= thres: for id in uncertain: read.code(id, read.body["label"][id]) else: for id in certain: read.code(id, read.body["label"][id])