def main():
    """Evaluate a self-trained classifier for every entry of PARAMS.

    Reads the (targets, scores) pair, the feature matrix and the passages
    from their pickle files. For each entry of PARAMS it trains through
    classify_scene.self_train_classifier, builds a label for every target
    and prints one tab-separated row: the parameter values followed by the
    length of each element returned by tokeneval.evaluate_with_type.
    Progress ("METHOD index") is written to stderr.
    """
    # NOTE: pickle.load can run arbitrary code; the paths below are assumed
    # to point at trusted, locally produced files.
    with open(TARGETS_PATH, "rb") as targets_file:
        target_array, scores = pickle.load(targets_file)
    target_list = target_array.tolist()

    with open(FMAT_PATH, "rb") as fmat_file:
        fmat = pickle.load(fmat_file)

    with open(PASSAGES_PATH, "rb") as passages_file:
        passages = pickle.load(passages_file)
    passages = passages[:NUM_PASSAGES]

    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [terminal.text for terminal in terminals]

    # Settings come from PARAMS; a disabled alternative in the original was
    # params_generator(NUM_SAMPLING).
    for index, params in enumerate(PARAMS):
        sys.stderr.write('{} {}\n'.format(METHOD, index))

        classifier, _, _ = classify_scene.self_train_classifier(
            fmat,
            scores,
            target_array,
            params,
            method=METHOD,
            c_param=CLS_PRM,
            nu_param=CLS_PRM,
            learn_rate=CLS_PRM,
            n_estimators=500,
        )

        # Targets that have a score are labelled by thresholding it; the
        # remaining rows of fmat are labelled by the trained classifier.
        target_labels = [
            int(score >= classify_scene.PRE_LABELS_THRESH) for score in scores
        ]
        target_labels.extend(
            classify_scene.predict_labels(classifier, fmat[len(scores):])
        )

        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, target_list, target_labels
        )

        row = [str(value) for value in params]
        row.extend(str(len(entry)) for entry in stats)
        print("\t".join(row))
def main():
    """Evaluate a self-trained classifier for every entry of PARAMS.

    Reads the (targets, scores) pair, the feature matrix and the passages
    from their pickle files. For each entry of PARAMS it trains through
    classify.self_train_classifier, builds a label for every target and
    prints one tab-separated row: the parameter values followed by the
    length of each element returned by tokeneval.evaluate_with_type.
    Progress ("METHOD index") is written to stderr.
    """
    # NOTE: pickle.load can run arbitrary code; the paths below are assumed
    # to point at trusted, locally produced files.
    with open(TARGETS_PATH, "rb") as targets_file:
        target_array, scores = pickle.load(targets_file)
    target_list = target_array.tolist()

    with open(FMAT_PATH, "rb") as fmat_file:
        fmat = pickle.load(fmat_file)

    with open(PASSAGES_PATH, "rb") as passages_file:
        passages = pickle.load(passages_file)
    passages = passages[:NUM_PASSAGES]

    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [terminal.text for terminal in terminals]

    # Settings come from PARAMS; a disabled alternative in the original was
    # params_generator(NUM_SAMPLING).
    for index, params in enumerate(PARAMS):
        sys.stderr.write('{} {}\n'.format(METHOD, index))

        classifier, _, _ = classify.self_train_classifier(
            fmat,
            scores,
            target_array,
            params,
            method=METHOD,
            c_param=CLS_PRM,
            nu_param=CLS_PRM,
            learn_rate=CLS_PRM,
            n_estimators=500,
        )

        # Targets that have a score are labelled by thresholding it; the
        # remaining rows of fmat are labelled by the trained classifier.
        target_labels = [
            int(score >= classify.PRE_LABELS_THRESH) for score in scores
        ]
        target_labels.extend(
            classify.predict_labels(classifier, fmat[len(scores):])
        )

        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, target_list, target_labels
        )

        row = [str(value) for value in params]
        row.extend(str(len(entry)) for entry in stats)
        print("\t".join(row))
def main():
    """Train one classifier on the labelled rows of FMAT and print its stats.

    Loads the passages pickle, keeps the first NUM_PASSAGES entries and
    extracts their terminals and token labels. A classifier is trained via
    classify.train_classifier on FMAT[:len(LABELS)] with LABELS. When
    TOKENS_FMAT is set, evaluation goes through
    tokeneval.evaluate_with_classifier; otherwise LABELS is extended with
    predictions for the remaining rows of FMAT and passed to
    tokeneval.evaluate_with_type. The length of each element of the result
    is printed as one tab-separated line.
    """
    # NOTE: pickle.load can run arbitrary code; PASSAGES_PATH is assumed to
    # point at a trusted, locally produced file.
    with open(PASSAGES_PATH, "rb") as passages_file:
        passages = pickle.load(passages_file)
    passages = passages[:NUM_PASSAGES]

    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [terminal.text for terminal in terminals]

    classifier = classify.train_classifier(
        FMAT[:len(LABELS)],
        LABELS,
        METHOD,
        c_param=PARAM,
        nu_param=PARAM,
        learn_rate=PARAM,
        n_estimators=500,
    )

    if TOKENS_FMAT is None:
        # Type-level evaluation: given labels first, then predicted labels
        # for the rows of FMAT beyond the labelled prefix.
        target_labels = LABELS.tolist()
        predicted = classify.predict_labels(classifier, FMAT[len(LABELS):])
        target_labels += predicted.tolist()
        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, TARGETS, target_labels
        )
    else:
        # use token evaluation, not type
        stats = tokeneval.evaluate_with_classifier(
            tokens, token_labels, TARGETS, TOKENS_FMAT, classifier
        )

    row = [str(len(entry)) for entry in stats]
    print("\t".join(row))