def main():
    args = process_commands()

    linkings = corpus.load_linking(args.linking)
    arguments = argument.ArgumentFile(args.argument)
    test_arguments = argument.ArgumentFile(args.argument_test)
    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse, args.corpus_dep)
    fhelper = corpus.FoldsHelper(args.folds)

    # boundaries can only be kept when train and test argument files are the same
    keep_boundary = args.keep_boundary and args.argument == args.argument_test

    test(fhelper, arguments, test_arguments, corpus_file, linkings,
         args.train, args.test, args.model, args.crfsuite, args.log,
         keep_boundary,
         use_baseline=args.use_baseline,
         use_feature=args.select,
         reverse_select=args.reverse_select,
         rstats=args.rstats,
         threshold=args.threshold)
def main():
    args = process_commands()

    # choose a classifier
    if args.classifier == 'SVM':
        global_classifier['key'] = SVC
    elif args.classifier == 'DT':
        global_classifier['key'] = DecisionTreeClassifier
    elif args.classifier == 'RF':
        global_classifier['key'] = RandomForestClassifier
    elif args.classifier == 'NB':
        global_classifier['key'] = GaussianNB
    elif args.classifier == 'LSVM':
        global_classifier['key'] = LinearSVC
    elif args.classifier == 'LR':
        global_classifier['key'] = LogisticRegression
    else:
        assert False, 'unknown classifier: {}'.format(args.classifier)

    # loading data
    fhelper = corpus.FoldsHelper(args.folds)
    feature_tbl = features.load_features_table(args.word_features)

    word_probs = train_word_probs(
        fhelper, feature_tbl,
        ambig_path=args.word_ambig,
        count_path=args.word_count,
        check_accuracy=args.check_accuracy)

    output_file(args.output, word_probs)
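# A minimal alternative sketch of the classifier dispatch above, assuming the
# standard scikit-learn estimators are the intended classes. CLASSIFIER_MAP and
# choose_classifier are hypothetical names used only for illustration; they are
# not part of the original scripts.
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

CLASSIFIER_MAP = {
    'SVM': SVC,
    'DT': DecisionTreeClassifier,
    'RF': RandomForestClassifier,
    'NB': GaussianNB,
    'LSVM': LinearSVC,
    'LR': LogisticRegression,
}

def choose_classifier(name, registry):
    """Look up the estimator class for *name* and store it in *registry*."""
    if name not in CLASSIFIER_MAP:
        raise ValueError('unknown classifier: {}'.format(name))
    registry['key'] = CLASSIFIER_MAP[name]
    return registry['key']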
def main():
    args = process_commands()

    # loading data
    truth = linkage.LinkageFile(args.linkage)
    fhelper = corpus.FoldsHelper(args.folds)
    detector = linkage.LinkageDetector(args.connective)
    vectors = corpus.VectorFile(args.vector)
    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)

    print('process file')

    if args.output:
        cands, Y, X = get_linkage_features(
            corpus_file, detector, vectors, truth,
            reverse_select=args.reverse_select,
            select=args.select,
            select_cnnct=args.select_cnnct)

        output_file(args.output, cands, Y, X)

        if args.check_accuracy:
            check_accuracy(X, Y)

    # extract perfect features for sense experiments
    if args.perfect_output:
        print('process perfect file')

        cands, Y, X = get_linkage_features(
            corpus_file, detector, vectors, truth,
            select=args.select,
            reverse_select=args.reverse_select,
            perfect=True,
            select_cnnct=args.select_cnnct)

        output_file(args.perfect_output, cands, Y, X)

        if args.check_accuracy:
            check_accuracy(X, Y)
def main():
    args = process_commands()

    train_prob = args.output is not None
    train_classify = args.output_classify is not None
    assert train_prob or train_classify

    # choose a classifier
    if args.classifier == 'SVM':
        global_classifier['key'] = SVC
    elif args.classifier == 'DT':
        global_classifier['key'] = DecisionTreeClassifier
    elif args.classifier == 'RF':
        global_classifier['key'] = RandomForestClassifier
    elif args.classifier == 'NB':
        global_classifier['key'] = GaussianNB
    elif args.classifier == 'LSVM':
        global_classifier['key'] = LinearSVC
    elif args.classifier == 'LR':
        global_classifier['key'] = LogisticRegression
    else:
        assert False, 'unknown classifier: {}'.format(args.classifier)

    fhelper = corpus.FoldsHelper(args.folds)
    feature_tbl = features.load_features_table(
        args.linkage_features, lambda x: tuple(x.split('-')))
    linkage_counts = count_linkage(args.linkage)

    probs, classes = train_linkage_probs(
        fhelper, feature_tbl, linkage_counts,
        ambig_path=args.word_ambig,
        count_path=args.word_count,
        check_accuracy=args.check_accuracy,
        train_prob=train_prob,
        train_classify=train_classify)

    if train_prob:
        output_file(args.output, probs)
    if train_classify:
        output_file(args.output_classify, classes)
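# A small illustration of the key transform passed to load_features_table
# above (lambda x: tuple(x.split('-'))): dash-joined linkage keys are split
# back into tuples. split_linkage_key and the sample key are hypothetical,
# shown only to make the transform concrete.
def split_linkage_key(key):
    """Split a dash-joined linkage key into a tuple of word offsets."""
    return tuple(key.split('-'))

print(split_linkage_key('12-15-18'))  # -> ('12', '15', '18')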
def main():
    args = process_commands()

    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)
    fhelper = corpus.FoldsHelper(args.folds)
    truth = linkage.LinkageFile(args.linkage)
    words = truth.all_words()
    detector = linkage.LinkageDetector(args.connective)
    feature_tbl = features.load_features_table(
        args.linkage_features, lambda x: tuple(x.split('-')))
    linkage_counts, lcdict = count_linkage(args.linkage)
    linkage_probs = load_linkage_probs(args.linkage_probs)
    linkage_class = load_linkage_probs(args.linkage_class)
    word_ambig = evaluate.WordAmbig(args.word_ambig)

    ranking_probs = compute_ranking_probs(linkage_probs)

    if args.perfect:
        # keep only candidates whose words all appear in the gold annotation
        cut = lambda x, _: any((x[0], w) not in words for w in x[1])
    else:
        # drop candidates whose classification score is below the threshold
        cut = lambda x, _: linkage_class[x] < args.threshold

    # The B2 model
    if not args.pipeline:
        print('===== ranking model =====')
        cross_validation(
            corpus_file, fhelper, feature_tbl, truth, detector,
            linkage_counts, lcdict, ranking_probs, word_ambig,
            cut=cut,
            words=words,
            perfect=args.perfect,
            count_path=args.word_count,
            arg_output=args.arg_output,
            greedy=args.greedy,
            rank=args.rank,
            predict_sstats=args.predict_sense,
            predict_wstats=args.predict_wstats,
        )
    elif not args.perfect:
        word_probs, word_truth = load_word_probs(args.word_probs)

        # cut by word probs
        cut = lambda x, _: any(
            word_probs[(x[0], w)] < args.threshold for w in x[1])
        # or linkage_class[x] < args.threshold

        # The B1 model
        print('\n===== pipeline model =====')
        cross_validation(
            corpus_file, fhelper, feature_tbl, truth, detector,
            linkage_counts, lcdict, ranking_probs, word_ambig,
            cut=cut,
            words=words,
            count_path=args.word_count,
            perfect=args.perfect,
            greedy=args.greedy,
            rank=args.rank,
            predict_sstats=args.predict_sense,
            predict_wstats=args.predict_wstats,
        )
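# A hedged, self-contained illustration of the threshold cut used above: a
# candidate is dropped when its score falls below the threshold. The candidate
# keys mimic the (label, word-offsets) shape seen in the lambdas, but
# make_threshold_cut and the sample scores are made up for demonstration only.
def make_threshold_cut(scores, threshold):
    """Return a predicate that cuts candidates scoring below *threshold*."""
    return lambda x, _: scores[x] < threshold

example_scores = {
    ('doc1', ('12', '15')): 0.8,
    ('doc1', ('20',)): 0.3,
}
cut = make_threshold_cut(example_scores, threshold=0.5)
kept = [x for x in example_scores if not cut(x, None)]
print(kept)  # only the candidate scoring 0.8 survives the cut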