Пример #1
0
def main():
    """Load every input named on the command line and hand them to ``test``.

    ``keep_boundary`` is only forwarded as truthy when the option is set and
    the training and test argument paths are the same value.
    """
    args = process_commands()

    # Inputs are loaded in the same order as before.
    linkings = corpus.load_linking(args.linking)
    train_arguments = argument.ArgumentFile(args.argument)
    heldout_arguments = argument.ArgumentFile(args.argument_test)
    corpus_file = corpus.CorpusFile(
        args.corpus,
        args.corpus_pos,
        args.corpus_parse,
        args.corpus_dep,
    )
    folds = corpus.FoldsHelper(args.folds)

    same_argument_source = args.argument == args.argument_test
    keep_boundary = args.keep_boundary and same_argument_source

    positional = (
        folds,
        train_arguments,
        heldout_arguments,
        corpus_file,
        linkings,
        args.train,
        args.test,
        args.model,
        args.crfsuite,
        args.log,
        keep_boundary,
    )
    test(*positional,
         use_baseline=args.use_baseline,
         use_feature=args.select,
         reverse_select=args.reverse_select,
         rstats=args.rstats,
         threshold=args.threshold)
Пример #2
0
def main():
    """Select a classifier, compute word probabilities and write them out.

    The class chosen by ``args.classifier`` is stored in
    ``global_classifier['key']``. The result of ``train_word_probs`` is then
    passed to ``output_file`` together with ``args.output``.

    Raises:
        AssertionError: if ``args.classifier`` is not one of
            'SVM', 'DT', 'RF', 'NB', 'LSVM' or 'LR'.
    """
    args = process_commands()

    # choose a classifier
    if args.classifier == 'SVM':
        global_classifier['key'] = SVC
    elif args.classifier == 'DT':
        global_classifier['key'] = DecisionTreeClassifier
    elif args.classifier == 'RF':
        global_classifier['key'] = RandomForestClassifier
    elif args.classifier == 'NB':
        global_classifier['key'] = GaussianNB
    elif args.classifier == 'LSVM':
        global_classifier['key'] = LinearSVC
    elif args.classifier == 'LR':
        # NOTE(review): scikit-learn names this class LogisticRegression;
        # confirm LogisticRegressor is really defined/imported in this module.
        global_classifier['key'] = LogisticRegressor
    else:
        # Raised explicitly instead of ``assert False``: assert statements
        # are removed under ``python -O``, which would let training continue
        # without any classifier selected. The exception type is unchanged.
        raise AssertionError(
            'unknown classifier: {!r}'.format(args.classifier))

    # loading data

    fhelper = corpus.FoldsHelper(args.folds)
    feature_tbl = features.load_features_table(args.word_features)

    word_probs = train_word_probs(fhelper,
                                  feature_tbl,
                                  ambig_path=args.word_ambig,
                                  count_path=args.word_count,
                                  check_accuracy=args.check_accuracy)

    output_file(args.output, word_probs)
Пример #3
0
def main():
    """Extract linkage features and write them to the requested outputs.

    Two independent passes are possible: one for ``args.output`` and one,
    with ``perfect=True``, for ``args.perfect_output``. After each pass the
    accuracy check runs when ``args.check_accuracy`` is set.
    """
    args = process_commands()

    # loading data (same construction order as before)
    truth = linkage.LinkageFile(args.linkage)
    fhelper = corpus.FoldsHelper(args.folds)
    detector = linkage.LinkageDetector(args.connective)
    vectors = corpus.VectorFile(args.vector)
    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)

    def extract_and_write(destination, **mode):
        # One full pass: extract features, dump them, optionally evaluate.
        candidates, labels, feats = get_linkage_features(
            corpus_file,
            detector,
            vectors,
            truth,
            select=args.select,
            reverse_select=args.reverse_select,
            select_cnnct=args.select_cnnct,
            **mode)
        output_file(destination, candidates, labels, feats)
        if args.check_accuracy:
            check_accuracy(feats, labels)

    print('process file')

    if args.output:
        extract_and_write(args.output)

    # extract perfect features for sense experiments
    if args.perfect_output:
        print('process perfect file')
        extract_and_write(args.perfect_output, perfect=True)
Пример #4
0
def main():
    """Select a classifier, train linkage models and write the results.

    Probabilities are written when ``args.output`` is given, and
    classification results when ``args.output_classify`` is given; at least
    one of the two must be set. The class chosen by ``args.classifier`` is
    stored in ``global_classifier['key']`` before training.

    Raises:
        AssertionError: if neither output path is given, or if
            ``args.classifier`` is not one of 'SVM', 'DT', 'RF', 'NB',
            'LSVM' or 'LR'.
    """
    args = process_commands()
    train_prob = args.output is not None
    train_classify = args.output_classify is not None
    # Explicit raises replace the former ``assert`` statements: asserts are
    # removed under ``python -O``, which would silently skip these checks.
    # AssertionError is kept so the exception type is unchanged.
    if not (train_prob or train_classify):
        raise AssertionError(
            'nothing to do: neither args.output nor args.output_classify '
            'is set')

    # choose a classifier
    if args.classifier == 'SVM':
        global_classifier['key'] = SVC
    elif args.classifier == 'DT':
        global_classifier['key'] = DecisionTreeClassifier
    elif args.classifier == 'RF':
        global_classifier['key'] = RandomForestClassifier
    elif args.classifier == 'NB':
        global_classifier['key'] = GaussianNB
    elif args.classifier == 'LSVM':
        global_classifier['key'] = LinearSVC
    elif args.classifier == 'LR':
        # NOTE(review): scikit-learn names this class LogisticRegression;
        # confirm LogisticRegressor is really defined/imported in this module.
        global_classifier['key'] = LogisticRegressor
    else:
        raise AssertionError(
            'unknown classifier: {!r}'.format(args.classifier))

    fhelper = corpus.FoldsHelper(args.folds)
    # Feature-table keys are split on '-' into tuples.
    feature_tbl = features.load_features_table(args.linkage_features,
                                               lambda x: tuple(x.split('-')))
    linkage_counts = count_linkage(args.linkage)

    probs, classes = train_linkage_probs(fhelper,
                                         feature_tbl,
                                         linkage_counts,
                                         ambig_path=args.word_ambig,
                                         count_path=args.word_count,
                                         check_accuracy=args.check_accuracy,
                                         train_prob=train_prob,
                                         train_classify=train_classify)

    if train_prob:
        output_file(args.output, probs)
    if train_classify:
        output_file(args.output_classify, classes)
Пример #5
0
def main():
    """Run cross-validation with either the ranking or the pipeline model.

    Without ``args.pipeline`` the ranking model (B2) is evaluated. With
    ``args.pipeline`` and without ``args.perfect`` the pipeline model (B1) is
    evaluated. When both ``args.pipeline`` and ``args.perfect`` are set, no
    cross-validation is run.
    """
    args = process_commands()

    # Load everything up front, in the same order as before.
    corpus_file = corpus.CorpusFile(
        args.corpus, args.corpus_pos, args.corpus_parse)
    fhelper = corpus.FoldsHelper(args.folds)
    truth = linkage.LinkageFile(args.linkage)
    words = truth.all_words()
    detector = linkage.LinkageDetector(args.connective)

    def split_feature_key(key):
        # Feature-table keys are split on '-' into tuples.
        return tuple(key.split('-'))

    feature_tbl = features.load_features_table(args.linkage_features,
                                               split_feature_key)

    linkage_counts, lcdict = count_linkage(args.linkage)
    linkage_probs = load_linkage_probs(args.linkage_probs)
    linkage_class = load_linkage_probs(args.linkage_class)

    word_ambig = evaluate.WordAmbig(args.word_ambig)

    ranking_probs = compute_ranking_probs(linkage_probs)

    def cut_by_truth(x, _):
        # Cut when any (x[0], w) pair is missing from the truth words.
        return any((x[0], w) not in words for w in x[1])

    def cut_by_class(x, _):
        # Cut when the loaded linkage-class value is under the threshold.
        return linkage_class[x] < args.threshold

    def run_cross_validation(cut_fn, **extra):
        # Arguments shared by both models; ``extra`` carries the rest.
        cross_validation(
            corpus_file,
            fhelper,
            feature_tbl,
            truth,
            detector,
            linkage_counts,
            lcdict,
            ranking_probs,
            word_ambig,
            cut=cut_fn,
            words=words,
            perfect=args.perfect,
            count_path=args.word_count,
            greedy=args.greedy,
            rank=args.rank,
            predict_sstats=args.predict_sense,
            predict_wstats=args.predict_wstats,
            **extra)

    # The B2 model
    if not args.pipeline:
        print('===== ranking model =====')
        ranking_cut = cut_by_truth if args.perfect else cut_by_class
        run_cross_validation(ranking_cut, arg_output=args.arg_output)

    elif not args.perfect:
        word_probs, _word_truth = load_word_probs(args.word_probs)

        def cut_by_word_prob(x, _):
            # cut by word probs
            return any(word_probs[(x[0], w)] < args.threshold
                       for w in x[1])

        # The B1 model
        print('\n===== pipeline model =====')
        run_cross_validation(cut_by_word_prob)