data = read_twitter(trainfile=args.train, evalfiles=args.eval)

    import tagger

    if args.tagger == "logreg":
        tagger = tagger.LogisticRegressionTagger()
    elif args.tagger == "crf":
        tagger = tagger.CRFPerceptron()
    else:
        sys.stderr.write("Did not properly select tagger!")
        sys.exit(1)

    # Train the tagger
    tagger.fit_data(data.train_sents, data.train_labels)

    # Evaluation (also writes out predictions)
    trainoutfile = "{}/{}.pred".format(args.outdir,
                                       os.path.basename(args.train))
    print "### Train evaluation; writing to {}".format(trainoutfile)
    data.train_preds = tagger.evaluate_data(data.train_sents,
                                            data.train_labels)
    write_preds(trainoutfile, data.train_sents, data.train_labels,
                data.train_preds)
    for evalstr, evalset in zip(args.eval, data.eval):
        evaloutfile = "{}/{}.pred".format(args.outdir,
                                          os.path.basename(evalstr))
        print "### evaluation of {}; writing to {}".format(
            evalstr, evaloutfile)
        preds = tagger.evaluate_data(evalset["sents"], evalset["labels"])
        write_preds(evaloutfile, evalset["sents"], evalset["labels"], preds)
Exemplo n.º 2
0
    # Train the tagger selected by `model`, evaluate it on the train and dev
    # splits (and on the test split when requested), and write each set of
    # predictions under base_path_predictions.
    use_test = args.test

    data = read_twitter(test=use_test)

    # Any value of `model` other than 'crf' falls back to logistic regression.
    # NOTE(review): `tagger` is rebound here from whatever it named before
    # (presumably the tagger module) to an instance. If this code sits inside
    # a function and the module is imported at file level, this assignment
    # would raise UnboundLocalError -- confirm where `tagger` is imported.
    if model == 'crf':
        tagger = tagger.CRFPerceptron()
    else:
        tagger = tagger.LogisticRegressionTagger()


    # Train the tagger
    tagger.fit_data(data.train_sents, data.train_labels)

    # Evaluation (also writes out predictions)
    print "### Train evaluation"
    data.train_preds = tagger.evaluate_data(data.train_sents, data.train_labels)
    write_preds("%s/twitter_train.%s.pred" % (base_path_predictions, model),
                data.train_sents,
                data.train_labels,
                data.train_preds)

    print "### Dev evaluation"
    data.dev_preds = tagger.evaluate_data(data.dev_sents, data.dev_labels)
    write_preds("%s/twitter_dev.%s.pred" % (base_path_predictions, model),
        data.dev_sents, data.dev_labels, data.dev_preds)

    # Test predictions are only generated when use_test is set.
    if use_test:
        print "### Generating Test predictions"
        # NOTE(review): `quite=True` may be a misspelling of `quiet` -- confirm
        # against the keyword actually accepted by evaluate_data.
        data.test_preds = tagger.evaluate_data(data.test_sents, data.test_labels, quite=True)
        write_preds("%s/twitter_test.%s.pred" % (base_path_predictions, model),
Exemplo n.º 3
0
    # Do not run: the following function was used to generate the splits.
    # file_splitter("data/twitter_train_all.pos", "data/twitter_train.pos", "data/twitter_dev.pos")

    # dname selects the dataset and is also embedded in the names of the
    # prediction files written below.
    dname = "pos"  # or "ner"
    data = read_twitter(dname)
    # data = synthetic_data()

    # Imported under an alias so the module is not rebound to the tagger
    # instance that is created right below.
    import tagger as tagger_module
    tagger = tagger_module.LogisticRegressionTagger()
    # tagger = tagger_module.CRFPerceptron()

    # Train the tagger
    tagger.fit_data(data.train_sents, data.train_labels)

    # Evaluation (also writes out predictions).
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    print("### Train evaluation")
    data.train_preds = tagger.evaluate_data(data.train_sents,
                                            data.train_labels)
    write_preds("data/twitter_train.%s.pred" % dname, data.train_sents,
                data.train_labels, data.train_preds)
    print("### Dev evaluation")
    data.dev_preds = tagger.evaluate_data(data.dev_sents, data.dev_labels)
    write_preds("data/twitter_dev.%s.pred" % dname, data.dev_sents,
                data.dev_labels, data.dev_preds)

    # The following is commented out; only useful once test data is available.
    # print("### Test evaluation")
    # data.test_preds = tagger.evaluate_data(data.test_sents, data.test_labels)
    # write_preds("data/twitter_test.%s.pred" % dname,
    #     data.test_sents, data.test_labels, data.test_preds)
Exemplo n.º 4
0
                        default=4,
                        help="batch size")
    # Parse the command line; an IOError raised while parsing is reported
    # through the parser's own error hook.
    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    # print os.getcwd() #
    data = read_twitter(trainfile=args.train, evalfiles=args.eval)

    # Imported under an alias so the module is not rebound to the tagger
    # instance that is created right below.
    import tagger as tagger_module
    if args.tagger == "logreg":
        tagger = tagger_module.LogisticRegressionTagger()
    elif args.tagger == "crf":
        # Only the CRF tagger receives the epochs / batch-size settings.
        tagger = tagger_module.CRFPerceptron(args.epochs, args.batch_size)
    else:
        # End the message with a newline so the shell prompt is not glued
        # to it, then exit with a non-zero status.
        sys.stderr.write("Did not properly select tagger!\n")
        sys.exit(1)

    # Train the tagger
    tagger.fit_data(data.train_sents, data.train_labels)

    # Evaluation (also writes out predictions).
    # args.eval (paths) and data.eval (loaded sets) are walked in lockstep;
    # each prediction file is named after its input file, inside args.outdir.
    for evalstr, evalset in zip(args.eval, data.eval):
        evaloutfile = "{}/{}.pred".format(args.outdir,
                                          os.path.basename(evalstr))
        print("### evaluation of {}; writing to {}".format(
            evalstr, evaloutfile))
        preds = tagger.evaluate_data(evalset["sents"], evalset["labels"])
        write_preds(evaloutfile, evalset["sents"], evalset["labels"], preds)