示例#1
0
    # Build the FeverDocDB handle from the value supplied in args.db.
    logger.info("Loading DB {0}".format(args.db))
    db = FeverDocDB(args.db)

    # The model name is reused below as the `naming` argument of the feature
    # function and as the first argument of Features.
    mname = args.model
    logger.info("Model name is {0}".format(mname))

    # Feature functions handed to Features; exactly one is appended below.
    ffns = []

    # args.sentence selects the sentence-level term-frequency feature function;
    # otherwise the document-level variant is used.
    if args.sentence:
        logger.info("Model is Sentence level")
        ffns.append(SentenceLevelTermFrequencyFeatureFunction(db, naming=mname))
    else:
        logger.info("Model is Document level")
        ffns.append(TermFrequencyFeatureFunction(db,naming=mname))

    f = Features(mname,ffns)
    jlr = JSONLineReader()

    # One formatter instance is shared by every split. Its first argument is
    # None here -- NOTE(review): meaning of that slot is not visible in this
    # chunk; confirm against FEVERGoldFormatter.
    formatter = FEVERGoldFormatter(None, FEVERLabelSchema(),filtering=args.filtering)

    # Train and dev splits are mandatory and share the same reader/formatter.
    train_ds = DataSet(file=args.train, reader=jlr, formatter=formatter)
    dev_ds = DataSet(file=args.dev, reader=jlr, formatter=formatter)

    train_ds.read()
    dev_ds.read()

    # The test split is optional: test_ds stays None unless args.test is set.
    test_ds = None
    if args.test is not None:
        test_ds = DataSet(file=args.test, reader=jlr, formatter=formatter)
        test_ds.read()
示例#2
0
                          name="davidson_train")
    davidson_tr.read()

    # Davidson dev split, built from data/davidson.dv.csv with the same
    # reader and formatter as the other Davidson splits in view.
    davidson_dv = DataSet(os.path.join("data", "davidson.dv.csv"),
                          reader=csvreader,
                          formatter=df,
                          name="davidson_dev")
    davidson_dv.read()

    # Davidson test split, built from data/davidson.te.csv.
    davidson_te = DataSet(os.path.join("data", "davidson.te.csv"),
                          reader=csvreader,
                          formatter=df,
                          name="davidson_test")
    davidson_te.read()

    # Load features for five datasets in one call; the five results are
    # unpacked in the same order as the arguments.
    # NOTE(review): davidson_dv is read above but is not passed to the
    # features.load call below -- the only dev argument is
    # waseem_de_composite. Confirm this is intentional.
    features = Features(get_feature_functions(mname))
    primary_train_fs, aux_train_fs, dev_fs, test_fs_primary, test_fs_aux = features.load(
        waseem_tr_composite, davidson_tr, waseem_de_composite,
        waseem_te_composite, davidson_te)

    # shape[1] of the first element of each feature tuple is reported as the
    # feature count.
    print("Number of features in primary: {0}".format(
        primary_train_fs[0].shape[1]))
    print("Number of features aux (=): {0}".format(aux_train_fs[0].shape[1]))

    # The primary feature count is the first MTMLP argument.
    # NOTE(review): the trailing `3, 3` are presumably the output sizes of the
    # two tasks -- confirm against the MTMLP signature.
    model = MTMLP(primary_train_fs[0].shape[1], get_model_shape(), 3, 3)

    # model.cuda() is only called when gpu() returns a truthy value.
    if gpu():
        model.cuda()
    if model_exists(mname) and os.getenv("TRAIN").lower() not in [
            "y", "1", "t", "yes"
示例#3
0
    def __init__(self):
        """Initialise the parent class with the labels "related" and "unrelated"."""
        labels = ["related", "unrelated"]
        super().__init__(labels)


if __name__ == "__main__":
    # Script entry point: builds the feature extractor and the three FEVER
    # datasets (train / dev / test) for the "related vs. unrelated" setup.
    SimpleRandom.set_seeds()

    # Both values are kept as strings and, in the code visible here, are only
    # interpolated into the model name and the dataset file names.
    # NOTE(review): no argument-count check -- running with fewer than two
    # command-line arguments raises IndexError here.
    maxdoc = sys.argv[1]
    ns_docsize = sys.argv[2]

    db = FeverDocDB("data/fever/fever.db")
    # Materialise the document ids as a set; it is passed to the formatter.
    idx = set(db.get_doc_ids())

    mname = "2way-p{0}-{1}".format(maxdoc, ns_docsize)

    f = Features([SentenceTermFrequencyFeatureFunction(db, naming=mname)])
    jlr = JSONLineReader()

    formatter = FEVERSentenceFormatter(idx, db, RelatedLabelSchema())

    # The training file name is parameterised by ns_docsize, whereas the dev
    # and test file names are parameterised by maxdoc.
    train_ds = DataSet(
        file="data/fever/train.ns.pages.p{0}.jsonl".format(ns_docsize),
        reader=jlr,
        formatter=formatter)
    dev_ds = DataSet(file="data/fever/dev.pages.p{0}.jsonl".format(maxdoc),
                     reader=jlr,
                     formatter=formatter)
    test_ds = DataSet(file="data/fever/test.pages.p{0}.jsonl".format(maxdoc),
                      reader=jlr,
                      formatter=formatter)
示例#4
0
    def format_line(self, line):
        """Convert one stance record into a claim/evidence/label dict.

        Args:
            line: Mapping with "Headline" and "Body ID" keys and an optional
                "Stance" key.

        Returns:
            A dict with "claim" (the headline), "evidence" (the body id) and
            "label" (``self.label_schema.get_id`` of the stance, or ``None``
            when the record has no "Stance" key).

        Raises:
            KeyError: If "Headline" or "Body ID" is missing from ``line``.
        """
        # Records without a "Stance" key get no label.
        if "Stance" in line:
            label_id = self.label_schema.get_id(line["Stance"])
        else:
            label_id = None

        record = {"claim": line["Headline"], "evidence": line["Body ID"]}
        record["label"] = label_id
        return record


if __name__ == "__main__":
    # Script entry point: loads the FNC-1 train and competition-test stance
    # files and computes term-frequency features for both.
    # Bodies is built from both the training and the competition-test body
    # files.
    bodies = Bodies("data/fnc-1/train_bodies.csv",
                    "data/fnc-1/competition_test_bodies.csv")

    f = Features([FNCTermFrequencyFeatureFunction(bodies)])
    csvr = CSVReader()
    formatter = FNCFormatter(FNCLabelSchema())

    # Train and test share the same reader and formatter instances.
    train_ds = DataSet(file="data/fnc-1/train_stances.csv",
                       reader=csvr,
                       formatter=formatter)
    test_ds = DataSet(file="data/fnc-1/competition_test_stances.csv",
                      reader=csvr,
                      formatter=formatter)

    train_ds.read()
    test_ds.read()

    # None is passed as the second dataset and the second result is discarded.
    # NOTE(review): presumably that slot is the dev split -- confirm against
    # Features.load.
    train_feats, _, test_feats = f.load(train_ds, None, test_ds)
示例#5
0
            "claim": line["Headline"],
            "evidence": line["Body ID"],
            "label": annotation
        }


if __name__ == "__main__":

    # Script entry point: sets up FNC-1 train/dev datasets and a FEVER test
    # dataset that share one combined feature function.
    db = FeverDocDB("data/fever/drqa.db")
    # Materialise the document ids as a set; it is passed to the FEVER
    # formatter below.
    idx = set(db.get_doc_ids())

    fnc_bodies = Bodies("data/fnc-1/train_bodies.csv",
                        "data/fnc-1/competition_test_bodies.csv")
    # The FEVER "bodies" source is the document DB itself (same object as db).
    fever_bodies = db

    f = Features(
        [FeverOrFNCTermFrequencyFeatureFunction(fever_bodies, fnc_bodies)])
    # Two reader/formatter pairs: CSV for the FNC files, JSON lines for FEVER.
    csvr = CSVReader()
    jlr = JSONLineReader()
    fnc_formatter = FNCFormatter2(FNCSimpleLabelSchema())
    fever_formatter = FEVERPredictionsFormatter(idx, FEVERLabelSchema())

    # train_ds and dev_ds are both FNC-1 data; dev_ds is built from the
    # competition test stances file.
    train_ds = DataSet(file="data/fnc-1/train_stances.csv",
                       reader=csvr,
                       formatter=fnc_formatter)
    dev_ds = DataSet(file="data/fnc-1/competition_test_stances.csv",
                     reader=csvr,
                     formatter=fnc_formatter)
    # test_ds is FEVER data, built from the FEVER dev pages file with the
    # FEVER formatter.
    test_ds = DataSet(file="data/fever/fever.dev.pages.p5.jsonl",
                      reader=jlr,
                      formatter=fever_formatter)