name="davidson_train") davidson_tr.read() davidson_dv = DataSet(os.path.join("data", "davidson.dv.csv"), reader=csvreader, formatter=df, name="davidson_dev") davidson_dv.read() davidson_te = DataSet(os.path.join("data", "davidson.te.csv"), reader=csvreader, formatter=df, name="davidson_test") davidson_te.read() features = Features(get_feature_functions(mname)) primary_train_fs, aux_train_fs, dev_fs, test_fs_primary, test_fs_aux = features.load( waseem_tr_composite, davidson_tr, waseem_de_composite, waseem_te_composite, davidson_te) print("Number of features in primary: {0}".format( primary_train_fs[0].shape[1])) print("Number of features aux (=): {0}".format(aux_train_fs[0].shape[1])) model = MTMLP(primary_train_fs[0].shape[1], get_model_shape(), 3, 3) if gpu(): model.cuda() if model_exists(mname) and os.getenv("TRAIN").lower() not in [ "y", "1", "t", "yes"
# Open the document database given by args.db.
logger.info("Loading DB {0}".format(args.db))
db = FeverDocDB(args.db)

mname = args.model
logger.info("Model name is {0}".format(mname))

# Exactly one term-frequency feature function is registered; args.sentence
# selects the sentence-level class over the document-level one.
if args.sentence:
    logger.info("Model is Sentence level")
    ffns = [SentenceLevelTermFrequencyFeatureFunction(db, naming=mname)]
else:
    logger.info("Model is Document level")
    ffns = [TermFrequencyFeatureFunction(db, naming=mname)]
f = Features(mname, ffns)

# All datasets share the same reader and formatter instances.
jlr = JSONLineReader()
formatter = FEVERGoldFormatter(
    None,
    FEVERLabelSchema(),
    filtering=args.filtering,
)

train_ds = DataSet(file=args.train, reader=jlr, formatter=formatter)
dev_ds = DataSet(file=args.dev, reader=jlr, formatter=formatter)
train_ds.read()
dev_ds.read()

# The test split is optional: test_ds stays None when args.test is None.
if args.test is None:
    test_ds = None
else:
    test_ds = DataSet(file=args.test, reader=jlr, formatter=formatter)
    test_ds.read()
def format_line(self, line):
    """Convert one stance row into a claim/evidence/label dictionary.

    ``line`` must provide the keys ``"Headline"`` and ``"Body ID"``. When it
    also contains ``"Stance"``, that value is mapped through
    ``self.label_schema.get_id``; otherwise the label is ``None``.
    """
    label = None
    if "Stance" in line:
        label = self.label_schema.get_id(line["Stance"])

    return {
        "claim": line["Headline"],
        "evidence": line["Body ID"],
        "label": label,
    }


if __name__ == "__main__":
    # One Bodies object is built from both the train and the
    # competition-test body files.
    bodies = Bodies(
        "data/fnc-1/train_bodies.csv",
        "data/fnc-1/competition_test_bodies.csv",
    )
    f = Features([FNCTermFrequencyFeatureFunction(bodies)])

    # Both stance files go through the same CSV reader and formatter.
    csvr = CSVReader()
    formatter = FNCFormatter(FNCLabelSchema())
    train_ds = DataSet(
        file="data/fnc-1/train_stances.csv",
        reader=csvr,
        formatter=formatter,
    )
    test_ds = DataSet(
        file="data/fnc-1/competition_test_stances.csv",
        reader=csvr,
        formatter=formatter,
    )
    train_ds.read()
    test_ds.read()

    # None is passed in the middle slot of load(); the matching middle
    # result is discarded.
    train_feats, _, test_feats = f.load(train_ds, None, test_ds)
dataset.read() waseem_dv_composite.add(dataset) davidson_te = DataSet(os.path.join("data", "davidson.te.csv"), reader=csvreader, formatter=df, name="davidson_test") davidson_te.read() davidson_dv = DataSet(os.path.join("data", "davidson.dv.csv"), reader=csvreader, formatter=df, name="davidson_dev") davidson_dv.read() features = Features(get_feature_functions(mname)) train_fs, dev_fs, test_fs = features.load(waseem_tr_composite, waseem_dv_composite, davidson_te) print("Number of features: {0}".format(train_fs[0].shape[1])) model = MLP(train_fs[0].shape[1], get_model_shape(), 3) if gpu(): model.cuda() if model_exists(mname) and os.getenv("TRAIN").lower() not in [ "y", "1", "t", "yes" ]: model.load_state_dict(torch.load("models/{0}.model".format(mname))) else: train(model,
db = FeverDocDB(args.db)

mname = args.model
logger.info("Model name is {0}".format(mname))

# Exactly one term-frequency feature function is registered; args.sentence
# selects the sentence-level class over the document-level one.
if args.sentence:
    logger.info("Model is Sentence level")
    ffns = [SentenceLevelTermFrequencyFeatureFunction(db, naming=mname)]
else:
    logger.info("Model is Document level")
    ffns = [TermFrequencyFeatureFunction(db, naming=mname)]

# The vocabulary is loaded under the model name before any lookup happens.
f = Features(mname, ffns)
f.load_vocab(mname)

jlr = JSONLineReader()
formatter = FEVERGoldFormatter(None, FEVERLabelSchema())

test_ds = DataSet(file=args.test, reader=jlr, formatter=formatter)
test_ds.read()
feats = f.lookup(test_ds)

# The model's input width is the second dimension of the first lookup result.
# NOTE(review): 100 and 3 are presumably the hidden size and the number of
# output classes -- confirm against SimpleMLP's signature.
input_shape = feats[0].shape[1]
model = SimpleMLP(input_shape, 100, 3)

if gpu():
    model.cuda()
print('Db is ', db)

mname = args.model
logger.info("Model name is {0}".format(mname))

# Exactly one term-frequency feature function is registered; args.sentence
# selects the sentence-level class over the document-level one.
if args.sentence:
    logger.info("Model is Sentence level")
    ffns = [SentenceLevelTermFrequencyFeatureFunction(db, naming=mname)]
else:
    logger.info("Model is Document level")
    ffns = [TermFrequencyFeatureFunction(db, naming=mname)]
f = Features(mname, ffns)

# All datasets share the same reader and formatter instances.
jlr = JSONLineReader()
formatter = FEVERGoldFormatter(
    None,
    FEVERLabelSchema(),
    filtering=args.filtering,
)

train_ds = DataSet(file=args.train, reader=jlr, formatter=formatter)
dev_ds = DataSet(file=args.dev, reader=jlr, formatter=formatter)
train_ds.read()
dev_ds.read()

# The test split is optional: test_ds stays None when args.test is None.
test_ds = None
if args.test is not None:
    test_ds = DataSet(file=args.test, reader=jlr, formatter=formatter)
def __init__(self):
    """Pass the two labels "related" and "unrelated" to the parent initialiser."""
    labels = ["related", "unrelated"]
    super().__init__(labels)


if __name__ == "__main__":
    SimpleRandom.set_seeds()

    # Two positional command-line arguments are required; both are kept as
    # strings and only ever interpolated into the model name and file names.
    maxdoc, ns_docsize = sys.argv[1], sys.argv[2]

    db = FeverDocDB("data/fever/fever.db")
    idx = set(db.get_doc_ids())

    mname = "2way-p{0}-{1}".format(maxdoc, ns_docsize)
    f = Features([SentenceTermFrequencyFeatureFunction(db, naming=mname)])

    # Every split shares one JSON-lines reader and one formatter.
    jlr = JSONLineReader()
    formatter = FEVERSentenceFormatter(idx, db, RelatedLabelSchema())

    # The training file name uses ns_docsize; dev and test use maxdoc.
    train_ds = DataSet(
        file="data/fever/train.ns.pages.p{0}.jsonl".format(ns_docsize),
        reader=jlr,
        formatter=formatter,
    )
    dev_ds = DataSet(
        file="data/fever/dev.pages.p{0}.jsonl".format(maxdoc),
        reader=jlr,
        formatter=formatter,
    )
    test_ds = DataSet(
        file="data/fever/test.pages.p{0}.jsonl".format(maxdoc),
        reader=jlr,
        formatter=formatter,
    )
reader=csvreader, name="davidson_train") davidson_dv_dataset = DataSet(os.path.join("data", "davidson.dv.csv"), formatter=df, reader=csvreader, name="davidson_dev") davidson_te_dataset = DataSet(os.path.join("data", "davidson.te.csv"), formatter=df, reader=csvreader, name="davidson_test") davidson_tr_dataset.read() davidson_dv_dataset.read() davidson_te_dataset.read() features = Features(get_feature_functions(mname)) train_fs, dev_fs, test_fs = features.load(davidson_tr_dataset, davidson_dv_dataset, davidson_te_dataset) print("Number of features: {0}".format(train_fs[0].shape[1])) model = MLP(train_fs[0].shape[1], get_model_shape(), 3) if gpu(): model.cuda() if model_exists(mname) and os.getenv("TRAIN").lower() not in [ "y", "1", "t", "yes" ]: model.load_state_dict(torch.load("models/{0}.model".format(mname))) else:
"claim": line["Headline"], "evidence": line["Body ID"], "label": annotation } if __name__ == "__main__": db = FeverDocDB("data/fever/drqa.db") idx = set(db.get_doc_ids()) fnc_bodies = Bodies("data/fnc-1/train_bodies.csv", "data/fnc-1/competition_test_bodies.csv") fever_bodies = db f = Features( [FeverOrFNCTermFrequencyFeatureFunction(fever_bodies, fnc_bodies)]) csvr = CSVReader() jlr = JSONLineReader() fnc_formatter = FNCFormatter2(FNCSimpleLabelSchema()) fever_formatter = FEVERPredictionsFormatter(idx, FEVERLabelSchema()) train_ds = DataSet(file="data/fnc-1/train_stances.csv", reader=csvr, formatter=fnc_formatter) dev_ds = DataSet(file="data/fnc-1/competition_test_stances.csv", reader=csvr, formatter=fnc_formatter) test_ds = DataSet(file="data/fever/fever.dev.pages.p5.jsonl", reader=jlr, formatter=fever_formatter)