def make(dataset):
    # Build the fully adversarial version of a split: replace each claim with
    # its mapped adversarial counterpart and flip the binary warrant label.
    claim_map = dev_claim_map if dataset == 'dev' else test_claim_map
    df = data.load(dataset)
    file_name = '%s-adv-full.txt' % dataset
    new_claims = [claim_map[c] for c in list(df.claim)]
    new_labels = [not l for l in df.correctLabelW0orW1]
    adv = df.copy()
    adv.claim = new_claims
    adv.correctLabelW0orW1 = new_labels
    adv.to_csv('data/arct/%s' % file_name, sep='\t', index=False)

def test(self, args):
    data_points = data.load('test-adv')
    return self.get_data_loader(data_points, args)

def dev(self, args):
    data_points = data.load('dev-adv')
    return self.get_data_loader(data_points, args)

def train(self, args):
    data_points = data.load('train-adv')
    self.n_training_points = len(data_points)
    return self.get_data_loader(data_points, args)

def test(self, args):
    return self.get_data_loader(data.load('test'), args)

def dev(self, args):
    return self.get_data_loader(data.load('dev'), args)

import json
import os

# `data` and `glovar` are project-local modules (data loading helpers and path
# constants); their exact import paths are assumed here.
import data
import glovar
from util import text


def flatten(list_of_lists):
    return [x for sublist in list_of_lists for x in sublist]


if __name__ == '__main__':
    print('Building ARCT vocab...')

    # grab all sents from all data subsets
    datasets = ['train', 'dev', 'test']
    sent_cols = ['claim', 'reason', 'warrant0', 'warrant1']
    sents = []
    for dataset in datasets:
        df = data.load(dataset)
        for _, row in df.iterrows():
            for col in sent_cols:
                sents.append(row[col])

    # tokenize
    tokens = set(flatten([text.tokenize(s) for s in sents]))

    # build the vocab dictionary
    vocab = dict(zip(tokens, range(len(tokens))))
    rev_vocab = {v: k for k, v in vocab.items()}

    # save the vocab dictionary
    vocab_path = os.path.join(glovar.ARCT_DIR, 'vocab.json')
    rev_vocab_path = os.path.join(glovar.ARCT_DIR, 'rev_vocab.json')
    with open(vocab_path, 'w') as f:
        json.dump(vocab, f)
    with open(rev_vocab_path, 'w') as f:
        json.dump(rev_vocab, f)

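# Usage sketch (an assumption, not part of the source; reuses the json, os,
# glovar and text imports from the snippet above): read the saved vocab.json
# back and map a tokenized sentence to integer ids. Unknown tokens are simply
# skipped because the dictionary built above defines no out-of-vocabulary id.
def load_vocab():
    with open(os.path.join(glovar.ARCT_DIR, 'vocab.json')) as f:
        return json.load(f)


def sent_to_ids(sentence, vocab):
    return [vocab[t] for t in text.tokenize(sentence) if t in vocab]
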
import pandas as pd


def merge(dataset):
    # Concatenate the original split with its adversarial counterpart and
    # write the combined set back out.
    dfo = data.load(dataset)
    dfa = data.load('%s-adv' % dataset)
    dfm = pd.concat([dfo, dfa])
    dfm.to_csv('data/arct/%s.csv' % dataset, sep='\t', index=False)

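# Usage sketch (an assumption; no driver code appears in the source). Note
# that make() writes a '<split>-adv-full.txt' file while merge() reads the
# '<split>-adv' split via data.load, so the calls below are independent steps
# rather than a single chained pipeline.
if __name__ == '__main__':
    for split in ('dev', 'test'):
        make(split)
        merge(split)
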
def test(self, args):
    df = data.load('test-original')
    self.n_train_examples = len(df)
    examples = self.create_examples(df)
    return self.get_data_loader(examples, args)

def train(self, args):
    df = data.load('train-original')
    self.n_training_points = len(df)
    examples = self.create_examples(df)
    return self.get_data_loader(examples, args)

def test(self, args):
    df = data.load('test-adv-negated')
    self.n_training_points = len(df)
    examples = self.create_examples(df)
    return self.get_data_loader(examples, args)

def dev(self, args):
    df = data.load('dev-adv-swapped')
    self.n_training_points = len(df)
    examples = self.create_examples(df)
    return self.get_data_loader(examples, args)

def dev(self, args):
    df = data.load('dev')
    self.n_train_examples = len(df)
    examples = self.create_examples(df)
    return self.get_data_loader(examples, args)

def test_adv(self, args):
    return self.get_data_loader(data.load('test-adv'), args)

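# A hedged usage sketch: the `provider` object exposing the loader methods
# above and the `args` namespace are assumptions, not details from the source.
# Each method returns a DataLoader over the corresponding ARCT split.
def build_eval_loaders(provider, args):
    return {
        'test': provider.test(args),
        'test-adv': provider.test_adv(args),
    }
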