def read_from_csv(self):
    """Load the train, test and total text tables for the configured question type.

    File names are built as ``'Train'``/``'Test'``/``'Total'`` followed by
    ``CSV_TYPE[self.opt.QUESTION_TYPE]`` and are looked up under
    ``self.opt.TEXT_DIR``. All three files are parsed as tab-separated.

    Returns:
        A ``(text_train, text_test, total_set)`` tuple of DataFrames. The
        train and test frames are indexed by their ``'vid_id'`` column; the
        total frame keeps the default index.
    """
    question_type = self.opt.QUESTION_TYPE
    utils.assert_in_type(question_type)

    text_dir = self.opt.TEXT_DIR
    suffix = CSV_TYPE[question_type]
    train_path = os.path.join(text_dir, 'Train' + suffix)
    test_path = os.path.join(text_dir, 'Test' + suffix)

    # Only the train and test files are checked up front; the total file
    # is read without a prior existence check.
    for required_path in (train_path, test_path):
        utils.assert_exits(required_path)

    train_frame = pd.read_csv(train_path, sep='\t')
    test_frame = pd.read_csv(test_path, sep='\t')
    text_train = train_frame.set_index('vid_id')
    text_test = test_frame.set_index('vid_id')

    total_set = pd.read_csv(os.path.join(text_dir, 'Total' + suffix), sep='\t')
    return text_train, text_test, total_set
def __init__(self, question_type, dictionary, mode): super(FeatureDataset, self).__init__() self.opt = config.parse_opt() utils.assert_in_type(question_type) if question_type == 'FrameQA': self.ans2label = pkl.load(open('./data/ans2label.pkl', 'rb')) self.label2ans = pkl.load(open('./data/label2ans.pkl', 'rb')) self.num_ans = len(self.ans2label) self.dictionary = dictionary entry_path = './data/entries_' + str(mode) + '.pkl' print('Load Dataset') self.entries = load_dataset(mode) print('Dataset\'s length is %d' % (len(self.entries))) self.tokenize() self.read_from_h5py() self.tensorize() '''