def build_dictionary(args: Args, task: str) -> Dictionary:
    """Build the token dictionary for `task`, loading it from cache if it already exists."""
    dictionary_path = os.path.join(args.output_path, f'{task}_dictionary.pkl')
    if os.path.exists(dictionary_path):
        print(f'{dictionary_path} exists, load cache')
        return Dictionary.load_from_file(dictionary_path)

    dictionary = Dictionary()

    def build(split: str):
        df = load_csv_from_dataset(task, split)
        # Index question tokens; for the test split, the GloVe vocabulary is
        # passed as an extra lookup table when tokenizing.
        for question in df['question']:
            dictionary.tokenize(
                question,
                add_word=True,
                extra_dict=glove.stoi if split == 'Test' else None)
        # Multiple-choice tasks also contribute tokens from the five answer candidates.
        if task in MULTIPLE_CHOICE_TASKS:
            for answer_key in ['a1', 'a2', 'a3', 'a4', 'a5']:
                for answer in df[answer_key]:
                    dictionary.tokenize(
                        answer,
                        add_word=True,
                        extra_dict=glove.stoi if split == 'Test' else None)

    build('Train')
    build('Test')
    dictionary.dump_to_file(dictionary_path)
    return dictionary
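# Illustrative usage sketch (not part of the original file): the task name and
# the argument-parsing helper below are assumptions for demonstration only;
# only `build_dictionary` and `Dictionary.tokenize` come from this file.
#
#     args = parse_args()                                    # assumed CLI/Args helper
#     dictionary = build_dictionary(args, task='msvd_qa')    # hypothetical task name
#     token_ids = dictionary.tokenize('what is the man doing', add_word=False)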
if __name__ == "__main__": set_default_logger(args.experiment_path, debug=args.debug) # config = ConfigFactory.parse_file(args.config) fix_seed(config) pprint(config) TASK = config.get_string('task') best_meters = dict() if TASK == 'youtube2text': youtube2text_dictionary = Dictionary.load_from_file( os.path.join(config.get_string('cache_path'), 'youtube2text_dictionary.pkl')) youtube2text_qtype_dict = dict() for qtype in ['what', 'who']: qtype_id = youtube2text_dictionary.word2idx[qtype] youtube2text_qtype_dict[qtype_id] = qtype if args.experiment_path is not None: writer = SummaryWriter(log_dir=args.experiment_path) else: # writer: SummaryWriter = FakeObj() raise Exception('No exp path for tensorboard') main() writer.close()