def do_format_to_lines(args):
    """Run ``data_builder.format_to_lines`` and print a clock reading around it.

    A clock reading is printed immediately before and immediately after the
    call, so the difference between the two printed values is the elapsed
    time of the call in seconds.

    Args:
        args: Parsed command-line options, passed unchanged to
            ``data_builder.format_to_lines``.
    """
    # time.clock() was deprecated in Python 3.3 and removed in 3.8;
    # time.perf_counter() is the documented replacement for interval timing.
    print(time.perf_counter())
    data_builder.format_to_lines(args)
    print(time.perf_counter())
# Bounds on the target length, in tokens.
parser.add_argument('-min_tgt_ntokens', type=int, default=5)
parser.add_argument('-max_tgt_ntokens', type=int, default=500)

# Boolean switches: a bare flag (no value) resolves to True via const.
parser.add_argument(
    "-lower",
    type=str2bool,
    nargs='?',
    const=True,
    default=True,
)
parser.add_argument(
    "-use_bert_basic_tokenizer",
    type=str2bool,
    nargs='?',
    const=True,
    default=False,
)

# Logging destination, dataset selector and worker count.
parser.add_argument('-log_file', default='../../logs/cnndm.log')
parser.add_argument('-dataset', default='')
parser.add_argument('-n_cpus', type=int, default=2)

args = parser.parse_args()
init_logger(args.log_file)

# Dispatch to the data_builder function named by -mode. Only the names
# listed here are callable from the command line (an explicit whitelist
# rather than eval-ing the mode string); any other mode is a no-op.
supported_modes = ("tokenize", "format_to_lines", "format_to_bert")
if args.mode in supported_modes:
    run_stage = getattr(data_builder, args.mode)
    run_stage(args)