Exemplo n.º 1
0
def main(args):
    """Convert retriever results into serialized files.

    Builds a tensorizer for ``args.encoder_model_type`` and passes it, along
    with the input/output settings read from ``args``, to
    ``convert_retriever_results``.

    Args:
        args: Parsed options object. The attributes read here are
            ``encoder_model_type``, ``is_train_set``, ``retriever_results``,
            ``out_file``, ``gold_passages_src`` and ``num_workers``.
    """
    encoder_tensorizer = init_tenzorizer(args.encoder_model_type, args)

    # Auto-padding is switched off so the serialized files take less disk space.
    encoder_tensorizer.set_pad_to_max(False)

    convert_retriever_results(
        args.is_train_set,
        args.retriever_results,
        args.out_file,
        args.gold_passages_src,
        encoder_tensorizer,
        args.num_workers,
    )
Exemplo n.º 2
0
 def _run_preprocessing(tensorizer: Tensorizer):
     """Serialize the retriever results with auto-padding temporarily disabled.

     Args:
         tensorizer: Tensorizer whose pad-to-max setting is switched off for
             the duration of the conversion and switched back on afterwards.

     Returns:
         Whatever ``convert_retriever_results`` returns (named
         ``serialized_files`` by the original code).
     """
     # Temporarily disable auto-padding to save disk space usage of serialized files.
     tensorizer.set_pad_to_max(False)
     try:
         # Pass the same tensorizer whose padding was just disabled; passing a
         # different instance would serialize with padding still enabled.
         return convert_retriever_results(is_train, data_files[0], out_file_prefix,
                                          gold_passages_src,
                                          tensorizer,
                                          num_workers=self.args.num_workers)
     finally:
         # Re-enable padding even when the conversion raises, so the tensorizer
         # is not left with padding off for later use.
         tensorizer.set_pad_to_max(True)