import os


def run(model_type, task, ori_task=None, gpu="-1"):
    # `main` (the training entry point) and `find_source_task` are assumed to be
    # imported from elsewhere in the project.
    gpu = str(gpu)
    task = task.lower()
    ori_task = ori_task.lower() if ori_task is not None else None
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    if model_type in ["Glove", "Word2vec", "LSTM"]:
        print("{} not supported now.".format(model_type))
    else:
        _model_type = model_type.split('-')[0]
        ori_model_name = "{}[{}]".format(model_type, ori_task) if ori_task is not None else None
        _model_name_or_path = model_type if ori_task is None else "./model/{}".format(ori_model_name)
        _task_name = find_source_task(task)
        _data_dir = "./data/{}".format(task.upper())
        _max_seq_length = str(128)
        _per_gpu_train_batch_size = str(8)
        # A zero learning rate leaves the transferred weights unchanged while
        # still running the training loop.
        _learning_rate = "2e-5" if ori_task is None else "0.0"
        _num_train_epochs = str(3)
        new_model_name = (
            "{}[{}]".format(model_type, task)
            if ori_task is None
            else "{}[{}]-[{}]".format(model_type, task, ori_task)
        )
        _output_dir = "./model/{}".format(new_model_name)
        main([
            "--model_type", _model_type,
            "--model_name_or_path", _model_name_or_path,
            "--task_name", _task_name,
            "--do_train",
            "--do_eval",
            "--do_lower_case",
            "--data_dir", _data_dir,
            "--max_seq_length", _max_seq_length,
            "--per_gpu_train_batch_size", _per_gpu_train_batch_size,
            "--learning_rate", _learning_rate,
            "--num_train_epochs", _num_train_epochs,
            "--output_dir", _output_dir,
            "--save_steps", "10000",
            "--overwrite_output_dir",
        ])
        return new_model_name
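# `find_source_task` is not defined in the snippet above. A minimal, hypothetical
# sketch of what it might do, assuming derived task names (e.g. "mrpc-shuffled")
# share a prefix with their GLUE source task:
GLUE_TASKS = ["cola", "mnli", "mrpc", "sst-2", "sts-b", "qqp", "qnli", "rte", "wnli"]


def find_source_task(task):
    # Map a (possibly derived) task name back to the GLUE task name run_glue expects.
    for source in GLUE_TASKS:
        if task.startswith(source):
            return source
    raise ValueError("Unknown task: {}".format(task))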
def test_run_glue(self):
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

    tmp_dir = self.get_auto_remove_tmp_dir()
    testargs = f"""
        run_glue.py
        --model_name_or_path distilbert-base-uncased
        --output_dir {tmp_dir}
        --overwrite_output_dir
        --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
        --validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv
        --do_train
        --do_eval
        --per_device_train_batch_size=2
        --per_device_eval_batch_size=1
        --learning_rate=1e-4
        --max_steps=10
        --warmup_steps=2
        --seed=42
        --max_seq_length=128
        """.split()

    if is_cuda_and_apex_available():
        testargs.append("--fp16")

    with patch.object(sys, "argv", testargs):
        run_glue.main()
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
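# `get_results` is a test helper not shown here. A minimal sketch, assuming the
# Trainer-based example scripts save their final metrics to `all_results.json`
# in the output directory:
import json
import os


def get_results(output_dir):
    results_file = os.path.join(output_dir, "all_results.json")
    if not os.path.exists(results_file):
        raise ValueError("can't find {}".format(results_file))
    with open(results_file, "r") as f:
        return json.load(f)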
def test_run_glue(self):
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

    testargs = """
        run_glue.py
        --model_name_or_path bert-base-uncased
        --data_dir ./tests/fixtures/tests_samples/MRPC/
        --task_name mrpc
        --do_train
        --do_eval
        --output_dir ./tests/fixtures/tests_samples/temp_dir
        --per_gpu_train_batch_size=2
        --per_gpu_eval_batch_size=1
        --learning_rate=1e-4
        --max_steps=10
        --warmup_steps=2
        --overwrite_output_dir
        --seed=42
        --max_seq_length=128
        """.split()

    with patch.object(sys, "argv", testargs):
        result = run_glue.main()
        del result["loss"]
        for value in result.values():
            self.assertGreaterEqual(value, 0.75)
def test_run_glue(self):
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

    testargs = [
        "run_glue.py",
        "--data_dir=./examples/tests_samples/MRPC/",
        "--task_name=mrpc",
        "--do_train",
        "--do_eval",
        "--output_dir=./examples/tests_samples/temp_dir",
        "--per_gpu_train_batch_size=2",
        "--per_gpu_eval_batch_size=1",
        "--learning_rate=1e-4",
        "--max_steps=10",
        "--warmup_steps=2",
        "--overwrite_output_dir",
        "--seed=42",
    ]
    model_type, model_name = ("--model_type=bert", "--model_name_or_path=bert-base-uncased")
    with patch.object(sys, "argv", testargs + [model_type, model_name]):
        result = run_glue.main()
        for value in result.values():
            self.assertGreaterEqual(value, 0.75)
def test_run_glue(self):
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

    # The auto-removed tmp_dir is the single --output_dir; passing the flag a
    # second time would only be discarded by argparse.
    tmp_dir = self.get_auto_remove_tmp_dir()
    testargs = f"""
        run_glue.py
        --model_name_or_path distilbert-base-uncased
        --data_dir ./tests/fixtures/tests_samples/MRPC/
        --output_dir {tmp_dir}
        --overwrite_output_dir
        --task_name mrpc
        --do_train
        --do_eval
        --per_device_train_batch_size=2
        --per_device_eval_batch_size=1
        --learning_rate=1e-4
        --max_steps=10
        --warmup_steps=2
        --seed=42
        --max_seq_length=128
        """.split()

    if is_cuda_and_apex_available():
        testargs.append("--fp16")

    with patch.object(sys, "argv", testargs):
        result = run_glue.main()
        del result["eval_loss"]
        for value in result.values():
            self.assertGreaterEqual(value, 0.75)
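# `is_cuda_and_apex_available` gates the --fp16 flag above. A minimal sketch,
# assuming apex is detected simply by whether the package can be imported:
import importlib.util

import torch


def is_cuda_and_apex_available():
    # fp16 via apex needs both a CUDA device and the apex package installed.
    return torch.cuda.is_available() and importlib.util.find_spec("apex") is not None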
def execute(self):
    flags = copy.deepcopy(_args)
    cur_process = multiprocessing.current_process()
    # Single-argument os.path.join is a no-op; the per-process sub-directory
    # variant below is currently disabled.
    flags.output_dir = os.path.join(flags.output_dir)
    # flags.output_dir = os.path.join(flags.output_dir, cur_process.name)
    flags.job_id = self._job_id
    print("flags.output_dir: ", flags.output_dir)
    self.update_flags(self._settings, flags)
    os.environ["CUDA_VISIBLE_DEVICES"] = self._gpu_id
    # os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
    self.print_user_flags(flags)
    self.result, self.all_result = main(flags)
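# `update_flags` is not shown. A hypothetical sketch, assuming `self._settings`
# is a dict mapping flag names to the values this job should run with; it would
# live on the same class as `execute`:
def update_flags(self, settings, flags):
    # Overwrite attributes on the argparse-style `flags` namespace for this job.
    for name, value in settings.items():
        if not hasattr(flags, name):
            raise AttributeError("unknown flag: {}".format(name))
        setattr(flags, name, value)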
# Earlier parser.add_argument calls (for -data_path, -mode, -log_file, ...) are
# assumed to appear above this excerpt.
parser.add_argument('-output_path', type=str, required=True)
parser.add_argument('-alignment_model_path', type=str, required=True)
parser.add_argument('-database', type=str, default='None')
args = parser.parse_args()

aligner = docSum2MRPC_Aligner(data_path=args.data_path, mode=args.mode,
                              log_file=args.log_file, output_file=args.output_path,
                              database=args.database)
logging.info(f'output_file_name: {args.output_path}')

summary_files = glob.glob(f"{args.data_path}/summaries/*")
for sfile in summary_files:
    print('Starting with summary {}'.format(sfile))
    aligner.read_and_split(args.database, sfile)
    aligner.scu_span_aligner()
    aligner.save_predictions()

with redirect_argv('python --model_type roberta --model_name_or_path roberta-large-mnli --task_name MRPC --do_eval'
                   f' --calc_final_alignments --weight_decay 0.1 --data_dir {args.output_path}'
                   ' --max_seq_length 128 --per_gpu_train_batch_size 16 --per_gpu_eval_batch_size 16 --learning_rate 2e-6'
                   ' --logging_steps 500 --num_train_epochs 2.0 --evaluate_during_training --overwrite_cache'
                   f' --output_dir {args.alignment_model_path}'):
    run_glue.main()
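# `redirect_argv` is used above but not defined in this snippet. A minimal
# sketch, assuming it is a context manager that temporarily replaces sys.argv
# with the given command string split on whitespace (the leading "python" token
# simply stands in for argv[0], which argparse ignores):
import sys
from contextlib import contextmanager


@contextmanager
def redirect_argv(arg_string):
    saved_argv = sys.argv
    sys.argv = arg_string.split()
    try:
        yield
    finally:
        sys.argv = saved_argv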