def main():
    args = parse_args()
    seed = args.pop("seed")
    if seed:
        # seed the RNGs only when a seed was explicitly given
        log.info(f"Seed for random number generator: {seed}")
        import random
        import torch
        random.seed(seed)
        torch.manual_seed(seed)
    work_dir = Path(args.pop('work_dir'))
    # a non-empty 'spark' section in conf.yml switches to the big-data backend
    is_big = load_conf(work_dir / 'conf.yml').get('spark', {})
    if is_big:
        log.info("Big experiment mode enabled; checking pyspark backend")
        try:
            import pyspark
        except ImportError:
            log.warning("unable to import pyspark. Please do 'pip install pyspark' and run again")
            raise
        from rtg.big.exp import BigTranslationExperiment
        exp = BigTranslationExperiment(work_dir=work_dir)
    else:
        exp = Experiment(work_dir=work_dir)
    assert exp.has_prepared(), \
        f'Experiment dir {exp.work_dir} is not ready to train. Please run "prep" sub task'
    exp.train(args)
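# The `load_conf(...).get('spark', {})` check above treats a non-empty `spark` section in
# conf.yml as the switch for big-experiment mode. Below is a minimal, self-contained sketch
# of that truthiness check; the `spark` settings shown are illustrative assumptions, not a
# documented schema.
_sample_conf = {
    'spark': {
        'spark.master': 'local[2]',       # hypothetical Spark settings
        'spark.app.name': 'rtg-big-prep',
    }
}
assert _sample_conf.get('spark', {})      # non-empty dict is truthy -> BigTranslationExperiment
assert not {}.get('spark', {})            # missing or empty section is falsy -> plain Experiment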
def test_finetune_pipeline_transformer():
    codec_lib = 'nlcodec'
    tmp_dir = tempfile.mkdtemp()
    print(f"Testing finetune transformer: {tmp_dir}")
    config = load_conf('experiments/sample-exp/conf.yml')
    prep = config['prep']
    prep.update(dict(codec_lib=codec_lib, char_coverage=0.9995,
                     finetune_src=prep['train_src'], finetune_tgt=prep['train_tgt']))
    exp = Experiment(tmp_dir, config=config, read_only=False)
    exp.config['trainer'].update(
        dict(steps=50, check_point=25, finetune_steps=100, batch_size=400,
             split_ratio=0.1, dynamic_epoch=True))
    Pipeline(exp).run()
    assert exp.train_file.exists() or exp.train_db.exists()
    assert exp.finetune_file.exists()
    # TODO: add more assertions
    print(f"Cleaning up {tmp_dir}")
    shutil.rmtree(tmp_dir, ignore_errors=True)
def test_robertamt_2layer_init():
    tmp_dir = tempfile.mkdtemp()
    config = load_conf('experiments/pretrained/robertamt-xlmr-2layer.yml')
    model_id = config['model_args']['model_id']
    print(f"Testing {model_id} --> {tmp_dir}")
    assert 'pretrainmatch' == config['prep'].get('codec_lib')
    exp = Experiment(tmp_dir, config=config, read_only=False)
    exp.config['trainer'].update(dict(steps=4, check_point=1))
    Pipeline(exp).run(run_tests=False)
    sanity_check_experiment(exp)
    print(f"Cleaning up {tmp_dir}")
    shutil.rmtree(tmp_dir, ignore_errors=True)
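# `sanity_check_experiment` is called by several tests in this section but its body is not
# shown here. The helper below is a minimal sketch of what it might assert, based only on
# the experiment attributes exercised in test_spark_prep (_prepared_flag, _trained_flag,
# train_file, train_db); the real helper may check more.
def _sanity_check_experiment_sketch(exp):
    """Hypothetical: verify the experiment dir finished both prep and training."""
    assert exp._prepared_flag.exists()
    assert exp._trained_flag.exists()
    assert exp.train_file.exists() or exp.train_db.exists()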
def test_pipeline_transformer():
    for codec_lib in ['sentpiece', 'nlcodec']:
        tmp_dir = tempfile.mkdtemp()
        config = load_conf('experiments/transformer.test.yml')
        print(f"Testing {codec_lib} --> {tmp_dir}")
        config['prep'].update({'codec_lib': codec_lib, 'char_coverage': 0.9995})
        exp = Experiment(tmp_dir, config=config, read_only=False)
        exp.config['trainer'].update(dict(steps=50, check_point=25))
        exp.config['prep']['num_samples'] = 0
        Pipeline(exp).run(run_tests=False)
        sanity_check_experiment(exp)
        print(f"Cleaning up {tmp_dir}")
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_spark_prep():
    tmp_dir = tempfile.mkdtemp()
    try:
        print(f"Testing dataprep on pyspark: {tmp_dir}")
        config = load_conf('experiments/spark-bigdataprep.yml')
        exp = Experiment(tmp_dir, config=config, read_only=False)
        exp.config['trainer'].update(dict(steps=50, check_point=25, batch_size=400))
        Pipeline(exp).run()
        assert exp._prepared_flag.exists()
        assert exp._trained_flag.exists()
        assert exp.train_file.exists() or exp.train_db.exists()
        sanity_check_experiment(exp)
    finally:
        print(f"Cleaning up {tmp_dir}")
        shutil.rmtree(tmp_dir, ignore_errors=True)
def main():
    args = parse_args()
    conf_file: Path = args.conf_file if args.conf_file else args.work_dir / 'conf.yml'
    assert conf_file.exists()
    ExpFactory = TranslationExperiment
    # a non-empty 'spark' section in conf.yml switches to the big-data backend
    is_big = load_conf(conf_file).get('spark', {})
    if is_big:
        log.info("Big experiment mode enabled; checking pyspark backend")
        try:
            import pyspark
            log.info("pyspark is available")
        except ImportError:
            log.warning("unable to import pyspark. Please do 'pip install pyspark' and run again")
            raise
        from rtg.big.exp import BigTranslationExperiment
        ExpFactory = BigTranslationExperiment
    exp = ExpFactory(args.exp, config=conf_file, read_only=False)
    return exp.pre_process()
def test_parent_child_pipeline():
    parent_dir = tempfile.mkdtemp()
    # parent_dir = 'tmp-xyz-parent'
    print(f"Making parent at {parent_dir}")
    exp = Experiment(parent_dir, config='experiments/transformer.test.yml', read_only=False)
    exp.config['trainer'].update(dict(steps=50, check_point=25))
    Pipeline(exp).run(run_tests=False)
    sanity_check_experiment(exp)
    assert not exp.parent_model_state.exists()

    child_config = load_conf('experiments/transformer.test.yml')
    child_config.update({
        'parent': {
            'experiment': str(parent_dir),
            'vocab': {'shared': 'shared'},
            'model': {'ensemble': 2}
        }
    })
    child_dir = tempfile.mkdtemp()
    # child_dir = 'tmp-xyz-child'
    print(f"Making child at {child_dir}")
    exp = Experiment(child_dir, config=child_config, read_only=False)
    exp.config['trainer'].update(dict(steps=50, check_point=25))
    Pipeline(exp).run(run_tests=False)
    sanity_check_experiment(exp)
    assert exp.parent_model_state.exists()

    for d in [parent_dir, child_dir]:
        print(f"Cleaning up {d}")
        shutil.rmtree(d, ignore_errors=True)
def parse_args():
    parser = argparse.ArgumentParser(prog="rtg-pipe", description="RTG Pipeline CLI")
    parser.add_argument("exp", metavar='EXP_DIR', type=Path,
                        help="Working directory of experiment")
    parser.add_argument("conf", metavar='conf.yml', type=Path, nargs='?',
                        help="Config File. By default <work_dir>/conf.yml is used")
    parser.add_argument("-G", "--gpu-only", action="store_true", default=False,
                        help="Crash if no GPU is available")
    parser.add_argument("-fp16", "--fp16", action="store_true", default=False,
                        help="Float 16")
    # multi-gpu / multi-node
    parser.add_argument("--local_rank", "--local-rank", type=int, default=-1,
                        help="Multi-GPU - Local rank")
    parser.add_argument("--master-port", type=int, default=-1,
                        help="Master port (for multi-node SLURM jobs)")
    dtorch.setup()
    args = parser.parse_args()

    if args.fp16:
        assert torch.cuda.is_available(), "GPU required for fp16... exiting."
        dtorch.enable_fp16()
    if args.gpu_only:
        assert torch.cuda.is_available(), "No GPU found... exiting"
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            log.info(f'Cuda {i}: {torch.cuda.get_device_properties(i)}')

    conf_file: Path = args.conf if args.conf else args.exp / 'conf.yml'
    assert conf_file.exists(), f'NOT FOUND: {conf_file}'
    ExpFactory = Experiment
    # a non-empty 'spark' section in conf.yml switches to the big-data backend
    is_big = load_conf(conf_file).get('spark', {})
    if is_big:
        log.info("Big experiment mode enabled; checking pyspark backend")
        try:
            import pyspark
            log.info("pyspark is available")
        except ImportError:
            log.warning("unable to import pyspark. Please do 'pip install pyspark' and run again")
            raise
        from rtg.big.exp import BigTranslationExperiment
        ExpFactory = BigTranslationExperiment

    read_only = not dtorch.is_global_main  # only the global main process may modify the experiment
    exp = ExpFactory(args.exp, config=conf_file, read_only=read_only)
    dtorch.barrier()
    return exp
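# A minimal usage sketch for the `rtg-pipe` CLI defined above (the experiment path is a
# hypothetical example):
#
#   rtg-pipe runs/001-tfm                  # uses runs/001-tfm/conf.yml by default
#   rtg-pipe runs/001-tfm my-conf.yml -G   # explicit config file; crash if no GPU is available
#
# The experiment returned by parse_args() is typically handed to Pipeline(exp).run(),
# as the tests in this section do.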