def run_sweep_gcp(self, run_method, params, s3_log_name=None, add_date_to_logname=True,
                  region='us-west1-a', instance_type='n1-standard-4', repeat=1, args=None,
                  extra_mounts=None):
    """Run a sweep through doodad's ``GCPDocker`` mode.

    Args:
        run_method: Callable forwarded to ``run_sweep_doodad``.
        params: Sweep parameters forwarded unchanged to ``run_sweep_doodad``.
        s3_log_name: Name used as ``gcp_log_prefix``; ``None`` falls back to
            ``'unnamed_experiment'``.
        add_date_to_logname: When true, the log name is prefixed with the
            current date formatted as ``YYYY_MM_DD`` and an underscore.
        region: Passed to ``GCPDocker`` as ``zone``.
        instance_type: Passed to ``GCPDocker`` as ``instance_type``.
        repeat: Forwarded to ``run_sweep_doodad``.
        args: Forwarded to ``run_sweep_doodad``.
        extra_mounts: Optional list of mounts appended after ``self.mounts``
            and the GCP output mount; ``None`` means no extra mounts.
    """
    additional_mounts = [] if extra_mounts is None else extra_mounts

    log_prefix = 'unnamed_experiment' if s3_log_name is None else s3_log_name
    if add_date_to_logname:
        today = datetime.now().strftime('%Y_%m_%d')
        log_prefix = '%s_%s' % (today, log_prefix)

    gcp_mode = doodad.mode.GCPDocker(
        image=self.image,
        zone=region,
        gcp_bucket_name=self.gcp_bucket_name,
        instance_type=instance_type,
        gcp_log_prefix=log_prefix,
        image_name=self.gcp_image,
        image_project=self.gcp_project,
    )

    run_sweep_doodad(
        run_method,
        params,
        run_mode=gcp_mode,
        python_cmd=self.python_cmd,
        mounts=self.mounts + [self.mount_out_gcp] + additional_mounts,
        repeat=repeat,
        args=args,
    )
def run_sweep_ec2(self, run_method, params, bucket_name, s3_log_name=None,
                  add_date_to_logname=True, region='us-east-2', instance_type='c4.xlarge',
                  repeat=1):
    """Run a sweep through doodad's ``EC2AutoconfigDocker`` mode.

    Args:
        run_method: Callable forwarded to ``run_sweep_doodad``.
        params: Sweep parameters forwarded unchanged to ``run_sweep_doodad``.
        bucket_name: Passed to the EC2 mode as ``s3_bucket``.
        s3_log_name: Name used as ``s3_log_prefix``; ``None`` falls back to
            ``'unnamed_experiment'``.
        add_date_to_logname: When true, the log name is prefixed with the
            current date formatted as ``YYYY_MM_DD`` and an underscore.
        region: Passed to the EC2 mode as ``region``.
        instance_type: EC2 instance type; also the key used to look up the
            spot price in ``INSTANCE_TO_PRICE``.
        repeat: Forwarded to ``run_sweep_doodad``.
    """
    log_prefix = 'unnamed_experiment' if s3_log_name is None else s3_log_name
    if add_date_to_logname:
        today = datetime.now().strftime('%Y_%m_%d')
        log_prefix = '%s_%s' % (today, log_prefix)

    ec2_mode = doodad.mode.EC2AutoconfigDocker(
        image=self.image,
        region=region,
        s3_bucket=bucket_name,
        instance_type=instance_type,
        # The spot price is taken from the module-level price table.
        spot_price=INSTANCE_TO_PRICE[instance_type],
        s3_log_prefix=log_prefix,
    )

    run_sweep_doodad(
        run_method,
        params,
        run_mode=ec2_mode,
        mounts=self.mounts + [self.mount_out_s3],
        repeat=repeat,
    )
def run_test_docker(self, run_method, params, args=None, extra_mounts=None):
    """Run the sweep with ``test_one=True`` using the sweeper's local mode.

    Args:
        run_method: Callable forwarded to ``run_sweep_doodad``.
        params: Sweep parameters forwarded unchanged to ``run_sweep_doodad``.
        args: Forwarded to ``run_sweep_doodad``.
        extra_mounts: Optional list of mounts appended after ``self.mounts``
            and the local output mount; ``None`` means no extra mounts.
    """
    additional_mounts = [] if extra_mounts is None else extra_mounts
    run_sweep_doodad(
        run_method,
        params,
        run_mode=self.mode_local,
        python_cmd=self.python_cmd,
        mounts=self.mounts + [self.mount_out_local] + additional_mounts,
        test_one=True,
        args=args,
    )
def run_test_docker(self, run_method, params, **kwargs):
    """Run the sweep with ``test_one=True`` using the sweeper's local mode.

    Args:
        run_method: Callable forwarded to ``run_sweep_doodad``.
        params: Sweep parameters forwarded unchanged to ``run_sweep_doodad``.
        **kwargs: Accepted but not used by this method.
    """
    test_mounts = self.mounts + [self.mount_out_local]
    run_sweep_doodad(run_method, params, run_mode=self.mode_local, mounts=test_mounts,
                     test_one=True)
def run_sweep(run_experiment, sweep_params, exp_name, parser, instance_type='c4.xlarge'):
    """Add launcher options to ``parser``, parse ``sys.argv`` and dispatch the sweep.

    Args:
        run_experiment: Callable executed for each sweep variant.
        sweep_params: Mapping of parameter name to the values to sweep over.
        exp_name: Experiment name; used for the output directories and as the
            EC2 log name.
        parser: Argument parser that receives the ``--mode``, ``--num_gpu``,
            ``--exps_per_gpu`` and ``--num_cpu`` options before parsing.
        instance_type: EC2 instance type, used only in ``ec2`` mode.

    Raises:
        NotImplementedError: If ``--mode`` is not one of ``ec2``,
            ``local_docker``, ``local``, ``local_par``, ``multi_gpu`` or
            ``local_singularity``.
    """
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
             'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument(
        '--num_gpu', '-g', type=int, default=1,
        help='Number of GPUs to use for running the experiments')
    parser.add_argument(
        '--exps_per_gpu', '-e', type=int, default=1,
        help='Number of experiments per GPU simultaneously')
    parser.add_argument(
        '--num_cpu', '-c', type=int, default=multiprocessing.cpu_count(),
        help='Number of threads to use for running experiments')
    args = parser.parse_args(sys.argv[1:])
    selected_mode = args.mode

    code_mount = mount.MountLocal(local_dir=config.BASE_DIR, pythonpath=True)
    container_out_dir = os.path.join(config.DOCKER_MOUNT_DIR, exp_name)
    sweeper = launcher.DoodadSweeper(
        [code_mount],
        docker_img=config.DOCKER_IMAGE,
        docker_output_dir=container_out_dir,
        local_output_dir=os.path.join(config.DATA_DIR, 'local', exp_name),
    )

    if selected_mode == 'ec2':
        # The S3 output mount is only created when it is actually needed.
        sweeper.mount_out_s3 = mount.MountS3(
            s3_path='', mount_point=container_out_dir, output=True)

        # Show the size of the full cartesian product before asking to continue.
        variant_count = len(list(itertools.product(*sweep_params.values())))
        banner = "\n" + "**********" * 10
        print(banner + "\nexp_prefix: {}\nvariants: {}".format(exp_name, variant_count))

        if query_yes_no("Continue?"):
            sweeper.run_sweep_ec2(
                run_experiment, sweep_params,
                bucket_name=config.S3_BUCKET_NAME,
                instance_type=instance_type,
                region='us-west-2',
                s3_log_name=exp_name,
                add_date_to_logname=False,
            )
        return

    if selected_mode == 'local_docker':
        docker_mode = dd.mode.LocalDocker(image=sweeper.image)
        run_sweep_doodad(run_experiment, sweep_params, run_mode=docker_mode,
                         mounts=sweeper.mounts)
        return

    if selected_mode == 'local':
        sweeper.run_sweep_serial(run_experiment, sweep_params)
        return

    if selected_mode == 'local_par':
        sweeper.run_sweep_parallel(run_experiment, sweep_params)
        return

    if selected_mode == 'multi_gpu':
        run_sweep_multi_gpu(run_experiment, sweep_params,
                            num_gpu=args.num_gpu, exps_per_gpu=args.exps_per_gpu)
        return

    if selected_mode == 'local_singularity':
        singularity_mode = dd.mode.LocalSingularity(image='~/maml_zoo.simg')
        run_sweep_doodad(run_experiment, sweep_params, run_mode=singularity_mode,
                         mounts=sweeper.mounts)
        return

    raise NotImplementedError
def launch(method, params, mode='ec2', data_dependencies=None, repeat=1,
           instance_type='c4.xlarge'):
    """Launch ``method`` over ``params`` with doodad in the requested mode.

    ``params`` is updated in place: ``'output_dir'`` and ``'data_dir'`` are
    set to single-element lists pointing at the output and data directories
    for the chosen mode.

    Args:
        method: Callable handed to ``hyper_sweep.run_sweep_doodad``.
        params: Sweep parameter dict (mutated, see above).
        mode: One of ``'local'``, ``'docker'`` or ``'ec2'``.
        data_dependencies: Optional mapping of remote name to local directory;
            each entry is mounted under ``REMOTE_DATA_DIR``. ``None`` means no
            data mounts.
        repeat: Forwarded to ``hyper_sweep.run_sweep_doodad``.
        instance_type: Key into ``instance_types``; used only in ``'ec2'`` mode.

    Raises:
        ValueError: If ``mode`` is not supported, or if ``mode`` is ``'ec2'``
            and ``instance_type`` is not a key of ``instance_types``.
    """
    # Validate before touching `params` so a bad call leaves the caller's dict
    # untouched. 'ssh' used to have output-mount handling but never had a
    # doodad mode, so it could only fail later with UnboundLocalError.
    supported_modes = ('local', 'docker', 'ec2')
    if mode not in supported_modes:
        raise ValueError(
            'Unsupported launch mode %r; expected one of %s'
            % (mode, ', '.join(supported_modes)))
    if mode == 'ec2' and instance_type not in instance_types:
        raise ValueError('Unknown EC2 instance type %r' % (instance_type,))

    if data_dependencies is None:
        data_dependencies = {}

    params['output_dir'] = [REMOTE_OUTPUT_DIR]
    params['data_dir'] = [REMOTE_DATA_DIR]

    if mode == 'local':
        doodad_mode = pd.mode.Local()
        # Local runs write straight to the local output directory.
        params['output_dir'] = [LOCAL_OUTPUT_DIR]
        output_mounts = []
    elif mode == 'docker':
        doodad_mode = pd.mode.LocalDocker(image='dibyaghosh/gcsl:0.1')
        output_dir = osp.join(LOCAL_OUTPUT_DIR, 'docker/')
        output_mounts = [
            mount.MountLocal(local_dir=output_dir, mount_point=REMOTE_OUTPUT_DIR,
                             output=True)
        ]
    else:  # mode == 'ec2'
        doodad_mode = pd.mode.EC2AutoconfigDocker(
            image='dibyaghosh/gcsl:0.1',
            region='us-west-1',  # EC2 region
            s3_log_prefix='gcsl',  # Folder to store log files under
            s3_log_name='gcsl',
            terminate=True,  # Whether to terminate on finishing job
            **instance_types[instance_type])
        output_mounts = [
            mount.MountS3(s3_path='data', mount_point=REMOTE_OUTPUT_DIR, output=True)
        ]

    data_mounts = [
        mount.MountLocal(local_dir=osp.realpath(directory),
                         mount_point=osp.join(REMOTE_DATA_DIR, remote_name))
        for remote_name, directory in data_dependencies.items()
    ]

    mounts = code_mounts + data_mounts + output_mounts
    hyper_sweep.run_sweep_doodad(method, params, doodad_mode, mounts, repeat=repeat)
def _parse_bool_flag(text):
    """Turn a command-line string into a bool; only falsy-looking text is False."""
    # argparse's `type=bool` calls bool() on the raw string, so "False" and
    # "0" would both come out True. Everything not listed here stays True,
    # which matches the previous behaviour for all other inputs.
    return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


def launch_experiment(
        exp_name,
        variant,
        sweep_values=None,
        num_seeds=1,
        get_confirmation=True,
        # arguments specifying where the code to run the experiment is
        experiment_class=None,
        get_config=None,
        get_algorithm=None,
        get_offline_algorithm=None,
        load_config=None,
        # misc arguments
        instance_type='c4.2xlarge',
        use_gpu=False,
        include_date=True,
):
    """Build the sweep for an experiment and run it in the mode given on the command line.

    Generating the experiment from the specified functions: if
    ``experiment_class`` is given, ``get_config`` is read from it, and
    ``get_algorithm`` / ``get_offline_algorithm`` are read from it when it has
    them (mostly for backwards compatibility). Functions passed directly to
    this call take precedence over the ones found on the class, which is
    generally more modular than specifying the class. ``get_config`` must be
    specified, either in ``experiment_class`` or in the call. ``load_config``
    is called after the config dict is initialised, so it can modify values in
    place, and must be passed directly.

    Args:
        exp_name: Experiment name; prefixed with ``MM-DD-`` when
            ``include_date`` is true.
        variant: Base variant; used as the only variant when ``sweep_values``
            is ``None``.
        sweep_values: Optional mapping of swept keys to values, expanded with
            ``generate_variants``.
        num_seeds: Number of seeds passed to ``generate_variants``.
        get_confirmation: When true, ask ``'Confirm?'`` before launching and
            return without launching if the answer is no.
        experiment_class: Optional object providing ``get_config`` and
            optionally ``get_algorithm`` / ``get_offline_algorithm``.
        get_config: Overrides ``experiment_class.get_config``.
        get_algorithm: Overrides ``experiment_class.get_algorithm``.
        get_offline_algorithm: Overrides
            ``experiment_class.get_offline_algorithm``.
        load_config: Optional callable stored under ``'load_config'``.
        instance_type: EC2 instance type, used only in ``ec2`` mode.
        use_gpu: GPU mode placed in ``gpu_kwargs``; forced to ``False`` in
            ``ec2`` mode.
        include_date: Whether to prefix ``exp_name`` with the current date.

    Raises:
        NotImplementedError: If ``--mode`` is not one of ``ec2``,
            ``local_docker``, ``local``, ``local_par`` or ``multi_gpu``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
             'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument(
        '--gpu_id', '-id', type=int, default=0,
        help='GPU id for running experiments (if using single GPU)')
    parser.add_argument(
        '--num_gpu', '-g', type=int, default=3,
        help='Number of GPUs to use for running the experiments')
    parser.add_argument(
        '--exps_per_gpu', '-e', type=int, default=1,
        help='Number of experiments per GPU simultaneously')
    parser.add_argument(
        '--num_cpu', '-c', type=int, default=multiprocessing.cpu_count(),
        help='Number of threads to use for running experiments')
    # A real parser instead of `type=bool`, so `-w False` means False.
    parser.add_argument(
        '--log_to_wandb', '-w', type=_parse_bool_flag, default=False,
        help='Whether or not to log to Weights and Biases')
    args = parser.parse_args(sys.argv[1:])

    # Collect the experiment callables; directly passed functions override
    # the ones taken from experiment_class.
    experiment_config = dict()
    if experiment_class is not None:
        experiment_config['get_config'] = experiment_class.get_config
        if hasattr(experiment_class, 'get_algorithm'):
            experiment_config['get_algorithm'] = experiment_class.get_algorithm
        if hasattr(experiment_class, 'get_offline_algorithm'):
            experiment_config['get_offline_algorithm'] = \
                experiment_class.get_offline_algorithm

    if get_config is not None:
        experiment_config['get_config'] = get_config
    if get_algorithm is not None:
        experiment_config['get_algorithm'] = get_algorithm
    if get_offline_algorithm is not None:
        experiment_config['get_offline_algorithm'] = get_offline_algorithm
    if load_config is not None:
        experiment_config['load_config'] = load_config

    if sweep_values is None:
        variants = [variant]
    else:
        variants = generate_variants(variant, sweep_values, num_seeds=num_seeds)

    # Setup in the form to feed into the doodad sweeper.
    if include_date:
        timestamp = datetime.now().strftime('%m-%d')
        exp_name = '%s-%s' % (timestamp, exp_name)

    gpu_id = args.gpu_id
    log_to_wandb = args.log_to_wandb
    sweep_params = dict(
        experiment_config=[experiment_config],
        exp_prefix=[exp_name],
        variant=variants,
        gpu_kwargs=[{
            'mode': use_gpu if args.mode != 'ec2' else False,  # don't use GPU with EC2
            'gpu_id': gpu_id
        }],
        log_to_wandb=[log_to_wandb],
    )

    # Confirmation
    print('\n')
    print('=' * 50)
    print('Launching experiment: %s' % exp_name)
    print('num variants: %d, num seeds: %d' % (len(variants) // num_seeds, num_seeds))
    print('About to launch %d total experiments' % (len(variants)))
    print('=' * 50)
    # sweep_values defaults to None (a single-variant run); there is nothing
    # to list in that case, and iterating None would raise TypeError.
    if sweep_values is not None:
        for k in sweep_values:
            print('%s:' % k, sweep_values[k])
    print('=' * 50)
    print('\n')

    if get_confirmation and not query_yes_no('Confirm?'):
        return

    # Standard run_sweep
    local_mount = mount.MountLocal(local_dir=config.BASE_DIR, pythonpath=True)
    docker_mount_point = os.path.join(config.DOCKER_MOUNT_DIR, exp_name)

    sweeper = launcher.DoodadSweeper(
        [local_mount],
        docker_img=config.DOCKER_IMAGE,
        docker_output_dir=docker_mount_point,
        local_output_dir=os.path.join(config.DATA_DIR, 'local', exp_name))

    # it's annoying to have to set up s3 if we don't want to use it
    # TODO: if you want to use S3, uncomment this
    sweeper.mount_out_s3 = None  # mount.MountS3(s3_path='', mount_point=docker_mount_point, output=True)

    if args.mode == 'ec2':
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format(
                  exp_name,
                  len(list(itertools.product(*sweep_params.values())))))

        if query_yes_no("Continue?"):
            sweeper.run_sweep_ec2(run_experiment,
                                  sweep_params,
                                  bucket_name=config.S3_BUCKET_NAME,
                                  instance_type=instance_type,
                                  region='us-east-2',
                                  s3_log_name=exp_name,
                                  add_date_to_logname=False)

    elif args.mode == 'local_docker':
        mode_docker = dd.mode.LocalDocker(image=sweeper.image)
        run_sweep_doodad(run_experiment,
                         sweep_params,
                         run_mode=mode_docker,
                         mounts=sweeper.mounts)

    elif args.mode == 'local':
        sweeper.run_sweep_serial(run_experiment, sweep_params)

    elif args.mode == 'local_par':
        sweeper.run_sweep_parallel(run_experiment, sweep_params)

    elif args.mode == 'multi_gpu':
        run_sweep_multi_gpu(run_experiment,
                            sweep_params,
                            num_gpu=args.num_gpu,
                            exps_per_gpu=args.exps_per_gpu)

    else:
        raise NotImplementedError('experiment run mode not recognized')
# Print the project directory that gets mounted for the run.
print(config.BASE_DIR)

local_mount = mount.MountLocal(local_dir=config.BASE_DIR, pythonpath=True)
docker_mount_point = os.path.join(config.DOCKER_MOUNT_DIR, EXP_NAME)

# Sweeper with the code mount and the container/local output locations.
sweeper = launcher.DoodadSweeper(
    [local_mount],
    docker_img=config.DOCKER_IMAGE,
    docker_output_dir=docker_mount_point,
    local_output_dir=os.path.join(config.DATA_DIR, 'local', EXP_NAME),
)
sweeper.mount_out_s3 = mount.MountS3(
    s3_path='',
    mount_point=docker_mount_point,
    output=True,
)

if args.mode not in ('ec2', 'local_docker'):
    # Any other mode runs the experiment directly in this process.
    run_experiment()
elif args.mode == 'local_docker':
    mode_docker = dd.mode.LocalDocker(image=sweeper.image)
    run_sweep_doodad(
        run_experiment,
        {'alg': [0]},
        run_mode=mode_docker,
        mounts=sweeper.mounts,
    )
else:
    # EC2: nothing is launched unless the user confirms.
    if query_yes_no("Continue?"):
        sweeper.run_sweep_ec2(
            run_experiment,
            {'alg': [0]},
            bucket_name=config.S3_BUCKET_NAME,
            instance_type='c4.xlarge',
            region='us-west-1',
            s3_log_name=EXP_NAME,
            add_date_to_logname=True,
        )
docker_mount_point = os.path.join(config.DOCKER_MOUNT_DIR, EXP_NAME)

# Sweeper with the code mount and the container/local output locations.
sweeper = launcher.DoodadSweeper(
    [local_mount],
    docker_img=config.DOCKER_IMAGE,
    docker_output_dir=docker_mount_point,
    local_output_dir=os.path.join(config.DATA_DIR, 'local', EXP_NAME),
)
sweeper.mount_out_s3 = mount.MountS3(
    s3_path='',
    mount_point=docker_mount_point,
    output=True,
)

if args.mode not in ('ec2', 'local_docker'):
    # Any other mode runs the experiment directly with the command-line values.
    run_experiment(
        empowerment=args.empowerment,
        exp_title=args.exp_title + '_' + str(args.seed),
    )
elif args.mode == 'local_docker':
    mode_docker = dd.mode.LocalDocker(image=sweeper.image)
    run_sweep_doodad(
        run_experiment,
        {'empowerment': [100.0]},
        run_mode=mode_docker,
        mounts=sweeper.mounts,
    )
else:
    # EC2: nothing is launched unless the user confirms.
    if query_yes_no("Continue?"):
        sweeper.run_sweep_ec2(
            run_experiment,
            {
                'empowerment': [0.001],
                'exp_title': [''],
                'seed': [1],
            },
            bucket_name=config.S3_BUCKET_NAME,
            instance_type='c4.2xlarge',
            region='us-west-1',
            s3_log_name=EXP_NAME,
            add_date_to_logname=True,
        )