def run_sweep_doodad(run_method, params, run_mode, mounts, repeat=1, test_one=False, args=None, python_cmd='python'):
    """Launch one doodad job per hyperparameter configuration in the sweep.

    :param run_method: function executed remotely; called with each sweep
        configuration expanded as keyword arguments.
    :param params: dict of parameter name -> list of values, expanded by
        ``Sweeper`` into individual configurations.
    :param run_mode: doodad launch mode (e.g. LocalDocker, SSHDocker, EC2).
    :param mounts: list of doodad mount points made visible to the target.
    :param repeat: how many times each configuration is repeated.
    :param test_one: if True, launch only the first configuration (useful
        for a quick smoke test before a full sweep).
    :param args: optional extra args forwarded to the launch script; the
        caller's dict is NOT mutated.
    :param python_cmd: python executable used on the target machine.
    """
    if args is None:
        args = {}
    sweeper = Sweeper(params, repeat)
    for config in sweeper:
        def run_method_args():
            run_method(**config)
        # Fix: copy the args dict per launch instead of mutating the
        # caller's dict in place — the original left a stale 'run_method'
        # entry in the caller's `args` after returning.
        launch_args = dict(args)
        launch_args['run_method'] = run_method_args
        doodad.launch_python(
            target=os.path.join(SCRIPTS_DIR, 'run_experiment_lite_doodad.py'),
            mode=run_mode,
            mount_points=mounts,
            use_cloudpickle=True,
            python_cmd=python_cmd,
            args=launch_args,
        )
        if test_one:
            break
def run_single_doodad(run_method, kwargs, run_mode, mounts, repeat=1):
    """
    Run a single function via doodad.

    :param run_method: function executed remotely; called as
        ``run_method(**kwargs)``.
    :param kwargs: keyword arguments passed to ``run_method``.
    :param run_mode: doodad launch mode (e.g. LocalDocker, SSHDocker).
    :param mounts: list of doodad mount points made visible to the target.
    :param repeat: unused here; kept for signature compatibility with the
        sweep variant.
    """
    # Bug fix: the original constructed `Sweeper(params, repeat)` here, but
    # `params` is not defined in this scope, so every call raised NameError.
    # The sweeper was also never used for a single run, so it is removed.
    def run_method_args():
        run_method(**kwargs)
    doodad.launch_python(
        target=os.path.join(SCRIPTS_DIR, 'run_experiment_lite_doodad.py'),
        mode=run_mode,
        mount_points=mounts,
        use_cloudpickle=True,
        args={'run_method': run_method_args},
    )
def run_sweep_doodad(run_method, params, run_mode, mounts, repeat=1, test_one=False):
    """Launch a doodad job for every configuration produced by the sweep.

    Each configuration from ``Sweeper(params, repeat)`` is wrapped in a
    zero-argument closure and shipped to the launch script via cloudpickle.
    When ``test_one`` is set, only the first configuration is launched.
    """
    for config in Sweeper(params, repeat):
        def run_method_args():
            run_method(**config)
        doodad.launch_python(
            target=os.path.join(SCRIPTS_DIR, 'run_experiment_lite_doodad.py'),
            mode=run_mode,
            mount_points=mounts,
            use_cloudpickle=True,
            args={'run_method': run_method_args},
        )
        if test_one:
            break
def run_single_doodad(run_method, kwargs, run_mode, mounts, repeat=1, args=None, python_cmd='python'):
    """
    Run a single function via doodad.

    :param run_method: function executed remotely; called as
        ``run_method(**kwargs)``.
    :param kwargs: keyword arguments passed to ``run_method``.
    :param run_mode: doodad launch mode (e.g. LocalDocker, SSHDocker).
    :param mounts: list of doodad mount points made visible to the target.
    :param repeat: unused here; kept for signature compatibility with the
        sweep variant.
    :param args: optional extra args forwarded to the launch script; the
        caller's dict is NOT mutated.
    :param python_cmd: python executable used on the target machine.
    """
    if args is None:
        args = {}
    def run_method_args():
        run_method(**kwargs)
    # Fix: copy the args dict instead of mutating the caller's dict — the
    # original left a stale 'run_method' entry in the caller's `args`.
    launch_args = dict(args)
    launch_args['run_method'] = run_method_args
    doodad.launch_python(
        target=os.path.join(SCRIPTS_DIR, 'run_experiment_lite_doodad.py'),
        mode=run_mode,
        mount_points=mounts,
        use_cloudpickle=True,
        python_cmd=python_cmd,
        args=launch_args,
    )
mode_docker = dd.mode.LocalDocker(image="python:3.5", ) # or this! Run experiment via docker on another machine through SSH mode_ssh = dd.mode.SSHDocker( image="python:3.5", credentials=ssh.SSHCredentials( hostname="my.machine.name", username="******", identity_file="~/.ssh/id_rsa", ), ) MY_RUN_MODE = mode_docker # CHANGE THIS # Set up code and output directories mounts = [ mount.MountLocal(local_dir=REPO_DIR, pythonpath=True), # Code ] THIS_FILE_DIR = os.path.realpath(os.path.dirname(__file__)) dd.launch_python( target=os.path.join( THIS_FILE_DIR, "app_main.py" ), # point to a target script. If running remotely, this will be copied over mode=MY_RUN_MODE, mount_points=mounts, args={ "arg1": 50, }, )
]
# Choose the output mount by run mode: results sync to S3 when launching on
# EC2, otherwise they land in a local tmp_output directory.
if MY_RUN_MODE == mode_ec2:
    output_mount = mount.MountS3(
        s3_path='outputs',
        mount_point=OUTPUT_DIR,
        output=True,
        sync_interval=900)  # use this for ec2
else:
    output_mount = mount.MountLocal(
        local_dir=os.path.join(EXAMPLES_DIR, 'tmp_output'),
        mount_point=OUTPUT_DIR,
        output=True)
mounts.append(output_mount)
# print(mounts)

THIS_FILE_DIR = os.path.realpath(os.path.dirname(__file__))
dd.launch_python(
    # target=os.path.join(THIS_FILE_DIR, 'app_main.py'),  # point to a target script. If running remotely, this will be copied over
    target=os.path.join(REPO_DIR, 'noreward-rl-private/src/train.py'),
    mode=MY_RUN_MODE,
    mount_points=mounts,
    args={
        'num-workers': 17,
        'unsup': 'action',
        # NOTE(review): rewrites the username portion of the path for the
        # remote host — confirm against the target machine's home layout.
        'log-dir': OUTPUT_DIR.replace("richard", "ubuntu"),
        'env-id': "MonsterKongTrain-v0"
    },
    verbose=True,
)
Launch an experiment on local machine with docker
"""
import sys

import doodad as dd
import doodad.ssh as ssh
import doodad.mount as mount

# Run inside a local Docker container built from the rlkit image.
mode_local = dd.mode.LocalDocker(image='rakelly/rlkit:latest')

# Set up code and output directories
OUTPUT_DIR = 'output'  # doodad will prepend `/mounts` to this, set config in code to output to this path
mounts = [
    mount.MountLocal(local_dir='~/rlkit', pythonpath=True),  # point to your code
    mount.MountLocal(local_dir='~/.mujoco', mount_point='/root/.mujoco'),  # point to your mujoco
    mount.MountLocal(local_dir='~/rlkit/output', mount_point=OUTPUT_DIR, output=True),
]

# The target script path comes from the command line; ' 1' is appended as a
# positional flag for the script.
call = sys.argv[1] + ' 1'  # assume script has arg for docker mode
dd.launch_python(
    target=call,  # call target script (absolute path)
    mode=mode_local,
    mount_points=mounts,
    verbose=True,
)
OUTPUT_DIR = '/mount/outputs' # this is the directory visible to the target input_mounts = [ # mount.MountLocal(local_dir=REPO_DIR, pythonpath=True), mount.MountLocal(local_dir='~/install/rllab', pythonpath=True), # rllab # mount.MountLocal(local_dir='~/install/gym/.mujoco', mount_point='/root/.mujoco'), # mujoco # mount.MountLocal(local_dir='~/code/doodad', pythonpath=True), # rllab ] output_mounts = [ mount.MountLocal(local_dir='~/data/vitchyr', mount_point=OUTPUT_DIR, read_only=False), # mujoco mount.MountS3( s3_path="test", s3_bucket="2-12-2017.railrl.vitchyr.rail.bucket", mount_point=OUTPUT_DIR, output=True, ) ] mounts = input_mounts + output_mounts script_path = os.path.join(THIS_FILE_DIR, 'test_script.py') print(script_path) pd.launch_python( target=script_path, # script. If running remotely, this will be copied over #target='/media/sg2/scripts/swimmer_data.py', mode=mode_ec2, # mode=mode_ssh, # mode=mode_local, mount_points=mounts, args={'output_dir': OUTPUT_DIR})
def run_experiment(
    method_call,
    mode='local',
    exp_name='default',
    variant=None,
    prepend_date_to_exp_name=True,
    use_gpu=False,
    gpu_id=0,
    base_log_dir=None,
    local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
    # local settings
    skip_wait=False,
    # ec2 settings
    sync_interval=180,
    region='us-east-1',
    instance_type=None,
    spot_price=None,
    verbose=False,
    num_exps_per_instance=1,
    docker_image=None,
    # sss settings
    time_in_mins=None,
    slurm_config_name=None,
    # ssh settings
    ssh_host=None,
    # gcp
    gcp_kwargs=None,
    s3_log_prefix=None,
    s3_log_name="",
):
    """
    Usage:
    ```
    def foo(doodad_config, variant):
        x = variant['x']
        y = variant['y']
        print("sum", x+y)
        with open(doodad_config.base_log_dir, "w") as f:
            f.write('sum = %f' % x + y)

    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant, exp_name='my-experiment', mode='ec2')
    ```

    For outputs to be saved properly, make sure you write to the directory
    in `doodad_config.base_log_dir`. Do NOT output to
    `easy_launch.config.LOCAL_LOG_DIR` or any other directory in config.
    This ensures that when you run code on GCP or AWS, it'll save to the
    proper location and get synced accordingly.

    Within the corresponding output mount, the outputs are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`

    For local experiment, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`. For GCP or AWS, base_log_dir will be synced to
    some bucket.

    :param method_call: a function that takes in a dictionary as argument
    :param mode: A string:
     - 'local'
     - 'local_docker'
     - 'ec2'
     - 'here_no_doodad': Run without doodad call
     - 'ssh'
     - 'gcp'
     - 'local_singularity': run locally with singularity
     - 'htp': generate a taskfile and script for using BRC's
       high-throughput script
     - 'slurm_singularity': submit a slurm job using singularity
     - 'sss': generate a script to run on some slurm job using singularity
    :param exp_name: name of experiment
    :param variant: Dictionary
    :param prepend_date_to_exp_name: If False, do not prepend the date to
        the experiment directory.
    :param use_gpu:
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :param ssh_host: the name of the host you want to ssh onto, should
        correspond to an entry in config.py of the following form:
        SSH_HOSTS=dict(
            ssh_host=dict(
                username='******',
                hostname='hostname/ip address',
            )
        )
        - if ssh_host is set to None, you will use ssh_host specified by
          config.SSH_DEFAULT_HOST
    :return:
    """
    # NOTE(review): this function mutates module-level launch state (the
    # target mount, first-launch flag, and task counter) on every call.
    global _global_target_mount
    global _global_is_first_launch
    global _global_n_tasks_total

    """
    Sanitize inputs as needed
    """
    variant = sanitize_variant(variant)
    base_log_dir = sanitize_base_log_dir(base_log_dir, mode)
    base_exp_name = exp_name
    if prepend_date_to_exp_name:
        exp_name = time.strftime("%y-%m-%d") + "-" + exp_name
    git_infos = generate_git_infos()

    doodad_config = DoodadConfig(
        exp_name=exp_name,
        base_log_dir=base_log_dir,
        use_gpu=use_gpu,
        gpu_id=gpu_id,
        git_infos=git_infos,
        script_name=' '.join(sys.argv),
        extra_launch_info=dict(
            base_exp_name=base_exp_name,
            instance_type=str(instance_type),
        ),
    )
    # 'here_no_doodad' short-circuits: call the method directly, no launch.
    if mode == 'here_no_doodad':
        return method_call(doodad_config, variant)

    """
    Safety Checks
    """
    if mode == 'ec2' or mode == 'gcp':
        # The general cost prompt is disabled; only the GPU cost prompt
        # remains active below.
        # if _global_is_first_launch and not query_yes_no(
        #         "{} costs money. Are you sure you want to run?".format(mode)
        # ):
        #     sys.exit(1)
        if _global_is_first_launch and use_gpu:
            if not query_yes_no(
                    "{} is more expensive with GPUs. Confirm?".format(mode)):
                sys.exit(1)

    """
    GPU vs normal configs
    """
    if use_gpu:
        docker_image = docker_image or config.GPU_DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = config.GPU_INSTANCE_TYPE
        else:
            # Caller-supplied instance must be a GPU family (g* or p*).
            assert instance_type[0] in {'g', 'p'}
        if spot_price is None:
            spot_price = config.GPU_SPOT_PRICE
        doodad_config.extra_launch_info['docker_image'] = docker_image
    else:
        docker_image = docker_image or config.DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = config.INSTANCE_TYPE
        if spot_price is None:
            spot_price = config.SPOT_PRICE
        doodad_config.extra_launch_info['docker_image'] = docker_image
    if mode in {'sss', 'htp'}:
        if use_gpu:
            singularity_image = config.SSS_GPU_IMAGE
        else:
            singularity_image = config.SSS_CPU_IMAGE
        doodad_config.extra_launch_info[
            'singularity_image'] = singularity_image
    elif mode in ['local_singularity', 'slurm_singularity']:
        singularity_image = config.SINGULARITY_IMAGE
        doodad_config.extra_launch_info[
            'singularity_image'] = singularity_image
    else:
        singularity_image = None

    """
    Get the mode
    """
    mode_kwargs = {}
    if mode == 'ec2':
        image_id = config.REGION_TO_GPU_AWS_IMAGE_ID[region]
        doodad_config.extra_launch_info['aws_ami_id'] = image_id
    if hasattr(config, "AWS_S3_PATH"):
        aws_s3_path = config.AWS_S3_PATH
    else:
        aws_s3_path = None

    """
    Create mode
    """
    _global_n_tasks_total += 1
    if mode == 'local':
        dmode = doodad.mode.Local(skip_wait=skip_wait)
    elif mode == 'local_docker':
        dmode = doodad.mode.LocalDocker(
            image=docker_image,
            gpu=use_gpu,
        )
    elif mode == 'ssh':
        if ssh_host:
            ssh_dict = config.SSH_HOSTS[ssh_host]
        else:
            ssh_dict = config.SSH_HOSTS[config.SSH_DEFAULT_HOST]
        credentials = doodad.ssh.credentials.SSHCredentials(
            username=ssh_dict['username'],
            hostname=ssh_dict['hostname'],
            identity_file=config.SSH_PRIVATE_KEY)
        dmode = doodad.mode.SSHDocker(
            credentials=credentials,
            image=docker_image,
            gpu=use_gpu,
            tmp_dir=config.SSH_TMP_DIR,
        )
    elif mode == 'local_singularity':
        dmode = doodad.mode.LocalSingularity(
            image=singularity_image,
            gpu=use_gpu,
            pre_cmd=config.SINGULARITY_PRE_CMDS,
        )
    elif mode in {'slurm_singularity', 'sss', 'htp'}:
        if slurm_config_name is None:
            slurm_config_name = "gpu" if use_gpu else "cpu"
        slurm_config_kwargs = config.SLURM_CONFIGS[slurm_config_name]
        if use_gpu:
            assert slurm_config_kwargs["n_gpus"] > 0, slurm_config_name
        else:
            assert slurm_config_kwargs["n_gpus"] == 0, slurm_config_name
        if time_in_mins is not None:
            # NOTE(review): this writes into the dict stored in
            # config.SLURM_CONFIGS, so the override persists across calls —
            # confirm this is intended.
            slurm_config_kwargs["time_in_mins"] = time_in_mins
        if slurm_config_kwargs["time_in_mins"] is None:
            raise ValueError('Must approximate/set time in minutes')
        slurm_config = SlurmConfig(**slurm_config_kwargs)
        if mode == 'slurm_singularity':
            dmode = doodad.mode.SlurmSingularity(
                image=singularity_image,
                gpu=use_gpu,
                skip_wait=skip_wait,
                pre_cmd=config.SINGULARITY_PRE_CMDS,
                extra_args=config.BRC_EXTRA_SINGULARITY_ARGS,
                slurm_config=slurm_config,
            )
        elif mode == 'htp':
            dmode = doodad.mode.BrcHighThroughputMode(
                image=singularity_image,
                gpu=use_gpu,
                pre_cmd=config.SSS_PRE_CMDS,
                extra_args=config.BRC_EXTRA_SINGULARITY_ARGS,
                slurm_config=slurm_config,
                taskfile_path_on_brc=config.TASKFILE_PATH_ON_BRC,
                overwrite_task_script=_global_is_first_launch,
                n_tasks_total=_global_n_tasks_total,
            )
        else:
            dmode = doodad.mode.ScriptSlurmSingularity(
                image=singularity_image,
                gpu=use_gpu,
                pre_cmd=config.SSS_PRE_CMDS,
                extra_args=config.BRC_EXTRA_SINGULARITY_ARGS,
                slurm_config=slurm_config,
                overwrite_script=_global_is_first_launch,
            )
    elif mode == 'ec2':
        # Do this separately in case someone does not have EC2 configured
        if s3_log_prefix is None:
            s3_log_prefix = exp_name
        dmode = doodad.mode.EC2AutoconfigDocker(
            image=docker_image,
            image_id=image_id,
            region=region,
            instance_type=instance_type,
            spot_price=spot_price,
            s3_log_prefix=s3_log_prefix,
            # Make the sub-directories within launching code rather
            # than relying on doodad.
            s3_log_name=s3_log_name,
            gpu=use_gpu,
            aws_s3_path=aws_s3_path,
            num_exps=num_exps_per_instance,
            **mode_kwargs)
    elif mode == 'gcp':
        image_name = config.GCP_IMAGE_NAME
        if use_gpu:
            image_name = config.GCP_GPU_IMAGE_NAME
        if gcp_kwargs is None:
            gcp_kwargs = {}
        config_kwargs = {
            **config.GCP_DEFAULT_KWARGS,
            **dict(image_name=image_name),
            **gcp_kwargs
        }
        dmode = doodad.mode.GCPDocker(image=docker_image,
                                      gpu=use_gpu,
                                      gcp_bucket_name=config.GCP_BUCKET_NAME,
                                      gcp_log_prefix=exp_name,
                                      gcp_log_name="",
                                      num_exps=num_exps_per_instance,
                                      **config_kwargs)
        doodad_config.extra_launch_info['gcp_image'] = image_name
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))
    _global_is_first_launch = False

    """
    Get the mounts
    """
    mounts = create_mounts(
        base_log_dir=base_log_dir,
        mode=mode,
        sync_interval=sync_interval,
        local_input_dir_to_mount_point_dict=local_input_dir_to_mount_point_dict,
    )

    """
    Get the outputs
    """
    launch_locally = None
    # target = config.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH
    target = osp.join(REPO_DIR, 'doodad/easy_launch/run_experiment.py')
    snapshot_dir_for_script = None  # if not update, will be set automatically
    if mode == 'ec2':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment director and sub-directory.
        snapshot_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    elif mode == 'local':
        base_log_dir_for_script = base_log_dir
    elif mode == 'local_docker':
        base_log_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    elif mode == 'ssh':
        base_log_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    elif mode in {'local_singularity', 'slurm_singularity', 'sss', 'htp'}:
        base_log_dir_for_script = base_log_dir
        launch_locally = True
        if mode in {'sss', 'htp'}:
            target = config.SSS_RUN_DOODAD_EXPERIMENT_SCRIPT_PATH
    elif mode == 'here_no_doodad':
        base_log_dir_for_script = base_log_dir
    elif mode == 'gcp':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        snapshot_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))
    doodad_config = doodad_config._replace(
        base_log_dir=base_log_dir_for_script)
    _global_target_mount = doodad.launch_python(
        target=target,
        mode=dmode,
        mount_points=mounts,
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'doodad_config': doodad_config,
            'variant': variant,
            'mode': mode,
        },
        use_cloudpickle=True,
        target_mount=_global_target_mount,
        verbose=verbose,
        launch_locally=launch_locally,
        port=config.DOCKER_PORT)
# Use local mode to test code mode_local = dd.mode.LocalDocker( image='justinfu/rl_base:0.1' ) # Use docker mode to launch jobs on newton machine mode_ssh = dd.mode.SSHDocker( image='justinfu/rl_base:0.1', credentials=ssh.SSHCredentials(hostname='newton2.banatao.berkeley.edu', username='******', identity_file='path/to/identity'), ) # Set up code and output directories OUTPUT_DIR = '/mount/outputs' # this is the directory visible to the target script inside docker mounts = [ mount.MountLocal(local_dir='~/install/rllab', pythonpath=True), # point to your rllab mount.MountLocal(local_dir='~/install/gym/.mujoco', mount_point='/root/.mujoco'), # point to your mujoco # this output directory will be visible on the remote machine # TODO: this directory will have root permissions. For now you need to scp your data inside your script. mount.MountLocal(local_dir='~/data/%s' % MY_USERNAME, mount_point=OUTPUT_DIR, output=True), ] dd.launch_python( target='/home/fhkingma/Documents/glow/train.py', # point to a target script (absolute path). mode=mode_local, mount_points=mounts, verbose=True, )
def run_experiment(
    method_call,
    target_script=None,
    mode='local',
    exp_name='default',
    variant=None,
    exp_id=0,
    use_gpu=False,
    gpu_id=0,
    seed=None,
    # logger info
    prepend_date_to_exp_name=False,
    snapshot_mode='last',
    snapshot_gap=1,
    base_log_dir=None,
    local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
    # local settings
    skip_wait=False,
    # ec2 settings
    sync_interval=180,
    region='us-east-1',
    instance_type=None,
    spot_price=None,
    verbose=False,
    trial_dir_suffix=None,
    num_exps_per_instance=1,
    # sss settings
    time_in_mins=None,
    # ssh settings
    ssh_host=None,
    # gcp
    gcp_kwargs=None,
):
    """
    Usage:
    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum", x+y)

    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant, exp_name="my-experiment")
    ```
    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`
    By default, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`

    :param method_call: a function that takes in a dictionary as argument
    :param mode: A string:
     - 'local'
     - 'local_docker'
     - 'ec2'
     - 'here_no_doodad': Run without doodad call
    :param exp_name: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param prepend_date_to_exp_name: If False, do not prepend the date to
        the experiment directory.
    :param use_gpu:
    :param snapshot_mode: See easy_logger.logging.logger
    :param snapshot_gap: See easy_logger.logging.logger
    :param base_log_dir: Will override the default base log directory.
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :param ssh_host: the name of the host you want to ssh onto, should
        correspond to an entry in config.py of the following form:
        SSH_HOSTS=dict(
            ssh_host=dict(
                username='******',
                hostname='hostname/ip address',
            )
        )
        - if ssh_host is set to None, you will use ssh_host specified by
          config.SSH_DEFAULT_HOST
    :return:
    """
    # Fall back to running locally without doodad if it is not installed.
    try:
        import doodad
        import doodad.mode
        import doodad.ssh
    except ImportError:
        print("Doodad not set up! Running experiment here.")
        mode = 'here_no_doodad'
    # NOTE(review): mutates module-level launch state on every call.
    global ec2_okayed
    global gpu_ec2_okayed
    global slurm_config
    global _global_target_mount
    global _global_is_first_launch
    global _global_n_tasks_total

    """
    Sanitize inputs as needed
    """
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    if mode == 'ssh' and base_log_dir is None:
        base_log_dir = config.SSH_LOG_DIR
    if base_log_dir is None:
        if mode == 'sss':
            base_log_dir = config.SSS_LOG_DIR
        else:
            base_log_dir = config.LOCAL_LOG_DIR
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key))
    variant['base_exp_name'] = exp_name
    if prepend_date_to_exp_name:
        exp_name = time.strftime("%y-%m-%d") + "-" + exp_name
    variant['seed'] = seed
    variant['exp_id'] = str(exp_id)
    variant['exp_name'] = str(exp_name)
    variant['instance_type'] = str(instance_type)
    git_infos = get_git_info()
    run_experiment_kwargs = dict(
        exp_name=exp_name,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        gpu_id=gpu_id,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
        script_name=main.__file__,
        trial_dir_suffix=trial_dir_suffix,
    )
    # 'here_no_doodad' short-circuits: run the experiment in this process.
    if mode == 'here_no_doodad':
        run_experiment_kwargs['base_log_dir'] = base_log_dir
        return run_experiment_here(method_call, **run_experiment_kwargs)

    """
    Safety Checks
    """
    if mode == 'ec2' or mode == 'gcp':
        if _global_is_first_launch and not ec2_okayed and not util.query_yes_no(
                "{} costs money. Are you sure you want to run?".format(mode)):
            sys.exit(1)
        if _global_is_first_launch and not gpu_ec2_okayed and use_gpu:
            if not util.query_yes_no(
                    "{} is more expensive with GPUs. Confirm?".format(mode)):
                sys.exit(1)
            gpu_ec2_okayed = True
        ec2_okayed = True

    """
    GPU vs normal configs
    """
    if use_gpu:
        docker_image = config.GPU_DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = config.GPU_INSTANCE_TYPE
        else:
            # NOTE(review): only g-family instances pass here, unlike the
            # sibling launcher in this file which also accepts p-family —
            # confirm which is intended.
            assert instance_type[0] == 'g'
        if spot_price is None:
            spot_price = config.GPU_SPOT_PRICE
        variant['docker_image'] = docker_image
    else:
        docker_image = config.DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = config.INSTANCE_TYPE
        if spot_price is None:
            spot_price = config.SPOT_PRICE
        variant['docker_image'] = docker_image
    if mode in {'sss', 'htp'}:
        if use_gpu:
            singularity_image = config.SSS_GPU_IMAGE
        else:
            singularity_image = config.SSS_CPU_IMAGE
        variant['singularity_image'] = singularity_image
    elif mode in ['local_singularity', 'slurm_singularity']:
        singularity_image = config.SINGULARITY_IMAGE
        variant['singularity_image'] = singularity_image
    else:
        singularity_image = None

    """
    Get the mode
    """
    mode_kwargs = {}
    if use_gpu and mode == 'ec2':
        image_id = config.REGION_TO_GPU_AWS_IMAGE_ID[region]
        if region == 'us-east-1':
            avail_zone = config.REGION_TO_GPU_AWS_AVAIL_ZONE.get(
                region, "us-east-1b")
            mode_kwargs['extra_ec2_instance_kwargs'] = dict(Placement=dict(
                AvailabilityZone=avail_zone,
            ), )
    else:
        image_id = None
    if hasattr(config, "AWS_S3_PATH"):
        aws_s3_path = config.AWS_S3_PATH
    else:
        aws_s3_path = None
    # S3 log name encodes either the run id or exp-name/id/seed.
    if "run_id" in variant and variant["run_id"] is not None:
        run_id, exp_id = variant["run_id"], variant["exp_id"]
        s3_log_name = "run{}/id{}".format(run_id, exp_id)
    else:
        s3_log_name = "{}-id{}-s{}".format(exp_name, exp_id, seed)
    if trial_dir_suffix is not None:
        s3_log_name = s3_log_name + "-" + trial_dir_suffix

    """
    Create mode
    """
    if mode == 'local':
        dmode = doodad.mode.Local(skip_wait=skip_wait)
    elif mode == 'local_docker':
        dmode = doodad.mode.LocalDocker(
            image=docker_image,
            gpu=use_gpu,
        )
    elif mode == 'ssh':
        if ssh_host == None:
            ssh_dict = config.SSH_HOSTS[config.SSH_DEFAULT_HOST]
        else:
            ssh_dict = config.SSH_HOSTS[ssh_host]
        credentials = doodad.ssh.credentials.SSHCredentials(
            username=ssh_dict['username'],
            hostname=ssh_dict['hostname'],
            identity_file=config.SSH_PRIVATE_KEY)
        dmode = doodad.mode.SSHDocker(
            credentials=credentials,
            image=docker_image,
            gpu=use_gpu,
            tmp_dir=config.SSH_TMP_DIR,
        )
    elif mode == 'local_singularity':
        dmode = doodad.mode.LocalSingularity(
            image=singularity_image,
            gpu=use_gpu,
            pre_cmd=config.SINGULARITY_PRE_CMDS,
        )
    elif mode in {'slurm_singularity', 'sss', 'htp'}:
        assert time_in_mins is not None, "Must approximate/set time in minutes"
        if slurm_config is None:
            if use_gpu:
                slurm_config = SlurmConfig(time_in_mins=time_in_mins,
                                           **config.SLURM_GPU_CONFIG)
            else:
                slurm_config = SlurmConfig(time_in_mins=time_in_mins,
                                           **config.SLURM_CPU_CONFIG)
        if mode == 'slurm_singularity':
            dmode = doodad.mode.SlurmSingularity(
                image=singularity_image,
                gpu=use_gpu,
                skip_wait=skip_wait,
                pre_cmd=config.SINGULARITY_PRE_CMDS,
                extra_args=config.BRC_EXTRA_SINGULARITY_ARGS,
                slurm_config=slurm_config,
            )
        elif mode == 'htp':
            dmode = doodad.mode.BrcHighThroughputMode(
                image=singularity_image,
                gpu=use_gpu,
                pre_cmd=config.SSS_PRE_CMDS,
                extra_args=config.BRC_EXTRA_SINGULARITY_ARGS,
                slurm_config=slurm_config,
                taskfile_dir_on_brc=config.TASKFILE_DIR_ON_BRC,
                overwrite_task_script=_global_is_first_launch,
                n_tasks_total=_global_n_tasks_total,
                launch_id=_global_launch_uuid)
        else:
            dmode = doodad.mode.ScriptSlurmSingularity(
                image=singularity_image,
                gpu=use_gpu,
                pre_cmd=config.SSS_PRE_CMDS,
                extra_args=config.BRC_EXTRA_SINGULARITY_ARGS,
                slurm_config=slurm_config,
            )
    elif mode == 'ec2':
        # Do this separately in case someone does not have EC2 configured
        dmode = doodad.mode.EC2AutoconfigDocker(
            image=docker_image,
            image_id=image_id,
            region=region,
            instance_type=instance_type,
            spot_price=spot_price,
            s3_log_prefix=exp_name,
            # Ask Vitchyr or Steven from an explanation, but basically we
            # will start just making the sub-directories within railrl rather
            # than relying on doodad to do that.
            s3_log_name="",
            gpu=use_gpu,
            aws_s3_path=aws_s3_path,
            num_exps=num_exps_per_instance,
            **mode_kwargs)
    elif mode == 'gcp':
        image_name = config.GCP_IMAGE_NAME
        if use_gpu:
            image_name = config.GCP_GPU_IMAGE_NAME
        if gcp_kwargs is None:
            gcp_kwargs = {}
        config_kwargs = {
            **config.GCP_DEFAULT_KWARGS,
            **dict(image_name=image_name),
            **gcp_kwargs
        }
        dmode = doodad.mode.GCPDocker(image=docker_image,
                                      gpu=use_gpu,
                                      gcp_bucket_name=config.GCP_BUCKET_NAME,
                                      gcp_log_prefix=exp_name,
                                      gcp_log_name="",
                                      num_exps=num_exps_per_instance,
                                      **config_kwargs)
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))

    """
    Get the mounts
    """
    mounts = create_mounts(
        base_log_dir=base_log_dir,
        mode=mode,
        sync_interval=sync_interval,
        local_input_dir_to_mount_point_dict=local_input_dir_to_mount_point_dict,
    )

    """
    Get the outputs
    """
    launch_locally = None
    target = target_script or osp.join(util.REPO_DIR,
                                       'easy_launcher/run_experiment.py')
    if mode == 'ec2':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        run_experiment_kwargs['randomize_seed'] = True
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment director and sub-directory.
        snapshot_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    elif mode == 'local':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'local_docker':
        base_log_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'ssh':
        base_log_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode in ['local_singularity', 'slurm_singularity', 'sss', 'htp']:
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
        launch_locally = True
        if mode in {'sss', 'htp'}:
            target = config.SSS_RUN_DOODAD_EXPERIMENT_SCRIPT_PATH
    elif mode == 'here_no_doodad':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'gcp':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        run_experiment_kwargs['randomize_seed'] = True
        snapshot_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))
    run_experiment_kwargs['base_log_dir'] = base_log_dir_for_script
    _global_target_mount = doodad.launch_python(
        target=target,
        mode=dmode,
        mount_points=mounts,
        python_cmd=config.MODE_TO_PYTHON_CMD[mode],
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'run_experiment_kwargs': run_experiment_kwargs,
            'mode': mode,
        },
        use_cloudpickle=True,
        target_mount=_global_target_mount,
        verbose=verbose,
        launch_locally=launch_locally,
    )
    _global_is_first_launch = False
    _global_n_tasks_total += 1
]
output_dir = '/home/docker/store/umrl/output'
# Results sync to a GCP bucket when launching on GCP, otherwise they go to
# a local directory on this machine.
if mode == mode_gcp:
    output_mount = mount.MountGCP(
        gcp_path='output',
        gcp_bucket_name='umrl',
        mount_point=output_dir,
        output=True,
        include_types=('*.txt', '*.csv', '*.json', '*.gz', '*.tar', '*.log',
                       '*.pkl', '*.png', '*.html', '*.mp4'))
else:
    output_mount = mount.MountLocal(
        local_dir='/home/kylehsu/experiments/umrl/output',
        mount_point=output_dir,
        output=True)
mounts.append(output_mount)

dd.launch_python(
    target=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'main_contextual.py'),
    mode=mode,
    mount_points=mounts,
    args=dict(log_dir_root=output_dir),
    # Runs inside the conda env and drops into ipdb on uncaught exceptions.
    python_cmd='source activate umrl && python -m ipdb -c continue',
    fake_display=False
)
# dd.launch_shell(
#     command='ls',
#     mode=mode,
#     dry=False,
#     mount_points=[]
#     # mount_points=mounts
# )
def run_experiment(
    method_call,
    mode='local',
    exp_folder=None,
    exp_prefix='default',
    seed=None,
    variant=None,
    exp_id=0,
    prepend_date_to_exp_prefix=True,
    use_gpu=False,
    gpu_id=None,
    snapshot_mode='last',
    snapshot_gap=1,
    base_log_dir=None,
    local_input_dir_to_mount_point_dict=None,
    # local settings
    skip_wait=False,
    logger=default_logger,
    verbose=False,
    trial_dir_suffix=None,
    num_exps_per_instance=1,
    # ssh settings
    ssh_host=None,
    interactive_docker=False,
):
    """
    Usage:
    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum", x+y)

    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant, exp_prefix="my-experiment")
    ```
    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`
    By default, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`

    :param method_call: a function that takes in a dictionary as argument
    :param mode: A string:
     - 'local'
     - 'local_docker'
     - 'here_no_doodad': Run without doodad call
    :param exp_prefix: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param prepend_date_to_exp_prefix: If False, do not prepend the date to
        the experiment directory.
    :param use_gpu:
    :param snapshot_mode: See rllab.logger
    :param snapshot_gap: See rllab.logger
    :param base_log_dir: Will override the default base log directory.
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :param ssh_host: the name of the host you want to ssh onto, should
        correspond to an entry in launcher_config.py of the following form:
        SSH_HOSTS=dict(
            ssh_host=dict(
                username='******',
                hostname='hostname/ip address',
            )
        )
        - if ssh_host is set to None, you will use ssh_host specified by
          config.SSH_DEFAULT_HOST
    :return:
    """
    # Fall back to running locally without doodad if it is not installed.
    try:
        import doodad
        import doodad.mode
        import doodad.ssh
    except ImportError:
        print("Doodad not set up! Running experiment here.")
        mode = 'here_no_doodad'
    # NOTE(review): mutates module-level launch state on every call.
    global ec2_okayed
    global gpu_ec2_okayed
    global target_mount
    global first_sss_launch

    """
    Sanitize inputs as needed
    """
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    if base_log_dir is None:
        if mode == 'ssh':
            base_log_dir = launcher_config.SSH_LOG_DIR
        else:
            base_log_dir = launcher_config.LOCAL_LOG_DIR
    if exp_folder is not None:
        base_log_dir = os.path.join(base_log_dir, exp_folder)
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if isinstance(key, str) and "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key))
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "-" + exp_prefix
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['exp_prefix'] = str(exp_prefix)
    # Collect git diff/commit info for all mounted code dirs; best-effort —
    # silently skipped when gitpython is absent or a dir is not a repo.
    try:
        import git
        doodad_path = osp.abspath(
            osp.join(osp.dirname(doodad.__file__), os.pardir))
        dirs = launcher_config.CODE_DIRS_TO_MOUNT + [doodad_path]
        git_infos = []
        for directory in dirs:
            # Idk how to query these things, so I'm just doing try-catch
            try:
                repo = git.Repo(directory)
                try:
                    branch_name = repo.active_branch.name
                except TypeError:
                    branch_name = '[DETACHED]'
                git_infos.append(
                    GitInfo(
                        directory=directory,
                        code_diff=repo.git.diff(None),
                        code_diff_staged=repo.git.diff('--staged'),
                        commit_hash=repo.head.commit.hexsha,
                        branch_name=branch_name,
                    ))
            except git.exc.InvalidGitRepositoryError:
                pass
    except ImportError:
        git_infos = None
    run_experiment_kwargs = dict(
        exp_prefix=exp_prefix,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
        script_name=main.__file__,
        logger=logger,
        trial_dir_suffix=trial_dir_suffix,
    )
    # 'here_no_doodad' short-circuits: run the experiment in this process.
    if mode == 'here_no_doodad':
        run_experiment_kwargs['base_log_dir'] = base_log_dir
        return run_experiment_here(method_call, **run_experiment_kwargs)

    """
    GPU vs normal configs
    """
    if use_gpu:
        docker_image = launcher_config.GPU_DOODAD_DOCKER_IMAGE_SSH
    else:
        docker_image = launcher_config.DOODAD_DOCKER_IMAGE_SSH

    """
    Create mode
    """
    if mode == 'local':
        dmode = doodad.mode.Local()
    elif mode == 'local_docker':
        dmode = doodad.mode.LocalDocker(
            image=docker_image,
            gpu=use_gpu,
        )
    elif mode == 'ssh':
        if ssh_host == None:
            ssh_dict = launcher_config.SSH_HOSTS[
                launcher_config.SSH_DEFAULT_HOST]
        else:
            ssh_dict = launcher_config.SSH_HOSTS[ssh_host]
        credentials = doodad.ssh.credentials.SSHCredentials(
            username=ssh_dict['username'],
            hostname=ssh_dict['hostname'],
            identity_file=launcher_config.SSH_PRIVATE_KEY)
        dmode = doodad.mode.SSHDocker(
            tmp_dir=launcher_config.SSH_TMP_DIR,
            credentials=credentials,
            image=docker_image,
            gpu=use_gpu,
        )
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))

    """
    Get the mounts
    """
    mounts = create_mounts(
        base_log_dir=base_log_dir,
        mode=mode,
        local_input_dir_to_mount_point_dict=local_input_dir_to_mount_point_dict,
        exp_folder=exp_folder,
    )

    """
    Get the outputs
    """
    mode_specific_kwargs = {}
    launch_locally = None
    target = launcher_config.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH
    if mode == 'local':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
        mode_specific_kwargs['skip_wait'] = skip_wait
    elif mode == 'local_docker':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
        mode_specific_kwargs['interactive_docker'] = interactive_docker
    elif mode == 'ssh':
        base_log_dir_for_script = base_log_dir
        # NOTE(review): the condition checks exp_prefix but joins exp_folder
        # — confirm this mismatch is intended (exp_folder may be None here).
        if exp_prefix is not None:
            base_log_dir_for_script = osp.join(base_log_dir_for_script,
                                               exp_folder)
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
        mode_specific_kwargs['interactive_docker'] = interactive_docker
    elif mode == 'here_no_doodad':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))
    run_experiment_kwargs['base_log_dir'] = base_log_dir_for_script
    target_mount = doodad.launch_python(target=target,
                                        mode=dmode,
                                        mount_points=mounts,
                                        args={
                                            'method_call': method_call,
                                            'output_dir': snapshot_dir_for_script,
                                            'run_experiment_kwargs': run_experiment_kwargs,
                                            'mode': mode,
                                        },
                                        use_cloudpickle=True,
                                        target_mount=target_mount,
                                        verbose=verbose,
                                        launch_locally=launch_locally,
                                        gpu_id=gpu_id,
                                        **mode_specific_kwargs)
def run_experiment(
        method_call,
        mode='local',
        exp_prefix='default',
        seed=None,
        variant=None,
        exp_id=0,
        unique_id=None,
        prepend_date_to_exp_prefix=True,
        use_gpu=False,
        snapshot_mode='last',
        snapshot_gap=1,
        n_parallel=0,
        base_log_dir=None,
        sync_interval=180,
        local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
):
    """
    Launch `method_call(variant)` as an experiment via doodad (local,
    local docker, or EC2), falling back to `run_experiment_old` when
    doodad is not installed.

    Usage:
    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum", x+y)
    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant, exp_prefix="my-experiment")
    ```
    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`
    By default, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`
    :param method_call: a function that takes in a dictionary as argument
    :param mode: 'local', 'local_docker', or 'ec2'
    :param exp_prefix: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param unique_id: If not set, the unique id is generated.
    :param prepend_date_to_exp_prefix: If False, do not prepend the date to
    the experiment directory.
    :param use_gpu:
    :param snapshot_mode: See rllab.logger
    :param snapshot_gap: See rllab.logger
    :param n_parallel:
    :param base_log_dir: Will over
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :return:
    """
    # Doodad is an optional dependency: if it is not importable, delegate
    # the entire launch to the pre-doodad code path and return its result.
    try:
        import doodad
        import doodad.mode
        import doodad.mount as mount
        from doodad.utils import REPO_DIR
    except ImportError:
        return run_experiment_old(
            method_call,
            exp_prefix=exp_prefix,
            seed=seed,
            variant=variant,
            time_it=True,
            mode=mode,
            exp_id=exp_id,
            unique_id=unique_id,
            prepend_date_to_exp_prefix=prepend_date_to_exp_prefix,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
            periodic_sync_interval=sync_interval,
        )
    # Module-level flags that record the user's "EC2 costs money" consent so
    # repeated launches in one process only prompt once.
    global ec2_okayed
    global gpu_ec2_okayed
    if local_input_dir_to_mount_point_dict is None:
        local_input_dir_to_mount_point_dict = {}
    else:
        raise NotImplementedError("TODO(vitchyr): Implement this")
    # Modify some of the inputs
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key)
            )
    if unique_id is None:
        unique_id = str(uuid.uuid4())
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "-" + exp_prefix
    # Stash launch metadata in the variant (as strings) so the remote script
    # can recover it.
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['unique_id'] = str(unique_id)
    logger.log("Variant:")
    logger.log(json.dumps(ppp.dict_to_safe_json(variant), indent=2))
    # NOTE(review): all three doodad modes are constructed eagerly here, so
    # even a purely local run instantiates EC2AutoconfigDocker — presumably
    # that requires AWS autoconfig to be set up; confirm (the newer launcher
    # versions in this file construct the selected mode lazily).
    mode_str_to_doodad_mode = {
        'local': doodad.mode.Local(),
        'local_docker': doodad.mode.LocalDocker(
            image=config.DOODAD_DOCKER_IMAGE,
        ),
        'ec2': doodad.mode.EC2AutoconfigDocker(
            image=config.DOODAD_DOCKER_IMAGE,
            region='us-east-2',
            instance_type='c4.large',
            spot_price=0.03,
            s3_log_prefix=exp_prefix,
            s3_log_name="{}-id{}-s{}".format(exp_prefix, exp_id, seed),
        ),
    }
    if base_log_dir is None:
        base_log_dir = config.LOCAL_LOG_DIR
    output_mount_point = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    # Code mounts: doodad's own repo plus every configured code directory,
    # plus any caller-supplied input-data mounts.
    mounts = [
        mount.MountLocal(local_dir=REPO_DIR, pythonpath=True),
    ]
    for code_dir in config.CODE_DIRS_TO_MOUNT:
        mounts.append(mount.MountLocal(local_dir=code_dir, pythonpath=True))
    for dir, mount_point in local_input_dir_to_mount_point_dict.items():
        mounts.append(mount.MountLocal(
            local_dir=dir,
            mount_point=mount_point,
            pythonpath=False,
        ))
    if mode != 'local':
        for non_code_mapping in config.DIR_AND_MOUNT_POINT_MAPPINGS:
            mounts.append(mount.MountLocal(**non_code_mapping))
    if mode == 'ec2':
        # Interactive cost confirmation; aborts the process on "no".
        if not ec2_okayed and not query_yes_no(
                "EC2 costs money. Are you sure you want to run?"
        ):
            sys.exit(1)
        if not gpu_ec2_okayed and use_gpu:
            if not query_yes_no(
                    "EC2 is more expensive with GPUs. Confirm?"
            ):
                sys.exit(1)
            gpu_ec2_okayed = True
        ec2_okayed = True
        output_mount = mount.MountS3(
            s3_path='',
            mount_point=output_mount_point,
            output=True,
            sync_interval=sync_interval,
        )
        # This will be over-written by the snapshot dir, but I'm setting it for
        # good measure.
        base_log_dir_for_script = output_mount_point
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment director and sub-directory.
        snapshot_dir_for_script = output_mount_point
    elif mode == 'local':
        output_mount = mount.MountLocal(
            local_dir=base_log_dir,
            mount_point=None,  # For purely local mode, skip mounting.
            output=True,
        )
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    else:
        output_mount = mount.MountLocal(
            local_dir=base_log_dir,
            mount_point=output_mount_point,
            output=True,
        )
        base_log_dir_for_script = output_mount_point
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    mounts.append(output_mount)
    # Record the git diff of the current working directory for reproducibility;
    # very large diffs are dropped rather than shipped with the job.
    repo = git.Repo(os.getcwd())
    code_diff = repo.git.diff(None)
    if len(code_diff) > 5000:
        logger.log("Git diff %d greater than 5000. Not saving diff." % len(code_diff))
        code_diff = None
    run_experiment_kwargs = dict(
        exp_prefix=exp_prefix,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        code_diff=code_diff,
        commit_hash=repo.head.commit.hexsha,
        script_name=main.__file__,
        n_parallel=n_parallel,
        base_log_dir=base_log_dir_for_script,
    )
    # Hand everything to doodad; the target script unpickles these args and
    # calls `method_call` on the chosen backend.
    doodad.launch_python(
        target=config.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH,
        mode=mode_str_to_doodad_mode[mode],
        mount_points=mounts,
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'run_experiment_kwargs': run_experiment_kwargs,
        },
        use_cloudpickle=True,
        fake_display=True if mode != 'local' else False,
    )
def run_experiment(
        method_call,
        mode='local',
        exp_prefix='default',
        seed=None,
        variant=None,
        exp_id=0,
        prepend_date_to_exp_prefix=True,
        use_gpu=False,
        snapshot_mode='last',
        snapshot_gap=1,
        base_log_dir=None,
        local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
        # local settings
        skip_wait=False,
        # ec2 settings
        sync_interval=180,
        region='us-east-1',
        instance_type=None,
        spot_price=None,
        verbose=False,
        num_exps_per_instance=1,
        # sss settings
        time_in_mins=None,
        # ssh settings
        ssh_host=None,
        # gcp
        gcp_kwargs=None,
):
    """
    Launch `method_call(variant)` via doodad on one of several backends
    (local, local_docker, ssh, singularity/slurm, ec2, gcp), or run it
    in-process for 'here_no_doodad'.

    Usage:
    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum", x+y)
    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant, exp_prefix="my-experiment")
    ```
    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`
    By default, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`
    :param method_call: a function that takes in a dictionary as argument
    :param mode: A string:
     - 'local'
     - 'local_docker'
     - 'ec2'
     - 'here_no_doodad': Run without doodad call
    :param exp_prefix: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param prepend_date_to_exp_prefix: If False, do not prepend the date to
    the experiment directory.
    :param use_gpu:
    :param snapshot_mode: See rlkit.core.logging
    :param snapshot_gap: See rlkit.core.logging
    :param base_log_dir: Will over
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :param ssh_host: the name of the host you want to ssh onto, should correspond to an entry in
    config.py of the following form:
    SSH_HOSTS=dict(
        ssh_host=dict(
            username='******',
            hostname='hostname/ip address',
        )
    )
    - if ssh_host is set to None, you will use ssh_host specified by
    config.SSH_DEFAULT_HOST
    :return:
    """
    # If doodad is missing, silently downgrade to running in this process.
    try:
        import doodad
        import doodad.mode
        import doodad.ssh
    except ImportError:
        print("Doodad not set up! Running experiment here.")
        mode = 'here_no_doodad'
    # Module-level launch state shared across calls: cost-confirmation flags,
    # the reusable target mount, and the sss first-launch marker.
    global ec2_okayed
    global gpu_ec2_okayed
    global target_mount
    global first_sss_launch

    """
    Sanitize inputs as needed
    """
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    # Mode-specific defaults for the log directory.
    if mode == 'ssh' and base_log_dir is None:
        base_log_dir = conf.SSH_LOG_DIR
    if base_log_dir is None:
        if mode == 'sss':
            base_log_dir = conf.SSS_LOG_DIR
        else:
            base_log_dir = conf.LOCAL_LOG_DIR
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key)
            )
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "-" + exp_prefix
    # Stash launch metadata in the variant (as strings) for the remote script.
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['exp_prefix'] = str(exp_prefix)
    variant['instance_type'] = str(instance_type)

    # Snapshot git state (diff, staged diff, commit, branch) of every mounted
    # code dir plus doodad itself, for reproducibility. Missing GitPython or
    # non-repo directories are tolerated.
    try:
        import git
        doodad_path = osp.abspath(osp.join(
            osp.dirname(doodad.__file__),
            os.pardir
        ))
        dirs = conf.CODE_DIRS_TO_MOUNT + [doodad_path]
        git_infos = []
        for directory in dirs:
            # Idk how to query these things, so I'm just doing try-catch
            try:
                repo = git.Repo(directory)
                try:
                    branch_name = repo.active_branch.name
                except TypeError:
                    branch_name = '[DETACHED]'
                git_infos.append(GitInfo(
                    directory=directory,
                    code_diff=repo.git.diff(None),
                    code_diff_staged=repo.git.diff('--staged'),
                    commit_hash=repo.head.commit.hexsha,
                    branch_name=branch_name,
                ))
            except git.exc.InvalidGitRepositoryError:
                pass
    except ImportError:
        git_infos = None
    run_experiment_kwargs = dict(
        exp_prefix=exp_prefix,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
        script_name=main.__file__,
    )
    # In-process mode short-circuits everything below.
    if mode == 'here_no_doodad':
        run_experiment_kwargs['base_log_dir'] = base_log_dir
        return run_experiment_here(
            method_call,
            **run_experiment_kwargs
        )

    """
    Safety Checks
    """
    # Interactive cost confirmation for cloud backends; aborts on "no".
    if mode == 'ec2' or mode == 'gcp':
        if not ec2_okayed and not query_yes_no(
                "{} costs money. Are you sure you want to run?".format(mode)
        ):
            sys.exit(1)
        if not gpu_ec2_okayed and use_gpu:
            if not query_yes_no(
                    "{} is more expensive with GPUs. Confirm?".format(mode)
            ):
                sys.exit(1)
            gpu_ec2_okayed = True
        ec2_okayed = True

    """
    GPU vs normal configs
    """
    if use_gpu:
        docker_image = conf.GPU_DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = conf.GPU_INSTANCE_TYPE
        else:
            # Sanity check: explicit GPU instance types are expected to be in
            # the AWS 'g*' family.
            assert instance_type[0] == 'g'
        if spot_price is None:
            spot_price = conf.GPU_SPOT_PRICE
    else:
        docker_image = conf.DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = conf.INSTANCE_TYPE
        if spot_price is None:
            spot_price = conf.SPOT_PRICE
    if mode == 'sss':
        singularity_image = conf.SSS_IMAGE
    elif mode in ['local_singularity', 'slurm_singularity']:
        singularity_image = conf.SINGULARITY_IMAGE
    else:
        singularity_image = None

    """
    Get the mode
    """
    mode_kwargs = {}
    # AWS GPU image/availability-zone lookup only applies to EC2 launches.
    if use_gpu and mode == 'ec2':
        image_id = conf.REGION_TO_GPU_AWS_IMAGE_ID[region]
        if region == 'us-east-1':
            avail_zone = conf.REGION_TO_GPU_AWS_AVAIL_ZONE.get(region, "us-east-1b")
            mode_kwargs['extra_ec2_instance_kwargs'] = dict(
                Placement=dict(
                    AvailabilityZone=avail_zone,
                ),
            )
    else:
        image_id = None
    if hasattr(conf, "AWS_S3_PATH"):
        aws_s3_path = conf.AWS_S3_PATH
    else:
        aws_s3_path = None

    """
    Create mode
    """
    # Only the selected doodad mode is constructed, so e.g. missing EC2
    # configuration does not break local runs.
    if mode == 'local':
        dmode = doodad.mode.Local(skip_wait=skip_wait)
    elif mode == 'local_docker':
        dmode = doodad.mode.LocalDocker(
            image=docker_image,
            gpu=use_gpu,
        )
    elif mode == 'ssh':
        if ssh_host == None:
            ssh_dict = conf.SSH_HOSTS[conf.SSH_DEFAULT_HOST]
        else:
            ssh_dict = conf.SSH_HOSTS[ssh_host]
        credentials = doodad.ssh.credentials.SSHCredentials(
            username=ssh_dict['username'],
            hostname=ssh_dict['hostname'],
            identity_file=conf.SSH_PRIVATE_KEY
        )
        dmode = doodad.mode.SSHDocker(
            credentials=credentials,
            image=docker_image,
            gpu=use_gpu,
        )
    elif mode == 'local_singularity':
        dmode = doodad.mode.LocalSingularity(
            image=singularity_image,
            gpu=use_gpu,
        )
    elif mode == 'slurm_singularity' or mode == 'sss':
        assert time_in_mins is not None, "Must approximate/set time in minutes"
        if use_gpu:
            kwargs = conf.SLURM_GPU_CONFIG
        else:
            kwargs = conf.SLURM_CPU_CONFIG
        if mode == 'slurm_singularity':
            dmode = doodad.mode.SlurmSingularity(
                image=singularity_image,
                gpu=use_gpu,
                time_in_mins=time_in_mins,
                skip_wait=skip_wait,
                pre_cmd=conf.SINGULARITY_PRE_CMDS,
                **kwargs
            )
        else:
            dmode = doodad.mode.ScriptSlurmSingularity(
                image=singularity_image,
                gpu=use_gpu,
                time_in_mins=time_in_mins,
                skip_wait=skip_wait,
                pre_cmd=conf.SSS_PRE_CMDS,
                **kwargs
            )
    elif mode == 'ec2':
        # Do this separately in case someone does not have EC2 configured
        dmode = doodad.mode.EC2AutoconfigDocker(
            image=docker_image,
            image_id=image_id,
            region=region,
            instance_type=instance_type,
            spot_price=spot_price,
            s3_log_prefix=exp_prefix,
            # Ask Vitchyr or Steven from an explanation, but basically we
            # will start just making the sub-directories within rlkit rather
            # than relying on doodad to do that.
            s3_log_name="",
            gpu=use_gpu,
            aws_s3_path=aws_s3_path,
            num_exps=num_exps_per_instance,
            **mode_kwargs
        )
    elif mode == 'gcp':
        image_name = conf.GCP_IMAGE_NAME
        if use_gpu:
            image_name = conf.GCP_GPU_IMAGE_NAME
        if gcp_kwargs is None:
            gcp_kwargs = {}
        config_kwargs = {
            **conf.GCP_DEFAULT_KWARGS,
            **dict(image_name=image_name),
            **gcp_kwargs
        }
        dmode = doodad.mode.GCPDocker(
            image=docker_image,
            gpu=use_gpu,
            gcp_bucket_name=conf.GCP_BUCKET_NAME,
            gcp_log_prefix=exp_prefix,
            gcp_log_name="",
            **config_kwargs
        )
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))

    """
    Get the mounts
    """
    mounts = create_mounts(
        base_log_dir=base_log_dir,
        mode=mode,
        sync_interval=sync_interval,
        local_input_dir_to_mount_point_dict=local_input_dir_to_mount_point_dict,
    )

    """
    Get the outputs
    """
    # Per-mode decision of where the remote script should write logs and
    # whether doodad should launch locally.
    launch_locally = None
    target = conf.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH
    if mode == 'ec2':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        run_experiment_kwargs['force_randomize_seed'] = True
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment director and sub-directory.
        snapshot_dir_for_script = conf.OUTPUT_DIR_FOR_DOODAD_TARGET
    elif mode == 'local':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'local_docker':
        base_log_dir_for_script = conf.OUTPUT_DIR_FOR_DOODAD_TARGET
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'ssh':
        base_log_dir_for_script = conf.OUTPUT_DIR_FOR_DOODAD_TARGET
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode in ['local_singularity', 'slurm_singularity', 'sss']:
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
        launch_locally = True
        if mode == 'sss':
            dmode.set_first_time(first_sss_launch)
            first_sss_launch = False
            target = conf.SSS_RUN_DOODAD_EXPERIMENT_SCRIPT_PATH
    elif mode == 'here_no_doodad':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'gcp':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        run_experiment_kwargs['force_randomize_seed'] = True
        snapshot_dir_for_script = conf.OUTPUT_DIR_FOR_DOODAD_TARGET
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))
    run_experiment_kwargs['base_log_dir'] = base_log_dir_for_script
    # Launch; the returned target mount is cached at module level and reused
    # by subsequent launches in this process.
    target_mount = doodad.launch_python(
        target=target,
        mode=dmode,
        mount_points=mounts,
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'run_experiment_kwargs': run_experiment_kwargs,
            'mode': mode,
        },
        use_cloudpickle=True,
        target_mount=target_mount,
        verbose=verbose,
        launch_locally=launch_locally,
    )
if MY_RUN_MODE == mode_ec2: output_mount = mount.MountS3(s3_path='outputs', mount_point=OUTPUT_DIR, output=True) # use this for ec2 else: output_mount = mount.MountLocal(local_dir=os.path.join(EXAMPLES_DIR, 'tmp_output'), mount_point=OUTPUT_DIR, output=True) mounts.append(output_mount) print(mounts) THIS_FILE_DIR = os.path.realpath(os.path.dirname(__file__)) dd.launch_python( target=os.path.join(THIS_FILE_DIR, 'app_main.py'), # point to a target script. If running remotely, this will be copied over mode=MY_RUN_MODE, mount_points=mounts, args={ 'arg1': 50, 'arg2': 25, 'output_dir': OUTPUT_DIR, } ) # # LOG_DIR = '/home/thanard/Downloads/rllab/data/test-ec2/' # THIS_FILE_DIR = '/home/thanard/Downloads/rllab/sandbox/thanard/infoGAN' # # DEMO_FILE= '/home/giulia/NIPS/softqlearning/softqlearning/environments/goals/ant_10_goals.pkl' # # ENV_FILE = '/home/giulia/NIPS/softqlearning/low_gear_ratio_ant.xml' # dd.launch_tools.launch_python( # target=os.path.join(THIS_FILE_DIR, 'infogan_2d.py'), # mode=MY_RUN_MODE, # mount_points=mounts, # args={
if kwargs['fcnpath']: mounts.append( mount.MountLocal(local_dir=args.fcnpath, mount_point=args.fcnpath)) print(mounts) THIS_FILE_DIR = os.path.realpath(os.path.dirname(__file__)) for i in range(args.n): dd.launch_python( target=os.path.abspath(args.target), mode=MY_RUN_MODE, mount_points=mounts, args={ 'data_dir': DATA_DIR, 'output_dir': OUTPUT_DIR, 'seed': i, 'kwargs': kwargs, 'ec2': True, 'planning_data_dir': PLANNING_DATA_DIR }, dry=False, verbose=True, postfix='%03d' % i, # postfix only works with my modified doodad. ) else: mounts = [ mount.MountLocal(local_dir=project_dir, pythonpath=True), ] if args.seed is None: args.seed = random.randint(1, 10000) print("Random Seed: ", args.seed)
def run_experiment(
        method_call,
        mode='local_docker',
        exp_prefix='default',
        seed=None,
        variant=None,
        exp_id=0,
        unique_id=None,
        prepend_date_to_exp_prefix=True,
        use_gpu=True,
        snapshot_mode='last',
        snapshot_gap=1,
        base_log_dir=None,
        local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
        # Settings for EC2 only
        sync_interval=180,
        region='us-east-1',
        instance_type=None,
        spot_price=None,
        logger=default_logger,
):
    """
    Launch `method_call(variant)` via doodad (local, local_docker, or ec2),
    or run it in-process for 'here_no_doodad'.

    Usage:
    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum", x+y)
    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant, exp_prefix="my-experiment")
    ```
    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`
    By default, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`
    :param method_call: a function that takes in a dictionary as argument
    :param mode: A string:
     - 'local'
     - 'local_docker'
     - 'ec2'
     - 'here_no_doodad': Run without doodad call
    :param exp_prefix: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param unique_id: If not set, the unique id is generated.
    :param prepend_date_to_exp_prefix: If False, do not prepend the date to
    the experiment directory.
    :param use_gpu:
    :param snapshot_mode: See rllab.logger
    :param snapshot_gap: See rllab.logger
    :param base_log_dir: Will override the default base log directory.
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :param logger: logger object used by the launched experiment.
    :return:
    """
    # If doodad is missing, silently downgrade to running in this process.
    try:
        import doodad
        import doodad.mode
    except ImportError:
        print("Doodad not set up! Running experiment here.")
        mode = 'here_no_doodad'
    # Module-level launch state: cost-confirmation flags and the reusable
    # target mount shared across launches in this process.
    global ec2_okayed
    global gpu_ec2_okayed
    global target_mount

    """
    Sanitize inputs as needed
    """
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    if base_log_dir is None:
        base_log_dir = config.LOCAL_LOG_DIR
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key))
    if unique_id is None:
        unique_id = str(uuid.uuid4())
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "-" + exp_prefix
    # Stash launch metadata in the variant (as strings) for the remote script.
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['unique_id'] = str(unique_id)
    variant['exp_prefix'] = str(exp_prefix)
    variant['instance_type'] = str(instance_type)

    # Snapshot git state of the working directory for reproducibility;
    # tolerate GitPython being absent.
    try:
        import git
        repo = git.Repo(os.getcwd())
        git_info = GitInfo(
            code_diff=repo.git.diff(None),
            commit_hash=repo.head.commit.hexsha,
            branch_name=repo.active_branch.name,
        )
    except ImportError:
        git_info = None
    run_experiment_kwargs = dict(
        exp_prefix=exp_prefix,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_info=git_info,
        script_name=main.__file__,
        logger=logger,
    )
    # In-process mode short-circuits everything below.
    if mode == 'here_no_doodad':
        run_experiment_kwargs['base_log_dir'] = base_log_dir
        return run_experiment_here(method_call, **run_experiment_kwargs)

    """
    Safety Checks
    """
    # Interactive cost confirmation for EC2; aborts on "no".
    if mode == 'ec2':
        if not ec2_okayed and not query_yes_no(
                "EC2 costs money. Are you sure you want to run?"):
            sys.exit(1)
        if not gpu_ec2_okayed and use_gpu:
            if not query_yes_no("EC2 is more expensive with GPUs. Confirm?"):
                sys.exit(1)
            gpu_ec2_okayed = True
        ec2_okayed = True

    """
    GPU vs normal configs
    """
    if use_gpu:
        docker_image = config.GPU_DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = config.GPU_INSTANCE_TYPE
        else:
            # Explicit GPU instance types are expected to be in the AWS
            # 'g*' family.
            assert instance_type[0] == 'g'
        if spot_price is None:
            spot_price = config.GPU_SPOT_PRICE
    else:
        docker_image = config.DOODAD_DOCKER_IMAGE
        if instance_type is None:
            instance_type = config.INSTANCE_TYPE
        if spot_price is None:
            spot_price = config.SPOT_PRICE

    """
    Get the mode
    """
    mode_kwargs = {}
    # BUG FIX: previously this branch ran for *every* GPU launch, so the
    # default mode ('local_docker' with use_gpu=True) required
    # config.REGION_TO_GPU_AWS_IMAGE_ID even though no EC2 instance was
    # involved. The AWS image/availability-zone lookup only matters for EC2.
    if use_gpu and mode == 'ec2':
        image_id = config.REGION_TO_GPU_AWS_IMAGE_ID[region]
        if region == 'us-east-1':
            mode_kwargs['extra_ec2_instance_kwargs'] = dict(Placement=dict(
                AvailabilityZone='us-east-1b',
            ),
            )
    else:
        image_id = None
    if hasattr(config, "AWS_S3_PATH"):
        aws_s3_path = config.AWS_S3_PATH
    else:
        aws_s3_path = None
    # Construct only the selected doodad mode (instead of eagerly building
    # all of them) so that users without EC2 configured can still launch
    # local and local_docker experiments.
    if mode == 'local':
        dmode = doodad.mode.Local()
    elif mode == 'local_docker':
        dmode = doodad.mode.LocalDocker(
            image=docker_image,
            gpu=use_gpu,
        )
    elif mode == 'ec2':
        # Allow callers to group S3 logs by an explicit run id; otherwise
        # fall back to a prefix/exp-id/seed based name.
        if "run_id" in variant and variant["run_id"] is not None:
            run_id, run_exp_id = variant["run_id"], variant["exp_id"]
            s3_log_name = "run{}/id{}".format(run_id, run_exp_id)
        else:
            s3_log_name = "{}-id{}-s{}".format(exp_prefix, exp_id, seed)
        dmode = doodad.mode.EC2AutoconfigDocker(
            image=docker_image,
            image_id=image_id,
            region=region,
            instance_type=instance_type,
            spot_price=spot_price,
            s3_log_prefix=exp_prefix,
            s3_log_name=s3_log_name,
            gpu=use_gpu,
            aws_s3_path=aws_s3_path,
            **mode_kwargs
        )
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))

    """
    Get the mounts
    """
    mounts = create_mounts(
        base_log_dir=base_log_dir,
        mode=mode,
        sync_interval=sync_interval,
        local_input_dir_to_mount_point_dict=local_input_dir_to_mount_point_dict,
    )

    """
    Get the outputs
    """
    if mode == 'ec2':
        # Ignored since I'm setting the snapshot dir directly
        base_log_dir_for_script = None
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment director and sub-directory.
        snapshot_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    elif mode == 'local':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'local_docker':
        base_log_dir_for_script = config.OUTPUT_DIR_FOR_DOODAD_TARGET
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    elif mode == 'here_no_doodad':
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    else:
        raise NotImplementedError("Mode not supported: {}".format(mode))
    run_experiment_kwargs['base_log_dir'] = base_log_dir_for_script
    # Launch; the returned target mount is cached at module level and reused
    # by subsequent launches in this process.
    target_mount = doodad.launch_python(
        target=config.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH,
        mode=dmode,
        mount_points=mounts,
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'run_experiment_kwargs': run_experiment_kwargs,
            'mode': mode,
        },
        use_cloudpickle=True,
        target_mount=target_mount,
    )
mode_local = dd.mode.LocalDocker(image='justinfu/rl_base:0.1') # Use docker mode to launch jobs on newton machine mode_ssh = dd.mode.SSHDocker( image='justinfu/rl_base:0.1', credentials=ssh.SSHCredentials(hostname='newton2.banatao.berkeley.edu', username='******', identity_file='path/to/identity'), ) # Set up code and output directories OUTPUT_DIR = '/mount/outputs' # this is the directory visible to the target script inside docker mounts = [ mount.MountLocal(local_dir='~/install/rllab', pythonpath=True), # point to your rllab mount.MountLocal(local_dir='~/install/gym/.mujoco', mount_point='/root/.mujoco'), # point to your mujoco # this output directory will be visible on the remote machine # TODO: this directory will have root permissions. For now you need to scp your data inside your script. mount.MountLocal(local_dir='~/data/%s' % MY_USERNAME, mount_point=OUTPUT_DIR, output=True), ] dd.launch_python( target='path/to/script.py', # point to a target script (absolute path). mode=mode_ssh, mount_points=mounts, )