def run_experiment(args):
    # def env_fn():
    #     import flexibility  # register flexibility to gym env registry
    #     return gym.make(args.env_name)

    eg = ExperimentGrid(name=args.exp_name)
    eg.add('seed', [10 * i for i in range(args.num_runs)] if args.seed is None else args.seed)
    eg.add('epochs', args.epochs)
    eg.add('steps_per_epoch', args.steps_per_epoch)
    eg.add('save_freq', args.save_freq)
    eg.add('max_ep_len', 200)
    eg.add('ac_kwargs:activation', eval(args.act), '')  # args.act is a string (e.g. 'tf.nn.relu') evaluated into the activation callable
    eg.add('custom_h', args.custom_h)
    eg.add('do_checkpoint_eval', args.do_checkpoint_eval)
    eg.add('eval_episodes', args.eval_episodes)
    eg.add('train_v_iters', args.train_v_iters)
    eg.add('eval_temp', args.eval_temp)
    eg.add('train_starting_temp', args.train_starting_temp)
    eg.add('gamma', args.gamma)
    eg.add('env_version', args.env_version)
    eg.add('env_name', args.env_name)
    eg.add('env_subtract_full_flex', args.env_subtract_full_flex)
    eg.add('meta_learning', args.meta_learning)
    eg.add('finetune', args.finetune)
    eg.add('finetune_model_path', args.finetune_model_path)
    eg.add('lam', args.lam)
    eg.add('early_stop_epochs', args.early_stop_epochs)
    eg.add('save_all_eval', args.save_all_eval)

    if args.episodes_per_epoch is not None:
        eg.add('episodes_per_epoch', args.episodes_per_epoch)

    if args.env_version >= 3:
        # args.file_path = "/home/user/git/spinningup/spinup/FlexibilityEnv/input_m8n12_cv0.8.pkl"
        prefix = os.getcwd().split('RL_flex_design')[0]
        args.file_path = prefix + "RL_flex_design/spinup/FlexibilityEnv_input/{}".format(args.env_input)
        m, n, mean_c, mean_d, sd_d, profit_mat, target_arcs, fixed_costs, flex_0 = load_FlexibilityEnv_input(
            args.file_path)

        eg.add('env_input', args.file_path)
        eg.add('env_n_sample', args.env_n_sample)

        if args.target_arcs is None:
            eg.add('target_arcs', target_arcs)
        else:
            # target_arcs is explicitly specified by the scripts, which overrides the target_arcs from the input file
            eg.add('target_arcs', args.target_arcs)

    if args.algo == "ppo":
        eg.add('train_pi_iters', args.train_pi_iters)
        eg.run(ppo)
    elif args.algo == "vpg":
        eg.run(vpg)
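# Illustrative sketch (not part of the original module): ExperimentGrid expands every
# eg.add(...) call with more than one value into a cross product of runs, which is how a
# list of seeds becomes args.num_runs independent training runs above. The helper below
# is a minimal, never-called analogue; the thunk `_demo_thunk` and all values are
# assumptions for illustration only.
def _demo_thunk(seed=0, gamma=0.99, **kwargs):
    # stand-in for ppo/vpg; spinup may also inject logger_kwargs, absorbed by **kwargs
    print('would train with seed={}, gamma={}'.format(seed, gamma))


def _demo_experiment_grid():
    demo_grid = ExperimentGrid(name='demo')
    demo_grid.add('seed', [0, 10, 20])   # three values -> three runs
    demo_grid.add('gamma', 0.99)         # a single value is shared by all runs
    demo_grid.run(_demo_thunk)           # launches one run per grid variant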
def get_custom_env_fn(env_name, env_version=None, target_arcs=None, env_input=None, env_n_sample=None,
                      subtract_full_flexibility_performance=False, meta_learning=False):
    if env_version in (1, 2):
        # parse FlexibilityEnv settings from env_name
        n_plant, n_product, target_arcs, n_sample = _parse_attributes(env_name)

        class CustomFlexibilityEnv(FlexibilityEnv):
            def __init__(self):
                super().__init__(n_plant=n_plant,
                                 n_product=n_product,
                                 target_arcs=target_arcs,
                                 n_sample=n_sample,
                                 env_version=env_version)
                print('using custom env: {} | n_plant: {} | n_product: {} | target_arcs: {} | n_sample: {} '
                      '| env_version: {}'.format(env_name, n_plant, n_product, target_arcs, n_sample, env_version))

    else:  # env_version in (3, 4, >40, 5): load FlexibilityEnv settings from env_input
        n_plant, n_product, mean_c, mean_d, sd_d, profit_mat, _, fixed_costs, flex_0 = load_FlexibilityEnv_input(
            env_input)

        def to_numpy_array(obj):
            # the pickled inputs may store vectors as plain lists; convert them to float32 arrays
            if isinstance(obj, list):
                return np.asarray(obj, dtype=np.float32)
            return obj

        mean_c = to_numpy_array(mean_c)
        mean_d = to_numpy_array(mean_d)
        sd_d = to_numpy_array(sd_d)

        class CustomFlexibilityEnv(FlexibilityEnv):
            def __init__(self):
                super().__init__(n_plant=n_plant,
                                 n_product=n_product,
                                 target_arcs=target_arcs,  # for env_version=3, target_arcs is passed into the function call
                                 n_sample=env_n_sample,
                                 capacity_mean=mean_c,
                                 env_version=env_version,
                                 demand_mean=mean_d,
                                 demand_std=sd_d,
                                 profit_matrix=profit_mat,
                                 fixed_costs=fixed_costs,
                                 starting_structure=flex_0,
                                 subtract_full_flexibility_performance=subtract_full_flexibility_performance,
                                 meta_learning=meta_learning)
                print('using env: {} | n_plant: {} | n_product: {} | target_arcs: {} | n_sample: {} | env_version: {} '
                      '| subtract_full_flex: {} | meta_learning: {}'.format(
                          env_name, n_plant, n_product, target_arcs, env_n_sample, env_version,
                          subtract_full_flexibility_performance, meta_learning))

    return CustomFlexibilityEnv
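# Illustrative sketch (not part of the original module): the class returned by
# get_custom_env_fn plays the role of spinup's `env_fn` argument -- calling it with no
# arguments builds a fresh FlexibilityEnv inside each worker process. The env name and
# input file name below are assumptions for illustration only.
def _demo_custom_env_fn_usage():
    env_fn = get_custom_env_fn(env_name='F10x10a-v5',               # assumed name
                               env_version=5,
                               target_arcs=22,
                               env_input='input_m10n10_cv0.8.pkl',  # assumed input file
                               env_n_sample=50)
    env = env_fn()        # instantiate one environment, as ppo/vpg would do internally
    return env.reset()    # assuming the usual gym-style reset()/step() interface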
def generate_scripts_for_multiple_target_arcs(experiment, env_input, env_version_list, epoch_episodes,
                                              num_tars_per_script, num_batches, num_runs, gamma=None, lam=None,
                                              variance_reduction=False, env_n_sample=50, custom_h=None, cpu=2,
                                              tar_list=None, early_stop=None, epoch=800, save_freq=10,
                                              save_all_eval=None, included_tars=None):
    m, n, mean_c, mean_d, sd_d, profit_mat, target_arcs, fixed_costs, flex_0 = load_FlexibilityEnv_input(
        _get_full_path(env_input))
    print("number of existing arcs {}".format(flex_0.sum()))

    if included_tars is not None:
        # filter out tars that are not in included_tars
        target_arcs = [tar for tar in target_arcs if tar in included_tars]

    if tar_list is None or len(tar_list) == 0:
        tar_list = get_tars_list(num_tars_per_script, target_arcs)
    print("target arcs to be run: {}".format(tar_list))

    for batch in range(num_batches):
        starting_seed = 100 * batch

        for env_version in env_version_list:
            # create the entrypoint script (starts with #!/bin/bash) that launches all per-tar scripts in parallel
            path = 'run_{}_ENV{}_batch{}_entrypoint.sh'.format(experiment, env_version, batch)
            python_string = 'for((i=0;i < {};i++)); do bash run_{}_ENV{}_batch{}_'.format(
                len(tar_list), experiment, env_version, batch) + '$' + '{' + 'i' + '}' + '.sh & done'

            with open(path, 'w') as f:
                f.write('#!/bin/bash\n\n')
                f.write(python_string)
            make_executable(path)
            print(python_string)

            # create scripts to be called in parallel; each entry of tar_list is a list of target-arc values
            for idx, target_arcs in enumerate(tar_list):
                assert len(target_arcs) >= 1
                if len(target_arcs) == 1:
                    # add 'tar' to exp_name explicitly
                    exp_name = 'F{}_CH{}_ENV{}_tar{}'.format(
                        experiment, '1024-128' if custom_h is None else custom_h, env_version, target_arcs[0])
                else:
                    exp_name = 'F{}_CH{}_ENV{}'.format(
                        experiment, '1024-128' if custom_h is None else custom_h, env_version)

                python_string = "python -m spinup.run_flexibility \\\n \
                --algo ppo \\\n \
                --env_name F{}-v{} \\\n \
                --exp_name {} \\\n \
                --cpu {} \\\n \
                --epochs {} \\\n \
                --custom_h 1024-128 \\\n \
                --env_version {} \\\n \
                --env_input {} \\\n \
                --target_arcs {} \\\n \
                --seed {} \\\n \
                --save_freq {} \\\n \
                --steps_per_epoch {} \\\n \
                --do_checkpoint_eval \\\n".format(
                    experiment, env_version, exp_name, cpu, epoch, env_version, env_input,
                    _get_target_arcs_string(target_arcs),
                    _get_seed_str(starting_seed, num_runs),
                    save_freq,
                    int(np.ceil((int(statistics.mean(target_arcs)) - flex_0.sum()) * epoch_episodes)))

                if variance_reduction:
                    python_string += '                --env_subtract_full_flex \\\n'
                if env_n_sample != 50:
                    python_string += '                --env_n_sample {} \\\n'.format(env_n_sample)
                if custom_h is not None:
                    # note: appends a second --custom_h flag; argparse keeps the last occurrence
                    python_string += '                --custom_h {} \\\n'.format(custom_h)
                if early_stop is not None:
                    python_string += '                --early_stop {} \\\n'.format(early_stop)
                if save_all_eval is not None:
                    python_string += '                --save_all_eval \\\n'
                if gamma is not None:
                    python_string += '                --gamma {} \\\n'.format(gamma)

                if lam is None:
                    python_string += '                ;'
                else:
                    python_string += '                --lam {};'.format(lam)

                path = 'run_{}_ENV{}_batch{}_{}.sh'.format(experiment, env_version, batch, idx)
                with open(path, 'w') as f:
                    f.write('#!/bin/bash\n\n')
                    f.write(python_string)
                make_executable(path)
                print(python_string)
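# Illustrative sketch (not part of the original module): one plausible way to drive the
# batch-script generator above. The experiment tag, input file, and numeric values are
# assumptions; the call writes run_*_entrypoint.sh plus one run_*.sh per tar group into
# the current working directory.
def _demo_generate_batch_scripts():
    generate_scripts_for_multiple_target_arcs(experiment='10x10a',                   # assumed tag
                                              env_input='input_m10n10_cv0.8.pkl',    # assumed input file
                                              env_version_list=[5],
                                              epoch_episodes=40,                     # assumed episodes per epoch
                                              num_tars_per_script=2,
                                              num_batches=1,
                                              num_runs=3)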
    else:
        exp = experiment
    return exp


from spinup.FlexibilityEnv_input.FlexibilityEnv_input_files import INPUTS

if __name__ == "__main__":
    experiment = '10x10a-lspe'
    envs = ['ENV5']
    print()

    input_path = get_input_path(INPUTS[get_input_key(experiment)])
    exclude = ['abcdef']

    m, n, mean_c, mean_d, sd_d, profit_mat, target_arcs, fixed_costs, flex_0 = load_FlexibilityEnv_input(input_path)

    perf_dicts = []
    files_dicts = []
    for env in envs:
        print("==== processing files for {}".format(env))
        files = collect_best_structures(experiment, env, exclude)

        dict_tar_perf = {}
        dict_tar_file = {}
        files = sorted(files)
        for file in files:
            tar = file.split('T')[1].split('_SP')[0]
            with open(file, 'rb') as f:
def generate_scripts_for_one_target_arcs(experiment, env_input, env_version_list, epoch_episodes, target_arcs,
                                         num_runs, starting_seed, gamma=None, lam=None, variance_reduction=False,
                                         env_n_sample=50, early_stop=None, cpu=8, epoch=800, save_freq=10,
                                         save_all_eval=None, custom_h=None, meta_learning=False, finetune=False,
                                         finetune_path=None, finetune_meta_trained_epoch=None):
    m, n, mean_c, mean_d, sd_d, profit_mat, _, fixed_costs, flex_0 = load_FlexibilityEnv_input(
        _get_full_path(env_input))
    print("number of existing arcs {}".format(flex_0.sum()))

    target_arcs_in_names = 0 if meta_learning else target_arcs

    for env_version in env_version_list:
        # create the entrypoint script (starts with #!/bin/bash) that launches all per-run scripts in parallel
        path = 'run_{}_ENV{}_tar{}_entrypoint.sh'.format(experiment, env_version, target_arcs_in_names)
        python_string = 'for((i=0;i < {};i++)); do bash run_{}_ENV{}_tar{}_'.format(
            num_runs, experiment, env_version, target_arcs_in_names) + '$' + '{' + 'i' + '}' + '.sh & done'

        with open(path, 'w') as f:
            f.write('#!/bin/bash\n\n')
            f.write(python_string)
        make_executable(path)
        print(python_string)

        # create scripts to be called in parallel
        for idx in range(num_runs):
            # for meta-learning, alternate the target arcs between runs
            target_arcs = target_arcs if meta_learning is False else (13 if idx % 2 == 1 else 22)

            python_string = "python -m spinup.run_flexibility \\\n \
            --algo ppo \\\n \
            --env_name F{}-v{} \\\n \
            --exp_name F{}_CH{}_ENV{}_tar{} \\\n \
            --cpu {} \\\n \
            --epochs {} \\\n \
            --custom_h 1024-128 \\\n \
            --env_version {} \\\n \
            --env_input {} \\\n \
            --target_arcs {} \\\n \
            --seed {} \\\n \
            --save_freq {} \\\n \
            --steps_per_epoch {} \\\n \
            --do_checkpoint_eval \\\n".format(
                experiment, env_version,
                experiment, '1024-128' if custom_h is None else custom_h, env_version, target_arcs_in_names,
                cpu, epoch, env_version, env_input,
                target_arcs if meta_learning is False else (13 if idx % 2 == 1 else 22),
                starting_seed + 10 * idx,
                save_freq,
                int(np.ceil((target_arcs - flex_0.sum()) * epoch_episodes)) if meta_learning is False
                else 20 * epoch_episodes)

            if variance_reduction:
                python_string += '            --env_subtract_full_flex \\\n'
            if env_n_sample != 50:
                python_string += '            --env_n_sample {} \\\n'.format(env_n_sample)
            if early_stop is not None:
                python_string += '            --early_stop {} \\\n'.format(early_stop)
            if save_all_eval is not None:
                python_string += '            --save_all_eval \\\n'
            if gamma is not None:
                python_string += '            --gamma {} \\\n'.format(gamma)
            if custom_h is not None:
                # note: appends a second --custom_h flag; argparse keeps the last occurrence
                python_string += '            --custom_h {} \\\n'.format(custom_h)
            if meta_learning:
                python_string += '            --meta_learning \\\n'
            if finetune:
                python_string += '            --finetune \\\n'
                if finetune_meta_trained_epoch is None:
                    python_string += '            --finetune_model_path {} \\\n'.format(
                        '{}_s{}'.format(finetune_path, starting_seed + 10 * idx))
                else:
                    python_string += '            --finetune_model_path {} \\\n'.format(
                        '{}_s{}/simple_save{}'.format(finetune_path, starting_seed + 10 * idx,
                                                      finetune_meta_trained_epoch))

            if lam is None:
                python_string += '            ;'
            else:
                python_string += '            --lam {};'.format(lam)

            path = 'run_{}_ENV{}_tar{}_{}.sh'.format(experiment, env_version, target_arcs_in_names, idx)
            with open(path, 'w') as f:
                f.write('#!/bin/bash\n\n')
                f.write(python_string)
            make_executable(path)
            print(python_string)
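# Illustrative sketch (not part of the original module): generating run scripts for a
# single target-arc value, plus a fine-tuning variant that restarts from a previously
# trained model. All names, paths, and numeric values below are assumptions.
def _demo_generate_single_target_scripts():
    generate_scripts_for_one_target_arcs(experiment='10x10a',                  # assumed tag
                                         env_input='input_m10n10_cv0.8.pkl',   # assumed input file
                                         env_version_list=[5],
                                         epoch_episodes=40,                    # assumed episodes per epoch
                                         target_arcs=22,
                                         num_runs=4,
                                         starting_seed=0)

    # fine-tuning variant: finetune_path is an assumed prefix; "_s<seed>" is appended per run above
    generate_scripts_for_one_target_arcs(experiment='10x10a',
                                         env_input='input_m10n10_cv0.8.pkl',
                                         env_version_list=[5],
                                         epoch_episodes=40,
                                         target_arcs=22,
                                         num_runs=4,
                                         starting_seed=0,
                                         finetune=True,
                                         finetune_path='path/to/meta_trained_experiment')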