def do_network_based_leo(base_args, cores, name, nn_params, runs, tasks, starting_task):
    """Build multiprocessing configs for network-driven curriculum runs on Leo.

    Parameters
    ----------
    base_args : dict
        Baseline experiment arguments; copied, never mutated.
    cores : int
        Number of worker cores handed to Helper.
    name : str
        Experiment name.
    nn_params : tuple
        ``(cl_load, cl_pt_load)`` — curriculum-network weights name and the
        matching statistics pickle filename.
    runs : sequence of int
        Run indices; numbering starts at ``runs[0]``.
    tasks, starting_task :
        Task-config mapping and the task to start from (passed to Helper).

    Returns
    -------
    list of (config, tasks, starting_task) tuples ready for mp dispatch.
    """
    args = base_args.copy()
    args['rb_min_size'] = 1000
    args['default_damage'] = 4035.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args['steps'] = 300000
    args["cl_batch_norm"] = False
    args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
    args['cl_stages'] = 'balancing_tf;balancing;walking:monotonic'
    args['cl_depth'] = 2
    # Derive the pre-training shape from cl_depth so the two values cannot
    # drift apart (consistent with do_network_based_mujoco).
    args['cl_pt_shape'] = (args['cl_depth'], 3)
    args["cl_pt_load"] = nn_params[1]
    cl_load = nn_params[0]

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # Weights of the NN: one placeholder solution per run.
    solutions = [None] * len(runs)
    begin = runs[0]
    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)

    # Inject the network weights into a copy of every generated config.
    # Loop variables renamed so they no longer shadow the function parameters.
    mp_cfgs_new = []
    for config, cfg_tasks, cfg_start in mp_cfgs:
        copy_config = config.copy()
        copy_config["cl_load"] = cl_load
        mp_cfgs_new.append((copy_config, cfg_tasks, cfg_start))
    return mp_cfgs_new
def do_steps_based(base_args, cores, name, steps, runs, options=None, tasks=None, starting_task=''):
    """Build multiprocessing configs for a fixed step-budget experiment.

    When ``options`` is given, per-stage option strings are folded into the
    experiment name as a ``1_/2_/3_`` suffix and stored in ``args['options']``.

    NOTE(review): a second, simpler ``do_steps_based`` defined later in this
    module shadows this definition at import time — confirm which one is the
    intended entry point.

    Returns the list of (config, tasks, starting_task) tuples from Helper.
    """
    # Bug fix: the original used a mutable default argument (tasks={}),
    # which is shared across calls; use the None-sentinel idiom instead.
    tasks = {} if tasks is None else tasks
    args = base_args.copy()
    args['steps'] = steps
    if options:
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf']
        if options['balancing']:
            suffix += '2_' + options['balancing']
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # generate configurations: one placeholder solution per run
    solutions = [None] * len(runs)
    begin = runs[0]
    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
def do_reach_timeout_based(base_args, cores, name, reach_timeout, runs, options=None, tasks=None, starting_task=''):
    """Build multiprocessing configs for reach-timeout-based experiments.

    Fixes the step budget at 300000 and sizes the replay buffer to match.
    When ``options`` is given, per-stage option strings are folded into the
    experiment name as a ``1_/2_/3_`` suffix (underscore-separated) and
    stored in ``args['options']``.

    Returns the list of (config, tasks, starting_task) tuples from Helper.
    """
    # Bug fix: the original used a mutable default argument (tasks={}),
    # which is shared across calls; use the None-sentinel idiom instead.
    tasks = {} if tasks is None else tasks
    args = base_args.copy()
    args['reach_timeout'] = reach_timeout
    steps = 300000
    args['steps'] = steps
    args['rb_max_size'] = steps
    if options:
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf'] + '_'
        if options['balancing']:
            suffix += '2_' + options['balancing'] + '_'
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # Weights of the NN: one placeholder solution per run
    solutions = [None] * len(runs)
    begin = runs[0]
    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
def do_steps_based(base_args, cores, name, steps, runs, tasks, starting_task):
    """Generate multiprocessing configs for a plain fixed-step experiment.

    NOTE(review): this shadows the earlier ``do_steps_based`` (the variant
    that also takes ``options``) defined above in this module — confirm
    which definition is the intended one.

    Returns the list of (config, tasks, starting_task) tuples from Helper.
    """
    run_args = base_args.copy()
    run_args['steps'] = steps
    helper = Helper(run_args, 'cl', name, tasks, starting_task, cores, use_mp=True)
    # One placeholder solution per run; numbering starts at the first run id.
    placeholders = [None for _ in runs]
    return helper.gen_cfg(placeholders, 1, begin=runs[0])
def do_network_based_mujoco(base_args, cores, name, nn_params, options, runs, tasks, starting_task):
    """Build multiprocessing configs for network-driven curriculum runs on MuJoCo.

    ``nn_params`` is a ``(cl_load, cl_pt_load)`` pair: the curriculum-network
    weights name and the matching statistics pickle filename.

    NOTE(review): assumes ``base_args`` already contains 'cl_depth' (it is
    read, never set here) — confirm against callers.

    Returns a list of (config, tasks, starting_task) tuples for mp dispatch.
    """
    args = base_args.copy()
    args['env_td_error_scale'] = 600.0
    args['env_timeout'] = 16.5
    args['steps'] = 700000
    args["rb_max_size"] = args['steps']
    args['rb_min_size'] = 1000
    args['default_damage'] = 4035.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args["cl_batch_norm"] = False
    args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
    args['cl_stages'] = 'balancing_tf;balancing;walking:monotonic'
    args['cl_pt_shape'] = (args['cl_depth'], 3)
    args["cl_pt_load"] = nn_params[1]
    cl_load = nn_params[0]

    if options:
        # Assemble the per-stage name suffix (trailing underscores after the
        # first two stages match the original concatenation exactly).
        suffix_parts = []
        if options['balancing_tf']:
            suffix_parts.append('1_' + options['balancing_tf'] + '_')
        if options['balancing']:
            suffix_parts.append('2_' + options['balancing'] + '_')
        if options['walking']:
            suffix_parts.append('3_' + options['walking'])
        suffix = ''.join(suffix_parts)
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # Weights of the NN: one placeholder solution per run.
    placeholders = [None] * len(runs)
    generated = hp.gen_cfg(placeholders, 1, begin=runs[0])

    # Hand every run its own config copy carrying the network weights.
    result = []
    for config, cfg_tasks, cfg_start in generated:
        cfg = config.copy()
        cfg["cl_load"] = cl_load
        result.append((cfg, cfg_tasks, cfg_start))
    return result
# Script-level setup (continuation: `args` is created above this chunk —
# presumably by parse_args(); verify against the full file).
args['perf_l2_reg'] = True
args['steps'] = 300000
args["rb_max_size"] = args['steps']
#args["cl_batch_norm"] = True
#args['cl_structure'] = 'ffcritic:fc_relu_4;fc_relu_3;fc_relu_3'
args["cl_batch_norm"] = False
args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
args["cl_stages"] = "balancing_tf;balancing;walking:monotonic"
args['cl_depth'] = 2
args['cl_pt_shape'] = (args['cl_depth'], 3)
args['test_interval'] = 30
#args["cl_target"] = True
# Curriculum-network export name and its statistics pickle;
# nn_params = (weights name, stats filename).
export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
nn_params = (export_names, "{}_stat.pkl".format(export_names))
args["cl_pt_load"] = nn_params[1]
# Parameters: per-task environment configuration files.
tasks = {
    'balancing_tf': 'cfg/leo_balancing_tf.yaml',
    'balancing': 'cfg/leo_balancing.yaml',
    'walking': 'cfg/leo_walking.yaml'
}
starting_task = 'balancing_tf'
# Single-core, single-process helper (use_mp=False) producing one config.
hp = Helper(args, 'cl', 'ddpg', tasks, starting_task, 1, use_mp=False)
# Run actual script: take the single generated config, attach the
# network weights, and execute the curriculum run.
config, tasks, starting_task = hp.gen_cfg([None], 1)[0]
config["cl_load"] = nn_params[0]
cl_run(tasks, starting_task, **config)
def main():
    """Cross-entropy optimization of curriculum step budgets for DDPG on Leo.

    Enumerates unique (a, b) step-category combinations, then iteratively
    asks/tells a cross-entropy optimizer (`opt_ce`), evaluating each sampled
    curriculum via multiprocessing and checkpointing damage + optimizer
    state every generation.
    """
    prepare_multiprocessing()
    alg = 'ddpg'
    args = parse_args()
    # Cap worker count at the machine's core count (default cap 32).
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    args['mp_debug'] = True
    #args['reach_return'] = 1422.66
    #args['default_damage'] = 4035.00
    args['reach_return'] = 526.0
    args['default_damage'] = 4132.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args['rb_min_size'] = 1000
    args['cl_l2_reg'] = 0
    steps = 400000
    args['rb_max_size'] = steps
    # Step-granularity deltas for the two optimized categories.
    steps_delta_a = 1000
    steps_delta_b = 4000
    popsize = 16 * 6   # CE population size per generation
    G = 100            # maximum number of generations
    use_mp = True

    # ### For debugging
    # args['mp_debug'] = False
    # steps = 3000
    # steps_delta_a = 50
    # steps_delta_b = 50
    # G = 100
    # popsize = 3
    # use_mp = False
    # ###

    # Tasks: per-task environment configuration files.
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    starting_task = 'balancing_tf'
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload'
    }

    # Output directory for logs, damage pickles, and optimizer checkpoints.
    root = "cl"
    if not os.path.exists(root):
        os.makedirs(root)

    categories = range(26)
    #balancing_tf = np.array(categories)/max(categories)
    #balancing_tf = [int(steps_ub*(math.exp(3*x)-1)/(math.exp(3)-1)) for x in balancing_tf]

    # To ensure fair sampling, enumerate all step options and keep only the
    # combinations that map to unique step solutions.
    step_combinations, step_solutions = [], []
    for a in categories:
        for b in categories:
            sol = comb_to_sol((a, b), steps, steps_delta_a, steps_delta_b)
            if sol not in step_solutions:
                step_combinations.append((a, b))
                step_solutions.append(sol)

    opt = opt_ce(popsize, step_combinations, categories)
    g = 1
    # To resume a previous optimization, load the checkpoint instead:
    #opt = opt_ce.load(root, 'opt.pkl')
    #g = 2

    hp = Helper(args, root, alg, tasks, starting_task, arg_cores, use_mp=use_mp)

    while not opt.stop() and g <= G:
        # Redirect stdout to a per-generation log file when debugging mp runs.
        if args['mp_debug']:
            sys.stdout = Logger(root + "/stdout-g{:04}.log".format(g))
            print("Should work")

        combinations = opt.ask()

        # convert sampled options to solutions
        solutions = []
        for comb in combinations:
            solutions.append(
                comb_to_sol(comb, steps, steps_delta_a, steps_delta_b))

        # preparation: randomize each config's test interval in [1, 30]
        mp_cfgs = hp.gen_cfg_steps(solutions, g, options=options)
        for cfg in mp_cfgs:
            cfg[0]['test_interval'] = 1 + randint(0, 29)

        # evaluate and backup immediately
        damage = hp.run(mp_cfgs)
        with open(root + '/damage.pkl', 'wb') as f:
            pickle.dump(damage, f, 2)

        # remove None elements (failed evaluations) before telling the optimizer;
        # elementwise `!= None` on the array yields the indices of valid entries
        notnonel = np.where(np.array(damage) != None)[0]
        damage = [d for i, d in enumerate(damage) if i in notnonel]
        combinations = [d for i, d in enumerate(combinations) if i in notnonel]

        # update using *original* solutions == combinations
        best = opt.tell(combinations, damage)

        # back-project best indices to the array including None elements
        best = [notnonel[i] for i in best]

        # logging and checkpointing
        opt.log(root, alg, g, hp.damage_info, hp.reeval_damage_info, best)
        opt.save(root, 'opt.pkl')

        # new iteration
        g += 1