import sys
import time

import numpy as np

# Assumed imports: in the DFP codebase the experiment scripts prepend '../..'
# to sys.path and import MultiExperiment from DFP/multi_experiment.py.
sys.path = ['../..'] + sys.path
from DFP.multi_experiment import MultiExperiment


def main(main_args):
    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0, 1, 2]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/D3_battle.cfg'
    simulator_args['resolution'] = (84, 84)
    simulator_args['frame_skip'] = 4
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['use_shaping_reward'] = False
    simulator_args['maps'] = ['MAP01']
    simulator_args['switch_maps'] = False
    # train
    simulator_args['num_simulators'] = 8

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 1
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    agent_args['agent_type'] = 'advantage'

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([7.5, 30., 1.]), 1) *
         np.ones((1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
    agent_args['objective_coeffs_meas'] = [0.5, 0.5, 1.]
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))
    agent_args['new_memories_per_batch'] = 8

    # net parameters
    agent_args['conv_params'] = np.array(
        [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)],
                                            dtype=[('out_dims', int)])
    # we put -1 here because it will be automatically replaced when creating the net
    agent_args['fc_joint_params'] = np.array([(512,), (-1,)],
                                             dtype=[('out_dims', int)])
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    # Integer division keeps the step count an int under Python 3.
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] //
        simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    experiment_args['test_objective_coeffs_meas'] = np.array([0.5, 0.5, 1.])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_08_10_44_20'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment
    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)
    experiment.run(main_args[0])
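
# Minimal entry point -- a sketch, assuming the usual DFP convention where the
# run mode ('train' or 'show') is passed as the first command-line argument:
#     python3 run_exp.py train
if __name__ == '__main__':
    main(sys.argv[1:])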


def main(main_args):
    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    # Temporal offsets at which to predict the measurements
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    # Measurements that we aim to predict
    target_maker_args['meas_to_predict'] = [0, 1, 2, 3]
    # Minimum number of available measurements needed to evaluate a frame
    target_maker_args['min_num_targs'] = 3
    # Discount schedule for future rewards
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    # Replacement for unavailable targets (when close to the end of the experiment)
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    # Defines simulator parameters (available buttons, game variables, etc.)
    simulator_args['config'] = '../../maps/D5.cfg'
    # Resolution used for the input images (?)
    simulator_args['resolution'] = (84, 84)
    # Make decisions and predictions every n frames (?)
    simulator_args['frame_skip'] = 4
    simulator_args['color_mode'] = 'GRAY'
    # Map on which to play
    simulator_args['maps'] = ['MAP01']
    simulator_args['switch_maps'] = False
    # train
    # Number of simulators to run in parallel (?)
    simulator_args['num_simulators'] = 8

    ## Experience
    # Train experience
    train_experience_args = {}
    # Number of observations to retain
    train_experience_args['memory_capacity'] = 20000
    # Number of frames taken as input when making a prediction
    train_experience_args['history_length'] = 1
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    # Defines the kind of network used for the prediction
    agent_args['agent_type'] = 'advantage'

    # preprocessing
    # Preprocessing applied to the images
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    # Preprocessing applied to the measurements
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    # One scale per measurement, broadcast across all prediction offsets;
    # scaling by 100 works for VELOCITY measurements (?)
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([30., 100., 100., 100.]), 1) *
         np.ones((1, len(target_maker_args['future_steps'])))).flatten(), 0)
    # targ_scale_coeffs is a 1 x (num_meas * num_offsets) array
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    # Multiplicative factors for rewards across the predicted time steps
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
    # Per-measurement weights for the reward
    agent_args['objective_coeffs_meas'] = [1., 0., 0., 0.]
    # epsilon for the epsilon-greedy policy (?)
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))
    agent_args['new_memories_per_batch'] = 8

    # net parameters
    agent_args['conv_params'] = np.array(
        [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)],
                                            dtype=[('out_dims', int)])
    # we put -1 here because it will be automatically replaced when creating the net
    agent_args['fc_joint_params'] = np.array([(512,), (-1,)],
                                             dtype=[('out_dims', int)])
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] //
        simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    experiment_args['test_objective_coeffs_meas'] = np.array([1., 0., 0., 0.])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_09_09_11_48'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment
    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)
    experiment.run(main_args[0])
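
# Standalone sketch (not part of the original script): the targ_scale_coeffs
# construction above is easy to misread, so this shows what it produces for
# this variant's four measurements and six prediction offsets. Names here are
# illustrative only.
import numpy as np

future_steps = [1, 2, 4, 8, 16, 32]                  # six prediction offsets
per_meas_scale = np.array([30., 100., 100., 100.])   # one scale per measurement

# Broadcast each per-measurement scale across all offsets, then flatten to a
# single row: shape (1, num_meas * num_offsets) = (1, 24).
coeffs = np.expand_dims(
    (np.expand_dims(per_meas_scale, 1) *
     np.ones((1, len(future_steps)))).flatten(), 0)

print(coeffs.shape)   # (1, 24)
print(coeffs[0, :6])  # [30. 30. 30. 30. 30. 30.] -- all six offsets of measurement 0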


def main(mode, doom_config_file):
    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0, 1, 2]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/' + doom_config_file  # e.g. D3_battle.cfg
    simulator_args['resolution'] = (84, 84)
    # TODO: change back to 4 for experiments; 1 helps get nicer videos, though.
    simulator_args['frame_skip'] = 1  # 4
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['maps'] = ['MAP01']
    simulator_args['switch_maps'] = False
    # train
    simulator_args['num_simulators'] = 8  # TODO: set back to 8, just testing something.

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 1
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    agent_args['agent_type'] = 'advantage'

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([7.5, 30., 1.]), 1) *
         np.ones((1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
    # KOE: note these are the values for training (?); the test values are set below.
    agent_args['objective_coeffs_meas'] = [0.5, 0.5, 1.]
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))
    agent_args['new_memories_per_batch'] = 8
    agent_args['random_objective_coeffs'] = True
    agent_args['objective_coeffs_distribution'] = 'uniform_pos_neg'

    # net parameters
    agent_args['conv_params'] = np.array(
        [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)],
                                            dtype=[('out_dims', int)])
    agent_args['fc_obj_params'] = np.array([(128,), (128,), (128,)],
                                           dtype=[('out_dims', int)])
    # we put -1 here because it will be automatically replaced when creating the net
    agent_args['fc_joint_params'] = np.array([(512,), (-1,)],
                                             dtype=[('out_dims', int)])
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] //
        simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    # KOE: this defines the measurement weights and temporal weights of the objectives.
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    # KOE TODO: revert to the old values above. Measurements: health, ammo, frags.
    experiment_args['test_objective_coeffs_meas'] = np.array([0.5, 0.5, 1.])
    # experiment_args['test_objective_coeffs_meas'] = np.array([-1, -1, -1])  # KOE: opposite objectives, just for testing.
    experiment_args['test_random_prob'] = 0.
    # KOE: this defines the checkpoint whose weights are loaded for testing.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_09_09_13_17'
    # KOE: how many steps to run the test agent.
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment
    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)
    return experiment.run(mode)
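
# Standalone sketch (not part of the original script): the exploration
# schedule above decays smoothly from ~0.99 toward its floor of 0.02.
# A quick check of a few values:
eps = lambda step: 0.02 + 145000. / (float(step) + 150000.)

for step in (0, 150000, 500000, 820000):
    print(step, round(eps(step), 3))
# 0      0.987
# 150000 0.503
# 500000 0.243
# 820000 0.169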


def main(main_args):
    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/D1_basic.cfg'
    simulator_args['resolution'] = (84, 84)
    simulator_args['frame_skip'] = 4
    simulator_args['color_mode'] = 'GRAY'
    # simulator_args['color_mode'] = 'RGB'
    simulator_args['maps'] = ['MAP01']
    simulator_args['switch_maps'] = False
    # train
    simulator_args['num_simulators'] = 8

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 1
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 55000

    ## Agent
    agent_args = {}

    # agent type
    # agent_args['agent_type'] = 'advantage'
    # agent_args['agent_type'] = 'advantage_objects'
    agent_args['agent_type'] = 'advantage_objects_depth'
    # agent_args['agent_type'] = 'advantage_objects_no_image'

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([30.]), 1) *
         np.ones((1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0., 0., 0., 0.5, 0.5, 1.]
    agent_args['objective_coeffs_meas'] = [1.]
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 145000. / (float(step) + 150000.))
    agent_args['new_memories_per_batch'] = 8

    # net parameters
    agent_args['conv_params'] = np.array(
        [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)],
                                            dtype=[('out_dims', int)])
    agent_args['fc_objects_params'] = np.array([(128,), (128,), (128,)],
                                               dtype=[('out_dims', int)])
    # we put -1 here because it will be automatically replaced when creating the net
    agent_args['fc_joint_params'] = np.array([(512,), (-1,)],
                                             dtype=[('out_dims', int)])
    agent_args['weight_decay'] = 0.00000

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 7812
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] //
        simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0., 0., 0., 0.5, 0.5, 1.])
    experiment_args['test_objective_coeffs_meas'] = np.array([1.])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2017_04_09_09_07_45'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment
    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)
    experiment.run(main_args[0])
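
# Standalone sketch (not part of the original script): the layer parameters
# above are NumPy structured arrays, indexable by field name per layer or as
# whole columns.
import numpy as np

conv_params = np.array([(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                       dtype=[('out_channels', int), ('kernel', int), ('stride', int)])

print(conv_params[0]['kernel'])     # 8 -- kernel size of the first conv layer
print(conv_params['out_channels'])  # [32 64 64]
for layer in conv_params:
    print(layer['out_channels'], layer['kernel'], layer['stride'])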


def main(main_args):
    ### Set all arguments

    ## Target maker
    target_maker_args = {}
    target_maker_args['future_steps'] = [1, 2, 4, 8, 16, 32]
    target_maker_args['meas_to_predict'] = [0, 2]
    target_maker_args['min_num_targs'] = 3
    target_maker_args['rwrd_schedule_type'] = 'exp'
    target_maker_args['gammas'] = []
    target_maker_args['invalid_targets_replacement'] = 'nan'

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = '../../maps/cartpole.cfg'
    simulator_args['resolution'] = (84, 84)
    simulator_args['frame_skip'] = 0
    simulator_args['env_name'] = 'CartPole-v1'
    simulator_args['color_mode'] = 'RGB'
    simulator_args['environnement'] = 'gym'
    simulator_args['num_meas'] = 4
    simulator_args['gym'] = True
    # train
    simulator_args['num_simulators'] = 8

    ## Experience
    # Train experience
    train_experience_args = {}
    train_experience_args['memory_capacity'] = 20000
    train_experience_args['history_length'] = 3
    train_experience_args['history_step'] = 1
    train_experience_args['action_format'] = 'enumerate'
    train_experience_args['shared'] = False

    # Test prediction experience
    test_prediction_experience_args = train_experience_args.copy()
    test_prediction_experience_args['memory_capacity'] = 1

    # Test policy experience
    test_policy_experience_args = train_experience_args.copy()
    test_policy_experience_args['memory_capacity'] = 3000

    ## Agent
    agent_args = {}

    # agent type
    agent_args['agent_type'] = 'advantage'

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x
    targ_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([1, 12.]), 1) *
         np.ones((1, len(target_maker_args['future_steps'])))).flatten(), 0)
    agent_args['preprocess_input_targets'] = lambda x: x / targ_scale_coeffs
    agent_args['postprocess_predictions'] = lambda x: x * targ_scale_coeffs
    agent_args['discrete_controls_manual'] = []
    agent_args['opposite_button_pairs'] = [[0, 1]]

    # agent properties
    agent_args['objective_coeffs_temporal'] = [0.1, 0.1, 0.1, 1, 1, 1]
    # Weights for position and angle; position range (-4.8, 4.8), angle range (-24, 24)
    agent_args['objective_coeffs_meas'] = [-0.1, -1]

    def f1(x):
        return x**2

    def f2(x):
        return np.abs(x)

    # Penalize deviation from the center
    agent_args['objective_function'] = [f1, f2]
    agent_args['random_exploration_schedule'] = lambda step: (
        0.02 + 14500. / (float(step) + 15000.))
    agent_args['new_memories_per_batch'] = 8
    agent_args['gym'] = True  # in gym only one button is allowed

    # net parameters
    agent_args['conv_params'] = np.array(
        [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(512,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)],
                                            dtype=[('out_dims', int)])
    # we put -1 here because it will be automatically replaced when creating the net
    agent_args['fc_joint_params'] = np.array([(512,), (-1,)],
                                             dtype=[('out_dims', int)])
    agent_args['weight_decay'] = 0.00001

    # optimization parameters
    agent_args['batch_size'] = 64
    agent_args['init_learning_rate'] = 0.0001
    agent_args['lr_step_size'] = 250000
    agent_args['lr_decay_factor'] = 0.3
    agent_args['adam_beta1'] = 0.95
    agent_args['adam_epsilon'] = 1e-4
    agent_args['optimizer'] = 'Adam'
    agent_args['reset_iter_count'] = False

    # directories
    agent_args['checkpoint_dir'] = 'checkpoints'
    agent_args['log_dir'] = 'logs'
    agent_args['init_model'] = ''
    agent_args['model_name'] = "predictor.model"
    agent_args['model_dir'] = time.strftime("%Y_%m_%d_%H_%M_%S")

    # logging and testing
    agent_args['print_err_every'] = 50
    agent_args['detailed_summary_every'] = 1000
    agent_args['test_pred_every'] = 0
    agent_args['test_policy_every'] = 1000
    agent_args['num_batches_per_pred_test'] = 0
    agent_args['num_steps_per_policy_test'] = (
        test_policy_experience_args['memory_capacity'] //
        simulator_args['num_simulators'])
    agent_args['checkpoint_every'] = 10000
    agent_args['save_param_histograms_every'] = 5000
    agent_args['test_policy_in_the_beginning'] = True

    # experiment arguments
    experiment_args = {}
    experiment_args['num_train_iterations'] = 820000
    experiment_args['test_objective_coeffs_temporal'] = np.array(
        [0.1, 0.1, 0.1, 1, 1, 1])
    experiment_args['test_objective_coeffs_meas'] = np.array([-0.1, -1])
    experiment_args['test_random_prob'] = 0.
    experiment_args['test_checkpoint'] = 'checkpoints/2020_01_11_23_06_26'
    experiment_args['test_policy_num_steps'] = 2000
    experiment_args['show_predictions'] = False
    experiment_args['multiplayer'] = False

    # Create and run the experiment
    experiment = MultiExperiment(
        target_maker_args=target_maker_args,
        simulator_args=simulator_args,
        train_experience_args=train_experience_args,
        test_policy_experience_args=test_policy_experience_args,
        agent_args=agent_args,
        experiment_args=experiment_args)
    experiment.run(main_args[0])
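
# Standalone sketch (not part of the original script): f1 and f2 above square
# the cart position and take the absolute pole angle before the negative
# weights [-0.1, -1] are applied, so larger deviations from center give a more
# negative (worse) objective. How exactly the pieces are combined is up to the
# agent code; this illustration just assumes a weighted sum.
import numpy as np

coeffs = [-0.1, -1.]                         # weights for [position, angle]
fs = [lambda x: x**2, lambda x: np.abs(x)]   # f1, f2 from above

def combined_objective(meas):
    return sum(c * f(m) for c, f, m in zip(coeffs, fs, meas))

print(combined_objective([0.0, 0.0]))   # 0.0   -- perfectly centered
print(combined_objective([2.0, 10.0]))  # -10.4 -- off-center and tilted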