# MLP-based DQN controller with Boltzmann (soft-Q) exploration and a rising/flat
# beta schedule, plus an RNN decoder that is either built fresh or loaded from disk.
# observation_size = sensor.hp.winx*sensor.hp.winy*2
observation_size = 530  # 2*(hp.resolution//2)**2+2
rising_beta_schedule = [[hp.beta_t1 // hp.steps_between_learnings, hp.beta_b1],
                        [hp.beta_t2 // hp.steps_between_learnings, hp.beta_b2]]
flat_beta_schedule = [[hp.beta_t1 // hp.steps_between_learnings, hp.beta_b2],
                      [hp.beta_t2 // hp.steps_between_learnings, hp.beta_b2]]
# rising_beta_schedule = [[400000 // hp.steps_between_learnings, 0.1], [700000 // hp.steps_between_learnings, 1]]
# flat_beta_schedule = [[400000 // hp.steps_between_learnings, 1.0], [700000 // hp.steps_between_learnings, 1]]
RL = DeepQNetwork(len(agent.hp.action_space),
                  observation_size * hp.mem_depth,  # sensor.frame_size+2,
                  reward_decay=0.99,
                  e_greedy=0.95,
                  e_greedy0=0.8,
                  replace_target_iter=10,
                  memory_size=100000,
                  e_greedy_increment=0.0001,
                  learning_rate=hp.syclop_learning_rate,
                  double_q=True,
                  dqn_mode=True,
                  state_table=np.zeros([1, observation_size * hp.mem_depth]),
                  soft_q_type='boltzmann',
                  beta_schedule=rising_beta_schedule if hp.rising_beta_schedule else flat_beta_schedule,
                  arch='mlp')
keras.backend.set_session(RL.dqn.sess)

if hp.decoder_initial_network is None:
    decoder = rnn_model_102(lr=hp.decoder_learning_rate,
                            ignore_input_B=hp.decoder_ignore_position,
                            dropout=hp.decoder_dropout,
                            rnn_type=hp.decoder_rnn_type,
                            input_size=(hp.resolution // 2, hp.resolution // 2, 1))
else:
    # for example: 'ref_nets/keras_decoder_5stp_101.model'
    decoder = keras.models.load_model(hp.decoder_initial_network)
    keras.backend.set_value(decoder.optimizer.lr, hp.decoder_learning_rate)

if hp.dqn_initial_network is not None:
    RL.dqn.load_nwk_param(hp.dqn_initial_network)
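# The controller above combines soft_q_type='boltzmann' with a beta schedule that
# ramps from hp.beta_b1 to hp.beta_b2. A minimal, self-contained sketch of how such
# a schedule can drive action selection is given below; the function names and the
# piecewise-linear interpolation between schedule points are assumptions for
# illustration, not the DeepQNetwork internals.
import numpy as np


def boltzmann_policy(q_values, beta):
    # Softmax over Q-values: low beta gives near-uniform exploration,
    # high beta gives near-greedy exploitation.
    logits = beta * (np.asarray(q_values) - np.max(q_values))
    probs = np.exp(logits)
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)


def beta_from_schedule(step, schedule):
    # schedule = [[step1, beta1], [step2, beta2]]: hold beta1 until step1,
    # interpolate linearly up to beta2 at step2, then hold beta2.
    (s1, b1), (s2, b2) = schedule
    if step <= s1:
        return b1
    if step >= s2:
        return b2
    return b1 + (b2 - b1) * (step - s1) / (s2 - s1)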
# Convolutional DQN controller ('conv_ctrl') driven by the DVS sensor view; the
# speed penalty is taken from the command line.
reward = syc.Rewards(reward_types=['central_rms_intensity', 'speed', 'saccade'],
                     relative_weights=[1.0, -float(sys.argv[1]), 0])

# observation_size = sensor.hp.winx*sensor.hp.winy*2
observation_size = 64 * 64 + 2
RL = DeepQNetwork(len(agent.hp.action_space), observation_size,
                  n_features_shaped=list(np.shape(sensor.dvs_view)) + [1],
                  shape_fun=None,
                  reward_decay=0.99,
                  e_greedy=0.95,
                  e_greedy0=0.8,
                  replace_target_iter=10,
                  memory_size=100000,
                  e_greedy_increment=0.0001,
                  learning_rate=0.0025,
                  double_q=True,
                  dqn_mode=True,
                  state_table=np.zeros([1, observation_size * hp.mem_depth]),
                  soft_q_type='boltzmann',
                  beta=0.1,
                  beta_schedule=[[4000, 0.1], [7000, 1.0]],
                  arch='conv_ctrl')
# RL.dqn.load_nwk_param('tempX_1.nwk')
# RL.dqn.save_nwk_param('liron_encircle.nwk')
if hp.dqn_initial_network is not None:
    RL.dqn.load_nwk_param(hp.dqn_initial_network)
hp.scene = scene.hp
hp.sensor = sensor.hp
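# observation_size = 64*64 + 2 above suggests a flattened 64x64 sensor view plus
# two extra scalars (presumably the agent position). The helper below is only an
# illustrative sketch of how such a vector could be assembled; build_observation,
# agent_xy and max_xy are hypothetical names, and the position normalisation is an
# assumption, not the project's actual pipeline.
import numpy as np


def build_observation(dvs_view, agent_xy, max_xy):
    # Flatten the sensor view (64*64 = 4096 values) and append the normalised
    # agent coordinates (2 values), matching the 4098-feature input size.
    view = np.asarray(dvs_view, dtype=np.float32).reshape(-1)
    pos = np.asarray(agent_xy, dtype=np.float32) / np.asarray(max_xy, dtype=np.float32)
    return np.concatenate([view, pos])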
    max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])
# MLP-based DQN controller with a strong saccade penalty and a configurable speed
# penalty; hyperparameter records from the sub-modules are gathered into hp.
reward = syc.Rewards(reward_types=['central_rms_intensity', 'speed', 'saccade'],
                     relative_weights=[1.0, hp.speed_penalty, -200])

# observation_size = sensor.hp.winx*sensor.hp.winy*2
observation_size = 256 * 4
RL = DeepQNetwork(len(agent.hp.action_space),
                  observation_size * hp.mem_depth,  # sensor.frame_size+2,
                  reward_decay=0.99,
                  e_greedy=0.95,
                  e_greedy0=0.8,
                  replace_target_iter=10,
                  memory_size=100000,
                  e_greedy_increment=0.0001,
                  learning_rate=0.0025,
                  double_q=True,
                  dqn_mode=True,
                  state_table=np.zeros([1, observation_size * hp.mem_depth]),
                  soft_q_type='boltzmann',
                  beta_schedule=[[400000 // hp.steps_between_learnings, 0.1],
                                 [700000 // hp.steps_between_learnings, 1]],
                  arch='mlp')
if hp.dqn_initial_network is not None:
    RL.dqn.load_nwk_param(hp.dqn_initial_network)
hp.scene = scene.hp
hp.sensor = sensor.hp
hp.agent = agent.hp
hp.reward = reward.hp
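# The network input here (and in the first variant) is observation_size * hp.mem_depth,
# i.e. the last mem_depth observations concatenated into one vector. The class below
# is a minimal sketch of such a stacking buffer, assuming mem_depth is used this way;
# ObservationStack is a hypothetical helper, not part of the project's code.
from collections import deque

import numpy as np


class ObservationStack:
    # Keep the most recent mem_depth observations and expose them as a single
    # flat vector of length observation_size * mem_depth.
    def __init__(self, observation_size, mem_depth):
        self.buffer = deque([np.zeros(observation_size, dtype=np.float32)
                             for _ in range(mem_depth)], maxlen=mem_depth)

    def push(self, observation):
        # Append the newest observation (oldest one is dropped automatically)
        # and return the concatenated state vector for the DQN.
        self.buffer.append(np.asarray(observation, dtype=np.float32))
        return np.concatenate(self.buffer)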