    # observation_size = sensor.hp.winx * sensor.hp.winy * 2
    observation_size = 530  # 2 * (hp.resolution // 2) ** 2 + 2

    rising_beta_schedule = [[hp.beta_t1 // hp.steps_between_learnings, hp.beta_b1],
                            [hp.beta_t2 // hp.steps_between_learnings, hp.beta_b2]]
    flat_beta_schedule = [[hp.beta_t1 // hp.steps_between_learnings, hp.beta_b2],
                          [hp.beta_t2 // hp.steps_between_learnings, hp.beta_b2]]

    # rising_beta_schedule = [[400000 // hp.steps_between_learnings, 0.1], [700000 // hp.steps_between_learnings, 1]]
    # flat_beta_schedule = [[400000 // hp.steps_between_learnings, 1.0], [700000 // hp.steps_between_learnings, 1]]
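    # The schedules above pair a milestone (in learning iterations, i.e. environment
    # steps divided by hp.steps_between_learnings) with a value of beta, the inverse
    # temperature used for Boltzmann ('soft_q_type') action selection. A minimal
    # sketch of how such a schedule and beta could be used; the linear interpolation
    # between milestones is an assumption, not taken from DeepQNetwork itself:
    def _beta_at(iteration, schedule):
        (t1, b1), (t2, b2) = schedule
        if iteration <= t1:
            return b1
        if iteration >= t2:
            return b2
        return b1 + (b2 - b1) * (iteration - t1) / (t2 - t1)

    def _boltzmann_policy(q_values, beta):
        # softmax over Q-values scaled by beta: larger beta -> greedier policy
        z = np.exp(beta * (np.asarray(q_values) - np.max(q_values)))
        return z / z.sum()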

    RL = DeepQNetwork(len(agent.hp.action_space), observation_size * hp.mem_depth,  # sensor.frame_size+2,
                      reward_decay=0.99,
                      e_greedy=0.95,
                      e_greedy0=0.8,
                      replace_target_iter=10,
                      memory_size=100000,
                      e_greedy_increment=0.0001,
                      learning_rate=hp.syclop_learning_rate,
                      double_q=True,
                      dqn_mode=True,
                      state_table=np.zeros([1,observation_size*hp.mem_depth]),
                      soft_q_type='boltzmann',
                      beta_schedule=rising_beta_schedule if hp.rising_beta_schedule else flat_beta_schedule,
                      arch='mlp')
    keras.backend.set_session(RL.dqn.sess)
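    # The state size passed to the DQN is observation_size * hp.mem_depth, which
    # suggests each DQN state concatenates the last hp.mem_depth observations into
    # one flat vector. A minimal sketch of that stacking, assuming zero-padding
    # until enough observations have been collected (illustrative helper, not part
    # of DeepQNetwork):
    def _stack_observations(obs_history, mem_depth, obs_size):
        recent = list(obs_history)[-mem_depth:]
        padding = [np.zeros(obs_size)] * (mem_depth - len(recent))
        return np.concatenate(padding + recent)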
    if hp.decoder_initial_network is None:
        decoder = rnn_model_102(lr=hp.decoder_learning_rate,
                                ignore_input_B=hp.decoder_ignore_position,
                                dropout=hp.decoder_dropout,
                                rnn_type=hp.decoder_rnn_type,
                                input_size=(hp.resolution // 2, hp.resolution // 2, 1))
    else:
        decoder = keras.models.load_model(hp.decoder_initial_network)  # for example: 'ref_nets/keras_decoder_5stp_101.model'
        keras.backend.set_value(decoder.optimizer.lr, hp.decoder_learning_rate)
    if hp.dqn_initial_network is not None:
        RL.dqn.load_nwk_param(hp.dqn_initial_network)
    reward = syc.Rewards(
        reward_types=['central_rms_intensity', 'speed', 'saccade'],
        relative_weights=[1.0, -float(sys.argv[1]), 0])
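    # syc.Rewards presumably combines its terms as a weighted sum, so with the
    # weights above the scalar reward would be roughly
    # 1.0 * central_rms_intensity - float(sys.argv[1]) * speed + 0 * saccade,
    # i.e. the first command-line argument sets the speed penalty. A sketch of
    # that combination (an assumption about syc.Rewards, not its code):
    def _combined_reward(terms, weights):
        return sum(w * t for w, t in zip(weights, terms))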
    # observation_size = sensor.hp.winx*sensor.hp.winy*2
    observation_size = 64 * 64 + 2
    RL = DeepQNetwork(len(agent.hp.action_space),
                      observation_size,
                      n_features_shaped=list(np.shape(sensor.dvs_view)) + [1],
                      shape_fun=None,
                      reward_decay=0.99,
                      e_greedy=0.95,
                      e_greedy0=0.8,
                      replace_target_iter=10,
                      memory_size=100000,
                      e_greedy_increment=0.0001,
                      learning_rate=0.0025,
                      double_q=True,
                      dqn_mode=True,
                      state_table=np.zeros(
                          [1, observation_size * hp.mem_depth]),
                      soft_q_type='boltzmann',
                      beta=0.1,
                      beta_schedule=[[4000, 0.1], [7000, 1.0]],
                      arch='conv_ctrl')
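    # With arch='conv_ctrl' and observation_size = 64 * 64 + 2, the flat state
    # presumably holds the 64x64 DVS view followed by two position scalars, and
    # n_features_shaped gives the image shape expected by the conv layers. A sketch
    # of a shape_fun that would perform that split (shape_fun is None here, so this
    # is only an assumed illustration, not the library's behaviour):
    def _example_shape_fun(flat_state):
        view = np.reshape(flat_state[:64 * 64], (64, 64, 1))
        position = flat_state[64 * 64:]
        return view, position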
    # RL.dqn.load_nwk_param('tempX_1.nwk')
    # RL.dqn.save_nwk_param('liron_encircle.nwk')
    if hp.dqn_initial_network is not None:
        RL.dqn.load_nwk_param(hp.dqn_initial_network)
    hp.scene = scene.hp
    hp.sensor = sensor.hp
    # the call owning max_q was truncated here; syc.Agent is an assumption
    agent = syc.Agent(
        max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])
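    # max_q presumably bounds the sensor window position so it stays inside the
    # scene: a winx-by-winy window whose top-left corner q satisfies
    # 0 <= q[0] <= maxx - winx and 0 <= q[1] <= maxy - winy. A sketch of that
    # clamping (illustrative only, not the agent's implementation):
    def _clamp_q(q, max_q):
        return [min(max(qi, 0), mi) for qi, mi in zip(q, max_q)]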

    reward = syc.Rewards(
        reward_types=['central_rms_intensity', 'speed', 'saccade'],
        relative_weights=[1.0, hp.speed_penalty, -200])
    # observation_size = sensor.hp.winx*sensor.hp.winy*2
    observation_size = 256 * 4
    RL = DeepQNetwork(
        len(agent.hp.action_space),
        observation_size * hp.mem_depth,  # sensor.frame_size+2,
        reward_decay=0.99,
        e_greedy=0.95,
        e_greedy0=0.8,
        replace_target_iter=10,
        memory_size=100000,
        e_greedy_increment=0.0001,
        learning_rate=0.0025,
        double_q=True,
        dqn_mode=True,
        state_table=np.zeros([1, observation_size * hp.mem_depth]),
        soft_q_type='boltzmann',
        beta_schedule=[[400000 // hp.steps_between_learnings, 0.1],
                       [700000 // hp.steps_between_learnings, 1]],
        arch='mlp')
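    # e_greedy0, e_greedy_increment, and e_greedy are the usual start value,
    # per-learning-step increment, and cap of an annealed epsilon-greedy
    # exploitation probability. A sketch of that annealing (the exact update rule
    # inside DeepQNetwork is an assumption):
    def _epsilon_at(step, eps0=0.8, increment=0.0001, eps_max=0.95):
        return min(eps0 + increment * step, eps_max)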

    if hp.dqn_initial_network is not None:
        RL.dqn.load_nwk_param(hp.dqn_initial_network)
    hp.scene = scene.hp
    hp.sensor = sensor.hp
    hp.agent = agent.hp
    hp.reward = reward.hp
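    # Attaching the sub-module hyperparameter objects to the top-level hp gathers
    # the full run configuration in one place, presumably so it can be logged or
    # persisted alongside the results, e.g. (the file name is only an example):
    # import pickle
    # with open('run_hyperparameters.pkl', 'wb') as f:
    #     pickle.dump(hp, f)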