Example #1
                   memory_size=100000,
                   e_greedy_increment=0.0001,
                   learning_rate=0.0025,
                   double_q=True,
                   dqn_mode=True,
                   soft_q_type='boltzmann',
                   beta_schedule=[[400000 // hp.steps_between_learnings, 1],
                                  [700000 // hp.steps_between_learnings, 10]],
                   arch='conv_saccades_v1',
                   n_modulating_features=hp.drift_state_size
                   )
  # At this point the drift network is a standalone network taken from some external source (e.g. pretrained).
  # In the future it will be an action-generating network from the drift loop.
  # drift_net = Stand_alone_net(16*16, 10, arch='mlp', layer_size=[None]+[100]+[100]+[None])
  drift_net = Stand_alone_net([32, 32, 1], 10, arch='conv',
                              layer_size=[None] + [[3, 3, 32]] + [[2, 2, 16]] + [200] + [None],
                              loss_type='softmax_cross_entropy',
                              trainable=True,
                              lr=0.0005,
                              lambda_reg=0.0)  # simple CIFAR-10 classifier
 drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
 saccade_RL.dqn.sess.run(tf.global_variables_initializer())
 saccade_RL.dqn.reset()
  if hp.drift_initial_network is not None:
      drift_net.load_nwk_param(hp.drift_initial_network)
  if hp.dqn_initial_network is not None:
      saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
 # hp.scene = scene.hp
  print('debug hp', sensor.hp.centralwinx, sensor.hp.centralwiny)
 hp.sensor = sensor.hp
 hp.saccade_agent = saccade_agent.hp
 hp.reward = reward.hp
 hp.saccade_RL = saccade_RL.hp
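
The beta_schedule argument above pairs a learning-iteration index with a beta value (presumably the inverse temperature of the Boltzmann soft-Q policy selected by soft_q_type='boltzmann'), and those indices are obtained from raw environment-step milestones by integer division with hp.steps_between_learnings. A minimal sketch of that arithmetic, with an illustrative steps_between_learnings value that is not taken from the example:

# Sketch only: how the step milestones map to learning iterations.
steps_between_learnings = 100  # illustrative value

beta_schedule = [
    [400000 // steps_between_learnings, 1],   # from iteration 4000 on: beta = 1
    [700000 // steps_between_learnings, 10],  # from iteration 7000 on: beta = 10
]
print(beta_schedule)  # [[4000, 1], [7000, 10]], the literal schedule seen in Example #3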
Example #2
     double_q=True,
     dqn_mode=True,
     soft_q_type='boltzmann',
     beta_schedule=[[400000 // hp.steps_between_learnings, 1],
                    [700000 // hp.steps_between_learnings, 10]],
     arch='conv_saccades_v1',
     n_modulating_features=hp.drift_state_size)
  # At this point the drift network is a standalone network taken from some external source (e.g. pretrained).
  # In the future it will be an action-generating network from the drift loop.
  # drift_net = Stand_alone_net(16*16, 10, arch='mlp', layer_size=[None]+[100]+[100]+[None])
 drift_net = Stand_alone_net(
     [32, 32, 3],
     10,
     arch='conv',
     layer_size=[None] + [[5, 5, 96], [5, 5, 80], [5, 5, 64], [5, 5, 64]] +
     [200] + [None],
     loss_type='softmax_cross_entropy',
     trainable=True,
     lr=1,
     dropout_p_keep=0.99,
     lambda_reg=0.0)
 drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
 saccade_RL.dqn.sess.run(tf.global_variables_initializer())
 saccade_RL.dqn.reset()
  if hp.drift_initial_network is not None:
      drift_net.load_nwk_param(hp.drift_initial_network)
  if hp.dqn_initial_network is not None:
      saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
 # hp.scene = scene.hp
 print('debug hp', sensor.hp.centralwinx, sensor.hp.centralwiny)
 hp.sensor = sensor.hp
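
A hedged reading of the layer_size convention in these Stand_alone_net calls, inferred from the examples rather than from the class itself: None marks the input and output layers, whose sizes come from the first two positional arguments; a three-element list looks like a convolutional layer spec of the form [kernel_h, kernel_w, channels]; and a plain integer is the width of a fully connected layer. The hypothetical helper below only prints that reading of Example #2's spec:

# Hypothetical helper: pretty-print a layer_size spec under the reading above.
def describe_layer_spec(layer_size):
    for i, spec in enumerate(layer_size):
        if spec is None:
            print(i, 'input/output layer (size taken from the constructor arguments)')
        elif isinstance(spec, list):
            print(i, 'conv layer: kernel {}x{}, {} output channels'.format(*spec))
        else:
            print(i, 'fully connected layer: {} units'.format(spec))

describe_layer_spec([None] + [[5, 5, 96], [5, 5, 80], [5, 5, 64], [5, 5, 64]] +
                    [200] + [None])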
Example #3
     reward_decay=0.99,
     replace_target_iter=10,
     memory_size=100000,
     e_greedy_increment=0.0001,
     learning_rate=0.0025,
     double_q=True,
     dqn_mode=True,
     soft_q_type='boltzmann',
     beta_schedule=[[4000, 1], [7000, 10]],
     arch='conv_saccades_v1')
  # At this point the drift network is a standalone network taken from some external source (e.g. pretrained).
  # In the future it will be an action-generating network from the drift loop.
  # drift_net = Stand_alone_net(16*16, 10, arch='mlp', layer_size=[None]+[100]+[100]+[None])
 drift_net = Stand_alone_net(16 * 16,
                             16 * 16,
                             arch='mlp',
                             layer_size=[None] + [100] + [100] +
                              [None])  # autoencoder
 drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
 saccade_RL.dqn.sess.run(tf.global_variables_initializer())
 saccade_RL.dqn.reset()
 drift_net.load_nwk_param(hp.drift_initial_network)
  if hp.dqn_initial_network is not None:
      saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
 # hp.scene = scene.hp
 hp.sensor = sensor.hp
 hp.saccade_agent = saccade_agent.hp
 hp.reward = reward.hp
 hp.saccade_RL = saccade_RL.hp
 deploy_logs()
  with open(hp.this_run_path + hp_file, 'wb') as f:
      pickle.dump(hp, f)  # assumed continuation (the snippet is truncated here): serialize the assembled hp bundle
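
A pattern shared by all of these examples: the drift network is attached to the DQN's TensorFlow session, tf.global_variables_initializer() is run once for both networks, and only afterwards are any pretrained parameters loaded, so the initializer cannot overwrite them. A minimal TF1-style sketch of that ordering, using a plain tf.train.Saver in place of the project's load_nwk_param:

# Illustrative only, not the project's API: initialize everything first, then
# restore the pretrained subset so the loaded weights are kept.
import tensorflow as tf

w = tf.get_variable('w', shape=[4], initializer=tf.zeros_initializer())
sess = tf.Session()
sess.run(tf.global_variables_initializer())  # every variable now has a value
saver = tf.train.Saver()
# saver.restore(sess, '/path/to/pretrained/checkpoint')  # hypothetical checkpoint path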
Example #4
     e_greedy_increment=0.0001,
     learning_rate=0.0025,
     double_q=True,
     dqn_mode=True,
     soft_q_type='boltzmann',
     beta_schedule=[[400000 // hp.steps_between_learnings, 1],
                    [700000 // hp.steps_between_learnings, 10]],
     arch='conv_saccades_v1',
     n_modulating_features=16)
  # At this point the drift network is a standalone network taken from some external source (e.g. pretrained).
  # In the future it will be an action-generating network from the drift loop.
  # drift_net = Stand_alone_net(16*16, 10, arch='mlp', layer_size=[None]+[100]+[100]+[None])
 drift_net = Stand_alone_net(16 * 16,
                             16 * 16,
                             arch='mlp',
                             layer_size=[None] + [100, 16, 100] + [None],
                             loss_type='mean_squared',
                             trainable=True,
                             lr=0.0005,
                              lambda_reg=0.0)  # autoencoder
 drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
 saccade_RL.dqn.sess.run(tf.global_variables_initializer())
 saccade_RL.dqn.reset()
  if hp.drift_initial_network is not None:
      drift_net.load_nwk_param(hp.drift_initial_network)
  if hp.dqn_initial_network is not None:
      saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
 # hp.scene = scene.hp
 hp.sensor = sensor.hp
 hp.saccade_agent = saccade_agent.hp
 hp.reward = reward.hp
 hp.saccade_RL = saccade_RL.hp
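
In this last example the MLP drift network is a 256-100-16-100-256 autoencoder (loss_type='mean_squared'), and its 16-unit bottleneck matches n_modulating_features=16 in the DQN call, suggesting that the latent code is what modulates the saccade policy; that correspondence is inferred from the arguments, not stated in the code. A plain TF1 sketch of an autoencoder with the same layer sizes (illustrative, not Stand_alone_net itself):

# Illustrative TF1 autoencoder with the same sizes as the drift_net spec above:
# 16*16 -> 100 -> 16 -> 100 -> 16*16.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 16 * 16])
h1 = tf.layers.dense(x, 100, activation=tf.nn.relu)
code = tf.layers.dense(h1, 16, activation=tf.nn.relu)  # 16-unit bottleneck, matching n_modulating_features
h2 = tf.layers.dense(code, 100, activation=tf.nn.relu)
x_hat = tf.layers.dense(h2, 16 * 16)
loss = tf.reduce_mean(tf.square(x_hat - x))  # 'mean_squared' reconstruction loss
# learning rate 0.0005 as in the example; the optimizer choice here is illustrative
train_op = tf.train.AdamOptimizer(0.0005).minimize(loss)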