# --- continuation of a constructor call begun above this chunk ---
# NOTE(review): these keyword args presumably complete the saccade RL / DQN
# constructor (double_q + boltzmann soft-Q config) — confirm against the
# opening lines of the call, which are outside this view.
memory_size=100000, e_greedy_increment=0.0001, learning_rate=0.0025,
double_q=True, dqn_mode=True, soft_q_type='boltzmann',
# beta annealing schedule expressed in learning steps (wall-clock steps
# divided by steps_between_learnings): beta=1 from ~400k steps, 10 from ~700k
beta_schedule=[[400000//hp.steps_between_learnings, 1], [700000//hp.steps_between_learnings, 10]],
arch='conv_saccades_v1', n_modulating_features=hp.drift_state_size )
# at this point drift network is a standalone network taken from some external source (e.g. pretrained)
# in future it will be an action generating network from the drift loop
# drift_net = Stand_alone_net(16*16,10,arch='mlp', layer_size = [None]+[100]+[100]+[ None])
# Drift network: small conv net, 32x32x1 input -> 10 classes.
# Layer sizes: input, 3x3x32 conv, 2x2x16 conv, 200-unit dense, output.
drift_net = Stand_alone_net([32,32,1],10,arch='conv', layer_size = [None]+[[3,3,32]]+[[2,2,16]]+[200]+[ None], loss_type='softmax_cross_entropy', trainable=True, lr=0.0005, lambda_reg=0.0) #simple cifar10 classifier
# NOTE(review): input is declared [32,32,1] (single channel) although the
# comment says CIFAR-10, which is RGB — confirm the data pipeline grayscales.
# Share one TF session between the drift net and the DQN, then initialize
# all variables and reset the DQN's episode state.
drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
saccade_RL.dqn.sess.run(tf.global_variables_initializer())
saccade_RL.dqn.reset()
# Optionally restore pretrained weights; loading AFTER the global init so the
# restored values are not clobbered.
if not(hp.drift_initial_network is None):
    drift_net.load_nwk_param(hp.drift_initial_network)
if not(hp.dqn_initial_network is None):
    saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
# hp.scene = scene.hp
print('debug hp',sensor.hp.centralwinx,sensor.hp.centralwiny)
# Record each sub-module's hyper-parameters on the top-level hp object
# (presumably so the whole configuration can be serialized in one place).
hp.sensor = sensor.hp
hp.saccade_agent = saccade_agent.hp
hp.reward = reward.hp
hp.saccade_RL = saccade_RL.hp
# --- continuation of a constructor call begun above this chunk ---
# NOTE(review): keyword args completing the saccade RL / DQN constructor
# (same boltzmann soft-Q configuration as the sibling variants) — the call's
# opening is outside this view.
double_q=True, dqn_mode=True, soft_q_type='boltzmann',
beta_schedule=[[400000 // hp.steps_between_learnings, 1], [700000 // hp.steps_between_learnings, 10]],
arch='conv_saccades_v1', n_modulating_features=hp.drift_state_size)
# at this point drift network is a standalone network taken from some external source (e.g. pretrained)
# in future it will be an action generating network from the drift loop
# drift_net = Stand_alone_net(16*16,10,arch='mlp', layer_size = [None]+[100]+[100]+[ None])
# Drift network: deeper conv net, 32x32x3 input -> 10 classes, four 5x5
# conv layers then a 200-unit dense layer.
# NOTE(review): lr=1 is orders of magnitude above the sibling variants
# (0.0005) — looks like a typo unless the optimizer rescales it; confirm.
drift_net = Stand_alone_net( [32, 32, 3], 10, arch='conv',
    layer_size=[None] + [[5, 5, 96], [5, 5, 80], [5, 5, 64], [5, 5, 64]] + [200] + [None],
    loss_type='softmax_cross_entropy', trainable=True, lr=1,
    dropout_p_keep=0.99, lambda_reg=0.0)
# Share one TF session between the drift net and the DQN, then initialize
# all variables and reset the DQN's episode state.
drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
saccade_RL.dqn.sess.run(tf.global_variables_initializer())
saccade_RL.dqn.reset()
# Optionally restore pretrained weights after the global init so restored
# values are not clobbered.
if not (hp.drift_initial_network is None):
    drift_net.load_nwk_param(hp.drift_initial_network)
if not (hp.dqn_initial_network is None):
    saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
# hp.scene = scene.hp
print('debug hp', sensor.hp.centralwinx, sensor.hp.centralwiny)
# Record the sensor's hyper-parameters on the top-level hp object.
hp.sensor = sensor.hp
# --- continuation of a constructor call begun above this chunk ---
# NOTE(review): keyword args completing the saccade RL / DQN constructor.
# Unlike the sibling variants, the beta schedule here is given directly in
# learning steps (4000 / 7000) rather than derived from
# hp.steps_between_learnings — confirm this is intentional.
reward_decay=0.99, replace_target_iter=10, memory_size=100000,
e_greedy_increment=0.0001, learning_rate=0.0025, double_q=True,
dqn_mode=True, soft_q_type='boltzmann',
beta_schedule=[[4000, 1], [7000, 10]], arch='conv_saccades_v1')
# at this point drift network is a standalone network taken from some external source (e.g. pretrained)
# in future it will be an action generating network from the drift loop
# drift_net = Stand_alone_net(16*16,10,arch='mlp', layer_size = [None]+[100]+[100]+[ None])
# Drift network: MLP autoencoder, 16*16 flattened input reconstructed to
# 16*16 output through two 100-unit hidden layers.
drift_net = Stand_alone_net(16 * 16, 16 * 16, arch='mlp', layer_size=[None] + [100] + [100] + [None]) #ae
# Share one TF session between the drift net and the DQN, then initialize
# all variables and reset the DQN's episode state.
drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
saccade_RL.dqn.sess.run(tf.global_variables_initializer())
saccade_RL.dqn.reset()
# NOTE(review): unlike the sibling variants, this load is NOT guarded by an
# "is None" check — it will fail if hp.drift_initial_network is unset; confirm
# a pretrained drift network is always required in this configuration.
drift_net.load_nwk_param(hp.drift_initial_network)
if not (hp.dqn_initial_network is None):
    saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
# hp.scene = scene.hp
# Record each sub-module's hyper-parameters on the top-level hp object.
hp.sensor = sensor.hp
hp.saccade_agent = saccade_agent.hp
hp.reward = reward.hp
hp.saccade_RL = saccade_RL.hp
deploy_logs()
# Persist the assembled hp object to the run directory (body of this `with`
# continues beyond the visible chunk).
with open(hp.this_run_path + hp_file, 'wb') as f:
# --- continuation of a constructor call begun above this chunk ---
# NOTE(review): keyword args completing the saccade RL / DQN constructor.
# n_modulating_features=16 matches the 16-unit bottleneck of the autoencoder
# below — presumably the bottleneck code modulates the DQN; confirm.
e_greedy_increment=0.0001, learning_rate=0.0025, double_q=True,
dqn_mode=True, soft_q_type='boltzmann',
beta_schedule=[[400000 // hp.steps_between_learnings, 1], [700000 // hp.steps_between_learnings, 10]],
arch='conv_saccades_v1', n_modulating_features=16)
# at this point drift network is a standalone network taken from some external source (e.g. pretrained)
# in future it will be an action generating network from the drift loop
# drift_net = Stand_alone_net(16*16,10,arch='mlp', layer_size = [None]+[100]+[100]+[ None])
# Drift network: MLP autoencoder with a 16-unit bottleneck
# (256 -> 100 -> 16 -> 100 -> 256), mean-squared reconstruction loss.
drift_net = Stand_alone_net(16 * 16, 16 * 16, arch='mlp',
    layer_size=[None] + [100, 16, 100] + [None],
    loss_type='mean_squared', trainable=True, lr=0.0005, lambda_reg=0.0) #ae
# Share one TF session between the drift net and the DQN, then initialize
# all variables and reset the DQN's episode state.
drift_net.assign_session_to_nwk(saccade_RL.dqn.sess)
saccade_RL.dqn.sess.run(tf.global_variables_initializer())
saccade_RL.dqn.reset()
# Optionally restore pretrained weights after the global init so restored
# values are not clobbered.
if not (hp.drift_initial_network is None):
    drift_net.load_nwk_param(hp.drift_initial_network)
if not (hp.dqn_initial_network is None):
    saccade_RL.dqn.load_nwk_param(hp.dqn_initial_network)
# hp.scene = scene.hp
# Record each sub-module's hyper-parameters on the top-level hp object.
hp.sensor = sensor.hp
hp.saccade_agent = saccade_agent.hp
hp.reward = reward.hp
hp.saccade_RL = saccade_RL.hp