import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/12_ax/DQN', res)
test(env, agent, N_tst, seed=123)
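# A minimal follow-up sketch: reload the records saved above and plot them with the
# helpers this script already imports. The '.npy' suffix mirrors the load_train_res
# usage in the seq_prediction LSTM script; the exact paths here are assumptions.
res_loaded = load_train_res('./save/12_ax/DQN.npy')
train_results_plots(dir='./save/12_ax', figname='DQN', names=['DQN'], numbers=[res_loaded])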
import gym
import gym_cog_ml_tasks
from common.utils import train, test, save_train_res
# assumed import path, following the <Name>.<Name>_model layout used elsewhere in this repo
from AuGMEnT.AuGMEnT_model import Agent_AuGMEnT

# NOTE: the original script is truncated; env, N_tr, N_tst and the AuGMEnT layer sizes
# R (regular units) and M (memory units) must be defined here. The save path below
# suggests the Saccade task with go_reward=7, e.g.:
# env = gym.make('Saccade-v0', go_reward=7)

lamb = 0.2      # synaptic tag decay
beta = 0.15     # weight update coefficient
discount = 0.9  # discount rate for future rewards
alpha = 1 - lamb * discount  # synaptic permanence
eps = 0.025     # percentage of softmax modality for activity selection
leak = 1.0      # additional parameter: leaking decay of the integrative memory
g = 1

# reward settings
rew = 'RL'
prop = 'std'
policy_train = 'softmax'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'soft'
t_weighted_train = True
t_weighted_test = True
e_weighted = False
first_flag = False
reset_tags_seq = False

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta,
                      discount, eps, g, leak, rew, prop, policy_train, policy_test,
                      stoc_train, stoc_test, t_weighted_train, t_weighted_test,
                      e_weighted, first_flag, reset_tags_seq)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('save/saccade/AuGMEnT_reward_7', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('12AX_CPT-v0', size=200)

N_tr = 5000
N_tst = 500
n_hidden = 50
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)
train_results_plots(dir='./save/ax_cpt', figname='LSTM_50', names=['LSTM_50'], numbers=[res])
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 80000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred/MC_50_.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred',
                    figname='MC_50_.5', names=['MC_50_.5'], numbers=[res])
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('12_AX_CPT-v0', size=100, prob_target=0.5, prob_12=0.1)

N_tr = 5000
N_tst = 1000
n_hidden = 20
n_layers = 2
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/12_ax_cpt/LSTM', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from DRQN.DRQN_agent import Agent_DRQN
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 10000
N_tst = 1000
max_mem_size = 300
lr = 1e-3
epsilon = 0.999
gamma = 0.9
drqn_params = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 1,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

agent = Agent_DRQN(env.observation_space.n, env.action_space.n, max_mem_size, lr,
                   epsilon, gamma, drqn_params)
res = train(env, agent, N_tr, seed=123)
save_train_res('./save/12_AX/DRQN', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res
import numpy as np

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 50
N_tst = 1000
lr = 0.01
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}

agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/12_ax/HER', res)
# test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from DRQN.DRQN_agent import Agent_DRQN
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy-v0', n_char=10, size=10)

N_tr = 5000
N_tst = 1000
max_mem_size = 300
lr = 1e-3
epsilon = 0.999
gamma = 0.9
model_params = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 1,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

agent = Agent_DRQN(env.observation_space.n, env.action_space.n, max_mem_size, lr,
                   epsilon, gamma, model_params)
res = train(env, agent, N_tr, seed=123)
save_train_res('./save/simple_cp/DRQN', res)
test(env, agent, N_tst, seed=123)
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))  # as in the other DQN scripts
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('12_AX_S-v0', size=10, prob_target=0.5)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('save/12_ax_s/DQN', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 50
N_tst = 1000
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}

agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/seq_pred/HER_50', res)
# test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

seed = 123

env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 10000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=seed, print_progress=True)
test(env, agent, N_tst, seed=seed, print_progress=True)
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/MC_10_3', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/',
                    figname='MC_10_3', names=['MC_10_3'], numbers=[res])
import gym
import gym_cog_ml_tasks
from common.utils import train, test, save_train_res
# assumed import path, following the <Name>.<Name>_model layout used elsewhere in this repo
from AuGMEnT.AuGMEnT_model import Agent_AuGMEnT

# NOTE: the original script is truncated; env, N_tr, N_tst, the synaptic tag decay lamb,
# and the AuGMEnT layer sizes R (regular units) and M (memory units) must be defined here.
# The save path below suggests the AX_CPT task, e.g.:
# env = gym.make('AX_CPT-v0', size=100)

beta = 0.15     # weight update coefficient
discount = 0.9  # discount rate for future rewards
alpha = 1 - lamb * discount  # synaptic permanence
eps = 0.025     # percentage of softmax modality for activity selection
g = 1
leak = [0.7, 1.0]  # additional parameter: leaking decay of the integrative memory

# reward settings
rew = 'BRL'
prop = 'std'
policy_train = 'greedy'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'soft'
t_weighted_train = True
t_weighted_test = True
e_weighted = False
first_flag = True
reset_tags_seq = False

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta,
                      discount, eps, g, leak, rew, prop, policy_train, policy_test,
                      stoc_train, stoc_test, t_weighted_train, t_weighted_test,
                      e_weighted, first_flag, reset_tags_seq)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('save/ax_cpt/AuGMEnT', res)
test(env, agent, N_tst, seed=123)
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('AX_CPT-v0', size=100)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/ax_cpt/DQN', res)
test(env, agent, N_tst, seed=123)
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))  # as in the other DQN scripts
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/seq_pred/DQN_size_50', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Saccade-v0', go_reward=7)

N_tr = 1000
N_tst = 1000
n_hidden = 10
n_layers = 1
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/saccade/LSTM', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy-v0', n_char=5, size=100)

N_tr = 100
N_tst = 1000
n_hidden = 10
n_layers = 1
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/simple_cp/LSTM', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

# env = gym.make('seq_prediction-v0', size=10, p=0.5)
env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 100
N_tst = 1000
n_hidden = 10
n_layers = 1
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/seq_pred/LSTM', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Saccade-v0', go_reward=7)

N_tr = 1600
N_tst = 1000
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}

agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/saccade/HER_7', res)
# test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=10, p=0.5)

N_tr = 300
N_tst = 100
n_hidden = 10
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res_10 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_10_res', res_10)
# agent.save(dir='./save/seq_pred', name='LSTM_10')
# agent.load('./save/seq_pred/LSTM_10')
# test(env, agent, N_tst, print_progress=True, seed=123)
# a = load_train_res('./save/seq_pred/LSTM_10_res.npy')

N_tr = 300
N_tst = 100
n_hidden = 30
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)
res_30 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_30_res', res_30)
# agent.save(dir='./save/seq_pred', name='LSTM_30')
# test(env, agent, N_tst, print_progress=True, seed=123)
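# A minimal sketch for comparing the two runs above in one figure: train_results_plots
# accepts parallel lists of names and result records (see its use in the other scripts
# in this repo). The figname is an assumption.
train_results_plots(dir='./save/seq_pred', figname='LSTM_10_vs_30',
                    names=['LSTM_10', 'LSTM_30'], numbers=[res_10, res_30])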
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))  # as in the other DQN scripts
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Saccade-v0', go_reward=1)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/saccade/DQN_reward_1', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('AX_CPT-v0', size=100, prob_target=0.5)

N_tr = 30
N_tst = 1000
hierarchy_num = 2
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}

agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/ax_cpt/HER', res)
# test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

env = gym.make('AX_CPT-v0', size=100, prob_target=0.5)

N_tr = 50000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/ax_CPT/MC_100_0.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/ax_CPT/',
                    figname='MC_100_0.5', names=['MC_100_0.5'], numbers=[res])
import gym
import gym_cog_ml_tasks
from common.utils import train, test, save_train_res
# assumed import path, following the <Name>.<Name>_model layout used elsewhere in this repo
from AuGMEnT.AuGMEnT_model import Agent_AuGMEnT

# NOTE: the original script is truncated; env, N_tr, N_tst and the AuGMEnT layer sizes
# R (regular units) and M (memory units) must be defined here. The save path below
# suggests the Simple_Copy task with size=100, e.g.:
# env = gym.make('Simple_Copy-v0', size=100)

lamb = 0.15     # synaptic tag decay
beta = 0.15     # weight update coefficient
discount = 0.9  # discount rate for future rewards
alpha = 1 - lamb * discount  # synaptic permanence
eps = 0.025     # percentage of softmax modality for activity selection
g = 1
leak = 1.0      # additional parameter: leaking decay of the integrative memory

# reward settings
rew = 'BRL'
prop = 'std'
policy_train = 'softmax'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'soft'
t_weighted_train = True
t_weighted_test = True
e_weighted = False
first_flag = False
reset_tags_seq = False

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta,
                      discount, eps, g, leak, rew, prop, policy_train, policy_test,
                      stoc_train, stoc_test, t_weighted_train, t_weighted_test,
                      e_weighted, first_flag, reset_tags_seq)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('save/simple_copy/AuGMEnT_size_100', res)
test(env, agent, N_tst, seed=123)
# NOTE: the original script is truncated; TASK_ID, N_tr, N_tst and the BabiEnv import
# must be defined here (BabiEnv's module path is not shown anywhere in this repo).
from DRQN.DRQN_agent import Agent_DRQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')

agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR,
                   EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'],
                    numbers=[res_tr])
print('Plots saved for task', TASK_ID)
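# A minimal sketch (an assumption, not in the original script): if res_te has the same
# record structure as res_tr, the test run can be plotted with the same helper.
train_results_plots(dir='./plots/', figname='{0}_te'.format(TASK_ID), names=['DRQN_te'],
                    numbers=[res_te])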
import gym
import gym_cog_ml_tasks
from common.utils import train, test, save_train_res
# assumed import path, following the <Name>.<Name>_model layout used elsewhere in this repo
from AuGMEnT.AuGMEnT_model import Agent_AuGMEnT

# NOTE: the original script is truncated; env, N_tr, N_tst, the synaptic tag decay lamb,
# and the AuGMEnT layer sizes R (regular units) and M (memory units) must be defined here.
# The save path below suggests the sequence prediction task with size=50, e.g.:
# env = gym.make('seq_prediction-v0', size=50, p=0.5)

beta = 0.15     # weight update coefficient
discount = 0.9  # discount rate for future rewards
alpha = 1 - lamb * discount  # synaptic permanence
eps = 0.025     # percentage of softmax modality for activity selection
g = 1
leak = 1.0      # additional parameter: leaking decay of the integrative memory

# reward settings
rew = 'SRL'
prop = 'std'
policy_train = 'eps_greedy'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'unif'
t_weighted_train = True
t_weighted_test = False
e_weighted = False
first_flag = False
reset_tags_seq = True

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta,
                      discount, eps, g, leak, rew, prop, policy_train, policy_test,
                      stoc_train, stoc_test, t_weighted_train, t_weighted_test,
                      e_weighted, first_flag, reset_tags_seq)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('save/seq_pred/AuGMEnT_size_50', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Simple_Copy_Repeat-v0', size=10, repeat=3)

N_tr = 10000
N_tst = 1000
hierarchy_num = 3  # size*repeat
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.linspace(0.1, 0.99, hierarchy_num),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.zeros(hierarchy_num),
    'gamma': 15
}

agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/simple_copy_repeat/HER_10_3', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

torch.manual_seed(123)

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 20000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=123)
test(env, agent, N_tst, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/MC_10_0.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/',
                    figname='MC_10_0.5', names=['MC_10_0.5'], numbers=[res])
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 10000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/simple_copy_repeat/DQN_size_10', res)
test(env, agent, N_tst, seed=123)
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))  # as in the other DQN scripts
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Simple_Copy-v0', n_char=10, size=100)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                 # learning rate
DECAY = 0.001
EPSILON = 0.2              # epsilon-greedy exploration
GAMMA = 0.9                # reward discount
TARGET_REPLACE_ITER = 200  # target network update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES,
                  LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY,
                  S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/simple_copy/DQN_size_100', res)
test(env, agent, N_tst, seed=123)
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 100000
N_tst = 1000
n_hidden = 30
n_layers = 2
lr = 0.001

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)
res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy and F1 over the iterations
save_train_res('./save/cp_r/LSTM', res)
test(env, agent, N_tst, seed=123)