Example No. 1
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                  # learning rate
DECAY = 0.001
EPSILON = 0.2               # greedy policy
GAMMA = 0.9                 # reward discount
TARGET_REPLACE_ITER = 200   # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/12_ax/DQN', res)
test(env, agent, N_tst, seed=123)
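The `load_train_res` and `train_results_plots` helpers imported above are not used in this example; a minimal follow-up sketch, assuming `save_train_res` stores the records under the path above with a `.npy` suffix (as the other examples suggest) and reusing the keyword arguments those utilities take elsewhere in this collection:

# Hypothetical follow-up: reload the saved records (per-episode reward, action
# accuracy and F1) and plot them. The path and figure name are assumptions.
res_loaded = load_train_res('./save/12_ax/DQN.npy')
train_results_plots(dir='./save/12_ax', figname='DQN_12_ax',
                    names=['DQN'], numbers=[res_loaded])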
Example No. 2
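# NOTE: partial snippet -- it assumes that `env`, the AuGMEnT layer sizes `R` and `M`,
# the episode counts `N_tr`/`N_tst`, and the imports of `Agent_AuGMEnT`, `train`, `test`
# and `save_train_res` were already set up earlier in the original script.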
lamb = 0.2       # synaptic tag decay
beta = 0.15      # weight update coefficient
discount = 0.9   # discount rate for future rewards
alpha = 1 - lamb*discount  # synaptic permanence
eps = 0.025      # percentage of softmax modality for activity selection
leak = 1.0       # additional parameter: leaking decay of the integrative memory
g = 1

# reward settings
rew = 'RL'
prop = 'std'

policy_train = 'softmax'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'soft'
t_weighted_train = True
t_weighted_test = True
e_weighted = False
first_flag = False
reset_tags_seq = False

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta, discount, eps, g, leak, rew, prop, policy_train, policy_test, stoc_train, stoc_test, t_weighted_train, t_weighted_test, e_weighted, first_flag, reset_tags_seq)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('save/saccade/AuGMEnT_reward_7', res)
test(env, agent, N_tst, seed=123)
Example No. 3
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('12AX_CPT-v0', size=200)

N_tr = 5000
N_tst = 500
n_hidden = 50
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)

train_results_plots(dir='./save/ax_cpt',
                    figname='LSTM_50',
                    names=['LSTM_50'],
                    numbers=[res])
Example No. 4
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 80000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred/MC_50_.5', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/seq_pred',
    figname='MC_50_.5',
    names=['MC_50_.5'],
    numbers=[res])
Example No. 5
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('12_AX_CPT-v0', size=100, prob_target=0.5, prob_12=0.1)

N_tr = 5000
N_tst = 1000

n_hidden = 20
n_layers = 2
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/12_ax_cpt/LSTM', res)
test(env, agent, N_tst, seed=123)
Example No. 6
import gym
import gym_cog_ml_tasks
from DRQN.DRQN_agent import Agent_DRQN
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 10000
N_tst = 1000

max_mem_size = 300
lr = 1e-3
epsilon = 0.999
gamma = 0.9
drqn_params = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 1,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

agent = Agent_DRQN(env.observation_space.n, env.action_space.n, max_mem_size, lr, epsilon, gamma, drqn_params)

res = train(env, agent, N_tr, seed=123)
save_train_res('./save/12_AX/DRQN', res)
test(env, agent, N_tst, seed=123)
Example No. 7
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res
import numpy as np


env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 50
N_tst = 1000
lr = 0.01
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1/(10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/12_ax/HER', res)
# test(env, agent, N_tst, seed=123)
Example No. 8
import gym
import gym_cog_ml_tasks
from DRQN.DRQN_agent import Agent_DRQN
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy-v0', n_char=10, size=10)

N_tr = 5000
N_tst = 1000

max_mem_size = 300
lr = 1e-3
epsilon = 0.999
gamma = 0.9
model_params = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 1,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

agent = Agent_DRQN(env.observation_space.n, env.action_space.n, max_mem_size,
                   lr, epsilon, gamma, model_params)

res = train(env, agent, N_tr, seed=123)
save_train_res('./save/simple_cp/DRQN', res)
test(env, agent, N_tst, seed=123)
Example No. 9
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('12_AX_S-v0', size=10, prob_target=0.5)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('save/12_ax_s/DQN', res)
test(env, agent, N_tst, seed=123)
Example No. 10
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common import utils
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 50
N_tst = 1000
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                  learn_mode, hyperparam)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/seq_pred/HER_50', res)
# test(env, agent, N_tst, seed=123)
Example No. 11
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

seed = 123

env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 10000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)
res = train(env, agent, N_tr, seed=123, print_progress=True)
test(env, agent, N_tst, seed=123, print_progress=True)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/MC_10_3', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/copy_repeat/',
    figname='MC_10_3',
    names=['MC_10_3'],
    numbers=[res])
Example No. 12
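# NOTE: partial snippet -- `lamb` (the synaptic tag decay used for `alpha` below), `env`,
# the AuGMEnT sizes `R` and `M`, `N_tr`/`N_tst`, and the imports of `Agent_AuGMEnT`,
# `train`, `test` and `save_train_res` are assumed to be defined earlier in the original script.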
beta = 0.15      # weight update coefficient
discount = 0.9   # discount rate for future rewards
alpha = 1 - lamb*discount  # synaptic permanence
eps = 0.025      # percentage of softmax modality for activity selection
g = 1

leak = [0.7, 1.0]  # additional parameter: leaking decay of the integrative memory

# reward settings
rew = 'BRL'
prop = 'std'

policy_train = 'greedy'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'soft'
t_weighted_train = True
t_weighted_test = True
e_weighted = False
first_flag = True
reset_tags_seq = False

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta, discount, eps, g, leak, rew, prop, policy_train, policy_test, stoc_train, stoc_test, t_weighted_train, t_weighted_test, e_weighted, first_flag, reset_tags_seq)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('save/ax_cpt/AuGMEnT', res)
test(env, agent, N_tst, seed=123)
Example No. 13
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np
env = gym.make('AX_CPT-v0', size=100)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/ax_cpt/DQN', res)
test(env, agent, N_tst, seed=123)
Example No. 14
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/seq_pred/DQN_size_50', res)
test(env, agent, N_tst, seed=123)
Example No. 15
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Saccade-v0', go_reward=7)

N_tr = 1000
N_tst = 1000

n_hidden = 10
n_layers = 1
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr,
                   n_layers)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/saccade/LSTM', res)
test(env, agent, N_tst, seed=123)
Example No. 16
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy-v0', n_char=5, size=100)

N_tr = 100
N_tst = 1000

n_hidden = 10
n_layers = 1
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr,
                   n_layers)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/simple_cp/LSTM', res)
test(env, agent, N_tst, seed=123)
Example No. 17
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

# env = gym.make('seq_prediction-v0', size=10, p=0.5)
env = gym.make('seq_prediction-v0', size=50, p=0.5)

N_tr = 100
N_tst = 1000
n_hidden = 10
n_layers = 1
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr,
                   n_layers)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/seq_pred/LSTM', res)
test(env, agent, N_tst, seed=123)
Example No. 18
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common import utils
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np


env = gym.make('Saccade-v0', go_reward=7)

N_tr = 1600
N_tst = 1000
hierarchy_num = 3
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1/(10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num, learn_mode, hyperparam)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/saccade/HER_7', res)
# test(env, agent, N_tst, seed=123)
Example No. 19
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('seq_prediction-v0', size=10, p=0.5)

N_tr = 300
N_tst = 100
n_hidden = 10
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res_10 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_10_res', res_10)
# agent.save(dir='./save/seq_pred', name='LSTM_10')
# agent.load('./save/seq_pred/LSTM_10')
# test(env, agent, N_tst, print_progress=True, seed=123)
# a = load_train_res('./save/seq_pred/LSTM_10_res.npy')

N_tr = 300
N_tst = 100
n_hidden = 30
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res_30 = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/seq_pred/LSTM_30_res', res_30)
# agent.save(dir='./save/seq_pred', name='LSTM_30')
# test(env, agent, N_tst, print_progress=True, seed=123)
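Since this example trains the same task with two hidden sizes, the two saved records can presumably be compared in one figure with the `train_results_plots` helper imported above; a minimal sketch, assuming the directory layout above and the keyword arguments used elsewhere in this collection:

# Hypothetical follow-up: plot the 10-unit and 30-unit LSTM training curves together.
train_results_plots(dir='./save/seq_pred', figname='LSTM_10_vs_30',
                    names=['LSTM_10', 'LSTM_30'], numbers=[res_10, res_30])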
Example No. 20
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Saccade-v0', go_reward=1)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/saccade/DQN_reward_1', res)
test(env, agent, N_tst, seed=123)
Example No. 21
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('AX_CPT-v0', size=100, prob_target=0.5)

N_tr = 30
N_tst = 1000
hierarchy_num = 2
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                  learn_mode, hyperparam)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/ax_cpt/HER', res)
# test(env, agent, N_tst, seed=123)
Example No. 22
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

env = gym.make('AX_CPT-v0', size=100, prob_target=0.5)

N_tr = 50000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, print_progress=True, seed=123)
test(env, agent, N_tst, print_progress=True, seed=123)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/ax_CPT/MC_100_0.5', res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/ax_CPT/',
                    figname='MC_100_0.5',
                    names=['MC_100_0.5'],
                    numbers=[res])
Example No. 23
lamb = 0.15      # synaptic tag decay
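# NOTE: partial snippet -- `env`, the AuGMEnT sizes `R` and `M`, `N_tr`/`N_tst`, and the
# imports of `Agent_AuGMEnT`, `train`, `test` and `save_train_res` are assumed to be
# defined earlier in the original script.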
beta = 0.15      # weight update coefficient
discount = 0.9   # discount rate for future rewards
alpha = 1 - lamb*discount  # synaptic permanence
eps = 0.025      # percentage of softmax modality for activity selection
g = 1
leak = 1.0       # additional parameter: leaking decay of the integrative memory

# reward settings
rew = 'BRL'
prop = 'std'

policy_train = 'softmax'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'soft'
t_weighted_train = True
t_weighted_test = True
e_weighted = False
first_flag = False
reset_tags_seq = False

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha, beta, discount, eps, g, leak, rew, prop, policy_train, policy_test, stoc_train, stoc_test, t_weighted_train, t_weighted_test, e_weighted, first_flag, reset_tags_seq)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('save/simple_copy/AuGMEnT_size_100', res)
test(env, agent, N_tst, seed=123)
Example No. 24
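# NOTE: partial snippet -- `TASK_ID`, `N_tr`, `N_tst`, `BabiEnv`, `Agent_DRQN` and the
# `train`/`test`/`save_train_res`/`load_train_res`/`train_results_plots` utilities are
# assumed to be imported or defined earlier in the original script.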
MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')

agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR, \
                    EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'], \
                    numbers=[res_tr])

print('Plots saved for task', TASK_ID)
Example No. 25
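# NOTE: partial snippet -- `lamb` (used for `alpha` below), `env`, the AuGMEnT sizes `R`
# and `M`, `N_tr`/`N_tst`, and the imports of `Agent_AuGMEnT`, `train`, `test` and
# `save_train_res` are assumed to be defined earlier in the original script.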
beta = 0.15  # weight update coefficient
discount = 0.9  # discount rate for future rewards
alpha = 1 - lamb * discount  # synaptic permanence
eps = 0.025  # percentage of softmax modality for activity selection
g = 1

leak = 1.0  # additional parameter: leaking decay of the integrative memory

# reward settings
rew = 'SRL'
prop = 'std'

policy_train = 'eps_greedy'
policy_test = 'greedy'
stoc_train = 'soft'
stoc_test = 'unif'
t_weighted_train = True
t_weighted_test = False
e_weighted = False
first_flag = False
reset_tags_seq = True

agent = Agent_AuGMEnT(env.observation_space.n, R, M, env.action_space.n, alpha,
                      beta, discount, eps, g, leak, rew, prop, policy_train,
                      policy_test, stoc_train, stoc_test, t_weighted_train,
                      t_weighted_test, e_weighted, first_flag, reset_tags_seq)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('save/seq_pred/AuGMEnT_size_50', res)
test(env, agent, N_tst, seed=123)
Example No. 26
import gym
import gym_cog_ml_tasks
import sys, os

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from HER.HER_model import Agent_HER
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Simple_Copy_Repeat-v0', size=10, repeat=3)

N_tr = 10000
N_tst = 1000
hierarchy_num = 3  # size*repeat
learn_mode = 'SL'
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.linspace(0.1, 0.99, hierarchy_num),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.zeros(hierarchy_num),
    'gamma': 15
}
agent = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                  learn_mode, hyperparam)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/simple_copy_repeat/HER_10_3', res)
test(env, agent, N_tst, seed=123)
Example No. 27
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from MonteCarlo.MonteCarlo_model import Agent_MC
from common.utils import train, test, save_train_res, train_results_plots
import torch

torch.manual_seed(123)

env = gym.make('12_AX-v0', size=10, prob_target=0.5)

N_tr = 20000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
test(env, agent, N_tst, seed=123)

save_train_res('./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/MC_10_0.5',
               res)
train_results_plots(dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax/',
                    figname='MC_10_0.5',
                    names=['MC_10_0.5'],
                    numbers=[res])
Example No. 28
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np


env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 10000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001                  # learning rate
DECAY = 0.001
EPSILON = 0.2               # greedy policy
GAMMA = 0.9                 # reward discount
TARGET_REPLACE_ITER = 200   # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY, N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER, BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/simple_copy_repeat/DQN_size_10', res)
test(env, agent, N_tst, seed=123)
Example No. 29
import gym
import gym_cog_ml_tasks
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots
import numpy as np

env = gym.make('Simple_Copy-v0', n_char=10, size=100)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

# train(env, agent, N_tr, seed=123)
# test(env, agent, N_tst, seed=123)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/simple_copy/DQN_size_100', res)
test(env, agent, N_tst, seed=123)
Example No. 30
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res
import torch

torch.manual_seed(123)

env = gym.make('Simple_Copy_Repeat-v0', n_char=5, size=10, repeat=3)

N_tr = 100000
N_tst = 1000

n_hidden = 30
n_layers = 2
lr = 0.001
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr, n_layers)

res = train(env, agent, N_tr, seed=123)
# save the training records, including every episode's reward, action accuracy and f1 over iteration
save_train_res('./save/cp_r/LSTM', res)
test(env, agent, N_tst, seed=123)