import gym
import gym_cog_ml_tasks  # assumed to register the 'Simple_Copy-v0' task environment
import numpy as np
from DQN.DQN_model import Agent_DQN
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('Simple_Copy-v0', n_char=10, size=100)

N_tr = 2000
N_tst = 1000

BATCH_SIZE = 32
LR = 0.001  # learning rate
DECAY = 0.001
EPSILON = 0.2  # greedy policy
GAMMA = 0.9  # reward discount
TARGET_REPLACE_ITER = 200  # target update frequency
MEMORY_CAPACITY = 5000
N_ACTIONS = env.action_space.n
N_STATES = 1
S_FOR_DONE = 0.0

agent = Agent_DQN(env.observation_space.n, env.action_space.n, MEMORY_CAPACITY,
                  N_STATES, LR, EPSILON, N_ACTIONS, TARGET_REPLACE_ITER,
                  BATCH_SIZE, GAMMA, DECAY, S_FOR_DONE)

res = train(env, agent, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy, and F1 score over the iterations
save_train_res('./save/simple_copy/DQN_size_100', res)
test(env, agent, N_tst, seed=123)
Example #2
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format='[%(asctime)s %(filename)s] %(message)s')
    with open('params.json') as json_file:
        config = json.load(json_file)

    if len(sys.argv) == 2:
        config['policy_dir'] = sys.argv[1]

    # loading the dataset
    train_loader, valid_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])

    # Getting the criterion, optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    ###################
    # Train and Prune #
    ###################
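    # Alternate training and pruning until the stopping criterion reported by __verify_stop is met.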
    done = False
    epoch = 0
    while not done:
        logging.info('')
        logging.info('Global step {}'.format(epoch))
        logging.info('')
        train(model, train_loader, valid_loader, criterion, optimizer, config['train']['epochs'], config['train']['print_every'], config['device'])

        if epoch % config['prune']['each'] == 0 and epoch != 0:
            logging.info('Pruning the model')
            model = __prune(model, config)
            
            if config['prune']['reward_weights']:
                logging.info('Rewarding the weights')
                model.reward()
        
            done = __verify_stop(model, config)
        
        epoch += 1

    ##############
    # Last Train #
    ##############
    train(model, train_loader, valid_loader, criterion, optimizer, 5, config['train']['print_every'], config['device'])
    
    checkpoint = config['policy_dir'] + '/Train-{}-epochs__Prune-each-{}__Prune-rate-{}.pt'.format(
            config['train']['epochs'], config['prune']['each'], config['prune']['rate']
    )
    torch.save(model.state_dict(), checkpoint)
    logging.info("Model checkpoint saved to %s" % checkpoint)

    ######################
    # Validate the model #
    ######################
    test_loss = 0.0
    classes = range(0, 10)
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    model.eval() # prep model for evaluation

    for data, target in valid_loader:
        data, target = data.to(config['device']), target.to(config['device'])

        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update test loss 
        test_loss += loss.item()*data.size(0)
        # convert output probabilities to predicted class
        _, pred = torch.max(output, 1)
        # compare predictions to true label
        correct = np.squeeze(pred.eq(target.data.view_as(pred)))
        # calculate test accuracy for each object class
        for i in range(len(target)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

    # calculate and print avg test loss
    test_loss = test_loss/len(valid_loader.sampler)
    logging.info('Valid Loss: {:.6f}\n'.format(test_loss))

    results = []
    for i in range(10):
        if class_total[i] > 0:
            logging.info('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                str(i), 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
            results.append([str(i), 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])])
        else:
            logging.info('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

    logging.info('Test Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))

    results = pd.DataFrame(results)
    results.to_csv(
        config['policy_dir'] + '/Accuracy__Lottery-train-{}-epochs__Prune-each-{}__Prune-rate-{}.tsv'.format(
            config['train']['epochs'], config['prune']['each'], config['prune']['rate']
        ), 
        index = False, 
        header = ['Class', 'Accuracy', 'Right_Instances', 'Total_Instances'],
        sep = '\t'
    )
Example #3
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test, save_train_res, load_train_res, train_results_plots

env = gym.make('12AX_CPT-v0', size=200)

N_tr = 5000
N_tst = 500
n_hidden = 50
lr = 0.01
agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

res = train(env, agent, N_tr, print_progress=True, seed=123)
save_train_res('./save/ax_cpt/LSTM_50_res', res)

train_results_plots(dir='./save/ax_cpt',
                    figname='LSTM_50',
                    names=['LSTM_50'],
                    numbers=[res])
Example #4
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")
    with open('params.json') as json_file:
        config = json.load(json_file)

    args = __create_args()

    config = __adjust_config(args, config)

    # loading the dataset
    train_loader, valid_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])
    initial_mask = model.masks

    # Getting the criterion, optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    #########################
    # Agent and Environment #
    #########################
    ACTIONS = create_environment(model.masks, config['environment_protocol'])
    random.seed(42)
    shuffle(ACTIONS)
    N_STATES = len(ACTIONS)
    N_EPISODES = config['mdp']['N_EPISODES']
    MAX_EPISODE_PER_STEPS = config['mdp']['MAX_STEPS_PER_EPISODES']
    MIN_ALPHA = config['mdp']['MIN_ALPHA']
    GAMMA = config['mdp']['GAMMA']
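    # linearly anneal the learning rate from 1.0 down to MIN_ALPHA over the episodes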
    alphas = np.linspace(1.0, MIN_ALPHA, N_EPISODES)

    q_table = dict()
    start_state = State(model.masks, ACTIONS)

    ##########################
    # Create sub_working_dir #
    ##########################
    sub_working_dir = '{}/results/{}/{}/{}/{}'.format(
        config['working_dir'], config['model']['name'],
        '_' + config['mdp']['Q_COMPUTATION'],
        '{}_{}_{}/{}_{}'.format(time.strftime("%d", time.localtime()),
                                time.strftime("%m", time.localtime()),
                                time.strftime("%Y", time.localtime()),
                                time.strftime("%H", time.localtime()),
                                time.strftime("%M", time.localtime())),
        'ALPHA_SEARCH__MIN_ALPHA-{}__GAMMA-{}__PRUNE_TYPE-{}__PRUNE_PERCENT-{}__EPSILON-{}__REWARD_TYPE-{}'
        .format(MIN_ALPHA,
                GAMMA if config['mdp']['Q_COMPUTATION'] != 'QL_M' else 'None',
                config['environment_protocol'],
                config['agent']['prune_percentage'],
                config['agent']['epsilon'], config['agent']['reward_type']))

    if not os.path.exists(sub_working_dir):
        os.makedirs(sub_working_dir)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    ###############
    # Begin Train #
    ###############
    train(model, train_loader, valid_loader, criterion, optimizer,
          config['train']['epochs'], config['train']['print_every'],
          config['device'])
    loss, accuracy = validation(model, valid_loader, criterion)
    logging.info(
        'Validation Loss performed: {}\tValidation Accuracy performed: {}'.
        format(loss, accuracy))

    if config['agent']['reward_type'] == 'ACCURACY':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'LOSS':
        start_state.last_reward = -loss
    elif config['agent']['reward_type'] == 'ACC_COMPRESSION':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'MY_RCRA':
        start_state.last_reward = -(1. - accuracy)

    #########
    # Prune #
    #########
    for e in range(N_EPISODES):

        state = deepcopy(start_state)
        total_reward = .0
        ALPHA = alphas[e]
        agent = Agent(config, ACTIONS, model, valid_loader, criterion)

        for i in range(MAX_EPISODE_PER_STEPS):
            action = agent.choose_action(q_table, state)

            next_state, reward, done = agent.act(state, action)
            total_reward += reward

            if config['mdp']['Q_COMPUTATION'] == 'QL_M':
                # Q-Learning from Ghallab, Nau and Traverso
                q_value(q_table, state)[action] = q_value(q_table, state, action) + \
                    ALPHA * (reward + np.max(q_value(q_table, next_state)) - q_value(q_table, state, action))

            elif config['mdp']['Q_COMPUTATION'] == 'QL_WIKI':
                # Q-Learning update as given on Wikipedia
                q_value(q_table, state)[action] = (1. - ALPHA) * q_value(q_table, state, action) + \
                    ALPHA * (reward + GAMMA * np.max(q_value(q_table, next_state)))

            del state
            state = next_state
            if done:
                break

        logging.info("Episode {}: reward type {}: total reward -> {}".format(
            e + 1, config['agent']['reward_type'], total_reward))

    #####################
    # Save the solution #
    #####################
    q_table_saver(q_table, config['sub_working_dir'], '/q_table.tsv')

    agent = Agent(config, ACTIONS, model, valid_loader, criterion)
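    # Greedily follow the learned Q-table from the start state, recording each chosen pruning action and its reward.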
    my_state = start_state
    result = []
    done = False
    while not done:
        sa = q_value(q_table, my_state)
        my_action = np.argmax(sa)
        action = my_state.environment[my_action]
        my_state, reward, done = agent.act(my_state, my_action)
        result.append([action, reward])

    final = pd.DataFrame(result, columns=['Action', 'Reward'])
    final.to_csv(config['sub_working_dir'] + '/actions_to_prune.tsv',
                 sep='\t',
                 index=False)
Example #5
import gym
import gym_cog_ml_tasks
from LSTM.LSTM_model import Agent_LSTM
from common.utils import train, test


env = gym.make('AX_12-v0', size=10, prob_target=0.5)

N_tr = 1000
N_tst = 100
n_hidden = 20
lr = 0.01

agent = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr)

train(env, agent, N_tr)
test(env, agent, N_tst)
Example #6
MAX_MEM_SIZE = 400
LR = 1e-3
EPSILON = 0.999
GAMMA = 0.9
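# architecture settings for the recurrent Q-network: LSTM layers followed by linear layers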
PARAMS = {
    "lstm_hidden_size": 50,
    "n_lstm_layers": 2,
    "linear_hidden_size": 50,
    "n_linear_layers": 1
}

env = BabiEnv(TASK_ID)
env_test = BabiEnv(TASK_ID, mode='test')

agent = Agent_DRQN(len(env.state_space), len(env.action_space), MAX_MEM_SIZE, LR, \
                    EPSILON, GAMMA, PARAMS)

res_tr = train(env, agent, N_tr, seed=123, print_progress=False, render=False)
res_te = test(env_test, agent, N_tst, seed=123, print_progress=False)

save_train_res('./results/{0}_drqn_tr2'.format(TASK_ID), res_tr)
save_train_res('./results/{0}_drqn_te2'.format(TASK_ID), res_te)

te1, te2, te3 = load_train_res('./results/{0}_drqn_te2.npy'.format(TASK_ID))
res_tr = load_train_res('./results/{0}_drqn_tr2.npy'.format(TASK_ID))

train_results_plots(dir='./plots/', figname='{0}_tr'.format(TASK_ID), names=['DRQN_tr'], \
                    numbers=[res_tr])

print('Plots saved for task', TASK_ID)
Example #7
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")
    with open('params.json') as json_file:
        config = json.load(json_file)

    # Create sub_working_dir
    sub_working_dir = '{}/{}/try{}/{}'.format(
        config['working_dir'],
        config['model']['name'], 
        config['try'],
        # time.strftime("%Y%m%d%H%M%S", time.localtime())
        '{}_{}/{}/{}_{}'.format(
            time.strftime("%Y", time.localtime()),
            time.strftime("%m", time.localtime()),
            time.strftime("%d", time.localtime()),
            time.strftime("%H", time.localtime()),
            time.strftime("%S", time.localtime())
        ) 
    )
    if not os.path.exists(sub_working_dir):
        os.makedirs(sub_working_dir)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    # Create tf summary writer
    config["tensorboard_writer"] = SummaryWriter(sub_working_dir)
    logging.info("Please using 'python -m tensorboard.main --logdir={}'".format(sub_working_dir))

    # loading the dataset
    train_loader, valid_loader, test_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])

    # Getting the criterion, optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    ###############
    # Begin Train #
    ###############
    train(model, train_loader, valid_loader, criterion, optimizer, config['train']['epochs'], config['train']['print_every'], config['device'])

    ##############
    # Begin Test #
    ##############
    # initialize lists to monitor test loss and accuracy
    test_loss = 0.0
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))

    model.eval() # prep model for evaluation

    for data, target in test_loader:
        data, target = data.to(config['device']), target.to(config['device'])

        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update test loss 
        test_loss += loss.item()*data.size(0)
        # convert output probabilities to predicted class
        _, pred = torch.max(output, 1)
        # compare predictions to true label
        correct = np.squeeze(pred.eq(target.data.view_as(pred)))
        # calculate test accuracy for each object class
        for i in range(len(target)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

    # calculate and log the average test loss
    test_loss = test_loss/len(test_loader.sampler)
    logging.info('Test Loss: {:.6f}\n'.format(test_loss))

    for i in range(10):
        if class_total[i] > 0:
            logging.info('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                str(i), 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            logging.info('Test Accuracy of %5s: N/A (no training examples)' % (str(i)))

    logging.info('Test Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))    
Example #8
import gym
import gym_cog_ml_tasks
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from common.utils import train, test, save_train_res, train_results_plots
from MonteCarlo_model import Agent_MC
import torch

torch.manual_seed(123)
env = gym.make('12_AX_S-v0', size=10, prob_target=0.5)

seed = 123
N_tr = 50000
N_tst = 1000

agent = Agent_MC(env.observation_space.n, env.action_space.n)

res = train(env, agent, N_tr, custom_reward=lambda r: r * 10, seed=seed)
test(env, agent, N_tst, seed=seed)
save_train_res(
    './agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax_s/MC_10_0.5', res)
train_results_plots(
    dir='./agents/cog_tasks_rl_agents/MonteCarlo/save/12_ax_s/',
    figname='MC_10_0.5',
    names=['MC_10_0.5'],
    numbers=[res])
Example #9
def fasttext_train(trained_model_dir: OutputDirectory(type='ModelDirectory'),
                   training_data_dir: InputDirectory() = None,
                   validation_data_dir: InputDirectory() = None,
                   epochs=1,
                   batch_size=64,
                   max_len=32,
                   embed_dim=300,
                   hidden_size=256,
                   ngram_size=200000,
                   dropout=0.5,
                   learning_rate=0.001):
    print('============================================')
    print('training_data_dir:', training_data_dir)
    print('validation_data_dir:', validation_data_dir)
    path_word_to_index = os.path.join(training_data_dir, 'word_to_index.json')
    word_to_index = get_vocab(path_word_to_index)
    path_label = os.path.join(training_data_dir, 'label.txt')
    map_id_label, map_label_id = get_id_label(path_label)
    class_num = len(map_id_label)
    vocab_size = len(word_to_index)
    stop_patience = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)
    # load training dataset
    path = os.path.join(training_data_dir, 'data.txt')
    train_samples = load_dataset(file_path=path,
                                 word_to_index=word_to_index,
                                 map_label_id=map_label_id,
                                 max_len=max_len,
                                 ngram_size=ngram_size)
    train_iter = DataIter(samples=train_samples,
                          batch_size=batch_size,
                          shuffle=True,
                          device=device)
    # load validation dataset
    path = os.path.join(validation_data_dir, 'data.txt')
    dev_samples = load_dataset(file_path=path,
                               word_to_index=word_to_index,
                               map_label_id=map_label_id,
                               max_len=max_len,
                               ngram_size=ngram_size)
    dev_iter = DataIter(samples=dev_samples,
                        batch_size=batch_size,
                        shuffle=True,
                        device=device)

    model = FastText(vocab_size=vocab_size,
                     class_num=class_num,
                     dropout=dropout,
                     embed_dim=embed_dim,
                     hidden_size=hidden_size,
                     ngram_size=ngram_size)
    # watch parameters
    print(model.parameters)
    # copy word_to_index.json and label.txt for later scoring.
    shutil.copy(src=path_word_to_index, dst=trained_model_dir)
    shutil.copy(src=path_label, dst=trained_model_dir)
    # shared parameters for loading dataset
    shared_params = {'max_len': max_len, 'ngram_size': ngram_size}
    path = os.path.join(trained_model_dir, 'shared_params.json')
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(shared_params, f)
    start = time.time()
    train(model,
          trained_model_dir,
          train_iter=train_iter,
          dev_iter=dev_iter,
          epochs=epochs,
          learning_rate=learning_rate,
          stop_patience=stop_patience,
          device=device)
    end = time.time()
    print('\nduration of training process: %.2f sec' % (end - start))
    print('============================================')
Example #10
n_hidden = 10
n_layers = 1
lr = 0.01
lstm = Agent_LSTM(env.observation_space.n, env.action_space.n, n_hidden, lr,
                  n_layers)

hierarchy_num = 3
learn_mode = 'SL'
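# per-level hyperparameters for the 3-level HER agent; each array supplies one value per hierarchy level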
hyperparam = {
    'alpha': np.ones(hierarchy_num) * 0.075,
    'lambd': np.array([0.1, 0.5, 0.99]),
    'beta': np.ones(hierarchy_num) * 15,
    'bias': np.array([1 / (10**i) for i in range(hierarchy_num)]),
    'gamma': 15
}
her = Agent_HER(env.observation_space.n, env.action_space.n, hierarchy_num,
                learn_mode, hyperparam)

res_lstm = train(env, lstm, N_tr, seed=123)
res_her = train(env, her, N_tr, seed=123)
# save the training records: every episode's reward, action accuracy, and F1 score over the iterations
save_train_res('./save/LSTM', res_lstm)
save_train_res('./save/HER', res_her)

test(env, lstm, N_tst, seed=123)
test(env, her, N_tst, seed=123)

res_lstm = load_train_res('./save/LSTM.npy')
res_her = load_train_res('./save/HER.npy')
train_results_plots(dir='./save/', figname='test', names=['LSTM', 'HER'], \
                    numbers=[res_lstm, res_her])