Пример #1
0
    # NOTE(review): the `if` header guarding this branch is above the visible
    # chunk; judging by the body it is presumably the "resume from a saved
    # run" path -- confirm against the full file.
    # Restore the model from `model_filename`.
    model.load(model_filename)
    # Unpickle the previously saved results object.
    # NOTE(review): `pickle.load` can execute arbitrary code embedded in the
    # file, so only load results files from a trusted source. The name
    # `input` also shadows the builtin of the same name.
    with open(results_filename, 'rb') as input:
        res = pickle.load(input)
    # Start from the epsilon stored in the results object, unless
    # `opt.epsilon` is below 1, in which case that value takes precedence.
    epsilon = res.epsilon
    if opt.epsilon < 1:
        epsilon = opt.epsilon
    # Epsilon schedule: begin at `epsilon`, advance by
    # (min_epsilon - 1) / n_train per entry (negative when min_epsilon < 1,
    # which is presumably always the case), and clamp every entry from below
    # at `min_epsilon`.
    # NOTE(review): np.arange with a non-integer step can produce one element
    # more than expected because of floating-point rounding.
    epsilons = np.maximum(
        np.arange(epsilon, epsilon - 1 + min_epsilon,
                  (-1 + min_epsilon) / n_train), min_epsilon)
else:
    # Schedule from 1 towards `min_epsilon` (the stop value is excluded by
    # np.arange) using the same per-entry step as the branch above.
    epsilons = np.arange(1, min_epsilon, (-1 + min_epsilon) / n_train)

# Main training loop: one round per entry of the epsilon schedule.
# NOTE(review): this comment previously read "size of memory", which does not
# describe the loop below -- likely a leftover from removed code.
for i_train in range(n_train):
    # Epsilon for this round; it is also recorded on the results object.
    epsilon = epsilons[i_train]
    res.epsilon = epsilon
    print("Training: round {}, epsilon = {}".format(i_train, round(epsilon,
                                                                   2)))
    # Per-round lists, not used within the visible chunk -- presumably they
    # collect episode lengths and scores further down (TODO confirm).
    lengths_i_train = []
    scores_i_train = []
    for i_episode in range(n_episodes):
        # Step counter for the current episode.
        i = 0
        done = False
        # Reset the environment and reshape its output to
        # (1, n_channels, env.nrow, env.ncol).
        grid = env.reset()
        grid = grid.reshape((1, n_channels, env.nrow, env.ncol))
        # Timestamp taken at the start of the episode.
        t0 = time.time()
        # Run at most `imax` steps per episode.
        while i < imax:
            i += 1
            # Copy of the grid as it is before the action is chosen.
            source = grid.copy()
            # np.random.rand() is uniform on [0, 1), so this branch is taken
            # with probability about `epsilon` and picks one of 4 actions
            # (integers 0..3) uniformly at random. The alternative branch is
            # outside the visible chunk.
            if epsilon >= np.random.rand():
                action = np.random.randint(4)