def adding_bad_features_test():
    from src.env.Amatrix_task import Amatrix

    n = 10
    m = 5
    env = Amatrix(n, m)

    features = env.get_approx_A()   # first m features
    weights = np.zeros(m)

    config = Config()
    config.parameter_size = m
    config.init_stepsize = 0.01
    autostep = AutoStep(config)

    sample_size = 50000
    additional_features = 30
    for k in range(additional_features + 1):
        print("Number of features in the representation: {0}".format(autostep.parameter_size))
        for i in range(sample_size):
            rand_row = np.random.randint(n)
            target = env.sample_target(rand_row, noisy=True)

            pred_features = features[rand_row, :]
            prediction = np.dot(pred_features, weights)
            error = target - prediction
            gradient, new_stepsize, new_weight_vector = autostep.update_weight_vector(error, pred_features, weights)
            weights = new_weight_vector
            if ((i+1) % 50000) == 0:
                print("\tSample number: {0}".format(i + 1))
                print("\t\tPrediction error: {0}".format(error))

        print("Theta star:\n{0}".format(env.theta_star))
        print("Estimated theta:\n{0}".format(weights))

        if k < additional_features:
            print("Adding new feature...")
            new_feature = env.get_new_bad_features(1)
            features = np.hstack((features, new_feature))
            autostep.increase_size(1)

            new_weights = np.zeros(m+1)
            new_weights[:m] = weights
            m += 1
            weights = new_weights
示例#2
0
def boyan_chain_test(steps=50000):
    from src.env.BoyanChain import BoyanChain
    from src.env.RandomFeatures_task import LinearFunctionApproximator
    from src.util import Config
    import matplotlib.pyplot as plt

    config = Config()
    checkpoint = 100
    """ Environment Setup """
    config.init_noise_var = 0.1
    config.num_obs_features = 4
    config.max_num_features = 9
    """ AutoTIDBD Setup """
    config.parameter_size = 4
    config.theta = 0.001
    config.tau = 10000
    config.init_stepsize = 0.001
    # to keep track of learning progress
    run_avg_msve = np.zeros(steps // checkpoint, dtype=np.float64)
    current_checkpoint = 0
    avg_msve = 0

    env = BoyanChain(config)
    approximator = LinearFunctionApproximator(config)
    optimizer = AutoTIDBD(config)
    """ Start of Learning"""
    curr_obs_feats = env.get_observable_features()
    for s in range(steps):
        state_value = approximator.get_prediction(curr_obs_feats)
        optimal_value = env.compute_true_value()
        # step in the environment
        _, r, next_obs_feats, term = env.step()
        next_state_value = approximator.get_prediction(next_obs_feats)
        # compute td error
        td_error = r + (1 - term) * next_state_value - state_value
        # update weights
        _, _, new_weights = optimizer.update_weight_vector(
            td_error,
            features=curr_obs_feats,
            weights=approximator.get_weight_vector(),
            discounted_next_features=next_obs_feats)
        approximator.update_weight_vector(new_weights)
        # update features
        curr_obs_feats = next_obs_feats
        # keep track of progress
        avg_msve += np.square(state_value - optimal_value) / checkpoint
        # check if terminal state
        if term:
            env.reset()
            curr_obs_feats = env.get_observable_features()
        # store learning progress so far
        if (s + 1) % checkpoint == 0:
            run_avg_msve[current_checkpoint] += avg_msve
            avg_msve *= 0
            current_checkpoint += 1

        if (s + 1) == (steps // 2):
            env.add_feature(k=4, noise=0.0, fake_feature=False)
            approximator.increase_num_features(4)
            optimizer.increase_size(4)
            curr_obs_feats = env.get_observable_features()

    print("The average MSVE is: {0:0.4f}".format(np.average(run_avg_msve)))

    xaxis = np.arange(run_avg_msve.size) + 1
    plt.plot(xaxis, run_avg_msve)
    plt.show()
    plt.close()