# Example #1 (snippet score: 0)
def rollout_MC_per_run(env, runtime, runtimes, episodes, target, gamma, Lambda,
                       alpha, beta):
    """Run one Monte-Carlo evaluation and return its final statistics.

    Returns a tuple of (final expected-return estimate, final
    return-variance estimate, empirical state-visit distribution).
    ``Lambda``, ``alpha`` and ``beta`` are accepted for interface
    compatibility with sibling rollout helpers but are not used here.
    """
    print('rolling out %d of %d for MC' % (runtime + 1, runtimes))
    exp_trace, var_trace, visit_counts = MC(env, episodes, target, target,
                                            None, gamma)
    # Normalise raw visit counts into an empirical distribution.
    visit_dist = visit_counts / np.sum(visit_counts)
    return exp_trace[-1], var_trace[-1], visit_dist
    def main(self):
        """Iteratively re-estimate the mixture parameters until the pairwise
        log-likelihood converges, or ``max_iter`` rounds elapse.

        Relies on instance state: ``self.dim``, ``self.data``, ``self.Q``,
        ``self.lambda1`` / ``self.lambda2`` and ``self.update`` (which
        presumably refreshes those parameters -- its body is not visible in
        this excerpt; confirm against the class definition).

        Returns:
            The last computed log-likelihood value.
        """
        # set hyperparameters
        theta = [0.01 for i in range(self.dim + 1)
                 ]  # initialize all (dim + 1) deltas to the same value
        threshold = 0.0001  # convergence tolerance on the likelihood delta
        max_iter = 50  # hard cap on refinement rounds

        pairs = []  # all (self.dim ** 2) ordered index pairs [i, j]
        for i in range(self.dim):
            for j in range(self.dim):
                pairs.append([i, j])

        # Build the frequency table f (and p) from the raw data.
        n = MC.MC()
        f, p = n.rtm(self.data)

        log_likelihood = 0  # 0 doubles as a "first iteration" sentinel
        for k in range(max_iter):
            theta = self.update(theta, pairs, f)

            # compute new log likelihood
            new_log_likelihood = 0
            for i in range(self.dim):
                for j in range(len(pairs)):
                    # Mixture of the two chains' transition probabilities,
                    # weighted by lambda1 / lambda2.
                    i_1, i_2 = pairs[j][0], pairs[j][1]
                    row = i_1 + i_2 * self.dim
                    denominator = self.lambda1 * self.Q[i_1][
                        i] + self.lambda2 * self.Q[i_2][i]
                    if denominator != 0:  # skip log(0) terms entirely
                        new_log_likelihood += f[row][i] * math.log(denominator)
            # check whether log likelihood converges
            # NOTE(review): the difference is not abs()-ed, so a *decrease*
            # in likelihood also triggers the early return -- confirm that
            # is intended. Likewise, a likelihood of exactly 0 would be
            # mistaken for the first-iteration sentinel above.
            if log_likelihood == 0:
                log_likelihood = new_log_likelihood
            elif new_log_likelihood - log_likelihood < threshold:
                return new_log_likelihood
            else:
                log_likelihood = new_log_likelihood
        print()
        print("final:", self.lambda1, self.lambda2)
        print("final:", self.Q)
        print("log_likelihood:", log_likelihood)
        return log_likelihood
# Example #3 (snippet score: 0)
import math

import MC
import parse_pos
import proj

list_pos = parse_pos.recup_pos()

# Least-squares coordinate estimate: Xchap holds X, Y, Z interleaved for
# every root point (points without a parent).
Xchap = MC.MC(list_pos)
j = 0
for pos in list_pos:
    if not pos.parent:
        pos.X_MC = Xchap[j]
        pos.Y_MC = Xchap[j + 1]
        # Fix: attribute was previously assigned as 'Z_mc', inconsistent
        # with X_MC / Y_MC -- almost certainly a typo that left the Z
        # coordinate unavailable downstream.
        pos.Z_MC = Xchap[j + 2]
        j += 3
        pos.add_to_file("resultats.csv")

points = proj.lecture("resultats.csv")
pr = proj.choix_proj_cc(points)

# Fix: rad_to_deg() was called but never defined or imported in this file;
# use the standard-library equivalent instead.
print('Paramètres de la projection conique conforme minimisant le module linéaire:\n',
      'Phi0 =', math.degrees(pr.phi0), '\n',
      'Phi1 =', math.degrees(pr.phi1), '\n',
      'Phi2 =', math.degrees(pr.phi2), '\n',
      'X0 :', pr.X0, 'Y0 :', pr.Y0, '\n',
      "ellipsoide de référence WGS 84")

proj.affiche(points, pr)

# Example #4 (snippet score: 0)
                                      lambda x: onehot(x, N),
                                      N,
                                      int(1e6),
                                      gamma=0.99)
    stationary_dist = return_counts / np.sum(return_counts)
    true_expectation, true_variance = E[-1], V[-1]
    np.savez(filename,
             true_expectation=true_expectation,
             true_variance=true_variance,
             stationary_dist=stationary_dist)

# Ground-truth value statistics via exact iterative policy evaluation
# (no sampling).
j, v, s = iterative_policy_evaluation(env, target_policy, gamma=gamma)
print('Iterative policy evaluation')

# Constant lambda = 1 for every state; MC then estimates return
# expectation, variance and state-visit counts over 10000 on-policy
# episodes.
Lambda = LAMBDA(env, lambda_type='constant', initial_value=np.ones(N))
mc_j, mc_v, mc_counts = MC(env, 10000, target_policy, target_policy, Lambda,
                           gamma)

# test both on-policy and off-policy
Lambda = LAMBDA(env, lambda_type='constant', initial_value=np.ones(N))
# NOTE(review): this call continues past the end of this excerpt; with a
# behavior policy distinct from the target policy this is presumably the
# off-policy evaluation run -- confirm against the full file.
off_mc_results, off_mc_var_results = eval_method_with_variance(
    MC,
    env,
    true_expectation,
    true_variance,
    stationary_dist,
    behavior_policy,
    target_policy,
    Lambda,
    gamma=gamma,
    alpha=0.05,
    beta=0.05,
# Example #5 (snippet score: 0)
from utils import *
from joblib import Parallel, delayed
from MC import *
import numpy.matlib, argparse

parser = argparse.ArgumentParser(description='')
parser.add_argument('--episodes', type=int, default=int(1e8), help='')
args = parser.parse_args()

# One statement per line (was semicolon-joined).
env = gym.make('FrozenLake-v0')
env.reset()
N = env.observation_space.n


def gamma(x):
    """Constant discount factor; the MC routine expects a callable."""
    # Was `gamma = lambda x: 0.95` -- PEP 8 (E731) prefers a def for a
    # named callable.
    return 0.95


# Target policy: the same 4-action distribution replicated for every state.
target_policy = np.matlib.repmat(np.array([0.2, 0.3, 0.3, 0.2]).reshape(1, 4), env.observation_space.n, 1)

# get ground truth expectation, variance and stationary distribution
filename = 'frozenlake_truths_heuristic_%g.npz' % args.episodes
try:
    loaded = np.load(filename)
    true_expectation, true_variance, stationary_dist = loaded['true_expectation'], loaded['true_variance'], loaded['stationary_dist']
except FileNotFoundError:
    # Cache miss: estimate ground truth by brute-force Monte-Carlo and
    # persist it so later runs can reload instead of re-simulating.
    true_expectation, true_variance, return_counts = MC(env, args.episodes, target_policy, target_policy, gamma)
    stationary_dist = return_counts / np.sum(return_counts)
    np.savez(filename, true_expectation=true_expectation, true_variance=true_variance, stationary_dist=stationary_dist)
# (removed a dead trailing `pass` statement)