示例#1
0
from os.path import join

import numpy as np
import torch
from sacred import Experiment
from sacred.observers import FileStorageObserver

from onlikhorn.algorithm import sinkhorn, online_sinkhorn, random_sinkhorn, subsampled_sinkhorn, schedule
from onlikhorn.cache import torch_cached
from onlikhorn.dataset import get_output_dir, make_data
from onlikhorn.gaussian import sinkhorn_gaussian

# Experiment set-up: the experiment name doubles as the name of the output
# sub-directory (under get_output_dir()) that the FileStorageObserver is
# given.
exp_name = 'online_grid_quiver_2'
exp = Experiment(exp_name)
exp_dir = join(get_output_dir(), exp_name)
exp.observers = [FileStorageObserver(exp_dir)]


@exp.config
def config():
    """Declare the default configuration values of the experiment.

    The function is registered through ``@exp.config``; its local variables
    are the configuration entries, so their names must not be changed.
    """
    data_source = 'gmm_1d'
    n_samples = 10000
    max_length = 20000
    device = 'cuda'

    # Overridden (original author's note; the overriding code is not
    # visible here)
    batch_size = 100
    seed = 0
    epsilon = 1e-2

    method = 'sinkhorn'
示例#2
0
            config["problem_spec"],
            config["inference_algorithm"],
            config["temperature"],
        )

        if config["inference_algorithm"] == "rlsp":
            custom_id += "_" + config["solver"]
            if config["solver"] == "ppo":
                custom_id += "_{}_{}".format(config["solver_iterations"],
                                             config["reset_solver"])

        return custom_id  # started_event returns the _run._id


# Experiment "rlsp": SetID provides the custom run id, and the
# FileStorageObserver is created for the "results" directory.
ex = Experiment("rlsp")
ex.observers = [SetID(), FileStorageObserver.create("results")]


def get_all_rewards_from_latent_space(env, latent_space, r_inferred, r_task,
                                      inferred_weight):
    """Combine the task reward with the inferred latent-space reward.

    For every state id in ``range(env.nS)`` the state is converted to an
    observation, encoded with ``latent_space.encoder`` and scored as the dot
    product with ``r_inferred``.

    Returns:
        ``env.f_matrix @ r_task + inferred_weight * inferred_rewards``, where
        ``inferred_rewards`` holds one entry per state id.
    """
    n_states = env.nS
    inferred_rewards = np.zeros(n_states)
    encode = latent_space.encoder
    for idx in range(n_states):
        latent = encode(env.s_to_obs(env.get_state_from_num(idx)))
        inferred_rewards[idx] = np.dot(r_inferred, latent)
    task_rewards = env.f_matrix @ r_task
    return task_rewards + inferred_weight * inferred_rewards


def print_rollout(env, start_state, policies, last_steps_printed, horizon):
    if last_steps_printed == 0:
    priority = 50  # very high priority to set id

    def started_event(self, ex_info, command, host_info, start_time, config,
                      meta_info, _id):
        """Build the custom run id for an average-features ablation run.

        The id is the current local time (``YYYYmmdd_HHMMSS``) followed by
        ``_ablation_average_features`` and, when ``config["result_folder"]``
        is not None, the last path component of that folder.
        """
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        folder = config["result_folder"]
        if folder is None:
            return f"{stamp}_ablation_average_features"
        # Keep only the last path component, ignoring surrounding slashes.
        leaf = folder.strip("/").split("/")[-1]
        # started_event returns the _run._id
        return f"{stamp}_ablation_average_features_{leaf}"


# Experiment "mujoco-ablation-average-features": SetID provides the custom
# run id, and the FileStorageObserver is created for
# "results/mujoco/ablation_average_features".
ex = Experiment("mujoco-ablation-average-features")
ex.observers = [
    SetID(),
    FileStorageObserver.create("results/mujoco/ablation_average_features"),
]


@ex.config
def config():
    """Declare the default configuration: ``result_folder`` is None.

    The local is never read inside this function (hence the F841
    suppression); it is presumably collected by the ``@ex.config``
    decorator -- confirm against the experiment framework.
    """
    result_folder = None  # noqa:F841


@ex.automain
def main(_run, result_folder, seed):
    ex = FileExperimentResults(result_folder)
    env_id = ex.config["env_id"]
    latent_model_checkpoint = ex.info["latent_model_checkpoint"]
    current_states = ex.info["current_states"]
# Changes the run _id and thereby the path to which the FileStorageObserver
# writes the results.
# cf. https://github.com/IDSIA/sacred/issues/174
class SetID(RunObserver):
    """Run observer that uses the start timestamp as the run id."""

    priority = 50  # very high priority to set id

    def started_event(
        self, ex_info, command, host_info, start_time, config, meta_info, _id
    ):
        """Return the current local time formatted as ``YYYYmmdd_HHMMSS``."""
        now = datetime.datetime.now()
        # started_event returns the _run._id
        return now.strftime("%Y%m%d_%H%M%S")


# Experiment "policy-discriminator": SetID provides the timestamp run id, and
# the FileStorageObserver is created for "results/policy_discriminator".
ex = Experiment("policy-discriminator")
ex.observers = [
    SetID(),
    FileStorageObserver.create("results/policy_discriminator")
]


def get_trajectories(env, policy_path, policy_type, n_rollouts, time_horizon):
    if policy_type == "sac":
        from stable_baselines import SAC

        model = SAC.load(policy_path)

        def get_action(obs):
            return model.predict(obs, deterministic=True)[0]

    elif policy_type == "gail":
        from imitation.policies import serialize
        from stable_baselines3.common.vec_env import DummyVecEnv
示例#5
0
# writes the results
# cf. https://github.com/IDSIA/sacred/issues/174
class SetID(RunObserver):
    """Run observer that builds the run id from a timestamp and a label.

    The label is the last path component of ``config["experiment_folder"]``.
    """

    priority = 50  # very high priority to set id

    def started_event(
        self, ex_info, command, host_info, start_time, config, meta_info, _id
    ):
        """Return ``<YYYYmmdd_HHMMSS>_<last component of experiment_folder>``."""
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        # Ignore surrounding slashes so a trailing "/" does not yield an
        # empty label.
        folder_parts = config["experiment_folder"].strip("/").split("/")
        # started_event returns the _run._id
        return "_".join((stamp, folder_parts[-1]))


# Experiment "mujoco-eval": SetID provides the custom run id, and the
# FileStorageObserver is created for "results/mujoco/eval".
ex = Experiment("mujoco-eval")
ex.observers = [SetID(), FileStorageObserver.create("results/mujoco/eval")]


def print_rollout(env, policy, latent_space, decode=False):
    """Run one episode with ``policy`` and print every step.

    Starting from ``env.reset()``, actions are requested with
    ``policy.predict(state, deterministic=False)`` until ``env.step``
    reports ``done``.  For each step the action, the observation and the
    reward are printed; with ``decode=True`` the printed observation is
    ``latent_space.decoder(state)``, otherwise the state itself.
    """
    current = env.reset()
    while True:
        action, _unused = policy.predict(current, deterministic=False)
        current, step_reward, finished, _info = env.step(action)
        shown = latent_space.decoder(current) if decode else current
        print("action", action)
        print("obs", shown)
        print("reward", step_reward)
        if finished:
            break
示例#6
0
                scheduler_D.step(G_iter)

        if extrapolation and update:
            # Reset extrapolated optimizers
            optimizer_D.deextrapolate()
            optimizer_G.deextrapolate()

        iteration += 1
        iteration_time = time.perf_counter() - t0
        elapsed_time += iteration_time
        cur_elapsed_time += iteration_time

        # Benchmark
        if upd_D and upd_G:
            key = 'DG'
        elif upd_D:
            key = 'D'
        else:
            key = 'G'
        sum_iteration_times[key] += iteration_time
        count_iterations[key] += 1
        avg_iteration_times[
            key] = sum_iteration_times[key] / count_iterations[key]


if __name__ == '__main__':
    # Create the output directory in a single call: exist_ok=True removes the
    # check-then-create race of a separate os.path.exists() test (another
    # process creating the directory in between would make makedirs raise).
    os.makedirs(exp_dir, exist_ok=True)
    # Store the run's results under exp_dir, then hand control to the
    # experiment's command-line interface.
    exp.observers = [FileStorageObserver.create(exp_dir)]
    exp.run_commandline()
    priority = 50  # very high priority to set id

    def started_event(self, ex_info, command, host_info, start_time, config,
                      meta_info, _id):
        """Build the custom run id for a waypoints ablation run.

        The id is the current local time (``YYYYmmdd_HHMMSS``) followed by
        ``_ablation_waypoints`` and, when ``config["result_folder"]`` is not
        None, the last path component of that folder.
        """
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        base_id = f"{stamp}_ablation_waypoints"
        folder = config["result_folder"]
        if folder is None:
            return base_id
        # Keep only the last path component, ignoring surrounding slashes.
        leaf = folder.strip("/").split("/")[-1]
        # started_event returns the _run._id
        return f"{base_id}_{leaf}"


# Experiment "mujoco-ablation-waypoints": SetID provides the custom run id,
# and the FileStorageObserver is created for
# "results/mujoco/ablation_waypoints".
ex = Experiment("mujoco-ablation-waypoints")
ex.observers = [
    SetID(),
    FileStorageObserver.create("results/mujoco/ablation_waypoints"),
]


class LatentSpaceTargetStateRewardWrapper(gym.Wrapper):
    def __init__(self, env, latent_space, target_states):
        self.env = env
        self.latent_space = latent_space
        self.target_states = [ts / np.linalg.norm(ts) for ts in target_states]
        self.state = None
        self.timestep = 0
        super().__init__(env)

    def reset(self):
        obs = super().reset()
        self.state = self.latent_space.encoder(obs)