Example #1
def experiment(log_dir, variant_overwrite, cpu=False):
    if not cpu:
        ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)

    # Load experiment from file.
    env, _, data, variant = load_experiment(log_dir, variant_overwrite)
    # Sanity check: the loaded env's sampled goal matches the variant's goal prior.
    assert all(
        a == b
        for a, b in zip(env.sampled_goal, variant['env_kwargs']['goal_prior'])
    )

    # Set log directory.
    exp_id = 'eval/ne{}-mpl{}-{}-rs{}/nhp{}'.format(
        variant['algo_kwargs']['num_episodes'],
        variant['algo_kwargs']['max_path_length'],
        ','.join(variant_overwrite['env_kwargs']['shaped_rewards']),
        variant['algo_kwargs']['reward_scale'],
        variant['historical_policies_kwargs']['num_historical_policies'],
    )
    exp_id = create_exp_name(exp_id)
    out_dir = os.path.join(log_dir, exp_id)
    print('Logging to:', out_dir)
    setup_logger(
        log_dir=out_dir,
        variant=variant,
        snapshot_mode='none',
        snapshot_gap=50,
    )

    # Load trained model from file.
    policy = data['policy']
    vf = data['vf']
    qf = data['qf']
    algorithm = SoftActorCritic(
        env=env,
        training_env=env,  # can't clone Box2D env because of SWIG
        save_environment=False,  # can't save Box2D env because of SWIG
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_kwargs'],
    )

    # Overwrite algorithm for p(z) adaptation (if model is SMM).
    if variant['intrinsic_reward'] == 'smm':
        discriminator = data['discriminator']
        density_model = data['density_model']
        SMMHook(base_algorithm=algorithm,
                discriminator=discriminator,
                density_model=density_model,
                **variant['smm_kwargs'])

    # Overwrite algorithm for historical averaging.
    if variant['historical_policies_kwargs']['num_historical_policies'] > 0:
        HistoricalPoliciesHook(
            base_algorithm=algorithm,
            log_dir=log_dir,
            **variant['historical_policies_kwargs'],
        )

    algorithm.to(ptu.device)
    algorithm.train()
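Note that SMMHook and HistoricalPoliciesHook are constructed purely for their side effects: neither return value is kept, so each hook evidently patches the base algorithm in place. A minimal sketch of that pattern, assuming a hypothetical train_step method (the real hooks' internals and rlkit's actual method names are not shown in this listing):

class ExampleHook:
    """Illustrative only: wraps one method of base_algorithm in place."""

    def __init__(self, base_algorithm):
        # Keep the original bound method, then monkey-patch our wrapper in.
        self._inner_train_step = base_algorithm.train_step  # hypothetical name
        base_algorithm.train_step = self._train_step

    def _train_step(self, *args, **kwargs):
        # Pre-/post-processing (e.g. reward relabeling) would go here.
        return self._inner_train_step(*args, **kwargs)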
Example #2

def experiment(log_dir, variant_overwrite, cpu=False):
    if not cpu:
        ptu.set_gpu_mode(True)  # optionally set the GPU (default=False)

    # Load experiment from file.
    env, _, data, variant = load_experiment(log_dir, variant_overwrite)
    # assert all([a == b for a, b in zip(env.sampled_goal, variant['env_kwargs']['goal_prior'])])

    # Set log directory.
    exp_id = 'eval/ne{}-mpl{}-{}-rs{}/nhp{}'.format(
        variant['algo_kwargs']['num_episodes'],
        variant['algo_kwargs']['max_path_length'],
        ','.join(variant_overwrite['env_kwargs']['shaped_rewards']),
        variant['algo_kwargs']['reward_scale'],
        variant['historical_policies_kwargs']['num_historical_policies'],
    )
    exp_id = create_exp_name(exp_id)
    out_dir = os.path.join(log_dir, exp_id)
    print('Logging to:', out_dir)
    setup_logger(
        log_dir=out_dir,
        variant=variant,
        snapshot_mode='none',
        snapshot_gap=50,
    )

    # Load trained model from file.
    policy = data['policy']
    vf = data['vf']
    qf = data['qf']
    algorithm = SoftActorCritic(
        env=env,
        training_env=env,  # can't clone Box2D env because of SWIG
        save_environment=False,  # can't save Box2D env because of SWIG
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_kwargs'],
    )

    # Overwrite algorithm for p(z) adaptation (if model is SMM).
    if variant['intrinsic_reward'] == 'smm':
        discriminator = data['discriminator']
        density_model = data['density_model']
        SMMHook(base_algorithm=algorithm,
                discriminator=discriminator,
                density_model=density_model,
                **variant['smm_kwargs'])

    # Overwrite algorithm for historical averaging.
    if variant['historical_policies_kwargs']['num_historical_policies'] > 0:
        HistoricalPoliciesHook(
            base_algorithm=algorithm,
            log_dir=log_dir,
            **variant['historical_policies_kwargs'],
        )

    algorithm.to(ptu.device)
    # algorithm.train()

    # Evaluate the loaded policy instead of training it.
    samples = algorithm.get_eval_paths()
    # for path in samples:
    #     print(path['observations'])

    # plt.figure()
    # plt.plot(samples[0]['observations'][:, 0], samples[0]['observations'][:, 1])
    # plt.plot(3, 2)
    # plt.show()
    print(env.reset())
    print(samples[0]['observations'])

    # Save each evaluation trajectory to disk.
    for i, path in enumerate(samples):
        np.save('./outtem/out%i.npy' % i, path['observations'])
    # print(algorithm.policy.get_action(np.array([0, 0])))
    from rlkit.samplers.util import rollout
    from rlkit.samplers.in_place import InPlacePathSampler
    # path = rollout(env, algorithm.eval_policy, 50)

    # Sample additional evaluation rollouts with a fresh in-place sampler.
    eval_sampler = InPlacePathSampler(
        env=env,
        policy=algorithm.eval_policy,
        max_samples=100,
        max_path_length=50,
    )
    path = eval_sampler.obtain_samples()
    print(path[0]['observations'])
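For reference, a stand-alone sketch of what the commented-out plotting above appears to be aiming at: reloading the trajectories saved by the np.save loop and overlaying their first two observation dimensions (the './outtem/out*.npy' pattern simply mirrors that loop):

import glob

import matplotlib.pyplot as plt
import numpy as np

# Overlay every saved evaluation trajectory in the 2D observation plane.
for fname in sorted(glob.glob('./outtem/out*.npy')):
    obs = np.load(fname)
    plt.plot(obs[:, 0], obs[:, 1], alpha=0.5)
plt.xlabel('obs[0]')
plt.ylabel('obs[1]')
plt.show()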
Example #3

def gen_state_space(toposorted_nodes):
    # Enumerate all 2^n binary assignments over the topologically sorted nodes.
    return itertools.product((0, 1), repeat=len(toposorted_nodes))


def extract_state_space_configuration_as_dict(configuration, toposorted_nodes, index):
    # Map each node to its value in this configuration.
    # (`index` is unused here but kept in the caller's signature.)
    return dict(zip(toposorted_nodes, configuration))


for EXPERIMENT_ID in tqdm(EXPERIMENT_RANGE):
    model, intervention, evidence, var_to_predict = load_experiment(EXPERIMENT_ID)
    toposorted_nodes = toposort(model)

    logprobs = []

    for index, configuration in enumerate(gen_state_space(toposorted_nodes)):
        # The values of a particular state-space configuration.
        configuration_dict = extract_state_space_configuration_as_dict(
            configuration, toposorted_nodes, index
        )

        # (Note: for endogenous variables, these are the values of
        #  their noisy flippers rather than the endogenous node values themselves.)
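The listing breaks off here, before logprobs is filled in. As a rough sketch of how one such configuration could be scored — assuming, as in the Pyro example below, that exogenous variables carry a Bernoulli 'prior' and endogenous variables a Bernoulli 'flip_noise', all strictly between 0 and 1; score_configuration is a hypothetical helper, not part of the original loop:

import math

def score_configuration(model, configuration_dict):
    # Hypothetical: log-probability of one joint assignment, treating the
    # exogenous variables and noisy flippers as independent Bernoullis.
    logprob = 0.0
    for var_name, value in configuration_dict.items():
        if model[var_name]['type'] == "exogenous":
            p = model[var_name]['prior']
        else:
            # For endogenous nodes the configuration stores the value of the
            # noisy flipper, whose probability is the flip noise.
            p = model[var_name]['flip_noise']
        logprob += math.log(p) if value == 1 else math.log(1.0 - p)
    return logprob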
Example #4
def run_pyro(experiment_id, num_samples, type_to_run):
    # Note that for Pyro we use this number of samples twice, because we sample
    # twice: once for the abduction step and once for the prediction step.
    # See the paper for more details.

    assert type_to_run in ("pyro_with_guide", "pyro_without_guide")

    model, intervention, evidence, var_to_predict = load_experiment(
        experiment_id)
    toposorted_nodes = toposort(model)

    USE_GUIDE = type_to_run == "pyro_with_guide"

    exog_sites = set()
    for var_name in toposorted_nodes:
        if model[var_name]['type'] == "exogenous":
            exog_sites.add(var_name)
        else:
            # a noisy flipper of an endogenous variable:
            exog_sites.add(get_noisy_flipper_name(var_name))
    exog_sites_list = list(exog_sites)

    def create_prob_proc_object__pyro(var_name,
                                      dict_values,
                                      data,
                                      guide,
                                      exog_values=None):
        if model[var_name]['type'] == "exogenous":
            if exog_values is not None and var_name in exog_values:
                dict_values[var_name] = exog_values[var_name]
            else:
                dict_values[var_name] = pyro.sample(
                    var_name,
                    dist.Bernoulli(model[var_name]['prior']),
                    obs=data.get(var_name, None))
        elif model[var_name]['type'] == "endogenous":
            weights = model[var_name]['parameters']
            parents = model[var_name]['parents']
            sum_parents = 0
            for index, parent in enumerate(parents):
                sum_parents += weights[index] * float(dict_values[parent])

            if sum_parents > 0.5:  # activation function
                val = 1
            else:
                val = 0

            exog_noise_parent_name = get_noisy_flipper_name(var_name)
            if exog_values is not None and exog_noise_parent_name in exog_values:
                dict_values[exog_noise_parent_name] = exog_values[
                    exog_noise_parent_name]
            else:
                # In the guide, force the noise flipper to whatever value makes
                # the sampled node agree with the observed evidence.
                if USE_GUIDE and guide and var_name in data:
                    if round(float(val)) == round(float(data[var_name])):
                        noise_flipper_prob = 0.0
                    else:
                        noise_flipper_prob = 1.0
                else:
                    noise_flipper_prob = model[var_name]['flip_noise']

                dict_values[exog_noise_parent_name] = pyro.sample(
                    exog_noise_parent_name,
                    dist.Bernoulli(noise_flipper_prob),
                )

            if round(float(dict_values[exog_noise_parent_name])) == 1:
                val = 1 if val == 0 else 0

            dict_values[var_name] = pyro.sample(var_name,
                                                dist.Delta(
                                                    torch.tensor(val).float()),
                                                obs=data.get(var_name, None))

    def generative_model(data={}, exog_values=None):
        dict_values = {}
        for node in toposorted_nodes:
            create_prob_proc_object__pyro(node,
                                          dict_values,
                                          data,
                                          guide=False,
                                          exog_values=exog_values)

        return dict_values

    def guide(data={}):
        dict_values = {}
        for node in toposorted_nodes:
            create_prob_proc_object__pyro(node,
                                          dict_values,
                                          data,
                                          guide=True,
                                          exog_values=None)

        return dict_values

    def abduction(evidence, n_samples):
        evidence = {d: torch.tensor(evidence[d]).float() for d in evidence}
        guide_to_use = None
        if USE_GUIDE:
            guide_to_use = guide

        posterior = pyro.infer.Importance(generative_model, guide_to_use,
                                          n_samples)
        posterior.run(evidence)
        if math.isnan(float(posterior.get_ESS())):
            raise AllSamplesRejectedException
        posterior = pyro.infer.EmpiricalMarginal(posterior,
                                                 sites=exog_sites_list)

        return posterior

    def intervention_prediction(node_of_interest, intervention, posterior,
                                n_samples):
        intervention = {
            k: torch.tensor(intervention[k]).float().flatten()
            for k in intervention
        }
        intervened_model = pyro.do(generative_model, data=intervention)

        estimate = []
        for _ in range(n_samples):
            exog_values_ = posterior.sample()
            exog_values = {}
            for index, var_name in enumerate(exog_sites_list):
                if var_name not in intervention:
                    exog_values[var_name] = exog_values_[index]

            intervened_model_with_values = intervened_model(
                exog_values=exog_values)
            result = intervened_model_with_values[node_of_interest]
            estimate.append(result)

        return estimate

    start = timeit.default_timer()
    posterior = abduction(evidence, num_samples)
    results = intervention_prediction(var_to_predict, intervention, posterior,
                                      num_samples)
    stop = timeit.default_timer()
    time_took = stop - start
    result = float(numpy.mean(results))
    return result, time_took
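A minimal usage sketch (the experiment id and sample count are illustrative):

# Hypothetical call: compare guided and unguided importance sampling.
for variant in ("pyro_with_guide", "pyro_without_guide"):
    try:
        estimate, seconds = run_pyro(0, 1000, variant)
        print("%s: estimate=%.3f (%.2fs)" % (variant, estimate, seconds))
    except AllSamplesRejectedException:
        print("%s: all importance samples were rejected" % variant)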
Example #5
OUTPUT_FOLDER = "output/"

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

parser = argparse.ArgumentParser(description='Visualise a network')
parser.add_argument(
    '--exp_id',
    required=False,
    type=int,
    default=0,
    help='an experiment id of a network',
)
args = parser.parse_args()

model, intervention, evidence, var_to_predict = load_experiment(args.exp_id)

Graph = DiGraph()


def get_noisy_flipper_name(node_id):
    return node_id + "_NF"


all_nodes = []
for node_id, node_data in model.items():
    all_nodes.append(node_id)
    if node_data['type'] == "endogenous":
        # Noisy Flippers:
        all_nodes.append(get_noisy_flipper_name(node_id))
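
The example ends before any edges are added. A plausible continuation, assuming Graph is a networkx DiGraph, that each endogenous node's 'parents' list names its structural parents, and that the noisy flipper should be drawn as an extra parent (layout and output filename are illustrative):

import matplotlib.pyplot as plt
import networkx as nx

Graph.add_nodes_from(all_nodes)
for node_id, node_data in model.items():
    if node_data['type'] == "endogenous":
        # Structural parents plus the node's own noise flipper.
        for parent in node_data['parents']:
            Graph.add_edge(parent, node_id)
        Graph.add_edge(get_noisy_flipper_name(node_id), node_id)

nx.draw_networkx(Graph, pos=nx.spring_layout(Graph, seed=0))
plt.savefig(os.path.join(OUTPUT_FOLDER, "network_%d.png" % args.exp_id))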