def test_get_set_params_as_property(self):
        """A coefficient written through the params property setter is read
        back unchanged through the getter."""
        grammar_cache = 'hyper_grammar_guac_10k_with_clique_collapse.pickle'  # 'hyper_grammar.pickle'
        first_runner = PolicyGradientRunner(
            'hypergraph:' + grammar_cache,
            BATCH_SIZE=10,
            reward_fun=lambda x: 0,
            max_steps=60,
            num_batches=2,
            lr=0.05,
            entropy_wgt=0.0,
            # lr_schedule=shifted_cosine_schedule,
            root_name='test',
            preload_file_root_name=None,
            plot_metrics=True,
            save_location='./data',
            metric_smooth=0.0,
            decoder_type='graph_conditional',  # alternatives: 'rnn_graph', 'attention'
            on_policy_loss_type='advantage_record',
            rule_temperature_schedule=None,
            # lambda x: toothy_exp_schedule(x, scale=num_batches),
            eps=0.0,
            priors='conditional',
        )

        coeffs = first_runner.params
        coeffs[0] = 1
        first_runner.params = coeffs
        coeffs2 = first_runner.params
        assert coeffs2[0] == coeffs[0]
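# For context, a minimal sketch of the property pattern the test above
# exercises (illustrative only: the real PolicyGradientRunner exposes the
# underlying model's coefficient vector, not a plain list).
class _RunnerParamsSketch:
    def __init__(self, n_coeffs):
        self._coeff_vector = [0.0] * n_coeffs

    @property
    def params(self):
        # Return a copy so callers can mutate it freely before assigning back.
        return list(self._coeff_vector)

    @params.setter
    def params(self, new_coeffs):
        self._coeff_vector = list(new_coeffs)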
def run_model(queue, root_name, run_index, save_location):
    """Worker entry point: load a saved runner by root name, run it, then
    report (run_index, root_name) back through the queue."""
    print('Running: {}'.format(run_index))
    model = PolicyGradientRunner.load_from_root_name(save_location, root_name)
    model.run()
    queue.put((
        run_index,
        model.root_name,
    ))
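# run_model is written as a multiprocessing worker: the result travels back
# through a manager queue rather than a return value. A minimal dispatch
# sketch (the root name 'test' and the './data' directory are assumptions):
import multiprocessing as mp

def dispatch_one_run(root_name='test', save_location='./data'):
    manager = mp.Manager()
    queue = manager.Queue()
    with mp.Pool(1) as pool:
        pool.apply_async(run_model, (queue, root_name, 0, save_location))
        run_index, finished_root_name = queue.get(block=True)
    return run_index, finished_root_name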
def load_coeff_vector_cache(snapshot_dir, coeff_vector_cache):
    """
    Query the file system for all runner snapshots so far and cache their
    parameter vectors, loading only files not already in the cache.
    :param snapshot_dir: data location to scan for *_runner.zip files
    :param coeff_vector_cache: existing cache to update in place
    :return: the updated cache, a dict {root_name: {'params': param_vector}}
    """
    files = glob.glob(os.path.realpath(snapshot_dir) + '/*_runner.zip')
    for file in files:
        file_root = os.path.split(file)[-1].replace('_runner.zip', '')
        if file_root not in coeff_vector_cache:
            model = PolicyGradientRunner.load(file)
            coeff_vector_cache[file_root] = {'params': model.params}
    return coeff_vector_cache
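# Usage sketch (the './data' directory is an assumption): thread the same
# dict through repeated calls so each *_runner.zip is deserialized at most
# once, even as new snapshots keep appearing.
def demo_refresh_param_cache(snapshot_dir='./data'):
    cache = {}
    cache = load_coeff_vector_cache(snapshot_dir, cache)  # loads every snapshot
    cache = load_coeff_vector_cache(snapshot_dir, cache)  # no-op for known files
    return {name: entry['params'] for name, entry in cache.items()}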
def run_initial_scan(num_batches=100,
                     batch_size=30,
                     snapshot_dir=None,
                     entropy_wgt=0.0,
                     root_name=None,
                     obj_num=None,
                     ver='v2',
                     lr=0.01,
                     attempt='',
                     plot=False):
    grammar_cache = 'hyper_grammar_guac_10k_with_clique_collapse.pickle'  # 'hyper_grammar.pickle'
    grammar = 'hypergraph:' + grammar_cache
    reward_funs = guacamol_goal_scoring_functions(ver)
    reward_fun = reward_funs[obj_num]

    first_runner = lambda: PolicyGradientRunner(
        grammar,
        BATCH_SIZE=batch_size,
        reward_fun=reward_fun,
        max_steps=60,
        num_batches=num_batches,
        lr=lr,
        entropy_wgt=entropy_wgt,
        # lr_schedule=shifted_cosine_schedule,
        root_name=root_name,
        preload_file_root_name=None,
        plot_metrics=plot,
        save_location=snapshot_dir,
        metric_smooth=0.0,
        decoder_type='graph_conditional',  # alternatives: 'rnn_graph', 'attention'
        on_policy_loss_type='advantage_record',
        rule_temperature_schedule=None,
        # lambda x: toothy_exp_schedule(x, scale=num_batches),
        eps=0.0,
        priors='conditional',
    )

    # Scan indefinitely: each iteration builds a fresh runner under a unique
    # root name; the process is expected to be stopped externally.
    run = 0
    while True:
        model = first_runner()
        orig_name = model.root_name
        model.set_root_name(generate_root_name(orig_name, {}))
        print('Starting scan run {}'.format(run))
        model.run()
        run += 1
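# Usage sketch (objective number, snapshot directory and root name are all
# assumptions). Note that run_initial_scan loops forever, so the process is
# expected to be stopped externally once enough scan runs are collected.
if __name__ == '__main__':
    run_initial_scan(num_batches=100,
                     batch_size=30,
                     snapshot_dir='./data',
                     entropy_wgt=0.0,
                     root_name='scan_demo',
                     obj_num=0,
                     ver='v2',
                     lr=0.01,
                     plot=False)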
# This snippet referenced names defined earlier in its original script; the
# values below are assumptions filled in so it runs stand-alone
# (guacamol_goal_scoring_functions is assumed importable as elsewhere in the
# package).
import inspect
import os

from generative_playground.models.pg_runner import PolicyGradientRunner

ver = 'v2'
obj_num = 0
batch_size = 30
num_batches = 100
grammar = 'hypergraph:hyper_grammar_guac_10k_with_clique_collapse.pickle'
reward_fun = guacamol_goal_scoring_functions(ver)[obj_num]

root_name = 'xtest9' + ver + '_' + str(obj_num) + '_lr0.02'
max_steps = 60
root_location = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
root_location = root_location + '/../../../'
save_location = os.path.realpath(root_location + 'pretrained/')

runner_factory = lambda: PolicyGradientRunner(grammar,
                              BATCH_SIZE=batch_size,
                              reward_fun=reward_fun,
                              max_steps=max_steps,
                              num_batches=num_batches,
                              lr=0.02,
                              entropy_wgt=0.1,
                              # lr_schedule=shifted_cosine_schedule,
                              root_name=root_name,
                              preload_file_root_name=None,
                              plot_metrics=True,
                              save_location=save_location,
                              metric_smooth=0.0,
                              decoder_type='graph_conditional',  # alternatives: 'rnn_graph', 'attention'
                              on_policy_loss_type='advantage_record',
                              rule_temperature_schedule=None,  # lambda x: toothy_exp_schedule(x, scale=num_batches),
                              eps=0.0,
                              priors='conditional',
                              )
# preload_file='policy_gradient_run.h5')

runner = runner_factory()

runner.set_root_name('whatever')
save_fn = runner.run()
import os, glob, pickle, sys
import torch

# If a shared source checkout shadows the installed package, drop it from the
# import path first.
if '/home/ubuntu/shared/GitHub' in sys.path:
    sys.path.remove('/home/ubuntu/shared/GitHub')
from generative_playground.models.pg_runner import PolicyGradientRunner
from generative_playground.models.decoder.decoder import get_node_decoder
snapshot_dir = os.path.realpath(
    '../generative_playground/molecules/train/genetic/data')
root_name = 'Ascan8_v2_lr0.03_ew0.1'  # alternative: 'AA2scan8_v2_lr0.1_ew0.1'
files = glob.glob(snapshot_dir + '/' + root_name + '/*_runner.zip')
coeffs = {}
for file in files:
    print(file)
    model = PolicyGradientRunner.load(file)
    coeffs[file] = model.params
    # model = get_node_decoder('hypergraph:hyper_grammar_guac_10k_with_clique_collapse.pickle',
    #                  decoder_type='graph_conditional',
    #                  priors='conditional',
    #                          batch_size=2)[0]
    # model.load_state_dict(torch.load(file))
    # coeffs[file] = model.stepper.model.get_params_as_vector()

with open(snapshot_dir + '/' + root_name + '.pkl', 'wb') as f:
    pickle.dump(coeffs, f)
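# A sketch of reading the dump back: stacking the flattened vectors gives a
# (num_snapshots, num_params) matrix, convenient for e.g. PCA over the runs
# (assumes each parameter vector converts cleanly to a numpy array).
import numpy as np

def load_coeff_matrix(pkl_path=snapshot_dir + '/' + root_name + '.pkl'):
    with open(pkl_path, 'rb') as f:
        loaded = pickle.load(f)
    names = sorted(loaded.keys())
    matrix = np.stack([np.asarray(loaded[name]).ravel() for name in names])
    return names, matrix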
import multiprocessing as mp
import pickle
import random

import networkx as nx
import numpy as np

# Helpers used below (populate_data_cache, pick_model_to_run,
# pick_model_for_crossover, classic_crossover, mutate, generate_root_name,
# extract_params_rewards, extract_best, Dashboard, ParameterSampler,
# guacamol_goal_scoring_functions) come from elsewhere in the package.


def run_genetic_opt(
        top_N=10,
        p_mutate=0.2,
        mutate_num_best=64,
        mutate_use_total_probs=False,
        p_crossover=0.2,
        num_batches=100,
        batch_size=30,
        snapshot_dir=None,
        entropy_wgt=0.0,
        root_name=None,
        obj_num=None,
        ver='v2',
        lr=0.01,
        num_runs=100,
        num_explore=5,
        plot_single_runs=True,
        steps_with_no_improvement=10,
        reward_aggregation=np.median,
        attempt='',  # only used for disambiguating plotting
        max_steps=90,
        past_runs_graph_file=None):

    manager = mp.Manager()
    queue = manager.Queue()

    relationships = nx.DiGraph()
    grammar_cache = 'hyper_grammar_guac_10k_with_clique_collapse.pickle'  # 'hyper_grammar.pickle'
    grammar = 'hypergraph:' + grammar_cache

    reward_funs = guacamol_goal_scoring_functions(ver)
    reward_fun = reward_funs[obj_num]

    split_name = root_name.split('_')
    split_name[0] += 'Stats'
    dash_name = '_'.join(split_name) + attempt
    vis = Dashboard(dash_name, call_every=1)

    first_runner_factory = lambda: PolicyGradientRunner(
        grammar,
        BATCH_SIZE=batch_size,
        reward_fun=reward_fun,
        max_steps=max_steps,
        num_batches=num_batches,
        lr=lr,
        entropy_wgt=entropy_wgt,
        # lr_schedule=shifted_cosine_schedule,
        root_name=root_name,
        preload_file_root_name=None,
        plot_metrics=plot_single_runs,
        save_location=snapshot_dir,
        metric_smooth=0.0,
        decoder_type='graph_conditional_sparse',
        # alternatives: 'graph_conditional', 'rnn_graph', 'attention'
        on_policy_loss_type='advantage_record',
        rule_temperature_schedule=None,
        # lambda x: toothy_exp_schedule(x, scale=num_batches),
        eps=0.0,
        priors='conditional',
    )

    init_thresh = 50
    pca_dim = 10
    if past_runs_graph_file:
        params, rewards = extract_params_rewards(past_runs_graph_file)
        sampler = ParameterSampler(params,
                                   rewards,
                                   init_thresh=init_thresh,
                                   pca_dim=pca_dim)
    else:
        sampler = None
    data_cache = {}
    best_so_far = float('-inf')
    steps_since_best = 0

    initial = True
    should_stop = False
    run = 0

    with mp.Pool(4) as p:
        while not should_stop:
            data_cache = populate_data_cache(snapshot_dir, data_cache)
            if run < num_explore:
                model = first_runner_factory()
                if sampler:
                    model.params = sampler.sample()
            else:
                model = (pick_model_to_run(data_cache,
                                           PolicyGradientRunner,
                                           snapshot_dir,
                                           num_best=top_N)
                         if data_cache else first_runner_factory())

            orig_name = model.root_name
            model.set_root_name(generate_root_name(orig_name, data_cache))

            if run > num_explore:
                relationships.add_edge(orig_name, model.root_name)

                if random.random() < p_crossover and len(data_cache) > 1:
                    second_model = pick_model_for_crossover(
                        data_cache, model, PolicyGradientRunner, snapshot_dir)
                    model = classic_crossover(model, second_model)
                    relationships.add_edge(second_model.root_name,
                                           model.root_name)

                if random.random() < p_mutate:
                    model = mutate(model,
                                   pick_best=mutate_num_best,
                                   total_probs=mutate_use_total_probs)
                    # Graph.node was removed in networkx 2.4; .nodes is the
                    # 2.x-compatible spelling.
                    relationships.nodes[model.root_name]['mutated'] = True
                else:
                    relationships.nodes[model.root_name]['mutated'] = False

                with open(
                        snapshot_dir + '/' + model.root_name + '_lineage.pkl',
                        'wb') as f:
                    pickle.dump(relationships, f)

            model.save()

            if initial:
                # Prime the pool with one run per worker so all four stay busy.
                for _ in range(4):
                    print('Starting {}'.format(run))
                    p.apply_async(run_model,
                                  (queue, model.root_name, run, snapshot_dir))
                    run += 1
                initial = False
            else:
                print('Starting {}'.format(run))
                p.apply_async(run_model,
                              (queue, model.root_name, run, snapshot_dir))
                run += 1

            finished_run, finished_root_name = queue.get(block=True)
            print('Finished: {}'.format(finished_root_name))

            data_cache = populate_data_cache(snapshot_dir, data_cache)
            my_rewards = data_cache[finished_root_name]['best_rewards']
            metrics = {
                'max': my_rewards.max(),
                'median': np.median(my_rewards),
                'min': my_rewards.min()
            }
            metric_dict = {
                'type': 'line',
                'X': np.array([finished_run]),
                'Y': np.array([list(metrics.values())]),
                'opts': {
                    'legend': list(metrics.keys())
                }
            }

            vis.plot_metric_dict({'worker rewards': metric_dict})

            this_agg_reward = reward_aggregation(my_rewards)
            if this_agg_reward > best_so_far:
                best_so_far = this_agg_reward
                steps_since_best = 0
            else:
                steps_since_best += 1

            should_stop = (
                steps_since_best >= steps_with_no_improvement
                and finished_run > num_explore + steps_with_no_improvement)

        p.terminate()

    return extract_best(data_cache, 1)
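# Usage sketch (all argument values are illustrative). The __main__ guard
# matters because run_genetic_opt spawns worker processes via multiprocessing.
if __name__ == '__main__':
    best = run_genetic_opt(top_N=10,
                           num_batches=100,
                           batch_size=30,
                           snapshot_dir='./data',
                           root_name='genetic_demo',
                           obj_num=0,
                           ver='v2',
                           lr=0.01,
                           num_explore=5,
                           steps_with_no_improvement=10)
    print(best)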