# Example no. 1 (score: 0)
    def test_something(self):
        """Build a small DataNode tree and verify it serializes to JSON."""
        root = DataNode(label='node_label', metadata='test the metadata')
        children = []
        root.data = children
        seed_node = DataNode(label="seed_0")
        children.append(seed_node)

        # sub-nodes of the simulation data resource
        losses_node = DataNode(label="training_loss", data=[])
        metrics_node = DataNode(label="validation_metrics", data={})
        scores_node = DataNode(label="validation_score", data=[])

        # attach the sub-nodes under the per-seed parent node
        if seed_node:
            seed_node.data = [losses_node, metrics_node, scores_node]
        root.to_json(path="./")
        self.assertIsNotNone(root.to_json_str())
def main(flags):
    """Run the IReLeaSE REINFORCE simulation for every configured seed.

    For each seed this either performs a hyperparameter search
    (``flags.hparam_search``) or a full training run, collects results under
    a per-seed DataNode, and finally writes the whole simulation tree as
    JSON to ``./analysis/``.

    NOTE(review): relies on module-level globals not visible in this chunk
    (``seeds``, ``date_label``, ``device`` and the imported project
    classes) — confirm they are defined at module scope.

    :param flags: parsed command-line options (attribute access, e.g.
        ``flags.exp_name``, ``flags.model_dir``).
    """
    irl_lbl = 'no_irl' if flags.use_true_reward else 'with_irl'
    sim_label = flags.exp_name + '_min_IReLeaSE-REINFORCE_' + irl_lbl + (
        '_no_vflag' if flags.no_smiles_validity_flag else '')
    # Root of the simulation results tree; per-seed nodes are appended below.
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_name,
                            'date': date_label
                        })
    nodes_list = []
    sim_data.data = nodes_list

    for seed in seeds:
        # Deferred creation so the trainer opens the SummaryWriter itself,
        # with a fresh timestamp; the lambda closes over the current `seed`.
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="irelease_tb"
            "/{}_{}_{}/".format(sim_label, seed,
                                dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print(
            '--------------------------------------------------------------------------------'
        )
        print(f'{device}\n{sim_label}\tDemonstrations file: {flags.demo_file}')
        print(
            '--------------------------------------------------------------------------------'
        )

        irelease = IReLeaSE()
        k = 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'flags': flags}
            extra_train_args = {
                'agent_net_path': flags.model_dir,
                'agent_net_name': flags.pretrained_model,
                'learn_irl': not flags.use_true_reward,
                'seed': seed,
                'n_episodes': 600,
                'is_hsearch': True,
                'tb_writer': summary_writer_creator
            }
            hparams_conf = get_hparam_config(flags)
            # Fall back to Bayesian optimization for unknown algorithm names.
            search_alg = {
                'random_search': RandomSearch,
                'bayopt_search': BayesianOptSearch
            }.get(flags.hparam_search_alg, BayesianOptSearch)
            search_args = GPMinArgs(n_calls=20, random_state=seed)
            hparam_search = search_alg(
                hparam_config=hparams_conf,
                num_folds=1,
                initializer=irelease.initialize,
                data_provider=irelease.data_provider,
                train_fn=irelease.train,
                save_model_fn=irelease.save_model,
                alg_args=search_args,
                init_args=extra_init_args,
                data_args=extra_data_args,
                train_args=extra_train_args,
                data_node=data_node,
                split_label='reinforce-rl',
                sim_label=sim_label,
                dataset_label=None,
                results_file=f'{flags.hparam_search_alg}_{sim_label}'
                f'_{date_label}_seed_{seed}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("\nBest params = {}, duration={}".format(
                stats.best(), time_since(start)))
        else:
            # Regular training run with the default hyperparameters.
            hyper_params = default_hparams(flags)
            data_gens = irelease.data_provider(k, flags)
            init_args = irelease.initialize(hyper_params,
                                            data_gens['demo_data'],
                                            data_gens['unbiased_data'],
                                            data_gens['prior_data'])
            results = irelease.train(init_args,
                                     flags.model_dir,
                                     flags.pretrained_model,
                                     seed,
                                     sim_data_node=data_node,
                                     n_episodes=600,
                                     bias_mode=flags.bias_mode,
                                     learn_irl=not flags.use_true_reward,
                                     tb_writer=summary_writer_creator)
            # Persist both trained networks: results['model'][0] is saved as
            # the agent, results['model'][1] as the reward net.
            irelease.save_model(
                results['model'][0],
                path=flags.model_dir,
                name=
                f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_{hyper_params["agent_params"]["unit_type"]}'
                f'_reinforce_agent_{date_label}_{results["score"]}_{results["epoch"]}'
            )
            irelease.save_model(
                results['model'][1],
                path=flags.model_dir,
                name=
                f'{flags.exp_name}_{irl_lbl}_irelease_stack-rnn_{hyper_params["agent_params"]["unit_type"]}'
                f'_reinforce_reward_net_{date_label}_{results["score"]}_{results["epoch"]}'
            )

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
# Example no. 3 (score: 0)
def main(flags):
    """Pretrain (or evaluate) the Stack-RNN generator for every seed.

    Per seed: run a hyperparameter search (reusing one searcher across
    seeds), evaluate a saved model, or train from scratch / a warm start,
    then dump the simulation results tree as JSON to ``./analysis/``.

    NOTE(review): relies on module-level globals not visible in this chunk
    (``seeds``, ``date_label`` and the imported project classes).

    :param flags: parsed command-line options (attribute access, e.g.
        ``flags.exp_name``, ``flags.model_dir``).
    """
    sim_label = flags.exp_name if flags.exp_name else 'Irelease-pretraining-Stack-RNN'
    if flags.eval:
        sim_label += '_eval'
    # Root of the simulation results tree; per-seed nodes are appended below.
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_name,
                            'date': date_label
                        })
    nodes_list = []
    sim_data.data = nodes_list

    # For searching over multiple seeds
    hparam_search = None

    for seed in seeds:
        # Deferred creation so the trainer opens the SummaryWriter itself,
        # with a fresh timestamp; the lambda closes over the current `seed`.
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="tb_gpmt"
            "/{}_{}_{}/".format(sim_label, seed,
                                dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print(
            '-------------------------------------------------------------------------------------------------'
        )
        print(
            f'Running on dataset: {flags.data_file}, experiment = {flags.exp_name}'
        )
        print(
            '-------------------------------------------------------------------------------------------------'
        )

        trainer = IreleasePretrain()
        k = 1
        # FIX: use attribute access on `flags` consistently. The rest of this
        # function (flags.eval, flags.model_dir, ...) and the sibling main()
        # drivers in this file use attributes; subscripting an
        # argparse.Namespace-style object raises TypeError.
        if flags.hparam_search:
            print("Hyperparameter search enabled: {}".format(
                flags.hparam_search_alg))

            # arguments to callables
            extra_init_args = {}
            extra_data_args = {"flags": flags}
            extra_train_args = {
                "is_hsearch": True,
                "n_iters": 50000,
                "tb_writer": summary_writer_creator
            }

            hparams_conf = get_hparam_config(flags)
            # Create the searcher once and reuse it across seeds.
            if hparam_search is None:
                search_alg = {
                    "random_search": RandomSearch,
                    "bayopt_search": BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=20, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=1,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='',
                    sim_label=sim_label,
                    dataset_label='ChEMBL_SMILES',
                    results_file="{}_{}_gpmt_{}.csv".format(
                        flags.hparam_search_alg, sim_label, date_label))

            stats = hparam_search.fit(model_dir="models",
                                      model_name='irelease')
            print(stats)
            print("Best params = {}".format(stats.best()))
        else:
            hyper_params = default_hparams(flags)
            model, optimizer, gen_data, rnn_args = trainer.initialize(
                hyper_params,
                gen_data=trainer.data_provider(k, flags)['train'])
            if flags.eval:
                # Evaluation-only path: load saved weights and sample SMILES.
                load_model = trainer.load_model(flags.model_dir,
                                                flags.eval_model_name)
                model.load_state_dict(load_model)
                trainer.evaluate_model(model,
                                       gen_data,
                                       rnn_args,
                                       data_node,
                                       num_smiles=flags.num_smiles)
            else:
                if flags.init_model:
                    # Warm-start training from a previously saved model.
                    load_model = trainer.load_model(flags.model_dir,
                                                    flags.init_model)
                    model.load_state_dict(load_model)
                    print(
                        f'Model weights {flags.init_model} loaded successfully!'
                    )
                results = trainer.train(model=model,
                                        optimizer=optimizer,
                                        gen_data=gen_data,
                                        rnn_args=rnn_args,
                                        n_iters=1500000,
                                        sim_data_node=data_node,
                                        tb_writer=summary_writer_creator)
                trainer.save_model(
                    results['model'],
                    flags.model_dir,
                    name=
                    f'irelease-pretrained_stack-rnn_{hyper_params["unit_type"]}_'
                    f'{date_label}_{results["score"]}_{results["epoch"]}')

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
# Example no. 4 (score: 0)
def main(flags):
    """Train or evaluate the expert RNN regression model for every seed.

    Loads the SMILES dataset once, then per seed either runs a
    hyperparameter search (one searcher reused across seeds) or trains —
    with k-fold cross-validation when ``flags.cv`` is set — and finally
    writes the simulation results tree as JSON to ``./analysis/``.

    NOTE(review): relies on module-level globals not visible in this chunk
    (``seeds``, ``date_label``, ``device`` and the imported project
    helpers such as ``start_fold``).

    :param flags: parsed command-line options (attribute access, e.g.
        ``flags.data_file``, ``flags.folds``).
    """
    mode = 'eval' if flags.eval else 'train'
    sim_label = f'expert_rnn_reg_model_{mode}'

    print(
        '--------------------------------------------------------------------------------'
    )
    print(f'{device}\n{sim_label}\tData file: {flags.data_file}')
    print(
        '--------------------------------------------------------------------------------'
    )

    # For searching over multiple seeds: created once, reused per seed.
    hparam_search = None

    # Root of the simulation results tree; metadata serialized as a JSON
    # string here (unlike the other drivers, which pass a dict).
    sim_data = DataNode(label=sim_label,
                        metadata=json.dumps({
                            'date': date_label,
                            'seeds': seeds,
                            'mode': mode,
                            'sim_label': sim_label,
                            'num_folds': flags.folds
                        }))
    nodes_list = []
    sim_data.data = nodes_list

    # Load the data
    # NOTE(review): `shuffle=5` — presumably a shuffle count/seed; confirm
    # against load_smiles_data's signature.
    data_dict, transformer = load_smiles_data(flags.data_file,
                                              flags.cv,
                                              normalize_y=True,
                                              k=flags.folds,
                                              shuffle=5,
                                              create_val=False,
                                              train_size=.8)

    for seed in seeds:
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # ensure reproducibility
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        trainer = ExpertTrainer()
        # Single fold unless cross-validation is requested.
        folds = flags.folds if flags.cv else 1
        if flags.hparam_search:
            print(f'Hyperparameter search enabled: {flags.hparam_search_alg}')
            # arguments to callables
            extra_init_args = {}
            extra_data_args = {'cv': flags.cv, 'data': data_dict}
            extra_train_args = {
                'n_iterations': 5000,
                'transformer': transformer,
                'is_hsearch': True,
                'tb_writer': None
            }
            hparams_conf = hparams_config()
            if hparam_search is None:
                # Fall back to Bayesian optimization for unknown names.
                search_alg = {
                    'random_search': RandomSearch,
                    'bayopt_search': BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=10, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=folds,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='random',
                    sim_label=sim_label,
                    dataset_label=os.path.split(flags.data_file)[1],
                    results_file=
                    f'{flags.hparam_search_alg}_{sim_label}_{date_label}')
            start = time.time()
            stats = hparam_search.fit()
            print(f'Duration = {time_since(start)}')
            print(stats)
            print("Best params = {}, duration={}".format(
                stats.best(), time_since(start)))
        else:
            hyper_params = default_params(flags)
            # Initialize the model and other related entities for training.
            if flags.cv:
                # One child node per fold under this seed's node.
                folds_data = []
                data_node.data = folds_data
                data_node.label = data_node.label + 'cv'
                for k in range(folds):
                    k_node = DataNode(label="fold-%d" % k)
                    folds_data.append(k_node)
                    start_fold(k_node, data_dict, transformer, flags,
                               hyper_params, trainer, k, None)
            else:
                start_fold(data_node, data_dict, transformer, flags,
                           hyper_params, trainer, folds, None)

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")
# Example no. 5 (score: 0)
def main(flags):
    """Run the RNN cross-entropy generator baseline for every seed.

    Per seed: run a hyperparameter search (reusing one searcher across
    seeds), evaluate a saved model, or train (pretraining or fine-tuning
    depending on ``flags.exp_type``), then dump the simulation results
    tree as JSON to ``./analysis/``.

    NOTE(review): relies on module-level globals not visible in this chunk
    (``seeds``, ``date_label``, ``device`` and the imported project
    classes).

    :param flags: parsed command-line options (attribute access, e.g.
        ``flags.exp_type``, ``flags.model_dir``).
    """
    sim_label = f'RNN_XEnt_Generator_Baseline_{flags.exp_type}'
    if flags.eval:
        sim_label += '_eval'
    # Root of the simulation results tree; per-seed nodes are appended below.
    sim_data = DataNode(label=sim_label,
                        metadata={
                            'exp': flags.exp_type,
                            'date': date_label
                        })
    nodes_list = []
    sim_data.data = nodes_list

    # For searching over multiple seeds
    hparam_search = None

    pretraining = flags.exp_type == 'pretraining'

    for seed in seeds:
        # Deferred creation so the trainer opens the SummaryWriter itself,
        # with a fresh timestamp; the lambda closes over the current `seed`.
        summary_writer_creator = lambda: SummaryWriter(
            log_dir="irelease_tb_rnn_xent"
            "/{}_{}_{}/".format(sim_label, seed,
                                dt.now().strftime("%Y_%m_%d__%H_%M_%S")))

        # for data collection of this round of simulation.
        data_node = DataNode(label="seed_%d" % seed)
        nodes_list.append(data_node)

        # Seed every RNG source for reproducibility of this round.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        print(
            '--------------------------------------------------------------------------------'
        )
        print(
            f'{device}\n{sim_label}\tDemonstrations file: {flags.prior_data if pretraining else flags.demo_file}'
        )
        print(
            '--------------------------------------------------------------------------------'
        )

        trainer = RNNBaseline()
        k = 1
        # FIX: use attribute access on `flags` consistently. The rest of this
        # function (flags.eval, flags.model_dir, ...) and the sibling main()
        # drivers in this file use attributes; subscripting an
        # argparse.Namespace-style object raises TypeError.
        if flags.hparam_search:
            print("Hyperparameter search enabled: {}".format(
                flags.hparam_search_alg))

            # arguments to callables
            extra_init_args = {}
            extra_data_args = {"flags": flags}
            extra_train_args = {
                "is_hsearch": True,
                "n_iters": 50000,
                "tb_writer": summary_writer_creator
            }

            hparams_conf = get_hparam_config(flags)
            # Create the searcher once and reuse it across seeds.
            if hparam_search is None:
                search_alg = {
                    "random_search": RandomSearch,
                    "bayopt_search": BayesianOptSearch
                }.get(flags.hparam_search_alg, BayesianOptSearch)
                search_args = GPMinArgs(n_calls=20, random_state=seed)
                hparam_search = search_alg(
                    hparam_config=hparams_conf,
                    num_folds=1,
                    initializer=trainer.initialize,
                    data_provider=trainer.data_provider,
                    train_fn=trainer.train,
                    save_model_fn=trainer.save_model,
                    alg_args=search_args,
                    init_args=extra_init_args,
                    data_args=extra_data_args,
                    train_args=extra_train_args,
                    data_node=data_node,
                    split_label='',
                    sim_label=sim_label,
                    dataset_label='ChEMBL_SMILES',
                    results_file="{}_{}_gpmt_{}.csv".format(
                        flags.hparam_search_alg, sim_label, date_label))

            stats = hparam_search.fit(model_dir="models",
                                      model_name='irelease')
            print(stats)
            print("Best params = {}".format(stats.best()))
        else:
            hyper_params = default_hparams(flags)
            data_gens = trainer.data_provider(k, flags)
            model, optimizer, rnn_args = trainer.initialize(
                hyper_params, data_gens['demo_data'],
                data_gens['unbiased_data'], data_gens['prior_data'])
            if flags.eval:
                # Evaluation-only path: load saved weights and sample SMILES.
                load_model = trainer.load_model(flags.model_dir,
                                                flags.eval_model_name)
                model.load_state_dict(load_model)
                trainer.evaluate_model(model,
                                       data_gens['demo_data'],
                                       rnn_args,
                                       data_node,
                                       num_smiles=200)
            else:
                results = trainer.train(
                    generator=model,
                    optimizer=optimizer,
                    rnn_args=rnn_args,
                    n_iters=40000,
                    sim_data_node=data_node,
                    tb_writer=summary_writer_creator,
                    is_pretraining=pretraining,
                    pretrained_net_path=flags.model_dir,
                    pretrained_net_name=flags.pretrained_model)
                trainer.save_model(
                    results['model'],
                    flags.model_dir,
                    name=
                    f'rnn_xent_gen_baseline_{flags.exp_type}_{hyper_params["unit_type"]}_'
                    f'{date_label}_{results["score"]}_{results["epoch"]}_seed_{seed}'
                )

    # save simulation data resource tree to file.
    sim_data.to_json(path="./analysis/")