Пример #1
0
def test_train_small_bootstrap_presets(small_moddata, tf_session):
    """Check the shape of what `EnsembleMODNetModel.fit_preset()` returns.

    A bootstrapped ensemble with ``n_models=2`` is fitted on the small
    dataset using two presets (each capped at 2 epochs), once with
    ``nested=2`` and once with ``nested=0``. For every run, the first element
    of the result must hold one entry per preset, and the first of those
    entries must have the expected length (2 and 1 respectively).

    Args:
        small_moddata: fixture supplying the dataset; its ``df_featurized``
            columns are read and its ``optimal_features`` are overwritten.
        tf_session: fixture requested by the test; it is not referenced in
            the test body.
    """
    from modnet.model_presets import gen_presets
    from modnet.models import EnsembleMODNetModel

    # Keep only the first two generated presets and cap each at 2 epochs.
    presets = gen_presets(100, 100)[:2]
    for preset in presets:
        preset["epochs"] = 2

    # Select the 'optimal' features by hand: every ElementProperty column.
    small_moddata.optimal_features = [
        name
        for name in small_moddata.df_featurized.columns
        if name.startswith("ElementProperty")
    ]

    ensemble = EnsembleMODNetModel(
        [[["eform", "egap"]]],
        weights={"eform": 1, "egap": 1},
        num_neurons=[[4], [2], [2], [2]],
        n_feat=3,
        n_models=2,
        bootstrap=True,
    )

    # Each case is (value passed as `nested`, expected length of the first
    # preset's entry): nested=2 should give 2, nested=0 should give 1.
    cases = ((2, 2), (0, 1))
    for nested_option, expected_inner in cases:
        fitted = ensemble.fit_preset(
            small_moddata,
            presets=presets,
            nested=nested_option,
            val_fraction=0.2,
            n_jobs=2,
        )[0]
        assert len(fitted) == len(presets)
        assert len(fitted[0]) == expected_inner
Пример #2
0
                num_generations=10,
                n_jobs=16,
                early_stopping=True,
                refit=True,
            )
        else:
            # ... a list of presets (kind of dynamic grid search)
            (
                models,
                val_losses,
                best_learning_curve,
                learning_curves,
                best_presets,
            ) = model.fit_preset(
                train_data,
                classification=classification,
                nested=5,
                n_jobs=16,
            )

        # Load and featurize test dataset
        test_df = task.get_test_data(fold, include_target=False, as_type="df")

        try:
            materials = test_df[
                "structure"] if "structure" in test_df.columns else train_df[
                    "composition"].map(Composition)
        except KeyError:
            raise RuntimeError(
                f"Could not find any materials data dataset for task {task!r}!"
            )