Example #1
def test_vrp_transform():
    # X is a small point-cloud array defined at module level in the original
    # test file (not shown here); the same fixture is reused in Examples #8 and #9.
    vrp = VietorisRipsPersistence()
    X_vrp_res = np.array([[[0., 0.43094373, 0],
                           [0., 0.5117411, 0],
                           [0., 0.60077095, 0],
                           [0., 0.62186205, 0],
                           [0.69093919, 0.80131882, 1]]])

    assert_almost_equal(vrp.fit_transform(X), X_vrp_res)
Example #2
def topo_pipeline(x_input, bigm=160, rel_std=16, scale=100, period=1, time_delay=1, dimension=3,
                  homology_dimensions=None, max_edge_length=20, n_values_bc=2000, tech='raw', takens=True):
    if homology_dimensions is None:
        homology_dimensions = [0, 1]

    #  Drop id and neuron_id
    x_np = x_input.values[:, 2:]

    x_resamp = 0  # placeholder, overwritten below depending on the value of tech

    if tech == 'kde':
        window = signal.gaussian(bigm, std=bigm / rel_std)

        # right_lim = int((x_input[:, -1].max() + (bigm / 2)))  # theoretical upper bound
        right_lim = int(np.ceil(x_input.values[:, -1].max() / 10 + 1) * 10 * scale)  # bound used in practice

        #  Create gaussian spikes
        print('Creating gaussian spikes')
        x_gaus = gaus_kde(x_np, window, right_lim, bigm, scale)

        #  Resampling
        print('Resampling')
        resamp = Resampler(period=period)
        x_resamp = resamp.fit_transform(x_gaus.T).T

    elif tech == 'raw':
        x_resamp = x_np

    elif tech == 'intervals':
        x_resamp = x_input.values[:, 3:] - x_input.values[:, 2:-1]

    if takens:
        print('Creating Takens Embedding')
        te = TakensEmbedding(time_delay=time_delay, dimension=dimension, parameters_type='fixed')
        x_tak = np.apply_along_axis(te.fit_transform, 1, x_resamp)
    else:
        x_tak = np.expand_dims(x_resamp, axis=2)

    print('Creating V-R Persistence Diagrams')
    vrp = VietorisRipsPersistence(max_edge_length=max_edge_length, homology_dimensions=homology_dimensions, n_jobs=-1)
    x_vrpd = vrp.fit_transform(x_tak)

    print('Creating Betti curves')
    bc = BettiCurve(n_values=n_values_bc, n_jobs=-1)
    bcs = bc.fit_transform(x_vrpd)

    return bcs, x_vrpd
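
A minimal usage sketch of topo_pipeline, not part of the original example: the DataFrame name, the CSV file and the column layout below are assumptions (first two columns id and neuron_id, remaining columns spike times).

import pandas as pd

# Hypothetical input: a DataFrame whose first two columns are id and neuron_id
# and whose remaining columns hold spike times, as topo_pipeline expects.
spikes_df = pd.read_csv("spike_trains.csv")

# Returns the Betti curves and the Vietoris-Rips persistence diagrams.
betti_curves, diagrams = topo_pipeline(spikes_df, tech='raw', takens=True)
print(betti_curves.shape, diagrams.shape)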
Example #3
def computing_persistence_diagram(G, t=np.inf, homologyDimensions=(0, 1, 2)):
    """
    INPUT:
        G: a graph
        t: persistence threshold
        homologyDimensions: homology dimensions to consider
    OUTPUT:
        pd: persistence diagram calculated by Giotto
    """

    dist_mat = computing_distance_matrix(G)
    persistenceDiagram = VietorisRipsPersistence(
        metric='precomputed',
        max_edge_length=t,
        homology_dimensions=homologyDimensions,
        n_jobs=-1)
    Diagrams = persistenceDiagram.fit_transform(
        dist_mat.reshape(1, dist_mat.shape[0], dist_mat.shape[1]))
    return Diagrams
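
A minimal usage sketch, assuming networkx is available and that computing_distance_matrix (defined elsewhere in the original module) accepts such a graph; the toy graph itself is illustrative only.

import numpy as np
import networkx as nx

# Hypothetical toy graph on 30 vertices.
G = nx.erdos_renyi_graph(30, 0.2, seed=0)
diagrams = computing_persistence_diagram(G, t=np.inf, homologyDimensions=(0, 1))
# Shape (1, n_features, 3): each triple is (birth, death, homology dimension).
print(diagrams.shape)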
Example #4
def get_pd_from_molecule(molecule_name, structures):
    """
    INPUT:
        molecule_name: name of the molecule as given in the structures file
        structures: structures file containing information (x, y, z coordinates) for all molecules

    OUTPUT:
        X_scaled: scaled persistence diagrams
    """
    m = structures[structures['molecule_name'] == molecule_name][[
        'x', 'y', 'z'
    ]].to_numpy()
    m = m.reshape((1, m.shape[0], m.shape[1]))
    homology_dimensions = [0, 1, 2]
    persistenceDiagram = VietorisRipsPersistence(
        metric='euclidean', homology_dimensions=homology_dimensions, n_jobs=1)
    persistenceDiagram.fit(m)
    X_diagrams = persistenceDiagram.transform(m)

    diagram_scaler = diag.Scaler()
    diagram_scaler.fit(X_diagrams)
    X_scaled = diagram_scaler.transform(X_diagrams)

    return X_scaled
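
A minimal usage sketch; the file name and the molecule name are placeholders in the style of the CHAMPS structures.csv file and are assumptions, not part of the original example.

import pandas as pd

# Hypothetical structures file with columns molecule_name, x, y, z.
structures = pd.read_csv("structures.csv")
X_scaled = get_pd_from_molecule("dsgdb9nsd_000001", structures)
print(X_scaled.shape)  # (1, n_features, 3): scaled (birth, death, dimension) triples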
Example #5
    def _validate_k_fold_top(self, model, x_train, y_train, x_test, y_test):
        validation_quantities = []

        for k_min in self.k_mins:
            for k_max in self.k_maxs:
                for dist_percentage in self.dist_percentages:
                    print(
                        f"k_min, k_max, dist_percentage: {k_min}, {k_max}, {dist_percentage}"
                    )
                    pipeline_list = [
                        ('extract_subspaces',
                         SubSpaceExtraction(dist_percentage=dist_percentage,
                                            k_min=k_min,
                                            k_max=k_max,
                                            metric="euclidean",
                                            n_jobs=-1)),
                        ('compute_diagrams',
                         VietorisRipsPersistence(n_jobs=-1))
                    ]
                    top_pipeline = Pipeline(pipeline_list)

                    diagrams_train, _ = top_pipeline.fit_transform_resample(
                        x_train, y_train)

                    top_features_train = extract_topological_features(
                        diagrams_train)

                    x_train_model = np.concatenate(
                        [x_train, top_features_train], axis=1)
                    model.fit(x_train_model, y_train)

                    x_test_model = extract_features_for_prediction(
                        x_train, y_train, x_test, y_test, top_pipeline)

                    score = model.score(x_test_model, y_test)
                    output_dictionary = {
                        "k_min": k_min,
                        "k_max": k_max,
                        "dist_percentage": dist_percentage,
                        "score": score
                    }
                    validation_quantities.append(output_dictionary)

        return validation_quantities
Example #6
def get_pipeline(top_feat_params):
    pipeline = Pipeline([
        ('extract_point_clouds', SubSpaceExtraction(**top_feat_params)),
        ('create_diagrams', VietorisRipsPersistence(n_jobs=-1))
    ])
    return pipeline
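
A minimal usage sketch; the parameter values are illustrative assumptions, chosen to match the keyword arguments that SubSpaceExtraction receives in Examples #5 and #7.

top_feat_params = {
    "dist_percentage": 0.1,  # illustrative values only
    "k_min": 5,
    "k_max": 15,
    "metric": "euclidean",
    "n_jobs": -1,
}
pipeline = get_pipeline(top_feat_params)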
Example #7
    def cross_validate(self, full_x, full_y, splitting_dates):
        train_split_date = splitting_dates[0]
        val_split_date = splitting_dates[1]
        end_date = splitting_dates[2]

        train_x = full_x[(full_x.date < train_split_date) |
                         (full_x.date >= end_date)]
        train_y = full_y[(full_x.date < train_split_date) |
                         (full_x.date >= end_date)]

        val_x = full_x[(full_x.date >= train_split_date)
                       & (full_x.date < val_split_date)]
        val_y = full_y[(full_x.date >= train_split_date)
                       & (full_x.date < val_split_date)]

        test_x = full_x[(full_x.date >= val_split_date)
                        & (full_x.date < end_date)]
        test_y = full_y[(full_x.date >= val_split_date)
                        & (full_x.date < end_date)]

        train_x.pop("date")
        val_x.pop("date")
        test_x.pop("date")

        train_x = train_x.values
        train_y = train_y.values
        val_x = val_x.values
        val_y = val_y.values
        test_x = test_x.values
        test_y = test_y.values

        print("START VALIDATING MODEL")
        models_cv = self._validate_k_fold_model(train_x, train_y, val_x, val_y)
        best_model_params = best_combination(models_cv)
        best_model_params.pop("score")
        best_model = RandomForestClassifier(**best_model_params)

        best_model.fit(train_x, train_y)

        score = best_model.score(test_x, test_y)
        print(f'score no_top {score}')
        print(f'best model parameters no_top {best_model_params}')

        print("START VALIDATING PARAMS")
        topo_cv = self._validate_k_fold_top(best_model, train_x, train_y,
                                            val_x, val_y)
        best_topo = best_combination(topo_cv)
        best_topo.pop("score")
        best_topo_pipeline_list = [
            ('extract_subspaces', SubSpaceExtraction(**best_topo)),
            ('compute_diagrams', VietorisRipsPersistence(n_jobs=-1))
        ]
        best_topo_pipeline = Pipeline(best_topo_pipeline_list)

        train_x_for_test = np.concatenate([train_x, val_x], axis=0)
        train_y_for_test = np.concatenate([train_y, val_y], axis=0)

        diagrams_train, _ = best_topo_pipeline.fit_transform_resample(
            train_x_for_test, train_y_for_test)

        print("EXTRACTING TOPOLOGICAL FEATURES TRAIN")
        top_features_train = extract_topological_features(diagrams_train)

        x_train_model = np.concatenate([train_x_for_test, top_features_train],
                                       axis=1)
        best_model.fit(x_train_model, train_y_for_test)

        print("EXTRACTING TOPOLOGICAL FEATURES TEST")
        x_test_model = extract_features_for_prediction(x_train_model,
                                                       train_y_for_test,
                                                       test_x, test_y,
                                                       best_topo_pipeline)

        score_top = best_model.score(x_test_model, test_y)

        val_x_with_topo = extract_features_for_prediction(
            train_x, train_y, val_x, val_y, best_topo_pipeline)

        print('START VALIDATING MODEL WITH OPTIMAL TOPOLOGY')
        # labels must match the stacked train + validation rows in x_train_model
        model_config_with_topo = self._validate_k_fold_model(
            x_train_model, train_y_for_test, val_x_with_topo, val_y)
        best_model_config_with_topo = best_combination(model_config_with_topo)
        best_model_config_with_topo.pop('score')

        best_model_with_topo = RandomForestClassifier(
            **best_model_config_with_topo)
        best_model_with_topo.fit(x_train_model, train_y_for_test)

        score_best_topo_and_model = best_model_with_topo.score(
            x_test_model, test_y)
        print(f'score best model and topo_feat {score_best_topo_and_model}')

        return best_model_params, best_topo, best_model_config_with_topo, score, score_top, score_best_topo_and_model
Example #8
def test_vrp_not_fitted():
    vrp = VietorisRipsPersistence()

    with pytest.raises(NotFittedError):
        vrp.transform(X)
Example #9
def test_vrp_params():
    metric = 'not_defined'
    vrp = VietorisRipsPersistence(metric=metric)

    with pytest.raises(ValueError):
        vrp.fit_transform(X)