示例#1
0
    def est(self, n_pca):
        """Fit a PCA on the currently aligned (permuted) graph set.

        Every graph of ``self.aX`` is deep-copied and permuted with its
        alignment ``self.f[i]``. The permuted set is flattened with
        ``to_matrix_with_attr`` and, when ``self.scale`` is True, passed
        through ``scale`` before the PCA is fitted.

        Args:
            n_pca: forwarded to ``PCA(n_components=...)``.

        Returns:
            A tuple ``(vals, vecs, scores)``: the explained variance ratios,
            the principal components as a DataFrame whose columns are the
            feature names, and the scores returned by ``fit_transform``.

        Side effects:
            When ``self.scale`` is not True, the column-wise mean of the
            feature matrix is stored in ``self.barycenter`` and printed.
        """
        N = self.aX.size()

        # Step 1: build the permuted copy of the data set.
        G_per = GraphSet()
        for i in range(N):
            G = copy.deepcopy(self.aX.X[i])
            G.permute(self.f[i])
            G_per.add(G)
        Mat = G_per.to_matrix_with_attr()

        # Step 2: optionally standardize the features.
        if self.scale == True:
            Mat_scale = pd.DataFrame(scale(Mat), columns=Mat.columns)
        else:
            Mat_scale = Mat
            # Explicit column-wise mean: ``np.mean(DataFrame)`` dispatches to
            # ``DataFrame.mean(axis=None)``, whose result depends on the
            # pandas version (a single scalar from pandas 2.0 on).
            # NOTE(review): the barycenter is only set in this unscaled
            # branch, as in the original code - confirm this is intended.
            self.barycenter = Mat_scale.mean(axis=0)
            print(self.barycenter)

        # Step 3: PCA on the (possibly scaled) feature matrix.
        pca = PCA(n_components=n_pca)
        scores = pca.fit_transform(Mat_scale)
        vals = pca.explained_variance_ratio_
        vecs = pd.DataFrame(pca.components_, columns=Mat_scale.columns)
        return (vals, vecs, scores)
示例#2
0
 def predict(self, x_new, std=False):
     """Predict one network per row of ``x_new`` with the fitted model.

     Args:
         x_new: regressor values, one observation per row. A message is
             printed when it is not a pandas DataFrame; execution then
             continues as before.
         std: when truthy and ``self.model_type == 'GPR'``, the model is
             asked for the predictive standard deviation as well, which is
             stored in ``self.y_std_pred``.

     Side effects:
         Sets ``self.y_vec_pred`` (raw model output) and ``self.y_net_pred``
         (a ``GraphSet`` with one network per predicted row). Returns
         nothing.
     """
     if not isinstance(x_new, pd.DataFrame):
         print(
             "The new observation should be a pandas dataframe of real values"
         )

     # Query the model once. The previous version always ran a plain
     # prediction, built all networks, and then repeated both steps when the
     # standard deviation was requested.
     if std and self.model_type == 'GPR':
         self.y_vec_pred, self.y_std_pred = self.model.predict(
             X=x_new, return_std=True)
     else:
         self.y_vec_pred = self.model.predict(X=x_new)

     self.y_net_pred = GraphSet()
     for i in range(self.y_vec_pred.shape[0]):
         geo = pd.Series(data=self.y_vec_pred[i], index=self.variables_names)
         # Row i of the prediction belongs to the i-th row of x_new by
         # position, so the regressor is read with iloc; label-based loc
         # mispairs or fails when x_new does not carry a 0..n-1 index.
         # NOTE(review): float() of a one-element row is kept from the
         # original; it only works for a single-column x_new.
         self.y_net_pred.add(
             self.give_me_a_network(geo=geo,
                                    n_a=self.aX.node_attr,
                                    e_a=self.aX.edge_attr,
                                    s=float(x_new.iloc[i])))
示例#3
0
    def est(self, n_pca, k, old_pca=None):
        """Fit the PCA of iteration ``k`` and record reconstruction errors.

        The graphs of ``self.aX`` are permuted with their current alignments
        ``self.f[i]``, flattened to a feature matrix (standardized when
        ``self.scale`` is True) and fed to a PCA with ``n_pca`` components.

        Args:
            n_pca: forwarded to ``PCA(n_components=...)``.
            k: iteration index; used as key of ``self.pcas`` and as second
                index of the error stores.
            old_pca: fitted PCA of the previous iteration; only used when
                ``k > 0``.

        Returns:
            ``(vals, vecs, scores, pca)``: explained variance ratios,
            components as a DataFrame, the scores and the fitted PCA object.

        Side effects:
            Sets ``self.pcas[k]`` and ``self.barycenter``; fills
            ``self.pca_error[i, k]`` and, for ``k > 0``,
            ``self.pcaold_error[i, k]``.
        """
        n_obs = self.aX.size()

        # Permuted copy of the data set under the current alignments.
        permuted = GraphSet()
        for idx in range(n_obs):
            graph = copy.deepcopy(self.aX.X[idx])
            graph.permute(self.f[idx])
            permuted.add(graph)

        raw = permuted.to_matrix_with_attr()
        features = (pd.DataFrame(scale(raw), columns=raw.columns)
                    if self.scale == True else raw)

        pca = PCA(n_components=n_pca)
        scores = pca.fit_transform(features)
        explained = pca.explained_variance_ratio_
        components = pd.DataFrame(pca.components_, columns=features.columns)
        self.pcas[k] = [pca, features]
        self.barycenter = pd.Series(pca.mean_, index=features.columns)

        def record_errors(model, store):
            # Map the scores back to feature space with `model`, rebuild a
            # network per observation and store its distance (identity
            # matching) from the permuted observation in store[i, k].
            rebuilt = pd.DataFrame(model.inverse_transform(scores),
                                   columns=features.columns)
            for idx in range(n_obs):
                network = self.give_me_a_network(rebuilt.iloc[idx, :],
                                                 n_a=self.aX.node_attr,
                                                 e_a=self.aX.edge_attr)
                matcher = ID(self.distance)
                store[idx, k] = matcher.align(permuted.X[idx], network).dis()

        if k > 0:
            # Error of the current scores read through the previous PCA.
            record_errors(old_pca, self.pcaold_error)
        # Error of the current scores read through the current PCA.
        record_errors(pca, self.pca_error)
        return (explained, components, scores, pca)
示例#4
0
 def align_G(self, *args):
     """Align a graph, or every graph of a graph set, to ``self.m_C``.

     Only the first positional argument is used.

     Args:
         *args: a single ``Graph`` or ``GraphSet``.

     Returns:
         * the argument itself when ``self.m_C`` is None;
         * for a ``Graph``: ``alignedSource()`` of the alignment computed by
           ``self.m_matcher.align(graph, self.m_C)``;
         * for a ``GraphSet``: a new ``GraphSet`` with each element aligned
           by this same method;
         * None when called without arguments or with another type.

     The previous version tested ``isinstance`` on the ``args`` tuple itself,
     so neither branch could ever run, and its set loop used ``i == size``
     as the continuation condition, which skips every non-empty set.
     """
     if not args:
         return None
     target = args[0]

     if isinstance(target, Graph):
         if self.m_C is None:
             return target
         alignment = self.m_matcher.align(target, self.m_C)
         return alignment.alignedSource()

     if isinstance(target, GraphSet):
         if self.m_C is None:
             return target
         aligned_set = GraphSet()
         for i in range(target.size()):
             # Each element goes through the single-graph branch above.
             aligned_set.add(self.align_G(target.X[i]))
         return aligned_set

     return None
示例#5
0
 def variance(self):
     """Return the sample variance of the graph set around its mean.

     The value is the average of the distances ``self.m_dis[i]`` between the
     aligned observations and ``self.mean``. It is cached in ``self.var``
     and returned directly on later calls.

     Returns:
         The variance, or None (after printing a message) when ``self.aX``
         is None or empty.

     Side effects:
         May set ``self.mean`` (via ``align_and_est``), ``self.m_dis`` and
         ``self.var``.
     """
     if self.aX is None or self.aX.size() == 0:
         print("Sample of graphs is empty")
         return None

     # Identity checks: ``== None`` on an array-like value is evaluated
     # element-wise and cannot be used as a condition.
     if self.var is not None:
         return self.var

     if not isinstance(self.mean, Graph):
         self.mean = self.align_and_est()

     n = self.aX.size()
     if self.m_dis is None:
         # Distances between the mean and the sample permuted with the
         # current alignments.
         align_X = GraphSet()
         for i in range(n):
             G = copy.deepcopy(self.aX.X[i])
             G.permute(self.f[i])
             align_X.add(G)
         self.m_dis = self.matcher.dis(align_X, self.mean)

     self.var = sum((self.m_dis[i] for i in range(n)), 0.0) / n
     return self.var
示例#6
0
# Exploratory script: load a graph set from a text file and prepare the
# networkx representation, node positions and edge colours of one predicted
# network for plotting.
# NOTE(review): `sys`, `os` and `r` are used below but are not imported or
# defined in the visible part of the script - confirm where they come from.
# Make the local `core` package importable and move to the data-set folder
# (machine-specific absolute Windows paths).
sys.path.append("C:\\Users\\Anna\\OneDrive - Politecnico di Milano\\Windows\\Polimi\\Ricerca\\Regression\\GraphSpace\\")
os.chdir('C:\\Users\\Anna\\OneDrive - Politecnico di Milano\\Windows\\Polimi\\Ricerca\\Regression\\Simulations\\DataSets')
from core import Graph
from core import GraphSet
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.cm as cm
# Regression models
from sklearn import linear_model,gaussian_process
import numpy as np
import random
# Fix the seed of the stdlib random generator
random.seed(3)
# Read the undirected graph set from the text file
G=GraphSet(graph_type='undirected')
G.read_from_text("C:\\Users\\Anna\\OneDrive - Politecnico di Milano\\Windows\\Polimi\\Ricerca\\Regression\\Simulations\\DataSets\\GraphSet_CryptCorrMats.txt")


# Take the first predicted network stored in `r.y_net_pred`
G_origin=r.y_net_pred.X[0]
# Network plot
# Convert to networkx format
G_plot=G_origin.to_networkX(layer=0,node_too=True)
# Define the node positions (hard-coded coordinates for nodes 0, 1 and 2)
pos={0: [-0.16210871,  0.92931688],
1: [0.36616978, 1.        ],
2: [ 0.48415449, -0.48927315]}
# or with networkx.layout https://networkx.github.io/documentation/stable/reference/drawing.html
# Initialize the colors as the edge weights
colors=list(nx.get_edge_attributes(G_plot,'weight').values())
示例#7
0
    def align_and_est(self):
        """Run the iterative align-and-regress procedure and store the fit.

        Steps, as implemented below:
          1. pick a random reference graph with more than one node and call
             ``two_net_match`` for every observation against it;
          2. fit the regression on the aligned set (``est``) and re-align
             every observation to its own prediction (``align_pred``);
          3. repeat step 2 until the summed regression error changes by less
             than 0.05 between two consecutive iterations, or until
             ``self.nr_iterations`` iterations have been run.

        Returns nothing. The outcome is stored on the instance by
        ``_store_estimate``. On early convergence of an OLS model,
        ``self.nr_iterations`` is additionally set to the last iteration
        index.
        """
        # INITIALIZATION: random candidate, rejecting single-node graphs.
        first_id = random.randint(0, self.aX.size() - 1)
        m_1 = self.aX.X[first_id]
        while m_1.n_nodes == 1:
            first_id = random.randint(0, self.aX.size() - 1)
            m_1 = self.aX.X[first_id]

        # Match all observations with the candidate (threads share `self`).
        Parallel(n_jobs=10, require='sharedmem')(
            delayed(self.two_net_match)(m_1, i, first_id)
            for i in range(self.aX.size()))

        # First regression and alignment with respect to its predictions.
        current = self.est(k=0)
        Parallel(n_jobs=10, require='sharedmem')(
            delayed(self.align_pred)(current[1], i, 0)
            for i in range(self.aX.size()))

        # Iterative algorithm.
        for k in range(1, self.nr_iterations):
            current = self.est(k)
            Parallel(n_jobs=6, require='sharedmem')(
                delayed(self.align_pred)(current[1], i, k)
                for i in range(self.aX.size()))

            # Step: absolute change of the total regression error between
            # the previous and the current iteration.
            n_obs = self.aX.size()
            previous_total = sum(
                [self.regression_error[i, k - 1] for i in range(n_obs)])
            current_total = sum(
                [self.regression_error[i, k] for i in range(n_obs)])
            step_range = abs(previous_total - current_total)

            if step_range < 0.05:
                # Iterations 0..k have been computed, hence k + 1 columns.
                self._store_estimate(current, k + 1)
                if self.model_type == 'OLS':
                    self.nr_iterations = k
                print("Step Range smaller than 0.05")
                return

        print("Maximum number of iteration reached.")
        # `current` is the last estimate computed (the one of k=0 when the
        # loop above did not run at all).
        self._store_estimate(current, self.nr_iterations)

    def _store_estimate(self, estimate, n_iterations):
        """Store the tuple returned by ``est`` as the final result.

        ``estimate[0]`` becomes ``self.model``. For an OLS model the
        intercept and each coefficient column are turned into networks and
        collected in ``self.network_coef``, and the two error stores are
        converted into DataFrames with one column per iteration in
        ``range(n_iterations)``. For any other model the prediction and the
        third element of the tuple are kept as ``self.y_post`` and
        ``self.y_post_std``.
        """
        model = estimate[0]
        self.model = model
        if self.model_type != 'OLS':
            # Previously skipped on early convergence, which left these two
            # attributes unset for non-OLS models.
            self.y_post = estimate[1]
            self.y_post_std = estimate[2]
            return

        self.network_coef = GraphSet()
        self.network_coef.add(
            self.give_me_a_network(pd.Series(data=model.intercept_.flatten(),
                                             index=self.variables_names),
                                   self.aX.node_attr,
                                   self.aX.edge_attr,
                                   s='Intercept'))
        for i_th in range(model.coef_.shape[1]):
            self.network_coef.add(
                self.give_me_a_network(pd.Series(data=model.coef_[:, i_th],
                                                 index=self.variables_names),
                                       self.aX.node_attr,
                                       self.aX.edge_attr,
                                       s=str('beta' + str(i_th))))

        n_obs = self.aX.size()
        self.regression_error = pd.DataFrame.from_dict({
            iteration: [
                self.regression_error[observation, iteration]
                for observation in range(n_obs)
            ]
            for iteration in range(n_iterations)
        })
        self.postalignment_error = pd.DataFrame.from_dict({
            iteration: [
                self.postalignment_error[observation, iteration]
                for observation in range(n_obs)
            ]
            for iteration in range(n_iterations)
        })
示例#8
0
    def est(self, k):
        """Fit the regression of iteration ``k`` on the aligned graph set.

        The graphs of ``self.aX`` are permuted with the current alignments
        ``self.f`` (keeping their regressors ``s``) and the permuted set
        replaces ``self.aX``. The set is flattened into the response matrix,
        the regressors form the design matrix, and the model selected by
        ``self.model_type`` is fitted.

        Args:
            k: iteration index, used as key of ``self.f_iteration`` and
                ``self.f_all`` and as second index of
                ``self.regression_error``.

        Returns:
            * 'OLS': ``(model, along_geo_pred)``;
            * 'GPR': ``(model, along_geo_pred, along_geo_pred_sd)``, where
              ``model`` is the Gaussian process fitted for the last
              variable and the two DataFrames hold the predicted mean and
              standard deviation of every variable.

        Raises:
            ValueError: when ``self.model_type`` is neither 'OLS' nor 'GPR'.

        Side effects:
            Sets ``self.f_iteration[k]``, ``self.aX``,
            ``self.variables_names`` and ``self.regressor``; for OLS also
            ``self.f_all[k]``; for GPR also ``self.models`` and
            ``self.regression_error[i, k]``.
        """
        # Snapshot of the alignments used in this iteration. ``self.f``
        # keeps being updated afterwards, so a plain reference would make
        # every stored iteration point to the same object.
        self.f_iteration[k] = copy.deepcopy(self.f)

        # Step 1: permuted data set, which becomes the new reference set.
        G_per = GraphSet()
        for i in range(self.aX.size()):
            G_temp = copy.deepcopy(self.aX.X[i])
            G_temp.permute(self.f[i])
            G_temp.s = copy.deepcopy(self.aX.X[i].s)
            G_per.add(G_temp)
        self.aX = copy.deepcopy(G_per)

        # Step 2: response matrix, one column per variable.
        y = G_per.to_matrix_with_attr()
        self.variables_names = y.columns
        n_obs = y.shape[0]

        # Step 3: design matrix, one row of regressors per observation.
        x = pd.DataFrame(
            [[float(regressor) for regressor in G_per.X[i].s]
             for i in range(n_obs)],
            columns=range(len(G_per.X[0].s)),
            index=range(n_obs))
        self.regressor = x

        # Step 4: fit the chosen regression model.
        if self.model_type == 'OLS':
            model = linear_model.LinearRegression()
            model.fit(x, y)
            along_geo_pred = pd.DataFrame(model.predict(x),
                                          columns=self.variables_names)
            self.f_all[k] = copy.deepcopy(self.f)
            return (model, along_geo_pred)

        elif self.model_type == 'GPR':
            along_geo_pred = pd.DataFrame(index=range(n_obs),
                                          columns=self.variables_names)
            along_geo_pred_sd = pd.DataFrame(index=range(n_obs),
                                             columns=self.variables_names)
            # Per-observation error accumulated over all variables.
            weighted_error = [0.0] * n_obs
            # A separate Gaussian process is fitted for every variable.
            for m in range(len(self.variables_names)):
                name = self.variables_names[m]
                target = y.iloc[:, m]
                model = gaussian_process.GaussianProcessRegressor(
                    kernel=self.kernel,
                    n_restarts_optimizer=self.restarts,
                    alpha=self.alpha)
                model.fit(x, target)
                self.models[name] = model
                y_pred, y_std = model.predict(x, return_std=True)
                along_geo_pred.loc[:, name] = pd.Series(y_pred)
                along_geo_pred_sd.loc[:, name] = pd.Series(y_std)
                # Squared error of this variable divided by its predictive
                # std. The old code read an undefined `y_tr` and always
                # column 2 instead of the variable being fitted.
                squared = (target.to_numpy(dtype=float) - y_pred)**2
                for i in range(n_obs):
                    weighted_error[i] += squared[i] / y_std[i]
            # The error store is indexed as [observation, iteration].
            for i in range(n_obs):
                self.regression_error[i, k] = weighted_error[i]
            # NOTE(review): the third element used to be the std of the
            # last variable only; the per-variable table built above is
            # returned instead - confirm against the use of `y_post_std`.
            return (model, along_geo_pred, along_geo_pred_sd)
        else:
            raise ValueError(
                "Wrong regression model: select either OLS or GPR")
示例#9
0
class ggr_aac(aligncompute):
    def __init__(self,
                 graphset,
                 matcher,
                 distance,
                 regression_model='OLS',
                 nr_iterations=100,
                 alpha=1e-10,
                 kernel=None,
                 restarts=0):
        """Set up the regression-with-alignment algorithm.

        Args:
            graphset: passed to ``aligncompute.__init__``.
            matcher: passed to ``aligncompute.__init__``.
            distance: distance used to compute the regression error.
            regression_model: 'OLS' or 'GPR'.
            nr_iterations: maximum number of iterations of the algorithm.
            alpha: forwarded to the Gaussian process regressor (GPR only).
            kernel: Gaussian process kernel (GPR only); when None a
                ``ConstantKernel(1.0) * RBF(1.0)`` kernel is used.
            restarts: number of optimizer restarts of the Gaussian process
                (GPR only).
        """
        # Matcher and graph set used to compute the alignment.
        aligncompute.__init__(self, graphset, matcher)
        self.distance = distance
        self.nr_iterations = nr_iterations
        # OLS (e.g. network on scalar regression problems)
        # GPR (e.g. network on time regression problems)
        self.model_type = regression_model
        if self.model_type == 'GPR':
            self.alpha = alpha
            self.restarts = restarts
            # One fitted model per variable, keyed by variable name.
            self.models = {}
            # Identity test: `kernel == None` would call the kernel's own
            # equality operator.
            if kernel is None:
                # Default: constant kernel times RBF kernel, see
                # https://scikit-learn.org/stable/modules/gaussian_process.html#gp-kernels
                self.kernel = gaussian_process.kernels.ConstantKernel(
                    1.0) * gaussian_process.kernels.RBF(1.0)
            else:
                self.kernel = kernel
        # Errors indexed as [observation, iteration].
        self.regression_error = {}
        self.postalignment_error = {}
        # Alignments stored per iteration.
        self.f_iteration = {}
        self.f_all = {}

    def align_and_est(self):
        """Run the iterative align-and-regress procedure and store the fit.

        Steps, as implemented below:
          1. pick a random reference graph with more than one node and call
             ``two_net_match`` for every observation against it;
          2. fit the regression on the aligned set (``est``) and re-align
             every observation to its own prediction (``align_pred``);
          3. repeat step 2 until the summed regression error changes by less
             than 0.05 between two consecutive iterations, or until
             ``self.nr_iterations`` iterations have been run.

        Returns nothing. The outcome is stored on the instance by
        ``_store_estimate``. On early convergence of an OLS model,
        ``self.nr_iterations`` is additionally set to the last iteration
        index.
        """
        # INITIALIZATION: random candidate, rejecting single-node graphs.
        first_id = random.randint(0, self.aX.size() - 1)
        m_1 = self.aX.X[first_id]
        while m_1.n_nodes == 1:
            first_id = random.randint(0, self.aX.size() - 1)
            m_1 = self.aX.X[first_id]

        # Match all observations with the candidate (threads share `self`).
        Parallel(n_jobs=10, require='sharedmem')(
            delayed(self.two_net_match)(m_1, i, first_id)
            for i in range(self.aX.size()))

        # First regression and alignment with respect to its predictions.
        current = self.est(k=0)
        Parallel(n_jobs=10, require='sharedmem')(
            delayed(self.align_pred)(current[1], i, 0)
            for i in range(self.aX.size()))

        # Iterative algorithm.
        for k in range(1, self.nr_iterations):
            current = self.est(k)
            Parallel(n_jobs=6, require='sharedmem')(
                delayed(self.align_pred)(current[1], i, k)
                for i in range(self.aX.size()))

            # Step: absolute change of the total regression error between
            # the previous and the current iteration.
            n_obs = self.aX.size()
            previous_total = sum(
                [self.regression_error[i, k - 1] for i in range(n_obs)])
            current_total = sum(
                [self.regression_error[i, k] for i in range(n_obs)])
            step_range = abs(previous_total - current_total)

            if step_range < 0.05:
                # Iterations 0..k have been computed, hence k + 1 columns.
                self._store_estimate(current, k + 1)
                if self.model_type == 'OLS':
                    self.nr_iterations = k
                print("Step Range smaller than 0.05")
                return

        print("Maximum number of iteration reached.")
        # `current` is the last estimate computed (the one of k=0 when the
        # loop above did not run at all).
        self._store_estimate(current, self.nr_iterations)

    def _store_estimate(self, estimate, n_iterations):
        """Store the tuple returned by ``est`` as the final result.

        ``estimate[0]`` becomes ``self.model``. For an OLS model the
        intercept and each coefficient column are turned into networks and
        collected in ``self.network_coef``, and the two error stores are
        converted into DataFrames with one column per iteration in
        ``range(n_iterations)``. For any other model the prediction and the
        third element of the tuple are kept as ``self.y_post`` and
        ``self.y_post_std``.
        """
        model = estimate[0]
        self.model = model
        if self.model_type != 'OLS':
            # Previously skipped on early convergence, which left these two
            # attributes unset for non-OLS models.
            self.y_post = estimate[1]
            self.y_post_std = estimate[2]
            return

        self.network_coef = GraphSet()
        self.network_coef.add(
            self.give_me_a_network(pd.Series(data=model.intercept_.flatten(),
                                             index=self.variables_names),
                                   self.aX.node_attr,
                                   self.aX.edge_attr,
                                   s='Intercept'))
        for i_th in range(model.coef_.shape[1]):
            self.network_coef.add(
                self.give_me_a_network(pd.Series(data=model.coef_[:, i_th],
                                                 index=self.variables_names),
                                       self.aX.node_attr,
                                       self.aX.edge_attr,
                                       s=str('beta' + str(i_th))))

        n_obs = self.aX.size()
        self.regression_error = pd.DataFrame.from_dict({
            iteration: [
                self.regression_error[observation, iteration]
                for observation in range(n_obs)
            ]
            for iteration in range(n_iterations)
        })
        self.postalignment_error = pd.DataFrame.from_dict({
            iteration: [
                self.postalignment_error[observation, iteration]
                for observation in range(n_obs)
            ]
            for iteration in range(n_iterations)
        })

    # Align wrt a geodesic
    def align_pred(self, y_pred, i, k):
        """Align observation ``i`` to its prediction at iteration ``k``.

        Row ``i`` of ``y_pred`` is turned into a network. Its distance from
        observation ``i`` is recorded twice: with an identity matcher
        (``self.regression_error[i, k]``) and with ``self.matcher``
        (``self.postalignment_error[i, k]``). The permutation left in
        ``self.matcher.f`` is stored as the new alignment ``self.f[i]``.

        Args:
            y_pred: table of predictions, read by position with ``iloc``.
            i: index of the observation.
            k: iteration index.
        """
        # Called for its effect on self.aX before the attribute lists are
        # read below.
        self.aX.get_node_attr()
        self.aX.get_edge_attr()

        # Network corresponding to the prediction for this observation.
        predicted_row = y_pred.iloc[i]
        predicted_net = self.give_me_a_network(predicted_row,
                                               self.aX.node_attr,
                                               self.aX.edge_attr)

        # Regression error: distance without re-matching the nodes.
        identity_matcher = ID(self.distance)
        self.regression_error[i, k] = identity_matcher.dis(
            self.aX.X[i], predicted_net)

        # Post-alignment error and the alignment that produced it.
        # NOTE(review): self.matcher is shared between calls; when this
        # method runs from several threads, `self.matcher.f` may presumably
        # be overwritten between the two statements below - confirm.
        self.postalignment_error[i, k] = self.matcher.dis(
            self.aX.X[i], predicted_net)
        self.f[i] = self.matcher.f

    # Compute the generalized geodesic regression on the total space as a regression of the aligned graph set
    def est(self, k):
        """Fit the regression model on the currently aligned graph set.

        Steps:
          1. permute every graph of ``self.aX`` with ``self.f[i]`` and replace
             ``self.aX`` by a deep copy of the permuted set;
          2. turn the permuted set into a matrix ``y`` (one column per
             variable) and remember the column names in
             ``self.variables_names``;
          3. build the regressor frame ``x`` from each graph's ``s`` and store
             it in ``self.regressor``;
          4. fit the model selected by ``self.model_type``.

        Parameters
        ----------
        k : index of the current iteration, used as key / column when the
            per-iteration results are stored.

        Returns
        -------
        ``(model, along_geo_pred)`` for ``'OLS'``;
        ``(model, along_geo_pred, y_std)`` for ``'GPR'``, where ``model`` and
        ``y_std`` are the ones of the last fitted variable.

        Raises
        ------
        ValueError
            If ``self.model_type`` is neither ``'OLS'`` nor ``'GPR'``.
        """
        # Step 1: Create the current permuted dataset
        # NOTE(review): this stores a reference to self.f, not a copy.
        self.f_iteration[k] = self.f
        G_per = GraphSet()
        for i in range(self.aX.size()):
            G_temp = copy.deepcopy(self.aX.X[i])
            G_temp.permute(self.f[i])
            G_temp.s = copy.deepcopy(self.aX.X[i].s)
            G_per.add(G_temp)
        self.aX = copy.deepcopy(G_per)

        # Step 2: Transform it into a matrix
        y = G_per.to_matrix_with_attr()
        # parameter saved:
        self.variables_names = y.columns

        # Step 3: create the x vector (one row per graph, one column per
        # regressor found in the graph's ``s``)
        x = pd.DataFrame(columns=range(len(G_per.X[0].s)),
                         index=range(y.shape[0]))
        for i in range(y.shape[0]):
            x.iloc[i] = [float(regressor) for regressor in G_per.X[i].s]
        self.regressor = x

        # Step 4: fit the chosen regression model
        # Ordinary Least Square
        if self.model_type == 'OLS':
            model = linear_model.LinearRegression()
            model.fit(x, y)
            along_geo_pred = pd.DataFrame(model.predict(x),
                                          columns=self.variables_names)
            self.f_all[k] = self.f
            return (model, along_geo_pred)

        # Gaussian Process
        if self.model_type == 'GPR':
            along_geo_pred = pd.DataFrame(index=range(y.shape[0]),
                                          columns=self.variables_names)
            along_geo_pred_sd = pd.DataFrame(index=range(y.shape[0]),
                                             columns=self.variables_names)
            # A separate Gaussian process is fitted for every variable
            # (i.e. for every column of y).
            for m in range(len(self.variables_names)):
                name = self.variables_names[m]
                target = y.iloc[:, m]
                model = gaussian_process.GaussianProcessRegressor(
                    kernel=self.kernel,
                    n_restarts_optimizer=self.restarts,
                    alpha=self.alpha)
                # Fitting the Gaussian Process means finding the correct
                # hyperparameters
                model.fit(x, target)
                self.models[name] = model
                # Predict on the training regressors to compute the
                # regression error (to compare with the alignment error)
                y_pred, y_std = model.predict(x, return_std=True)
                along_geo_pred.loc[:, name] = pd.Series(y_pred)
                along_geo_pred_sd.loc[:, name] = pd.Series(y_std)
                # Squared error of the variable just fitted, weighted by the
                # predictive std. The previous code read an undefined name
                # (``y_tr``) with a hard-coded column here.
                err_euclidean = (target - y_pred).pow(2)
                err_weighted = [
                    err_euclidean.iloc[i] / y_std[i]
                    for i in range(len(y_std))
                ]
                # NOTE(review): other methods index regression_error as
                # ``[i, k]`` (array style); confirm it supports ``.iloc``.
                self.regression_error.iloc[:, k] += err_weighted
            # NOTE(review): y_std only holds the std of the LAST variable;
            # along_geo_pred_sd holds all of them but is not returned --
            # confirm which one callers expect.
            return (model, along_geo_pred, y_std)

        raise ValueError("Wrong regression model: select either OLS or GPR")

    # Given x_new is predicting the corresponding graph:
    def predict(self, x_new, std=False):
        """Predict the graph(s) corresponding to new regressor values.

        Parameters
        ----------
        x_new : pandas.DataFrame
            One row per new observation, one column per regressor.
        std : bool, default False
            When true and ``self.model_type == 'GPR'``, the model is asked for
            the predictive standard deviation as well, stored in
            ``self.y_std_pred``.

        Side effects
        ------------
        ``self.y_vec_pred`` receives the raw model prediction and
        ``self.y_net_pred`` a ``GraphSet`` with one network per predicted row.
        Each network's ``s`` is the regressor value of its row: a float when
        ``x_new`` has a single column, a list of floats otherwise.

        Raises
        ------
        TypeError
            If ``x_new`` is not a pandas DataFrame.
        """
        if not isinstance(x_new, pd.DataFrame):
            # The rows are read through DataFrame indexing below, so anything
            # else cannot be processed: fail early instead of only printing.
            raise TypeError(
                "The new observation should be a pandas dataframe of real values"
            )

        # Query the model once; the std is requested only when it is wanted
        # and the model is a Gaussian process.
        if std and self.model_type == 'GPR':
            self.y_vec_pred, self.y_std_pred = self.model.predict(
                X=x_new, return_std=True)
        else:
            self.y_vec_pred = self.model.predict(X=x_new)

        self.y_net_pred = GraphSet()
        for i in range(self.y_vec_pred.shape[0]):
            # Predictions are positional, so the matching regressor row is
            # read by position too (not by index label).
            row = x_new.iloc[i]
            if len(row) == 1:
                regressor = float(row.iloc[0])
            else:
                regressor = [float(value) for value in row]
            self.y_net_pred.add(
                self.give_me_a_network(
                    geo=pd.Series(data=self.y_vec_pred[i],
                                  index=self.variables_names),
                    n_a=self.aX.node_attr,
                    e_a=self.aX.edge_attr,
                    s=regressor))

    # These functions are auxiliary function to compute the ggr

    # geo is a pd Series
    # n_a and e_a are nodes and edges attributes
    def give_me_a_network(self, geo, n_a, e_a, s=None):
        """Rebuild a ``Graph`` from a flat, labelled vector.

        Parameters
        ----------
        geo : pandas Series whose index labels contain the numbers
            identifying each entry; the first two numbers found in a label
            are used as the ``(row, col)`` key of the graph.
        n_a : number of consecutive values collected for a key whose two
            indices are equal (label with more than two numbers).
        e_a : number of consecutive values collected for a key whose two
            indices differ (label with more than two numbers).
        s : forwarded unchanged to ``Graph``.

        Labels with exactly two numbers contribute a single value. Only the
        first label seen for a given key is used; labels with fewer than two
        numbers are ignored.
        """
        labels = geo.axes[0]
        parsed = [re.findall(r'-?\d+\.?\d*', label) for label in labels]

        attributes = {}
        for pos, numbers in enumerate(parsed):
            if len(numbers) < 2:
                continue
            key = (int(numbers[0]), int(numbers[1]))
            if key in attributes:
                # later labels of an already collected key are skipped
                continue
            if len(numbers) == 2:
                attributes[key] = [geo.loc[labels[pos]]]
                continue
            # More than two numbers in the label: take a run of consecutive
            # values starting at this position.
            width = n_a if key[0] == key[1] else e_a
            attributes[key] = [
                geo.loc[labels[pos + offset]] for offset in range(width)
            ]

        return Graph(x=attributes, adj=None, s=s)

    # Conformal prediction
    def align_est_and_predRegions(self, alpha):
        """Placeholder for conformal prediction regions (not implemented).

        Outline kept from the original notes:
          * divide the data into training and test, saving the training in aX;
          * the regressors can be found in X.s;
          * self.est and self.align_pred are the two functions for the
            estimation of the coefficients;
          * the coefficients can be extracted as self.network_coef (graphset).

        ``alpha`` is currently unused and the method always returns 0.
        """
        return 0

    # This function is used to parallelized the alignment procedure
    # receive two graphs, a matcher, an f where you are willing to save the permutations and gives back the
    # optimal permutation
    def two_net_match(self, X2, i, first_id):
        """Store in ``self.f[i]`` the permutation of graph ``i`` towards ``X2``.

        Single-observation unit of the alignment, meant to be called once per
        graph so the procedure can be parallelised.

        For ``i == first_id`` the stored value is
        ``range(self.aX.n_nodes)`` and the matcher is not called. For any
        other ``i`` the matcher is run on ``(self.aX.X[i], X2)`` and its ``f``
        is stored.
        """
        if i != first_id:
            # Align X to Y, then keep the permutation of X to go closer to Y
            self.matcher.the_dis(self.aX.X[i], X2)
            self.f[i] = self.matcher.f
            return
        self.f[first_id] = range(self.aX.n_nodes)
示例#10
0
    def align_and_est(self, max_iterations=200, eps=0.001):
        """Iteratively align the graphs to the current estimate and re-estimate it.

        A randomly chosen graph of ``self.aX`` is the first reference. Each
        iteration aligns every graph to the reference (the permutations go in
        ``self.f``), computes a new estimate with ``self.est`` and measures
        its distance from the previous one with ``self.matcher.dis``. The
        loop stops when that distance drops below ``eps`` or after
        ``max_iterations`` iterations.

        On exit ``self.mean`` holds the latest estimate and ``self.aX`` is
        replaced by a deep copy of the graphs permuted with the latest
        permutations. The graphs held by the previous ``self.aX`` are permuted
        in place before being copied.

        Parameters
        ----------
        max_iterations : maximum number of align/estimate iterations; at
            least one is expected so that every graph has a permutation.
        eps : convergence threshold on the distance between two successive
            estimates.
        """
        # Select a Random Candidate:
        first_id = random.randint(0, self.aX.size() - 1)
        m_1 = self.aX.X[first_id]
        self.f[first_id] = range(self.X.n_nodes)

        converged = False
        for self.k in range(max_iterations):
            print("\n start of iteration: " + str(self.k))
            # Drop the permutations of the previous iteration before
            # recomputing them. Clearing here rather than at the end of the
            # iteration keeps the last set available for the final alignment
            # when the iteration budget runs out.
            if self.k > 0:
                self.f.clear()
            for i in range(self.X.size()):
                # Align X to Y
                a = self.matcher.align(self.aX.X[i], m_1)
                # Permutation of X to go closer to Y
                self.f[i] = a.f

            m_2 = self.est(m_1)
            step_range = self.matcher.dis(m_1, m_2)
            # The newest estimate becomes the reference (and the result).
            m_1 = m_2
            if step_range < eps:
                converged = True
                break

        if converged:
            print("Step Range smaller than " + str(eps))
        else:
            print("Maximum number of iteration reached.")

        self.mean = m_1
        # Update aX with the final permutations. Every graph is permuted,
        # including index 0: the reference was picked at random, so graph 0
        # has no special role.
        Aligned = GraphSet()
        for i in range(self.X.size()):
            G = self.aX.X[i]
            G.permute(self.f[i])
            Aligned.add(G)
        self.aX = copy.deepcopy(Aligned)
示例#11
0
    def align_and_est(self, n_comp, scale, s, max_iterations=100, eps=0.01):
        """Alternate alignment and PCA estimation until the eigenvalues settle.

        Step 0 aligns every graph to the first one (``self.aX.X[0]``) and
        computes a first estimate with ``self.est(n_comp)``. Each following
        iteration aligns along the first component of the current estimate
        (``self.align_geo``), re-estimates, and measures the Euclidean
        distance between the old and new ``est(...)[0]`` vectors. The loop
        stops when that distance is below ``eps`` or when the iteration
        budget is used up.

        Parameters
        ----------
        n_comp : number of principal components to estimate.
        scale : stored in ``self.scale`` (if True scaling is applied to the
            GraphSet).
        s : pair ``(s_min, s_max)``, the range for the alignment wrt a
            geodesic; stored in ``self.s_min`` / ``self.s_max``.
        max_iterations : total number of iterations, step 0 included
            (default 100, the previously hard-coded value). Step 0 always
            runs.
        eps : convergence threshold (default 0.01, the previously hard-coded
            value).

        Results are stored on the instance: ``self.e_val``, ``self.scores``,
        ``self.e_vec`` (a single network when ``n_comp == 1``, a ``GraphSet``
        of networks otherwise) and ``self.barycenter_net``.
        """
        # If True scaling is applied to the GraphSet
        self.scale = scale
        # Range for the alignment wrt a geodesic
        self.s_min = s[0]
        self.s_max = s[1]

        # STEP 0: align wrt the first observation, compute the first pca
        self.f[0] = list(range(self.aX.n_nodes))
        m_1 = self.aX.X[0]
        for i in range(1, self.aX.size()):
            # Align X to Y
            a = self.matcher.align(self.aX.X[i], m_1)
            # Permutation of X to go closer to Y
            self.f[i] = a.f
        E_1 = self.est(n_comp)

        converged = False
        for _ in range(1, max_iterations):
            # STEP 1: Align wrt the first principal component
            self.align_geo(E_1[1].loc[0, :])
            # STEP 2: Compute the principal component
            E_2 = self.est(n_comp)
            # STEP 3: Step range is the difference between the eigenvalues
            step_range = math.sqrt(
                sum((new - old)**2 for new, old in zip(E_2[0], E_1[0])))
            # The newest estimate is both the next reference and the result.
            E_1 = E_2
            if step_range < eps:
                converged = True
                break

        if converged:
            print("Step Range smaller than " + str(eps))
        else:
            print("Maximum number of iteration reached.")

        # Save the latest estimate (same code for both exit paths).
        self.e_val = E_1[0]
        self.scores = E_1[2]
        if n_comp == 1:
            self.e_vec = self.give_me_a_network(E_1[1].loc[0, :],
                                                n_a=self.aX.node_attr,
                                                e_a=self.aX.edge_attr)
        else:
            G_PCA = GraphSet()
            for n_pca in range(n_comp):
                G_PCA.add(
                    self.give_me_a_network(E_1[1].loc[n_pca, :],
                                           n_a=self.aX.node_attr,
                                           e_a=self.aX.edge_attr))
            self.e_vec = G_PCA
        # NOTE(review): self.barycenter is not assigned in this method; the
        # estimator appears to set it only when scaling is off -- confirm it
        # exists when scale is True.
        self.barycenter_net = self.give_me_a_network(self.barycenter,
                                                     n_a=self.aX.node_attr,
                                                     e_a=self.aX.edge_attr)
示例#12
0
# NOTE: x1 is created and mostly filled before this excerpt; this is its last
# visible entry.
x1[5, 2] = [1]
# Second graph: a dict keyed by (i, j) pairs, each holding a one-element
# attribute list.
x2 = {}
# Entries with i == j, for indices 0..5
x2[0, 0] = [1]
x2[1, 1] = [1]
x2[2, 2] = [1]
x2[3, 3] = [1]
x2[4, 4] = [1]
x2[5, 5] = [1]
# Entries with i != j, each pair given in both directions
x2[0, 1] = [1]
x2[1, 0] = [1]
x2[1, 2] = [1]
x2[2, 1] = [1]
x2[3, 4] = [1]
x2[4, 3] = [1]
# Create Graph set:
G = GraphSet(graph_type='directed')
# Each graph carries its attribute dict x and a list s
G.add(Graph(x=x1, s=[1, 2], adj=None))
G.add(Graph(x=x2, s=[2, 3], adj=None))

# Compute the distance between the two graphs with the ID matcher, without
# matching the graphs. The original note said "euclidean distance", but the
# metric passed here is hamming().
match = ID(hamming())
match.dis(G.X[0], G.X[1])

# 2) GRAPHS with Euclidean scalar and vector attributes on both nodes and edges
# Define the graphs:
x1 = {}
x1[0, 0] = [0.813, 0.630]
x1[1, 1] = [1.606, 2.488]
x1[2, 2] = [2.300, 0.710]
x1[3, 3] = [0.950, 1.616]
x1[4, 4] = [2.046, 1.560]