Example #1
    def zipf(self, clusters_org='source'):
        ''' Zipf Analysis
            Local/Global Preferential attachment effect analysis

            Parameters
            ----------
            clusters_org: str
                cluster origin: one of ['source'|'model']
        '''
        expe = self.expe
        frontend = FrontendManager.load(expe)

        #
        # Get the Class/Cluster and local degree information
        # Reordering adjacency matrix based on Clusters/Class/Communities
        #
        clusters = None
        K = None
        if clusters_org == 'source':
            clusters = frontend.get_clusters()
        elif clusters_org == 'model':
            model = ModelManager.from_expe(expe, load=True)
            #clusters = model.get_clusters(K, skip=1)
            #clusters = model.get_communities(K)
            clusters = Louvain.get_clusters(frontend.to_directed(),
                                            resolution=10)
            if len(np.unique(clusters)) > 20 or False:
                self.log.info('Using Annealing clustering')
                clusters = Annealing(frontend.data,
                                     iterations=200,
                                     C_init=5,
                                     grow_rate=0).search()
            else:
                self.log.info('Using Louvain clustering')

        if clusters is None:
            lgg.error('No clusters here...passing')
            data_r = frontend.data
        else:
            block_hist = np.bincount(clusters)
            K = (block_hist != 0).sum()
            lgg.info('%d Clusters from `%s\':' % (K, clusters_org))
            data_r = reorder_mat(frontend.data, clusters)

        np.fill_diagonal(data_r, 0)

        from pymake.util.math import dilate
        dlt = lambda x: dilate(x) if x.sum() / x.shape[0]**2 < 0.1 else x
        ### Plot Adjacency matrix
        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.tight_layout(pad=1.6)
        adjshow(dlt(data_r), title=self.specname(expe.corpus), ax=ax1)
        #plt.figtext(.15, .1, homo_text, fontsize=12)
        #plt.suptitle(self.specname(expe.corpus))

        ### Plot Degree
        plot_degree_poly(data_r, ax=ax2)

        if expe._write:
            self.write_frames([fig], suffix='dd')
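
The key step above is `reorder_mat`, which permutes the adjacency matrix so that nodes of the same cluster become contiguous and the block structure shows up in the plot. The exact signature of `reorder_mat` is not shown here; a minimal standalone sketch of the same idea in plain NumPy (names are illustrative, not the pymake API):

import numpy as np

def reorder_by_clusters(adjacency, clusters):
    # Sort node indices by cluster label so same-cluster nodes are contiguous,
    # then apply the same permutation to rows and columns.
    order = np.argsort(clusters, kind='stable')
    return adjacency[np.ix_(order, order)]

adjacency = (np.random.rand(6, 6) < 0.3).astype(int)
clusters = np.array([1, 0, 1, 2, 0, 2])
print(reorder_by_clusters(adjacency, clusters))
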
Example #2
    def pvalue(self):
        ''' Compute Goodness of fit statistics '''
        expe = self.expe
        frontend = FrontendManager.load(expe)
        data = frontend.data

        d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
        gof = gofit(d, dc)

        if not hasattr(self.gramexp, 'Table'):
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            Meas = ['pvalue', 'alpha', 'x_min', 'n_tail']
            Table = np.empty((len(corpuses), len(Meas)))
            Table = np.column_stack((corpuses, Table))
            self.gramexp.Table = Table
            self.gramexp.Meas = Meas
        else:
            Table = self.gramexp.Table
            Meas = self.gramexp.Meas

        for i, v in enumerate(Meas):
            Table[self.corpus_pos, i+1] = gof[v]

        if self._it == self.expe_size - 1:
            tablefmt = 'latex'
            print(colored('\nPvalue Table:', 'green'))
            print(self.tabulate(Table, headers=Meas, tablefmt=tablefmt, floatfmt='.3f'))
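
`gofit(d, dc)` returns power-law goodness-of-fit statistics for the degree histogram; judging from the keys used above it exposes at least `pvalue`, `alpha`, `x_min` and `n_tail`. Its implementation is not shown here, but as a rough illustration of where the `alpha` column comes from, the continuous maximum-likelihood estimate of a power-law exponent is alpha = 1 + n / sum(ln(d_i / x_min)) over the tail d_i >= x_min (Clauset, Shalizi & Newman, 2009). A hypothetical standalone version:

import numpy as np

def estimate_alpha(degrees, x_min):
    # Continuous MLE of the power-law exponent for the tail d >= x_min;
    # a rough stand-in for the 'alpha' value reported by gofit.
    tail = np.asarray([d for d in degrees if d >= x_min], dtype=float)
    return 1.0 + len(tail) / np.log(tail / x_min).sum()

degrees = [1, 1, 2, 2, 3, 4, 5, 8, 13, 30]
print('alpha ~ %.2f' % estimate_alpha(degrees, x_min=2))
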
Example #3
    def get_roc(self, _ratio=100):
        from sklearn.metrics import roc_curve, auc, precision_recall_curve
        expe = self.expe
        model = self.model

        frontend = FrontendManager.load(expe)
        data = frontend.data

        _ratio = int(_ratio)
        _predictall = (_ratio >= 100) or (_ratio < 0)
        if not hasattr(expe, 'testset_ratio'):
            setattr(expe, 'testset_ratio', 20)

        y_true, probas = model.mask_probas(data)
        theta, phi = model.get_params()

        try:
            fpr, tpr, thresholds = roc_curve(y_true, probas)
        except Exception as e:
            print(e)
            self.log.error('cannot format expe: %s' % self.output_path)
            return

        roc_auc = auc(fpr, tpr)
        return roc_auc
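
The ROC part of this example relies only on the standard scikit-learn API: `roc_curve` sweeps decision thresholds over the scores and `auc` integrates the resulting curve. A self-contained usage example with dummy labels and probabilities (independent of pymake's `mask_probas`):

import numpy as np
from sklearn.metrics import roc_curve, auc

y_true = np.array([0, 0, 1, 1, 0, 1])               # held-out link labels
probas = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])  # predicted probabilities

fpr, tpr, thresholds = roc_curve(y_true, probas)
print('AUC: %.3f' % auc(fpr, tpr))
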
Example #4
    def load_frontend(self, skip_init=False):
        ''' See the -nld and -sld options for control over the load/save
            status of frontend data.
        '''
        #from pymake.frontend.manager import FrontendManager

        frontend = FrontendManager.load(self.expe, skip_init=skip_init)
        return frontend
Example #5
    def load_frontend(self, load=True):
        ''' :load: boolean. Load from the **fitted** file if true, else
                            do a raw initialization.
        '''
        from pymake.frontend.manager import FrontendManager

        frontend = FrontendManager.load(self.expe, load=load)
        return frontend
Example #6
    def load_frontend(self):
        ''' See the -nld and -sld options for control over the load/save
            status of frontend data.
        '''
        from pymake.frontend.manager import FrontendManager

        frontend = FrontendManager.load(self.expe)
        return frontend
Example #7
    def _preprocess(self):
        expe = self.expe

        frontend = FrontendManager.load(expe)
        if frontend:
            self._N = frontend.getN()
            expe.symmetric = frontend.is_symmetric()
        else:
            self._N = expe.N
            expe.symmetric = True

        if expe._mode == 'predictive':
            ### Generate data from a fitted model
            model = ModelManager.from_expe(expe, load=True)

            try:
                # this try is due to a method modification in __init__ that is not in the pickled object
                expe.hyperparams = model.get_hyper()
            except Exception as e:
                self.log.warning('loading hyperparam error: %s' % e)
                if model is not None:
                    model._mean_w = 0
                    expe.hyperparams = 0

        elif expe._mode == 'generative':
            ### Generate data from an un-fitted model

            expe.alpha = 1
            expe.gmma = 1 / 2
            expe.delta = [0.5, 0.5]

            if 'ilfm' in expe.model:
                keys_hyper = ('alpha', 'delta')
                hyper = (expe.alpha, expe.delta)
            else:
                keys_hyper = ('alpha', 'gmma', 'delta')
                hyper = (expe.alpha, expe.gmma, expe.delta)
            expe.hyperparams = dict(zip(keys_hyper, hyper))
            expe.hyper = 'fix'  # dummy
            model = ModelManager.from_expe(expe, load=False)

        else:
            raise NotImplementedError(
                'What generation context? predictive/generative..')

        self.log.info('=== GenNetworks === ')
        self.log.info('Mode: %s' % expe._mode)
        self.log.info('===')
        self.log.info('hyper: %s' % (str(expe.hyperparams)))

        self.frontend = frontend
        self.model = model

        if model is None:
            raise FileNotFoundError('No model for Expe at :  %s' %
                                    self.output_path)
Example #8
    def _future_stats(self):
        ''' Show data stats '''
        expe = self.expe

        corpuses = self.specname(self.gramexp.get_list('corpus'))
        if not corpuses:
            corpuses = [
                'manufacturing', 'fb_uc', 'blogs', 'emaileu', 'propro',
                'euroroad', 'generator7', 'generator12', 'generator10',
                'generator4'
            ]

        Meas = ['num_nodes', 'num_edges', 'density']
        Meas += [
            'is_symmetric', 'modularity', 'clustering_coefficient', 'net_type',
            'feat_len'
        ]
        Table = np.zeros((len(corpuses), len(Meas))) * np.nan
        Table = np.column_stack((corpuses, Table))

        for _corpus_cpt, corpus_name in enumerate(corpuses):

            expe.update(corpus=corpus_name)
            # @here: big problem of data management:
            #        if data_type is set here, input_path is wrong!?
            #        how to correctly manage this; think about it.
            #        pmk looks in pmk-temp here.
            expe.update(_data_type='networks')
            frontend = FrontendManager.load(expe)

            for i, v in enumerate(Meas):
                if frontend.data is None:
                    Table[_corpus_cpt, 1:] = 'none'
                    break
                Table[_corpus_cpt, i + 1] = getattr(frontend, v)()

        tablefmt = 'simple'  # 'latex'
        print(colored('\nStats Table :', 'green'))
        print(
            self.tabulate(Table,
                          headers=Meas,
                          tablefmt=tablefmt,
                          floatfmt='.3f'))
Example #9
    def stats(self):
        ''' Show data stats '''
        expe = self.expe
        frontend = FrontendManager.load(expe)

        try:
            #@ugly debug
            Table = self.gramexp.Table
            Meas = self.gramexp.Meas
        except AttributeError:
            # Warning: order sensitive. @deprecated Table.
            #corpuses = self.specname(self.gramexp.get_set('corpus'))
            corpuses = self.specname(self.gramexp.get_list('corpus'))
            Meas = ['num_nodes', 'num_edges', 'density']
            Meas += [
                'is_symmetric', 'modularity', 'clustering_coefficient',
                'net_type', 'feat_len'
            ]
            Table = np.zeros((len(corpuses), len(Meas))) * np.nan
            Table = np.column_stack((corpuses, Table))
            self.gramexp.Table = Table
            self.gramexp.Meas = Meas

        #print (frontend.get_data_prop())
        for i, v in enumerate(Meas):
            if frontend.data is None:
                Table[self.corpus_pos, 1:] = np.nan
                break
            value = getattr(frontend, v)()
            value = value if value is not None else np.nan
            Table[self.corpus_pos, i + 1] = value

        if self._it == self.expe_size - 1:
            tablefmt = 'simple'  # 'latex'
            print(colored('\nStats Table :', 'green'))
            print(
                self.tabulate(Table,
                              headers=Meas,
                              tablefmt=tablefmt,
                              floatfmt='.3f'))
Example #10
    def stats(self):
        ''' Show data stats '''
        expe = self.expe
        frontend = FrontendManager.load(expe)

        if self.is_first_expe():
            # Warning: order sensitive. @deprecated Table.
            #corpuses = self.specname(self.gramexp.get_set('corpus'))
            corpuses = self.specname(self.gramexp.get_list('corpus'))
            Meas = ['num_nodes', 'num_edges', 'density',
                    'is_symmetric', 'modularity', 'diameter',
                    'clustering_coefficient', 'net_type', 'feat_len']
            Meas_ = ['num_nodes', 'num_edges', 'density',
                     'is_symmetric', 'modularity', 'diameter',
                     'cluster-coef', 'net-type', 'feat-len']
            Table = np.zeros((len(corpuses), len(Meas))) * np.nan
            Table = np.column_stack((corpuses, Table))
            self.D.Table = Table
            self.D.Meas = Meas
            self.D.Meas_ = Meas_

        Table = self.D.Table
        Meas = self.D.Meas

        for i, v in enumerate(Meas):
            if frontend.data is None:
                Table[self.corpus_pos, 1:] = np.nan
                break
            value = getattr(frontend, v)()
            value = value if value is not None else np.nan
            Table[self.corpus_pos, i+1] = value

        if hasattr(frontend, '_check'):
            frontend._check()

        if self.is_last_expe():
            tablefmt = 'simple' # 'latex'
            print(colored('\nStats Table :', 'green'))
            Meas_ = self.D.Meas_
            print(self.tabulate(Table, headers=Meas_, tablefmt=tablefmt, floatfmt='.3f'))
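
The stats variants above share one pattern: a table whose first column holds the corpus names and whose remaining columns are per-corpus measures, rendered once at the end with `self.tabulate`. Assuming `self.tabulate` delegates to the `tabulate` package (the `tablefmt`/`floatfmt` keywords suggest it), the final rendering step looks roughly like this, with placeholder names and values:

import numpy as np
from tabulate import tabulate

Meas = ['num_nodes', 'num_edges', 'density']
corpuses = ['corpus_a', 'corpus_b']                        # placeholder names
Table = np.array([[100, 450, 0.045], [250, 1200, 0.019]])  # placeholder values
Table = np.column_stack((corpuses, Table))

# With fewer headers than columns, tabulate names the rightmost columns,
# so the unnamed corpus column stays first.
print(tabulate(Table, headers=Meas, tablefmt='simple', floatfmt='.3f'))
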
Example #11
    def build_net(self):

        frontend = FrontendManager.load(self.expe)
        prop = frontend.get_data_prop()
        msg = frontend.template(prop)
        print(msg)
Example #12
    def burstiness(self, clusters_org='source', _type='local'):
        '''Zipf Analysis
           (global burstiness) + local burstiness + feature burstiness
        '''
        expe = self.expe
        frontend = FrontendManager.load(expe)
        data = frontend.data
        figs = []

        # Global burstiness
        d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
        fig = plt.figure()
        plot_degree(data, spec=True, title=self.specname(expe.corpus))
        #plot_degree_poly(data, spec=True, title=expe.corpus)

        gof = gofit(d, dc)
        if not gof:
            return

        alpha = gof['alpha']
        x_min = gof['x_min']
        y_max = gof['y_max']
        # plot linear law from power law estimation
        #plt.figure()
        idx = d.searchsorted(x_min)
        i = int(idx - 0.1 * len(d))
        idx = i if i >= 0 else idx
        x = d[idx:]
        ylin = np.exp(-alpha * np.log(x/float(x_min)) + np.log(y_max))
        #ylin = np.exp(-alpha * np.log(x/float(x_min)) + np.log((alpha-1)/x_min))

        # Hack xticks
        fig.canvas.draw() # !
        lim = plt.gca().get_xlim() # !
        locs, labels = plt.xticks()

        idx_xmin = locs.searchsorted(x_min)
        locs = np.insert(locs, idx_xmin, x_min)
        labels.insert(idx_xmin, plt.Text(text='x_min'))
        plt.xticks(locs, labels)
        plt.gca().set_xlim(lim)

        fit = np.polyfit(np.log(d), np.log(dc), deg=1)
        poly_fit = fit[0] * np.log(d) + fit[1]
        diff = np.abs(poly_fit[-1] - np.log(ylin[-1]))
        ylin = np.exp(np.log(ylin) + diff * 0.75)
        #\#

        plt.plot(x, ylin, 'g--', label='power %.2f' % alpha)
        figs.append(plt.gcf())

        # Local burstiness

        #
        # Get the Class/Cluster and local degree information
        # Reordering adjacency matrix based on Clusters/Class/Communities
        #
        clusters = None
        K = None
        if clusters_org == 'source':
            clusters = frontend.get_clusters()
        elif clusters_org == 'model':
            model = ModelManager.from_expe(expe, load=True)
            #clusters = model.get_clusters(K, skip=1)
            #clusters = model.get_communities(K)
            clusters = Louvain.get_clusters(frontend.to_directed(), resolution=10)
            if len(np.unique(clusters)) > 20 or True:
                clusters = Annealing(frontend.data, iterations=200, C_init=5, grow_rate=0).search()

        if clusters is None:
            lgg.error('No clusters here...passing')
            return
        else:
            block_hist = np.bincount(clusters)
            K = (block_hist != 0).sum()
            lgg.info('%d Clusters from `%s\':' % (K, clusters_org))

        expe.K = K
        assert 'model' not in expe
        expe.model = 'no_model'
        #data_r, labels= reorder_mat(data, clusters, labels=True)
        Table, Meas = self.init_fit_tables(_type=_type)

        # Just inner degree
        f = plt.figure()
        ax = f.gca()
        #f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)

        # assume symmetric
        it_k = 0
        np.fill_diagonal(data, 0)
        for l in np.arange(K):
            for k in np.arange(K):
                if k != l:
                    continue

                ixgrid = np.ix_(clusters == k, clusters == l)

                if k == l:
                    title = 'Inner degree'
                    y = np.zeros(data.shape) # some zeros...
                    y[ixgrid] = data[ixgrid]
                    #ax = ax1
                else:
                    title = 'Outer degree'
                    y = np.zeros(data.shape) # some zeros...
                    y[ixgrid] = data[ixgrid]
                    #ax = ax2

                #
                title = ''
                #/#

                d, dc = degree_hist(adj_to_degree(y))
                if len(d) == 0: continue
                plot_degree_2((d,dc,None), logscale=True, colors=True, line=True, ax=ax, title=title)

                gof = gofit(d, dc)
                if not gof:
                    continue

                for i, v in enumerate(Meas):
                    Table[self.corpus_pos, i, it_k] = gof[v] #* y.sum() / TOT
                it_k += 1

        plt.suptitle(self.specname(expe.corpus))
        figs.append(plt.gcf())

        # Features burstiness
        plt.figure()
        hist, label = sorted_perm(block_hist, reverse=True)
        bins = len(hist)
        plt.bar(range(bins), hist)
        plt.xticks(np.arange(bins)+0.5, label)
        plt.xlabel('Class labels')
        plt.title('Block sizes (max assignment)')
        figs.append(plt.gcf())

        if expe._write:
            self.write_frames(figs)

        if self._it == self.expe_size - 1:
            for _model, table in self.gramexp.tables.items():

                # Mean and standard deviation
                table_mean = np.char.array(np.around(table.mean(2), decimals=3)).astype("|S20")
                table_std = np.char.array(np.around(table.std(2), decimals=3)).astype("|S20")
                table = table_mean + b' $\\pm$ ' + table_std

                # Table formatting
                corpuses = self.specname(self.gramexp.get_set('corpus'))
                table = np.column_stack((self.specname(corpuses), table))
                tablefmt = 'simple'
                table = self.tabulate(table,
                                      headers=['__' + _model.upper() + '__'] + Meas,
                                      tablefmt=tablefmt, floatfmt='.3f')
                print()
                print(table)
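
A note on the overlay in the global-burstiness block above: a power law proportional to (d / x_min)**(-alpha) is a straight line of slope -alpha in log-log coordinates, log y = -alpha * log(d / x_min) + log(y_max), which is exactly what `ylin` computes before the `diff * 0.75` term shifts it vertically towards the empirical histogram. A minimal sketch of that overlay with illustrative values:

import numpy as np
import matplotlib.pyplot as plt

alpha, x_min, y_max = 2.5, 3.0, 100.0      # illustrative fit values
d = np.linspace(x_min, 50, 100)

# Straight line in log-log space: log y = -alpha * log(d / x_min) + log(y_max)
ylin = np.exp(-alpha * np.log(d / x_min) + np.log(y_max))

plt.loglog(d, ylin, 'g--', label='power %.2f' % alpha)
plt.legend()
plt.show()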