def zipf(self, clusters_org='source'):
    ''' Zipf Analysis.

        Local/Global preferential attachment effect analysis: draws the
        adjacency matrix (reordered by clusters when some are available)
        next to the degree plot of the current corpus.

        Parameters
        ----------
        clusters_org: str
            Origin of the clusters, one of ['source'|'model'].
            'source' asks the frontend for its clusters; 'model' runs
            Louvain and switches to Annealing when Louvain yields more
            than 20 distinct clusters. Any other value leaves the
            matrix in its original order.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)

    # Choose the node partition used to reorder the adjacency matrix.
    if clusters_org == 'source':
        partition = frontend.get_clusters()
    elif clusters_org == 'model':
        # The fitted model is still loaded here, although its clusters
        # are not the ones used below.
        ModelManager.from_expe(expe, load=True)
        partition = Louvain.get_clusters(frontend.to_directed(), resolution=10)
        if len(np.unique(partition)) > 20:
            self.log.info('Using Annealing clustering')
            annealer = Annealing(frontend.data, iterations=200, C_init=5, grow_rate=0)
            partition = annealer.search()
        else:
            self.log.info('Using Louvain clustering')
    else:
        partition = None

    if partition is None:
        lgg.error('No clusters here...passing')
        adjacency = frontend.data
    else:
        # Number of non-empty clusters (only reported in the log).
        n_blocks = (np.bincount(partition) != 0).sum()
        lgg.info('%d Clusters from `%s\':' % (n_blocks, clusters_org))
        adjacency = reorder_mat(frontend.data, partition)

    # Self-loops are removed in place before plotting.
    np.fill_diagonal(adjacency, 0)

    from pymake.util.math import dilate

    def maybe_dilate(mat):
        # Dilate only when the matrix sum over the squared number of
        # rows is below 0.1.
        if mat.sum() / mat.shape[0]**2 < 0.1:
            return dilate(mat)
        return mat

    # Left panel: adjacency matrix, right panel: degree plot.
    fig, (ax_adj, ax_deg) = plt.subplots(1, 2)
    fig.tight_layout(pad=1.6)
    adjshow(maybe_dilate(adjacency), title=self.specname(expe.corpus), ax=ax_adj)
    plot_degree_poly(adjacency, ax=ax_deg)

    if expe._write:
        self.write_frames([fig], suffix='dd')
def pvalue(self):
    ''' Compute Goodness of fit statistics.

        Fits the degree histogram of the current corpus with `gofit`
        and stores the measures ('pvalue', 'alpha', 'x_min', 'n_tail')
        in a table shared through `self.gramexp`. The first column of
        the table holds the corpus names; the row written is
        `self.corpus_pos`. The table is printed once `self._it` reaches
        the last expe.

        Rows that are never filled hold NaN instead of uninitialised
        memory, and a falsy `gofit` result leaves the row untouched
        instead of failing on the lookup.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)
    data = frontend.data

    d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
    gof = gofit(d, dc)

    if not hasattr(self.gramexp, 'Table'):
        # First call: create the shared table.
        corpuses = self.specname(self.gramexp.get_set('corpus'))
        Meas = ['pvalue', 'alpha', 'x_min', 'n_tail']
        # NaN-filled so that corpora without a fit are visible as such.
        Table = np.full((len(corpuses), len(Meas)), np.nan)
        Table = np.column_stack((corpuses, Table))
        self.gramexp.Table = Table
        self.gramexp.Meas = Meas
    else:
        Table = self.gramexp.Table
        Meas = self.gramexp.Meas

    if gof:
        # Column 0 is the corpus name, measures start at column 1.
        for i, v in enumerate(Meas):
            Table[self.corpus_pos, i+1] = gof[v]
    else:
        lgg.error('No goodness of fit available...passing')

    if self._it == self.expe_size - 1:
        tablefmt = 'latex'
        print(colored('\nPvalue Table:', 'green'))
        print(self.tabulate(Table, headers=Meas, tablefmt=tablefmt, floatfmt='.3f'))
def get_roc(self, _ratio=100):
    ''' Return the area under the ROC curve of the model predictions.

        The labels and scores come from `self.model.mask_probas` applied
        to the frontend data.

        Parameters
        ----------
        _ratio: int or int-like
            Converted with `int()`; not used further by this method.

        Returns
        -------
        The value of `auc(fpr, tpr)`, or None when `roc_curve` raises
        (the exception is printed and an error is logged).
    '''
    from sklearn.metrics import roc_curve, auc, precision_recall_curve

    expe, model = self.expe, self.model
    data = FrontendManager.load(expe).data

    _ratio = int(_ratio)

    # Default test set ratio when the expe does not define one.
    if not hasattr(expe, 'testset_ratio'):
        expe.testset_ratio = 20

    y_true, probas = model.mask_probas(data)
    # The parameters are fetched (and unpacked) but not used below.
    _theta, _phi = model.get_params()

    try:
        fpr, tpr, _thresholds = roc_curve(y_true, probas)
    except Exception as err:
        print(err)
        self.log.error('cant format expe : %s' % (self.output_path))
        return None
    else:
        return auc(fpr, tpr)
def load_frontend(self, skip_init=False):
    ''' Load and return the frontend of the current expe.

        `skip_init` is forwarded unchanged to `FrontendManager.load`.
        See -nld and -sld option for control over load/save status
        of frontend data.
    '''
    return FrontendManager.load(self.expe, skip_init=skip_init)
def load_frontend(self, load=True):
    ''' Load and return the frontend of the current expe.

        :load: boolean, forwarded to `FrontendManager.load`. When true
               the frontend is loaded from the **fitted** file,
               otherwise it is a raw initialization.
    '''
    from pymake.frontend.manager import FrontendManager

    return FrontendManager.load(self.expe, load=load)
def load_frontend(self):
    ''' Load and return the frontend of the current expe.

        See -nld and -sld option for control over load/save status
        of frontend data.
    '''
    from pymake.frontend.manager import FrontendManager

    return FrontendManager.load(self.expe)
def _preprocess(self):
    ''' Prepare the frontend and the model for network generation.

        Sets `self._N` and `expe.symmetric` from the frontend (or from
        the expe when the frontend is falsy), then builds the model
        according to `expe._mode`:

        * 'predictive': load a fitted model and read its hyperparameters,
        * 'generative': set fixed hyperparameters on the expe and build
          an un-fitted model.

        Stores the results in `self.frontend` and `self.model`.

        Raises
        ------
        NotImplementedError
            when `expe._mode` is neither 'predictive' nor 'generative'.
        FileNotFoundError
            when no model could be obtained.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)

    if not frontend:
        # No data: rely on the size given by the expe.
        self._N = expe.N
        expe.symmetric = True
    else:
        self._N = frontend.getN()
        expe.symmetric = frontend.is_symmetric()

    mode = expe._mode
    if mode == 'predictive':
        ### Generate data from a fitted model
        model = ModelManager.from_expe(expe, load=True)
        try:
            # get_hyper() may be missing from older pickled models.
            expe.hyperparams = model.get_hyper()
        except Exception as err:
            self.log.warning('loading hyperparam error: %s' % err)
            if model is not None:
                model._mean_w = 0
                expe.hyperparams = 0
    elif mode == 'generative':
        ### Generate data from a un-fitted model
        expe.alpha = 1
        expe.gmma = 1 / 2
        expe.delta = [0.5, 0.5]

        # 'ilfm' models take no gmma hyperparameter.
        if 'ilfm' in expe.model:
            hyper_names = ('alpha', 'delta')
        else:
            hyper_names = ('alpha', 'gmma', 'delta')
        expe.hyperparams = {name: getattr(expe, name) for name in hyper_names}

        expe.hyper = 'fix'  # dummy
        model = ModelManager.from_expe(expe, load=False)
    else:
        raise NotImplementedError(
            'What generation context ? predictive/generative..')

    self.log.info('=== GenNetworks === ')
    self.log.info('Mode: %s' % expe._mode)
    self.log.info('===')
    self.log.info('hyper: %s' % (str(expe.hyperparams)))

    self.frontend = frontend
    self.model = model

    if model is None:
        raise FileNotFoundError('No model for Expe at : %s' % self.output_path)
def _future_stats(self):
    ''' Show data stats.

        Builds, in a single call, a table with one row per corpus and
        one column per measure, then prints it. For every corpus the
        expe is updated (`corpus` and `_data_type='networks'`), the
        frontend is loaded and each measure is obtained by calling the
        frontend method of the same name.

        Each corpus is written on the row given by its position in the
        corpus list, so the rows do not overwrite each other. A corpus
        without data gets 'none' in all its measure columns.
    '''
    expe = self.expe

    corpuses = self.specname(self.gramexp.get_list('corpus'))
    if not corpuses:
        # Fallback list when the expe design defines no corpus.
        corpuses = [
            'manufacturing', 'fb_uc', 'blogs', 'emaileu', 'propro',
            'euroroad', 'generator7', 'generator12', 'generator10',
            'generator4'
        ]

    Meas = ['num_nodes', 'num_edges', 'density']
    Meas += [
        'is_symmetric', 'modularity', 'clustering_coefficient', 'net_type',
        'feat_len'
    ]
    # Column 0 holds the corpus names, measures start at column 1.
    Table = np.zeros((len(corpuses), len(Meas))) * np.nan
    Table = np.column_stack((corpuses, Table))

    for row, corpus_name in enumerate(corpuses):
        expe.update(corpus=corpus_name)
        # @Heeere: big problme of data management:
        #   if data_type is set heren input_path os wrong !?
        #   how to corretcly manage this, think about it.
        #   pmk looks in pmk-temp here.
        expe.update(_data_type='networks')
        frontend = FrontendManager.load(expe)

        # The availability of the data does not depend on the measure.
        if frontend.data is None:
            Table[row, 1:] = 'none'
            continue

        for i, v in enumerate(Meas):
            Table[row, i + 1] = getattr(frontend, v)()

    tablefmt = 'simple'  # 'latex'
    print(colored('\nStats Table :', 'green'))
    print(
        self.tabulate(Table,
                      headers=Meas,
                      tablefmt=tablefmt,
                      floatfmt='.3f'))
def stats(self):
    ''' Show data stats.

        Fills the row `self.corpus_pos` of a table shared through
        `self.gramexp` with the measures of the current frontend; each
        measure is obtained by calling the frontend method of the same
        name. The table is created on first use and printed once
        `self._it` reaches the last expe.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)

    try:
        table, measures = self.gramexp.Table, self.gramexp.Meas
    except AttributeError:
        # Not created yet. Warning order sensitive @deprecated Table.
        names = self.specname(self.gramexp.get_list('corpus'))
        measures = [
            'num_nodes', 'num_edges', 'density', 'is_symmetric',
            'modularity', 'clustering_coefficient', 'net_type', 'feat_len'
        ]
        blank = np.full((len(names), len(measures)), np.nan)
        # Column 0 holds the corpus names, measures start at column 1.
        table = np.column_stack((names, blank))
        self.gramexp.Table = table
        self.gramexp.Meas = measures

    for col, meas in enumerate(measures, start=1):
        if frontend.data is None:
            # No data for this corpus: blank the whole row and stop.
            table[self.corpus_pos, 1:] = np.nan
            break
        result = getattr(frontend, meas)()
        table[self.corpus_pos, col] = np.nan if result is None else result

    if self._it != self.expe_size - 1:
        return

    # Last expe: show the completed table.
    print(colored('\nStats Table :', 'green'))
    print(
        self.tabulate(table,
                      headers=measures,
                      tablefmt='simple',
                      floatfmt='.3f'))
def stats(self):
    ''' Show data stats.

        On the first expe, creates the table shared through `self.D`
        (one row per corpus, one column per measure) together with the
        measure names and their display headers. On every expe, fills
        the row `self.corpus_pos` by calling the frontend method named
        after each measure, then calls `frontend._check()` when the
        frontend has one. On the last expe, prints the table.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)

    if self.is_first_expe():
        # Warning order sensitive @deprecated Table.
        names = self.specname(self.gramexp.get_list('corpus'))

        # Frontend method names...
        self.D.Meas = ['num_nodes', 'num_edges', 'density',
                       'is_symmetric', 'modularity', 'diameter',
                       'clustering_coefficient', 'net_type', 'feat_len']
        # ...and the matching column headers used for display.
        self.D.Meas_ = ['num_nodes', 'num_edges', 'density',
                        'is_symmetric', 'modularity', 'diameter',
                        'cluster-coef', 'net-type', 'feat-len']

        blank = np.full((len(names), len(self.D.Meas)), np.nan)
        # Column 0 holds the corpus names, measures start at column 1.
        self.D.Table = np.column_stack((names, blank))

    table = self.D.Table
    measures = self.D.Meas

    for col, meas in enumerate(measures, start=1):
        if frontend.data is None:
            # No data for this corpus: blank the whole row and stop.
            table[self.corpus_pos, 1:] = np.nan
            break
        result = getattr(frontend, meas)()
        table[self.corpus_pos, col] = np.nan if result is None else result

    if hasattr(frontend, '_check'):
        frontend._check()

    if not self.is_last_expe():
        return

    print(colored('\nStats Table :', 'green'))
    headers = self.D.Meas_
    print(self.tabulate(table, headers=headers, tablefmt='simple', floatfmt='.3f'))
def build_net(self):
    ''' Print the data properties of the frontend.

        The properties returned by `get_data_prop()` are rendered with
        the frontend's `template()` and printed.
    '''
    net = FrontendManager.load(self.expe)
    properties = net.get_data_prop()
    print(net.template(properties))
def burstiness(self, clusters_org='source', _type='local'):
    ''' Zipf Analysis (global burstiness) + local burstiness + feature burstiness.

        Produces three figures for the current corpus:

        1. the degree plot of the whole network with the power law
           estimated by `gofit` drawn on top of it,
        2. the degree plots of the within-cluster ("inner") edges,
        3. a bar plot of the cluster sizes.

        The goodness of fit measures of each cluster are stored in the
        table returned by `self.init_fit_tables`, and the tables of
        `self.gramexp.tables` are printed (mean +/- std over their
        third axis) once `self._it` reaches the last expe.

        Parameters
        ----------
        clusters_org: str
            Origin of the clusters, 'source' (frontend clusters) or
            'model' (Annealing clustering, see the note in the code).
        _type: str
            Forwarded to `self.init_fit_tables`.

        Returns
        -------
        None. Returns early when the global fit is falsy or when no
        clusters are available.
    '''
    expe = self.expe
    frontend = FrontendManager.load(expe)
    data = frontend.data
    figs = []

    # Global burstiness
    d, dc = degree_hist(adj_to_degree(data), filter_zeros=True)
    fig = plt.figure()
    plot_degree(data, spec=True, title=self.specname(expe.corpus))

    gof = gofit(d, dc)
    if not gof:
        return

    alpha = gof['alpha']
    x_min = gof['x_min']
    y_max = gof['y_max']

    # Plot linear law from power law estimation. The line starts a bit
    # before x_min (10% of the number of points), when that is possible.
    idx = d.searchsorted(x_min)
    i = int(idx - 0.1 * len(d))
    idx = i if i >= 0 else idx
    x = d[idx:]
    ylin = np.exp(-alpha * np.log(x/float(x_min)) + np.log(y_max))

    # Hack xticks: add an 'x_min' tick while keeping the current limits.
    # The canvas is drawn first, then limits and ticks are read back.
    fig.canvas.draw()
    lim = plt.gca().get_xlim()
    locs, labels = plt.xticks()

    idx_xmin = locs.searchsorted(x_min)
    locs = np.insert(locs, idx_xmin, x_min)
    labels.insert(idx_xmin, plt.Text(text='x_min'))
    plt.xticks(locs, labels)
    plt.gca().set_xlim(lim)

    # Shift the power law line by 0.75 times its log-distance to a
    # degree-1 polynomial fit of the log-log histogram, measured at
    # the last point.
    fit = np.polyfit(np.log(d), np.log(dc), deg=1)
    poly_fit = fit[0] * np.log(d) + fit[1]
    diff = np.abs(poly_fit[-1] - np.log(ylin[-1]))
    ylin = np.exp(np.log(ylin) + diff*0.75)

    plt.plot(x, ylin, 'g--', label='power %.2f' % alpha)
    figs.append(plt.gcf())

    # Local burstiness
    #
    # Get the Class/Cluster and local degree information
    # Reordering Adjacency Matrix based on Clusters/Class/Communities
    #
    clusters = None
    K = None
    if clusters_org == 'source':
        clusters = frontend.get_clusters()
    elif clusters_org == 'model':
        # The fitted model is loaded but its clusters are not used.
        model = ModelManager.from_expe(expe, load=True)
        clusters = Louvain.get_clusters(frontend.to_directed(), resolution=10)
        # NOTE(review): `or True` makes this branch unconditional, so the
        # Louvain clusters are always replaced by the Annealing ones.
        if len(np.unique(clusters)) > 20 or True:
            clusters = Annealing(frontend.data, iterations=200, C_init=5, grow_rate=0).search()

    if clusters is None:
        lgg.error('No clusters here...passing')
        return

    block_hist = np.bincount(clusters)
    # Number of non-empty clusters.
    K = (block_hist != 0).sum()
    lgg.info('%d Clusters from `%s\':' % (K, clusters_org))

    expe.K = K
    assert 'model' not in expe
    expe.model = 'no_model'
    Table, Meas = self.init_fit_tables(_type=_type)

    # Just inner degree
    f = plt.figure()
    ax = f.gca()

    # assume symmetric
    it_k = 0
    # Self-loops are removed in place.
    np.fill_diagonal(data, 0)
    # NOTE(review): labels are visited as 0..K-1, which presumably
    # assumes contiguous cluster labels -- confirm.
    for l in np.arange(K):
        for k in np.arange(K):
            # Only the diagonal blocks are analysed; the 'Outer degree'
            # branch below is currently never reached.
            if k != l:
                continue

            ixgrid = np.ix_(clusters == k, clusters == l)

            if k == l:
                title = 'Inner degree'
                y = np.zeros(data.shape)  # some zeros...
                y[ixgrid] = data[ixgrid]
            else:
                title = 'Outer degree'
                y = np.zeros(data.shape)  # some zeros...
                y[ixgrid] = data[ixgrid]

            d, dc = degree_hist(adj_to_degree(y))
            if len(d) == 0:
                continue
            plot_degree_2((d, dc, None), logscale=True, colors=True, line=True, ax=ax, title=title)

            gof = gofit(d, dc)
            if not gof:
                continue

            # One slot of the last axis per successfully fitted cluster.
            for i, v in enumerate(Meas):
                Table[self.corpus_pos, i, it_k] = gof[v]
            it_k += 1

    plt.suptitle(self.specname(expe.corpus))
    figs.append(plt.gcf())

    # Features burstiness
    plt.figure()
    hist, label = sorted_perm(block_hist, reverse=True)
    bins = len(hist)
    plt.bar(range(bins), hist)
    plt.xticks(np.arange(bins)+0.5, label)
    plt.xlabel('Class labels')
    plt.title('Blocks Size (max assignement)')

    figs.append(plt.gcf())

    if expe._write:
        self.write_frames(figs)

    if self._it == self.expe_size - 1:
        for _model, table in self.gramexp.tables.items():
            # Mean and standard deviation
            table_mean = np.char.array(np.around(table.mean(2), decimals=3)).astype("|S20")
            table_std = np.char.array(np.around(table.std(2), decimals=3)).astype("|S20")
            # Raw bytes literal: `\p` is not a valid escape sequence.
            table = table_mean + br' $\pm$ ' + table_std

            # Table formatting
            corpuses = self.specname(self.gramexp.get_set('corpus'))
            table = np.column_stack((self.specname(corpuses), table))
            tablefmt = 'simple'
            table = self.tabulate(table, headers=['__'+_model.upper()+'__']+Meas, tablefmt=tablefmt, floatfmt='.3f')
            print()
            print(table)