class AP(object):
    """Thin wrapper around ``sklearn.cluster.AffinityPropagation``.

    Exposes the estimator's fit/predict/param API unchanged and adds
    ``get_attributes`` as a convenience accessor for the fitted attributes.
    """

    def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False):
        """Create the underlying AffinityPropagation estimator.

        :param damping: damping factor for the message updates
            (0.5 <= damping < 1).
        :param max_iter: maximum number of message-passing iterations.
        :param convergence_iter: number of iterations with no change in the
            estimated number of clusters before declaring convergence.
        :param copy: whether to copy the input affinity matrix.
        :param preference: per-point preference; points with larger values
            are more likely to be chosen as exemplars.
        :param affinity: similarity measure, 'euclidean' or 'precomputed'.
        :param verbose: whether the estimator prints progress messages.
        """
        self.model = AffinityPropagation(damping=damping,
                                         max_iter=max_iter,
                                         convergence_iter=convergence_iter,
                                         copy=copy,
                                         preference=preference,
                                         affinity=affinity,
                                         verbose=verbose)

    def fit(self, x, y=None):
        """Fit the estimator on ``x``; returns ``self`` to allow chaining."""
        self.model.fit(X=x, y=y)
        return self

    def fit_predict(self, x, y=None):
        """Fit on ``x`` and return the cluster label of each sample."""
        return self.model.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the estimator's parameters as a dict."""
        return self.model.get_params(deep=deep)

    def predict(self, x):
        """Return the nearest-exemplar cluster label for each sample."""
        return self.model.predict(X=x)

    def set_params(self, **params):
        """Update estimator parameters; returns ``self`` to allow chaining."""
        self.model.set_params(**params)
        return self

    def get_attributes(self):
        """Return the fitted attributes of the underlying estimator.

        :return: tuple ``(cluster_centers, cluster_centers_indices, labels,
            affinity_matrix, n_iter)``; requires :meth:`fit` to have been
            called first.
        """
        cluster_centers = self.model.cluster_centers_
        cluster_centers_indices = self.model.cluster_centers_indices_
        labels = self.model.labels_
        affinity_matrix = self.model.affinity_matrix_
        n_iter = self.model.n_iter_
        return (cluster_centers, cluster_centers_indices, labels,
                affinity_matrix, n_iter)
# Build the pairwise similarity matrix for Affinity Propagation from the
# reconstituted gene-set matrix, then run AP on it.
# Single-argument print() calls: identical output under Python 2, valid
# under Python 3.
print("Calculating correlation matrix between gene sets... This will be used as similarity matrix for Affinity Propagation")
#matrix_corr = np.corrcoef(df_reconstituted_genesets, rowvar=0) # NUMPY approach | slighty faster than Pandas
# PANDAS approach. "min_periods" should not be needed, but it ensures that no
# "NaN" values are present to give weird results.
matrix_corr = df_reconstituted_genesets.corr(
    method='pearson',
    min_periods=df_reconstituted_genesets.shape[0])
print("Dimension of correlation matrix: [{} x {}]".format(matrix_corr.shape[0], matrix_corr.shape[1]))

################## Running AP ##################
# sklearn defaults for reference:
# AffinityPropagation(damping=0.5, max_iter=200, convergence_iter=15,
#                     copy=True, preference=None, affinity='euclidean',
#                     verbose=False)
# 'precomputed' because the correlation matrix above is the similarity matrix.
af_obj = AffinityPropagation(affinity='precomputed',
                             max_iter=10000,
                             convergence_iter=1000)  # using almost only default parameters
print("Affinity Propagation parameters:")
for param, val in af_obj.get_params().items():
    print("\t{}: {}".format(param, val))

print("Performing Affinity Propagation..")
af = af_obj.fit(matrix_corr)
n_iter = af.n_iter_
print("Affinity Propagation done")
print("Number of iterations used: {}".format(n_iter))

### Saving labels and centers
# cluster_centers_indices_: indices of the exemplars, values in {0..n_samples-1}
cluster_centers_indices = af.cluster_centers_indices_
# labels_: shape (n_samples,), cluster assignment in {0..n_clusters-1}
labels = af.labels_

### Display some stats
n_clusters = len(cluster_centers_indices)
# http://pandas.pydata.org/pandas-docs/stable/computation.html#correlation
# df.corr(): pairwise correlation of columns, excluding NA/null values.
#matrix_corr = df_reconstituted_genesets.corr(method='pearson', min_periods=df_reconstituted_genesets.shape[0])
# --> Note that "min_periods=df_reconstituted_genesets_t.shape[0]" should not
#     be needed, but it ensures that no "NaN" values are present to give weird
#     results.

### METHOD IN USE
# Single-argument print() calls: identical output under Python 2, valid
# under Python 3.
print("Calculating correlation matrix between gene sets... This will be used as similarity matrix for Affinity Propagation")
#matrix_corr = np.corrcoef(df_reconstituted_genesets, rowvar=0) # NUMPY approach | slighty faster than Pandas
# PANDAS approach | min_periods guards against NaN values sneaking in.
matrix_corr = df_reconstituted_genesets.corr(
    method='pearson',
    min_periods=df_reconstituted_genesets.shape[0])
print("Dimension of correlation matrix: [{} x {}]".format(matrix_corr.shape[0], matrix_corr.shape[1]))

################## Running AP ##################
# sklearn defaults for reference:
# AffinityPropagation(damping=0.5, max_iter=200, convergence_iter=15,
#                     copy=True, preference=None, affinity='euclidean',
#                     verbose=False)
af_obj = AffinityPropagation(affinity='precomputed',
                             max_iter=10000,
                             convergence_iter=1000)  # using almost only default parameters
print("Affinity Propagation parameters:")
for param, val in af_obj.get_params().items():
    print("\t{}: {}".format(param, val))

print("Performing Affinity Propagation..")
# Run the (already configured) Affinity Propagation estimator and pull out
# the fitted attributes. Single-argument print() calls: identical output
# under Python 2, valid under Python 3.
af = af_obj.fit(matrix_corr)
n_iter = af.n_iter_
print("Affinity Propagation done")
print("Number of iterations used: {}".format(n_iter))

### Saving labels and centers
# cluster_centers_indices_: indices of the exemplars, values in {0..n_samples-1}
cluster_centers_indices = af.cluster_centers_indices_
# labels_: shape (n_samples,), cluster assignment of each data point,
# values in {0..n_clusters-1}
labels = af.labels_
class AP(object):
    """Wrapper around ``sklearn.cluster.AffinityPropagation``.

    Mirrors the estimator's fit/predict/param API and exposes one accessor
    per fitted attribute.
    """

    def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
                 copy=True, preference=None, affinity='euclidean',
                 verbose=False, random_state='warn'):
        """Create the underlying AffinityPropagation estimator.

        Parameters
        ----------
        damping : float, optional
            Damping factor for the message updates, between 0.5 and 1.
            The default is .5.
        max_iter : int, optional
            Maximum number of iterations. The default is 200.
        convergence_iter : int, optional
            Number of iterations with no change in the estimated number of
            clusters before stopping. The default is 15.
        copy : bool, optional
            Whether to copy the input data. The default is True.
        preference : array-like or float, optional
            Per-point preference; larger values make a point more likely to
            be chosen as an exemplar. The default is None.
        affinity : str, optional
            One of {"euclidean", "precomputed"} — Euclidean distance or a
            precomputed similarity matrix. The default is 'euclidean'.
        verbose : bool, optional
            Whether to print progress messages. The default is False.
        random_state : optional
            Passed through to the estimator. The default is 'warn'
            (sklearn's transitional default — presumably targets the
            sklearn version that emitted a FutureWarning here; TODO confirm).

        Returns
        -------
        None.
        """
        self.ap_cluster = AffinityPropagation(damping=damping,
                                              max_iter=max_iter,
                                              convergence_iter=convergence_iter,
                                              copy=copy,
                                              preference=preference,
                                              affinity=affinity,
                                              verbose=verbose,
                                              random_state=random_state)

    def fit(self, x, y=None):
        """Fit the estimator on ``x``; returns ``self`` to allow chaining."""
        self.ap_cluster.fit(X=x, y=y)
        return self

    def fit_predict(self, x, y=None):
        """Fit on ``x`` and return the cluster label of each sample."""
        return self.ap_cluster.fit_predict(X=x, y=y)

    def get_params(self, deep=True):
        """Return the estimator's parameters as a dict."""
        return self.ap_cluster.get_params(deep=deep)

    def set_params(self, params):
        """Update estimator parameters.

        Note: unlike sklearn's ``set_params(**kwargs)``, this takes a single
        dict argument and unpacks it (signature kept for existing callers).
        """
        self.ap_cluster.set_params(**params)

    def predict(self, x):
        """Return the nearest-exemplar cluster label for each sample."""
        return self.ap_cluster.predict(X=x)

    def get_cluster_centers_indices(self):
        """Indices of the exemplar points (requires a prior fit)."""
        return self.ap_cluster.cluster_centers_indices_

    def get_cluster_centers(self):
        """Coordinates of the cluster centers (requires a prior fit)."""
        return self.ap_cluster.cluster_centers_

    def get_labels(self):
        """Cluster assignment of each training sample (requires a prior fit)."""
        return self.ap_cluster.labels_

    def get_affinity_matrix(self):
        """Affinity matrix used during fitting (requires a prior fit)."""
        return self.ap_cluster.affinity_matrix_

    def get_n_iter(self):
        """Number of iterations run until convergence (requires a prior fit)."""
        return self.ap_cluster.n_iter_
# Select positions and identifiers of the sources that passed the selection.
# Single-argument print() calls: identical output under Python 2, valid
# under Python 3.
x = m83_data['x'][final_data]
y = m83_data['y'][final_data]
id_ = m83_data['id_'][final_data]

# Feature matrix: one row per object, one column per colour.
X = np.vstack([colour1, colour2, colour3, colour4, colour5, colour6]).T
similarity = pairwise_distances(X)

##############################################################################
# Compute Affinity Propagation
# NOTE(review): `similarity` is a distance matrix but is passed as the feature
# matrix to an estimator with the default affinity='euclidean'; if a
# precomputed similarity was intended, use affinity='precomputed' with negated
# distances — confirm before changing, as it alters the clustering.
pref = -len(X) * 0.1  # preference: lower -> fewer clusters
damp = 0.95
af = AffinityPropagation(preference=pref, damping=damp).fit(similarity)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_

print(cluster_centers_indices)
print(af.get_params())

n_clusters_ = len(cluster_centers_indices)
print('Estimated number of clusters: %d' % n_clusters_)
print("objects: {}".format(len(colour1)))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

##############################################################################
# Plot result
import matplotlib.pyplot as plt
from itertools import cycle

#plt.close('all')
fig = plt.figure()
ax = fig.add_subplot(111)
# (continuation of an apply(...) call that begins before this chunk)
[row['Plataforma'], row['Genero']], clf), axis=1)
teste = teste.drop_duplicates()
# Persist the deduplicated test set.
pd.DataFrame(data=teste, columns=cols).to_csv(
    r'dados/teste_final.csv', sep=',', index=False)
timeEnd = time.strftime("%H:%M:%S")

# Validate the model
pureza = getPureza(teste)      # purity of the clustering
entropia = getEntropia(teste)  # entropy of the clustering

# Compute the results
colunas = ['Versão', 'Pureza', 'Entropia', 'Começo', 'Término',
           'Qtde Clusters', 'Parâmetros do cluster']
qtdeCluster = teste['Agrupamento'].nunique()
result = pd.DataFrame(data=[], columns=colunas)
# NOTE(review): DataFrame.append is deprecated/removed in recent pandas;
# consider pd.concat — confirm the pandas version before changing.
result = result.append(pd.DataFrame(
    data=[[p, pureza, entropia, timeStart, timeEnd, qtdeCluster,
           str(clf.get_params())]], columns=colunas))
# avaliacao = pd.DataFrame(data=[], columns={'Escolhas de jogos', 'Recomendações', 'Curtida'})

print("Término do processamento: ", time.strftime("%H:%M:%S"))

# Routes
# API info: returns the run metrics computed above as JSON.
@app.route('/api/probe', methods=['GET'])
def probe():
    return result.to_json(orient='records'), 200

# Game search: case-insensitive substring match on the 'Nome' column.
@app.route('/search/<name>', methods=['GET'])
def search(name):
    lista = []
    # (loop body continues past this chunk)
    for i, row in teste[teste['Nome'].str.contains(name, case=False)].iterrows():
# Select positions and identifiers of the sources that passed the selection.
# Single-argument print() calls: identical output under Python 2, valid
# under Python 3.
x = m83_data['x'][final_data]
y = m83_data['y'][final_data]
id_ = m83_data['id_'][final_data]

# Feature matrix: one row per object, one column per colour.
X = np.vstack([colour1, colour2, colour3, colour4, colour5, colour6]).T
similarity = pairwise_distances(X)

##############################################################################
# Compute Affinity Propagation
# NOTE(review): `similarity` is a distance matrix but is passed as the feature
# matrix to an estimator with the default affinity='euclidean'; if a
# precomputed similarity was intended, use affinity='precomputed' with negated
# distances — confirm before changing, as it alters the clustering.
pref = -len(X) * 0.1  # preference: lower -> fewer clusters
damp = 0.95
af = AffinityPropagation(preference=pref, damping=damp).fit(similarity)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_

print(cluster_centers_indices)
print(af.get_params())

n_clusters_ = len(cluster_centers_indices)
print('Estimated number of clusters: %d' % n_clusters_)
print("objects: {}".format(len(colour1)))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

##############################################################################
# Plot result
import matplotlib.pyplot as plt
from itertools import cycle

#plt.close('all')
fig = plt.figure()
ax = fig.add_subplot(111)