def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues):
    """
    Print and plot the confusion matrix of `y_pred` against `y_true`.

    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels, passed straight to `confusion_matrix`.
    classes : array-like
        Kept for interface compatibility. The tick labels are derived from
        the labels that actually occur in `y_true` / `y_pred`, so this
        argument is not consulted.
    normalize : bool
        When true, every row of the matrix is divided by its row sum.
    title : str or None
        Plot title; a default depending on `normalize` is used when empty.
    cmap : matplotlib colormap
        Colormap handed to `imshow`.

    Returns
    -------
    The matplotlib axes holding the plot.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data.  This used to read an
    # unrelated module-level ``Y``; the labels must come from the arguments.
    classes = unique_labels(y_true, y_pred)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    # TN/FP/FN/TP names only make sense for a binary (2x2) matrix; indexing
    # the 2x2 table with a larger matrix would raise IndexError.
    quadrant = [['TN', 'FP'], ['FN', 'TP']] if cm.shape == (2, 2) else None
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            value = format(cm[i, j], fmt)
            if quadrant is not None:
                value = str(quadrant[i][j]) + " = " + value
            ax.text(j, i, value,
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
def plotConfusionMatrix(y_true,y_pred,classes,cmap=plt.cm.Blues):
    """Plot the row-normalized confusion matrix of `y_pred` against `y_true`.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels, passed to `confusion_matrix`.
    classes : array-like
        Class names, indexed by the labels found in the data (so the labels
        are expected to be valid integer indices into it).
    cmap : matplotlib colormap
        Colormap handed to `imshow`.

    Returns
    -------
    The matplotlib axes holding the plot.
    """
    title = "Normalized confusion matrix"
    cm = confusion_matrix(y_true, y_pred)
    # Keep only the names of labels that occur in the data; asarray lets a
    # plain list be fancy-indexed the same way a numpy array is.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # The Axes properties are spelled ``xticklabels`` / ``yticklabels``; the
    # previous ``xtickslabels`` / ``ytickslabels`` made ``ax.set`` raise.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes,
           yticklabels=classes,
           title=title,
           ylabel='True Label',
           xlabel='Predicted Label')
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    fmt = '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
from math import sqrt

from sklearn.metrics import mean_squared_error, mean_squared_log_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor


def rmse(a, b):
    """Return the square root of ``mean_squared_error(a, b)``."""
    mse = mean_squared_error(a, b)
    return sqrt(mse)


# ---------------------------------------------------------------------------
# Confirmed module
# ---------------------------------------------------------------------------

# Line plot of the 'Confirmed' column against the 'Days' column.
sns.relplot(data=global_data, x='Days', y='Confirmed', kind='line')
plt.title("Confirmed Around The World")
# plt.xticks() returns (locations, labels); only the labels are rotated.
# ``ha`` is short for ``horizontalalignment``.
tick_labels = plt.xticks()[1]
plt.setp(tick_labels, rotation=30, ha='right')
plt.show()

# Global confirmed model (perceptron) inputs: X is the last column of
# global_data and Y the fourth-from-last, each reshaped to a single column.
X = global_data.iloc[:, -1].values.reshape(-1, 1)
Y = global_data.iloc[:, -4].values.reshape(-1, 1)

# fitting the logistic growth curve for confirmed cases
def _fit_gene_coords(data_df, reference, n_clusters, n_neighbors,
                     x_model_file, y_model_file, filter_cells):
    """Predict gene coordinates in the ``reference`` layout, cluster by cluster.

    For every cluster ``0 .. n_clusters - 1`` that contains genes, one
    KNeighborsRegressor per axis is fitted from the cells' coordinates in
    ``data_df`` to the same cells' coordinates in ``reference`` and then
    applied to the cluster's genes.

    ``filter_cells`` drops cells that are missing from ``reference`` before
    fitting.  Each fitted model is pickled to ``x_model_file`` /
    ``y_model_file``; the file is rewritten per cluster, so only the last
    cluster's model remains on disk (same as the inline code this replaces).

    Returns ``(genes, gene_coord_x, gene_coord_y)``: the gene names and, in
    the same order, the raw per-gene predictions (length-1 arrays, because
    the regressors are fitted on column-shaped targets).
    """
    import pickle

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor

    genes = []
    gene_coord_x = []
    gene_coord_y = []
    reference_cells = set(reference.index)
    for i in range(n_clusters):
        clust_data = data_df[data_df['cluster'] == i]
        clust_cells = clust_data[clust_data['type'] == 'cell']
        clust_genes = clust_data[clust_data['type'] == 'gene']
        genes.extend(list(clust_genes.index))
        if len(clust_genes.index) == 0:
            continue
        if filter_cells:
            kept = [cell for cell in clust_cells.index if cell in reference_cells]
            clust_cells = clust_cells.loc[kept]
        targets = reference.loc[list(clust_cells.index)]
        for axis, model_file, predictions in (('x', x_model_file, gene_coord_x),
                                              ('y', y_model_file, gene_coord_y)):
            model = KNeighborsRegressor(n_neighbors=n_neighbors)
            model.fit(
                np.array(list(clust_cells[axis])).reshape((-1, 1)),
                np.array(list(targets[axis])).reshape((-1, 1)))
            # Context manager so the handle is closed even if dumping fails.
            with open(model_file, 'wb') as fp:
                pickle.dump(model, fp)
            predictions.extend(model.predict(
                np.array(list(clust_genes[axis])).reshape((-1, 1))))
    return genes, gene_coord_x, gene_coord_y


def _annotate_cluster_centres(frame, n_clusters, columns):
    """Write each cluster number at the mean of that cluster's ``columns``."""
    import matplotlib.pyplot as plt

    for cl in range(n_clusters):
        plt.annotate(cl,
                     frame.loc[frame['cluster'] == cl, columns].mean(),
                     horizontalalignment='center',
                     verticalalignment='center',
                     size=10,
                     weight='bold',
                     color="black")


def _annotate_markers(labels, x_list, y_list):
    """Label every marker gene at its (x, y) position with a short leader line."""
    import matplotlib.pyplot as plt

    for label, x, y in zip(labels, x_list, y_list):
        plt.annotate(
            label,
            xy=(x, y),
            xytext=(-20, 20),
            textcoords='offset points',
            ha='right',
            va='bottom',
            arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))


def _finish_embedding_plot(ax, xlabel, ylabel, png_file, pdf_file):
    """Apply the shared axis styling and save the current figure as PNG and PDF."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.despine(bottom=False, left=False)
    plt.xlabel(xlabel, fontsize=20)
    plt.ylabel(ylabel, fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(png_file, bbox_inches='tight', dpi=600)
    plt.savefig(pdf_file, bbox_inches='tight', dpi=600)


def compute(inp_dataset, input_path, output_path, de_analysis, n_pass):
    """Cluster the 2-D cell/gene layout of one pass and write its plots.

    Reads ``<input_path>/output_normalized_own_cc.csv`` (rows of
    ``name, x, y``), clusters the points (DBSCAN for passes other than 4,
    the stored pass-3 clusters for pass 4), assigns clusters to the cells
    listed in the neighbour matrix, writes ``data.csv`` plus several figures
    into ``output_path`` and, for pass 4, additionally projects the genes
    onto the stored UMAP layout.

    Parameters
    ----------
    inp_dataset, de_analysis
        Not referenced by this function; kept for the callers' signature.
    input_path : str
        Directory containing ``output_normalized_own_cc.csv``.
    output_path : str
        Directory that receives all outputs.  It may be given with or
        without a trailing separator.
    n_pass : int
        Pass number; 1-4 select the branches below.

    Notes
    -----
    * Names whose length is at least 16 are treated as cells, shorter ones
      as genes.
    * ``seaborn_colors`` is a module-level name defined outside this
      function — presumably a list of xkcd colour names, since samples of
      it are handed to ``sns.xkcd_palette``.
    * Several inputs are loaded with ``pickle``.  NOTE(review): unpickling
      executes arbitrary code, so these files must only ever come from the
      pipeline itself.
    * NOTE(review): the nesting of this function was reconstructed; the
      "cell gene marker" section is assumed to run for every pass and the
      UMAP section only for pass 4 — confirm against the pipeline.
    """
    import csv
    import operator
    import os
    import pickle
    import random

    import matplotlib.cm as cm
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from sklearn.cluster import DBSCAN
    from sklearn.metrics import silhouette_samples, silhouette_score

    def out_file(name):
        # os.path.join gives the same file whether or not output_path ends
        # with a separator (the code used to mix '+ name' and '+ "/" + name').
        return os.path.join(output_path, name)

    print("Current pass ", n_pass)
    print("Processing the input data into datafames....")

    filename = input_path + "/output_normalized_own_cc.csv"
    coord_data = pd.read_csv(filename, names=['data', 'x', 'y'])
    coord_data.set_index('data', inplace=True)

    data = []
    data_outlier = []
    csvData = []
    with open(filename, 'r') as csvfile:
        for row in csv.reader(csvfile):
            # DBSCAN needs numeric input; the csv module yields strings.
            data_outlier.append([float(row[1]), float(row[2])])
            row.append('cell' if len(row[0]) >= 16 else 'gene')
            data.append(row)
            csvData.append(row)

    # ------------------------------------------------------------------
    # DBSCAN (passes 1-3) or reuse of the pass-3 clusters (pass 4)
    # ------------------------------------------------------------------
    if n_pass != 4:
        print("Performing clustering....")
        db = DBSCAN(eps=180, min_samples=55).fit_predict(data_outlier)
        # Keep only the points DBSCAN did not flag as noise (-1).
        csvData = [['data', 'x', 'y', 'type']]
        csvData.extend(row for row, label in zip(data, db) if label != -1)
        n_clusters = len(set(db)) - (1 if -1 in list(db) else 0)
        print("Clustering done. the number of obtained clusters: ", n_clusters)
    else:
        prev_df = pd.read_csv(
            "Stardust_results/visualization_output/3_pass/data.csv",
            delimiter=",",
            index_col=False)
        prev_df.set_index('data', inplace=True)
        clusters_info = []
        remove_data = []
        de_gene_cluster = None
        for row in csvData:
            if row[3] == 'cell':
                if row[0] in prev_df.index:
                    clusters_info.append(prev_df.loc[row[0]]['cluster'])
                else:
                    remove_data.append(row)
            else:
                if de_gene_cluster is None:
                    # Loaded once, on the first gene, instead of per gene row.
                    with open(
                            'Stardust_results/visualization_output/3_pass/de_genes_cluster.txt',
                            'rb') as fp:
                        de_gene_cluster = pickle.load(fp)
                for rank in range(len(de_gene_cluster)):
                    ranked_genes = de_gene_cluster[rank]
                    if row[0] in ranked_genes:
                        # The gene's position inside the rank list is its cluster.
                        clusters_info.append(ranked_genes.index(row[0]))
                        break
                else:
                    remove_data.append(row)
        for r in remove_data:
            csvData.remove(r)
        csvData = [['data', 'x', 'y', 'type']] + csvData

    # ------------------------------------------------------------------
    # Outlier visualization
    # ------------------------------------------------------------------
    if n_pass != 4:
        print("Starting outlier detection....")
        data_type = []
        g = 0
        for i in range(len(coord_data)):
            if db[i] != -1:
                data_type.append("data")
            elif len(coord_data.index[i]) >= 16:
                data_type.append("cell_outliers")
            else:
                g = g + 1
                data_type.append("gene_outliers")
        coord_data["data_type"] = data_type
        data_colors = ["lightblue"]
        # One palette entry per outlier kind that is actually present.
        if g > 0:
            noise_colors = ['blue', 'red']
        else:
            noise_colors = ['blue']
        coord_data["alpha"] = np.where(coord_data['data_type'] == 'data', 0.5,
                                       1.0)
        plt.figure(figsize=(6, 4.5))
        outlier_markers = {"gene_outliers": "^", "cell_outliers": "^"}
        ax = sns.scatterplot(x="x",
                             y="y",
                             data=coord_data[coord_data['alpha'] == 0.5],
                             hue="data_type",
                             palette=sns.xkcd_palette(data_colors),
                             sizes=(50, 100),
                             size="data_type",
                             linewidth=0.0,
                             s=10,
                             alpha=0.3)
        sns.scatterplot(x="x",
                        y="y",
                        data=coord_data[coord_data['alpha'] == 1.0],
                        hue="data_type",
                        palette=sns.xkcd_palette(noise_colors),
                        sizes=(100, 50),
                        size="data_type",
                        style="data_type",
                        markers=outlier_markers,
                        alpha=1.0,
                        linewidth=0.0,
                        s=10,
                        legend='brief',
                        ax=ax)
        ax.legend(bbox_to_anchor=(1.1, 1.05), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("dim1")
        plt.ylabel("dim2")
        plt.savefig(out_file('outliers_visualization.png'),
                    bbox_inches='tight')
        print("Outliers removed from the dataset....")

    # ------------------------------------------------------------------
    # Post-hoc cluster assignment
    # ------------------------------------------------------------------
    print("Starting post hoc clustering....")
    neighbor_df = pd.read_hdf(
        'Stardust_results/build_output/1_pass/neighbor.h5', 'df')
    if 'Unnamed: 0' in list(neighbor_df.columns):
        neighbor_df.set_index('Unnamed: 0', inplace=True)
    col = list(neighbor_df.columns)
    index = list(neighbor_df.index)

    # Map every row label to the column labels of its non-zero entries.
    cell_dict = dict()
    for i in range(len(index)):
        col_ind = list(neighbor_df.iloc[i].to_numpy().nonzero())[0]
        for ind in col_ind:
            cell_dict.setdefault(index[i], []).append(col[ind])

    if n_pass == 4:
        prev_cells = set(prev_df.index)
    else:
        # Position of the first occurrence of every name (what list.index
        # returned), built once instead of scanning the index per lookup.
        coord_pos = {}
        for pos, name in enumerate(coord_data.index):
            coord_pos.setdefault(name, pos)

    cluster_assign = []
    for key_cell, cells in cell_dict.items():
        neighbour_clusters = []
        clust = dict()
        for cell in cells:
            if n_pass == 4:
                if cell in prev_cells:
                    cluster = prev_df.loc[cell]['cluster']
                else:
                    cluster = -1
            else:
                cluster = db[coord_pos[cell]]
            neighbour_clusters.append(cluster)
            clust[cluster] = clust.get(cluster, 0) + 1
        # Majority vote among the neighbours; noise majorities are skipped.
        max_cluster = max(clust.items(), key=operator.itemgetter(1))[0]
        if max_cluster == -1:
            continue
        cluster_assign.append(max_cluster)
        # Place the cell at the centroid of its neighbours in that cluster.
        x_total = 0
        y_total = 0
        count = 0
        for cell, cluster in zip(cells, neighbour_clusters):
            if cluster == max_cluster:
                count = count + 1
                x_total = x_total + coord_data.loc[cell]['x']
                y_total = y_total + coord_data.loc[cell]['y']
        csvData.append([key_cell, x_total / count, y_total / count, 'cell'])
    print("Post hoc clustering done....")

    data_csv = out_file('data.csv')
    # newline='' is what the csv module expects for files it writes.
    with open(data_csv, 'w', newline='') as csvFile:
        csv.writer(csvFile).writerows(csvData)
    data_df = pd.read_csv(data_csv, delimiter=",", index_col=False)
    if n_pass != 4:
        clusters_info = [x for x in db if x != -1]
    clusters_info = clusters_info + cluster_assign
    data_df['cluster'] = clusters_info
    data_df.to_csv(data_csv)
    print("cluster saved ....")

    n_clusters = len(data_df['cluster'].unique())
    colors = random.sample(seaborn_colors, n_clusters)
    plt.figure(figsize=(5, 5))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=data_df,
                         hue="cluster",
                         palette=sns.xkcd_palette(colors),
                         linewidth=0.0,
                         s=2)
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    _annotate_cluster_centres(data_df, n_clusters, ['x', 'y'])
    _finish_embedding_plot(ax, "sd1", "sd2",
                           out_file("cluster_visualization.png"),
                           out_file("cluster_visualization.pdf"))

    # ------------------------------------------------------------------
    # Silhouette plot (pass 3 only)
    # ------------------------------------------------------------------
    if n_pass == 3:
        silhouette_avg = silhouette_score(data_df[['x', 'y']],
                                          data_df['cluster'])
        sample_silhouette_values = silhouette_samples(data_df[['x', 'y']],
                                                      data_df['cluster'])
        print(silhouette_avg)
        y_lower = 10
        fig = plt.figure(figsize=(4, 7))
        for i in range(n_clusters):
            # Aggregate the silhouette scores for samples belonging to
            # cluster i, and sort them
            ith_cluster_silhouette_values = \
                sample_silhouette_values[data_df['cluster'] == i]
            ith_cluster_silhouette_values.sort()
            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i
            color = cm.nipy_spectral(float(i) / n_clusters)
            plt.fill_betweenx(np.arange(y_lower, y_upper),
                              0,
                              ith_cluster_silhouette_values,
                              facecolor=color,
                              edgecolor=color,
                              alpha=0.7)
            # Label the silhouette plots with their cluster numbers at the middle
            plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
            # Compute the new y_lower for next plot
            y_lower = y_upper + 10  # 10 for the 0 samples
        plt.title("The silhouette plot for the various clusters.")
        plt.xlabel("silhouette coefficient", fontsize=20)
        plt.ylabel("Cluster label", fontsize=20)
        plt.axvline(x=silhouette_avg, color="red", linestyle="--")
        plt.yticks([])  # Clear the yaxis labels / ticks
        plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
        sns.despine(bottom=False, left=False)
        fig.savefig(out_file("silhouette.pdf"), bbox_inches='tight', dpi=600)
        fig.savefig(out_file("silhouette.png"), bbox_inches='tight', dpi=600)

    # ------------------------------------------------------------------
    # Marker finding
    # ------------------------------------------------------------------
    data_df = pd.read_csv(data_csv, delimiter=",", index_col=False)
    data_df.set_index('data', inplace=True)
    # Stays empty for pass 1, where no previous DE result exists.
    marker = []
    if n_pass != 1:
        prev_pass_dirs = {
            2: 'Stardust_results/visualization_output/1_pass',
            3: 'Stardust_results/visualization_output/2_pass',
            4: 'Stardust_results/visualization_output/3_pass',
        }
        path = prev_pass_dirs[n_pass]
        with open(path + '/de_genes_cluster.txt', 'rb') as fp:
            de_gene_cluster = pickle.load(fp)
        for cl in range(n_clusters):
            cls = data_df[data_df['cluster'] == cl]
            cluster_genes = set(cls[cls['type'] == 'gene'].index)
            # Take the first gene, in rank order, that lies in this cluster.
            found = False
            for rank in range(len(de_gene_cluster)):
                for gene in de_gene_cluster[rank]:
                    if gene in cluster_genes:
                        marker.append(gene)
                        found = True
                        break
                if found:
                    break

    # ------------------------------------------------------------------
    # Cell/gene marker embedding in the stored pass-3 layout
    # ------------------------------------------------------------------
    prev_pass_file = 'Stardust_results/visualization_output/3_pass/data_openOrd.csv'
    prev_pass_data = pd.read_csv(prev_pass_file)
    prev_pass_data.set_index('data', inplace=True)
    data_df = pd.read_csv(data_csv)
    data_df.set_index('data', inplace=True)

    # Coordinates of the stored cells that also occur in the current data.
    known = set(data_df.index)
    cell_list = []
    x_coord = []
    y_coord = []
    for i, cell in enumerate(prev_pass_data.index):
        if cell in known:
            cell_list.append(cell)
            x_coord.append(prev_pass_data.iloc[i]['x'])
            y_coord.append(prev_pass_data.iloc[i]['y'])
    prev_coords = pd.DataFrame(index=cell_list)
    prev_coords['x'] = x_coord
    prev_coords['y'] = y_coord

    genes, gene_coord_x, gene_coord_y = _fit_gene_coords(
        data_df, prev_coords, n_clusters, 4,
        out_file('sd_x_KNN_model.sav'), out_file('sd_y_KNN_model.sav'),
        filter_cells=True)
    with open(out_file("sd_gene_coord_x.txt"), 'wb') as fp:
        pickle.dump(gene_coord_x, fp)
    with open(out_file("sd_gene_coord_y.txt"), 'wb') as fp:
        pickle.dump(gene_coord_y, fp)

    prev_pass_data = pd.read_csv(prev_pass_file)
    prev_pass_data["alpha"] = np.where(prev_pass_data['type'] == 'gene', 1.0,
                                       0.5)
    color_gene = ["light blue"]
    color_cell = ["red"]
    plt.figure(figsize=(6, 6))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=prev_pass_data[prev_pass_data['alpha'] == 0.5],
                         hue="type",
                         palette=sns.xkcd_palette(color_gene),
                         sizes=(10, 5),
                         size="type",
                         alpha=0.3,
                         s=10)
    sns.scatterplot(x=gene_coord_x,
                    y=gene_coord_y,
                    palette=sns.xkcd_palette(color_cell),
                    sizes=(20, 5),
                    marker="^",
                    alpha=1.0,
                    ax=ax,
                    s=10)
    # Marker genes are annotated at their predicted coordinates.
    x_list = [gene_coord_x[genes.index(m)] for m in marker]
    y_list = [gene_coord_y[genes.index(m)] for m in marker]
    _annotate_markers(marker, x_list, y_list)
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    _finish_embedding_plot(ax, "sd1", "sd2",
                           out_file("sd_embedding.png"),
                           out_file("sd_embedding.pdf"))

    prev_pass_data.set_index('data', inplace=True)
    temp_data = prev_pass_data[prev_pass_data['type'] == 'cell']
    # Copy so the predicted coordinates are not written into a view of data_df.
    temp_genes = data_df[data_df['type'] == 'gene'].copy()
    for pos, gene in enumerate(genes):
        # Predictions are length-1 arrays; store the scalar inside.
        temp_genes.at[gene, 'x'] = np.ravel(gene_coord_x[pos])[0]
        temp_genes.at[gene, 'y'] = np.ravel(gene_coord_y[pos])[0]
    # The result of the former ``temp_data.append(temp_genes)`` was thrown
    # away (and DataFrame.append no longer exists in pandas 2), so the genes
    # never reached the plot.
    temp_data = pd.concat([temp_data, temp_genes])
    n_clusters = len(data_df['cluster'].unique())
    colors = random.sample(seaborn_colors, n_clusters)
    plt.figure(figsize=(6, 6))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=temp_data,
                         hue="cluster",
                         palette=sns.xkcd_palette(colors),
                         s=2,
                         linewidth=0.0)
    d1 = prev_pass_data[prev_pass_data['alpha'] == 0.5]
    _annotate_cluster_centres(d1, n_clusters, ['x', 'y'])
    _annotate_markers(marker, x_list, y_list)
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    _finish_embedding_plot(ax, "sd1", "sd2",
                           out_file("sd_color_embedding.png"),
                           out_file("sd_color_embedding.pdf"))

    # ------------------------------------------------------------------
    # UMAP cell/gene marker embedding (pass 4 only)
    # ------------------------------------------------------------------
    if n_pass == 4:
        with open('Stardust_results/build_output/1_pass/umap_coord.txt',
                  'rb') as fp:
            umap_coord = pickle.load(fp)
        louvain_df = pd.read_csv(
            'Stardust_results/build_output/1_pass/louvain_cluster_df.csv')
        louvain_df.set_index('Unnamed: 0', inplace=True)
        data_df = pd.read_csv(data_csv)
        data_df.set_index('data', inplace=True)

        # umap_coord is indexed by the row position in louvain_df.
        known = set(data_df.index)
        cell_list = []
        x_coord = []
        y_coord = []
        for i, cell in enumerate(louvain_df.index):
            if cell in known:
                cell_list.append(cell)
                x_coord.append(umap_coord[i][0])
                y_coord.append(umap_coord[i][1])
        umap_df = pd.DataFrame(index=cell_list)
        umap_df['x'] = x_coord
        umap_df['y'] = y_coord

        genes, gene_coord_x, gene_coord_y = _fit_gene_coords(
            data_df, umap_df, n_clusters, 5,
            out_file('scanpy_x_KNN_model.sav'),
            out_file('scanpy_y_KNN_model.sav'),
            filter_cells=False)
        with open(out_file("scanpy_gene_coord_x.txt"), 'wb') as fp:
            pickle.dump(gene_coord_x, fp)
        with open(out_file("scanpy_gene_coord_y.txt"), 'wb') as fp:
            pickle.dump(gene_coord_y, fp)

        # First-occurrence position maps replace repeated list.index scans.
        louvain_pos = {}
        for pos, name in enumerate(louvain_df.index):
            louvain_pos.setdefault(name, pos)
        gene_pos = {}
        for pos, name in enumerate(genes):
            gene_pos.setdefault(name, pos)

        # Cells take their stored UMAP position, genes the predicted one.
        u_map_x = []
        u_map_y = []
        for ind in data_df.index:
            if ind in louvain_pos:
                u_map_x.append(umap_coord[louvain_pos[ind]][0])
                u_map_y.append(umap_coord[louvain_pos[ind]][1])
            else:
                u_map_x.append(gene_coord_x[gene_pos[ind]])
                u_map_y.append(gene_coord_y[gene_pos[ind]])
        data_df['umap_x'] = u_map_x
        data_df['umap_y'] = u_map_y

        plt.figure(figsize=(5, 5))
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df,
                             hue="cluster",
                             palette=sns.xkcd_palette(colors),
                             linewidth=0.0,
                             s=2)
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        _annotate_cluster_centres(data_df, n_clusters, ['umap_x', 'umap_y'])
        _finish_embedding_plot(ax, "umap1", "umap2",
                               out_file('umap_clustering.png'),
                               out_file('umap_clustering.pdf'))

        data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
        color_gene = ["light grey"]
        color_cell = ["red"]
        x_list = [data_df.loc[m]['umap_x'] for m in marker]
        y_list = [data_df.loc[m]['umap_y'] for m in marker]

        plt.figure(figsize=(6, 6))
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df[data_df['alpha'] == 0.5],
                             hue="type",
                             palette=sns.xkcd_palette(color_gene),
                             sizes=(10, 5),
                             size="type",
                             alpha=0.3,
                             s=10)
        sns.scatterplot(x="umap_x",
                        y="umap_y",
                        data=data_df[data_df['alpha'] == 1.0],
                        hue="type",
                        palette=sns.xkcd_palette(color_cell),
                        sizes=(20, 5),
                        size="type",
                        marker="^",
                        alpha=1.0,
                        ax=ax,
                        s=10)
        _annotate_cluster_centres(data_df, n_clusters, ['umap_x', 'umap_y'])
        _annotate_markers(marker, x_list, y_list)
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        _finish_embedding_plot(ax, "umap1", "umap2",
                               out_file('umap_embedding.png'),
                               out_file('umap_embedding.pdf'))

        plt.figure(figsize=(6, 6))
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df[data_df['alpha'] == 0.5],
                             hue="cluster",
                             linewidth=0.0,
                             sizes=(2, 5),
                             size="type",
                             palette=sns.xkcd_palette(colors),
                             s=2)
        sns.scatterplot(x="umap_x",
                        y="umap_y",
                        data=data_df[data_df['alpha'] == 1.0],
                        hue="type",
                        palette=sns.xkcd_palette(color_cell),
                        linewidth=0.1,
                        marker="^",
                        ax=ax,
                        alpha=1.0,
                        s=10)
        _annotate_cluster_centres(data_df, n_clusters, ['umap_x', 'umap_y'])
        _annotate_markers(marker, x_list, y_list)
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        _finish_embedding_plot(ax, "umap1", "umap2",
                               out_file('umap_color_embedding.png'),
                               out_file('umap_color_embedding.pdf'))
def process(path):
    """Train a decision tree on the CSV at `path` and write metrics and charts.

    Columns 1-5 of the CSV are the features and column 6 the target, which
    is truncated to a multiple of 100 before a DecisionTreeClassifier is
    fitted on a random train/test split.  Predictions, a metrics table and
    three charts are written below ``static/results/``.

    Parameters
    ----------
    path : str
        Location of the input CSV, passed to ``pd.read_csv``.

    Returns
    -------
    (y_test, y_pred) : the held-out targets and the model's predictions.
    """
    dataset = pd.read_csv(path)
    X = dataset.iloc[:, 1:6].values
    y = dataset.iloc[:, 6].values
    # Bin the target: y/100 is truncated to an integer, then scaled back.
    y = (y / 100).astype(int) * 100
    print(X)
    print(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model2 = DecisionTreeClassifier()
    model2.fit(X_train, y_train)
    y_pred = model2.predict(X_test)

    # Context managers close the result files even if a write fails.
    with open("static/results/resultDT.csv", "w") as result2:
        result2.write("ID,Predicted Value" + "\n")
        for j in range(len(y_pred)):
            result2.write(str(j + 1) + "," + str(y_pred[j]) + "\n")

    # NOTE(review): the reported MSE is divided by 1000 while RMSE is not,
    # so the two are on different scales — confirm this is intended for the
    # charts before relying on the "MSE" figure.
    mse = abs(round(mean_squared_error(y_test, y_pred), 2)) / 1000
    mae = abs(round(mean_absolute_error(y_test, y_pred), 2))
    r2 = abs(round(r2_score(y_test, y_pred), 2))

    print("---------------------------------------------------------")
    print("MSE VALUE FOR Decision Tree IS %f " % mse)
    print("MAE VALUE FOR Decision Tree IS %f " % mae)
    print("R-SQUARED VALUE FOR Decision Tree IS %f " % r2)
    rms = abs(round(np.sqrt(mean_squared_error(y_test, y_pred)), 2))
    print("RMSE VALUE FOR Decision Tree IS %f " % rms)
    ac = round(accuracy_score(y_test, y_pred), 2) * 100
    print("ACCURACY VALUE Decision Tree IS %f" % ac)
    print("---------------------------------------------------------")

    with open('static/results/DTMetrics.csv', 'w') as result2:
        result2.write("Parameter,Value" + "\n")
        result2.write("MSE" + "," + str(mse) + "\n")
        result2.write("MAE" + "," + str(mae) + "\n")
        result2.write("R-SQUARED" + "," + str(r2) + "\n")
        result2.write("RMSE" + "," + str(rms) + "\n")
        result2.write("ACCURACY" + "," + str(ac) + "\n")

    # The charts are drawn from the metrics file that was just written.
    df = pd.read_csv('static/results/DTMetrics.csv')
    acc = df["Value"]
    alc = df["Parameter"]

    # Bar chart of the five metrics.
    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#8c564b"]
    fig = plt.figure()
    plt.bar(alc, acc, color=colors)
    plt.xlabel('Parameter')
    plt.ylabel('Value')
    plt.title(' Decision Tree Metrics Value')
    fig.savefig('static/results/DTMetricsValueBarChart.png')

    # Two-ring pie chart: metric names outside, metric values inside.
    group_names = ['MSE', 'MAE', 'R2', 'RMSE', 'ACCURACY']
    group_size = acc
    subgroup_names = acc
    subgroup_size = acc

    # Create colors
    a, b, c, d, e = [
        plt.cm.Blues, plt.cm.Reds, plt.cm.Greens, plt.cm.Oranges,
        plt.cm.Purples
    ]
    ring_colors = [a(0.6), b(0.6), c(0.6), d(0.1), e(0.6)]

    # First Ring (outside)
    fig, ax = plt.subplots()
    ax.axis('equal')
    mypie, _ = ax.pie(group_size,
                      radius=1.0,
                      labels=group_names,
                      colors=ring_colors)
    plt.setp(mypie, width=0.3, edgecolor='white')

    # Second Ring (Inside)
    mypie2, _ = ax.pie(subgroup_size,
                       radius=1.0 - 0.3,
                       labels=subgroup_names,
                       labeldistance=0.7,
                       colors=ring_colors)
    plt.setp(mypie2, width=0.4, edgecolor='white')
    plt.margins(0, 0)
    plt.title('Decision Tree Metrics Value')
    plt.savefig('static/results/DTMetricsValue.png')

    # Side-by-side bars of original versus predicted values.
    # set width of bar
    barWidth = 0.25
    plt.subplots(figsize=(12, 8))

    # Set position of bar on X axis
    br1 = np.arange(len(y_pred))
    br2 = [x + barWidth for x in br1]

    # Make the plot
    plt.bar(br1,
            y_test,
            color='r',
            width=barWidth,
            edgecolor='grey',
            label='Original')
    plt.bar(br2,
            y_pred,
            color='g',
            width=barWidth,
            edgecolor='grey',
            label='Predicted')

    # Adding Xticks
    plt.xlabel('Number of Records', fontweight='bold', fontsize=15)
    plt.ylabel('Fish Weight', fontweight='bold', fontsize=15)
    plt.legend()
    plt.savefig('static/results/DTCompare.png')
    return y_test, y_pred


#process("dataset.csv")
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Heat map of sighting counts: one row per day of the month (1-31) and one
# column per month.
days = list(range(1,32))
months = ['January','February','March','April','May','June','July','August','September','October','November','December']
density = np.zeros((31,12))
# Each row of us_data adds one count to its (day - 1, month - 1) cell.
for index,row in us_data.iterrows():
    density[int(row['day']-1),int(row['month']-1)]+=1
fig,ax = plt.subplots()
im = ax.imshow(density)
# Show every tick and label it with the matching month name / day number.
ax.set_xticks(np.arange(len(months)))
ax.set_yticks(np.arange(len(days)))
ax.set_xticklabels(months)
ax.set_yticklabels(days)
# Rotate the month labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
# Write the integer count into every cell, in white.
for i in range(len(days)):
    for j in range(len(months)):
        text = ax.text(j, i, int(density[i, j]), ha="center", va="center", color="w")
ax.set_title("The common days the UFO sightings are reported")
#fig.tight_layout()
fig.set_size_inches(10, 20, forward=True)
plt.show()

# Second grid with four rows instead of 31; the loop that fills it continues
# beyond this excerpt.
days = list(range(1,5))
months = ['January','February','March','April','May','June','July','August','September','October','November','December']
density = np.zeros((4,12))
for index,row in us_data.iterrows():
    if int(row['day']-1)<8:
ax[0].plot(full_grouped['Confirmed'], color='red') ax[0].set_ylim(ymin=0, ymax=None) ax[0].set_ylabel('Confirmed') ax[0].set_xlabel('Months') ax[0].grid(True, which='major') # Set layout for 'Deaths' cases ax[1].plot(full_grouped['Deaths'], color='black') ax[1].set_ylim(ymin=0, ymax=None) ax[1].set_ylabel('Deaths') ax[1].set_xlabel('Months') ax[1].grid(True, which='major') # Set layout for 'Recovered' cases ax[2].plot(full_grouped['Recovered'], color='green') ax[2].set_ylim(ymin=0, ymax=None) ax[2].set_ylabel('Recovered') ax[2].set_xlabel('Months') ax[2].grid(True, which='major') # Set general layout for figure (all axis) fig.tight_layout() plt.setp(ax[2].get_xticklabels(), rotation=45, horizontalalignment='right') plt.show() plt.show('Australia') #plot('Germany') #plot('France') #plot('Spain') #plot('Italy')
# Impact velocities from 0.1 m/s up to (but not including) 10 m/s, in 0.1 steps
impact_velocity = np.arange(0.1, 10, 0.1)

# Use conservation of energy, ignore aerodynamic effects: h = v^2 / (2 g)
height = impact_velocity**2 / (2 * g)

# Plot in SI?
if PLOT_SI:
    # Set the plot size - 3x2 aspect ratio is best.
    # plt.figure registers the figure with pyplot, so the plt.gca() below
    # draws on it.  plt.Figure only built a detached Figure object and the
    # requested size was never applied to the axes that were plotted on.
    fig = plt.figure(figsize=(6, 4))
    ax = plt.gca()
    plt.subplots_adjust(bottom=0.17, left=0.17, top=0.96, right=0.96)

    # Change the axis units font
    plt.setp(ax.get_ymajorticklabels(), fontsize=18)
    plt.setp(ax.get_xmajorticklabels(), fontsize=18)

    # Hide the top and right spines and keep ticks on the remaining two
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # Turn on the plot grid and set appropriate linestyle and color
    ax.grid(True, linestyle=':', color='0.75')
    ax.set_axisbelow(True)

    # Define the X and Y axis labels
    plt.xlabel('Impact Velocity (m/s)', fontsize=22, weight='bold', labelpad=5)
    plt.ylabel('Drop Height (m)', fontsize=22, weight='bold', labelpad=10)
pickle_out.close()

# Read the pickled history back; the context manager closes the file, which
# the bare open() call never did.
with open("MNIST_history.pickle", "rb") as pickle_in:
    saved_history = pickle.load(pickle_in)
print(saved_history)

# The curves below are drawn from the in-memory ``history`` object, not from
# the copy that was just reloaded.
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)

# Visualize the validation/test loss and the training loss...
line1 = plt.plot(epochs, val_loss_values, label='Validation/Test Loss')
line2 = plt.plot(epochs, loss_values, label='Training Loss')
plt.setp(line1, linewidth=2.0, marker='+', markersize=10.0)
plt.setp(line2, linewidth=2.0, marker='4', markersize=10.0)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()

# Visualize the accuracy
# reasonable accuracy....
# Plotting our accuracy charts
history_dict = history.history
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']