def plot_horizontal_bar_from_cm(confusion_matrix=None, classes=None):
    """Show a stacked horizontal bar chart of TP / FN / FP values per class.

    NOTE(review): ``confusion_matrix`` and ``classes`` are accepted but not
    read; the chart is drawn from the hard-coded example data below.

    Args:
        confusion_matrix: Currently unused.
        classes: Currently unused. Defaults to ``None`` instead of a shared
            mutable list.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    bar_height = 0.30

    # Example data
    y_labels = 12 * ['a', 'b', 'c', 'd']
    y_pos = list(range(len(y_labels)))
    print(y_pos)
    true_positives = 12 * [8.84036186, 12.94095337, 11.19919226, 10.64395389]
    false_negatives = 12 * [1, 1, 1, 1]
    false_positives = 12 * [2, 13, 13, 3]

    # The set_* / invert_yaxis methods live on Axes, not on the pyplot module.
    fig, ax = plt.subplots()

    # `left` must be the numeric start of each segment (the running sum of
    # the segments already drawn), not the BarContainer returned by barh().
    ax.barh(y_pos, true_positives, bar_height, color='green', label='TP')
    ax.barh(y_pos, false_negatives, bar_height, label='FN',
            left=true_positives)
    fp_left = [tp + fn for tp, fn in zip(true_positives, false_negatives)]
    ax.barh(y_pos, false_positives, bar_height, label='FP', left=fp_left)

    # Stacked bars are centred on y_pos, so the ticks go there too (the old
    # 1.5 * width offset belonged to an abandoned grouped-bar layout).
    ax.set_yticks(y_pos)
    ax.set_yticklabels(y_labels)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Performance')
    ax.set_title('How fast do you want to go today?')
    ax.legend(loc='upper right')
    plt.show()
def fix_hplot(df, statistic, xlabel, ylabel, hue, fontsize):
    """Draw a horizontal seaborn bar plot for one statistic and style its axes.

    Rows of ``df`` whose ``statistic`` column equals ``statistic`` are
    plotted with column ``data`` on the x axis and column ``label`` on the
    y axis.

    Args:
        df: DataFrame queried on ``statistic`` and plotted from its ``data``
            and ``label`` columns.
        statistic: Value of the ``statistic`` column to select.
        xlabel: Text for the x-axis label.
        ylabel: Text for the y-axis label.
        hue: NOTE(review): accepted but not used; the plot is always drawn
            with ``hue=None``. Confirm whether grouping was intended.
        fontsize: Font size for tick and axis labels; also drives the y tick
            padding (``fontsize * 10 - 10``).

    Returns:
        The Axes object returned by ``sns.barplot``.
    """
    data = df.query('statistic == "{}"'.format(statistic))

    # Named `ax`, not `plt`, so the pyplot alias is not shadowed.
    ax = sns.barplot(x='data', y='label', data=data, hue=None,
                     errwidth=1.0, capsize=0.15)

    for tick_labels in (ax.get_yticklabels(), ax.get_xticklabels()):
        for tick_label in tick_labels:
            tick_label.set_fontsize(fontsize)

    # Public accessor instead of the private Text._text attribute.
    ax.set_yticklabels(
        [tick_label.get_text().capitalize()
         for tick_label in ax.get_yticklabels()],
        ha='left')

    ax.get_yaxis().get_label().set_fontsize(fontsize)
    ax.get_xaxis().get_label().set_fontsize(fontsize)
    # Labels are left-aligned, so they are pushed away from the axis to stay
    # clear of the bars.
    ax.get_yaxis().set_tick_params(pad=fontsize * 10 - 10)

    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    ax.set_xlim(0, 1.05)
    return ax
color='red',marker='o',linestyle='None',markersize=3) plt.loglog(slams,sflam_star,\ color='white',marker='o',linestyle='None',markersize=3) # label='IRAS F16544$-$1604') #plt.figtext(0.4,0.83,'WISE') #print plt.gca() axes=fig1.gca() for label in axes.get_xticklabels() + axes.get_yticklabels(): label.set_fontsize('x-small') if (i < nx): plt.set_xlabel(r'$\lambda$ ($\mu$m)',size='x-small') if (np.mod(i,nx)==0): plt.set_ylabel(r'$\lambda F_\lambda$ (erg/s/cm$^{2}$)',size='x-small') else: plt.set_yticklabels('',visible='False',size='x-small') plt.axis([0.4,150,1e-14,1e-6]) # print 'i = ', i plt.text(0.7,5e-8,'M%d'%(i+1),size='small') print "%12s &%15s &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.2f\pm%4.2f\$ &\$%4.2f\pm%4.2f\$ &\$%4.2f\pm%4.2f\$ &\$%4.2f\pm%4.2f\$(%s%s) &\$%4.2f\pm%4.2f\$(%s%s)\\\\"%\ (scat['2MASS_name'][sstar],scat['object_type'][sstar],\ scat['J_flux_c'][sstar],scat['J_D_flux_c'][sstar],\ scat['H_flux_c'][sstar],scat['H_D_flux_c'][sstar],\ scat['Ks_flux_c'][sstar],scat['Ks_D_flux_c'][sstar],\ w1f[wstar],w1df[wstar],\ w2f[wstar],w2df[wstar],\ w3f[wstar],w3df[wstar],\ w4f[wstar],w4df[wstar],\ scat['MP1_flux_c'][sstar],scat['MP1_D_flux_c'][sstar],\ scat['MP2_flux_c'][sstar],scat['MP2_D_flux_c'][sstar],\ scat['MP2_Q_det_c'][sstar],scat['MP2_imtype'][sstar],\
# Rank features by absolute weight, largest first, and keep the top 20.
order = np.argsort(np.abs(weights[:]))[::-1]
top20 = order[:20]
positions = np.arange(20)

# Vertical bar chart of the 20 largest-magnitude weights, saved to disk.
plt.bar(positions, weights[top20])
plt.xticks(positions, features[top20], fontsize=12)
plt.xticks(rotation="vertical")
plt.ylabel("Feature weights")
plt.savefig("/neurospin/brainomics/2016_schizConnect/analysis/all_studies+VIP/Freesurfer/all_subjects/results/ROIs_analysis/weights/svm_weights_top20.png")

# Horizontal version of the same chart on its own figure. The original
# `plt.figure` (no parentheses) was a no-op, and pyplot has no
# set_yticks/set_yticklabels, so plt.yticks() sets positions and names.
plt.figure()
plt.barh(positions, weights[top20])
plt.yticks(positions, features[top20], fontsize=12)
# With horizontal bars the weights run along the x axis.
plt.xlabel("Feature weights")
plt.grid()

plt.rc('font', family='serif')
fig, ax = plt.subplots()

# Example data
features_names = features[top20]
y_pos = np.arange(len(features_names))
performance = weights[top20]
import pickle
import matplotlib.pyplot as plt
import numpy as np

# Horizontal bar chart of the four confusion-matrix counts.
# Example data
people = ('TP', 'FP', 'FN', 'TN')
y_pos = np.arange(len(people))
performance = (64354, 2701, 17007, 2417826)

# set_yticks / set_yticklabels / invert_yaxis / set_xlabel / set_title are
# Axes methods; calling them on the pyplot module raises AttributeError.
fig, ax = plt.subplots()
ax.barh(y_pos, performance, align='center', color='green', ecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('Precission - 0.959 Recal - 0.790')
plt.show()
# Redraw fig1 as an ny-by-nx grid with one scatter panel per entry of `files`.
fig1.clf()
i = 0  # kept so `i` exists even when `files` is empty
j = 0  # zero-based panel counter
for i, current_file in enumerate(files):
    # NOTE: in this script `plt` is the subplot Axes, not matplotlib.pyplot.
    plt = fig1.add_subplot(ny, nx, j + 1)
    histogram = hist(current_file)
    # NOTE(review): element 0 is used as x positions and element 1 as y
    # values; confirm this matches what `hist` returns.
    bin_edges = histogram[0]
    x = histogram[1]
    C = colour
    plt.scatter(bin_edges[:-1], x, c=u'r')
    axes = fig1.gca()

    # The original passed visible='False' / visible='True' as strings (both
    # truthy) together with '' as the label list, which blanked every tick
    # label. tick_params toggles label visibility without erasing the
    # labels: y labels everywhere, x labels only on the fifth panel.
    show_x_labels = (j == 4)
    plt.tick_params(axis='x', labelbottom=show_x_labels, labelsize='small')
    plt.tick_params(axis='y', labelleft=True, labelsize='small')
    if show_x_labels:
        plt.set_xlabel(r'Temperature (K)', size='small')

    plt.axis([5, 60, 0, 1.1E-3])
    plt.set_ylabel('Normalised Number of pixels', size='small')
    j = j + 1
fig1.show()
def main(TRAIN=False,
         TUNING=False,
         ANCHOR=False,
         LIME=True,
         STATISTICS=False,
         PROTODASH=False):
    """Build the stanza emotion dataset, evaluate a classifier, run explainers.

    Poems are read from three TSV files (English, German, Chinese). Each
    stanza becomes one text with the most frequent of nine emotion labels.
    Stanzas are embedded with a multilingual SentenceTransformer, shuffled
    and split 75% / 12.5% / 12.5% into train, dev and test sets.

    Args:
        TRAIN: Train the final dense model with the fixed configuration
            ``(0.01, 7, 150)`` and evaluate it on the test set.
        TUNING: Grid-search learning rate, epochs and hidden size on the dev
            set and print the best configuration (it is not used afterwards).
        ANCHOR: Run Anchor text explanations for the example stanzas.
        LIME: Run LIME text explanations for the example stanzas.
        STATISTICS: Show a stacked bar chart of label counts per language.
        PROTODASH: Print ProtoDash prototypes for the example stanzas.

    Returns:
        None.

    NOTE(review): with ``TRAIN=False`` the model evaluated below is a fresh,
    empty ``Sequential`` unless the commented-out ``load_model`` line is
    restored.
    """
    # read poems using simplereader
    poems_english = readPoems('tsv/english.tsv')
    poems_german = readPoems('tsv/emotion.german.tsv')
    poems_chinese = readPoems('tsv/chinese.tsv')
    print(len(poems_english))
    print(len(poems_german))
    print(len(poems_chinese))

    # Label name -> class id. Two classes appear with and without spaces
    # around the slash, so both spellings map to the same id.
    label_dict = {
        'Sadness': 0,
        'Humor': 1,
        'Suspense': 2,
        'Nostalgia': 3,
        'Uneasiness': 4,
        'Annoyance': 5,
        'Awe / Sublime': 6,
        'Awe/Sublime': 6,
        'Vitality': 7,
        'Beauty / Joy': 8,
        'Beauty/Joy': 8
    }
    num_classes = 9

    stanzas = []  # one text per stanza
    labels = []   # most prominent class id per stanza
    lang = []     # language id per stanza: 0 English, 1 German, 2 Chinese

    # The language id comes from the corpus being iterated. The previous
    # `poem in poems_english` test scanned the whole list for every stanza
    # and mislabelled poems that compare equal across corpora.
    for lang_id, poems in enumerate(
            (poems_english, poems_german, poems_chinese)):
        for poem in poems:
            for stanza in poem[1:]:
                lang.append(lang_id)
                stanza_labels = []
                stanza_index = len(stanzas)
                for line_no, line in enumerate(stanza):
                    if line_no == 0:
                        stanzas.append(line[0])
                    else:
                        stanzas[stanza_index] += " " + line[0]
                    stanza_labels.extend(line[1].split(" --- "))
                    if len(line) > 2:
                        stanza_labels.extend(line[2].split(" --- "))
                counter = [0] * num_classes
                for label in stanza_labels:
                    counter[label_dict[label]] += 1
                labels.append(np.argmax(counter))

    # plot dataset statistics
    if STATISTICS is True:
        df = pd.DataFrame({
            "stanzas": stanzas,
            "labels": labels,
            "languages": lang
        })
        # One tick label per class id: removing spaces makes the alias
        # spellings identical, and dict.fromkeys drops the duplicates while
        # keeping id order (11 keys -> 9 labels, matching the 9 counts).
        bar_labels = list(
            dict.fromkeys(lab.replace(" ", "") for lab in label_dict))

        def label_counts(language_id):
            """Return stanza counts per class id, 0 for absent classes."""
            counts = df.loc[df["languages"] == language_id,
                            "labels"].value_counts()
            # reindex aligns all three Series on ids 0..8 so stacking adds
            # like with like; it replaces the old `ger_values[3] = 0` patch.
            return counts.reindex(range(num_classes), fill_value=0)

        ger_values = label_counts(1)
        en_values = label_counts(0)
        ch_values = label_counts(2)
        print(type(ger_values))

        width = 0.5
        fig, ax = plt.subplots()
        ax.grid(zorder=0, alpha=0.7)
        ax.bar(bar_labels, ger_values, width, label='German')
        ax.bar(bar_labels, en_values, width, bottom=ger_values,
               label='English')
        ax.bar(bar_labels, ch_values, width, bottom=en_values + ger_values,
               label='Chinese')
        ax.set_ylabel('Number of stanzas', fontsize=18)
        ax.legend(prop={'size': 18})
        ax.tick_params(axis='both', which='major', labelsize=18)
        plt.xticks(rotation=16)
        plt.show()

    # transform labels into one hot encodings
    one_hot_labels = to_categorical(labels)

    # analyze distribution of labels in dataset
    df = pd.DataFrame({"labels": labels})
    print(df['labels'].value_counts())

    # use pretrained multilingual model to encode sentences
    model = SentenceTransformer('distiluse-base-multilingual-cased-v1')
    embeddings = model.encode(stanzas)

    # Shuffle (embedding, one-hot label, original stanza index) triples
    # together so the original index can be recovered after the split.
    all_data = [(embeddings[i], one_hot_labels[i], i)
                for i in range(len(embeddings))]
    random.shuffle(all_data)
    # The shuffled views get their own names; `labels` and `one_hot_labels`
    # stay indexable by original stanza index.
    shuffled_embeddings = [emb for emb, _, _ in all_data]
    shuffled_labels = [lab for _, lab, _ in all_data]
    indices = [idx for _, _, idx in all_data]

    n_total = len(all_data)
    train_end = int(0.75 * n_total)
    dev_end = int(0.875 * n_total)
    train_data = np.array(shuffled_embeddings[:train_end])
    train_labels = np.array(shuffled_labels[:train_end])
    dev_data = np.array(shuffled_embeddings[train_end:dev_end])
    dev_labels = np.array(shuffled_labels[train_end:dev_end])
    test_data = np.array(shuffled_embeddings[dev_end:])
    test_labels = np.array(shuffled_labels[dev_end:])
    test_indices = indices[dev_end:]

    # Hyperparameter Tuning
    if TUNING is True:
        learning_rates = [0.001, 0.01, 0.1]
        epochs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        middle_nodes = [20, 50, 100, 150, 200]
        best_accuracy = 0
        max_config = None
        for lr, epoch, middle_node in itertools.product(
                learning_rates, epochs, middle_nodes):
            print("Training with following hyperparameters:", lr, epoch,
                  middle_node)
            adam = Adam(learning_rate=lr)
            mdl = Sequential()
            mdl.add(
                Dense(middle_node,
                      input_dim=512,
                      kernel_initializer="uniform",
                      activation="relu"))
            mdl.add(
                Dense(9, activation="softmax", kernel_initializer="uniform"))
            mdl.compile(loss="categorical_crossentropy",
                        optimizer=adam,
                        metrics=["categorical_accuracy"])
            mdl.fit(train_data, train_labels, epochs=epoch, verbose=1)
            print("evaluating on dev set...")
            (loss, accuracy) = mdl.evaluate(dev_data, dev_labels, verbose=1)
            print("loss: {:.4f}, accuracy: {:.4f}%".format(
                loss, accuracy * 100))
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                max_config = (lr, epoch, middle_node)
        print(max_config)

    # Fixed final configuration (learning rate, epochs, hidden units); it
    # deliberately overrides whatever the grid search found.
    max_config = (0.01, 7, 150)
    mdl = Sequential()
    if TRAIN is True:
        # use final model
        adam = Adam(learning_rate=max_config[0])
        mdl = Sequential()
        mdl.add(
            Dense(max_config[2],
                  input_dim=512,
                  kernel_initializer="uniform",
                  activation="relu"))
        mdl.add(Dense(9, activation="softmax", kernel_initializer="uniform"))
        mdl.compile(loss="categorical_crossentropy",
                    optimizer=adam,
                    metrics=["categorical_accuracy"])
        mdl.fit(train_data, train_labels, epochs=max_config[1], verbose=1)
        print("evaluating on test set...")
        (loss, accuracy) = mdl.evaluate(test_data, test_labels, verbose=1)
        print("loss={:.4f}, accuracy: {:.4f}%".format(loss, accuracy * 100))
        # mdl.save('emotion_classifier')

    #mdl = keras.models.load_model('emotion_classifier')
    (loss, accuracy) = mdl.evaluate(test_data, test_labels, verbose=1)
    y_pred = mdl.predict(test_data, batch_size=test_data.shape[0])

    # Original stanza indices of the misclassified test items; argmax on
    # both sides compares scalar class ids.
    wrong_classified_idx = [
        idx for j, idx in enumerate(test_indices)
        if np.argmax(y_pred[j]) != np.argmax(test_labels[j])
    ]
    print("These stanzas were wronlgy classified:")
    print(wrong_classified_idx)

    def of_language(stanza_indices, language_id):
        """Return the stanza indices whose language id is language_id."""
        # Uses the recorded language of each stanza instead of hard-coded
        # index boundaries that only fit one version of the dataset.
        return [idx for idx in stanza_indices if lang[idx] == language_id]

    wrong_classified_en = of_language(wrong_classified_idx, 0)
    wrong_classified_ger = of_language(wrong_classified_idx, 1)
    wrong_classified_ch = of_language(wrong_classified_idx, 2)
    total_en = of_language(test_indices, 0)
    total_ger = of_language(test_indices, 1)
    total_ch = of_language(test_indices, 2)
    print("Number of wrongly classified stanzas - English: ",
          len(wrong_classified_en))
    print("Number of wrongly classified stanzas - German: ",
          len(wrong_classified_ger))
    print("Number of wrongly classified stanzas - Chinese: ",
          len(wrong_classified_ch))
    print("Total - English: ", len(total_en))
    print("Total - German: ", len(total_ger))
    print("Total - Chinese: ", len(total_ch))

    class_names = [
        'Sadness', 'Humor', 'Suspense', 'Nostalgia', 'Uneasiness',
        'Annoyance', 'Awe / Sublime', 'Vitality', 'Beauty / Joy'
    ]
    # Original stanza indices used as explanation examples.
    examples = [592, 9, 5]

    # ---------------------------------LIME---------------------------------
    def pipeline(stanza, mdl=mdl, model=model):
        """Map a list of texts to class probabilities (LIME classifier_fn)."""
        embedded = model.encode(stanza)
        return mdl.predict(embedded, batch_size=embedded.shape[0])

    def lime_explain(idx, show_prob_sum=False):
        """Print label and prediction for stanza idx; return its explanation."""
        print("True Label: ", one_hot_labels[idx])
        emb = np.array(model.encode(stanzas[idx])).reshape((1, 512))
        probs = mdl.predict(emb, batch_size=1)
        print("Predicted Probabilities: ", probs)
        if show_prob_sum:
            print(probs.sum())
        explainer = LimeTextExplainer(class_names=class_names)
        return explainer.explain_instance(stanzas[idx],
                                          pipeline,
                                          num_features=6,
                                          top_labels=2)

    def print_lime_top_labels(exp):
        """Print the word weights of the two top labels; return the labels."""
        top_labs = exp.available_labels()
        for lab in top_labs[:2]:
            print("Explanation for class {}".format(lab))
            print('\n'.join(map(str, exp.as_list(label=lab))))
        return top_labs

    if LIME is True:
        # apply LIME to the example stanzas
        for idx in examples:
            exp = lime_explain(idx)
            top_labs = print_lime_top_labels(exp)
            fig = exp.as_pyplot_figure(top_labs[0])
            plt.show()
            fig_2 = exp.as_pyplot_figure(top_labs[1])
            plt.show()

        # apply LIME to one more stanza and keep the explanation on disk
        idx = 5
        exp = lime_explain(idx, show_prob_sum=True)
        # Context manager so the pickle file is flushed and closed.
        with open("explanation.pkl", "wb") as explanation_file:
            pickle.dump(exp, explanation_file)
        top_labs = print_lime_top_labels(exp)
        fig = exp.as_pyplot_figure(top_labs[0])
        plt.legend(prop={'size': 600})
        plt.tick_params(axis='both', which='major', labelsize=600)
        # pyplot has no set_yticklabels and `x` was never defined;
        # plt.yticks applies the font size to the existing labels instead.
        plt.yticks(fontsize=600)
        plt.show()
        fig_2 = exp.as_pyplot_figure(top_labs[1])
        plt.legend(prop={'size': 20})
        plt.tick_params(axis='both', which='major', labelsize=20)
        plt.show()

    # --------------------------------ANCHOR--------------------------------
    def predict_label(stanza):
        """Return [class id] predicted for the first text in stanza."""
        embedded = model.encode(stanza)
        probs = mdl.predict(embedded, batch_size=embedded.shape[0])
        return [np.argmax(probs[0])]

    def predict_second_label(stanza, predicted_label):
        """Return [class id] of the runner-up prediction."""
        embedded = model.encode(stanza)
        probs = mdl.predict(embedded, batch_size=embedded.shape[0])
        # Zero out the winner so argmax finds the second-best class.
        probs[0][np.argmax(probs[0])] = 0
        return [np.argmax(probs)]

    if ANCHOR is True:
        print()
        nlp = spacy.load('en_core_web_lg')
        explainer = anchor_text.AnchorText(nlp,
                                           class_names,
                                           use_unk_distribution=True)
        print("GPU's: ", get_available_gpus())
        for idx in examples:
            print()
            print("------------STANZA-", idx, "------------")
            print()
            text = stanzas[idx]
            predicted = predict_label([text])
            print(predicted)
            pred = explainer.class_names[predicted[0]]
            alternative = explainer.class_names[predict_second_label(
                [text], predicted[0])[0]]
            print('Prediction: %s' % pred)
            # one_hot_labels is indexed by original stanza index; the old
            # `labels[idx]` read from the shuffled list and so showed the
            # label of a different stanza.
            print("Stanza: ", stanzas[idx], " True Label: ",
                  one_hot_labels[idx])
            exp = explainer.explain_instance(text,
                                             predict_label,
                                             threshold=0.95)
            print('Anchor: %s' % (' AND '.join(exp.names())))
            print('Precision: %.2f' % exp.precision())
            print()
            print('Examples where anchor applies and model predicts %s:' %
                  pred)
            print()
            print('\n'.join(
                [x[0] for x in exp.examples(only_same_prediction=True)]))
            print()
            print('Examples where anchor applies and model predicts %s:' %
                  alternative)
            print()
            print('\n'.join([
                x[0] for x in exp.examples(partial_index=0,
                                           only_different_prediction=True)
            ]))

    # ------------------------------PROTODASH-------------------------------
    if PROTODASH is True:
        # Import, helpers and explainer are set up once, not per example.
        from aix360.algorithms.protodash import ProtodashExplainer

        def predict_single_label(stanza):
            """Return [class id] predicted for a single stanza text."""
            embedded = model.encode(stanza).reshape((1, 512))
            probs = mdl.predict(embedded, batch_size=1)
            return [np.argmax(probs)]

        def index_to_vector(index):
            """Return the embedding of the stanza with this original index."""
            for k, data in enumerate(all_data):
                if data[2] == index:
                    return shuffled_embeddings[k]
            return None

        explainer = ProtodashExplainer()
        num_prototypes = 5
        for idx in examples:
            print(train_data.shape)
            vector = index_to_vector(idx)
            vector = vector.reshape((1, 512))
            (weights, proto_ind, _) = explainer.explain(vector,
                                                        train_data,
                                                        m=num_prototypes)
            # Normalise so the prototype weights sum to 1.
            weights = np.around(weights / np.sum(weights), 2)
            print()
            print("example: ", stanzas[idx])
            print("prototypes with weights:")
            print()
            print()
            # proto_ind indexes train_data; `indices` maps that position
            # back to the original stanza index.
            all_indices = [idx]
            for i in range(num_prototypes):
                stanza_ind = indices[proto_ind[i]]
                print(weights[i], stanzas[stanza_ind])
                all_indices.append(stanza_ind)
            for stanza_ind in all_indices:
                print()
                print(stanzas[stanza_ind])
                print("Predicted Label: ",
                      predict_single_label(stanzas[stanza_ind]))
                print("True Label: ", np.argmax(one_hot_labels[stanza_ind]))
# building graphix

# plot figure 1: stress histogram with a fitted Weibull PDF overlaid
# A single subplots() call replaces the old figure() + subplots() pair,
# which left an empty extra figure behind.
fig_pdf, ax_pdf = plt.subplots()
ax_pdf.set_xlabel(r'Stress, $S$ (GPa)', fontsize=labelFontSizeX)
ax_pdf.set_ylabel('Probability density', fontsize=labelFontSizeY)
#plt.title('Histogram of Stress')
ax_pdf.grid(True)
# `density=True` replaces the `normed=True` keyword that was removed from
# hist().
n, bins, patches = ax_pdf.hist(data.x, num.bins, facecolor='green',
                               density=True, alpha=0.5)
data.linspace = np.linspace(bins[0], bins[num.bins], num.weib)
ax_pdf.plot(data.x_hist, weib.pdf(data.x_hist, coeff.shape, coeff.scale),
            'r--')
# Relabel the y ticks with their values divided by the sum of bin heights.
# get_yticks / set_yticklabels are Axes methods, not pyplot functions.
tick_positions = ax_pdf.get_yticks()
ticks = tick_positions / sum(n)
newTicks = ['%.2f' % a for a in ticks]
# Pin the tick positions first so the custom labels stay attached to them.
ax_pdf.set_yticks(tick_positions)
ax_pdf.set_yticklabels(newTicks)
fig_pdf.savefig('xxx6_v2_%s_1.png' % (data.numfile), dpi=300,
                transparent=True)

# plot figure 2: empirical points against the fitted Weibull CDF
plt.figure()
#ax2=plt.subplot(222)
plt.xlabel(r'Stress, $S$ (GPa)', fontsize=labelFontSizeX)
plt.ylabel('Cumulative distribution', fontsize=labelFontSizeY)
#plt.title('Histogram of Stress')
plt.grid(True)
plt.ylim(0., 1.)
plt.plot(data.x_hist, data.y_hist, 'go', alpha=0.5)
plt.plot(data.linspace, weib.cdf(data.linspace, coeff.shape, coeff.scale),
         'r--')