Example No. 1
import matplotlib.pyplot as plt


def plot_horizontal_bar_from_cm(confusion_matrix=None, classes=None):
    width = 0.30

    # Example data
    y_labels = 12 * ['a', 'b', 'c', 'd']
    y_pos = list(range(len(y_labels)))
    true_positives = 12 * [8.84036186, 12.94095337, 11.19919226, 10.64395389]
    false_negatives = 12 * [1, 1, 1, 1]
    false_positives = 12 * [2, 13, 13, 3]

    fig, ax = plt.subplots(figsize=(10, 30))
    # Stacked bars: each segment's `left` is the running total of the
    # segments drawn before it (barh cannot stack on another bar object).
    ax.barh(y_pos, true_positives, width, color='green', label='TP')
    ax.barh(y_pos, false_negatives, width, label='FN', left=true_positives)
    ax.barh(y_pos, false_positives, width, label='FP',
            left=[tp + fn for tp, fn in zip(true_positives, false_negatives)])
    # Grouped (side-by-side) alternative:
    # ax.barh([p + width for p in y_pos], false_negatives, width, label='FN')
    # ax.barh([p + width * 2 for p in y_pos], false_positives, width, label='FP')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(y_labels)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Performance')
    ax.set_title('How fast do you want to go today?')
    ax.legend(loc='upper right')
    plt.show()
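
The function above hard-codes its example data and ignores its confusion_matrix argument. A minimal sketch of how per-class TP/FN/FP could be derived from an actual confusion matrix; the helper name and the rows-are-true-labels convention are assumptions, not part of the original snippet:

import numpy as np

# Hypothetical helper: derive per-class TP/FN/FP from a square confusion
# matrix whose rows are true labels and whose columns are predictions.
def per_class_counts(cm):
    cm = np.asarray(cm)
    tp = np.diag(cm)
    fn = cm.sum(axis=1) - tp  # this class, predicted as another
    fp = cm.sum(axis=0) - tp  # predicted as this class, actually another
    return tp, fn, fp

tp, fn, fp = per_class_counts([[8, 1, 0],
                               [2, 12, 1],
                               [0, 1, 11]])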
Example No. 2
import seaborn as sns


def fix_hplot(df, statistic, xlabel, ylabel, hue, fontsize):
    data = df.query('statistic == "{}"'.format(statistic))
    pastel = ["#92C6FF", "#97F0AA", "#FF9F9A",
              "#D0BBFF", "#FFFEA3", "#B0E0E6"]
    pal = dict(Validation=pastel[0], Test=pastel[2])  # unused while hue=None
    # seaborn returns the Axes it drew on; binding it to `ax` avoids
    # shadowing matplotlib.pyplot (the original named it `plt`).
    ax = sns.barplot(x='data', y='label', data=data, hue=None,
                     errwidth=1.0, capsize=0.15)

    for ticklabel in ax.get_yticklabels() + ax.get_xticklabels():
        ticklabel.set_fontsize(fontsize)
    # get_text() replaces the private `_text` attribute access.
    ax.set_yticklabels([ticklabel.get_text().capitalize()
                        for ticklabel in ax.get_yticklabels()], ha='left')

    ax.yaxis.get_label().set_fontsize(fontsize)
    ax.xaxis.get_label().set_fontsize(fontsize)
    ax.yaxis.set_tick_params(pad=fontsize * 10 - 10)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    ax.set_xlim(0, 1.05)

    return ax
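
A usage sketch under assumed column names; the original call site is not shown, so the DataFrame layout here is an illustration only:

import pandas as pd
import matplotlib.pyplot as plt

# Hypothetical tidy input: one row per measurement, with the column names
# fix_hplot expects ('statistic' for the query, 'data' and 'label' for the plot).
df = pd.DataFrame({
    'statistic': ['f1', 'f1', 'f1', 'f1'],
    'label':     ['cat', 'cat', 'dog', 'dog'],
    'data':      [0.81, 0.79, 0.75, 0.77],
})
ax = fix_hplot(df, statistic='f1', xlabel='F1 score', ylabel='Class',
               hue=None, fontsize=12)
plt.show()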
Example No. 3
    # The opening of the first loglog call is missing from the source; the
    # leading arguments below are assumed to mirror the star-point call.
    # Note: in this snippet `plt` names a subplot Axes (as in Example No. 6),
    # shadowing matplotlib.pyplot.
    plt.loglog(slams, sflam,
               color='red', marker='o', linestyle='None', markersize=3)
    plt.loglog(slams, sflam_star,
               color='white', marker='o', linestyle='None', markersize=3)
#              label='IRAS F16544$-$1604')
# plt.figtext(0.4, 0.83, 'WISE')
# print(plt.gca())
    axes=fig1.gca()
    for label in axes.get_xticklabels() + axes.get_yticklabels():
        label.set_fontsize('x-small')

    if (i < nx):
        plt.set_xlabel(r'$\lambda$ ($\mu$m)', size='x-small')
    if (np.mod(i, nx) == 0):
        plt.set_ylabel(r'$\lambda F_\lambda$ (erg/s/cm$^{2}$)', size='x-small')
    else:
        # Hide y tick labels (the original passed visible='False', a truthy
        # string, so the visibility toggle had no effect).
        plt.set_yticklabels([])
    plt.axis([0.4, 150, 1e-14, 1e-6])
#    print('i =', i)
    plt.text(0.7,5e-8,'M%d'%(i+1),size='small')
    print "%12s &%15s &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.1f\pm%4.1f\$ &\$%4.2f\pm%4.2f\$ &\$%4.2f\pm%4.2f\$ &\$%4.2f\pm%4.2f\$ &\$%4.2f\pm%4.2f\$(%s%s) &\$%4.2f\pm%4.2f\$(%s%s)\\\\"%\
    (scat['2MASS_name'][sstar],scat['object_type'][sstar],\
    scat['J_flux_c'][sstar],scat['J_D_flux_c'][sstar],\
    scat['H_flux_c'][sstar],scat['H_D_flux_c'][sstar],\
    scat['Ks_flux_c'][sstar],scat['Ks_D_flux_c'][sstar],\
    w1f[wstar],w1df[wstar],\
    w2f[wstar],w2df[wstar],\
    w3f[wstar],w3df[wstar],\
    w4f[wstar],w4df[wstar],\
    scat['MP1_flux_c'][sstar],scat['MP1_D_flux_c'][sstar],\
    scat['MP2_flux_c'][sstar],scat['MP2_D_flux_c'][sstar],\
    scat['MP2_Q_det_c'][sstar],scat['MP2_imtype'][sstar],\
Example No. 4
# plt.bar(np.arange(69), weights[0, :], label=features)

# Rank features by absolute weight, largest first.
order = np.argsort(np.abs(weights))[::-1]

plt.bar(np.arange(20), weights[order[:20]])
plt.xticks(np.arange(20), features[order[:20]], fontsize=12, rotation="vertical")
plt.ylabel("Feature weights")
plt.savefig("/neurospin/brainomics/2016_schizConnect/analysis/all_studies+VIP/Freesurfer/all_subjects/results/ROIs_analysis/weights/svm_weights_top20.png")


# Horizontal variant (pyplot has no set_yticks/set_yticklabels;
# plt.yticks sets tick positions and labels in one call).
plt.barh(np.arange(20), weights[order[:20]])
plt.yticks(np.arange(20), features[order[:20]], fontsize=12)
plt.ylabel("Feature weights")


plt.rc('font', family='serif')
fig, ax = plt.subplots()
plt.grid()
# Example data
features_names = features[order[:20]]
y_pos = np.arange(len(features_names))
performance = weights[order[:20]]
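
The snippet stops after preparing features_names, y_pos, and performance; a sketch of the horizontal-bar chart it appears to be setting up (the x axis label is an assumption):

ax.barh(y_pos, performance, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(features_names)
ax.invert_yaxis()                # largest |weight| on top
ax.set_xlabel('Feature weight')  # assumed label, mirroring the plots above
plt.show()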
Example No. 5
File: temp.py  Project: shmakn99/GCN
import pickle 
import matplotlib.pyplot as plt
import numpy as np



# Example data
people = ('TP', 'FP', 'FN', 'TN')
y_pos = np.arange(len(people))
performance = (64354, 2701, 17007, 2417826)


fig, ax = plt.subplots()
ax.barh(y_pos, performance, align='center',
        color='green', ecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('Precision - 0.959, Recall - 0.790')

plt.show()
Example No. 6
fig1.clf()

for j in range(len(files)):
    # The source names each subplot `plt`, shadowing matplotlib.pyplot;
    # `ax` is used here instead.
    ax = fig1.add_subplot(ny, nx, j + 1)
    bin_edges, x = hist(files[j])  # user helper: returns (bin_edges, values)

    ax.scatter(bin_edges[:-1], x, c='r')

    # Only the fifth panel (j == 4) shows x tick labels and carries the
    # x axis label. (The original passed visible='False', a truthy string,
    # so its visibility toggling had no effect.)
    if j == 4:
        ax.set_xlabel(r'Temperature (K)', size='small')
        ax.tick_params(axis='x', labelsize='small')
    else:
        ax.set_xticklabels([])
    ax.tick_params(axis='y', labelsize='small')

    ax.axis([5, 60, 0, 1.1e-3])

    ax.set_ylabel('Normalised Number of pixels', size='small')

fig1.show()
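
The loop above calls an external hist(file) helper that is not included in the snippet; a minimal stand-in, assuming each file holds one value per line and that the helper returns (bin_edges, normalised counts) in the order unpacked above:

import numpy as np

def hist(path, bins=50):
    # Hypothetical reconstruction; the real helper is not in the source.
    values = np.loadtxt(path)
    counts, bin_edges = np.histogram(values, bins=bins, range=(5, 60))
    counts = counts / counts.sum()  # normalise so each panel totals 1
    return bin_edges, counts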
Example No. 7
def main(TRAIN=False,
         TUNING=False,
         ANCHOR=False,
         LIME=True,
         STATISTICS=False,
         PROTODASH=False):
    # read poems using simplereader
    poems_english = readPoems('tsv/english.tsv')
    poems_german = readPoems('tsv/emotion.german.tsv')
    poems_chinese = readPoems('tsv/chinese.tsv')
    print(len(poems_english))
    print(len(poems_german))
    print(len(poems_chinese))
    # set up label dictionary
    label_dict = {
        'Sadness': 0,
        'Humor': 1,
        'Suspense': 2,
        'Nostalgia': 3,
        'Uneasiness': 4,
        'Annoyance': 5,
        'Awe / Sublime': 6,
        'Awe/Sublime': 6,
        'Vitality': 7,
        'Beauty / Joy': 8,
        'Beauty/Joy': 8
    }

    # array of stanzas
    stanzas = []

    # array of most prominent label for each stanza
    labels = []

    # list of languages
    lang = []

    # extract sentences with one label
    for poem in itertools.chain(poems_english, poems_german, poems_chinese):
        for stanza in poem[1:]:
            if poem in poems_english:
                lang.append(0)
            elif poem in poems_german:
                lang.append(1)
            else:
                lang.append(2)
            labelsPerStanza = []
            currentStanzaIndex = len(stanzas)
            newStanza = 1
            for line in stanza:
                if newStanza:
                    stanzas.append(line[0])
                    newStanza = 0
                else:
                    stanzas[currentStanzaIndex] += " " + line[0]
                labelsPerStanza.extend(line[1].split(" --- "))
                if len(line) > 2:
                    labelsPerStanza.extend(line[2].split(" --- "))
            counter = [0, 0, 0, 0, 0, 0, 0, 0, 0]
            for label in labelsPerStanza:
                counter[label_dict[label]] += 1
            labels.append(np.argmax(counter))

    # plot dataset statistics
    if STATISTICS is True:
        df = pd.DataFrame({
            "stanzas": stanzas,
            "labels": labels,
            "languages": lang
        })

        # One spelling per class id, so the nine bar labels line up with the
        # nine class counts ('Awe / Sublime' and 'Awe/Sublime' share id 6).
        idx_to_label = {v: k for k, v in label_dict.items()}
        bar_labels = [idx_to_label[i].replace(" ", "") for i in sorted(idx_to_label)]
        ger_values = df.loc[df["languages"] == 1, "labels"].value_counts()
        en_values = df.loc[df["languages"] == 0, "labels"].value_counts()
        ch_values = df.loc[df["languages"] == 2, "labels"].value_counts()
        print(type(df.loc[df["languages"] == 1, "labels"].value_counts()))
        ger_values[3] = 0
        ger_values.sort_index(inplace=True)
        en_values.sort_index(inplace=True)
        ch_values.sort_index(inplace=True)

        width = 0.5

        fig, ax = plt.subplots()
        plt.grid(zorder=0, alpha=0.7)
        ax.bar(bar_labels, ger_values, width, label='German')
        ax.bar(bar_labels,
               en_values,
               width,
               bottom=ger_values,
               label='English')
        ax.bar(bar_labels,
               ch_values,
               width,
               bottom=en_values + ger_values,
               label='Chinese')

        ax.set_ylabel('Number of stanzas', fontsize=18)
        ax.legend(prop={'size': 18})
        ax.tick_params(axis='both', which='major', labelsize=18)
        plt.xticks(rotation=16)

        plt.show()

    # transform labels into one hot encodings
    one_hot_labels = to_categorical(labels)

    # analyze distribution of labels in dataset
    df = pd.DataFrame({"labels": labels})
    print(df['labels'].value_counts())

    # use pretrained multilingual model to encode sentences
    model = SentenceTransformer('distiluse-base-multilingual-cased-v1')
    embeddings = model.encode(stanzas)

    # shuffle data and split into train and test set
    all_data = [(embeddings[i], one_hot_labels[i], i)
                for i in range(len(embeddings))]
    unshuffled_data = list(all_data)  # copy: random.shuffle below mutates all_data in place
    random.shuffle(all_data)
    embeddings = [emb for emb, _, _ in all_data]
    labels = [lab for _, lab, _ in all_data]
    indices = [idx for _, _, idx in all_data]

    train_data = np.array(embeddings[:int(0.75 * len(embeddings))])
    train_labels = np.array(labels[:int(0.75 * len(embeddings))])
    dev_data = np.array(
        embeddings[int(0.75 * len(embeddings)):int(0.875 * len(embeddings))])
    dev_labels = np.array(
        labels[int(0.75 * len(embeddings)):int(0.875 * len(embeddings))])
    test_data = np.array(embeddings[int(0.875 * len(embeddings)):])
    test_labels = np.array(labels[int(0.875 * len(embeddings)):])

    # Hyperparameter Tuning
    if TUNING is True:
        learning_rates = [0.001, 0.01, 0.1]
        epochs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        middle_nodes = [20, 50, 100, 150, 200]
        losses = []
        accuracies = []
        max_loss = 100000
        best_acc = 0
        max_config = None
        for lr in learning_rates:
            for epoch in epochs:
                for middle_node in middle_nodes:
                    print("Training with following hyperparameters:", lr,
                          epoch, middle_node)
                    adam = Adam(learning_rate=lr)
                    mdl = Sequential()
                    mdl.add(
                        Dense(middle_node,
                              input_dim=512,
                              kernel_initializer="uniform",
                              activation="relu"))
                    mdl.add(
                        Dense(9,
                              activation="softmax",
                              kernel_initializer="uniform"))
                    mdl.compile(loss="categorical_crossentropy",
                                optimizer=adam,
                                metrics=["categorical_accuracy"])

                    mdl.fit(train_data, train_labels, epochs=epoch, verbose=1)
                    print("evaluating on dev set...")
                    (loss, accuracy) = mdl.evaluate(dev_data,
                                                    dev_labels,
                                                    verbose=1)
                    print("loss: {:.4f}, accuracy: {:.4f}%".format(
                        loss, accuracy * 100))
                    losses.append(loss)
                    accuracies.append(accuracy)
                    if accuracy > best_acc:
                        best_acc = accuracy
                        max_config = (lr, epoch, middle_node)
        print(max_config)

    max_config = (0.01, 7, 150)
    mdl = Sequential()
    if TRAIN is True:
        # use final model
        adam = Adam(learning_rate=max_config[0])
        mdl = Sequential()
        mdl.add(
            Dense(max_config[2],
                  input_dim=512,
                  kernel_initializer="uniform",
                  activation="relu"))
        mdl.add(Dense(9, activation="softmax", kernel_initializer="uniform"))
        mdl.compile(loss="categorical_crossentropy",
                    optimizer=adam,
                    metrics=["categorical_accuracy"])

        mdl.fit(train_data, train_labels, epochs=max_config[1], verbose=1)
        print("evaluating on test set...")
        (loss, accuracy) = mdl.evaluate(test_data, test_labels, verbose=1)
        print("loss={:.4f}, accuracy: {:.4f}%".format(loss, accuracy * 100))
        #print("precision={:.4f}%".format(precision * 100))
        #print("recall={:.4f}%".format(recall * 100))
        # mdl.save('emotion_classifier')

    #mdl = keras.models.load_model('emotion_classifier')
    (loss, accuracy) = mdl.evaluate(test_data, test_labels, verbose=1)

    y_pred = mdl.predict(test_data, batch_size=test_data.shape[0])

    wrong_classified_idx = []

    for j, idx in enumerate(indices[int(0.875 * len(embeddings)):]):
        if np.argmax(y_pred[j]) != np.where(test_labels[j] == 1.0)[0]:
            wrong_classified_idx.append(idx)

    print("These stanzas were wronlgy classified:")
    print(wrong_classified_idx)

    wrong_classified_en = [idx for idx in wrong_classified_idx if idx < 167]
    wrong_classified_ger = [
        idx for idx in wrong_classified_idx if (idx >= 167 and idx < 688)
    ]
    wrong_classified_ch = [idx for idx in wrong_classified_idx if idx >= 688]

    total_en = [
        idx for idx in indices[int(0.875 * len(embeddings)):] if idx < 167
    ]
    total_ger = [
        idx for idx in indices[int(0.875 * len(embeddings)):]
        if (idx >= 167 and idx < 688)
    ]
    total_ch = [
        idx for idx in indices[int(0.875 * len(embeddings)):] if idx >= 688
    ]

    print("Number of wrongly classified stanzas - English: ",
          len(wrong_classified_en))
    print("Number of wrongly classified stanzas - German: ",
          len(wrong_classified_ger))
    print("Number of wrongly classified stanzas - Chinese: ",
          len(wrong_classified_ch))

    print("Total - English: ", len(total_en))
    print("Total - German: ", len(total_ger))
    print("Total - Chinese: ", len(total_ch))

    class_names = [
        'Sadness', 'Humor', 'Suspense', 'Nostalgia', 'Uneasiness', 'Annoyance',
        'Awe / Sublime', 'Vitality', 'Beauty / Joy'
    ]

    examples = [592, 9, 5]

    # ------------------------------------------------------------LIME--------------------------------------------------------------------------------------------
    # apply LIME to obtain explanations for a specific instance

    def pipeline(stanza, mdl=mdl, model=model):
        embedded = model.encode(stanza)
        return mdl.predict(embedded, batch_size=embedded.shape[0])

    if LIME is True:
        # apply LIME to incorrectly classified stanzas

        for idx in examples:
            print("True Label: ", one_hot_labels[idx])
            emb = np.array(model.encode(stanzas[idx]))
            emb = emb.reshape((512, 1))
            emb = emb.T
            print("Predicted Probabilities: ", mdl.predict(emb, batch_size=1))

            explainer = LimeTextExplainer(class_names=class_names)
            exp = explainer.explain_instance(stanzas[idx],
                                             pipeline,
                                             num_features=6,
                                             top_labels=2)
            top_labs = exp.available_labels()

            print("Explanation for class {}".format(top_labs[0]))
            print('\n'.join(map(str, exp.as_list(label=top_labs[0]))))

            print("Explanation for class {}".format(top_labs[1]))
            print('\n'.join(map(str, exp.as_list(label=top_labs[1]))))

            fig = exp.as_pyplot_figure(top_labs[0])
            plt.show()
            fig_2 = exp.as_pyplot_figure(top_labs[1])
            plt.show()
        # apply LIME to different correctly classified stanzas
        idx = 5
        print("True Label: ", one_hot_labels[idx])
        emb = np.array(model.encode(stanzas[idx]))
        emb = emb.reshape((512, 1))
        emb = emb.T
        print("Predicted Probabilities: ", mdl.predict(emb, batch_size=1))
        print(mdl.predict(emb, batch_size=1).sum())

        explainer = LimeTextExplainer(class_names=class_names)
        exp = explainer.explain_instance(stanzas[idx],
                                         pipeline,
                                         num_features=6,
                                         top_labels=2)
        pickle.dump(exp, open("explanation.pkl", "wb"))
        top_labs = exp.available_labels()

        print("Explanation for class {}".format(top_labs[0]))
        print('\n'.join(map(str, exp.as_list(label=top_labs[0]))))

        print("Explanation for class {}".format(top_labs[1]))
        print('\n'.join(map(str, exp.as_list(label=top_labs[1]))))

        fig = exp.as_pyplot_figure(top_labs[0])
        plt.legend(prop={'size': 20})
        plt.tick_params(axis='both', which='major', labelsize=20)
        plt.show()
        fig_2 = exp.as_pyplot_figure(top_labs[1])
        plt.legend(prop={'size': 20})
        plt.tick_params(axis='both', which='major', labelsize=20)
        plt.show()

    # ----------------------------------------------------------ANCHOR---------------------------------------------------------------------------------------------
    def predict_label(stanza):
        embedded = model.encode(stanza)
        probs = mdl.predict(embedded, batch_size=embedded.shape[0])
        return [np.argmax(probs[0])]

    def predict_second_label(stanza, predicted_label):
        embedded = model.encode(stanza)
        probs = mdl.predict(embedded, batch_size=embedded.shape[0])
        probs[0][np.argmax(probs[0])] = 0
        return [np.argmax(probs)]

    if ANCHOR is True:
        ids = np.zeros(3)
        print()
        # for i in examples:
        #     lowest = 500
        #     lowest_id = 500
        #     for j in range(len(stanzas)):
        #         if len(stanzas[j]) < lowest:
        #             if j not in ids and len(stanzas[j]) > 85 and j < 174:
        #                 lowest = len(stanzas[j])
        #                 lowest_id = j
        #     ids[i] = lowest_id
        #     print("Ausgewähltes Stanza: ", stanzas[lowest_id])
        #     print("Länge: ", len(stanzas[lowest_id]), "   id: ", lowest_id)
        #     print()

        nlp = spacy.load('en_core_web_lg')
        explainer = anchor_text.AnchorText(nlp,
                                           class_names,
                                           use_unk_distribution=True)
        print("GPU's: ", get_available_gpus())

        for idx in examples:
            print()
            print("------------STANZA-", idx, "------------")
            print()
            text = stanzas[idx]
            print(predict_label([text]))
            pred = explainer.class_names[predict_label([text])[0]]
            alternative = explainer.class_names[predict_second_label(
                [text],
                predict_label([text])[0])[0]]
            print('Prediction: %s' % pred)
            print("Stanza: ", stanzas[idx], "   True Label: ", labels[idx])
            exp = explainer.explain_instance(text,
                                             predict_label,
                                             threshold=0.95)

            print('Anchor: %s' % (' AND '.join(exp.names())))
            print('Precision: %.2f' % exp.precision())
            print()
            print('Examples where anchor applies and model predicts %s:' %
                  pred)
            print()
            print('\n'.join(
                [x[0] for x in exp.examples(only_same_prediction=True)]))
            print()
            print('Examples where anchor applies and model predicts %s:' %
                  alternative)
            print()
            print('\n'.join([
                x[0] for x in exp.examples(partial_index=0,
                                           only_different_prediction=True)
            ]))

    # ----------------------------------------------------------PROTODASH------------------------------------------------------------------------------------------

    if PROTODASH is True:

        from aix360.algorithms.protodash import ProtodashExplainer

        for idx in examples:

            def predict_label(stanza):
                embedded = model.encode(stanza)
                embedded = embedded.reshape((512, 1))
                embedded = embedded.T
                probs = mdl.predict(embedded, batch_size=1)
                return [np.argmax(probs)]

            def index_to_vector(index):
                for k, data in enumerate(all_data):
                    if data[2] == index:
                        return embeddings[k]
                return None

            explainer = ProtodashExplainer()

            num_prototypes = 5

            print(train_data.shape)

            vector = index_to_vector(idx)
            vector = vector.reshape((1, 512))

            (weights, proto_ind, _) = explainer.explain(vector,
                                                        train_data,
                                                        m=num_prototypes)

            weights = np.around(weights / np.sum(weights), 2)

            print()
            print("example: ", stanzas[idx])
            print("prototypes with weights:")
            print()
            print()
            for i in range(num_prototypes):
                j = proto_ind[i]
                print(weights[i], stanzas[indices[j]])

            all_indices = [idx]
            for i in range(num_prototypes):
                j = proto_ind[i]
                stanza_ind = indices[j]
                all_indices.append(stanza_ind)

            for l in all_indices:
                print()
                print(stanzas[l])
                print("Predicted Label: ", predict_label(stanzas[l]))
                print("True Label: ", np.argmax(one_hot_labels[l]))
# build the graphics
# plot figure 1
# ax1 = plt.subplot(221)
plt.figure()
plt.xlabel(r'Stress, $S$ (GPa)', fontsize=labelFontSizeX)
plt.ylabel('Probability density', fontsize=labelFontSizeY)
# plt.title('Histogram of Stress')
plt.grid(True)

# `normed` was removed from Matplotlib; `density=True` is the modern spelling.
n, bins, patches = plt.hist(data.x, num.bins, facecolor='green', density=True, alpha=0.5)
data.linspace = np.linspace(bins[0], bins[num.bins], num.weib)
plt.plot(data.x_hist, weib.pdf(data.x_hist, coeff.shape, coeff.scale), 'r--')
# pyplot has no get_yticks/set_yticklabels; go through the current Axes.
ticks = plt.gca().get_yticks() / sum(n)
newTicks = ['%.2f' % a for a in ticks]
plt.gca().set_yticklabels(newTicks)

plt.savefig('xxx6_v2_%s_1.png' % (data.numfile), dpi=300, transparent=True)

# plot figure 2
plt.figure()
#ax2=plt.subplot(222)
plt.xlabel(r'Stress, $S$ (GPa)', fontsize=labelFontSizeX)
plt.ylabel('Cumulative distribution', fontsize=labelFontSizeY)
#plt.title('Histogram of Stress')
plt.grid(True)
plt.ylim(0., 1.)

plt.plot(data.x_hist, data.y_hist, 'go', alpha=0.5)
plt.plot(data.linspace, weib.cdf(data.linspace, coeff.shape, coeff.scale), 'r--')
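
The weib and coeff objects are defined elsewhere in the file; a minimal sketch of how they could be produced with scipy.stats, where the wrapper shapes are assumptions chosen only to match the weib.pdf(x, shape, scale) calls above:

from types import SimpleNamespace
from scipy import stats

# Hypothetical fit: a two-parameter Weibull with the location pinned at 0.
shape, loc, scale = stats.weibull_min.fit(data.x, floc=0)
coeff = SimpleNamespace(shape=shape, scale=scale)

# Thin wrappers matching the call signatures used above.
weib = SimpleNamespace(
    pdf=lambda x, c, s: stats.weibull_min.pdf(x, c, scale=s),
    cdf=lambda x, c, s: stats.weibull_min.cdf(x, c, scale=s),
)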