Exemplo n.º 1
0
def plot_convergence_iterations(stoch, batch, models, nb_documents,
                                batch_size):
    """Plot held-out log-likelihood against the number of passes and show it.

    Three series are drawn on one axes:

    * every entry of each epoch of ``stoch`` except the last, connected by a
      line with '+' markers and placed at fractional pass positions;
    * the last entry of each epoch of ``stoch``, as unconnected 'o' markers;
    * ``batch``, as a line with hollow '*' markers, one value per pass.

    Args:
        stoch: sequence of per-epoch score sequences.
        batch: sequence of scores, one per pass.
        models: labels handed to the legend.
        nb_documents: forwarded to ``gen_batches``.
        batch_size: forwarded to ``gen_batches``.
    """
    batches_per_pass = len(gen_batches(nb_documents, batch_size))

    # Split every epoch into its within-pass scores and its final score.
    within_pass_scores = []
    end_of_pass_scores = []
    for epoch_scores in stoch:
        within_pass_scores.extend(epoch_scores[:-1])
        end_of_pass_scores.append(epoch_scores[-1:][0])

    # Fractional x positions inside a pass, one every 5 mini-batches.
    # NOTE(review): this presumably mirrors how often the caller records a
    # score within an epoch -- confirm against the code producing `stoch`.
    fractions = np.arange(5, batches_per_pass, 5) / batches_per_pass
    within_pass_positions = []
    for pass_number, _ in enumerate(stoch, start=1):
        within_pass_positions.extend(pass_number + fractions)

    # Both per-pass series share x positions 2, 3, ..., len(batch) + 1.
    pass_end_positions = np.arange(1, len(batch) + 1) + 1

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))
    axes.plot(within_pass_positions, within_pass_scores, lw=1, marker='+')
    axes.plot(pass_end_positions,
              end_of_pass_scores,
              marker='o',
              linestyle='None',
              markersize=4)
    axes.plot(pass_end_positions,
              batch,
              lw=1,
              marker='*',
              markerfacecolor='None')
    axes.legend(models)
    axes.set_xlabel("number of passes")
    axes.set_ylabel("heldout log-likelihood per word")
    plt.show()
Exemplo n.º 2
0
def main():
    """Report the ten most frequent words of a user-supplied text file.

    Prompts for a word file (read as ASCII) and an exclusion file, counts the
    words with ``buildHashmap``, removes every excluded word (lower-cased)
    with ``delete``, then prints a Word / Count / Count % table and shows a
    bar chart of the ten highest counts.

    Both files are read inside ``with`` blocks, so they are closed as soon as
    their content is loaded and a failing ``open`` surfaces its own error
    instead of an ``UnboundLocalError`` raised by cleanup code.
    """
    # read input file with words
    words_path = input("Enter filename with words: ")
    with open(words_path, "r", encoding='ascii') as words_file:
        words = words_file.read().strip().split()

    # read file with excluded words
    exclude_path = input("Enter exclude filename with words: ")
    with open(exclude_path, "r") as exclude_file:
        excluded_words = exclude_file.read().strip().split()

    # build hashmap with input word list
    word_counts = buildHashmap(words)

    # delete excluded words from hashmap
    for word in excluded_words:
        delete(word_counts, word.lower())

    # get top 10 list, highest count first
    top10 = sorted(word_counts.items(),
                   key=operator.itemgetter(1),
                   reverse=True)[:10]

    # prepare output table with Word and Count column
    df = pd.DataFrame(top10, columns=['Word', 'Count'])

    # total over ALL remaining words (not just the top 10); needed for the %
    total = sum(word_counts[word] for word in word_counts)

    # add % column to existing table
    df['Count %'] = get_column(top10, 1, total)

    # print final result as a table
    print(df)

    # draw bar chart
    positions = range(len(top10))
    ax = plt.figure().gca()  # to show whole numbers on the y-axis
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    plt.bar(positions, [count for _, count in top10],
            align='center',
            color='gray')
    plt.xticks(positions, [word for word, _ in top10])
    plt.xticks(rotation=90)
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.title('Top 10 Words')

    plt.show()
Exemplo n.º 3
0
def plot_corr(draw, vars=(0, 1)):
    """Scatter two parameters of *draw* against each other with marginals.

    The main axes shows the points of the two selected columns of
    ``draw.points``. A panel above it and a panel to its right show each
    parameter's histogram (50 bins, normalised to a density) overlaid with a
    ``KDE1D`` estimate evaluated on 100 points between the parameter's
    minimum and maximum.

    Args:
        draw: object providing ``labels`` (indexable by parameter number) and
            ``points`` (indexed as ``points[:, v]``).
        vars: exactly two parameter indices; any other length raises
            ``ValueError`` from the unpacking check below.
    """
    from pylab import axes, setp, MaxNLocator

    _, _ = vars  # Make sure vars is length 2
    labels = [draw.labels[v] for v in vars]
    values = [draw.points[:, v] for v in vars]

    # Form kernel density estimates of the parameters
    xmin, xmax = min(values[0]), max(values[0])
    density_x = KDE1D(values[0])
    x = linspace(xmin, xmax, 100)
    px = density_x(x)

    density_y = KDE1D(values[1])
    ymin, ymax = min(values[1]), max(values[1])
    y = linspace(ymin, ymax, 100)
    py = density_y(y)

    nbins = 50
    ax_data = axes([0.1, 0.1, 0.63, 0.63])  # x,y,w,h

    # NOTE: a 2-D density background (KDE2D + pcolorfast) was sketched here
    # in the original but never enabled; only the raw points are drawn.
    ax_data.plot(values[0], values[1], 'k.', markersize=1)
    ax_data.set_xlabel(labels[0])
    ax_data.set_ylabel(labels[1])

    # Marginal of the first parameter, sharing the x axis with the scatter.
    # `density=True` replaces the `normed` keyword, which Matplotlib removed.
    ax_hist_x = axes([0.1, 0.75, 0.63, 0.2], sharex=ax_data)
    ax_hist_x.hist(values[0], nbins, orientation='vertical', density=True)
    ax_hist_x.plot(x, px, 'k-')
    ax_hist_x.yaxis.set_major_locator(MaxNLocator(4, prune="both"))
    setp(
        ax_hist_x.get_xticklabels(),
        visible=False,
    )

    # Marginal of the second parameter, drawn sideways and sharing the y axis.
    ax_hist_y = axes([0.75, 0.1, 0.2, 0.63], sharey=ax_data)
    ax_hist_y.hist(values[1], nbins, orientation='horizontal', density=True)
    ax_hist_y.plot(py, y, 'k-')
    ax_hist_y.xaxis.set_major_locator(MaxNLocator(4, prune="both"))
    setp(ax_hist_y.get_yticklabels(), visible=False)
Exemplo n.º 4
0
def plot_convergence_epochs(nb_iterations, inspectors, models):
    """Plot one held-out log-likelihood curve per inspector and show the figure.

    Args:
        nb_iterations: number of passes; the x axis runs 1..nb_iterations.
        inspectors: iterable of score sequences, one per curve, each plotted
            against the pass numbers.
        models: labels handed to the legend, in curve order.
    """
    markers = ['o', 's']
    iterations = np.arange(nb_iterations) + 1

    # Create the axes once. Calling fig.add_subplot(111) per curve yields a
    # separate Axes on each call in newer Matplotlib releases, which would
    # stack independently scaled plots and break the shared legend.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    for index, insp in enumerate(inspectors):
        # Cycle through the markers so curves beyond the second are still
        # drawn instead of being silently dropped.
        marker = markers[index % len(markers)]
        ax.plot(iterations, insp, lw=1, marker=marker, markerfacecolor='None')

    ax.set_xlabel("number of passes")
    ax.set_ylabel("heldout log-likelihood per word")
    ax.legend(models)
    plt.show()
Exemplo n.º 5
0
def create_end_graphs(acc, val_acc, loss, val_loss):
    """Show training/test curves for accuracy (left) and loss (right).

    Each panel plots the training series in blue and the test series in green
    against an integer-tick x axis labelled "Epoch". The accuracy panel
    formats its y tick labels with a trailing '%'.
    """
    plt.figure(figsize=(10, 4))

    # Left panel: accuracy.
    accuracy_ax = plt.subplot(1, 2, 1)
    # noinspection PyUnresolvedReferences
    percent_formatter = mlp.ticker.StrMethodFormatter('{x}%')
    accuracy_ax.yaxis.set_major_formatter(percent_formatter)
    accuracy_ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    accuracy_ax.set_title("Accuracy")
    accuracy_ax.set_xlabel("Epoch")
    accuracy_ax.plot(acc, 'b-', label='training')
    accuracy_ax.plot(val_acc, 'g-', label='test')
    accuracy_ax.legend(loc='lower right')

    # Right panel: loss.
    loss_ax = plt.subplot(1, 2, 2)
    loss_ax.set_title("Loss")
    loss_ax.set_xlabel("Epoch")
    loss_ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    loss_ax.plot(loss, 'b-', label='training')
    loss_ax.plot(val_loss, 'g-', label='test')
    loss_ax.legend(loc='upper right')

    plt.show()
Exemplo n.º 6
0
def create_random_bar_pot():
    """Build a small bar chart from randomly drawn data and return its figure.

    Colours come from the module-level ``bgcolor``, ``labelcolor``,
    ``linecolor`` and ``fill`` values.
    """
    # Parameters for the random plot data.
    bar_count = randint(2, 2)  # both bounds are 2
    first_x = randint(1950, 1995)
    height_mean = gauss(6, 3)
    height_spread = expovariate(0.7)

    # Parameters for the random plot style.
    figure_width = 4 * 0.5
    figure_height = 3 * 0.5
    # This value is never used below, but the gauss() draw is kept so the
    # random stream that produces the bar heights stays unchanged.
    _unused_bar_width = figure_width * 2 / bar_count + gauss(0, 0.05)

    # Random data: consecutive integer x positions and Gaussian heights.
    xs = np.linspace(start=first_x, stop=first_x + bar_count - 1,
                     num=bar_count).astype(int)
    heights = [gauss(height_mean, height_spread) for _ in range(bar_count)]

    fig = plt.figure(figsize=(figure_width, figure_height), facecolor=bgcolor)
    ax = plt.subplot(1, 1, 1, facecolor=bgcolor)

    # Only the bottom spine stays visible.
    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)

    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(MaxNLocator(integer=True))

    ax.tick_params(colors=labelcolor)

    # y labels on the right-hand side, without tick marks.
    y_axis = ax.yaxis
    y_axis.tick_right()
    y_axis.set_label_position("right")
    y_axis.set_ticks_position('none')

    # Dotted horizontal grid drawn behind the bars.
    ax.set_axisbelow(True)
    y_axis.grid(color=linecolor, linestyle=':', linewidth=1)

    ax.bar(xs, heights, width=0.5, facecolor=fill, edgecolor=fill)

    return fig
Exemplo n.º 7
0
    def plot(self, plot_num, data, xlabel, ylabel, txt_position=None):
        """Create histogram for statistic.

        Adds subplot ``plot_num`` of the ``self.rows`` x ``self.cols`` grid to
        ``self.fig``, draws a histogram of *data* whose bar heights are the
        fraction of values per bin (shown as percentages), optionally writes
        the median / mean / standard deviation in a top corner, then tidies
        the axes and redraws via ``self.draw()``.

        Args:
            plot_num: subplot index passed to ``add_subplot``.
            data: sized iterable of hashable numeric values.
            xlabel: x axis label.
            ylabel: y axis label.
            txt_position: 'left' or 'right' to print the summary statistics
                in that top corner; any other value omits them.
        """

        self.axis = self.fig.add_subplot(self.rows, self.cols, plot_num)

        # Equal weights summing to 1 turn the bin counts into fractions.
        weights = np_ones_like(data) / float(len(data))

        # One bin fewer than the number of distinct values, capped at 20.
        # Clamp to at least 1: data with a single distinct value would
        # otherwise request 0 bins, which hist() rejects.
        num_bins = max(1, min(20, len(set(data)) - 1))
        self.axis.hist(data,
                       bins=num_bins,
                       rwidth=0.9,
                       weights=weights,
                       color='#fdae6b',
                       align='mid')

        self.axis.set_xlabel(xlabel)
        self.axis.set_ylabel(ylabel)

        self.axis.xaxis.set_major_locator(MaxNLocator(integer=True))
        self.axis.yaxis.set_major_formatter(
            FuncFormatter(lambda y, _: '{:.1%}'.format(y)))

        # report summary statistics
        stat_txt = f'median = {np_median(data):.1f}\n'
        stat_txt += f'mean = {np_mean(data):.1f}\n'
        stat_txt += f'std = {np_std(data):.1f}'
        if txt_position == 'left':
            self.axis.text(0.05,
                           0.95,
                           stat_txt,
                           transform=self.axis.transAxes,
                           fontsize=self.options.tick_font_size,
                           verticalalignment='top')
        elif txt_position == 'right':
            self.axis.text(0.95,
                           0.95,
                           stat_txt,
                           transform=self.axis.transAxes,
                           fontsize=self.options.tick_font_size,
                           verticalalignment='top',
                           horizontalalignment='right')

        self.prettify(self.axis)
        # Hide the right and top spines.
        for loc, spine in self.axis.spines.items():
            if loc in ['right', 'top']:
                spine.set_color('none')

        self.fig.tight_layout(pad=0.1, w_pad=1.0, h_pad=1.0)
        self.draw()
Exemplo n.º 8
0
              (len(all_nodes), t))
        infection_times_per_k.append(t)
        net_increases_per_k.append(net_increase_per_round)
        if t > longest_process_len:
            longest_process_len = t

    k_avgInfecTime_map[k] = round(
        (sum(infection_times_per_k) / len(infection_times_per_k)))
    k_netIncrease_map[k] = net_increases_per_k

    k += 1

# Plot the average infection time against the branching factor k and save the
# figure to a PNG whose name carries the node count.
# k_avgInfecTime_map holds, per k, the rounded mean of the infection times
# collected above.
fig1 = plt.figure(fig_idx, figsize=(9, 7.2))
plt.plot(k_avgInfecTime_map.keys(),
         k_avgInfecTime_map.values(),
         'b--',
         label='average infection time')
#plt.plot(k_avgInfecTime_map.keys(), [math.log2(n)**2/ math.log2(i)**2 for i in k_avgInfecTime_map.keys()], 'r-', label='$\log^2\ n\ /\ \log^2\ k$')
# Logarithmic y axis with explicitly supplied tick positions and labels.
plt.yscale('log')
plt.yticks(y_ticks, y_ticks_labels)
# Restrict x ticks to integers.
xa = plt.gca().get_xaxis()
xa.set_major_locator(MaxNLocator(integer=True))
plt.xlabel('Branching factor (k)')
plt.ylabel('Average infection time (in rounds)\nlogarithmic scale')
plt.title('k-BIPS on hypercube graph with degree %d and %d nodes' % (d, n))
plt.grid(True)
#plt.legend(loc='best')
# Save with a tight bounding box, then close the figure.
fig1.savefig('bips_hypercube_fixed-d_n' + str(len(all_nodes)) + '.png',
             bbox_inches='tight')
plt.close(fig1)
Exemplo n.º 9
0
def _violin(data, ax):
    """Draw *data* on *ax* as the report's standard violin plot and remove the left/bottom spines."""
    seaborn.violinplot(data=data,
                       scale='width',
                       width=0.5,
                       palette='Set3',
                       linewidth=1,
                       inner='point',
                       ax=ax)
    seaborn.despine(left=True, bottom=True, ax=ax)


def _mark_subject(ax, x, y):
    """Overlay single-subject value(s) *y* at position(s) *x* on *ax* as white, black-edged stars."""
    ax.scatter(x,
               y,
               s=100,
               marker='*',
               c='w',
               edgecolors='k',
               linewidths=1)


def main(pdf, db, grp, s_data):
    """
    Generate the pages of the group report pdf that contain:
    - bar plots of the number of acquired volumes for each subject
    - violin plots for absolute and relative motion
    - violin plots for eddy parameters, susceptibility and slice-to-volume
      motion (only when the matching flags in db are set)
    - violin plots for outlier distributions
    - violin plots for CNR and SNR

    The first page is always written; a second page (outliers, SNR/CNR) is
    written only when db['ol_flag'] or db['cnr_flag'] is set.

    Arguments:
        - pdf: qc pdf file, passed to plt.savefig
        - db: dictionary database
        - grp: grouping variable, or False when there is none
        - s_data: single subject dictionary whose values are overlaid as
          stars on the group plots, or None
    """
    has_subject = s_data is not None

    #================================================
    # Page 1: prepare figure
    #================================================
    plt.figure(figsize=(8.27, 11.69))  # Standard portrait A4 sizes
    plt.suptitle("SQUAD: Group report", fontsize=10, fontweight='bold')

    # Groups and acquired volumes
    if grp is not False:
        ax1_00 = plt.subplot2grid((3, 4), (0, 0), colspan=1)
        group_name = grp.dtype.names[0]
        group_values = grp[group_name][1:]
        # Unit-wide bins centred on the integers spanning the group values.
        seaborn.distplot(
            group_values,
            vertical=True,
            bins=np.arange(-1.5 + round(min(group_values)),
                           1.5 + round(max(group_values))),
            norm_hist=False,
            kde=False,
            ax=ax1_00)
        ax1_00.set_ylabel(group_name)
        ax1_00.set_xlabel("N")
        ax1_00.xaxis.set_major_locator(MaxNLocator(integer=True))
        # Keep only the first and last count ticks.
        ax1_00.set_xticks([0, np.max(ax1_00.get_xticks())])

        ax1_01 = plt.subplot2grid((3, 4), (0, 1), colspan=3)
    else:
        ax1_01 = plt.subplot2grid((3, 4), (0, 0), colspan=4)

    vols_per_subject = np.sum(db['data_protocol'], axis=1)
    seaborn.barplot(x=np.arange(1, 1 + db['data_no_subjects']),
                    y=vols_per_subject,
                    color='blue',
                    ax=ax1_01)
    # Label only the subjects whose volume count differs from the most
    # common one.
    n_vols, counts = np.unique(vols_per_subject, return_counts=True)
    n_vols_mode = n_vols[np.argmax(counts)]
    n_vols_ol = 1 + np.where(vols_per_subject != n_vols_mode)[0]
    # seaborn draws the bar of subject i (1-based) at axis position i - 1, so
    # the tick goes at the 0-based position while the label keeps the 1-based
    # subject number.
    ax1_01.set_xticks(n_vols_ol - 1)
    ax1_01.set_xticklabels(n_vols_ol)
    ax1_01.tick_params(labelsize=6)
    plt.setp(ax1_01.get_xticklabels(), rotation=90)
    ax1_01.set_ylim(bottom=0)
    ax1_01.set_xlabel("Subject")
    ax1_01.set_ylabel("No. acquired volumes")

    # MOTION
    # Absolute
    ax2_00 = plt.subplot2grid((3, 4), (1, 0), colspan=1)
    _violin(db['qc_motion'][:, 0], ax2_00)
    ax2_00.set_ylabel("mm (avg)")
    ax2_00.set_ylim(bottom=0)
    ax2_00.set_title("Abs. motion")
    ax2_00.set_xticklabels([""])
    # Relative
    ax2_01 = plt.subplot2grid((3, 4), (1, 1), colspan=1)
    _violin(db['qc_motion'][:, 1], ax2_01)
    ax2_01.set_ylabel("mm (avg)")
    ax2_01.set_ylim(bottom=0)
    ax2_01.set_title("Rel. motion")
    ax2_01.set_xticklabels([""])
    # Check if needs to update single subject reports
    if has_subject:
        _mark_subject(ax2_00, 0, s_data['qc_mot_abs'])
        _mark_subject(ax2_01, 0, s_data['qc_mot_rel'])

    # EDDY PARAMETERS
    if db['par_flag']:
        # Translations
        ax2_02 = plt.subplot2grid((3, 4), (1, 2), colspan=1)
        _violin(db['qc_parameters'][:, 0:3], ax2_02)
        ax2_02.set_ylabel("mm (avg)")
        ax2_02.set_title("Translations")
        ax2_02.set_xticklabels(["x", "y", "z"])
        # Rotations
        ax2_03 = plt.subplot2grid((3, 4), (1, 3), colspan=1)
        _violin(np.rad2deg(db['qc_parameters'][:, 3:6]), ax2_03)
        ax2_03.set_ylabel("deg (avg)")
        ax2_03.set_title("Rotations")
        ax2_03.set_xticklabels(["x", "y", "z"])

        # Eddy currents: the bottom row has 4 columns; the EC panel gives up
        # one column for susceptibility and two for the S2V panels when those
        # are present.
        ec_span = 4
        vd_span = 0
        if db['susc_flag']:
            ec_span = ec_span - 1
            vd_span = 1
        if db['s2v_par_flag']:
            ec_span = ec_span - 2
        ax_ec = plt.subplot2grid((3, 4), (2, 0), colspan=ec_span)
        _violin(db['qc_parameters'][:, 6:9], ax_ec)
        ax_ec.set_title("EC linear terms")
        ax_ec.set_ylabel("Hz/mm (std)")
        ax_ec.set_xticklabels(["x", "y", "z"])
        ax_ec.set_ylim(bottom=0)
        # Check if needs to update single subject reports
        if has_subject:
            _mark_subject(ax2_02, [0, 1, 2], s_data['qc_params_avg'][0:3])
            _mark_subject(ax2_03, [0, 1, 2],
                          np.rad2deg(s_data['qc_params_avg'][3:6]))
            _mark_subject(ax_ec, [0, 1, 2], s_data['qc_params_avg'][6:9])

        # Susceptibility
        if db['susc_flag']:
            ax_susc = plt.subplot2grid((3, 4), (2, ec_span), colspan=vd_span)
            _violin(db['qc_susceptibility'], ax_susc)
            ax_susc.set_title("Susceptibility")
            ax_susc.set_ylabel("Vox (std)")
            ax_susc.set_xticklabels([""])
            ax_susc.set_ylim(bottom=0)
            if has_subject:
                _mark_subject(ax_susc, 0, s_data['qc_vox_displ_std'])

        # S2V motion
        if db['s2v_par_flag']:
            # Translations
            ax_s2v_tr = plt.subplot2grid((3, 4), (2, ec_span + vd_span),
                                         colspan=1)
            _violin(db['qc_s2v_parameters'][:, 0:3], ax_s2v_tr)
            ax_s2v_tr.set_title("S2V translations")
            ax_s2v_tr.set_ylabel("mm (std)")
            ax_s2v_tr.set_xticklabels(["x", "y", "z"])
            ax_s2v_tr.set_ylim(bottom=0)
            if has_subject:
                _mark_subject(ax_s2v_tr, [0, 1, 2],
                              s_data['qc_s2v_params_avg_std'][0:3])
            # Rotations
            ax_s2v_rot = plt.subplot2grid((3, 4),
                                          (2, ec_span + vd_span + 1),
                                          colspan=1)
            _violin(db['qc_s2v_parameters'][:, 3:6], ax_s2v_rot)
            ax_s2v_rot.set_title("S2V rotations")
            ax_s2v_rot.set_ylabel("deg (std)")
            ax_s2v_rot.set_xticklabels(["x", "y", "z"])
            ax_s2v_rot.set_ylim(bottom=0)
            if has_subject:
                _mark_subject(ax_s2v_rot, [0, 1, 2],
                              s_data['qc_s2v_params_avg_std'][3:6])

    #================================================
    # Format figure, save and close it
    #================================================
    plt.tight_layout(h_pad=1, pad=4)
    plt.savefig(pdf, format='pdf')
    plt.close()

    # OUTLIERS AND CNR
    if db['ol_flag'] or db['cnr_flag']:
        #================================================
        # Page 2: prepare figure
        #================================================
        plt.figure(figsize=(8.27, 11.69))  # Standard portrait A4 sizes
        plt.suptitle("SQUAD: Group report", fontsize=10, fontweight='bold')

        # Look for shared b-values and PE directions if updating single subject reports
        if has_subject:
            # Rows of common_b are (group index, subject index) pairs of
            # b-values that differ by less than 100.
            b_db = (np.array(db['data_unique_bvals'])).reshape(-1, 1)
            b_sub = (np.array(s_data['data_unique_bvals'])).reshape(-1, 1)
            common_b = np.array(
                np.all((np.abs(b_db[:, None, :] - b_sub[None, :, :]) < 100),
                       axis=-1).nonzero()).T
            # Rows of common_pe are (group index, subject index) pairs whose
            # first three columns match exactly.
            pe_db = np.reshape(np.atleast_2d(db['data_unique_pes']),
                               (-1, 4))[:, 0:3]
            pe_sub = np.reshape(np.atleast_2d(s_data['data_eddy_para']),
                                (-1, 4))[:, 0:3]
            common_pe = np.array(
                np.all((pe_db[:, None, :] == pe_sub[None, :, :]),
                       axis=-1).nonzero()).T

        # OUTLIERS
        if db['ol_flag']:
            # Column 0 is the total, the next data_no_shells columns are per
            # b-shell, the remaining columns are per PE direction.
            first_pe_col = 1 + db['data_no_shells']
            # Total
            ax1_00 = plt.subplot2grid((2, 3), (0, 0), colspan=1)
            _violin(db['qc_outliers'][:, 0], ax1_00)
            ax1_00.set_title("Total outliers")
            ax1_00.set_ylabel("%")
            ax1_00.set_ylim(bottom=0)
            ax1_00.set_xticklabels([""])
            # b-shell
            ax1_01 = plt.subplot2grid((2, 3), (0, 1), colspan=1)
            _violin(db['qc_outliers'][:, 1:first_pe_col], ax1_01)
            ax1_01.set_ylabel("%")
            ax1_01.set_ylim(bottom=0)
            ax1_01.set_title("b-value outliers")
            ax1_01.set_xticklabels(db['data_unique_bvals'])
            ax1_01.set_xlabel("b-value")
            # PE direction
            ax1_02 = plt.subplot2grid((2, 3), (0, 2), colspan=1)
            _violin(db['qc_outliers'][:, first_pe_col:], ax1_02)
            ax1_02.set_title("PE dir. outliers")
            ax1_02.set_ylabel("%")
            ax1_02.set_ylim(bottom=0)
            ax1_02.set_xlabel("PE direction")
            # Check if needs to update single subject reports
            if has_subject and s_data['qc_ol_flag']:
                _mark_subject(ax1_00, 0, s_data['qc_outliers_tot'])
                _mark_subject(
                    ax1_01, common_b[:, 0],
                    np.array(s_data['qc_outliers_b'])[common_b[:, 1]])
                _mark_subject(
                    ax1_02, common_pe[:, 0],
                    np.array(s_data['qc_outliers_pe'])[common_pe[:, 1]])

        if db['cnr_flag']:
            # SNR and CNR are plotted as stored in db['qc_cnr'], without any
            # normalisation: column 0 is SNR, the remaining columns are CNR.
            # SNR
            ax2_01 = plt.subplot2grid((2, 3), (1, 0), colspan=1)
            _violin(db['qc_cnr'][:, 0], ax2_01)
            ax2_01.set_ylim(bottom=0)
            ax2_01.set_title("SNR (avg)")
            ax2_01.set_xticklabels(["0"])
            ax2_01.set_xlabel("b-value")

            # CNR
            ax2_02 = plt.subplot2grid((2, 3), (1, 1), colspan=2)
            _violin(db['qc_cnr'][:, 1:], ax2_02)
            ax2_02.set_ylim(bottom=0)
            ax2_02.set_title("CNR (avg)")
            ax2_02.set_xlabel("b-value")
            ax2_02.set_xticklabels(db['data_unique_bvals'])
            # Check if needs to update single subject reports
            if has_subject and s_data['qc_cnr_flag']:
                _mark_subject(ax2_01, 0, s_data['qc_cnr_avg'][0])
                _mark_subject(
                    ax2_02, common_b[:, 0],
                    np.array(s_data['qc_cnr_avg'][1:])[common_b[:, 1]])

        #================================================
        # Format figure, save and close it
        #================================================
        plt.tight_layout(h_pad=1, pad=4)
        plt.savefig(pdf, format='pdf')
        plt.close()
Exemplo n.º 10
0
def format_yax(ax):
    """Give the y axis of *ax* a ``MaxNLocator(5)`` tick locator and '%i' tick labels."""
    y_axis = ax.yaxis
    y_axis.set_major_locator(MaxNLocator(5))
    y_axis.set_major_formatter(ticker.FormatStrFormatter('%i'))
Exemplo n.º 11
0
        else:
            ax4.plot(freq[freq_idx], QY_fit[ant][chan_sel], '-k')

    # --------------------------------------------------------------------------------- #

    #legend(loc=[0,0.85],prop=legendfont)

    ax1.legend(prop=legendfont)
    ax1.set_title("Jones matrix element P <- X")
    ax2.set_title("Jones matrix element P <- Y")
    ax3.set_title("Jones matrix element Q <- X")
    ax4.legend(prop=legendfont)
    ax4.set_title("Jones matrix element Q <- Y")

    for ax in [ax1, ax2, ax3, ax4]:
        ax.xaxis.set_major_locator(MaxNLocator(4))
        ax.yaxis.set_major_locator(MaxNLocator(5))
        if plot_chan:
            if not args.outname:
                # This makes the plot a bit busier but much easier to identify channels that need flagging
                ax.xaxis.set_major_locator(MultipleLocator(1))
            ax.xaxis.set_major_formatter(FormatStrFormatter("%d"))
            ax.set_xlabel("dumb chan # (doesn't skip flagged channels)")
        else:
            ax.set_xlabel("MHz")
        if args.phases:
            ax.set_ylabel("degrees")
        else:
            ax.set_ylabel("gain relative to band average")
        ax.grid(True)
Exemplo n.º 12
0
def statistic(trainFrames, testFrames, denoise=False, bagging=0, weakness=0.3, detail_show=False, roc_plot=False, global_plot=False):
    """Train and evaluate one classifier per train/test pair and print a summary.

    For every index ``i`` a ``GaussianNaiveBayesClassfier`` is fitted on
    ``trainFrames[i]`` and scored on ``testFrames[i]`` (all columns but the
    last are features, column ``'label'`` is the target). The per-run
    confusion matrices are summed and the overall metrics are computed from
    that sum.

    Args:
        trainFrames: indexable collection of training frames.
        testFrames: indexable collection of test frames, parallel to
            ``trainFrames``.
        denoise: forwarded to the classifier as ``has_denoise``.
        bagging: forwarded to the classifier as ``bagging_rate``.
        weakness: forwarded to the classifier as ``bagging_weakness``.
        detail_show: print the metrics of every single run.
        roc_plot: call ``roc_plot()`` on every run's ``Performance`` object.
        global_plot: show a plot of the accuracy of each run.

    Returns:
        A 20-tuple: ``(denoise, bagging, accuracy, feature, matrix,
        prevalence, max_acc, max_fea, min_acc, min_fea, acc, var, ppv, npv,
        tpr, tnr, plr, nlr, dor, f1)`` where ``accuracy`` and ``feature`` are
        per-run lists and every other metric is computed from the cumulative
        confusion matrix.
    """

    def confusion_table(cells):
        # Lay the four flat confusion cells out as a labelled 2x2 table.
        columns = {'男声': [int(cells[0]), int(cells[2])],
                   '女声': [int(cells[1]), int(cells[3])]}
        return pd.DataFrame(columns, index=['预测男声', '预测女声'])

    def reciprocal(value):
        # A negative likelihood ratio of 0 would make `1 / nlr` raise
        # ZeroDivisionError in the middle of the report; show it as inf.
        return 1 / value if value else float('inf')

    accuracy = []   # accuracy of each run
    feature = []    # feature names used in each run
    confusionBase = np.array([0, 0, 0, 0])  # cumulative confusion matrix
    for i in range(len(trainFrames)):
        clf = GaussianNaiveBayesClassfier(has_denoise=denoise, bagging_rate=bagging, bagging_weakness=weakness)
        clf.fit(trainFrames[i])
        test_frame = testFrames[i]
        voiceProb = clf.predict(test_frame.iloc[:, 0:-1], 'prob')
        label = test_frame.loc[:, 'label']
        # Score with the second probability column.
        p = Performance(list(label), list(voiceProb[:, 1]))
        confusion = p.get_confusion_matrix()
        confusionBase = confusionBase + np.array(confusion)
        run_acc = p.ACC()
        accuracy.append(run_acc)
        fea = trainFrames[i].columns.values.tolist()
        feature.append(fea)
        if detail_show:  # statistics of this single run
            run_prevalence = p.Prevalence()
            run_ppv = p.PPV()
            run_npv = p.NPV()
            run_tpr = p.TPR()
            run_tnr = p.TNR()
            run_plr = p.PLR()
            run_nlr = p.NLR()
            run_dor = p.DOR()
            run_f1 = p.F_score(1.0)
            run_matrix = confusion_table(confusion)
            print('\n------------第%d次测试------------' % (i+1))
            print('特征:', fea)
            print('动态降噪:', denoise)
            print('集成:', bagging)
            print('混淆矩阵:')
            print(run_matrix)
            print('总男声比例:%.2f%%' % (run_prevalence * 100))
            print('总体准确度:%.2f%%' % (run_acc * 100))
            print('男声查准率:%.2f%%' % (run_ppv * 100))
            print('男声查全率: %.2f%%' % (run_tpr * 100))
            print('女声查准率:%.2f%%' % (run_npv * 100))
            print('女声查全率: %.2f%%' % (run_tnr * 100))
            print('男声似然比:%.4f' % run_plr)
            print('女声似然比:%.4f' % reciprocal(run_nlr))
            print('判别男声相关:')
            print('诊断比值比:%.4f' % run_dor)
            print('F1分数:%.4f' % run_f1)
        if roc_plot:    # ROC curve with the male voice as the positive class
            p.roc_plot()

    # Overall statistics.
    # NOTE(review): the Performance object is built from placeholder data and
    # then handed the cumulative confusion matrix via set(); presumably set()
    # replaces the internal confusion matrix -- confirm against Performance.
    res = Performance([0,1], [0.2,0.8])
    res.set(list(confusionBase))
    prevalence = res.Prevalence()
    acc = res.ACC()
    ppv = res.PPV()
    npv = res.NPV()
    tpr = res.TPR()
    tnr = res.TNR()
    plr = res.PLR()
    nlr = res.NLR()
    dor = res.DOR()
    f1 = res.F_score(1.0)
    max_acc = max(accuracy)
    max_fea = feature[accuracy.index(max_acc)]
    min_acc = min(accuracy)
    min_fea = feature[accuracy.index(min_acc)]
    var = np.var(accuracy)
    matrix = confusion_table(confusionBase)
    print('\n\n------------测试总次数=%d------------' % len(trainFrames))
    print('动态降噪:', denoise)
    print('集成:', bagging)
    print('累积混淆矩阵:')
    print(matrix)
    print('总男声比例:%.2f%%' % (prevalence * 100))
    print('最大准确度:%.2f%%' % (max_acc * 100))
    print('最大准确度使用特征:', max_fea)
    print('最低准确度:%.2f%%' % (min_acc * 100))
    print('最低准确度使用特征:', min_fea)
    print('平均准确度:%.2f%%' % (acc * 100))
    print('准确度方差:%g' % var)
    print('平均男声查准率:%.2f%%' % (ppv * 100))
    print('平均男声查全率:%.2f%%' % (tpr * 100))
    print('平均女声查准率:%.2f%%' % (npv * 100))
    print('平均女声查全率:%.2f%%' % (tnr * 100))
    print('平均男声似然比:%.4f' % plr)
    print('平均女声似然比:%.4f' % reciprocal(nlr))
    print('判别男声相关:')
    print('平均诊断比值比:%.4f' % dor)
    print('平均F1分数:%.4f' % f1)
    if global_plot:  # show the accuracy of every run
        ax = plt.figure().gca()
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.xticks(np.arange(1, len(accuracy) + 1, 1))
        plt.title("Accuracy for %d turns(denoise=%d,bagging=%d)" % (len(accuracy), denoise, bagging))
        plt.ylabel("Accuracy")
        plt.xlabel("turns")
        if len(accuracy) > 1:
            plt.plot(list(range(1, len(accuracy) + 1)), accuracy)
        else:
            # A single run cannot form a line; draw it as one point.
            plt.scatter(1, accuracy, c=2, cmap=plt.cm.spring, edgecolors='k')
        plt.show()
    return denoise, bagging, accuracy, feature, matrix, prevalence, max_acc, max_fea, min_acc, min_fea, acc, var, ppv, npv, tpr, tnr, plr, nlr, dor, f1
Exemplo n.º 13
0
    count_by_artist = count_by_artist.sort_values('song')
    count_by_artist = count_by_artist.tail(20)  # top 20

    df_list.append(count_by_artist)
    years = years + 1

from matplotlib.pyplot import figure
from pylab import MaxNLocator

yr = 1985
ax = plt.figure(num=None,
                figsize=(14, 10),
                dpi=80,
                facecolor='w',
                edgecolor='b').gca()
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.grid(False)
ax.set_xlabel("# Of Times on Billboard's Year End Top 100")
#ax.set_ylabel("Artist")
plt.gcf().subplots_adjust(left=0.20)
plt.gcf().subplots_adjust(bottom=0.14)

for i in df_list:
    #figure(num=None, figsize=(20, 20), dpi=80, facecolor='w', edgecolor='k')
    plt.tight_layout()
    plt.grid(False)
    plt.gcf().subplots_adjust(left=0.20)
    plt.gcf().subplots_adjust(bottom=0.14)

    yr_str = 'Year %s' % yr
    plt.figtext(0.76,
Exemplo n.º 14
0
def Graph_for_ind(pd, type, ys):
    """Save one chart per KPI for the rows of ``series`` that match ``pd``.

    Rows of the module-level ``series`` frame whose ``type`` column equals
    ``pd`` are grouped by their ``online_for`` period, ``Analyze_data`` is run
    on every group, and each KPI is drawn as a bar chart with the listing
    count (furnished + unfurnished) overlaid as a line on a secondary y-axis.
    Each figure is saved through the module-level ``pdf`` object and closed.

    Parameters
    ----------
    pd
        Value to select in column ``type``. (The name shadows the common
        pandas alias; this function uses the ``pandas`` name instead.)
    type
        Name of the ``series`` column to filter on (shadows the builtin).
    ys
        Truthy for a yearly breakdown, falsy for a monthly one.

    Notes
    -----
    ``series['postal_code']`` is converted to ``str`` in place on the shared
    frame. Every key returned by ``Analyze_data`` must be one of the KPI
    names listed in ``final_data`` below, otherwise a ``KeyError`` is raised.
    """
    series['postal_code'] = series['postal_code'].apply(str)

    # Take an explicit copy: the selection is modified right below, and
    # assigning into a slice of ``series`` raises SettingWithCopyWarning.
    pd_data = series.loc[series[type] == pd].copy()

    # Use the truthiness of ``ys`` for both the label builder and the parse
    # format, so the two can never disagree.
    if ys:
        to_period = get_timing_year
        period_format = '%Y'
    else:
        to_period = get_timing_month
        period_format = '%b %Y'
    pd_data['online_for'] = pd_data['online_for'].apply(to_period)

    # Chronologically ordered list of the periods that actually occur.
    months = pd_data['online_for'].unique().tolist()
    months.sort(
        key=lambda date: datetime.datetime.strptime(date, period_format))

    final_data = {
        'Average Price': {},
        'Average Size': {},
        'Average €/qm': {},
        'Count Furnished': {},
        'Count Unfurnished': {},
        'Average Miscellaneous Cost': {},
        'Rent': {},
        'Average Utilities Cost': {},
        'Average Base Rent': {}
    }
    for month in months:
        month_data = pd_data.loc[pd_data['online_for'] == month]
        kpis = Analyze_data(month_data)
        for kpi in kpis:
            final_data[kpi][month] = kpis[kpi]

    # Total number of listings per period, in the same order as ``months``.
    count = [
        furnished + unfurnished for furnished, unfurnished in zip(
            final_data['Count Furnished'].values(),
            final_data['Count Unfurnished'].values())
    ]
    count_map = Include_Empty_months(dict(zip(months, count)), ys)

    for k in final_data:
        final_data[k] = Include_Empty_months(final_data[k], ys)
        data = list(final_data[k].values())
        index = list(final_data[k].keys())

        # Scale the figure with the number of bars, but never below 15 in.
        width = max(len(index) * 0.3, 15)
        fig, axs = pyplot.subplots(figsize=(width, 5))
        dataFrame = pandas.DataFrame(data={k: data}, index=index)

        # Primary axis: the KPI itself, as bars.
        color = 'tab:blue'
        dataFrame.plot.bar(
            ax=axs,
            width=0.9,
            title=
            f"{'Yearly' if ys else 'Monthly'} analysis for {city}: {pd} ({k})",
            alpha=foreground_opacity)
        axs.tick_params(axis='y', colors=color)
        axs.set_ylim(ymin=0)
        axs.get_yaxis().set_major_locator(MaxNLocator(integer=True))
        axs.set_ylabel('Measure\'s value',
                       color=color,
                       fontsize='large',
                       fontweight='bold')
        axs.grid(axis='y')

        # Secondary axis: number of listings, as a line.
        ax2 = axs.twinx()
        color = 'tab:red'
        # NOTE(review): this label is set on the twin axes and reads 'Month'
        # even for yearly charts -- confirm whether it is meant to be shown.
        ax2.set_xlabel('Month', fontsize='large', fontweight='bold')
        ax2.set_ylabel('Count',
                       color=color,
                       fontsize='large',
                       fontweight='bold')
        ax2.get_yaxis().set_major_locator(MaxNLocator(integer=True))
        ax2.plot(list(count_map.keys()), list(count_map.values()), color=color)
        ax2.set_ylim(ymin=0)
        ax2.format_xdata = mdates.DateFormatter('%b %Y')

        fig.autofmt_xdate()
        fig.tight_layout()
        pdf.savefig()
        # Close this figure explicitly so figures do not pile up per KPI.
        pyplot.close(fig)
Exemplo n.º 15
0
def plot_pacf_stem(ds,
                   rgi_df,
                   xlim=None,
                   path=True,
                   nlags=200,
                   slice_start=3000,
                   plot_confint=True):
    """Save one partial-autocorrelation stem plot per glacier.

    For every row of ``rgi_df`` the length series of the flowline (``'fl'``)
    and the V/A scaling (``'vas'``) model is taken from ``ds`` for each
    temperature bias, its partial autocorrelation is computed with
    ``stattools.pacf`` and drawn as a stem plot. Both models share one axes
    and are listed in a two-column legend. The figure is written as
    ``<glacier name>.pdf`` into a hard-coded directory.

    Parameters
    ----------
    ds
        Dataset supporting ``.sel``/``.isel`` with the coordinates
        ``mb_model``, ``normalized``, ``rgi_id``, ``time``, ``temp_bias`` and
        ``model`` and a ``length`` variable (presumably an xarray Dataset --
        TODO confirm).
    rgi_df
        Table iterated with ``iterrows()``; the index is used as RGI ID and
        the ``'name'`` column as glacier name.
    xlim
        x-axis limits; a falsy value selects ``[0, nlags]``.
    path
        NOTE(review): currently not consulted -- the output location is
        always the hard-coded directory below. Kept for interface
        compatibility.
    nlags
        Largest lag passed to ``stattools.pacf``.
    slice_start
        Index of the first time step used; earlier steps are dropped.
    plot_confint
        If true, shade the confidence interval (centred on zero) per series.

    """
    # (model key, colour cycle, sign applied to the x offset), in draw order.
    # The order matters: legend handles are split by even/odd position below.
    model_specs = (('fl', fl_cycle, 1), ('vas', vas_cycle, -1))

    # iterate over all given glaciers
    for rgi_id, glacier in rgi_df.iterrows():
        name = glacier['name']
        log.info('PACF plots for {} ({})'.format(name, rgi_id))

        # create figure and axes
        fig, ax = plt.subplots(1, 1)
        # lags 0 .. nlags; lag 0 is skipped when plotting
        lags = np.arange(0, nlags + 1)

        # select the complete dataset, then drop everything before slice_start
        ds_sel = ds.sel(mb_model='random', normalized=False, rgi_id=rgi_id)
        ds_sel = ds_sel.isel(time=slice(slice_start, None))

        # horizontal shift between the two models' stems (disabled)
        xoffset = 0

        # plot zero aux line
        ax.axhline(0, c='k', ls=':')

        for i, b in enumerate(np.sort(ds.temp_bias)):
            # get length data
            length = ds_sel.sel(temp_bias=b).length

            for model, colors, sign in model_specs:
                # compute partial autocorrelation and confidence intervals
                pacf_vals, confint = stattools.pacf(length.sel(model=model),
                                                    nlags=nlags,
                                                    alpha=0.01,
                                                    method='ywmle')
                # plot partial autocorrelation function
                markers, stems, _ = ax.stem(lags[1:] + sign * xoffset,
                                            pacf_vals[1:],
                                            markerfmt='o',
                                            linefmt=':',
                                            basefmt='None',
                                            label='{:+.1f} °C'.format(b))
                plt.setp(stems, 'color', colors[i])
                plt.setp(markers, 'color', colors[i])
                if plot_confint:
                    # fill confidence interval, shifted to be centred on zero
                    ax.fill_between(lags[1:],
                                    confint[1:, 0] - pacf_vals[1:],
                                    confint[1:, 1] - pacf_vals[1:],
                                    color=colors[i],
                                    alpha=0.1)

        # adjust axes
        if not xlim:
            xlim = [0, nlags]
        ax.set_xlim(xlim)
        ax.set_ylim([-1.1, 1.1])
        ax.get_xaxis().set_major_locator(MaxNLocator(integer=True))
        # add grid
        ax.grid()

        # get legend handles and labels
        handles, labels = ax.get_legend_handles_labels()
        # invisible artist used as a column heading inside the legend
        title_proxy, = plt.plot(0,
                                marker='None',
                                linestyle='None',
                                label='dummy')

        # Stems were added alternately (flowline, VAS), so even positions
        # belong to the flowline model and odd positions to the VAS model.
        my_handles = ([title_proxy] + handles[::2] + [title_proxy] +
                      handles[1::2])
        # "\\ " keeps the mathtext space without an invalid "\ " escape.
        my_labels = (["$\\bf{Flowline\\ model}$"] + labels[::2] +
                     ["$\\bf{VAS\\ model}$"] + labels[1::2])
        # add single two-column legend
        ax.legend(my_handles, my_labels, ncol=2)

        # labels, title, ...
        ax.set_xlabel('Lag [years]')
        ax.set_ylabel('Correlation coefficient')

        # store plot (separate local so the ``path`` argument is not clobbered)
        dir_path = '/Users/oberrauch/work/master/plots/final_plots/pacf/'
        f_name = '{}.pdf'.format(name.replace(' ', '_'))
        out_path = os.path.join(dir_path, f_name)
        plt.savefig(out_path, bbox_inches='tight')
def best_so_far(results_directory, num_iterations):
    """
    Produce two plots from the per-iteration result files:

        1) The best value obtained so far against the number of iterations,
           starting at the first iteration whose first constraint score is 1
        2) A scatterplot of all collected data points

    The plotted iterations and best values are also stored with save_object.

    :param results_directory: directory holding the ``scores{i}.dat``,
        ``con_scores{i}.dat`` and ``next_inputs{i}.dat`` files; the plots and
        the ``iterations.dat`` / ``best_vals.dat`` files are written here too.
    :param num_iterations: the number of iterations for which data collection is being carried out.
    """

    running_best = []  # best value so far, one entry per tracked iteration
    x1_coords = []  # first coordinate of every collected data point
    x2_coords = []  # second coordinate of every collected data point

    for step in range(num_iterations):

        # Lowest score of this iteration and the matching constraint scores

        scores = load_object(results_directory + "/scores{}.dat".format(step))
        step_best = min(scores)
        feasibility = load_object(results_directory +
                                  "/con_scores{}.dat".format(step))

        # Tracking only starts once an iteration reports a feasible first
        # point; that iteration contributes its own best value unchanged.
        # NOTE(review): feasibility is not consulted for later iterations.

        if feasibility[0] == 1 and not running_best:
            running_best.append(step_best[0])
        elif running_best:
            incumbent = min(running_best)
            if step_best[0] < incumbent:
                running_best.append(step_best[0])
            else:
                running_best.append(incumbent)

        # Collect the coordinates of the points proposed in this iteration

        proposed = load_object(results_directory +
                               "/next_inputs{}.dat".format(step))
        for point in proposed:
            x1_coords.append(point[0])
            x2_coords.append(point[1])

    # One x value per tracked iteration, ending at num_iterations

    tracked = len(running_best)
    first_shown = (num_iterations - tracked) + 1
    shown_iterations = range(first_shown, num_iterations + 1)

    # Figure 2: best value so far, with integer ticks on both axes

    fig = plt.figure(2)
    axes = fig.gca()
    axes.get_xaxis().set_major_locator(MaxNLocator(integer=True))
    axes.get_yaxis().set_major_locator(MaxNLocator(integer=True))
    plt.xlim(first_shown, num_iterations)
    plt.xlabel('Function Evaluations')
    plt.ylabel('Best Feasible Value')
    plt.plot(shown_iterations, running_best)
    pylab.savefig(results_directory + "/best_so_far.png")
    plt.close()

    save_object(shown_iterations, results_directory + "/iterations.dat")
    save_object(running_best, results_directory + "/best_vals.dat")

    # Figure 3: scatter of the collected data points on a fixed window

    plt.figure(3)
    plt.title('Data Points Collected')
    plt.gca().set_aspect('equal')
    plt.xlim(-5, 10)
    plt.ylim(0, 15)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.scatter(x1_coords, x2_coords)
    pylab.savefig(results_directory + "/data_collected.png")
    plt.close()
Exemplo n.º 17
0
def main(pdf, data, eddy):
    """
    Generate the page of the single subject report pdf that contains:
    - Per-shell average CNR bar plots (error bars are standard deviations).
    - Per-volume mean squared residuals plots (one plot for each b-value, including 0). Outliers
    (MSR > mean + 2 * std for each shell) are marked as red stars above the corresponding volume.

    When eddy['rssFlag'] is set, the indices of the volumes that are not outliers are also
    written to <qc_path>/vols_no_outliers.txt and, if data['bvecs'] is not empty, the matching
    bvecs and bvals to <qc_path>/bvecs_no_outliers.txt and <qc_path>/bvals_no_outliers.txt.

    Arguments:
        - pdf: qc pdf file
        - data: data dictionary containing information about the dataset
          (keys read here: subj_id, unique_bvals, bvals, no_b0_vols, bvals_dirs, qc_path, bvecs)
        - eddy: EDDY dictionary containing useful qc information
          (keys read here: avg_cnr, std_cnr, rssFlag, avg_rss)
    """
    #================================================
    # Prepare figure
    plt.figure(figsize=(8.27, 11.69))  # Standard portrait A4 sizes
    plt.suptitle('Subject ' + data['subj_id'], fontsize=10, fontweight='bold')

    # Divide the page in two sections. Top one will have the bar plots, bottom (and bigger)
    # one will have the MSR plots.
    gs0 = gridspec.GridSpec(2, 1, height_ratios=[0.16, 0.8], hspace=0.2)

    # Top section: three columns, the first for b=0 and the other two for the remaining shells
    gs00 = gridspec.GridSpecFromSubplotSpec(1,
                                            3,
                                            subplot_spec=gs0[0],
                                            wspace=1)
    # Single bar for the first avg_cnr entry (labelled tSNR, tick label 0)
    ax1_00 = plt.subplot(gs00[0, 0])
    sb = seaborn.barplot(y=eddy['avg_cnr'][0], ax=ax1_00)
    sb.errorbar(x=0,
                y=eddy['avg_cnr'][0],
                yerr=eddy['std_cnr'][0],
                ecolor='black',
                fmt="none")
    ax1_00.set_xlabel("b-value [s/mm$^2$]")
    ax1_00.set_ylabel("tSNR")
    # Leave room above the bar for the error bar
    ax1_00.set_ylim(0, eddy['avg_cnr'][0] + 2 * eddy['std_cnr'][0])
    ax1_00.set_xticklabels([0])

    # One bar per entry of unique_bvals, taken from the remaining avg_cnr entries
    ax2_00 = plt.subplot(gs00[0, 1:])
    sb = seaborn.barplot(x=np.arange(1, 1 + data['unique_bvals'].size),
                         y=eddy['avg_cnr'][1:],
                         ci=3.0,
                         ax=ax2_00)
    sb.errorbar(x=np.arange(0, data['unique_bvals'].size),
                y=eddy['avg_cnr'][1:],
                yerr=eddy['std_cnr'][1:],
                ecolor='black',
                fmt="none")
    ax2_00.set_xlabel("b-value [s/mm$^2$]")
    ax2_00.set_ylabel("CNR")
    ax2_00.set_xticklabels(data['unique_bvals'])

    if eddy['rssFlag']:
        # Bottom section: one row for b=0 plus one row per entry of unique_bvals
        gs01 = gridspec.GridSpecFromSubplotSpec(1 + data['unique_bvals'].size,
                                                1,
                                                subplot_spec=gs0[1])
        # 0-based index of every volume in the acquisition
        x = np.arange(data['bvals'].size)
        ax = plt.subplot(gs01[0, 0])
        # Volumes with |b| <= 100 are treated as b=0
        tmp_rss = eddy['avg_rss'][np.abs(data['bvals']) <= 100]
        x_rss = x[np.abs(data['bvals']) <= 100]
        # Within-shell positions of the outliers (MSR > mean + 2 * std).
        # np.where returns a tuple, so idxs gets a leading axis -- presumably shape (1, k)
        # for 1-D avg_rss; TODO confirm.
        idxs = np.array(
            np.where(tmp_rss > np.mean(tmp_rss) + 2 * np.std(tmp_rss)))
        ax.plot(np.arange(1, 1 + data['no_b0_vols']), tmp_rss, label="b=0")
        # Outlier stars sit a fixed 200 above the shell maximum
        # (the other shells below use 1.5 times the maximum instead)
        ax.scatter(idxs + 1,
                   np.ones(idxs.size) * np.max(tmp_rss) + 200,
                   s=50,
                   c='r',
                   marker='*',
                   label='Outliers')
        # Map within-shell positions back to indices into the full volume list
        ol_vols = x_rss[idxs]
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.set_title("Mean squared residuals (MSR)")
        ax.set_xlim(0, 1 + data['no_b0_vols'])
        ax.set_ylabel("MSR")
        ax.legend(loc='best', frameon=True, framealpha=0.5)
        for i in range(0, data['unique_bvals'].size):
            # Volumes within 100 of the i-th unique b-value form one shell
            tmp_rss = eddy['avg_rss'][np.abs(data['bvals'] -
                                             data['unique_bvals'][i]) <= 100]
            x_rss = x[np.abs(data['bvals'] - data['unique_bvals'][i]) <= 100]
            idxs = np.array(
                np.where(tmp_rss > np.mean(tmp_rss) + 2 * np.std(tmp_rss)))
            ax = plt.subplot(gs01[i + 1, 0])
            ax.set_ylabel("MSR")
            ax.plot(np.arange(1, 1 + data['bvals_dirs'][i]),
                    tmp_rss,
                    label="b=%d" % data['unique_bvals'][i])
            # Outlier stars sit at 1.5 times the shell maximum
            ax.scatter(idxs + 1,
                       np.ones(idxs.size) * np.max(tmp_rss) +
                       0.5 * np.max(tmp_rss),
                       s=50,
                       c='r',
                       marker='*',
                       label='Outliers')
            # np.append flattens, so ol_vols becomes a 1-D list of outlier volume indices
            ol_vols = np.append(ol_vols, x_rss[idxs])
            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
            ax.set_xlim(0, 1 + data['bvals_dirs'][i])
            ax.legend(loc='best', frameon=True, framealpha=0.5)
        # Only the last axes created gets the x label
        ax.set_xlabel("Volume")

        # Save volumes without outliers to text files.
        # If bvecs have been specified, then also save reduced bvals and bvecs
        vols_no_outliers = np.delete(x, ol_vols)
        np.savetxt(data['qc_path'] + '/vols_no_outliers.txt',
                   np.reshape(vols_no_outliers, (1, -1)),
                   fmt='%d',
                   delimiter=' ')
        if data['bvecs'].size != 0:
            np.savetxt(data['qc_path'] + '/bvecs_no_outliers.txt',
                       data['bvecs'][:, vols_no_outliers],
                       fmt='%.5f',
                       delimiter=' ')
            np.savetxt(data['qc_path'] + '/bvals_no_outliers.txt',
                       np.reshape(data['bvals'][vols_no_outliers], (1, -1)),
                       fmt='%f',
                       delimiter=' ')

    # Format figure, save and close it
    plt.savefig(pdf, format='pdf')
    plt.close()
def plot_from_dicts(plot_dicts,
                    out,
                    title='Training Curve',
                    x_label="Iterations",
                    legend_box_pos=(1, 1)):
    '''
    Draw the learning curve(s) described by ``plot_dicts`` and save them.

    Every entry of ``plot_dicts`` whose ``plot`` attribute is truthy is drawn
    as one line using its ``x``, ``y``, ``color``, ``marker``, ``markersize``,
    ``markevery`` and ``legend`` attributes. If the largest y value exceeds
    2.0, two figures are written: a close-up limited to y <= 2.0 saved as
    ``out + "_closeup"`` and a full-range one saved as ``out``. Otherwise a
    single figure limited to y <= 2.0 is saved as ``out``.

    plot_dicts     -- iterable of objects with the attributes listed above
    out            -- output path; one trailing ".png" is stripped before use
    title          -- figure title, passed through ``wrap_title``
    x_label        -- label of the x axis
    legend_box_pos -- ``bbox_to_anchor`` of the legend

    The module-level ``demo_mode`` flag decides whether each figure is shown
    (and left open) or closed after saving.
    '''

    # Largest value over all non-empty series. ``len`` is used instead of
    # ``!= []`` so array-like ``y`` values are handled as well, and an input
    # without any data falls back to the default 0..2 range instead of
    # raising inside ``max``.
    series_maxima = [
        max(plot_dict.y) for plot_dict in plot_dicts if len(plot_dict.y) > 0
    ]
    max_y = max(series_maxima) if series_maxima else 0.0

    if max_y > 2.0:  # If Loss is too high make two plot - one with original loss and one with y_max = 2 so that Accuracy is clear
        y_maxes = [2.0, max_y]
    else:
        y_maxes = [2.0]  # Always make minimum y_max 2.0 for legend box

    # Strip only a trailing ".png"; ``str.replace`` would also remove the
    # text from directory names or from the middle of the file name.
    if out.endswith(".png"):
        out = out[:-len(".png")]

    for y_max in y_maxes:

        fig, ax1 = plt.subplots()

        fig.set_size_inches(10, 10)

        ax1.xaxis.grid(True)

        plt.grid()

        ax1.get_yaxis().set_major_locator(MaxNLocator(integer=True))
        ax1.get_xaxis().set_major_locator(MaxNLocator(integer=True))

        ax1.set_ylim(0, y_max)
        ax1.set_xlabel(x_label, fontsize=15)
        ax1.tick_params(labelsize=10)

        legends_plots = []
        legends_strs = []
        # x ticks are taken from the longest plotted series
        x_ticks = []
        for plot_dict in plot_dicts:
            if not plot_dict.plot:
                continue

            if len(plot_dict.x) > len(x_ticks):
                x_ticks = plot_dict.x
            line, = ax1.plot(plot_dict.x,
                             plot_dict.y,
                             linewidth=2,
                             color=plot_dict.color,
                             marker=plot_dict.marker,
                             markersize=plot_dict.markersize,
                             markevery=plot_dict.markevery)

            legends_plots.append(line)
            legends_strs.append(plot_dict.legend)

        # Coarser tick spacing for larger y ranges
        if y_max < 3.5:
            y_ticks_steps = 0.1
        elif y_max < 5:
            y_ticks_steps = 0.5
        elif y_max < 20:
            y_ticks_steps = 1
        else:
            y_ticks_steps = 5

        # The 0..1 range always gets 0.1 steps unless the range is large
        y_ticks_until_1 = np.arange(0, 1.05, 0.1)
        if y_max > 5:
            yticks = np.arange(0, y_max + (float(y_ticks_steps) / 2),
                               y_ticks_steps)
        elif y_max > 1:
            yticks = np.concatenate(
                (y_ticks_until_1,
                 np.arange(1, y_max + (float(y_ticks_steps) / 2),
                           y_ticks_steps)))
        else:
            yticks = y_ticks_until_1

        ax1.yaxis.set_ticks(yticks)

        ax1.xaxis.set_ticks(x_ticks)

        # Reference lines at accuracy 1 and loss 0.
        # NOTE(review): axhline takes xmin/xmax as axes fractions; 3 is kept
        # from the original -- confirm whether 1 was intended.
        max_accuracy_line = plt.axhline(y=1,
                                        xmin=0,
                                        xmax=3,
                                        linewidth=1.5,
                                        zorder=0,
                                        color='green',
                                        linestyle='dashed')
        min_loss_line = plt.axhline(y=0,
                                    xmin=0,
                                    xmax=3,
                                    linewidth=1.2,
                                    zorder=0,
                                    color='m',
                                    linestyle='dashed')

        # Adding legend
        lgd = plt.legend(legends_plots + [min_loss_line, max_accuracy_line],
                         legends_strs +
                         ["Min Loss/Accuracy = 0", "Max Accuracy = 1"],
                         bbox_to_anchor=legend_box_pos)

        plt.title(wrap_title(title), fontsize=18)
        # plt.tight_layout() # not good
        # Saving learning curve

        two_views = len(y_maxes) > 1
        if two_views and y_max > 2.0:
            # full-range view: wider figure, saved under the plain name
            fig.set_size_inches(15, 10)
            target = out
        elif two_views:
            # close-up view accompanying the full-range one
            target = out + "_closeup"
        else:
            target = out

        plt.savefig(target,
                    bbox_extra_artists=(lgd, ),
                    bbox_inches='tight',
                    pad_inches=0.5)

        if demo_mode:
            plt.show()
        else:
            plt.close()