示例#1
0
def plot_network(res):
    """Draw the interaction network next to its adjacency matrix.

    A directed graph is built from the results with
    ``generate_network_graph`` and shown in a two-panel figure: the
    adjacency matrix as a heatmap on the left and the graph in a circular
    layout on the right. When ``res`` has an ``'fdr'`` entry, that entry is
    plotted; otherwise a notice is printed and ``res`` is used as given.

    Args:
        res : dict
            output of multivariate_te.analyse_network()

    Returns:
        instance of a directed graph class from the networkx
        package (DiGraph)
    """
    try:
        corrected = res['fdr']
    except KeyError:
        print('plotting non-corrected network!')
    else:
        res = corrected

    graph = generate_network_graph(res)
    print(graph.node)

    fig, (matrix_ax, graph_ax) = plt.subplots(1, 2)

    # Left panel: two-tone adjacency matrix labelled with the node names.
    adjacency = nx.to_numpy_matrix(graph)
    two_tone = sns.light_palette('cadetblue', n_colors=2, as_cmap=True)
    sns.heatmap(
        adjacency,
        cmap=two_tone,
        cbar=False,
        ax=matrix_ax,
        square=True,
        linewidths=1,
        xticklabels=graph.nodes(),
        yticklabels=graph.nodes(),
    )
    matrix_ax.xaxis.tick_top()
    plt.setp(matrix_ax.yaxis.get_majorticklabels(), rotation=0)

    # Right panel: the graph itself.
    nx.draw_circular(
        graph,
        with_labels=True,
        node_size=300,
        alpha=1.0,
        ax=graph_ax,
        node_color='cadetblue',
        hold=True,
        font_weight='bold',
    )
    plt.show()
    return graph
示例#2
0
def do_pairplots(counts, base_dir, sample):
    """
    Save three pairplot PDFs under ``base_dir``: a combined plot coloured by
    whether each gem is uniquely mapped, followed by one plot per group
    ("Unique" and "Not Unique") with KDE contours on the lower triangle.
    """
    marker_styles = ["o", "s"]
    records, total_gems, _assigned, assigned_by_para = assign_gems(counts)
    frame = pd.DataFrame.from_dict(records)
    unique_gems = find_unique_gems(assigned_by_para)
    n_unique = len(unique_gems)
    n_shared = len(frame) - n_unique

    # One membership flag per row, reused for both labellings below.
    is_unique = [gem in unique_gems for gem in frame["GemId"]]

    # Combined plot: the legend labels carry the group sizes.
    unique_label = "{:,} unique".format(n_unique)
    shared_label = "{:,} not unique".format(n_shared)
    frame["Unique mappings"] = [
        unique_label if flag else shared_label for flag in is_unique
    ]
    grid = sns.pairplot(frame, hue="Unique mappings", markers=marker_styles,
                        plot_kws=dict(s=10))
    grid.fig.text(0.87, 0.6, "{:,} Total Gems".format(len(total_gems)))
    combined_path = os.path.join(base_dir, "{}_combined_plot.pdf".format(sample))
    grid.savefig(combined_path, format="pdf")

    # Per-group plots use the plain labels so the rows can be selected by name.
    frame["Unique mappings"] = [
        "Unique" if flag else "Not Unique" for flag in is_unique
    ]
    for idx, group in enumerate(["Unique", "Not Unique"]):
        group_rows = frame[frame["Unique mappings"] == group]
        base_color = sns.color_palette()[idx]
        density_map = sns.light_palette(base_color, as_cmap=True)
        grid = sns.pairplot(group_rows, markers=marker_styles[idx],
                            plot_kws=dict(color=base_color, s=10))
        grid.map_lower(sns.kdeplot, cmap=density_map, n_levels=50)
        suffix = group.replace(" ", "_").lower()
        group_path = os.path.join(
            base_dir, "{}_{}_combined_plot.pdf".format(sample, suffix))
        grid.savefig(group_path, format="pdf")
    plt.close('all')
示例#3
0
文件: color.py 项目: briney/abtools
def cmap_from_color(color, dark=False):
    '''
    Build a matplotlib colormap anchored on a single color.

    The colormap is created with seaborn: ``light_palette`` by default, or
    ``dark_palette`` when ``dark`` is set.

    Args:

        color: The anchor color. Can be one of several things:

            1. Hex code
            2. HTML color name
            3. RGB tuple

        dark (bool): If ``True``, the colormap runs from ``color`` to
            black. Default is ``False``, which gives a colormap from
            white to ``color``.

    Returns:

        colormap: A matplotlib colormap

    '''
    palette_builder = sns.dark_palette if dark else sns.light_palette
    return palette_builder(color, as_cmap=True)
def mag_vs_length():
    """Draw shaded 2-D KDE panels of ``length_scaled`` against ``Mr``.

    One panel is drawn for each of ``kind[1]`` .. ``kind[5]`` on a 2x3
    subplot grid, using rows of the module-level ``bar`` whose ``kind``
    matches. The y-limits are given high-to-low, so the ``Mr`` axis is
    inverted.
    """
    x_range = (0, 1.1)
    y_range = (-18, -23)
    for panel in range(1, 6):
        plt.subplot(2, 3, panel)
        label = kind[panel]
        subset = bar[bar.kind == label]
        shading = sns.light_palette(color=flatui[1], as_cmap=True)
        axes = sns.kdeplot(subset.length_scaled, subset.Mr, cmap=shading,
                           shade=True, shade_lowest=True)
        axes.set(xlim=x_range, ylim=y_range, title=label)
示例#5
0
文件: dataframe.py 项目: jpn--/larch
def global_background_gradient(s, m, M, cmap=None, low=0, high=0):
	"""Return CSS background colours for ``s`` on a fixed ``m``..``M`` scale.

	The values of ``s`` are normalised against the range ``[m, M]``, widened
	below by ``low`` and above by ``high`` (both as fractions of ``M - m``),
	then mapped through ``cmap``. A seaborn light "seagreen" colormap is used
	when ``cmap`` is None. One ``'background-color: #rrggbb'`` string is
	returned per value.
	"""
	palette = seaborn.light_palette("seagreen", as_cmap=True) if cmap is None else cmap
	span = M - m
	scale = colors.Normalize(m - (span * low), M + (span * high))
	rgba_values = palette(scale(s.values))
	return ['background-color: %s' % colors.rgb2hex(rgba) for rgba in rgba_values]
示例#6
0
def get_means():
    """Render the per-``area_main`` means of the ``olives`` table.

    The table is read through ``db.engine``, averaged per ``area_main``
    group, stripped of its first three columns and styled with a green
    background gradient. The rendered output of the styler is returned.
    """
    olives = pd.read_sql_table('olives', db.engine)
    area_means = olives.groupby('area_main').mean()
    leading_columns = area_means.columns[[0, 1, 2]]
    area_means = area_means.drop(leading_columns, axis=1)

    green_map = sns.light_palette("green", as_cmap=True)
    cell_props = {'cellpadding': '30', 'border-color': 'white'}
    styled = area_means.style.background_gradient(cmap=green_map)
    styled = styled.set_properties(**cell_props)
    return styled.render()
示例#7
0
文件: plot.py 项目: tamasgal/km3pipe
def hexbin(x, y, color="purple", **kwargs):
    """Hexbin plot usable as a mapping function on seaborn grids.

    With seaborn available (``HAS_SEABORN``), the colormap is a light
    palette derived from ``color``; otherwise the matplotlib "Purples"
    colormap is used and ``color`` has no effect. Remaining keyword
    arguments are forwarded to ``plt.hexbin``.

    See also: http://seaborn.pydata.org/tutorial/axis_grids.html#mapping-custom-functions-onto-the-grid
    """
    cmap = sns.light_palette(color, as_cmap=True) if HAS_SEABORN else "Purples"
    plt.hexbin(x, y, cmap=cmap, **kwargs)
示例#8
0
def build_table(dic):
    """Run an aggregate query on ``USIODB`` and return it as a styled table.

    The query selects ``Period`` plus SUM/AVG/MAX/MIN/COUNT of one column and
    the grouping column, filtered by one condition and grouped by the
    grouping column. The first rows (labels up to 4) are styled with
    background gradients and null highlighting, then rendered.

    Args:
        dic: mapping with the keys
            ``colLabel`` (column to aggregate),
            ``aggregationCol`` (column to group by),
            ``filterName`` (column to filter on),
            ``filterQuery`` (``'contains'``, ``'does not contain'`` or a
            literal SQL operator) and
            ``filterValue`` (value to compare against).

    Returns:
        The rendered output of the pandas Styler.
    """
    value_col = to_valid_query(dic['colLabel'])
    filter_col = to_valid_query(dic['filterName'])
    filterQuery = dic['filterQuery']
    filterValue = dic['filterValue']
    group_col = to_valid_query(dic['aggregationCol'])

    select = (
        "SELECT Period,SUM({0}),AVG({0}),MAX({0}),MIN({0}),COUNT({0}),{1} "
        "FROM USIODB WHERE {2} "
    ).format(value_col, group_col, filter_col)

    # The substring filters bind the user value as a parameter, so quotes in
    # it can no longer break or alter the statement.
    if filterQuery == 'contains':
        condition = "LIKE ?"
        params = ['%{}%'.format(filterValue.strip())]
    elif filterQuery == 'does not contain':
        condition = "NOT LIKE ?"
        params = ['%{}%'.format(filterValue.strip())]
    else:
        # NOTE(review): operator and value are still interpolated verbatim
        # here, because binding the value would change how unquoted numeric
        # or pre-quoted text operands compare. Restrict filterQuery to a
        # known operator list upstream if this input is untrusted.
        condition = "{} {}".format(filterQuery, filterValue)
        params = None

    sql = select + condition + " GROUP BY {}".format(group_col)

    print(sql)

    cm = sns.light_palette("yellow", as_cmap=True)

    # Always release the connection, even when the query fails.
    conn = sqlite3.connect('USIODB.db')
    try:
        frame = pd.read_sql_query(sql, conn, params=params)
    finally:
        conn.close()

    styled = (frame
              .loc[:4]
              .style
              .background_gradient(cmap='viridis', low=.5, high=0)
              .highlight_null('red')
              .background_gradient(cmap=cm)
              )

    return styled.render()
示例#9
0
def shot_chart(x, y, title="", kind="scatter", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", outer_lines=False, court_lw=1,
               flip_court=False, kde_shade=True, hex_gridsize=None,
               ax=None, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vectors
        Shot coordinates.
    title : str
        Axes title.
    kind : {"scatter", "kde", "hex"}
        Type of chart to draw.
    color : matplotlib color
        Marker color for scatter plots; also the base of the default
        colormap.
    cmap : matplotlib colormap, optional
        Colormap for "kde" and "hex" charts. Defaults to a seaborn light
        palette built from ``color``.
    xlim, ylim : two-tuples
        Axis limits; both are reversed when ``flip_court`` is true.
    court_color, court_lw, outer_lines
        Forwarded to ``draw_court``.
    kde_shade : bool
        Passed as ``shade`` to ``sns.kdeplot``.
    hex_gridsize : int, optional
        Grid size for "hex" charts. Defaults to the mean of the
        Freedman-Diaconis bin counts of ``x`` and ``y``.
    ax : Axes, optional
        Axes to draw on; defaults to ``plt.gca()``.
    kwargs
        Forwarded to the underlying plotting call.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    # tick_params expects booleans; the string "off" is truthy and would
    # leave the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap,
                    ax=ax, **kwargs)
        ax.set_xlabel('')
        ax.set_ylabel('')

    elif kind == "hex":
        if hex_gridsize is None:
            # Get the number of bins for hexbin using the Freedman-Diaconis
            # rule. This idea was taken from seaborn, which got the
            # calculation from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=hex_gridsize, cmap=cmap, **kwargs)

    else:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    return ax
示例#10
0
文件: dataframe.py 项目: jpn--/larch
def apply_global_background_gradient(df, override_min=None, override_max=None, cmap=None, subset=None):
	"""Apply ``global_background_gradient`` using one shared colour scale.

	``df`` must provide ``.data`` and ``.apply`` (presumably a pandas
	Styler -- confirm against callers). The scale runs from the overall
	minimum to the overall maximum of ``df.data`` unless ``override_min`` /
	``override_max`` are given. A seaborn light "seagreen" colormap is used
	when ``cmap`` is None. The result of ``df.apply`` is returned.
	"""
	if cmap is None:
		cmap = seaborn.light_palette("seagreen", as_cmap=True)

	if override_min is not None:
		lower = override_min
	else:
		lower = df.data.min().min()

	if override_max is not None:
		upper = override_max
	else:
		upper = df.data.max().max()

	return df.apply(
		global_background_gradient,
		cmap=cmap,
		m=lower,
		M=upper,
		subset=subset,
	)
def violin_by_reso(wp):
  """Save overlaid box plots of every feature, binned by resolution.

  ``wp`` must provide ``minor_xs`` and ``items`` (presumably a pandas
  Panel -- confirm against callers). The 'EH' and '0.025' cross-sections
  are binned on their 'Reso' column into 0.4-wide bins starting at 0.9,
  each labelled with its left edge plus 0.2. For every item of ``wp`` both
  cross-sections are drawn as box plots on the same axes and saved to
  ``violin_reso_plots/<feature>_reso.png``.
  """
  # Combined palette: 4 "Paired" colours followed by the inner 8 shades of a
  # red and of a blue light palette. Only entries 11 and 19 are used.
  p1 = sns.color_palette('Paired')[0:4]
  p2 = [tuple(i) for i in sns.light_palette("red", 10)]
  p3 = [tuple(i) for i in sns.light_palette("blue", 10)]
  pal = p1 + p2[1:-1] + p3[1:-1]
  eh_color = pal[11]
  amb_color = pal[19]

  eh = wp.minor_xs('EH')
  amb = wp.minor_xs('0.025')

  reso_bins = np.arange(.9, 4.5, .4)
  labels = [i + .2 for i in reso_bins][:-1]
  eh['reso_range'] = pd.cut(eh['Reso'], reso_bins, labels=labels)
  amb['reso_range'] = pd.cut(amb['Reso'], reso_bins, labels=labels)

  for feature in wp.items:
    sns.boxplot(eh[feature], eh.reso_range, color=eh_color)
    sns.boxplot(amb[feature], amb.reso_range, color=amb_color, alpha=0.5)
    plt.savefig('violin_reso_plots/%s_reso.png' % feature)
    # Clear the figure so the next feature starts from empty axes.
    plt.clf()
示例#12
0
def jointgrid(x, y, kind="scatter", data=None, title="", color="b",
              xlim=(-250, 250), ylim=(422.5, -47.5), court_color="gray",
              joint_color="b", marginals_color="b", chart=None,
              joint_kde_shade=True, marginals_kde_shade=True, court_lw=1,
              joint_kws=None, marginal_kws=None, outer_lines=False, cmap=None,
              space=0, set_size_inches=(12, 11), **kwargs):
    """
    Returns a seaborn JointGrid with a shot chart on the joint axes.

    Parameters
    ----------
    x, y : vectors or column names
        Shot coordinates, or names of columns in ``data``.
    kind : str
        "kde" draws a KDE on the joint axes; anything else draws a scatter.
    data : DataFrame, optional
        Forwarded to ``sns.JointGrid``.
    title : str
        Title used when ``chart`` is not given.
    color : matplotlib color
        Base color of the default colormap.
    xlim, ylim : two-tuples
        Axis limits.
    court_color, court_lw, outer_lines
        Forwarded to ``draw_court``.
    joint_color, marginals_color : matplotlib colors
        Default colors for the scatter and marginal plots.
    chart : object, optional
        When given, its ``playername`` and ``season`` attributes build the
        title, which takes precedence over ``title``.
    joint_kde_shade : bool
        Default ``shade`` value for the joint KDE.
    marginals_kde_shade : bool
        Accepted for API compatibility; not used.
    joint_kws, marginal_kws : dict, optional
        Extra keyword arguments for the joint and marginal plots. Values
        given here win over the defaults above. ``**kwargs`` are merged
        into ``joint_kws``.
    cmap : matplotlib colormap, optional
        Colormap for the joint KDE; defaults to a light palette from
        ``color``.
    space : number
        Forwarded to ``sns.JointGrid``.
    set_size_inches : two-tuple
        Figure size in inches.
    """
    # Copy the caller's dicts so they are never mutated.
    joint_kws = dict(joint_kws) if joint_kws else {}
    joint_kws.update(kwargs)
    marginal_kws = dict(marginal_kws) if marginal_kws else {}

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    grid = sns.JointGrid(x, y, data=data, xlim=xlim, ylim=ylim, space=space)

    if kind == "kde":
        joint_kws.setdefault("cmap", cmap)
        joint_kws.setdefault("shade", joint_kde_shade)
        grid = grid.plot_joint(sns.kdeplot, **joint_kws)
    else:
        joint_kws.setdefault("color", joint_color)
        grid = grid.plot_joint(plt.scatter, **joint_kws)

    marginal_kws.setdefault("color", marginals_color)
    grid = grid.plot_marginals(sns.distplot, **marginal_kws)

    grid.fig.set_size_inches(set_size_inches)

    ax = grid.fig.get_axes()[0]

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    ax.set_xlabel("")
    ax.set_ylabel("")
    # tick_params expects booleans; the string "off" is truthy and would
    # leave the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)

    # Keep the chart-derived title when a chart is supplied; otherwise use
    # the caller's title instead of failing on chart=None.
    if chart is not None:
        title = chart.playername + " FGA \n" + chart.season + " Regular Season"

    ax.set_title(title, y=1.2, fontsize=18)

    return grid
示例#13
0
    def to_dataframe(self, cmap=None):
        """
        Convert the `ErrorMatrix` to a pandas dataframe, optionally styled.

        Rows and columns are labelled with `self.categories`, and 'None'
        strings are replaced by NaN. Do not use with the `with_labels`
        property: labels are already included here and it would make this
        function fail.

        Parameters
        ----------
        cmap : matplotlib colormap or `True`
            If `None` (default), a plain dataframe is returned. Otherwise
            background colors are added to the cells, excluding the
            'Totals' and 'Accuracy' rows and columns. If `True`, a default
            colormap is chosen: seaborn's light 'steelblue' palette when
            seaborn can be imported, else matplotlib's `GnBu`. Any other
            value is used as the colormap itself.

        Returns
        -------
        pandas dataframe or dataframe styler
            The plain dataframe when `cmap` is `None`; otherwise a pandas
            `Styler`, whose underlying dataframe is available via its
            `.data` property.
        """
        import pandas as pd
        labels = self.categories
        frame = pd.DataFrame(self, columns=labels, index=labels)
        frame = frame.replace('None', np.nan)
        if cmap is None:
            return frame

        if cmap is True:
            # Neither plotting library is a hard dependency of this module,
            # so prefer seaborn and fall back to matplotlib.
            try:
                from seaborn import light_palette
                cmap = light_palette('steelblue', as_cmap=True)
            except ImportError:
                import matplotlib.pyplot as plt
                cmap = plt.cm.GnBu

        data_cells = frame.columns.difference(['Totals', 'Accuracy'])
        return frame.style.background_gradient(cmap=cmap,
                                               subset=(data_cells, data_cells))
示例#14
0
def shot_chart_jointplot(x, y, data=None, title="", kind="scatter", color="b",
                         cmap=None, xlim=(-250, 250), ylim=(422.5, -47.5),
                         space=0, court_color="gray", outer_lines=False,
                         court_lw=1, flip_court=False,
                         set_size_inches=(12, 11), **kwargs):
    """
    Returns a seaborn JointGrid using sns.jointplot

    Parameters
    ----------
    x, y : vectors or column names
        Shot coordinates, or names of columns in ``data``.
    data : DataFrame, optional
        Forwarded to ``sns.jointplot``.
    title : str
        Title placed above the joint axes.
    kind : str
        Forwarded to ``sns.jointplot``.
    color : matplotlib color
        Plot color; also the base of the default colormap.
    cmap : matplotlib colormap, optional
        Defaults to a seaborn light palette built from ``color``.
    xlim, ylim : two-tuples
        Axis limits; both are reversed when ``flip_court`` is true.
    space : number
        Forwarded to ``sns.jointplot``.
    court_color, court_lw, outer_lines
        Forwarded to ``draw_court``.
    set_size_inches : two-tuple
        Figure size in inches.
    kwargs
        Forwarded to ``sns.jointplot``.
    """

    # If a colormap is not provided, then it is based off of the color
    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    # Forward the caller's data and space instead of hard-coded None / 0.
    plot = sns.jointplot(x, y, data=data, stat_func=None, kind=kind,
                         space=space, color=color, cmap=cmap, **kwargs)

    plot.fig.set_size_inches(set_size_inches)

    # A joint plot has 3 Axes, the first one called ax_joint
    # is the one we want to draw our court onto and adjust some other settings
    ax = plot.ax_joint

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of axis labels and tick labels. tick_params expects booleans;
    # the string 'off' is truthy and would leave the labels visible.
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.tick_params(labelbottom=False, labelleft=False)

    # Add a title
    ax.set_title(title, y=1.2, fontsize=18)

    return plot
示例#15
0
def posts_by_category(df):
    """Show a bar chart of posting counts per ``category_code``.

    Counts are taken from the non-null ``url`` values of each category.
    Every bar is annotated with its share of all postings, as a percentage
    rounded to two decimals.
    """
    sns.set_context('talk', font_scale=1.5)
    cat_counts = df.groupby('category_code').url.count()
    x = cat_counts.index
    y = cat_counts.values

    _, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.set_ylabel('Postings')
    bar_colors = sns.light_palette('#008080', reverse=True, n_colors=10)
    sns.barplot(x, y, palette=bar_colors, linewidth=0)
    ax.set_xlabel('')
    ax.set_title('Postings by Category')
    for item in ax.get_xticklabels():
        item.set_rotation(15)
    sns.despine(bottom=True, right=True, trim=True)

    # The total is loop-invariant, so compute it once.
    total = sum(y)
    percentage = [np.round((float(y_) * 100 / total), 2) for y_ in y]
    for i, p in enumerate(ax.patches):
        # Place the label slightly above the top of the bar.
        ax.text(p.get_x(), p.get_height() + 10,
                '{}%'.format(percentage[i]), fontsize=20)
    plt.show()
示例#16
0
 def plot_projection(self, colors = None, palette = 'husl', plot_density = False):
     """Draw a pair grid of ``self.features``.

     Scatter plots go on the upper triangle and KDE curves on the diagonal.
     With ``plot_density`` set, KDE plots are added to the lower triangle.
     When the instance has a ``cluster_labels`` attribute, ``self.labels``
     is appended to the features and the grid is coloured by the 'label'
     column, with one light colormap per entry of ``colors`` (by default a
     ``palette`` palette with ``len(self.cluster_labels)`` colours).
     """
     if hasattr(self, 'cluster_labels'):
         if colors is None:
             colors = sns.color_palette(palette, len(self.cluster_labels))

         labelled = pd.concat([self.features, self.labels], axis=1)
         per_hue_cmaps = [sns.light_palette(color, as_cmap=True) for color in colors]
         grid = sns.PairGrid(labelled, diag_sharey=False, hue='label',
                             vars = self.features.columns,
                             hue_kws={'cmap': per_hue_cmaps})
         grid.map_diag(sns.kdeplot, lw=3)
         grid.map_upper(pyplot.scatter)
         density_kws = {}
     else:
         sns.set(style="white")
         grid = sns.PairGrid(self.features, diag_sharey=False)
         grid.map_upper(pyplot.scatter)
         grid.map_diag(sns.kdeplot, lw=3)
         density_kws = {'cmap': "Blues_d"}

     if plot_density:
         grid.map_lower(sns.kdeplot, **density_kws)
    def ClusterMap(self, label=None, fig_title='', transform='log'):
        """Plot a hierarchically clustered heatmap with seaborn's clustermap.

        Parameters
        ----------
        label : str, optional
            Column of ``self.meta_df`` used to colour the rows (the samples
            in ``self.abun_df.index``). When given together with
            ``fig_title``, the label is inserted before the file extension
            of the saved figure.
        fig_title : str
            File name to save the figure to; nothing is saved when empty.
        transform : str
            'log' -> ``np.log(raw2abun(self.raw_df + 1))``;
            'presence_absence' -> 1 for truthy and 0 for falsy entries of
            ``self.raw_df``;
            'logit' -> ``log(X / (1 - X))`` of ``raw2abun(self.raw_df + 1)``;
            anything else -> a copy of ``self.abun_df``.

        Returns
        -------
        The object returned by ``sb.clustermap``.
        """
        import os

        # scale fontsize so labels are visible..
        sb.set(font_scale=0.6)
        cmap = sb.light_palette('navy', as_cmap=True)
        if transform == 'log':
            data = np.log(raw2abun(self.raw_df + 1))
        elif transform == 'presence_absence':
            data = self.raw_df.applymap(lambda x: 1 if x else 0)
        elif transform == 'logit':
            abun = raw2abun(self.raw_df + 1)
            data = np.log(abun / (1 - abun))
        else:
            data = copy(self.abun_df)

        if label:
            rows = self.abun_df.index
            row_vals = [self.meta_df.loc[smpl, label] for smpl in rows]
            distinct_vals = list(set(row_vals))
            row_pal = sb.cubehelix_palette(len(distinct_vals))
            # Key the lookup by the raw values so non-string labels still
            # find their colour.
            row_dict = dict(zip(distinct_vals, row_pal))
            # clustermap matches Series colours to the data by index, so the
            # colours must carry the sample index.
            row_colors = pd.Series(row_vals, index=rows).map(row_dict)
            fg = sb.clustermap(data, row_colors=row_colors, cmap=cmap)
        else:
            fg = sb.clustermap(data, cmap=cmap)

        if fig_title:
            if label:
                # Insert the label before the extension and keep the dot;
                # this also works for names with no or several dots.
                stem, ext = os.path.splitext(fig_title)
                new_title = stem + '_' + label + ext
            else:
                new_title = fig_title
            fg.savefig(new_title)
            plt.close()
        sb.set(font_scale=1.0)
        return fg
示例#18
0
def topic_brand_hm(review_inf, brands, topics, data):
    ''' This function creates a heatmap of the topic review rates of all the
        topics vs all the brands.

        Each cell is the percentage of a brand's rows in ``data`` that also
        appear in the topic's dataframe; brands with no rows in ``data``
        get 0 for every topic.

        Inputs:
                review_inf: The dictionary with the text and dataframe for
                            each topic, the second output of
                            get_topics_and_reviews().

                brands: A list of all the unique brands that appear in the
                        data.

                topics: A list of all the topics output by
                        get_topics_and_reviews().

                data: The dataframe from which all the topics and reviews
                      were generated.

        Outputs: A seaborn heatmap described above.'''

    df = pd.DataFrame()
    for brand in brands:
        # The brand's total does not depend on the topic, so compute it once
        # per brand rather than once per (brand, topic) pair.
        tot = data[data['Brand'] == brand]['Brand'].shape[0]

        topic_pcts = []
        for topic in topics:
            df_t = review_inf[topic]['df']
            if tot > 0:
                hits = df_t[df_t['Brand'] == brand]['Brand'].shape[0]
                topic_pcts.append(100 * hits / tot)
            else:
                topic_pcts.append(0)

        df[brand] = topic_pcts
    df.index = topics
    cmap = sns.light_palette((147, 100, 39), input="husl", as_cmap=True)
    return sns.heatmap(df, cmap=cmap)
示例#19
0
def sub_heatmap_plot(df, gs, title, loc, total, satuation, vmin, vmax, flag,
                     cancertype, hm):
    """Draw one heatmap panel in column ``loc`` of the grid spec ``gs``.

    Values of ``df`` are capped at the ``satuation``-th percentile of all its
    values before plotting. The colormap is a light blue palette, or a light
    red one when ``hm`` is 'CTCF'. The color scale always starts at 0 and
    ends at ``vmax``; ``vmin`` is only used as the text of the lower colorbar
    tick on the last panel.

    Panel layout by ``loc``:
      * ``loc % 3 == 2``: the axes are switched off and nothing is drawn
        (presumably a spacer column -- confirm against the caller).
      * ``loc == 0``: heatmap with '-1kb' / '1kb' x tick labels and a y label
        naming the ChIP-seq mark; its colorbar is removed.
      * ``loc == total - 1``: heatmap that keeps its colorbar, repositioned
        and labelled with ``vmin`` / ``vmax``.
      * otherwise: heatmap with its colorbar removed.

    When ``loc % 3 == 1`` an extra ``cancertype`` heading and a horizontal
    rule are drawn above the panel; for ``hm == 'H3K27me3'`` the row count of
    ``df`` is also written below it.

    ``flag`` is not used in this part of the function.
    """
    # plot each heatmap panel
    all_values = [i for col in df.columns for i in df[col].values]
    # Cap outliers so a few extreme values do not wash out the color scale.
    df = df.clip(upper=np.percentile(all_values, satuation))
    ax = plt.subplot(gs[0, loc])
    #pal = sns.light_palette(cancertype_heatmap_color(cancertype),as_cmap=True)
    pal = sns.light_palette('blue', as_cmap=True)
    if hm == 'CTCF':
        pal = sns.light_palette('red', as_cmap=True)
    # Lower end of the color scale, shared by all panels.
    cbarvmin = 0
    if loc % 3 == 2:
        ax.set_axis_off()
    elif loc == 0:
        g = sns.heatmap(df,
                        ax=ax,
                        yticklabels=False,
                        xticklabels=True,
                        cbar=True,
                        cmap=pal,
                        vmin=cbarvmin,
                        vmax=vmax,
                        cbar_kws={"shrink": 0.5})
        ax.set_ylabel('{} ChIP-seq'.format(hm), fontsize=13)
        xp = g.get_xticks()  #;print(xp)
        # Keep only the first and last tick and relabel them as the window edges.
        ax.set_xticks([xp[0], xp[-1]])
        ax.set_xticklabels(['-1kb', '1kb'], rotation=30, fontsize=13)
        ax.set_title('{}'.format(title), fontsize=14)
        ax.tick_params(axis='x',
                       direction='out',
                       length=0,
                       width=1,
                       colors='black')
        # The colorbar is created, adjusted and then removed again.
        cbar = g.collections[0].colorbar
        cbar.set_clim(vmax * .15, vmax)
        cbar.remove()
    elif loc == total - 1:
        g = sns.heatmap(df,
                        ax=ax,
                        yticklabels=False,
                        xticklabels=False,
                        cbar=True,
                        cmap=pal,
                        vmin=cbarvmin,
                        vmax=vmax,
                        cbar_kws={"shrink": 0.5})
        ax.set_title('{}'.format(title), fontsize=15)
        ax.set_ylabel('')
        ax.tick_params(axis='y',
                       direction='out',
                       length=0,
                       width=1,
                       colors='black')
        # Last panel: keep the colorbar, label its ends and move it.
        cbar = g.collections[0].colorbar
        cbar.set_clim(vmax * .15, vmax)
        cbar.set_ticks([cbarvmin, vmax])
        cbar.set_ticklabels([vmin, vmax])
        cbar.ax.set_position([.9, 0.35, .8, .5])
        cbar.ax.tick_params(axis='y',
                            direction='out',
                            length=0,
                            width=1,
                            colors='black')

    else:
        g = sns.heatmap(df,
                        ax=ax,
                        yticklabels=False,
                        xticklabels=False,
                        cbar=True,
                        cmap=pal,
                        vmin=cbarvmin,
                        vmax=vmax)
        ax.set_title('{}'.format(title), fontsize=15)
        ax.set_ylabel('')
        ax.tick_params(axis='y',
                       direction='out',
                       length=0,
                       width=1,
                       colors='black')
        # Inner panels do not keep a colorbar.
        cbar = g.collections[0].colorbar
        cbar.set_clim(vmax * .15, vmax)
        cbar.remove()

    if hm == 'H3K27me3' and loc % 3 == 1:
        # Write the number of rows below the panel.
        ax.text(210,
                df.shape[0] * 1.15,
                '{}'.format(df.shape[0]),
                fontsize=12,
                ha='left')

    if loc % 3 == 1:
        # ==== add additional title
        ax.text(-100, -0.22 * df.shape[0] - .3, cancertype, fontsize=15)
        # clip_on=False lets the rule extend beyond the axes area.
        ax.hlines(y=-0.2 * df.shape[0] - .3,
                  xmin=-230,
                  xmax=200,
                  clip_on=False,
                  lw=1.1)
    plt.title(f'{img_num}: {class_label}')
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()
# plt.savefig('Class Results')

# In[29]:

# Image identifiers: the file names directly inside ALIGNED_TEST_DIR, cut at
# their first dot.
imgs = [img.split('.')[0] for img in next(os.walk(ALIGNED_TEST_DIR))[2]]

# ### Tabulated Prediction Probabilities

# In[30]:

# Build a table with one row per key of `results` and one column per image,
# shown as percentages on a blue background gradient.
# NOTE(review): the styled table is not assigned or displayed explicitly;
# presumably this relies on notebook cell output -- confirm.
cm = sns.light_palette("blue", as_cmap=True)
df = pd.DataFrame.from_dict(results, orient='index', columns=imgs)
df.style.format("{:.2%}").set_caption('Confidence Values').background_gradient(
    cmap=cm)

# In[31]:
"""
Re-structures the results dictionary so that each class_label points to another dictionary {k, v}
where k = the Image_Id number and v = the confidence value
"""


def gen_results(results):
    my_dict = {}
    for cls in LABELS:
        probs = iter(results[cls])
示例#21
0
def shot_chart(x,
               y,
               kind="scatter",
               title="",
               color="b",
               cmap=None,
               xlim=(-250, 250),
               ylim=(422.5, -47.5),
               court_color="gray",
               court_lw=1,
               outer_lines=False,
               flip_court=False,
               kde_shade=True,
               gridsize=None,
               ax=None,
               despine=False,
               **kwargs):
    """
    Returns an Axes object with player shots plotted.
    Parameters
    ----------
    x, y : vectors
        The x and y coordinates of the shots taken, passed in as
        vectors (such as a pandas Series).
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot.  Default
        is ``False``, which orients the court where the hoop is towards the top
        of the plot.
    kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours.
    gridsize : int, optional
        Number of hexagons in the x-direction.  The default is calculated using
        the Freedman-Diaconis method.
    ax : Axes, optional
        The Axes object to plot the court onto.
    despine : boolean, optional
        If ``True``, removes the spines.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn plots.
    Returns
    -------
     ax : Axes
        The Axes object with the shot chart plotted on it.
    Raises
    ------
    ValueError
        If ``kind`` is not "scatter", "kde" or "hex".
    """

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    # tick_params expects booleans; the string "off" is truthy and would
    # leave the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        ax.set_xlabel('')
        ax.set_ylabel('')

    elif kind == "hex":
        if gridsize is None:
            # Get the number of bins for hexbin using the Freedman-Diaconis
            # rule. This idea was taken from seaborn, which got the
            # calculation from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=gridsize, cmap=cmap, **kwargs)

    else:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return ax
示例#22
0
def shot_chart_jointplot(x,
                         y,
                         data=None,
                         kind="scatter",
                         title="",
                         color="b",
                         cmap=None,
                         xlim=(-250, 250),
                         ylim=(422.5, -47.5),
                         court_color="gray",
                         court_lw=1,
                         outer_lines=False,
                         flip_court=False,
                         size=(12, 11),
                         space=0,
                         despine=False,
                         joint_kws=None,
                         marginal_kws=None,
                         **kwargs):
    """
    Returns a seaborn JointGrid using sns.jointplot
    Parameters
    ----------
    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as column names from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the
        shot location coordinates.
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.  The defaults represent the out of bounds
        lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot.  Default
        is ``False``, which orients the court where the hoop is towards the top
        of the plot.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots.
    despine : boolean, optional
        If ``True``, hides the four spines of the joint Axes.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
    kwargs : key, value pairs
        Extra keyword arguments forwarded to ``sns.jointplot``.
    Returns
    -------
     grid : JointGrid
        The JointGrid object with the shot chart plotted on it.
    Raises
    ------
    ValueError
        If ``kind`` is not one of "scatter", "kde" or "hex".
    """

    # If a colormap is not provided, then it is based off of the color
    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if kind not in ["scatter", "kde", "hex"]:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    # ``space`` is forwarded as given; it used to be hard-coded to 0 here,
    # which silently ignored the caller's value.
    grid = sns.jointplot(x=x,
                         y=y,
                         data=data,
                         stat_func=None,
                         kind=kind,
                         space=space,
                         color=color,
                         cmap=cmap,
                         joint_kws=joint_kws,
                         marginal_kws=marginal_kws,
                         **kwargs)

    grid.fig.set_size_inches(size)

    # A joint plot has 3 Axes, the first one called ax_joint
    # is the one we want to draw our court onto and adjust some other settings
    ax = grid.ax_joint

    # Reversing both limit tuples flips the orientation of the court
    if flip_court:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])
    else:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of axis labels and tick labels.  Booleans are required here:
    # the legacy string 'off' is a truthy value for current matplotlib and
    # would leave the labels visible.
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.tick_params(labelbottom=False, labelleft=False)

    # Add a title, lifted above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary.  The marginal axes get the same treatment so the
    # whole figure looks uniform.
    for spine in ax.spines:
        for axes in (ax, grid.ax_marg_x, grid.ax_marg_y):
            axes.spines[spine].set_lw(court_lw)
            axes.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
示例#23
0
    if droplabelx and sharex and axs.ndim > 1:
        for ax in axs[:-1,:].flat:
            ax.xaxis.set_tick_params(which='both', labelbottom=False, labeltop=False)
            ax.xaxis.offsetText.set_visible(False)

    # Turn off y tick labels and offset text for all but the left most column
    if droplabely and sharey and axs.ndim > 1:
        for ax in axs[:,1:].flat:
            ax.yaxis.set_tick_params(which='both', labelleft=False, labelright=False)
            ax.yaxis.offsetText.set_visible(False)


# Seaborn palettes keyed by metric name (presumably looked up via the
# ``metric_type`` of the heatmap plotting helpers -- confirm against callers).
# The gap-style metrics and 'pvalue' use a diverging palette with a light
# centre; 'error' uses a reversed sequential palette.
# Alternative kept for reference:
#   'pvalue': sns.light_palette("red", reverse=True)
HEATMAP_COLORMAPS = dict(
    pvalue=sns.diverging_palette(h_neg=10, h_pos=240, l=50, center='light'),
    error=sns.light_palette("seagreen", reverse=True),
    error_gap=sns.diverging_palette(h_neg=10, h_pos=240, l=50, center='light'),
    error_relgap=sns.diverging_palette(h_neg=10, h_pos=240, l=50, center='light'),
)


def plot_group_heatmap(data, groups, p, ax, cmap, metric_type = 'error_gap', stat_field = 'train'):

    title_dict = {
        'error': 'Error',
        'error_gap': 'Error Gap',
        'pvalue': 'Pr(Envyfree)',
        'dcp_root_error': 'Pooled Error',
        'dcp_root_error_gap': 'Rationality Gap',
        'dcp_root_pvalue': 'Pr(Rational)',
        'dcp_root_error_relgap': 'Rationality Gap (Relative)',
示例#24
0
def one_plot(opt):
    """Draw a split violin plot of per-stream vote contributions.

    Loads the lanecheck dictionary from ``opt.lanecheck_path`` (via
    ``utils.load_pickle``), turns every available feature stream into
    long-form rows and shows a single seaborn violin plot titled
    'SVIR Trained Model'.

    For each per-question record and each stream, the first five entries of
    the stream's vector become 'Vote Contribution' values; entries 5 and 6
    are stored in the 'ground_truth' and 'prediction' columns and are passed,
    together with the answer index, to ``confusion_matrix_tn_fn`` to obtain
    the 'Answer Type'.  Rows whose answer type is 'Ignore' are dropped.

    NOTE(review): a 'reg_out' stream used to be collected here but was never
    plotted, so it is no longer gathered -- confirm that omission is intended.

    Args:
        opt: options object; only ``opt.lanecheck_path`` is read.
    """
    import pandas as pd

    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    # Load dictionary
    lanecheck_dict = utils.load_pickle(opt.lanecheck_path)

    # 'acc' is removed BEFORE a record is sampled: it is presumably an
    # aggregate score rather than a per-question dict (confirm), and sampling
    # it would make the ``.get`` probes below fail at random.
    lanecheck_dict.pop('acc', None)

    # (key in each record, label shown on the x axis), in plotting order
    streams = (
        ('sub_out', 'Subtitles'),
        ('vcpt_out', 'Visual Concepts'),
        ('vid_out', 'ImageNet'),
        ('regtopk_out', 'Regional Features'),
    )

    # Probe one arbitrary record to learn which streams were saved; this
    # assumes every record carries the same set of streams.
    check = random.choice(list(lanecheck_dict.values()))
    active = [(key, label) for key, label in streams
              if check.get(key) is not None]

    # Plot settings
    pal_tn_fn = {"True Negative": sns.light_palette("red")[1],
                 "False Negative": sns.light_palette("orange")[1]}

    sns.set(font_scale=3.0)
    sns.set_style("whitegrid")
    plt.subplots()

    # Two fixed placeholder rows precede the real data (presumably to pin
    # the plotted value range -- confirm).
    rows = [('', 38, 1, 1, "True Negative"),
            ('1', -7, 1, 1, "True Negative")]
    for key, label in active:
        stacked = np.stack([q_dict[key] for q_dict in lanecheck_dict.values()])
        for aa in stacked:
            for a_idx, value in enumerate(aa[:5]):
                answer_type = confusion_matrix_tn_fn(a_idx, aa[5], aa[6])
                if answer_type != 'Ignore':
                    rows.append((label, value, aa[5], aa[6], answer_type))

    # Report the value range (never narrower than zero on either side)
    values = [row[1] for row in rows]
    print(max([0] + values))
    print(min([0] + values))

    data = pd.DataFrame(rows, columns=['', 'Vote Contribution', 'ground_truth', 'prediction', 'Answer Type'])
    sns.violinplot(data=data, palette=pal_tn_fn, inner="quart", linewidth=2.5, hue='Answer Type', x='', y='Vote Contribution', split=True, legend=False, legend_out=True)
    plt.title('SVIR Trained Model')
    plt.show()
def heatmap_plot_zscore_ivscc(df_zscore_features, df_all, output_dir, title=None):
    """Plot a clustered heatmap of z-scored features and save it as a PNG.

    The rows of ``df_zscore_features`` are clustered with Ward linkage and
    become the columns of the heatmap (the frame is transposed for plotting);
    the features are reordered by their own Ward linkage.  Two colour strips
    annotate the columns with ``df_all['cre_line']`` and ``df_all['layer']``,
    and a legend for both is drawn on the row-dendrogram axes.

    Parameters
    ----------
    df_zscore_features : DataFrame
        Feature matrix to cluster and plot.
    df_all : DataFrame
        Must provide the 'cre_line' and 'layer' columns.
    output_dir : str
        Directory receiving 'zscore_feature_heatmap.png'.
    title : str, optional
        Title set through ``pl.title`` when given.

    Returns
    -------
    The linkage matrix computed from ``df_zscore_features``.
    """
    print("heatmap plot:ivscc")
    sns.set_context("talk", font_scale=1.4)

    # Create a custom palette for creline colors
    cre_lines = np.unique(df_all['cre_line'])
    cre_line_pal = sns.color_palette("BrBG", len(cre_lines))
    cre_line_lut = dict(zip(cre_lines, cre_line_pal))  # map creline type to color
    cre_line_colors = df_all['cre_line'].map(cre_line_lut)

    # Same idea for the layers, using a light-to-black ramp
    layers = np.unique(df_all['layer'])
    layer_pal = sns.light_palette("black", len(layers))
    layer_lut = dict(zip(layers, layer_pal))
    layer_colors = df_all['layer'].map(layer_lut)

    linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')
    print("linkage genearted")

    # Features become heatmap rows; order them by their own clustering
    # because row clustering is disabled in the clustermap call below.
    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)
    ordered_features = [data.index[x] for x in feature_order]
    data = data.reindex(ordered_features)

    print("plot heatmap")
    g = sns.clustermap(data, row_cluster = False, col_linkage=linkage, method='ward', metric='euclidean',
                       linewidths = 0.0,col_colors = [cre_line_colors,layer_colors],
                       cmap = sns.cubehelix_palette(light=1, as_cmap=True),figsize=(60,15))

    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90 )
    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.subplots_adjust(left=0.1, bottom=0.5, right=0.8, top=0.95)  # !!!!!

    if title:
        pl.title(title)

    # Legend for the column colour strips: zero-sized bars act as legend
    # handles only and draw nothing on the axes.
    legend_ax = g.ax_row_dendrogram
    for label in cre_lines:
        legend_ax.bar(0, 0, color=cre_line_lut[label], label=label, linewidth=0.0)

    # Three blank white entries separate the two groups in the legend
    for _ in range(3):
        legend_ax.bar(0, 0, color="white", label=" ", linewidth=0)

    for label in layers:
        legend_ax.bar(0, 0, color=layer_lut[label], label=label, linewidth=0)

    # Build the legend once, after every handle has been registered
    legend_ax.legend(loc="best", ncol=1, borderpad=0.0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    return linkage
示例#26
0
def projection_pca(ds_d, ds_l, dest_path, pca=None, colors=None, do_3d=True,
                     target_names=None):
    """Plot per-label density/scatter projections onto principal components.

    One figure showing the 1st vs. 2nd principal component is always written
    to ``dest_path``.  With ``do_3d`` two further figures (1st vs. 3rd and
    2nd vs. 3rd component) are written next to it, using the stem of
    ``dest_path`` plus the suffixes "_2.png" and "_3.png".

    Parameters
    ----------
    ds_d : np.array
        data in feature space, e.g. (#data, #feature)
    ds_l : np.array
        sparse labels, i.e. (#data, 1) or (#data, ); only the first column of
        a 2D array is used.  The label values are used directly as indices
        into ``colors`` and ``target_names``.
    dest_path : str
        file name of plot
    pca : PCA
        prefitted PCA object used to project ``ds_d``.  If None, a whitened
        3-component PCA is fitted on ``ds_d``.
    colors : list, optional
        One matplotlib color per label.  Defaults to r/g/b/y/k when there are
        exactly five target names, otherwise to samples of the "Accent"
        colormap.
    do_3d : bool
        Also plot the projections involving the third component.
    target_names : list of str, optional
        Legend names; defaults to the stringified unique labels.

    Returns
    -------
    PCA
        The PCA object that was used (fitted here if none was passed).
    """
    log_proc.info("Starting pca visualisation.")
    # pca vis
    paper_rc = {'lines.linewidth': 1, 'lines.markersize': 1}
    sns.set_context(rc=paper_rc)
    if ds_l.ndim == 2:
        ds_l = ds_l[:, 0]
    nb_labels = np.unique(ds_l)
    if pca is None:
        pca = PCA(3, whiten=True, random_state=0)
        pca.fit(ds_d)
    res = pca.transform(ds_d)

    # Resolve the names first: the default colors depend on how many names
    # there are, so defaulting the colors first fails when both are None.
    if target_names is None:
        target_names = ["%d" % i for i in nb_labels]
    if colors is None:
        if len(target_names) == 5:
            colors = ["r", "g", "b", "y", "k"]
        else:
            cmap = plt.cm.get_cmap("Accent", len(target_names))
            colors = [cmap(i) for i in range(len(target_names))]

    def _plot_components(ix_x, ix_y, out_path):
        # Draw one KDE + scatter figure of component ix_x against ix_y.
        plt.figure()
        plt.ylabel('$Z_%d$' % (ix_y + 1), fontsize=15)
        plt.xlabel('$Z_%d$' % (ix_x + 1), fontsize=15)
        for i in nb_labels:
            cur_pal = sns.light_palette(colors[i], as_cmap=True)
            points = res[ds_l == i]
            px, py = points[:, ix_x], points[:, ix_y]
            ax = sns.kdeplot(px, py, shade=False, cmap=cur_pal,
                             alpha=0.6, shade_lowest=False, gridsize=100)
            ax.patch.set_facecolor('white')
            ax.collections[0].set_alpha(0)
            plt.scatter(px, py, s=1.2, lw=0, alpha=0.5, color=colors[i],
                        label=target_names[i])
        # Explicit patches so the legend shows solid colors
        handles = [mpatches.Patch(color=colors[ii], label=target_names[ii])
                   for ii in range(len(target_names))]
        plt.legend(handles=handles, loc="best")
        plt.savefig(out_path, dpi=300)
        plt.close()

    # density plot 1st and 2nd PC
    _plot_components(0, 1, dest_path)
    if do_3d:
        stem = os.path.splitext(dest_path)[0]
        # density plot 1st and 3rd PC
        _plot_components(0, 2, stem + "_2.png")
        # density plot 2nd and 3rd PC
        _plot_components(1, 2, stem + "_3.png")
    return pca
def heatmap(df, color='green'):
    """Return ``df`` (NaNs replaced by 0) styled with a background gradient.

    The gradient colormap is a seaborn light palette built from ``color``.
    """
    styler = df.fillna(0).style
    gradient = sns.light_palette(color, as_cmap=True)
    return styler.background_gradient(cmap=gradient)
示例#28
0
    def hierarchical_clustering(self,
                                X=None,
                                markers=None,
                                filename_ext='',
                                export=True,
                                indices=None,
                                load_clustering=False,
                                method='ward',
                                distance='euclidean',
                                cluster_fusion=False):
        """Cluster the rows of ``X`` hierarchically and save a clustermap.

        The linkage is either computed from ``X`` or, with
        ``load_clustering``, read from a previously exported pickle in
        ``self.cluster_folder``.  The tree is cut into ``self.nb_clusters``
        clusters, optionally fused into the named populations listed in
        ``self.settings.cluster_fusion[self.settings.dataset]``, and the
        heatmap is written to 'clustering<filename_ext>.png'.

        Args:
            X: 2D array of samples x markers.  Required: ``X.copy()`` is
                called unconditionally, so the ``None`` default is unusable.
            markers: marker names to keep; each must be in ``self.markers``.
                ``None`` keeps all columns and labels them ``self.markers``.
            filename_ext: suffix inserted into the output file names.
            export: pickle the result (skipped when ``load_clustering``).
            indices: stored under 'indices' in the result.  With
                ``cluster_fusion`` and ``load_clustering`` it is replaced by
                the value from the loaded pickle.
            load_clustering: load the linkage instead of computing it.
            method, distance: forwarded to ``hierarchy.linkage``.
            cluster_fusion: merge clusters into named populations; clusters
                not listed in any population end up in an extra
                'not assigned' group.

        Returns:
            dict with keys 'res' (cluster id -> ``np.where`` tuple of row
            positions), 'colors', 'linkage' and 'indices'.

        Raises:
            ValueError: if ``cluster_fusion`` is requested but no indices
                are available.
        """

        Xs = X.copy()

        if markers is None:
            markers = self.markers
        else:
            marker_indices = np.array([self.markers.index(x) for x in markers])
            Xs = Xs[:, marker_indices]

        columns = markers
        df = pd.DataFrame(Xs, columns=columns)

        # color palette for clusters
        col_pal = sns.color_palette("husl", self.nb_clusters)

        # perform clustering
        if not load_clustering:
            full_res = None
            cluster_res = hierarchy.linkage(Xs, method=method, metric=distance)
        else:
            filename = os.path.join(
                self.cluster_folder,
                'cluster_assignment%s.pickle' % filename_ext)
            print('loading clustering from %s' % filename)
            # NOTE: pickle must only be used on trusted files; this one is
            # expected to have been written by a previous export of this method.
            with open(filename, 'rb') as fp:
                full_res = pickle.load(fp)
            cluster_res = full_res['linkage']

        # cut the tree
        ct = hierarchy.cut_tree(cluster_res, n_clusters=self.nb_clusters)
        cluster_assignment = ct.T[0]
        col_vec = np.array(col_pal)[ct.T[0]]

        # main result
        res = dict(
            zip(range(self.nb_clusters), [
                np.where(cluster_assignment == i)
                for i in range(self.nb_clusters)
            ]))

        if cluster_fusion:
            # A loaded clustering carries its own indices; without one
            # (full_res is None) fall back to the ``indices`` argument
            # instead of failing on ``None['indices']``.
            if full_res is not None:
                indices = full_res['indices']
            if indices is None:
                raise ValueError(
                    'It is not possible to make cluster fusions and downsampling.'
                )
            fused_clusters = {}
            fusion_info = self.settings.cluster_fusion[self.settings.dataset]
            cluster_names = list(fusion_info.keys())

            # Every original cluster starts in the extra "not assigned"
            # group, whose id is len(cluster_names).
            fused_cluster_assignment_indices = np.zeros(
                self.nb_clusters, dtype=np.uint8) + len(cluster_names)
            for k, population_name in enumerate(cluster_names):
                fused_clusters[k] = (np.hstack(
                    [res[i][0] for i in fusion_info[population_name]]), )
                fused_cluster_assignment_indices[
                    fusion_info[population_name]] = k

            cluster_assignment = fused_cluster_assignment_indices[
                cluster_assignment]
            # white is appended as the color of the "not assigned" group
            col_pal = sns.color_palette("husl", len(fusion_info)) + [(1, 1, 1)]
            col_vec = np.array(col_pal)[cluster_assignment]
            res = fused_clusters
            res[len(cluster_names)] = np.where(
                cluster_assignment == len(cluster_names))
            filename_ext = '%s_cluster_fusion' % filename_ext

        cmap = sns.light_palette("navy", as_cmap=True)

        print('starting clustering/heatmap generation ... ')
        g = sns.clustermap(df,
                           row_linkage=cluster_res,
                           robust=True,
                           cmap=cmap,
                           col_cluster=False,
                           yticklabels=False,
                           row_colors=col_vec)
        print('clustering/heatmap generation succeeded ... ')

        print('starting legend ... ')
        # List the clusters in the order they appear along the dendrogram
        indices_ordered = g.dendrogram_row.reordered_ind
        ordered_cluster_labels = cluster_assignment[indices_ordered]
        cluster_order = list(dict.fromkeys(ordered_cluster_labels))

        # legend for class colors (zero-sized bars serve as legend handles)
        for label in cluster_order:
            if not cluster_fusion:
                g.ax_col_dendrogram.bar(0,
                                        0,
                                        color=col_pal[label],
                                        label='%i(%i)' %
                                        (label, len(res[label][0])),
                                        linewidth=0)
            else:
                if label < len(cluster_names):
                    cluster_name = cluster_names[label]
                else:
                    cluster_name = 'not assigned'
                g.ax_col_dendrogram.bar(0,
                                        0,
                                        color=col_pal[label],
                                        label='%s(%i)' %
                                        (cluster_name, len(res[label][0])),
                                        linewidth=0)
        if cluster_fusion:
            legend_ncol = 3
        else:
            legend_ncol = 5

        g.ax_col_dendrogram.legend(loc="center", ncol=legend_ncol)

        # to avoid an overlap of this HUGE legend with the heatmap, grow the
        # legend axes with the number of legend rows.
        dendro_col = g.ax_col_dendrogram.get_position()
        standard_height = 0.18
        new_height = max(
            standard_height / 4.0 * (len(res) // legend_ncol) -
            standard_height, standard_height)
        g.ax_col_dendrogram.set_position(
            [dendro_col.x0, dendro_col.y0, dendro_col.width, new_height])

        print('saving figure ... ')
        g.savefig(
            os.path.join(self.cluster_folder,
                         'clustering%s.png' % filename_ext))

        full_res = {
            'res': res,
            'colors': col_pal,
            'linkage': cluster_res,
            'indices': indices
        }
        if export and not load_clustering:
            print('exporting results ... ')
            filename = os.path.join(
                self.cluster_folder,
                'cluster_assignment%s.pickle' % filename_ext)
            with open(filename, 'wb') as fp:
                pickle.dump(full_res, fp)

        return full_res
示例#29
0
    def plot_position(self,
        ref_id:str,
        pos:int=None,
        split_samples:bool=False,
        figsize:tuple=(30,10),
        palette:str="Set2",
        plot_style:str="ggplot",
        xlim:tuple=(None,None),
        ylim:tuple=(None,None),
        alpha:float=0.3,
        pointSize:int=20,
        scatter:bool=True,
        kde:bool=True,
        model:bool=False,
        gmm_levels:int=50):
        """
        Scatter/KDE plot of scaled median intensity against scaled log10 dwell time
        for one position of a reference. Returns the tuple (fig, ax).
        * ref_id
            Reference id used to index the database (``self[ref_id]``)
        * pos
            Position of interest. Must be an int
        * split_samples
            If True, every sample of a condition is drawn separately. If False, the
            samples of a condition are concatenated
        * figsize
            Length and height of the output plot
        * palette
            Colormap. See https://matplotlib.org/users/colormaps.html, https://matplotlib.org/examples/color/named_colors.html
        * plot_style
            Matplotlib plotting style. See https://matplotlib.org/users/style_sheets.html
        * xlim
            A tuple of explicit limits for the x axis
        * ylim
            A tuple of explicit limits for the y axis
        * alpha
            Transparency of the scatter points and of the GMM contour lines
        * pointSize
            int specifying the point size for the scatter plot
        * scatter
            if True, plot the individual data points
        * kde
            if True, plot the bivariate KDE of intensity/dwell for each group
        * model
            If True, overlay the stored GMM density estimate when one is available
            for the position
        * gmm_levels
            number of contour lines to use for the GMM contour plot
        """
        # Data for the requested reference
        transcript = self[ref_id]

        # Validate the position before touching the data
        if not isinstance(pos, int):
            raise NanocomporeError("pos must be a single position")
        if pos > len(transcript):
            raise NanocomporeError("Position out of range")

        ref_kmer = transcript[pos]['ref_kmer']
        data = transcript[pos]['data']

        # One color per sample when splitting, otherwise one per condition (2)
        n_colors = self._metadata["n_samples"] if split_samples else 2
        col_gen = self.__color_generator(palette=palette, n=n_colors)

        # Gather the scaled values of every group to draw, in insertion order
        plot_data_dict = OrderedDict()
        for cond_lab, cond_dict in transcript[pos]["data"].items():
            if not split_samples:
                # Pool all the samples of the condition
                pooled_intensity = [s["intensity"] for s in cond_dict.values()]
                pooled_dwell = [s["dwell"] for s in cond_dict.values()]
                plot_data_dict[cond_lab] = {
                    "intensity":scale(np.concatenate(pooled_intensity)),
                    "dwell":scale(np.log10(np.concatenate(pooled_dwell))),
                    "color":next(col_gen)}
                continue
            for samp_lab, sample_val in cond_dict.items():
                group_name = "{}_{}".format(cond_lab, samp_lab)
                plot_data_dict[group_name] = {
                    "intensity":scale(sample_val["intensity"]),
                    "dwell":scale(np.log10(sample_val["dwell"])),
                    "color":next(col_gen)}

        # Evaluate the GMM on a regular grid if requested and available
        gmm = None
        if model and 'txComp' in transcript[pos] and 'GMM_model' in transcript[pos]['txComp']:
            gmm = transcript[pos]['txComp']['GMM_model']['model']
            if not isinstance(gmm, GaussianMixture):
                raise NanocomporeError("The GMM_model slot for this position is not an instance of the GaussianMixture class")
            condition_labels = tuple(data.keys())

            def _pooled(field):
                # Flattened values of `field` over the samples of the first two conditions
                first = [v[field] for v in data[condition_labels[0]].values()]
                second = [v[field] for v in data[condition_labels[1]].values()]
                return np.concatenate((first + second), axis=None)

            global_intensity = scale(_pooled('intensity'))
            global_dwell = scale(np.log10(_pooled('dwell')))
            grid_x = np.linspace(min(global_intensity), max(global_intensity), num=1000)
            grid_y = np.linspace(min(global_dwell), max(global_dwell), num=1000)
            X, Y = np.meshgrid(grid_x, grid_y)
            XX = np.array([X.ravel(), Y.ravel()]).T
            # Negative log-likelihood surface, shaped like the grid
            Z = (-gmm.score_samples(XX)).reshape(X.shape)

        # Draw everything with the requested style
        with pl.style.context(plot_style):
            fig, ax = pl.subplots(figsize=figsize)

            for label, d in plot_data_dict.items():
                intensity, dwell = d["intensity"], d["dwell"]
                if kde:
                    sns.kdeplot(
                        data=intensity,
                        data2=dwell,
                        cmap=sns.light_palette(d["color"], as_cmap=True),
                        ax=ax,
                        clip=((min(intensity), max(intensity)), (min(dwell), max(dwell))))
                if scatter:
                    ax.scatter(
                        x=intensity,
                        y=dwell,
                        color=d["color"],
                        label=label,
                        alpha=alpha,
                        s=pointSize)
            if gmm:
                ax.contour(X, Y, Z, levels=gmm_levels, alpha=alpha, colors="black")

            # Titles, axis labels, limits and legend
            ax.set_title("%s\n%s (%s)"%(ref_id,pos, ref_kmer))
            ax.set_ylabel("log10 (Dwell Time)")
            ax.set_xlabel("Median Intensity")
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            ax.legend()
            pl.tight_layout()

            return (fig, ax)
示例#30
0
def plot_series_statistics(observed=None,
                           expected=None,
                           total_stdev=None,
                           explained_stdev=None,
                           color_set='Set2',
                           xscale="linear",
                           yscale="linear",
                           xlabel="feature",
                           ylabel="value",
                           y_cutoff=None,
                           sort_by='expected',
                           sort_ascending=True,
                           despine=True,
                           legend_enable=True,
                           legend_title=None,
                           legend_loc='best',
                           alpha=None,
                           markersize=1.0,
                           linewdith=1.2,
                           fontsize=8,
                           ax=None,
                           title=None,
                           return_handles=False,
                           return_indices=False):
  """ Plot an observed series against its expectation, optionally with
  shaded standard-deviation bands around the expectation.

  Two calls can share one figure (e.g. the second one drawing on
  `ax.twinx()`), so that two comparable series are shown on the left
  and the right y-axes (see Example).

  Parameters
  ----------
  observed, expected : series or None
      drawn as markers (observed) and as a line (expected); both are
      first passed through `_preprocess_series`
  total_stdev, explained_stdev : series or None
      if given, the band `expected -/+ stdev` is filled
  color_set : str, tuple or list
      a tuple/list is unpacked into exactly 4 colors (observed,
      expected, total stdev, explained stdev); anything else is given
      to `seaborn.color_palette` as the palette name
  xscale, yscale : {"linear", "log", "symlog", "logit", ...}
      text or instance in `matplotlib.scale`
  xlabel, ylabel : str
      axis labels, prefixed by the scale name in brackets
  y_cutoff : scalar or None
      upper y-limit when `yscale == "linear"`, lower y-limit when
      `yscale == "log"`; ignored for any other scale
  sort_by, sort_ascending
      forwarded to `_get_sort_indices`, which decides the order in which
      the points are drawn along the x-axis
  despine : bool (default: True)
      if True, remove the top and right spines from plot,
      otherwise, only remove the top spine
  legend_enable : bool
      if True, draw the legend on `ax`
  legend_title : str or None
      if given, an invisible handle labelled "$<legend_title>$" is put
      first in the handle list
  legend_loc : str
      `loc` argument of the legend
  alpha : scalar or None
      transparency of the stdev bands only
  markersize : scalar
      marker size of the observed points
  linewdith : scalar
      line width of the expected line (the misspelled name is kept so
      that existing keyword callers continue to work)
  fontsize : scalar
      font size for labels, ticks, legend and title
  ax : axes or None
      converted by `to_axis2D`
  title : str or None
      axes title
  return_handles, return_indices : bool
      append the legend handles / the sort indices to the returned value

  Returns
  -------
  `ax` alone, or the tuple `(ax[, handles][, sort_indices])` when any of
  the `return_*` flags is set.

  Example
  -------
  >>> import numpy as np
  >>> from matplotlib import pyplot as plt
  >>> np.random.seed(1234)
  >>> x = np.random.randn(8000)
  >>> y = np.random.randn(8000)
  ...
  >>> z = np.random.rand(8000) + 3
  >>> w = np.random.rand(8000) + 3
  ...
  >>> ax, handles1 = V.plot_series_statistics(observed=x, expected=y,
  ...                                        explained_stdev=np.std(x),
  ...                                        total_stdev=np.std(y),
  ...                                        color_set='Set1',
  ...                                        legend_enable=False, legend_title="Series_1",
  ...                                        return_handles=True)
  >>> _, handles2 = V.plot_series_statistics(observed=z, expected=w,
  ...                                        explained_stdev=np.std(z),
  ...                                        total_stdev=np.std(w),
  ...                                        color_set='Set2',
  ...                                        legend_enable=False, legend_title="Series_2",
  ...                                        return_handles=True,
  ...                                        ax=ax.twinx(), alpha=0.2)
  >>> plt.legend(handles=handles1 + handles2, loc='best', fontsize=8)
  """
  import seaborn
  # importing the submodule guarantees `matplotlib.patches` is loaded
  import matplotlib.patches

  ax = to_axis2D(ax)
  observed, expected, total_stdev, explained_stdev = _preprocess_series(
      observed, expected, total_stdev, explained_stdev)
  # ====== color palette ====== #
  if isinstance(color_set, (tuple, list)):
    observed_color, expected_color, \
    expected_total_standard_deviations_color, \
    expected_explained_standard_deviations_color = color_set
  else:
    standard_palette = seaborn.color_palette(color_set, 8)
    observed_color = standard_palette[0]
    # all "expected" colors are shades of the second palette color
    expected_palette = seaborn.light_palette(standard_palette[1], 5)
    expected_color = expected_palette[-1]
    expected_total_standard_deviations_color = expected_palette[1]
    expected_explained_standard_deviations_color = expected_palette[3]
  # ====== prepare ====== #
  sort_indices = _get_sort_indices(observed, expected, sort_by, sort_ascending)
  # x positions are 1-based ranks
  indices = np.arange(
      len(observed) if observed is not None else len(expected)) + 1
  handles = []
  # ====== series title ====== #
  if legend_title is not None:
    # empty, invisible line: only used to carry a title row in the legend
    title_handle, = ax.plot([],
                            marker='None',
                            linestyle='None',
                            label="$%s$" % legend_title)
    handles.append(title_handle)
  # ====== plotting expected and observed ====== #
  if observed is not None:
    observed_handle, = ax.plot(indices,
                               observed[sort_indices],
                               label="Observations",
                               color=observed_color,
                               linestyle="",
                               marker="o",
                               zorder=2,
                               markersize=markersize)
    handles.append(observed_handle)
  if expected is not None:
    expected_handle, = ax.plot(indices,
                               expected[sort_indices],
                               label="Expectation",
                               color=expected_color,
                               linestyle="-",
                               marker="",
                               zorder=3,
                               linewidth=linewdith)
    handles.append(expected_handle)
  # ====== plotting stdev ====== #
  bands = (
      (total_stdev, "Stdev(Total)",
       expected_total_standard_deviations_color, 0),
      (explained_stdev, "Stdev(Explained)",
       expected_explained_standard_deviations_color, 1),
  )
  for stdev, band_label, band_color, band_zorder in bands:
    if stdev is None:
      continue
    lower = expected - stdev
    upper = expected + stdev
    ax.fill_between(
        indices,
        lower[sort_indices],
        upper[sort_indices],
        color=band_color,
        zorder=band_zorder,
        alpha=alpha,
    )
    # fill_between returns a collection; use a plain patch as legend proxy
    handles.append(matplotlib.patches.Patch(label=band_label,
                                            color=band_color))
  # ====== legend ====== #
  if legend_enable:
    ax.legend(handles=handles, loc=legend_loc, fontsize=fontsize)
  # ====== adjusting ====== #
  # restrict despine to the axes drawn on, not the whole current figure
  seaborn.despine(ax=ax, top=True, right=bool(despine))
  if yscale == "log":
    # Clipping of non-positive values only exists for the log scale. The
    # keyword is `nonpositive` in current matplotlib and `nonposy` in old
    # releases, which reject the new spelling.
    try:
      ax.set_yscale(yscale, nonpositive="clip")
    except (TypeError, ValueError):
      ax.set_yscale(yscale, nonposy="clip")
  else:
    ax.set_yscale(yscale)
  ax.set_ylabel('[%s]%s' % (yscale, ylabel), fontsize=fontsize)
  ax.set_xscale(xscale)
  if sort_by is not None:
    sort_note = ' (sorted by "%s")' % str(sort_by).lower()
  else:
    sort_note = ''
  ax.set_xlabel('[%s]%s%s' % (xscale, xlabel, sort_note),
                fontsize=fontsize)
  # ====== set y-cutoff ====== #
  y_min, y_max = ax.get_ylim()
  if y_cutoff is not None:
    if yscale == "linear":
      y_max = y_cutoff
    elif yscale == "log":
      y_min = y_cutoff
  ax.set_ylim(y_min, y_max)
  ax.tick_params(axis='both', labelsize=fontsize)
  # ====== title ====== #
  if title is not None:
    ax.set_title(title, fontsize=fontsize, fontweight='bold')
  ret = [ax]
  if return_handles:
    ret.append(handles)
  if return_indices:
    ret.append(sort_indices)
  return ax if len(ret) == 1 else tuple(ret)
示例#31
0
from dataclasses import dataclass, field

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Base colours as hex RGB strings.
GREEN = "#679436"
BLUE = "#4F6DB8"
# With as_cmap=False this is a discrete list of 10 colours built from BLUE,
# not a matplotlib colormap object, despite the *_CMAP name.
BLUE_CMAP = sns.light_palette(BLUE, n_colors=10, reverse=False, as_cmap=False)
# Alias for the palette defined above.
CMAP = BLUE_CMAP

# Display labels; note that they contain units and embedded newlines.
# NOTE(review): how these map onto the data rows is not visible here.
OUTPUTS = [
    "System cost (€)", "Solar (MW)", "Wind (MW)", "Bioenergy (MW)",
    "Storage (MW)", "Storage (MWh)"
]
DIFF_OUTPUTS = [
    "System cost", "Total supply\ncapacity", "Total balancing\ncapacity"
]

ROW_INDEX = [
    "y-continental-scale-cost-eur", "y-national-scale-cost-eur",
    "y-cost-diff-eur", "y-cost-diff-relative", "y-supply-diff-relative",
    "y-wind-diff-relative", "y-balancing-diff-relative",
    "y-continental-scale-pv-gw", "y-national-scale-pv-gw",
    "y-continental-scale-wind-gw", "y-national-scale-wind-gw",
    "y-continental-scale-hydro-gw", "y-national-scale-hydro-gw",
    "y-continental-scale-biofuel-gw", "y-national-scale-biofuel-gw",
    "y-continental-scale-storage-gw", "y-national-scale-storage-gw",
    "y-continental-scale-storage-gwh", "y-national-scale-storage-gwh",
    "y-continental-scale-transmission-gwkm", "y-regional-scale-cost-eur",
    "y-regional-scale-pv-gw", "y-regional-scale-wind-gw",
示例#32
0
def set_legend(count_df,
               ax,
               color_species=None,
               color_gradient='red',
               size='xx-small'):
    '''
    Function that plots the legend of the heatmap and of the barplot:
    a horizontal colour bar (0-100 %) in an inset at the top of ``ax`` and
    two colour patches (Proteobacteria / rest of the dataset) at the bottom.
    All ticks, tick labels and the left/bottom spines of ``ax`` are removed.

    :param count_df: Number of genomes for each phylum; must be indexable
        with ``.loc`` by 'Gammaproteobacteria', 'Betaproteobacteria' and
        'Alphaproteobacteria'
    :type: pandas.DataFrame
    :param ax: The axes on which to plot the sub figure
    :type: matplotlib.axes.Axes
    :param color_species: The two colors of the legend patches
        (Proteobacteria first, rest second); ``None`` or an empty sequence
        selects the two built-in light-blue colors
    :type: list of rgb color
    :param color_gradient: color for the gradient used in the heatmap
    :type: color in str, rgb or hex
    :param size: font size used for the title, the ticks and the legend
    :type: str or number accepted by matplotlib
    :return: None
    '''

    axins = inset_axes(
        ax,
        width="50%",  # width = 50% of parent_bbox width
        height="15%",  # height : 15%
        loc='upper center')

    axins.set_title(label='Colour key (% of genomes with at least one genes)',
                    fontdict={'fontsize': size})

    # gradient legend, drawn inside the inset axes
    cmap = sns.light_palette(color_gradient, as_cmap=True)

    cbar = plt.colorbar(
        ScalarMappable(cmap=cmap),
        cax=axins,
        orientation='horizontal',
    )

    cbar.ax.tick_params(labelsize=size)

    # the colour bar spans 0..1 and is labelled as percentages
    cbar.set_ticks([0, 0.25, 0.5, 0.75, 1])
    cbar.set_ticklabels(["0", "25", "50", "75", '100'])

    # the host axes only carries the legend: hide ticks, labels and spines
    ax.tick_params(
        axis='both',
        which='both',
        left=False,
        bottom=False,
        labelleft=False,
        labelbottom=False,
    )

    sns.despine(ax=ax, left=True, bottom=True)

    # genome counts of the two groups shown as colour patches
    proteo_row = 'Proteobacteria (𝛼, 𝛽, 𝛾)'
    rest_row = 'Rest'
    mini_tab = pd.DataFrame(0,
                            index=[proteo_row, rest_row],
                            columns=["Count"])
    mini_tab.loc[proteo_row] = (count_df.loc['Gammaproteobacteria'] +
                                count_df.loc['Betaproteobacteria'] +
                                count_df.loc['Alphaproteobacteria'])
    mini_tab.loc[rest_row] = count_df.sum() - mini_tab.loc[proteo_row]

    # None/empty -> default colors; len() also works for array inputs
    if color_species is None or len(color_species) == 0:
        color = [(0.65098041296005249, 0.80784314870834351,
                  0.89019608497619629, 1.0),
                 (0.3997693305214246, 0.6478123867044262, 0.80273742044673246,
                  1.0)]
    else:
        color = color_species

    # scalar lookups: int() on a 1-element array is deprecated in NumPy
    legend = [
        r'Proteobacteria ($\alpha$, $\beta$, $\gamma$) ({} genomes)'.format(
            int(mini_tab.loc[proteo_row, "Count"])),
        'Rest of the dataset ({} genomes)'.format(
            int(mini_tab.loc[rest_row, "Count"]))
    ]

    handles = [
        mpatches.Patch(color=patch_color, label=patch_label)
        for patch_color, patch_label in zip(color, legend)
    ]

    ax.legend(handles=handles,
              frameon=False,
              fontsize=size,
              loc='lower center')
              for k in keys])
# Reorder the axes of X (last axis first, first axis last), then smooth it
# with `moving_average` along axis 2.
X = np.transpose(X, (-1, 1, 2, 0))
X = moving_average(X, ma_window_size, axis=2)

# Figure 3: a 2x2 grid with one panel per entry of `keys`; inside a panel,
# one curve per slice along the first axis of X.
if 3 in to_plot:
    fig, axes = plt.subplots(2, 2, figsize=(16, 12), sharex=False,
                             sharey=False)
    axes = axes.ravel()
    n_yticks = [0, 200, 400, 700]
    for i, (key, color) in enumerate(zip(keys, colors_context)):
        ax = axes[i]
        ax.set_ylim(-100, 700)
        # panel letter (a-d), placed just above the upper y-limit
        ax.text(50, 710, 'abcd'[i], weight='bold', fontsize=24)
        # `degree` counts from 1; shades come from the reversed light
        # palette of the panel color, so the first slice gets the last
        # (full-strength) palette entry
        for degree, (x, subcolor) in enumerate(
                zip(X,
                    sns.light_palette(color, len(X))[::-1]), 1):
            # every `decim`-th sample along the first axis of x, restricted
            # to column i of the last axis (kept as a length-1 axis)
            sns.tsplot(rescale(x[::decim, :, i:i + 1] + 1, axis=1),
                       color=subcolor, condition='degree %i' % degree,
                       ax=ax, ci=ci)
        # plt.xticks(times, times[::1000])
        # only the first panel gets a y-label
        if i == 0:
            ax.set_ylabel('Relative change in CP [percent]', labelpad=10)
        ax.set_title(mapping[key])
        ax.set_xlim(0, max(n_xticks))
        ax.set_xticks(n_xticks)
        # NOTE(review): the second positional argument of set_yticks means
        # `minor` in older matplotlib and `labels` in newer releases --
        # confirm that blank tick labels are what is intended here.
        if i in (1, 3):
            ax.set_ylabel('')
            ax.set_yticks(n_yticks, ['', '', '', ''])
        if i in (0, 1):
            ax.set_xlabel('')
            ax.set_yticks(n_yticks, ['', '', '', ''])
# NSE score of the "Kustas" model output against the reference series, one
# entry per flux. NOTE(review): NSE is presumably the Nash-Sutcliffe
# efficiency -- confirm against its definition.
Diff_stats['NSE_Kustas'] = {
    'Rn'    : NSE(Rn_EC, Rn_mod_Kustas),
    'G'     : NSE(G_EC, G_mod_Kustas),
    'H_BR'  : NSE(H_EC_corr_EB, H_mod_Kustas),
    'LE_BR' : NSE(LE_EC_corr_EB, LE_mod_Kustas)} 
    
# Table with one column per listed key of Diff_stats. Note the different
# capitalisation: `diff_Stats` is the DataFrame, `Diff_stats` is the dict.
diff_Stats = pd.DataFrame(Diff_stats, columns = ['NSE_kB23', 'NSE_Lhomme', 
             'NSE_Kustas'])

#%% Plotting against raw EC fluxes

# Default colour cycle, given as hex RGB strings.
colors = ['#3498db', '#2ecc71', '#f7cf33', '#fc9d1d','#fd484d', '#9b59b6', '#51677b']
pal = sns.color_palette(colors)

# Light and dark sequential palettes for four base colours; the tuples are
# interpreted in the HUSL colour space (input="husl").
# http://www.husl-colors.org/
pal_red_light = sns.light_palette((11.4, 97.4, 58.1), input="husl")
pal_red_dark = sns.dark_palette((11.4, 97.4, 58.1), input="husl")
pal_blue_light = sns.light_palette((242.2, 90.1, 60.2), input="husl")
pal_blue_dark = sns.dark_palette((242.2, 90.1, 60.2), input="husl")
pal_orange_light = sns.light_palette((41.2, 96.8, 72.8), input="husl")
pal_orange_dark = sns.dark_palette((41.2, 96.8, 72.8), input="husl")
pal_green_light = sns.light_palette((137.9, 93.2, 72.9), input="husl")
pal_green_dark = sns.dark_palette((137.9, 93.2, 72.9), input="husl")

# Global seaborn/matplotlib defaults: paper context, white style, the
# palette above, plus explicit font, figure and tick sizes.
sns.set(context = "paper", style = 'white',  palette = pal,
        rc = {'axes.labelsize': 18.0, 'figure.figsize': [14, 7], 
              'legend.fontsize': 16.0, 'xtick.labelsize': 18.0,
              'ytick.labelsize': 18.0, 'xtick.major.size': 4.0,
              'ytick.major.size': 4.0})

# Options: EC_raw,
示例#35
0
def corrplots_from_flatmat(ipfile,opname):
    """Plot a scan-by-scan matrix as an annotated, non-clustered heatmap.

    The matrix is read from `ipfile`, filtered, re-ordered and cached as a
    CSV next to the figure (`opname` with '.png' replaced by '.csv'). When
    that CSV already exists it is loaded instead of being rebuilt. The
    heatmap is saved to `opname`.

    Row/column labels are expected to be '-'-separated strings with at
    least three fields; the three fields become the levels of a MultiIndex
    and drive the green / blue / orange annotation bars (named subject,
    session and scan in the code).

    Args:
        ipfile: path of a CSV holding a square matrix whose first column is
            the row index; only the part after the last '/' of every label
            is kept.
        opname: output path of the figure; its directory is created when
            missing.

    Raises:
        Exception: when, after filtering, the set of row labels differs
            from the set of column labels.
    """
    csvname = opname.replace('.png', '.csv')

    if not os.path.isfile(csvname):
        print('### Loading and unstacking corrmat')
        corrmat = pd.read_csv(ipfile, index_col=0)
        # long format: one (column label, row label, value) triple per cell;
        # done in memory instead of a round-trip through a temporary CSV
        flatmat = corrmat.unstack().reset_index()
        flatmat.columns = ['u1', 'u2', 'p']

        print('### Loading QCMat')
        qcmat = pd.read_csv('/data/ss_nifti/analysis_res/cpac_motionop_qc.csv')

        # keep only the part of each label after the last '/'
        flatmat['u1'] = flatmat.u1.str.split('/').str[-1]
        flatmat['u2'] = flatmat.u2.str.split('/').str[-1]

        flatmat = flatmat[~flatmat.u1.str.contains('unknown')]
        flatmat = flatmat[~flatmat.u2.str.contains('unknown')]

        mdf = flatmat
        print('##Excluding Stuff')
        ## Exclusion Criteria
        # scans whose QC columns `Signal` or `Registration` are 0
        failed_qc = list(qcmat['subseshscan'][qcmat.Signal == 0].values)
        failed_qc += list(qcmat['subseshscan'][qcmat.Registration == 0].values)
        mdf = mdf[~mdf.u1.isin(failed_qc)]
        mdf = mdf[~mdf.u2.isin(failed_qc)]
        # hard-coded sessions, subjects and single scans to leave out
        excluded_patterns = (
            'ssc_8',
            'ssc_1-',
            'M00475776',
            'M00448814',
            'M00421916',
            'M00499588-ssc_7-_scan_inscapes',
            'M00499588-ssc_7-_scan_flanker',
        )
        for pattern in excluded_patterns:
            mdf = mdf[~mdf.u1.str.contains(pattern)]
            mdf = mdf[~mdf.u2.str.contains(pattern)]

        # positional row numbers are used as labels below
        mdf = mdf.reset_index(drop=True)

        unqvals1 = set(mdf.u1.values)
        unqvals2 = set(mdf.u2.values)

        # a label missing on either side could not be placed in the matrix
        if unqvals1 != unqvals2:
            raise Exception('Columns dont match')

        print('###Sorting data')
        # successive sorts on fields 1, 0, 2: the last pass is the primary
        # key, ties keep the order of the earlier passes
        unqvalssort = list(unqvals1)
        for field in (1, 0, 2):
            unqvalssort = natsorted(
                unqvalssort, key=lambda s, f=field: s.split('-')[f])

        # (u1, row number, u2) triples, ordered the same way on u2 and then
        # on u1; the order only decides which value wins should a
        # (u1, u2) pair occur more than once
        temp = list(zip(mdf.u1.values, range(len(mdf)), mdf.u2.values))
        for pos in (2, 0):
            for field in (1, 0, 2):
                temp = natsorted(
                    temp, key=lambda t, p=pos, f=field: t[p].split('-')[f])

        print('### Making new DF')
        # label -> matrix position; replaces a list.index() scan per cell
        position = dict((name, i) for i, name in enumerate(unqvalssort))
        nscans = len(unqvalssort)
        newarr = np.zeros((nscans, nscans))
        pvals = mdf.p.values
        ##Create New CorrMat
        for u1, row, u2 in temp:
            newarr[position[u1], position[u2]] = pvals[row]

        newdf = pd.DataFrame(newarr, columns=unqvalssort, index=unqvalssort)
        newdf.to_csv(csvname)

    else:
        print('Already have mat!!!')
        newdf = pd.read_csv(csvname, index_col=0)

    # a cached matrix may still contain this subject: drop its rows and
    # columns (same labels on both axes)
    dropped = [c for c in newdf.columns if 'M00475776' in c]
    newdf = newdf.drop(dropped, axis=0)
    newdf = newdf.drop(dropped, axis=1)

    print('##Shape @@@@ %s' % (newdf.shape,))

    newcols = [tuple(n) for n in newdf.columns.str.split('-')]
    newinds = [tuple(n) for n in newdf.index.str.split('-')]

    newdf.columns = pd.MultiIndex.from_tuples(newcols)
    newdf.index = pd.MultiIndex.from_tuples(newinds)

    def _level_colors(level, base_color):
        """Return (labels, label -> color dict, per-column colors) of a level."""
        labels = newdf.columns.get_level_values(level)
        palette = sns.light_palette(base_color,
                                    n_colors=labels.unique().size)
        lut = dict(zip(map(str, labels.unique()), palette))
        return labels, lut, pd.Series(labels).map(lut)

    _, _, subject_colors = _level_colors(0, 'green')
    _, _, session_colors = _level_colors(1, 'blue')
    scan_labels, scan_lut, scan_colors = _level_colors(2, 'orange')

    rowcols = [subject_colors, session_colors, scan_colors]
    colcols = [subject_colors, session_colors, scan_colors]

    print("###### Generating Heatmap ######")
    g = sns.clustermap(newdf, row_cluster=False, col_cluster=False,
                       xticklabels=False, yticklabels=False,
                       row_colors=rowcols, col_colors=colcols,
                       vmin=0, vmax=1, cmap='Reds')

    # zero-size bars serve as legend handles for the scan colors
    for label in scan_labels.unique():
        g.ax_col_dendrogram.bar(0, 0, color=scan_lut[label], label=label,
                                linewidth=0)
    g.ax_col_dendrogram.legend(loc="upper center", ncol=2)

    # opname without a directory part needs no directory to be created
    opdir = os.path.dirname(opname)
    if opdir and not os.path.isdir(opdir):
        print("### Making Directory ####")
        os.makedirs(opdir)

    print("###### Saving fig to "+opname+" ######")
    g.savefig(opname, dpi=300)
    # no plt.cla() afterwards: it would create a new empty figure
    plt.close()
# Score the fitted naive Bayes model on the train, validation and test
# splits. Each call is expected to return a two-element list
# [accuracy, AUROC], matching the column pairs of the table below.
output_train_nb = accuracy_metrics(gnb_model, y_train, x_train)
output_valid_nb = accuracy_metrics(gnb_model, y_valid, x_valid)
output_test_nb = accuracy_metrics(gnb_model, y_test, x_test)

# The three lists are concatenated into one row of six values.
result_nb.append(output_train_nb + output_valid_nb + output_test_nb)

s_nb = pd.DataFrame(result_nb,
                    columns=[
                        'Traindata Accuracy', 'Traindata AUROC',
                        'Validationdata Accuracy', 'Validationdata AUROC',
                        'Testdata Accuracy', 'Testdata AUROC'
                    ])

import seaborn as sns

# Continuous light-to-steelblue colormap for shading the table cells.
cm = sns.light_palette("steelblue", as_cmap=True)

# Styler object; the bare expression on the last line only has an effect
# in an interactive/notebook session, where it is rendered.
Final_output_nb = s_nb.style.background_gradient(cmap=cm)
Final_output_nb
"""**Decision Tree**"""

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from IPython.display import HTML


def accuracy_metrics(model, true_label, data):
    """Score `model` on `data` against `true_label`.

    The model's `predict` output is used for both metrics.

    Returns:
        A two-element list `[accuracy, roc_auc]`.
    """
    predicted = model.predict(data)
    return [
        accuracy_score(true_label, predicted),
        roc_auc_score(true_label, predicted),
    ]
# pairwise distances between all data points
D = pairwise_distances(X,squared=True)

# Similarity with constant sigma
P_constant = _joint_probabilities_constant_sigma(D, .002)

# Similarity with variable sigma
# NOTE(review): the arguments are presumably (distances, perplexity,
# verbose) -- confirm against the helper's definition.
P_binary = _joint_probabilities(D, 30., False)

# output of this function needs to be reshaped to a square matrix
P_binary_s = squareform(P_binary)

# plot the three matrices side by side, showing every 10th row and column
plt.figure(figsize=(12,4))
pal = sns.light_palette("blue",as_cmap=True)

plt.subplot(131)
plt.imshow(D[::10, ::10], interpolation='none',cmap=pal)
plt.axis('off')
plt.title("Distance matrix", fontdict={'fontsize': 16})

# raw strings: "\s" is not a valid escape sequence in a normal string
# literal; the resulting text is unchanged
plt.subplot(132)
plt.imshow(P_constant[::10,::10],interpolation='none',cmap=pal)
plt.axis('off')
plt.title(r"$p_{j|i}$ (constant $\sigma$)",fontdict={'fontsize': 16})

plt.subplot(133)
plt.imshow(P_binary_s[::10,::10],interpolation='none',cmap=pal)
plt.axis('off')
plt.title(r"$p_{j|i}$ (variable $\sigma$)",fontdict={'fontsize': 16})
示例#38
0
    return ((raw_data - np.mean(raw_data, axis = 0)) / np.std(raw_data, axis = 0))


# In[65]:


# Replace the numeric columns by their standardized values.
df2[numerics] = standardize(df2[numerics])
# NOTE(review): `import scipy` alone may not load the `scipy.stats`
# submodule on older SciPy releases -- confirm `sp.stats` resolves here.
import scipy as sp
# Keep only the rows in which every numeric column has |z-score| < 3.
df2 = df2[(np.abs(sp.stats.zscore(df2[numerics])) < 3).all(axis=1)]


# In[66]:


from matplotlib.colors import ListedColormap
# 50-step light palette (base color given in HUSL space) as a colormap.
my_cmap = ListedColormap(sns.light_palette((250, 100, 50), input="husl", n_colors=50).as_hex())
# Pearson correlation of all remaining columns, without the two id columns.
table = df2.drop(['patient_nbr', 'encounter_id'], axis=1).corr(method='pearson')
# Builds a column-wise shaded Styler; the result is not assigned, so it is
# only visible when rendered by an interactive/notebook session.
table.style.background_gradient(cmap=my_cmap, axis = 0)


# In[67]:


df2['level1_diag1'] = df2['level1_diag1'].astype('object')
# One-hot encode the listed categorical columns, dropping the first level
# of each (drop_first=True).
df_pd = pd.get_dummies(df2, columns=['gender', 'admission_type_id', 'discharge_disposition_id',
                                      'admission_source_id', 'max_glu_serum', 'A1Cresult', 'level1_diag1'], drop_first = True)
# 'race' is encoded separately and keeps all of its levels; the original
# column is removed afterwards.
just_dummies = pd.get_dummies(df_pd['race'])
df_pd = pd.concat([df_pd, just_dummies], axis=1)      
df_pd.drop(['race'], inplace=True, axis=1)

示例#39
0
    def map(self):
        """Show a global hexbin map of where the samples are located.

        The 'lon' and 'lat' columns of `self._df` are randomly jittered by
        up to half of (dlon, dlat) = (2.5, 1.9) in each direction, binned
        into hexagons with a logarithmic color scale and drawn on a
        PlateCarree map with coastlines. `self._df` itself is not modified.
        The figure is displayed with `plt.show()`; nothing is returned.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns

        import cartopy.crs as ccrs
        from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter

        print("Plotting global distribution of samples...")

        dlon, dlat = 2.5, 1.9
        npoints = len(self._df)

        def rand_fact(npoints, width):
            """ Draw `npoints` uniform random offsets in [-width, width) """
            return (2.*np.random.random(npoints) - 1.) * width

        # New Series are built instead of editing the frame, so no copy of
        # the DataFrame is needed. The jitter for lon is drawn before the
        # one for lat.
        lon = self._df['lon'] + rand_fact(npoints, dlon/2.)
        lat = self._df['lat'] + rand_fact(npoints, dlat/2.)

        # Correct lon:
        # 1) the jitter may push values below 0 or above 360; wrap them
        #    back into [0, 360]
        lon = lon.where(lon >= 0, lon + 360.)
        lon = lon.where(lon <= 360, lon - 360.)
        # 2) shift from [0, 360] to [-180, 180]
        # NOTE(review): this is a plain shift by 180 degrees, not a wrap of
        # the values above 180 -- confirm it matches the longitude
        # convention of the data.
        lon = lon - 180.

        proj = ccrs.PlateCarree()
        cmap = sns.light_palette("navy", 12, as_cmap=True)

        fig, ax = plt.subplots(1, 1, figsize=(10, 5),
                               subplot_kw=dict(projection=proj))
        # placeholder axes for the colorbar; moved by resize_colorbar
        cax = fig.add_axes([0, 0, 0.1, 0.1])
        fig.subplots_adjust(hspace=0, wspace=0, top=0.925, left=0.1)

        hb = ax.hexbin(lon, lat, gridsize=(50, 15), bins='log',
                       transform=proj, cmap=cmap)

        # This block of code helps correctly size and place the colorbar
        def resize_colorbar(event):
            plt.draw()
            posn = ax.get_position()
            cax.set_position([posn.x0 + posn.width + 0.01, posn.y0,
                              0.04, posn.height])
        fig.canvas.mpl_connect('resize_event', resize_colorbar)

        ax.coastlines()
        ax.set_global()

        ax.set_xticks([-180, -120, -60, 0, 60, 120, 180], crs=proj)
        ax.set_yticks([-90, -60, -30, 0, 30, 60, 90], crs=proj)
        lon_formatter = LongitudeFormatter(zero_direction_label=True)
        lat_formatter = LatitudeFormatter()
        ax.xaxis.set_major_formatter(lon_formatter)
        ax.yaxis.set_major_formatter(lat_formatter)

        plt.colorbar(hb, cax=cax, ax=ax)

        plt.show()
                   hue='repayment_interval')
# Cosmetics for the plot held in `g1`: vertical tick labels, title, labels.
g1.set_xticklabels(g1.get_xticklabels(), rotation=90)
g1.set_title("Mean Loan by Month Year", fontsize=15)
g1.set_xlabel("")
g1.set_ylabel("Loan Amount", fontsize=12)
plt.show()
# Despite its name, the 'Century' column holds the calendar year of `date`.
kiva_loans_data['Century'] = kiva_loans_data.date.dt.year
# Mean loan amount per country (rows) and year (columns).
loan = kiva_loans_data.groupby(['country',
                                'Century'])['loan_amount'].mean().unstack()
# Order the countries by their 2017 mean, largest first.
loan = loan.sort_values([2017], ascending=False)
f, ax = plt.subplots(figsize=(15, 20))
# Country/year combinations without loans are shown as 0.
loan = loan.fillna(0)
temp = sns.heatmap(loan, cmap='Reds')
plt.show()
# Count table of sector vs. repayment interval, shaded light-to-red.
# The Styler is not assigned, so it is only visible when rendered by an
# interactive/notebook session.
sector_repayment = ['sector', 'repayment_interval']
cm = sns.light_palette("red", as_cmap=True)
pd.crosstab(
    kiva_loans_data[sector_repayment[0]],
    kiva_loans_data[sector_repayment[1]]).style.background_gradient(cmap=cm)
# Same table for country vs. repayment interval; `sector_repayment` is
# reused although it now holds 'country'.
sector_repayment = ['country', 'repayment_interval']
cm = sns.light_palette("red", as_cmap=True)
pd.crosstab(
    kiva_loans_data[sector_repayment[0]],
    kiva_loans_data[sector_repayment[1]]).style.background_gradient(cmap=cm)
#Correlation Matrix
corr = kiva_loans_data.corr()
plt.figure(figsize=(12, 12))
sns.heatmap(corr,
            xticklabels=corr.columns.values,
            yticklabels=corr.columns.values,
            annot=True,
示例#41
0
        mPlateTbl = makeByWellTbl(in_qcsummary,
                                  o.col_platewell,
                                  o.col_platename,
                                  'frac_pairs_mapped',
                                  dtype=float)

        for plate in mPlateTbl:

            f, ax = plotByWellTbl(100. * mPlateTbl[plate],
                                  heatmap_kwargs={
                                      'annot':
                                      True,
                                      'fmt':
                                      '.1f',
                                      'cmap':
                                      sns.light_palette("red", as_cmap=True),
                                      'vmin':
                                      30.,
                                      'vmax':
                                      90.
                                  },
                                  annot_kwargs={'size': 7.})

            plt.title('Capture set %s, %% properly mapped paired reads' %
                      plate)

            plt.tight_layout()

            f.savefig('%s_%s.by_well.frac_pairs_mapped.pdf' %
                      (o.out_base, plate))
示例#42
0
def shot_chart_jointgrid(x,
                         y,
                         data=None,
                         joint_type="scatter",
                         title="",
                         joint_color="b",
                         cmap=None,
                         xlim=(-250, 250),
                         ylim=(422.5, -47.5),
                         court_color="gray",
                         court_lw=1,
                         outer_lines=False,
                         flip_court=False,
                         joint_kde_shade=True,
                         gridsize=None,
                         marginals_color="b",
                         marginals_type="both",
                         marginals_kde_shade=True,
                         size=(12, 11),
                         space=0,
                         despine=False,
                         joint_kws=None,
                         marginal_kws=None,
                         **kwargs):
    """
    Returns a JointGrid object containing the shot chart.
    This function allows for more flexibility in customizing your shot chart
    than the ``shot_chart_jointplot`` function.
    Parameters
    ----------
    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as columns from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the shot
        location coordinates.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    title : str, optional
        The title for the plot.
    joint_color : matplotlib color, optional
        Color used to plot the shots on the joint plot.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``joint_color``. Used
        for KDE and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.  The defaults represent the out of bounds
        lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot. Default is
        ``False``, which orients the court where the hoop is towards the top of
        the plot.
    joint_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the joint plot.
    gridsize : int, optional
        Number of hexagons in the x-direction. The default is calculated using
        the Freedman-Diaconis method.
    marginals_color : matplotlib color, optional
        Color used to plot the shots on the marginal plots.
    marginals_type : { "both", "hist", "kde"}, optional
        The type of plot for the marginal plots.
    marginals_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the marginal
        plots.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots.
    despine : boolean, optional
        If ``True``, removes the spines.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
        The dictionaries passed in are not modified.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn plots.
        They are merged into the joint plot keyword arguments.
    Returns
    -------
     grid : JointGrid
        The JointGrid object with the shot chart plotted on it.
    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not one of the supported
        values.
    """

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Merge into a copy so the dict owned by the caller is never mutated
    # (re-using one dict across calls used to accumulate stale kwargs).
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)

    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x,
                         y=y,
                         data=data,
                         xlim=xlim,
                         ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot,
                               cmap=cmap,
                               shade=joint_kde_shade,
                               **joint_kws)

    elif joint_type == "hex":
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the vectors resolved by the grid: ``x``/``y`` themselves may
            # be column names when ``data`` is given, which cannot be binned.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin,
                               gridsize=gridsize,
                               cmap=cmap,
                               **joint_kws)

    else:
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot,
                                   color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot,
                                   color=marginals_color,
                                   kde=False,
                                   **marginal_kws)

    elif marginals_type == "kde":
        grid = grid.plot_marginals(sns.kdeplot,
                                   color=marginals_color,
                                   shade=marginals_kde_shade,
                                   **marginal_kws)

    else:
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # Extract the first axes, which is the main plot of the
    # joint shot chart, and draw the court onto it
    ax = grid.fig.get_axes()[0]
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Real booleans are required: the string
    # "off" is truthy, so matplotlib versions that no longer translate it
    # would keep the labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        # The joint axes and both marginal axes share one spine style
        for axes in (ax, grid.ax_marg_x, grid.ax_marg_y):
            axes.spines[spine].set_lw(court_lw)
            axes.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
示例#43
0
    """ Aggregate data and calculate the failures % withthin the aggregated group 
        Args: table
              groupby_col: list, column names 
              failure: number of failured patients in that group
              total: total number of patients in that group 
        Returns: an aggregated table with failure %
    """
    table_new = table.groupby(groupby_col).count().reset_index()
    table_new['failure_percentage'] = table_new[failure]/ table_new[total] * 100
    
    return table_new



#aggregate failure % within each age group 
color = sns.light_palette("seagreen")[-2]
patients_age = failure_rate(patients, 'member_age', 'outcome', 'event_id')
#plot
barplot(patients_age, 'member_age', 'failure_percentage',None,  
        'Failure % for Each Age Group', 'Member Age', 'Failure% = failures/patients', 
        (35, 10), color)


#data process: group by sex
patients.loc[:,'member_sex'] = patients.loc[:,'member_sex'].map({0: 'female', 1: 'male'})
patients.loc[:,'outcome'] = patients.loc[:,'outcome'].fillna('non-failure')#count
# of failures for each gender
patients_sex = patients.groupby(['member_sex','outcome']).count().reset_index()
#plot
barplot(patients_sex, 'member_sex', 'event_id', 'outcome', 
        '# of Failure vs Non-failuare Patients for Each Gender', 'Member Gender', 'Failure% = failures/patients', 
示例#44
0
def PlotID(DATA, IDs, ID, xx, yy, style='Paper', xlabel='default', ylabel='default', xsize=3, ysize=3, mode='-', \
           hold=0, palete='Normal', legendMode=True, PlotName='default'):
    """Plot column ``yy`` against column ``xx`` for one dataset selected by ``ID``.

    Args:
        DATA: indexable collection of datasets; each dataset is indexed with
            ``xx`` and ``yy`` (presumably pandas DataFrames -- confirm with
            callers).
        IDs: labels matching ``DATA`` position by position.
        ID: either a label from ``IDs`` (str) or a position into ``DATA``.
            A string that matches no label draws nothing.
        xx, yy: keys of the x and y data inside the selected dataset.
        style: 'Paper' or 'Slide' applies the matching matplotlib rc preset
            (LaTeX text, serif fonts, 400 dpi); any other value leaves the rc
            settings untouched and uses matplotlib's configured line width.
        xlabel, ylabel: axis labels; 'default' re-uses ``xx`` / ``yy``.
        xsize, ysize: figure size stored in ``figure.figsize`` by the presets.
        mode: matplotlib format string for the line.
        hold: when 0 the figure is shown right after plotting.
        palete: 'Normal', 'VTANURA', 'HCONTRAST', or an xkcd color name used
            to build a light palette.
        legendMode: draw a legend when equal to ``True``.
        PlotName: 'default' to draw with ``pyplot``, otherwise an object
            offering ``plot``, ``set_xlabel``, ``set_ylabel`` and ``legend``
            (such as a matplotlib Axes).

    Raises:
        KeyError: if ``palete`` is neither a preset nor an xkcd color name.
    """
    if xlabel == 'default':
        xlabel = xx
    if ylabel == 'default':
        ylabel = yy

    # A single curve is drawn, so only the first palette color is ever used.
    n_ids = 1
    if palete == 'Normal':
        Pt = sns.color_palette('husl')
    elif palete == 'VTANURA':
        Pt = sns.color_palette([
            "#8b1f41", '#011627', '#ff6600', '#41ead4', '#808080', '#5f8297',
            '#000000'
        ])
    elif palete == 'HCONTRAST':
        Pt = sns.color_palette([
            "#004488", '#BB5566', '#575757', '#DDAA33', '#000000', '#8A8A8A'
        ])
    else:
        Pt = sns.light_palette(sns.xkcd_rgb[palete], n_ids + 1, reverse=True)
    curve_color = Pt[0]

    # Fall back to matplotlib's configured width so that an unrecognised
    # style no longer leaves ``lw`` undefined (previously a NameError).
    lw = plt.rcParams['lines.linewidth']
    presets = {
        'Paper': {'base': 8, 'axes_title': 8, 'large': 12, 'lw': 1},
        'Slide': {'base': 14, 'axes_title': 16, 'large': 24, 'lw': 2},
    }
    preset = presets.get(style)
    if preset is not None:
        plt.rc('text', usetex=True)
        if style == 'Paper':
            # A plain string is accepted by both old and new matplotlib;
            # the list form is rejected by recent releases.
            plt.rcParams['text.latex.preamble'] = r'\usepackage{bm}'
        plt.rc('font', family='serif')
        plt.rc('font', size=preset['base'])  # controls default text sizes
        plt.rc('axes', titlesize=preset['axes_title'])  # fontsize of the axes title
        plt.rc('axes', labelsize=preset['large'])  # fontsize of the x and y labels
        plt.rc('xtick', labelsize=preset['base'])  # fontsize of the tick labels
        plt.rc('ytick', labelsize=preset['base'])  # fontsize of the tick labels
        plt.rc('legend', fontsize=preset['base'])  # legend fontsize
        plt.rc('figure', titlesize=preset['large'])  # fontsize of the figure title
        plt.rcParams['figure.dpi'] = 400
        plt.rcParams["figure.figsize"] = (xsize, ysize)
        lw = preset['lw']

    use_pyplot = (PlotName == 'default')
    target = plt if use_pyplot else PlotName

    if isinstance(ID, str):
        # Look the label up and plot the first dataset that carries it.
        for frame, frame_id in zip(DATA, IDs):
            if frame_id == ID:
                target.plot(frame[xx], frame[yy], mode,
                            label=frame_id, linewidth=lw, color=curve_color)
                break
    else:
        target.plot(DATA[ID][xx], DATA[ID][yy], mode,
                    label=IDs[ID], linewidth=lw, color=curve_color)

    legend_kwargs = dict(edgecolor='k', fancybox=False, framealpha=1,
                         shadow=False, borderpad=1)
    if use_pyplot:
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if legendMode == True:
            plt.legend(**legend_kwargs)
        if hold == 0:
            plt.show()
    else:
        PlotName.set_xlabel(xlabel)
        PlotName.set_ylabel(ylabel)
        if legendMode == True:
            PlotName.legend(**legend_kwargs)
        if hold == 0:
            # Axes objects have no ``show``; use it when the target offers
            # one and fall back to pyplot otherwise.
            show = getattr(PlotName, 'show', None)
            if callable(show):
                show()
            else:
                plt.show()
示例#45
0
文件: BICCN.py 项目: Edouard360/scVI
    ['L5 NP', 'L5 NP Slc17a8'],
    [
        'L6 IT Car3', 'L6 CT Olig', 'L6 IT Maf', 'L6 IT Ntn5 Mgp',
        'L6 IT Ntn5 Inpp4b'
    ], ['L6 CT Nxph2', 'L6 CT Astro', 'L6 CT',
        'L6 CT Grp'], ['L6b', 'L6b F2r'],
    ['Lamp5 Sncg', 'Lamp5 Egln3', 'Lamp5 Slc35d3'],
    ['Vip Rspo4', 'Vip Serpinf1', 'Vip'], ['Astro Ex', 'Astro Aqp4'],
    ['OPC Pdgfra'], ['VLMC Osr1'],
    ['Oligo Enpp6_1', 'Oligo Enpp6_2', 'Oligo Opalin'], ['Sncg Ptprk'],
    ['Endo Slc38a5', 'Endo Slc38a5_Peri_2', 'Endo Slc38a5_Peri_1']
]

macosko_regev_colors = [
    sns.color_palette("Greens")[2:6],  # Pvalb
    sns.light_palette("green", 5)[0:3],  # Pvalb Ex
    sns.light_palette("green", 5)[3:5],  # Pvalb Astro
    sns.light_palette("orange", 6),  # L2/3
    sns.light_palette('red')[1:6],  # Sst
    sns.light_palette("cyan", 3),  # L5 PT
    sns.light_palette('purple', 8)[1:8],  # L5 IT Tcap
    sns.light_palette('purple', 7)[4:7],  # L5 IT Aldh1a7
    sns.light_palette("navy", 7)[3:5],  # L5 NP
    sns.light_palette("brown", 7)[2:7],  # L6 IT
    sns.dark_palette("brown", 8)[1:5],  # L6 CT
    sns.dark_palette("green", 8)[5:7],  # L6
    sns.dark_palette("yellow", 7)[1:4],  # Lamp5
    sns.dark_palette("yellow", 7)[4:7],  # Vip
    sns.color_palette("Paired", 4),  # Astro OPC VLMC
    sns.color_palette('Greys', 3),  # Oligo
    sns.dark_palette('tan'),  # sncg
示例#46
0
def make_IF_fig2(chosen_test_examples,
                 folder_influence,
                 model_name,
                 formatname='png',
                 name="IF"):
    """Draw a 2x2 grid of influence-function plots, one panel per test example.

    For every entry of ``chosen_test_examples`` the influence values are read
    from ``<folder_influence>/original_influence_test<k>.txt``, reordered with
    the argsort of the mask stored in ``model/<model_name>_mask.npy`` and
    scattered against the training-point grid. The five lowest and five
    highest values are highlighted and a vertical line marks the test point.

    Args:
        chosen_test_examples: iterable of integer indices into the test grid;
            at most four fit into the figure (a fifth raises ``IndexError``).
        folder_influence: directory holding the influence text files.
        model_name: prefix of the mask file inside ``model/``.
        formatname: only referenced by the commented-out ``savefig`` call.
        name: only referenced by the commented-out ``savefig`` call.

    Returns:
        The matplotlib figure. Nothing is written to disk.
    """
    # Overriding fonts
    plt.rcParams.update({
        "pgf.texsystem":
        "pdflatex",
        "pgf.preamble": [
            r"\usepackage[utf8x]{inputenc}",
            r"\usepackage[T1]{fontenc}",
            r"\usepackage{cmbright}",
        ]
    })
    plt.rc('text', usetex=True)

    # Loading the mask; its argsort is used to reorder the influence values
    mask = np.load('model/' + model_name + '_mask.npy')
    antimask = np.argsort(mask)

    # Training points
    U_array = np.concatenate((np.linspace(0, 1, 500),
                              np.linspace(1.01, 40, 500)))

    # Test points
    U_testarray = np.concatenate((np.linspace(0.01, 0.999, 20),
                                  np.linspace(1.02, 2, 5),
                                  np.linspace(2.066, 39, 20)))
    max_x = 40
    min_y = -1e-2
    max_y = 1e-1
    trans_point = 1
    U_value = '0'
    xticks_location = np.concatenate((np.array([0, 1, 2]),
                                      np.array([4, 10, 40])))
    xticks_labels = np.concatenate((np.array(['0', '1', '2']),
                                    np.array(['4', '10', '40'])))
    yticks_location = np.array([-1e-3, 0, 1e-3, 1e-1])

    # Seaborn style set
    sns.set(style="whitegrid")
    sns.set_style("whitegrid", {
        'grid.linestyle': 'dashed',
        "grid.color": "0.6",
        'axes.edgecolor': '.1'
    })

    # Plot colors
    colors = [
        "windows blue", "amber", "greyish", "faded green", "dusty purple"
    ]
    palette_background = sns.light_palette("lightsteelblue", 6)
    c_left = sns.light_palette("navy")[-2]
    c_right = sns.light_palette("purple")[-2]
    c_help = sns.light_palette("green")[-2]
    c_harm = sns.light_palette("red")[-2]
    c_phase1 = palette_background[0]
    c_phase2 = palette_background[2]
    c_test = sns.xkcd_palette(colors)[1]

    marker_size = 0.5
    marker_size_help = 1.5

    fig, axs = plt.subplots(2,
                            2,
                            figsize=(3 + 3 / 8, 2.5),
                            sharex=True,
                            sharey=True)
    fig.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.rc('font', size=9)
    plt.rc('axes', labelsize=8)

    for panel, test_sample in enumerate(chosen_test_examples):
        # Panels are filled row by row
        row, col = divmod(panel, 2)
        ax = axs[row][col]

        # Influence functions of all train elements for one test example
        influence_path = (folder_influence + '/original_influence_test' +
                          str(test_sample) + '.txt')
        with open(influence_path) as filelabels:
            influence_functions = np.loadtxt(filelabels, dtype=float)

        antimasked_inf_funs = influence_functions[antimask]
        sorting_indices = np.argsort(antimasked_inf_funs)

        # The first 502 training points are drawn as phase 1, the rest as
        # phase 2
        antimasked_inf_funs_phase1 = antimasked_inf_funs[0:502]
        antimasked_inf_funs_phase2 = antimasked_inf_funs[502:1001]
        U_array_phase1 = U_array[0:502]
        U_array_phase2 = U_array[502:1001]

        U_test_value = U_testarray[test_sample]

        # Figure
        ax.scatter(U_array_phase1,
                   antimasked_inf_funs_phase1,
                   marker='o',
                   c=c_left,
                   s=marker_size,
                   label='training points, phase 1')
        ax.scatter(U_array_phase2,
                   antimasked_inf_funs_phase2,
                   marker='o',
                   c=c_right,
                   s=marker_size,
                   label='training points, phase 2')
        # Five smallest influence values
        lowest = sorting_indices[:5]
        ax.plot(U_array[lowest],
                antimasked_inf_funs[lowest],
                'o',
                c=c_harm,
                markersize=marker_size_help,
                label='most harmful')
        # Five largest influence values
        highest = sorting_indices[-5:]
        ax.plot(U_array[highest],
                antimasked_inf_funs[highest],
                'o',
                c=c_help,
                markersize=marker_size_help,
                label='most helpful')
        # Vertical line at the test point
        ax.plot([U_test_value, U_test_value], [min_y, max_y],
                color=c_test,
                label="test point (U''=" + U_value + ")")

        # NOTE(review): newer matplotlib releases spell these keyword
        # arguments ``linthresh`` -- confirm against the pinned version.
        ax.set_yscale('symlog', linthreshy=1e-3)
        ax.set_xscale('symlog', linthreshx=3)

        ax.set_xticks(xticks_location)
        ax.set_xticklabels(xticks_labels)
        ax.set_yticks(yticks_location)
        ax.tick_params(which='both', labelsize='small')

        ax.set_ylim(min_y, max_y)
        ax.set_xlim(0, max_x)
        ax.tick_params(which='both', direction='in')

        ax.grid(linewidth=0.1)

        for side in ['top', 'bottom', 'left', 'right']:
            ax.spines[side].set_linewidth(0.05)

        # Two colors background, split at the transition point
        ax.axvspan(0, trans_point, facecolor=c_phase1, zorder=0, lw=0)
        ax.axvspan(trans_point,
                   max_x,
                   facecolor=c_phase2,
                   zorder=0,
                   lw=0)

    # Panel letters: right-aligned in the left column, left-aligned in the
    # right column
    panel_tags = (
        (axs[0][0], 35, '(a)', 'right'),
        (axs[0][1], 0.15, '(b)', 'left'),
        (axs[1][0], 35, '(c)', 'right'),
        (axs[1][1], 0.15, '(d)', 'left'),
    )
    for tag_ax, tag_x, tag, align in panel_tags:
        tag_ax.text(tag_x,
                    0.06,
                    tag,
                    verticalalignment='top',
                    horizontalalignment=align,
                    family="serif")

    IFtext = fig.text(-0.03,
                      0.5,
                      'Influence function value',
                      family="serif",
                      va='center',
                      rotation='vertical')
    # Raw string: ``\,`` is a LaTeX thin space, not a Python escape sequence
    Utext = fig.text(0.5, -0.01, r'$V_1/\,J$ ', family="serif", ha='center')

    #fig.savefig('./figures/' + name + '.' + formatname, bbox_extra_artists=(Utext, IFtext), bbox_inches='tight')
    return fig
示例#47
0
    def print_confusion_matrix(confusion_matrix,
                               class_names,
                               figsize=(10, 7),
                               fontsize=14):
        """Plots a row-normalised confusion matrix as a heatmap.

        Each row of the matrix is divided by its own sum before plotting, so
        the cells show fractions of the true class rather than raw counts.

        Arguments
        ---------
        confusion_matrix: numpy.ndarray
            The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix.
            Similarly constructed ndarrays can also be used.
        class_names: list
            An ordered list of class names, in the order they index the given confusion matrix.
        figsize: tuple
            A 2-long tuple, the first value determining the horizontal size of the outputted figure,
            the second determining the vertical size. Defaults to (10,7).
        fontsize: int
            Font size for axes labels. Defaults to 14. Currently unused: the
            calls that consumed it are commented out.

        Returns
        -------
        matplotlib.figure.Figure
            The resulting confusion matrix figure

        Raises
        ------
        ValueError
            If seaborn rejects the data while drawing the heatmap.
        """

        # Normalise every row by the number of samples of its true class
        row_totals = confusion_matrix.sum(axis=1)[:, np.newaxis]
        confusion_matrix = confusion_matrix.astype('float') / row_totals

        custom_color_map = LinearSegmentedColormap.from_list(
            name='custom_navy',
            colors=['blue', 'cyan', 'green', 'yellow', 'red'],
        )

        df_cm = pd.DataFrame(
            confusion_matrix,
            index=class_names,
            columns=class_names,
        )
        fig = plt.figure(figsize=figsize)
        try:
            # Only every 15th class label is drawn on each axis
            sns.heatmap(df_cm,
                        annot=False,
                        cbar=True,
                        xticklabels=15,
                        yticklabels=15,
                        cmap=custom_color_map)
        except ValueError:
            raise ValueError("Confusion matrix values must be integers.")
        plt.yticks(rotation=0)
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        return fig
示例#48
0
def heatmap_df(report_pivot,
               count_df,
               list_wanted,
               ax,
               size='xx-small',
               rotation=0,
               color_gradient='red'):
    '''
    Function to plot the heatmap on the figure based on the presence absence data

    The cells are colored by the counts divided by ``count_df.Count`` and
    annotated with the raw counts. Cells whose ratio is 0 are painted white.
    ``report_pivot`` is left untouched: genes/systems of ``list_wanted`` that
    are missing from it are filled with 0 in a reindexed copy.

    :param report_pivot: Pivot table of the report that represent the presence absence table of the gene/systems
    :type: pandas.DataFrame
    :param count_df: Number of the genomes for each phyla order in the same order as the heatmap
    :type: pandas.DataFrame
    :param list_wanted: List of all the genes/systems wanted to appear on the figure
    :type: list of str
    :param ax: The axe on which to plot the sub figure
    :type: matplotlib.axes.Axes
    :param size: Font size in points or as a string (e.g., 'large')
    :type: float or str
    :param rotation: The angle to which the label of the heatmap to rotate
    :type: int
    :param color_gradient: Color passed to ``seaborn.light_palette`` to build the colormap
    :type: color specification accepted by seaborn
    :return: Nothing
    '''

    cmap = sns.light_palette(color_gradient, as_cmap=True)

    # Rows are put in the wanted order; wanted entries absent from the pivot
    # table become rows of zeros without modifying the caller's DataFrame
    df_annot = report_pivot.reindex(list_wanted, fill_value=0)

    df_heatmap = df_annot.div(count_df.Count, axis=0)

    sns.heatmap(
        df_heatmap,
        cmap=cmap,
        linewidths=1,
        linecolor=(0.3997693305214246, 0.6478123867044262, 0.80273742044673246,
                   1.0),
        annot=df_annot,
        annot_kws={
            'color': 'black',
            'fontsize': size
        },
        cbar=False,
        ax=ax,
        fmt="d",
        yticklabels=False,
    )

    ax.set_xticklabels(ax.get_xticklabels(), rotation=rotation)

    # The mesh is the figure itself here, so to change the facecolor of the cell in the heatmap
    # we need to parse the mesh as in the seaborn instance code
    # So now the 0 are white

    mesh = ax.collections[0]
    all_values_mesh = mesh.get_array()
    # Colors must be read before the mesh is reset to 'none' below
    all_color_mesh = mesh.get_facecolors()

    mesh.set_facecolor('none')
    new_color = [
        'white' if value == 0 else all_color_mesh[i]
        for i, value in enumerate(all_values_mesh)
    ]
    mesh.set_facecolor(new_color)

    # Modify axis and ticks
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(
        axis='x',
        which='both',
        labelsize=size,
        length=0,
    )

    ax.xaxis.set_label_text("")
    ax.yaxis.set_label_text("")
示例#49
0
from models import BayesianMLP

# Module-level plotting configuration, keyed by model tag: "bnn", "gpp", "gp".
sns.set_style("white")
# Number of shades generated for each light palette below.
n = 9
# Pairs of color names per model.
# NOTE(review): these look like xkcd color names -- confirm how they are
# resolved where they are used.
bnn_col = ["deep sky blue", "bright sky blue"]
gpp_bnn_col = ["red", "salmon"]
gp_col = ["green", "light green"]
colors = {"bnn": bnn_col, "gpp": gpp_bnn_col, "gp": gp_col}
# One color name per model.
sample_col = {
    "bnn": "bright sky blue",
    "gpp": "watermelon",
    "gp": "light lime"
}
# n-step light palettes ending at each model's base hex color.
pal_col = {
    "bnn": sns.light_palette("#3498db", n_colors=n),  # nice blue
    "gpp": sns.light_palette("#e74c3c", n_colors=n),  # nice red
    "gp": sns.light_palette("#2ecc71", n_colors=n),  # green (author: "not so nice")
}

# Relative path of the figures directory.
project_dir = "../figures/"


def plot_deciles(x_all,
                 y_all_pred,
                 y_all_ground_truth=None,
                 x_train=None,
                 y_train=None,
                 mode="bnn",
                 title=None):
    """
示例#50
0
# xkcd provides a set of names for random RGB colors, yielding 954 named
# colors that can be looked up at any time in the xkcd_rgb dictionary
# plt.plot([0, 1], [0, 1], sns.xkcd_rgb["pale red"], lw=3)  # lw is the line width
# plt.plot([0, 1], [0, 2], sns.xkcd_rgb["medium green"], lw=3)
# plt.plot([0, 1], [0, 3], sns.xkcd_rgb["denim blue"], lw=3)

# colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
# sns.palplot(sns.xkcd_palette(colors))

# -------------------------------------------------------------------------------

# Sequential palettes -- the color varies with the data, e.g. the more
# important the data, the darker the color
# sns.palplot(sns.color_palette("Blues"))

# To reverse the gradient, append an _r suffix to the palette name
# sns.palplot(sns.color_palette("BuGn_r"))

# Linearly varying hue
# Brightness and saturation of the colors vary linearly
# sns.palplot(sns.color_palette("cubehelix", 8))
# sns.palplot(sns.cubehelix_palette(8, start=0.5, rot=-0.75))

# light_palette() and dark_palette() build custom sequential palettes
# sns.palplot(sns.light_palette("green"))
# sns.palplot(sns.dark_palette("purple"))  # colors go from light to dark
# sns.palplot(sns.light_palette("purple", reverse=True))  # colors go from dark to light

# Show a light palette whose base color is given as a husl triple.
sns.palplot(sns.light_palette((210, 90, 60), input="husl"))


plt.show()
def heatmap_plot_zscore_bbp(df_zscore_features, df_all, output_dir, title=None):
    """Save a clustered heatmap of z-scored features and return the linkage.

    Columns of the heatmap are clustered with a Ward linkage computed on the
    rows of ``df_zscore_features``. Features are ordered by their own Ward
    linkage instead of being clustered by seaborn. Two color strips annotate
    the columns: one for ``df_all['m-type']`` and one for ``df_all['layer']``.
    The figure is written to ``<output_dir>/zscore_feature_heatmap.png``.

    Args:
        df_zscore_features: DataFrame of z-scored features.
            NOTE(review): presumably one row per sample and one column per
            feature -- confirm with callers.
        df_all: DataFrame providing the 'm-type' and 'layer' columns.
        output_dir: directory the PNG is written into.
        title: optional figure title.

    Returns:
        The linkage matrix used to cluster the heatmap columns.
    """
    # print() with a single argument behaves the same on Python 2 and 3
    print("heatmap plot:bbp")
    metric = 'm-type'
    mtypes = np.unique(df_all[metric])
    print(mtypes)

    # Map every m-type to a color
    mtypes_pal = sns.color_palette("hls", len(mtypes))
    mtypes_lut = dict(zip(mtypes, mtypes_pal))
    mtypes_colors = df_all[metric].map(mtypes_lut)

    # Map every layer to a shade of green
    layers = np.unique(df_all['layer'])
    layer_pal = sns.light_palette("green", len(layers))
    layers_lut = dict(zip(layers, layer_pal))
    layer_colors = df_all['layer'].map(layers_lut)

    linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')

    # Order the features (rows of the transposed table) by the leaves of
    # their own linkage
    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)
    matchIndex = [data.index[x] for x in feature_order]
    data = data.reindex(matchIndex)

    g = sns.clustermap(data, row_cluster=False, col_linkage=linkage, method='ward', metric='euclidean',
                       linewidths=0.0, col_colors=[mtypes_colors, layer_colors],
                       cmap=sns.cubehelix_palette(light=1, as_cmap=True), figsize=(40, 20))
    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.subplots_adjust(left=0.1, bottom=0.5, right=0.9, top=0.95)  # !!!!!

    if title:
        pl.title(title)
    location = "best"
    num_cols = 1

    # Legend for the column color strips: zero-size bars act as legend
    # handles on the otherwise unused row-dendrogram axes
    legend_ax = g.ax_row_dendrogram
    for label in mtypes:
        legend_ax.bar(0, 0, color=mtypes_lut[label], label=label, linewidth=0.0)

    # Three blank entries separate the two groups
    for _ in range(3):
        legend_ax.bar(0, 0, color="white", label=" ", linewidth=0)

    for label in layers:
        legend_ax.bar(0, 0, color=layers_lut[label], label=label, linewidth=0.0)

    # Build the legend once, after every handle exists, instead of
    # rebuilding it on each loop iteration
    legend_ax.legend(loc=location, ncol=num_cols, borderpad=0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    return linkage
from scipy import misc
from PIL import Image 
import numpy as np
# Read "U.png" as a 2-D array.
# NOTE(review): scipy.misc.imread is not available in recent SciPy releases,
# and flatten=True presumably yields a grayscale image -- confirm.
im_array = misc.imread("U.png",flatten=True)

# Row (X) and column (Y) indices of every pixel whose value is below 10.
X = []
Y = []

for i in range(im_array.shape[0]):
    for j in range(im_array.shape[1]):
        if im_array[i][j]<10:
            X.append(i)
            Y.append(j)
# NOTE(review): only X is reversed, so X[k] and Y[k] no longer belong to the
# same pixel; presumably meant as a vertical flip -- confirm the intent.
X.reverse()
X = np.array(X)
Y = np.array(Y)
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("white")
# Light-to-green colormap for the density plot.
pal = sns.light_palette("green", as_cmap = True)
#pal = sns.dark_palette("palegreen", as_cmap=True)
with sns.axes_style(None):
    # Shaded 2-D KDE with column indices on x and row indices on y.
    ax = sns.kdeplot(Y, X, cmap = pal, shade=True, shade_lowest=False)
    sns.despine(left=True,bottom=True,trim=True)
plt.show()
# Quick look at the data, then pairwise regression plots for three columns.
print(df.head(10))
sns.pairplot(df,
             height=3,
             aspect=1.5,
             vars=['salary', 'credit', 'married'],
             diag_kind='kde',
             kind='reg')
print()
# Printed heading (Russian): "correlation between numeric features".
print('корреляция между числовыми признаками')
fields = ['age', 'salary', 'married', 'children', 'credit']
corr = df[fields].corr()
print(corr)

# Hide the upper triangle (diagonal included) of the symmetric matrix.
# The builtin ``bool`` is used because the ``np.bool`` alias was removed
# from NumPy.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, mask=mask, cmap=sns.light_palette('grey'))

# Imported for its side effect: older Matplotlib releases need it to make
# the '3d' projection available.  The name itself is not used.
from mpl_toolkits.mplot3d import axes3d
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
x1 = df['salary']
y1 = df['married']
z1 = df['credit']
ax.scatter(x1, y1, z1, marker='o', label='blue')

# Horizontal box plots of salary and credit on a separate, wider figure.
plt.figure(figsize=(16, 6))
df.boxplot(column=['salary', 'credit'], vert=False)

plt.show()
# Printed heading (Russian): "building the regression model".
print('построение регрессионно модели')
from sklearn.model_selection import train_test_split
示例#54
0
def shot_chart_jointgrid(x, y, data=None, title="", joint_type="scatter",
                         marginals_type="both", cmap=None, joint_color="b",
                         marginals_color="b", xlim=(-250, 250),
                         ylim=(422.5, -47.5), joint_kde_shade=True,
                         marginals_kde_shade=True, hex_gridsize=None, space=0,
                         size=(12, 11), court_color="gray", outer_lines=False,
                         court_lw=1, flip_court=False, joint_kws=None,
                         marginal_kws=None, **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    Args:
        x, y: shot coordinates, passed to ``sns.JointGrid`` as ``x`` / ``y``
            (arrays, or column names when ``data`` is given).
        data: passed to ``sns.JointGrid`` as ``data``.
        title: title placed above the top marginal plot.
        joint_type: ``"scatter"`` (``plt.scatter``), ``"kde"``
            (``sns.kdeplot``) or ``"hex"`` (``plt.hexbin``).
        marginals_type: ``"both"`` (``sns.distplot``), ``"hist"``
            (``sns.distplot`` with ``kde=False``) or ``"kde"``
            (``sns.kdeplot``).
        cmap: colormap for the ``"kde"`` and ``"hex"`` joint plots.  When
            None it is ``sns.light_palette(joint_color, as_cmap=True)``.
        joint_color: color of the scatter joint plot and base color of the
            default colormap.
        marginals_color: color of the marginal plots.
        xlim, ylim: axis limits passed to ``sns.JointGrid``.
        joint_kde_shade: ``shade`` argument of the ``"kde"`` joint plot.
        marginals_kde_shade: ``shade`` argument of the ``"kde"`` marginals.
        hex_gridsize: ``gridsize`` for ``plt.hexbin``.  When None it is the
            integer mean of the Freedman-Diaconis bin counts of the x and y
            data.
        space: ``space`` argument of ``sns.JointGrid``.
        size: figure size in inches, as accepted by ``set_size_inches``.
        court_color, court_lw, outer_lines: forwarded to ``draw_court`` as
            ``color``, ``lw`` and ``outer_lines``.
        flip_court: when true, ``xlim`` and ``ylim`` are both reversed.
        joint_kws: extra keyword arguments for the joint plot function.
            The dict is copied, never modified.
        marginal_kws: extra keyword arguments for the marginal plot function.
        **kwargs: merged into the joint plot keyword arguments, overriding
            same-named entries of ``joint_kws``.

    Returns:
        The ``sns.JointGrid`` the chart was drawn on.

    Raises:
        ValueError: if ``joint_type`` or ``marginals_type`` is not one of
            the values listed above.
    """

    # Validate before creating any figure so a bad argument does not leave
    # a half-built figure behind.
    if joint_type not in ("scatter", "kde", "hex"):
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")
    if marginals_type not in ("both", "hist", "kde"):
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on a copy so the caller's joint_kws dict is left untouched.
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)

    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    else:  # "hex" (validated above)
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This is idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the grid's resolved data: x and y themselves may be
            # column names when ``data`` is supplied.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=hex_gridsize, cmap=cmap,
                               **joint_kws)

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    else:  # "kde" (validated above)
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # Extract the the first axes, which is the main plot of the
    # joint shot chart, and draw the court onto it
    ax = grid.fig.get_axes()[0]
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels.  Booleans are required here: the string
    # "off" is truthy and does not hide the labels on current Matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    return grid
示例#55
0

if __name__ == "__main__":
    import numpy as np
    import pandas as pd

    # Demo frame: column A is a linear ramp from 1 to 10, columns B-E are
    # seeded standard-normal noise.
    np.random.seed(24)
    ramp = pd.DataFrame({"A": np.linspace(1, 10, 10)})
    noise = pd.DataFrame(np.random.randn(10, 4), columns=list("BCDE"))
    df = pd.concat([ramp, noise], axis=1)

    # Punch two holes into the data so the null highlighting has targets.
    df.iloc[0, 2] = np.nan
    df.iloc[3, 3] = np.nan

    import seaborn as sns

    cm = sns.light_palette("green", as_cmap=True)

    # From here on ``df`` is rebound to the Styler returned by each call.
    df = df.style.background_gradient(cmap=cm)
    df = df.highlight_null(null_color="red")  # element wise
    # Other styling options that were tried:
    # df.style.bar(subset=['A', 'B'], color='#d65f5f')
    # df.style.bar(subset=['A', 'B'], align='mid', color=['#d65f5f', '#5fba7d'])

    # Apply the highlighter (defined elsewhere in this module) three ways:
    # element-wise, via apply() with its default axis, and over the whole
    # table (axis=None).
    df = df.applymap(color_highlight_extreme)  # .format(None, na_rep="-")
    df = df.apply(color_highlight_extreme, color="darkorange")
    whole_table_options = dict(
        extreme_func=NDFrameExtremeEnum.max,
        color="green",
        axis=None,
    )
    df = df.apply(color_highlight_extreme, **whole_table_options)

    html_ = df.render()
    # html_ = df.to_html()
示例#56
0
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


locale.setlocale(locale.LC_ALL, 'en_US')


# Name the colours of seaborn's 'deep' palette so they can be referred to
# individually below.
# NOTE(review): this unpacking needs the palette to contain exactly six
# colours -- confirm against the installed seaborn version.
darkblue, green, red, purple, yellow, lightblue = sns.color_palette('deep')

# Display order of the modalities; the colour and colormap lookups below are
# all keyed by these names.
MODALITY_ORDER = ['~0', 'middle', '~1', 'bimodal', 'multimodal']
MODALITY_TO_COLOR = dict(zip(
    MODALITY_ORDER,
    [lightblue, yellow, red, purple, 'lightgrey'],
))
MODALITY_PALETTE = [MODALITY_TO_COLOR[modality] for modality in MODALITY_ORDER]

# Every modality except 'multimodal' gets a light-to-colour ramp built from
# its own colour; 'multimodal' uses Matplotlib's Greys colormap.
MODALITY_TO_CMAP = {
    modality: sns.light_palette(MODALITY_TO_COLOR[modality], as_cmap=True)
    for modality in MODALITY_ORDER
    if modality != 'multimodal'
}
MODALITY_TO_CMAP['multimodal'] = mpl.cm.Greys

# Keyword arguments bundling the modality order and palette.
MODALITY_FACTORPLOT_KWS = {
    'hue_order': MODALITY_ORDER,
    'palette': MODALITY_PALETTE,
}

def violinplot(x=None, y=None, data=None, bw=0.2, scale='width',
               inner=None, ax=None, **kwargs):
    """Wrapper around Seaborn's violinplot, intended for [0, 1] ranged data.

    What's different (as expressed by this function's defaults):
    - bw = 0.2: Sets bandwidth to be small and the same between datasets
    - scale = 'width': Sets the width of all violinplots to be the same
    - inner = None: Don't plot a boxplot or points inside the violinplot

    NOTE(review): only the signature and this docstring are present here,
    so as written the function returns None without drawing anything.  The
    call into Seaborn presumably belongs below -- confirm against the
    complete source.
    """
示例#57
0
        r_ref = pd.concat(subsets)
        r_ref.to_csv(ref_path, index=False)
    else:
        r_ref = pd.read_csv(ref_path)
    # %%
    pool_over_subjects = False
    temp = r_ref
    pipelines = list(temp['pipeline'].unique())
    ACROSS_DATASET_PLOTS_FOLDER = PLOTS_FOLDER / 'across_datasets'
    SELECTED_FOLDER = PLOTS_FOLDER / 'across_datasets' / 'selected_pipelines'
    os.makedirs(ACROSS_DATASET_PLOTS_FOLDER, exist_ok=True)
    os.makedirs(SELECTED_FOLDER, exist_ok=True)
    n_channel_configurations = len(temp['channels'].unique())

    with sns.color_palette(sns.light_palette("navy", n_colors=6)[1:]):
        for compare_pipeline in pipelines:
            plt.close("all")
            fig, ax = plt.subplots(1, 1, figsize=(9, 4))
            asd_pool = temp.groupby(['dataset', 'pipeline']).aggregate([np.mean, np.std]).reset_index()
            asd_pool = asd_pool.loc[asd_pool['pipeline'] == compare_pipeline]
            col_order = asd_pool.sort_values(by=('samples', 'mean'))['dataset']
            asd_single = temp.groupby(['dataset', 'subject', 'pipeline']).aggregate([np.mean, np.std]).reset_index()
            asd_single = asd_single.loc[asd_single['pipeline'] == compare_pipeline]
            asd = asd_pool if pool_over_subjects else asd_single
            scatter_alpha = 1 if pool_over_subjects else 0.4

            for d in asd['dataset'].unique():
                replace_dict = {
                    d: f'{_ds_pretty(d, bold=True)} \n({asd_pool.loc[asd_pool["dataset"] == d]["samples"]["mean"].iloc[0]:1.0f}'
                       f'$\\pm${asd_pool.loc[asd_pool["dataset"] == d]["samples"]["std"].iloc[0]:1.1f})'
#G_mod_PT = np.array(extractFluxes(PT_Flux, 'Modelled_Flux_masked', 2))
#G_mod_OS = np.array(extractFluxes(OS_Flux, 'Modelled_Flux_masked', 2))
#
#EF_mod_PT     = H_mod_PT/LE_mod_PT
#EF_EC = H_EC/LE_EC

#%% Plotting timeseries
# Seven-colour base palette for the time-series figures.
# Six-colour alternative, kept for reference:
#colors = ['#3498db', '#2ecc71', '#f7cf33', '#fA8e63','#fd484d', '#51677b']
colors = [
    '#3498db', '#2ecc71', '#f7cf33', '#fc9d1d',
    '#fd484d', '#9b59b6', '#51677b',
]

pal = sns.color_palette(colors)

# Light and dark ramps for one red and one blue tone, given as HUSL triples.
# created from '#e74c3c' from colors and husl input from
# http://www.husl-colors.org/
_husl_red = (11.4, 97.4, 58.1)
_husl_blue = (242.2, 90.1, 60.2)
pal_red_light = sns.light_palette(_husl_red, input="husl")
pal_red_dark = sns.dark_palette(_husl_red, input="husl")
pal_blue_light = sns.light_palette(_husl_blue, input="husl")
pal_blue_dark = sns.dark_palette(_husl_blue, input="husl")

# Start from seaborn's defaults, then apply the poster theme with the custom
# palette, a large default figure size and 20-point labels/ticks/legend.
sns.set()
_poster_rc = {
    'axes.labelsize': 20.0,
    'figure.figsize': [22, 14],
    'legend.fontsize': 20.0,
    'xtick.labelsize': 20.0,
    'ytick.labelsize': 20.0,
}
sns.set(context="poster", style='darkgrid', palette=pal, rc=_poster_rc)

#%% First plot: Air temperature and humidity
fig = plt.figure()
ax1 = fig.add_subplot(311)
Ta_plot = plt.plot_date(ecTime[dateCol_start: dateCol_stop], Ta_C, '-', 
                        color = pal[0], alpha = 1.0, lw=2.5, 
    "outputrunlabel":
    "alldata2paymentfuns",
    "numbers_hours_next":
    [1, 1.5, 2],  # [1, 1.5, 2],#[.75, 1, 1.25, 1.5],#, 1.25, 1.5],
    "match_functions": [
        get_match_for_row_lastdriverinarea_dispatchtime,
        get_match_for_row_nextdrivermatched,
    ],
    "functions_to_run": [
        "plot_tripindifference_histogram",
        "plot_drivershift_earnings",
        "supplementary_facts",
        # "plot_tripindifference_variancebyaddmult",
    ],
    "plot_colors": [
        sns.light_palette("black", 2, input="xkcd").as_hex()[-1],
        sns.light_palette((210, 90, 60), 2, input="husl").as_hex()[-1],
    ],
    # "payment_functions": payment_functions_2months_withmin,
    # "payment_function_names": payment_function_2months_withmin_names,
    "skip_mimicfare_in_plot_stuff":
    False,
}

# Settings for the "pureonly" run: a shallow copy of the two-month server
# settings, overlaid first with the plotting overrides and then with the
# run label and payment functions specific to this run.
settings_plotting_puresurgeonly = copy.copy(settings_server_2months)
settings_plotting_puresurgeonly.update(plotting_differences)
settings_plotting_puresurgeonly.update({
    "outputrunlabel": "pureonly",
    "payment_functions": payment_functions_2months_pureonly,
    "payment_function_names": payment_function_2months_pureonly_names,
})