Exemplo n.º 1
0
def dropplot(data, feature='median_conservation', genome_len=10**4):
    """Draw one horizontal track per sequence and mark its drops on it.

    Every distinct value of ``data['seq_id']`` gets its own track, drawn as
    a black line from 1 to ``genome_len``. Each row of ``data`` is drawn on
    its sequence's track as two square markers, at ``start`` and ``end``,
    sized by ``drop_size`` and coloured by the rank of its ``feature`` value
    in a 'seismic' palette. The palette itself is shown in a second figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``seq_id``, ``start``, ``end``,
        ``drop_size``, ``product`` and the column named by ``feature``.
    feature : str
        Column whose sorted unique values select the marker colour.
    genome_len : int
        Right end of every track line.
    """
    # Rank of each distinct feature value, keyed by its string form, used as
    # an index into the palette below.
    vals = np.sort(data[feature].unique())
    mapping = {str(cons): i for i, cons in enumerate(vals)}

    # Two colours are enough for up to two values; otherwise use at least
    # eight so that the palette is always as long as the number of values.
    n_colors = 2
    if vals.shape[0] > 2:
        n_colors = max(8, vals.shape[0])

    with sns.plotting_context(
            rc={
                "font.size": 14,
                "axes.titlesize": 18,
                "axes.labelsize": 18,
                "xtick.labelsize": 14,
                "ytick.labelsize": 14,
                'y.labelsize': 16
            }):

        pal = sns.mpl_palette('seismic', n_colors)
        with sns.plotting_context(
                rc={
                    "font.size": 12,
                    "axes.labelsize": 15,
                    "xtick.labelsize": 14,
                    "ytick.labelsize": 12,
                    'aspect': 10
                }):
            _, ax = plt.subplots(figsize=(14, 4))
            # The sequence ids come from ``data``; the previous code read
            # them from an undefined name and raised NameError.
            for i, seq in enumerate(data['seq_id'].unique()):
                g_tag = data[data['seq_id'] == seq]
                ax.plot([1, genome_len], [i, i],
                        color="black",
                        alpha=0.7,
                        linewidth=4)
                for _, row in g_tag.iterrows():
                    ax.scatter([row['start'], row['end']], [i, i],
                               marker='s',
                               s=2 * row['drop_size'],
                               c=pal[mapping[str(row[feature])]],
                               label="{} {}".format(row['product'],
                                                    row['start']))

        plt.legend(bbox_to_anchor=[1.1, 1.1])
        sns.palplot(sns.mpl_palette('seismic', n_colors))
        plt.show()
Exemplo n.º 2
0
def plot_heatmap(X, y, top_n=10, metric='correlation', method='complete'):
    '''
    Draw a clustered heatmap of the highest-ranked features, with one
    column per sample and a colour strip marking each sample's class.

    :param X: list of dict (one record per sample)
    :param y: labels, one per record in X
    :param top_n: number of leading rows of the feature importance report
        whose features are plotted
    :param metric: distance metric passed to the clustering
    :param method: linkage method passed to the clustering
    :return: the object returned by ``sns.clustermap``
    '''
    sns.set(color_codes=True)

    importance = feature_importance_report(X, y)
    selected_features = importance[:top_n].index

    # Features as rows, samples as columns; columns are named by class.
    heatmap_data = pd.DataFrame.from_records(X)[selected_features].T
    heatmap_data.columns = y

    # One "Set2" colour per distinct class.
    classes = set(y)
    class_to_color = dict(zip(classes,
                              sns.mpl_palette("Set2", len(classes))))
    column_colors = [class_to_color[label] for label in y]

    return sns.clustermap(heatmap_data,
                          figsize=(22, 22),
                          z_score=0,
                          metric=metric,
                          method=method,
                          col_colors=column_colors)
Exemplo n.º 3
0
def qualitative_cmap(n_colors=17):
    """Build a colour list for categorical plots with many categories.

    The base is the 9-colour "Set1" palette converted to hex, with the
    sixth entry replaced by '#FFDE00'.


    Parameters
    ----------
    n_colors : int, default is 17
        The number of colors wanted, usually the number of categories.


    Returns
    -------
    list
        For ``n_colors <= 9`` the full 9-colour base list (it is not
        truncated). Otherwise the result of ``polylinear_gradient`` over the
        base list, requested with 17 steps when ``n_colors <= 17`` and with
        ``8 * ceil((n_colors - 1) / 8)`` steps beyond that.
    """
    base_colors = [rgb2hex(color)
                   for color in sns.mpl_palette("Set1", n_colors=9)]
    base_colors[5] = '#FFDE00'

    if n_colors <= 9:
        return base_colors

    steps = 17 if n_colors <= 17 else 8 * ceil((n_colors - 1) / 8)
    return polylinear_gradient(base_colors, steps)
Exemplo n.º 4
0
def plot_pca(pX, palette='Spectral', labels=None, ax=None, colors=None):
    """Scatter the first two columns of a PCA result, coloured by index label.

    Parameters
    ----------
    pX : pandas.DataFrame
        PCA result. Rows sharing an index label form one category; the first
        column is drawn on x and the second on y (selected by position).
    palette : str
        Name of the matplotlib palette the category colours are taken from.
    labels : sequence, optional
        Text drawn next to each point, one entry per row of ``pX`` in row
        order.
    ax : matplotlib axes, optional
        Axes to draw on; a new 6x6 inch figure is created when omitted.
    colors :
        Accepted for compatibility; the value is ignored and replaced by
        colours drawn from ``palette``.
    """

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    cats = pX.index.unique()
    colors = sns.mpl_palette(palette, len(cats)+1)
    print(len(cats), len(colors))
    for c, i in zip(colors, cats):
        # ``DataFrame.ix`` was removed in pandas 1.0. A boolean mask always
        # yields a DataFrame, even for a single matching row, so the columns
        # can be taken by position.
        rows = pX[pX.index == i]
        ax.scatter(rows.iloc[:, 0], rows.iloc[:, 1], color=c, s=90, label=i,
                   lw=.8, edgecolor='black', alpha=0.8)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    if labels is not None:
        for pos, (_, point) in enumerate(pX.iterrows()):
            ax.text(point.iloc[0]+.1, point.iloc[1]+.1, str(labels[pos]),
                    fontsize=9)
    ax.legend(fontsize=10, bbox_to_anchor=(1.5, 1.05))
    sns.despine()
    plt.tight_layout()
    return
Exemplo n.º 5
0
def plotClusterExpression(self, cluster_labels,
                          row_cluster=False, col_clusters=False,
                          yticks=False, xticks=False):
    """Plot the expression matrix as a heatmap with columns grouped by cluster.

    A copy of ``self.expression`` has its columns renamed to
    ``cluster_labels`` and sorted, so columns with the same label sit next
    to each other. A colour strip above the heatmap and a legend identify
    the clusters.

    :param cluster_labels: one label per column of ``self.expression``
    :param row_cluster: passed to ``sns.clustermap`` as ``row_cluster``
    :param col_clusters: passed to ``sns.clustermap`` as ``col_cluster``
    :param yticks: passed to ``sns.clustermap`` as ``yticklabels``
    :param xticks: passed to ``sns.clustermap`` as ``xticklabels``
    """
    # Work on a copy so that the stored matrix keeps its column names.
    expression = self.expression.copy()
    expression.columns = cluster_labels
    expression.sort_index(axis=1, inplace=True)

    # One "hsv" colour per distinct cluster label.
    distinct = set(expression.columns)
    lut = dict(zip(distinct, sns.mpl_palette("hsv", len(distinct))))
    col_colors = pd.DataFrame(expression.columns)[0].map(lut)

    expression.columns.name = "Cells clusters"
    expression.index.name = "Genes"
    sns_plot = sns.clustermap(expression, col_colors=col_colors.values,
                              col_cluster=col_clusters, row_cluster=row_cluster,
                              cmap="gnuplot2",
                              yticklabels=yticks, xticklabels=xticks)

    # Add legend for clusters: zero-sized bars only serve as legend handles.
    legend_labels = sorted(set(cluster_labels))
    for label in legend_labels:
        sns_plot.ax_col_dendrogram.bar(0, 0, color=lut[label],
                                       label=label, linewidth=0)
    # Build the legend once, after all handles exist, instead of rebuilding
    # it on every loop iteration.
    if legend_labels:
        sns_plot.ax_col_dendrogram.legend(loc="center", ncol=6)

    # Move color bar
    sns_plot.cax.set_position([.15, .2, .03, .45])
def figure_S2d():
    """Plot and save the per-sample cell type proportions of the
    "nonneuronal" dataset (figure S2d)."""
    adata = load_adata("nonneuronal")
    sample_props = get_sample_proportions(adata, "cluster_final",
                                          "sample_name")
    # First four "tab20" colours, in reverse order.
    reversed_palette = sns.mpl_palette("tab20", 4)[::-1]
    figure = plot_celltype_proportions(sample_props, reversed_palette)
    save_figure(figure, "figure_S02", "figS2d_nonneuronal_sample_props")
Exemplo n.º 7
0
 def __color_generator(palette, n):
     """Yield ``n`` colours taken from ``palette``.

     ``palette`` is either a collection of colours (list, tuple or set),
     which is cycled through as often as needed, or the name of a
     matplotlib palette, from which ``n`` colours are sampled. Subclasses of
     those types are accepted as well.

     Raises NanocomporeError for any other palette type. Because this is a
     generator, the error surfaces when the first colour is requested.
     """
     if isinstance(palette, (list, tuple, set)):
         for col in itertools.islice(itertools.cycle(palette), n):
             yield col
     elif isinstance(palette, str):
         sampled = sns.mpl_palette(palette, n)
         for i in range(n):
             yield sampled[i]
     else:
         raise NanocomporeError("Invalid palette type")
Exemplo n.º 8
0
class FavorGradColor:
    """Predefined colour gradients."""

    # Four HSL gradients joined end to end; the first two are reversed.
    # Segment lengths passed to gradient_hsl: 150 + 35 + 35 + 150.
    GreyBlueRed = (
        gradients.gradient_hsl(
            (230, 235, 240), (30, 40, 60), 150, value_scale=256)[::-1]
        + gradients.gradient_hsl(
            (245, 250, 255), (230, 235, 240), 35, value_scale=256)[::-1]
        + gradients.gradient_hsl(
            (255, 250, 245), (240, 230, 220), 35, value_scale=256)
        + gradients.gradient_hsl(
            (240, 230, 220), (100, 10, 10), 150, value_scale=256)
    )
    # 400 colours sampled from the reversed "RdBu" palette.
    BlueRed = sns.color_palette('RdBu_r', n_colors=400)
    # 400 colours of "Reds_d", in reverse order.
    DardRed = sns.mpl_palette("Reds_d", 400)[::-1]
Exemplo n.º 9
0
def rand_color_palette(N):
    """Return ``N`` distinct colours drawn at random, pool by pool.

    Colours are drawn without replacement from these pools in turn:
    "Set1" (9 colours), "Set3" (12), "Set2" (8), then the seaborn crayon
    colours. The switch points (9, 21, 29) are exactly where the previous
    pool runs empty.

    :param N: number of colours wanted
    :return: list of N colours
    :raises ValueError: if N exceeds the total number of available colours
    """
    col = []
    colors = sns.mpl_palette('Set1', 9)
    for j in range(N):
        if j == 9:
            colors = sns.mpl_palette('Set3', 12)

        elif j == 21:
            colors = sns.mpl_palette('Set2', 8)

        elif j == 29:
            colors = list(sns.crayons.keys())

        if not colors:
            raise ValueError(
                "Cannot provide {} distinct colours: all palettes are "
                "exhausted after {}.".format(N, j))

        picked = colors.pop(np.random.randint(0, high=len(colors)))
        if j >= 29:
            # ``picked`` is a crayon name. crayon_palette expects a list of
            # names; given a bare string it would iterate its characters.
            picked = sns.crayon_palette([picked])[0]
        col.append(picked)

    return col
Exemplo n.º 10
0
def plot_pca(pX,
             plot3d=False,
             palette='Spectral',
             labels=False,
             ax=None,
             colors=None,
             **kwargs):
    """Scatter a PCA result in 2D or 3D, coloured by index label.

    Parameters
    ----------
    pX : pandas.DataFrame
        PCA result. Rows sharing an index label form one category; the
        columns are selected by position.
    plot3d : bool
        When True, the third column is passed to ``ax.scatter`` as well.
    palette : str
        Palette name used when no ``colormap`` keyword is given.
    labels : bool
        When True, each point is annotated with its index label.
    ax : matplotlib axes, optional
        Axes to draw on; a new 6x6 inch figure is created when omitted.
    colors :
        Accepted for compatibility; the value is ignored.
    **kwargs
        ``fontsize``, ``ms``, ``linewidth`` and ``alpha`` are required;
        ``colormap`` is optional. Any other keyword is dropped.
    """

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    colormap = kwargs.get('colormap', palette)
    fs = kwargs['fontsize']
    ms = kwargs['ms'] * 12
    # Only these two keywords are forwarded to scatter.
    kwargs = {k: kwargs[k] for k in ('linewidth', 'alpha')}

    cats = pX.index.unique()
    import seaborn as sns
    colors = sns.mpl_palette(colormap, len(cats))

    for c, i in zip(colors, cats):
        # ``DataFrame.ix`` was removed in pandas 1.0. A boolean mask always
        # yields a DataFrame, even for a single matching row.
        rows = pX[pX.index == i]
        print(i, len(rows))
        if plot3d == True:
            ax.scatter(rows.iloc[:, 0],
                       rows.iloc[:, 1],
                       rows.iloc[:, 2],
                       color=c,
                       s=ms,
                       label=i,
                       edgecolor='black',
                       **kwargs)
        else:
            ax.scatter(rows.iloc[:, 0],
                       rows.iloc[:, 1],
                       color=c,
                       s=ms,
                       label=i,
                       edgecolor='black',
                       **kwargs)

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    if labels == True:
        for i, point in pX.iterrows():
            ax.text(point.iloc[0] + .3, point.iloc[1] + .3, str(i),
                    fontsize=9)
    # A legend is only drawn for fewer than 20 categories.
    if len(cats) < 20:
        ax.legend(fontsize=fs * .8)
    return
Exemplo n.º 11
0
def plot_by_label(X, palette='Set1'):
    """Color scatter plot by dataframe index label.

    Rows of ``X`` sharing an index label are drawn in one colour taken from
    ``palette``; the first column is x and the second is y, selected by
    position. A new 6x6 inch figure is created for the plot.
    """

    import seaborn as sns
    cats = X.index.unique()
    colors = sns.mpl_palette(palette, len(cats))
    f, ax = plt.subplots(figsize=(6, 6))
    for c, i in zip(colors, cats):
        # ``DataFrame.ix`` was removed in pandas 1.0. A boolean mask always
        # yields a DataFrame, even for a single matching row.
        rows = X[X.index == i]
        ax.scatter(rows.iloc[:, 0], rows.iloc[:, 1], color=c, s=100, label=i,
                   lw=1, edgecolor='black')
    ax.legend(fontsize=10)
    sns.despine()
    return
Exemplo n.º 12
0
def plot_by_label(X, palette='Set1'):
    """Color scatter plot by dataframe index label.

    Rows of ``X`` sharing an index label are drawn in one colour taken from
    ``palette``; the first column is x and the second is y, selected by
    position. A new 6x6 inch figure is created for the plot.
    """

    import seaborn as sns
    cats = X.index.unique()
    colors = sns.mpl_palette(palette, len(cats))
    f, ax = plt.subplots(figsize=(6, 6))
    for c, i in zip(colors, cats):
        # ``DataFrame.ix`` was removed in pandas 1.0. A boolean mask always
        # yields a DataFrame, even for a single matching row.
        rows = X[X.index == i]
        ax.scatter(rows.iloc[:, 0],
                   rows.iloc[:, 1],
                   color=c,
                   s=100,
                   label=i,
                   lw=1,
                   edgecolor='black')
    ax.legend(fontsize=10)
    sns.despine()
    return
def graph_utility_scaled_cap_colour(metrics: Metrics, path_prefix: str):
    """Plot normalised utility over time, one line per (source, capability).

    Each buffer entry contributes the point ``(t, utility / max_utility)``
    to the line of its source and capability. Lines of capability number 0
    are coloured from "Greens" and of number 1 from "Purples", with the
    shade chosen by the source's position in ``metrics.agent_names``. The
    figure is saved to ``{path_prefix}norm-utility-cc.pdf`` and then closed.
    """
    fig = plt.figure()
    ax = fig.gca()

    # Collect the normalised points per (source, capability), keeping the
    # order in which the buffers are listed.
    series = {}
    for entry in metrics.buffers:
        key = (entry.source, entry.capability)
        series.setdefault(key, []).append(
            (entry.t, entry.utility / entry.max_utility))

    # One sequential palette per capability number, one shade per agent.
    n_agents = len(metrics.agent_names)
    palettes = [
        seaborn.mpl_palette(cmap_name, n_colors=n_agents)
        for cmap_name in ("Greens", "Purples")
    ]
    capability_numbers = {int(name[1:]) for name in metrics.capability_names}
    palette_of = {number: palettes[number] for number in capability_numbers}

    for key in sorted(series):
        src, cap = key
        times, values = zip(*series[key])
        shade = palette_of[int(cap[1:])][metrics.agent_names.index(src)]
        ax.plot(times, values, label=f"{src} {cap}", color=shade)

    ax.set_ylim(0, 1)

    ax.set_xlabel('Time (secs)')
    ax.set_ylabel('Normalised Utility (\\%)')
    ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, symbol=''))

    ax.legend(bbox_to_anchor=(1.5, 1), loc="upper right", ncol=2)

    savefig(fig, f"{path_prefix}norm-utility-cc.pdf")

    plt.close(fig)
Exemplo n.º 14
0
def get_colormap(groups: Optional[List[int]] = None) -> np.ndarray:
    """Return one colour per entry of ``groups``, shaded within each group.

    Every group index gets its own sequential palette (group 0 "Blues",
    1 "Reds", 2 "Greens", 3 "Purples", 4 "Greys"). Members of a group
    receive successive colours of that palette in reversed order.

    :param groups: group index of each item; when None the current seaborn
        colour palette is returned unchanged (a palette list, not an array)
    :return: array of the colours, in the order of ``groups``
    :raises ValueError: if there are more groups than available palettes
    """
    if groups is None:
        return sns.color_palette()

    from collections import Counter

    # Color palette names
    names = ["Blues", "Reds", "Greens", "Purples", "Greys"]

    # Get group size (and check that group indices are consecutive)
    n = check_groups(groups)

    if n > len(names):
        raise ValueError("Too many groups for the available color palettes.")

    # A palette holds 6 colours by default. Groups with more members need a
    # longer palette, otherwise the iterator runs dry (StopIteration).
    # Smaller groups keep the default length so their colours are unchanged.
    members = Counter(groups)

    # Set up n color palettes, indexed by group: get an MPL palette by name
    # (as a list of RGB values), reverse the colour order and make it an
    # iterator so that the next unused colour can be requested.
    palettes = {
        g: iter(sns.mpl_palette(names[g],
                                n_colors=max(6, members[g]))[::-1])
        for g in range(n)
    }

    return np.asarray([next(palettes[group]) for group in groups])
Exemplo n.º 15
0
def create_cmap(name: str = None, palette_type: str = None, as_cmap: bool = True, **kwargs) -> Union[list, plt.Axes]:
    """Create a colormap or color palette object.

    Parameters
    ----------
    name
        Name of the pyrates colormap. If specified, palette_type will be ignored.
        Two names joined by '/' (e.g. 'pyrates_red/pyrates_blue') create a
        diverging colormap from the two individual colormaps. Any name not
        containing 'pyrates' is passed as first argument to the seaborn
        function selected by `palette_type`.
    palette_type
        Type of the seaborn color palette to use. Only necessary if no name is specified.
    as_cmap
        If true, a matplotlib colormap object will be returned. Else a seaborn color palette (list).
    kwargs
        Keyword arguments for the wrapped seaborn functions. `vmin`, `vmax`
        and `n_colors` are consumed here when a colormap is requested: if
        `vmax - vmin < 1`, the colormap is resampled at `n_colors` points
        between `vmin` and `vmax`. For diverging colormaps each of the three
        may be a pair with one value per side, and a keyword named like one
        of the two sub-colormaps may hold a dict of arguments for that side.

    Returns
    -------
    Union[list, plt.Axes]
        cmap or seaborn color palette.

    Raises
    ------
    ValueError
        If `name` contains 'pyrates' but is not a known pyrates colormap.

    """

    from seaborn import cubehelix_palette, dark_palette, light_palette, diverging_palette, hls_palette, husl_palette, \
        color_palette, crayon_palette, xkcd_palette, mpl_palette
    import matplotlib.colors as mcolors

    # `name` defaults to None, so it has to be checked before substring tests
    if name is not None and '/' in name:

        # create diverging colormap
        name1, name2 = name.split('/')
        vmin = kwargs.pop('vmin', 0.)
        vmax = kwargs.pop('vmax', 1.)
        # scalars (int as well as float) apply to both halves
        if isinstance(vmin, (int, float)):
            vmin = (vmin, vmin)
        if isinstance(vmax, (int, float)):
            vmax = (vmax, vmax)
        # per-side arguments if given, else the shared kwargs
        kwargs1 = kwargs.pop(name1, kwargs)
        kwargs2 = kwargs.pop(name2, kwargs)
        cmap1 = create_cmap(name1, **kwargs1, as_cmap=True)
        cmap2 = create_cmap(name2, **kwargs2, as_cmap=True)
        n = kwargs.pop('n_colors', 10)
        if isinstance(n, int):
            n = (n, n)
        # second half is reversed so both maps meet in the middle
        colors = np.vstack((cmap1(np.linspace(vmin[0], vmax[0], n[0])),
                            cmap2(np.linspace(vmin[1], vmax[1], n[1])[::-1])))
        return mcolors.LinearSegmentedColormap.from_list('cmap_diverging', colors)

    # extract colorrange
    if as_cmap:
        vmin = kwargs.pop('vmin', 0.)
        vmax = kwargs.pop('vmax', 1.)
        n = kwargs.pop('n_colors', 10)
        crange = np.linspace(vmin, vmax, n) if vmax-vmin < 1. else None
    else:
        crange = None

    if name is not None and 'pyrates' in name:

        # create pyrates colormap
        if name == 'pyrates_red':
            cmap = cubehelix_palette(as_cmap=as_cmap, start=-2.0, rot=-0.1, **kwargs)
        elif name == 'pyrates_green':
            cmap = cubehelix_palette(as_cmap=as_cmap, start=2.5, rot=-0.1, **kwargs)
        elif name == 'pyrates_blue':
            cmap = dark_palette((210, 90, 60), as_cmap=as_cmap, input='husl', **kwargs)
        elif name == 'pyrates_yellow':
            cmap = dark_palette((70, 95, 65), as_cmap=as_cmap, input='husl', **kwargs)
        elif name == 'pyrates_purple':
            cmap = dark_palette((270, 50, 55), as_cmap=as_cmap, input='husl', **kwargs)
        else:
            # previously fell through and failed later with UnboundLocalError
            raise ValueError("Unknown pyrates colormap: {}".format(name))

    else:

        # create seaborn colormap
        if palette_type == 'cubehelix':
            cmap = cubehelix_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'dark':
            cmap = dark_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'light':
            cmap = light_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'hls':
            cmap = hls_palette(name, **kwargs)
        elif palette_type == 'husl':
            cmap = husl_palette(name, **kwargs)
        elif palette_type == 'diverging':
            cmap = diverging_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'crayon':
            cmap = crayon_palette(name, **kwargs)
        elif palette_type == 'xkcd':
            cmap = xkcd_palette(name, **kwargs)
        elif palette_type == 'mpl':
            cmap = mpl_palette(name, **kwargs)
        else:
            cmap = color_palette(name, **kwargs)

    # apply colorrange
    # NOTE(review): this calls `cmap`, so it only works for branches that
    # returned a colormap object rather than a palette list.
    if crange is not None:
        cmap = mcolors.LinearSegmentedColormap.from_list(name, cmap(crange))

    return cmap
Exemplo n.º 16
0
import seaborn as sns
# Activate seaborn's default theme.
sns.set()
# Show eight colours sampled from the "Set2" palette as a swatch strip.
sns.palplot(sns.mpl_palette("Set2", 8))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import cmocean as cmo

# Colormap blending from light grey ("0.9") to xkcd "bright red"; available
# under both names.
red_colormap = color_map = sns.blend_palette(
    ["0.9", sns.xkcd_rgb["bright red"]], as_cmap=True)
# Twenty colours from each of the reversed tab20 / tab20b / tab20c palettes;
# the palettes below are hand-picked selections from these three lists.
tab20a = sns.mpl_palette("tab20_r", 20)
tab20b = sns.mpl_palette("tab20b_r", 20)
tab20c = sns.mpl_palette("tab20c_r", 20)
main_palette = (tab20c[17:] + [tab20a[1]] + tab20c[6:8] + tab20b[17:] +
                tab20b[5:7] + [tab20a[8]] + [tab20b[2]] + [tab20b[1]] +
                tab20b[9:11] + tab20c[9:12] + [tab20c[14]])
nonneuronal_palette = (tab20c[8:12] + tab20b[14:16] + tab20b[4:7] +
                       tab20c[18:] + tab20b[9:11] + tab20b[1:3] +
                       tab20b[16:19:2] + [tab20a[8]])
neuronal_palette = (tab20c[13:16:2] + tab20c[9:12] + tab20b[1:3] +
                    tab20b[4:7] + tab20c[16:] + [tab20a[1]] + [tab20b[9]] +
                    [tab20a[5]] + tab20b[13:15] + [tab20a[9]])
# Two-colour palette, presumably for the neuronal vs. non-neuronal split
# (judging by the name).
neuronal_nonneuronal_palette = sns.xkcd_palette(["cerulean", "kelly green"])

# cmocean "balance" colormap sampled at 256 points with the second argument
# 0.9 (looks like an alpha value -- TODO confirm).
heatmap_cmap = cmo.tools.cmap(cmo.cm.balance(np.linspace(0, 1, 256), 0.9))

# Three-colour palette, presumably for GABA vs. Glut vs. Hdc groups
# (judging by the name).
gaba_vs_glut_vs_hdc_palette = sns.xkcd_palette(
    ["yellow orange", "medium blue", "tree green"])
        ransac.fit(np.array(user_count).reshape(-1, 1), np.array(answerer_count).reshape(-1, 1))
        inlier_mask = ransac.inlier_mask_
        r_answerer.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(answerer_count).reshape(-1, 1)[inlier_mask]))

        ransac = linear_model.RANSACRegressor()
        ransac.fit(np.array(user_count).reshape(-1, 1), np.array(commenter_count).reshape(-1, 1))
        inlier_mask = ransac.inlier_mask_
        r_commenter.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(commenter_count).reshape(-1, 1)[inlier_mask]))


        current_site = row[0]
        
        asker_count[:] = []
        answerer_count[:] = []
        commenter_count[:] = []
        user_count[:] = []
        
        asker_count.append(int(row[6]))
        answerer_count.append(int(row[7]))
        commenter_count.append(int(row[12]))
        user_count.append(int(row[14]))

# One column of R^2 scores per user role.
df = pd.DataFrame({'Asker': r_asker, 'Answerer': r_answerer, 'Commenter': r_commenter})
# The letter-value plot is called ``boxenplot`` in current seaborn; older
# releases only provide it as ``lvplot``.
letter_value_plot = getattr(sns, 'boxenplot', None) or sns.lvplot
ax = letter_value_plot(data=df, palette=sns.mpl_palette("gist_yarg"))
ax.set(ylabel='Coeff. of Determination, $R^2$')
ax.set_yticks(np.arange(0.0, 1.0, 0.05), minor=True)
sns.despine(offset=10, trim=True, bottom=True)
# ``sns.plt`` no longer exists in seaborn; use pyplot directly.
plt.tight_layout()
plt.savefig('User_to_Roles_R_Squared_LV.pdf')

Exemplo n.º 19
0
# Vectorise the profile names, then embed them in 2D with t-SNE and save a
# scatter plot coloured by the 'he_she' column.
t_sne = make_pipeline(ExtractNames(), DictVectorizer())
clf0 = t_sne.fit_transform(speaker_gender["name_in_profile"])

from sklearn.manifold import TSNE

tsne_model = TSNE(perplexity=40,
                  n_components=2,
                  init='pca',
                  n_iter=2500,
                  random_state=23)
# ``toarray()`` gives a plain ndarray; ``todense()`` gives an ``np.matrix``,
# which recent scikit-learn versions refuse.
new_values = tsne_model.fit_transform(clf0.toarray())

import seaborn as sns

colors = sns.mpl_palette("Dark2", 7)
points = np.array(new_values)
import matplotlib.pyplot as plt

# 'he_she' values are used as indices into the 7-colour palette.
colors_p = speaker_gender['he_she'].map(lambda x: colors[x]).tolist()
# One scatter call for all points instead of one call per point.
plt.scatter(points[:, 0], points[:, 1], color=colors_p)

# plt.colorbar()
plt.savefig('t_sne_2d.png', dpi=200)
plt.clf()
plt.cla()
plt.close()

tsne_model_3d = TSNE(perplexity=40,
                     n_components=3,
Exemplo n.º 20
0
import pandas as pd
import matplotlib.pyplot as plt
# Confusion matrix counts for the sixteen classes A..P, one row per class.
array = [
    [500, 0, 0, 0, 0, 0, 0, 0, 39, 0, 3, 0, 1, 1, 0, 7],  # A
    [3, 432, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # B
    [1, 121, 513, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # C
    [1, 0, 0, 522, 31, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 34],  # D
    [0, 0, 0, 33, 532, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 17],  # E
    [0, 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # F
    [3, 3, 2, 0, 0, 0, 559, 0, 0, 0, 2, 0, 0, 0, 0, 0],  # G
    [0, 0, 0, 0, 0, 0, 0, 501, 7, 0, 0, 0, 0, 0, 0, 0],  # H
    [1, 0, 0, 0, 0, 0, 0, 0, 1918, 0, 0, 0, 0, 0, 0, 1],  # I
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 37, 0, 0, 0, 0, 0, 0],  # J
    [10, 0, 0, 1, 0, 0, 1, 0, 0, 0, 358, 6, 0, 1, 0, 46],  # K
    [18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 578, 0, 0, 0, 2],  # L
    [1, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 1063, 0, 1, 0],  # M
    [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 188, 1, 0],  # N
    [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 395, 1],  # O
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 165],  # P
]
# The same single-letter labels name both the rows and the columns.
df_cm = pd.DataFrame(array,
                     index=list("ABCDEFGHIJKLMNOP"),
                     columns=list("ABCDEFGHIJKLMNOP"))
plt.figure(figsize=(13, 10))
# Annotated heatmap of the integer counts, coloured with 20 colours sampled
# from the reversed "Set3" palette, written out as SVG.
sn.heatmap(df_cm, annot=True, fmt="d", linewidths=.5,
           cmap=sn.mpl_palette("Set3_r", 20))
plt.savefig("output.svg")
# Store each point of new_values under the label at the same position.
for i, p in enumerate(new_values):
    dic_point[labels[i]] = p

# x, y = zip(*new_values)

# for i in range(len(x)):
#     plt.scatter(x[i], y[i])
#     plt.annotate(labels[i], xy=(x[i], y[i]), xytext=(5, 2),
#                  textcoords='offset points', ha='right', va='bottom')
# plt.show()
from scipy.spatial import ConvexHull

# http://matplotlib.org/examples/color/colormaps_reference.html
import seaborn as sns

# One "Dark2" colour per entry of sentence_list.
colors = sns.mpl_palette("Dark2", len(sentence_list))

from shapely.geometry import box, Polygon


def sent_plot(i):
    sent = sentence_list[i]
    all_w = list(
        set([w for w in sent.lower().split(' ') if w in embeddings_index]))
    points = np.array([dic_point[w] for w in all_w])
    hull = ConvexHull(points)
    plt.plot(points[:, 0], points[:, 1], 'o', c=colors[i])
    for simplex in hull.simplices:
        plt.plot(points[simplex, 0], points[simplex, 1], 'k-')
    plt.plot(points[hull.vertices, 0],
             points[hull.vertices, 1],
Exemplo n.º 22
0
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")
graycolors = sns.mpl_palette('Set2', 2)
# One figure is opened per column; silence the "too many figures" warning.
plt.rcParams.update({'figure.max_open_warning': 0})

df = pd.read_csv("./bytecode_instruction_costs.csv")
# Distributions of all columns, drawn onto the current figure.
for col in df.columns:
    sns.distplot(df[col])

# One figure per column with its mean, median and mode marked.
for i, col in enumerate(df.columns):
    plt.figure(i)
    df_col = df[col]
    median = df_col.median()
    # First mode; ``Series.get_values()`` was removed from pandas.
    mode = df_col.mode().iloc[0]
    mean = df_col.mean()
    # Formatted so that Python 2 and Python 3 print the same text.
    print('Instruction:  {}'.format(col))
    print('\tMean:  {}'.format(mean))
    print('\tMedian:  {}'.format(median))
    print('\tMode:  {}'.format(mode))
    # Each line carries its own label, so the legend cannot mix them up
    # (labels taken from a dict did not follow the drawing order).
    plt.axvline(median, color='r', linestyle='--', label='Median')
    plt.axvline(mode, color='g', linestyle='-', label='Mode')
    plt.axvline(mean, color='b', linestyle='-', label='Mean')
    plt.legend()
    sns.distplot(df[col], norm_hist=True)

plt.show()
Exemplo n.º 23
0
def main():
    """Embed document representations in 2D and show them as a scatter plot.

    Representations come either from precomputed embeddings
    (``args.embeddings``) or from a model (``args.model``), and are reduced
    to two components with t-SNE or PCA (``args.decomposition``). Points are
    coloured by ``args.hue``, styled by test level and sized by length.

    :raises ValueError: if neither embeddings nor a model is given, or if
        the decomposition is neither "tsne" nor "pca"
    """
    args = parse_args()
    meta = load_split(args.split)
    # Assign the result instead of replacing in place on a column selection,
    # which is not guaranteed to write back to ``meta``.
    meta["lang"] = meta["lang"].replace(iso639_3)

    if args.embeddings:
        representations = get_fingerprints(args.embeddings, meta.filename)
    elif args.model:
        representations = get_model_representations(args.model, args.split)
    else:
        raise ValueError("Either embeddings or a model must be given.")

    logger.info("Computing embeddings ...")
    if args.decomposition == "tsne":
        decomposer = TSNE(n_components=2, verbose=True)
    elif args.decomposition == "pca":
        decomposer = PCA(n_components=2)
    else:
        raise ValueError(
            "Unknown decomposition: {!r}".format(args.decomposition))
    embedded = decomposer.fit_transform(representations)
    meta["Component 1"] = embedded[:, 0]
    meta["Component 2"] = embedded[:, 1]
    # Human-readable column names; they end up in the legend.
    meta.rename(
        {
            "cefr": "CEFR",
            "testlevel": "Test level",
            "num_tokens": "Length",
            "lang": "L1"
        },
        axis="columns",
        inplace=True)
    meta["Test level"] = meta["Test level"].replace(
        {
            "Språkprøven": "IL test",
            "Høyere nivå": "AL test"
        })

    fig, ax = (plt.gcf(), plt.gca())
    if args.hue == "CEFR":
        palette = sns.mpl_palette('cool', 7)
        hue_order = CEFR_LABELS
    else:
        palette = None
        hue_order = None
    sns.scatterplot(
        x="Component 1",
        y="Component 2",
        hue=args.hue,
        style="Test level",
        data=meta,
        ax=ax,
        size="Length",
        palette=palette,
        hue_order=hue_order,
    )
    # Hide all ticks and tick labels. These must be booleans: the string
    # "off" is truthy and would leave them switched on.
    ax.tick_params(
        axis="both",
        which="both",
        bottom=False,
        top=False,
        labelbottom=False,
        right=False,
        left=False,
        labelleft=False,
    )
    # Split the legend: the first eight entries go to the left of the axes,
    # the remaining ones to the right.
    handles, labels = ax.get_legend_handles_labels()
    cefr_legend = ax.legend(handles[:8],
                            labels[:8],
                            loc="center right",
                            bbox_to_anchor=(-0.1, 0.5))
    ax.legend(handles[8:],
              labels[8:],
              loc="center left",
              bbox_to_anchor=(1.05, 0.5))
    # The second ax.legend call replaced the first legend; add it back.
    ax.add_artist(cefr_legend)
    fig.set_size_inches(5, 3)
    plt.tight_layout()
    plt.show()
Exemplo n.º 24
0
def plot_curve(data: list,
               fname: str,
               class_counts: tuple,
               is_roc: bool = True,
               min_score_fraction: float = 0.5):
    """
    Plot ROC or pr curves for all tools at a given bin

    The figure is written to ``fname + '.pdf'``. Tools whose name contains
    "S-CAP" are left out. The seaborn palette is changed for the plot and
    the seaborn defaults are restored afterwards.

    :param list data: ROC analysis results for
    all the tools at the given intronic bin, one record
    per tool with the fields tool, fraction_nan, label,
    thresholds, the two curve coordinates and the summary
    score (roc_auc or ap_score)
    :param str fname: Output basename
    :param tuple class_counts: Number of positive and
    negative variants at the given intronic bin
    :param bool is_roc: Whether analysis refers to
    ROC curve. If `False`, precision-recall curves are
    drawn. Default: `True`
    :param float min_score_fraction: Largest
    `fraction_nan` a tool may have for its curve to be
    drawn; tools above it are dropped. Default: `0.5`
    """
    if is_roc:
        colnames = [
            'tool', 'fraction_nan', 'label', 'thresholds',
            'True Positive Rate (TPR)', 'False Positive Rate (FPR)', 'roc_auc'
        ]
        to_explode = [
            'thresholds', 'True Positive Rate (TPR)',
            'False Positive Rate (FPR)'
        ]
    else:
        colnames = [
            'tool', 'fraction_nan', 'label', 'thresholds', 'Recall',
            'Precision', 'ap_score'
        ]
        to_explode = ['thresholds', 'Recall', 'Precision']

    df_metrics = pd.DataFrame.from_records(data, columns=colnames)
    # Turn the per-tool lists into one row per curve point.
    df_metrics = df_metrics.reset_index().apply(lambda x: x.explode()
                                                if x.name in to_explode else x)

    if is_roc:
        df_metrics['True Positive Rate (TPR)'] = pd.to_numeric(
            df_metrics['True Positive Rate (TPR)'])
        df_metrics['False Positive Rate (FPR)'] = pd.to_numeric(
            df_metrics['False Positive Rate (FPR)'])
        # Legend entry "<label> (auROC=<score>)"; the opening parenthesis
        # was missing.
        df_metrics["tool_with_roc_auc"] = df_metrics["label"] + " (auROC=" + \
                                          df_metrics["roc_auc"].round(2).map(str) + ")"
        hue = "tool_with_roc_auc"
        x = "False Positive Rate (FPR)"
        y = "True Positive Rate (TPR)"
        df_metrics = df_metrics.sort_values('roc_auc', ascending=False)
    else:
        df_metrics['Recall'] = pd.to_numeric(df_metrics['Recall'])
        df_metrics['Precision'] = pd.to_numeric(df_metrics['Precision'])
        # Legend entry "<label> (AP=<score>)"; the opening parenthesis was
        # missing.
        df_metrics["tool_with_ap_score"] = df_metrics["label"] + " (AP=" + \
                                           df_metrics["ap_score"].round(2).map(str) + ")"
        hue = "tool_with_ap_score"
        x = "Recall"
        y = "Precision"
        df_metrics = df_metrics.sort_values('ap_score', ascending=False)

    df_metrics = df_metrics[df_metrics['fraction_nan'] <= min_score_fraction]

    # Since S-CAP has several different reference
    # thresholds, S-CAP is removed from these analyses
    df_metrics = df_metrics[~df_metrics.tool.str.contains("S-CAP")]

    # If many tools to plot, change color palette
    n_tools = df_metrics.tool.unique().size
    if n_tools > 12:
        sns.set_palette(sns.mpl_palette("magma", n_tools))
    else:
        sns.set_palette(sns.color_palette("Paired"))

    ax = sns.lineplot(x=x, y=y, data=df_metrics, hue=hue)
    ax.set_aspect(1.15)
    plt.title("N pos = {}; N neg = {}".format(class_counts[0],
                                              class_counts[1]))
    plt.legend(bbox_to_anchor=(1.1, 1), loc=2, borderaxespad=0.)
    plt.ylim(0, 1.05)
    plt.tight_layout()
    out = fname + '.pdf'
    plt.savefig(out)
    plt.close()
    sns.reset_defaults()
Exemplo n.º 25
0
# Maps each ``cp_*`` colType key to the palette name that is handed to
# ``sns.color_palette``. Keys are the public spellings callers already use.
_CP_PALETTE_NAMES = {
    # CP_Misc
    "cp_Accent": "Accent",
    "cp_cubehelix": "cubehelix",
    "cp_flag": "flag",
    "cp_Paired": "Paired",
    "cp_Pastel1": "Pastel1",
    "cp_Pastel2": "Pastel2",
    "cp_tab10": "tab10",
    "cp_tab20": "tab20",
    "cp_tab20c": "tab20c",
    # CP_Rainbow
    "cp_gistncar": "gist_ncar",
    "cp_gistrainbow": "gist_rainbow",
    "cp_hsv": "hsv",
    "cp_nipyspectral": "nipy_spectral",
    "cp_rainbow": "rainbow",
    # CP_Grad2
    "cp_afmhot": "afmhot",
    "cp_autumn": "autumn",
    "cp_binary": "binary",
    "cp_bone": "bone",
    "cp_cividis": "cividis",
    "cp_cool": "cool",
    "cp_copper": "copper",
    "cp_hot": "hot",
    "cp_inferno": "inferno",
    "cp_magma": "magma",
    "cp_mako": "mako",
    "cp_plasma": "plasma",
    "cp_PuBuGn": "PuBuGn",
    "cp_Purples": "Purples",
    "cp_RdPu": "RdPu",
    "cp_rocket": "rocket",
    "cp_spring": "spring",
    "cp_summer": "summer",
    "cp_viridis": "viridis",
    "cp_winter": "winter",
    "cp_Wistia": "Wistia",
    "cp_YlOrRd": "YlOrRd",
    # CP_Grad3
    "cp_BrBG": "BrBG",
    "cp_brg": "brg",
    "cp_bwr": "bwr",
    "cp_CMRmap": "CMRmap",
    "cp_gistearth": "gist_earth",
    "cp_giststern": "gist_stern",
    "cp_gnuplot": "gnuplot",
    "cp_gnuplot2": "gnuplot2",
    "cp_icefire": "icefire",
    "cp_ocean": "ocean",
    "cp_PiYG": "PiYG",
    "cp_PRGn": "PRGn",
    "cp_prism": "prism",
    "cp_RdBu": "RdBu",
    "cp_RdGy": "RdGy",
    "cp_RdYlBu": "RdYlBu",
    "cp_RdYlGn": "RdYlGn",
    "cp_seismic": "seismic",
    "cp_Spectral": "Spectral",
    # "cp_terrein" is the historical (misspelled) key; it is kept so existing
    # callers keep working, but it now resolves to the real "terrain" colormap.
    "cp_terrein": "terrain",
    "cp_terrain": "terrain",
    "cp_vlag": "vlag",
}


def genSpecColors(numCols, colType):
    """Return ``numCols`` colors for the palette selected by ``colType``.

    :param numCols: number of colors to generate
    :param colType: palette selector. ``"chp*"`` keys use
        ``sns.cubehelix_palette``, ``"mplp_*"`` keys use ``sns.mpl_palette``
        and ``"cp"`` / ``"cp_*"`` keys use ``sns.color_palette``. ``"mc"``
        and any unrecognised value fall back to evenly spaced HSV hues.
    :return: for seaborn-backed keys, whatever the seaborn call returns;
        for the HSV fallback, a list of ``[r, g, b]`` lists whose channels
        are scaled by 255 (note the different scale from the seaborn path).
    """
    # CHP
    if colType == "chp":
        return sns.cubehelix_palette(numCols)
    if colType == "chp_rnd4":
        return sns.cubehelix_palette(numCols, rot=-.4)
    if colType == "chp_s2d8_rd1":
        return sns.cubehelix_palette(numCols, start=2.8, rot=.1)
    # MPLP
    if colType == "mplp_GnBu_d":
        return sns.mpl_palette("GnBu_d", numCols)
    if colType == "mplp_seismic":
        return sns.mpl_palette("seismic", numCols)
    # CP: current default palette, or a named one from the lookup table
    if colType == "cp":
        return sns.color_palette(n_colors=numCols)
    palette_name = _CP_PALETTE_NAMES.get(colType)
    if palette_name is not None:
        return sns.color_palette(palette_name, n_colors=numCols)

    # "mc" and every unknown colType: hues spread evenly over the color wheel
    # at full saturation and 0.75 value, converted to RGB and scaled by 255.
    return [
        [255 * channel
         for channel in colorsys.hsv_to_rgb(idx / numCols, 1, 0.75)]
        for idx in range(numCols)
    ]
Exemplo n.º 26
0
    6: '#bcc2f2',
    7: '#eebcbc',
    8: '#f1f0c0',
    9: '#d2ffe7',
    10: '#caf3a6',
    11: '#ffdf55',
    12: '#ef77aa',
    13: '#d6dcff',
    14: '#d2f5f0'
}
# Integer code per node group; used below to index into the palette.
# (Categorical.from_array / .labels were removed from pandas; the
# constructor and .codes are the supported equivalents.)
df_nodes['c'] = pd.Categorical(df_nodes.group).codes
plt.figure(figsize=(25, 25))
# Number of distinct groups == number of palette colors needed.
group_len = df_nodes['group'].nunique()

import seaborn as sns
colors = sns.mpl_palette("tab20", group_len)

# Keyword options describing how nodes and edges are drawn.
# NOTE(review): presumably passed to a networkx draw call - confirm at the
# call site, which is not visible here.
options = {
    'nodelist': df_nodes['name'].tolist(),
    # Node sizes are enlarged by 10%.
    'node_size':
    [nodesize * 1.1 for nodesize in df_nodes['nodesize'].tolist()],
    #'node_color': [colors[group - 1] for group in df_nodes['group'].tolist()],
    'node_color': [colors[c] for c in df_nodes['c'].tolist()],
    'edgelist': list(zip(df_edges['source'], df_edges['target'])),
    # Edge widths are the edge values scaled down by a factor of 10.
    'width': [value * 0.1 for value in df_edges['value'].tolist()],
    'edge_color': 'gray',
    'with_labels': True,
    'alpha': 1,
    'font_weight': 'regular',
}
"""
Exemplo n.º 27
0
    fig.subplots_adjust(wspace=0.02)
    plt.rcParams['text.usetex'] = False
    plt.rcParams['font.family'] = "sans-serif"
    plt.rcParams['font.sans-serif'] = "Helvetica"
    plt.rcParams['axes.labelsize'] = 16
    plt.rcParams['font.size'] = 16
    plt.rcParams['legend.fontsize'] = 16
    plt.rcParams['xtick.labelsize'] = 16
    plt.rcParams['ytick.labelsize'] = 16

    ax = fig.add_subplot(111)
    from matplotlib import cm

    import seaborn as sns
    sns.set()
    colours = sns.mpl_palette("Set2", 4)

    for i, lambdax in enumerate([0.0, 1.0, 3.0]):

        ax.plot(gsw_vals,
                y[i, :],
                label="$\lambda$ = %d" % (lambdax),
                color=colours[i],
                lw=2)
        if lambdax > 0.0:
            ax.plot(gsw_max[i], y_max[i], marker="o", color=colours[i])

    from matplotlib.ticker import MaxNLocator
    ax.yaxis.set_major_locator(MaxNLocator(5))
    ax.xaxis.set_major_locator(MaxNLocator(5))
    ax.set_xlabel(r"g$_{\mathrm{s}}$ (mol m$^{-2}$ s$^{-1}$)")
Exemplo n.º 28
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from sklearn.metrics import r2_score

## Figure layout
plt.rcParams.update({'font.size': 15, 'figure.figsize': (11, 5)})
sns.set(style='ticks', palette="deep")
sns.palplot(sns.mpl_palette("RdBu"))

### Load data sets
# NOTE(review): `resp` is not used in the visible part of this script;
# kept in case later code reads it.
resp = requests.get('http://api.statbank.dk/v1/data/FOLK1A/CSV?lang=en')

dst_url_a = 'http://api.statbank.dk/v1/data/FT/CSV?lang=en&TID=*'
data = pd.read_csv(dst_url_a, sep=';')
data.rename(columns={'INDHOLD': 'Population', 'TID': 'Year'}, inplace=True)

# Add empty rows for every 5th year from 2020 to 2050 that lies beyond the
# last year present in the data set.
extra_years = [y for y in np.arange(2020, 2055, 5) if y > data.Year.max()]
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
# and keeps the same (non-reset) index as append did.
data = pd.concat([data, pd.DataFrame(extra_years, columns=['Year'])])

# Boolean row masks selecting the years up to 1975 and up to 2018.
p1975 = data.Year < 1976
p2018 = data.Year < 2019


### Define a plot function
def make_pop_plot(period=p1975):
    f, ax = plt.subplots(figsize=(10, 5))

    ax.scatter(data[period].Year,
Exemplo n.º 29
0
def plot_features(
    args, sim_path: str, real_path: str, vcf_path: str, out_dir_path: str
):
    """Generate pairwise plot of simulated and 'real' features

    For every record in the VCF that has both simulated and real feature
    rows, a 2x4 grid of plots is written to ``<out_dir_path>/<descriptor>.pdf``.

    Args:
        args (argparse.Namespace): Additional command line arguments
        sim_path (str): Path to NPSV features from 'simulated' data
        real_path (str): Path to NPSV features from 'real' data
        vcf_path (str): Path to input VCF file
        out_dir_path (str): Directory for plot files
    """
    # Create output directory if it doesn't exist
    os.makedirs(out_dir_path, exist_ok=True)
    logging.info("Generating plots in %s", out_dir_path)

    # Group the data to prepare for querying variants
    sim_data = pd.read_table(sim_path, na_values=".", dtype={"#CHROM": str, "AC": int})
    add_derived_features(sim_data)
    sim_data = sim_data.groupby(VARIANT_COL)

    real_data = pd.read_table(real_path, na_values=".", dtype={"#CHROM": str})
    add_derived_features(real_data)
    real_data = real_data.groupby(VARIANT_COL)

    # Depending on feature extractor, not all features may be available
    available_features = set(sim_data.obj) & set(real_data.obj)
    features = [feature for feature in FEATURE_COL if feature in available_features]

    # Styling is identical for every variant, so build it once.
    colors = sns.mpl_palette("Set1", 3) + [(0, 0, 0)]  # Actual data is black
    markers = {"REF": "o", "HET": "o", "HOM": "o", "Act": "s"}

    vcf_reader = vcf.Reader(filename=vcf_path)
    for record in vcf_reader:
        variant = (
            record.CHROM,
            int(record.POS),
            int(record.sv_end),
            record.var_subtype,
        )

        try:
            current_sim = sim_data.get_group(variant)
            current_real = real_data.get_group(variant)
        except KeyError:
            # No data available for this variant, skipping
            logging.debug(
                "No simulated or real data found for %s. Skipping.",
                variant_descriptor(record),
            )
            continue

        # Work on a copy so the assignment does not write into a slice of the
        # grouped frame; the scalar marks every 'real' row with the sentinel
        # AC of -1 regardless of how many rows the group has.
        current_real = current_real.copy()
        current_real["AC"] = -1

        # Remove outliers with Z score above threshold
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            current_sim = (
                current_sim.groupby("AC")
                .apply(filter_by_zscore, features, 5)
                .reset_index(drop=True)
            )

        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        plot_data = pd.concat([current_sim, current_real])
        # Don't yet know how to encode AC directly (need strings for plotting)
        plot_data["AC"] = pd.Categorical(
            plot_data["AC"], categories=[0, 1, 2, -1]
        ).rename_categories(["REF", "HET", "HOM", "Act"])

        fig, ((ax11, ax12, ax13, ax14), (ax21, ax22, ax23, ax24)) = plt.subplots(
            2, 4, figsize=(14, 8)
        )

        # Keyword arguments shared by all three scatter plots.
        scatter_kwargs = dict(
            data=plot_data, hue="AC", style="AC", markers=markers, palette=colors
        )
        sns.scatterplot(ax=ax11, x="REF_READ", y="ALT_READ", **scatter_kwargs)
        _set_axis_limits(ax11)
        sns.scatterplot(
            ax=ax12, x="REF_WEIGHTED_SPAN", y="ALT_WEIGHTED_SPAN", **scatter_kwargs
        )
        _set_axis_limits(ax12)
        sns.scatterplot(ax=ax13, x="INSERT_LOWER", y="INSERT_UPPER", **scatter_kwargs)
        plot_hist(ax=ax14, col="CLIP_PRIMARY", data=plot_data, colors=colors)

        plot_hist(ax=ax21, col="COVERAGE", data=plot_data, colors=colors)
        plot_hist(ax=ax22, col="DHFC", data=plot_data, colors=colors)
        plot_hist(ax=ax23, col="DHBFC", data=plot_data, colors=colors)
        plot_hist(ax=ax24, col="DHFFC", data=plot_data, colors=colors)

        # Make plots square
        for ax in fig.get_axes():
            ax.set_aspect(1.0 / ax.get_data_ratio(), adjustable='box')

        fig.suptitle("{}:{}-{}".format(*variant), size=16)
        fig.subplots_adjust(top=0.95, wspace=0.3, hspace=0.3)

        # Save plot to file name based on variant descriptor
        description = variant_descriptor(record)
        logging.info("Plotting variant into %s.pdf", description)
        fig.savefig(os.path.join(out_dir_path, f"{description}.pdf"))
        # One figure is created per variant; close it so figures do not
        # accumulate in memory over a large VCF.
        plt.close(fig)
Exemplo n.º 30
0
def plot_PCA(X,
             cmap='Spectral',
             colors=None,
             dims=(0, 1),
             ax=None,
             annotate=None,
             legend=True,
             **kwargs):
    '''
    Scale the numeric columns of X, run a PCA on them and scatter-plot two
    of the resulting components, one color per distinct index value.

        :X:  dataframe with index as categories
        :cmap:  palette name used when colors is not given
        :colors:  optional sequence of colors, one per category
        :dims:  pair of component indices to plot
        :ax:  axes to draw on; a new 8x8 figure is created when None
        :annotate:  optional sequence of labels, one per row of X (read by
                    position); also stored in the 'lab#el' column of the result
        :legend:  draw a legend to the right of the (shrunken) axes if truthy
        :kwargs:  passed through to ax.scatter
        :return: dataframe of the transformed data, indexed like X
    '''

    from sklearn import preprocessing
    # The private module path sklearn.decomposition.pca no longer exists in
    # current scikit-learn; the public import works on all versions.
    from sklearn.decomposition import PCA
    X = X._get_numeric_data()
    S = pd.DataFrame(preprocessing.scale(X), columns=X.columns)
    # PCA cannot extract more components than min(n_samples, n_features),
    # so cap the historical default of 4 for small inputs.
    n_components = min(4, S.shape[0], S.shape[1])
    pca = PCA(n_components=n_components)
    # Fit once and project in the same step (previously fitted twice).
    projected = pca.fit_transform(S)
    print('explained variance %s' % pca.explained_variance_ratio_)
    pX = pd.DataFrame(projected, index=X.index)

    ### graph
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    cats = pX.index.unique()
    if colors is None:
        colors = sns.mpl_palette(cmap, len(cats))

    y1, y2 = dims
    # One scatter call per category so each gets its own legend entry.
    for color, cat in zip(colors, cats):
        ax.scatter(pX.loc[cat, y1],
                   pX.loc[cat, y2],
                   color=color,
                   label=cat,
                   edgecolor='black',
                   **kwargs)

    if annotate is not None:
        # Kept for backward compatibility: callers receive the labels as an
        # extra column on the returned frame.
        pX['lab#el'] = annotate
        for pos, (_, row) in enumerate(pX.iterrows()):
            ax.annotate(annotate[pos], (row[y1], row[y2]),
                        xycoords='data',
                        xytext=(2, 5),
                        textcoords='offset points',
                        fontsize=12)

    ax.set_xlabel("X[%s]" % y1)
    ax.set_ylabel("X[%s]" % y2)
    if legend:
        # Shrink the axes to leave room for the legend on the right.
        ax.set_position([0.1, 0.1, 0.5, 0.8])
        ax.legend(loc="best", bbox_to_anchor=(1.0, .9))
    ax.set_title("PCA")
    return pX