Example #1
def test_arrow():
    p = (ggplot(df, aes('x', 'y', xend='xend', yend='yend')) +
         geom_segment(aes('x+2', xend='xend+2'), arrow=arrow(), size=2) +
         geom_segment(
             aes('x+4', xend='xend+4'), arrow=arrow(ends='first'), size=2) +
         geom_segment(
             aes('x+6', xend='xend+6'), arrow=arrow(ends='both'), size=2))

    assert p == 'arrow'
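
These tests reference a module-level fixture df with x/y/xend/yend columns (plus a z column used by the aesthetics test below). A minimal sketch of such a fixture, as an assumption about what the test suite provides:

import pandas as pd

# Hypothetical fixture: a few diagonal unit segments plus a z column for
# colour/linetype/size/alpha mappings.
n = 4
df = pd.DataFrame({
    'x': list(range(n)),
    'y': list(range(n)),
    'xend': [v + 1 for v in range(n)],
    'yend': [v + 1 for v in range(n)],
    'z': list(range(n)),
})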
Example #2
def test_aesthetics():
    p = (
        ggplot(df, aes('x', 'y', xend='xend', yend='yend')) +
        geom_segment(size=2) +
        # Positive slope segments
        geom_segment(aes(yend='yend+1', color='factor(z)'), size=2) +
        geom_segment(aes(yend='yend+2', linetype='factor(z)'), size=2) +
        geom_segment(aes(yend='yend+3', size='z'), show_legend=False) +
        geom_segment(aes(yend='yend+4', alpha='z'), size=2, show_legend=False))

    assert p + _theme == 'aesthetics'
Example #3
def plot(solu, k):
    """Generate one frame of the four-bar mechanism animation."""

    print("Frame:", k)

    sol = solu[k:k + 1]

    p = (ggplot(sol) +
         # MAIN LINKAGE
         geom_segment(aes(x = 0, y = 0, xend = sol.Ro4[k].real, yend = sol.Ro4[k].imag)) +
         geom_point(aes(x=0, y=0), shape = 'o', size = 3) +
         geom_point(aes(x = sol.Ro4[k].real, y = sol.Ro4[k].imag), shape = 'o', size = 3) +
         # 2ND LINKAGE
         geom_segment(aes(x = 0, y = 0, xend = sol.Ra[k].real, yend = sol.Ra[k].imag)) +
         geom_point(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag), shape = 'o', size = 3) +
         # AP LINKAGE
         geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag, xend = sol.Rpa[k].real, yend = sol.Rpa[k].imag)) +
         geom_point(aes(x = sol.Rpa[k].real, y = sol.Rpa[k].imag), shape = 'o', size = 3) +
         # 3RD LINKAGE
         geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag, xend = sol.Rba[k].real, yend = sol.Rba[k].imag)) +
         geom_point(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag), shape = 'o', size = 3) +
         # 4TH LINKAGE
         geom_segment(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag, xend = sol.Ro4[k].real, yend = sol.Ro4[k].imag)) +
         geom_point(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag), shape = 'o', size = 3) +
         # NODES IDENTIFICATION
         annotate("text", x = 0, y = -20, label = "$O_1$") +
         annotate("text", x = sol.Ro4[k].real, y = sol.Ro4[k].imag -20, label = "$O_4$") +
         annotate("text", x = sol.Ra[k].real+10, y = sol.Ra[k].imag, label = "$A$") +
         annotate("text", x = sol.Rba[k].real +20, y = sol.Rba[k].imag -10, label = "$B$") +
         annotate("text", x = sol.Rpa[k].real, y = sol.Rpa[k].imag -40, label = "$P$") +
         # ACCELERATION ARROWS (remove these if you do not want acceleration information)
         geom_segment(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag,
                          xend = sol.Rba[k].real + sol.Aba[k].real * ACC_SCALE,
                          yend = sol.Rba[k].imag + sol.Aba[k].imag * ACC_SCALE),
                      colour='red', arrow=arrow()) + # Point B
         geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag,
                          xend = sol.Ra[k].real + sol.Aa[k].real * ACC_SCALE,
                          yend = sol.Ra[k].imag + sol.Aa[k].imag * ACC_SCALE),
                      colour='red', arrow=arrow()) + # Point A
         geom_segment(aes(x = sol.Rpa[k].real, y = sol.Rpa[k].imag,
                          xend = sol.Rpa[k].real + sol.Apaa[k].real * ACC_SCALE,
                          yend = sol.Rpa[k].imag + sol.Apaa[k].imag * ACC_SCALE),
                      colour='red', arrow=arrow()) + # Point P
         # ACCELERATION TEXTS (comment these out if you do not want acceleration information)
         # text wrapped in '$ $' is rendered by plotnine as LaTeX
         annotate("text", x = sol.Rba[k].real-30, y = sol.Rba[k].imag+10, label = f'${np.absolute(sol.Aba[k])/1000:.2f}~m/s^2$', colour='red') +
         annotate("text", x = sol.Ra[k].real+20, y = sol.Ra[k].imag-20, label = f'${np.absolute(sol.Aa[k])/1000:.2f}~m/s^2$', colour='red') +
         annotate("text", x = sol.Rpa[k].real+10, y = sol.Rpa[k].imag+20, label = f'${np.absolute(sol.Apaa[k])/1000:.2f}~m/s^2$', colour='red') +
         # TIME IDENTIFICATION
         annotate("label", x = 120, y = -80, label = f'Time: ${sol.time[k]:.2f}~s$', alpha = 1) +
         #
         labs(x='$x~[mm]$', y='$y~[mm]$') +
         coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y) + # Fixes the plot limits so frames stay the same size; comment out if undesired.
         theme_bw() # Cleaner look than the default theme.
         )

    return p
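
plot() relies on module-level constants ACC_SCALE, SCALE_X and SCALE_Y and is called once per frame. A minimal driver sketch, with made-up constant values (not from the source):

import os

ACC_SCALE = 0.001        # hypothetical scale factor for the acceleration arrows
SCALE_X = (-100, 300)    # hypothetical x-limits [mm]
SCALE_Y = (-150, 150)    # hypothetical y-limits [mm]

def render_frames(solu, outdir="frames"):
    # Save one PNG per frame; these can later be stitched into an animation.
    os.makedirs(outdir, exist_ok=True)
    for k in range(len(solu)):
        plot(solu, k).save(os.path.join(outdir, f"frame_{k:04d}.png"), verbose=False)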
Example #4
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    mean_len_df[' '] = 'Mean Length'

    plt = (ggplot(len_df) + aes(x='x', fill='Method', y='..density..') +
           geom_histogram(binwidth=2, position='identity', alpha=.6) +
           geom_text(aes(x='x', y=.22, label='x', color='Method'),
                     mean_len_df,
                     inherit_aes=False,
                     format_string='{:.1f}',
                     show_legend=False) +
           geom_segment(aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
                        mean_len_df,
                        inherit_aes=False,
                        color='black') + scale_linetype_manual(['dashed']) +
           facet_wrap('Task') + xlim(0, 20) + ylim(0, .23) +
           xlab('Example Length') + ylab('Frequency') +
           scale_color_manual(values=COLORS) +
           scale_fill_manual(values=COLORS) + theme_fs() + theme(
               aspect_ratio=1,
               legend_title=element_blank(),
               legend_position=legend_position,
               legend_box=legend_box,
           ))

    return plt
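
create_length_plot() assumes a tidy len_df with Task, Method and x (example length) columns, plus project-level COLORS and theme_fs helpers. A hedged sketch of the expected input, with stand-ins for those globals:

import pandas as pd
from plotnine import theme_bw

COLORS = ['#1b9e77', '#d95f02']  # hypothetical palette, one colour per Method
theme_fs = theme_bw              # stand-in for the project's theme helper

len_df = pd.DataFrame({
    'Task': ['qa', 'qa', 'sum', 'sum'],
    'Method': ['ours', 'baseline', 'ours', 'baseline'],
    'x': [8, 11, 6, 9],
})
fig = create_length_plot(len_df)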
Example #5
def plot_ROC(label_list, pred_list, names=None, **args):
    """
    複数の ROC 曲線をプロットする 
    :param: label_list: 正解ラベルリストの配列. [(y1, y2, ...), (y1, y2, ...)]  のようにして与える,  pred_list に対応させる
    :param: pred_list: 予測確率リストの配列. label_list と同じ長さにすること
    :param: names=None: モデルの名称. None または同じ長さにすること. 指定しない場合,
            ラベルの組が 2~3  ならば ['train', 'valid', 'test'] を与える. 3より多い場合は通し番号にする.
    :param args: sklearn.metrics.roc_curve に与えるパラメータ
    :return: plotnine オブジェクト
    """
    if names is None:
        if len(label_list) == 2:
            names = ('train', 'test')
        elif len(label_list) == 3:
            names = ('train', 'valid', 'test')
        else:
            names = list(range(len(label_list)))
    roc = [roc_curve(y, p, **args) for y, p in zip(label_list, pred_list)]
    fpr, tpr = tuple([list(chain.from_iterable(x)) for x in zip(*roc)][0:2])
    models = chain.from_iterable([[name] * l for name, l in zip(names, [len(x) for x, y, _ in roc])])
    d_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'model': models})
    return ggplot(
            d_roc,
            aes(x='fpr', y='tpr', group='model', color='model')
    ) + geom_segment(x=0, y=0, xend=1, yend=1, linetype=':', color='grey'
    ) + geom_line(
    ) + scale_color_discrete(breaks=names
    ) + labs(x='false positive rate', y='true positive rate'
    ) + coord_equal(ratio=1, xlim=[0, 1], ylim=[0, 1]
    ) + theme_classic() + theme(figure_size=(4, 4))
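
A hedged usage sketch for plot_ROC (the function assumes roc_curve from sklearn.metrics, chain from itertools, pandas as pd and the plotnine API are in scope); the labels and scores below are synthetic:

import numpy as np

rng = np.random.default_rng(0)
y_train = rng.integers(0, 2, 200)
y_test = rng.integers(0, 2, 200)
p_train = np.clip(y_train * 0.6 + rng.normal(0.2, 0.2, 200), 0, 1)
p_test = np.clip(y_test * 0.5 + rng.normal(0.25, 0.25, 200), 0, 1)

g = plot_ROC([y_train, y_test], [p_train, p_test])  # names defaults to ('train', 'test')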
Example #6
def add_mirna_g(g, df, str_name, str_start, str_end, dis_pos, l_s, l_e, l_score=None):
    # print(str_name, str_start, str_end, dis_pos, l_s, l_e)
    df[str_start] = pd.Series(l_s)
    df[str_end] = pd.Series(l_e)

    g += pt.annotate("text", x=0, y=dis_pos, label=str_name)
    g += pt.geom_errorbarh(df, pt.aes(xmin=str_start, y=dis_pos, xmax=str_end, color='mi_name'))
    g += pt.geom_segment(df, pt.aes(x=str_start, y=dis_pos, yend=0, xend=str_start, color='mi_name'))
    if l_score:
        # print(l_score)
        # pd.options.display.float_format = '{:.1f}'.format
        score_column_name = 'score' + str_name
        # print(l_score, score_column_name, str_start, dis_pos)
        # np.float was removed in NumPy 1.24; use the builtin float instead
        df[score_column_name] = pd.Series(l_score, dtype=float).map('{:.0f}'.format)

        g += pt.geom_text(df, pt.aes(x=str_start, y=dis_pos, label=score_column_name, color='mi_name'),
                          nudge_x=0.1, nudge_y=0.1)  # ,adjust_text=adjust_text_dict)
Example #7
def plot_contour(df, var=None, out="out", level="level", aux=False):
    r"""Plot 2d contours

    Plot contours.

    Usually called as a dispatch from plot_auto().

    Args:
        var (array of str): Variables for plot axes
        out (str): Name of output identifier column
        level (str): Name of level identifier column
        aux (bool): Auxiliary variables present?

    Returns:
        ggplot: Contour image

    Examples:

        >>> import grama as gr
        >>> from grama.models import make_cantilever_beam

    """
    # Check invariants
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")
    if aux:
        raise ValueError(
            "Autoplot plot_contour not designed to handle auxiliary variables. " +
            "Regenerate contour data with fixed auxiliary variables, " +
            "or try creating a manual plot."
        )

    return (
        df
        >> ggplot()
        + geom_segment(
            aes(
                var[0],
                var[1],
                xend=var[0]+"_end",
                yend=var[1]+"_end",
                linetype=out,
                color=level,
            )
        )
    )
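
A hedged usage sketch for plot_contour: each row is one contour segment, with start coordinates in the var columns and end coordinates in the matching "_end" columns (the data below is made up; plotnine's ggplot accepts a DataFrame piped in with >>, as used above):

import pandas as pd

df = pd.DataFrame({
    "x": [0.0, 0.5], "y": [0.0, 0.5],
    "x_end": [0.5, 1.0], "y_end": [0.5, 1.0],
    "out": ["g", "g"], "level": [0.1, 0.2],
})
fig = plot_contour(df, var=["x", "y"])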
Example #8
def plot_calibration(label_list, pred_list, names=None, **args):
    """
    カリブレーションカーブを複数描く.
    :param: label_list: 正解ラベルリストの配列. [(y1, y2, ...), (y1, y2, ...)]  のようにして与える,  pred_list に対応させる
    :param: pred_list: 予測確率リストの配列. label_list と同じ長さにすること
    :param: names=None: モデルの名称. None または同じ長さにすること. 指定しない場合, ラベルの組が 2~3  ならば ['train', 'valid', 'test'] を与える. 3より多い場合は通し番号にする.
    :param: args: sklearn.metrics.roc_curve に与えるパラメータ.
        :param: strategy='quantile': 分割方法. 'quantile' または 'uniform'
        :param: n_bins=10: ビン数.
        :param: normalize=False: 予測確率の0-1正規化が必要かどうか
    :return: plotnine オブジェクト
    TODO: 入力データがすごい偏ってるときの表示範囲
    """
    if names is None:
        if len(label_list) == 2:
            names = ('train', 'test')
        elif len(label_list) == 3:
            names = ('train', 'valid', 'test')
        elif len(label_list) == 1:
            names = ('model',)
        else:
            names = list(range(len(label_list)))
    # **args is an empty dict (never None) when no extra arguments are given
    args.setdefault('strategy', 'quantile')
    args.setdefault('n_bins', 10)
    calib = [calibration_curve(y, p, **args) for y, p in zip(label_list, pred_list)]
    frac, pred = tuple([list(chain.from_iterable(x)) for x in zip(*calib)][0:2])
    models = chain.from_iterable([[name] * l for name, l in zip(names, [len(x) for x, y in calib])])
    d_calib = pd.DataFrame({'pred': pred, 'frac': frac, 'model': models})
    return ggplot(
            d_calib,
            aes(x='pred', y='frac', group='model', color='model')
    ) + geom_segment(x=0, y=0, xend=1, yend=1, linetype=':', color='grey'
    ) + geom_line(
    ) + geom_point(
    ) + scale_color_discrete(breaks=names
    ) + labs(x='mean estimated probability', y='fraction of positives'
    ) + coord_equal(ratio=1) + theme_classic() + theme(figure_size=(4, 4))
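
A hedged usage sketch for plot_calibration, reusing the synthetic y_train/y_test and p_train/p_test arrays from the plot_ROC example above; strategy and n_bins are forwarded to sklearn's calibration_curve:

g = plot_calibration([y_train, y_test], [p_train, p_test], strategy='uniform', n_bins=10)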
Example #9
def lollipop(data):
    data = data.sort_values(by=['probability']).reset_index(drop=True)
    custom_order = pd.Categorical(data['label'], categories=data.label)
    data = data.assign(label_custom=custom_order)


    p = (ggplot(data, aes('label_custom', 'probability'))
         + geom_point(color="#88aa88", size=4)
         + geom_segment(aes(x='label_custom', y=0, xend='label_custom', yend='probability'),
                        color="#88aa88")
         + coord_flip(expand=True)
         + theme_minimal()
         + labs(x="", y="probability", title="Most Likely Object")
         + guides(title_position="left")
         + theme(plot_title=element_text(size=20, face="bold", ha="right")))

    fig = p.draw()
    figfile = BytesIO()
    plt.savefig(figfile, format='png', bbox_inches='tight')
    figfile.seek(0)  # rewind to beginning of file
    figdata_png = base64.b64encode(figfile.getvalue()).decode()
    return p, figdata_png
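
lollipop() expects a DataFrame with label and probability columns (and assumes BytesIO, base64 and matplotlib.pyplot as plt are imported at module level). A sketch with made-up classifier output:

import pandas as pd

data = pd.DataFrame({
    'label': ['cat', 'dog', 'car', 'tree'],
    'probability': [0.62, 0.21, 0.11, 0.06],
})
p, png_b64 = lollipop(data)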
Example #10
def plot_cor(df):
    # drop missing correlations
    out = df[~df['corr'].isnull()]
    # add pair column
    out = out.assign(pair=out.col_1 + '&' + out.col_2)
    # add a sign column
    sign = ((out['corr'] > 0).astype('int')).to_list()
    sign = [['Negative', 'Positive'][i] for i in sign]
    out['sign'] = sign
    #out  = out.sort_values('pair', ascending = False).reset_index(drop = True)
    # add ind column
    out['ind'] = [out.shape[0] - i for i in range(out.shape[0])]
    # plot using bands
    ggplt = (
        p9.ggplot(data=out, mapping=p9.aes(x='pair', y='corr'))
        + p9.geom_hline(
            yintercept=0,
            linetype="dashed",
            color="#c2c6cc"
        )
        + p9.geom_rect(
            alpha=0.4,
            xmin=out.ind.values - 0.4,
            xmax=out.ind.values + 0.4,
            ymin=out.lower.values,
            ymax=out.upper.values,
            fill=[['b', '#abaeb3'][int(x > 0.05)] for x in out.p_value]
        )
        + p9.geom_segment(
            x=out.ind.values - 0.4,
            y=out['corr'].values,
            xend=out.ind.values + 0.4,
            yend=out['corr'].values
        )
        + p9.coord_flip()
        + p9.ylim(np.min(out.lower.values), np.max(out.upper.values))
        + p9.labs(x="", y="Correlation")
    )
    return ggplt
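
plot_cor() expects one row per variable pair, with columns col_1, col_2, corr, lower, upper and p_value (a correlation estimate, its confidence band, and its p-value). A sketch with made-up values:

import pandas as pd

df = pd.DataFrame({
    'col_1': ['a', 'a', 'b'],
    'col_2': ['b', 'c', 'c'],
    'corr': [0.45, -0.12, 0.08],
    'lower': [0.30, -0.30, -0.10],
    'upper': [0.60, 0.06, 0.26],
    'p_value': [0.001, 0.20, 0.38],
})
fig = plot_cor(df)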
Example #11
def scatter_cell_cycle(
    adata,
    scores=["signatures", "components"][0],
    size=1.5,
    alpha=1,
    curvature_shrink=1,
    lab_ypos=2,
):
    """Plots cell cycle signatures vs pseudotime

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.cell_cycle_phase`.
    scores: str
        A string indicating what to plot as cell cycle scores against pseudotime.
        If 'signatures', standard S-phase, G2-M and Histones signatures are used;
        if 'components', the 4 cell cycle related components are used.
    size: float
        Controls the point size of the plot.
    alpha: float
        A value between 0 and 1. Controls point transparency.
    lab_ypos: float
        Controls the y-axis position of the cell cycle phase annotation.

    Returns
    --------------
    A plotnine scatter plot of pseudotime vs 3 cell cycle signatures.

    """
    if scores == "signatures":
        y = ["S-phase", "G2-M", "Histones"]
        colors = ["#66c2a5", "#fc8d62", "#8da0cb", "black"]
    elif scores == "components":
        _add_compScores(adata)
        y = ["G1/S comp", "G2/M+ comp", "G2/M- comp", "Histones comp"]
        colors = ["#66c2a5", "#fc8d62", "#8da0cb", "#e5c494", "black"]

    time_scatter = scatter_pseudotime(
        adata, y=y, size=size, alpha=alpha) + labs(
            x="Pseudotime", y="Signature scores", color="Signature")

    # -- Add cell cycle annotations
    if "cell_cycle_division" in adata.uns["scycle"]:
        cc_divs = adata.uns["scycle"]["cell_cycle_division"]

        # -- Curvature data
        curv_data = cc_divs["curvature"].copy()  # copy so adata.uns is not mutated in place
        curv = curv_data["curvature"].values
        cvz = zscore(curv) / curvature_shrink
        cvz = cvz - np.max(cvz)
        curv_data.loc[:, "curvature"] = cvz
        curv_data.loc[:, "signature"] = "Curvature"

        # -- Peak data (for segments)
        gr_min = np.min(curv_data["curvature"])
        pk_data = curv_data[curv_data["ispeak"] == "peak"].copy()
        pk_data["ymin"] = gr_min

        # -- Cell cycle annotation
        cc_phase = pd.DataFrame(
            dict(
                starts=[
                    None,
                    cc_divs["s_start"],
                    cc_divs["g2_start"],
                    cc_divs["m_start"],
                ],
                labels=["G1", "S", "G2", "M"],
                labpos=[
                    np.mean([0, cc_divs["s_start"]]),
                    np.mean([cc_divs["s_start"], cc_divs["g2_start"]]),
                    np.mean([cc_divs["g2_start"], cc_divs["m_start"]]),
                    np.mean([cc_divs["m_start"], 1]),
                ],
                y=lab_ypos,
            ))

        cell_cycle_plt = (
            time_scatter +
            geom_point(aes("pseudotime", "curvature", color="signature"),
                       data=curv_data) +
            geom_line(aes("pseudotime", "curvature"), data=curv_data) +
            scale_color_manual(values=colors) + geom_segment(
                aes(x="pseudotime",
                    xend="pseudotime",
                    y="ymin",
                    yend="curvature"),
                linetype="dotted",
                data=pk_data,
            ) + geom_vline(
                aes(xintercept="starts"), linetype="dashed", data=cc_phase) +
            geom_text(aes(x="labpos", y="y", label="labels"), data=cc_phase))

        return cell_cycle_plt
    else:
        return time_scatter
Example #12
def PlotPG(X,
           TargetPG,
           BootPG=None,
           PGCol="",
           PlotProjections="none",
           GroupsLab=None,
           PointViz="points",
           Main='',
           p_alpha=.3,
           PointSize=None,
           NodeLabels=None,
           LabMult=1,
           Do_PCA=True,
           DimToPlot=[0, 1],
           VizMode=("Target", "Boot")):
    '''
    work in progress, only basic plotting supported
    #' Plot data and principal graph(s)
    #'
    #' @param X numerical 2D matrix, the n-by-m matrix with the position of n m-dimensional points
    #' @param TargetPG the main principal graph to plot
    #' @param BootPG A list of principal graphs that will be considered as bootstrapped curves
    #' @param PGCol string, the label to be used for the main principal graph
    #' @param PlotProjections string, the plotting mode for the node projection on the principal graph.
    #' It can be "none" (no projections will be plotted), "onNodes" (the projections will indicate how points are associated to nodes),
    #' and "onEdges" (the projections will indicate how points are projected on edges or nodes of the graph)
    #' @param GroupsLab factor or numeric vector. A vector indicating either a category or a numeric value associated with
    #' each data point
    #' @param PointViz string, the modality to show points. It can be 'points' (data will be represented as dots) or
    #' 'density' (the data will be represented by a field)
    #' @param Main string, the title of the plot
    #' @param p.alpha numeric between 0 and 1, the alpha value of the points. Lower values will produce more transparent points
    #' @param PointSize numeric vector, a vector indicating the size to be associated with each node of the graph.
    #' If NA points will have size 0.
    #' @param NodeLabels string vector, a vector indicating the label to be associated with each node of the graph
    #' @param LabMult numeric, a multiplier controlling the size of node labels
    #' @param Do_PCA boolean, should the nodes of the principal graph be used to derive principal component projections and
    #' rotate the space? If TRUE the plots will use the "EpG PC" as dimensions, if FALSE, the original dimensions will be used.
    #' @param DimToPlot an integer vector specifying the PCs (if Do_PCA=TRUE) or dimensions (if Do_PCA=FALSE) to plot. All
    #' combinations will be considered, so, for example, if DimToPlot = 1:3, three plots will be produced.
    #' @param VizMode vector of string, describing the ElPiGraphs to visualize. Any combination of "Target" and "Boot".
    #'
    #' @return
    #' @export
    #'
    #' @examples'''

    if len(PGCol) == 1:
        PGCol = [PGCol] * len(TargetPG['NodePositions'])

    if GroupsLab is None:
        GroupsLab = ["N/A"] * len(X)

    #    levels(GroupsLab) = c(levels(GroupsLab), unique(PGCol))

    if PointSize is not None:
        if (len(PointSize) == 1):
            PointSize = [PointSize] * len(TargetPG['NodePositions'])

    if (Do_PCA):
        # Perform PCA on the nodes
        mv = TargetPG['NodePositions'].mean(axis=0)
        data_centered = TargetPG['NodePositions'] - mv
        vglobal, NodesPCA, explainedVariances = PCA(data_centered)
        # Rotate the data using eigenvectors
        BaseData = np.dot((X - mv), vglobal)
        DataVarPerc = np.var(BaseData, axis=0) / np.sum(np.var(X, axis=0))

    else:
        NodesPCA = TargetPG['NodePositions']
        BaseData = X
        DataVarPerc = np.var(X, axis=0) / np.sum(np.var(X, axis=0))

    # Base Data

    AllComb = list(combinations(DimToPlot, 2))

    PlotList = list()

    for i in range(len(AllComb)):

        Idx1 = AllComb[i][0]
        Idx2 = AllComb[i][1]

        df1 = pd.DataFrame.from_dict(
            dict(PCA=BaseData[:, Idx1], PCB=BaseData[:, Idx2],
                 Group=GroupsLab))
        # Initialize plot

        Initialized = False

        if (PointViz == "points"):
            p = (plotnine.ggplot(data=df1,
                                 mapping=plotnine.aes(x='PCA', y='PCB')) +
                 plotnine.geom_point(alpha=p_alpha,
                                     mapping=plotnine.aes(color='Group')))
            Initialized = True

        if (PointViz == "density"):
            p = (plotnine.ggplot(data=df1,
                                 mapping=plotnine.aes(x='PCA', y='PCB')) +
                 plotnine.stat_density_2d(
                     contour=True,
                     alpha=.5,
                     geom='polygon',
                     mapping=plotnine.aes(fill='..level..')))
            Initialized = True

    #             p = sns.kdeplot(df1['PCA'], df1['PCB'], cmap="Reds", shade=True, bw=.15)

        if (not Initialized):
            raise ValueError("Invalid point representation selected")

        # Target graph

        tEdg = dict(x=[], y=[], xend=[], yend=[], Col=[])
        # use j so the outer loop index i is not shadowed
        for j in range(len(TargetPG['Edges'][0])):
            Node_1 = TargetPG['Edges'][0][j][0]
            Node_2 = TargetPG['Edges'][0][j][1]
            if PGCol:
                if PGCol[Node_1] == PGCol[Node_2]:
                    tCol = "ElPiG" + str(PGCol[Node_1])

                if PGCol[Node_1] != PGCol[Node_2]:
                    tCol = "ElPiG Multi"

                # a list cannot be indexed with a tuple; check both endpoints explicitly
                if PGCol[Node_1] == "None" or PGCol[Node_2] == "None":
                    tCol = "ElPiG None"

            tEdg['x'].append(NodesPCA[Node_1, Idx1])
            tEdg['y'].append(NodesPCA[Node_1, Idx2])
            tEdg['xend'].append(NodesPCA[Node_2, Idx1])
            tEdg['yend'].append(NodesPCA[Node_2, Idx2])
            if PGCol:
                tEdg['Col'].append(tCol)
            else:
                tEdg['Col'].append(1)
        if (Do_PCA):
            # per-component share of variance; dividing the sum by itself
            # would always give 100
            TarPGVarPerc = explainedVariances / explainedVariances.sum() * 100
        else:
            TarPGVarPerc = np.var(TargetPG['NodePositions'], axis=0) / np.sum(
                np.var(TargetPG['NodePositions'], axis=0))

        df2 = pd.DataFrame.from_dict(tEdg)

        # Replicas

        #         if(BootPG is not None) and ("Boot" is in VizMode):
        #             AllEdg = lapply(1:length(BootPG), function(i){
        #             tTree = BootPG[[i]]

        #             if(Do_PCA):
        #                 RotData = t(t(tTree$NodePositions) - NodesPCA$center) %*% NodesPCA$rotation
        #             else: {
        #                 RotData = tTree$NodePositions
        #             }

        #             tEdg = t(sapply(1:nrow(tTree$Edges$Edges), function(i){
        #               c(RotData[tTree$Edges$Edges[i, 1],c(Idx1, Idx2)], RotData[tTree$Edges$Edges[i, 2],c(Idx1, Idx2)])
        #             }))

        #             cbind(tEdg, i)
        #             })

        #             AllEdg = do.call(rbind, AllEdg)

        #             df3 = data.frame(x = AllEdg[,1], y = AllEdg[,2], xend = AllEdg[,3], yend = AllEdg[,4], Rep = AllEdg[,5])

        #             p = p + plotnine.geom_segment(data = df3, mapping = plotnine.aes(x=x, y=y, xend=xend, yend=yend),
        #                                          inherit.aes = False, alpha = .2, color = "black")

        # Plot projections

        if (PlotProjections == "onEdges"):

            if (Do_PCA):
                Partition = PartitionData(X=BaseData,
                                          NodePositions=NodesPCA,
                                          MaxBlockSize=100000000,
                                          SquaredX=np.sum(BaseData**2,
                                                          axis=1,
                                                          keepdims=1),
                                          TrimmingRadius=float('inf'))[0]
                OnEdgProj = project_point_onto_graph(X=BaseData,
                                                     NodePositions=NodesPCA,
                                                     Edges=TargetPG['Edges'],
                                                     Partition=Partition)
            else:
                Partition = PartitionData(
                    X=BaseData,
                    NodePositions=TargetPG['NodePositions'],
                    MaxBlockSize=100000000,
                    SquaredX=np.sum(BaseData**2, axis=1, keepdims=1),
                    TrimmingRadius=float('inf'))[0]
                OnEdgProj = project_point_onto_graph(
                    X=BaseData,
                    NodePositions=TargetPG['NodePositions'],
                    Edges=TargetPG['Edges'],
                    Partition=Partition)

            ProjDF = pd.DataFrame.from_dict(
                dict(X=BaseData[:, Idx1],
                     Y=BaseData[:, Idx2],
                     Xend=OnEdgProj['X_projected'][:, Idx1],
                     Yend=OnEdgProj['X_projected'][:, Idx2],
                     Group=GroupsLab))

            p = p + plotnine.geom_segment(
                data=ProjDF,
                mapping=plotnine.aes(
                    x='X', y='Y', xend='Xend', yend='Yend', col='Group'),
                inherit_aes=False)

        elif (PlotProjections == "onNodes"):

            if (Do_PCA):
                Partition = PartitionData(X=BaseData,
                                          NodePositions=NodesPCA,
                                          MaxBlockSize=100000000,
                                          SquaredX=np.sum(BaseData**2,
                                                          axis=1,
                                                          keepdims=1),
                                          TrimmingRadius=float('inf'))[0]
                ProjDF = pd.DataFrame.from_dict(
                    dict(X=BaseData[:, Idx1],
                         Y=BaseData[:, Idx2],
                         Xend=NodesPCA[Partition, Idx1],
                         Yend=NodesPCA[Partition, Idx2],
                         Group=GroupsLab))
            else:
                Partition = PartitionData(
                    X=BaseData,
                    NodePositions=TargetPG['NodePositions'],
                    MaxBlockSize=100000000,
                    SquaredX=np.sum(BaseData**2, axis=1, keepdims=1),
                    TrimmingRadius=float('inf'))[0]
                ProjDF = pd.DataFrame.from_dict(
                    dict(X=BaseData[:, Idx1],
                         Y=BaseData[:, Idx2],
                         Xend=TargetPG['NodePositions'][Partition, Idx1],
                         Yend=TargetPG['NodePositions'][Partition, Idx2],
                         Group=GroupsLab))

            p = p + plotnine.geom_segment(
                data=ProjDF,
                mapping=plotnine.aes(
                    x='X', y='Y', xend='Xend', yend='Yend', col='Group'),
                inherit_aes=False,
                alpha=.3)

        if ("Target" in VizMode):
            if GroupsLab is not None:
                p = p + plotnine.geom_segment(
                    data=df2,
                    mapping=plotnine.aes(
                        x='x', y='y', xend='xend', yend='yend', col='Col'),
                    inherit_aes=True) + plotnine.labs(linetype="")
            else:
                p = p + plotnine.geom_segment(
                    data=df2,
                    mapping=plotnine.aes(
                        x='x', y='y', xend='xend', yend='yend'),
                    inherit_aes=False)

        if (Do_PCA):
            df4 = pd.DataFrame.from_dict(
                dict(PCA=NodesPCA[:, Idx1], PCB=NodesPCA[:, Idx2]))
        else:
            df4 = pd.DataFrame.from_dict(
                dict(PCA=TargetPG['NodePositions'][:, Idx1],
                     PCB=TargetPG['NodePositions'][:, Idx2]))

        if ("Target" in VizMode):
            if (PointSize is not None):

                p = p + plotnine.geom_point(mapping=plotnine.aes(
                    x='PCA', y='PCB', size=PointSize),
                                            data=df4,
                                            inherit_aes=False)

            else:
                p = p + plotnine.geom_point(mapping=plotnine.aes(x='PCA',
                                                                 y='PCB'),
                                            data=df4,
                                            inherit_aes=False)

    #         if(NodeLabels):

    #             if(Do_PCA){
    #                 df4 = data.frame(PCA = NodesPCA$x[,Idx1], PCB = NodesPCA$x[,Idx2], Lab = NodeLabels)
    #             else {
    #                 df4 = data.frame(PCA = TargetPG$NodePositions[,Idx1], PCB = TargetPG$NodePositions[,Idx2], Lab = NodeLabels)
    #           }

    #           p = p + plotnine.geom_text(mapping = plotnine.aes(x = PCA, y = PCB, label = Lab),
    #                                       data = df4, hjust = 0,
    #                                       inherit.aes = False, na.rm = True,
    #                                       check_overlap = True, color = "black", size = LabMult)

    #         }

    #         if(Do_PCA){
    #             LabX = "EpG PC", Idx1, " (Data var = ",  np.round(100*DataVarPerc[Idx1], 3), "% / PG var = ", signif(100*TarPGVarPerc[Idx1], 3), "%)"
    #             LabY = "EpG PC", Idx2, " (Data var = ",  np.round(100*DataVarPerc[Idx2], 3), "% / PG var = ", signif(100*TarPGVarPerc[Idx2], 3), "%)"
    #         else {
    #             LabX = paste0("Dimension ", Idx1, " (Data var = ",  np.round(100*DataVarPerc[Idx1], 3), "% / PG var = ", np.round(100*TarPGVarPerc[Idx1], 3), "%)")
    #             LabY = paste0("Dimension ", Idx2, " (Data var = ",  np.round(100*DataVarPerc[Idx2], 3), "% / PG var = ", np.round(100*TarPGVarPerc[Idx2], 3), "%)")
    #         }

    #         if(!is.na(TargetPG$FinalReport$FVEP)){
    #             p = p + plotnine.labs(x = LabX,
    #                                  y = LabY,
    #                                  title = paste0(Main,
    #                                                 "/ FVE=",
    #                                                 signif(as.numeric(TargetPG$FinalReport$FVE), 3),
    #                                                 "/ FVEP=",
    #                                                 signif(as.numeric(TargetPG$FinalReport$FVEP), 3))
    #           ) +
    #             plotnine.theme(plot.title = plotnine.element_text(hjust = 0.5))
    #         else {
    #           p = p + plotnine.labs(x = LabX,
    #                                  y = LabY,
    #                                  title = paste0(Main,
    #                                                 "/ FVE=",
    #                                                 signif(as.numeric(TargetPG$FinalReport$FVE), 3))
    #           ) +
    #             plotnine.theme(plot.title = plotnine.element_text(hjust = 0.5))
    #         }

        PlotList.append(p)

    return PlotList
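
A hedged smoke-test sketch for PlotPG with Do_PCA=False and no projections, so only the numpy/pandas/plotnine paths are exercised (the point cloud and the 3-node path graph below are made up):

import numpy as np

X = np.random.randn(100, 2)
TargetPG = {
    'NodePositions': np.array([[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]]),
    'Edges': [np.array([[0, 1], [1, 2]])],
}
plots = PlotPG(X, TargetPG, Do_PCA=False)  # returns a list with one ggplot per dimension pair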
Example #13
def cell_cycle_scores(adata,
                      scores=["signatures", "components"][0],
                      size=1.5,
                      alpha=1,
                      curvature_shrink=1,
                      lab_ypos=2,
                      show_curvature=True):
    """Plots cell cycle signatures vs pseudotime

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.cell_cycle_phase`.
    scores: str
        A string indicating what to plot as cell cycle scores against pseudotime.
        If 'signatures', standard S-phase, G2-M and Histones signatures are used;
        if 'components', the 4 cell cycle related components are used.
    size: float
        Controls the point size of the plot.
    alpha: float
        A value between 0 and 1. Controls point transparency.
    lab_ypos: float
        Controls the y-axis position of the cell cycle phase annotation.
    show_curvature:
        Controls whether curvature is shown

    Returns
    --------------
    A plotnine scatter plot of pseudotime vs 3 cell cycle signatures.

    """
    if scores == "signatures":
        y = ["G1-S", "G2-M", "Histones"]
        colors = ['#8ca0c9', '#ff8d68', '#5cc2a6', "black"]
    elif scores == "components":
        _add_compScores(adata)
        y = ["G1-S comp", "G2-M comp", "G2-M- comp", "Histone comp"]
        colors = ['#8ca0c9', '#ff8d68', "#e5c494", '#5cc2a6', "black"]

    time_scatter = (
        pseudotime_scatter(
            adata, y=y, facet=False, size=size, alpha=alpha, lab_ypos=lab_ypos)
        + labs(x="Pseudotime", y="Signature scores", color="Signature"))

    # -- Add cell cycle annotations
    if ("cell_cycle_division" in adata.uns["scycle"]) and show_curvature:
        cc_divs = adata.uns["scycle"]["cell_cycle_division"]
        # -- Curvature data
        curv_data = cc_divs["curvature"].copy()  # copy so adata.uns is not mutated in place
        curv = curv_data["curvature"].values
        cvz = zscore(curv) / curvature_shrink
        cvz = cvz - np.max(cvz)
        curv_data.loc[:, "curvature"] = cvz
        curv_data.loc[:, "signature"] = "Curvature"

        # -- Peak data (for segments)
        gr_min = np.min(curv_data["curvature"])
        pk_data = curv_data[curv_data["ispeak"] == "peak"].copy()
        pk_data["ymin"] = gr_min

        cell_cycle_plt = (
            time_scatter +
            geom_point(aes("pseudotime", "curvature", color="signature"),
                       data=curv_data) +
            geom_line(aes("pseudotime", "curvature"), data=curv_data) +
            scale_color_manual(values=colors) + geom_segment(
                aes(x="pseudotime",
                    xend="pseudotime",
                    y="ymin",
                    yend="curvature"),
                linetype="dotted",
                data=pk_data,
            ))
        return cell_cycle_plt
    else:
        return time_scatter + scale_color_manual(values=colors[0:-1])
Example #14
full_plot_df.head()

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
ranked = full_plot_df.sort_values("odds_ratio", ascending=False)
plot_df = (pd.concat([ranked.head(subset), ranked.iloc[:-2].tail(subset)])
           .replace("rna", "RNA")
           .assign(
               odds_ratio=lambda x: np.log2(x.odds_ratio),
               lower_odds=lambda x: np.log2(x.lower_odds),
               upper_odds=lambda x: np.log2(x.upper_odds),
           ))
plot_df.head()

g = (p9.ggplot(
    plot_df, p9.aes(y="lemma", x="lower_odds", xend="upper_odds",
                    yend="lemma")) +
     p9.geom_segment(color="#253494", size=6, alpha=0.7) +
     p9.scale_y_discrete(limits=(
         plot_df.sort_values("odds_ratio", ascending=True).lemma.tolist())) +
     p9.scale_x_continuous(limits=(-3, 3)) +
     p9.geom_vline(p9.aes(xintercept=0), linetype="--", color="grey") +
     p9.annotate(
         "segment",
         x=0.5,
         xend=2.5,
         y=1.5,
         yend=1.5,
         colour="black",
         size=0.5,
         alpha=1,
         arrow=p9.arrow(length=0.1),
     ) + p9.annotate(
Example #15
def gene_profile(genes: list, 
                 weights: pd.DataFrame, 
                 stddev: pd.DataFrame=None,
                 y_axis_label: str=None,
                 highlight_n: int=None, 
                 highlight_anno: list=None, 
                 figsize: tuple=None,
                 ylim: tuple=None) -> p9.ggplot:
    """
    
    Parameters
    ----------
    genes          : a single str or list of genes to include in plot as facets
    weights        : DataFrame of ES weights
    highlight_n    : number of highest ESw to highlight
    highlight_anno : specific annotations to highlight
    figsize : (float, float), optional (default: None)
        Specify width and height of plot.
    
    Returns
    -------
        g    : ggplot
        
    Todo:
        * find a better way for sorting cell-types along x-axis
        * report if gene in genes is not found in df
        * report if duplicate genes
        * replace hacky x-axis labelling
    
    """
    
    ### Reduce dataframe to genes of interest
    genes = [str.upper(s) for s in genes]
    idx = np.char.upper(weights.index.values.astype(str))
    mask = np.isin(idx, genes)
    df_tidy = weights[mask]
    n_genes = len(df_tidy)

    assert (n_genes >= 1), "No matching genes found in dataframe."

    stddev_tidy = None
    if stddev is not None:
        idx = np.char.upper(stddev.index.values.astype(str))
        mask = np.isin(idx, genes)
        stddev_tidy = stddev[mask]
        n_genes = len(df_tidy)
        assert (n_genes >= 1), "No matching genes found in stddev dataframe."

    # Constants, height and width of plot.
    if figsize is None:
        H = 5*n_genes
        W = 15
    else:
        W, H = figsize

    if ylim is None:
        ylim = (-1,1)
    
    if y_axis_label is None:
        y_axis_label = "Expression Specificity"
    
    ### Convert to tidy / long format if necessary
    # Org:
    #       ABC  ACBG  ACMB
    # POMC  0.0   0.5   0.9
    # AGRP  0.2   0.0   0.0
    # LEPR  0.1   0.1   0.4
    
    # Tidy:
    #   gene_name annotation    es_weight
    # 1 POMC      ABC           0.0
    # 2 AGRP      ABC           0.6
    # 3 LEPR      ABC           1.0     

    df_tidy.index.name = None # ensure that index name is none, so "index" is used for id_vars
    df_tidy = pd.melt(df_tidy.reset_index(), id_vars="index", var_name="annotation", value_name="weight")
    
    if stddev_tidy is not None:
        stddev_tidy.index.name = None
        stddev_tidy = pd.melt(stddev_tidy.reset_index(), id_vars="index", var_name="annotation", value_name="stddev")
        df_tidy = df_tidy.merge(stddev_tidy, on=["index", "annotation"])


    ### Sort values by gene_name and es_weight and add order
    # Sorted:
    #   gene_name annotation   es_weight   x_order
    # 1 AGRP      MOL2         0.0         1
    # 2 AGRP      ACNT1        0.1         2
    # 3 AGRP      MOL1         0.2         3
    
    df_tidy = df_tidy.sort_values(by=["index", "weight"])
    df_tidy["order"] = np.arange(len(df_tidy)) + 1
    
    ### Generate highlight
    # Default: highlight top 5
    if ((highlight_n is None) and (highlight_anno is None)):
        highlight_n = 5

    # highlight list of 
    if (highlight_anno is not None):
        df_tidy["highlight"] = df_tidy["annotation"].isin(highlight_anno)
    elif (highlight_n is not None):
        df_tidy["highlight"] = df_tidy.groupby("index")["order"].rank("first", ascending=False) <= highlight_n
    else:
        df_tidy["highlight"] = np.array([False] * len(df_tidy))
    
    df_highlight = df_tidy[df_tidy["highlight"]]
    
    ### Plot
    # linear function to compute x_axis text-size.
    # Mainly depends on number of genes in df per faceet, i.e. len(df_tidy) / len(genes).
    SIZE_TEXT_X_AXIS = 10.161 - 0.023 * (len(df_tidy) / len(genes))
    
    # Limits of the order for each index gene / facet, e.g. [0, 266, 531]
    # These limits are necessary to only plot the labels
    order_lims = [0, *(df_tidy.groupby("index")["order"].max().values)]
    
    def find_nearest(array,value):
        array = np.asarray(array)
        idx = (np.abs(array - value)).argmin()
        return array[idx]
        
    def getbreaks(lims):
        # function defined for use in debugging
        l = find_nearest(order_lims, lims[0])
        r = find_nearest(order_lims, lims[1])
        breaks = np.arange(l, r)
        return breaks

    def getlbls(idx):
        # function defined for use in debugging
        idx = idx
        lbls = df_tidy["annotation"].iloc[idx].values
        return lbls
    
    p = (
        ### data
        p9.ggplot(data=df_tidy, mapping=p9.aes(x="order", y="weight", label="annotation"))

        ### theming
        + p9.theme_classic()
        + p9.theme(
            figure_size = (W,H),
            axis_ticks_major_x = p9.element_blank(),
            axis_text_x = p9.element_text(rotation=75, hjust=0, size=SIZE_TEXT_X_AXIS), # 
            axis_text_y = p9.element_text(size=W),
            panel_spacing = 1,
            strip_background = p9.element_blank()
        )

        + p9.ylim(ylim[0],ylim[1])

        + p9.labs(
            x="", # e.g. "Cell-type"
            y=y_axis_label, # e.g. "ES weight"
        )

        ### viz
        # all
        + p9.geom_segment(mapping=p9.aes(x="order", xend="order", y=0, yend="weight"),
                       color="grey",
                       alpha=0.3,
                       show_legend=False
        )

        + p9.geom_point(mapping=p9.aes(size=2),
                     color="grey",
                    show_legend=False
        )

        # highlight
        + p9.geom_point(data=df_highlight, mapping=p9.aes(size=2), 
                     color="dodgerblue",
                    show_legend=False
        )

        + p9.geom_segment(data=df_highlight, mapping=p9.aes(x="order", xend="order", y=0, yend="weight"),
                       color="dodgerblue",
                       alpha=0.3,
                       show_legend=False
        )

        + p9.facet_wrap("index",
                     scales="free",
                     nrow=n_genes
                    )
        
        + p9.scale_x_continuous(
            # order_scale is continuous across all annotations
            # so the scale will look weird for each facet, e.g.
            # facet 1 may have order 1-7, and facet 2 has order 8-14.
            # therefore we must use a labeller function to get the 
            # correct labels for each interval of order.
            breaks = lambda lims: getbreaks(lims),
            labels = lambda idx: getlbls(idx)
        )
    )
    
    if stddev_tidy is not None:
        p = p + p9.geom_errorbar(mapping=p9.aes(ymin="weight-stddev", ymax="weight+stddev"), 
                                    color="grey", width=0.1)\
                + p9.geom_errorbar(data=df_highlight, mapping=p9.aes(ymin="weight-stddev", ymax="weight+stddev"),
                                color="dodgerblue", width=0.1)

    # add labels last for them to be on top
    p = p + p9.geom_label(data=df_highlight,
                    color = "dodgerblue",
                    adjust_text = {'expand_points': (2,2)}
        )

    return p
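
A hedged usage sketch for gene_profile, mirroring the wide ES-weight layout shown in the docstring comments (the matrix below is made up):

import pandas as pd

weights = pd.DataFrame(
    {'ABC': [0.0, 0.2, 0.1], 'ACBG': [0.5, 0.0, 0.1], 'ACMB': [0.9, 0.0, 0.4]},
    index=['POMC', 'AGRP', 'LEPR'],
)
g = gene_profile(['POMC', 'AGRP'], weights, highlight_n=2)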
    
Example #16
                        'theta3c': mech.theta3[1], 
                        'theta4a': mech.theta4[0], 
                        'theta4c': mech.theta4[1], 
                        'omega3a': mech.omega3[0], 
                        'omega3c': mech.omega3[1],
                        'omega4a': mech.omega4[0], 
                        'omega4c': mech.omega4[1], 
                        'alpha3a': mech.alpha3[0], 
                        'alpha3c': mech.alpha3[1],
                        'alpha4a': mech.alpha4[0],
                        'alpha4c': mech.alpha4[1]},
                        index = [0])
k = 0
plot = ( ggplot(sol) + 
         # MAIN LINKAGE
         geom_segment(aes(x = 0, y = 0, xend = sol.Ro4[k].real, yend = sol.Ro4[k].imag)) +
         geom_point(aes(x=0, y=0), shape = 'o', size = 3) +
         geom_point(aes(x = sol.Ro4[k].real, y = sol.Ro4[k].imag), shape = 'o', size = 3) +
         # 2ND LINKAGE
         geom_segment(aes(x = 0, y = 0, xend = sol.Ra[k].real, yend = sol.Ra[k].imag)) +
         geom_point(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag), shape = 'o', size = 3) +
         # AP LINKAGE
         geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag, xend = sol.Rpa[k].real, yend = sol.Rpa[k].imag)) +
         geom_point(aes(x = sol.Rpa[k].real, y = sol.Rpa[k].imag), shape = 'o', size = 3) +
         # 3RD LINKAGE
         geom_segment(aes(x = sol.Ra[k].real, y = sol.Ra[k].imag, xend = sol.Rba[k].real, yend = sol.Rba[k].imag)) +
         geom_point(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag), shape = 'o', size = 3) +
         # 4TH LINKAGE
         geom_segment(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag, xend = sol.Ro4[k].real, yend = sol.Ro4[k].imag)) +
         geom_point(aes(x = sol.Rba[k].real, y = sol.Rba[k].imag), shape = 'o', size = 3) +
         # NODES IDENTIFICATION
Example #17
File: gap.py Project: tommens/gap
def cli():
    parser = argparse.ArgumentParser(
        description='GAP - Git Activity Predictor')
    parser.add_argument('paths',
                        metavar='PATH',
                        type=str,
                        nargs='*',
                        default=['.'],
                        help='Paths to one or more git repositories')
    parser.add_argument(
        '--date',
        type=lambda d: dateutil.parser.parse(d).date(),
        required=False,
        default=datetime.date.today(),
        help='Date used for predictions (default to current date)')
    parser.add_argument('--obs',
                        type=int,
                        required=False,
                        default=20,
                        help='Number of observations to consider')
    parser.add_argument('--probs',
                        metavar='PROB',
                        type=float,
                        nargs='*',
                        required=False,
                        default=[0.5, 0.6, 0.7, 0.8, 0.9],
                        help='Probabilities to output, strictly in [0,1].')
    parser.add_argument(
        '--limit',
        type=int,
        required=False,
        default=30,
        help=
        'Limit contributors to those who were active at least once during the last x days (default 30)'
    )
    parser.add_argument(
        '--mapping',
        type=str,
        nargs='?',
        help=
        'Mapping file to merge identities. This file must be a csv file where each line contains two values: the name to be merged, and the corresponding identity. Use "IGNORE" as identity to ignore specific names.'
    )
    parser.add_argument('--branches',
                        metavar='BRANCH',
                        type=str,
                        nargs='*',
                        default=list(),
                        help='Git branches to analyse (default to all).')
    parser.add_argument(
        '--as-dates',
        dest='as_dates',
        action='store_true',
        help=
        'Express predictions using dates instead of time differences in days')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('--text',
                       action='store_true',
                       help='Print results as text.')
    group.add_argument('--csv',
                       action='store_true',
                       help='Print results as csv.')
    group.add_argument('--json',
                       action='store_true',
                       help='Print results as json.')
    group.add_argument(
        '--plot',
        nargs='?',
        const=True,
        help='Export results to a plot. Filepath can be optionally specified.')

    args = parser.parse_args()

    # Default plot location
    if args.plot is True:
        args.plot = str(args.date) + '.pdf'

    # Default to text if no other option is provided
    if not args.csv and not args.json and not args.plot:
        args.text = True

    # Identity mapping
    if args.mapping:
        d = pandas.read_csv(args.mapping, names=['source', 'target'])
        mapping = {r.source: r.target for r in d.itertuples()}
    else:
        mapping = {}

    raw_data = dict()  # author -> dates of activity

    # Get data from git
    for path in args.paths:
        try:
            repo = git.Repo(path)
        except Exception as e:  # Must be refined
            print('Unable to access repository {} ({}:{})'.format(
                path, e.__class__.__name__, e))
            sys.exit()

        # Default branches
        if len(args.branches) == 0:
            commits = repo.iter_commits('--all')
        else:
            commits = repo.iter_commits(' '.join(args.branches))

        for commit in commits:
            try:
                author = commit.author.name
                identity = mapping.get(author, author)
                if author.lower() != 'ignore' and identity.lower() == 'ignore':
                    continue

                date = datetime.date.fromtimestamp(commit.authored_date)
                raw_data.setdefault(identity, []).append(date)
            except Exception as e:
                print('Unable to read commit ({}: {}): {}'.format(
                    e.__class__.__name__, e, commit))

    # Compute durations and apply model
    data = []  # (author, past activities, predicted durations)

    for author, commits in raw_data.items():
        commits = sorted([e for e in commits if e <= args.date])
        durations = dates_to_duration(commits, window_size=args.obs)

        if len(durations) >= args.obs:
            # Currently implemented with no censor
            surv = SurvfuncRight(durations, [1] * len(durations))
            predictions = [surv.quantile(p) for p in args.probs]
            last_day = commits[-1]

            if last_day >= args.date - datetime.timedelta(args.limit):
                data.append((
                    author,
                    commits,
                    predictions,
                ))

    # Prepare dataframe
    df = pandas.DataFrame(index=set([a for a, c, p in data]),
                          columns=['last'] + args.probs)
    if len(df) == 0:
        print(
            'No author has {} observations and was active at least once during the last {} days'
            .format(args.obs, args.limit))
        sys.exit()

    df.index.name = 'author'

    if not args.plot:
        for author, commits, predictions in data:
            last = commits[-1]
            if args.as_dates:
                df.at[author, 'last'] = last
            else:
                df.at[author, 'last'] = (last - args.date).days

            for prob, p in zip(args.probs, predictions):
                if args.as_dates:
                    df.at[author,
                          prob] = last + datetime.timedelta(days=int(p))
                else:
                    df.at[author,
                          prob] = (last + datetime.timedelta(days=int(p)) -
                                   args.date).days

        df = df.sort_values(['last'] + args.probs,
                            ascending=[False] + [True] * len(args.probs))
        df = df.astype(str)

        if args.text:
            pandas.set_option('expand_frame_repr', False)
            pandas.set_option('display.max_columns', 999)
            print(df)
        elif args.csv:
            print(df.to_csv())
        elif args.json:
            print(df.to_json(orient='index'))
    else:
        # Because of plotnine's way of initializing matplotlib
        import warnings
        warnings.filterwarnings("ignore")

        VIEW_LIMIT = 28

        # List of (author, day) where day is a delta w.r.t. the given date
        activities = []
        # List of (author, from_day, to_day, p) where probability p applies
        # between from_day and to_day (deltas w.r.t. the given date)
        forecasts = []

        for author, commits, predictions in data:
            last = (commits[-1] - args.date).days
            for e in commits:
                activities.append((author, (e - args.date).days))

            previous = previous_previous = 0
            for d, p in zip(predictions, args.probs):
                if d > previous:
                    forecasts.append((author, last + previous, last + d, p))
                    previous_previous = previous
                    previous = d
                else:
                    forecasts.append(
                        (author, last + previous_previous, last + d, p))

        activities = pandas.DataFrame(columns=['author', 'day'],
                                      data=activities)
        forecasts = pandas.DataFrame(columns=['author', 'fromd', 'tod', 'p'],
                                     data=forecasts)

        plot = (p9.ggplot(p9.aes(y='author')) + p9.geom_segment(
            p9.aes('day - 0.5', 'author', xend='day + 0.5', yend='author'),
            data=activities,
            size=4,
            color='orange',
        ) + p9.geom_segment(
            p9.aes('fromd + 0.5',
                   'author',
                   xend='tod + 0.5',
                   yend='author',
                   alpha='factor(p)'),
            data=forecasts.sort_values('p').drop_duplicates(
                ['author', 'fromd', 'tod'], keep='last'),
            size=4,
            color='steelblue',
        ) + p9.geom_vline(
            xintercept=0,
            color='r', alpha=0.5, linetype='dashed') + p9.scale_x_continuous(
                name='  <<  past days {:^20} future days  >>'.format(
                    str(args.date)),
                breaks=range(-VIEW_LIMIT // 7 * 7,
                             (VIEW_LIMIT // 7 * 7) + 1, 7),
                minor_breaks=6) + p9.scale_y_discrete(
                    name='',
                    limits=activities.sort_values(
                        'day', ascending=False)['author'].unique()) +
                p9.scale_alpha_discrete(range=(0.2, 1), name=' ') +
                p9.coord_cartesian(xlim=(-VIEW_LIMIT, VIEW_LIMIT)) +
                p9.theme_matplotlib() + p9.theme(
                    figure_size=(6, 4 * activities['author'].nunique() / 15)))

        fig = plot.draw()
        fig.savefig(args.plot, bbox_inches='tight')
        print('Plot exported to {}'.format(args.plot))