Exemplos de geom_col em Python, exemplos de plotnine.geom_col em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: h_parameters.py Projeto: inbalros/repository_mining

def plot_hypothesis(hypothesis, file_name):
    """Plot per-bin means and p-values for every (bin type, score) pair.

    Two bar charts are appended to the output for each pair: one with the
    two dataset means of every bin (dodged bars), and one with the p-value
    of every bin coloured by whether it is significant at the 0.05 level.
    All charts are handed to ``save_as_pdf_pages`` together with
    ``file_name``.

    Parameters
    ----------
    hypothesis : mapping
        ``hypothesis[bin_type][score]`` is an iterable of single-entry
        mappings ``{bin_name: h}``. Each ``h`` is read for the attributes
        ``p1``, ``p2``, ``mean1``, ``mean2``, ``t`` and ``p``. The score
        names are taken from the first bin type only.
    file_name
        Forwarded unchanged to ``save_as_pdf_pages``.
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []
    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score
        # Rows are collected in plain lists and turned into frames once.
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            # Each entry is a one-item mapping: bin name -> test result.
            bin_name, h = next(iter(bin_.items()))
            for dataset, mean in ((h.p1, h.mean1), (h.p2, h.mean2)):
                mean_rows.append({
                    "Bin": bin_name,
                    'Dataset': dataset,
                    mean_name: str(round(float(mean), 3)),
                })
            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                "Significant" if p_value <= 0.05 else "Not Significant",
            })
        # dtype=object mirrors the frames the append-based code produced.
        df = pd.DataFrame(mean_rows,
                          columns=["Bin", "Dataset", mean_name],
                          dtype=object)
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'],
            dtype=object)
        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) + ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))
    save_as_pdf_pages(plots, file_name)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: scale_dependent_correlation.py Projeto: AlFontal/sdcpy

    def plot_range_comparison(self,
                              xlabel: str = '',
                              figsize: Tuple[int] = (7, 3),
                              add_text_label: bool = True,
                              **kwargs):
        """Bar chart of comparison counts per category, filled by direction.

        The data comes from ``self.get_ranges_df(**kwargs)``. When
        ``add_text_label`` is true, a text layer is added for the
        'Positive' rows and another for the 'Negative' rows, each only if
        that direction has at least one row with ``counts > 0``.

        Parameters
        ----------
        xlabel : str
            Label of the x axis.
        figsize : tuple
            Figure size handed to ``p9.theme(figure_size=...)``.
        add_text_label : bool
            Whether to add the per-direction text labels.
        **kwargs
            Forwarded to ``self.get_ranges_df``.

        Returns
        -------
        The assembled plotnine plot.
        """
        ranges = self.get_ranges_df(**kwargs)

        chart = p9.ggplot(ranges)
        chart = chart + p9.aes('cat_value', 'counts', fill='direction')
        chart = chart + p9.geom_col(alpha=.8)
        chart = chart + p9.theme(figure_size=figsize,
                                 axis_text_x=p9.element_text(rotation=45))
        chart = chart + p9.scale_fill_manual(
            ['#3f7f93', '#da3b46', '#4d4a4a'])
        chart = chart + p9.labs(x=xlabel, y='Number of Comparisons', fill='R')

        if not add_text_label:
            return chart

        # (direction, y expression of the label, label colour)
        label_specs = (
            ('Positive', 'n + max(n) * .15', '#3f7f93'),
            ('Negative', 'n + max(n) * .05', '#da3b46'),
        )
        for direction, y_expr, colour in label_specs:
            labelled = ranges.loc[ranges.direction == direction].loc[
                ranges.counts > 0]
            if labelled.size > 0:
                chart += p9.geom_text(
                    p9.aes(label='label', x='cat_value', y=y_expr),
                    inherit_aes=False,
                    size=9,
                    data=labelled,
                    color=colour)

        return chart

Exemplo n.º 3

0

Exibir arquivo

def plot_bargraph(count_plot_df, plot_df):
    """
    Build a horizontal bar graph of lemma counts, faceted by repository.

    Arguments:
        count_plot_df - The dataframe that contains lemma counts
        plot_df - the dataframe that contains the odds ratio and lemmas;
            its lemmas, sorted by ascending odds ratio, fix the bar order

    Returns the plotnine plot object.
    """
    counts = count_plot_df.astype({"count": int})
    lemma_order = plot_df.sort_values("odds_ratio",
                                      ascending=True).lemma.tolist()

    text_theme = p9.theme(
        # 640 x 480
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )

    graph = p9.ggplot(counts, p9.aes(x="lemma", y="count"))
    graph = graph + p9.geom_col(position=p9.position_dodge(width=0.5),
                                fill="#253494")
    graph = graph + p9.coord_flip()
    graph = graph + p9.facet_wrap("repository", scales='free_x')
    graph = graph + p9.scale_x_discrete(limits=(lemma_order))
    graph = graph + p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
    graph = graph + p9.labs(x=None)
    graph = graph + p9.theme_seaborn(
        context='paper', style="ticks", font="Arial", font_scale=0.95)
    graph = graph + text_theme
    return graph

Exemplo n.º 4

0

Exibir arquivo

    def plot_zmw_stats(self, **kwargs):
        """Stacked bar graph of ZMW stats, one bar per run.

        Note
        ----
        Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

        Parameters
        ----------
        ``**kwargs`` : dict
            Keyword arguments passed to :meth:`Summaries.zmw_stats`.

        Returns
        -------
        plotnine.ggplot.ggplot
            Bars of `number` per `name`, stacked and filled by `status`.
            The figure width grows with the number of distinct names.

        """
        df = self.zmw_stats(**kwargs)

        x_tick_text = p9.element_text(angle=90, vjust=1, hjust=0.5)
        n_names = len(df['name'].unique())

        p = p9.ggplot(df, p9.aes(x='name', y='number', fill='status'))
        p = p + p9.geom_col(position=p9.position_stack(reverse=True),
                            width=0.8)
        p = p + p9.theme(axis_text_x=x_tick_text,
                         figure_size=(0.4 * n_names, 2.5))
        p = p + p9.ylab('number of ZMWs')
        p = p + p9.xlab('')

        # The custom palette (minus its first entry) is only applied when
        # it has enough colours for every status.
        n_statuses = len(df['status'].unique())
        if n_statuses < len(CBPALETTE):
            p = p + p9.scale_fill_manual(CBPALETTE[1:])

        return p

Exemplo n.º 5

0

Exibir arquivo

def test_col():
    """Compare a geom_col plot of `z` over `x` with the 'col' baseline."""
    # The black outline reveals the bar edges and the stacking
    # that is going on.
    bars = geom_col(aes('x', 'z', fill='factor(z)'), color='black')
    p = ggplot(df) + bars

    assert p + _theme == 'col'

Exemplo n.º 6

0

Exibir arquivo

def frequency_TL(Data):
    """Plot the number of rows per calendar day and save it as TL_4.jpeg.

    The rows of ``Data`` are counted per day (taken from its ``date``
    column), days without rows are added between the first and last day,
    and the result is drawn as red bars and written to
    ``pdf/iteration/TL_4.jpeg``. The spacing of the x-axis breaks grows
    with the covered time span (6, 12 or 18 months).

    Parameters
    ----------
    Data : pandas.DataFrame
        Needs a datetime-like ``date`` column. The count column is the
        first column of the grouped result and is only renamed to ``n``
        when it is called ``"Unnamed: 0"``.
        NOTE: a ``date_4`` column is added to ``Data`` in place.

    Returns
    -------
    None
        Progress messages are printed. When there are no rows to count,
        a notice is printed and no file is written.
    """
    from datetime import timedelta

    print('======= Creating frequency_TL =======')
    # Filtering
    Data['date_4'] = Data['date'].dt.date
    tl4 = Data.groupby("date_4", sort=False, as_index=False).count()
    tl4 = tl4.iloc[:, 0:2]
    tl4 = tl4.rename(columns={"Unnamed: 0": "n"})

    if tl4.empty:
        # This check must come before min()/max(), which raise on an empty
        # column. Placed after the merge it could never trigger.
        print('Plot not created; no data found.')
    else:
        sdate = min(tl4["date_4"])  # start date
        edate = max(tl4["date_4"])  # end date
        delta = edate - sdate       # as timedelta

        # One row per calendar day so that days without data show as gaps.
        days = [sdate + timedelta(days=i) for i in range(delta.days + 1)]
        DF = pd.DataFrame({'date_4': days})
        data_with_missing_times = pd.merge(DF, tl4, on='date_4', how='outer')

        # Wider time spans get sparser axis breaks.
        if delta.days > 1825:
            datebreaks = '18 months'
        elif delta.days > 1095:
            datebreaks = '12 months'
        else:
            datebreaks = '6 months'

        # Creating and saving TL_4
        plot = (p9.ggplot(data=data_with_missing_times,
                          mapping=p9.aes(x='date_4', y='n'))
                + p9.geom_col(fill='red')
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=40),
                           axis_title=p9.element_text(size=40, face='bold'))
                + p9.scale_x_datetime(date_labels='%Y-%m',
                                      date_breaks=datebreaks)
                + p9.labs(x='', y='')
                )
        # `plot=plot` is not passed any more: save() is already called on
        # the plot, and extra keywords are forwarded to matplotlib's
        # savefig, which does not define a `plot` argument.
        plot.save(filename='TL_4.jpeg',
                  path="pdf/iteration/",
                  width=25, height=5,
                  dpi=320)

    print('=================================frequency_TL DONE =============================')

Exemplo n.º 7

0

Exibir arquivo

Arquivo: image_histogram.py Projeto: kistlin/python_opencv_scratch

def image_histogram():
    """Show an image, its grayscale and equalized versions, and histograms.

    Reads ``image.jpg`` from the working directory, displays the original,
    the grayscale and the histogram-equalized grayscale image in three
    OpenCV windows, and plots both grayscale histograms on top of each
    other with plotnine. After the plot is shown, it waits until ``q`` is
    pressed in an OpenCV window and then closes all windows.

    Raises
    ------
    FileNotFoundError
        If ``image.jpg`` cannot be read (``cv2.imread`` returns ``None``
        instead of raising).
    """
    # read the image first so that a missing file fails with a clear error
    # before any window is opened
    image = cv2.imread(r"image.jpg")
    if image is None:
        raise FileNotFoundError("image.jpg could not be read")
    image_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_bw_eq = cv2.equalizeHist(image_bw)

    # create windows
    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw_eq', cv2.WINDOW_NORMAL)

    # display images
    cv2.imshow('image', image)
    cv2.imshow('image_bw', image_bw)
    cv2.imshow('image_bw_eq', image_bw_eq)

    # calculate histogram
    # The upper bound of the range is exclusive: [0, 256] gives one bin per
    # intensity 0..255. With [0, 255] the value 255 was left out and the
    # bins did not line up with integer intensities.
    hist_bw = cv2.calcHist([image_bw], [0], None, [256], [0, 256])
    hist_bw_eq = cv2.calcHist([image_bw_eq], [0], None, [256], [0, 256])
    np_hist_x = np.arange(len(hist_bw))
    d = {
        'np_hist_x': np_hist_x,
        'hist_bw': hist_bw.flatten(),
        'hist_bw_eq': hist_bw_eq.flatten()
    }
    df = pd.DataFrame(data=d)

    # plot histogram
    pn_handle = (
        pn.ggplot(df)
        + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw'),
                      color=None, fill='red', alpha=0.5)
        + pn.ylab('occurences')
        + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw_eq'),
                      color=None, fill='green', alpha=0.5)
        + pn.ggtitle('Histograms of bw images')
    )
    pn_handle.draw()
    plt.show()

    while True:
        # Mask to the low byte: some platforms return the key code with
        # extra high bits set.
        pressed_key = cv2.waitKey(16) & 0xFF
        if pressed_key == ord('q'):
            break

    # cleanup opencv
    cv2.destroyAllWindows()

Exemplo n.º 8

0

Exibir arquivo

def test_ordinal_scale():
    """Bars over an ordered categorical x must match 'ordinal_scale'."""
    ordered_letters = pd.Categorical(list('abcd'), ordered=True)
    df = pd.DataFrame({'x': ordered_letters, 'y': [1, 2, 3, 4]})

    p = ggplot(df)
    p = p + aes('x', 'y', color='-y', fill='x')
    p = p + geom_col(size=4)
    p = p + _theme

    assert p + _theme == 'ordinal_scale'

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_position.py Projeto: tr8dr/plotnine

def test_stack_negative():
    """Stacked bars with negated first and last y match 'stack-negative'."""
    df = df1.copy()
    y_pos = df.columns.get_loc('y')
    # Flip the sign of y in the first and in the last row.
    for row in (0, len(df) - 1):
        df.iloc[row, y_pos] *= -1

    bars = geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                    position='stack')
    labels = geom_text(aes('factor(x)', 'y', label='y'),
                       position=position_stack(vjust=0.5))
    p = ggplot(df) + bars + labels

    assert p + _theme == 'stack-negative'

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_position.py Projeto: jwhendy/plotnine

def test_stack_negative():
    """Check stacking of bars and labels when some y values are negative."""
    df = df1.copy()
    y_col = df.columns.get_loc('y')
    last_row = len(df) - 1
    # Negate y at both ends of the frame.
    df.iloc[0, y_col] *= -1
    df.iloc[last_row, y_col] *= -1

    p = ggplot(df)
    p = p + geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                     position='stack')
    p = p + geom_text(aes('factor(x)', 'y', label='y'),
                      position=position_stack(vjust=0.5))

    assert p + _theme == 'stack-negative'

Exemplo n.º 11

0

Exibir arquivo

Arquivo: scale_dependent_correlation.py Projeto: AlFontal/sdcpy

    def plot_consecutive(self, alpha: float = .05, **kwargs):
        """Bar chart of the longest run of consecutive significant steps per lag.

        Rows of ``self.sdc_df`` with ``p_value < alpha`` are grouped by
        ``lag``. Within each lag, rows are ordered by ``start_1`` and split
        into runs wherever ``start_1`` is not exactly one more than the
        previous row; the size of the largest run is plotted against the lag.

        Parameters
        ----------
        alpha : float
            Upper bound (exclusive) on ``p_value`` for a row to be used.
        **kwargs
            Forwarded to ``p9.theme``.

        Returns
        -------
        The assembled plotnine plot.
        """

        def longest_run(lag_df):
            ordered = lag_df.sort_values('start_1')
            # A new run starts wherever start_1 does not follow its
            # predecessor by exactly one; cumsum turns the breaks into ids.
            run_id = (ordered.start_1 != ordered.start_1.shift(1) + 1).cumsum()
            return ordered.assign(group=run_id).groupby(['group']).size().max()

        sdc = self.sdc_df
        significant = sdc.loc[sdc.p_value < alpha]
        longest = significant.groupby('lag', as_index=True).apply(longest_run)
        longest = longest.rename('Max Consecutive steps').reset_index()

        f = p9.ggplot(longest)
        f = f + p9.aes('lag', 'Max Consecutive steps')
        f = f + p9.geom_col()
        f = f + p9.theme(**kwargs)
        f = f + p9.labs(x='Lag [days]')
        return f

Exemplo n.º 12

0

Exibir arquivo

def plot_result_stats(results, title):
    """Dodged bar chart of the summary statistics of every metric.

    ``results.describe()`` is reshaped to one row per (metric, statistic);
    the 'count', 'min' and 'max' statistics are dropped and the remaining
    values are drawn as bars labelled with the value rounded to two
    decimals. The visible y range is fixed to [0, 1.0].

    Arguments:
        results - the dataframe to summarise, one column per metric
        title - the plot title

    Returns the plotnine plot object.
    """
    column_names = {"level_0": "metric", "level_1": "group", 0: "value"}
    summary = results.describe().unstack().reset_index()
    summary = summary.rename(columns=column_names)

    hidden = summary["group"].isin(["count", "min", "max"])
    stats = summary[~hidden]
    stats["value_presentation"] = stats["value"].round(2)

    value_labels = p9.geom_text(p9.aes(label="value_presentation"),
                                position=p9.position_dodge(width=0.9),
                                va="bottom")

    plot = p9.ggplot(stats)
    plot = plot + p9.aes("metric", "value", fill="group")
    plot = plot + p9.geom_col(position="dodge")
    plot = plot + p9.theme_bw()
    plot = plot + p9.coord_cartesian(ylim=[0, 1.0])
    plot = plot + p9.ggtitle(title)
    plot = plot + value_labels
    return plot

Exemplo n.º 13

0

Exibir arquivo

Arquivo: plot_num.py Projeto: alastairrushworth/inspectpd

def plot_num(df):
    """Plot one small bar chart per column from pre-computed histograms.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``col_name`` column and a ``hist`` column whose cells are
        dataframes with ``value`` and ``prop`` columns.

    Returns
    -------
    A plotnine plot with one facet (free scales, three per row) for every
    entry of ``col_name``.
    """
    # Pair every histogram with its column name by position. The previous
    # `x['col_name'][i]` was a label lookup and broke on any index other
    # than 0..n-1. `assign` returns a new frame, so the histogram frames
    # stored in the caller's dataframe no longer get a 'groups' column
    # written into them.
    labelled = [
        hist.assign(groups=col_name)
        for hist, col_name in zip(df['hist'], df['col_name'])
    ]
    z = pd.concat(labelled)
    # generate the plot
    ggplt = (
        p9.ggplot(z, p9.aes(x='value', y='prop', group='groups'))
        + p9.geom_col()
        + p9.guides(fill=False)
        + p9.ylab('Proportion')
        + p9.xlab('')
        + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1))
        + p9.facet_wrap(facets=['groups'], ncol=3, scales='free')
    )
    # return the plot object
    return ggplt

Exemplo n.º 14

0

Exibir arquivo

Arquivo: plot.py Projeto: NPSDC/qb

def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot outcome counts per possibility for the three buzzers as a PDF.

    For each of the RNN, MLP and Threshold buzzers the pickled dataframe
    ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` is loaded and its rows
    are counted per (Possibility, Outcome). The counts are drawn as
    horizontal stacked bars, one facet row per model, and saved to
    ``output/buzzer/<fold>_protobowl.pdf``.

    Parameters
    ----------
    fold
        Name used in the input and output file names.
    """
    buzzers = (
        ("RNNBuzzer", "RNN"),
        ("MLPBuzzer", "MLP"),
        ("ThresholdBuzzer", "Threshold"),
    )
    frames = []
    for buzzer_dir, model_name in buzzers:
        path = "output/buzzer/{}/{}_protobowl.pkl".format(buzzer_dir, fold)
        # NOTE(review): pickle.load runs code embedded in the file; this is
        # only safe as long as these files are produced by this project.
        # The `with` block closes the file, which the bare open() did not.
        with open(path, "rb") as pickle_file:
            raw = pickle.load(pickle_file)
        counts = raw.groupby(["Possibility", "Outcome"]).size()
        counts = counts.reset_index().rename(columns={0: "Count"})
        counts["Model"] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    df = pd.concat(frames, ignore_index=True)

    # Fixed category orders control the stacking and facet order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df["Outcome"] = df["Outcome"].astype(outcome_type)
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x="Possibility", y="Count", fill="Outcome"), width=0.7)
        + facet_grid("Model ~")
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type="div", palette=7)
    )

    figure_path = "output/buzzer/{}_protobowl.pdf".format(fold)
    p.save(figure_path)

Exemplo n.º 15

0

Exibir arquivo

def cell_division(adata):
    """ Plots the reference variable along the principal circle edges to
    visualize the moment of cell division.

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.
        NOTE: a boolean `cell_div` column is written into
        `adata.uns['princirc_gr']['edges']`.

    Returns
    ------------
    A plotnine plot to help visualize the moment of cell division and
    direction of the cell cycle: points, a connecting path and a dashed
    linear fit of `mean_var` per edge, plus bars of `diff_var`.

    If method = 'counts' when tl.celldiv_moment was run,
    cell division is defined by the largest drop in total_counts. The
    changes in counts are represented by the bars at the bottom, and the
    suggested moment of cell division is marked in red. The cell cycle
    should follow an incremental increase in total counts until around the
    moment of cell division.

    Alternatively, if method='g2m' in tl.celldiv_moment, the G2-M signature
    dynamics are used to define the moment of cell division.
    """
    moment = adata.uns['scycle']['cell_div_moment']
    ref_var = moment['ref_var']
    edge_to_0 = moment['cell_div_edge'][0]

    edges = adata.uns['princirc_gr']['edges']
    is_division_edge = edges['e1'] == edge_to_0
    edges['cell_div'] = is_division_edge

    # mean_var at the division edge, used for the red marker
    cell_div_count = edges[is_division_edge]['mean_var']

    cell_div_plot = ggplot(edges, aes('e1', 'mean_var'))
    cell_div_plot = cell_div_plot + geom_point(aes(y='mean_var'), size=2)
    cell_div_plot = cell_div_plot + geom_path(aes(y='mean_var'))
    cell_div_plot = cell_div_plot + geom_smooth(
        aes(y='mean_var'), method='lm', linetype='dashed')
    cell_div_plot = cell_div_plot + annotate(
        "point", x=edge_to_0, y=cell_div_count, color='red', size=2)
    cell_div_plot = cell_div_plot + labs(x='Edge position', y=ref_var)
    cell_div_plot = cell_div_plot + geom_col(
        aes(y='diff_var', fill='cell_div'))
    cell_div_plot = cell_div_plot + scale_fill_manual(
        values=['darkgrey', 'red'], guide=False)
    cell_div_plot = cell_div_plot + theme_std

    return cell_div_plot

Exemplo n.º 16

0

Exibir arquivo

Arquivo: plot.py Projeto: Pinafore/qb

def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot outcome counts per possibility for the three buzzers as a PDF.

    For each of the RNN, MLP and Threshold buzzers the pickled dataframe
    ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` is loaded and its rows
    are counted per (Possibility, Outcome). The counts are drawn as
    horizontal stacked bars, one facet row per model, and saved to
    ``output/buzzer/<fold>_protobowl.pdf``.

    Parameters
    ----------
    fold
        Name used in the input and output file names.
    """
    buzzers = (
        ('RNNBuzzer', 'RNN'),
        ('MLPBuzzer', 'MLP'),
        ('ThresholdBuzzer', 'Threshold'),
    )
    frames = []
    for buzzer_dir, model_name in buzzers:
        path = 'output/buzzer/{}/{}_protobowl.pkl'.format(buzzer_dir, fold)
        # NOTE(review): pickle.load runs code embedded in the file; this is
        # only safe as long as these files are produced by this project.
        # The `with` block closes the file, which the bare open() did not.
        with open(path, 'rb') as pickle_file:
            raw = pickle.load(pickle_file)
        counts = raw.groupby(['Possibility', 'Outcome']).size()
        counts = counts.reset_index().rename(columns={0: 'Count'})
        counts['Model'] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    df = pd.concat(frames, ignore_index=True)

    # Fixed category orders control the stacking and facet order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x='Possibility', y='Count', fill='Outcome'),
                   width=0.7)
        + facet_grid('Model ~')
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type='div', palette=7)
    )

    figure_path = 'output/buzzer/{}_protobowl.pdf'.format(fold)
    p.save(figure_path)

Exemplo n.º 17

0

Exibir arquivo

def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot outcome counts per possibility for the three buzzers as a PDF.

    For each of the RNN, MLP and Threshold buzzers the pickled dataframe
    ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` is loaded and its rows
    are counted per (Possibility, Outcome). The counts are drawn as
    horizontal stacked bars, one facet row per model, and saved to
    ``output/buzzer/<fold>_protobowl.pdf``.

    Parameters
    ----------
    fold
        Name used in the input and output file names.
    """
    buzzers = (('RNNBuzzer', 'RNN'), ('MLPBuzzer', 'MLP'),
               ('ThresholdBuzzer', 'Threshold'))
    frames = []
    for buzzer_dir, model_name in buzzers:
        path = 'output/buzzer/{}/{}_protobowl.pkl'.format(buzzer_dir, fold)
        # NOTE(review): pickle.load runs code embedded in the file; this is
        # only safe as long as these files are produced by this project.
        # The `with` block closes the file, which the bare open() did not.
        with open(path, 'rb') as pickle_file:
            raw = pickle.load(pickle_file)
        counts = raw.groupby(['Possibility', 'Outcome']).size()
        counts = counts.reset_index().rename(columns={0: 'Count'})
        counts['Model'] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat is its replacement.
    df = pd.concat(frames, ignore_index=True)

    # Fixed category orders control the stacking and facet order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (ggplot(df) +
         geom_col(aes(x='Possibility', y='Count', fill='Outcome'), width=0.7) +
         facet_grid('Model ~') + coord_flip() + theme_fs() +
         theme(aspect_ratio=0.17) + scale_fill_brewer(type='div', palette=7))

    figure_path = 'output/buzzer/{}_protobowl.pdf'.format(fold)
    p.save(figure_path)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: mcmc_diagnostics.py Projeto: jtouyz/Bayesian_foundations

def plot_acf(data_in, figure_size=(15, 5)):
    """
    Plots the autocorrelation function

    Bars of ``autocorrelation`` per ``lag`` are drawn over a horizontal
    line at 0 and dashed red lines at +0.05 and -0.05.

    Parameters
    ----------
    data_in : pd.DataFrame
        Dataframe containing the autocorrelation of our mcmc samples;
        the columns ``lag`` and ``autocorrelation`` are plotted
    figure_size : tuple, default = (15,5)
        Optional input for figure size

    Returns
    -------
    pn.ggplot:
        Plotnine ggplot object containing autocorrelation plot
    """
    # NOTE: this changes plotnine's global default figure size, so it also
    # applies to plots created after this call.
    pn.options.figure_size = figure_size
    # Both arguments are passed by keyword. A positional aes together with
    # `data=` only works on plotnine versions whose first parameter is the
    # mapping; on versions with `ggplot(data, mapping)` it collides with
    # the `data` keyword.
    plot_out = (
        pn.ggplot(data=data_in,
                  mapping=pn.aes(x='lag', y='autocorrelation'))
        + pn.geom_hline(pn.aes(yintercept=0))
        + pn.geom_hline(pn.aes(yintercept=0.05),
                        color='red', linetype='dashed')
        + pn.geom_hline(pn.aes(yintercept=-0.05),
                        color='red', linetype='dashed')
        + pn.geom_col()
    )
    return plot_out

Exemplo n.º 19

0

Exibir arquivo

Arquivo: time_to_published_refurbished.py Projeto: MarvinT/annorxiver

    (temp_df["time_to_published"].dt.total_seconds() / 60 / 60 / 24).max())
category_half_life

# In[14]:

# Horizontal bar chart of the half-life per preprint category with error
# bars, leaving out the 'none' category; saved as SVG and PNG.
_published = category_half_life.query("category!='none'")
_half_life_df = _published.assign(
    half_life_time=lambda x: pd.to_timedelta(x.half_life_time, "D"),
    half_life_ci_l=lambda x: pd.to_timedelta(x.half_life_ci_l, "D"),
    half_life_ci_u=lambda x: pd.to_timedelta(x.half_life_ci_u, "D"),
)
# Categories ordered by descending half-life (ascending sort, reversed).
_category_order = _published.sort_values(
    "half_life_time").category.tolist()[::-1]

g = (
    p9.ggplot(
        _half_life_df,
        p9.aes(x="category",
               y="half_life_time",
               ymin="half_life_ci_l",
               ymax="half_life_ci_u"),
    )
    + p9.geom_col(fill="#1f78b4")
    + p9.geom_errorbar()
    + p9.scale_x_discrete(limits=_category_order)
    + p9.scale_y_timedelta(labels=timedelta_format("d"))
    + p9.coord_flip()
    + p9.labs(
        x="Preprint Categories",
        y="Time Until 50% of Preprints are Published",
        title="Preprint Category Half-Life",
    )
    + p9.theme_seaborn(context="paper", style="white", font_scale=1.2)
    + p9.theme(axis_ticks_minor_x=p9.element_blank())
)
g.save("output/preprint_category_halflife.svg", dpi=250)
g.save("output/preprint_category_halflife.png", dpi=250)
print(g)

# Take home Results:
#     1. The average amount of time for half of all preprints to be published is 348 days (~1 year)

Exemplo n.º 20

0

Exibir arquivo

    datarows.append({
        "edges":
        df.query("pred > @optimal_threshold").hetionet.value_counts()[0],
        "in_hetionet":
        "Novel",
        "relation":
        rel
    })
edges_df = pd.DataFrame.from_records(datarows)
edges_df

# In[11]:

import math
g = (p9.ggplot(edges_df, p9.aes(x="relation", y="edges", fill="in_hetionet")) +
     p9.geom_col(position="dodge") +
     p9.scale_fill_manual(values={
         "Existing": color_map["Existing"],
         "Novel": color_map["Novel"]
     }) + p9.geom_text(p9.aes(label=(
         edges_df.apply(lambda x: f"{x['edges']}\n({x['recall']*100:.0f}%)"
                        if not math.isnan(x['recall']) else f"{x['edges']}",
                        axis=1))),
                       position=p9.position_dodge(width=0.9),
                       size=9,
                       va="bottom") + p9.scale_y_log10() +
     p9.labs(y="# of Edges",
             x="Relation Type",
             title="Reconstructing Edges in Hetionet") +
     p9.guides(fill=p9.guide_legend(title="In Hetionet?")) + p9.theme(
         axis_text_y=p9.element_blank(),

Exemplo n.º 21

0

Exibir arquivo

Arquivo: edge_prediction_experiment.py Projeto: zorrotrying/snorkeling

    int(grouped_candidates_pred_df.hetionet.value_counts()[1]),
    "relation":
    "DaG"
})
# Row for the 'Novel' DaG edges: taken from the value counts of `hetionet`
# among the candidates with pred_max > 0.5.
_novel_dag_edges = grouped_candidates_pred_df.query(
    "pred_max > 0.5").hetionet.value_counts()[0]
datarows.append({
    "edges": _novel_dag_edges,
    "in_hetionet": "Novel",
    "relation": "DaG",
})
edges_df = pd.DataFrame.from_records(datarows)
edges_df

# In[20]:


def _edge_label(row):
    """Return the edge count, followed by the recall in percent if known."""
    if math.isnan(row['recall']):
        return f"{row['edges']}"
    return f"{row['edges']} ({row['recall']*100:.0f}%)"


# Dodged bars of the edge counts on a log scale, labelled with
# count and recall; axis text, ticks and rect elements are blanked.
g = (
    p9.ggplot(edges_df,
              p9.aes(x="relation", y="edges", fill="in_hetionet"))
    + p9.geom_col(position="dodge")
    + p9.geom_text(
        p9.aes(label=(edges_df.apply(_edge_label, axis=1))),
        position=p9.position_dodge(width=1),
        size=9,
        va="bottom")
    + p9.scale_y_log10()
    + p9.theme(axis_text_y=p9.element_blank(),
               axis_ticks_major=p9.element_blank(),
               rect=p9.element_blank())
)
print(g)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: utils.py Projeto: cookesd/mcda

def plot_alt_benefit(plot_df,
                     title='Benefit by Alternative',
                     which='both',
                     sensitivity=False,
                     legend=True):
    '''Builds a stacked bar chart of the alternative benefits
    @ param plot_df: The df containing benefits for each alt by the criteria and total benefit
    @ param title: The title for the graph
    @ param which: which parts to plot. Acceptable values are
    'total' for just total value.
    'criteria' for just criteria level stacked bars'
    'both' for total and criteria. The graphs will be faceted in this case
    @ param sensitivity: if True, the graph is additionally faceted by the
    'Criterion Weight' column
    @ param legend: if False, the fill legend is hidden

    Returns the ggplot graph to be displayed elsewhere, or None (after
    printing a message) when which is not one of the accepted values'''

    _facet = which == 'both'
    if which == 'total':
        plot_df = plot_df.loc[plot_df['type'] == 'Total Value']
    elif which == 'criteria':
        plot_df = plot_df.loc[plot_df['type'] == 'Weighted Criterion Value']
    elif which != 'both':
        print(
            which,
            'is not an approved value for which.\n Enter "total", "criteria", or "both"'
        )
        return None

    # Builds the base plot shared by both legend settings
    base = p9.ggplot(plot_df,
                     p9.aes(x='Alternative', y='Benefit', fill='Criterion'))
    if legend:
        g = (
            base
            + p9.geom_col(stat='identity',
                          position=p9.position_stack(vjust=.5))  # makes stacked bar plot
            # changes the color palette to one for qualitative scales
            + p9.scale_fill_brewer(type='qual', palette='Paired'))
    else:
        g = (
            base
            # show_legend is a layer parameter, not an aesthetic, so it is
            # passed to geom_col directly rather than inside aes().
            + p9.geom_col(stat='identity',
                          position=p9.position_stack(vjust=.5),
                          show_legend=False)  # makes stacked bar plot
            # changes the color palette to one for qualitative scales
            + p9.scale_fill_brewer(type='qual', palette='Paired', guide=False)
            # 'none' hides the legend; None would leave the theme unchanged
            + p9.theme(legend_position='none'))

    g = (
        g
        + p9.geom_text(p9.aes(label='print_value'),
                       position=p9.position_stack(vjust=.5),
                       size=6,
                       hjust='center')  # adds weighted value to bars
        + p9.ggtitle(title)  # makes the title
        + p9.theme(axis_text_x=p9.element_text(
            rotation=45, hjust=1))  # rotates x axis labels
    )

    # Adds the facet if required
    if sensitivity:
        if _facet:
            return g + p9.facet_grid('type~Criterion Weight')
        return g + p9.facet_grid('Criterion Weight~')
    if _facet:
        return g + p9.facet_grid('~type')
    return g

Exemplo n.º 23

0

Exibir arquivo

def test_labels_series():
    """Series passed directly as aesthetics label the axes 'x' and 'y'."""
    mapping = aes(x=df.x, y=df.y)
    p = ggplot(df, mapping) + geom_col()
    expected_labels = {'x': 'x', 'y': 'y'}
    assert p.labels == expected_labels

Exemplo n.º 24

0

Exibir arquivo

    def plot(self) -> None:
        """Build the figure for ``self.figtype`` and pass it to ``save_plot``.

        A DataFrame is created from ``self.data`` with ``self.datacols`` as
        column names (made unique afterwards). When ``self.savedata`` is set,
        the frame is also written to ``self.outprefix + ".csv"``. If the
        frame has no rows, a warning is logged and nothing is plotted.

        By default the first column is mapped to ``y`` and the second to
        ``x``; a third column, when present, is mapped to fill/color for the
        geoms that take it. The geom is chosen from ``self.figtype``; the
        title is added last and the plot is handed to ``self.save_plot``
        together with the theme tweaks selected for that figure type
        (``None`` when no extra theme is wanted).

        Raises:
            ValueError: if ``self.figtype`` is not one of the handled types.
        """
        df = pandas.DataFrame(
            self.data,
            columns=self.datacols,
        )
        # Messages emitted while de-duplicating column names are captured
        # and prefixed with this figure's title.
        with capture_c_msg("datar", prefix=f"[r]{self.title}[/r]: "):
            df.columns = make_unique(df.columns.tolist())

        if self.savedata:
            datafile = self.outprefix + ".csv"
            logger.info(
                "[r]%s[/r]: Saving data to: %r",
                self.title,
                datafile,
                extra={"markup": True},
            )
            df.to_csv(datafile, index=False)

        # Nothing to draw for an empty frame.
        if df.shape[0] == 0:
            logger.warning("No data points to plot")
            return

        # Optional grouping aesthetics; they stay None when there is no
        # third column, in which case the geoms below receive no mapping.
        aes_for_geom_fill = None
        aes_for_geom_color = None
        # Default theme tweak (x tick label angle/justification); several
        # branches below replace it or reset it to None.
        theme_elems = p9.theme(axis_text_x=p9.element_text(angle=60, hjust=2))
        if df.shape[1] > 2:
            aes_for_geom_fill = p9.aes(fill=df.columns[2])
            aes_for_geom_color = p9.aes(color=df.columns[2])
        # Base plot: first column on y, second column on x.
        plt = p9.ggplot(df, p9.aes(y=df.columns[0], x=df.columns[1]))
        if self.figtype == "scatter":
            plt = plt + p9.geom_point(aes_for_geom_color)
            theme_elems = None
        elif self.figtype == "line":
            # NOTE(review): no geom is added for "line", so the base plot is
            # saved without any layer of its own — confirm this is intended.
            pass
        elif self.figtype == "bar":
            plt = plt + p9.geom_bar(p9.aes(fill=df.columns[0]))
        elif self.figtype == "col":
            plt = plt + p9.geom_col(aes_for_geom_fill)
        elif self.figtype == "pie":
            # Pie charts fall back to bar charts (see the warning below).
            logger.warning("Pie chart is not support by plotnine yet, "
                           "plotting bar chart instead.")
            col0 = df.iloc[:, 0]
            if df.shape[1] > 2:
                # With a third column: bars of the first column's values
                # against the third column, filled by the third column.
                plt = plt + p9.geom_bar(
                    p9.aes(x=df.columns[2], y=col0.name, fill=df.columns[2]),
                    stat="identity"
                    # aes_for_geom_fill,
                    # x=df.Group,
                    # y=col0,
                    # label=paste0(round_(100 * col0 / sum_(col0), 1), "%"),
                    # show_legend=False,
                    # position=p9.position_adjust_text(),
                )
            else:
                # NOTE(review): factor/rev/levels/cumsum/paste0 (and likely
                # map/sum/round) are presumably R-like vectorised helpers
                # imported elsewhere in the file — confirm; the arithmetic on
                # `sums` below would not work with the builtin `map`.
                col0 = factor(col0, levels=rev(unique(as_character(col0))))
                fills = rev(levels(col0))
                # One count per level of the first column.
                sums = map(lambda x: sum(col0 == x), fills)
                # NOTE(review): these prints look like leftover debugging.
                print(col0)
                print(fills)
                # Replace the base plot: bars over the second column filled
                # by the first, with percentage labels placed at the middle
                # of each cumulative segment.
                plt = (p9.ggplot(df, p9.aes(x=df.columns[1])) +
                       p9.geom_bar(p9.aes(fill=df.columns[0])) + p9.geom_label(
                           x=1,
                           y=cumsum(sums) - sums / 2,
                           label=paste0(round(sums / sum(sums) * 100, 1), "%"),
                           show_legend=False,
                       ))
                # Hide axis titles and y tick labels for this variant.
                theme_elems = p9.theme(
                    axis_title_x=p9.element_blank(),
                    axis_title_y=p9.element_blank(),
                    axis_text_y=p9.element_blank(),
                )
        elif self.figtype == "violin":
            plt = plt + p9.geom_violin(aes_for_geom_fill)
        elif self.figtype == "boxplot":
            plt = plt + p9.geom_boxplot(aes_for_geom_fill)
        elif self.figtype in ("histogram", "density"):
            # Single-variable plots: only the first column is mapped (to x).
            plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
            # Resolves to p9.geom_histogram or p9.geom_density.
            geom = getattr(p9, f"geom_{self.figtype}")
            # NOTE(review): a second column named "ONE" appears to mean
            # "no grouping column" — confirm against the code that builds
            # self.datacols.
            if df.columns[1] != "ONE":
                plt = plt + geom(p9.aes(fill=df.columns[1]), alpha=0.6)
                theme_elems = None
            else:
                plt = plt + geom(alpha=0.6)
                theme_elems = p9.theme(legend_position="none")
        elif self.figtype == "freqpoly":
            plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
            if df.columns[1] != "ONE":
                plt = plt + p9.geom_freqpoly(p9.aes(fill=df.columns[1]))
            else:
                plt = plt + p9.geom_freqpoly()
            theme_elems = None
        else:
            raise ValueError(f"Unknown figure type: {self.figtype}")

        plt = plt + p9.ggtitle(self.title)
        self.save_plot(plt, theme_elems)

Exemplo n.º 25

0

Exibir arquivo

    def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                      find_ylim_function):
        """Build the dodged bar chart with error bars for a single ROI.

        The rows of ``df`` whose ``'index'`` equals ``list_rois[roi]`` are
        sorted by ``config.single_roi_fig_x_axis`` and plotted as 'Mean'
        bars with ``Mean-Conf_Int_95`` / ``Mean+Conf_Int_95`` error bars.

        When ``ylimit`` is truthy it is applied as the upper y limit and
        ``'_same_ylim'`` is appended to the ROI name. When ``ylimit == 0``
        and ``config.use_same_axis_limits`` is 'Same limits' or
        'Create both', ``find_ylim_function`` is called and its result is
        returned (otherwise 0 is returned). In 'Same limits' mode with
        ``ylimit == 0`` the figure is not saved; in every other case it is
        passed to ``save_function`` with folder 'Same_yaxis' or
        'Different_yaxis'.
        """
        thisroi = list_rois[roi]
        x_column = config.single_roi_fig_x_axis

        # Restrict to this ROI, order along the x column and drop the old
        # index so no grouping is carried over.
        roi_df = df.loc[df['index'] == thisroi]
        roi_df = roi_df.sort_values([x_column]).reset_index(drop=True)
        # Fix the category order to the sorted order of appearance.
        roi_df[x_column] = pd.Categorical(
            roi_df[x_column], categories=roi_df[x_column].unique())

        # Shared keyword sets; each use below still builds its own object.
        dodge_kwargs = {'preserve': 'single', 'width': 0.8}
        axis_title_kwargs = {'weight': 'bold', 'color': 'black', 'size': 20}

        mapping = pltn.aes(
            x=x_column,
            y='Mean',
            ymin="Mean-Conf_Int_95",
            ymax="Mean+Conf_Int_95",
            fill='factor({colour})'.format(
                colour=config.single_roi_fig_colour))

        figure = pltn.ggplot(roi_df, mapping)
        components = [
            pltn.theme_538(),
            pltn.geom_col(position=pltn.position_dodge(**dodge_kwargs),
                          width=0.8,
                          na_rm=True),
            pltn.geom_errorbar(size=1,
                               position=pltn.position_dodge(**dodge_kwargs)),
            pltn.labs(x=config.single_roi_fig_label_x,
                      y=config.single_roi_fig_label_y,
                      fill=config.single_roi_fig_label_fill),
            pltn.scale_x_discrete(labels=[]),
            pltn.theme(
                panel_grid_major_x=pltn.element_line(alpha=0),
                axis_title_x=pltn.element_text(**axis_title_kwargs),
                axis_title_y=pltn.element_text(**axis_title_kwargs),
                axis_text_y=pltn.element_text(size=20, color='black'),
                legend_title=pltn.element_text(size=20, color='black'),
                legend_text=pltn.element_text(size=18, color='black'),
                subplots_adjust={'right': 0.85},
                legend_position=(0.9, 0.8),
                dpi=config.plot_dpi),
            # Text drawn at y=-.7 carrying the x column's values (the x
            # tick labels themselves are blanked above).
            pltn.geom_text(pltn.aes(y=-.7, label=x_column),
                           color='black',
                           size=20,
                           va='top'),
            pltn.scale_fill_manual(
                values=config.colorblind_friendly_plot_colours),
        ]
        for component in components:
            figure = figure + component

        if ylimit:
            # Set y limit of figure (used to make it the same for every barchart)
            figure += pltn.ylim(None, ylimit)
            thisroi += '_same_ylim'

        limits_mode = config.use_same_axis_limits

        returned_ylim = 0
        if limits_mode in ('Same limits', 'Create both') and ylimit == 0:
            returned_ylim = find_ylim_function(thisroi, figure, 'yaxis')

        # In 'Same limits' mode the first pass only measures the limit.
        if limits_mode == 'Same limits' and ylimit == 0:
            return returned_ylim

        folder = 'Same_yaxis' if ylimit != 0 else 'Different_yaxis'
        save_function(figure, thisroi, config, folder, 'barchart')

        return returned_ylim

Exemplo n.º 26

0

Exibir arquivo

def _intensity_counts(frame):
    """Return (rows, counts) for the "sy" rows of *frame* with an Intensity.

    ``rows`` are the rows whose Group is "sy" and whose Intensity is not
    NA; ``counts`` has one row per Intensity value with the row count in
    column "n".
    """
    rows = frame[(frame.Group == "sy") & frame.Intensity.notna()]
    counts = rows.groupby("Intensity", sort=True, as_index=False).count()
    # Keep Intensity plus the first counted column and call the latter "n".
    # NOTE(review): the rename only takes effect when that column is named
    # "Unnamed: 0" (presumably a CSV index column) — confirm with callers.
    counts = counts.iloc[:, 0:2].rename(columns={"Unnamed: 0": "n"})
    return rows, counts


def _intensity_bar_plot(counts):
    """Build the red bar chart of attack counts per Intensity (x axis 1-10)."""
    ticks = list(range(1, 11))
    return (p9.ggplot(data=counts,
                      mapping=p9.aes(x='Intensity', y='n'))
            + p9.geom_col(fill='red')
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=40),
                       axis_title=p9.element_text(size=40, face='bold'))
            + p9.coord_cartesian(xlim=(1, 10))
            + p9.scale_x_continuous(labels=ticks, breaks=ticks)
            + p9.labs(x='', y='No. of attacks'))


def _save_intensity_plot(rows, counts, filename):
    """Save the intensity chart as *filename*, or report that there is no data."""
    if len(rows) > 0:
        plot = _intensity_bar_plot(counts)
        # Arguments are kept exactly as in the original save calls.
        plot.save(filename=filename,
                  plot=plot,
                  path="pdf/iteration/",
                  width=25, height=5,
                  dpi=320)
    else:
        print('Plot not created; no data found.')


def intensity_graph(Data, Data_m):
    """Create and save the Intensity bar charts for the monthly and full data.

    Args:
        Data: frame with all records; must provide ``Intensity`` and
            ``Group`` columns.
        Data_m: frame with the monthly records, same columns.

    Returns:
        None. Progress and warnings are printed; charts are written to
        "pdf/iteration/" as 'Graph_1.jpeg' (monthly) and
        'Graph_ALL_1.jpeg' (all data).

    If every Intensity value in ``Data`` is NA, a warning is printed and no
    chart is attempted. Previously that path fell through to the saving
    code and failed on the filtered frames, which had never been assigned.
    """
    print('======= Creating intensity_graph =======')
    if pd.isna(Data.Intensity).all():
        print("WARNING: All values for Intensity are NA's")
    else:
        # Filter monthly and all-time symptoms with a known Intensity.
        monthly_rows, monthly_counts = _intensity_counts(Data_m)
        all_rows, all_counts = _intensity_counts(Data)

        # Monthly chart first, then the chart over all data.
        _save_intensity_plot(monthly_rows, monthly_counts, 'Graph_1.jpeg')
        _save_intensity_plot(all_rows, all_counts, 'Graph_ALL_1.jpeg')
    print('=================================intensity_graph DONE =============================')

Exemplo n.º 27

0

Exibir arquivo

def test_reorder():
    """A column plot with x and fill set to ``reorder(x, y)`` matches 'reorder'."""
    ordered_x = 'reorder(x, y)'
    mapping = aes(ordered_x, 'y', fill=ordered_x)
    p = ggplot(df, mapping) + geom_col()
    assert p + _theme == 'reorder'

Exemplo n.º 28

0

Exibir arquivo

def test_labels_lists():
    """Plain lists passed as aesthetics leave the x and y labels as None."""
    xs = [1, 2, 3]
    ys = [1, 2, 3]
    p = ggplot(df, aes(x=xs, y=ys)) + geom_col()
    expected_labels = {'x': None, 'y': None}
    assert p.labels == expected_labels

Exemplo n.º 29

0

Exibir arquivo

    ["is_same_paper_1", "is_same_paper_2", "is_same_paper_3"]].mode(axis=1))))
final_annotated_df.head()

# In[6]:

# Mean of final_same_paper within each distance_bin, exposed as frac_correct.
binned_stats_df = (
    final_annotated_df.groupby("distance_bin")
    .final_same_paper.mean()
    .to_frame()
    .rename(index=str, columns={"final_same_paper": "frac_correct"})
    .reset_index()
)
binned_stats_df

# In[7]:

# labs() names aesthetics, not screen positions: coord_flip() only changes
# where each axis is drawn. x carries distance_bin and y carries
# frac_correct, so the titles must follow that mapping (they were swapped).
g = (
    p9.ggplot(binned_stats_df, p9.aes(x="distance_bin", y="frac_correct"))
    + p9.geom_col(fill="#a6cee3")
    + p9.coord_flip()
    + p9.labs(x="Euclidean Distance Bins", y="Fraction Correct")
    + p9.theme_seaborn(
        context="paper", style="ticks", font="Arial", font_scale=1.5)
)
g.save("output/figures/distance_bin_accuracy.svg")
g.save("output/figures/distance_bin_accuracy.png", dpi=250)
print(g)

# # Logistic Regression Performance

# In[8]:

# Read the tab-separated embeddings table and index it by the "document"
# column.
biorxiv_embed_df = pd.read_csv(
    Path("../word_vector_experiment/output/")
    / "word2vec_output/"
    / "biorxiv_all_articles_300.tsv.xz",
    sep="\t",
)
biorxiv_embed_df = biorxiv_embed_df.set_index("document")

Exemplo n.º 30

0

Exibir arquivo

def test_reorder_index():
    """Reordering x by its own index matches the 'reorder_index' baseline."""
    # The dataframe is built already ordered by the y variable, so the
    # index of x follows the order of y as well.
    mapping = aes('reorder(x, x.index)', 'y')
    p = ggplot(df, mapping) + geom_col()
    assert p + _theme == 'reorder_index'

Exemplo n.º 31

0

Exibir arquivo

# (index, value) of the entry in the C grid that equals the selected C.
best_result = [
    (c_index, c_value)
    for c_index, c_value in enumerate(model.Cs_)
    if c_value == model.C_
][0]
print(best_result)

print("Best CV Fold")
print(model.scores_["polka"][:, best_result[0]])
model.scores_["polka"][:, best_result[0]].mean()

# One weight per component; numbering starts at 1. The count is taken from
# the coefficient vector instead of being hard-coded (the frame could only
# ever be built when both columns had the same length).
n_components = len(model.coef_[0])
model_weights_df = pd.DataFrame.from_dict({
    "weight": model.coef_[0],
    "pc": list(range(1, n_components + 1)),
})
model_weights_df["pc"] = pd.Categorical(model_weights_df["pc"])
model_weights_df.head()

# Horizontal bars; the limits are reversed so component 1 is listed first
# after the flip. The x-axis title typo ("Princpial") is corrected.
g = (
    p9.ggplot(model_weights_df, p9.aes(x="pc", y="weight"))
    + p9.geom_col(position=p9.position_dodge(width=5), fill="#253494")
    + p9.coord_flip()
    + p9.scale_x_discrete(
        limits=sorted(range(1, n_components + 1), reverse=True))
    + p9.theme_seaborn(
        context="paper", style="ticks", font_scale=1.1, font="Arial")
    + p9.theme(figure_size=(10, 8))
    + p9.labs(title="Regression Model Weights",
              x="Principal Component",
              y="Model Weight")
)
# g.save("output/figures/pca_log_regression_weights.svg")
# g.save("output/figures/pca_log_regression_weights.png", dpi=250)
print(g)

fold_features = model.coefs_paths_["polka"].transpose(1, 0, 2)
model_performance_df = pd.DataFrame.from_dict({
    "feat_num": ((fold_features.astype(bool).sum(axis=1)) > 0).sum(axis=1),
    "C":

Exemplo n.º 32

0

Exibir arquivo

    items = {}
    data = {}
    all_models = set()
    all_tasks = set()
    data = {}
    with gzip.open(options.input) as ifd:
        for row in csv.DictReader(ifd, delimiter="\t"):
            for k, v in row.items():
                data[k] = data.get(k, [])
                data[k].append(v)
    
    for k in data.keys():
        floats = [maybe_float(x) for x in data[k]]
        if all([re.match(r"^\d+$", x) for x in data[k]]):
            data[k] = [int(x) for x in data[k]]
        elif all(floats):
            data[k] = floats

    df = pandas.DataFrame(data)
    #print df
    x = (ggplot(df, aes("factor(%s)" % (options.x), options.y, color="factor(%s)" % (options.color)))) + \
        ggtitle(options.title.strip("'")) + \
        ylab(options.ylabel.strip("'")) + \
        xlab(options.xlabel.strip("'")) + \
        labs(color=options.color_label.strip("'")) + \
        geom_col(show_legend=False) + \
        lims(y=(0.0, 1.0))
    x.save(options.output)

    #theme(legend_title=element_text("")) + \