Example #1
File: protobowl.py Project: Pinafore/qb
def plot():
    outdir = 'output/protobowl/'
    pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)

    df = load_protobowl()
    df.result = df.result.apply(lambda x: x is True)
    df['log_n_records'] = df.user_n_records.apply(np.log)

    df_user_grouped = df.groupby('uid')
    user_stat = df_user_grouped.agg(np.mean)
    print('{} users'.format(len(user_stat)))
    print('{} records'.format(len(df)))
    max_color = user_stat.log_n_records.max()
    user_stat['alpha'] = pd.Series(
        user_stat.log_n_records.apply(lambda x: x / max_color), index=user_stat.index)

    # 2D user plot
    p0 = ggplot(user_stat) \
        + geom_point(aes(x='relative_position', y='result',
                     size='user_n_records', color='log_n_records', alpha='alpha'),
                     show_legend={'color': False, 'alpha': False, 'size': False}) \
        + scale_color_gradient(high='#e31a1c', low='#ffffcc') \
        + labs(x='Average buzzing position', y='Accuracy') \
        + theme(aspect_ratio=1)
    p0.save(os.path.join(outdir, 'protobowl_users.pdf'))
    # p0.draw()
    print('p0 done')

    # histogram of number of records
    p1 = ggplot(user_stat, aes(x='log_n_records', y='..density..')) \
        + geom_histogram(color='#e6550d', fill='#fee6ce') \
        + geom_density() \
        + labs(x='Log number of records', y='Density') \
        + theme(aspect_ratio=0.3)
    p1.save(os.path.join(outdir, 'protobowl_hist.pdf'))
    # p1.draw()
    print('p1 done')

    # histogram of accuracy
    p2 = ggplot(user_stat, aes(x='result', y='..density..')) \
        + geom_histogram(color='#31a354', fill='#e5f5e0') \
        + geom_density() \
        + labs(x='Accuracy', y='Density') \
        + theme(aspect_ratio=0.3)
    p2.save(os.path.join(outdir, 'protobowl_acc.pdf'))
    # p2.draw()
    print('p2 done')

    # histogram of buzzing position
    p3 = ggplot(user_stat, aes(x='relative_position', y='..density..')) \
        + geom_histogram(color='#3182bd', fill='#deebf7') \
        + geom_density() \
        + labs(x='Average buzzing position', y='Density') \
        + theme(aspect_ratio=0.3)
    p3.save(os.path.join(outdir, 'protobowl_pos.pdf'))
    # p3.draw()
    print('p3 done')
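For readers who want to try the density-histogram pattern used above (aes(y='..density..') plus geom_histogram and geom_density) without the qb data pipeline, here is a minimal self-contained sketch on synthetic data; the column name is made up, and newer plotnine releases prefer after_stat('density') over the '..density..' spelling.

import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_histogram, geom_density, labs, theme

# Synthetic stand-in for user_stat: 500 fake "log number of records" values.
rng = np.random.default_rng(0)
toy = pd.DataFrame({'log_n_records': rng.normal(3, 1, 500)})

p = (ggplot(toy, aes(x='log_n_records', y='..density..'))
     + geom_histogram(color='#e6550d', fill='#fee6ce', bins=30)
     + geom_density()
     + labs(x='Log number of records', y='Density')
     + theme(aspect_ratio=0.3))
p.save('toy_hist.pdf')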
Example #2
def test_exceptions():
    # no x limits
    with pytest.raises(PlotnineError):
        p = ggplot(df)
        print(p + stat_function(fun=np.sin))

    # fun not callable
    with pytest.raises(PlotnineError):
        p = ggplot(df, aes('x'))
        print(p + stat_function(fun=1))
Example #3
File: figures.py Project: Pinafore/qb
def plot_char_percent_vs_accuracy_histogram(self, category=False):
    if category:
        return (
            ggplot(self.char_plot_df) + facet_wrap('category_jmlr')
            + aes(x='char_percent', fill='Outcome')
            + geom_histogram(binwidth=.05)
        )
    else:
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', fill='Outcome')
            + geom_histogram(binwidth=.05)
        )
Example #4
File: figures.py Project: Pinafore/qb
def plot_char_percent_vs_accuracy_smooth(self, category=False):
    if category:
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct', color='category_jmlr')
            + geom_smooth()
        )
    else:
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct')
            + geom_smooth(method='mavg')
        )
Example #5
File: figures.py Project: Pinafore/qb
def plot_compare_accuracy(self, expo=False):
    if expo:
        return (
            ggplot(self.acc_df) + facet_wrap('position')
            + aes(x='guesser', y='accuracy', fill='Dataset')
            + geom_bar(stat='identity', position='dodge')
            + xlab('Guessing Model')
            + ylab('Accuracy')
        )
    else:
        return (
            ggplot(self.acc_df) + facet_wrap('position')
            + aes(x='guesser', y='accuracy')
            + geom_bar(stat='identity')
        )
Example #6
def test_limits():
    p = (ggplot(df, aes('x')) +
         stat_function(fun=np.cos, size=2,
                       color='blue', arrow=arrow(ends='first')) +
         stat_function(fun=np.cos, xlim=(10, 20), size=2,
                       color='red', arrow=arrow(ends='last')))
    assert p == 'limits'
Example #7
def test_aesthetics():
    df = pd.DataFrame({
            'a': range(5),
            'b': 2,
            'c': 3,
            'd': 4,
            'e': 5,
            'f': 6,
            'g': 7,
            'h': 8,
            'i': 9
        })

    p = (ggplot(df, aes(y='a')) +
         geom_point(aes(x='b')) +
         geom_point(aes(x='c', size='a')) +
         geom_point(aes(x='d', alpha='a'),
                    size=10, show_legend=False) +
         geom_point(aes(x='e', shape='factor(a)'),
                    size=10, show_legend=False) +
         geom_point(aes(x='f', color='factor(a)'),
                    size=10, show_legend=False) +
         geom_point(aes(x='g', fill='a'), stroke=0,
                    size=10, show_legend=False) +
         geom_point(aes(x='h', stroke='a'), fill='white',
                    color='green', size=10) +
         geom_point(aes(x='i', shape='factor(a)'),
                    fill='brown', stroke=2, size=10, show_legend=False) +
         theme(subplots_adjust={'right': 0.85}))

    assert p == 'aesthetics'
Example #8
def test_step():
    p = (ggplot(df, aes('x')) +
         geom_step(aes(y='y'), size=4) +
         geom_step(aes(y='y+2'), color='red',
                   direction='vh', size=4))

    assert p == 'step'
Example #9
def test_expand_limits():
    df = pd.DataFrame({'x': range(5, 11), 'y': range(5, 11)})
    p = (ggplot(aes('x', 'y'), data=df)
         + geom_point()
         + expand_limits(y=(0, None))
         )
    assert p == 'expand_limits'
Example #10
def test_bool_mapping():
    df = pd.DataFrame({
        'x': [1, 2, 3],
        'y': [True, False, False]
    })
    p = ggplot(df, aes('x', 'y')) + geom_point()
    assert p == 'bool_mapping'
Example #11
def test_continuous_x():
    n = len(df_continuous_x)
    p = (ggplot(df_continuous_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(df_continuous_x[3:n-3], method='loess',
                       color='blue', fullrange=False))
    assert p == 'continuous_x'
Example #12
def test_legend_fill_ratio():
    p = (ggplot(df_linear, aes('x', color='x<0.5'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm', size=0.5, span=.3)
         )

    assert p == 'legend_fill_ratio'
Example #13
def test_normal_with_line():
    p = (ggplot(df_normal, aes(sample='x'))
         + geom_qq()
         + geom_qq_line()
         )
    # Roughly a straight line of points through the origin
    assert p == 'normal_with_line'
Example #14
def my_plot(df, x, y, color=None, clab=None):
    aes = { 'color': color, 'group': color } if color else {}
    if clab is None and color is not None:
        clab = color.replace('pr', "'")

    return (gg.ggplot(df, gg.aes(x, y, **aes))
            + labs(x, y)
            + (colors(clab) if color else []))
Example #15
def test_non_linear_smooth_no_ci():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='loess', span=.3,
                       color='blue', se=False)
         )

    assert p == 'non_linear_smooth_no_ci'
Example #16
def test_linear_smooth():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm', span=.3,
                       color='blue')
         )

    assert p == 'linear_smooth'
Example #17
def test_summary_functions():
    p = (ggplot(df, aes('x', 'y'))
         + stat_summary(fun_y=np.mean,
                        fun_ymin=np.min,
                        fun_ymax=np.max,
                        size=2))

    assert p == 'summary_functions'
Example #18
def test_discrete_x():
    p = (ggplot(df, aes('xd', 'y'))
         + stat_summary_bin(fun_y=np.mean,
                            fun_ymin=np.min,
                            fun_ymax=np.max,
                            geom='bar'))

    assert p == 'discrete_x'
Example #19
def test_hull():
    p = (ggplot(mtcars)
         + aes('wt', 'mpg', color='factor(cyl)')
         + geom_point()
         + stat_hull(size=1)
         )

    assert p + _theme == 'hull'
Example #20
def test_aes_inheritance():
    # A default line (intercept = 0, slope = 1)
    p = (ggplot(df, aes('x', 'y', color='factor(z)',
                        slope='slope', intercept='intercept')) +
         geom_point(size=10, show_legend=False) +
         geom_abline(size=2))

    assert p == 'aes_inheritance'
Example #21
def test_ribbon_facetting():
    p = (ggplot(df, aes('x', ymin='ymin', ymax='ymax',
                        fill='factor(z)')) +
         geom_ribbon() +
         facet_wrap('~ z')
         )

    assert p + _theme == 'ribbon_facetting'
Example #22
def test_ellipse():
    p = (ggplot(df, aes('x', 'y'))
         + geom_point()
         + stat_ellipse(type='t')
         + stat_ellipse(type='norm', color='red')
         + stat_ellipse(type='euclid', color='blue')
         )

    assert p == 'ellipse'
Example #23
def test_continuous_x():
    p = (ggplot(df, aes('xc', 'y'))
         + stat_summary_bin(fun_y=np.mean,
                            fun_ymin=np.min,
                            fun_ymax=np.max,
                            bins=5,
                            geom='bar'))

    assert p == 'continuous_x'
Example #24
def test_funargs():
    p = (ggplot(df, aes('x', 'y'))
         + stat_summary(fun_data='mean_cl_normal',
                        size=2, color='blue')
         + stat_summary(fun_data='mean_cl_normal',
                        fun_args={'confidence_interval': .5},
                        size=2, color='green'))

    assert p == 'fun_args'
Example #25
def test_aesthetics():
    p = (ggplot(df, aes(y='y', angle='angle', radius='radius')) +
         geom_spoke(aes('x'), size=2) +
         geom_spoke(aes('x+2', alpha='z'), size=2) +
         geom_spoke(aes('x+4', linetype='factor(z)'), size=2) +
         geom_spoke(aes('x+6', color='factor(z)'), size=2) +
         geom_spoke(aes('x+8', size='z')))

    assert p + _theme == 'aesthetics'
Example #26
def test_arrow():
    p = (ggplot(df, aes('x', 'y')) +
         geom_path(size=2, arrow=arrow(ends='both', type='closed')) +
         geom_path(aes(y='y+2'), color='red', size=2,
                   arrow=arrow(angle=60, length=1, ends='first')) +
         geom_path(aes(y='y+4'), color='blue', size=2,
                   arrow=arrow(length=1)))

    assert p == 'arrow'
Example #27
def test_scale_without_a_mapping():
    df = pd.DataFrame({
        'x': [1, 2, 3],
    })
    p = (ggplot(df, aes('x', 'x'))
         + geom_point()
         + scale_color.scale_color_continuous())
    with pytest.warns(UserWarning):
        p.draw_test()
Example #28
def test_changing_xlim_in_stat_density():
    n = 100
    _xlim = (5, 10)
    df = pd.DataFrame({'x': np.linspace(_xlim[0]-1, _xlim[1]+1, n)})
    p = (ggplot(df, aes('x'))
         + stat_density()
         + xlim(*_xlim)
         )
    # No exceptions
    p._build()
Example #29
def test_errorbarh_aesthetics():
    p = (ggplot(df, aes(xmin='ymin', xmax='ymax')) +
         geom_errorbarh(aes(y='x'), size=2) +
         geom_errorbarh(aes(y='x+1', alpha='z'), height=0.2, size=2) +
         geom_errorbarh(aes(y='x+2', linetype='factor(z)'), size=2) +
         geom_errorbarh(aes(y='x+3', color='factor(z)'), size=2) +
         geom_errorbarh(aes(y='x+4', size='z'))
         )

    assert p + _theme == 'errorbarh_aesthetics'
Example #30
def test_lines():
    p = (ggplot(df, aes(x='x', y='y')) +
         geom_point(alpha=.5) +
         geom_quantile(quantiles=[.001, .5, .999], formula='y~x',
                       size=2))

    # Two (.001, .999) quantile lines should bound the points
    # from below and from above, and the .5 line should go
    # through middle (approximately).
    assert p == 'lines'
Example #31
def test_no_fill():
    p = (ggplot(df, aes('x', group='factor(z)')) +
         geom_polygon(aes(y='y'), fill=None, color='red', size=2) +
         geom_polygon(aes(y='y+2'), fill='None', color='green', size=2) +
         geom_polygon(aes(y='y+4'), fill='none', color='blue', size=2))
    assert p + _theme == 'no_fill'
Example #32
#Topic: Common Plots
#-----------------------------
#libraries https://pythonplot.com/#bar-count

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#pip install plotnine #similar to ggplots
#https://plotnine.readthedocs.io/en/stable/index.html
import plotnine  #ggplot type

from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
help(plotnine.facet_wrap)  # view the docstring (in IPython: plotnine.facet_wrap?)
from plotnine.data import mtcars

(ggplot(mtcars, aes('wt', 'mpg', color='factor(gear)')) + geom_point() + stat_smooth(method='lm') + facet_wrap('~gear'))

from plotnine import *
(ggplot(mtcars, aes('factor(cyl)', fill='factor(am)')) + geom_bar( position='fill') )

(ggplot(mtcars, aes('factor(cyl)', fill='factor(am)')) + geom_bar(position='fill') + geom_text(aes(label='stat(count)'), stat='count', position='fill' ))

from plotnine.data import mpg
(ggplot(mpg) + aes(x='manufacturer') + geom_bar(size=20) + coord_flip() + labs(y='Count', x='Manufacturer', title='Number of Cars by Make'))
#https://plotnine.readthedocs.io/en/stable/tutorials/miscellaneous-order-plot-series.html


from pydataset import data
data()
mtcars = data('mtcars')
data1 = mtcars.copy()
data1.head()
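The tutorial linked above covers ordering the bars of a count plot. A minimal sketch of that idea, assuming plotnine and its bundled mpg dataset are available, is to turn the column into an ordered pandas Categorical before plotting:

import pandas as pd
from plotnine import ggplot, aes, geom_bar, coord_flip, labs
from plotnine.data import mpg

# Order manufacturers by how often they appear, least frequent first,
# so the longest bar ends up on top after coord_flip.
order = mpg['manufacturer'].value_counts().index.tolist()[::-1]
mpg_ordered = mpg.assign(manufacturer=pd.Categorical(mpg['manufacturer'], categories=order, ordered=True))

(ggplot(mpg_ordered, aes(x='manufacturer'))
 + geom_bar()
 + coord_flip()
 + labs(y='Count', x='Manufacturer', title='Number of Cars by Make (ordered)'))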
Example #33
def analyze_index(index_array, mask, histplot=False, bins=100):
    """This extracts the hyperspectral index statistics and writes the values  as observations out to
       the Outputs class.

    Inputs:
    index_array  = Instance of the Spectral_data class, usually the output from pcv.hyperspectral.extract_index
    mask         = Binary mask made from selected contours
    histplot     = if True plots histogram of intensity values
    bins         = optional, number of classes to divide spectrum into


    :param index_array: __main__.Spectral_data
    :param mask: numpy array
    :param histplot: bool
    :param bins: int
    """
    params.device += 1

    debug = params.debug
    params.debug = None

    if len(np.shape(mask)) > 2 or len(np.unique(mask)) > 2:
        fatal_error("Mask should be a binary image of 0 and nonzero values.")

    if len(np.shape(index_array.array_data)) > 2:
        fatal_error("index_array data should be a grayscale image.")

    # Mask data and collect statistics about pixels within the masked image
    masked_array = index_array.array_data[np.where(mask > 0)]
    index_mean = np.average(masked_array)
    index_median = np.median(masked_array)
    index_std = np.std(masked_array)

    # Calculate histogram
    maxval = round(np.amax(index_array.array_data[0]), 4)
    hist_nir = [float(l[0]) for l in cv2.calcHist([index_array.array_data.astype(np.float32)],
                                                  [0], mask, [bins], [-2, 2])]

    # Create list of bin labels
    bin_width = maxval / float(bins)
    b = 0
    bin_labels = [float(b)]
    plotting_labels = [float(b)]
    for i in range(bins - 1):
        b += bin_width
        bin_labels.append(b)
        plotting_labels.append(round(b, 2))

    # Make hist percentage for plotting
    pixels = cv2.countNonZero(mask)
    hist_percent = [(p / float(pixels)) * 100 for p in hist_nir]

    # Reset debug mode and make plot
    params.debug = debug

    if histplot is True:
        hist_x = hist_percent
        dataset = pd.DataFrame({'Index Reflectance': bin_labels,
                                'Proportion of pixels (%)': hist_x})
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Index Reflectance',
                                       y='Proportion of pixels (%)'))
                    + geom_line(color='red')
                    + scale_x_continuous(breaks=plotting_labels, labels=plotting_labels))

        analysis_image = fig_hist
        if params.debug == "print":
            fig_hist.save(os.path.join(params.debug_outdir, str(params.device) + index_array.array_type + '_hist.png'))
        elif params.debug == "plot":
            print(fig_hist)

    # Make sure variable names are unique within a workflow
    outputs.add_observation(variable='mean_' + index_array.array_type,
                            trait='Average ' + index_array.array_type + ' reflectance',
                            method='plantcv.plantcv.hyperspectral.analyze_index', scale='reflectance', datatype=float,
                            value=float(index_mean), label='none')

    outputs.add_observation(variable='med_' + index_array.array_type,
                            trait='Median ' + index_array.array_type + ' reflectance',
                            method='plantcv.plantcv.hyperspectral.analyze_index', scale='reflectance', datatype=float,
                            value=float(index_median), label='none')

    outputs.add_observation(variable='std_' + index_array.array_type,
                            trait='Standard deviation ' + index_array.array_type + ' reflectance',
                            method='plantcv.plantcv.hyperspectral.analyze_index', scale='reflectance', datatype=float,
                            value=float(index_std), label='none')

    outputs.add_observation(variable='index_frequencies_' + index_array.array_type, trait='index frequencies',
                            method='plantcv.plantcv.analyze_nir_intensity', scale='frequency', datatype=list,
                            value=hist_percent, label=bin_labels)

    if params.debug == "plot":
        plot_image(masked_array)
    elif params.debug == "print":
        img_name = str(params.device) + index_array.array_type + ".png"
        print_image(img=masked_array, filename=os.path.join(params.debug_outdir, img_name))
Example #34
def histogram(gray_img, mask=None, bins=256, color='red', title=None):
    """Plot a histogram using ggplot.

    Inputs:
    gray_img = grayscale image to analyze
    mask     = binary mask made from selected contours
    bins     = number of classes to divide spectrum into
    color    = color of the line drawn
    title    = custom title for the plot (drawn only if title is not None)

    :param gray_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :param color: str
    :param title: str
    :return fig_hist: ggplot
    """

    params.device += 1
    debug = params.debug
    # Apply mask if one is supplied
    if mask is not None:
        # apply plant shaped mask to image
        params.debug = None
        mask1 = binary_threshold(mask, 0, 255, 'light')
        mask1 = (mask1 / 255)
        masked = np.multiply(gray_img, mask1)
    else:
        masked = gray_img

    params.debug = debug

    if gray_img.dtype == 'uint16':
        maxval = 65536
    else:
        maxval = 256

    # Store histogram data
    hist_gray_data, hist_bins = np.histogram(masked, bins, (1, maxval))
    hist_bins1 = hist_bins[:-1]
    hist_bins2 = [l for l in hist_bins1]
    hist_gray = [l for l in hist_gray_data]
    # make hist percentage for plotting
    pixels = cv2.countNonZero(masked)
    hist_percent = (hist_gray_data / float(pixels)) * 100

    hist_x = hist_percent
    bin_labels = np.arange(0, bins)
    dataset = pd.DataFrame({
        'Grayscale pixel intensity': bin_labels,
        'Proportion of pixels (%)': hist_x
    })
    if title is None:
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Grayscale pixel intensity',
                                       y='Proportion of pixels (%)')) +
                    geom_line(color=color) +
                    scale_x_continuous(breaks=list(range(0, bins, 25))))
    else:
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Grayscale pixel intensity',
                                       y='Proportion of pixels (%)')) +
                    geom_line(color=color) +
                    scale_x_continuous(breaks=list(range(0, bins, 25))) +
                    labels.ggtitle(title))

    if params.debug is not None:
        if params.debug == "print":
            fig_hist.save(
                os.path.join(params.debug_outdir,
                             str(params.device) + '_hist.png'))
        if params.debug == "plot":
            print(fig_hist)

    return fig_hist
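A minimal, hypothetical usage sketch for the histogram() helper above. It assumes the function and the globals/imports it relies on (the plantcv params object, cv2, pandas, and the plotnine names) are already in scope; the image is synthetic, and no mask is supplied so the optional binary_threshold step is skipped.

import numpy as np

# Synthetic 8-bit grayscale image (illustrative only).
gray_img = (np.random.rand(200, 200) * 255).astype(np.uint8)

fig = histogram(gray_img, bins=256, color='blue', title='Synthetic grayscale histogram')
print(fig)  # render the plotnine figure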
Example #35
def test_nudge():
    p = (ggplot(df1, aes('x', 'y')) + geom_point(size=10) +
         geom_point(size=10, color='red', position=position_nudge(.25, .25)))
    assert p + _theme == 'nudge'
Example #36
from plotnine.data import economics

from plotnine import ggplot, aes, geom_line, labs

g = (ggplot(economics) +
     aes(x="date", y="uempmed") +
     geom_line() +
     labs(x="date", y="median duration of unemployment"))

g.save("08.png")
Example #37
def test_nonzero_indexed_data():
    df = pd.DataFrame({98: {'blip': 0, 'blop': 1},
                       99: {'blip': 1, 'blop': 3}}).T
    p = ggplot(aes(x='blip', y='blop'), data=df) + geom_line()
    p.draw_test()
Example #38
def gg_f_plot(f, X):
    if len(np.shape(X)) == 1: X = np.array(X)[:, None]
    Y = np.vectorize(f)(X)
    df = pd.DataFrame({'X': X[:, 0], 'Y': Y[:, 0]})
    return ggplot(df) + geom_line(aes('X', 'Y'), color='blue')
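A small usage sketch for gg_f_plot above, assuming numpy, pandas and the plotnine names (ggplot, aes, geom_line) are imported in this namespace as the snippet expects:

import numpy as np

X = np.linspace(-3, 3, 200)
p = gg_f_plot(np.sin, X)   # line plot of sin(x) in blue
print(p)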
Example #39
output = output.assign(dummy_y = 0)
output
print(output) # printing the result table

# Perform a t-test to determine if weights are significantly different
targene_geo_mutant = output[output['status_sign'] == 1]
targene_geo_wt = output[output['status_sign'] == -1]

# Output t-test results
t_results_geo_targene = ttest_ind(a = targene_geo_mutant['weight'],
                              b = targene_geo_wt['weight'], equal_var = False)
print('Statistic = {:.2f}, p = {:.2E}'.format(t_results_geo_targene[0],
                                              Decimal(t_results_geo_targene[1])))

# graphical output for predictions
p = (gg.ggplot(output,
               gg.aes(x='weight', y='dummy_y', color='factor(status_sign)')) +
     gg.geom_hline(gg.aes(yintercept=0), linetype='solid') +
     gg.geom_point(size=4) +
     gg.scale_color_manual(values=["#377eb8", "#ff7f00"], labels=['WT', 'Mutant']) +
     gg.ylim([-0.1, 0.1]) +
     gg.xlim([-0.001, 1.001]) +
     gg.theme_seaborn(style='whitegrid') +
     gg.xlab('Targene Classifier Score') +
     gg.ylab('') +
     gg.labs(color='Sample_status') +
     gg.ggtitle('Mutant vs WT \n') +
     gg.theme(
        plot_title=gg.element_text(size=22),
        axis_title_x=gg.element_text(size=16),
        axis_text_x=gg.element_text(size=16),
        axis_text_y=gg.element_blank(),
Example #40
def gg_plot(X, Y):
    if len(np.shape(X)) == 1: X = np.array(X)[:, None]
    if len(np.shape(Y)) == 1: Y = np.array(Y)[:, None]
    df = pd.DataFrame({'X': X[:, 0], 'Y': Y[:, 0]})
    return ggplot(df) + geom_line(aes('X', 'Y'), color='blue')
Example #41
File: pyramid.py Project: oadams/pyramid
def create_plots(df: pd.DataFrame) -> List[p9.ggplot]:
    plots = [p9.ggplot(df) + p9.geom_bar(p9.aes(x='Ewbanks Grade'))]
    return plots
Example #42
from plotnine.data import mpg
from plotnine import ggplot, aes, facet_grid, labs, geom_point, stat_smooth

g = (ggplot(mpg) + facet_grid(facets="year~class") + aes(x="displ", y="hwy") +
     labs(x="Engine Size",
          y="Miles per Gallon",
          title="Miles per Gallon for Each Year and Vehicle Class") +
     geom_point())

g.save("16.png")
Example #43
def analyze_nir_intensity(gray_img, mask, bins=256, histplot=False):
    """This function calculates the intensity of each pixel associated with the plant and writes the values out to
       a file. It can also print out a histogram plot of pixel intensity and a pseudocolor image of the plant.

    Inputs:
    gray_img     = 8- or 16-bit grayscale image data
    mask         = Binary mask made from selected contours
    bins         = number of classes to divide spectrum into
    histplot     = if True plots histogram of intensity values

    Returns:
    analysis_images = NIR histogram image

    :param gray_img: numpy array
    :param mask: numpy array
    :param bins: int
    :param histplot: bool
    :return analysis_images: plotnine ggplot
    """
    # apply plant shaped mask to image
    mask1 = binary_threshold(mask, 0, 255, 'light')
    mask1 = (mask1 / 255)
    # masked = np.multiply(gray_img, mask1)

    # calculate histogram
    if gray_img.dtype == 'uint16':
        maxval = 65536
    else:
        maxval = 256

    # Make a pseudo-RGB image
    rgbimg = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2BGR)

    # Calculate histogram
    hist_nir = [
        float(l[0])
        for l in cv2.calcHist([gray_img], [0], mask, [bins], [0, maxval])
    ]
    # Create list of bin labels
    bin_width = maxval / float(bins)
    b = 0
    bin_labels = [float(b)]
    for i in range(bins - 1):
        b += bin_width
        bin_labels.append(b)

    # make hist percentage for plotting
    pixels = cv2.countNonZero(mask1)
    hist_percent = [(p / float(pixels)) * 100 for p in hist_nir]

    masked1 = cv2.bitwise_and(rgbimg, rgbimg, mask=mask)
    if params.debug is not None:
        params.device += 1
        if params.debug == "print":
            print_image(
                masked1,
                os.path.join(params.debug_outdir,
                             str(params.device) + "_masked_nir_plant.png"))
        if params.debug == "plot":
            plot_image(masked1)

    analysis_image = None

    if histplot is True:
        hist_x = hist_percent
        # bin_labels = np.arange(0, bins)
        dataset = pd.DataFrame({
            'Grayscale pixel intensity': bin_labels,
            'Proportion of pixels (%)': hist_x
        })
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Grayscale pixel intensity',
                                       y='Proportion of pixels (%)')) +
                    geom_line(color='red') +
                    scale_x_continuous(breaks=list(range(0, maxval, 25))))

        analysis_image = fig_hist
        if params.debug == "print":
            fig_hist.save(
                os.path.join(params.debug_outdir,
                             str(params.device) + '_nir_hist.png'))
        elif params.debug == "plot":
            print(fig_hist)

    outputs.add_observation(variable='nir_frequencies',
                            trait='near-infrared frequencies',
                            method='plantcv.plantcv.analyze_nir_intensity',
                            scale='frequency',
                            datatype=list,
                            value=hist_nir,
                            label=bin_labels)

    # Store images
    outputs.images.append(analysis_image)

    return analysis_image
Example #44
sample_sub.head()

train.shape
test.shape
# Roughly the same size
sample_sub.shape
# The actual number of people in the test set is about 95,000
train.isnull().sum()
test.isnull().sum()
# Both Train and Test contain nulls

########## EDA
#### TripType
train["TripType"] = train["TripType"].apply(lambda x: -1 if x == 999 else x)
# Recode 999 as -1
p9.ggplot(train, p9.aes(x='TripType', fill='TripType')) + p9.geom_bar()
# Trip types 39 and 40 are very common; 40 especially so.
train["TripType"].value_counts()
train.info()

#### Weekday
p9.ggplot(train, p9.aes(x='Weekday', fill='Weekday')) + p9.geom_bar()
# As expected, Saturday and Sunday see the most shopping

#### Upc
len(train['Upc'].unique())
# The Upc numbers contain many duplicates.
train[train['Upc'].isnull()]
# When Upc is missing, DepartmentDescription and FinelineNumber are missing as well.
# In particular, FinelineNumber is missing exactly where Upc is, so the two variables are closely related.
    "--input-file",
    type=argparse.FileType("r"),
    required=True,
    help="the benchmark output file to load",
)
parser.add_argument("--output-file",
                    type=str,
                    required=True,
                    help="the name for the output plot")
args = parser.parse_args()

# %%
data = load_benchmark_output(args.input_file)

# %%
print(data.head())

# %%
print(data.describe())

# %%
(p9.ggplot(
    data=data[(data.Op != "CREATE") & (data.Op != "DELETE")],
    mapping=p9.aes(x="Op", y="MiBs", color="Api"),
) + p9.facet_wrap(facets="Op", labeller="label_both", scales="free") +
 p9.geom_boxplot()).save(args.output_file)

# %%
compare_api(data, "READ")
compare_api(data, "WRITE")
Example #46
def analyze_color(rgb_img, mask, bins, hist_plot_type=None):
    """Analyze the color properties of an image object

    Inputs:
    rgb_img          = RGB image data
    mask             = Binary mask made from selected contours
    bins             = number of color bins the channel is divided into
    hist_plot_type   = None, 'all', 'rgb', 'lab' or 'hsv'

    Returns:
    hist_header      = color histogram data table headers
    hist_data        = color histogram data table values
    analysis_image   = histogram output

    :param rgb_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :param hist_plot_type: str
    :return hist_header: list
    :return hist_data: list
    :return analysis_images: list
    """

    params.device += 1

    if len(np.shape(rgb_img)) < 3:
        fatal_error("rgb_img must be an RGB image")

    masked = cv2.bitwise_and(rgb_img, rgb_img, mask=mask)
    b, g, r = cv2.split(masked)
    lab = cv2.cvtColor(masked, cv2.COLOR_BGR2LAB)
    l, m, y = cv2.split(lab)
    hsv = cv2.cvtColor(masked, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    # Color channel dictionary
    norm_channels = {"b": np.divide(b, (256 / bins)).astype(np.uint8),
                     "g": np.divide(g, (256 / bins)).astype(np.uint8),
                     "r": np.divide(r, (256 / bins)).astype(np.uint8),
                     "l": np.divide(l, (256 / bins)).astype(np.uint8),
                     "m": np.divide(m, (256 / bins)).astype(np.uint8),
                     "y": np.divide(y, (256 / bins)).astype(np.uint8),
                     "h": np.divide(h, (256 / bins)).astype(np.uint8),
                     "s": np.divide(s, (256 / bins)).astype(np.uint8),
                     "v": np.divide(v, (256 / bins)).astype(np.uint8)
                     }

    # Histogram plot types
    hist_types = {"all": ("b", "g", "r", "l", "m", "y", "h", "s", "v"),
                  "rgb": ("b", "g", "r"),
                  "lab": ("l", "m", "y"),
                  "hsv": ("h", "s", "v")}

    # # If the user-input pseudo_channel is not None and is not found in the list of accepted channels, exit
    # if pseudo_channel is not None and pseudo_channel not in norm_channels:
    #     fatal_error("Pseudocolor channel was " + str(pseudo_channel) +
    #                 ', but can only be one of the following: None, "l", "m", "y", "h", "s" or "v"!')
    # # If the user-input pseudocolored image background is not in the accepted input list, exit
    # if pseudo_bkg not in ["white", "img", "both"]:
    #     fatal_error("The pseudocolored image background was " + str(pseudo_bkg) +
    #                 ', but can only be one of the following: "white", "img", or "both"!')
    # # If the user-input histogram color-channel plot type is not in the list of accepted channels, exit
    if hist_plot_type is not None and hist_plot_type not in hist_types:
        fatal_error("The histogram plot type was " + str(hist_plot_type) +
                    ', but can only be one of the following: None, "all", "rgb", "lab", or "hsv"!')
    histograms = {
        "b": {"label": "blue", "graph_color": "blue",
              "hist": cv2.calcHist([norm_channels["b"]], [0], mask, [bins], [0, (bins - 1)])},
        "g": {"label": "green", "graph_color": "forestgreen",
              "hist": cv2.calcHist([norm_channels["g"]], [0], mask, [bins], [0, (bins - 1)])},
        "r": {"label": "red", "graph_color": "red",
              "hist": cv2.calcHist([norm_channels["r"]], [0], mask, [bins], [0, (bins - 1)])},
        "l": {"label": "lightness", "graph_color": "dimgray",
              "hist": cv2.calcHist([norm_channels["l"]], [0], mask, [bins], [0, (bins - 1)])},
        "m": {"label": "green-magenta", "graph_color": "magenta",
              "hist": cv2.calcHist([norm_channels["m"]], [0], mask, [bins], [0, (bins - 1)])},
        "y": {"label": "blue-yellow", "graph_color": "yellow",
              "hist": cv2.calcHist([norm_channels["y"]], [0], mask, [bins], [0, (bins - 1)])},
        "h": {"label": "hue", "graph_color": "blueviolet",
              "hist": cv2.calcHist([norm_channels["h"]], [0], mask, [bins], [0, (bins - 1)])},
        "s": {"label": "saturation", "graph_color": "cyan",
              "hist": cv2.calcHist([norm_channels["s"]], [0], mask, [bins], [0, (bins - 1)])},
        "v": {"label": "value", "graph_color": "orange",
              "hist": cv2.calcHist([norm_channels["v"]], [0], mask, [bins], [0, (bins - 1)])}
    }

    hist_data_b = [l[0] for l in histograms["b"]["hist"]]
    hist_data_g = [l[0] for l in histograms["g"]["hist"]]
    hist_data_r = [l[0] for l in histograms["r"]["hist"]]
    hist_data_l = [l[0] for l in histograms["l"]["hist"]]
    hist_data_m = [l[0] for l in histograms["m"]["hist"]]
    hist_data_y = [l[0] for l in histograms["y"]["hist"]]
    hist_data_h = [l[0] for l in histograms["h"]["hist"]]
    hist_data_s = [l[0] for l in histograms["s"]["hist"]]
    hist_data_v = [l[0] for l in histograms["v"]["hist"]]

    binval = np.arange(0, bins)
    bin_values = [l for l in binval]

    # Store Color Histogram Data
    hist_header = [
        'HEADER_HISTOGRAM',
        'bin-number',
        'bin-values',
        'blue',
        'green',
        'red',
        'lightness',
        'green-magenta',
        'blue-yellow',
        'hue',
        'saturation',
        'value'
    ]

    hist_data = [
        'HISTOGRAM_DATA',
        bins,
        bin_values,
        hist_data_b,
        hist_data_g,
        hist_data_r,
        hist_data_l,
        hist_data_m,
        hist_data_y,
        hist_data_h,
        hist_data_s,
        hist_data_v
    ]

    analysis_images = []
    dataset = pd.DataFrame({'bins': binval, 'blue': hist_data_b,
                            'green': hist_data_g, 'red': hist_data_r,
                            'lightness': hist_data_l, 'green-magenta': hist_data_m,
                            'blue-yellow': hist_data_y, 'hue': hist_data_h,
                            'saturation': hist_data_s, 'value': hist_data_v})

    # Make the histogram figure using plotnine
    if hist_plot_type is not None:
        if hist_plot_type == 'rgb':
            df_rgb = pd.melt(dataset, id_vars=['bins'], value_vars=['blue', 'green', 'red'],
                             var_name='Color Channel', value_name='Pixels')
            hist_fig = (ggplot(df_rgb, aes(x='bins', y='Pixels', color='Color Channel'))
                        + geom_line()
                        + scale_x_continuous(breaks=list(range(0, bins, 25)))
                        + scale_color_manual(['blue', 'green', 'red'])
                        )
            analysis_images.append(hist_fig)

        elif hist_plot_type == 'lab':
            df_lab = pd.melt(dataset, id_vars=['bins'],
                             value_vars=['lightness', 'green-magenta', 'blue-yellow'],
                             var_name='Color Channel', value_name='Pixels')
            hist_fig = (ggplot(df_lab, aes(x='bins', y='Pixels', color='Color Channel'))
                        + geom_line()
                        + scale_x_continuous(breaks=list(range(0, bins, 25)))
                        + scale_color_manual(['yellow', 'magenta', 'dimgray'])
                        )
            analysis_images.append(hist_fig)

        elif hist_plot_type == 'hsv':
            df_hsv = pd.melt(dataset, id_vars=['bins'],
                             value_vars=['hue', 'saturation', 'value'],
                             var_name='Color Channel', value_name='Pixels')
            hist_fig = (ggplot(df_hsv, aes(x='bins', y='Pixels', color='Color Channel'))
                        + geom_line()
                        + scale_x_continuous(breaks=list(range(0, bins, 25)))
                        + scale_color_manual(['blueviolet', 'cyan', 'orange'])
                        )
            analysis_images.append(hist_fig)

        elif hist_plot_type == 'all':
            s = pd.Series(['blue', 'green', 'red', 'lightness', 'green-magenta',
                           'blue-yellow', 'hue', 'saturation', 'value'], dtype="category")
            color_channels = ['blue', 'yellow', 'green', 'magenta', 'blueviolet',
                              'dimgray', 'red', 'cyan', 'orange']
            df_all = pd.melt(dataset, id_vars=['bins'], value_vars=s, var_name='Color Channel',
                             value_name='Pixels')
            hist_fig = (ggplot(df_all, aes(x='bins', y='Pixels', color='Color Channel'))
                        + geom_line()
                        + scale_x_continuous(breaks=list(range(0, bins, 25)))
                        + scale_color_manual(color_channels)
                        )
            analysis_images.append(hist_fig)

    # Store into global measurements
    if 'color_histogram' not in outputs.measurements:
        outputs.measurements['color_histogram'] = {}
    outputs.measurements['color_histogram']['bin-number'] = bins
    outputs.measurements['color_histogram']['bin-values'] = bin_values
    outputs.measurements['color_histogram']['blue'] = hist_data_b
    outputs.measurements['color_histogram']['green'] = hist_data_g
    outputs.measurements['color_histogram']['red'] = hist_data_r
    outputs.measurements['color_histogram']['lightness'] = hist_data_l
    outputs.measurements['color_histogram']['green-magenta'] = hist_data_m
    outputs.measurements['color_histogram']['blue-yellow'] = hist_data_y
    outputs.measurements['color_histogram']['hue'] = hist_data_h
    outputs.measurements['color_histogram']['saturation'] = hist_data_s
    outputs.measurements['color_histogram']['value'] = hist_data_v

    # Store images
    outputs.images.append(analysis_images)

    return hist_header, hist_data, analysis_images
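A hypothetical call of analyze_color above on synthetic data, assuming the plantcv globals it updates (params, outputs) and its cv2/pandas/plotnine imports are available; the image and mask are made up for illustration.

import numpy as np

# Synthetic RGB image plus a full-frame mask (illustrative only).
rgb_img = (np.random.rand(100, 100, 3) * 255).astype(np.uint8)
mask = np.full((100, 100), 255, dtype=np.uint8)

hist_header, hist_data, figs = analyze_color(rgb_img, mask, bins=256, hist_plot_type='rgb')
print(figs[0])  # the RGB histogram figure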
Example #47
def quick_color_check(target_matrix, source_matrix, num_chips):
    """ Quickly plot target matrix values against source matrix values to determine
    oversaturated color chips or other issues.

    Inputs:
    source_matrix      = an nrowsXncols matrix containing the avg red, green, and blue values for each color chip
                            of the source image
    target_matrix      = an nrowsXncols matrix containing the avg red, green, and blue values for each color chip
                            of the target image
    num_chips          = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = []
    blue = []
    green = []
    for i in range(num_chips):
        red.append('red')
        blue.append('blue')
        green.append('green')

    # Make a column of chip numbers
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    chips = np.row_stack((chip, chip, chip))

    # Combine info
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.row_stack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({'source': all_color_data[:, 0], 'target': all_color_data[:, 1],
                            'color': all_color_data[:, 2]})

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    dataset = dataset.astype({'color': str, 'chip': str, 'target': float, 'source': float})

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Autoincrement the device counter
    params.device += 1

    # Reset debug
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'), verbose=False)
        elif params.debug == 'plot':
            print(p1)
Example #48
from plotnine import ggplot, aes, geom_point, facet_grid, facet_wrap
from plotnine import geom_abline, annotate
from plotnine.data import mpg
from plotnine.exceptions import PlotnineWarning

n = 10
df = pd.DataFrame({
    'x': range(n),
    'y': range(n),
    'var1': np.repeat(range(n // 2), 2),
    'var2': np.tile(['a', 'b'], n // 2),
})
df['class'] = df['var1']  # python keyword as column
df['g'] = df['var1']  # variable as a column

g = (ggplot(df, aes('x', 'y')) +
     geom_point(aes(color='factor(var1)'), size=5, show_legend=False))

# facet_wrap


def test_facet_wrap_one_var():
    p = g + facet_wrap('~var1')
    p2 = g + facet_wrap('~class')  # python keyword in formula
    p3 = g + facet_wrap('~g')  # variable in formula
    assert p == 'facet_wrap_one_var'
    assert p2 == 'facet_wrap_one_var'
    assert p3 == 'facet_wrap_one_var'


# https://github.com/pandas-dev/pandas/issues/16276
Example #49
def test_dodge_preserve_single():
    df1 = pd.DataFrame({'x': ['a', 'b', 'b'], 'y': ['a', 'a', 'b']})
    p = (ggplot(df1, aes('x', fill='y')) +
         geom_bar(position=position_dodge(preserve='single')))
    assert p + _theme == 'dodge_preserve_single'
Example #50
class TestThemes:
    g = (ggplot(mtcars, aes(x='wt', y='mpg', color='factor(gear)'))
         + geom_point()
         + facet_grid('vs ~ am'))

    def test_theme_538(self):
        p = self.g + labs(title='Theme 538') + theme_538()

        assert p + _theme == 'theme_538'

    def test_theme_bw(self):
        p = self.g + labs(title='Theme BW') + theme_bw()

        assert p + _theme == 'theme_bw'

    def test_theme_classic(self):
        p = self.g + labs(title='Theme Classic') + theme_classic()

        assert p + _theme == 'theme_classic'

    def test_theme_dark(self):
        p = self.g + labs(title='Theme Dark') + theme_dark()

        assert p + _theme == 'theme_dark'

    def test_theme_gray(self):
        p = self.g + labs(title='Theme Gray') + theme_gray()

        assert p + _theme == 'theme_gray'

    def test_theme_light(self):
        p = self.g + labs(title='Theme Light') + theme_light()

        assert p + _theme == 'theme_light'

    def test_theme_linedraw(self):
        p = self.g + labs(title='Theme Linedraw') + theme_linedraw()

        assert p + _theme == 'theme_linedraw'

    def test_theme_matplotlib(self):
        p = self.g + labs(title='Theme Matplotlib') + theme_matplotlib()

        assert p + _theme == 'theme_matplotlib'

    def test_theme_minimal(self):
        p = self.g + labs(title='Theme Minimal') + theme_minimal()

        assert p + _theme == 'theme_minimal'

    def test_theme_seaborn(self):
        p = self.g + labs(title='Theme Seaborn') + theme_seaborn()

        assert p + _theme == 'theme_seaborn'

    def test_theme_void(self):
        p = self.g + labs(title='Theme Void') + theme_void()

        assert p + _theme == 'theme_void'

    def test_theme_xkcd(self):
        p = self.g + labs(title='Theme Xkcd') + theme_xkcd()

        if os.environ.get('TRAVIS'):
            # Travis does not have the fonts, we still check
            # to catch any other errors
            assert p + _theme != 'theme_gray'
        else:
            assert p + _theme == 'theme_xkcd'
Example #51
def plot_bar(data,nuclstr,column='value',factor=None,ymin=None,ymax=None,stat='identity',dpi=300,features=None,feature_types=['all'],add_features=[],funcgroups=None,shading_modes=['charge_functional'],usd=False,right_overhang_fix=None,debug=False,startnumber=1,cropseq=(0,None),aspect_ratio=None,reverse_seq=False,double_seq=False,transparent=True,fill_params=None,bar_position='stack',title=None):
    """
    A wrapper function to make a plot of data with bars along the sequence.
    The input should be a dataframe with 'resid', 'segid' and 'value' columns.
    This one is inspired by seqplot/seqplot/pdb_plot.py
    """
    
    segid=data['segid'].values[0]
    
    if title is None:
        title="Segid: %s, Type: %s"%(segid,nuclstr.components[segid]['type'])
    
    # Entities listed here get a protein alphabet; everything else is treated as DNA.
    seq=Seq(str(nuclstr.seqs[segid]['fullseq']),generic_protein \
                if nuclstr.components[segid]['entity'] in ('DNA', 'histone', 'protein') else generic_dna)
    msar=MultipleSeqAlignment([SeqRecord(seq=seq,id=nuclstr.components[segid]['type']+':'+segid,\
                                         name=nuclstr.components[segid]['type']+':'+segid)])
    if(reverse_seq):
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq=msar[0].seq[::-1]

    if double_seq:
          msar.add_sequence('reverse',str(msar[0].seq[::-1]))

        
    msar=msar[:,cropseq[0]:cropseq[1]]
        
    
#     print("Seq to plot:",msar)
             
    #We need to get starting residue, currently for DNA chains only cifseq gets it correctly
    resid_start=nuclstr.seqs[segid]['resid_start']
    
    logger.debug("Starting resid",resid_start)
    

    overhang=nuclstr.seqs[segid]['overhangL']
    
    datafixed=data.copy()
    datafixed.loc[:,'resid']=datafixed.loc[:,'resid']-resid_start+overhang+1-cropseq[0]

    
    sl=len(msar[0].seq)

#     fn=shade.seqfeat2shadefeat(msar,feature_types=feature_types,force_feature_pos='bottom',debug=debug)
    if features is None:
        fn=nuclstr.shading_features[segid]
    else:
        fn=features
    fn2=[]
    for i in fn:
        if (i['style'] in feature_types) or ('all' in feature_types) :
            fn2.append(i)
            
    fn2.extend(add_features)
    if usd:
        ruler='top'
    else:
        ruler=None
    shaded=ipyshade.shadedmsa4plot(msar,features=fn2,shading_modes=shading_modes,debug=debug,startnumber=startnumber,setends=[startnumber-2,sl+startnumber+2],funcgroups=funcgroups,ruler=ruler,density=200)
        
    # If sl % 10 == 0 we will have a ruler number hanging beyond the sequence image, and we need to correct for that.
    if right_overhang_fix is None:
        if sl%10==0:
            if sl<100:
                rof= 0.1
            else:
                rof=0.5
        else:
            rof=0
    else:
        rof=right_overhang_fix
    if aspect_ratio is not None:
        ar=aspect_ratio
    else:
        ar=0.2*100./sl
#     print(datafixed)
    plot=(ggplot(data=datafixed,mapping=aes(x='resid', y=column))
#         + geom_point(size=0.1)
#           +geom_bar(stat='identity',width=0.5,mapping=aes(fill=factor))
        + scale_x_continuous(limits=(0.5,sl+0.5+rof),expand=(0,0.2),name='',breaks=[])
       # + scale_y_continuous(breaks=[0,0.5,1.0])
        + theme_light()+theme(aspect_ratio=ar,dpi=dpi,plot_margin=0,text=element_text(size=6), legend_key_size=5 ,legend_position='bottom',legend_direction='horizontal'))
    #+ facet_wrap('~ segid',dir='v') +guides(color=guide_legend(ncol=10))
    if factor is None:
        plot=plot+geom_bar(stat=stat,width=0.5)
    else:
        plot=plot+geom_bar(stat=stat,width=0.5,mapping=aes(fill=factor),position=bar_position)
        
    if fill_params is not None:
        plot=plot+scale_fill_manual(**fill_params)
    
    if not usd:
        if (ymax is not None) :
            plot=plot+scale_y_continuous(limits=(None,ymax))
    else:
        if (ymin is not None) :
            plot=plot+scale_y_continuous(limits=(ymin,None))
    
    if ymax is None:
        ymax=data[column].max()
    if ymin is None:
        ymin=data[column].min()
#     print(ymax)
    plot = plot + geom_seq_x(seqimg=shaded.img,\
                   xlim=(1,sl+rof),ylim=(ymin,ymax),usd=usd,aspect_ratio=ar,transparent=transparent)+ggtitle(title)
    
    
    return plot
Example #52
def test_dodge():
    p = (ggplot(df2, aes('factor(z)')) +
         geom_bar(aes(fill='factor(x)'), position='dodge'))
    assert p + _theme == 'dodge'
Example #53
def plot_line(data,nuclstr,columns=['value'],ymin=None,ymax=None,dpi=300,features=None,feature_types=['all'],add_features=[],funcgroups=None,shading_modes=['charge_functional'],right_overhang_fix=None,debug=False,startnumber=1,cropseq=(0,None),aspect_ratio=None,reverse_seq=False,transparent=True,xshift=0):
    """
    A wrapper function to make a plot of data with lines along the sequence.
    The input should be a dataframe with 'resid', 'segid' and 'value' columns.
    This one is inspired by seqplot/seqplot/pdb_plot.py
    funcgroup example fg="\\funcgroup{xxx}{CT}{White}{Green}{upper}{up} \\funcgroup{xxx}{GA}{White}{Blue}{upper}{up}"
    """
    if isinstance(columns,str):
        columns=[columns]
    segid=data['segid'].values[0]
    
    title="Segid: %s, Type: %s"%(segid,nuclstr.components[segid]['type'])

    # Entities listed here get a protein alphabet; everything else is treated as DNA.
    seq=Seq(str(nuclstr.seqs[segid]['fullseq']),generic_protein \
                if nuclstr.components[segid]['entity'] in ('DNA', 'histone', 'protein') else generic_dna)
    msar=MultipleSeqAlignment([SeqRecord(seq=seq,id=nuclstr.components[segid]['type']+':'+segid,\
                                         name=nuclstr.components[segid]['type']+':'+segid)])
    if(reverse_seq):
        logger.info("Experimental feature will reverse the sequence")
        msar[0].seq=msar[0].seq[::-1]
        
    msar=msar[:,cropseq[0]:cropseq[1]]

    
#     print("Seq to plot:",msar)
             
    #We need to get starting residue, currently for DNA chains only cifseq gets it correctly
    resid_start=nuclstr.seqs[segid]['resid_start']
    
    logger.debug("Starting resid %d"%int(resid_start))
    

    overhang=nuclstr.seqs[segid]['overhangL']
    
    datafixed=data.copy()
    datafixed.loc[:,'resid']=datafixed.loc[:,'resid']-resid_start+overhang+1-cropseq[0]+xshift

#     print(datafixed)
    sl=len(msar[0].seq)

#     fn=shade.seqfeat2shadefeat(msar,feature_types=feature_types,force_feature_pos='bottom',debug=debug)
    if features is None:
        fn=nuclstr.shading_features[segid]
    else:
        fn=features
    fn2=[]
    for i in fn:
        if (i['style'] in feature_types) or ('all' in feature_types) :
            fn2.append(i)
            
    fn2.extend(add_features)
    shaded=ipyshade.shadedmsa4plot(msar,features=fn2,shading_modes=shading_modes,debug=debug,startnumber=startnumber,setends=[startnumber-2,sl+startnumber+2],funcgroups=funcgroups,density=200)
        
    # If sl % 10 == 0 we will have a ruler number hanging beyond the sequence image, and we need to correct for that.
    if right_overhang_fix is None:
        if sl%10==0:
            if sl<100:
                rof= 0.1
            else:
                rof=0.5
        else:
            rof=0
    else:
        rof=right_overhang_fix
    if aspect_ratio is not None:
        ar=aspect_ratio
    else:
        ar=0.15*100./sl
        
    md=pd.melt(datafixed,id_vars=['segid','resid'],value_vars=columns)
#     print(md)
#     print(md)
#     print(md['variable'])
    plot=(ggplot(data=md,mapping=aes(x='resid', y='value'))
        + geom_point(aes(color='variable'),size=0.1)+geom_line(aes(color='variable'),stat='identity')
        + scale_x_continuous(limits=(0.5,sl+0.5+rof),expand=(0,0.2),name='',breaks=[])
#         + scale_y_continuous()
        + theme_light()+theme(aspect_ratio=ar,dpi=dpi,plot_margin=0)) #+ facet_wrap('~ segid',dir='v')

    if ymax is not None:
        plot=plot+scale_y_continuous(limits=(None,ymax))
    
    if ymin is None:
        ymin=md['value'].min()
    if ymax is None:
        ymax=md['value'].max()
    plot = plot + geom_seq_x(seqimg=shaded.img,\
                   xlim=(1,sl+rof),ylim=(ymin,ymax),aspect_ratio=ar,transparent=transparent)+ggtitle(title)
    

    
    return plot
Example #54
df_2.to_csv('/home/treelab/Documents/CUDAGP/script_GP1/graphs/mean_%s_%s.csv' %
            (df_new_2['popsize'][0], df_new_2['indsize'][0]))
df_3 = df_new_3.groupby(['nrow', 'nvar'])['timewr'].mean()
df_3.to_csv('/home/treelab/Documents/CUDAGP/script_GP1/graphs/mean_%s_%s.csv' %
            (df_new_3['popsize'][0], df_new_3['indsize'][0]))
try:
    df_4 = df_new_4.groupby(['nrow', 'nvar'])['timewr'].mean()
    df_4.to_csv(
        '/home/treelab/Documents/CUDAGP/script_GP1/graphs/mean_%s_%s.csv' %
        (df_new_4['popsize'][0], df_new_4['indsize'][0]))
except Exception:
    print('error')

for ielem in (df_new_1, df_new_2, df_new_3, df_new_4):
    surveys_plot = (
        p9.ggplot(data=ielem,
                  mapping=p9.aes(x='run', y='timewr', color='factor(nvar)')) +
        p9.geom_point() + p9.facet_grid("~nrow") +
        p9.scale_y_continuous(limits=(0, 500)) +
        p9.scale_x_discrete(breaks=range(0, 35, 5)) +
        p9.theme(text=p9.element_text(size=10, family="serif"),
                 plot_title=p9.element_text(weight='bold', size=14),
                 legend_title=p9.element_text(weight='bold', size=14),
                 legend_text=p9.element_text(weight='bold', size=10),
                 axis_title_y=p9.element_text(weight='bold', size=14),
                 axis_title_x=p9.element_text(weight='bold', size=14)) +
        p9.labs(y='Time (s)',
                x='Number of run',
                title='Population Size [%s]' % ielem['popsize'][0],
                color='Features'))
    # Change this to the directory where you want to save the plots
    surveys_plot.save("./data_%s_%s.pdf" %
Example #55
import pandas as pd
titanic = pd.read_csv("/home/shaury/Downloads/nptel/titanic/train.csv",
                      delimiter=",")
test = pd.read_csv("/home/shaury/Downloads/nptel/titanic/test.csv")

from plotnine import ggplot, aes, geom_bar
t = titanic[titanic["Survived"] == 1]
ggplot(t, aes(t["Pclass"], fill=t["Sex"])) + geom_bar()

t = titanic[titanic["Survived"] == 0]
ggplot(t, aes(t["Pclass"], fill=t["Sex"])) + geom_bar()

t = titanic[titanic["Survived"] == 1]
ggplot(t, aes(t["Pclass"], fill=t["Survived"])) + geom_bar()

t = titanic[titanic["Survived"] == 0]
ggplot(t, aes(t["Pclass"], fill=t["Survived"])) + geom_bar()
o = titanic["Sex"]
p = test["Sex"]
l = []
for i in range(0, len(titanic)):
    if (o[i] == "female"):
        l.append(1)
    else:
        l.append(0)
l = pd.DataFrame(l)
l = pd.concat([l, titanic["Pclass"]], join='outer', axis=1)
l

l1 = []
for i in range(0, len(test)):
Example #56
def htcalc(air_velocity_inside, air_velocity_outside, t_inside, t_outside,
           surface, layers, wall_thickness, thermal_conductivity):
    # We need the convective heat resistance on both sides of the wall
    res_conv_inside = heattransfer.convective_resistance(
        heattransfer.heat_transfer_coef(air_velocity_inside), surface)
    res_conv_outside = heattransfer.convective_resistance(
        heattransfer.heat_transfer_coef(air_velocity_outside), surface)

    # We need the total resistance over all wall layers
    total_layer_resistance = []
    total_layer_resistance.append(res_conv_inside)
    for i in range(layers):
        total_layer_resistance.append(
            heattransfer.conductive_resistance(wall_thickness[i],
                                               thermal_conductivity[i],
                                               surface))

    total_layer_resistance.append(res_conv_outside)

    total_resistance = sum(total_layer_resistance)

    heat_transfer = heattransfer.conduction(t_inside, t_outside,
                                            total_resistance)

    # Calculating the temperatures between each layer
    temperatures = []
    temperatures.append(t_inside)
    layer_resistance = 0
    for resistance in total_layer_resistance:
        layer_resistance += resistance
        temperatures.append(
            heattransfer.layer_temperature(heat_transfer, layer_resistance,
                                           t_inside))

    # Preparing the x axis, position of the temperature and transition labels for the graph
    position = [0, 0.02]
    labels = ['fluid inside', 'inner surface']

    i = 0
    for entry in wall_thickness:
        position.append(position[-1] + entry)
        i += 1
        labels.append("layer" + str(i))

    labels[-1] = "outer surface"
    position.append(position[-1] + 0.02)
    labels.append("fluid outside")

    # print(f"\nThe total resistance is {round(total_resistance, 2)} K/W")
    # print(f"Total heat transfer from inside to outside is {round(heat_transfer, 2)} W\n")

    df = pd.DataFrame({'pos': position, 'temp': temperatures})

    gg = p9.ggplot(df, p9.aes(x='pos', y='temp'))
    gg += p9.geom_line(p9.aes(color='temp'), size=2)

    for ws in df.pos.values.tolist():
        gg += p9.geom_vline(xintercept=ws, color='grey')

    # gg += p9.geom_hline(yintercept=110, color='red', size=2, alpha=0.8)
    gg += p9.ggtitle('heat transfer through wall')
    gg += p9.scale_x_continuous(name='Position',
                                breaks=df.pos.values.tolist(),
                                labels=labels)
    gg += p9.scale_y_continuous(name='Temperature')
    gg += p9.theme(axis_text_x=p9.element_text(angle=45))
    gg += p9.scale_colour_gradient(low="yellow", high="orange")

    i = 0
    for temp in temperatures:
        gg += p9.geom_text(
            p9.aes(x=position[i], y=temp + 30, label=round(temp, 2)))
        i += 1

    for i in range(layers):
        labtext = 'Thermal cond.: ' + str(
            thermal_conductivity[i]) + ' [W/m°K]\nLayer thickness: ' + str(
                round(wall_thickness[i], 3)) + ' [m]'
        gg += p9.annotate(geom='text',
                          x=((position[i + 2] - position[i + 1]) / 2) +
                          position[i + 1],
                          y=temperatures[i] + 30,
                          label=labtext,
                          color='blue')

    return gg
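A minimal usage sketch for the function above, assuming the surrounding module already imports pandas as pd, plotnine as p9 and the heattransfer helpers; the wall geometry and material values below are illustrative only:

if __name__ == '__main__':
    # two-layer wall: 0.20 m brick (k ~ 0.8 W/mK) + 0.10 m insulation (k ~ 0.04 W/mK)
    gg = htcalc(air_velocity_inside=0.5,      # m/s
                air_velocity_outside=5.0,     # m/s
                t_inside=293.15,              # K
                t_outside=263.15,             # K
                surface=10.0,                 # m^2
                layers=2,
                wall_thickness=[0.20, 0.10],
                thermal_conductivity=[0.8, 0.04])
    gg.save('heat_transfer_wall.pdf')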
Example #57
0
def clone_rarefaction(self,
                      groupby,
                      clone_key=None,
                      palette=None,
                      figsize=(6, 4),
                      save=None):
    """
    Plots rarefaction curve for cell numbers vs clone size.

    Parameters
    ----------
    self : AnnData
        `AnnData` object.
    groupby : str
        Column name to split the calculation of clone numbers for a given number of cells for e.g. sample, patient etc.
    clone_key : str, optional
        Column name specifying the clone_id column in metadata/obs.
    palette : sequence, optional
        Color mapping for unique elements in groupby. Will try to retrieve from AnnData `.uns` slot if present.
    figsize :  tuple[float, float]
        Size of plot.
    save : str, optional
        Save path.
    
    Returns
    -------
    rarefaction curve plot.
    """

    if self.__class__ == AnnData:
        metadata = self.obs.copy()
    if clone_key is None:
        clonekey = 'clone_id'
    else:
        clonekey = clone_key

    groups = list(set(metadata[groupby]))
    metadata = metadata[metadata['bcr_QC_pass'].isin([True, 'True'])]
    metadata[clonekey] = metadata[clonekey].cat.remove_unused_categories()
    res = {}
    for g in groups:
        _metadata = metadata[metadata[groupby] == g]
        res[g] = _metadata[clonekey].value_counts()
    res_ = pd.DataFrame.from_dict(res, orient='index')

    # remove those with no counts
    rowsum = res_.sum(axis=1)
    print(
        'removing due to zero counts:', ', '.join(
            [res_.index[i] for i, x in enumerate(res_.sum(axis=1) == 0) if x]))
    sleep(0.5)
    res_ = res_[~(res_.sum(axis=1) == 0)]

    # set up for calculating rarefaction
    tot = res_.apply(sum, axis=1)
    S = res_.apply(lambda x: x[x > 0].shape[0], axis=1)
    nr = res_.shape[0]

    # append the results to a dictionary
    rarecurve = {}
    for i in tqdm(range(0, nr), desc='Calculating rarefaction curve '):
        # positional indexing on the group-labelled Series
        n = np.arange(1, tot.iloc[i], step=10)
        if n[-1] != tot.iloc[i]:
            n = np.append(n, tot.iloc[i])
        rarecurve[res_.index[i]] = [
            rarefun(np.array(res_.iloc[i, ]), z) for z in n
        ]
    y = pd.DataFrame([rarecurve[c] for c in rarecurve]).T
    pred = pd.DataFrame(
        [np.append(np.arange(1, s, 10), s) for s in res_.sum(axis=1)],
        index=res_.index).T

    y = y.melt()
    pred = pred.melt()
    pred['yhat'] = y['value']

    options.figure_size = figsize
    # resolve the colour palette: user-supplied > colours stored in AnnData.uns > defaults by group count
    if palette is not None:
        pal = palette
    else:
        pal = None
        if self.__class__ == AnnData:
            try:
                pal = self.uns[str(groupby) + '_colors']
            except KeyError:
                pal = None
        if pal is None:
            n_groups = len(set(pred.variable))
            if n_groups <= 20:
                pal = palettes.default_20
            elif n_groups <= 28:
                pal = palettes.default_28
            elif n_groups <= 102:
                pal = palettes.default_102

    p = (ggplot(pred, aes(x="value", y="yhat", color="variable")) +
         theme_classic() + xlab('number of cells') +
         ylab('number of clones') + ggtitle('rarefaction curve') +
         labs(color=groupby) + geom_line())
    if pal is not None:
        p = p + scale_color_manual(values=pal)
    if save:
        p.save(filename='figures/rarefaction' + str(save),
               height=plt.rcParams['figure.figsize'][0],
               width=plt.rcParams['figure.figsize'][1],
               units='in',
               dpi=plt.rcParams["savefig.dpi"])

    return (p)
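A minimal usage sketch, assuming `adata` is an `AnnData` object whose `.obs` contains the `bcr_QC_pass`, `clone_id` and grouping columns the function reads; the column name `sample_id` is illustrative:

# adata: AnnData carrying per-cell BCR metadata in .obs
p = clone_rarefaction(adata,
                      groupby='sample_id',        # hypothetical grouping column
                      clone_key='clone_id',
                      figsize=(6, 4),
                      save='_by_sample.pdf')      # written to figures/rarefaction_by_sample.pdf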
Example #58
0
def test_calculated_expressions():
    p = (ggplot(mtcars, aes(x='factor(cyl)', y='..count..+1')) + geom_bar())
    # No exception
    p._build()
Example #59
0
def test_dir_v_ncol():
    p = (ggplot(mpg) + aes(x='displ', y='hwy') +
         facet_wrap('class', dir='v', ncol=4, as_table=False) + geom_point())
    assert p == 'dir_v_ncol'
Example #60
0
def PlotPG(X,
           TargetPG,
           BootPG=None,
           PGCol="",
           PlotProjections="none",
           GroupsLab=None,
           PointViz="points",
           Main='',
           p_alpha=.3,
           PointSize=None,
           NodeLabels=None,
           LabMult=1,
           Do_PCA=True,
           DimToPlot=[0, 1],
           VizMode=("Target", "Boot")):
    '''
    work in progress, only basic plotting supported
    #' Plot data and principal graph(s)
    #'
    #' @param X numerical 2D matrix, the n-by-m matrix with the positions of n m-dimensional points
    #' @param TargetPG the main principal graph to plot
    #' @param BootPG a list of principal graphs that will be considered as bootstrapped curves
    #' @param PGCol string, the label to be used for the main principal graph
    #' @param PlotProjections string, the plotting mode for the node projection on the principal graph.
    #' It can be "none" (no projections will be plotted), "onNodes" (the projections will indicate how points are associated to nodes),
    #' or "onEdges" (the projections will indicate how points are projected on edges or nodes of the graph)
    #' @param GroupsLab factor or numeric vector. A vector indicating either a category or a numeric value associated with
    #' each data point
    #' @param PointViz string, the modality used to show points. It can be 'points' (data will be represented as dots) or
    #' 'density' (data will be represented as a density field)
    #' @param Main string, the title of the plot
    #' @param p_alpha numeric between 0 and 1, the alpha value of the points. Lower values produce more transparent points
    #' @param PointSize numeric vector, a vector indicating the size to be associated with each node of the graph.
    #' If None, points will have size 0.
    #' @param NodeLabels string vector, a vector indicating the label to be associated with each node of the graph
    #' @param LabMult numeric, a multiplier controlling the size of node labels
    #' @param Do_PCA boolean, should the nodes of the principal graph be used to derive principal component projections and
    #' rotate the space? If True, the plots will use the "EpG PC" as dimensions; if False, the original dimensions will be used.
    #' @param DimToPlot an integer vector specifying the PCs (if Do_PCA=True) or dimensions (if Do_PCA=False) to plot. All pairwise
    #' combinations will be considered, so, for example, DimToPlot=[0, 1, 2] will produce three plots.
    #' @param VizMode vector of strings describing the ElPiGraphs to visualize. Any combination of "Target" and "Boot".
    #'
    #' @return
    #' @export
    #'
    #' @examples'''

    if len(PGCol) == 1:
        PGCol = [PGCol] * len(TargetPG['NodePositions'])

    if GroupsLab is None:
        GroupsLab = ["N/A"] * len(X)

    #    levels(GroupsLab) = c(levels(GroupsLab), unique(PGCol))

    if PointSize is not None:
        if (len(PointSize) == 1):
            PointSize = [PointSize] * len(TargetPG['NodePositions'])

    if (Do_PCA):
        # Perform PCA on the nodes
        mv = TargetPG['NodePositions'].mean(axis=0)
        data_centered = TargetPG['NodePositions'] - mv
        vglobal, NodesPCA, explainedVariances = PCA(data_centered)
        # Rotate the data using eigenvectors
        BaseData = np.dot((X - mv), vglobal)
        DataVarPerc = np.var(BaseData, axis=0) / np.sum(np.var(X, axis=0))

    else:
        NodesPCA = TargetPG['NodePositions']
        BaseData = X
        DataVarPerc = np.var(X, axis=0) / np.sum(np.var(X, axis=0))

    # Base Data

    AllComb = list(combinations(DimToPlot, 2))

    PlotList = list()

    for i in range(len(AllComb)):

        Idx1 = AllComb[i][0]
        Idx2 = AllComb[i][1]

        df1 = pd.DataFrame.from_dict(
            dict(PCA=BaseData[:, Idx1], PCB=BaseData[:, Idx2],
                 Group=GroupsLab))
        # Initialize plot

        Initialized = False

        if (PointViz == "points"):
            p = (plotnine.ggplot(data=df1,
                                 mapping=plotnine.aes(x='PCA', y='PCB')) +
                 plotnine.geom_point(alpha=p_alpha,
                                     mapping=plotnine.aes(color='Group')))
            Initialized = True

        if (PointViz == "density"):
            p = (plotnine.ggplot(data=df1,
                                 mapping=plotnine.aes(x='PCA', y='PCB')) +
                 plotnine.stat_density_2d(
                     contour=True,
                     alpha=.5,
                     geom='polygon',
                     mapping=plotnine.aes(fill='..level..')))
            Initialized = True

    #             p = sns.kdeplot(df1['PCA'], df1['PCB'], cmap="Reds", shade=True, bw=.15)

        if (not Initialized):
            raise ValueError("Invalid point representation selected")

        # Target graph

        tEdg = dict(x=[], y=[], xend=[], yend=[], Col=[])
        for j in range(len(TargetPG['Edges'][0])):
            Node_1 = TargetPG['Edges'][0][j][0]
            Node_2 = TargetPG['Edges'][0][j][1]
            if PGCol:
                if PGCol[Node_1] == PGCol[Node_2]:
                    tCol = "ElPiG" + str(PGCol[Node_1])

                if PGCol[Node_1] != PGCol[Node_2]:
                    tCol = "ElPiG Multi"

                if any(PGCol[k] == "None" for k in (Node_1, Node_2)):
                    tCol = "ElPiG None"

            tEdg['x'].append(NodesPCA[Node_1, Idx1])
            tEdg['y'].append(NodesPCA[Node_1, Idx2])
            tEdg['xend'].append(NodesPCA[Node_2, Idx1])
            tEdg['yend'].append(NodesPCA[Node_2, Idx2])
            if PGCol:
                tEdg['Col'].append(tCol)
            else:
                tEdg['Col'].append(1)
        if (Do_PCA):
            # per-component fraction of variance explained, in percent
            TarPGVarPerc = explainedVariances / explainedVariances.sum() * 100
        else:
            TarPGVarPerc = np.var(TargetPG['NodePositions'], axis=0) / np.sum(
                np.var(TargetPG['NodePositions'], axis=0))

        df2 = pd.DataFrame.from_dict(tEdg)

        # Replicas

        #         if(BootPG is not None) and ("Boot" is in VizMode):
        #             AllEdg = lapply(1:length(BootPG), function(i){
        #             tTree = BootPG[[i]]

        #             if(Do_PCA):
        #                 RotData = t(t(tTree$NodePositions) - NodesPCA$center) %*% NodesPCA$rotation
        #             else: {
        #                 RotData = tTree$NodePositions
        #             }

        #             tEdg = t(sapply(1:nrow(tTree$Edges$Edges), function(i){
        #               c(RotData[tTree$Edges$Edges[i, 1],c(Idx1, Idx2)], RotData[tTree$Edges$Edges[i, 2],c(Idx1, Idx2)])
        #             }))

        #             cbind(tEdg, i)
        #             })

        #             AllEdg = do.call(rbind, AllEdg)

        #             df3 = data.frame(x = AllEdg[,1], y = AllEdg[,2], xend = AllEdg[,3], yend = AllEdg[,4], Rep = AllEdg[,5])

        #             p = p + plotnine.geom_segment(data = df3, mapping = plotnine.aes(x=x, y=y, xend=xend, yend=yend),
        #                                          inherit.aes = False, alpha = .2, color = "black")

        # Plot projections

        if (PlotProjections == "onEdges"):

            if (Do_PCA):
                Partition = PartitionData(X=BaseData,
                                          NodePositions=NodesPCA,
                                          MaxBlockSize=100000000,
                                          SquaredX=np.sum(BaseData**2,
                                                          axis=1,
                                                          keepdims=1),
                                          TrimmingRadius=float('inf'))[0]
                OnEdgProj = project_point_onto_graph(X=BaseData,
                                                     NodePositions=NodesPCA,
                                                     Edges=TargetPG['Edges'],
                                                     Partition=Partition)
            else:
                Partition = PartitionData(
                    X=BaseData,
                    NodePositions=TargetPG['NodePositions'],
                    MaxBlockSize=100000000,
                    SquaredX=np.sum(BaseData**2, axis=1, keepdims=1),
                    TrimmingRadius=float('inf'))[0]
                OnEdgProj = project_point_onto_graph(
                    X=BaseData,
                    NodePositions=TargetPG['NodePositions'],
                    Edges=TargetPG['Edges'],
                    Partition=Partition)

            ProjDF = pd.DataFrame.from_dict(
                dict(X=BaseData[:, Idx1],
                     Y=BaseData[:, Idx2],
                     Xend=OnEdgProj['X_projected'][:, Idx1],
                     Yend=OnEdgProj['X_projected'][:, Idx2],
                     Group=GroupsLab))

            p = p + plotnine.geom_segment(
                data=ProjDF,
                mapping=plotnine.aes(
                    x='X', y='Y', xend='Xend', yend='Yend', col='Group'),
                inherit_aes=False)

        elif (PlotProjections == "onNodes"):

            if (Do_PCA):
                Partition = PartitionData(X=BaseData,
                                          NodePositions=NodesPCA,
                                          MaxBlockSize=100000000,
                                          SquaredX=np.sum(BaseData**2,
                                                          axis=1,
                                                          keepdims=1),
                                          TrimmingRadius=float('inf'))[0]
                ProjDF = pd.DataFrame.from_dict(
                    dict(X=BaseData[:, Idx1],
                         Y=BaseData[:, Idx2],
                         Xend=NodesPCA[Partition, Idx1],
                         Yend=NodesPCA[Partition, Idx2],
                         Group=GroupsLab))
            else:
                Partition = PartitionData(
                    X=BaseData,
                    NodePositions=TargetPG['NodePositions'],
                    MaxBlockSize=100000000,
                    SquaredX=np.sum(BaseData**2, axis=1, keepdims=1),
                    TrimmingRadius=float('inf'))[0]
                ProjDF = pd.DataFrame.from_dict(
                    dict(X=BaseData[:, Idx1],
                         Y=BaseData[:, Idx2],
                         Xend=TargetPG['NodePositions'][Partition, Idx1],
                         Yend=TargetPG['NodePositions'][Partition, Idx2],
                         Group=GroupsLab))

            p = p + plotnine.geom_segment(
                data=ProjDF,
                mapping=plotnine.aes(
                    x='X', y='Y', xend='Xend', yend='Yend', col='Group'),
                inherit_aes=False,
                alpha=.3)

        if ("Target" in VizMode):
            if GroupsLab is not None:
                p = p + plotnine.geom_segment(
                    data=df2,
                    mapping=plotnine.aes(
                        x='x', y='y', xend='xend', yend='yend', col='Col'),
                    inherit_aes=True) + plotnine.labs(linetype="")
            else:
                p = p + plotnine.geom_segment(
                    data=df2,
                    mapping=plotnine.aes(
                        x='x', y='y', xend='xend', yend='yend'),
                    inherit_aes=False)

        if (Do_PCA):
            df4 = pd.DataFrame.from_dict(
                dict(PCA=NodesPCA[:, Idx1], PCB=NodesPCA[:, Idx2]))
        else:
            df4 = pd.DataFrame.from_dict(
                dict(PCA=TargetPG['NodePositions'][:, Idx1],
                     PCB=TargetPG['NodePositions'][:, Idx2]))

        if ("Target" in VizMode):
            if (PointSize is not None):

                p = p + plotnine.geom_point(mapping=plotnine.aes(
                    x='PCA', y='PCB', size=PointSize),
                                            data=df4,
                                            inherit_aes=False)

            else:
                p = p + plotnine.geom_point(mapping=plotnine.aes(x='PCA',
                                                                 y='PCB'),
                                            data=df4,
                                            inherit_aes=False)

    #         if(NodeLabels):

    #             if(Do_PCA){
    #                 df4 = data.frame(PCA = NodesPCA$x[,Idx1], PCB = NodesPCA$x[,Idx2], Lab = NodeLabels)
    #             else {
    #                 df4 = data.frame(PCA = TargetPG$NodePositions[,Idx1], PCB = TargetPG$NodePositions[,Idx2], Lab = NodeLabels)
    #           }

    #           p = p + plotnine.geom_text(mapping = plotnine.aes(x = PCA, y = PCB, label = Lab),
    #                                       data = df4, hjust = 0,
    #                                       inherit.aes = False, na.rm = True,
    #                                       check_overlap = True, color = "black", size = LabMult)

    #         }

    #         if(Do_PCA){
    #             LabX = "EpG PC", Idx1, " (Data var = ",  np.round(100*DataVarPerc[Idx1], 3), "% / PG var = ", signif(100*TarPGVarPerc[Idx1], 3), "%)"
    #             LabY = "EpG PC", Idx2, " (Data var = ",  np.round(100*DataVarPerc[Idx2], 3), "% / PG var = ", signif(100*TarPGVarPerc[Idx2], 3), "%)"
    #         else {
    #             LabX = paste0("Dimension ", Idx1, " (Data var = ",  np.round(100*DataVarPerc[Idx1], 3), "% / PG var = ", np.round(100*TarPGVarPerc[Idx1], 3), "%)")
    #             LabY = paste0("Dimension ", Idx2, " (Data var = ",  np.round(100*DataVarPerc[Idx2], 3), "% / PG var = ", np.round(100*TarPGVarPerc[Idx2], 3), "%)")
    #         }

    #         if(!is.na(TargetPG$FinalReport$FVEP)){
    #             p = p + plotnine.labs(x = LabX,
    #                                  y = LabY,
    #                                  title = paste0(Main,
    #                                                 "/ FVE=",
    #                                                 signif(as.numeric(TargetPG$FinalReport$FVE), 3),
    #                                                 "/ FVEP=",
    #                                                 signif(as.numeric(TargetPG$FinalReport$FVEP), 3))
    #           ) +
    #             plotnine.theme(plot.title = plotnine.element_text(hjust = 0.5))
    #         else {
    #           p = p + plotnine.labs(x = LabX,
    #                                  y = LabY,
    #                                  title = paste0(Main,
    #                                                 "/ FVE=",
    #                                                 signif(as.numeric(TargetPG$FinalReport$FVE), 3))
    #           ) +
    #             plotnine.theme(plot.title = plotnine.element_text(hjust = 0.5))
    #         }

        PlotList.append(p)

    return (PlotList)
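A minimal usage sketch, assuming `TargetPG` follows the dictionary layout the function indexes ('NodePositions' as an n_nodes x m array and 'Edges'[0] as an array of node-index pairs); the toy graph below is illustrative only:

X = np.random.normal(size=(200, 3))                     # toy data cloud
TargetPG = {
    'NodePositions': np.array([[0.0, 0.0, 0.0],
                               [1.0, 0.0, 0.0],
                               [2.0, 1.0, 0.0]]),
    'Edges': [np.array([[0, 1], [1, 2]])],
}
plots = PlotPG(X, TargetPG, Do_PCA=False)               # skip PCA so no extra helpers are needed
plots[0].save('elpigraph_target.pdf')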