def test_add_element_blank():
    """
    Adding an element onto a blanked themeable starts from a clean slate.

    The sequence is: set ``axis_line_x`` -> blank it -> set it again.
    The final theme must equal one where only the last element was
    ever added to ``theme_gray()``.
    """
    lined = theme_gray() + theme(axis_line_x=l1)        # not blank
    blanked = lined + theme(axis_line_x=blank)          # blank
    relined = blanked + theme(axis_line_x=l3)           # not blank
    reference = theme_gray() + theme(axis_line_x=l3)    # for comparison

    assert relined != lined
    assert relined != blanked
    # Blanking cleans the slate: nothing set before it survives
    assert relined == reference

    # When a themeable is blanked, its apply method is replaced with
    # the blank method; the themeable added afterwards exposes the
    # regular apply method.
    blanked_axis = blanked.themeables['axis_line_x']
    relined_axis = relined.themeables['axis_line_x']
    assert blanked_axis.apply.__name__ == 'blank'
    assert relined_axis.apply.__name__ == 'apply'
def test_inplace_add():
    """
    ``+=`` must modify plots and themes in place.

    After every in-place addition the name must still be bound to the
    very same object it started with.
    """
    plot = ggplot(df)
    original_plot = plot

    def add_in_place(component):
        # Add one component with ``+=`` and verify identity is preserved
        nonlocal plot
        plot += component
        assert plot is original_plot

    add_in_place(aes('x', 'y'))
    add_in_place(geom_point())
    add_in_place(stat_identity())
    add_in_place(scale_x_continuous())

    with pytest.warns(PlotnineWarning):
        # Warning for; replacing existing scale added above
        plot += xlim(0, 10)
    assert plot is original_plot

    add_in_place(lims(y=(0, 10)))
    add_in_place(labs(x='x'))
    add_in_place(coord_trans())
    add_in_place(facet_null())
    add_in_place(annotate('point', 5, 5, color='red', size=5))
    add_in_place(guides())
    add_in_place(theme_gray())

    # Themes follow the same in-place contract as plots
    base_theme = theme_gray()
    original_theme = base_theme
    base_theme += theme(aspect_ratio=1)
    assert base_theme is original_theme
def test_add_element_heirarchy():
    """
    Themeables higher in the hierarchy modify their descendants,
    while a child themeable leaves its parent alone.
    """
    # Parent (axis_line) and grand-parent (line) themeables both
    # modify the child themeable (axis_line_x).
    for ancestor in ('axis_line', 'line'):
        child_theme = theme_gray() + theme(axis_line_x=l1)      # child
        via_ancestor = child_theme + theme(**{ancestor: l2})    # ancestor
        via_child = child_theme + theme(axis_line_x=l3)         # comparison
        assert (via_ancestor.themeables['axis_line_x'] ==
                via_child.themeables['axis_line_x'])

    # Child themeable does not affect the parent
    parent_theme = theme_gray() + theme(axis_line=l1)           # parent
    with_child = parent_theme + theme(axis_line_x=l2)           # child
    with_parent = parent_theme + theme(axis_line=l3)            # comparison
    assert (with_parent.themeables['axis_line'] !=
            with_child.themeables['axis_line'])
def test_add_complete_partial():
    """
    Adding a partial theme to a complete theme changes only the
    themeable named in the partial theme; rcParams stay equal.
    """
    complete = theme_gray()
    combined = complete + theme(axis_line_x=element_line())

    assert combined != complete
    assert combined.themeables != complete.themeables
    assert combined.rcParams == complete.rcParams

    # Specific difference: axis_line_x is the one themeable that differs
    for key in combined.themeables:
        after = combined.themeables[key]
        before = complete.themeables[key]
        if key != 'axis_line_x':
            assert after == before
            continue
        assert after != before
def plot_violinbox_plots_per_category(
        dataframe: pandas.DataFrame,
        plot_type: str,
        target_feature: str,
        label_column: str,
        colors: List[str],
        coloring_style: str = 'manual',
        value_skip_list: List = None,
        jitter_alpha: float = 0.7,
        plot_alpha: float = 0.5,
        log_10_scale: bool = False,
        theme: str = 'gray',
        save_to_file: str = None,
        dpi: int = 150,
        show: bool = True
) -> p9.ggplot:
    """
    The :func:`plot_violinbox_plots_per_category` draws, for every category
    in `label_column`, a jittered point cloud of `target_feature` overlaid
    with either a box plot or a violin plot.

    Parameters
    ----------
    dataframe: `pandas.DataFrame`, required
        The data to plot. It must contain `label_column` (the category of
        each row) and `target_feature` (the value whose distribution is
        being monitored).
    plot_type: `str`, required
        Either `box` or `violin`; determines the type of plot.
    target_feature: `str`, required
        Name of the column holding the values to plot on the y axis.
    label_column: `str`, required
        Name of the column holding the category labels. It is plotted on
        the x axis as ``factor(<label_column>)``.
    colors: `List[str]`, required
        For ``coloring_style='manual'``, one color per distinct label.
        For ``coloring_style='gradient'``, exactly two colors: the low end
        and the high end of the gradient.
    coloring_style: `str`, optional (default='manual')
        Either `manual` or `gradient`.
    value_skip_list: `List`, optional (default=None)
        Values of `target_feature` to drop before plotting (for example a
        sentinel such as -10000000). ``None`` or an empty list keeps
        every row.
    jitter_alpha: `float`, optional (default=0.7)
        Transparency of the jittered points.
    plot_alpha: `float`, optional (default=0.5)
        Transparency of the box/violin layer.
    log_10_scale: `bool`, optional (default=False)
        If true, the value (y) axis is drawn on a log-10 scale.
    theme: `str`, optional (default='gray')
        One of ``['gray', 'dark', 'seaborn', 'light']``; selects the
        matching `plotnine` theme.
    save_to_file: `str`, optional (default=None)
        If given, the filepath the plot is saved to.
    dpi: `int`, optional (default=150)
        The dpi used when saving the plot.
    show: `bool`, optional (default=True)
        If true, ``draw()`` is called on the plot before returning.

    Returns
    ----------
    The assembled plot, of `p9.ggplot` type.

    Raises
    ----------
    ValueError
        If `plot_type`, `theme` or `coloring_style` is not a supported value.
    AssertionError
        If the number of `colors` does not fit the chosen `coloring_style`.
    """
    # Resolve the string options first so that a typo fails before any
    # plotting work is done. ValueError is a subclass of the bare
    # Exception raised previously, so existing handlers keep working.
    geoms = {'box': p9.geom_boxplot, 'violin': p9.geom_violin}
    if plot_type not in geoms:
        raise ValueError('unknown plot type, it must be violin or box.')

    themes = {
        'gray': p9.theme_gray,
        'dark': p9.theme_dark,
        'seaborn': p9.theme_seaborn,
        'light': p9.theme_light,
    }
    if theme not in themes:
        raise ValueError('Theme type not supported, please add.')

    # An unknown style used to surface later as a NameError on `pplot`.
    if coloring_style not in ('manual', 'gradient'):
        raise ValueError('invalid coloring style')

    # Always bind `df`: it used to be defined only when a skip list was
    # given, and the filtered frame was never handed to the plot.
    df = dataframe
    if value_skip_list is not None and len(value_skip_list) > 0:
        df = dataframe[~dataframe[target_feature].isin(value_skip_list)]

    category = 'factor(' + label_column + ')'

    # The color-count checks raise AssertionError explicitly: the type
    # callers may already catch is preserved, but unlike a bare `assert`
    # the check is not stripped when Python runs with -O.
    if coloring_style == 'gradient':
        if len(colors) != 2:
            raise AssertionError(
                "you have chosen gradient style coloring, for colors you "
                "have to provide a list with the First element being the "
                "color for low and the second the color for high."
            )
        pplot = p9.ggplot(
            data=df,
            mapping=p9.aes(x=category, y=target_feature, color=label_column)
        )
        pplot += p9.scale_color_gradient(low=colors[0], high=colors[1])
    else:
        if len(colors) != len(df[label_column].unique()):
            raise AssertionError(
                "You have chosen per category manual coloring, therefore "
                "you have to provide the same number of colors"
            )
        pplot = p9.ggplot(
            data=df,
            mapping=p9.aes(x=category, y=target_feature, color=category)
        )
        # The mapped aesthetic is `color`, so the manual values must go
        # through the color scale (the alpha scale ignored them).
        pplot += p9.scale_color_manual(values=colors)

    pplot += p9.geom_jitter(alpha=jitter_alpha)
    pplot += geoms[plot_type](alpha=plot_alpha)
    pplot += themes[theme]()

    if log_10_scale:
        # x is the discrete category axis; the values being
        # log-transformed live on y.
        pplot += p9.scale_y_log10()

    if save_to_file is not None:
        save_directory, filename = separate_path_and_file(
            filepath=save_to_file)
        pplot.save(filename=filename, path=save_directory, dpi=dpi)

    if show:
        pplot.draw()

    return pplot
def test_add_empty_theme_element():
    """An empty theme element does not alter the theme it is added to."""
    styled = theme_gray() + theme(axis_line_x=element_line(color='red'))
    empty_element = theme(axis_line_x=element_line())
    after_empty = styled + empty_element
    assert styled == after_empty
def test_add_partial_complete():
    """A complete theme added to a partial theme gives the complete theme."""
    partial = theme(axis_line_x=element_line())
    complete = theme_gray()
    combined = partial + complete
    assert combined == complete
def test_add_complete_complete():
    """Adding two complete themes gives the right-hand theme."""
    left = theme_gray()
    right = theme_matplotlib()
    combined = left + right
    assert combined == right
def test_theme_gray(self):
    """
    Build the shared plot with a title and ``theme_gray`` and compare
    it against ``'theme_gray'``.

    NOTE(review): the comparison of a plot with a string relies on an
    equality hook defined outside this block -- presumably an image
    baseline check; confirm against the test setup.
    """
    titled = self.g + labs(title='Theme Gray')
    plot = titled + theme_gray()
    assert plot + _theme == 'theme_gray'
def plot_2d_distribution_per_category(
        dataframe: pandas.DataFrame,
        label_column: str,
        coordinates: Tuple[str, str],
        colors: List[str],
        coloring_style: str = 'manual',
        log_10_scale: bool = False,
        theme: str = 'gray',
        alpha: float = 0.5,
        save_to_file: str = None,
        dpi: int = 150
) -> p9.ggplot:
    """
    The :func:`plot_2d_distribution_per_category` draws a 2-dimensional
    scatter plot of the whole distribution, with points colored by
    `label_column`.

    Parameters
    ----------
    dataframe: `pandas.DataFrame`, required
        The data to plot. It must contain `label_column` and the two
        columns named in `coordinates`.
    label_column: `str`, required
        Name of the column holding the category labels used for coloring.
    coordinates: `Tuple[str, str]`, required
        Two column names: the first holds the `x` values of the plot, the
        second the `y` values. They are also used as the axis labels.
    colors: `List[str]`, required
        For ``coloring_style='manual'``, one color per distinct label.
        For ``coloring_style='gradient'``, exactly two colors: the low end
        and the high end of the gradient.
    coloring_style: `str`, optional (default='manual')
        Either `manual` or `gradient`.
    log_10_scale: `bool`, optional (default=False)
        If true, the x axis is drawn on a log-10 scale. The y axis is
        left untouched.
    theme: `str`, optional (default='gray')
        One of ``['gray', 'dark', 'seaborn', 'light']``; selects the
        matching `plotnine` theme. The text size is then set to 8.
    alpha: `float`, optional (default=0.5)
        Transparency of the points.
    save_to_file: `str`, optional (default=None)
        If given, the filepath the plot is saved to. If ``None``, the plot
        is drawn instead of saved.
    dpi: `int`, optional (default=150)
        The dpi used when saving the plot.

    Returns
    ----------
    The assembled plot, of `p9.ggplot` type.

    Raises
    ----------
    AssertionError
        If `coloring_style` is invalid or the number of `colors` does not
        fit the chosen `coloring_style`.
    ValueError
        If `theme` is not a supported value.
    """
    # The validation below raises AssertionError explicitly: the type
    # callers may already catch is preserved, but unlike a bare `assert`
    # the checks are not stripped when Python runs with -O.
    if coloring_style not in ('manual', 'gradient'):
        raise AssertionError("invalid coloring style")

    # ValueError is a subclass of the bare Exception raised previously,
    # so existing handlers keep working.
    themes = {
        'gray': p9.theme_gray,
        'dark': p9.theme_dark,
        'seaborn': p9.theme_seaborn,
        'light': p9.theme_light,
    }
    if theme not in themes:
        raise ValueError('Theme type not supported, please add.')

    x_column, y_column = coordinates[0], coordinates[1]

    if coloring_style == 'gradient':
        if len(colors) != 2:
            raise AssertionError(
                "you have chosen gradient style coloring, for colors you "
                "have to provide a list with the First element being the "
                "color for low and the second the color for high."
            )
        pplot = p9.ggplot(
            data=dataframe,
            mapping=p9.aes(x=x_column, y=y_column, color=label_column)
        )
        pplot += p9.scale_color_gradient(low=colors[0], high=colors[1])
    else:
        if len(colors) != len(dataframe[label_column].unique()):
            raise AssertionError(
                "You have chosen per category manual coloring, therefore "
                "you have to provide the same number of colors"
            )
        pplot = p9.ggplot(
            data=dataframe,
            mapping=p9.aes(
                x=x_column,
                y=y_column,
                color='factor(' + label_column + ')'
            )
        )
        # The mapped aesthetic is `color`, so the manual values must go
        # through the color scale (the alpha scale ignored them).
        pplot += p9.scale_color_manual(values=colors)

    pplot += p9.geom_point(alpha=alpha)
    pplot += p9.xlab(x_column) + p9.ylab(y_column)

    if log_10_scale:
        pplot += p9.scale_x_log10()

    pplot += themes[theme]()
    pplot += p9.theme(text=p9.element_text(size=8))

    if save_to_file is not None:
        save_directory, filename = separate_path_and_file(
            filepath=save_to_file)
        pplot.save(filename=filename, path=save_directory, dpi=dpi)
    else:
        pplot.draw()

    return pplot