def build_training_string(model_string, epoch_start, number_of_epochs,
                          data_string):
    """Build a status message describing the training that is requested.

    Args:
        model_string: Description of the model, inserted into the message.
        epoch_start: Number of epochs the model has already been trained
            for.
        number_of_epochs: Number of epochs the model should be trained up
            to.
        data_string: Description of the data, inserted into the message.

    Returns:
        A sentence stating whether training starts from scratch, continues
        from ``epoch_start``, or has already reached (or passed)
        ``number_of_epochs``.

    Raises:
        ValueError: If ``epoch_start`` or ``number_of_epochs`` is negative,
            or if the two values cannot be ordered.
    """
    # Negative counts have to be rejected before the comparison chain:
    # a negative ``epoch_start`` is smaller than ``number_of_epochs`` and
    # would otherwise be reported as continued training.
    if epoch_start < 0 or number_of_epochs < 0:
        raise ValueError("Cannot train a negative amount.")

    if epoch_start == 0:
        training_string = "Training {} for {} epochs on {}.".format(
            model_string, number_of_epochs, data_string)
    elif epoch_start < number_of_epochs:
        training_string = (
            "Continue training {} for {} additionally epochs (up to {} epochs)"
            " on {}.".format(
                model_string, number_of_epochs - epoch_start,
                number_of_epochs, data_string))
    elif epoch_start == number_of_epochs:
        training_string = (
            "{} has already been trained for {} epochs on {}.".format(
                capitalise_string(model_string), number_of_epochs,
                data_string))
    elif epoch_start > number_of_epochs:
        training_string = (
            "{} has already been trained for more than {} epochs on {}. "
            "Loading model trained for {} epochs.".format(
                capitalise_string(model_string), number_of_epochs,
                data_string, epoch_start))
    else:
        # Only reachable for values that do not compare (for example NaN).
        raise ValueError("Cannot train a negative amount.")

    return training_string
def plot_series(series, x_label, y_label, sort=False, scale="linear",
                bar=False, colour=None, name=None):
    """Draw a series either as a line or as translucent bars.

    Args:
        series: Values to plot; must expose ``shape`` (presumably a
            one-dimensional NumPy array -- TODO confirm).
        x_label: Label for the x-axis (capitalised before use).
        y_label: Label for the y-axis (capitalised before use).
        sort: If true, plot the values in descending order and mark both
            the x-axis label and the figure name as sorted.
        scale: Scale of the y-axis; ``"log"`` switches the bars to
            logarithmic heights.
        bar: If true, draw bars instead of a line.
        colour: Colour to draw with; the first standard palette colour is
            used when no colour is given.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        Tuple of the figure and the figure name.
    """
    figure_name = saving.build_figure_name("series", name)
    colour = colour or style.STANDARD_PALETTE[0]

    number_of_values = series.shape[0]
    positions = numpy.linspace(0, number_of_values, number_of_values)

    if sort:
        # Ascending sort reversed to get descending order
        series = numpy.sort(series)[::-1]
        x_label = "sorted " + x_label
        figure_name += "-sorted"

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    if not bar:
        axis.plot(positions, series, color=colour)
        axis.set_yscale(scale)
    else:
        # Bars get their logarithmic scale through the ``log`` argument
        axis.bar(positions, series, log=(scale == "log"), color=colour,
                 alpha=0.4)

    axis.set_xlabel(capitalise_string(x_label))
    axis.set_ylabel(capitalise_string(y_label))

    return figure, figure_name
def plot_accuracy_evolution(accuracies, name=None):
    """Plot accuracies (in percent) against the epoch number.

    Args:
        accuracies: Mapping from accuracy kind (``"training"`` and
            ``"validation"`` get dedicated styling) to a sequence of
            accuracies, one per epoch, or ``None`` to skip that kind.
            Values are multiplied by 100 for plotting, so they are
            presumably fractions -- TODO confirm.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        Tuple of the figure and the figure name.
    """
    figure_name = saving.build_figure_name("accuracies", name)
    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    kind_styles = {
        "training": {
            "linestyle": "solid",
            "color": style.STANDARD_PALETTE[0],
        },
        "validation": {
            "linestyle": "dashed",
            "color": style.STANDARD_PALETTE[1],
        },
    }

    # A separate loop variable keeps the ``accuracies`` argument intact.
    for accuracies_kind, kind_accuracies in sorted(accuracies.items()):
        if kind_accuracies is None:
            continue
        # Kinds without dedicated styling fall back to Matplotlib's
        # defaults instead of using unbound or stale style variables.
        plot_arguments = kind_styles.get(accuracies_kind, {})
        label = "{} set".format(capitalise_string(accuracies_kind))
        epochs = numpy.arange(len(kind_accuracies)) + 1
        # ``asarray`` makes the scaling element-wise for plain sequences
        # as well (``100 * [..]`` would repeat a list instead).
        axis.plot(
            epochs, 100 * numpy.asarray(kind_accuracies),
            label=label, **plot_arguments)

    handles, labels = axis.get_legend_handles_labels()
    # Unzipping an empty sequence fails, so the legend is only built when
    # at least one curve was drawn.
    if handles:
        labels, handles = zip(*sorted(
            zip(labels, handles), key=lambda t: t[0]))
        axis.legend(handles, labels, loc="best")

    axis.set_xlabel("Epoch")
    axis.set_ylabel("Accuracies")

    return figure, figure_name
def plot_variable_label_correlations(variable_vector, variable_name,
                                     colouring_data_set,
                                     name="variable_label_correlations"):
    """Scatter a variable against the class of each example.

    The examples are shuffled with a fixed seed before plotting, and each
    point is coloured by the class of its label.

    Args:
        variable_vector: Values of the variable, one per example (indexed
            along its first axis).
        variable_name: Label for the x-axis.
        colouring_data_set: Data set providing ``labels``, the class
            name/ID mappings, ``class_names``, ``number_of_classes``,
            ``class_palette``, ``label_sorter`` and ``terms``.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        Tuple of the figure and the figure name.
    """
    figure_name = saving.build_figure_name(name)
    number_of_examples = variable_vector.shape[0]

    def class_id_for_name(class_name):
        return colouring_data_set.class_name_to_class_id[class_name]

    def class_name_for_id(class_id):
        return colouring_data_set.class_id_to_class_name[class_id]

    names_to_ids = numpy.vectorize(class_id_for_name)
    ids_to_names = numpy.vectorize(class_name_for_id)

    labels = colouring_data_set.labels
    class_names = colouring_data_set.class_names
    number_of_classes = colouring_data_set.number_of_classes
    palette = colouring_data_set.class_palette
    label_sorter = colouring_data_set.label_sorter

    if not palette:
        # Build a palette with one colour per class in sorted class order
        fallback_colours = style.lighter_palette(number_of_classes)
        ordered_class_names = sorted(class_names, key=label_sorter)
        palette = {}
        for index, class_name in enumerate(ordered_class_names):
            palette[class_name] = fallback_colours[index]

    # Fixed seed, so the drawing order of the points is reproducible
    order = numpy.random.RandomState(117).permutation(number_of_examples)
    variable_vector = variable_vector[order]
    labels = labels[order]

    label_ids = numpy.expand_dims(names_to_ids(labels), axis=-1)
    point_colours = [palette[label] for label in labels]

    tick_ids = numpy.unique(label_ids)
    tick_names = ids_to_names(tick_ids)

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    axis.scatter(variable_vector, label_ids, c=point_colours, s=1)

    axis.set_yticks(tick_ids)
    axis.set_yticklabels(tick_names)

    axis.set_xlabel(variable_name)
    axis.set_ylabel(capitalise_string(colouring_data_set.terms["class"]))

    return figure, figure_name
def validate_model_parameters(reconstruction_distribution=None,
                              number_of_reconstruction_classes=None,
                              model_type=None, latent_distribution=None,
                              parameterise_latent_posterior=None):
    """Raise an error for unsupported combinations of model parameters.

    Two independent checks are made, each only when all of its arguments
    are given (truthy):

    * a piecewise categorical likelihood (a positive number of
      reconstruction classes) cannot be combined with the Bernoulli
      distribution, zero-inflated distributions, or constrained
      distributions;
    * for model types containing ``"VAE"``, the latent posterior can only
      be parameterised for the ``"VAE"`` model type with a
      ``"gaussian mixture"`` latent distribution.

    Raises:
        ValueError: If either check fails.
    """
    # Piecewise categorical likelihood
    uses_piecewise_categorical = (
        reconstruction_distribution
        and number_of_reconstruction_classes
        and number_of_reconstruction_classes > 0
    )

    if uses_piecewise_categorical:
        candidates = (
            (reconstruction_distribution == "bernoulli",
             "the Bernoulli distribution"),
            ("zero-inflated" in reconstruction_distribution,
             "zero-inflated distributions"),
            ("constrained" in reconstruction_distribution,
             "constrained distributions"),
        )
        offending_kinds = [
            description for offends, description in candidates if offends]

        if offending_kinds:
            offending_kinds_string = enumerate_strings(
                offending_kinds, conjunction="or")
            raise ValueError("{} cannot be piecewise categorical.".format(
                capitalise_string(offending_kinds_string)))

    # Parameterisation of latent posterior for VAE
    if not (model_type and latent_distribution
            and parameterise_latent_posterior):
        return

    if "VAE" not in model_type:
        return

    parameterisation_supported = (
        model_type == "VAE" and latent_distribution == "gaussian mixture")

    if not parameterisation_supported:
        raise ValueError(
            "Cannot parameterise latent posterior parameters for {} "
            "or {} distribution.".format(model_type, latent_distribution))
def _plot_and_save_decomposition(values, directory, export_options,
                                 **plot_arguments):
    """Plot values with ``figures.plot_values`` and save the figure.

    All ``plot_arguments`` are forwarded unchanged to
    ``figures.plot_values``; the figure is then saved in ``directory``
    with ``figures.save_figure`` using ``export_options``.
    """
    figure, figure_name = figures.plot_values(values, **plot_arguments)
    figures.save_figure(
        figure=figure, name=figure_name, options=export_options,
        directory=directory)


def _report_decomposition_plotted(title, plot_time_start, description=""):
    """Print that the plot(s) for ``title`` were saved and how long it took.

    ``description`` is inserted verbatim after the capitalised title, for
    example ``" (with labels)"``.
    """
    plot_duration = time() - plot_time_start
    print(" {}{} plotted and saved ({}).".format(
        capitalise_string(title), description,
        format_duration(plot_duration)))


def analyse_decompositions(data_sets, other_data_sets=None, centroids=None,
                           colouring_data_set=None, sampled_data_set=None,
                           decomposition_methods=None,
                           highlight_feature_indices=None, symbol=None,
                           title="data set", specifier=None,
                           analysis_level=None, export_options=None,
                           analyses_directory=None):
    """Decompose data sets to two components and plot them.

    For every data set with more than one feature, and for every
    decomposition method, the values are decomposed with ``decompose`` and
    plotted with ``figures.plot_values`` using a range of colour codings
    (none, samples, labels, superset labels, classes, batches, predicted
    cluster IDs, predicted labels, count sum, and highlighted features,
    depending on what the colouring data set provides). Data sets with
    exactly two features are additionally plotted without decomposition.
    Progress is printed, and figures are saved with
    ``figures.save_figure``.

    Args:
        data_sets: A data set, or a list, tuple, or dictionary of data
            sets.
        other_data_sets: Optional data set(s), paired with ``data_sets``,
            whose values are transformed with the decomposition fitted to
            the paired data set and plotted instead of it.
        centroids: Optional centroids, only used for data sets whose
            ``version`` is ``"z"`` or ``"z1"``.
        colouring_data_set: Data set used for colour-coding; the first
            analysed data set is used if none is given.
        sampled_data_set: Optional data set whose values are plotted as
            samples.
        decomposition_methods: Name(s) of decomposition methods; a default
            is used if none are given.
        highlight_feature_indices: Index or indices of features to
            colour-code by; a default is used if none are given.
        symbol: Symbol used for axis labels; the specification from
            ``specifier`` is used if no symbol is given.
        title: Base title used in messages and for naming figures.
        specifier: Optional callable returning a specification for a data
            set, which is added to the name and the title.
        analysis_level: Plots for each class are only made at the
            ``"extensive"`` level; a default is used if not given.
        export_options: Passed on to ``figures.save_figure``.
        analyses_directory: Directory in which a subdirectory for the
            figures is used; a default is used if not given.

    Raises:
        ValueError: If ``data_sets`` and ``other_data_sets`` do not have
            the same length.
    """
    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]

    centroids_original = centroids

    if isinstance(data_sets, dict):
        data_sets = list(data_sets.values())

    if not isinstance(data_sets, (list, tuple)):
        data_sets = [data_sets]

    if other_data_sets is None:
        other_data_sets = [None] * len(data_sets)
    elif not isinstance(other_data_sets, (list, tuple)):
        other_data_sets = [other_data_sets]

    if len(data_sets) != len(other_data_sets):
        raise ValueError(
            "Lists of data sets and alternative data sets do not have the "
            "same length.")

    specification = None

    base_symbol = symbol

    original_title = title

    if decomposition_methods is None:
        decomposition_methods = [defaults["decomposition_method"]]
    elif not isinstance(decomposition_methods, (list, tuple)):
        decomposition_methods = [decomposition_methods]
    else:
        # ``list()`` copies lists and also accepts tuples, which have
        # neither ``copy`` nor ``insert``.
        decomposition_methods = list(decomposition_methods)
    # ``None`` stands for plotting two-feature data sets as they are.
    decomposition_methods.insert(0, None)

    if highlight_feature_indices is None:
        highlight_feature_indices = defaults["analyses"][
            "highlight_feature_indices"]
    elif not isinstance(highlight_feature_indices, (list, tuple)):
        highlight_feature_indices = [highlight_feature_indices]
    else:
        highlight_feature_indices = list(highlight_feature_indices)

    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]

    for data_set, other_data_set in zip(data_sets, other_data_sets):

        if data_set.values.shape[1] <= 1:
            continue

        title = original_title
        name = normalise_string(title)

        if specifier:
            specification = specifier(data_set)

        if specification:
            name += "-" + str(specification)
            # ``str`` for consistency with the name above
            title += " for " + str(specification)

        title += " set"

        # NOTE(review): once set here, the first data set keeps being
        # used for colour-coding all following data sets -- confirm that
        # this is intended.
        if not colouring_data_set:
            colouring_data_set = data_set

        if data_set.version in ["z", "z1"]:
            centroids = copy.deepcopy(centroids_original)
        else:
            centroids = None

        if other_data_set:
            title = "{} set values in {}".format(
                other_data_set.version, title)
            name = other_data_set.version + "-" + name

        decompositions_directory = os.path.join(analyses_directory, name)

        for decomposition_method in decomposition_methods:

            other_values = None
            sampled_values = None

            if other_data_set:
                other_values = other_data_set.values

            if sampled_data_set:
                sampled_values = sampled_data_set.values

            if not decomposition_method:
                # Without a method only two-feature data can be plotted
                if data_set.number_of_features == 2:
                    values_decomposed = data_set.values
                    other_values_decomposed = other_values
                    sampled_values_decomposed = sampled_values
                    centroids_decomposed = centroids
                else:
                    continue
            else:
                decomposition_method = proper_string(
                    decomposition_method, DECOMPOSITION_METHOD_NAMES)

                values_decomposed = data_set.values
                other_values_decomposed = other_values
                sampled_values_decomposed = sampled_values
                centroids_decomposed = centroids

                other_value_sets_decomposed = {}
                if other_values is not None:
                    other_value_sets_decomposed["other"] = other_values
                if sampled_values is not None:
                    other_value_sets_decomposed["sampled"] = sampled_values
                if not other_value_sets_decomposed:
                    other_value_sets_decomposed = None

                if decomposition_method == "t-SNE":
                    if (data_set.number_of_examples
                            > MAXIMUM_NUMBER_OF_EXAMPLES_FOR_TSNE):
                        print(
                            "The number of examples for {}".format(title),
                            "is too large to decompose it",
                            "using {}. Skipping.".format(
                                decomposition_method))
                        print()
                        continue

                    elif (data_set.number_of_features
                            > MAXIMUM_NUMBER_OF_FEATURES_FOR_TSNE):
                        # Reduce the number of features with PCA first
                        number_of_pca_components_before_tsne = min(
                            MAXIMUM_NUMBER_OF_PCA_COMPONENTS_BEFORE_TSNE,
                            data_set.number_of_examples - 1)
                        print(
                            "The number of features for {}".format(title),
                            "is too large to decompose it",
                            "using {} in due time.".format(
                                decomposition_method))
                        print("Decomposing {} to {} components using PCA "
                              "beforehand.".format(
                                  title,
                                  number_of_pca_components_before_tsne))
                        decompose_time_start = time()
                        (values_decomposed, other_value_sets_decomposed,
                         centroids_decomposed) = decompose(
                            values_decomposed,
                            other_value_sets=other_value_sets_decomposed,
                            centroids=centroids_decomposed,
                            method="pca",
                            number_of_components=(
                                number_of_pca_components_before_tsne))
                        decompose_duration = time() - decompose_time_start
                        print("{} pre-decomposed ({}).".format(
                            capitalise_string(title),
                            format_duration(decompose_duration)))

                    else:
                        # NOTE(review): only ``values_decomposed`` reaches
                        # ``decompose`` below; the two other densified
                        # arrays are not put back into
                        # ``other_value_sets_decomposed`` -- confirm.
                        if scipy.sparse.issparse(values_decomposed):
                            values_decomposed = values_decomposed.A
                        if scipy.sparse.issparse(other_values_decomposed):
                            other_values_decomposed = (
                                other_values_decomposed.A)
                        if scipy.sparse.issparse(sampled_values_decomposed):
                            sampled_values_decomposed = (
                                sampled_values_decomposed.A)

                print("Decomposing {} using {}.".format(
                    title, decomposition_method))
                decompose_time_start = time()
                (values_decomposed, other_value_sets_decomposed,
                 centroids_decomposed) = decompose(
                    values_decomposed,
                    other_value_sets=other_value_sets_decomposed,
                    centroids=centroids_decomposed,
                    method=decomposition_method,
                    number_of_components=2)
                decompose_duration = time() - decompose_time_start
                print("{} decomposed ({}).".format(
                    capitalise_string(title),
                    format_duration(decompose_duration)))
                print()

                if other_value_sets_decomposed:
                    other_values_decomposed = (
                        other_value_sets_decomposed.get("other"))
                    sampled_values_decomposed = (
                        other_value_sets_decomposed.get("sampled"))

            if base_symbol:
                symbol = base_symbol
            else:
                symbol = specification

            x_label = _axis_label_for_symbol(
                symbol=symbol,
                coordinate=1,
                decomposition_method=decomposition_method,
            )
            y_label = _axis_label_for_symbol(
                symbol=symbol,
                coordinate=2,
                decomposition_method=decomposition_method,
            )

            figure_labels = {
                "title": decomposition_method,
                "x label": x_label,
                "y label": y_label
            }

            if other_data_set:
                plot_values_decomposed = other_values_decomposed
            else:
                plot_values_decomposed = values_decomposed

            if plot_values_decomposed is None:
                print("No values to plot.\n")
                # NOTE(review): this ends the whole analysis, including
                # any remaining methods and data sets -- confirm that
                # ``continue`` is not what is wanted.
                return

            print("Plotting {}{}.".format(
                "decomposed " if decomposition_method else "", title))

            # Arguments shared by every plot of this decomposition
            shared_plot_arguments = {
                "centroids": centroids_decomposed,
                "figure_labels": figure_labels,
                "example_tag": data_set.tags["example"],
                "name": name,
            }

            # No colour-coding
            plot_time_start = time()
            _plot_and_save_decomposition(
                plot_values_decomposed, decompositions_directory,
                export_options, **shared_plot_arguments)
            _report_decomposition_plotted(title, plot_time_start)

            # Samples
            if sampled_data_set:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    sampled_values=sampled_values_decomposed,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start, " (with samples)")

            # Labels
            if colouring_data_set.labels is not None:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    colour_coding="labels",
                    colouring_data_set=colouring_data_set,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start, " (with labels)")

                # Superset labels
                if colouring_data_set.superset_labels is not None:
                    plot_time_start = time()
                    _plot_and_save_decomposition(
                        plot_values_decomposed, decompositions_directory,
                        export_options,
                        colour_coding="superset labels",
                        colouring_data_set=colouring_data_set,
                        **shared_plot_arguments)
                    _report_decomposition_plotted(
                        title, plot_time_start, " (with superset labels)")

                # For each class
                if analysis_level == "extensive":
                    if colouring_data_set.number_of_classes <= 10:
                        plot_time_start = time()
                        for class_name in colouring_data_set.class_names:
                            _plot_and_save_decomposition(
                                plot_values_decomposed,
                                decompositions_directory, export_options,
                                colour_coding="class",
                                colouring_data_set=colouring_data_set,
                                class_name=class_name,
                                **shared_plot_arguments)
                        _report_decomposition_plotted(
                            title, plot_time_start, " (for each class)")

                    # NOTE(review): the class count is read from
                    # ``data_set`` while the class names come from
                    # ``colouring_data_set`` -- confirm.
                    if (colouring_data_set.superset_labels is not None
                            and data_set.number_of_superset_classes <= 10):
                        plot_time_start = time()
                        for superset_class_name in (
                                colouring_data_set.superset_class_names):
                            _plot_and_save_decomposition(
                                plot_values_decomposed,
                                decompositions_directory, export_options,
                                colour_coding="superset class",
                                colouring_data_set=colouring_data_set,
                                class_name=superset_class_name,
                                **shared_plot_arguments)
                        _report_decomposition_plotted(
                            title, plot_time_start,
                            " (for each superset class)")

            # Batches
            if colouring_data_set.has_batches:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    colour_coding="batches",
                    colouring_data_set=colouring_data_set,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start, " (with batches)")

            # Cluster IDs
            if colouring_data_set.has_predicted_cluster_ids:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    colour_coding="predicted cluster IDs",
                    colouring_data_set=colouring_data_set,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start, " (with predicted cluster IDs)")

            # Predicted labels
            if colouring_data_set.has_predicted_labels:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    colour_coding="predicted labels",
                    colouring_data_set=colouring_data_set,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start, " (with predicted labels)")

            # Predicted superset labels
            if colouring_data_set.has_predicted_superset_labels:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    colour_coding="predicted superset labels",
                    colouring_data_set=colouring_data_set,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start,
                    " (with predicted superset labels)")

            # Count sum
            plot_time_start = time()
            _plot_and_save_decomposition(
                plot_values_decomposed, decompositions_directory,
                export_options,
                colour_coding="count sum",
                colouring_data_set=colouring_data_set,
                **shared_plot_arguments)
            _report_decomposition_plotted(
                title, plot_time_start, " (with count sum)")

            # Features
            for feature_index in highlight_feature_indices:
                plot_time_start = time()
                _plot_and_save_decomposition(
                    plot_values_decomposed, decompositions_directory,
                    export_options,
                    colour_coding="feature",
                    colouring_data_set=colouring_data_set,
                    feature_index=feature_index,
                    **shared_plot_arguments)
                _report_decomposition_plotted(
                    title, plot_time_start,
                    " (with {})".format(
                        data_set.feature_names[feature_index]))

            print()
def plot_profile_comparison(observed_series, expected_series,
                            expected_series_total_standard_deviations=None,
                            expected_series_explained_standard_deviations=None,
                            x_name="feature", y_name="value", sort=True,
                            sort_by="expected", sort_direction="ascending",
                            x_scale="linear", y_scale="linear", y_cutoff=None,
                            name=None):
    """Plot an observed series together with its expected counterpart.

    The series that is sorted by is drawn as a solid line and the other
    one as markers. Optional standard deviations are drawn as bands around
    the expected series.

    Args:
        observed_series: Observed values (dense or SciPy sparse).
        expected_series: Expected values (dense or SciPy sparse).
        expected_series_total_standard_deviations: Optional total standard
            deviations of the expected values.
        expected_series_explained_standard_deviations: Optional explained
            standard deviations of the expected values; only drawn if
            their mean is positive.
        x_name: Singular name of what is along the x-axis.
        y_name: Singular name of what is along the y-axis.
        sort: Whether to sort the series before plotting.
        sort_by: Which series to sort by, ``"expected"`` or
            ``"observed"`` (after ``normalise_string``). This also selects
            which series is drawn as a line.
        sort_direction: ``"ascending"`` or ``"descending"`` (after
            ``normalise_string``).
        x_scale: Scale of the x-axis.
        y_scale: Scale of the y-axis, or ``"both"`` for a logarithmic axes
            above a linear axes.
        y_cutoff: Optional y-axis limit; for ``"both"`` it separates the
            two axes.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        Tuple of the figure and the figure name.

    Raises:
        ValueError: If ``sort_by`` or (when sorting) ``sort_direction`` is
            not one of the supported values.
    """
    sort_by = normalise_string(sort_by)
    sort_direction = normalise_string(sort_direction)
    figure_name = saving.build_figure_name("profile_comparison", name)

    if scipy.sparse.issparse(observed_series):
        observed_series = observed_series.A.squeeze()

    # Densified like the other series, so that it can be indexed and
    # combined with the standard deviations below.
    if scipy.sparse.issparse(expected_series):
        expected_series = expected_series.A.squeeze()

    if scipy.sparse.issparse(expected_series_total_standard_deviations):
        expected_series_total_standard_deviations = (
            expected_series_total_standard_deviations.A.squeeze())

    if scipy.sparse.issparse(expected_series_explained_standard_deviations):
        expected_series_explained_standard_deviations = (
            expected_series_explained_standard_deviations.A.squeeze())

    observed_colour = style.STANDARD_PALETTE[0]

    expected_palette = seaborn.light_palette(style.STANDARD_PALETTE[1], 5)

    expected_colour = expected_palette[-1]
    expected_total_standard_deviations_colour = expected_palette[1]
    expected_explained_standard_deviations_colour = expected_palette[3]

    if sort:
        x_label = "{}s sorted {} by {} {}s [sort index]".format(
            capitalise_string(x_name), sort_direction, sort_by,
            y_name.lower())
    else:
        x_label = "{}s [original index]".format(capitalise_string(x_name))
    y_label = capitalise_string(y_name) + "s"

    observed_label = "Observed"
    expected_label = "Expected"
    expected_total_standard_deviations_label = "Total standard deviation"
    expected_explained_standard_deviations_label = (
        "Explained standard deviation")

    # Sorting
    if sort_by == "expected":
        sort_series = expected_series
        expected_marker = ""
        expected_line_style = "solid"
        expected_z_order = 3
        observed_marker = "o"
        observed_line_style = ""
        observed_z_order = 2
    elif sort_by == "observed":
        sort_series = observed_series
        expected_marker = "o"
        expected_line_style = ""
        expected_z_order = 2
        observed_marker = ""
        observed_line_style = "solid"
        observed_z_order = 3
    else:
        # Without this, the style variables above would be unbound and
        # plotting would fail later with a ``NameError``.
        raise ValueError(
            "Can only sort by either expected or observed series.")

    if sort:
        sort_indices = numpy.argsort(sort_series)
        if sort_direction == "descending":
            sort_indices = sort_indices[::-1]
        elif sort_direction != "ascending":
            raise ValueError(
                "Sort direction can either be ascending or descending.")
    else:
        sort_indices = slice(None)

    # Standard deviations
    if expected_series_total_standard_deviations is not None:
        with_total_standard_deviations = True
        expected_series_total_standard_deviations_lower = (
            expected_series - expected_series_total_standard_deviations)
        expected_series_total_standard_deviations_upper = (
            expected_series + expected_series_total_standard_deviations)
    else:
        with_total_standard_deviations = False

    if (expected_series_explained_standard_deviations is not None
            and expected_series_explained_standard_deviations.mean() > 0):
        with_explained_standard_deviations = True
        expected_series_explained_standard_deviations_lower = (
            expected_series - expected_series_explained_standard_deviations)
        expected_series_explained_standard_deviations_upper = (
            expected_series + expected_series_explained_standard_deviations)
    else:
        with_explained_standard_deviations = False

    # Figure
    if y_scale == "both":
        figure, axes = pyplot.subplots(nrows=2, sharex=True)
        figure.subplots_adjust(hspace=0.1)
        axis_upper = axes[0]
        axis_lower = axes[1]
        # ``set_zorder`` is a method; assigning to it would replace the
        # method without changing the drawing order.
        axis_upper.set_zorder(1)
        axis_lower.set_zorder(0)
    else:
        figure = pyplot.figure()
        axis = figure.add_subplot(1, 1, 1)
        axes = [axis]

    handles = []
    feature_indices = numpy.arange(len(observed_series)) + 1

    for i, axis in enumerate(axes):
        # Legend handles are only collected from the first axes
        collect_handles = i == 0

        observed_plot, = axis.plot(
            feature_indices,
            observed_series[sort_indices],
            label=observed_label,
            color=observed_colour,
            marker=observed_marker,
            linestyle=observed_line_style,
            zorder=observed_z_order
        )
        if collect_handles:
            handles.append(observed_plot)

        expected_plot, = axis.plot(
            feature_indices,
            expected_series[sort_indices],
            label=expected_label,
            color=expected_colour,
            marker=expected_marker,
            linestyle=expected_line_style,
            zorder=expected_z_order
        )
        if collect_handles:
            handles.append(expected_plot)

        if with_total_standard_deviations:
            axis.fill_between(
                feature_indices,
                expected_series_total_standard_deviations_lower[sort_indices],
                expected_series_total_standard_deviations_upper[sort_indices],
                color=expected_total_standard_deviations_colour,
                zorder=0
            )
            if collect_handles:
                # Filled bands get a patch as legend handle
                handles.append(matplotlib.patches.Patch(
                    label=expected_total_standard_deviations_label,
                    color=expected_total_standard_deviations_colour
                ))

        if with_explained_standard_deviations:
            axis.fill_between(
                feature_indices,
                expected_series_explained_standard_deviations_lower[
                    sort_indices],
                expected_series_explained_standard_deviations_upper[
                    sort_indices],
                color=expected_explained_standard_deviations_colour,
                zorder=1
            )
            if collect_handles:
                handles.append(matplotlib.patches.Patch(
                    label=expected_explained_standard_deviations_label,
                    color=expected_explained_standard_deviations_colour
                ))

    if y_scale == "both":
        axis_upper.legend(
            handles=handles,
            loc="best"
        )
        seaborn.despine(ax=axis_upper)
        seaborn.despine(ax=axis_lower)

        # NOTE(review): newer Matplotlib versions appear to have renamed
        # ``nonposy`` to ``nonpositive`` -- confirm against the version
        # this project supports.
        axis_upper.set_yscale("log", nonposy="clip")
        axis_lower.set_yscale("linear")
        figure.text(0.04, 0.5, y_label, va="center", rotation="vertical")

        axis_lower.set_xscale(x_scale)
        axis_lower.set_xlabel(x_label)

        y_upper_min, y_upper_max = axis_upper.get_ylim()
        y_lower_min, y_lower_max = axis_lower.get_ylim()
        # The cut-off is the lower limit of the upper (logarithmic) axes
        # and the upper limit of the lower (linear) axes.
        axis_upper.set_ylim(y_cutoff, y_upper_max)

        y_lower_min = max(-1, y_lower_min)
        axis_lower.set_ylim(y_lower_min, y_cutoff)

    else:
        axis.legend(
            handles=handles,
            loc="best"
        )
        seaborn.despine()

        y_scale_arguments = {}
        if y_scale == "log":
            # NOTE(review): see the remark on ``nonposy`` above
            y_scale_arguments["nonposy"] = "clip"
        axis.set_yscale(y_scale, **y_scale_arguments)
        axis.set_ylabel(y_label)

        axis.set_xscale(x_scale)
        axis.set_xlabel(x_label)

        y_min, y_max = axis.get_ylim()
        y_min = max(-1, y_min)

        if y_cutoff:
            if y_scale == "linear":
                y_max = y_cutoff
            elif y_scale == "log":
                y_min = y_cutoff

        axis.set_ylim(y_min, y_max)

    return figure, figure_name
def plot_histogram(series, excess_zero_count=0, label=None, normed=False,
                   discrete=False, x_scale="linear", y_scale="linear",
                   colour=None, name=None):
    """Plot a histogram of a series as translucent bars.

    Args:
        series: Values to make a histogram of; a copy is made, so the
            argument itself is not changed.
        excess_zero_count: Count added to the first bin, and to the number
            of values used for normalisation.
        label: Label for the x-axis (capitalised before use); if ``None``,
            the x-axis is not labelled by this function.
        normed: If true, plot frequencies instead of counts.
        discrete: If true, and the maximum value is below
            ``MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS``, use one bin of
            unit width per value from 0 up to the maximum.
        x_scale: Scale of the x-axis; for ``"log"`` the values and the bin
            range are shifted up by one.
        y_scale: Scale of the y-axis; ``"log"`` makes the bar heights
            logarithmic.
        colour: Colour of the bars; the first standard palette colour is
            used if ``None``.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        Tuple of the figure and the figure name.
    """
    series = series.copy()

    figure_name = "histogram"

    if normed:
        figure_name += "-normed"

    figure_name = saving.build_figure_name(figure_name, name)

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    series_length = len(series) + excess_zero_count

    series_max = series.max()

    if discrete and series_max < MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS:
        # One bin per value, centred on the value
        number_of_bins = int(numpy.ceil(series_max)) + 1
        bin_range = numpy.array((-0.5, series_max + 0.5))
    else:
        if series_max < MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS:
            number_of_bins = "auto"
        else:
            number_of_bins = MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS
        bin_range = numpy.array((series.min(), series_max))

    if colour is None:
        colour = style.STANDARD_PALETTE[0]

    if x_scale == "log":
        # Shift by one, so zeros stay representable on a logarithmic axis
        series += 1
        bin_range += 1
        # The default label is ``None``, which cannot be appended to.
        if label is not None:
            label += " (shifted one)"
        figure_name += "-log_values"

    y_log = y_scale == "log"

    histogram, bin_edges = numpy.histogram(
        series,
        bins=number_of_bins,
        range=bin_range
    )

    histogram[0] += excess_zero_count

    width = bin_edges[1] - bin_edges[0]
    bin_centres = bin_edges[:-1] + width / 2

    if normed:
        histogram = histogram / series_length

    axis.bar(
        bin_centres,
        histogram,
        width=width,
        log=y_log,
        color=colour,
        alpha=0.4
    )

    axis.set_xscale(x_scale)

    if label is not None:
        axis.set_xlabel(capitalise_string(label))

    if normed:
        axis.set_ylabel("Frequency")
    else:
        axis.set_ylabel("Number of counts")

    return figure, figure_name