Exemplo n.º 1
0
def plot_boxplot(column_data=None, output=None, path=None):
    """
    Draw one box plot per column from precomputed box statistics.
    :param column_data: dict mapping each column name to the statistics handed to matplotlib's ``Axes.bxp``
    :param output: "base64" returns the encoded figure; "image" writes the figure to ``path`` and prints an
    ``<img>`` tag pointing at it. Any other value only builds the figure.
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """

    for col_name, stats in column_data.items():
        # Size the figure that actually holds the box plot. A later plt.figure(figsize=...) call would
        # open a second, empty figure instead of resizing this one.
        fig, axes = plt.subplots(1, 1, figsize=(12, 5))

        bp = axes.bxp(stats, patch_artist=True)
        axes.set_title(col_name)

        # Other artist groups available in bp: 'fliers', 'means', 'medians', 'caps'
        for element in ('boxes', 'whiskers'):
            plt.setp(bp[element], color='#1f77b4')

        for patch in bp['boxes']:
            patch.set(facecolor='white')

        # Compare by value: `is` against a string literal only works when both strings happen to be interned
        if output == "base64":
            # NOTE: returns on the first column, so any remaining columns are not plotted
            return output_base64(fig)
        elif output == "image":
            # NOTE: every column is written to the same path
            output_image(fig, path)
            print_html("<img src='" + path + "'>")
Exemplo n.º 2
0
def plot_correlation(cols_data, output=None, path=None):
    """
    Plot a correlation matrix as an annotated heat map.
    :param cols_data: dict with "data" (the matrix values) and "cols" (labels used for both rows and columns)
    :param output: "base64" returns the encoded figure; "image" saves the figure to ``path`` and prints an
    ``<img>`` tag pointing at it. Any other value only builds the heat map.
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """
    import pandas as pd
    labels = cols_data["cols"]
    df = pd.DataFrame(data=cols_data["data"],
                      columns=labels,
                      index=labels)

    # The mask is all False, so no cell is hidden.
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
    sns_plot = sns.heatmap(df,
                           mask=np.zeros_like(cols_data["data"],
                                              dtype=bool),
                           cmap=sns.diverging_palette(220, 10, as_cmap=True),
                           annot=True)

    # Compare by value: `is` against a string literal only works when both strings happen to be interned
    if output == "base64":
        return output_base64(sns_plot.get_figure())
    elif output == "image":
        # Save image
        sns_plot.get_figure().savefig(path)
        print_html("<img src='" + path + "'>")
Exemplo n.º 3
0
def plot_scatterplot(column_data=None, output=None, path=None):
    """
    Scatter plot of two columns.
    :param column_data: dict with "x" and "y" entries, each holding "data" (the values) and "name" (the axis
    label), plus "s", which is passed to ``plt.scatter`` as the marker size
    :param output: "base64" returns the encoded figure; "image" writes the figure to ``path`` and prints an
    ``<img>`` tag pointing at it; "plot" only adjusts the subplot margins
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """

    x_axis = column_data["x"]
    y_axis = column_data["y"]

    fig = plt.figure(figsize=(12, 5))
    plt.scatter(x_axis["data"],
                y_axis["data"],
                s=column_data["s"],
                alpha=0.5)
    plt.xlabel(x_axis["name"])
    plt.ylabel(y_axis["name"])

    # Compare by value: `is` against a string literal only works when both strings happen to be interned
    if output == "base64":
        return output_base64(fig)
    elif output == "image":
        output_image(fig, path)
        print_html("<img src='" + path + "'>")
    elif output == "plot":
        # Tweak spacing to prevent clipping of tick-labels
        plt.subplots_adjust(left=0.05, right=0.99, top=0.9, bottom=0.3)
Exemplo n.º 4
0
def plot_hist(column_data=None, output=None, sub_title="", path=None):
    """
    Plot a histogram for each column from precomputed buckets.
    obj = {"col_name":[{'lower': -87.36666870117188, 'upper': -70.51333465576172, 'count': 0},
    {'lower': -70.51333465576172, 'upper': -53.66000061035157, 'count': 22094},
    {'lower': -53.66000061035157, 'upper': -36.80666656494141, 'count': 2},
    ...
    ]}
    :param column_data: column data in json format (see the example above)
    :param output: "base64" returns the encoded figure; "image" writes the figure to ``path`` and prints an
    ``<img>`` tag pointing at it; "plot" only adjusts the subplot margins
    :param sub_title: plot subtitle, appended to the title
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """

    for col_name, data in column_data.items():
        # Drop missing buckets once, up front, so bin edges and bar heights stay aligned
        buckets = [d for d in data if d is not None]

        # Bin edges: every lower bound plus the upper bound of the last bucket
        bins = [d['lower'] for d in buckets]
        bins.append(buckets[-1]["upper"])

        # Transform hist Optimus format to matplotlib format
        hist = [d["count"] for d in buckets]

        array_bins = array(bins)
        center = (array_bins[:-1] + array_bins[1:]) / 2
        # Bar width is derived from the first bin only, so this assumes equal-width bins
        width = 0.9 * (array_bins[1] - array_bins[0])

        hist = one_list_to_val(hist)

        # Plot
        fig = plt.figure(figsize=(12, 5))
        plt.bar(center, hist, width=width)
        plt.title("Histogram '" + col_name + "' " + sub_title)

        # Compare by value: `is` against a string literal only works when both strings happen to be interned
        if output == "base64":
            # NOTE: returns on the first column, so any remaining columns are not plotted
            return output_base64(fig)
        elif output == "image":
            # Save image; pass the figure itself, as the other plot helpers do
            output_image(fig, path)
            print_html("<img src='" + path + "'>")
        elif output == "plot":
            # Tweak spacing to prevent clipping of tick-labels
            plt.subplots_adjust(left=0.05, right=0.99, top=0.9, bottom=0.3)
Exemplo n.º 5
0
def plot_frequency(column_data=None, output=None, path=None):
    """
    Frequency bar plot for each column.
    :param column_data: dict mapping each column name to a list of {"value": ..., "count": ...} entries
    :param output: "base64" returns the encoded figure; "image" writes the figure to ``path`` and prints an
    ``<img>`` tag pointing at it. Any other value only builds the figure.
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """

    for col_name, data in column_data.items():

        # Transform Optimus' format to matplotlib's format
        labels = [ellipsis(d["value"]) for d in data]
        heights = [d["count"] for d in data]

        # Plot
        fig = plt.figure(figsize=(12, 5))

        # Bars are placed at integer positions so that string labels can be used on x
        positions = range(len(labels))
        plt.bar(positions, heights)
        plt.xticks(positions, labels, rotation=45, ha="right")

        plt.title("Frequency '" + col_name + "'")

        # Tweak spacing to prevent clipping of tick-labels. This applies to every output mode,
        # so no separate "plot" branch is needed below.
        plt.subplots_adjust(left=0.05, right=0.99, top=0.9, bottom=0.3)

        # Compare by value: `is` against a string literal only works when both strings happen to be interned
        if output == "base64":
            # NOTE: returns on the first column, so any remaining columns are not plotted
            return output_base64(fig)
        elif output == "image":
            # Pass the figure itself, as the other plot helpers do
            output_image(fig, path)
            print_html("<img src='" + path + "'>")
Exemplo n.º 6
0
def plot_qqplot(col_name, sample_data, output="plot", path=None):
    """
    Plot a qqplot of one column.
    :param col_name: name of the column to plot
    :param sample_data: object exposing ``toPandas()``; presumably a Spark dataframe (confirm against callers)
    :param output: "base64" returns the encoded figure; "image" writes the figure to ``path``. Any other
    value only builds the figure.
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """
    fig, ax = plt.subplots(figsize=(12, 5))

    # Draw on our own axes. Without `ax`, qqplot opens a new figure of its own and `fig` stays
    # empty, so the base64 output would encode a blank image.
    sm.qqplot(sample_data.toPandas()[col_name],
              line='q',
              color='C0',
              alpha=0.3,
              ax=ax)

    ax.set_title("qqplot '" + col_name + "' ")

    # Compare by value: `is` against a string literal only works when both strings happen to be interned
    if output == "base64":
        return output_base64(fig)
    elif output == "image":
        output_image(fig, path)
Exemplo n.º 7
0
def plot_missing_values(column_data=None, output=None, path=None):
    """
    Plot the number of missing values per column as a bar chart.
    :param column_data: dict with "count" (used as the top of the y axis) and "data", which maps each column
    name to {"missing": ..., "%": ...}
    :param output: "base64" returns the encoded figure; "image" writes the figure to ``path``; "plot" only
    adjusts the subplot margins
    :param path: destination file, used when output is "image"
    :return: the result of ``output_base64`` when output is "base64", otherwise None
    """
    stats = column_data["data"]
    columns = list(stats)
    values = [col_stats["missing"] for col_stats in stats.values()]
    labels = [col_stats["%"] for col_stats in stats.values()]

    # Plot
    fig = plt.figure(figsize=(12, 5))
    plt.bar(columns, values)
    plt.xticks(columns, columns)

    # Highest limit, with 5% headroom
    highest = column_data["count"]
    plt.ylim(0, 1.05 * highest)
    plt.title("Missing Values")

    # Annotate each bar with "missing(label)", slightly above the bar and shifted left of its position
    for i, (label, val) in enumerate(zip(labels, values)):
        plt.text(x=i - 0.5,
                 y=val + (highest * 0.05),
                 s="{}({})".format(val, label))

    # Compare by value: `is` against a string literal only works when both strings happen to be interned
    if output == "base64":
        return output_base64(fig)
    elif output == "image":
        # Pass the figure itself, as the other plot helpers do
        output_image(fig, path)
    elif output == "plot":
        # Tweak spacing to prevent clipping of tick-labels
        plt.subplots_adjust(left=0.05, right=0.99, top=0.9, bottom=0.3)