Пример #1
0
def test_qqplot_unequal():
    """Two unequal-size samples must plot identically in either argument order.

    Draws 100 and 200 standard-normal values from a seeded ``RandomState``,
    builds the two-sample Q-Q plot in both argument orders, and checks that
    the first child artist of each axes carries the same data and that both
    axes hold the same number of children.
    """
    rng = np.random.RandomState(0)
    short_sample = rng.standard_normal(100)
    long_sample = rng.standard_normal(200)

    forward = qqplot_2samples(short_sample, long_sample)
    backward = qqplot_2samples(long_sample, short_sample)

    forward_children = forward.get_axes()[0].get_children()
    backward_children = backward.get_axes()[0].get_children()

    # The first child of each axes is the artist holding the plotted points.
    xs_forward, ys_forward = forward_children[0].get_data()
    xs_backward, ys_backward = backward_children[0].get_data()
    np.testing.assert_allclose(xs_forward, xs_backward)
    np.testing.assert_allclose(ys_forward, ys_backward)

    assert len(forward_children) == len(backward_children)

    @pytest.mark.matplotlib
    def test_qqplot(self, close_figures):
        """Smoke test: draw a Q-Q plot of ``self.res`` with ``line="r"``.

        There are no assertions; the test passes when ``qqplot`` does not raise.
        """
        qqplot(self.res, line="r")

    @pytest.mark.matplotlib
    def test_qqplot_2samples_prob_plot_obj(self, close_figures):
        """Smoke test ``qqplot_2samples`` on ``ProbPlot`` inputs for each ``line`` value."""
        line_options = ("r", "q", "45", "s")
        for line_option in line_options:
            # Both arguments are the fixture's ``ProbPlot`` instances.
            qqplot_2samples(
                self.prbplt, self.other_prbplot, line=line_option
            )

    @pytest.mark.matplotlib
    def test_qqplot_2samples_arrays(self, close_figures):
        """Smoke test ``qqplot_2samples`` on ``self.res`` / ``self.other_array`` for each ``line`` value."""
        line_options = ("r", "q", "45", "s")
        for line_option in line_options:
            # Inputs are passed as-is rather than wrapped in ``ProbPlot``.
            qqplot_2samples(
                self.res, self.other_array, line=line_option
            )
Пример #2
0
 def qq_plot_2samples(self):
     """
     Draw a two-sample Q-Q plot of ``self.column_data`` against ``self.var_data``.

     A new subplot is added to ``self.figure`` and stored on ``self.ax``; both
     data sets are wrapped in ``sm.ProbPlot`` before plotting, and the canvas
     is redrawn afterwards.

     :return: None (the plot is drawn on ``self.ax``)
     """
     self.ax = self.figure.add_subplot(111)
     # The former ``self.ax.hold(True)`` call is intentionally gone:
     # ``Axes.hold`` was removed in Matplotlib 3.0 (AttributeError there), and
     # new artists are always added on top of existing axes content.
     pp_x = sm.ProbPlot(self.column_data)
     pp_y = sm.ProbPlot(self.var_data)
     qqplot_2samples(pp_x, pp_y, ax=self.ax)
     self.canvas.draw()
Пример #3
0
def athlete_qqplot(df1, df2):
    """Show a 2x2 grid of two-sample Q-Q plots, one per entry of ``athlete_var_list``.

    Parameters
    ----------
    df1, df2 : indexable
        Element ``[0]`` is indexed by variable name to obtain the sample and
        element ``[1]`` is used as the axis label (``df1[1]`` as ``xlabel``,
        ``df2[1]`` as ``ylabel``).
        NOTE(review): presumably ``(DataFrame, label)`` pairs -- confirm
        against the caller.

    The grid is fixed at 2x2, so ``plt.subplot`` will reject a fifth variable.
    """
    n_rows, n_cols = 2, 2
    for position, var in enumerate(athlete_var_list, start=1):
        ax = plt.subplot(n_rows, n_cols, position)
        # The former ``ax.axis(facecolor='blue')`` call was dropped:
        # ``facecolor`` is not a keyword of ``Axes.axis``; older Matplotlib
        # ignored it silently and current releases raise TypeError.
        qqplot_2samples(df1[0][var], df2[0][var], xlabel=df1[1], ylabel=df2[1], line='45', ax=ax)
        plt.title('{var}'.format(var=var))
    plt.subplots_adjust(top=0.9)
    plt.gcf().suptitle('Comparison of Athlete Variables')
    plt.show()
def test_correct_labels(close_figures, reset_randomstate, line, x_size, y_size,
                        labels):
    """Check which label ends up on which axis of a two-sample Q-Q plot.

    A normal sample of ``x_size`` values and a Student-t(3) sample of
    ``y_size`` values are wrapped in ``ProbPlot`` and plotted with the given
    ``line`` and ``labels`` keyword arguments.  When ``x_size <= y_size`` the
    second sample's label ("2nd" by default, "Y" when labels are supplied) is
    expected on the x-axis; otherwise the first sample's label is expected
    there.
    """
    rng = np.random.RandomState(9876554)
    x = rng.normal(loc=0, scale=0.1, size=x_size)
    y = rng.standard_t(3, size=y_size)
    fig = qqplot_2samples(sm.ProbPlot(x), sm.ProbPlot(y), line=line, **labels)
    axes = fig.get_axes()[0]
    x_label = axes.get_xlabel()
    y_label = axes.get_ylabel()

    # Pick the fragments expected on (x-axis, y-axis) for this configuration.
    if x_size <= y_size:
        want_on_x, want_on_y = ("Y", "X") if labels else ("2nd", "1st")
    else:
        want_on_x, want_on_y = ("X", "Y") if labels else ("1st", "2nd")

    assert want_on_x in x_label
    assert want_on_y in y_label
Пример #5
0
def test_axis_order(close_figures):
    """The first sample must drive the x-axis regardless of sample sizes.

    One sample is drawn with standard deviation 1 and the other with 0.01, so
    the axis showing the wider sample has the larger limit span.  The check is
    repeated with equal sizes, a longer first sample, and a longer second
    sample.
    """

    def axis_spans(first, second):
        """Plot ``first`` against ``second``; return the (x, y) axis-limit spans."""
        axes = qqplot_2samples(first, second, "x", "y").get_axes()[0]
        y_span = np.diff(axes.get_ylim())[0]
        x_span = np.diff(axes.get_xlim())[0]
        return x_span, y_span

    xx = np.random.normal(10, 1, (100,))
    xy = np.random.normal(1, 0.01, (100,))
    x_span, y_span = axis_spans(xx, xy)
    assert y_span < x_span

    xx_long = np.random.normal(10, 1, (1000,))
    x_span, y_span = axis_spans(xx_long, xy)
    assert y_span < x_span

    # The wide sample is now passed first and the (longer) narrow one second.
    xy_long = np.random.normal(1, 0.01, (1000,))
    x_span, y_span = axis_spans(xx, xy_long)
    assert x_span < y_span
Пример #6
0
def qqplot_2(var, medal_df, non_medal_df, male_df, female_df, winter_df,
             summer_df):
    """Draw three 45-degree-line Q-Q plots of column ``var`` for paired groups.

    The pairs are medal vs. non-medal, female vs. male, and winter vs. summer;
    each plot gets its own title.

    NOTE(review): the female/male call labels the x-axis 'Male' although the
    female data is passed first, while the other two calls label the x-axis
    after the first argument -- confirm which ordering is intended.
    """
    # (first sample, second sample, xlabel, ylabel, title template)
    comparisons = (
        (medal_df, non_medal_df, 'Medal', 'Non-Medal',
         '{var} for Medal v. Non-Medal'),
        (female_df, male_df, 'Male', 'Female',
         '{var} for Female v. Male'),
        (winter_df, summer_df, 'Winter', 'Summer',
         '{var} for Winter v. Summer'),
    )
    for first_df, second_df, x_name, y_name, title_template in comparisons:
        qqplot_2samples(first_df[var],
                        second_df[var],
                        xlabel=x_name,
                        ylabel=y_name,
                        line='45')
        plt.title(title_template.format(var=var))
Пример #7
0
# Figure 14.1: comparing ACFs
# One ACF curve per MCMC algorithm and per selected component t of the chain.
plt.figure()
nlags = 160
cols = {'Gibbs': 'gray', 'marginal': 'black'}
lss = {'Gibbs': '--', 'marginal': '-'}
for t in [0, 49, 99, 149, 199]:
    for alg_name, alg in algos.items():
        if not isinstance(alg, mcmc.MCMC):
            continue  # only MCMC samplers have a chain to analyse
        # discard the first tenth of the iterations before computing the ACF
        burnin = int(alg.niter / 10)
        acf_x = acf(alg.chain.x[burnin:, t], nlags=nlags, fft=True)
        # real label only for the first component, so it is set only once
        lbl = alg_name if t <= 0 else '_'
        plt.plot(acf_x,
                 label=lbl,
                 color=cols[alg_name],
                 linestyle=lss[alg_name],
                 linewidth=2)
plt.axis([0, nlags, -0.03, 1.])
plt.xlabel('lag')
plt.ylabel('ACF')
plt.legend()
if savefigs:
    plt.savefig('acf_gibbs_marginal_smoothing_stochvol.pdf')

# Figure 14.2: qq-plots to check that MCMC samplers target the same posterior
plt.figure()
# Draw on the axes of the figure just created: when ``ax`` is omitted,
# qqplot_2samples opens a figure of its own and the one above stays empty.
qqplot_2samples(algos['Gibbs'].chain.x[:, 0],
                algos['marginal'].chain.x[:, 0],
                ax=plt.gca())
if savefigs:
    plt.savefig('qqplots_gibbs_vs_marginal_stochvol.pdf')

plt.show()
Пример #8
0
# Summary statistics for the second set of marks: mean, sample standard
# deviation, and a "step" of one third of that deviation.
# NOTE(review): `_sum2` is presumably the sum of `marks2` -- it is computed
# before this excerpt; confirm.
mean2 = _sum2/len(marks2)
sd2 = statistics.stdev(marks2)
step2 = sd2/3
# Print the statistics of both sets (`mean`, `sd`, `step` come from earlier code).
print('mean '+str(mean)+ ' mean2 '+str(mean2))
print('sd '+str(sd)+'sd2 '+str(sd2))
print('step '+str(step)+'step2 '+str(step2))
# Express every mark in `marks2` as a number of steps away from `mean2`.
# NOTE(review): the loop starts from whatever value `i` already holds;
# confirm it is 0 at this point.
while(i<len(marks2)):
    Z2.append((marks2[i]-mean2)/step2)
    i+=1
print(Z2)

# Two-sample Q-Q plot of the raw marks with a 45-degree reference line.
# NOTE(review): qqplot_2samples is called without `ax`; per its documentation
# it then creates a new figure, which would leave this one empty -- confirm.
plt.figure()
#plt.scatter(Z, Z2)
pp_x = sm.ProbPlot(np.array(marks))
pp_y = sm.ProbPlot(np.array(marks2))
qqplot_2samples(pp_x, pp_y, line='45')
#qqplot(Z, Z2, c='r', alpha=0.5, edgecolor='k')
# Replace the axis labels set by the plot with the section names.
plt.xlabel('Section-1')
plt.ylabel('Section-2')
#plt.show()


i=0
while (j < len(marks)):
    idx = 0
    flag = False
    i = 0
    while i < len(scores):
        if (marks[j] >= scores[i]):
            flag = True
            if (i != 0):
Пример #9
0
'''
Create a Q-Q plot of the quantiles of two randomly generated samples.

'''

import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot_2samples

# Two independent Gaussian samples of 37 values each, with different
# location and scale.
x = np.random.normal(size=37, loc=8.5, scale=2.5)
y = np.random.normal(size=37, loc=8.0, scale=3.0)

# Wrap each sample in a ProbPlot and compare their quantiles.
pp_x, pp_y = sm.ProbPlot(x), sm.ProbPlot(y)
qqplot_2samples(pp_x, pp_y)
plt.show()
Пример #10
0
 def test_qqplot_2samples_prob_plot_obj(self, close_figures):
     """Smoke test ``qqplot_2samples`` on ``ProbPlot`` inputs for each ``line`` value."""
     line_options = ("r", "q", "45", "s")
     for line_option in line_options:
         # Both arguments are the fixture's ``ProbPlot`` instances.
         qqplot_2samples(
             self.prbplt, self.other_prbplot, line=line_option
         )
Пример #11
0
def show_qq_plot(data, current, previous, title, ax, is_spiral=False):
    """Draw a Q-Q plot of ``current`` against ``previous`` on ``ax``.

    Both samples are wrapped in ``sm.ProbPlot`` and plotted with ``line="r"``;
    a grid and ``title`` are then added to the axes.  ``data`` and
    ``is_spiral`` are accepted but not used by this function.
    """
    qqplot_2samples(
        sm.ProbPlot(current),
        sm.ProbPlot(previous),
        line="r",
        ax=ax,
    )
    ax.grid()
    ax.set_title(title)
Пример #12
0
# -*- coding: utf-8 -*-

import numpy as np
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqplot_2samples
from matplotlib import pyplot as plt
###################################################
# QQ-plot

# Two independent Gaussian samples of 37 values each.
x = np.random.normal(loc=8.5, scale=2.5, size=37)
y = np.random.normal(loc=8.0, scale=3.0, size=37)
pp_x = sm.ProbPlot(x)
pp_y = sm.ProbPlot(y)
# line=None draws no reference line; ax=None lets statsmodels create the figure.
fig = qqplot_2samples(pp_x,
                      pp_y,
                      xlabel="N(8.5,2.5)",
                      ylabel="N(8,3)",
                      line=None,
                      ax=None)
fig.show(warn=True)
# Wait for Enter before the script ends (presumably to keep the figure window
# open).  ``raw_input`` exists only on Python 2 and raises NameError on
# Python 3, so fall back to ``input`` there.
try:
    wait_for_enter = raw_input
except NameError:
    wait_for_enter = input
wait_for_enter("Enter: ")
Пример #13
0
def _dropZeros(values):
    """Return the entries of ``values`` that are not equal to zero, in order."""
    return [value for value in values if value != 0]


def _lineGrid(logntmList, logtmList):
    """Return ``numpy.arange`` from the joint minimum to the joint maximum, step 0.1."""
    lowerBound = min(min(logntmList), min(logtmList))
    upperBound = max(max(logntmList), max(logtmList))
    return numpy.arange(lowerBound, upperBound, 0.1)


def _saveQQPlot(logtmList, logntmList, title, pngPath):
    """Title the current axes, draw the 45-degree Q-Q plot on them, save and close."""
    plt.title(title)
    ax = plt.gca()
    tmData = sm.ProbPlot(numpy.array(logtmList))
    ntmData = sm.ProbPlot(numpy.array(logntmList))
    qqplot_2samples(data1=tmData, data2=ntmData,
                    xlabel='non-topmed', ylabel='just-topmed',
                    line="45", ax=ax)
    plt.savefig(pngPath)
    plt.close()


def plotDists(topmedDict, nontopmedDict, topmedKeys, nontopmedKeys, graphFileName):
    """Plot and KS-test topmed vs non-topmed distributions, pooled and per key pair.

    For the pooled lists from ``createListsFromDict`` and then for every
    ``(topmedKeys[i], nontopmedKeys[i])`` pair (lists from
    ``createListsPerEthnicity``) this draws a scatter plot, a histogram and a
    Q-Q plot saved as a PNG whose name starts with ``graphFileName``, and
    prints the two-sample KS test computed on the non-zero values.

    NOTE(review): the pooled section passes the non-topmed name first to
    ``plotScatter``/``plotHist`` while the per-key section passes ``tmkey``
    first, and the per-key KS test uses the raw dictionary values rather than
    the lists returned by ``createListsPerEthnicity`` -- confirm both are
    intended.
    """
    # ---- all keys pooled ----
    logntmList, logtmList = createListsFromDict(nontopmedDict, topmedDict)
    lineNumbers = _lineGrid(logntmList, logtmList)

    plotScatter(logntmList, logtmList, lineNumbers, graphFileName, 'non_topmed', 'just_topmed')
    plotHist(logntmList, logtmList, graphFileName, 'non_topmed', 'just_topmed')

    sampleCount = str(len(logntmList))
    _saveQQPlot(logtmList, logntmList,
                'all non-topmed vs all just-topmed QQ n=' + sampleCount,
                graphFileName + '_non_topmed_vs_just_topmed_QQ_n=' + sampleCount + '.png')

    # KS test on the non-zero entries only
    nonZeroTM = _dropZeros(logtmList)
    nonZeroNTM = _dropZeros(logntmList)
    ksTest = ks_2samp(nonZeroNTM, nonZeroTM)
    print('ksTest for non-zero: just_topmed vs non_topmed : ' + str(ksTest))

    # ---- one pass per (topmed key, non-topmed key) pair ----
    for index, tmkey in enumerate(topmedKeys):
        ntmkey = nontopmedKeys[index]

        logntmList, logtmList = createListsPerEthnicity(nontopmedDict, topmedDict, ntmkey, tmkey)
        lineNumbers = _lineGrid(logntmList, logtmList)

        plotScatter(logntmList, logtmList, lineNumbers, graphFileName, tmkey, ntmkey)
        plotHist(logntmList, logtmList, graphFileName, tmkey, ntmkey)

        # title and file name share the same stem; the file adds '.png'
        sampleCount = str(len(logntmList))
        qqStem = graphFileName + '_' + ntmkey + '_vs_' + tmkey + '_QQ_n=' + sampleCount
        _saveQQPlot(logtmList, logntmList, qqStem, qqStem + '.png')

        nonZeroTM = _dropZeros(topmedDict[tmkey])
        nonZeroNTM = _dropZeros(nontopmedDict[ntmkey])
        ksTest = ks_2samp(nonZeroNTM, nonZeroTM)

        print('ksTest for non-zero: ' + ntmkey + ' vs ' + tmkey + ' : ' + str(ksTest))
Пример #14
0
# -*- coding: utf-8 -*-

import numpy as np
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqplot_2samples
from matplotlib import pyplot as plt
###################################################
# QQ-plot

# Two independent Gaussian samples of 37 values each.
x = np.random.normal(loc=8.5, scale=2.5, size=37)
y = np.random.normal(loc=8.0, scale=3.0, size=37)
pp_x = sm.ProbPlot(x)
pp_y = sm.ProbPlot(y)
# line=None draws no reference line; ax=None lets statsmodels create the figure.
fig = qqplot_2samples(pp_x, pp_y, xlabel="N(8.5,2.5)", ylabel="N(8,3)", line=None, ax=None)
fig.show(warn=True)
# Wait for Enter before the script ends (presumably to keep the figure window
# open).  ``raw_input`` exists only on Python 2 and raises NameError on
# Python 3, so fall back to ``input`` there.
try:
    wait_for_enter = raw_input
except NameError:
    wait_for_enter = input
wait_for_enter("Enter: ")
Пример #15
0
def qqplot(model, X, y, ax=None):
    """Show a Q-Q plot of the first column of ``y`` against one model sample.

    A single sample is requested with ``model.predict_f_samples(X, 1)`` and
    its ``[0, :, 0]`` slice is compared with ``y[:, 0]`` using a 45-degree
    reference line.  The plot is drawn on ``ax`` when given and displayed
    with ``plt.show()``.
    """
    posterior_draw = model.predict_f_samples(X, 1)[0, :, 0]
    observed = y[:, 0]
    qqplot_2samples(
        observed,
        posterior_draw,
        xlabel='Data quantiles',
        ylabel='Posterior quantiles',
        line='45',
        ax=ax,
    )
    plt.show()
Пример #16
0
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot_2samples

fig, ax = plt.subplots()

# Each sample is a list of fractions multiplied by 5000.
x = 5000 * np.array(
    [0.2938, 0.205, 0.1532, 0.1092, 0.077, 0.0598, 0.0522, 0.0498]
)
y = 5000 * np.array(
    [0.354, 0.216, 0.144, 0.109, 0.0706, 0.058, 0.028, 0.016]
)

# The raw arrays are passed directly; no ProbPlot wrappers are built here.
qqplot_2samples(x, y, ax=ax)

# Overlay the identity line across the current x-limits (this reuses `x`).
x = np.linspace(*ax.get_xlim())
ax.plot(x, x)

plt.xlabel("Quantiles of wins in BBE")
plt.ylabel("Quantiles of wins in real horse race-events")
plt.show()
Пример #17
0
 def make_qqplot(x: List, y: List) -> None:
     """Draw a two-sample Q-Q plot of ``x`` against ``y`` and save it via ``plt.savefig("testing")``."""
     qqplot_2samples(
         sm.ProbPlot(x),
         sm.ProbPlot(y),
     )
     plt.savefig("testing")
Пример #18
0
        for value in line.split(","):
            if counter == 0:
                k = value
                data[value] = {}
            elif counter < 5:
                data[k]["r" + str(counter)] = value
            else:
                data[k]["genre"] = value

            counter += 1

# Collect the "r1" and "r3" fields of every record as floats.
r1 = []
r3 = []

for ratings in data.values():
    for field, raw in ratings.items():
        if field == "r1":
            r1.append(float(raw))
        if field == "r3":
            r3.append(float(raw))

# Compare the two rating distributions with a regression-line Q-Q plot.
x = sm.ProbPlot(np.array(r1))
y = sm.ProbPlot(np.array(r3))
fig = sm.qqplot_2samples(
    x,
    y,
    line="r",
    xlabel="avg rating website 1 quantiles",
    ylabel="avg rating website 3 quantiles",
)
plt.title("Q-Q Plot")
plt.show()
Пример #19
0
 def test_qqplot_2samples_arrays(self, close_figures):
     """Smoke test ``qqplot_2samples`` on ``self.res`` / ``self.other_array`` for each ``line`` value."""
     line_options = ("r", "q", "45", "s")
     for line_option in line_options:
         # Inputs are passed as-is rather than wrapped in ``ProbPlot``.
         qqplot_2samples(
             self.res, self.other_array, line=line_option
         )
Пример #20
0
]

# 2.4(1): mean, median and population standard deviation of each variable,
# printed with two decimals.
age_mean, age_median, age_deviation = (
    statistics.mean(ages),
    statistics.median(ages),
    statistics.pstdev(ages),
)
print(*('%.2f' % stat for stat in (age_mean, age_median, age_deviation)))

fats_mean, fats_median, fats_deviation = (
    statistics.mean(fats),
    statistics.median(fats),
    statistics.pstdev(fats),
)
print(*('%.2f' % stat for stat in (fats_mean, fats_median, fats_deviation)))

# 2.4(2): one box plot per variable, each shown in its own window.
for sample, tick_label in ((ages, 'ages'), (fats, 'fats%')):
    plt.boxplot(sample, patch_artist=True, labels=[tick_label])
    plt.show()

# 2.4(3): scatter plot, then a Q-Q plot of the two variables.
plt.scatter(ages, fats)
plt.xlabel("ages")
plt.ylabel("fats%")
plt.show()

ages_array = np.asarray(ages)
fats_array = np.asarray(fats)
pp_ages = sm.ProbPlot(ages_array)
pp_fats = sm.ProbPlot(fats_array)
# fats is passed first and ages second, with a regression reference line.
qqplot_2samples(pp_fats, pp_ages, line='r', xlabel='ages', ylabel='fats%')
plt.show()
Пример #21
0
def _shared_values(a_scores, b_scores):
    """Return parallel lists of the values stored under keys present in both mappings."""
    a_values = []
    b_values = []
    for key in a_scores:
        if key in b_scores:
            a_values.append(a_scores[key])
            b_values.append(b_scores[key])
    return a_values, b_values


def _save_qq_plot(b_values, a_values, ylabel, path):
    """Save a 2x2-inch regression-line Q-Q plot of ``b_values`` against ``a_values``."""
    # qqplot_2samples puts the "2nd Sample" on the x-axis
    # see documentation for statsmodels.graphics.gofplots
    # NOTE(review): confirm this against the installed statsmodels version.
    fig, ax = plt.subplots(figsize=(2, 2))
    qqplot_2samples(np.array(b_values), np.array(a_values), line='r', ax=ax)
    ax.set_xlabel('S-score Quantiles', fontname='Helvetica', fontsize=6)
    ax.set_ylabel(ylabel, fontname='Helvetica', fontsize=6)
    pylab.savefig(path, transparent=True, bbox_inches='tight', dpi=300)


def main():
    """Scale the SGA_NxN scores onto the cF3 score range and write diagnostics.

    Steps, in order:

    1. Read both datasets and collect the scores of interactions present in
       both.
    2. Shift the sorted SGA values so they contain as many negative values as
       the cF3 values.
    3. Compute per-bin scaling factors from the lowest and highest bins of
       the sorted shared values and fit a smoothing spline through them.
    4. Apply the bounded spline to every SGA score and write the scaled
       dataset.
    5. Write a log, the spline table, scatter plots and Q-Q plots (before and
       after scaling) into the output folder.

    Takes no arguments and returns None; all inputs and outputs are files at
    hard-coded relative paths.
    """
    outdir = '../../Supplemental_Figures/SGA_Scaling/scaler_output'

    # make output folder (no error if it already exists)
    os.makedirs(outdir, exist_ok=True)

    # define datasets, datasetB is scaled to match datasetA
    datasetA = '../../Data/SGA_Scaling/cF3.txt'
    datasetB = '../../Data/SGA_Scaling/SGA_NxN_avg.txt'

    # read in the two datasets (only the interaction scores of A are used)
    ints, _profs, _genes = read_square_dataset_small(datasetA,
                                                     "",
                                                     "\t",
                                                     split=True,
                                                     profiles=False)
    b_ints, _b_profs, b_genes = read_square_dataset_small(datasetB,
                                                          "",
                                                          "\t",
                                                          split=True,
                                                          profiles=False)

    # scores of the interactions found in both datasets, in matching order
    avalues, bvalues = _shared_values(ints, b_ints)

    asorted = sorted(avalues)
    bsorted = sorted(bvalues)

    # shift datasetB so that it has the same number of negative values
    # as datasetA (makes it a little easier to scale)
    negative_count = sum(1 for x in asorted if x < 0)
    adjustment = -bsorted[negative_count]
    bsorted = [x + adjustment for x in bsorted]

    # plot scatter plot showing shared interactions
    density_scatter_plot(ints,
                         b_ints,
                         outdir + '/unscaled_scatter.png',
                         xlabel='S-score',
                         ylabel='SGA score')

    # record dataset information in log
    with open(outdir + '/scaler_log.txt', 'w') as f:
        f.write("cF3 EMAP has {} interactions\n".format(len(ints)))
        f.write("SGA_NxN has {} interactions\n".format(len(b_ints)))
        f.write("The sets have {} interactions in common\n".format(
            len(avalues)))
        f.write("Dataset correlation = {}\n".format(
            np.corrcoef(avalues, bvalues)[0][1]))
        f.write("Adjustment so that the SGA_NxN shared interaction "
                "set has the same number of negative values as the "
                "cF3 EMAP.\nadjustment={}\n".format(adjustment))

    ## Computing scaling values
    # The sorted shared values are partitioned into `bins` overlapping bins
    # (each spans two bin widths).  For every bin used, the mean of the
    # datasetA values is divided by the mean of the (shifted) datasetB values;
    # this is the scaling factor for datasetB scores around that bin's mean.
    # Values close to zero give unpredictable scaling factors, so only the
    # bins below `lower_threshold` and above `upper_threshold` are used.
    # Depending on the size of your overlap you may want to tweak the number
    # of bins.
    bins = 500
    binsize = len(avalues) / bins
    score = []
    scale = []

    lower_threshold = 0.05
    upper_threshold = 0.99

    bin_ranges = (np.arange(1, bins * lower_threshold),
                  np.arange(bins * upper_threshold, bins))
    for bin_range in bin_ranges:
        for i in bin_range:
            start = int(i * binsize - binsize)
            end = int(i * binsize + binsize)
            b_mean = np.mean(bsorted[start:end])
            score.append(b_mean)
            scale.append(np.mean(asorted[start:end]) / b_mean)

    # This function creates a curve which maps scores to scaling factors
    # the s=0.02 defines how close the curve fits your data points
    # large values give crap curves, small values may overfit your data
    # it's best to look at the resulting curve and tweak s= as appropriate
    svalue = 0.02
    s = UnivariateSpline(score, scale, s=svalue)

    # `score` is not modified past this point, so its bounds and the
    # evaluation grid are computed once and reused below.
    score_min = min(score)
    score_max = max(score)
    score_grid = np.arange(score_min, score_max, 0.01)
    fitted_scale = s(score_grid)

    # displays the fitted curve (in red) and the scaling values (in black)
    plt.figure(figsize=(2, 2), dpi=300, facecolor='w', edgecolor='k')
    plt.plot(score_grid, fitted_scale, color="red", linewidth=1)
    plt.scatter(score, scale, color="black", s=3)
    plt.xlim(1.1 * score_min, 1.1 * score_max)
    plt.ylim(0.9 * min(scale), 1.1 * max(scale))
    plt.ylabel('Scaling Factor', fontname='Helvetica', fontsize=6)
    plt.xlabel('SGA Score', fontname='Helvetica', fontsize=6)
    pylab.savefig(outdir + "/scaling_factor_curve.png",
                  format='png',
                  transparent=True,
                  bbox_inches='tight',
                  dpi=300)

    # if the value to be scaled lies outside the range of scores in our
    # training set, we use the scaling factor of the nearest observed bound
    def s_bounded(x):
        if x < score_min:
            x = score_min
        elif x > score_max:
            x = score_max
        return s(x)

    # This function applies our scaling factor to a given value
    def g(x):
        shifted = x + adjustment
        return shifted * s_bounded(shifted)

    # only values are replaced, so iterating over the keys while assigning
    # is safe
    for i in b_ints:
        b_ints[i] = float(g(b_ints[i]))

    scaled_dataset_file = "../../Data/SGA_Scaling/SGA_NxN_scaled_to_cF3.txt"
    output_delimited_text(scaled_dataset_file, b_genes, b_genes, b_ints, True)

    # save scaling info to log
    with open(outdir + '/scaler_log.txt', 'a') as f:
        f.write("Number of bins used: {}\n".format(bins))
        f.write("Lower threshold for bins: {}\n".format(lower_threshold))
        f.write("Upper threshold for bins: {}\n".format(upper_threshold))
        f.write("S value for fitting spline: {}\n".format(svalue))
        f.write("max_score={}\n".format(score_max))
        f.write("min_score={}\n".format(score_min))

    # save spline for scaling full SGA in R
    spline = pd.DataFrame(data=np.stack((score_grid, fitted_scale)).T,
                          columns=['score', 'scale'])
    spline.to_csv(outdir + '/spline.txt', sep='\t', index=False)

    # Plot scatter plot of shared interactions after scaling
    density_scatter_plot(ints,
                         b_ints,
                         outdir + '/scaled_scatter.png',
                         xlabel='S-score',
                         ylabel='Scaled SGA score')

    # Make QQ Plots using the interactions before and after scaling
    avalues_after_scaling, bvalues_after_scaling = _shared_values(ints, b_ints)

    _save_qq_plot(bvalues_after_scaling,
                  avalues_after_scaling,
                  'Scaled SGA score Quantiles',
                  outdir + "/qq_scaled.png")
    _save_qq_plot(bvalues,
                  avalues,
                  'SGA score Quantiles',
                  outdir + "/qq_unscaled.png")
Пример #22
0
# Create the output folder; exist_ok avoids the check-then-create race.
os.makedirs("plots", exist_ok=True)

# For every label: a scatter plot and a Q-Q plot of its first two features.
for label in top_features:
    features = top_features[label]
    plt.scatter(data[features[0]], data[features[1]], s=7)
    plt.xlabel(features[0].title())
    plt.ylabel(features[1].title())
    plt.title("Top Features (" + label + ")")
    plt.savefig("plots/scatter-plot-" + label.lower() + ".png")
    plt.clf()

    plot_feature1 = gofplots.ProbPlot(data[features[0]])
    plot_feature2 = gofplots.ProbPlot(data[features[1]])
    fig = gofplots.qqplot_2samples(
        plot_feature1, plot_feature2, line="r", xlabel=features[0], ylabel=features[1])

    # Shrink every artist whose colour is 'b' to a marker size of 3.
    dots = fig.findobj(lambda x: hasattr(
        x, 'get_color') and x.get_color() == 'b')
    for dot in dots:
        dot.set_ms(3)

    plt.title("Probability Plot (" + label + ")")
    plt.savefig("plots/pp-plot-" + label.lower() + ".png")
    # qqplot_2samples opened a new figure (no ``ax`` was given); close it
    # instead of only clearing it so figures do not pile up across iterations.
    plt.close(fig)


def intuitive_partion(data, clip=True):
    if len(data) == 0:
        return data

    data = np.array(data, dtype=float)