Пример #1
0
def test_threshold():
    """Check the threshold and count returned by ``ecdf`` for percentile=0.8.

    The input is the first column of the array returned by ``iris_data``,
    plotted with the x-axis label 'sepal length (cm)'.
    """
    features, _ = iris_data()
    first_column = features[:, 0]

    result = ecdf(x=first_column,
                  x_label='sepal length (cm)',
                  percentile=0.8)

    # ecdf returns three values; the first one is not checked here.
    _, threshold, count = result
    assert threshold == 6.5
    assert count == 120
Пример #2
0
def test_threshold():
    """Verify the threshold and count that ``ecdf`` reports for percentile=0.8."""
    samples, _ = iris_data()

    # Only the last two returned values are checked; the first is ignored.
    _, threshold, count = ecdf(
        x=samples[:, 0],
        x_label='sepal length (cm)',
        percentile=0.8,
    )

    expected_threshold, expected_count = 6.5, 120
    assert threshold == expected_threshold
    assert count == expected_count
Пример #3
0
    def plot_ecdf(self, result_max):
        """Plot one ECDF per entry of ``result_max`` and save the figure as a PDF.

        Each entry is passed to ``ecdf`` together with the marker found at the
        same position in a fixed list of six markers, so passing more than six
        entries raises ``IndexError``. The result is written to
        ``metric_max_ecdf.pdf``.
        """
        # Clear the current figure before plotting.
        plt.clf()

        markers = ['1', '|', '_', '.', 'o', '2']
        series_names = [
            'Fixed 1', 'Fixed 2', 'Fixed 4', 'Fixed 8', 'Fixed 12', 'MARTINI'
        ]

        for position, series in enumerate(result_max):
            # The return value is unpacked as a 3-tuple but not used further.
            _, _, _ = ecdf(x=series, ecdf_marker=markers[position])

        legend_options = {
            'loc': 'upper center',
            'ncol': 3,
            'fancybox': True,
            'bbox_to_anchor': (0.5, 1.15),
        }
        plt.legend(series_names, **legend_options)
        plt.xlabel('Time Interval (minutes)')

        plt.savefig('metric_max_ecdf.pdf', format='pdf', bbox_inches="tight")
# check the number of unique (non-repeated) labels/items in the data set
#summary_stats_unique = raw_data_frame.nunique().T

# check data types
#data_frame_data_types = raw_data_frame.info()

###############################################################################
## EXPLORATORY DATA ANALYSIS (EDA): Empirical Cumulative Distribution Function (ECDF)

# Clear the current figure so that earlier plots do not carry over into this one.
plt.clf()

# Plot the ECDF of the 'RE' column of df_A (green) and of df_B (red).
# Only the first call passes an x-axis label.
ax, _, _ = ecdf(x=df_A.loc[:, 'RE'],
                x_label='relative engagement',
                ecdf_color='green')
ax, _, _ = ecdf(x=df_B.loc[:, 'RE'], ecdf_color='red')
#ax, _, _ = ecdf(x = test_c, ecdf_color = 'yellow')
#ax, _, _ = ecdf(x = merged_df_percentile_2.loc[:, 'percentiles'], ecdf_color = 'red')
#ax, _, _ = ecdf(x = merged_df_percentile_3.loc[:, 'percentiles'], ecdf_color = 'blue')
#ax, _, _ = ecdf(x = merged_df_percentile.loc[:, 'percentiles'], ecdf_color = 'yellow')

# Kept as module-level names in case later parts of the script read them.
handles, labels = ax.get_legend_handles_labels()

# Pass only ``labels``: when a positional handles argument is mixed with the
# ``labels`` keyword, matplotlib ignores the positional one (with a warning,
# deprecated since 3.9) and pairs the labels with the axes' artists itself.
# Dropping the positional argument keeps the same legend without the warning.
ax.legend(
    labels=['test_a', 'test_b'],
    #labels = ['RE_100_quantiles', 'RE_1000_quantiles', 'RE_log', 'RE_log_10'],
    framealpha=0.3,
    scatterpoints=1,
    loc='upper left')