def test_threshold():
    """Check the threshold and count ``ecdf`` reports at ``percentile=0.8``.

    Uses the first feature column of the iris data (labelled as sepal
    length on the x axis) and expects a threshold of 6.5 and a count
    of 120.
    """
    features, _ = iris_data()
    sepal_length = features[:, 0]

    # Only the threshold and count are checked; the returned axes is ignored.
    _, threshold, count = ecdf(
        x=sepal_length,
        x_label='sepal length (cm)',
        percentile=0.8,
    )

    assert threshold == 6.5
    assert count == 120
def plot_ecdf(self, result_max):
    """Plot one ECDF per entry of ``result_max`` and save the figure as a PDF.

    The current figure is cleared first, each series is drawn with its own
    marker, a fixed six-entry legend is added, and the result is written to
    ``metric_max_ecdf.pdf`` in the working directory.

    Args:
        result_max: Iterable of data series passed one by one to ``ecdf``.
            Presumably ordered to match the legend labels below
            ('Fixed 1' ... 'MARTINI') -- confirm against the caller.

    Raises:
        IndexError: If ``result_max`` holds more than six series, because
            only six markers are defined.
    """
    plt.clf()

    # One marker per series, looked up by position.
    strategy_icon = ['1', '|', '_', '.', 'o', '2']
    for indx, result in enumerate(result_max):
        # The values returned by ecdf are not needed here: everything
        # after the loop goes through the pyplot state interface.
        ecdf(x=result, ecdf_marker=strategy_icon[indx])

    plt.legend(
        ['Fixed 1', 'Fixed 2', 'Fixed 4', 'Fixed 8', 'Fixed 12', 'MARTINI'],
        loc='upper center',
        ncol=3,
        fancybox=True,
        bbox_to_anchor=(0.5, 1.15),
    )
    plt.xlabel('Time Interval (minutes)')
    plt.savefig('metric_max_ecdf.pdf', bbox_inches="tight", format='pdf')
# Check unique (non-repeated) labels/items in the data set.
# summary_stats_unique = raw_data_frame.nunique().T

# Check data types.
# data_frame_data_types = raw_data_frame.info()

###############################################################################
## EXPLORATORY DATA ANALYSIS (EDA):
## Empirical Cumulative Distribution Function (ECDF)

# Clear the current figure so this plot is not drawn over previous ones.
plt.clf()

# Plot the ECDF of the 'RE' column for each data frame.
# NOTE(review): both calls are presumably drawn on the same (current) axes,
# since no `ax` is passed -- confirm against the ecdf helper.
ax, _, _ = ecdf(
    x=df_A.loc[:, 'RE'],
    x_label='relative engagement',
    ecdf_color='green',
)
ax, _, _ = ecdf(x=df_B.loc[:, 'RE'], ecdf_color='red')

# Alternative inputs, kept for reference:
# ax, _, _ = ecdf(x=test_c, ecdf_color='yellow')
# ax, _, _ = ecdf(x=merged_df_percentile_2.loc[:, 'percentiles'], ecdf_color='red')
# ax, _, _ = ecdf(x=merged_df_percentile_3.loc[:, 'percentiles'], ecdf_color='blue')
# ax, _, _ = ecdf(x=merged_df_percentile.loc[:, 'percentiles'], ecdf_color='yellow')

# Pass the labels by keyword only. Mixing positional handles with keyword
# `labels` makes matplotlib discard the positional handles and emit a warning
# (deprecated since matplotlib 3.9); with `labels` alone, the labels are
# assigned to the axes' artists in the order they were drawn.
ax.legend(
    labels=['test_a', 'test_b'],
    # labels=['RE_100_quantiles', 'RE_1000_quantiles', 'RE_log', 'RE_log_10'],
    framealpha=0.3,
    scatterpoints=1,
    loc='upper left',
)