def calc_pairs(self, labels, data):
    """
    Compute the scaled U statistic for every label pairing and operation.

    Parameters
    ----------
    labels : ndarray
        1-D array holding the label of each row in ``data``.
    data : ndarray
        Data array; each row corresponds to a timeseries and each column
        to an operation.

    Returns
    -------
    ndarray
        The first output of ``fistat.u_stat_all_label`` divided by its
        second output (the normalisation factors), i.e. the scaled
        U statistic for each label pairing and each operation.
    """
    # Only the first two outputs of the helper are needed here.
    stat_outputs = fistat.u_stat_all_label(data, labels=labels)
    raw_u, norm_factors = stat_outputs[0:2]

    # Add a trailing axis to the factors so they broadcast across the
    # second axis of the raw statistics.
    scaled_u = raw_u / norm_factors[:, np.newaxis]
    return scaled_u
def calc_pairs(self, labels, data):
    """
    Return normalised U statistics for all label pairs and operations.

    Parameters
    ----------
    labels : ndarray
        1-D array containing the label for each row in ``data``.
    data : ndarray
        Array containing the data. Each row corresponds to a timeseries
        and each column to an operation.

    Returns
    -------
    ndarray
        Scaled U statistic for each label pairing and each operation,
        obtained by dividing the statistics returned by
        ``fistat.u_stat_all_label`` by the matching normalisation factor.
    """
    helper_result = fistat.u_stat_all_label(data, labels=labels)
    # The helper may return more than two values; keep the first two only.
    u_values, u_norm = helper_result[0:2]
    # Column-vector view of the factors so the division broadcasts over
    # the remaining axis of u_values.
    norm_column = u_norm[:, np.newaxis]
    return u_values / norm_column
# ---------------------------------------------------------------------------------
# -- Concatenate the data of all mat files (only when COMPUTE_COMPLETE_DATA is set)
# ---------------------------------------------------------------------------------
if COMPUTE_COMPLETE_DATA:
    data_all, op_id_good = fap.cat_data_from_matfile_root(
        mat_file_paths,
        count_op_id_min,
        is_from_old_matlab=IS_FROM_OLD_MATLAB,
        data_all_good_op_path=data_all_good_op_path,
        op_id_good_path=op_id_good_path,
        is_return_masked=False,
    )
    # -- Create masked array from data_all
    # data_all = np.ma.masked_invalid(data_all)

# ---------------------------------------------------------------------------------
# -- Calculate U_statistics for the problems
# ---------------------------------------------------------------------------------
if CALCULATE_U_STATS:
    if CALCULATE_ONLY_NEW_U_STATS:
        # -- skip problems with already calculated U-stats
        task_names = tstat.filter_calculated(
            mat_file_root,
            HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat',
        )
        # Rebuild the mat-file path of every remaining task so that paths
        # and task names stay aligned element by element.
        file_paths = [
            mat_file_root + "HCTSA_{0:s}_N_70_100_reduced.mat".format(s)
            for s in task_names
        ]
    else:
        # -- calculate U-stats for all problems
        file_paths = mat_file_paths
        _, task_names = tstat.get_calculated_names(
            mat_file_root,
            HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat',
        )

    # Use file_paths (previously assigned but never used) instead of the
    # unfiltered mat_file_paths: when only new U-stats are requested,
    # task_names is a filtered list and would otherwise not correspond to
    # the paths handed to the helper.
    u_stat_file_paths = tstat.calculate_ustat_mult_tasks(
        file_paths,
        task_names,
        ustat_data_out_folder,
        is_from_old_matlab=IS_FROM_OLD_MATLAB,
    )

if CALCULATE_U_STATS_ALL_CLASSES_AVG:
    # NOTE(review): this step reads u_stat_file_paths, which is only assigned
    # when CALCULATE_U_STATS is set, and pairs it with the complete
    # mat_file_paths list -- confirm both lists are meant to line up when
    # CALCULATE_ONLY_NEW_U_STATS is enabled.
    all_classes_avg = tstat.calculate_ustat_avg_mult_task(
        mat_file_paths,
        u_stat_file_paths,
        all_classes_avg_out_path,
        is_from_old_matlab=IS_FROM_OLD_MATLAB,
    )
# ---------------------------------------------------------------------------------
def calc_pairs(self, labels, data):
    """
    Scale the U statistics of ``data`` by their normalisation factors.

    Parameters
    ----------
    labels : ndarray
        Labels passed through to ``fistat.u_stat_all_label`` as ``labels``.
    data : ndarray
        Data passed through to ``fistat.u_stat_all_label``.

    Returns
    -------
    ndarray
        First output of ``fistat.u_stat_all_label`` divided by its second
        output, with the latter expanded by one trailing axis so that it
        broadcasts over the former.
    """
    # Keep just the leading two outputs of the helper.
    first_two = fistat.u_stat_all_label(data, labels=labels)[0:2]
    u_raw, u_scale = first_two
    scale_as_column = u_scale[:, np.newaxis]
    return u_raw / scale_as_column
# -- Calculate the average min (for each label pair separately) score for every problem
# The constant condition is a manual switch: set it to True to recompute the
# scores from the stored U-statistic files, otherwise the cached array is loaded.
if False:
    # np.nan instead of the np.NaN alias, which was removed in NumPy 2.0.
    avg_min_u_score = np.ones(problem_paths.shape[0]) * np.nan
    ustat_paths = np.array(glob.glob(intermediate_data_root + "/*_ustat.npy"))
    # NOTE(review): the pattern hard-codes a "../data/" prefix, so it presumably
    # expects intermediate_data_root to point there -- confirm.
    reg_ex = re.compile("../data/(.*)_ustat.npy")
    ustat_names = np.array(
        [reg_ex.match(ustat_path).group(1) for ustat_path in ustat_paths]
    )
    # -- sort ustat paths to match the problem_paths
    ustat_sort_ind = hlp.ismember(problem_names, ustat_names)
    ustat_paths = ustat_paths[ustat_sort_ind]
    ustat_names = ustat_names[ustat_sort_ind]
    for i, (ustat_path, mat_file_path) in enumerate(zip(ustat_paths, problem_paths)):
        ustat = np.load(ustat_path)
        # -- calculate the scaling factor for every label pairing of the current classification problem
        # NOTE(review): the flag is passed as the string "True", not a boolean;
        # left unchanged -- verify against u_stat_norm_factor.
        u_scale = testst.u_stat_norm_factor(mat_file_path, is_from_old_matlab="True")
        # Function-call form is valid on Python 3 and prints the same text
        # as the former print statement on Python 2.
        print(ustat_path)
        # Minimum along axis 1, scaled, then averaged into one score per problem.
        avg_min_u_score[i] = (np.min(ustat, axis=1) / u_scale).mean()
    np.save(avg_min_u_score_path, avg_min_u_score)
else:
    avg_min_u_score = np.load(avg_min_u_score_path)

# -- average minimum (for each class pair) U-score for top features
ax_measures10.plot(
    x_loc,
    avg_min_u_score[porblem_sort_ind],
    marker="o",
    label="avg. min. U-score all",
)
ax_measures10.legend(loc=2, fontsize="small", labelspacing=0.1)
ax_measures10.set_ylabel("u-score")
# ax_measures1.plot(x_loc,(~all_classes_avg_masked_sort.mask).sum(axis=1))
# plt.savefig('/home/philip/Desktop/tmp/figure_tmp/u_stat_array.png')
# plt.savefig('/home/philip/Desktop/tmp/figure_tmp/u_stat_array_z_column.png')
plt.show()
# Manual switch: change the constant to True to recompute the average minimum
# U-scores from the stored U-statistic files; otherwise the cached array is loaded.
if False:
    # np.nan replaces the np.NaN alias that NumPy 2.0 removed.
    avg_min_u_score = np.ones(problem_paths.shape[0]) * np.nan
    ustat_paths = np.array(glob.glob(intermediate_data_root + '/*_ustat.npy'))
    # NOTE(review): the pattern hard-codes a "../data/" prefix, so it presumably
    # expects intermediate_data_root to point there -- confirm.
    reg_ex = re.compile('../data/(.*)_ustat.npy')
    ustat_names = np.array(
        [reg_ex.match(ustat_path).group(1) for ustat_path in ustat_paths])
    # -- sort ustat paths to match the problem_paths
    ustat_sort_ind = hlp.ismember(problem_names, ustat_names)
    ustat_paths = ustat_paths[ustat_sort_ind]
    ustat_names = ustat_names[ustat_sort_ind]
    for i, (ustat_path, mat_file_path) in enumerate(zip(ustat_paths, problem_paths)):
        ustat = np.load(ustat_path)
        # -- calculate the scaling factor for every label pairing of the current classification problem
        # NOTE(review): the flag is the string 'True' rather than a boolean;
        # kept as-is -- verify against u_stat_norm_factor.
        u_scale = testst.u_stat_norm_factor(mat_file_path, is_from_old_matlab='True')
        # print() call works on Python 3 and yields the same output as the
        # former print statement on Python 2.
        print(ustat_path)
        # Minimum along axis 1, scaled, then averaged into one score per problem.
        avg_min_u_score[i] = (np.min(ustat, axis=1) / u_scale).mean()
    np.save(avg_min_u_score_path, avg_min_u_score)
else:
    avg_min_u_score = np.load(avg_min_u_score_path)

# -- average minimum (for each class pair) U-score for top features
ax_measures10.plot(x_loc, avg_min_u_score[porblem_sort_ind],
                   marker='o', label='avg. min. U-score all')
ax_measures10.legend(loc=2, fontsize='small', labelspacing=.1)
ax_measures10.set_ylabel('u-score')
# ax_measures1.plot(x_loc,(~all_classes_avg_masked_sort.mask).sum(axis=1))
#plt.savefig('/home/philip/Desktop/tmp/figure_tmp/u_stat_array.png')
data_all_good_op_path=data_all_good_op_path, op_id_good_path=op_id_good_path, is_return_masked=False) # -- Create masked array from data_all # data_all = np.ma.masked_invalid(data_all) # --------------------------------------------------------------------------------- # -- Calculate U_statistics for the problems # --------------------------------------------------------------------------------- if CALCULATE_U_STATS: # -- skip problems with already calculated U-stats if CALCULATE_ONLY_NEW_U_STATS: task_names = tstat.filter_calculated( mat_file_root, HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat') file_paths = [ mat_file_root + "HCTSA_{0:s}_N_70_100_reduced.mat".format(s) for s in task_names ] # -- calculate U-stats for all problems else: file_paths = mat_file_paths _, task_names = tstat.get_calculated_names( mat_file_root, HCTSA_name_search_pattern='HCTSA_(.*)_N_70_100_reduced.mat') u_stat_file_paths = tstat.calculate_ustat_mult_tasks( mat_file_paths,