def data_process(split=0.6):
    """Load 'dataset.mat', build an imbalanced training split with
    per-sample weights, and return it with a balanced test split.

    Parameters
    ----------
    split : float, optional
        Mixing factor forwarded to ``creat_sample_weight`` when turning
        class weights into per-sample weights.  Defaults to 0.6, the
        value the original code hard-coded, so existing callers are
        unaffected.

    Returns
    -------
    tuple
        ``(x_train_i, y_train_i, x_test_b, y_test_b, train_weight,
        ir_overall)`` — shuffled, one-hot-encoded imbalanced training
        data, the shuffled balanced test split, per-sample weights as a
        column vector aligned row-for-row with the training data, and
        the overall imbalance ratio from ``creat_sample_weight``.

    Side effects: writes the (pre-one-hot) shuffled training labels to
    ``y_train_i.txt``.
    """
    data = sio.loadmat('dataset.mat')
    x_data = data['f_data0']
    y_data = data['f_label']
    # x_data = norm_ZS(x_data)  # optional z-score normalisation (disabled)

    # Balanced 80/20 hold-out; only the test part is returned.
    x_train_b, x_test_b, y_train_b, y_test_b = train_test_split(
        x_data, y_data, test_size=0.2)

    # Per-class sample counts used to carve out an imbalanced training set.
    # Alternative severities kept for reference:
    # imbalanced_dict = {0: 50, 1: 20, 2: 20, 3: 20, 4: 5, 5: 5, 6: 5, 7: 2}
    # imbalanced_dict = {0: 50, 1: 10, 2: 10, 3: 10, 4: 3, 5: 3, 6: 3, 7: 1}
    imbalanced_dict = {0: 50, 1: 30, 2: 30, 3: 30, 4: 15, 5: 15, 6: 15, 7: 10}
    x_train_im, y_train_im, x_test_im, y_test_im, imbalanced_dict_1 = imbalanced_data(
        x_data, y_data, imbalanced_dict, refresh=False, seed=1)

    # Own-design class weights (sklearn's calculate_class_weigh variant is
    # intentionally disabled), expanded to per-sample weights.
    multi_class_weight = create_class_weight(imbalanced_dict_1)
    multi_sample_weight, ir_overall = creat_sample_weight(
        imbalanced_dict_1, multi_class_weight, split)

    # BUG FIX: the original shuffled (x, y) and (x, sample_weight) with two
    # independent shuffle_data() calls, so train_weight could end up in a
    # different row order than y_train_i (unless shuffle_data happens to be
    # deterministic per call).  Use ONE shared permutation so features,
    # labels and weights stay aligned.
    perm = np.random.permutation(len(x_train_im))
    x_train_i = np.asarray(x_train_im)[perm]
    y_train_i = np.asarray(y_train_im)[perm]
    train_weight = np.asarray(multi_sample_weight)[perm]
    train_weight = train_weight.reshape((len(train_weight), 1))

    # Persist the integer labels, then one-hot encode for training.
    np.savetxt("y_train_i.txt", y_train_i)
    y_train_i = to_categorical(y_train_i)
    # NOTE: the original also shuffled (x_test_im, y_test_im) into
    # x_test_i / y_test_i but never used or returned them — dead code
    # removed.  The balanced test split is what gets returned.

    x_test_b, y_test_b = shuffle_data(x_test_b, y_test_b)

    # x_train_i = x_train_i[:, 0:1920]   # optional feature truncation
    # x_test_i = x_test_i[:, 0:1920]
    return x_train_i, y_train_i, x_test_b, y_test_b, train_weight, ir_overall
def data_process():
    """Load 'dataset.mat', SMOTE-Tomek-resample an imbalanced training
    split, and return it together with a balanced test split.

    NOTE(review): this redefinition shadows the earlier ``data_process``
    in the same module (this SMOTE-Tomek variant wins at import time)
    and returns four values instead of six — confirm which variant the
    callers actually expect, and rename one of them if both are needed.

    Returns
    -------
    tuple
        ``(x_train_i, y_train_i, x_test_b, y_test_b)`` — resampled,
        shuffled, one-hot-encoded training data plus the shuffled
        balanced test split.
    """
    data = sio.loadmat('dataset.mat')
    x_data = data['f_data0']
    y_data = data['f_label']
    # x_data = norm_ZS(x_data)  # optional z-score normalisation (disabled)

    # Balanced 80/20 hold-out; only the test part is returned.
    x_train_b, x_test_b, y_train_b, y_test_b = train_test_split(
        x_data, y_data, test_size=0.2)

    # Per-class sample counts for the imbalanced training subset.
    # Milder / harsher alternatives kept for reference:
    # imbalanced_dict = {0: 50, 1: 30, 2: 30, 3: 30, 4: 15, 5: 15, 6: 15, 7: 10}
    # imbalanced_dict = {0: 50, 1: 10, 2: 10, 3: 10, 4: 3, 5: 3, 6: 3, 7: 1}
    imbalanced_dict = {0: 50, 1: 20, 2: 20, 3: 20, 4: 5, 5: 5, 6: 5, 7: 2}
    x_train_im, y_train_im, x_test_im, y_test_im, imbalanced_dict_1 = imbalanced_data(
        x_data, y_data, imbalanced_dict, refresh=False, seed=1)

    # Re-balance the training set: SMOTE oversampling + Tomek-link cleaning.
    smote_tomek = SMOTETomek(random_state=0)
    # FIX: imbalanced-learn renamed fit_sample -> fit_resample (the old
    # name was removed in 0.6); prefer the new API but fall back so old
    # installs keep working.
    if hasattr(smote_tomek, "fit_resample"):
        x_train_im, y_train_im = smote_tomek.fit_resample(x_train_im, y_train_im)
    else:
        x_train_im, y_train_im = smote_tomek.fit_sample(x_train_im, y_train_im)

    x_train_i, y_train_i = shuffle_data(x_train_im, y_train_im)
    y_train_i = to_categorical(y_train_i)

    x_test_b, y_test_b = shuffle_data(x_test_b, y_test_b)

    # x_train_i = x_train_i[:, 0:1920]   # optional feature truncation
    # x_test_i = x_test_i[:, 0:1920]
    return x_train_i, y_train_i, x_test_b, y_test_b