def initData():
    """Build train/test TESPAR-A + FFT feature sets for one channel/segment.

    Reads the module-level ``channel`` and ``segment`` variables. The dataset
    is loaded, burst regions are marked with a single threshold, and a custom
    train/test split (20% test) is produced before feature extraction.

    Returns:
        tuple: (X_train, y_train, x_test, y_test) feature matrices/labels.
    """
    dataset_init = InitDataSet()
    doas = dataset_init.get_dataset_as_doas()

    # NOTE(review): ``encoding`` is built (reads the alphabet file) but is
    # never passed to obtain_TESPAR_A_FFT_features below, although other
    # call sites in this project pass it — confirm whether it is needed here.
    encoding = Encoding('./../../data_to_be_saved/alphabet_3.txt')

    # Burst marking with a single threshold (the per-level-threshold variant
    # mark_bursts_regions + remove_bursted_trials_when_segment is disabled).
    mark_bursts_regions_one_threshold(doas)

    # Custom train/test split (20% test) that validates trial membership,
    # instead of the plain train_test_doa(doas, 0.2) split.
    doas_train, doas_test, ind_test = train_test_doa_check_trials(doas, 0.2)

    # Same extraction parameters for both partitions.
    levels = ['light', 'medium', 'deep']
    train_data = ExtractData(doas_train, [channel], levels, [segment], ['all'])
    test_data = ExtractData(doas_test, [channel], levels, [segment], ['all'])

    # FFT-only features.
    X_train, y_train = obtain_TESPAR_A_FFT_features(train_data)
    x_test, y_test = obtain_TESPAR_A_FFT_features(test_data)

    return X_train, y_train, x_test, y_test
def get_data(self):
    """Extract and flatten the data/validation arrays for this instance.

    Runs ExtractData over the instance's dataset configuration
    (``self.doas``, ``self.channels``, ``self.level``, ``self.segment``,
    ``self.orientation``), then flattens the per-channel nested arrays into
    two flat lists stored on the instance.

    Returns:
        tuple: (self.X, self.X_validate) — flat lists of the extracted
        data arrays and their validation counterparts.
    """
    # ExtractData superseded the older SplitData helper
    # (same argument order: doas, channels, levels, segment, orientation).
    split_data = ExtractData(self.doas, self.channels, self.level,
                             self.segment, self.orientation)
    self.X = []
    self.X_validate = []
    # Flatten each channel's arrays in one pass; extend() preserves the
    # element order of the original index-by-index append loops.
    for arr in split_data.result.arrays:
        self.X.extend(arr.array_data)
        self.X_validate.extend(arr.array_validate)
    return self.X, self.X_validate
for run in range(run_nr): print('************************RUN ' + str(run) + '************************') # firstly split the input into train test doas_train, doas_test, ind_test = train_test_doa(doas, 0.2) np.savetxt(write_file, np.array(ind_test), fmt="%s", newline=' ') write_file.write('\n') for ind_segment, segment in enumerate(segments): for channel in range(len(all_channels)): print("start running for channel " + str(all_channels[channel]) + ' ' + segment + '\n') # SplitData(self, doas, channels, levels, segment, orientation): train_data = ExtractData(doas_train, [all_channels[channel]], ['light', 'deep'], [segment], ['all']) test_data = ExtractData(doas_test, [all_channels[channel]], ['light', 'deep'], [segment], ['all']) X_train, y_train = obtain_features_labels(train_data, encoding) x_test, y_test = obtain_features_labels(test_data, encoding) model = DecisionTreeClassifier(random_state=99, criterion='gini', max_depth=2) model.fit(X_train, y_train) predictions = model.predict(x_test) report = classification_report(y_test, predictions, output_dict=True)
# Overfitting check for a RandomForest on concatenated-segment FFT features:
# compare predictions on the training set vs the test set over several
# balanced resplits. Depends on out-of-view globals: doas and the imported
# helpers.
good_channels_spont = [1, 3, 5, 12, 14]
run_nr = 5
# NOTE(review): ``channel`` and ``segment`` below are never used — the
# extraction hard-codes channel [2] and both segments; confirm intent.
channel = 2
segment = 'stimulus'
print('test for overfitting RF 0.2')
for run in range(run_nr):
    # firstly split the input into train test (class-balanced resplit each run)
    doas_train, doas_test = train_test_doa_remake_balanced(doas)
    # print()
    train_data = ExtractData(doas_train, [2], ['light', 'medium', 'deep'], ['spontaneous', 'stimulus'], ['all'])
    test_data = ExtractData(doas_test, [2], ['light', 'medium', 'deep'], ['spontaneous', 'stimulus'], ['all'])
    X_train, y_train = obtain_concatenate_segments_fft(train_data)
    x_test, y_test = obtain_concatenate_segments_fft(test_data)
    # classify with parameters tunning: regularized forest (depth/leaf limits)
    # versus the default model kept commented out below for comparison.
    model = RandomForestClassifier(n_estimators=5000, max_depth=5, min_samples_split=5, min_samples_leaf=10)
    # model = RandomForestClassifier()
    model.fit(X_train, y_train)
    # Predict on both partitions: a large train/test accuracy gap indicates
    # overfitting.
    predictions_train = model.predict(X_train)
    predictions_test = model.predict(x_test)
# Collect per-(segment, channel) f1 scores over repeated balanced resplits
# with a regularized RandomForest. Depends on out-of-view globals:
# channels_range, segments, run_nr, doas, all_channels.
# NOTE(review): the score grid is sized channels_range - 1 per segment while
# the loop below iterates len(all_channels) channels — confirm these agree.
f1scores = [[[] for i in range(channels_range - 1)] for j in range(len(segments))]
for run in range(run_nr):
    # firstly split the input into train test (balanced resplit each run)
    doas_train, doas_test = train_test_doa_remake_balanced(doas)
    # np.savetxt(write_file, np.array(ind_test), fmt="%s", newline=' ')
    # write_file.write('\n')
    for ind_segment, segment in enumerate(segments):
        for channel in range(len(all_channels)):
            # NOTE(review): prints the loop index, not all_channels[channel]
            # as sibling scripts do — presumably intended to show the channel id.
            print("start running for channel " + str(channel) + '\n')
            # SplitData(self, doas, channels, levels, segment, orientation):
            # only 'medium' vs 'deep' levels are classified here.
            train_data = ExtractData(doas_train, [all_channels[channel]], ['medium', 'deep'], ['spontaneous', 'stimulus'], ['all'])
            test_data = ExtractData(doas_test, [all_channels[channel]], ['medium', 'deep'], ['spontaneous', 'stimulus'], ['all'])
            X_train, y_train = obtain_concatenate_segments_fft(train_data)
            x_test, y_test = obtain_concatenate_segments_fft(test_data)
            # Same regularized forest as the overfitting test elsewhere in
            # this project (depth/leaf limits to curb variance).
            model = RandomForestClassifier(n_estimators=5000, max_depth=5, min_samples_split=5, min_samples_leaf=10)
            model.fit(X_train, y_train)
            predictions = model.predict(x_test)
    # Tail of the channel list whose opening bracket is above this view;
    # channel 22 is absent from the sequence.
    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
    18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
]
# TESPAR alphabet used to encode signals before feature extraction.
encoding = Encoding('./../../data_to_be_saved/alphabet_3.txt')
data_dir = os.path.join('../..', '..')
initialization = InitDataSet()
doas = initialization.get_dataset_as_doas()
# Per-level-threshold burst marking (contrast with the single-threshold
# variant used in initData elsewhere in this project).
mark_bursts_regions(doas)
# 10-fold stratified cross-validation of a regularized RandomForest for each
# (segment, channel) combination; ``segments`` is defined out of view.
for segment in segments:
    for channel in all_channels:
        print("start running for channel " + str(channel) + ' ' + segment)
        data = ExtractData(doas, [channel], ['light', 'medium', 'deep'], [segment], ['all'])
        X, y = obtain_TESPAR_A_FFT_features(data, encoding)
        model = RandomForestClassifier(n_estimators=5000, max_depth=5, min_samples_split=5, min_samples_leaf=10)
        skf = StratifiedKFold(n_splits=10)
        skf.get_n_splits(X, y)
        # Call continues beyond this view (remaining cross_validate arguments
        # are truncated here).
        results = cross_validate(model, X, y, scoring=['accuracy', 'f1_weighted'],