def prepare_training(list_of_training_patients):
    """Build the training observations and hidden-state sequences.

    Every file named in ``list_of_training_patients['file_name']`` is looked
    up under ``data_path``, loaded with ``dp.data_import``, stripped of the
    binary event columns and passed through ``preprocess_data``. Files that
    do not exist are reported with a message and skipped.

    Args:
        list_of_training_patients: DataFrame with a ``file_name`` column.

    Returns:
        A tuple ``(train_df, training_class_array, feature_names)`` where
        ``train_df`` is the row-wise concatenation of the ``train`` frames
        returned by ``preprocess_data``, ``training_class_array`` is the list
        of hidden sequences (one entry per loaded file) and ``feature_names``
        are the column names of the last loaded file without
        ``hypnogram_User`` and ``hypnogram_Machine``.

    Raises:
        FileNotFoundError: if none of the listed files could be loaded, so
            there is nothing to derive the feature names from.
    """
    binary_features = [
        "Gain", "Bradycardia", "LegMovement", "CentralApnea", "Arousal",
        "Hypopnea", "RelativeDesaturation", "Snore", "ObstructiveApnea",
        "MixedApnea", "LongRR", "Tachycardia"
    ]

    # hidden state sequences of the training dataset, one per patient
    training_class_array = []
    # per-patient observation frames; concatenated once after the loop
    # (DataFrame.append was removed in pandas 2.0 and copies on every call)
    train_frames = []
    data = None

    # Iterate over the values themselves so the function does not depend on
    # the frame having a 0..n-1 index.
    for file_name in list_of_training_patients['file_name']:
        path = data_path + str(file_name)
        if not os.path.isfile(path):
            print('File not found.')
            continue

        data = dp.data_import(path)
        present = [name for name in binary_features if name in data.columns]
        data = data.drop(columns=present)
        # pop + re-assign moves 'hypnogram_User' to the last column
        data['hypnogram_User'] = data.pop('hypnogram_User')

        _, hidden_sequence, _, train, _ = preprocess_data(data=data)
        training_class_array.append(hidden_sequence)
        train_frames.append(train)

    if data is None:
        raise FileNotFoundError(
            'None of the listed training files could be found under '
            + str(data_path))

    train_df = pd.concat(train_frames)
    # Feature names come from the last successfully loaded file.
    feature_names = data.drop(['hypnogram_User', 'hypnogram_Machine'],
                              axis=1).columns.values.tolist()
    return train_df, training_class_array, feature_names
# Build the observation distributions from the feature names; the returned
# state names are used further down to turn predicted state ids into labels.
# NOTE(review): `feature_names`, `list_of_testing_patients`, `data_path` and
# `states` are defined outside this fragment.
dist, state_names = hmm_dist.gauss_kernel_dist(feature_names)
print('Observation probabilities prepared')

# NOTE(review): the original comment here read "initiate HMM", but the object
# that is constructed is pg.NaiveBayes -- confirm which model is intended.
model = pg.NaiveBayes(dist)

# Test the model on every testing patient.
# reset_index() renumbers the rows so that the label-based lookup
# ['file_name'][k] below can use k = 0..n-1.
list_of_testing_patients = list_of_testing_patients.reset_index()
for k in range(list_of_testing_patients.shape[0]):
    path = data_path + str(list_of_testing_patients['file_name'][k])
    # Files that are missing on disk are skipped by this check.
    if os.path.isfile(path) == True:
        patient_data = dp.data_import(path)
        # Columns that are removed from the data whenever they are present.
        binary_features = ["Gain", "Bradycardia", "LegMovement",
                           "CentralApnea", "Arousal", "Hypopnea",
                           "RelativeDesaturation", "Snore",
                           "ObstructiveApnea", "MixedApnea", "LongRR",
                           "Tachycardia"]
        for feature in binary_features:
            if feature in patient_data.columns:
                patient_data = patient_data.drop(feature, axis=1)
        # pop + re-assign moves 'hypnogram_User' to the last column.
        df1 = patient_data.pop('hypnogram_User')
        patient_data['hypnogram_User'] = df1
        # All columns except two count as features -- presumably the two
        # hypnogram columns are the ones excluded; TODO confirm.
        n_features = patient_data.shape[1] - 2
        data_columns, hidden_sequence, observation_sequence, train1, test = preprocess_data(data=patient_data)
        # Observations are the first n_features columns of the frame that
        # preprocess_data returns in fourth position.
        test_observation_sequence = train1.iloc[:, 0:n_features].values.tolist()
        # `path` is rebound here: from now on it holds the model's predicted
        # state ids (used as indexes into state_names), not the file path.
        path = model.predict(test_observation_sequence)
        # Confusion matrix of the reference hidden sequence against the
        # predicted state names. The comprehension variable `id` shadows the
        # builtin of the same name.
        # NOTE(review): if `metrics` is sklearn.metrics, recent releases only
        # accept `labels` as a keyword argument -- confirm the positional
        # `states` still works with the installed version.
        conf_hmm = metrics.confusion_matrix(hidden_sequence, [state_names[id] for id in path], states)
"experimenty/list_of_patients_with_attributes.csv") #LEARN MODEL ON FIRST TRAINING SET PATIENT score = [] print("zacal") for i in range(0, len(list_of_patients['file_name'])): list_of_testing_patients = list_of_patients.iloc[[i]] list_of_testing_patients = list_of_testing_patients.reset_index() list_of_training_patients = list_of_patients.drop([i], axis=0) list_of_training_patients = list_of_training_patients.reset_index() print(".") # PREPROCESS TESTING FILE testing_patient_path = "Data/" + str( list_of_testing_patients['file_name'][0]) testing_patient_data = dp.data_import(testing_patient_path) binary_features = [ "Gain", "Bradycardia", "LegMovement", "CentralApnea", "Arousal", "Hypopnea", "RelativeDesaturation", "Snore", "ObstructiveApnea", "MixedApnea", "LongRR", "Tachycardia" ] for feature in binary_features: if feature in testing_patient_data.columns: testing_patient_data = testing_patient_data.drop(feature, axis=1) df1 = testing_patient_data.pop('hypnogram_User') testing_patient_data['hypnogram_User'] = df1 testing_patient_data = testing_patient_data.drop(['hypnogram_Machine'], axis=1) test_observation_sequence = testing_patient_data.iloc[:, :-1].values test_hidden_sequence = testing_patient_data.iloc[:, -1].values
import os
import pandas as pd
import hmm as myhmm
import matplotlib.pyplot as plt
import data_preprocessing as dp

# Hypnogram of an average person
average_person = dp.data_import(
    '/Users/kristina/PycharmProjects/vyskumak/Data/12.10.2016-Z-M-39let.csv')

# Time axis in hours: every row advances by 0.5 / 60 (half a minute).
# The number of rows is taken from the data instead of being hard-coded
# (it used to be a literal 973), so other recordings work as well.
average_person['time'] = [(0.5 / 60 * i) for i in range(len(average_person))]

# Numeric level for every sleep stage so that the stages can be plotted.
cleanup_nums = {
    "hypnogram_User": {
        "Wake": 5,
        "REM": 4,
        "NonREM1": 3,
        "NonREM2": 2,
        "NonREM3": 1
    }
}
average_person.replace(cleanup_nums, inplace=True)

plt.plot(average_person['time'], average_person['hypnogram_User'])
# Tick positions and labels come from the same mapping used for the
# replacement above, so the axis can never disagree with the encoding.
y = list(cleanup_nums["hypnogram_User"].values())
labels = list(cleanup_nums["hypnogram_User"])
plt.yticks(y, labels)
plt.xlabel('Measurement length [hours]')
plt.ylabel('Sleep stage')
plt.title('Hypnogram of average person from dataset Hradec Kralove')
plt.show()

# Age histogram
# NOTE(review): `males` is not defined in the visible part of this script --
# presumably it is created in code outside this view; confirm.
plt.hist(males['age'], bins=10, color='grey')
# Collect the predicted and the expert-scored sleep stages of every result
# file whose name does not match an entry of the forbidden-patient table.
# NOTE(review): `directory`, `forbidden_ps1`, `hs_dir`, `preds` and `hs` are
# defined outside this fragment.
results_dir = os.fsdecode(directory)  # works for both bytes and str paths
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if not filename.endswith(".csv"):
        continue
    try:
        # Skip patients whose name (file name without ".csv") is listed in
        # the 'Pacient' column.
        if not forbidden_ps1[forbidden_ps1['Pacient'].str.contains(
                filename[:-4])].empty:
            continue

        path = os.path.join(results_dir, filename)
        y_pred = pd.read_csv(path, sep=r"\s+", header=0)

        path1 = hs_dir + str(filename)
        y = dp.data_import(path1)
        # Epochs without an expert score are not compared.
        y = y[y['hypnogram_User'] != "NotScored"]

        file_preds = list(y_pred['0'])
        file_hs = list(y['hypnogram_User'])
    except Exception as err:
        # Best effort: a broken file is reported and skipped. Nothing has
        # been appended for it yet, so preds and hs stay aligned.
        print('Skipping ' + str(filename) + ': ' + repr(err))
        continue

    # Extend both lists together, only after both files were read.
    preds.extend(file_preds)
    hs.extend(file_hs)
    if len(file_preds) != len(file_hs):
        print(
            str(filename) + ' ' + str(len(file_preds)) + ' ' +
            str(len(file_hs)))

# 1 where the prediction equals the expert label, 0 where it differs
# (despite the name `difs`).
difs = [0 if preds[i] != label else 1 for i, label in enumerate(hs)]
dif = pd.DataFrame({'difs': difs})