def train():
    """Build an ``HMM_second`` model from the training patients and validate it.

    Loads the train, validation and test patient groups through
    ``ICHISeqDataReader.read_all_for_second_hmm``, constructs ``HMM_second``
    from the training pair, and passes the resulting model together with the
    validation pair to ``validation``. Progress markers are written to the
    module-level ``debug_file``. Returns nothing.
    """
    # Larger training list kept for reference:
    # ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    train_data_names = ['p10a', 'p011', 'p013']
    valid_data = ['p09b', 'p023', 'p035', 'p038']
    test_data = ['p09a', 'p033']

    # Data comes back divided into sequences with respect to labels.
    train_reader = ICHISeqDataReader(train_data_names)
    train_set_x, train_set_y = train_reader.read_all_for_second_hmm()

    valid_reader = ICHISeqDataReader(valid_data)
    valid_set_x, valid_set_y = valid_reader.read_all_for_second_hmm()

    # NOTE(review): the test split is loaded but never used in this function;
    # the read is kept so file access (and any load errors) happen as before.
    test_reader = ICHISeqDataReader(test_data)
    test_set_x, test_set_y = test_reader.read_all_for_second_hmm()

    debug_file.write('data is got')

    # Size of the observation alphabet: (10**rank + 1) ** 3.
    # NOTE(review): the exponent 3 presumably reflects three encoded
    # components per observation -- confirm against the reader.
    rank = 1
    base = pow(10, rank) + 1
    n_visible_labels = pow(base, 3)

    trained_HMM = HMM_second(
        n_visible=n_visible_labels,
        train_set=(train_set_x, train_set_y),
        train_patient_list=train_data_names,
    )
    # Force a collection pass after model construction, before validation.
    gc.collect()
    debug_file.write('Hmm created')

    debug_file.write('Start validation')
    validation(
        HMM=trained_HMM,
        patient_list=valid_data,
        valid_set=(valid_set_x, valid_set_y),
    )
def train_all_data():
    """Train one HMM on all training patients and evaluate it on validation data.

    Steps, in order:

    1. Load the train, validation and test patient groups with
       ``ICHISeqDataReader.read_all_for_second_hmm`` using the shared
       ``rank`` / ``start_base`` settings.
    2. Re-encode every split with ``change_data_for_ws`` for the chosen
       ``window_size``.
    3. Build the model with ``create_hmm_for_all_data`` from the re-encoded
       training split.
    4. Pass the model and the re-encoded validation split to
       ``get_error_on_model``.

    Progress messages are printed to stdout. Returns nothing.
    """
    # Larger training list kept for reference:
    # ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    train_data_names = ["p10a"]
    valid_data = ["p09b", "p023", "p035", "p038"]
    # valid_data=['p10a']
    test_data = ["p09a", "p033"]

    n_train_patients = len(train_data_names)
    n_valid_patients = len(valid_data)
    n_test_patients = len(test_data)

    rank = 1
    start_base = 5
    base = pow(start_base, rank) + 1

    # Data comes back divided into sequences with respect to labels.
    train_reader = ICHISeqDataReader(train_data_names)
    train_set_x, train_set_y = train_reader.read_all_for_second_hmm(
        rank=rank, start_base=start_base
    )

    valid_reader = ICHISeqDataReader(valid_data)
    valid_set_x, valid_set_y = valid_reader.read_all_for_second_hmm(
        rank=rank, start_base=start_base
    )

    test_reader = ICHISeqDataReader(test_data)
    test_set_x, test_set_y = test_reader.read_all_for_second_hmm(
        rank=rank, start_base=start_base
    )
    print("data is got")

    # Size of the observation alphabet before windowing: base ** 3.
    # NOTE(review): the exponent 3 presumably reflects three encoded
    # components per observation -- confirm against the reader.
    n_visible_labels = pow(base, 3)
    n_hidden = 7
    window_size = 1

    new_train_set_x, new_train_set_y = change_data_for_ws(
        dataset=(train_set_x, train_set_y),
        window_size=window_size,
        base_for_labels=n_visible_labels,
        n_patients=n_train_patients,
    )
    new_valid_set_x, new_valid_set_y = change_data_for_ws(
        dataset=(valid_set_x, valid_set_y),
        window_size=window_size,
        base_for_labels=n_visible_labels,
        n_patients=n_valid_patients,
    )
    # NOTE(review): the re-encoded test split is computed but not used below;
    # the call is kept so behaviour (and any errors it raises) is unchanged.
    new_test_set_x, new_test_set_y = change_data_for_ws(
        dataset=(test_set_x, test_set_y),
        window_size=window_size,
        base_for_labels=n_visible_labels,
        n_patients=n_test_patients,
    )

    trained_HMM = create_hmm_for_all_data(
        n_hidden=n_hidden,
        # A window of `window_size` symbols is passed as one symbol of an
        # alphabet with n_visible_labels ** window_size entries.
        n_visible=pow(n_visible_labels, window_size),
        train_set=(new_train_set_x, new_train_set_y),
        n_patients=n_train_patients,
        window_size=window_size,
    )
    # Force a collection pass after model construction, before evaluation.
    gc.collect()
    print("Hmm created")

    # Evaluate with the same window size the model was trained with. This was
    # previously a hard-coded 1, which would diverge from training as soon as
    # `window_size` above is changed.
    get_error_on_model(
        model=trained_HMM,
        n_patients=n_valid_patients,
        test_set=(new_valid_set_x, new_valid_set_y),
        window_size=window_size,
    )