def test_all_params():
    """Run ``test_params`` once per (learning rate, window size) pair.

    The train / validation / test patient lists are hard-coded. Each list
    is loaded through its own ``ICHISeqDataReader`` and the three resulting
    ``(x, y)`` pairs are passed on as ``datasets`` in that order.
    """
    learning_rates = [0.0001]
    window_sizes = [13]

    train_data = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022', 'p040',
                  'p045', 'p048']
    valid_data = ['p09b', 'p023', 'p035', 'p038']
    test_data = ['p09a', 'p033']
    splits = (train_data, valid_data, test_data)

    # Load the splits one after another: train, then validation, then test.
    datasets = []
    for patient_names in splits:
        set_x, set_y = ICHISeqDataReader(patient_names).read_all()
        datasets.append((set_x, set_y))

    # The output folder name lists the patients of every split.
    output_folder = '[%s], [%s], [%s]' % tuple(
        ",".join(patient_names) for patient_names in splits
    )

    grid = [(lr, ws) for lr in learning_rates for ws in window_sizes]
    for lr, ws in grid:
        test_params(
            learning_rate=lr,
            n_epochs=1,
            window_size=ws,
            datasets=datasets,
            output_folder=output_folder,
            base_folder='regression_plots',
        )
def test_da_params(corruption_level):
    """Run ``train_dA`` for every (learning rate, window size) combination.

    Only the training split is loaded: ``train_dA`` receives nothing but
    ``train_set``, so the validation and test sets are no longer read from
    disk. Their patient names are still needed because they are part of the
    output folder name.

    Args:
        corruption_level: forwarded unchanged to ``train_dA``.
    """
    learning_rates = [0.001, 0.003, 0.005, 0.007, 0.009, 0.011, 0.013, 0.015]
    window_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

    train_data = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022', 'p040',
                  'p045', 'p048']
    valid_data = ['p09b', 'p023', 'p035', 'p038']
    test_data = ['p09a', 'p033']

    # The labels returned alongside the inputs are not used here.
    train_reader = ICHISeqDataReader(train_data)
    train_set, _ = train_reader.read_all()

    output_folder = ('[%s], [%s], [%s]') % (
        ",".join(train_data),
        ",".join(valid_data),
        ",".join(test_data),
    )

    for lr in learning_rates:
        for ws in window_sizes:
            train_dA(
                learning_rate=lr,
                training_epochs=1,
                window_size=ws,
                corruption_level=corruption_level,
                # Hidden layer is twice as wide as the input window.
                n_hidden=ws * 2,
                train_set=train_set,
                output_folder=output_folder,
                base_folder='dA_plots',
            )
def test_all_params():
    """Run ``test_SdA`` for each configured window size.

    Loads the hard-coded train / validation / test patient lists with
    ``ICHISeqDataReader`` and passes them to ``test_SdA`` together with
    fixed epoch counts (100 pretraining, 1000 training).
    """
    window_sizes = [13]

    train_data = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022', 'p040',
                  'p045', 'p048']
    valid_data = ['p09b', 'p023', 'p035', 'p038']
    test_data = ['p09a', 'p033']

    # Read order is train, validation, test.
    train_pair = tuple_of_two = None  # placeholders replaced just below
    train_x, train_y = ICHISeqDataReader(train_data).read_all()
    train_pair = (train_x, train_y)
    valid_x, valid_y = ICHISeqDataReader(valid_data).read_all()
    valid_pair = (valid_x, valid_y)
    test_x, test_y = ICHISeqDataReader(test_data).read_all()
    test_pair = (test_x, test_y)
    del tuple_of_two

    datasets = [train_pair, valid_pair, test_pair]

    joined_names = (
        ",".join(train_data),
        ",".join(valid_data),
        ",".join(test_data),
    )
    output_folder = '[%s], [%s], [%s]' % joined_names

    for ws in window_sizes:
        test_SdA(
            datasets=datasets,
            output_folder=output_folder,
            base_folder='SdA_cg_plots',
            window_size=ws,
            pretraining_epochs=100,
            training_epochs=1000,
        )
def test_sda(sda, test_names, base, window_size=1, algo='viterbi'):
    """Print the value of ``get_error_on_patient`` for each test patient.

    For every entry in ``test_names`` one document is read, each sliding
    window of its samples is passed through ``sda.get_da_output``, and the
    flattened outputs plus the evaluated labels are handed to
    ``change_data_for_one_patient``. The converted data is then scored with
    ``sda.hmmLayer``.

    Args:
        sda: object providing ``get_da_output``, ``da_layers_output_size``
            and ``hmmLayer``.
        test_names: patient names given to ``ICHISeqDataReader``.
        base: forwarded as ``base_for_labels``.
        window_size: number of consecutive samples per window.
        algo: forwarded to ``get_error_on_patient``.
    """
    test_reader = ICHISeqDataReader(test_names)
    # The result of this call is overwritten by the first read_next_doc().
    # NOTE(review): kept because it is unclear from here whether read_all()
    # is needed to prepare the reader - confirm before removing.
    doc_x, doc_y = test_reader.read_all()

    for patient_index in xrange(len(test_names)):
        # get data divided on sequences with respect to labels
        doc_x, doc_y = test_reader.read_next_doc()
        samples = doc_x.get_value()

        # Number of full windows that fit into this document.
        window_count = samples.shape[0] - window_size + 1
        encoded_windows = [
            sda.get_da_output(samples[start:start + window_size]).ravel()
            for start in xrange(window_count)
        ]
        flat_visible = numpy.array(encoded_windows).ravel()

        visible_seq, hidden_seq = change_data_for_one_patient(
            hiddens_patient=doc_y.eval(),
            visibles_patient=flat_visible,
            window_size=sda.da_layers_output_size,
            base_for_labels=base,
        )

        error_value = get_error_on_patient(
            model=sda.hmmLayer,
            visible_set=visible_seq,
            hidden_set=hidden_seq,
            algo=algo,
        )

        print(error_value, ' error for patient ' + str(patient_index))
        gc.collect()
def test_all_params():
    """Train with ``train_SdA`` and evaluate with ``test_sda`` per window size.

    Patient lists, corruption levels, the pretraining learning rate and the
    label base (``start_base ** rank + 1``) are all hard-coded here.
    """
    window_sizes = [1]

    train_data = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022', 'p040',
                  'p045', 'p048']
    valid_data = ['p09b', 'p023', 'p035', 'p038']
    test_data = ['p09a', 'p033']
    splits = (train_data, valid_data, test_data)

    # Load train, validation and test data, in that order.
    datasets = []
    for patient_names in splits:
        inputs, labels = ICHISeqDataReader(patient_names).read_all()
        datasets.append((inputs, labels))

    output_folder = ('[%s], [%s], [%s]') % (
        ",".join(train_data),
        ",".join(valid_data),
        ",".join(test_data),
    )

    corruption_levels = [.1, .2]
    pretrain_lr = .03

    rank = 1
    start_base = 5
    base = pow(start_base, rank) + 1

    for ws in window_sizes:
        trained_sda = train_SdA(
            datasets=datasets,
            train_names=train_data,
            output_folder=output_folder,
            base_folder='SdA_second_hmm_without_window',
            window_size=ws,
            corruption_levels=corruption_levels,
            pretrain_lr=pretrain_lr,
            base=base,
            pretraining_epochs=15,
        )
        # Evaluate the freshly trained model on the test patients.
        test_sda(sda=trained_sda, test_names=test_data, base=base)
def test_all_params():
    """Run ``test_SdA`` with one pretraining and one training epoch.

    Uses a single training patient, the hard-coded validation and test
    patient lists, and fixed corruption levels and learning rates.
    """
    window_sizes = [10]

    # train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    train_data = ["p10a"]
    valid_data = ["p09b", "p023", "p035", "p038"]
    test_data = ["p09a", "p033"]

    # Each split gets its own reader; read order is train, valid, test.
    train_set_x, train_set_y = ICHISeqDataReader(train_data).read_all()
    valid_set_x, valid_set_y = ICHISeqDataReader(valid_data).read_all()
    test_set_x, test_set_y = ICHISeqDataReader(test_data).read_all()

    datasets = [
        (train_set_x, train_set_y),
        (valid_set_x, valid_set_y),
        (test_set_x, test_set_y),
    ]

    name_groups = tuple(
        ",".join(group) for group in (train_data, valid_data, test_data)
    )
    output_folder = "[%s], [%s], [%s]" % name_groups

    # Settings shared by every window size.
    shared_settings = dict(
        datasets=datasets,
        output_folder=output_folder,
        base_folder="SdA_sgd_cg_plots",
        corruption_levels=[0.1, 0.2],
        pretrain_lr=0.03,
        finetune_lr=0.03,
        pretraining_epochs=1,
        training_epochs=1,
    )

    for ws in window_sizes:
        test_SdA(window_size=ws, **shared_settings)
def test_da_params(corruption_level):
    """Run ``train_dA`` for one epoch at each configured window size.

    Args:
        corruption_level: forwarded unchanged to ``train_dA``.
    """
    window_sizes = [13, 30, 50, 75, 100]

    train_data = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022', 'p040',
                  'p045', 'p048']

    # Only the inputs are passed on; the labels are read but not used.
    train_reader = ICHISeqDataReader(train_data)
    train_set, _train_labels = train_reader.read_all()

    joined_patients = ",".join(train_data)
    output_folder = '[%s]' % joined_patients

    for ws in window_sizes:
        hidden_units = ws * 2  # hidden layer is twice the window size
        train_dA(
            training_epochs=1,
            window_size=ws,
            corruption_level=corruption_level,
            n_hidden=hidden_units,
            dataset=train_set,
            output_folder=output_folder,
            base_folder='dA_cg_plots',
        )
def test_sda(sda, test_names, rank, start_base, window_size=1, algo='viterbi'):
    """Print the value of ``get_error_on_patient`` for each test patient.

    For every entry in ``test_names`` one document is read, each sliding
    window of its samples is passed through ``sda.get_da_output``, and the
    resulting matrix is converted by ``create_labels_after_das``. The labels
    are trimmed by half a window on both ends before scoring with
    ``sda.hmmLayer``.

    Args:
        sda: object providing ``get_da_output`` and ``hmmLayer``.
        test_names: patient names given to ``ICHISeqDataReader``.
        rank: forwarded to ``create_labels_after_das``.
        start_base: forwarded to ``create_labels_after_das``.
        window_size: number of consecutive samples per window.
        algo: forwarded to ``get_error_on_patient``.
    """
    test_reader = ICHISeqDataReader(test_names)
    # The result of this call is overwritten by the first read_next_doc().
    # NOTE(review): kept because it is unclear from here whether read_all()
    # is needed to prepare the reader - confirm before removing.
    samples, labels = test_reader.read_all()

    half_window_size = int(window_size / 2)

    for patient_index in xrange(len(test_names)):
        # get data divided on sequences with respect to labels
        samples, labels = test_reader.read_next_doc()
        samples = samples.get_value()
        labels = labels.eval()

        # NOTE(review): this yields shape[0] - window_size windows, while the
        # labels below keep len - 2 * half_window_size entries; for odd
        # window sizes the two differ by one unless create_labels_after_das
        # compensates - confirm against that helper.
        window_count = samples.shape[0] - window_size
        encoded_rows = []
        for start in xrange(window_count):
            window = samples[start:start + window_size]
            encoded_rows.append(sda.get_da_output(window).ravel())
        da_output_matrix = numpy.array(encoded_rows)

        visible_seq = create_labels_after_das(
            da_output_matrix=da_output_matrix,
            rank=rank,
            start_base=start_base,
            window_size=window_size,
        )

        # Drop half a window of labels at both ends of the document.
        label_count = len(labels)
        hidden_seq = labels[half_window_size:label_count - half_window_size]

        error_value = get_error_on_patient(
            model=sda.hmmLayer,
            visible_set=visible_seq,
            hidden_set=hidden_seq,
            algo=algo,
        )

        print(error_value, ' error for patient ' + str(patient_index))
        gc.collect()