def test_sda(sda, test_names, rank, start_base, window_size=1, algo='viterbi'):
    """Evaluate a trained SdA + HMM model on each test patient and print errors.

    For every patient in test_names: read the full (undivided) windowed
    sequence, encode each window through the stacked dA layers, discretize
    the encoder outputs into labels, decode a hidden-state sequence with the
    attached HMM layer, and print the mean error against the true labels.

    :param sda: trained SdA with an attached hmmLayer
    :param test_names: list of patient identifiers to read
    :param rank: rank used when discretizing dA outputs into labels
    :param start_base: base used when discretizing dA outputs into labels
    :param window_size: number of samples per input window
    :param algo: decoding algorithm name (currently unused in this function)
    """
    test_reader = ICHISeqDataReader(test_names)
    n_test_patients = len(test_names)
    for test_patient in xrange(n_test_patients):
        # One full (undivided) windowed sequence per patient.
        test_set_x, test_set_y = test_reader.read_one_with_window(
            window_size=window_size,
            divide=False
        )
        test_set_x = test_set_x.eval()
        test_set_y = test_set_y.eval()
        n_test_times = test_set_x.shape[0]
        # Encode every window independently through the stacked dAs.
        test_visible_after_sda = numpy.array([sda.get_da_output(
            numpy.array(test_set_x[time]).reshape(1, -1))
            for time in xrange(n_test_times)])
        new_test_visible = create_labels_after_das(
            da_output_matrix=test_visible_after_sda,
            rank=rank,
            start_base=start_base
        )
        predicted_states = sda.hmmLayer.define_labels_seq(new_test_visible)
        error_array = errors(predicted_states=numpy.array(predicted_states),
                             actual_states=numpy.array(test_set_y))
        patient_error = error_array.eval().mean()
        # Report the patient's name (consistent with test_hmm), not the
        # bare loop index, which was meaningless in the log output.
        print(patient_error, ' error for patient ' + str(test_names[test_patient]))
        gc.collect()
def train(self, train_names, valid_names, window_size, rank, start_base):
    """Fit each per-class HMM on every training patient in turn.

    For each patient the reader yields the windowed data split per label;
    every HMM is fitted on the discretized labels of its own class. After
    each patient the model is validated on valid_names and the
    [patient index, validation error] pair is recorded in
    self.valid_error_array.

    :param train_names: patient identifiers used for training
    :param valid_names: patient identifiers used for validation
    :param window_size: number of samples per input window
    :param rank: rank used when discretizing windows into labels
    :param start_base: base used when discretizing windows into labels
    """
    reader = ICHISeqDataReader(train_names)
    for patient_idx in xrange(len(train_names)):
        # Sequences for this patient, divided per label so each HMM
        # trains only on observations of its own class.
        label_sets = reader.read_one_with_window(
            window_size=window_size,
            divide=True
        )
        for hmm_idx in xrange(self.n_hmms):
            # Discretize the raw windows into (avg, disp) labels.
            labels = create_labels(
                matrix=label_sets[hmm_idx].eval(),
                rank=rank,
                start_base=start_base
            )
            observations = numpy.array(labels).reshape(-1, 1)
            self.hmm_models[hmm_idx].fit([observations])
        epoch_error = self.validate_model(
            valid_names=valid_names,
            window_size=window_size,
            rank=rank,
            start_base=start_base
        )
        self.valid_error_array.append([patient_idx, epoch_error])
        gc.collect()
def validate_model(self, valid_names, window_size, rank, start_base):
    """Compute the mean error over all patients in the validation set.

    Reads every validation patient's full (undivided) windowed sequence,
    discretizes it into labels, concatenates all patients into one
    observation sequence, and scores it with mean_error.

    :param valid_names: patient identifiers used for validation
    :param window_size: number of samples per input window
    :param rank: rank used when discretizing windows into labels
    :param start_base: base used when discretizing windows into labels
    :return: mean error value over the concatenated validation data
    """
    valid_reader = ICHISeqDataReader(valid_names)
    x_chunks = []
    y_chunks = []
    for i in xrange(len(valid_names)):
        valid_x, valid_y = valid_reader.read_one_with_window(
            window_size=window_size,
            divide=False
        )
        x_chunks.append(create_labels(
            matrix=valid_x.eval(),
            rank=rank,
            start_base=start_base
        ))
        y_chunks.append(valid_y.eval())
    # Concatenate once at the end instead of re-concatenating the growing
    # arrays inside the loop (the original was O(n^2) in total length).
    # Empty valid_names degenerates to empty arrays, as before.
    all_valid_x = numpy.concatenate(x_chunks) if x_chunks else numpy.array([])
    all_valid_y = numpy.concatenate(y_chunks) if y_chunks else numpy.array([])
    print(len(all_valid_x), 'x')
    print(len(all_valid_y), 'y')
    #compute mean error value for patients in validation set
    error = mean_error(
        gen_hmm=self,
        obs_seq=all_valid_x,
        actual_states=all_valid_y
    )
    return error
def test_hmm(gen_hmm, test_names, window_size, rank, start_base):
    """Score a trained GeneralHMM on each test patient and print the error.

    For every patient: read the full (undivided) windowed sequence,
    discretize it into labels, and print the patient's mean error.

    :param gen_hmm: trained GeneralHMM container
    :param test_names: list of patient identifiers to read
    :param window_size: number of samples per input window
    :param rank: rank used when discretizing windows into labels
    :param start_base: base used when discretizing windows into labels
    """
    test_reader = ICHISeqDataReader(test_names)
    n_test_patients = len(test_names)
    for i in xrange(n_test_patients):
        #get data divided on sequences with respect to labels
        test_x, test_y = test_reader.read_one_with_window(
            window_size=window_size,
            divide=False
        )
        # Discretize raw windows into labels. Call matches the other
        # create_labels call sites (train / validate_model), which do not
        # pass window_size — the stray kwarg here was inconsistent.
        test_x = create_labels(
            matrix=test_x.eval(),
            rank=rank,
            start_base=start_base
        )
        #compute mean error value for one patient in test set
        patient_error = mean_error(
            gen_hmm=gen_hmm,
            obs_seq=test_x,
            actual_states=test_y.eval()
        )
        print(patient_error, ' error for patient ' + str(test_names[i]))
        gc.collect()
def train_SdA(train_names, valid_names, output_folder, base_folder, window_size, corruption_levels, pretraining_epochs, start_base, rank, pretrain_lr):
    """Pretrain a stacked denoising autoencoder and attach a trained HMM layer.

    Builds an SdA, pretrains its dA layers with the conjugate-gradient
    variant (the SGD variant is kept below, commented out), plots each
    layer's pretraining cost, then trains one HMM per output class on the
    dA-encoded training data and attaches the HMM container to the SdA
    before returning it. This is demonstrated on ICHI data.

    :type train_names: list
    :param train_names: patient identifiers used for (pre)training

    :type valid_names: list
    :param valid_names: patient identifiers used to validate the HMM layer

    :type output_folder: string
    :param output_folder: folder for cost and error graphics with results

    :type base_folder: string
    :param base_folder: base folder for the visualization output

    :type window_size: int
    :param window_size: number of samples per input window

    :type corruption_levels: list
    :param corruption_levels: per-dA-layer corruption levels

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type start_base: int
    :param start_base: base used when discretizing dA outputs into labels

    :type rank: int
    :param rank: rank used when discretizing dA outputs into labels

    :param pretrain_lr: pretraining learning rate; only consumed by the
        commented-out SGD pretraining path, unused by the active CG path
    """
    # compute number of examples given in training set
    n_in = window_size*3  # number of input units
    n_out = 7  # number of output units

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_in,
        hidden_layers_sizes=[window_size*2, window_size],
        n_outs=n_out
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    start_time = timeit.default_timer()
    '''
    pretrained_sda = pretrain_sda_sgd(sda=sda,
                                      train_names=train_names,
                                      window_size=window_size,
                                      pretraining_epochs=pretraining_epochs,
                                      pretrain_lr=pretrain_lr,
                                      corruption_levels=corruption_levels)
    '''
    pretrained_sda = pretrain_sda_cg(sda=sda,
                                     train_names=train_names,
                                     window_size=window_size,
                                     pretraining_epochs=pretraining_epochs,
                                     corruption_levels=corruption_levels)
    end_time = timeit.default_timer()
    # Plot each layer's pretraining cost curve (learning_rate=0 because the
    # CG path has no explicit learning rate to report).
    for i in xrange(sda.n_layers):
        print(i, 'i pretrained')
        visualize_pretraining(train_cost=pretrained_sda.dA_layers[i].train_cost_array,
                              window_size=window_size,
                              learning_rate=0,
                              corruption_level=corruption_levels[i],
                              n_hidden=sda.dA_layers[i].n_hidden,
                              da_layer=i,
                              datasets_folder=output_folder,
                              base_folder=base_folder)
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################
    # create matrices for params of HMM layer: 5 hidden states per class HMM
    n_hiddens=[5]*n_out
    # create hmm container, one HMM per output class
    hmmLayer = GeneralHMM(
        n_hiddens = n_hiddens,
        n_hmms = n_out
    )
    # train hmms on data of each patient
    train_reader = ICHISeqDataReader(train_names)
    n_train_patients = len(train_names)
    for train_patient in xrange(n_train_patients):
        # get data divided on sequences with respect to labels
        train_set = train_reader.read_one_with_window(
            window_size=window_size,
            divide=True
        )
        for i in xrange(hmmLayer.n_hmms):
            cur_train_set = train_set[i].eval()
            # skip classes with no windows for this patient
            if cur_train_set.shape[0] <= 0:
                continue
            print('train_set[i].eval(): ', train_set[i].eval().shape)
            # encode every window through the stacked dAs, then get
            # (avg, disp) labels for the x-values
            train_visible_after_sda = numpy.array([sda.get_da_output(
                numpy.array(cur_train_set[time]).reshape(1, -1))
                for time in xrange(cur_train_set.shape[0])])
            x_labels = create_labels_after_das(
                da_output_matrix = train_visible_after_sda,
                rank=rank,
                start_base=start_base
            )
            hmmLayer.hmm_models[i].fit([numpy.array(x_labels).reshape(-1, 1)])
        # validate after each patient and record [patient index, error]
        error_cur_epoch = hmmLayer.validate_model(
            valid_names = valid_names,
            window_size = window_size,
            rank = rank,
            start_base = start_base
        )
        hmmLayer.valid_error_array.append([])
        hmmLayer.valid_error_array[-1].append(train_patient)
        hmmLayer.valid_error_array[-1].append(error_cur_epoch)
        gc.collect()
    gc.collect()
    print('MultinomialHMM created')
    sda.set_hmm_layer(
        hmm_model=hmmLayer
    )
    return sda