def test_sda(sda, test_names, rank, start_base, window_size=1, algo='viterbi'):
    """Evaluate a pretrained SdA + HMM pipeline on each test patient.

    For every patient in ``test_names`` the raw windowed signal is pushed
    through the stacked autoencoder, discretized into labels, decoded by
    the attached HMM layer, and compared against the ground-truth states.
    The per-patient mean error is printed.

    :param sda: trained SdA instance with an attached ``hmmLayer``
    :param test_names: sequence of patient identifiers to read
    :param rank: rank used when discretizing the dA output into labels
    :param start_base: base offset used for label creation
    :param window_size: sliding-window width used when reading the data
    :param algo: decoding algorithm name (currently unused in this body)
    """
    test_reader = ICHISeqDataReader(test_names)
    n_test_patients = len(test_names)

    for test_patient in xrange(n_test_patients):
        # One patient at a time: the full, undivided windowed sequence.
        test_set_x, test_set_y = test_reader.read_one_with_window(
            window_size=window_size,
            divide=False
        )
        test_set_x = test_set_x.eval()
        test_set_y = test_set_y.eval()
        n_test_times = test_set_x.shape[0]

        # Propagate each window through the stacked dA layers.
        test_visible_after_sda = numpy.array([sda.get_da_output(
                numpy.array(test_set_x[time]).reshape(1, -1))
                for time in xrange(n_test_times)])

        # Discretize the dA outputs into HMM observation labels.
        new_test_visible = create_labels_after_das(
            da_output_matrix=test_visible_after_sda,
            rank=rank,
            start_base=start_base
        )
        predicted_states = sda.hmmLayer.define_labels_seq(new_test_visible)
        error_array = errors(predicted_states=numpy.array(predicted_states),
                             actual_states=numpy.array(test_set_y))

        patient_error = error_array.eval().mean()

        print(patient_error, ' error for patient ' + str(test_patient))
        gc.collect()
 def train(self, train_names, valid_names, window_size, rank, start_base):
     """Fit the per-class HMMs on each training patient in turn.

     After every patient the model is validated and the pair
     [patient index, validation error] is recorded in
     ``self.valid_error_array``.
     """
     reader = ICHISeqDataReader(train_names)
     # One pass per training patient.
     for patient_idx in xrange(len(train_names)):
         # Sequences come pre-split by label, so each HMM only sees
         # observations belonging to its own class.
         divided_set = reader.read_one_with_window(
             window_size=window_size,
             divide=True
         )
         for hmm_idx in xrange(self.n_hmms):
             # Discretize x-values into (avg, disp) labels for this class.
             labels = create_labels(
                 matrix=divided_set[hmm_idx].eval(),
                 rank=rank,
                 start_base=start_base
             )
             self.hmm_models[hmm_idx].fit([numpy.array(labels).reshape(-1, 1)])

         epoch_error = self.validate_model(
             valid_names=valid_names,
             window_size=window_size,
             rank=rank,
             start_base=start_base
         )
         self.valid_error_array.append([patient_idx, epoch_error])

         gc.collect()
 def validate_model(self, valid_names, window_size, rank, start_base):
     """Compute the mean prediction error over the whole validation set.

     Reads every validation patient, discretizes the observations into
     labels, concatenates all sequences, and scores them with
     ``mean_error``.

     :param valid_names: patient identifiers in the validation set
     :param window_size: sliding-window width used when reading the data
     :param rank: rank used when discretizing x-values into labels
     :param start_base: base offset used for label creation
     :return: mean error over all validation patients
     """
     valid_reader = ICHISeqDataReader(valid_names)
     x_chunks = []
     y_chunks = []
     for i in xrange(len(valid_names)):
         valid_x, valid_y = valid_reader.read_one_with_window(
             window_size=window_size,
             divide=False
         )
         x_chunks.append(create_labels(
             matrix=valid_x.eval(),
             rank=rank,
             start_base=start_base
         ))
         y_chunks.append(valid_y.eval())
     # Concatenate once after the loop instead of growing the arrays on
     # every iteration (O(n) instead of O(n^2) copying).  Empty validation
     # sets still yield empty sequences, as before.
     all_valid_x = numpy.concatenate(x_chunks) if x_chunks else []
     all_valid_y = numpy.concatenate(y_chunks) if y_chunks else []
     print(len(all_valid_x), 'x')
     print(len(all_valid_y), 'y')
     #compute mean error value for patients in validation set
     error = mean_error(
         gen_hmm=self,
         obs_seq=all_valid_x,
         actual_states=all_valid_y
     )
     return error
def test_hmm(gen_hmm, test_names, window_size, rank, start_base):
    """Print the mean HMM prediction error for each test patient.

    :param gen_hmm: trained GeneralHMM container to evaluate
    :param test_names: sequence of patient identifiers to read
    :param window_size: sliding-window width used when reading the data
    :param rank: rank used when discretizing x-values into labels
    :param start_base: base offset used for label creation
    """
    test_reader = ICHISeqDataReader(test_names)
    n_test_patients = len(test_names)

    for i in xrange(n_test_patients):
        # The full, undivided windowed sequence for one patient.
        test_x, test_y = test_reader.read_one_with_window(
            window_size=window_size,
            divide=False
        )
        # NOTE(review): every other call site in this file invokes
        # create_labels(matrix, rank, start_base) without a window_size
        # kwarg; the stray window_size that was passed here would raise a
        # TypeError unless create_labels accepts it -- removed for
        # consistency.
        test_x = create_labels(
            matrix=test_x.eval(),
            rank=rank,
            start_base=start_base
        )

        #compute mean error value for one patient in test set
        patient_error = mean_error(
            gen_hmm=gen_hmm,
            obs_seq=test_x,
            actual_states=test_y.eval()
        )

        print(patient_error, ' error for patient ' + str(test_names[i]))

        gc.collect()
def train_SdA(train_names, valid_names,
             output_folder, base_folder,
             window_size,
             corruption_levels,
             pretraining_epochs,
             start_base,
             rank,
             pretrain_lr):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.
    This is demonstrated on ICHI.
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer
    :type datasets: array
    :param datasets: [train_set, valid_set, test_set]
    
    :type output_folder: string
    :param output_folder: folder for costand error graphics with results
    """

    # compute number of examples given in training set
    n_in = window_size*3  # number of input units
    n_out = 7  # number of output units
    
    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_in,
        hidden_layers_sizes=[window_size*2, window_size],
        n_outs=n_out
    )
    # end-snippet-3 start-snippet-4
        
    #########################
    # PRETRAINING THE MODEL #
    #########################
    
    start_time = timeit.default_timer()
    '''
    pretrained_sda = pretrain_sda_sgd(sda=sda,
                                  train_names=train_names,
                                  window_size=window_size,
                                  pretraining_epochs=pretraining_epochs,
                                  pretrain_lr=pretrain_lr,
                                  corruption_levels=corruption_levels)
    
    '''
    pretrained_sda = pretrain_sda_cg(sda=sda,
                                  train_names=train_names,
                                  window_size=window_size,
                                  pretraining_epochs=pretraining_epochs,
                                  corruption_levels=corruption_levels)
                         
    end_time = timeit.default_timer()
    
    for i in xrange(sda.n_layers):
        print(i, 'i pretrained')
        visualize_pretraining(train_cost=pretrained_sda.dA_layers[i].train_cost_array,
                              window_size=window_size,
                              learning_rate=0,
                              corruption_level=corruption_levels[i],
                              n_hidden=sda.dA_layers[i].n_hidden,
                              da_layer=i,
                              datasets_folder=output_folder,
                              base_folder=base_folder)

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    
    ########################
    # FINETUNING THE MODEL #
    ########################
                      
    #create matrices for params of HMM layer

    n_hiddens=[5]*n_out
    
    #create hmm container
    hmmLayer = GeneralHMM(
        n_hiddens = n_hiddens,
        n_hmms = n_out
    )
    
    #train_hmm        
    train_reader = ICHISeqDataReader(train_names)
    n_train_patients = len(train_names)
    #train hmms on data of each pattient
    for train_patient in xrange(n_train_patients):
        #get data divided on sequences with respect to labels
        train_set = train_reader.read_one_with_window(
            window_size=window_size,
            divide=True
        )
        for i in xrange(hmmLayer.n_hmms):
            cur_train_set = train_set[i].eval()
            if cur_train_set.shape[0] <= 0:
                continue
            print('train_set[i].eval(): ', train_set[i].eval().shape)
            #get (avg, disp) labels for x-values
            train_visible_after_sda = numpy.array([sda.get_da_output(
                numpy.array(cur_train_set[time]).reshape(1, -1))
                for time in xrange(cur_train_set.shape[0])])
                    
            x_labels = create_labels_after_das(
                da_output_matrix = train_visible_after_sda,
                rank=rank,
                start_base=start_base
            )
            hmmLayer.hmm_models[i].fit([numpy.array(x_labels).reshape(-1, 1)])
        
        error_cur_epoch = hmmLayer.validate_model(
            valid_names = valid_names,
            window_size = window_size,
            rank = rank,
            start_base = start_base
        )
        hmmLayer.valid_error_array.append([])
        hmmLayer.valid_error_array[-1].append(train_patient)
        hmmLayer.valid_error_array[-1].append(error_cur_epoch)
            
        gc.collect()
        
    gc.collect()
    print('MultinomialHMM created')
    
    sda.set_hmm_layer(
        hmm_model=hmmLayer
    )
    return sda