def test_all_params():
    """Run ``test_params`` over every learning-rate / window-size pair.

    The train, validation and test patient lists are fixed inside the
    function. Each list is read through its own ``ICHISeqDataReader`` and
    the resulting ``(x, y)`` pairs are handed to ``test_params`` in
    train, valid, test order.
    """
    train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    valid_data = ['p09b','p023','p035','p038']
    test_data = ['p09a','p033']
    splits = (train_data, valid_data, test_data)

    # One reader per split, read in train -> valid -> test order.
    datasets = []
    for names in splits:
        reader = ICHISeqDataReader(names)
        set_x, set_y = reader.read_all()
        datasets.append((set_x, set_y))

    # The folder name records which patients went into which split.
    output_folder = '[%s], [%s], [%s]' % tuple(
        ",".join(names) for names in splits
    )

    learning_rates = [0.0001]
    window_sizes = [13]
    grid = [(lr, ws) for lr in learning_rates for ws in window_sizes]

    for lr, ws in grid:
        test_params(
            learning_rate=lr,
            n_epochs=1,
            window_size=ws,
            datasets=datasets,
            output_folder=output_folder,
            base_folder='regression_plots',
        )
# Example #2
# 0
def test_da_params(corruption_level):
    """Call ``train_dA`` for every learning-rate / window-size combination.

    Args:
        corruption_level: forwarded unchanged to ``train_dA``.

    The hidden layer size passed to ``train_dA`` is always twice the
    window size. Only the training samples are handed to ``train_dA``;
    the validation and test splits are still read, and their patient
    names still appear in the output folder name.
    """
    train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    valid_data = ['p09b','p023','p035','p038']
    test_data = ['p09a','p033']
    splits = (train_data, valid_data, test_data)

    # Read every split in train -> valid -> test order.
    # NOTE(review): the valid/test results are never used below.
    loaded = []
    for names in splits:
        reader = ICHISeqDataReader(names)
        samples, labels = reader.read_all()
        loaded.append((samples, labels))
    train_set = loaded[0][0]

    output_folder = '[%s], [%s], [%s]' % tuple(
        ",".join(names) for names in splits
    )

    learning_rates = [0.001, 0.003, 0.005, 0.007, 0.009, 0.011, 0.013, 0.015]
    window_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    grid = [(lr, ws) for lr in learning_rates for ws in window_sizes]

    for lr, ws in grid:
        train_dA(
            learning_rate=lr,
            training_epochs=1,
            window_size=ws,
            corruption_level=corruption_level,
            n_hidden=ws * 2,
            train_set=train_set,
            output_folder=output_folder,
            base_folder='dA_plots',
        )
# Example #3
# 0
def test_all_params():
    """Run ``test_SdA`` once per window size on the fixed patient splits.

    Each split (train, valid, test) is read through its own
    ``ICHISeqDataReader``; the ``(x, y)`` pairs are passed to ``test_SdA``
    together with 100 pretraining epochs and 1000 training epochs.
    """
    train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    valid_data = ['p09b','p023','p035','p038']
    test_data = ['p09a','p033']
    splits = (train_data, valid_data, test_data)

    # One reader per split, read in train -> valid -> test order.
    datasets = []
    for names in splits:
        reader = ICHISeqDataReader(names)
        set_x, set_y = reader.read_all()
        datasets.append((set_x, set_y))

    output_folder = '[%s], [%s], [%s]' % tuple(
        ",".join(names) for names in splits
    )

    window_sizes = [13]
    for ws in window_sizes:
        test_SdA(
            datasets=datasets,
            output_folder=output_folder,
            base_folder='SdA_cg_plots',
            window_size=ws,
            pretraining_epochs=100,
            training_epochs=1000,
        )
def test_sda(sda, test_names, base, window_size=1, algo='viterbi'):
    """Print the per-patient error of ``sda.hmmLayer`` on the test patients.

    Args:
        sda: object providing ``get_da_output``, ``da_layers_output_size``
            and ``hmmLayer``.
        test_names: patient names given to ``ICHISeqDataReader``; one
            document is read per name.
        base: forwarded to ``change_data_for_one_patient`` as
            ``base_for_labels``.
        window_size: number of consecutive rows fed to
            ``sda.get_da_output`` at a time.
        algo: forwarded to ``get_error_on_patient``.
    """
    test_reader = ICHISeqDataReader(test_names)
    # NOTE(review): this result is overwritten by read_next_doc() before it
    # is used; the call is kept because its effect on the reader is not
    # visible from here.
    test_set_x, test_set_y = test_reader.read_all()

    for patient_idx in xrange(len(test_names)):
        # Fetch the next patient's data and labels.
        test_set_x, test_set_y = test_reader.read_next_doc()
        raw_rows = test_set_x.get_value()
        n_windows = raw_rows.shape[0] - window_size + 1

        # Encode every sliding window, then flatten everything into 1-D.
        encoded = numpy.array([
            sda.get_da_output(raw_rows[start: start + window_size]).ravel()
            for start in xrange(n_windows)
        ]).ravel()

        visible_seq, hidden_seq = change_data_for_one_patient(
            hiddens_patient=test_set_y.eval(),
            visibles_patient=encoded,
            window_size=sda.da_layers_output_size,
            base_for_labels=base,
        )

        patient_error = get_error_on_patient(
            model=sda.hmmLayer,
            visible_set=visible_seq,
            hidden_set=hidden_seq,
            algo=algo,
        )

        print(patient_error, ' error for patient ' + str(patient_idx))
        gc.collect()
def test_all_params():
    """Train with ``train_SdA`` and evaluate with ``test_sda`` per window size.

    The patient splits, corruption levels, pretraining learning rate and
    label base are fixed inside the function. The object returned by
    ``train_SdA`` is passed straight to ``test_sda`` along with the test
    patient names.
    """
    train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    valid_data = ['p09b','p023','p035','p038']
    test_data = ['p09a','p033']
    splits = (train_data, valid_data, test_data)

    # One reader per split, read in train -> valid -> test order.
    datasets = []
    for names in splits:
        reader = ICHISeqDataReader(names)
        set_x, set_y = reader.read_all()
        datasets.append((set_x, set_y))

    output_folder = '[%s], [%s], [%s]' % tuple(
        ",".join(names) for names in splits
    )

    corruption_levels = [.1, .2]
    pretrain_lr = .03

    # base = start_base ** rank + 1
    rank = 1
    start_base = 5
    base = start_base ** rank + 1

    window_sizes = [1]
    for ws in window_sizes:
        trained_sda = train_SdA(
            datasets=datasets,
            train_names=train_data,
            output_folder=output_folder,
            base_folder='SdA_second_hmm_without_window',
            window_size=ws,
            corruption_levels=corruption_levels,
            pretrain_lr=pretrain_lr,
            base=base,
            pretraining_epochs=15,
        )
        # test_sda is called without window_size, so its default applies.
        test_sda(sda=trained_sda, test_names=test_data, base=base)
# Example #6
# 0
def test_all_params():
    """Run ``test_SdA`` once per window size with one pretraining and one
    training epoch.

    Only patient ``p10a`` is used for training here; the validation and
    test patient lists are fixed inside the function. Both learning rates
    and the corruption levels are passed through to ``test_SdA``.
    """
    # Full training list, currently disabled:
    # train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']
    train_data = ["p10a"]
    valid_data = ["p09b", "p023", "p035", "p038"]
    test_data = ["p09a", "p033"]
    splits = (train_data, valid_data, test_data)

    # One reader per split, read in train -> valid -> test order.
    datasets = []
    for names in splits:
        reader = ICHISeqDataReader(names)
        set_x, set_y = reader.read_all()
        datasets.append((set_x, set_y))

    output_folder = "[%s], [%s], [%s]" % tuple(
        ",".join(names) for names in splits
    )

    # Settings shared by every window size.
    shared_settings = dict(
        datasets=datasets,
        output_folder=output_folder,
        base_folder="SdA_sgd_cg_plots",
        corruption_levels=[0.1, 0.2],
        pretrain_lr=0.03,
        finetune_lr=0.03,
        pretraining_epochs=1,
        training_epochs=1,
    )

    window_sizes = [10]
    for ws in window_sizes:
        test_SdA(window_size=ws, **shared_settings)
# Example #7
# 0
def test_da_params(corruption_level):
    """Call ``train_dA`` once per window size on the training patients.

    Args:
        corruption_level: forwarded unchanged to ``train_dA``.

    The hidden layer size passed to ``train_dA`` is always twice the
    window size, and a single training epoch is requested.
    """
    train_data = ['p10a','p011','p013','p014','p020','p022','p040','p045','p048']

    train_reader = ICHISeqDataReader(train_data)
    # The labels are read alongside the samples but not used here.
    train_set, train_labels = train_reader.read_all()

    output_folder = '[%s]' % ",".join(train_data)

    window_sizes = [13, 30, 50, 75, 100]
    for ws in window_sizes:
        train_dA(
            training_epochs=1,
            window_size=ws,
            corruption_level=corruption_level,
            n_hidden=ws * 2,
            dataset=train_set,
            output_folder=output_folder,
            base_folder='dA_cg_plots',
        )
def test_sda(sda, test_names, rank, start_base, window_size=1, algo='viterbi'):
    """Print the per-patient error of ``sda.hmmLayer`` on the test patients.

    Args:
        sda: object providing ``get_da_output`` and ``hmmLayer``.
        test_names: patient names given to ``ICHISeqDataReader``; one
            document is read per name.
        rank: forwarded to ``create_labels_after_das``.
        start_base: forwarded to ``create_labels_after_das``.
        window_size: number of consecutive rows fed to
            ``sda.get_da_output`` at a time; also forwarded to
            ``create_labels_after_das``.
        algo: forwarded to ``get_error_on_patient``.
    """
    test_reader = ICHISeqDataReader(test_names)
    # NOTE(review): this result is overwritten by read_next_doc() before it
    # is used; the call is kept because its effect on the reader is not
    # visible from here.
    test_set_x, test_set_y = test_reader.read_all()

    for patient_idx in xrange(len(test_names)):
        # Fetch the next patient's data and labels as plain values.
        test_set_x, test_set_y = test_reader.read_next_doc()
        test_set_x = test_set_x.get_value()
        test_set_y = test_set_y.eval()
        n_windows = test_set_x.shape[0] - window_size

        # One flattened encoder output per sliding window.
        encoded_rows = numpy.array([
            sda.get_da_output(test_set_x[start: start + window_size]).ravel()
            for start in xrange(n_windows)
        ])

        visible_seq = create_labels_after_das(
            da_output_matrix=encoded_rows,
            rank=rank,
            start_base=start_base,
            window_size=window_size,
        )

        # Drop half a window of labels from each end.
        # NOTE(review): whether this length matches visible_seq depends on
        # create_labels_after_das, which is not visible here -- confirm.
        margin = int(window_size/2)
        hidden_seq = test_set_y[margin:len(test_set_y) - margin]

        patient_error = get_error_on_patient(
            model=sda.hmmLayer,
            visible_set=visible_seq,
            hidden_set=hidden_seq,
            algo=algo,
        )

        print(patient_error, ' error for patient ' + str(patient_idx))
        gc.collect()