def train():
    """Build an ``HMM_second`` model from the training patients and validate it.

    Data for three hard-coded patient lists (train / validation / test) is
    loaded through ``ICHISeqDataReader.read_all_for_second_hmm``.  The
    training split is handed to ``HMM_second`` and the resulting object is
    then passed to ``validation`` together with the validation split.
    Progress messages are written to the module-level ``debug_file``.

    Takes no arguments and returns ``None``.
    """
    # Larger candidate training list, kept for reference:
    # 'p10a','p011','p013','p014','p020','p022','p040','p045','p048'
    training_patients = ['p10a', 'p011', 'p013']
    validation_patients = ['p09b', 'p023', 'p035', 'p038']
    testing_patients = ['p09a', 'p033']

    # Each reader returns the data divided into sequences with respect to
    # labels, as an (x, y) pair.
    reader_for_train = ICHISeqDataReader(training_patients)
    train_x, train_y = reader_for_train.read_all_for_second_hmm()

    reader_for_valid = ICHISeqDataReader(validation_patients)
    valid_x, valid_y = reader_for_valid.read_all_for_second_hmm()

    reader_for_test = ICHISeqDataReader(testing_patients)
    test_x, test_y = reader_for_test.read_all_for_second_hmm()

    # All three splits bundled together; nothing below reads this list.
    datasets = [
        (train_x, train_y),
        (valid_x, valid_y),
        (test_x, test_y),
    ]

    debug_file.write('data is got')

    # Number of visible labels is (10**rank + 1) cubed.
    rank = 1
    base = 10 ** rank + 1
    n_visible_labels = base ** 3

    trained_HMM = HMM_second(
        n_visible=n_visible_labels,
        train_set=(train_x, train_y),
        train_patient_list=training_patients,
    )
    # Explicit collection right after model construction.
    gc.collect()
    debug_file.write('Hmm created')

    debug_file.write('Start validation')
    validation(
        HMM=trained_HMM,
        patient_list=validation_patients,
        valid_set=(valid_x, valid_y),
    )
def train_all_data():
    """Build one model over all training patients and report validation error.

    Steps, in order:

    1. Load the train / validation / test splits for the hard-coded patient
       lists via ``ICHISeqDataReader.read_all_for_second_hmm``.
    2. Pass every split through ``change_data_for_ws`` with the chosen
       ``window_size``.
    3. Build a model from the training split with ``create_hmm_for_all_data``.
    4. Call ``get_error_on_model`` on the validation split.

    Progress messages are printed to stdout.  Takes no arguments and
    returns ``None``.
    """
    # Larger candidate training list, kept for reference:
    # 'p10a','p011','p013','p014','p020','p022','p040','p045','p048'
    train_data_names = ["p10a"]
    valid_data = ["p09b", "p023", "p035", "p038"]
    test_data = ["p09a", "p033"]

    n_train_patients = len(train_data_names)
    n_valid_patients = len(valid_data)
    n_test_patients = len(test_data)

    # The same rank/start_base pair is handed to every reader below and also
    # determines the number of visible labels.
    rank = 1
    start_base = 5
    base = pow(start_base, rank) + 1

    train_reader = ICHISeqDataReader(train_data_names)
    # Get data divided into sequences with respect to labels.
    train_set_x, train_set_y = train_reader.read_all_for_second_hmm(rank=rank, start_base=start_base)

    valid_reader = ICHISeqDataReader(valid_data)
    valid_set_x, valid_set_y = valid_reader.read_all_for_second_hmm(rank=rank, start_base=start_base)

    test_reader = ICHISeqDataReader(test_data)
    test_set_x, test_set_y = test_reader.read_all_for_second_hmm(rank=rank, start_base=start_base)

    print("data is got")

    n_visible_labels = pow(base, 3)
    n_hidden = 7
    # Single source of truth for the window size: used for data preparation,
    # model construction and evaluation alike.
    window_size = 1

    new_train_set_x, new_train_set_y = change_data_for_ws(
        dataset=(train_set_x, train_set_y),
        window_size=window_size,
        base_for_labels=n_visible_labels,
        n_patients=n_train_patients,
    )

    new_valid_set_x, new_valid_set_y = change_data_for_ws(
        dataset=(valid_set_x, valid_set_y),
        window_size=window_size,
        base_for_labels=n_visible_labels,
        n_patients=n_valid_patients,
    )

    # NOTE(review): the transformed test split is computed but not consumed
    # anywhere in this function - confirm whether a test evaluation is missing.
    new_test_set_x, new_test_set_y = change_data_for_ws(
        dataset=(test_set_x, test_set_y),
        window_size=window_size,
        base_for_labels=n_visible_labels,
        n_patients=n_test_patients,
    )

    trained_HMM = create_hmm_for_all_data(
        n_hidden=n_hidden,
        # Visible alphabet size grows as n_visible_labels ** window_size.
        n_visible=pow(n_visible_labels, window_size),
        train_set=(new_train_set_x, new_train_set_y),
        n_patients=n_train_patients,
        window_size=window_size,
    )

    gc.collect()
    print("Hmm created")
    # Evaluate with the same window size the data and model were built for
    # (previously a hard-coded literal 1, which could drift from window_size).
    get_error_on_model(
        model=trained_HMM,
        n_patients=n_valid_patients,
        test_set=(new_valid_set_x, new_valid_set_y),
        window_size=window_size,
    )