예제 #1
0
def write_feature_matrix():
    """Compute the feature matrix of every subtype dataset.

    Loads the aligned sequences and the per-dataset distance data, then
    calls ``get_feature_matrix`` once for each of H1N1, H3N2, H5N1, H9N2
    and the mixed set.  The third argument of each call is
    ``featureMatrix_path`` joined with "<subtype>.feats" (presumably the
    output file -- confirm against ``get_feature_matrix``).
    """
    h1n1_seqs, h3n2_seqs, h5n1_seqs, h9n2_seqs, all_seqs = load_aligned_seqs()
    h1n1_dist, h3n2_dist, h5n1_dist, h9n2_dist, all_dist = mix_subtypes_dist()

    # (sequences, distances, file name) for each dataset, in processing order.
    jobs = (
        (h1n1_seqs, h1n1_dist, "h1n1.feats"),
        (h3n2_seqs, h3n2_dist, "h3n2.feats"),
        (h5n1_seqs, h5n1_dist, "h5n1.feats"),
        (h9n2_seqs, h9n2_dist, "h9n2.feats"),
        (all_seqs, all_dist, "mixed.feats"),
    )
    for seqs, dist, file_name in jobs:
        get_feature_matrix(seqs, dist, featureMatrix_path + file_name)
예제 #2
0
def transfer_learning(test_data):
    """Run the leave-one-subtype-out experiment with the GM5 model.

    Trains on three of the four subtype datasets and tests on the remaining
    one (e.g. train on H1N1+H3N2+H5N1, test on H9N2).  The experiment is
    repeated 10 times; every repetition re-shuffles both sets and calls
    ``multiReg_GM5`` with the output path
    "../result/bioinfo2008Liao/tl_<test_data>_<i>.gm5".

    Args:
        test_data: name of the held-out subtype; one of "h1n1", "h3n2",
            "h5n1" or "h9n2".

    Raises:
        ValueError: if ``test_data`` is not one of the four subtype names.
    """
    # The call form prints the same text on Python 2 and Python 3, whereas
    # the bare ``print x`` statement is a SyntaxError on Python 3.
    print(test_data)

    subtypes = ["h1n1", "h3n2", "h5n1", "h9n2"]
    # Reject a bad name before paying for the four dataset loads below.
    if test_data not in subtypes:
        raise ValueError("unknown test subtype %r; expected one of: %s" %
                         (test_data, ", ".join(subtypes)))
    train_subtypes = [name for name in subtypes if name != test_data]

    # The fifth value (distances of the mixed set) is not needed here.
    dh1n1, dh3n2, dh5n1, dh9n2, _ = mix_subtypes_dist()
    distances = (("h1n1", dh1n1), ("h3n2", dh3n2),
                 ("h5n1", dh5n1), ("h9n2", dh9n2))

    dataset = {}
    for name, dist in distances:
        X, Y, XName = load_mut_dataset(mutMatrix_path + name + ".mut", dist)
        dataset[name] = (X, Y, XName)

    X_test, Y_test, XName_test = dataset[test_data]
    X_test = np.array(X_test)
    Y_test = np.array(Y_test)
    XName_test = np.array(XName_test)
    # NOTE(review): 4 is the second argument of create_binary_labels; its
    # meaning (presumably a cutoff on Y) is not visible here -- confirm.
    labels_test = create_binary_labels(Y_test, 4)

    X_parts, Y_parts, XName_parts = zip(
        *[dataset[name] for name in train_subtypes])
    X_train = np.concatenate(X_parts, axis=0)
    Y_train = np.concatenate(Y_parts, axis=0)
    XName_train = np.concatenate(XName_parts, axis=0)
    labels_train = create_binary_labels(Y_train, 4)

    for i in range(10):
        outfile = "../result/bioinfo2008Liao/tl_" + test_data + "_" + str(
            i) + ".gm5"
        # Each repetition shuffles the output of the previous one.
        X_train, Y_train, labels_train, XName_train = shuffle_dataset_with_labels(
            X_train, Y_train, labels_train, XName_train)
        X_test, Y_test, labels_test, XName_test = shuffle_dataset_with_labels(
            X_test, Y_test, labels_test, XName_test)

        multiReg_GM5(outfile, X_train, Y_train, labels_train, XName_train,
                     X_test, Y_test, labels_test, XName_test)
예제 #3
0
def writeMutMatrix():
    """Compute the mutation matrix of every subtype dataset.

    Loads the aligned sequences and the per-dataset distance data, then
    calls ``get_mutMatrix`` once for each of H1N1, H3N2, H5N1, H9N2 and the
    mixed set.  The third argument of each call is ``mutMatrix_path`` joined
    with "<subtype>.mut" (presumably the output file -- confirm against
    ``get_mutMatrix``).
    """
    h1n1_seqs, h3n2_seqs, h5n1_seqs, h9n2_seqs, all_seqs = load_aligned_seqs()
    h1n1_dist, h3n2_dist, h5n1_dist, h9n2_dist, all_dist = mix_subtypes_dist()

    # mutMatrix_path is a module-level setting; an earlier note in this
    # function suggested "../data/bioinfo2008Liao_mutMatrix/" as its value.
    jobs = (
        (h1n1_seqs, h1n1_dist, "h1n1.mut"),
        (h3n2_seqs, h3n2_dist, "h3n2.mut"),
        (h5n1_seqs, h5n1_dist, "h5n1.mut"),
        (h9n2_seqs, h9n2_dist, "h9n2.mut"),
        (all_seqs, all_dist, "mixed.mut"),
    )
    for seqs, dist, file_name in jobs:
        get_mutMatrix(seqs, dist, mutMatrix_path + file_name)
예제 #4
0
def transfer_learning(test_data):
    """Run the leave-one-subtype-out experiment with ``rf_model``.

    Three of the four subtype feature datasets are concatenated into the
    training set and the remaining one, named by ``test_data``, is the test
    set.  The run is repeated 10 times; every repetition re-shuffles both
    sets and calls ``rf_model`` with the output path
    "../result/srep2017Adapted/tl_<test_data>_<i>.rf_srep".

    ``test_data`` must be "h1n1", "h3n2", "h5n1" or "h9n2"; any other value
    makes ``list.remove`` raise ValueError after the datasets are loaded.
    """
    dist_h1n1, dist_h3n2, dist_h5n1, dist_h9n2, dist_mixed = mix_subtypes_dist()

    # Load the four feature datasets in a fixed order, keyed by subtype.
    dataset = {}
    for subtype, dist in (("h1n1", dist_h1n1), ("h3n2", dist_h3n2),
                          ("h5n1", dist_h5n1), ("h9n2", dist_h9n2)):
        feats, target, names = load_features_dataset(
            featureMatrix_path + subtype + ".feats", dist)
        dataset[subtype] = (feats, target, names)

    train_subtypes = ["h1n1", "h3n2", "h5n1", "h9n2"]
    train_subtypes.remove(test_data)

    held_out = dataset[test_data]
    X_test = np.array(held_out[0])
    Y_test = np.array(held_out[1])
    XName_test = np.array(held_out[2])
    # NOTE(review): the meaning of the 4 passed to create_binary_labels is
    # not visible here (presumably a cutoff on Y) -- confirm.
    labels_test = create_binary_labels(Y_test, 4)

    first, second, third = [dataset[subtype] for subtype in train_subtypes]
    X_train = np.concatenate((first[0], second[0], third[0]), axis=0)
    Y_train = np.concatenate((first[1], second[1], third[1]), axis=0)
    XName_train = np.concatenate((first[2], second[2], third[2]), axis=0)
    labels_train = create_binary_labels(Y_train, 4)

    for run in range(10):
        outfile = ("../result/srep2017Adapted/tl_" + test_data + "_" +
                   str(run) + ".rf_srep")
        # Shuffles accumulate: each run starts from the previous run's order.
        X_train, Y_train, labels_train, XName_train = shuffle_dataset_with_labels(
            X_train, Y_train, labels_train, XName_train)
        X_test, Y_test, labels_test, XName_test = shuffle_dataset_with_labels(
            X_test, Y_test, labels_test, XName_test)
        rf_model(outfile, X_train, Y_train, labels_train, XName_train,
                 X_test, Y_test, labels_test, XName_test)
예제 #5
0
def transfer_learning_2():
    """Train ``rf_model`` on H1N1+H3N2 and test it on H5N1+H9N2.

    The four subtype feature datasets are loaded, the first two are
    concatenated into the training set and the last two into the test set.
    The run is repeated 10 times; every repetition shuffles fresh copies of
    the same base sets and calls ``rf_model`` with the output path
    "../result/srep2017Adapted/tl2_<i>.rf_srep".
    """
    # The fifth value (distances of the mixed set) is not needed here.
    dh1n1, dh3n2, dh5n1, dh9n2, _ = mix_subtypes_dist()

    loaded = {}
    for subtype, dist in (("h1n1", dh1n1), ("h3n2", dh3n2),
                          ("h5n1", dh5n1), ("h9n2", dh9n2)):
        X, Y, XName = load_features_dataset(
            featureMatrix_path + subtype + ".feats", dist)
        loaded[subtype] = (X, Y, XName)

    def stack(subtypes):
        # Concatenate the X, Y and name arrays of the given subtypes.
        X_parts, Y_parts, XName_parts = zip(*[loaded[s] for s in subtypes])
        return (np.concatenate(X_parts, axis=0),
                np.concatenate(Y_parts, axis=0),
                np.concatenate(XName_parts, axis=0))

    Xtrain2, Ytrain2, XNameTrain2 = stack(("h1n1", "h3n2"))
    Xtest2, Ytest2, XNameTest2 = stack(("h5n1", "h9n2"))

    # NOTE(review): the meaning of the 4 passed to create_binary_labels is
    # not visible here (presumably a cutoff on Y) -- confirm.
    labels_train2 = create_binary_labels(Ytrain2, 4)
    labels_test2 = create_binary_labels(Ytest2, 4)

    for i in range(10):
        outfile = "../result/srep2017Adapted/tl2_" + str(i) + ".rf_srep"
        # Every repetition shuffles the same base sets, not the previous
        # repetition's output.
        X_train, Y_train, labels_train, XName_train = shuffle_dataset_with_labels(
            Xtrain2, Ytrain2, labels_train2, XNameTrain2)
        X_test, Y_test, labels_test, XName_test = shuffle_dataset_with_labels(
            Xtest2, Ytest2, labels_test2, XNameTest2)

        rf_model(outfile, X_train, Y_train, labels_train, XName_train,
                 X_test, Y_test, labels_test, XName_test)
예제 #6
0
def transfer_learning_2():
    """Train the GM5 model on H1N1+H3N2 and test it on H5N1+H9N2.

    The four subtype mutation datasets are loaded, the first two are
    concatenated into the training set and the last two into the test set.
    The run is repeated 10 times; every repetition shuffles fresh copies of
    the same base sets and calls ``multiReg_GM5`` with the output path
    "../result/bioinfo2008Liao/tl2_<i>.gm5".
    """
    # The fifth value (distances of the mixed set) is not needed here.
    dh1n1, dh3n2, dh5n1, dh9n2, _ = mix_subtypes_dist()

    loaded = {}
    for subtype, dist in (("h1n1", dh1n1), ("h3n2", dh3n2),
                          ("h5n1", dh5n1), ("h9n2", dh9n2)):
        X, Y, XName = load_mut_dataset(mutMatrix_path + subtype + ".mut", dist)
        loaded[subtype] = (X, Y, XName)

    def stack(subtypes):
        # Concatenate the X, Y and name arrays of the given subtypes.
        X_parts, Y_parts, XName_parts = zip(*[loaded[s] for s in subtypes])
        return (np.concatenate(X_parts, axis=0),
                np.concatenate(Y_parts, axis=0),
                np.concatenate(XName_parts, axis=0))

    # Train on H1N1+H3N2; test on H5N1+H9N2.
    Xtrain2, Ytrain2, XNameTrain2 = stack(("h1n1", "h3n2"))
    Xtest2, Ytest2, XNameTest2 = stack(("h5n1", "h9n2"))

    # NOTE(review): the meaning of the 4 passed to create_binary_labels is
    # not visible here (presumably a cutoff on Y) -- confirm.
    labels_train2 = create_binary_labels(Ytrain2, 4)
    labels_test2 = create_binary_labels(Ytest2, 4)

    for i in range(10):
        outfile = "../result/bioinfo2008Liao/tl2_" + str(i) + ".gm5"
        # Every repetition shuffles the same base sets, not the previous
        # repetition's output.
        X_train, Y_train, labels_train, XName_train = shuffle_dataset_with_labels(
            Xtrain2, Ytrain2, labels_train2, XNameTrain2)
        X_test, Y_test, labels_test, XName_test = shuffle_dataset_with_labels(
            Xtest2, Ytest2, labels_test2, XNameTest2)

        multiReg_GM5(outfile, X_train, Y_train, labels_train, XName_train,
                     X_test, Y_test, labels_test, XName_test)
예제 #7
0
        line = prefix + ", " + str(avg_accu1) + ", " + str(avg_accu2) + ", "
        line = line + str(avg_prec1) + ", " + str(avg_prec2) + ", "
        line = line + str(avg_rec1) + ", " + str(avg_rec2) + ", "
        line = line + str(avg_f1_1) + ", " + str(avg_f1_2) + "\n"

        result_csv.write(line)

    result_csv.close()


if __name__ == '__main__':
    # Script entry point: train on each dataset, run both transfer-learning
    # experiments, then write the result summary.

    # The sequence variables are not referenced again in this block; they
    # stay bound at module level in case other functions read them as
    # globals (not visible from here).
    (seqs_H1N1, seqs_H3N2, seqs_H5N1, seqs_H9N2,
     mixed_seqs) = load_aligned_seqs()
    dh1n1, dh3n2, dh5n1, dh9n2, dmixed = mix_subtypes_dist()

    # One training run per dataset, including the mixed set.
    for _subtype, _dist in (("h1n1", dh1n1), ("h3n2", dh3n2),
                            ("h5n1", dh5n1), ("h9n2", dh9n2),
                            ("mixed", dmixed)):
        train_rfsrep(_subtype, _dist)

    # Fixed split first, then one run per held-out subtype.
    transfer_learning_2()
    for _subtype in ("h1n1", "h3n2", "h5n1", "h9n2"):
        transfer_learning(_subtype)

    writeResults_csv()