def write_feature_matrix():
    """Run ``get_feature_matrix`` for every subtype and for the mixed set.

    The aligned sequences come from ``load_aligned_seqs()`` and the matching
    distance data from ``mix_subtypes_dist()``. For each dataset,
    ``get_feature_matrix`` receives the sequences, the distances and
    ``featureMatrix_path + "<subtype>.feats"`` (presumably the file it writes
    to -- confirm against ``get_feature_matrix``). Its return values are not
    used here.
    """
    seqs_h1n1, seqs_h3n2, seqs_h5n1, seqs_h9n2, mixed_seqs = load_aligned_seqs()
    dh1n1, dh3n2, dh5n1, dh9n2, dmixed = mix_subtypes_dist()

    # (file name, sequences, distances) -- processed in this order.
    jobs = (
        ("h1n1.feats", seqs_h1n1, dh1n1),
        ("h3n2.feats", seqs_h3n2, dh3n2),
        ("h5n1.feats", seqs_h5n1, dh5n1),
        ("h9n2.feats", seqs_h9n2, dh9n2),
        ("mixed.feats", mixed_seqs, dmixed),
    )
    for file_name, seqs, dist in jobs:
        get_feature_matrix(seqs, dist, featureMatrix_path + file_name)
def transfer_learning(test_data):
    """Train GM5 on three subtypes and test it on the held-out fourth.

    For example ``test_data="h9n2"`` trains on H1N1+H3N2+H5N1 and tests on
    H9N2. The four mutation-matrix datasets are loaded with
    ``load_mut_dataset``; the three training sets are concatenated along
    axis 0. Labels are produced by ``create_binary_labels(Y, 4)``.

    Ten runs are performed. Each run re-shuffles the training and test sets
    (the shuffled arrays are fed back in, so shuffles accumulate across runs)
    and calls ``multiReg_GM5`` with
    ``../result/bioinfo2008Liao/tl_<test_data>_<run>.gm5`` as its first
    argument (presumably the result file -- confirm against ``multiReg_GM5``).

    Args:
        test_data: name of the held-out subtype; one of ``"h1n1"``,
            ``"h3n2"``, ``"h5n1"``, ``"h9n2"``.

    Raises:
        ValueError: if ``test_data`` is not one of the four subtype names.
    """
    # Parenthesised form prints the same on Python 2 and Python 3.
    print(test_data)

    subtypes = ("h1n1", "h3n2", "h5n1", "h9n2")
    # Fail fast: reject a bad name before any dataset is loaded.
    if test_data not in subtypes:
        raise ValueError(
            "unknown test_data %r; expected one of: %s"
            % (test_data, ", ".join(subtypes)))

    # The fifth value (distances of the mixed set) is not needed here.
    dh1n1, dh3n2, dh5n1, dh9n2, _ = mix_subtypes_dist()
    dists = dict(zip(subtypes, (dh1n1, dh3n2, dh5n1, dh9n2)))

    dataset = {}
    for name in subtypes:
        X, Y, XName = load_mut_dataset(mutMatrix_path + name + ".mut",
                                       dists[name])
        dataset[name] = (X, Y, XName)

    X_test, Y_test, XName_test = [np.array(part)
                                  for part in dataset[test_data]]
    labels_test = create_binary_labels(Y_test, 4)

    # Remaining three subtypes, in their original order, form the training set.
    train_parts = [dataset[name] for name in subtypes if name != test_data]
    X_train, Y_train, XName_train = [np.concatenate(columns, axis=0)
                                     for columns in zip(*train_parts)]
    labels_train = create_binary_labels(Y_train, 4)

    for i in range(10):
        outfile = ("../result/bioinfo2008Liao/tl_" + test_data + "_" + str(i)
                   + ".gm5")
        X_train, Y_train, labels_train, XName_train = \
            shuffle_dataset_with_labels(X_train, Y_train, labels_train,
                                        XName_train)
        X_test, Y_test, labels_test, XName_test = \
            shuffle_dataset_with_labels(X_test, Y_test, labels_test,
                                        XName_test)
        multiReg_GM5(outfile, X_train, Y_train, labels_train, XName_train,
                     X_test, Y_test, labels_test, XName_test)
def writeMutMatrix():
    """Run ``get_mutMatrix`` for every subtype and for the mixed set.

    The aligned sequences come from ``load_aligned_seqs()`` and the matching
    distance data from ``mix_subtypes_dist()``. For each dataset,
    ``get_mutMatrix`` receives the sequences, the distances and
    ``mutMatrix_path + "<subtype>.mut"`` (presumably the file it writes to --
    confirm against ``get_mutMatrix``). ``mutMatrix_path`` is not defined in
    this function; it is read from the enclosing scope. The return values of
    ``get_mutMatrix`` are not used here.
    """
    seqs_h1n1, seqs_h3n2, seqs_h5n1, seqs_h9n2, mixed_seqs = load_aligned_seqs()
    dh1n1, dh3n2, dh5n1, dh9n2, dmixed = mix_subtypes_dist()

    # (file name, sequences, distances) -- processed in this order.
    jobs = (
        ("h1n1.mut", seqs_h1n1, dh1n1),
        ("h3n2.mut", seqs_h3n2, dh3n2),
        ("h5n1.mut", seqs_h5n1, dh5n1),
        ("h9n2.mut", seqs_h9n2, dh9n2),
        ("mixed.mut", mixed_seqs, dmixed),
    )
    for file_name, seqs, dist in jobs:
        get_mutMatrix(seqs, dist, mutMatrix_path + file_name)
def transfer_learning(test_data):
    """Train the random-forest model on three subtypes, test on the fourth.

    The four feature-matrix datasets are loaded with
    ``load_features_dataset``; the three training sets are concatenated along
    axis 0. Labels are produced by ``create_binary_labels(Y, 4)``.

    Ten runs are performed. Each run re-shuffles the training and test sets
    (the shuffled arrays are fed back in, so shuffles accumulate across runs)
    and calls ``rf_model`` with
    ``../result/srep2017Adapted/tl_<test_data>_<run>.rf_srep`` as its first
    argument (presumably the result file -- confirm against ``rf_model``).

    Args:
        test_data: name of the held-out subtype; one of ``"h1n1"``,
            ``"h3n2"``, ``"h5n1"``, ``"h9n2"``.

    Raises:
        ValueError: if ``test_data`` is not one of the four subtype names.
    """
    subtypes = ("h1n1", "h3n2", "h5n1", "h9n2")
    # Fail fast: reject a bad name before any dataset is loaded.
    if test_data not in subtypes:
        raise ValueError(
            "unknown test_data %r; expected one of: %s"
            % (test_data, ", ".join(subtypes)))

    # The fifth value (distances of the mixed set) is not needed here.
    dh1n1, dh3n2, dh5n1, dh9n2, _ = mix_subtypes_dist()
    dists = dict(zip(subtypes, (dh1n1, dh3n2, dh5n1, dh9n2)))

    dataset = {}
    for name in subtypes:
        X, Y, XName = load_features_dataset(
            featureMatrix_path + name + ".feats", dists[name])
        dataset[name] = (X, Y, XName)

    X_test, Y_test, XName_test = [np.array(part)
                                  for part in dataset[test_data]]
    labels_test = create_binary_labels(Y_test, 4)

    # Remaining three subtypes, in their original order, form the training set.
    train_parts = [dataset[name] for name in subtypes if name != test_data]
    X_train, Y_train, XName_train = [np.concatenate(columns, axis=0)
                                     for columns in zip(*train_parts)]
    labels_train = create_binary_labels(Y_train, 4)

    for i in range(10):
        outfile = ("../result/srep2017Adapted/tl_" + test_data + "_" + str(i)
                   + ".rf_srep")
        X_train, Y_train, labels_train, XName_train = \
            shuffle_dataset_with_labels(X_train, Y_train, labels_train,
                                        XName_train)
        X_test, Y_test, labels_test, XName_test = \
            shuffle_dataset_with_labels(X_test, Y_test, labels_test,
                                        XName_test)
        rf_model(outfile, X_train, Y_train, labels_train, XName_train,
                 X_test, Y_test, labels_test, XName_test)
def transfer_learning_2():
    """Train the random-forest model on H1N1+H3N2 and test on H5N1+H9N2.

    All four feature-matrix datasets are loaded with
    ``load_features_dataset``. The training pair and the test pair are each
    concatenated along axis 0, and labels are produced by
    ``create_binary_labels(Y, 4)``.

    Ten runs are performed. Every run shuffles the same unshuffled base
    arrays and calls ``rf_model`` with
    ``../result/srep2017Adapted/tl2_<run>.rf_srep`` as its first argument
    (presumably the result file -- confirm against ``rf_model``).
    """
    dist_h1n1, dist_h3n2, dist_h5n1, dist_h9n2, _dist_mixed = \
        mix_subtypes_dist()

    # (subtype, feature file, distances) -- loaded in this order.
    sources = (
        ("h1n1", featureMatrix_path + "h1n1.feats", dist_h1n1),
        ("h3n2", featureMatrix_path + "h3n2.feats", dist_h3n2),
        ("h5n1", featureMatrix_path + "h5n1.feats", dist_h5n1),
        ("h9n2", featureMatrix_path + "h9n2.feats", dist_h9n2),
    )
    loaded = {}
    for subtype, feats_file, dist in sources:
        feats, targets, names = load_features_dataset(feats_file, dist)
        loaded[subtype] = (feats, targets, names)

    # zip(*...) pairs up the X, Y and XName members of the two subtypes.
    base_X_train, base_Y_train, base_names_train = [
        np.concatenate(pair, axis=0)
        for pair in zip(loaded["h1n1"], loaded["h3n2"])]
    base_X_test, base_Y_test, base_names_test = [
        np.concatenate(pair, axis=0)
        for pair in zip(loaded["h5n1"], loaded["h9n2"])]

    base_labels_train = create_binary_labels(base_Y_train, 4)
    base_labels_test = create_binary_labels(base_Y_test, 4)

    for run in range(10):
        outfile = "../result/srep2017Adapted/tl2_" + str(run) + ".rf_srep"
        X_tr, Y_tr, labels_tr, names_tr = shuffle_dataset_with_labels(
            base_X_train, base_Y_train, base_labels_train, base_names_train)
        X_te, Y_te, labels_te, names_te = shuffle_dataset_with_labels(
            base_X_test, base_Y_test, base_labels_test, base_names_test)
        rf_model(outfile, X_tr, Y_tr, labels_tr, names_tr,
                 X_te, Y_te, labels_te, names_te)
def transfer_learning_2():
    """Train GM5 on H1N1+H3N2 and test it on H5N1+H9N2.

    All four mutation-matrix datasets are loaded with ``load_mut_dataset``.
    The training pair and the test pair are each concatenated along axis 0,
    and labels are produced by ``create_binary_labels(Y, 4)``.

    Ten runs are performed. Every run shuffles the same unshuffled base
    arrays and calls ``multiReg_GM5`` with
    ``../result/bioinfo2008Liao/tl2_<run>.gm5`` as its first argument
    (presumably the result file -- confirm against ``multiReg_GM5``).
    """
    dist_h1n1, dist_h3n2, dist_h5n1, dist_h9n2, _dist_mixed = \
        mix_subtypes_dist()

    # (subtype, mutation-matrix file, distances) -- loaded in this order.
    sources = (
        ("h1n1", mutMatrix_path + "h1n1.mut", dist_h1n1),
        ("h3n2", mutMatrix_path + "h3n2.mut", dist_h3n2),
        ("h5n1", mutMatrix_path + "h5n1.mut", dist_h5n1),
        ("h9n2", mutMatrix_path + "h9n2.mut", dist_h9n2),
    )
    loaded = {}
    for subtype, mut_file, dist in sources:
        feats, targets, names = load_mut_dataset(mut_file, dist)
        loaded[subtype] = (feats, targets, names)

    # Train on H1N1+H3N2; test on H5N1+H9N2.
    # zip(*...) pairs up the X, Y and XName members of the two subtypes.
    base_X_train, base_Y_train, base_names_train = [
        np.concatenate(pair, axis=0)
        for pair in zip(loaded["h1n1"], loaded["h3n2"])]
    base_X_test, base_Y_test, base_names_test = [
        np.concatenate(pair, axis=0)
        for pair in zip(loaded["h5n1"], loaded["h9n2"])]

    base_labels_train = create_binary_labels(base_Y_train, 4)
    base_labels_test = create_binary_labels(base_Y_test, 4)

    for run in range(10):
        outfile = "../result/bioinfo2008Liao/tl2_" + str(run) + ".gm5"
        X_tr, Y_tr, labels_tr, names_tr = shuffle_dataset_with_labels(
            base_X_train, base_Y_train, base_labels_train, base_names_train)
        X_te, Y_te, labels_te, names_te = shuffle_dataset_with_labels(
            base_X_test, base_Y_test, base_labels_test, base_names_test)
        multiReg_GM5(outfile, X_tr, Y_tr, labels_tr, names_tr,
                     X_te, Y_te, labels_te, names_te)
line = prefix + ", " + str(avg_accu1) + ", " + str(avg_accu2) + ", " line = line + str(avg_prec1) + ", " + str(avg_prec2) + ", " line = line + str(avg_rec1) + ", " + str(avg_rec2) + ", " line = line + str(avg_f1_1) + ", " + str(avg_f1_2) + "\n" result_csv.write(line) result_csv.close() if __name__ == '__main__': seqs_H1N1, seqs_H3N2, seqs_H5N1, seqs_H9N2, mixed_seqs = load_aligned_seqs( ) dh1n1, dh3n2, dh5n1, dh9n2, dmixed = mix_subtypes_dist() train_rfsrep("h1n1", dh1n1) train_rfsrep("h3n2", dh3n2) train_rfsrep("h5n1", dh5n1) train_rfsrep("h9n2", dh9n2) train_rfsrep("mixed", dmixed) transfer_learning_2() transfer_learning("h1n1") transfer_learning("h3n2") transfer_learning("h5n1") transfer_learning("h9n2") writeResults_csv()