import math

import hppi


def compare_dataset(dataset1_path, dataset2_path):
    """Compare two HPPI datasets element-wise and report every differing cell."""
    print("dataset1: ", dataset1_path)
    print("dataset2: ", dataset2_path)
    dataset1 = hppi.read_data_sets(dataset1_path)
    dataset2 = hppi.read_data_sets(dataset2_path)
    datas1 = dataset1.datas
    datas2 = dataset2.datas
    is_equal = (datas1 == datas2)
    not_equal_locations = [(row, column)
                           for row, x in enumerate(is_equal)
                           for column, y in enumerate(x)
                           if not y]
    max_diff = 0
    print("not_equal_locations:")
    for row, column in not_equal_locations:
        print("%6s,%6s: %s,%s" % (row, column, datas1[row][column], datas2[row][column]))
        max_diff = max(max_diff, math.fabs(datas1[row][column] - datas2[row][column]))
    print("max_diff: ", max_diff)
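# Minimal usage sketch for compare_dataset(); the two paths below are
# illustrative (reused from other snippets in this project), not a pairing
# prescribed by the repository.
if __name__ == "__main__":
    compare_dataset("data/02-ct-bin", "data/09-hppids")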
import hppi
import pandas


def load_hppids(data_dir):
    """Load an HPPI dataset and return its features and labels as DataFrames."""
    hppids = hppi.read_data_sets(data_dir, one_hot=False)
    X = hppids.datas
    Y = hppids.labels
    print('Successfully loaded ', data_dir, ', shape: ', X.shape)
    return pandas.DataFrame(X), pandas.DataFrame(Y)
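# Usage sketch, assuming the same dataset directory used elsewhere in the
# project; the inspection calls are illustrative only.
X_df, Y_df = load_hppids("data/02-ct-bin")
print(X_df.describe())
print(Y_df[0].value_counts())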
import numpy as np

# create_model and SEED are defined elsewhere in the module
# (a sketch of create_model follows below).

def main():
    from keras.wrappers.scikit_learn import KerasClassifier
    model = KerasClassifier(build_fn=create_model,
                            input_dim=686,
                            hidden_units=[256, 256, 256],
                            kernel_initializer='uniform',
                            activation='relu',
                            dropout_rate=0.4,
                            loss='binary_crossentropy',
                            optimizer='adam',
                            metrics=['accuracy'],
                            epochs=50,
                            batch_size=128)

    import hppi
    hppids = hppi.read_data_sets("data/02-ct-bin", one_hot=False)
    X = hppids.datas
    Y = hppids.labels

    from sklearn.model_selection import StratifiedKFold, cross_val_score
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
    results = cross_val_score(model, X, Y, cv=kfold)
    print(np.average(results))
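# A minimal sketch of what create_model might look like, matching the keyword
# arguments passed to KerasClassifier above. The real definition lives
# elsewhere in the project; this body is an assumption, not the author's code.
from keras.models import Sequential
from keras.layers import Dense, Dropout

def create_model(input_dim=686, hidden_units=[256, 256, 256],
                 kernel_initializer='uniform', activation='relu',
                 dropout_rate=0.4, loss='binary_crossentropy',
                 optimizer='adam', metrics=['accuracy']):
    model = Sequential()
    # Stack dense layers with dropout after each hidden layer.
    for i, units in enumerate(hidden_units):
        if i == 0:
            model.add(Dense(units, input_dim=input_dim,
                            kernel_initializer=kernel_initializer,
                            activation=activation))
        else:
            model.add(Dense(units,
                            kernel_initializer=kernel_initializer,
                            activation=activation))
        model.add(Dropout(dropout_rate))
    # Single sigmoid output for binary PPI classification.
    model.add(Dense(1, kernel_initializer=kernel_initializer, activation='sigmoid'))
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return model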
import numpy as np

import hppi


def load_test_data(data_path):
    """Load the test split and reshape it to (samples, 1, features) for sequence models."""
    hppids = hppi.read_data_sets(data_path, one_hot=True)
    inp_dims = len(hppids.test.datas[0])
    test_datas = np.reshape(hppids.test.datas, (len(hppids.test.datas), 1, inp_dims))
    return test_datas, hppids.test.labels
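# Usage sketch; the dataset path is reused from another snippet in this
# project and the shape check is illustrative.
test_datas, test_labels = load_test_data("data/09-hppids")
print(test_datas.shape)  # expected: (num_test_samples, 1, inp_dims)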
import hppi


def load_data_sets(data_sets_dir):
    """Load an HPPI dataset, shuffle it, and split it into train/test sets."""
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)
    train_datas, train_labels, test_datas, test_labels = hppids.shuffle().split()
    # Uncomment to run on a small subset while debugging:
    # train_datas  = train_datas [:100]
    # train_labels = train_labels[:100]
    # test_datas   = test_datas  [:100]
    # test_labels  = test_labels [:100]
    return train_datas, train_labels, test_datas, test_labels
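# Usage sketch with an illustrative dataset directory.
train_datas, train_labels, test_datas, test_labels = load_data_sets("data/02-ct-bin")
print(len(train_datas), len(test_datas))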
import numpy as np

import hppi


def load_train_data(data_path):
    """Load the training split, shuffle it, and hold out 20% for validation."""
    hppids = hppi.read_data_sets(data_path, one_hot=True)
    (train_length, train_datas, train_labels,
     valid_length, valid_datas, valid_labels) = hppids.train.shuffle().split(ratio=0.8)
    inp_dims = len(train_datas[0])
    # Reshape to (samples, 1, features) for sequence models.
    train_datas = np.reshape(train_datas, (len(train_datas), 1, inp_dims))
    valid_datas = np.reshape(valid_datas, (len(valid_datas), 1, inp_dims))
    return train_length, train_datas, train_labels, valid_length, valid_datas, valid_labels, inp_dims
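# Usage sketch; the path is illustrative and the unpacking mirrors the
# function's return order.
(train_length, train_datas, train_labels,
 valid_length, valid_datas, valid_labels, inp_dims) = load_train_data("data/09-hppids")
print(train_datas.shape, valid_datas.shape)  # (train_length, 1, inp_dims), (valid_length, 1, inp_dims)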
from datetime import datetime

import hppi
from sklearn.metrics import (auc, average_precision_score, log_loss,
                             recall_score, roc_curve)


def train_and_test(data_sets_dir, classifier):
    """Fit a scikit-learn classifier on an HPPI dataset and collect metrics and timings."""
    # Load datasets.
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)
    train_datas, train_labels, test_datas, test_labels = hppids.shuffle().split()
    # Uncomment to run on a small subset while debugging:
    # train_datas  = train_datas [:100]
    # train_labels = train_labels[:100]
    # test_datas   = test_datas  [:100]
    # test_labels  = test_labels [:100]

    # Train.
    begin_time = datetime.now()
    classifier.fit(train_datas, train_labels)
    end_time = datetime.now()
    train_time = (end_time - begin_time).total_seconds()

    # Test.
    begin_time = datetime.now()
    mean_accuracy = classifier.score(test_datas, test_labels)
    end_time = datetime.now()
    test_time = (end_time - begin_time).total_seconds()

    # Predict.
    begin_time = datetime.now()
    prediction = classifier.predict(test_datas)
    # confusion_matrix(test_labels, prediction)
    end_time = datetime.now()
    predict_time = (end_time - begin_time).total_seconds()

    fpr, tpr, thresholds = roc_curve(test_labels, prediction)
    return (
        mean_accuracy,
        auc(fpr, tpr),
        average_precision_score(test_labels, prediction),
        recall_score(test_labels, prediction),
        log_loss(test_labels, prediction),
        train_time,
        test_time,
        predict_time,
    )
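# Usage sketch: train_and_test() accepts any scikit-learn estimator that
# implements fit/score/predict. RandomForestClassifier is an illustrative
# choice here, not necessarily one used by the project.
from sklearn.ensemble import RandomForestClassifier

(mean_accuracy, roc_auc, avg_precision, recall, logloss,
 train_time, test_time, predict_time) = train_and_test(
    "data/02-ct-bin", RandomForestClassifier(n_estimators=100))
print("accuracy = %g, auc = %g" % (mean_accuracy, roc_auc))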
"""
A Bi-directional Recurrent Neural Network (LSTM) implementation example
using the TensorFlow library.

Author: Gui Yuanmiao
Project: https://github.com/smalltalkman/hppi-tensorflow/
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn

# Import HPPI data
import os, hppi
hppids = hppi.read_data_sets(os.getcwd() + "/data/09-hppids", one_hot=True)

'''
To classify HPPI samples using a bidirectional recurrent neural network, we
treat every sample as a sequence. Because each HPPI sample holds 14*79 = 1106
values, we handle 79 timesteps of 14 inputs per sample.
'''

# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200

# Network Parameters
num_input = 14  # HPPI data input (data shape: 14*79=1106)
timesteps = 79  # timesteps
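# A sketch of how this BiRNN example typically continues, in the style of the
# classic TensorFlow tutorial this file adapts. num_hidden, num_classes, and
# the weight/bias variables below are assumptions, not values from the project.
num_hidden = 128  # number of LSTM units (assumed)
num_classes = 2   # binary PPI classification (assumed)

X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])

weights = {'out': tf.Variable(tf.random_normal([2 * num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}

def BiRNN(x, weights, biases):
    # Unstack into a list of `timesteps` tensors of shape (batch, num_input).
    x = tf.unstack(x, timesteps, 1)
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell,
                                                 x, dtype=tf.float32)
    # Linear read-out on the last timestep's concatenated fw/bw outputs.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']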
from datetime import datetime

import tensorflow as tf

import hppi


def main():
    # Load datasets. (data_sets_dir, num_input, hidden_units, num_classes,
    # optimizer, dropout, batch_size, num_steps, model_dir, and result_file
    # are module-level settings defined elsewhere.)
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)

    # Specify that all features have real-value data.
    feature_columns = [tf.feature_column.numeric_column("x", shape=[num_input])]

    # Build a DNN classifier with the configured hidden units.
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        # input_layer_partitioner=None,
        # hidden_units=[10, 20, 10],
        # hidden_units=[256, 256, 256],
        hidden_units=hidden_units,
        # activation_fn=tf.nn.relu,
        n_classes=num_classes,
        # optimizer='Adagrad',
        # optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate),
        optimizer=optimizer,
        dropout=dropout,
        model_dir=model_dir)

    # Define the training inputs.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": hppids.train.datas},
        y=hppids.train.labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True,
        queue_capacity=hppids.train.length)

    # Train model.
    begin_time = datetime.now()
    classifier.train(input_fn=train_input_fn, steps=num_steps)
    end_time = datetime.now()
    # Average training time per 100 steps.
    train_time = (end_time - begin_time).total_seconds() / num_steps * 100

    # Define the test inputs.
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": hppids.test.datas},
        y=hppids.test.labels,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    # Evaluate accuracy.
    # accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]

    # Evaluate scores.
    begin_time = datetime.now()
    scores = classifier.evaluate(input_fn=test_input_fn)
    end_time = datetime.now()
    test_time = (end_time - begin_time).total_seconds()

    scores_str = "global_step = {0:08d}".format(scores["global_step"]) \
               + ", accuracy = {0:8g}".format(scores["accuracy"]) \
               + ", accuracy_baseline = {0:8g}".format(scores["accuracy_baseline"]) \
               + ", auc = {0:8g}".format(scores["auc"]) \
               + ", auc_precision_recall = {0:8g}".format(scores["auc_precision_recall"]) \
               + ", average_loss = {0:8g}".format(scores["average_loss"]) \
               + ", label/mean = {0:8g}".format(scores["label/mean"]) \
               + ", loss = {0:8g}".format(scores["loss"]) \
               + ", prediction/mean = {0:8g}".format(scores["prediction/mean"]) \
               + ", train_time = {0:8g}".format(train_time) \
               + ", test_time = {0:8g}".format(test_time)

    # print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
    print("\nTest scores: {0}\n".format(scores_str))

    with open(result_file, "a") as file:
        file.write(scores_str + "\n")
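# main() reads its configuration from module-level names rather than
# parameters. The assignments below are a hypothetical configuration sketch
# (values echo those used in other snippets here), not settings taken from
# the repository.
data_sets_dir = "data/02-ct-bin"
num_input = 686
hidden_units = [256, 256, 256]
num_classes = 2
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
dropout = 0.4
batch_size = 128
num_steps = 10000
model_dir = "model/dnn"
result_file = "result/dnn.txt"

if __name__ == "__main__":
    main()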
import tensorflow as tf

import hppi


def once(data_sets_dir, data_sets_info,
         num_input, hidden_units, activation_fn, num_classes,
         optimizer, learning_rate, dnn_info,
         num_steps,
         model_dir_root,
         result_dir_root):
    """Train and evaluate one DNNClassifier configuration, appending scores to a result file."""
    model_info = "_{0}({1:d}x{2:d})_{3}_{4}_{5:g}".format(
        data_sets_info,
        num_input,
        num_classes,
        'x'.join([str(n) for n in hidden_units]),
        dnn_info,
        learning_rate)
    model_dir = model_dir_root + model_info
    result_file = result_dir_root + model_info + ".txt"

    # Load datasets.
    hppids = hppi.read_data_sets(data_sets_dir, one_hot=False)
    hppids.shuffle().split(apply=True)

    # Specify that all features have real-value data.
    feature_columns = [tf.feature_column.numeric_column("x", shape=[num_input])]

    # Build a DNN classifier with the given hidden units.
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        # input_layer_partitioner=None,
        # hidden_units=[10, 20, 10],
        hidden_units=hidden_units,
        # activation_fn=tf.nn.relu,
        activation_fn=activation_fn,
        n_classes=num_classes,
        # optimizer='Adagrad',
        optimizer=optimizer(learning_rate=learning_rate),
        model_dir=model_dir)

    # Define the training inputs.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": hppids.train.datas},
        y=hppids.train.labels,
        num_epochs=None,
        shuffle=True,
        queue_capacity=hppids.train.length)

    # Train model.
    classifier.train(input_fn=train_input_fn, steps=num_steps)

    # Define the test inputs.
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": hppids.test.datas},
        y=hppids.test.labels,
        num_epochs=1,
        shuffle=False)

    # Evaluate accuracy.
    # accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]

    # Evaluate scores.
    scores = classifier.evaluate(input_fn=test_input_fn)
    scores_str = "global_step = {0:08d}".format(scores["global_step"]) \
               + ", accuracy = {0:8g}".format(scores["accuracy"]) \
               + ", accuracy_baseline = {0:8g}".format(scores["accuracy_baseline"]) \
               + ", auc = {0:8g}".format(scores["auc"]) \
               + ", auc_precision_recall = {0:8g}".format(scores["auc_precision_recall"]) \
               + ", average_loss = {0:8g}".format(scores["average_loss"]) \
               + ", label/mean = {0:8g}".format(scores["label/mean"]) \
               + ", loss = {0:8g}".format(scores["loss"]) \
               + ", prediction/mean = {0:8g}".format(scores["prediction/mean"])

    # print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
    print("\nTest scores: {0}\n".format(scores_str))

    with open(result_file, "a") as file:
        file.write(scores_str + "\n")
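# Hypothetical invocation of once(); the argument values below are
# illustrative assumptions (reusing dimensions seen in other snippets),
# not a configuration taken from the repository.
if __name__ == "__main__":
    once(data_sets_dir="data/02-ct-bin",
         data_sets_info="ct-bin",
         num_input=686,
         hidden_units=[256, 256, 256],
         activation_fn=tf.nn.relu,
         num_classes=2,
         optimizer=tf.train.AdamOptimizer,
         learning_rate=0.001,
         dnn_info="relu_adam",
         num_steps=10000,
         model_dir_root="model/dnn",
         result_dir_root="result/dnn")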