def plot_nn_loss_against_epoch(X, Y, layers_dim, activation, epochs, image_name, loss='binary_crossentropy', optimizer='adam'):
    """
    Train a simple neural network and plot loss/accuracy against epochs.

    Builds a model via build_simplenn_model, fits it on (X, Y) with a 20%
    validation split, saves a loss/accuracy-vs-epoch plot to image_name,
    and returns the recorded training curves.

    :param X: training inputs (array-like accepted by Keras fit)
    :param Y: training targets
    :param layers_dim: layer dimensions forwarded to build_simplenn_model
    :param activation: activation function forwarded to build_simplenn_model
    :param epochs: number of training epochs
    :param image_name: (string) path to save the plot image to
    :param loss: (string) Keras loss function name
    :param optimizer: (string) Keras optimizer name
    :return: (training_acc, training_loss, validation_acc, validation_loss) lists
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from functions import general_functions as general

    general.check_path_exists(image_name)
    model = build_simplenn_model(layers_dim=layers_dim, activation=activation,
                                 loss=loss, optimizer=optimizer)
    print()
    print("Number of epochs:", epochs)
    print("Loss function:", loss)
    print("Optimizer function:", optimizer)
    print()
    H = model.fit(X, Y, epochs=epochs, batch_size=16, verbose=0,
                  validation_split=0.2, shuffle=True)
    training_loss = H.history['loss']
    validation_loss = H.history['val_loss']
    # Keras >= 2.3 / tf.keras renamed the history keys from 'acc'/'val_acc'
    # to 'accuracy'/'val_accuracy'; support both so the function works
    # regardless of the installed Keras version.
    training_acc = H.history['acc'] if 'acc' in H.history else H.history['accuracy']
    validation_acc = H.history['val_acc'] if 'val_acc' in H.history else H.history['val_accuracy']

    plt.switch_backend('agg')  # headless backend: no display needed to save the figure
    plt.figure()
    epoch_axis = np.arange(0, epochs)
    plt.plot(epoch_axis, training_loss, marker='o', label="train_loss")
    plt.plot(epoch_axis, validation_loss, marker='o', label="val_loss")
    plt.plot(epoch_axis, training_acc, marker='o', label="train_acc")
    plt.plot(epoch_axis, validation_acc, marker='o', label="val_acc")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss / Accuracy")
    plt.legend(loc="best")
    plt.savefig(image_name)
    plt.close()  # release the figure so repeated calls do not accumulate memory
    return training_acc, training_loss, validation_acc, validation_loss
def plot_roc_curve(fpr, tpr, aucs, tprs, image_name):
    """
    Plot per-fold ROC curves plus the mean ROC with a +/- 1 std-dev band.

    :param fpr: list of per-fold false-positive-rate arrays
    :param tpr: list of per-fold true-positive-rate arrays
    :param aucs: list of per-fold AUC values
    :param tprs: list of per-fold TPR arrays interpolated onto a common
        100-point FPR grid (assumed np.linspace(0, 1, 100) -- TODO confirm
        against the caller that computes tprs)
    :param image_name: (string) path to save the plot image to
    :return: None
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.metrics import auc
    from functions import general_functions as general

    general.check_path_exists(image_name)
    plt.switch_backend('agg')  # headless backend: no display needed to save the figure

    # Diagonal reference line: performance of random guessing.
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
             label='Chance', alpha=.8)
    # One faint, unlabeled line per cross-validation fold.
    # (Removed a dead loop counter and commented-out per-fold labels.)
    for fpr_, tpr_, _roc_auc in zip(fpr, tpr, aucs):
        plt.plot(fpr_, tpr_, lw=1, alpha=0.3)

    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0  # force the mean curve to end exactly at (1, 1)
    mean_fpr = np.linspace(0, 1, 100)
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    plt.plot(mean_fpr, mean_tpr, color='b',
             label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
             lw=2, alpha=.8)

    # Shaded band covering +/- one standard deviation, clipped to [0, 1].
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                     label=r'$\pm$ 1 std. dev.')

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # Title is derived from the path segment between the last '/' and "dataset".
    plt.title('ROC of %s' %
              image_name[image_name.rfind('/') + 1:image_name.rfind("dataset") - 1])
    plt.legend(loc="lower right")
    plt.savefig(image_name)
    plt.close()  # release the figure so repeated calls do not accumulate memory
def save_model(model, filename, neural_network):
    """
    Save model to disk

    :param model: model to be saved
    :param filename: (string) filename to save the model to
    :param neural_network: (boolean) whether the model is a neural network
        model (keras) or conventional machine learning model (scikit-learn).
    :return: None
    """
    from functions import general_functions as general

    general.check_path_exists(filename)
    print("\nSaving model to '%s' ..." % filename)
    if neural_network:
        # Keras models carry their own serialization format.
        model.save(filename)
    else:
        # sklearn.externals.joblib was deprecated in scikit-learn 0.21 and
        # removed in 0.23; fall back to the standalone joblib package so this
        # works on both old and new scikit-learn installations.
        try:
            from sklearn.externals import joblib
        except ImportError:
            import joblib
        joblib.dump(model, filename)
    print("Saving model done.")
help='seed number for random shuffling.') args = parser.parse_args() # Check if inputs are available if args.featurizer not in ['rdk', 'ecfp']: raise Exception("Descriptor %s not available. Choose from rdk or ecfp." % args.featurizer) if args.kernel_regularizer not in ['l1', 'l2', 'l1_l2', 'None']: raise Exception("Kernel regularizer %s not available. Choose from l1, l2, l1_l2 or None" % args.regularizer_param) lr = 0.0001 filename = "NN_training_history/%s/%s/batchsize%s/%s_epochs%s_dropout%s_lr%s_random_2" % (args.featurizer, args.kernel_regularizer, args.batch_size, args.regularizer_param, args.epochs, args.dropout_rate, lr) logfile = "./logfile/" + filename + ".log" save_model_path = "./saved_model/" + filename + ".h" general.check_path_exists(logfile) sys.stdout = open(logfile, 'wt') # Import data temp_path = os.path.join(os.getcwd(), 'data/ft_train_random.pkl') if os.path.exists(temp_path): data = general.import_pandas_dataframe(temp_path) print("Shape of data:", data.shape) else: raise Exception("%s does not exist." % temp_path) # Input and target X = np.stack(data[args.featurizer]) Y = LabelBinarizer().fit_transform((data['agrochemical']))
# Load a previously trained model and (optionally) evaluate it on a test set.

# Fail fast if the model file is missing.
if not os.path.exists(args.model_path):
    raise Exception("Pathway to model %s does not exist" % args.model_path)
if args.test:
    # Test-set evaluation does not use cross-validation splits.
    args.num_split = None

# Conventional ML models (gradient boosting, random forest, k-nearest) are
# recognized by substring in the path; anything else is treated as a Keras NN.
ml = ['GB', 'RF', 'KNN']
if any(x in args.model_path for x in ml):
    nn = False
else:
    nn = True

# NOTE(review): [find('/'):-2] keeps everything from the first '/' and drops
# the last two characters (presumably a '.h' extension) -- confirm against the
# expected model_path format.
logfile = os.path.join(
    os.getcwd(),
    "best_models/%s.log" % args.model_path[args.model_path.find('/'):-2])
general.check_path_exists(logfile)
sys.stdout = open(logfile, 'wt')  # redirect all subsequent prints into the log file

print("Loading model from %s..." % args.model_path)
model = model_func.load_model(args.model_path, nn)
print("Finished loading model.")

if args.test:
    # Evaluate on the held-out test set matching the requested split type.
    test_data_path = os.path.join(os.getcwd(), 'data/ft_test_%s.pkl' % args.split_type)
    test_data = general.import_pandas_dataframe(test_data_path)
    print("\nPrediction on testing data set of shape", test_data.shape, ": ")
    # Infer the featurizer from the model filename.
    ft = None
    if 'ecfp' in args.model_path:
        ft = 'ecfp'
parser.add_argument('--optimizer', type=str, default='adam',
                    help='type of optimizer function')
args = parser.parse_args()

# Check if the parameters are available for this file
if args.featurizer not in ['daylight', 'ecfp']:
    raise Exception("Descriptor %s not available. Choose from 'daylight' or 'ecfp'." % args.featurizer)
if args.num_layers not in [3, 4, 5]:
    raise Exception("Number of layers not available. Choose from 3, 4 or 5, or add them below. ")

# Log file named after featurizer, dataset (extension stripped) and layer count.
filename = 'simplenn_epoch_image/%s_%s_%slayers.log' % \
    (args.featurizer, args.dataset[:args.dataset.rfind('.')], args.num_layers)
# check if directory exists
general.check_path_exists(filename)
sys.stdout = open(filename, 'wt')  # redirect all subsequent prints into the log file

# Import data
temp_path = general.file_pathway(args.dataset)
if os.path.exists(temp_path):
    data = general.import_pandas_dataframe(temp_path)
    print("Shape of data:", data.shape)
else:
    raise Exception("%s does not exist." % args.dataset)

# Featurization
if args.featurizer == 'daylight':
    print("Calculating Daylight fingerprint...")
    data['fingerprint'] = data['mol'].apply(ft.daylight_fingerprint)
'simplenn', 'gradientboosting', 'randomforest', 'knearest']:
    # (continuation of an "if args.method not in [" check started above)
    raise Exception(
        "%s not available. Choose from simplenn, gradientboosting, randomforest or knearest." % args.method)
if args.featurizer not in ['daylight', 'ecfp']:
    raise Exception(
        "Descriptor %s not available. Choose from daylight or ecfp." % args.featurizer)

# Log file path encodes featurizer, method, dataset name and a free-form suffix.
filename = "%s/%s_%s_%s" % (args.featurizer, args.method,
                            args.dataset[:args.dataset.rfind('.')], args.filename_append)
logfile = "./logfile/" + filename + ".log"
general.check_path_exists(logfile)
sys.stdout = open(logfile, 'wt')  # redirect all subsequent prints into the log file

# Import data
temp_path = general.file_pathway(args.dataset)
if os.path.exists(temp_path):
    data = general.import_pandas_dataframe(temp_path)
    print("Shape of data:", data.shape)
else:
    raise Exception("%s does not exist." % args.dataset)

# Featurization (skipped when the dataframe already carries fingerprints)
if not 'fingerprint' in data.columns:
    if args.featurizer == 'daylight':
        print("Daylight Fingerprinting...")
        # NOTE(review): this uses ft.get_rdk while a sibling script applies
        # ft.daylight_fingerprint for 'daylight' -- confirm this difference is
        # intended (RDK fingerprints are Daylight-like).
        data['fingerprint'] = data['mol'].apply(ft.get_rdk)
        print("Daylight Fingerprinting done.")