def upload_predefined():
    """Load a bundled dataset into the notebook's raw X/Y slots.

    Available datasets (stored under ``datasets/<name>/{X,Y}`` as numpy
    binaries): Boston housing, CIFAR10, CIFAR100, Iris, Oxford17 flowers,
    MNIST.

    Returns a JSON-encoded status message.
    """
    data = request.json
    notebook = get_notebook_data(data['notebook_name'])
    # FIX: open the dataset files with context managers — the originals were
    # opened inline and never closed (file-handle leak per request).
    with open("datasets/" + data['dataset_name'] + "/X", "rb") as x_file:
        _x = numpy.load(x_file)
    # flatten every sample to a 1-D feature vector: (n, d1, d2, ...) -> (n, d1*d2*...)
    notebook['x_raw'] = numpy.reshape(_x,
                                      newshape=(-1, numpy.prod(_x.shape[1:])))
    with open("datasets/" + data['dataset_name'] + "/Y", "rb") as y_file:
        notebook['y_raw'] = numpy.load(y_file)
    set_notebook_data(data['notebook_name'])
    return json_encoder.encode({
        "message": "Success",
        "comment": "Data loaded successfully"
    })
def compile_sequential_model():
    """Compile the notebook's stored Keras model and train it on the split data.

    Reads hyperparameters (loss, learning_rate, momentum, nesterov, epochs)
    from the request JSON, trains with SGD, saves the trained model to an
    .hdf5 file (weights cannot be pickled with the notebook), and persists
    the notebook. Returns a JSON-encoded status message.
    """
    data = request.json
    notebook = get_notebook_data(data['notebook_name'])
    notebook['hyperparameters'] = data['hyperparameters']
    # fresh metric history; filled per-epoch by on_epoch_end_callback
    notebook["history"] = {
        "acc": [],
        "val_acc": [],
        "loss": [],
        "val_loss": []
    }
    # allocate specified device while creating notebook
    config = tensorflow.ConfigProto()
    config.gpu_options.allow_growth = True
    # give this notebook a GPU-memory share proportional to its reserved GPUs
    # NOTE(review): divides by len(GPUtil.getAvailable()) — zero available
    # GPUs would raise ZeroDivisionError; confirm upstream guards this.
    config.gpu_options.per_process_gpu_memory_fraction = (
        notebook["GPU_count"] / len(GPUtil.getAvailable()))
    keras.backend.tensorflow_backend.set_session(
        tensorflow.Session(config=config))
    # mark devices as in use while training runs
    notebook['is_online'] = True
    # load created model (stored as a JSON architecture string)
    model = keras.models.model_from_json(notebook['model'])
    # compile with client-sent hyperparameters
    model.compile(loss=data['hyperparameters']['loss'],
                  optimizer=keras.optimizers.SGD(
                      lr=float(data['hyperparameters']['learning_rate']),
                      momentum=float(data['hyperparameters']['momentum']),
                      nesterov=bool(data['hyperparameters']['nesterov'])),
                  metrics=['acc'])
    # Training starts; the callback records history and emits SSE ticks
    model.fit(x=notebook['x_train'],
              y=notebook['y_train'],
              batch_size=128,
              validation_data=(notebook['x_test'], notebook['y_test']),
              epochs=int(data['hyperparameters']['epochs']),
              callbacks=[on_epoch_end_callback(notebook=notebook)])
    # save model separately as model weights could not be pickled
    model.save("NOTEBOOK_" + data['notebook_name'] +
               "_neural_network_model.hdf5")
    notebook['model'] = model.to_json()
    set_notebook_data(data['notebook_name'])
    # best-effort graph cleanup; failures deliberately ignored
    try:
        keras.backend.clear_session()
    except:
        pass
    return json_encoder.encode({
        "message": "Success",
        "comment": "Compiled model and trained"
    })
def preprocessing():
    """Apply a client-selected sklearn preprocessor to the notebook's features.

    The preprocessor (module + class + hyperparameters) comes from the request
    JSON; only modules whitelisted in ``PREPROC`` are applied. Successive
    calls chain: an existing ``x_preprocessed`` is used as the input to the
    next preprocessor. Returns a JSON-encoded status message.
    """
    data = request.json
    notebook = get_notebook_data(data['notebook_name'])

    def _preprocess(my_json, X):
        # dynamically import just the requested sklearn submodule and class
        module = importlib.import_module('sklearn.' + my_json['module'])
        _class = getattr(module, my_json['class'])
        try:
            _X = _class(**my_json['hyperparameters']).fit_transform(X)
        # FIX: narrowed from a bare `except:` so KeyboardInterrupt /
        # SystemExit are no longer swallowed
        except Exception:
            # NOTE(review): .fit() returns the fitted estimator, not the
            # transformed data, so this stores an estimator object as
            # x_preprocessed — looks wrong; confirm intent before changing.
            _X = _class(**my_json['hyperparameters']).fit(X)
        return _X

    # preprocess the latest features so repeated preprocessors stack
    X = notebook['x_raw'] if 'x_preprocessed' not in notebook else notebook[
        'x_preprocessed']
    if (data['model_parameters']['module'] in PREPROC):
        _X = _preprocess(data['model_parameters'], X)
        notebook['x_preprocessed'] = _X
        notebook['preprocessing_applied'] = data['model_parameters']['class']
    notebook['has_columns'] = data['has_columns']
    notebook['uploaded_file_type'] = data['uploaded_file_type']
    set_notebook_data(data['notebook_name'])
    return json_encoder.encode({
        "message": "Success",
        "comment": "Preprocessor applied"
    })
def upload_table():
    """Accept a CSV upload, split it into features/labels, store in the notebook.

    The last column is treated as the label and all preceding columns as
    features. Returns a JSON-encoded status message.
    """
    data = request.form
    notebook = get_notebook_data(data['notebook_name'])
    # Check if file has been uploaded succesfully
    if 'file' not in request.files:
        return json_encoder.encode({
            "message": "Failure",
            "comment": "No file received"
        })
    file = request.files['file']
    # Check if there is a file
    if file.filename == '':
        return json_encoder.encode({
            "message": "Failure",
            "comment": "No file selected"
        })
    if (request.form['load_notebook_status'] == 'false'):
        # Check for file types
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            saved_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(saved_path)
            # FIX: read the header inside a context manager — the original
            # left the file handle open for the request's lifetime
            with open(saved_path, "r") as uploaded_file:
                reader = csv.reader(uploaded_file)
                total_cols = len(next(reader))
            # all but the last column are features; the last column is the
            # label. FIX: the original appended the label index once per
            # feature column, producing a list of duplicates.
            features_cols = list(range(0, total_cols - 1))
            label_cols = [total_cols - 1]
            # extract contents of the file to give X and Y data
            csvObject = CSV(saved_path, {
                'features': features_cols,
                'labels': label_cols
            }, False)
            X, Y = csvObject.extract()
            # store extracted data into notebook
            notebook['x_raw'] = X
            notebook['y_raw'] = Y
            notebook['file_name'] = data['file_name']
            set_notebook_data(data['notebook_name'])
    # NOTE(review): success is reported even when load_notebook_status was
    # 'true' or the file type was rejected — preserved from the original.
    return json_encoder.encode({
        "message": "Success",
        "comment": "Table loaded successfully"
    })
def create_sequential_model():
    """Build a Keras Sequential model from client-described layers and store it.

    Each layer arrives as a dict with ``layerType`` and ``defaultOptions``
    (name/defaultValue pairs) which are assembled into a constructor call.
    Returns a JSON-encoded status message.
    """
    data = request.json
    notebook = get_notebook_data(data['notebook_name'])
    notebook['model_type'] = "NEURAL NETWORK"
    # boolean for server sent events notifier
    notebook['_epoch_done'] = False
    notebook['numLayers'] = data['numLayers']
    layers = data['layers']
    notebook['modelLayers'] = layers
    # get input shape for the first layer (preprocessed features win over raw)
    input_shape = notebook['x_preprocessed'].shape[
        1:] if 'x_preprocessed' in notebook else notebook['x_raw'].shape[1:]
    # using keras sequential API
    model = keras.Sequential()
    # use eval to evaluate strings received by client (in JSON format) to create layers
    # add these layers to the model
    # SECURITY NOTE(review): eval() over client-supplied layerType/option
    # strings executes arbitrary code from the request — whitelist layer
    # names and option keys before evaluating.
    # NOTE(review): layers[0] is skipped — presumably a client-side input
    # placeholder; confirm against the frontend payload.
    # special case to include input shape
    model.add(
        eval("keras.layers." + layers[1]['layerType'] + "(" + ",".join([
            str(dct["name"]) + "=" + (str(dct["defaultValue"]) if str_isfloat(
                dct["defaultValue"]) else "'" + dct["defaultValue"] + "'")
            for dct in layers[1]['defaultOptions']
            if dct["defaultValue"] not in {True, False, None}
        ] + ["input_shape=" + str(input_shape)]) + ")"))
    for layer in layers[2:]:
        model.add(
            eval("keras.layers." + layer['layerType'] + "(" + ",".join([
                str(dct["name"]) + "=" +
                (str(dct["defaultValue"]) if str_isfloat(dct["defaultValue"])
                 else "'" + dct["defaultValue"] + "'")
                for dct in layer['defaultOptions']
                if dct["defaultValue"] not in {True, False, None}
            ]) + ")"))
    model.summary()
    # store model in json format
    notebook['model'] = model.to_json()
    set_notebook_data(notebook['notebook_name'])
    # clear the graph to avoid errors; best-effort, failures ignored
    try:
        keras.backend.clear_session()
    except:
        pass
    return json_encoder.encode({
        "message": "Success",
        "comment": "Model Created!"
    })
def on_epoch_end(self, epoch, logs={}):
    """Record this epoch's metrics, refresh both history plots, persist the notebook.

    Appends train/validation accuracy and loss to the notebook history (kept
    so retraining can continue the curves), redraws the accuracy and loss
    figures into the assets folder served to vue.js, and flips the
    server-sent-events flag so watchers know an epoch completed.
    """
    history = self.notebook['history']
    # same append order as before: acc, loss, val_acc, val_loss
    for metric in ('acc', 'loss', 'val_acc', 'val_loss'):
        history[metric] += [logs[metric]]
    # one pass per curve: (train series, val series, title, y-label,
    # filename suffix, notebook key used on reload)
    curve_specs = (
        ('acc', 'val_acc', 'Model Accuracy', 'Accuracy',
         '_accuracy_history_curve.jpg', 'accuracy_history_curve'),
        ('loss', 'val_loss', 'Model Loss', 'Loss',
         '_loss_history_curve.jpg', 'loss_history_curve'),
    )
    for train_key, val_key, title, ylabel, suffix, notebook_key in curve_specs:
        plt.plot(history[train_key])
        plt.plot(history[val_key])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')
        # save the plot into a folder vue.js can access it
        filename = "NOTEBOOK_" + self.notebook['notebook_name'] + suffix
        plt.savefig("../UI/src/assets/" + filename)
        plt.clf()
        # update the notebook so that to plot when notebook reloads
        self.notebook[notebook_key] = filename
    set_notebook_data(self.notebook['notebook_name'])
    # boolean to notify external function for server sent events
    self.notebook['_epoch_done'] = True
    return
def get_roc_curve(notebook_name_json):
    """Plot and save the ROC curve of the notebook's model on the test split.

    Saves the figure where the vue.js frontend can serve it and records the
    filename in the notebook. Returns a JSON string with the filename.
    """
    notebook_name_dict = json_decoder.decode(notebook_name_json)
    notebook = get_notebook_data(notebook_name_dict['notebook_name'])
    # does predict on testing data
    # to invoke keras predict
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model("NOTEBOOK_" +
                                        notebook_name_dict['notebook_name'] +
                                        "_neural_network_model.hdf5")
        probs = model.predict(notebook['x_test'])
    # to invoke sklearn predict
    else:
        probs = notebook['model'].predict_proba(notebook['x_test'])
    # probability of the positive class — assumes a binary problem with
    # class 1 in column 1; TODO confirm for multiclass notebooks
    preds = probs[:, 1]
    fpr, tpr, threshold = roc_curve(notebook['y_test'], preds)
    roc_auc = auc(fpr, tpr)
    # Create ROC plot
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    # Save plot to be used by vue.js
    filename = "NOTEBOOK_" + notebook['notebook_name'] + "_roc_curve.jpg"
    plt.savefig("../UI/src/assets/" + filename)
    plt.clf()
    # Save file name in notebook
    notebook['roc_curve'] = filename
    set_notebook_data(notebook_name_dict['notebook_name'])
    # best-effort graph cleanup; failures deliberately ignored
    try:
        keras.backend.clear_session()
    except:
        pass
    return json_encoder.encode({"message": "Success", "roc_curve": filename})
def explain_instance_tabular_data(instance):
    """Explain one tabular instance with LIME and save the HTML report.

    Uses ``notebook``, ``notebook_name_dict`` and ``predict_fn`` from the
    enclosing scope (this appears to be a nested helper — TODO confirm).
    Returns a JSON string with the saved explanation filename.
    """
    # NOTE(review): newshape is computed but never used in this function
    newshape = numpy.prod(instance.shape)
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model(
            "NOTEBOOK_" + notebook_name_dict['notebook_name'] +
            "_neural_network_model.hdf5")
        # predicted class index for the single (batch-of-one) instance
        target = list(
            map(
                numpy.argmax,
                model.predict(
                    numpy.reshape(instance,
                                  newshape=(1, *instance.shape)))[0]))[0]
    else:
        target = notebook['model'].predict([instance])[0]
    explainer = lt.LimeTabularExplainer(
        training_data=notebook['x_train'],
        feature_names=[str(i) for i in range(len(instance))])
    # cap num_samples so small training sets do not break LIME
    exp = explainer.explain_instance(instance,
                                     predict_fn,
                                     num_features=len(instance),
                                     num_samples=min(
                                         len(notebook['x_train']), 100),
                                     labels=(target, ))
    # save plot + interactive HTML where the vue.js frontend can reach them
    exp.as_pyplot_figure(label=target).savefig(
        "../UI/src/assets/" + "NOTEBOOK_" + notebook['notebook_name'] +
        "_investigate_model_instance1.jpg",
        figsize=(50, 50))
    exp.save_to_file(file_path="../UI/src/assets/" + "NOTEBOOK_" +
                     notebook['notebook_name'] +
                     "_investigate_model_instance.html")
    notebook['explanation'] = "NOTEBOOK_" + notebook[
        'notebook_name'] + "_investigate_model_instance.html"
    set_notebook_data(notebook_name_dict['notebook_name'])
    # best-effort graph cleanup; failures deliberately ignored
    try:
        keras.backend.clear_session()
    except:
        pass
    return json_encoder.encode({
        'explanation':
        "NOTEBOOK_" + notebook['notebook_name'] +
        "_investigate_model_instance.html"
    })
def get_confusion_matrix(notebook_name_json):
    """Compute and store the test-set confusion matrix for the notebook's model.

    Returns a JSON string containing the raveled matrix. The per-cell keys
    (tn/fp/fn/tp) assume a binary problem.
    """
    notebook_name_dict = json_decoder.decode(notebook_name_json)
    notebook = get_notebook_data(notebook_name_dict['notebook_name'])
    # to invoke keras predict
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model("NOTEBOOK_" +
                                        notebook_name_dict['notebook_name'] +
                                        "_neural_network_model.hdf5")
        prediction = model.predict(notebook['x_test'])
        # class probabilities -> class labels
        prediction = numpy.array(list(map(numpy.argmax, prediction)))
    # to invoke sklearn predict
    else:
        model = notebook['model']
        prediction = model.predict(notebook['x_test'])
    # de-"one hot": collapse 2-D one-hot labels to class indices
    y_test = notebook['y_test']
    if len(y_test.shape) > 1:
        y_test = numpy.array(list(map(numpy.argmax, y_test)))
    # FIX: compare against the de-one-hot labels — the original passed the
    # raw notebook['y_test'], which fails for one-hot encoded targets
    matrix = confusion_matrix(y_test, prediction).ravel()
    # save confusion matrix in notebook
    # did not consider for multiclass labels while displaying
    notebook['confusion_matrix'] = matrix
    notebook['true_negative'] = int(matrix[0])
    notebook['false_positive'] = int(matrix[1])
    notebook['false_negative'] = int(matrix[2])
    notebook['true_positive'] = int(matrix[3])
    set_notebook_data(notebook_name_dict['notebook_name'])
    # best-effort graph cleanup
    # FIX: narrowed from a bare `except:`
    try:
        keras.backend.clear_session()
    except Exception:
        pass
    return json_encoder.encode({
        "message": "Success",
        "confusion_matrix": matrix.tolist()
    })
def upload_raw():
    """Accept a raw binary file upload and store extracted X/Y in the notebook.

    Returns a JSON-encoded status message.
    """
    # Check if file has been uploaded succesfully
    if 'file' not in request.files:
        return json_encoder.encode({
            "message": "Failure",
            "comment": "No file received"
        })
    file = request.files['file']
    # Check if there is a file
    if file.filename == '':
        return json_encoder.encode({
            "message": "Failure",
            "comment": "No file selected"
        })
    # Check for file types
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        # extract raw binary data
        rawObject = RawFile(
            os.path.join(app.config['UPLOAD_FOLDER'], filename), {
                'features': [0],
                'labels': [1]
            }, False)
        X, Y = rawObject.extract()
        # FIX: multipart uploads carry their fields in request.form, not
        # request.json (which is None for form-data) — matches upload_table
        data = request.form
        notebook = get_notebook_data(data['notebook_name'])
        # store dataset in notebook
        notebook['x_raw'] = X
        notebook['y_raw'] = Y
        set_notebook_data(data['notebook_name'])
        return json_encoder.encode({
            "message": "Success",
            "comment": "Data loaded successfully"
        })
    # FIX: the original fell through and returned None for disallowed
    # file types; report an explicit failure instead
    return json_encoder.encode({
        "message": "Failure",
        "comment": "File type not allowed"
    })
def set_train_test_data():
    """Split the notebook's dataset into train/test partitions and persist it.

    Prefers preprocessed features over raw ones, records the requested
    test_size under the notebook's hyperparameters, and stores the four
    resulting arrays. Returns a JSON-encoded status message.
    """
    data = request.json
    notebook = get_notebook_data(data['notebook_name'])
    # used preprocessed data if it exists, or else raw data
    if 'x_preprocessed' in notebook:
        features = notebook['x_preprocessed']
    else:
        features = notebook['x_raw']
    notebook['hyperparameters'] = {'test_size': data['test_size']}
    # splits dataset into 4 parts and stores it in the notebook
    split = train_test_split(features,
                             notebook['y_raw'],
                             test_size=data['test_size'])
    notebook['x_train'], notebook['x_test'] = split[0], split[1]
    notebook['y_train'], notebook['y_test'] = split[2], split[3]
    set_notebook_data(data['notebook_name'])
    return json_encoder.encode({"message": "Success", "comment": "Data set"})
def get_accuracy(notebook_name_json):
    """Compute test-set accuracy for the notebook's model and persist it.

    Returns a JSON string containing the accuracy (stringified).
    """
    notebook_name_dict = json_decoder.decode(notebook_name_json)
    notebook = get_notebook_data(notebook_name_dict['notebook_name'])
    # to invoke keras predict
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model("NOTEBOOK_" +
                                        notebook_name_dict['notebook_name'] +
                                        "_neural_network_model.hdf5")
        prediction = model.predict(notebook['x_test'])
        # class probabilities -> class labels
        prediction = numpy.array(list(map(numpy.argmax, prediction)))
    # to invoke sklearn predict
    else:
        model = notebook['model']
        prediction = model.predict(notebook['x_test'])
    # de-"one hot": collapse 2-D one-hot labels to class indices
    y_test = notebook['y_test']
    if len(y_test.shape) > 1:
        y_test = numpy.array(list(map(numpy.argmax, y_test)))
    # FIX: score against the de-one-hot labels — the original computed
    # y_test but then passed the raw notebook['y_test'] to accuracy_score,
    # which fails for one-hot encoded targets
    accuracy = accuracy_score(y_test, prediction)
    # save accuracy in notebook
    notebook['accuracy'] = accuracy
    set_notebook_data(notebook_name_dict['notebook_name'])
    # best-effort graph cleanup
    # FIX: narrowed from a bare `except:`
    try:
        keras.backend.clear_session()
    except Exception:
        pass
    return json_encoder.encode({
        "message": "Success",
        "accuracy": str(accuracy)
    })
def create_non_neural_network_model():
    """Train a (non-neural-network) sklearn model described by the request JSON.

    Coerces string hyperparameters to real types, allocates CPUs via
    ``n_jobs`` when supported, then dispatches to a supervised or
    unsupervised training helper depending on the module's whitelist
    (``SUPER`` / ``UNSUPER``). Returns a JSON-encoded status message.
    """
    data = request.json
    notebook = get_notebook_data(data['notebook_name'])
    notebook['hyperparameters'] = data['model_parameters']
    notebook['model_type'] = "NON NEURAL NETWORK"
    notebook['model_name'] = data['model_parameters']['class']
    # change the data types of parameters to required data types
    hyper = data['model_parameters']['hyperparameters']
    for key in hyper:
        try:
            if (hyper[key] == 'None'):
                hyper[key] = None
            elif (hyper[key].isdigit()):
                hyper[key] = int(hyper[key])
            elif (str_isfloat(hyper[key])):
                hyper[key] = float(hyper[key])
            elif (hyper[key] == 'True'):
                hyper[key] = True
            elif (hyper[key] == 'False'):
                hyper[key] = False
        # FIX: narrowed from a bare `except:`; non-string values simply
        # have no .isdigit() and are left untouched
        except Exception:
            pass
    # allocate CPUs if possible
    if 'n_jobs' in hyper:
        hyper['n_jobs'] = notebook['CPU_count']
    # mark devices as in use while training runs
    notebook['is_online'] = True

    def train_supervised(x_train, y_train, my_json):
        # import just the required class from specific module and train the model
        module = importlib.import_module('sklearn.' + my_json['module'])
        _class = getattr(module, my_json['class'])
        model = _class(**my_json['hyperparameters'])
        model.fit(x_train, y_train)
        # deallocate devices after training
        notebook['is_online'] = False
        return model

    def train_unsupervised(X, my_json):
        # import just the required class from specific module and train the model
        module = importlib.import_module('sklearn.' + my_json['module'])
        _class = getattr(module, my_json['class'])
        model = _class(**my_json['hyperparameters'])
        try:
            model.fit_transform(X)
        # FIX: narrowed from a bare `except:`; estimators without
        # fit_transform fall back to plain fit
        except Exception:
            model.fit(X)
        # deallocate devices after training
        notebook['is_online'] = False
        return model

    # train supervised and unsupervised algorithms separately
    # supervised algorithms require 1-D array of Y training samples which
    # contain class labels
    if data['model_parameters']['module'] in SUPER:
        # NOTE(review): one-hot labels are 2-D, so `<= 2` never collapses
        # them — confirm whether `< 2` was intended here.
        y_train = notebook['y_train'] if len(
            notebook['y_train'].shape) <= 2 else numpy.array(
                list(map(numpy.argmax, notebook['y_train'])))
        notebook['model'] = train_supervised(notebook['x_train'], y_train,
                                             data['model_parameters'])
    # unsupervised algorithms require only X training samples
    elif data['model_parameters']['module'] in UNSUPER:
        # FIX: the original called train_supervised here with only two
        # arguments — a guaranteed TypeError; dispatch to the unsupervised
        # helper instead
        notebook['model'] = train_unsupervised(notebook['x_train'],
                                               data['model_parameters'])
    set_notebook_data(data['notebook_name'])
    return json_encoder.encode({
        "message": "Success",
        "comment": "Model trained"
    })
def explain_instance_image_data(instance):
    """Explain one image instance with LIME and save the explanation artifacts.

    Uses ``notebook``, ``notebook_name_dict`` and ``predict_fn`` from the
    enclosing scope (this appears to be a nested helper — TODO confirm).
    Returns a JSON string with the filenames of the instance image, the
    HTML explanation, and the reconstructed weight image.
    """
    # flattened pixel count of the instance
    newshape = numpy.prod(instance.shape)
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model(
            "NOTEBOOK_" + notebook_name_dict['notebook_name'] +
            "_neural_network_model.hdf5")
        # predicted class index for the single (batch-of-one) instance
        target = list(
            map(
                numpy.argmax,
                model.predict(
                    numpy.reshape(instance,
                                  newshape=(1, *instance.shape)))[0]))[0]
    else:
        target = notebook['model'].predict(
            numpy.reshape(instance, newshape=newshape))
    # NOTE(review): nothing has been drawn yet, so this saves whatever the
    # current matplotlib figure holds (possibly blank) — confirm whether a
    # plt.imshow(instance) call was intended first.
    plt.savefig("../UI/src/assets/" + "NOTEBOOK_" +
                notebook['notebook_name'] +
                "_investigate_model_instance0.jpg")
    plt.clf()
    # prefer real column names when the upload provided them
    explainer = lt.LimeTabularExplainer(
        training_data=notebook['x_train'],
        feature_names=[str(i) for i in range(len(instance))]
        if 'column_names' not in notebook else notebook['column_names'])
    exp = explainer.explain_instance(instance,
                                     predict_fn,
                                     num_features=len(instance),
                                     num_samples=newshape,
                                     labels=(target, ))
    # save plot + interactive HTML where the vue.js frontend can reach them
    exp.as_pyplot_figure(label=target).savefig(
        "../UI/src/assets/" + "NOTEBOOK_" + notebook['notebook_name'] +
        "_investigate_model_instance1.jpg",
        figsize=(50, 50))
    exp.save_to_file(file_path="../UI/src/assets/" + "NOTEBOOK_" +
                     notebook['notebook_name'] +
                     "_investigate_model_instance.html")
    # rebuild a per-pixel weight image from the LIME explanation, centred
    # on 0.5 (negative weights darken, positive lighten)
    explaination_list = exp.as_map()[target]
    constructed_image = numpy.zeros(shape=len(explaination_list))
    for i, j in explaination_list:
        constructed_image[i] = 0.5 + j / 2
    constructed_image = numpy.reshape(constructed_image,
                                      newshape=instance.shape)
    # NOTE(review): constructed_image is never drawn before this save — the
    # saved file is leftover figure state; confirm intent.
    plt.savefig("../UI/src/assets/" + "NOTEBOOK_" +
                notebook['notebook_name'] +
                "_investigate_model_instance2.jpg")
    plt.clf()
    notebook['explanation'] = "NOTEBOOK_" + notebook[
        'notebook_name'] + "_investigate_model_instance.html"
    set_notebook_data(notebook_name_dict['notebook_name'])
    # best-effort graph cleanup; failures deliberately ignored
    try:
        keras.backend.clear_session()
    except:
        pass
    return json_encoder.encode({
        'instance':
        "NOTEBOOK_" + notebook['notebook_name'] +
        "_investigate_model_instance0.jpg",
        'explanation':
        "NOTEBOOK_" + notebook['notebook_name'] +
        "_investigate_model_instance.html",
        'constructed':
        "NOTEBOOK_" + notebook['notebook_name'] +
        "_investigate_model_instance2.jpg"
    })
def get_precision_recall_curve(notebook_name_json):
    """Plot and save the 2-class precision-recall curve for the notebook's model.

    Binarizes the test labels (smallest class -> 0, rest -> 1), computes the
    average precision, and saves the curve where vue.js can serve it.
    Returns a JSON string with the filename, AP score, and mean recall.
    """
    notebook_name_dict = json_decoder.decode(notebook_name_json)
    notebook = get_notebook_data(notebook_name_dict['notebook_name'])
    # to invoke keras predict
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model("NOTEBOOK_" +
                                        notebook_name_dict['notebook_name'] +
                                        "_neural_network_model.hdf5")
        prediction = model.predict(notebook['x_test'])
        # class probabilities -> class labels
        prediction = numpy.array(list(map(numpy.argmax, prediction)))
    # to invoke sklearn predict
    else:
        model = notebook['model']
        prediction = model.predict(notebook['x_test'])
    # NOTE(review): these are hard class labels, not probabilities/decision
    # scores — a PR curve over labels is very coarse; confirm intent.
    y_score = prediction
    # de-"one hot": collapse 2-D one-hot labels to class indices
    y_test = notebook['y_test']
    if len(y_test.shape) > 1:
        y_test = numpy.array(list(map(numpy.argmax, y_test)))
    # Average precision curve for only binary class problems
    # can iterate for multi class
    # binarize: smallest label becomes 0, everything else becomes 1
    y_test[y_test == y_test.min()] = 0
    y_test[y_test != 0] = 1
    average_precision = average_precision_score(y_test, y_score)
    notebook['average_precision_score'] = average_precision
    precision, recall, _ = precision_recall_curve(y_test, y_score)
    # create precision recall curve
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
        average_precision))
    # save data and filename in the notebook for reloading
    filename = "NOTEBOOK_" + notebook[
        'notebook_name'] + "_precision_recall_curve.jpg"
    plt.savefig("../UI/src/assets/" + filename)
    plt.clf()
    notebook['precision_recall_curve'] = filename
    notebook['average_precision_score'] = average_precision
    notebook['recall'] = recall.mean()
    set_notebook_data(notebook_name_dict['notebook_name'])
    # best-effort graph cleanup; failures deliberately ignored
    try:
        keras.backend.clear_session()
    except:
        pass
    return json_encoder.encode({
        "message": "Success",
        "precision_recall_curve": filename,
        "average_precision_score": average_precision,
        "recall": recall.mean()
    })