def dropout_influence(X_train, y_train):
    # Build 5 networks with different numbers of Dropout layers
    for n in range(5):
        # Create the container
        model = Sequential()
        # Create the first layer
        model.add(layers.Dense(8, input_dim=2, activation="relu"))
        counter = 0
        # The number of hidden layers is fixed at 5
        for _ in range(5):
            model.add(layers.Dense(64, activation="relu"))
            # Add n Dropout layers
            if counter < n:
                counter += 1
                model.add(layers.Dropout(rate=0.5))
        # Output layer
        model.add(layers.Dense(1, activation="sigmoid"))
        # Assemble the model
        model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
        # Train
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the decision boundary for each number of Dropout layers
        # Visualized x range is [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # Visualized y range is [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Build the x-y sampling grid for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "No Dropout layers" if n == 0 else "{0} Dropout layer(s)".format(n)
        file = "Dropout_%i.png" % n
        make_plot(X_train, y_train, title, file, XX, YY, preds,
                  output_dir=OUTPUT_DIR + "/dropout")
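# Note: Sequential.predict_classes, used throughout this section, was removed
# in TensorFlow 2.6. A minimal drop-in helper for the binary (sigmoid-output)
# models above -- a sketch assuming TF 2.x, not part of the original code:
import numpy as np

def predict_classes_binary(model, x, threshold=0.5):
    """Threshold sigmoid probabilities into 0/1 class labels."""
    return (model.predict(x) > threshold).astype("int32").ravel()

# For softmax (multi-class) models, take the argmax instead:
def predict_classes_multiclass(model, x):
    return np.argmax(model.predict(x), axis=-1)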
def different_hidden_layer():
    """Train networks with 1 to 3 hidden layers and plot their decision boundaries.

    Relies on globally defined x_train, y_train and the meshgrid XX, YY.
    """
    for n in range(3):
        model = Sequential()
        model.add(layers.Dense(64, input_dim=2, activation='relu'))
        for i in range(n):
            model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=5, verbose=1)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "hidden layers: {0}".format(n + 1)
        make_plot(x_train, y_train, title, XX, YY, preds)
def network_layers_influence(x_train, y_train):
    for n in range(5):
        model = Sequential()
        model.add(layers.Dense(8, input_dim=2, activation='relu'))
        for _ in range(n):
            model.add(layers.Dense(32, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        history = model.fit(x_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Visualized x range is [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # Visualized y range is [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Build the x-y sampling grid for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "layer_nums {0}".format(2 + n)
        filename = "network_%i.png" % (2 + n)
        make_plot(x_train, y_train, title, filename, XX, YY, preds, output_dir=OUTPUT_DIR)
def NN(X_train, X_test, y_train, y_test):
    model = Sequential()
    model.add(Dense(100, input_dim=1034, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss=keras.losses.binary_crossentropy, optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X_train, y_train, validation_split=0.1)
    prediction = model.predict_classes(X_test)
    print('Neural Network:')
    print('Accuracy (%):', 100 * accuracy_score(y_test, prediction))
    # squared=True returns the MSE (the sklearn default), not the RMSE
    print('MSE:', mean_squared_error(y_test, prediction, squared=True))
    print('MAE:', mean_absolute_error(y_test, prediction))
    print(confusion_matrix(y_test, prediction))
def network_layers_influence(X_train, y_train):
    # Build 5 networks of different depths
    for n in range(5):
        # Create the container
        model = Sequential()
        # Create the first layer
        model.add(layers.Dense(8, input_dim=2, activation="relu"))
        # Add n hidden layers, for n + 2 layers in total
        for _ in range(n):
            model.add(layers.Dense(32, activation="relu"))
        # Create the final layer
        model.add(layers.Dense(1, activation="sigmoid"))
        # Assemble and train the model
        model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the decision boundary for each network depth
        # Visualized x range is [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # Visualized y range is [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Build the x-y sampling grid for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "Number of layers: {0}".format(2 + n)
        file = "network_capacity_%i.png" % (2 + n)
        make_plot(X_train, y_train, title, file, XX, YY, preds,
                  output_dir=OUTPUT_DIR + "/network_layers")
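# make_plot is referenced throughout this section but never defined in it.
# A minimal sketch of what such a decision-boundary plotting helper could look
# like -- an assumption for illustration, not the original implementation:
import os
import matplotlib.pyplot as plt

def make_plot(X, y, title, file, XX, YY, preds, output_dir="."):
    """Draw the predicted decision regions plus the training points."""
    plt.figure(figsize=(6, 5))
    # preds comes back flat from predict_classes; reshape it onto the grid
    plt.contourf(XX, YY, preds.reshape(XX.shape), alpha=0.6, cmap="Spectral")
    plt.scatter(X[:, 0], X[:, 1], c=y, s=20, cmap="Spectral", edgecolors="k")
    plt.title(title)
    os.makedirs(output_dir, exist_ok=True)
    plt.savefig(os.path.join(output_dir, file))
    plt.close()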
def train_rna(config):
    data_train, data_test, target_train, target_test = process_data()
    rna_folder = pathlib.Path(join(os.getcwd(), 'rna'))
    fig_folder = pathlib.Path(join(os.getcwd(), "figures"))
    id = str(uuid.uuid1()).split('-')[0]  # Generates a unique id for each RNA created

    # Here is where the magic really happens! Check this out:
    model = Sequential()  # The model used is sequential
    # It has a fully connected input layer
    model.add(Dense(data_train.shape[1], activation="relu",
                    kernel_initializer=config.initializer,
                    input_shape=(data_train.shape[1],)))
    # With three other hidden layers
    model.add(Dense(config.layer_size_hl1, activation=config.activation,
                    kernel_initializer=config.initializer))
    # And a dropout layer between them
    model.add(Dropout(config.dropout))
    model.add(Dense(config.layer_size_hl2, activation=config.activation,
                    kernel_initializer=config.initializer))
    model.add(Dropout(config.dropout))
    model.add(Dense(config.layer_size_hl3, activation=config.activation,
                    kernel_initializer=config.initializer))
    model.add(Dense(len(modulations), activation='softmax'))

    # Once created, the model is compiled, trained
    # and saved for further evaluation
    model.compile(optimizer=config.optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    history = model.fit(data_train, target_train, validation_split=0.3,
                        epochs=config.epochs, verbose=1,
                        callbacks=[WandbCallback(validation_data=(data_test, target_test))])
    model.save(str(join(rna_folder, 'rna-' + id + '.h5')))
    model.save_weights(str(join(rna_folder, 'weights-' + id + '.h5')))
    # Plain string concatenation; the original misused os.path.join here
    print("\nRNA saved with id " + id + "\n")

    # A figure with a model representation is automatically saved!
    plot_model(model, to_file=join(fig_folder, 'model-' + id + '.png'), show_shapes=True)

    # Here is where we make the first evaluation of the RNA
    loss, acc = model.evaluate(data_test, target_test, verbose=1)
    print('Test Accuracy: %.3f' % acc)

    # Here, WANDB takes place and logs all metrics to the cloud
    metrics = {'accuracy': acc,
               'loss': loss,
               'dropout': config.dropout,
               'epochs': config.epochs,
               'initializer': config.initializer,
               'layer_size_hl1': config.layer_size_hl1,
               'layer_size_hl2': config.layer_size_hl2,
               'layer_size_hl3': config.layer_size_hl3,
               'optimizer': config.optimizer,
               'activation': config.activation,
               'id': id}
    wandb.log(metrics)

    # Here we make a prediction using the test data...
    print('\nStarting prediction')
    predict = model.predict_classes(data_test, verbose=1)

    # And create a Confusion Matrix for a better visualization!
    print('\nConfusion Matrix:')
    confusion_matrix = tf.math.confusion_matrix(target_test, predict).numpy()
    confusion_matrix_normalized = np.around(
        confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis],
        decimals=2)
    print(confusion_matrix_normalized)
    cm_data_frame = pd.DataFrame(confusion_matrix_normalized,
                                 index=modulations, columns=modulations)
    figure = plt.figure(figsize=(8, 4), dpi=150)
    sns.heatmap(cm_data_frame, annot=True, cmap=plt.cm.get_cmap('Blues', 6))
    plt.tight_layout()
    plt.title('Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(join(fig_folder, 'confusion_matrix-' + id + '.png'),
                bbox_inches='tight', dpi=300)
    plt.clf()

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='best')
    plt.savefig(join(fig_folder, 'history_accuracy-' + id + '.png'),
                bbox_inches='tight', dpi=300)
    plt.clf()

    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='best')
    plt.savefig(join(fig_folder, 'history_loss-' + id + '.png'),
                bbox_inches='tight', dpi=300)
    plt.close(figure)

    evaluate_rna(id=id)
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

plot_learningCurve(history, 10)

import mlxtend
from mlxtend.plotting import plot_decision_regions
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

y_pred = model.predict_classes(X_test)
mat = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(conf_mat=mat, class_names=label.classes_,
                      show_normed=True, figsize=(7, 7))
class ModelBidirectDNA():
    def __init__(self, params):
        """Initializes the model before training."""
        # Defines where to save the model's checkpoints
        self.results_base_dir = params['result_base_dir']
        self.pretrained_model = params.get('pretrained_model', None)
        if self.pretrained_model is not None:
            # Pretrained model: load params from pickle
            print("loading model")
            train_dir = "/"
            train_dir = train_dir.join(params['pretrained_model'].split("/")[:-1])
            print(train_dir)
            with open(os.path.join(train_dir, "network_params"), 'rb') as params_pickle:
                self.params = pickle.load(params_pickle)
            self.params['result_base_dir'] = self.results_base_dir
        else:
            # New model
            self.params = params

        self.seeds = [42, 101, 142, 23, 53]
        self.learning_rate = self.params['lr']
        self.batch_size = self.params['batch_size']
        weight_decay = self.params['weight_decay']

        # Architecture --- emoji network
        weight_init = tf.keras.initializers.glorot_uniform
        recurrent_init = tf.keras.initializers.orthogonal(seed=42)

        # Model definition
        self.model = Sequential()
        self.model.add(Masking(mask_value=[1., 0., 0., 0., 0.],
                               input_shape=(self.params['maxlen'],
                                            self.params['vocabulary_len'])))
        self.model.add(tf.keras.layers.Conv1D(
            self.params['conv_num_filter'],
            self.params['conv_kernel_size'],
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
            kernel_initializer=weight_init(self.seeds[2]),
            activity_regularizer=tf.keras.regularizers.l2(weight_decay)))
        self.model.add(tf.keras.layers.MaxPool1D())
        self.model.add(tf.keras.layers.Dropout(self.params['dropout_1_rate'],
                                               seed=self.seeds[0]))
        self.model.add(tf.keras.layers.Conv1D(
            self.params['conv_num_filter'],
            self.params['conv_kernel_size'],
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
            kernel_initializer=weight_init(self.seeds[3]),
            activity_regularizer=tf.keras.regularizers.l2(weight_decay)))
        self.model.add(tf.keras.layers.MaxPool1D())
        self.model.add(Bidirectional(LSTM(
            int(self.params['lstm_units']),
            return_sequences=False,
            dropout=self.params['lstm_input_dropout'],
            kernel_initializer=weight_init(self.seeds[0]),
            recurrent_initializer=recurrent_init,
            kernel_regularizer=l2(self.params['weight_decay']))))
        self.model.add(Dropout(self.params['lstm_output_dropout'], seed=self.seeds[2]))
        self.model.add(Dense(8, activation='relu',
                             kernel_initializer=weight_init(self.seeds[0])))
        self.model.add(Dropout(self.params['dense_dropout_rate'], seed=self.seeds[3]))
        self.model.add(Dense(1, activation='sigmoid',
                             kernel_initializer=weight_init(self.seeds[4]),
                             kernel_regularizer=l2(self.params['weight_decay'])))

        # Check if the user wants a pre-trained model.
        # If so, load the weights
        if self.pretrained_model is not None:
            self.model.load_weights(self.pretrained_model)

    def build(self, logger=None):
        """Compiles the model by defining optimizer, loss and learning rate."""
        optimizer = tf.keras.optimizers.RMSprop(lr=self.learning_rate, clipnorm=1.0)
        self.model.compile(loss='binary_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy', f1_m, precision_m, recall_m])
        if (logger is not None):
            self.model.summary(print_fn=lambda x: logger.info(x))
        else:
            self.model.summary()
        # Print params onto the logger
        if logger is not None:
            logger.info("\n" + json.dumps(self.params, indent=4))

    def fit(self, X_tr, y_tr, epochs, callbacks_list, validation_data, shuffle=True):
        """Fit the model with the provided data and return the results.

        Inputs:
        - X_tr: samples
        - y_tr: labels related to the samples
        - epochs: number of epochs before stopping the training
        - callbacks_list
        - validation_data: data the model is validated on each time an epoch is completed
        - shuffle: whether the dataset has to be shuffled before being fed into the network

        Outputs:
        - history: it contains the results of the training
        """
        # NOTE: the passed-in callbacks_list is replaced by this class's own callbacks
        callbacks_list = self._get_callbacks()
        history = self.model.fit(x=X_tr,
                                 y=y_tr,
                                 epochs=epochs,
                                 shuffle=shuffle,
                                 batch_size=self.batch_size,
                                 callbacks=callbacks_list,
                                 validation_data=validation_data)
        trained_epochs = (callbacks_list[0].stopped_epoch - callbacks_list[0].patience + 1
                          if callbacks_list[0].stopped_epoch != 0 else epochs)
        return history, trained_epochs

    def fit_early_stopping_by_loss_val(self, X_tr, y_tr, epochs, early_stopping_loss,
                                       callbacks_list, validation_data, shuffle=True):
        """Train the model until the current validation loss reaches the holdout
        training loss specified by the early_stopping_loss parameter.
        Algorithm 7.3 (Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016.
        Deep Learning. The MIT Press, pp. 246-250.)
        Params:
        -------
        :X_tr: training samples
        :y_tr: training labels
        :epochs: number of epochs training is performed on
        :early_stopping_loss: threshold loss - once this loss is reached the training is stopped
        :callbacks_list: list of callbacks to use in the training phase
        :validation_data: data to evaluate the model on at the end of each epoch
        :shuffle: if True, it shuffles data before starting the training
        """
        print(f"early stopping loss: {early_stopping_loss}")
        callbacks_list = self._get_callbacks(train=True)
        callbacks_list.append(
            EarlyStoppingByLossVal(monitor='val_loss', value=early_stopping_loss))
        history = self.model.fit(x=X_tr,
                                 y=y_tr,
                                 epochs=epochs,
                                 batch_size=self.batch_size,
                                 shuffle=shuffle,
                                 callbacks=callbacks_list,
                                 validation_data=validation_data)
        return history

    def evaluate(self, features, labels):
        """Evaluates the trained model on the provided data.

        Inputs:
        - features: sample of data to validate
        - labels: classes the data belong to

        Outputs:
        - loss
        - accuracy
        - f1_score
        - precision
        - recall
        """
        loss, accuracy, f1_score, precision, recall = self.model.evaluate(
            features, labels, verbose=0)
        metrics_value = [loss, accuracy, f1_score, precision, recall]
        results_dict = dict(zip(self.model.metrics_names, metrics_value))
        return results_dict

    def print_metric(self, name, value):
        print('{}: {}'.format(name, value))

    def save_weights(self):
        """Saves the model's weights into an HDF5 file."""
        with open(os.path.join(self.results_base_dir, "network_params"),
                  'wb') as params_pickle:
            pickle.dump(self.params, params_pickle)
        self.model.save_weights(
            os.path.join(self.results_base_dir, 'my_model_weights.h5'))
        model_json = self.model.to_json()
        with open(os.path.join(self.results_base_dir, "model.json"), "w") as json_file:
            json_file.write(model_json)

    def fit_generator(self, generator, steps_per_epoch, epochs,
                      validation_data=None, shuffle=True, callbacks_list=None):
        """Train the model for the same number of update steps as in the holdout
        validation phase.
        Algorithm 7.2 (Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016.
        Deep Learning. The MIT Press, pp. 246-250.)
        """
        history = self.model.fit_generator(generator,
                                           steps_per_epoch,
                                           epochs,
                                           shuffle=False,
                                           callbacks=self._get_callbacks(train=True),
                                           validation_data=validation_data)
        return history

    def _get_callbacks(self, train=True):
        """Defines the callbacks for this specific architecture."""
        callbacks_list = [
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          patience=10,
                                          restore_best_weights=True),
            keras.callbacks.ModelCheckpoint(
                filepath=os.path.join(self.results_base_dir,
                                      'model_checkpoint_weights.h5'),
                monitor='val_loss',
                save_best_only=True,
                verbose=0),
            keras.callbacks.CSVLogger(
                os.path.join(self.results_base_dir, 'history.csv')),
            keras.callbacks.ReduceLROnPlateau(patience=10,
                                              monitor='val_loss',
                                              factor=0.75,
                                              verbose=1,
                                              min_lr=5e-6)
        ]
        return callbacks_list

    def predict(self, x_test, batch_size: int = 32, verbose: int = 0) -> np.array:
        """Wrapper method for the Keras model's 'predict' method.

        Params:
        -------
        :x_test: test samples
        :batch_size: default=32
        :verbose: verbosity level
        """
        return self.model.predict(x_test,
                                  batch_size=batch_size,
                                  verbose=verbose).ravel()

    def predict_classes(self, x_test, batch_size: int = 32, verbose: int = 1) -> np.array:
        """Wrapper method for the Keras model's 'predict_classes' method.

        Params:
        -------
        :x_test: test samples
        :batch_size: default=32
        :verbose: verbosity level

        Raise: Exception
        """
        try:
            return self.model.predict_classes(x_test)
        except Exception as err:
            print(f"EXCEPTION-RAISED: {err}")
            sys.exit(-1)
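# A minimal usage sketch for ModelBidirectDNA. The params dict below is an
# assumption built from the keys the class reads; the values are placeholders,
# not a configuration from the original project:
params = {
    'result_base_dir': 'results/',
    'lr': 1e-3,
    'batch_size': 64,
    'weight_decay': 1e-4,
    'maxlen': 200,
    'vocabulary_len': 5,
    'conv_num_filter': 32,
    'conv_kernel_size': 5,
    'dropout_1_rate': 0.3,
    'lstm_units': 64,
    'lstm_input_dropout': 0.2,
    'lstm_output_dropout': 0.3,
    'dense_dropout_rate': 0.3,
}
dna_model = ModelBidirectDNA(params)
dna_model.build()
# history, trained_epochs = dna_model.fit(X_tr, y_tr, epochs=50,
#                                         callbacks_list=None,
#                                         validation_data=(X_val, y_val))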
# In[27]:

history = model.fit(X_train, y_train, batch_size=32, epochs=15,
                    verbose=1, validation_split=0.2)

# In[29]:

model.evaluate(X_test, y_test)

# In[30]:

model.predict_classes(X_test)

# In[31]:

y_test[0]

# In[32]:

y_test[1]

# In[33]:

y_test[2]

# In[44]:
plt.plot(range(1, 26), history.history['categorical_accuracy'])
plt.plot(range(1, 26), history.history['val_categorical_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

plt.plot(range(1, 26), history.history['loss'])
plt.plot(range(1, 26), history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

Y_Predict = model.predict_classes(X_Test)
mat = confusion_matrix(Y_Test, Y_Predict)
plot_confusion_matrix(mat, figsize=(5, 5))

# #-----------------------*Training and evaluating session start*------------------------------------------------
# init_op = tf.global_variables_initializer()
# saver = tf.train.Saver()
# with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True)) as sess:
#     sess.run(init_op)
#     total_batch = int(1823 / batch_size)
#     print("total batches: ", total_batch)
#     epoch_rate_down = 0
#     best_kappa = 0
for n in range(5):
    # Create the container
    model = Sequential()
    # Create the first layer
    model.add(Dense(3, input_dim=2, activation='relu'))
    counter = 0
    for i in range(5):
        model.add(Dense(64, activation='relu'))
        if counter < n:
            counter += 1
            model.add(layers.Dropout(rate=0.5))
    # Add the output layer
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(x_train, y_train, batch_size=100, epochs=100, verbose=1)
    # Set the x/y ranges within which the model is sampled
    xx = np.arange(-2, 3, 0.01)
    yy = np.arange(-2, 2, 0.01)
    # Sample
    XX, YY = np.meshgrid(xx, yy)
    preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
    title = 'Dropout layers: {}'.format(n)
    filename = 'network_capacity_%d.png' % (2 + n)
    draw(X, Y, title, filename, XX, YY, preds)
plt.show()
plt.clf()

plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')

seed_text = "Laurence went to dublin"
next_words = 100

for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_seq_len - 1, padding='pre')
    predicted = model.predict_classes(token_list, verbose=0)
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted:
            output_word = word
            break
    seed_text += " " + output_word

print(seed_text)

# Tweaks to improve model and larger corpus
mobilenet = MobileNetV2(weights="imagenet", include_top=False, input_shape=(150, 150, 3))
for layer in mobilenet.layers:
    layer.trainable = False

model = Sequential()
model.add(mobilenet)
model.add(Flatten())
model.add(Dense(2, activation="sigmoid"))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

checkpoint = ModelCheckpoint("mobilenet_facemask.h5", monitor="val_accuracy",
                             save_best_only=True, verbose=1)
# The metric is compiled as "accuracy", so the logged key is "val_accuracy",
# not "val_acc" as in the original
earlystop = EarlyStopping(monitor="val_accuracy", patience=5, verbose=1)

history = model.fit_generator(generator=train, steps_per_epoch=len(train) // 32,
                              validation_data=valid, validation_steps=len(valid) // 32,
                              callbacks=[checkpoint, earlystop], epochs=15)

model.evaluate_generator(valid)
model.save("face_mask.h5")

pred = model.predict_classes(valid)
pred[:15]  # check

# Without mask
mask = "../input/with-and-without-mask/"
plt.figure(figsize=(8, 7))
label = {0: "With Mask", 1: "Without Mask"}
color_label = {0: (0, 255, 0), 1: (0, 0, 255)}
cascade = cv2.CascadeClassifier("../input/frontalface/haarcascade_frontalface_default.xml")
count = 0
i = "../input/with-and-without-mask/mask9.jpg"
frame = cv2.imread(i)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = cascade.detectMultiScale(gray, 1.1, 4)
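# The snippet above stops right after face detection. A sketch of how the
# detected faces could be classified and annotated -- an assumption built on
# the 150x150 input size and the label/color_label dicts defined above:
import numpy as np

for (x, y, w, h) in faces:
    face = frame[y:y + h, x:x + w]
    face = cv2.resize(face, (150, 150)) / 255.0
    idx = int(model.predict_classes(np.expand_dims(face, axis=0))[0])
    cv2.rectangle(frame, (x, y), (x + w, y + h), color_label[idx], 2)
    cv2.putText(frame, label[idx], (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, color_label[idx], 2)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.show()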
# Start capturing images
cap = cv2.VideoCapture(0)
animal_captured = ''
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # Resize the image
    img_resized = cv2.resize(frame, img_shape, interpolation=cv2.INTER_AREA)
    # Scale pixel values from 0..255 to 0..1
    img_resized = img_resized / 255
    # Predict
    predicted = model.predict_classes(np.asarray([img_resized]))[0]
    animal_predicted = animal_names[str(predicted)]
    # Check if the animal has changed
    if animal_captured != animal_predicted:
        animal_captured = animal_predicted
        print(animal_captured)
    cv2.putText(frame, animal_captured, (100, 150),
                cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 255, 255), 2)
    cv2.imshow('my webcam', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break  # q to quit
    # time.sleep(0.5)

# When everything is done, release the capture
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(epoch_range, history.history['loss'])
plt.plot(epoch_range, history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

"""Show result for every class in *table*:"""

y_pred = model.predict_classes(input_test_data)
conf_matrix = confusion_matrix(output_test_data, y_pred)
plot_confusion_matrix(conf_matrix, figsize=(15, 11), colorbar=True,
                      show_normed=True, show_absolute=False)

"""## **Saving**

### Saving result to *colab*:
"""

with open("model_best.json", 'w') as json_file:
    json_file.write(model.to_json())
model.save_weights("model_best.h5")
                    validation_steps=40)

# Plot for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Test image
test = r'D:\Bhushan\casting_512x512\def_front\cast_def_0_180.jpeg'
img = image.load_img(test, target_size=(128, 128))
plt.imshow(img)
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
images = np.vstack([x])
val1 = model.predict(images)
# predict returns an array of shape (1, 1); compare its single element
if val1[0][0] == 0:
    plt.title("def_front")
else:
    plt.title("ok_front")

# Test data
val = model.predict_classes(testData[0][0])
from sklearn.metrics import confusion_matrix, accuracy_score
p = confusion_matrix(testData[0][1], val)
acc = accuracy_score(testData[0][1], val)
from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam

# The value ranges differ greatly, so normalization is essential
# Normalized data
data = np.loadtxt("../../data/diabetes1.csv", skiprows=1, delimiter=",",
                  dtype=np.float32)
print(data)

x_data = data[:, :-1]
y_data = data[:, -1:]
print(x_data.shape)
print(y_data.shape)

IO = Dense(units=1, input_shape=[8], activation="sigmoid")
model = Sequential([IO])
model.compile(loss="binary_crossentropy",
              optimizer=Adam(learning_rate=0.01),
              metrics=["accuracy"])
history = model.fit(x_data, y_data, epochs=100)

print(model.predict(x_data))
print(model.predict_classes(x_data))
# The metric was compiled as "accuracy", so the history key is 'accuracy', not 'acc'
print(history.history['accuracy'][-1])
plt.xlabel("Epochs") plt.ylabel("Loss") plt.grid(True) plt.legend() plt.show() acc_values = hist_dict["accuracy"] val_acc_values = hist_dict["val_accuracy"] epochs = range(1,len(acc_values)+1) line1 = plt.plot(epochs,val_acc_values,label="Validation/Test acc") line2 = plt.plot(epochs,acc_values,label = "Training acc") plt.setp(line1,linewidth=2.0,marker="+",markersize=10.0) plt.setp(line2,linewidth=2.0,marker="4",markersize=10.0) plt.xlabel("Epochs") plt.ylabel("Accuracy") plt.grid(True) plt.legend() plt.show() for i in range(0,10): random = np.random.randint(0,len(x_test)) inputimg = x_test[random] inputimg = inputimg.reshape(1,28,28,1) result = str( model.predict_classes(inputimg,1,verbose=0)[0] ) print(result)
def train_model():
    # Set seeds for reproducibility
    seed(1)
    set_random_seed(2)

    # Load data
    root = str(Path(__file__).resolve().parents[2])
    with Path(root + '/data/imdb.pickle').open('rb') as f:
        data = pickle.load(f)
    data = data.drop_duplicates('doc')
    data = data.dropna()

    # Load embeddings
    embed_lookup = {}
    with Path(root + '/data/glove.6B.50d.txt').resolve().open() as f:
        for line in f:
            values = line.split()
            word = values[0]
            vec = np.array(values[1:])
            embed_lookup[word] = vec
    print("Loaded {} embeddings".format(len(embed_lookup)))

    # Split data into 64%/16%/20% training/validation/testing
    X_train, X_test, y_train, y_test = train_test_split(
        data.doc, data.sentiment, test_size=0.2,
        stratify=data.sentiment, random_state=1)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, stratify=y_train, random_state=1)

    # Fit tokenizer, get vocabulary and build embedding matrix
    tk = Tokenizer()
    tk.fit_on_texts(X_train)
    vocab_size = len(tk.word_index) + 1
    embed_matrix = np.zeros(shape=(vocab_size, 50))
    for word, i in tk.word_index.items():
        if word in embed_lookup:
            embed_matrix[i] = embed_lookup[word]

    # Tokenize, sequence and pad the data
    X_train, X_val, X_test = encode([X_train, X_val, X_test], tk)

    # Build and train the network
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=50,
                        weights=[embed_matrix], trainable=False))
    model.add(LSTM(100))
    model.add(Dense(1, activation='sigmoid'))
    stopper = EarlyStopping()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    print(model.summary())
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        callbacks=[stopper],
                        batch_size=64,
                        epochs=100)

    # Evaluate the network
    print("\nTest on {} samples".format(len(X_test)))
    y_pred = model.predict_classes(X_test)
    scores = {}
    scores['accuracy'] = accuracy_score(y_test, y_pred)
    scores['f1_macro'] = f1_score(y_test, y_pred, average='macro')
    scores['f1_None'] = f1_score(y_test, y_pred, average=None)
    for score, value in scores.items():
        print("{}: {}".format(score, value))

    return history, y_test, y_pred
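# encode() is called above but not defined in this excerpt. A sketch of what
# it likely does -- converting texts to integer sequences and padding them to
# a common length -- assuming the Keras Tokenizer/pad_sequences API; the
# maxlen value is an assumption:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def encode(text_sets, tokenizer, maxlen=200):
    """Tokenize each set of texts and pad every sequence to maxlen."""
    return [pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=maxlen)
            for texts in text_sets]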
):  # Extracting the file name of the image from the class-label folder
    image = cv2.imread(directory + '/' + image_file)  # Reading the image (OpenCV)
    # Resize the image; some images are different sizes (resizing is very important)
    image = cv2.resize(image, (100, 100))
    test_images.append(image)

# Converting the list of images to a numpy array
test_images = np.array(test_images, dtype=np.float32)
test_images = test_images / 255.0
test_images.shape

# Predicting the test set
poll = model.predict_classes(test_images)

# MAKING THE SUBMISSION FILE
freeman = samplesub.copy()
freeman['growth_stage'] = poll
freeman['growth_stage'] = freeman['growth_stage'].map(
    {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7}, na_action='ignore')
def get_w2v_v2(data_path, batch_size, epochs, sent_len, EMB_DIM):
    data = pd.read_csv(data_path, names=['text', 'label'], header=0)
    print(f'input data shape {data.shape}')
    dat0 = data.dropna()
    print(f'input data shape {dat0.shape} after dropping NA')
    accu_df = pd.DataFrame()
    label_cnt = pd.DataFrame(dat0['label'].value_counts()).reset_index()
    label_cnt.columns = ['label', 'count']
    least_label_list = list(label_cnt.loc[label_cnt['count'] < 3, 'label'].values)
    # display(least_label_list)
    print(f'dropping {len(least_label_list)} labels')
    data = dat0[~dat0['label'].isin(least_label_list)].reset_index()
    print(f'after deleting least labels {data.shape}')
    le = LabelEncoder()
    data['label_en'] = le.fit_transform(data['label'])
    data['seq_id'] = data.index
    data['token'] = data['text'].apply(lambda x: gensim.utils.simple_preprocess(x))
    sent = data['token'].to_list()
    # model = Word2Vec(sentences=sent, size=EMB_DIM, window=5, min_count=1, sg=1, workers=4)
    marker = (data_path.split('/')[-1].split('_')[0] + "_"
              + data_path.split('/')[-1].split('_')[1])
    model = Word2Vec.load('SKIP_GRAM_MODEL/w2v_models/skip_gram_w2c_{}.model'.format(marker))
    word_vectors = model.wv
    embedding_matrix = word_vectors.vectors
    print(f'vocabulary size={embedding_matrix.shape[0]}, '
          f'each word is {1, embedding_matrix.shape[1]}')
    data['seq_id'] = data.index

    # Create a padded sequence for each document
    word2id = {k: v.index for k, v in word_vectors.vocab.items()}
    emb_df = pd.DataFrame()
    for i, sent in enumerate(data['token']):
        text = data.loc[i, 'text']
        label = data.loc[i, 'label_en']
        seq_id = data.loc[i, 'seq_id']
        if i % 1000 == 0:
            print(i, sent)
        sent_seq = []
        for j, word in enumerate(sent):
            w_id = word2id.get(word)
            sent_seq.append(w_id)
        df = pd.DataFrame({'text': [text], 'seq_id': [seq_id],
                           'word_seq': [sent_seq], 'label': [label]})
        emb_df = emb_df.append(df)
    display(emb_df.head())
    emb_df.to_csv('SKIP_GRAM_MODEL/embeddings/{}_with_word_seq_index.csv'.format(marker),
                  index=False)
    print(emb_df.dtypes)

    # Proceed with the NN model
    padded_sent = pad_sequences(emb_df['word_seq'].to_list(), maxlen=sent_len,
                                padding='post')
    X = padded_sent
    Y = emb_df['label']
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, stratify=Y, test_size=0.2, random_state=111)
    x_train, x_dev, y_train, y_dev = train_test_split(
        X_train, Y_train, stratify=Y_train, test_size=0.1, random_state=111)
    Y_test, y_train, y_dev = (to_categorical(Y_test), to_categorical(y_train),
                              to_categorical(y_dev))
    print('shape:')
    print(f'Y_test shape{Y_test.shape}, y_train shape{y_train.shape}, '
          f'y_dev shape{y_dev.shape}')
    class_count = emb_df.label.nunique()
    LOGDIR = './{}'.format(marker)
    vocab_length = len(embedding_matrix)
    print('--start neural network training and validation--')
    NN_model = Sequential([
        Embedding(input_dim=vocab_length, output_dim=EMB_DIM,
                  weights=[embedding_matrix], input_length=sent_len),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(class_count, activation='softmax')])
    NN_model.compile(loss='categorical_crossentropy', optimizer='adam',
                     metrics=['accuracy'])
    NN_model.fit(x_train, y_train, validation_data=(x_dev, y_dev),
                 batch_size=batch_size, epochs=epochs, verbose=1,
                 callbacks=[TensorBoard(LOGDIR)])
    NN_model.summary()
    NN_model.save('NN_Models/skip_gram_{}_NN'.format(marker))

    y_pred = NN_model.predict_classes(X_test)
    y_pred = to_categorical(y_pred)
    maxpos = lambda x: np.argmax(x)
    yTrueMax = np.array([maxpos(rec) for rec in Y_test])
    yPredMax = np.array([maxpos(rec) for rec in y_pred])
    yPredTop3 = np.argsort(y_pred, axis=1)[:, -3]
    yPredTop2 = np.argsort(y_pred, axis=1)[:, -2]
    top1accu = sum(yPredMax == yTrueMax) / len(yPredMax)
    top1accu = round(top1accu * 100, 2)
    # Top-3: count a hit if the true class appears among the three highest-scored
    # predictions (the original referenced undefined yTrueTop3/top3_pred)
    top3accu = sum((yPredTop3 == yTrueMax) | (yPredTop2 == yTrueMax)
                   | (yPredMax == yTrueMax)) / len(yTrueMax)
    top3accu = round(top3accu * 100, 2)
    print('test data TOP 1 accuracy {} %'.format(top1accu))  # 35.32 %
    print('test data TOP 3 accuracy {} %'.format(top3accu))  # 35.32 %
    print()
    accu = pd.DataFrame({'Model': [marker], 'top1 accuracy': [top1accu],
                         'top3 accuracy': [top3accu]})
    display(accu)
    accu_df = accu_df.append(accu)  # DataFrame.append returns a new frame, so reassign
    accu_df.to_csv('SKIP_GRAM_MODEL/skip_gram_accuracy.csv', index=False)
    return data, model, emb_df, NN_model, X_test, Y_test, y_pred, accu
    lambda x: ' '.join([word for word in x if word not in (stop)]))

tf = TfidfVectorizer()
v = tf.fit_transform(data['stop'].to_numpy())
feature_names = tf.get_feature_names()
dense = v.todense()
# df = pd.DataFrame(dense, columns=[feature_names])
df = pd.DataFrame(dense)
print(df)

train_X, val_X, train_y, val_y = train_test_split(df, y, train_size=0.75,
                                                  test_size=0.25, random_state=0)

model = Sequential()
# input_dim must match the TF-IDF vocabulary size (16998 here)
model.add(Dense(100, input_dim=16998, activation='relu'))
model.add(Dense(100, activation='relu'))
# model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss=binary_crossentropy, optimizer='adam',
              metrics=['accuracy', 'Precision', 'Recall'])  # TODO: f1 score
model.fit(train_X, train_y)

prediction = model.predict_classes(val_X)
print(precision_recall_fscore_support(val_y, prediction, average='macro'))
print(prediction)
print(accuracy_score(prediction, val_y))
                              pool_size=(2, 2),
                              strides=2))  # if strides is not given, it defaults to the pool size
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Flatten())
model.add(layers.Dense(units=128, activation='relu'))
model.add(layers.Dense(units=128, activation='relu'))
model.add(layers.Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam', loss='mse')
model.fit_generator(train_set, epochs=200, steps_per_epoch=10)

# 1
for repeat in range(1, 20):
    img1 = image.load_img(
        'C:\\Users\\ahmed\\PycharmProjects\\untitled\\catanddog\\test1\\{}.jpg'.format(repeat),
        target_size=(100, 100))
    img = image.img_to_array(img1)
    img = img / 255
    img = np.expand_dims(img, axis=0)
    prediction = model.predict_classes(img)
    plt.text(20, 62, prediction, color='red', fontsize=18,
             bbox=dict(facecolor='white', alpha=0.8))
    plt.imshow(img1)
    plt.show()

# 2
# ### Perceptron Evaluation Metrics

# In[133]:

model1.evaluate(X_train, y_train)

# In[134]:

model1.evaluate(X_test, y_test)

# ### Perceptron Prediction Score

# In[135]:

train_pred = model1.predict_classes(X_train)
pred = model1.predict_classes(X_test)

# In[136]:

print("train", precision_score(y_train, train_pred))
print("test", precision_score(y_test, pred))

# # 2. Multi-Layer Perceptron

# In[137]:

# Step 1: build model
model1 = Sequential()
# Input layer
model1.add(Dense(30, input_dim=10, activation='relu'))
# In[ ]:

model = load_model('cnn.hdf5')
model.load_weights('cnn.hdf5')

# In[ ]:

score = model.evaluate(x_test, y_test, verbose=0)
print('Test Loss :', score[0])
print('Test Accuracy :', score[1])

# In[ ]:

# Get the predictions for the test data
predicted_classes = model.predict_classes(x_test)

# In[ ]:

confusion_mtx = confusion_matrix(y_test, predicted_classes)
plt.imshow(confusion_mtx, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('confusion_matrix')
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, ['R', 'O'], rotation=90)
plt.yticks(tick_marks, ['R', 'O'])

# The following writes the predicted counts into the plot and highlights
# the most-predicted number for each label
thresh = confusion_mtx.max() / 2.
for i, j in itertools.product(range(confusion_mtx.shape[0]),
                              range(confusion_mtx.shape[1])):
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('Churn_Modelling.csv')
print(df.head())

X = df.drop(labels=['CustomerId', 'Surname', 'RowNumber', 'Exited'], axis=1)
y = df['Exited']

lb = LabelEncoder()
X['Geography'] = lb.fit_transform(X['Geography'])
X['Gender'] = lb.fit_transform(X['Gender'])
X = pd.get_dummies(X, drop_first=True, columns=['Geography'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=0, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# Use the scaler fitted on the training set; do not refit on the test set
X_test = scaler.transform(X_test)

# All layers must be added to the same model instance (the original split
# them across two variables, reg and Reg)
reg = Sequential()
reg.add(Dense(X.shape[1], activation='relu'))
reg.add(Dense(128, activation='relu'))
reg.add(Dense(1, activation='sigmoid'))
reg.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
reg.fit(X_train, y_train.to_numpy(), batch_size=10, epochs=2, verbose=1)

y_pred = reg.predict_classes(X_test)
print(y_pred, y_test)
reg.evaluate(X_test, y_test.to_numpy())

from sklearn.metrics import confusion_matrix, accuracy_score
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
class ERLC(BaseEstimator):
    '''
    Ensemble Representation Learning Classifier (ERLC)
    '''

    def __init__(self, verbose=True, sae_hidden_nodes=400,
                 innerNN_architecture=[512, 512, 512],
                 outerNN_architecture=[256, 256],
                 pca_components=14):
        self.verbose = verbose

        ## Tunable parameters
        self.sae_hidden_nodes = sae_hidden_nodes
        self.innerNN_architecture = innerNN_architecture
        self.outerNN_architecture = outerNN_architecture
        self.pca_components = pca_components

        ## Models
        self.DT_org = DecisionTreeClassifier()
        self.DT_new = DecisionTreeClassifier()
        self.RF_org = RandomForestClassifier()
        self.RF_new = RandomForestClassifier()
        self.sae = Sequential()
        self.inner_dnn = Sequential()
        self.inner_dnn_new = Sequential()
        self.outer_dnn = Sequential()

        # Private class variables
        self.isTrained = False
        self.X_train = []
        self.X_train_new = []
        self.y_train = []
        self.fused_train = []
        self.num_classes = 0

    def get_params(self, deep=True):
        return {
            "sae_hidden_nodes": self.sae_hidden_nodes,
            "pca_components": self.pca_components,
        }

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X_train, y_train, sae_epochs=500, innerNN_epochs=500,
            outerNN_epochs=500):
        '''
        This function fits/trains the model on the inputted data.

        inputs
        --------
        X_train: the training data
        y_train: corresponding labels
        sae_epochs: epochs of training for the Stacked Autoencoder (SAE)
        innerNN_epochs: epochs of training for the inner neural network
        outerNN_epochs: epochs of training for the outer neural network
        '''
        self.X_train = X_train
        self.y_train = y_train
        num_classes = np.max(y_train) + 1
        self.num_classes = num_classes

        if (self.verbose):
            print("Building ERLC model")

        # First we build the autoencoder
        if (self.verbose):
            print("Building autoencoder")
        self.sae = self.buildSAE(X_train, num_nodes=self.sae_hidden_nodes,
                                 epochs=sae_epochs)

        # Get the new representation
        if (self.verbose):
            print("Getting new representation of the data")
        X_train_new = self.sae.predict(X_train)
        self.X_train_new = X_train_new

        # Train DT on the original representation
        if (self.verbose):
            print("Training DT on original representation")
        self.DT_org.fit(X_train, y_train)
        train_DT_org = self.DT_org.predict(X_train)

        # Train DT on the new representation
        if (self.verbose):
            print("Training DT on new representation")
        pca = PCA(n_components=self.pca_components)
        Xtr = pca.fit_transform(X_train_new)
        self.DT_new.fit(Xtr, y_train)
        train_DT_new = self.DT_new.predict(Xtr)

        # Train RF on the original representation
        if (self.verbose):
            print("Training RF on original representation")
        self.RF_org.fit(X_train, y_train)
        train_RF_org = self.RF_org.predict(X_train)

        # Train RF on the new representation
        if (self.verbose):
            print("Training RF on new representation")
        self.RF_new.fit(X_train_new, y_train)
        train_RF_new = self.RF_new.predict(X_train_new)

        # Build and train the inner DNN
        if (self.verbose):
            print("Training inner DNN")
        self.inner_dnn = self.buildNN(self.innerNN_architecture, X_train, y_train,
                                      num_classes=num_classes, activation='relu',
                                      do=0, epochs=innerNN_epochs)
        train_DNN = self.inner_dnn.predict_classes(X_train)

        # Build and train the inner DNN on the new representation
        if (self.verbose):
            print("Training inner DNN on new representation")
        self.inner_dnn_new = self.buildNN(self.innerNN_architecture, X_train_new,
                                          y_train, num_classes=num_classes,
                                          activation='relu', do=0,
                                          epochs=innerNN_epochs)
        train_DNN_new = self.inner_dnn_new.predict_classes(X_train_new)

        # Changing the output of each classifier to categorical
        if (self.verbose):
            print("Creating fusion vector")
vector") train_DT_org = to_categorical(train_DT_org, num_classes=num_classes) train_DT_new = to_categorical(train_DT_new, num_classes=num_classes) train_RF_org = to_categorical(train_RF_org, num_classes=num_classes) train_RF_new = to_categorical(train_RF_new, num_classes=num_classes) train_DNN = to_categorical(train_DNN, num_classes=num_classes) train_DNN_new = to_categorical(train_DNN_new, num_classes=num_classes) # Combining to make fused training data fused_train = (train_DT_org, train_DT_new, train_RF_org, train_RF_new, train_DNN, train_DNN_new) fused_train = np.concatenate(fused_train, axis=1) self.fused_train = fused_train # Training outer DNN if (self.verbose): print("Training outer DNN") self.outer_dnn = self.buildNN(self.outerNN_architecture, fused_train, y_train, num_classes=num_classes, do=0.3, val_split=0.2, regularizer=True, epochs=outerNN_epochs) if (self.verbose): print("Training complete") self.isTrained = True def predict(self, X_test): ''' This function predicts the output of the input test data. This function must be called after fit has been called. inputs ------- X_test: testing data outputs ------- y_pred: the predicted labels of the test data ''' # Get new representation of test data X_test_new = self.sae.predict(X_test) # DT original DT_org_test = self.DT_org.predict(X_test) # DT new pca = PCA(n_components=self.pca_components) pca.fit(self.X_train_new) tempX = pca.transform(X_test_new) DT_new_test = self.DT_new.predict(tempX) # RF original RF_org_test = self.RF_org.predict(X_test) # RF new RF_new_test = self.RF_new.predict(X_test_new) # DNN original DNN_org_test = self.inner_dnn.predict_classes(X_test) # DNN new DNN_new_test = self.inner_dnn_new.predict_classes(X_test_new) # Transform to categorical and combine DT_org_test = to_categorical(DT_org_test, num_classes=self.num_classes) DT_new_test = to_categorical(DT_new_test, num_classes=self.num_classes) RF_org_test = to_categorical(RF_org_test, num_classes=self.num_classes) RF_new_test = to_categorical(RF_new_test, num_classes=self.num_classes) DNN_org_test = to_categorical(DNN_org_test, num_classes=self.num_classes) DNN_new_test = to_categorical(DNN_new_test, num_classes=self.num_classes) testSet = (DT_org_test, DT_new_test, RF_org_test, RF_new_test, DNN_org_test, DNN_new_test) testSet = np.concatenate(testSet, axis=1) # Outer NN y_pred = self.outer_dnn.predict_classes(testSet) return y_pred def localize(self, X_sample, y_sample, n_measurements=10, normal_label=41): ''' This function localizes the attack by returning the score of each feature (measurement) based on its correlation with the output of that attack. It uses the chi test function. inputs ------- X_sample: the sample vector y_sample: the corresponding label n_measurements: the top n infected measurements to return normal_label: the label value for normal samples outputs -------- score: The chi score of each feature topIndices: the top n features infected based on the chi score test ''' if (X_sample.ndim > 1): raise ValueError('Sample array must be 1 dimensional') if (y_sample.ndim > 1): raise ValueError('Sample label must be 1 dimensional') if (self.isTrained == False): raise ValueError( 'The model has not been trained yet. 
                'You must call the fit function first or load a saved model')

        y_pred = self.predict(X_sample)
        chi_score, topF = self.chi_test(self.X_train, self.y_train,
                                        n_measurements=n_measurements)
        row = chi_score[self.y_train == y_pred]
        # currentX = np.vstack( (self.X_train[ (self.y_train == normal_label) | (self.y_train == y_sample) ], X_sample) )
        # currentY = np.hstack( (self.y_train[ (self.y_train == normal_label) | (self.y_train == y_sample) ], y_sample) )
        # score = chi2(currentX, currentY)
        # score = np.nan_to_num(score)
        # row = score[1,:].copy()
        topIndices = row.argsort()[-n_measurements:][::-1]
        return row, topIndices

    def buildSAE(self, X_train, num_nodes=400, epochs=100):
        '''
        This function builds the Stacked AutoEncoder (SAE) and trains it
        to gain a new representation.

        inputs
        -------
        X_train: matrix of the data
        num_nodes: the number of nodes in the hidden layer
        epochs: number of epochs to train the SAE model

        outputs
        --------
        model: the trained SAE model
        '''
        input_X = Input(shape=(X_train.shape[1], ))
        encoded = Dense(units=800, activation='relu')(input_X)
        encoded = Dense(units=num_nodes, activation='relu')(encoded)
        decoded = Dense(units=800, activation='relu')(encoded)
        decoded = Dense(units=X_train.shape[1], activation='relu')(decoded)
        autoencoder = Model(input_X, decoded)
        autoencoder.compile(optimizer='adam', loss='mean_squared_error',
                            metrics=['mse'])

        # Early stop callback
        earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                              min_delta=1e-6,
                                                              mode='min',
                                                              patience=10)

        # Fit the autoencoder
        autoencoder.fit(X_train, X_train,
                        epochs=epochs,
                        batch_size=256,
                        shuffle=True,
                        validation_split=0.2,
                        callbacks=[earlystop_callback])

        # Preparing the autoencoder model for use
        model = Sequential()
        model.add(autoencoder.layers[0])
        model.add(autoencoder.layers[1])
        model.add(autoencoder.layers[2])
        return model

    def buildNN(self, architecture, X_train, y_train, num_classes=42,
                activation='relu', do=0, regularizer=False, epochs=500,
                val_split=0.2):
        '''
        This function builds the inner Deep Neural Network (DNN) and trains it
        to gain a new representation.

        inputs
        --------
        X_train: matrix of the data (meter measurements of a smart grid)
        y_train: array of the labels for the corresponding X_train samples
        num_classes: the number of classes
        architecture: the number of nodes in each hidden layer
        activation: the activation function in each layer (except the final layer)
        do: fraction of dropout between the hidden layers. This should be a value
            between 0 and 1. If 0, dropout will not be used
        regularizer: whether or not to use l2 regularization in hidden layers
        epochs: number of epochs to train the network
        val_split: fraction of data to use for validation as the network is being
            trained. This is a value between 0 and 1.
        outputs
        --------
        nn_model: the trained neural network
        '''
        # Building the neural network
        y_train2 = to_categorical(y_train, num_classes=num_classes)
        nn_model = Sequential()
        nn_model.add(tf.keras.Input(shape=(X_train.shape[1], )))
        for i in range(len(architecture)):
            if ((i > 0) & (i < len(architecture) - 1) & (do > 0.0)):
                nn_model.add(Dropout(do))
            if (regularizer == True):
                nn_model.add(Dense(architecture[i],
                                   activation=activation,
                                   kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
            else:
                nn_model.add(Dense(architecture[i], activation=activation))
        nn_model.add(Dense(num_classes, activation='softmax'))
        nn_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['acc', self.f1_m])

        # Early stop callback
        earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                              min_delta=1e-6,
                                                              mode='min',
                                                              patience=20)
        if (val_split > 0):
            nn_model.fit(X_train, y_train2,
                         epochs=epochs,
                         batch_size=256,
                         validation_split=val_split,
                         callbacks=[earlystop_callback])
        else:
            nn_model.fit(X_train, y_train2,
                         epochs=epochs,
                         batch_size=256,
                         callbacks=[earlystop_callback])
        return nn_model

    def chi_test(self, X, y, n_measurements=10, normal_label=41):
        '''
        This function calculates the chi square of features compared to the same
        features in normal samples. It takes data and labels, then performs a
        chi-squared test on each feature for each attack label against the
        normal samples.

        inputs
        -------
        X: data matrix
        y: data labels
        n_measurements: the top n infected measurements to return

        outputs
        --------
        final_chi: a matrix of size (labels, features) in which each row
            corresponds to the chi score of each feature for that attack. The
            labels and features are in the same order as the input data and labels.
        topF: the top n infected features based on the chi score test
        '''
        # Combine saved train data with test data
        # X = np.vstack((self.X_train, X_test))
        # y = np.hstack((self.y_train, y_test))
        labels = np.unique(y)
        numFeatures = X.shape[1]
        final_chi = np.empty((len(labels) - 1, numFeatures))
        i = 0
        normalX = X[y == normal_label]
        for label in labels:
            if (label != normal_label):
                currentX = np.vstack((X[y == label], normalX))
                currentY = np.hstack((y[y == label], y[y == normal_label]))
                ch, pval = chi2(currentX, currentY)
                final_chi[i, :] = pval
                i = i + 1
        final_chi = np.nan_to_num(final_chi)
        topF = []
        for rowNumber in range(np.unique(y).shape[0] - 1):
            row = final_chi[rowNumber, :].copy()
            idx = np.argpartition(row, n_measurements)
            topIndices = idx[:n_measurements]
            topF.append(topIndices)
        topF = np.asarray(topF)
        return final_chi, topF

    ## SAVING AND LOADING THE MODEL
    def save_model(self, save_path='saved_model/'):
        # Saving the autoencoder
        self.sae.save(save_path + 'sae.h5')

        # Saving the classifiers
        dump(self.DT_org, save_path + 'DT_org.joblib')
        dump(self.DT_new, save_path + 'DT_new.joblib')
        dump(self.RF_org, save_path + 'RF_org.joblib')
        dump(self.RF_new, save_path + 'RF_new.joblib')

        # Saving the neural nets
        self.inner_dnn.save(save_path + 'inner_dnn.h5')
        self.inner_dnn_new.save(save_path + 'inner_dnn_new.h5')
        self.outer_dnn.save(save_path + 'outer_dnn.h5')

        # Saving the processed training data
        savetxt(save_path + 'X_train.csv', self.X_train, delimiter=',')
        savetxt(save_path + 'X_train_new.csv', self.X_train_new, delimiter=',')
        savetxt(save_path + 'y_train.csv', self.y_train, delimiter=',')
        savetxt(save_path + 'fused_train.csv', self.fused_train, delimiter=',')

    def load_model(self, save_path='saved_model/'):
        # Loading the training data
        self.X_train = loadtxt(save_path + 'X_train.csv', delimiter=',')
        self.X_train_new = loadtxt(save_path + 'X_train_new.csv', delimiter=',')
        self.y_train = loadtxt(save_path + 'y_train.csv', delimiter=',')
        self.fused_train = loadtxt(save_path + 'fused_train.csv', delimiter=',')

        # Loading the classifiers
        self.DT_org = load(save_path + 'DT_org.joblib')
        self.DT_new = load(save_path + 'DT_new.joblib')
        self.RF_org = load(save_path + 'RF_org.joblib')
        self.RF_new = load(save_path + 'RF_new.joblib')

        # Loading the neural nets
        self.sae = self.rebuildSAE(self.X_train, num_nodes=self.sae_hidden_nodes)
        self.sae.load_weights(save_path + 'sae.h5')
        self.inner_dnn = self.rebuildNN(self.X_train,
                                        architecture=self.innerNN_architecture,
                                        num_classes=np.max(self.y_train) + 1,
                                        activation='relu',
                                        do=0)
        self.inner_dnn.load_weights(save_path + 'inner_dnn.h5')
        self.inner_dnn_new = self.rebuildNN(self.X_train_new,
                                            architecture=self.innerNN_architecture,
                                            num_classes=np.max(self.y_train) + 1,
                                            activation='relu',
                                            do=0)
        self.inner_dnn_new.load_weights(save_path + 'inner_dnn_new.h5')
        self.outer_dnn = self.rebuildNN(self.fused_train,
                                        architecture=self.outerNN_architecture,
                                        num_classes=np.max(self.y_train) + 1,
                                        activation='relu',
                                        do=0.3)
        self.outer_dnn.load_weights(save_path + 'outer_dnn.h5')

    ## Rebuilding functions for loading the model
    def rebuildSAE(self, X_train, num_nodes=400):
        input_X = Input(shape=(X_train.shape[1], ))
        encoded = Dense(units=800, activation='relu')(input_X)
        encoded = Dense(units=num_nodes, activation='relu')(encoded)
        decoded = Dense(units=800, activation='relu')(encoded)
        decoded = Dense(units=X_train.shape[1], activation='relu')(decoded)
        autoencoder = Model(input_X, decoded)
        autoencoder.compile(optimizer='adam', loss='mean_squared_error',
                            metrics=['mse'])
        model = Sequential()
        model.add(autoencoder.layers[0])
        model.add(autoencoder.layers[1])
        model.add(autoencoder.layers[2])
        return model

    def rebuildNN(self, X_train, num_classes=42, architecture=[512, 512, 512],
                  activation='relu', do=0, regularizer=False):
        '''
        This function rebuilds the inner Deep Neural Network (DNN) so that
        saved weights can be loaded into it.

        inputs
        --------
        X_train: matrix of the data (meter measurements of a smart grid)
        num_classes: the number of classes
        architecture: the number of nodes in each hidden layer
        activation: the activation function in each layer (except the final layer)
        do: fraction of dropout between the hidden layers. This should be a value
            between 0 and 1.
            If 0, dropout will not be used
        regularizer: whether or not to use l2 regularization in hidden layers

        outputs
        --------
        nn_model: the rebuilt (untrained) neural network
        '''
        # Building the neural network
        nn_model = Sequential()
        nn_model.add(tf.keras.Input(shape=(X_train.shape[1], )))
        for i in range(len(architecture)):
            if ((i > 0) & (i < len(architecture) - 1) & (do > 0.0)):
                nn_model.add(Dropout(do))
            if (regularizer == True):
                nn_model.add(Dense(architecture[i],
                                   activation=activation,
                                   kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
            else:
                nn_model.add(Dense(architecture[i], activation=activation))
        nn_model.add(Dense(num_classes, activation='softmax'))
        nn_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['acc', self.f1_m])
        return nn_model

    ## METRICS
    def recall_m(self, y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision_m(self, y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    def f1_m(self, y_true, y_pred):
        precision = self.precision_m(y_true, y_pred)
        recall = self.recall_m(y_true, y_pred)
        return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
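# A minimal end-to-end usage sketch for ERLC. The synthetic data below is an
# assumption purely for illustration; the original project uses smart-grid
# meter measurements, with label 41 reserved for normal samples:
import numpy as np

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(500, 128)).astype('float32')
y_demo = rng.integers(0, 42, size=500)

erlc = ERLC(verbose=True, sae_hidden_nodes=64, pca_components=14)
erlc.fit(X_demo, y_demo, sae_epochs=5, innerNN_epochs=5, outerNN_epochs=5)
y_pred = erlc.predict(X_demo)
erlc.save_model('saved_model/')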
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt2, metrics=['acc'])

# history = model.fit(x_train, y_train, epochs=100, batch_size=30, verbose=1)
# print('the mse value is : ', model.evaluate(x_train, y_train))
history = model.fit(x_train, y_train, epochs=100, batch_size=30, verbose=1)
print('the mse value is : ', model.evaluate(x_train, y_train))

preds = model.predict_proba(x_train, batch_size=None, verbose=1)
preds_label = model.predict_classes(x_train)
# print(preds.shape)
# print(preds)
'''i = 0
for label in preds_label:
    print(preds[i], preds_label[i])
    if int(np.argmax(preds[i])) != int(preds_label[i]):
        print('aaaaaaaaaaaaa')
    i += 1'''

explainer = lime.lime_tabular.LimeTabularExplainer(x_train,
                                                   feature_names=features,
                                                   class_names=['0', '1', '2'])
I = []
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=35,
                    validation_data=(x_test, y_test), verbose=1)

# Save the weights to an HDF5 file (the original wrote to 'model.py',
# a misleading extension for a weights file)
model.save_weights('model.h5')
model.save('Final_model(1).h5')

model.predict_classes(x_test[:6])
y_test[:6]

score = model.evaluate(x_test, y_test)

def plot_learningCurve(history, epochs):
    epoch_range = range(1, epochs + 1)
    plt.plot(epoch_range, history.history['accuracy'])
    plt.plot(epoch_range, history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epochs')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(x=train_texts, y=target, epochs=1)

# Save the toxic-comment word embeddings to disk
weights = model.get_layer('embedding').get_weights()[0]
vocab = vectorize_layer.get_vocabulary()

out_v = io.open('comments_vectors.tsv', 'w', encoding='utf-8')
out_m = io.open('comments_metadata.tsv', 'w', encoding='utf-8')

for index, word in enumerate(vocab):
    if index == 0:
        continue  # skip 0, it's padding
    vec = weights[index]
    out_v.write('\t'.join([str(x) for x in vec]) + "\n")
    out_m.write(word + "\n")
out_v.close()
out_m.close()

print(model.predict_classes(tf.constant(np.array([
    'never mind it is not important',
    'i dont care at all, f**k you',
    'i love you']))))