def dropout_influence(X_train, y_train):
    # Build 5 networks with different numbers of Dropout layers
    for n in range(5):
        # Create the container
        model = Sequential()
        # Create the first layer
        model.add(layers.Dense(8, input_dim=2, activation="relu"))
        counter = 0
        # The number of hidden layers is fixed at 5
        for _ in range(5):
            model.add(layers.Dense(64, activation="relu"))
            # Insert n Dropout layers between the hidden layers
            if counter < n:
                counter += 1
                model.add(layers.Dropout(rate=0.5))

        # Output layer
        model.add(layers.Dense(1, activation="sigmoid"))
        # Compile the model
        model.compile(
            loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
        )
        # Train
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the decision boundary for each number of Dropout layers
        # x range of the visualization: [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # y range of the visualization: [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Sample a grid of points on the x-y plane for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "No Dropout layer" if n == 0 else "{0} Dropout layer(s)".format(n)
        file = "Dropout_%i.png" % n
        make_plot(
            X_train,
            y_train,
            title,
            file,
            XX,
            YY,
            preds,
            output_dir=OUTPUT_DIR + "/dropout",
        )
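Note: Sequential.predict_classes was removed in TensorFlow 2.6. On newer versions the grid predictions above can be reproduced with plain predict; a minimal sketch, assuming a sigmoid (binary) output head as in this example:

import numpy as np

# predict_classes equivalent for a binary sigmoid output (TF >= 2.6)
grid = np.c_[XX.ravel(), YY.ravel()]
probs = model.predict(grid)
preds = (probs > 0.5).astype("int32").ravel()
# For a softmax (multi-class) head the equivalent would be:
# preds = np.argmax(model.predict(grid), axis=-1)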
Example #2
def different_hidden_layer():
    '''
    Train networks with 1, 2 and 3 hidden layers and plot their decision boundaries.
    '''
    for n in range(3):
        model = Sequential()
        model.add(layers.Dense(64, input_dim=2, activation='relu'))
        for i in range(n):
            model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=5, verbose=1)

        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "hidden layer:{0}".format(n + 1)
        make_plot(x_train, y_train, title, XX, YY, preds)
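This snippet assumes x_train, y_train and the mesh grid XX, YY already exist at module level (and its make_plot call takes no file name, unlike the other examples). A minimal sketch of how such a grid could be prepared; the ranges mirror the other examples on this page and are an assumption here:

import numpy as np

# Sample the x-y plane on the same ranges used by the other examples
xx = np.arange(-2, 3, 0.01)     # x range [-2, 3]
yy = np.arange(-1.5, 2, 0.01)   # y range [-1.5, 2]
XX, YY = np.meshgrid(xx, yy)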
Example #3
def network_layers_influence(x_train, y_train):
    for n in range(5):
        model = Sequential()
        model.add(layers.Dense(8, input_dim=2, activation='relu'))     
        for _ in range(n):   
            model.add(layers.Dense(32, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        history = model.fit(x_train, y_train, epochs=N_EPOCHS, verbose=1)

        xx = np.arange(-2, 3, 0.01)
        # y range of the visualization: [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Sample a grid of points on the x-y plane for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "layer_nums {0}".format(2 + n)
        filename = "network_%i.png" % (2 + n)
        make_plot(x_train, y_train, title, filename, XX, YY, preds, output_dir=OUTPUT_DIR)
Example #4
def NN(X_train, X_test, y_train, y_test):
    model = Sequential()
    model.add(Dense(100, input_dim=1034, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss=keras.losses.binary_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(X_train, y_train, validation_split=0.1)
    prediction = model.predict_classes(X_test)
    print('Neural Network:')
    print(100 * accuracy_score(y_test, prediction))
    print(mean_squared_error(y_test, prediction, squared=True))
    print(mean_absolute_error(y_test, prediction))
    print(confusion_matrix(y_test, prediction))
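The fit call above relies on the Keras default of a single training epoch. A minimal sketch passing the training length explicitly; the values are placeholders, not from the original:

# Placeholder hyperparameters; tune for the actual dataset
model.fit(X_train, y_train, validation_split=0.1, epochs=50, batch_size=32, verbose=1)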
def network_layers_influence(X_train, y_train):
    # Build 5 networks of different depths
    for n in range(5):
        # Create the container
        model = Sequential()
        # Create the first layer
        model.add(layers.Dense(8, input_dim=2, activation="relu"))
        # Add n more hidden layers, for n + 2 layers in total
        for _ in range(n):
            model.add(layers.Dense(32, activation="relu"))
        # Create the output layer
        model.add(layers.Dense(1, activation="sigmoid"))
        # Compile and train the model
        model.compile(
            loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
        )
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the decision boundary for each network depth
        # x range of the visualization: [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # y range of the visualization: [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Sample a grid of points on the x-y plane for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "Number of layers: {0}".format(2 + n)
        file = "network_capacity_%i.png" % (2 + n)
        make_plot(
            X_train,
            y_train,
            title,
            file,
            XX,
            YY,
            preds,
            output_dir=OUTPUT_DIR + "/network_layers",
        )
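make_plot is defined elsewhere in the original project. A minimal sketch of a compatible helper, assuming the signature used by these calls (X, y, title, file name, mesh grid and grid predictions, plus an output_dir keyword); the body is an illustration, not the original implementation:

import os
import matplotlib.pyplot as plt

def make_plot(X, y, plot_title, file_name, XX, YY, preds, output_dir="."):
    # Draw the decision regions predicted on the mesh grid, then overlay the samples
    os.makedirs(output_dir, exist_ok=True)
    plt.figure(figsize=(6, 5))
    plt.title(plot_title)
    plt.contourf(XX, YY, preds.reshape(XX.shape), alpha=0.6, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=20, cmap=plt.cm.binary, edgecolors="k")
    plt.savefig(os.path.join(output_dir, file_name))
    plt.close()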
Example #6
def train_rna(config):
    data_train, data_test, target_train, target_test = process_data()
    rna_folder = pathlib.Path(join(os.getcwd(), 'rna'))
    fig_folder = pathlib.Path(join(os.getcwd(), "figures"))
    id = str(uuid.uuid1()).split('-')[0]  # Generates a unique id for each RNA created

    # Here is where the magic really happens! Check this out:
    model = Sequential()  # The model used is the sequential
    # It has a fully connected input layer
    model.add(Dense(data_train.shape[1], activation="relu", kernel_initializer=config.initializer,
                    input_shape=(data_train.shape[1],)))
    # With three other hidden layers
    model.add(Dense(config.layer_size_hl1, activation=config.activation, kernel_initializer=config.initializer))
    # And a dropout layer between them
    model.add(Dropout(config.dropout))
    model.add(Dense(config.layer_size_hl2, activation=config.activation, kernel_initializer=config.initializer))
    model.add(Dropout(config.dropout))
    model.add(Dense(config.layer_size_hl3, activation=config.activation, kernel_initializer=config.initializer))
    model.add(Dense(len(modulations), activation='softmax'))

    # Once created, the model is then compiled, trained
    # and saved for further evaluation
    model.compile(optimizer=config.optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(data_train, target_train, validation_split=0.3, epochs=config.epochs, verbose=1,
                        callbacks=[WandbCallback(validation_data=(data_test, target_test))])
    
    model.save(str(join(rna_folder, 'rna-' + id + '.h5')))
    model.save_weights(str(join(rna_folder, 'weights-' + id + '.h5')))
    print("\nRNA saved with id " + id + "\n")

    # A figure with a model representation is automatically saved!
    plot_model(model, to_file=join(fig_folder, 'model-' + id + '.png'), show_shapes=True)

    # Here is where we make the first evaluation of the RNA
    loss, acc = model.evaluate(data_test, target_test, verbose=1)
    print('Test Accuracy: %.3f' % acc)

    # Here, WANDB takes place and logs all metrics to the cloud
    metrics = {'accuracy': acc,
               'loss': loss,
               'dropout': config.dropout,
               'epochs': config.epochs,
               'initializer': config.initializer,
               'layer_size_hl1': config.layer_size_hl1,
               'layer_size_hl2': config.layer_size_hl2,
               'layer_size_hl3': config.layer_size_hl3,
               'optimizer': config.optimizer,
               'activation': config.activation,
               'id': id}
    wandb.log(metrics)

    # Here we make a prediction using the test data...
    print('\nStarting prediction')
    predict = model.predict_classes(data_test, verbose=1)

    # And create a Confusion Matrix for a better visualization!
    print('\nConfusion Matrix:')
    confusion_matrix = tf.math.confusion_matrix(target_test, predict).numpy()
    confusion_matrix_normalized = np.around(
        confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis],
        decimals=2)
    print(confusion_matrix_normalized)
    cm_data_frame = pd.DataFrame(confusion_matrix_normalized, index=modulations, columns=modulations)
    figure = plt.figure(figsize=(8, 4), dpi=150)
    sns.heatmap(cm_data_frame, annot=True, cmap=plt.cm.get_cmap('Blues', 6))
    plt.tight_layout()
    plt.title('Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(join(fig_folder, 'confusion_matrix-' + id + '.png'), bbox_inches='tight', dpi=300)

    plt.clf()
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='best')
    plt.savefig(join(fig_folder, 'history_accuracy-' + id + '.png'), bbox_inches='tight', dpi=300)

    plt.clf()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='best')
    plt.savefig(join(fig_folder, 'history_loss-' + id + '.png'), bbox_inches='tight', dpi=300)

    plt.close(figure)
    evaluate_rna(id=id)
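train_rna reads its hyperparameters from a wandb config object. A minimal sketch of a stand-in config for a local dry run, built only from the attributes the function actually accesses; the values are placeholders and the rest of the project (process_data, modulations, wandb, evaluate_rna) is assumed to be available:

from types import SimpleNamespace

# Placeholder values; in the original project these come from wandb.config / a sweep
config = SimpleNamespace(
    initializer="glorot_uniform",
    activation="relu",
    optimizer="adam",
    dropout=0.3,
    epochs=20,
    layer_size_hl1=128,
    layer_size_hl2=64,
    layer_size_hl3=32,
)
train_rna(config)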
Example #7
  plt.ylabel('Loss')
  plt.xlabel('Epoch')
  plt.legend(['Train', 'Val'], loc='upper left')
  plt.show()

plot_learningCurve(history, 10)
import mlxtend
from mlxtend.plotting import plot_decision_regions

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

y_pred = model.predict_classes(X_test)

mat = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(conf_mat=mat, class_names=label.classes_, show_normed=True, figsize=(7,7))
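plot_learningCurve is only shown truncated above. A minimal sketch of a compatible definition, assuming the (history, epochs) call signature and the accuracy/loss panels implied by the visible tail:

import matplotlib.pyplot as plt

def plot_learningCurve(history, epochs):
    epoch_range = range(1, epochs + 1)
    # Accuracy panel
    plt.plot(epoch_range, history.history['accuracy'])
    plt.plot(epoch_range, history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()
    # Loss panel (the part visible at the top of this example)
    plt.plot(epoch_range, history.history['loss'])
    plt.plot(epoch_range, history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()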

class ModelBidirectDNA():
    def __init__(self, params):
        """
        It initializes the model before the training
        """

        # defines where to save the model's checkpoints
        self.results_base_dir = params['result_base_dir']

        self.pretrained_model = params.get('pretrained_model', None)
        if self.pretrained_model is not None:
            # pretrained model load params from pickle
            print("loading model")
            train_dir = "/"
            train_dir = train_dir.join(
                params['pretrained_model'].split("/")[:-1])
            print(train_dir)
            with open(os.path.join(train_dir, "network_params"),
                      'rb') as params_pickle:
                self.params = pickle.load(params_pickle)
            self.params['result_base_dir'] = self.results_base_dir
        else:
            ## new model
            self.params = params

        self.seeds = [42, 101, 142, 23, 53]
        self.learning_rate = self.params['lr']
        self.batch_size = self.params['batch_size']
        weight_decay = self.params['weight_decay']

        # Architecture --- emoji network
        weight_init = tf.keras.initializers.glorot_uniform
        recurrent_init = tf.keras.initializers.orthogonal(seed=42)

        # Model definition
        self.model = Sequential()
        self.model.add(
            Masking(mask_value=[1., 0., 0., 0., 0.],
                    input_shape=(self.params['maxlen'],
                                 self.params['vocabulary_len'])))
        self.model.add(
            tf.keras.layers.Conv1D(
                self.params['conv_num_filter'],
                self.params['conv_kernel_size'],
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
                kernel_initializer=weight_init(self.seeds[2]),
                activity_regularizer=tf.keras.regularizers.l2(weight_decay)))
        self.model.add(tf.keras.layers.MaxPool1D())
        self.model.add(
            tf.keras.layers.Dropout(self.params['dropout_1_rate'],
                                    seed=self.seeds[0]))
        self.model.add(
            tf.keras.layers.Conv1D(
                self.params['conv_num_filter'],
                self.params['conv_kernel_size'],
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
                kernel_initializer=weight_init(self.seeds[3]),
                activity_regularizer=tf.keras.regularizers.l2(weight_decay)))
        self.model.add(tf.keras.layers.MaxPool1D())
        self.model.add(
            Bidirectional(
                LSTM((int)(self.params['lstm_units']),
                     return_sequences=False,
                     dropout=self.params['lstm_input_dropout'],
                     kernel_initializer=weight_init(self.seeds[0]),
                     recurrent_initializer=recurrent_init,
                     kernel_regularizer=l2(self.params['weight_decay']))))
        self.model.add(
            Dropout(self.params['lstm_output_dropout'], seed=self.seeds[2]))
        self.model.add(
            Dense(8,
                  activation='relu',
                  kernel_initializer=weight_init(self.seeds[0])))
        self.model.add(
            Dropout(self.params['dense_dropout_rate'], seed=self.seeds[3]))
        self.model.add(
            Dense(1,
                  activation='sigmoid',
                  kernel_initializer=weight_init(self.seeds[4]),
                  kernel_regularizer=l2(self.params['weight_decay'])))

        # Check if the user wants a pre-trained model. If yes load the weights
        if self.pretrained_model is not None:
            self.model.load_weights(self.pretrained_model)

    def build(self, logger=None):
        """
        It compiles the model by defining optimizer, loss and learning rate
        """
        optimizer = tf.keras.optimizers.RMSprop(lr=self.learning_rate,
                                                clipnorm=1.0)
        self.model.compile(loss='binary_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy', f1_m, precision_m, recall_m])
        if (logger is not None):
            self.model.summary(print_fn=lambda x: logger.info(x))
        else:
            self.model.summary()

        # Print params onto the logger
        if logger is not None:
            logger.info("\n" + json.dumps(self.params, indent=4))

    def fit(self,
            X_tr,
            y_tr,
            epochs,
            callbacks_list,
            validation_data,
            shuffle=True):
        """
        Fit the model with the provided data and returns the results
        Inputs:
        - X_tr: samples
        - y_tr: labels related to the samples
        - epochs: number of epochs before stopping the training
        - callbacks_list
        - validation_data: data the model is validated on each time an epoch is completed
        - shuffle: if the dataset has to be shuffled before being fed into the network

        Outputs:
        - history: it contains the results of the training
        """

        callbacks_list = self._get_callbacks()
        history = self.model.fit(x=X_tr,
                                 y=y_tr,
                                 epochs=epochs,
                                 shuffle=True,
                                 batch_size=self.batch_size,
                                 callbacks=callbacks_list,
                                 validation_data=validation_data)
        trained_epochs = callbacks_list[0].stopped_epoch - callbacks_list[
            0].patience + 1 if callbacks_list[0].stopped_epoch != 0 else epochs
        return history, trained_epochs

    def fit_early_stopping_by_loss_val(self,
                                       X_tr,
                                       y_tr,
                                       epochs,
                                       early_stopping_loss,
                                       callbacks_list,
                                       validation_data,
                                       shuffle=True):
        """
        Train the model until the current validation loss reaches the holdout training loss specified by the early_stopping_loss parameter.
        
        Algorithm 7.3 (Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning. The MIT Press, pp. 246-250.)
        
        Params:
        -------
            :X_tr: training samples
            :y_tr: training labels
            :epochs: number of epochs training is performed on
            :early_stopping_loss: threshold loss - Once reached this loss the training is stopped
            :callbacks_list: list of callbacks to use in the training phase
            :validation_data: data to evaluate the model on at the end of each epoch
            :shuffle: if True, it shuffles data before starting the training
        
        """
        print(f"early stopping loss: {early_stopping_loss}")
        callbacks_list = self._get_callbacks(train=True)
        callbacks_list.append(
            EarlyStoppingByLossVal(monitor='val_loss',
                                   value=early_stopping_loss))
        history = self.model.fit(x=X_tr,
                                 y=y_tr,
                                 epochs=epochs,
                                 batch_size=self.batch_size,
                                 shuffle=True,
                                 callbacks=callbacks_list,
                                 validation_data=validation_data)
        return history

    def evaluate(self, features, labels):
        """
        It evaluates the trained model on the provided data
        Inputs:
        - features: sample of data to validate
        - labels: classes the data belong to
        Outputs:
        - loss
        - accuracy
        - f1_score
        - precision
        - recall
        """
        loss, accuracy, f1_score, precision, recall = self.model.evaluate(
            features, labels, verbose=0)
        metrics_value = [loss, accuracy, f1_score, precision, recall]

        results_dict = dict(zip(self.model.metrics_names, metrics_value))
        return results_dict

    def print_metric(self, name, value):
        print('{}: {}'.format(name, value))

    def save_weights(self):
        """
        It saves the model's weights into an HDF5 file
        """
        with open(os.path.join(self.results_base_dir, "network_params"),
                  'wb') as params_pickle:
            pickle.dump(self.params, params_pickle)

        self.model.save_weights(
            os.path.join(self.results_base_dir, 'my_model_weights.h5'))
        model_json = self.model.to_json()
        with open(os.path.join(self.results_base_dir, "model.json"),
                  "w") as json_file:
            json_file.write(model_json)

    def fit_generator(self,
                      generator,
                      steps_per_epoch,
                      epochs,
                      validation_data=None,
                      shuffle=True,
                      callbacks_list=None):
        """
        Train the model for the same number of update steps as in the holdout validation phase
        
        Algorithm 7.2(Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning. The MIT Press, pp. 246-250.)
        """
        history = self.model.fit_generator(
            generator,
            steps_per_epoch,
            epochs,
            shuffle=False,
            callbacks=self._get_callbacks(train=True),
            validation_data=validation_data)
        return history

    def _get_callbacks(self, train=True):
        """
        It defines the callbacks for this specific architecture
        """
        callbacks_list = [
            keras.callbacks.EarlyStopping(monitor='val_loss',
                                          patience=10,
                                          restore_best_weights=True),
            keras.callbacks.ModelCheckpoint(filepath=os.path.join(
                self.results_base_dir, 'model_checkpoint_weights.h5'),
                                            monitor='val_loss',
                                            save_best_only=True,
                                            verbose=0),
            keras.callbacks.CSVLogger(
                os.path.join(self.results_base_dir, 'history.csv')),
            keras.callbacks.ReduceLROnPlateau(patience=10,
                                              monitor='val_loss',
                                              factor=0.75,
                                              verbose=1,
                                              min_lr=5e-6)
        ]
        return callbacks_list

    def predict(self,
                x_test,
                batch_size: int = 32,
                verbose: int = 0) -> np.array:
        """
        Wrapper method for the Keras model's 'predict' method

        Params:
        -------
            :x_test: test samples
            :batch_size: default=32
            :verbose: verbosity level
        """
        return self.model.predict(
            x_test,
            batch_size=batch_size,
            verbose=verbose,
        ).ravel()

    def predict_classes(self,
                        x_test,
                        batch_size: int = 32,
                        verbose: int = 1) -> np.array:
        """
        Wrapper method for the Keras model's 'predict_classes' method

        Params:
        -------
            :x_test: test samples
            :batch_size: default=32
            :verbose: verbosity level

        Raise:
            Exception
        """
        try:
            return self.model.predict_classes(x_test)
        except Exception as err:
            print(f"EXCEPTION-RAISED: {err}")
            sys.exit(-1)
        pass
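A minimal sketch of how ModelBidirectDNA might be instantiated, using only the params keys the constructor actually reads; the values are placeholders, and the metric helpers (f1_m, precision_m, recall_m) and EarlyStoppingByLossVal are assumed to be defined elsewhere in the original project:

params = {
    'result_base_dir': './results',   # where checkpoints and history are written
    'lr': 1e-3,
    'batch_size': 32,
    'weight_decay': 1e-4,
    'maxlen': 200,                    # sequence length fed to the Masking layer
    'vocabulary_len': 5,              # one-hot depth per timestep
    'conv_num_filter': 32,
    'conv_kernel_size': 5,
    'dropout_1_rate': 0.3,
    'lstm_units': 64,
    'lstm_input_dropout': 0.2,
    'lstm_output_dropout': 0.2,
    'dense_dropout_rate': 0.3,
}
dna_model = ModelBidirectDNA(params)
dna_model.build()  # compiles with RMSprop and prints the summary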
Example #9
# In[27]:

history = model.fit(X_train,
                    y_train,
                    batch_size=32,
                    epochs=15,
                    verbose=1,
                    validation_split=0.2)

# In[29]:

model.evaluate(X_test, y_test)

# In[30]:

model.predict_classes(X_test)

# In[31]:

y_test[0]

# In[32]:

y_test[1]

# In[33]:

y_test[2]

# In[44]:
epoch_range = range(1, len(history.history['loss']) + 1)
plt.plot(epoch_range, history.history['categorical_accuracy'])
plt.plot(epoch_range, history.history['val_categorical_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

plt.plot(epoch_range, history.history['loss'])
plt.plot(epoch_range, history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

Y_Predict = model.predict_classes(X_Test)

mat = confusion_matrix(Y_Test, Y_Predict)
plot_confusion_matrix(mat, figsize=(5, 5))

# #-----------------------*Training and evaluating session start*------------------------------------------------
# init_op = tf.global_variables_initializer()
# saver = tf.train.Saver()

# with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True)) as sess:
#     sess.run(init_op)
#     total_batch = int(1823 / batch_size)
#     print("total batches: ",total_batch)

#     epoch_rate_down=0
#     best_kappa=0
for n in range(5):
    # Create the container
    model = Sequential()
    # Create the first layer
    model.add(Dense(3, input_dim=2, activation='relu'))
    counter = 0
    for i in range(5):
        model.add(Dense(64, activation='relu'))
        # Insert n Dropout layers between the hidden layers
        if counter < n:
            counter += 1
            model.add(layers.Dropout(rate=0.5))

    # Add the output layer
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    history = model.fit(x_train,
                        y_train,
                        batch_size=100,
                        epochs=100,
                        verbose=1)
    # Set the x/y ranges within which the model is sampled
    xx = np.arange(-2, 3, 0.01)
    yy = np.arange(-2, 2, 0.01)
    # Sample the grid
    XX, YY = np.meshgrid(xx, yy)
    preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
    title = 'Dropout layers: {}'.format(n)
    filename = 'network_capacity_%d.png' % (2 + n)
    draw(X, Y, title, filename, XX, YY, preds)
Example #12
  plt.show()
  plt.clf()
  

plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')



seed_text = "Laurence went to dublin"
next_words = 100

for _ in range(next_words):
  token_list = tokenizer.texts_to_sequences([seed_text])[0]
  token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
  predicted = model.predict_classes(token_list, verbose=0)
  output_word = ""
  for word, index in tokenizer.word_index.items():
    if index == predicted:
      output_word = word
      break
  seed_text += " " + output_word

print(seed_text)
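The reverse lookup over tokenizer.word_index works, but it scans the whole vocabulary for every generated word. The Keras Tokenizer also exposes an index_word dict, so a leaner loop body could look like this (a sketch, assuming the same tokenizer, model and variables as above):

for _ in range(next_words):
  token_list = tokenizer.texts_to_sequences([seed_text])[0]
  token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
  predicted = model.predict_classes(token_list, verbose=0)
  # index_word maps token ids back to words directly
  output_word = tokenizer.index_word.get(int(predicted[0]), "")
  seed_text += " " + output_word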




# Tweaks to improve model and larger corpus

Example #13
mobilenet = MobileNetV2(weights = "imagenet",include_top = False,input_shape=(150,150,3))
for layer in mobilenet.layers:
    layer.trainable = False

model = Sequential()
model.add(mobilenet)
model.add(Flatten())
model.add(Dense(2,activation="softmax"))
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])
checkpoint = ModelCheckpoint("mobilenet_facemask.h5",monitor="val_accuracy",save_best_only=True,verbose=1)
earlystop = EarlyStopping(monitor="val_accuracy",patience=5,verbose=1)
history = model.fit_generator(generator=train,steps_per_epoch=len(train)// 32,validation_data=valid,
                             validation_steps = len(valid)//32,callbacks=[checkpoint,earlystop],epochs=15)
model.evaluate_generator(valid)
model.save("face_mask.h5")
pred = model.predict_classes(valid)
pred[:15]
#check

#without mask
mask = "../input/with-and-without-mask/"
plt.figure(figsize=(8, 7))
label = {0: "With Mask", 1: "Without Mask"}
color_label = {0: (0, 255, 0), 1: (0, 0, 255)}
cascade = cv2.CascadeClassifier("../input/frontalface/haarcascade_frontalface_default.xml")
count = 0
i = "../input/with-and-without-mask/mask9.jpg"

frame = cv2.imread(i)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = cascade.detectMultiScale(gray, 1.1, 4)
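The example stops right after the face detection step. A sketch of how the detected faces might be classified with the model above, assuming numpy is imported as np; the preprocessing mirrors the 150x150 input shape, and any names beyond those already defined are illustrative:

for (x, y, w, h) in faces:
    face = frame[y:y + h, x:x + w]
    face = cv2.resize(face, (150, 150)) / 255.0   # match the MobileNetV2 input size
    pred = np.argmax(model.predict(np.expand_dims(face, axis=0)), axis=-1)[0]
    cv2.rectangle(frame, (x, y), (x + w, y + h), color_label[pred], 2)
    cv2.putText(frame, label[pred], (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, color_label[pred], 2)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.show()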
Example #14
# start capturing img
cap = cv2.VideoCapture(0)
animal_captured = ''
while(True):
    # Capture frame-by-frame
    ret, frame = cap.read()

    # resize img
    img_resized = cv2.resize(frame, img_shape, interpolation=cv2.INTER_AREA)

    # scale pixel values from 0..255 to 0..1
    img_resized = img_resized / 255

    # predict
    predicted = model.predict_classes(np.asarray([img_resized]))[0]

    animal_predicted = animal_names[str(predicted)]
    # check if the animal has changed
    if animal_captured != animal_predicted:
        animal_captured = animal_predicted
        print(animal_captured)
    cv2.putText(frame, animal_captured, (100,150), cv2.FONT_HERSHEY_SIMPLEX, 3, (255,255,255),2)
    cv2.imshow('my webcam', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break  # esc to quit

    #time.sleep(0.5)


# When everything done, release the capture
Example #15
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(epoch_range, history.history['loss'])
plt.plot(epoch_range, history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

"""Show result for every class in *table*:"""

y_pred = model.predict_classes(input_test_data)
conf_matrix = confusion_matrix(output_test_data, y_pred)
plot_confusion_matrix(conf_matrix,
                      figsize = (15, 11),
                      colorbar = True,                       
                      show_normed = True,
                      show_absolute = False)

"""## **Saving**

### Saving result to *colab*:
"""

with open("model_best.json", 'w') as json_file:
  json_file.write(model.to_json())
model.save_weights("model_best.h5")
Example #16
                              validation_steps=40)

#plot for Loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

#test Image
test = r'D:\Bhushan\casting_512x512\def_front\cast_def_0_180.jpeg'
img = image.load_img(test, target_size=(128, 128))
plt.imshow(img)
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
images = np.vstack([x])

val1 = model.predict(images)
# Sigmoid output is a probability; threshold at 0.5 (low value -> "def_front")
if val1[0][0] < 0.5:
    plt.title("def_front")
else:
    plt.title("ok_front")

# Test Data
val = model.predict_classes(testData[0][0])
from sklearn.metrics import confusion_matrix, accuracy_score
p = confusion_matrix(testData[0][1], val)
acc = accuracy_score(testData[0][1], val)
Example #17
from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam

# Normalization is essential because the raw feature values differ widely in scale
# This file already contains normalized data
data = np.loadtxt("../../data/diabetes1.csv",
                  skiprows=1,
                  delimiter=",",
                  dtype=np.float32)
print(data)

x_data = data[:, :-1]
y_data = data[:, -1:]

print(x_data.shape)
print(y_data.shape)

IO = Dense(units=1, input_shape=[8], activation="sigmoid")
model = Sequential([IO])
model.compile(loss="binary_crossentropy",
              optimizer=Adam(learning_rate=0.01),
              metrics=["accuracy"])

history = model.fit(x_data, y_data, epochs=100)

print(model.predict(x_data))
print(model.predict_classes(x_data))
print(history.history['accuracy'][-1])
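The comment above notes that normalization is essential because the feature scales differ widely. If the CSV were not already normalized, a standard approach would be to standardize the features before fitting; a sketch using scikit-learn, not part of the original script:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_scaled = scaler.fit_transform(x_data)   # zero mean, unit variance per feature
history = model.fit(x_scaled, y_data, epochs=100)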
Example #18
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()
plt.show()

acc_values = hist_dict["accuracy"]
val_acc_values = hist_dict["val_accuracy"]

epochs = range(1,len(acc_values)+1)

line1 = plt.plot(epochs,val_acc_values,label="Validation/Test acc")
line2 = plt.plot(epochs,acc_values,label = "Training acc")

plt.setp(line1,linewidth=2.0,marker="+",markersize=10.0)
plt.setp(line2,linewidth=2.0,marker="4",markersize=10.0)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.grid(True)
plt.legend()
plt.show()

for i in range(0,10):
  random = np.random.randint(0,len(x_test))
  inputimg = x_test[random]
  inputimg = inputimg.reshape(1,28,28,1)

  result = str(
      model.predict_classes(inputimg,1,verbose=0)[0]
  )
  print(result)
Example #19
def train_model():
    # set seeds for reproducibility
    seed(1)
    set_random_seed(2)

    # Load data
    root = str(Path(__file__).resolve().parents[2])
    with Path(root + '/data/imdb.pickle').open('rb') as f:
        data = pickle.load(f)
    data = data.drop_duplicates('doc')
    data = data.dropna()

    # Load embeddings
    embed_lookup = {}
    with Path(root + '/data/glove.6B.50d.txt').resolve().open() as f:
        for line in f:
            values = line.split()
            word = values[0]
            vec = np.array(values[1:])
            embed_lookup[word] = vec
    print("Loaded {} embeddings".format(len(embed_lookup)))

    # Split data into 70%/10%/20% training/validation/testing
    X_train, X_test, y_train, y_test = train_test_split(
        data.doc,
        data.sentiment,
        test_size=0.2,
        stratify=data.sentiment,
        random_state=1)
    X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                      y_train,
                                                      test_size=0.2,
                                                      stratify=y_train,
                                                      random_state=1)

    # Fit tokenizer, get vocabulary and build embedding matrix
    tk = Tokenizer()
    tk.fit_on_texts(X_train)
    vocab_size = len(tk.word_index) + 1
    embed_matrix = np.zeros(shape=(vocab_size, 50))
    for word, i in tk.word_index.items():
        if word in embed_lookup:
            embed_matrix[i] = embed_lookup[word]

    # Tokenize, sequence and pad the data
    X_train, X_val, X_test = encode([X_train, X_val, X_test], tk)

    # Build and train the network
    model = Sequential()
    model.add(
        Embedding(input_dim=vocab_size,
                  output_dim=50,
                  weights=[embed_matrix],
                  trainable=False))
    model.add(LSTM(100))
    model.add(Dense(1, activation='sigmoid'))
    stopper = EarlyStopping()
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    print(model.summary())
    history = model.fit(X_train,
                        y_train,
                        validation_data=[X_val, y_val],
                        callbacks=[stopper],
                        batch_size=64,
                        epochs=100)

    # Evaluate the network
    print("\nTest on {} samples".format(len(X_test)))
    y_pred = model.predict_classes(X_test)
    scores = {}

    scores['accuracy'] = accuracy_score(y_test, y_pred)
    scores['f1_macro'] = f1_score(y_test, y_pred, average='macro')
    scores['f1_None'] = f1_score(y_test, y_pred, average=None)

    for score, value in scores.items():
        print("{}: {}".format(score, value))

    return history, y_test, y_pred
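encode is a project helper not shown here. A minimal sketch of what it plausibly does, based on how it is called: tokenize each text split with the fitted Tokenizer and pad to a common length. The padding length is an assumption:

from tensorflow.keras.preprocessing.sequence import pad_sequences

def encode(splits, tokenizer, maxlen=200):
    """Turn each text split into padded integer sequences (maxlen is a placeholder)."""
    encoded = []
    for texts in splits:
        seqs = tokenizer.texts_to_sequences(texts)
        encoded.append(pad_sequences(seqs, maxlen=maxlen, padding='post'))
    return encoded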
Example #20
for image_file in os.listdir(directory):  # Extracting the file name of the image from the class-label folder
    image = cv2.imread(directory + '/' + image_file)  # Reading the image (OpenCV)
    image = cv2.resize(
        image, (100, 100)
    )  #Resize the image, Some images are different sizes. (Resizing is very Important)
    test_images.append(image)

test_images = np.array(
    test_images,
    dtype=np.float32)  #converting the list of images to numpy array.
test_images = test_images / 255.0
test_images.shape

#predicting the test set
poll = model.predict_classes(test_images)

#MAKING THE SUBMISSION FILE

freeman = samplesub.copy()
freeman['growth_stage'] = poll
freeman['growth_stage'] = freeman['growth_stage'].map(
    {
        0: 1,
        1: 2,
        2: 3,
        3: 4,
        4: 5,
        5: 6,
        6: 7
    }, na_action='ignore')
Example #21
def get_w2v_v2(data_path,batch_size,epochs,sent_len,EMB_DIM):
    data=pd.read_csv(data_path,names=['text','label'],header=0)
    print(f'input data shape {data.shape}')
    dat0=data.dropna()
    print(f'input data shape {dat0.shape} after dropping NA')
    accu_df=pd.DataFrame()       
    label_cnt=pd.DataFrame(dat0['label'].value_counts()).reset_index()
    label_cnt.columns=['label','count']
    least_label_list=list(label_cnt.loc[label_cnt['count']<3,'label'].values)
    # display(least_label_list)
    print(f'dropping {len(least_label_list)} labels')
    data=dat0[~dat0['label'].isin(least_label_list)].reset_index()
    print(f'after deleting least label{data.shape}')
    le=LabelEncoder()
    data['label_en']=le.fit_transform(data['label'])
    data['seq_id']=data.index
    data['token']=data['text'].apply(lambda x:gensim.utils.simple_preprocess(x))
    sent=data['token'].to_list()
    # model=Word2Vec(sentences=sent,size=EMB_DIM,window=5,min_count=1,sg=1,workers=4)
    marker=data_path.split('/')[-1].split('_')[0]+"_"+data_path.split('/')[-1].split('_')[1]
    model=Word2Vec.load('SKIP_GRAM_MODEL/w2v_models/skip_gram_w2c_{}.model'.format(marker))
    word_vectors=model.wv
    embedding_matrix=word_vectors.vectors
    print(f'vocabulary size={embedding_matrix.shape[0]}, embedding dimension={embedding_matrix.shape[1]}')
    data['seq_id']=data.index
    # create a padded sequence for each document
    word2id={k:v.index for k,v in word_vectors.vocab.items()}

    emb_df=pd.DataFrame()
    for i, sent in enumerate(data['token']):
        text=data.loc[i,'text']
        label=data.loc[i,'label_en']
        seq_id=data.loc[i,'seq_id']
        if i%1000==0:
            print(i,sent)
        sent_seq=[]
        for j, word in enumerate(sent):
            w_id=word2id.get(word)
            sent_seq.append(w_id)
        df=pd.DataFrame({'text':[text],'seq_id':[seq_id],'word_seq':[sent_seq],'label':[label]})
        emb_df=emb_df.append(df)
    # Everything below runs once, after all documents have been indexed
    display(emb_df.head())
    emb_df.to_csv('SKIP_GRAM_MODEL/embeddings/{}_with_word_seq_index.csv'.format(marker),index=False)
    print(emb_df.dtypes)
    # proceed with NN model
    padded_sent=pad_sequences(emb_df['word_seq'].to_list(),maxlen=sent_len,padding='post')
    X=padded_sent
    Y=emb_df['label']
    X_train,X_test,Y_train,Y_test=train_test_split(X,Y,stratify=Y,test_size=0.2,random_state=111)
    x_train,x_dev,y_train,y_dev=train_test_split(X_train,Y_train,stratify=Y_train,test_size=0.1,random_state=111)
    Y_test,y_train,y_dev=to_categorical(Y_test),to_categorical(y_train),to_categorical(y_dev)
    print('shape:')
    print(f'Y_test shape{Y_test.shape},y_train shape{y_train.shape},y_dev shape{y_dev.shape}')
    class_count=emb_df.label.nunique()
    LOGDIR='./{}'.format(marker)

    vocab_length=len(embedding_matrix)
    print('--start neural network training and validation--')
    NN_model=Sequential([
                        Embedding(input_dim=vocab_length,output_dim=EMB_DIM,weights=[embedding_matrix],input_length=sent_len),
                        Flatten(),
                        Dense(256,activation='relu'),
                        Dense(class_count,activation='softmax')])
    NN_model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
    NN_model.fit(x_train,y_train,validation_data=(x_dev, y_dev),batch_size=batch_size,epochs=epochs,verbose=1,callbacks=[TensorBoard(LOGDIR)])
    NN_model.summary()
    NN_model.save('NN_Models/skip_gram_{}_NN'.format(marker))
    y_pred=NN_model.predict_classes(X_test)
    y_prob=NN_model.predict(X_test)   # class probabilities, needed for top-3 accuracy
    y_pred=to_categorical(y_pred)
    maxpos=lambda x:np.argmax(x)
    yTrueMax=np.array([maxpos(rec) for rec in Y_test])
    yPredMax=np.array([maxpos(rec) for rec in y_pred])
    yPredTop3=np.argsort(y_prob,axis=1)[:,-3]
    yPredTop2=np.argsort(y_prob,axis=1)[:,-2]

    top1accu=sum(yPredMax==yTrueMax)/len(yPredMax)
    top1accu=round(top1accu*100,2)
    # top-3 accuracy: the true label matches one of the three highest-scoring classes
    top3accu=sum((yPredTop3==yTrueMax)|(yPredTop2==yTrueMax)|(yPredMax==yTrueMax))/len(yPredMax)
    top3accu=round(top3accu*100,2)
    print('test data TOP 1 accuracy {} %'.format(top1accu))
    print('test data TOP 3 accuracy {} %'.format(top3accu))
    print()
    accu=pd.DataFrame({'Model':[marker],'top1 accuracy':[top1accu],'top3 accuracy':[top3accu]})
    display(accu)
    accu_df=accu_df.append(accu)
    accu_df.to_csv('SKIP_GRAM_MODEL/skip_gram_accuracy.csv',index=False)
    return data,model,emb_df,NN_model,X_test,Y_test,y_pred,accu
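A minimal sketch of how get_w2v_v2 might be invoked; the path and hyperparameters are placeholders, not values from the original project:

data, w2v_model, emb_df, nn_model, X_test, Y_test, y_pred, accu = get_w2v_v2(
    data_path="data/sample_text_label.csv",  # hypothetical CSV with 'text' and 'label' columns
    batch_size=64,
    epochs=10,
    sent_len=100,
    EMB_DIM=100,
)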
Example #22
    lambda x: ' '.join([word for word in x if word not in (stop)]))

tf = TfidfVectorizer()
v = tf.fit_transform(data['stop'].to_numpy())
feature_names = tf.get_feature_names()
dense = v.todense()
# df = pd.DataFrame(dense, columns=[feature_names])
df = pd.DataFrame(dense)
print(df)

train_X, val_X, train_y, val_y = train_test_split(df,
                                                  y,
                                                  train_size=0.75,
                                                  test_size=0.25,
                                                  random_state=0)
model = Sequential()
model.add(Dense(100, input_dim=16998, activation='relu'))
model.add(Dense(100, activation='relu'))
# model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', 'Precision', 'Recall'])  # TODO: f1 score

model.fit(train_X, train_y)
prediction = model.predict_classes(val_X)
print(precision_recall_fscore_support(val_y, prediction, average='macro'))
print(prediction)
print(accuracy_score(prediction, val_y))
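The input dimension 16998 above is the TF-IDF vocabulary size of one particular corpus. A more robust sketch derives it from the data and sets an explicit number of epochs; values other than the shape are placeholders:

model = Sequential()
model.add(Dense(100, input_dim=train_X.shape[1], activation='relu'))  # vocabulary size taken from the data
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy', 'Precision', 'Recall'])
model.fit(train_X, train_y, epochs=10, batch_size=32, validation_data=(val_X, val_y))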
Example #23
    pool_size=(2, 2),
    strides=2))  # if strides is not given, it defaults to the pool size
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Flatten())
model.add(layers.Dense(units=128, activation='relu'))
model.add(layers.Dense(units=128, activation='relu'))

model.add(layers.Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam', loss='mse')
model.fit_generator(train_set, epochs=200, steps_per_epoch=10)
#1
for repeat in range(1, 20):
    img1 = image.load_img(
        'C:\\Users\\ahmed\\PycharmProjects\\untitled\\catanddog\\test1\\{}.jpg'
        .format(repeat),
        target_size=(100, 100))
    img = image.img_to_array(img1)
    img = img / 255
    img = np.expand_dims(img, axis=0)
    prediction = model.predict_classes(img)
    plt.text(20,
             62,
             prediction,
             color='red',
             fontsize=18,
             bbox=dict(facecolor='white', alpha=0.8))
    plt.imshow(img1)
    plt.show()
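Compiling a sigmoid binary classifier with an MSE loss does train, but binary cross-entropy is the conventional choice for this setup and usually converges better; a one-line alternative compile (optimizer unchanged):

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])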
#2
# ### Perceptron Evaluation Metrics

# In[133]:

model1.evaluate(X_train, y_train)

# In[134]:

model1.evaluate(X_test, y_test)

# ### Perceptron Prediction Score

# In[135]:

train_pred = model1.predict_classes(X_train)
pred = model1.predict_classes(X_test)

# In[136]:

print("train", precision_score(y_train, train_pred))
print("test", precision_score(y_test, pred))

# # 2. Multi-Layer Perceptron

# In[137]:

#step 1: build model
model1 = Sequential()
#input layer
model1.add(Dense(30, input_dim=10, activation='relu'))
Example #25
# In[ ]:

model = load_model('cnn.hdf5')
model.load_weights('cnn.hdf5')

# In[ ]:

score = model.evaluate(x_test, y_test, verbose=0)
print('Test Loss :', score[0])
print('Test Accuracy :', score[1])

# In[ ]:

#get the predictions for the test data
predicted_classes = model.predict_classes(x_test)

# In[ ]:

confusion_mtx = confusion_matrix(y_test, predicted_classes)

plt.imshow(confusion_mtx, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('confusion_matrix')
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, ['R', 'O'], rotation=90)
plt.yticks(tick_marks, ['R', 'O'])
# The following annotates each cell with its count and highlights the most frequently predicted class for each true label
thresh = confusion_mtx.max() / 2.
for i, j in itertools.product(range(confusion_mtx.shape[0]),
                              range(confusion_mtx.shape[1])):
Example #26
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
df=pd.read_csv('Churn_Modelling.csv')
print(df.head())
X=df.drop(labels=['CustomerId','Surname','RowNumber','Exited'],axis=1)
y=df['Exited']
lb=LabelEncoder()
X['Geography']=lb.fit_transform(X['Geography'])
X['Gender']=lb.fit_transform(X['Gender'])
X=pd.get_dummies(X,drop_first=True,columns=['Geography'])
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0,stratify=y)
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)  # reuse the scaler fitted on the training data
reg=Sequential()
reg.add(Dense(X.shape[1],activation='relu'))
reg.add(Dense(128,activation='relu'))
reg.add(Dense(1,activation='sigmoid'))
reg.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
reg.fit(X_train,y_train.to_numpy(),batch_size=10,epochs=2,verbose=1)
y_pred=reg.predict_classes(X_test)


print(y_pred,y_test)
reg.evaluate(X_test,y_test.to_numpy())
from sklearn.metrics import confusion_matrix,accuracy_score
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
Example #27
class ERLC(BaseEstimator):
    '''
    Ensemble Representation Learning Classifier (ERLC)
    '''
    def __init__(self,
                 verbose=True,
                 sae_hidden_nodes=400,
                 innerNN_architecture=[512, 512, 512],
                 outerNN_architecture=[256, 256],
                 pca_components=14):
        self.verbose = verbose
        ## Tunable Parameters
        self.sae_hidden_nodes = sae_hidden_nodes
        self.innerNN_architecture = innerNN_architecture
        self.outerNN_architecture = outerNN_architecture
        self.pca_components = pca_components

        ## Models
        self.DT_org = DecisionTreeClassifier()
        self.DT_new = DecisionTreeClassifier()
        self.RF_org = RandomForestClassifier()
        self.RF_new = RandomForestClassifier()
        self.sae = Sequential()
        self.inner_dnn = Sequential()
        self.inner_dnn_new = Sequential()
        self.outer_dnn = Sequential()

        # Private class variables
        self.isTrained = False
        self.X_train = []
        self.X_train_new = []
        self.y_train = []
        self.fused_train = []
        self.num_classes = 0

    def get_params(self, deep=True):
        return {
            "sae_hidden_nodes": self.sae_hidden_nodes,
            "pca_components": self.pca_components,
        }

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self,
            X_train,
            y_train,
            sae_epochs=500,
            innerNN_epochs=500,
            outerNN_epochs=500):
        '''
        This function fits/trains the model to the inputted data.

        inputs
        --------
        X_train: The training data
        y_train: corresponding labels
        sae_epochs: epochs of training for the Stacked Autoencoder (SAE)
        innerNN_epochs: epochs of training for the inner neural network
        outerNN_epochs: epochs of training for the outer neural network
        '''

        self.X_train = X_train
        self.y_train = y_train
        num_classes = np.max(y_train) + 1
        self.num_classes = num_classes

        if (self.verbose):
            print("Building ERLC model")

        # First we build the autoencoder
        if (self.verbose):
            print("Building autoencoder")
        self.sae = self.buildSAE(X_train,
                                 num_nodes=self.sae_hidden_nodes,
                                 epochs=sae_epochs)

        # Get new representation
        if (self.verbose):
            print("Getting new representation of the data")
        X_train_new = self.sae.predict(X_train)
        self.X_train_new = X_train_new

        # Train DT on original representation
        if (self.verbose):
            print("Training DT on original representation")
        self.DT_org.fit(X_train, y_train)
        train_DT_org = self.DT_org.predict(X_train)

        # Train DT on new representation
        if (self.verbose):
            print("Training DT on new representation")

        pca = PCA(n_components=self.pca_components)
        Xtr = pca.fit_transform(X_train_new)
        self.DT_new.fit(Xtr, y_train)
        train_DT_new = self.DT_new.predict(Xtr)

        # Train RF on original representation
        if (self.verbose):
            print("Training RF on original representation")
        self.RF_org.fit(X_train, y_train)
        train_RF_org = self.RF_org.predict(X_train)

        # Train RF on new representation
        if (self.verbose):
            print("Training RF on new representation")
        self.RF_new.fit(X_train_new, y_train)
        train_RF_new = self.RF_new.predict(X_train_new)

        # Build and train inner DNN
        if (self.verbose):
            print("Training inner DNN")
        self.inner_dnn = self.buildNN(self.innerNN_architecture,
                                      X_train,
                                      y_train,
                                      num_classes=num_classes,
                                      activation='relu',
                                      do=0,
                                      epochs=innerNN_epochs)
        train_DNN = self.inner_dnn.predict_classes(X_train)

        # Build and train inner DNN on new representation
        if (self.verbose):
            print("Training inner DNN on new representation")
        self.inner_dnn_new = self.buildNN(self.innerNN_architecture,
                                          X_train_new,
                                          y_train,
                                          num_classes=num_classes,
                                          activation='relu',
                                          do=0,
                                          epochs=innerNN_epochs)
        train_DNN_new = self.inner_dnn_new.predict_classes(X_train_new)

        # Changing output of each classifier to categorical
        if (self.verbose):
            print("Creating fusion vector")
        train_DT_org = to_categorical(train_DT_org, num_classes=num_classes)
        train_DT_new = to_categorical(train_DT_new, num_classes=num_classes)
        train_RF_org = to_categorical(train_RF_org, num_classes=num_classes)
        train_RF_new = to_categorical(train_RF_new, num_classes=num_classes)
        train_DNN = to_categorical(train_DNN, num_classes=num_classes)
        train_DNN_new = to_categorical(train_DNN_new, num_classes=num_classes)

        # Combining to make fused training data
        fused_train = (train_DT_org, train_DT_new, train_RF_org, train_RF_new,
                       train_DNN, train_DNN_new)
        fused_train = np.concatenate(fused_train, axis=1)
        self.fused_train = fused_train

        # Training outer DNN
        if (self.verbose):
            print("Training outer DNN")
        self.outer_dnn = self.buildNN(self.outerNN_architecture,
                                      fused_train,
                                      y_train,
                                      num_classes=num_classes,
                                      do=0.3,
                                      val_split=0.2,
                                      regularizer=True,
                                      epochs=outerNN_epochs)

        if (self.verbose):
            print("Training complete")

        self.isTrained = True

    def predict(self, X_test):
        '''
        This function predicts the output of the input test data.
        This function must be called after fit has been called.
        inputs
        -------
        X_test: testing data

        outputs
        -------
        y_pred: the predicted labels of the test data
        '''
        # Get new representation of test data
        X_test_new = self.sae.predict(X_test)

        # DT original
        DT_org_test = self.DT_org.predict(X_test)

        # DT new
        pca = PCA(n_components=self.pca_components)
        pca.fit(self.X_train_new)
        tempX = pca.transform(X_test_new)
        DT_new_test = self.DT_new.predict(tempX)

        # RF original
        RF_org_test = self.RF_org.predict(X_test)

        # RF new
        RF_new_test = self.RF_new.predict(X_test_new)

        # DNN original
        DNN_org_test = self.inner_dnn.predict_classes(X_test)

        # DNN new
        DNN_new_test = self.inner_dnn_new.predict_classes(X_test_new)

        # Transform to categorical and combine
        DT_org_test = to_categorical(DT_org_test, num_classes=self.num_classes)
        DT_new_test = to_categorical(DT_new_test, num_classes=self.num_classes)
        RF_org_test = to_categorical(RF_org_test, num_classes=self.num_classes)
        RF_new_test = to_categorical(RF_new_test, num_classes=self.num_classes)
        DNN_org_test = to_categorical(DNN_org_test,
                                      num_classes=self.num_classes)
        DNN_new_test = to_categorical(DNN_new_test,
                                      num_classes=self.num_classes)

        testSet = (DT_org_test, DT_new_test, RF_org_test, RF_new_test,
                   DNN_org_test, DNN_new_test)
        testSet = np.concatenate(testSet, axis=1)

        # Outer NN
        y_pred = self.outer_dnn.predict_classes(testSet)

        return y_pred

    def localize(self, X_sample, y_sample, n_measurements=10, normal_label=41):
        '''
        This function localizes the attack by returning the score of each feature (measurement) based on its correlation
        with the output of that attack. It uses the chi test function.

        inputs
        -------
        X_sample: the sample vector
        y_sample: the corresponding label
        n_measurements: the top n infected measurements to return
        normal_label: the label value for normal samples

        outputs
        --------
        score: The chi score of each feature
        topIndices: the top n features infected based on the chi score test
        '''

        if (X_sample.ndim > 1):
            raise ValueError('Sample array must be 1 dimensional')
        if (y_sample.ndim > 1):
            raise ValueError('Sample label must be 1 dimensional')
        if (self.isTrained == False):
            raise ValueError(
                'The model has not been trained yet. You must call the fit function first or load a saved model'
            )

        y_pred = self.predict(X_sample)
        chi_score, topF = chi_test(self.X_train,
                                   self.y_train,
                                   n_measurements=n_measurements)
        row = chi_score[self.y_train == y_pred]
        # currentX = np.vstack( (self.X_train[ (self.y_train == normal_label) | (self.y_train == y_sample) ], X_sample) )
        # currentY = np.hstack( (self.y_train[ (self.y_train == normal_label) | (self.y_train == y_sample) ], y_sample) )

        # score = chi2(currentX, currentY)
        # score = np.nan_to_num(score)
        # score = ch

        # row = score[1,:].copy()
        topIndices = row.argsort()[-n_measurements:][::-1]

        return row, topIndices

    def buildSAE(self, X_train, num_nodes=400, epochs=100):
        '''
        This function builds the Stacked AutoEncoder (SAE) and trains it to gain a new representation.

        inputs
        -------
        X_train: matrix of the data
        num_nodes: the number of nodes in the hidden layer
        epochs: number of epochs to train the SAE model

        outputs
        --------
        model: the trained SAE model
        '''

        input_X = Input(shape=(X_train.shape[1], ))
        encoded = Dense(units=800, activation='relu')(input_X)
        encoded = Dense(units=num_nodes, activation='relu')(encoded)
        decoded = Dense(units=800, activation='relu')(encoded)
        decoded = Dense(units=X_train.shape[1], activation='relu')(decoded)
        autoencoder = Model(input_X, decoded)
        autoencoder.compile(optimizer='adam',
                            loss='mean_squared_error',
                            metrics=['mse'])

        # Early Stop Callback
        earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                              min_delta=1e-6,
                                                              mode='min',
                                                              patience=10)

        # Fit the autoencoder
        autoencoder.fit(X_train,
                        X_train,
                        epochs=epochs,
                        batch_size=256,
                        shuffle=True,
                        validation_split=0.2,
                        callbacks=[earlystop_callback])

        # Preparing the autoencoder model for use
        model = Sequential()
        model.add(autoencoder.layers[0])
        model.add(autoencoder.layers[1])
        model.add(autoencoder.layers[2])

        return model

    def buildNN(self,
                architecture,
                X_train,
                y_train,
                num_classes=42,
                activation='relu',
                do=0,
                regularizer=False,
                epochs=500,
                val_split=0.2):
        '''
        This function builds and trains a Deep Neural Network (DNN) classifier with the given layer architecture.

        inputs
        --------
        X_train: matrix of the data (meter measurements of a smart grid)
        y_train: array of the labels for the corresponding X_train samples
        num_classes: the number of classes
        architecture: list giving the number of nodes in each hidden layer
        activation: the activation function in each layer (except the final layer)
        do: percent of dropout in between the hidden layers. This should be a value between 0 and 1. If 0, dropout will not be used
        regularizer: whether or not to use l2 regularization in hidden layers
        epochs: number of epochs to train the network
        val_split: percentage of data to use for validation as the network is being trained. This is a value between 0 and 1.

        outputs
        --------
        nn_model: The trained neural network
        '''

        # Building the Neural Network
        y_train2 = to_categorical(y_train, num_classes=num_classes)
        nn_model = Sequential()
        nn_model.add(tf.keras.Input(shape=(X_train.shape[1], )), )

        for i in range(len(architecture)):
            if ((i > 0) & (i < len(architecture) - 1) & (do > 0.0)):
                nn_model.add(Dropout(do))

            if (regularizer == True):
                nn_model.add(
                    Dense(architecture[i],
                          activation=activation,
                          kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
            else:
                nn_model.add(Dense(architecture[i], activation=activation))

        nn_model.add(Dense(num_classes, activation='softmax'))
        nn_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['acc', self.f1_m])

        # Early Stop Callback
        earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                              min_delta=1e-6,
                                                              mode='min',
                                                              patience=20)

        if (val_split > 0):
            nn_model.fit(X_train,
                         y_train2,
                         epochs=epochs,
                         batch_size=256,
                         validation_split=val_split,
                         callbacks=[earlystop_callback])
        else:
            nn_model.fit(X_train,
                         y_train2,
                         epochs=epochs,
                         batch_size=256,
                         callbacks=[earlystop_callback])

        return nn_model
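
    # Usage sketch (hypothetical; the architecture list and label range are assumptions):
    #     self.inner_dnn = self.buildNN([512, 512, 512], X_train, y_train,
    #                                   num_classes=int(np.max(y_train)) + 1)
    #     probs = self.inner_dnn.predict(X_train)   # per-class softmax scores, shape (n_samples, num_classes)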

    def chi_test(self, X, y, n_measurements=10, normal_label=41):
        '''
        This function calculates the chi-squared score of each feature for every attack class by comparing that class's
        samples against the normal samples contained in the same data, performing the chi-squared test on each feature.

        inputs
        -------
        X: data matrix
        y: data labels
        n_measurements: the number of most affected measurements to return per attack label
        normal_label: the label marking normal (non-attack) samples

        outputs
        --------
        final_chi: a matrix of size (attack labels, features) in which each row holds the chi-squared p-value of each
        feature for that attack. Rows follow the order of np.unique(y) with the normal label skipped; columns follow
        the input feature order.
        topF: for each attack label, the indices of the n_measurements features with the smallest p-values, i.e. the
        measurements most affected by that attack
        '''

        # Combine saved train data with test data
        # X = np.vstack((self.X_train, X_test))
        # y = np.hstack((self.y_train, y_test))

        labels = np.unique(y)
        numFeatures = X.shape[1]
        final_chi = np.empty((len(labels) - 1, numFeatures))
        i = 0
        normalX = X[y == normal_label]

        for label in labels:
            if (label != normal_label):
                currentX = np.vstack((X[y == label], normalX))
                currentY = np.hstack((y[y == label], y[y == normal_label]))
                ch, pval = chi2(currentX, currentY)
                final_chi[i, :] = pval
                i = i + 1

        final_chi = np.nan_to_num(final_chi)

        topF = []

        for rowNumber in range(np.unique(y).shape[0] - 1):
            row = final_chi[rowNumber, :].copy()
            idx = np.argpartition(row, n_measurements)
            topIndices = idx[:n_measurements]
            topF.append(topIndices)

        topF = np.asarray(topF)

        return final_chi, topF
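
    # Usage sketch (hypothetical): rank the measurements most affected by each attack class.
    #     final_chi, topF = self.chi_test(X, y, n_measurements=10, normal_label=41)
    #     print(topF[0])   # indices of the 10 measurements with the smallest p-values for the first attack label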

    ## SAVING AND LOADING MODEL
    def save_model(self, save_path='saved_model/'):

        # Saving autoencoder
        self.sae.save(save_path + 'sae.h5')

        # Saving classifiers
        dump(self.DT_org, save_path + 'DT_org.joblib')
        dump(self.DT_new, save_path + 'DT_new.joblib')
        dump(self.RF_org, save_path + 'RF_org.joblib')
        dump(self.RF_new, save_path + 'RF_new.joblib')

        # Saving neural nets
        self.inner_dnn.save(save_path + 'inner_dnn.h5')
        self.inner_dnn_new.save(save_path + 'inner_dnn_new.h5')
        self.outer_dnn.save(save_path + 'outer_dnn.h5')

        # Saving processed training data
        savetxt(save_path + 'X_train.csv', self.X_train, delimiter=',')
        savetxt(save_path + 'X_train_new.csv', self.X_train_new, delimiter=',')
        savetxt(save_path + 'y_train.csv', self.y_train, delimiter=',')
        savetxt(save_path + 'fused_train.csv', self.fused_train, delimiter=',')

    def load_model(self, save_path='saved_model/'):

        # Loading training data
        self.X_train = loadtxt(save_path + 'X_train.csv', delimiter=',')
        self.X_train_new = loadtxt(save_path + 'X_train_new.csv',
                                   delimiter=',')
        self.y_train = loadtxt(save_path + 'y_train.csv', delimiter=',')
        self.fused_train = loadtxt(save_path + 'fused_train.csv',
                                   delimiter=',')

        # Loading Classifiers
        self.DT_org = load(save_path + 'DT_org.joblib')
        self.DT_new = load(save_path + 'DT_new.joblib')
        self.RF_org = load(save_path + 'RF_org.joblib')
        self.RF_new = load(save_path + 'RF_new.joblib')

        # Loading neural nets
        self.sae = self.rebuildSAE(self.X_train,
                                   num_nodes=self.sae_hidden_nodes)
        self.sae.load_weights(save_path + 'sae.h5')

        self.inner_dnn = self.rebuildNN(self.X_train,
                                        architecture=self.innerNN_architecture,
                                        num_classes=np.max(self.y_train) + 1,
                                        activation='relu',
                                        do=0)
        self.inner_dnn.load_weights(save_path + 'inner_dnn.h5')
        self.inner_dnn_new = self.rebuildNN(
            self.X_train_new,
            architecture=self.innerNN_architecture,
            num_classes=np.max(self.y_train) + 1,
            activation='relu',
            do=0)
        self.inner_dnn_new.load_weights(save_path + 'inner_dnn_new.h5')
        self.outer_dnn = self.rebuildNN(self.fused_train,
                                        architecture=self.outerNN_architecture,
                                        num_classes=np.max(self.y_train) + 1,
                                        activation='relu',
                                        do=0.3)
        self.outer_dnn.load_weights(save_path + 'outer_dnn.h5')
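
    # Usage sketch (hypothetical): persist a trained detector and restore it later.
    # load_model() rebuilds the networks from the constructor settings
    # (sae_hidden_nodes, innerNN_architecture, outerNN_architecture), so these must
    # match the configuration that produced the saved weights.
    #     detector.save_model('saved_model/')
    #     ...
    #     detector.load_model('saved_model/')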

    ## Rebuilding functions for loading model
    def rebuildSAE(self, X_train, num_nodes=400):
        input_X = Input(shape=(X_train.shape[1], ))
        encoded = Dense(units=800, activation='relu')(input_X)
        encoded = Dense(units=num_nodes, activation='relu')(encoded)
        decoded = Dense(units=800, activation='relu')(encoded)
        decoded = Dense(units=X_train.shape[1], activation='relu')(decoded)
        autoencoder = Model(input_X, decoded)
        autoencoder.compile(optimizer='adam',
                            loss='mean_squared_error',
                            metrics=['mse'])

        model = Sequential()
        model.add(autoencoder.layers[0])
        model.add(autoencoder.layers[1])
        model.add(autoencoder.layers[2])

        return model

    def rebuildNN(self,
                  X_train,
                  num_classes=42,
                  architecture=[512, 512, 512],
                  activation='relu',
                  do=0,
                  regularizer=False):
        '''
        This function rebuilds the inner Deep Neural Network (DNN) with the same topology used during training so that
        previously saved weights can be loaded into it. The returned network is compiled but not trained.

        inputs
        --------
        X_train: matrix of the data (meter measurements of a smart grid), used only to infer the input dimension
        num_classes: the number of classes
        architecture: list whose entries are the number of nodes in each hidden layer
        activation: the activation function in each layer (except the final layer)
        do: percent of dropout in between the hidden layers. This should be a value between 0 and 1. If 0, dropout will not be used
        regularizer: whether or not to use l2 regularization in hidden layers

        outputs
        --------
        nn_model: the compiled (untrained) neural network
        '''

        # Building the Neural Network
        nn_model = Sequential()
        nn_model.add(tf.keras.Input(shape=(X_train.shape[1], )), )
        for i in range(len(architecture)):
            if ((i > 0) & (i < len(architecture) - 1) & (do > 0.0)):
                nn_model.add(Dropout(do))

            if (regularizer == True):
                nn_model.add(
                    Dense(architecture[i],
                          activation=activation,
                          kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
            else:
                nn_model.add(Dense(architecture[i], activation=activation))

        nn_model.add(Dense(num_classes, activation='softmax'))
        nn_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['acc', self.f1_m])

        return nn_model

    ## METRICS
    def recall_m(self, y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision_m(self, y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    def f1_m(self, y_true, y_pred):
        precision = self.precision_m(y_true, y_pred)
        recall = self.recall_m(y_true, y_pred)
        return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
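
    # Quick sanity-check sketch for the custom metrics (assumes eager TensorFlow 2.x;
    # the example tensors are arbitrary):
    #     y_true = K.constant([[0., 1.], [1., 0.]])
    #     y_pred = K.constant([[0.1, 0.9], [0.8, 0.2]])
    #     self.f1_m(y_true, y_pred)   # ~1.0, since both one-hot rows are predicted correctly
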
Пример #28
0
# Assumes the Sequential model `model` and the optimizer `opt2` are defined earlier in the original script.
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt2, metrics=['acc'])
history = model.fit(x_train, y_train, epochs=100, batch_size=30, verbose=1)
print('training loss and accuracy: ', model.evaluate(x_train, y_train))




preds = model.predict_proba(x_train, batch_size=None, verbose=1)   # class probabilities, shape (n_samples, n_classes)
preds_label = model.predict_classes(x_train)                       # predicted class indices

#print(preds.shape)
#print(preds)

# Optional sanity check: every entry of preds_label should equal the argmax of the
# corresponding row of preds.
# for i, label in enumerate(preds_label):
#     assert int(np.argmax(preds[i])) == int(label)


explainer=lime.lime_tabular.LimeTabularExplainer(x_train, feature_names=features,
                            class_names=['0','1','2'])
I=[]
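
# Hypothetical usage sketch: explain a few (arbitrarily chosen) training samples with
# LIME and collect the per-feature weights in I. model.predict returns the softmax
# class probabilities that LIME expects from its predict_fn.
for idx in range(3):
    exp = explainer.explain_instance(x_train[idx], model.predict, num_features=5)
    I.append(exp.as_list())
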
Пример #29
0
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(x_train,
                    y_train,
                    epochs=35,
                    validation_data=(x_test, y_test),
                    verbose=1)

model.save_weights('model_weights.h5')   # save only the weights

model.save('Final_model(1).h5')          # save the full model (architecture + weights)

print(model.predict_classes(x_test[:6]))   # predicted labels for the first six test samples
print(y_test[:6])                          # corresponding true labels

score = model.evaluate(x_test, y_test)
print('test loss and accuracy:', score)


def plot_learningCurve(history, epochs):
    epoch_range = range(1, epochs + 1)
    plt.plot(epoch_range, history.history['accuracy'])
    plt.plot(epoch_range, history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epochs')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()
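
# The model above was trained for 35 epochs with validation_data, so history holds
# both 'accuracy' and 'val_accuracy'; plot them with the helper defined above.
plot_learningCurve(history, 35)
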
# Assumes a Sequential model `model`, built from `vectorize_layer` and an Embedding
# layer named 'embedding', is defined earlier in the original script.

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(
    x=train_texts, y=target,
    epochs=1)

# Safe toxic comment word embedding to disk
weights = model.get_layer('embedding').get_weights()[0]
vocab = vectorize_layer.get_vocabulary()

out_v = io.open('comments_vectors.tsv', 'w', encoding='utf-8')
out_m = io.open('comments_metadata.tsv', 'w', encoding='utf-8')

for index, word in enumerate(vocab):
  if index == 0:
    continue  # skip 0, it's padding.
  vec = weights[index]
  out_v.write('\t'.join([str(x) for x in vec]) + "\n")
  out_m.write(word + "\n")
out_v.close()
out_m.close()


print(model.predict_classes(tf.constant(np.array([
    'never mind it is not important', 
    'i dont care at all, f**k you', 
    'i love you']))))
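
# Since the model is trained with from_logits=True, its raw outputs are logits rather
# than probabilities; thresholding the logit at 0 is equivalent to probability > 0.5.
# Sketch of explicit thresholding for the same three comments (label 1 is assumed to
# mark a toxic comment):
logits = model.predict(tf.constant(np.array([
    'never mind it is not important',
    'i dont care at all, f**k you',
    'i love you'])))
print((logits > 0).astype(int))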