Example #1
    def test(self, X, Y, time_steps):
        """Tests classifier

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.
            time_steps (int): Number of time steps.

        Returns:
            dict: Per-class score statistics, with 'benign' and
                'malicious' entries each holding the 'mean' and
                'stddev' of the predicted scores.
        """

        with tf.Session() as sess:
            self.saver.restore(sess, './model.ckpt')

            # normalize data
            if self.normalize:
                _min = self.feature_min.eval()
                _max = self.feature_max.eval()

                X = fops.normalize(X, _min, _max)

            # Reshape X
            X = X.reshape(-1, time_steps, self.num_features)

            labels, acc, mat = sess.run([self.scores, self.accuracy,
                                         self.confusion_matrix], feed_dict={
                self.X: X,
                self.Y: Y
            })

            self.print('Accuracy: {:.2f}'.format(acc * 100))
            self.print('Confusion Matrix:')
            self.print(mat)
            avg_benign = []
            avg_malicious = []
            for i, label in enumerate(labels):
                self.print('Label: {} | Guess: {}'.format(Y[i], label))
                if Y[i] == 1:
                    avg_benign.append(label)
                else:
                    avg_malicious.append(label)

            data = {
                'benign': {
                    'mean': float(np.mean(avg_benign)),
                    'stddev': float(np.std(avg_benign))
                },
                'malicious': {
                    'mean': float(np.mean(avg_malicious)),
                    'stddev': float(np.std(avg_malicious))
                }
            }

            self.print(json.dumps(data, indent=4))

            return data
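The fops.normalize helper used above is not shown in any of these examples; here is a minimal sketch of what it presumably does (column-wise min-max scaling with precomputed bounds, matching the three-argument call sites). The eps guard is an assumption, not in the original:

def normalize(X, _min, _max, eps=1e-8):
    """Min-max scale X into [0, 1] using precomputed per-feature bounds.

    Sketch only: eps (not in the original) avoids division by zero
    for constant features.
    """
    return (X - _min) / (_max - _min + eps)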
Example #2
    def train_embedding(self,
                        X,
                        X_data,
                        num_epochs=10,
                        model_name='tea',
                        normalize=False,
                        batch_size=100):
        """Trains the Time Embedding Autoencoder.

        Args:
            X (tf.Tensor): Input placeholder for the network.
            X_data (np.ndarray): Input data.
            num_epochs (int): Number of training epochs. Defaults to 10.
            model_name (str): Name of the model. Defaults to 'tea'.
            normalize (bool): Whether to min-max normalize the data.
                Defaults to False.
            batch_size (int): Size of the training batches. Defaults to 100.
        """

        # reconstruction loss: mean squared error between output and input
        cost = tf.reduce_mean(tf.square(self.create_network(X) - X))
        opt = tf.train.AdamOptimizer().minimize(cost)
        saver = tf.train.Saver()

        training_size = X_data.shape[0]

        # normalize X
        if normalize:
            _min = X_data.min(axis=0)
            _max = X_data.max(axis=0)
            X_data = fops.normalize(X_data, _min, _max)

        assert batch_size < training_size, (
            'batch_size must be smaller than training size')

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(num_epochs):
                costs = 0
                num_costs = 0
                # batcher yields 1-tuples here, hence the trailing comma
                for batch_x, in fops.batcher([X_data], batch_size):
                    _, c = sess.run([opt, cost], feed_dict={X: batch_x})

                    costs += c
                    num_costs += 1

                display_str = 'Epoch {0:04} with cost={1:.9f}'
                display_str = display_str.format(epoch + 1, costs / num_costs)
                print(display_str)

            print('TEA Optimization Finished')

            save_path = saver.save(sess, './{}.ckpt'.format(model_name))
            print('Model saved in file: {}'.format(save_path))
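fops.batcher is likewise not shown; a minimal sketch consistent with its call sites (it must yield tuples of aligned slices, since Example #2 unpacks `for batch_x, in ...` and Example #3 unpacks `for batch_x, batch_y in ...`):

def batcher(arrays, batch_size):
    """Yield successive aligned mini-batches from each array in arrays.

    Sketch only; the final batch may be smaller than batch_size.
    """
    n = arrays[0].shape[0]
    for start in range(0, n, batch_size):
        yield tuple(a[start:start + batch_size] for a in arrays)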
Example #3
    def train(self, X, Y, time_steps):
        """Train the Classifier.

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.
            time_steps (int): Number of time steps.
        """

        training_size = X.shape[0]

        # normalize X
        if self.normalize:
            _min = X.min(axis=0)
            _max = X.max(axis=0)
            X = fops.normalize(X, _min, _max)

        assert self.batch_size < training_size, (
            'batch size must be smaller than training size'
        )

        # Reshape input array
        X = X.reshape(-1, time_steps, self.num_features)

        with tf.Session() as sess:
            sess.run(self.init_op)
            # load the pretrained Time Embedding Autoencoder weights
            # saved by train_embedding (model_name='tea') in Example #2
            self.saver.restore(sess, './tea.ckpt')

            scores = []
            for epoch in range(self.training_epochs):
                loss = 0
                loss_size = 0
                for batch_x, batch_y in fops.batcher([X, Y], self.batch_size):
                    # repeat the update once per time step in the batch
                    # (batch_x.shape[1] == time_steps after the reshape)
                    for _ in range(batch_x.shape[1]):
                        _, l = sess.run([self.opt, self.loss], feed_dict={
                            self.X: batch_x,
                            self.Y: batch_y
                        })

                        loss += l
                        loss_size += 1

                    if epoch == self.training_epochs - 1:
                        scores.append(sess.run(self.scores, feed_dict={
                            self.X: batch_x
                        }))

                if epoch % self.display_step == 0:
                    display_str = 'Epoch {0:04} with cost={1:.9f}'
                    display_str = display_str.format(epoch+1, loss/loss_size)
                    self.print(display_str)

            # assign score threshold
            upper = np.mean(scores) + self.std_param * np.std(scores)
            lower = np.mean(scores) - self.std_param * np.std(scores)
            sess.run(self.score_upper.assign(upper))
            sess.run(self.score_lower.assign(lower))

            self.print('Lower Threshold: ' + str(lower))
            self.print('Upper Threshold: ' + str(upper))

            # assign normalization values
            if self.normalize:
                sess.run(self.feature_min.assign(_min))
                sess.run(self.feature_max.assign(_max))

            self.print('Optimization Finished')

            # save model
            save_path = self.saver.save(sess, './model.ckpt')
            self.print('Model saved in file: {}'.format(save_path))
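The thresholds assigned above follow a mean +/- std_param * std rule over the final-epoch scores; a small sketch of the same computation in isolation (score_thresholds is illustrative, not from the source):

import numpy as np

def score_thresholds(scores, std_param):
    """Mirror the mean +/- std_param * std threshold rule from train()."""
    mu, sigma = np.mean(scores), np.std(scores)
    return mu - std_param * sigma, mu + std_param * sigma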
Example #4
    def test(self, X, Y):
        """Tests classifier

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.

        Returns:
            dict: Per-class score statistics ('benign' and 'malicious'
                entries with 'mean' and 'stddev' lists) plus
                'confusion_matrix' and 'accuracy' fields.
        """

        with tf.Session(config=self.config) as sess:
            self.saver.restore(sess, './model.ckpt')

            # normalize data
            if self.normalize:
                _min = self.feature_min.eval()
                _max = self.feature_max.eval()

                X = fops.normalize(X, _min, _max)

            labels, acc, mat = sess.run([self.scores, self.accuracy,
                                         self.confusion_matrix], feed_dict={
                self.X: X,
                self.Y: Y,
                self.keep_prob: 1.0
            })

            avg_benign = []
            avg_malicious = []
            for i, label in enumerate(labels):
                if Y[i] == 1:
                    avg_benign.append(label)
                else:
                    avg_malicious.append(label)

            data = {
                'benign': {
                    'mean': np.mean(avg_benign, axis=0).tolist(),
                    'stddev': np.std(avg_benign, axis=0).tolist()
                },
                'malicious': {
                    'mean': np.mean(avg_malicious, axis=0).tolist(),
                    'stddev': np.std(avg_malicious, axis=0).tolist()
                }
            }

            data['confusion_matrix'] = mat.tolist()
            # cast to a Python float so json.dumps below can serialize it
            data['accuracy'] = float(acc) * 100

            self.print(json.dumps(data, indent=4))

            # Embeddings
            Z = self.sample_Z(n=X.shape[0])
            embeddings = sess.run(self.embedding_ops, feed_dict={
                self.X: X,
                self.Y: Y,
                self.Z: Z,
                self.keep_prob: 1.0
            })

            for i, embedding in enumerate(embeddings):
                name = self.embedding_ops[i].name.split(':')[0]
                name = name.replace('/', '_')

                with open('graph/{}'.format(name), 'w', newline='') as f:
                    csv.writer(f).writerows(embedding)

            return data
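self.sample_Z is not shown in these examples, but Examples #6 and #7 sample Z inline with np.random.uniform(-1., 1., [n, self.latent_vector_size]); a sketch of the method inferred from those call sites (the uniform(-1, 1) distribution is therefore an assumption):

    # assumes "import numpy as np" at module level, as in the examples
    def sample_Z(self, n):
        """Draw n latent vectors for the generator (sketch only)."""
        return np.random.uniform(-1., 1., [n, self.latent_vector_size])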
Example #5
    def train(self, X, Y):
        """Train the Classifier.

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.
        """

        training_size = X.shape[0]

        # normalize X
        if self.normalize:
            _min = X.min(axis=0)
            _max = X.max(axis=0)
            X = fops.normalize(X, _min, _max)

        assert self.batch_size < training_size, (
            'batch size must be smaller than training size'
        )

        with tf.Session(config=self.config) as sess:
            sess.run(self.init_op)

            # for tensorboard
            writer = tf.summary.FileWriter(
                logdir='logdir',
                graph=sess.graph
            )
            writer.flush()

            prev_diff_loss = 0

            batch = fops.random_batcher([X, Y], self.batch_size)

            for epoch in range(self.num_epochs):
                d_loss = 0
                g_loss = 0

                # Adaptive balancing: when the discriminator's loss exceeded
                # the generator's on the previous epoch (k > 0), take extra
                # discriminator steps; otherwise take extra generator steps.
                # Both step counts are clamped to at least 1.
                k = self.adpt_l * prev_diff_loss
                kd, kg = np.maximum([1, 1], [k, -k]).astype(np.int32)

                for i in range(kd):
                    batch_x, batch_y = next(batch)
                    Z = self.sample_Z(n=batch_x.shape[0])

                    _, ld = sess.run(
                        [self.D_solver, self.D_only_loss],
                        feed_dict={
                            self.X: batch_x,
                            self.Y: batch_y,
                            self.Z: Z,
                            self.keep_prob: 0.5
                        }
                    )

                    d_loss += ld

                for i in range(kg):
                    batch_x, batch_y = next(batch)
                    Z = self.sample_Z(n=batch_x.shape[0])

                    _, lg = sess.run([self.G_solver, self.G_loss], feed_dict={
                        self.Z: Z,
                        self.Y: batch_y,
                        self.keep_prob: 0.5
                    })

                    g_loss += lg

                prev_diff_loss = ld - lg

                if epoch % self.display_step == 0:
                    display_str = (
                        'Epoch {0:04} with D_loss={1:7.5f}||G_loss={2:.5f}'
                    )
                    display_str += '\nkd={3}, kg={4}'
                    display_str = display_str.format(
                        epoch+1,
                        d_loss/kd,
                        g_loss/kg,
                        kd, kg
                    )
                    self.print(display_str)

            # assign normalization values
            if self.normalize:
                sess.run(self.feature_min.assign(_min))
                sess.run(self.feature_max.assign(_max))

            self.print('Optimization Finished')

            # save model
            save_path = self.saver.save(sess, './model.ckpt')
            self.print('Model saved in file: {}'.format(save_path))
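fops.random_batcher must be an infinite generator, since the training loop above draws batches with next(batch) an unbounded number of times; a sketch assuming uniform sampling with replacement (the real implementation is not shown):

import numpy as np

def random_batcher(arrays, batch_size):
    """Endlessly yield random aligned mini-batches (sketch only)."""
    n = arrays[0].shape[0]
    while True:
        idx = np.random.randint(0, n, batch_size)
        yield tuple(a[idx] for a in arrays)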
Example #6
    def test(self, X, Y, time_steps):
        """Tests classifier

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.
            time_steps (int): Number of time steps.

        Returns:
            dict: Per-class score statistics, with 'benign' and
                'malicious' entries each holding 'mean' and 'stddev'
                lists.
        """

        # write labels for the TensorBoard embedding projector
        with open('logdir/metadata.tsv', 'w') as f:
            for label in Y:
                f.write('{}\n'.format(label))

        with tf.Session() as sess:
            self.saver.restore(sess, './model.ckpt')

            # normalize data
            if self.normalize:
                _min = self.feature_min.eval()
                _max = self.feature_max.eval()

                X = fops.normalize(X, _min, _max)

            labels, acc, mat = sess.run(
                [self.scores, self.accuracy, self.confusion_matrix],
                feed_dict={
                    self.X: X,
                    self.Y: Y,
                    self.keep_prob: 1.0
                })

            # for embeddings
            Z = np.random.uniform(-1., 1.,
                                  [X.shape[0], self.latent_vector_size])

            embeddings = sess.run(self.embedding_ops,
                                  feed_dict={
                                      self.X: X,
                                      self.Y: Y,
                                      self.Z: Z,
                                      self.keep_prob: 1.0
                                  })

            for i, embedding in enumerate(embeddings):
                name = self.embedding_ops[i].name.split(':')[0]
                name = name.replace('/', '_')

                with open('graph/{}'.format(name), 'w', newline='') as f:
                    csv.writer(f).writerows(embedding)

            avg_benign = []
            avg_malicious = []
            for i, label in enumerate(labels):
                if Y[i] == 1:
                    avg_benign.append(label)
                else:
                    avg_malicious.append(label)

            self.print('Accuracy: {:.2f}'.format(acc * 100))
            self.print('Confusion Matrix:')
            self.print(mat)
            data = {
                'benign': {
                    'mean': np.mean(avg_benign, axis=0).tolist(),
                    'stddev': np.std(avg_benign, axis=0).tolist()
                },
                'malicious': {
                    'mean': np.mean(avg_malicious, axis=0).tolist(),
                    'stddev': np.std(avg_malicious, axis=0).tolist()
                }
            }

            self.print(json.dumps(data, indent=4))

            return data
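The embeddings written to graph/ are plain CSV and pair with logdir/metadata.tsv for the TensorBoard projector; a minimal sketch of loading one back for inspection (the file name is illustrative, since actual names are derived from op names):

import numpy as np

# file name is illustrative; real names come from the embedding ops
embedding = np.loadtxt('graph/generator_dense', delimiter=',')
print(embedding.shape)  # (num_samples, embedding_dim)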
Example #7
    def train(self, X, Y, time_steps):
        """Train the Classifier.

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.
            time_steps (int): Number of time steps.
        """

        training_size = X.shape[0]

        # normalize X
        if self.normalize:
            _min = X.min(axis=0)
            _max = X.max(axis=0)
            X = fops.normalize(X, _min, _max)

        assert self.batch_size < training_size, (
            'batch size must be smaller than training size')

        with tf.Session() as sess:
            sess.run(self.init_op)

            # TensorBoard
            writer = tf.summary.FileWriter('logdir', sess.graph)
            writer.close()

            for epoch in range(self.training_epochs):
                loss = 0
                g_loss = 0
                loss_size = 0
                for batch_x, batch_y in fops.batcher([X, Y], self.batch_size):
                    # match Z to the actual batch size: the final batch from
                    # fops.batcher can be smaller than self.batch_size
                    Z = np.random.uniform(
                        -1., 1., [batch_x.shape[0], self.latent_vector_size])

                    _, l = sess.run(
                        [self.D_solver, self.D_loss],
                        feed_dict={
                            self.X: batch_x,
                            self.Y: batch_y,
                            self.Z: Z,
                            self.keep_prob: 0.5
                        })

                    _, lg = sess.run(
                        [self.G_solver, self.G_loss],
                        feed_dict={
                            self.X: batch_x,
                            self.Y: batch_y,
                            self.Z: Z,
                            self.keep_prob: 0.5
                        })

                    loss += l
                    g_loss += lg
                    loss_size += 1

                if epoch % self.display_step == 0:
                    display_str = (
                        'Epoch {0:04} with D_loss={1:7.5f}||G_loss={2:.5f}')
                    display_str = display_str.format(epoch + 1,
                                                     loss / loss_size,
                                                     g_loss / loss_size)
                    self.print(display_str)

            # assign normalization values
            if self.normalize:
                sess.run(self.feature_min.assign(_min))
                sess.run(self.feature_max.assign(_max))

            self.print('Optimization Finished')

            # save model
            save_path = self.saver.save(sess, './model.ckpt')
            self.print('Model saved in file: {}'.format(save_path))
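All of the docstrings above share the (num_samples * time_steps, features) input convention, which Examples #1 and #3 reshape to (samples, steps, features) before feeding the network; a small standalone demonstration of that reshape (the sizes are made up):

import numpy as np

num_samples, time_steps, num_features = 4, 3, 5  # made-up sizes
X = np.arange(num_samples * time_steps * num_features,
              dtype=np.float32).reshape(-1, num_features)
print(X.shape)  # (12, 5): flattened time steps, one row per step

X = X.reshape(-1, time_steps, num_features)
print(X.shape)  # (4, 3, 5): ready for the recurrent classifier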