示例#1
0
    def predict(self, X):
        """Evaluate the posterior for the given samples.

        The checkpoints saved under `self.model_name` and `self.pmf_name`
        are restored into a fresh session, the input is optionally
        rescaled, and `self.posterior` is run on it.

        Parameters
        ----------
        X : np.ndarray
            Input features, with shape like `self.X`.

        Returns
        -------
        predictions : np.ndarray
            Array of scores, with shape `X.shape[0]`.
        """

        with tf.Session(config=self.config) as sess:
            # Restore the model checkpoint first, then the density estimator.
            for name in (self.model_name, self.pmf_name):
                self.saver.restore(sess, self.save_path.format(name))

            if self.normalize:
                # The feature bounds are read back from the restored session.
                X = ds.rescale(
                    X,
                    self.feature_min.eval(),
                    self.feature_max.eval(),
                    -1, 1
                )

            return sess.run(self.posterior, feed_dict={self.X: X})
示例#2
0
    def train_pmf(self, X, ignore_norm=False):
        """Fit the density estimator on top of the saved model.

        The model checkpoint is restored, `self.p_uniform` is set from the
        number of distinct scores, the registered 'train_pmf' special ops
        are run, and `self.bayes_opt` is then stepped once per epoch on a
        random batch. The result is saved under `self.pmf_name`.

        Parameters
        ----------
        X : np.ndarray
            Input features, with shape like `self.X`.
        ignore_norm : bool, optional
            When True, `X` is used as given and the stored feature bounds
            are left untouched. Default is False.
        """

        params = self.pmf_params
        n_samples = X.shape[0]

        assert params.batch_size < n_samples, (
            'Batch size is larger than number of samples')

        with tf.Session(config=self.config) as sess:
            sess.run(self.init_op)

            # Start from the already-trained model weights.
            self.saver.restore(sess, self.save_path.format(self.model_name))

            if self.normalize and not ignore_norm:
                lower = X.min(axis=0)
                upper = X.max(axis=0)
                X = ds.rescale(X, lower, upper, -1, 1)

                # Store the bounds in the graph variables for later reuse.
                sess.run(self.feature_min.assign(lower))
                sess.run(self.feature_max.assign(upper))

            # Score the whole training set once; p_uniform is the reciprocal
            # of the number of distinct score values.
            scores, pmf_in = sess.run(
                [self.scores, self.pmf_in],
                feed_dict={self.X: X},
            )
            n_distinct = len(np.unique(scores))
            sess.run(self.p_uniform.assign(1 / n_distinct))

            # Only the special ops registered for this phase are invoked.
            for op_name, special_op in self.special_ops.items():
                if 'train_pmf' not in op_name:
                    continue
                special_op(sess, pmf_in)

            batches = ds.random_batcher([X], params.batch_size)

            self.print('Training {}'.format(self.pmf_name))
            self.print('Epoch | Loss')

            for epoch in range(params.n_epochs):
                (batch_x,) = next(batches)

                _, loss = sess.run(
                    [self.bayes_opt, self.bayes_loss],
                    feed_dict={self.X: batch_x},
                )

                if not epoch % params.display_step:
                    self.print('{0:05} | {1:7.5f}'.format(epoch + 1, loss))

            self.print('Finished training density estimator')

            # Persist the trained estimator.
            checkpoint = self.save_path.format(self.pmf_name)
            saved_to = self.saver.save(sess, checkpoint)
            self.print('Model saved in file: {}'.format(saved_to))
示例#3
0
    def train_model(self, X):
        """Fit the model and save it under `self.model_name`.

        The graph is initialised, the input is optionally rescaled (with
        the bounds stored in `self.feature_min` / `self.feature_max`), the
        registered 'train_model' special ops are run, and `self.model_opt`
        is stepped once per epoch on a random batch. When
        `self.always_train_pmf` is set, the density estimator is trained
        afterwards on the same (already normalised) data.

        Parameters
        ----------
        X : np.ndarray
            Input features, with shape like `self.X`.
        """

        params = self.model_params
        n_samples = X.shape[0]

        assert params.batch_size < n_samples, (
            'batch size is larger than number of samples')

        with tf.Session(config=self.config) as sess:
            sess.run(self.init_op)

            if self.normalize:
                lower = X.min(axis=0)
                upper = X.max(axis=0)
                X = ds.rescale(X, lower, upper, -1, 1)

                # Store the bounds in the graph variables for later reuse.
                sess.run(self.feature_min.assign(lower))
                sess.run(self.feature_max.assign(upper))

            # Only the special ops registered for this phase are invoked.
            for op_name, special_op in self.special_ops.items():
                if 'train_model' not in op_name:
                    continue
                special_op(sess, X)

            batches = ds.random_batcher([X], params.batch_size)

            self.print('Training {}'.format(self.model_name))
            self.print('Epoch | Loss')

            # One-shot models (model_loss is False) have nothing to
            # optimise, so they get zero epochs.
            n_epochs = 0 if self.model_loss is False else params.n_epochs

            for epoch in range(n_epochs):
                (batch_x,) = next(batches)

                _, loss = sess.run(
                    [self.model_opt, self.model_loss],
                    feed_dict={self.X: batch_x},
                )

                if not epoch % params.display_step:
                    self.print('{0:05} | {1:7.5f}'.format(epoch + 1, loss))

            self.print('Finished training {}'.format(self.model_name))

            # Persist the trained model.
            checkpoint = self.save_path.format(self.model_name)
            saved_to = self.saver.save(sess, checkpoint)
            self.print('Model saved in file: {}'.format(saved_to))

        # X is already normalised here, so the estimator must not
        # normalise it a second time.
        if self.always_train_pmf:
            self.train_pmf(X, ignore_norm=True)
示例#4
0
    def test(self, X, Y):
        """Evaluate model performance.

        The checkpoints saved under `self.model_name` and `self.pmf_name`
        are restored into a fresh session, the input is optionally
        rescaled with the restored feature bounds, and the accuracy and
        confusion matrix are computed and reported through `self.print`.

        Parameters
        ----------
        X : np.ndarray
            Input features, with shape like `self.X`.
        Y : np.ndarray
            Labels for each sample.

        Returns
        -------
        accuracy : float
            Classification accuracy of model, as a percentage
            (the evaluated accuracy multiplied by 100).
        c_mat : np.ndarray
            Confusion matrix.
        """

        with tf.Session(config=self.config) as sess:
            self.saver.restore(sess, self.save_path.format(self.model_name))
            self.saver.restore(sess, self.save_path.format(self.pmf_name))

            # normalize data with the bounds read from the restored session
            if self.normalize:
                _min = self.feature_min.eval()
                _max = self.feature_max.eval()
                X = ds.rescale(X, _min, _max, -1, 1)

            acc, mat = sess.run([self.accuracy, self.confusion_matrix],
                                feed_dict={
                                    self.X: X,
                                    self.Y: Y
                                })

            self.print('Accuracy = {:.3f}%'.format(acc * 100))
            self.print(mat)

            return acc * 100, mat
示例#5
0
文件: gan.py 项目: zbn123/ADD-GAN
    def test(self, X, Y):
        """Tests classifier

        Restores './model.ckpt', normalizes `X` according to
        `self.normalize`, evaluates scores, accuracy, confusion matrix and
        both losses, prints the summary as JSON through `self.print`, and
        writes one CSV file per entry of `self.embedding_ops` into the
        'graph' directory (created if missing).

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.array): Labels. Samples whose label equals 1 are grouped
                as benign, all others as malicious.

        Returns:
            dict: Dictionary containing the following fields:
                'benign', 'malicious': dicts with the 'mean' and 'stddev'
                    (taken along axis 0) of the scores in that group.
                'confusion_matrix': the confusion matrix as nested lists.
                'accuracy': the evaluated accuracy multiplied by 100.
                'd_loss', 'g_loss': the evaluated losses as floats.
        """
        import os

        with tf.Session(config=self.config) as sess:
            self.saver.restore(sess, './model.ckpt')

            # normalize data
            if self.normalize == 'rescaling':
                _min = self.feature_min.eval()
                _max = self.feature_max.eval()
                X = ds.rescale(X, _min, _max, -1, 1)

            elif self.normalize == 'vector_norm':
                X = ds.vector_norm(X, -1, 1)

            scores, acc, mat, d_loss, g_loss = sess.run(
                [self.scores, self.accuracy, self.confusion_matrix,
                 self.D_loss, self.G_loss],
                feed_dict={
                    self.X: X,
                    self.Y: Y,
                    self.Z: self.sample_Z(n=X.shape[0]),
                    self.keep_prob: 1.0
                }
            )

            # Split the per-sample scores by ground-truth label.
            benign_scores = []
            malicious_scores = []
            for score, label in zip(scores, Y):
                if label == 1:
                    benign_scores.append(score)
                else:
                    malicious_scores.append(score)

            data = {
                'benign': {
                    'mean': np.mean(benign_scores, axis=0).tolist(),
                    'stddev': np.std(benign_scores, axis=0).tolist()
                },
                'malicious': {
                    'mean': np.mean(malicious_scores, axis=0).tolist(),
                    'stddev': np.std(malicious_scores, axis=0).tolist()
                }
            }

            data['confusion_matrix'] = mat.tolist()
            # Cast to built-in float like the losses below: NumPy float32
            # scalars are not JSON serializable.
            data['accuracy'] = float(acc * 100)
            data['d_loss'] = float(d_loss)
            data['g_loss'] = float(g_loss)

            self.print(json.dumps(data, indent=4))

            # Embeddings
            Z = self.sample_Z(n=X.shape[0])
            embeddings = sess.run(self.embedding_ops, feed_dict={
                self.X: X,
                self.Y: Y,
                self.Z: Z,
                self.keep_prob: 1.0
            })

            # Make sure the output directory exists before writing into it.
            os.makedirs('graph', exist_ok=True)

            for op, embedding in zip(self.embedding_ops, embeddings):
                # Drop the ':<index>' suffix and flatten the scope path so
                # the op name is usable as a file name.
                name = op.name.split(':')[0]
                name = name.replace('/', '_')

                # newline='' is required by the csv module so that it
                # controls the line endings itself.
                with open('graph/{}'.format(name), 'w', newline='') as f:
                    csv.writer(f).writerows(embedding)

            return data
示例#6
0
文件: gan.py 项目: zbn123/ADD-GAN
    def train(self, X, Y):
        """Train the Classifier.

        Alternates discriminator and generator updates for
        `self.num_epochs` epochs, writes the merged summaries to
        'logdir/train', stores the rescaling bounds (when
        `self.normalize == 'rescaling'`) and saves the session to
        './model.ckpt'.

        Args:
            X (np.ndarray): Features with shape
                (num_samples * time_steps, features).
            Y (np.ndarray): Labels.

        Raises:
            AssertionError: If `self.batch_size` is not smaller than the
                number of samples in `X`.
        """

        training_size = X.shape[0]

        # Validate before spending time on normalization.
        assert self.batch_size < training_size, (
            'batch size is larger than training_size'
        )

        # normalize X
        if self.normalize == 'rescaling':
            _min = X.min(axis=0)
            _max = X.max(axis=0)
            X = ds.rescale(X, _min, _max, -1, 1)

        elif self.normalize == 'vector_norm':
            X = ds.vector_norm(X, -1, 1)

        with tf.Session(config=self.config) as sess:
            sess.run(self.init_op)

            # for tensorboard
            writer = tf.summary.FileWriter(
                logdir='logdir/train',
                graph=sess.graph
            )

            try:
                prev_diff_loss = 0

                batch = ds.random_batcher([X, Y], self.batch_size)

                # Global step counter for the summary writer.
                count = 0

                for epoch in range(self.num_epochs):
                    d_loss = 0
                    g_loss = 0

                    # kd = max(1, k) discriminator steps and
                    # kg = max(1, -k) generator steps, truncated to int,
                    # where k scales the previous epoch's loss gap.
                    k = self.adpt_l * prev_diff_loss
                    kd, kg = np.maximum([1, 1], [k, -k]).astype(np.int32)

                    for _ in range(kd):
                        batch_x, batch_y = next(batch)
                        Z = self.sample_Z(n=batch_x.shape[0])

                        s, _, ld = sess.run(
                            [self.merged, self.D_solver, self.D_only_loss],
                            feed_dict={
                                self.X: batch_x,
                                self.Y: batch_y,
                                self.Z: Z,
                                self.keep_prob: 0.5
                            }
                        )

                        writer.add_summary(s, count)
                        count += 1

                        d_loss += ld

                    for _ in range(kg):
                        batch_x, batch_y = next(batch)
                        Z = self.sample_Z(n=batch_x.shape[0])

                        s, _, lg = sess.run(
                            [self.merged, self.G_solver, self.G_loss],
                            feed_dict={
                                self.X: batch_x,
                                self.Z: Z,
                                self.Y: batch_y,
                                self.keep_prob: 0.5
                            }
                        )

                        writer.add_summary(s, count)
                        count += 1

                        g_loss += lg

                    # The gap uses the losses of the last D and G step of
                    # this epoch, not the epoch averages.
                    prev_diff_loss = ld - lg

                    if epoch % self.display_step == 0:
                        display_str = (
                            'Epoch {0:04} with D_loss={1:7.5f}||G_loss={2:.5f}'
                        )
                        display_str += '\nkd={3}, kg={4}'
                        display_str = display_str.format(
                            epoch+1,
                            d_loss/kd,
                            g_loss/kg,
                            kd, kg
                        )
                        self.print(display_str)
            finally:
                # Flush pending events and release the event file even if
                # training is interrupted.
                writer.close()

            # assign normalization values
            if self.normalize == 'rescaling':
                sess.run(self.feature_min.assign(_min))
                sess.run(self.feature_max.assign(_max))

            self.print('Optimization Finished')

            # save model
            save_path = self.saver.save(sess, './model.ckpt')
            self.print('Model saved in file: {}'.format(save_path))