def main(_):
    pp.pprint(flags.FLAGS.__flags)

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)


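    # Cap GPU memory for this process at ~90% and allow TensorFlow to fall back
    # to another device when an op has no kernel on the requested one.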
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9),
        device_count={'GPU': 1},
        allow_soft_placement=True,
        #log_device_placement=True,
    )
    config.device_filters.append('/gpu:0')
    config.device_filters.append('/cpu:0')

    with tf.Session(config=config) as sess:
        #with tf.device('/gpu:0'):

        autoencoder = Autoencoder(sess, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size,
                                  dataset_name=FLAGS.dataset, noise=FLAGS.noise, is_crop=FLAGS.is_crop,
                                  checkpoint_dir=FLAGS.checkpoint_dir)

        if FLAGS.is_train:
            autoencoder.train(FLAGS)
        elif FLAGS.is_run:
            autoencoder.run(FLAGS)
        else:
            autoencoder.load(FLAGS.checkpoint_dir)
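
This main() relies on TF1-style command-line flags and an entry point that are not shown here; a minimal sketch of what such a header might look like (the flag defaults and the Autoencoder import path are assumptions inferred from how FLAGS is used above, not taken from the original project):

import os
import pprint as pp
import tensorflow as tf

from model import Autoencoder  # assumed location of the Autoencoder class

flags = tf.app.flags
flags.DEFINE_integer("image_size", 64, "Size of the input images")
flags.DEFINE_integer("batch_size", 64, "Number of images per batch")
flags.DEFINE_string("dataset", "celebA", "Name of the dataset")
flags.DEFINE_float("noise", 0.0, "Amount of noise added to the inputs")
flags.DEFINE_boolean("is_crop", True, "Center-crop the input images")
flags.DEFINE_boolean("is_train", False, "Train the autoencoder")
flags.DEFINE_boolean("is_run", False, "Run the trained autoencoder")
flags.DEFINE_string("checkpoint_dir", "checkpoint", "Directory for checkpoints")
flags.DEFINE_string("sample_dir", "samples", "Directory for sampled images")
flags.DEFINE_string("output_dir", "output", "Directory for outputs")
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main(argv)
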
Example #2
def saveEncodings(SHAPENET_PATH=os.path.join("G:\\", "Documenti",
                                             "ShapeNetCore.v2"),
                  cat_filter=["table", "chair", "sofa"],
                  SUBDIVISIONS=2,
                  NAME="learning_pred_and_gen_vector_autoencoder_adam_lr_5",
                  BATCH_SIZE=8,
                  TMP="C:\\GL\\3DShapeGen\\tmp\\pathslist.pk"):
    #
    # Instantiate model
    #
    model = Model(name=NAME, lr=10**-5)
    model.initialize(BATCH_SIZE)
    model.load()
    #
    # Load in advance the paths of all the elements
    # for which we need embeddings
    #
    if os.path.exists(TMP):
        with open(TMP, "rb") as f:
            paths = pickle.load(f)
    else:
        paths = [
            x for x in dataGenerator(
                SHAPENET_PATH, cat_filter=cat_filter, generate_path=True)
        ]
        with open(TMP, "wb") as f:
            pickle.dump(paths, f)

    #
    # batch the elements and for each batch produce a tensor of binvoxes
    #
    batches = []
    for i in range(0, len(paths), BATCH_SIZE):
        batches.append(paths[i:i + BATCH_SIZE])
    for batch in tqdm(batches):
        models_paths = [
            os.path.join(x, "models", "model_normalized.solid.binvox")
            for x in batch
        ]
        binvoxs = []
        for x in models_paths:
            if os.path.exists(x):
                binvoxs.append(binvoxToNumpy(x))
            else:
                binvoxs.append(np.zeros(shape=(128, 128, 128)))
        binvoxs = tf.convert_to_tensor(binvoxs, dtype=tf.float32)
        binvoxs = tf.expand_dims(binvoxs, axis=-1)
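        # Downsample with max pooling using kernel and stride 2 * SUBDIVISIONS
        # (e.g. a 128^3 grid becomes 32^3 when SUBDIVISIONS == 2).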
        binvoxs = tf.nn.max_pool3d(binvoxs,
                                   2 * SUBDIVISIONS,
                                   2 * SUBDIVISIONS,
                                   'VALID',
                                   data_format='NDHWC',
                                   name=None)
        #
        # Produce the embeddings; the result has shape (bs, embedding_dim)
        #
        embeddings = model.encode(binvoxs)
        #
        # Parallel write of the embeddings to disk
        #
        args = [(path, emb, NAME) for path, emb in zip(batch, embeddings)]
        with Pool(BATCH_SIZE) as p:
            p.map(__writeDiskMapFn, args)
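
The worker __writeDiskMapFn is referenced above but not defined in this excerpt; a plausible sketch, assuming each embedding is pickled inside its model directory (the output file name and layout are assumptions, not taken from the original project):

def __writeDiskMapFn(args):
    # Unpack the (model_path, embedding, model_name) tuple built above and
    # persist the embedding as a pickle inside the model's directory.
    path, embedding, name = args
    out_path = os.path.join(path, "embedding_{}.pk".format(name))
    with open(out_path, "wb") as f:
        pickle.dump(np.asarray(embedding), f)
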
Example #3
class Detector(DetectorBase):
    def __init__(self, key, seq_length=10):
        super(Detector, self).__init__(key, seq_length)
        self.key = str(key)
        self.packet_length = 1500
        self.mini_batch = 30
        self.epochs = 50
        self.train_buffer = []
        self.exec_buffer = []
        self.set_buffer = []
        self.max_round = inf
        self.train_round = 0
        self.model = Autoencoder(self.packet_length, seq_length, self.epochs)
        self.clf = OneClassSVM(kernel='rbf', gamma=0.1, nu=0.05)

        self.model_path = os.path.join('model_{}'.format(seq_length), self.key)
        self.stats_path = os.path.join('stats_{}'.format(seq_length),
                                       self.key + '.pkl')
        self.eval_path = os.path.join('evaluation_{}'.format(seq_length),
                                      self.key + '.csv')
        self.loss_path = os.path.join('evaluation_{}'.format(seq_length),
                                      self.key + '_loss.csv')
        if self.model.exist(self.model_path):
            print('Using existing model: {}'.format(self.key))
            self.model.load(self.model_path)
        if os.path.exists(self.stats_path):
            print('Using existing stats')
            self.clf = joblib.load(self.stats_path)

    def update_buffer(self, seq, mode, info=None):
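        # mode 'T': buffer training sequences and fit the autoencoder once a
        # mini-batch is full; mode 'E': run detection on the sequence; any
        # other mode: record the reconstruction MSE used later by set_threshold().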
        seq = deepcopy(seq)
        if mode == 'T' and self.train_round <= self.max_round:
            self.train_buffer.append(seq)
            if len(self.train_buffer) == self.mini_batch:
                random.shuffle(self.train_buffer)
                X = np.array(self.train_buffer)
                self.train(X)
                self.train_buffer = []
                self.train_round += 1
        elif mode == 'E':
            self.exec_buffer.append(seq)
            if len(self.exec_buffer) == 1:
                X = np.array(self.exec_buffer)
                self.execute(X, info)
                self.exec_buffer = []
        else:
            X = np.array(seq)
            X = X.reshape((1, X.shape[0], X.shape[1]))
            self.eval(X)

    def train(self, X):
        if self.train_round < self.max_round:
            history = self.model.fit(X)
            with open(self.loss_path, 'a') as f_loss:
                writer_loss = csv.writer(f_loss)
                if self.train_round == 0:
                    writer_loss.writerow([history.history['loss'][0]])
                writer_loss.writerow([history.history['loss'][-1]])
            print('Detector {} trained'.format(self.key))

    def eval(self, X):
        Y = self.model.predict(X)
        mse = mean_squared_error(X[0], Y[0])
        print('Calculating mse of {}: {}'.format(self.key, mse))
        self.set_buffer.append(mse)

    def set_threshold(self):
        self.clf = OneClassSVM(kernel='rbf', gamma=0.1, nu=0.05)
        self.clf.fit(np.array(self.set_buffer).reshape(-1, 1))
        joblib.dump(self.clf, self.stats_path)

    def execute(self, X, info=None):
        start = time.time()
        Y = self.model.predict(X)
        dur = time.time() - start
        with open(self.eval_path, 'a') as f:
            writer = csv.writer(f)
            for x, y in zip(X, Y):
                mse = mean_squared_error(x, y)
                print('Execute on {}: {}'.format(self.key, mse))
                label = self.clf.predict(np.array(mse).reshape(-1, 1))
                result = 'Normal' if label == 1 else 'Malicious'
                if info:
                    writer.writerow([str(mse), result, str(info)])
                else:
                    writer.writerow([str(mse), result])

    def wrap_up(self, mode):
        if mode == 'T':
            self.model.save(self.model_path)
        elif mode == 'S':
            self.set_threshold()
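
A minimal usage sketch for this Detector; the data sources (training_sequences, calibration_sequences, live_sequences) and the key are illustrative assumptions, with each element a (seq_length, packet_length) array:

# Hypothetical driver: train on a traffic stream, fit the threshold on
# calibration traffic, then run detection on live traffic.
detector = Detector(key='192.168.1.10', seq_length=10)

for seq in training_sequences:       # mode 'T' trains in mini-batches of 30
    detector.update_buffer(seq, mode='T')
detector.wrap_up('T')                # persist the trained autoencoder

for seq in calibration_sequences:    # any mode other than 'T'/'E' feeds eval()
    detector.update_buffer(seq, mode='S')
detector.wrap_up('S')                # fit the one-class SVM threshold

for seq in live_sequences:
    detector.update_buffer(seq, mode='E', info='live-capture')
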
min_loss = sys.maxsize
start_epoch = 0
it = 0
losses_per_it = []
losses_per_epoch = []
loss = 0

#
# Resume training: if the program is started with the
# RESUME argument, restore the model and the saved state
#
if len(sys.argv) == 2 and sys.argv[1] == "RESUME":
    if not os.path.exists(STATE):
        raise Exception(
            "Impossible to resume training. The previous state is not available"
        )
    model.load()
    with open(STATE, "r") as f:
        lines = f.readlines()
        start_epoch = int(lines[0])
        min_loss = float(lines[1])
        for x in lines[2].split(","):
            try:
                losses_per_epoch.append(float(x))
            except Exception:
                continue

#
# training loop
#
try:
    for e in range(start_epoch, EPOCHS):