Пример #1
0
    def test_train_and_load_embedding(self):
        # Round trip: train on three tiny passwords, save the embedding to a
        # temporary file, read it back and check its dimensions.
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['passj', 'word', 'db'])

        with tempfile.NamedTemporaryFile(mode='w') as ofile:
            settings = dict(
                alphabet='abcdefghijklmnopqrstuvwxyz',
                password_batch=5,
                embedding_window_size=1,
                batch_size=2,
                embedding_size=4,
                embedding_num_neg_samples=2,
                logging_freq=1,
                num_train_epochs=1)
            config = pe.EmbeddingConfig(**settings)
            trainer = pe.EmbeddingTrainer(config)
            with self.test_session() as session:
                trainer.train_and_save(passwords, session, ofile)

            # Flush buffered writes before the file is reopened by name.
            ofile.flush()

            with open(ofile.name, 'r') as ifile:
                reader = pe.CharEmbeddingLoader(config)
                embedding = reader.read_from_file(ifile)

                # 26 == len(alphabet); 4 == embedding_size.
                self.assertEqual(26, len(embedding))
                for _, vector in embedding.items():
                    self.assertEqual(4, len(vector))
Пример #2
0
    def test_skipgram_randomize(self):
        # Smoke test: build the skipgram tensors with randomize=True and
        # evaluate them once. Nothing is asserted about the values produced.
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['passj', 'word', 'db'])

        settings = dict(
            alphabet='abcdefghijklmnopqrstuvwxyz',
            password_batch=5,
            batch_size=10,
            embedding_window_size=3)
        trainer = pe.EmbeddingTrainer(pe.EmbeddingConfig(**settings))
        examples, labels = trainer.skipgram(passwords, randomize=True)

        with self.test_session() as session:
            session.run([tf.tables_initializer()])
            coordinator = tf.train.Coordinator()
            runners = tf.train.start_queue_runners(coord=coordinator)
            try:
                session.run([examples, labels])
            except tf.errors.OutOfRangeError:
                # Running out of input is tolerated here.
                pass
            finally:
                coordinator.request_stop()

            # Only reached when the run above did not raise anything other
            # than OutOfRangeError.
            coordinator.join(runners)
Пример #3
0
 def test_graph_builds(self):
     # Smoke test: build_graph() must complete without raising on a tiny
     # three-password dataset. No output is checked.
     settings = dict(
         alphabet='abcdefghijklmnopqrstuvwxyz',
         password_batch=5,
         embedding_window_size=3)
     config = pe.EmbeddingConfig(**settings)
     passwords = tf.contrib.data.Dataset.from_tensor_slices(
         ['passj', 'word', 'db'])
     trainer = pe.EmbeddingTrainer(config)
     with self.test_session() as session:
         trainer.build_graph(passwords, session)
Пример #4
0
    def test_train_loop(self):
        # Run train() for a single epoch on three tiny passwords and check
        # the shape of what it returns.
        settings = dict(
            alphabet='abcdefghijklmnopqrstuvwxyz',
            password_batch=5,
            embedding_window_size=1,
            batch_size=2,
            embedding_size=4,
            embedding_num_neg_samples=2,
            logging_freq=1,
            num_train_epochs=1)
        config = pe.EmbeddingConfig(**settings)
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['passj', 'word', 'db'])
        trainer = pe.EmbeddingTrainer(config)
        with self.test_session() as session:
            trained = trainer.train(passwords, session)

        # 26 == len(alphabet); 4 == embedding_size.
        self.assertAllEqual([26, 4], trained.shape)
Пример #5
0
    def test_train_save(self):
        # Smoke test: train_and_save() must complete when handed an open,
        # writable temporary file. The file contents are not inspected.
        with tempfile.NamedTemporaryFile(mode='w') as ofile:
            settings = dict(
                alphabet='abcdefghijklmnopqrstuvwxyz',
                password_batch=5,
                embedding_window_size=1,
                batch_size=2,
                embedding_size=4,
                embedding_num_neg_samples=2,
                logging_freq=1,
                num_train_epochs=1)
            config = pe.EmbeddingConfig(**settings)
            passwords = tf.contrib.data.Dataset.from_tensor_slices(
                ['passj', 'word', 'db'])

            trainer = pe.EmbeddingTrainer(config)
            with self.test_session() as session:
                trainer.train_and_save(passwords, session, ofile)

            ofile.flush()
Пример #6
0
    def test_initial_counts_partial_batch(self):
        # initial_counts() on 'pass', 'word', 'db' with password_batch=1 must
        # report the per-letter counts below and a sample total of 7.
        alphabet = 'abcdefghijklmnopqrstuvwxyz'
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['pass', 'word', 'db'])

        config = pe.EmbeddingConfig(alphabet=alphabet,
                                    password_batch=1,
                                    embedding_window_size=1)
        trainer = pe.EmbeddingTrainer(config)
        with self.test_session() as session:
            counts, samples = trainer.initial_counts(passwords, session)

        # Expected non-zero counts keyed by letter; every other letter is 0.
        nonzero = {
            'a': 1, 'b': 1, 'd': 2, 'o': 1,
            'p': 1, 'r': 1, 's': 2, 'w': 1,
        }
        # Expand to one entry per alphabet position, in alphabet order.
        expect = [nonzero.get(letter, 0) for letter in alphabet]

        self.assertAllEqual(expect, counts)
        self.assertEqual(7, samples)
Пример #7
0
def main(args):
    """Train an embedding from ``args.input_file`` and save it.

    ``args`` must expose the attributes ``input_file``, ``output_file``,
    ``help_config``, ``config`` and ``tensorboard_logdir``, and must work
    with ``vars()`` (presumably an argparse namespace -- TODO confirm).

    Returns None on every path. Errors raised while reading the
    configuration file are logged and then re-raised.
    """
    if not (args.input_file and args.output_file):
        sys.stderr.write('--input-file and --output-file are required\n')
        return

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    # Imported here rather than at module top; presumably so the argument
    # check above can fail without loading them -- TODO confirm.
    import pass_embedding as pe
    import pass_dataset as pd

    # NOTE(review): this branch is only reachable when both file arguments
    # were supplied, because of the early return above.
    if args.help_config:
        sys.stdout.write(pe.EmbeddingConfig.__init__.__doc__ + "\n")
        return

    logging.info('Called with %s', vars(args))

    if args.config is None:
        config = pe.EmbeddingConfig()
    else:
        logging.info('Reading configuration from %s', args.config)
        try:
            config = pe.EmbeddingConfig.from_config_file(args.config)
        except (pe.ConfigurationException, ValueError) as e:
            # Log for the operator, then let the caller see the failure.
            logging.fatal('Error while reading configuration: %s', str(e))
            raise

    dataset = pd.PasswordDatasetMakerFromFile([args.input_file]).make()
    with tf.Graph().as_default(), tf.Session() as session:
        trainer = pe.EmbeddingTrainer(
            config, tensorboard_logdir=args.tensorboard_logdir)
        trainer.train_and_save(dataset, session, args.output_file)