Пример #1
0
def _init(logdir, train_mode, *, sampling_seed=None, _cfg):
    """Build the encoding, model and trainer described by ``_cfg``.

    Args:
        logdir: Directory handed to the trainer as its ``logdir``.
        train_mode: Forwarded to the model as ``train_mode`` and to the
            trainer as ``write_summaries``. When truthy, the training and
            validation data are additionally registered with the model's
            dataset manager.
        sampling_seed: Forwarded unchanged to the model.
        _cfg: Configuration object; read via ``get`` and per-key
            ``configure`` calls.

    Returns:
        A ``(model, trainer, encoding)`` tuple.
    """
    set_random_seed(_cfg.get('random_seed'))

    encoding = _cfg['encoding'].configure()

    model_cfg = _cfg['model']
    model = model_cfg.configure(
        RNNSeq2Seq,
        train_mode=train_mode,
        vocabulary=encoding.vocabulary,
        sampling_seed=sampling_seed)

    trainer_cfg = _cfg['trainer']
    trainer = trainer_cfg.configure(
        BasicTrainer,
        dataset_manager=model.dataset_manager,
        training_ops=model.training_ops,
        logdir=logdir,
        write_summaries=train_mode)

    # Outside of training there is no data pipeline to set up.
    if not train_mode:
        return model, trainer, encoding

    # Hook the training and validation generators up to the dataset manager.
    data_prep_cfg = _cfg['data_prep']
    manager = model.dataset_manager
    train_gen = _cfg['train_data'].configure(_load_data, encoding=encoding)
    val_gen = _cfg['val_data'].configure(_load_data, encoding=encoding)
    data_prep_cfg.configure(
        prepare_train_and_val_data,
        dataset_manager=manager,
        train_generator=train_gen,
        val_generator=val_gen,
        output_types=(tf.int32,) * 3,
        output_shapes=([None], [None], [None]))

    return model, trainer, encoding
    def __init__(self, logdir, train_mode, sampling_seed=None):
        """Set up encodings, dataset manager, model and trainer from ``self._cfg``.

        Args:
            logdir: Stored as ``self.logdir`` and passed to the trainer.
            train_mode: Forwarded to the model as ``train_mode`` and to the
                trainer as ``write_summaries``. When truthy, training and
                validation data are also registered with the dataset manager.
            sampling_seed: Forwarded unchanged to the model.
        """
        seed = self._cfg.get('random_seed', None)
        set_random_seed(seed)
        self.logdir = logdir

        self.input_encoding = self._cfg['input_encoding'].configure()
        self.output_encoding = self._cfg['output_encoding'].configure()

        # The first input is a float32 [num_rows, None] array when the input
        # encoding exposes a truthy ``num_rows``; otherwise it is an int32
        # [None] sequence. The remaining three inputs are int32 sequences.
        num_rows = getattr(self.input_encoding, 'num_rows', None)
        if num_rows:
            first_shape, first_type = [num_rows, None], tf.float32
        else:
            first_shape, first_type = [None], tf.int32
        self.input_shapes = (first_shape, [None], [None], [None])
        self.input_types = (first_type, tf.int32, tf.int32, tf.int32)

        # The dataset manager gets the same shapes with an extra leading
        # unknown dimension (presumably the batch axis -- TODO confirm).
        batched_shapes = tuple(
            [None] + shape for shape in self.input_shapes)
        self.dataset_manager = DatasetManager(
            output_types=self.input_types,
            output_shapes=batched_shapes)

        model_cfg = self._cfg['model']
        self.model = model_cfg.configure(
            Model,
            dataset_manager=self.dataset_manager,
            train_mode=train_mode,
            vocabulary=self.output_encoding.vocabulary,
            sampling_seed=sampling_seed)

        # When a checkpoint is configured, blank out the init op.
        # NOTE(review): presumably so restored variables are not
        # re-initialized by the trainer -- confirm.
        self._load_checkpoint = self._cfg.get('load_checkpoint', None)
        if self._load_checkpoint:
            if self.model.training_ops is not None:
                self.model.training_ops.init_op = ()

        trainer_cfg = self._cfg['trainer']
        self.trainer = trainer_cfg.configure(
            BasicTrainer,
            session=tf.Session(),
            dataset_manager=self.dataset_manager,
            training_ops=self.model.training_ops,
            logdir=logdir,
            write_summaries=train_mode)

        # Outside of training there is no data pipeline to set up.
        if not train_mode:
            return

        # Hook the training and validation data up to the dataset manager.
        # Both loaders receive the same seed; the validation loader is
        # additionally configured with reseed=True.
        train_loader = self._cfg['train_data'].configure(
            TrainLoader, random_seed=seed)
        val_loader = self._cfg['val_data'].configure(
            TrainLoader, random_seed=seed, reseed=True)

        data_prep_cfg = self._cfg['data_prep']
        train_gen = self._load_data(train_loader, training=True)
        val_gen = self._load_data(val_loader)
        data_prep_cfg.configure(
            prepare_train_and_val_data,
            dataset_manager=self.dataset_manager,
            train_generator=train_gen,
            val_generator=val_gen,
            output_types=self.input_types,
            output_shapes=self.input_shapes)
Пример #3
0
    def __init__(self, logdir, train_mode):
        """Set up encodings, style vocabulary, model and trainer from ``self._cfg``.

        The list of styles is read from the file named by the ``style_list``
        configuration key (one style per line). If the key is missing or
        empty, the previously hard-coded path
        ``./seq2seq/data/parallel/styles`` is used, so existing
        configurations keep working.

        Args:
            logdir: Directory handed to the trainer as its ``logdir``.
            train_mode: Forwarded to the model as ``train_mode`` and to the
                trainer as ``write_summaries``. When truthy, training and
                validation data are also registered with the dataset manager.

        Raises:
            OSError: If the style list file cannot be opened.
        """
        set_random_seed(self._cfg.get('random_seed', None))

        self.input_encoding = self._cfg['input_encoding'].configure()
        self.output_encoding = self._cfg['output_encoding'].configure()

        # Prefer the configured style list; fall back to the legacy relative
        # path (resolved against the current working directory) when the
        # configuration does not provide one.
        default_style_list = "./seq2seq/data/parallel/styles"
        style_list_path = (self._cfg.get('style_list', None)
                           or default_style_list)
        with open(style_list_path) as f:
            style_list = [line.rstrip('\n') for line in f]
        # Styles are plain labels, so no pad/start/end tokens are added.
        self.style_vocabulary = Vocabulary(style_list,
                                           pad_token=None,
                                           start_token=None,
                                           end_token=None)

        # Per-example shapes and types of the four inputs. The second input
        # is a scalar (presumably the style ID -- TODO confirm).
        self.input_shapes = ([self.input_encoding.num_rows,
                              None], [], [None], [None])
        self.input_types = (tf.float32, tf.int32, tf.int32, tf.int32)
        # The dataset manager gets the same shapes with an extra leading
        # unknown dimension (presumably the batch axis -- TODO confirm).
        self.dataset_manager = DatasetManager(
            output_types=self.input_types,
            output_shapes=tuple([None, *shape] for shape in self.input_shapes))

        self.model = self._cfg['model'].configure(
            CNNRNNSeq2Seq,
            dataset_manager=self.dataset_manager,
            train_mode=train_mode,
            vocabulary=self.output_encoding.vocabulary,
            style_vocabulary=self.style_vocabulary)
        self.trainer = self._cfg['trainer'].configure(
            BasicTrainer,
            dataset_manager=self.dataset_manager,
            training_ops=self.model.training_ops,
            logdir=logdir,
            write_summaries=train_mode)

        # Keyword arguments shared by every load_data call; kept on the
        # instance so they are available after construction as well.
        self._load_data_kwargs = dict(input_encoding=self.input_encoding,
                                      output_encoding=self.output_encoding,
                                      style_vocabulary=self.style_vocabulary)

        if train_mode:
            # Configure the dataset manager with the training and validation data.
            self._cfg['data_prep'].configure(
                prepare_train_and_val_data,
                dataset_manager=self.dataset_manager,
                train_generator=self._cfg['train_data'].configure(
                    load_data, log=True, **self._load_data_kwargs),
                val_generator=self._cfg['val_data'].configure(
                    load_data, **self._load_data_kwargs),
                output_types=self.input_types,
                output_shapes=self.input_shapes)
def _init(cfg, logdir, train_mode, **kwargs):
    """Build the encoding, style vocabulary, model and trainer from ``cfg``.

    Args:
        cfg: Configuration object; read via ``get`` and per-key
            ``configure`` calls. Its ``style_list`` entry names a file with
            one style per line.
        logdir: Directory handed to the trainer as its ``logdir``.
        train_mode: Forwarded to the model as ``train_mode`` and to the
            trainer as ``write_summaries``. When truthy, the training and
            validation data are additionally registered with the model's
            dataset manager.
        **kwargs: Accepted but not used by this function.

    Returns:
        A ``(model, trainer, encoding, style_vocabulary)`` tuple.
    """
    set_random_seed(cfg.get('random_seed', None))

    encoding = cfg['encoding'].configure()

    # One style name per line; only the trailing newline is stripped.
    with open(cfg.get('style_list')) as style_file:
        styles = [entry.rstrip('\n') for entry in style_file]
    # Styles are plain labels, so no pad/start/end tokens are added.
    no_special_tokens = {
        'pad_token': None,
        'start_token': None,
        'end_token': None,
    }
    style_vocabulary = Vocabulary(styles, **no_special_tokens)

    model_cfg = cfg['model']
    model = model_cfg.configure(
        RNNSeq2Seq,
        train_mode=train_mode,
        vocabulary=encoding.vocabulary,
        style_vocabulary=style_vocabulary)

    trainer_cfg = cfg['trainer']
    trainer = trainer_cfg.configure(
        BasicTrainer,
        dataset_manager=model.dataset_manager,
        training_ops=model.training_ops,
        logdir=logdir,
        write_summaries=train_mode)

    # Outside of training there is no data pipeline to set up.
    if not train_mode:
        return model, trainer, encoding, style_vocabulary

    # Hook the training and validation generators up to the dataset manager.
    # The same encoding is used for both the input and the output side.
    shared_loader_args = {
        'input_encoding': encoding,
        'output_encoding': encoding,
        'style_vocabulary': style_vocabulary,
    }
    data_prep_cfg = cfg['data_prep']
    manager = model.dataset_manager
    train_gen = cfg['train_data'].configure(
        load_data, log=True, **shared_loader_args)
    val_gen = cfg['val_data'].configure(load_data, **shared_loader_args)
    data_prep_cfg.configure(
        prepare_train_and_val_data,
        dataset_manager=manager,
        train_generator=train_gen,
        val_generator=val_gen,
        output_types=(tf.int32,) * 4,
        output_shapes=([None], [], [None], [None]))

    return model, trainer, encoding, style_vocabulary