示例#1
0
 def __init__(self, dy_model, input_dim, output_dim):
     """Build the bias-free ``Linear`` layer stored in ``self.L``.

     Args:
         dy_model: Forwarded unchanged as the third positional argument of
             ``Linear`` (presumably the DyNet parameter collection --
             confirm against ``Linear``'s signature).
         input_dim: Forwarded as the first positional argument of
             ``Linear``.
         output_dim: Forwarded as the second positional argument of
             ``Linear`` and also kept on the instance as
             ``self.output_dim``.
     """
     # One initializer object per keyword, weights first. A bias
     # initializer is still handed over even though bias=False is requested.
     weight_initializer = LeCunUniformInitializer()
     bias_initializer = LeCunUniformInitializer()
     projection = Linear(input_dim, output_dim, dy_model,
                         bias=False,
                         param_init=weight_initializer,
                         bias_init=bias_initializer)
     self.L = projection
     self.output_dim = output_dim
示例#2
0
    def __init__(self,
                 policy_network=None,
                 baseline=None,
                 z_normalization=True,
                 conf_penalty=None,
                 sample=1,
                 weight=1.0,
                 use_baseline=True,
                 input_dim=Ref("exp_global.default_layer_dim"),
                 output_dim=2,
                 param_init=Ref("exp_global.param_init",
                                default=bare(GlorotInitializer)),
                 bias_init=Ref("exp_global.bias_init",
                               default=bare(ZeroInitializer))):
        """Register the policy network, optional baseline and penalty.

        Args:
            policy_network: Passed to ``add_serializable_component`` under
                the name ``"policy_network"``; the fallback factory builds a
                ``Linear`` from ``input_dim`` to ``output_dim``.
            baseline: Passed to ``add_serializable_component`` under the
                name ``"baseline"`` when ``use_baseline`` is true; the
                fallback factory builds a ``Linear`` with ``output_dim=1``.
            z_normalization: Stored as ``self.z_normalization``.
            conf_penalty: When not ``None`` it is registered under the name
                ``"conf_penalty"`` and stored as ``self.confidence_penalty``;
                otherwise that attribute is ``None``.
            sample: Stored as ``self.sample``.
            weight: Stored as ``self.weight``.
            use_baseline: When false, ``self.baseline`` is set to ``None``.
            input_dim: Stored as ``self.input_dim`` and used as the input
                size of both fallback ``Linear`` layers.
            output_dim: Output size of the fallback policy network.
            param_init: Forwarded to both fallback ``Linear`` layers.
            bias_init: Forwarded to both fallback ``Linear`` layers.
        """
        def _make_linear(n_out):
            # Shared factory for the two fallback layers; self.input_dim is
            # read at call time, just like in the per-component factories.
            return Linear(input_dim=self.input_dim,
                          output_dim=n_out,
                          param_init=param_init,
                          bias_init=bias_init)

        self.input_dim = input_dim

        # The policy network is registered before the baseline.
        self.policy_network = self.add_serializable_component(
            "policy_network", policy_network,
            lambda: _make_linear(output_dim))
        self.baseline = (
            self.add_serializable_component(
                "baseline", baseline, lambda: _make_linear(1))
            if use_baseline else None)

        if conf_penalty is None:
            self.confidence_penalty = None
        else:
            self.confidence_penalty = self.add_serializable_component(
                "conf_penalty", conf_penalty, lambda: conf_penalty)

        self.z_normalization = z_normalization
        self.sample = sample
        self.weight = weight
示例#3
0
 def __init__(self,
              dec_layers=1,
              enc_dim=Ref("exp_global.default_layer_dim"),
              dec_dim=Ref("exp_global.default_layer_dim"),
              param_init=Ref("exp_global.param_init",
                             default=bare(GlorotInitializer)),
              bias_init=Ref("exp_global.bias_init",
                            default=bare(ZeroInitializer)),
              projector=None):
     """Store the layer/dimension settings and register the projector.

     Args:
         dec_layers: Stored as ``self.dec_layers``.
         enc_dim: Stored as ``self.enc_dim``; input size of the fallback
             projector.
         dec_dim: Stored as ``self.dec_dim``; output size of the fallback
             projector.
         param_init: Forwarded to the fallback ``Linear`` projector.
         bias_init: Forwarded to the fallback ``Linear`` projector.
         projector: Passed to ``add_serializable_component`` under the name
             ``"projector"``, together with a factory that builds a
             ``Linear`` from ``enc_dim`` to ``dec_dim``.
     """
     def _default_projector():
         # Reads the dimensions back from the instance at call time.
         return Linear(input_dim=self.enc_dim,
                       output_dim=self.dec_dim,
                       param_init=param_init,
                       bias_init=bias_init)

     self.dec_layers, self.enc_dim, self.dec_dim = (dec_layers, enc_dim,
                                                    dec_dim)
     self.projector = self.add_serializable_component(
         "projector", projector, _default_projector)
示例#4
0
    def __init__(self,
                 evaluation_metric=None,
                 sample_length=50,
                 use_baseline=False,
                 inv_eval=True,
                 decoder_hidden_dim=Ref("exp_global.default_layer_dim"),
                 baseline=None):
        """Configure the evaluation metric and the optional baseline layer.

        Args:
            evaluation_metric: Metric object stored as
                ``self.evaluation_metric``. When ``None``, a
                ``xnmt.evaluator.FastBLEUEvaluator(ngram=4, smooth=1)`` is
                created instead.
            sample_length: Accepted for interface compatibility; it is not
                read anywhere in this constructor.
            use_baseline: Stored as ``self.use_baseline``; controls whether
                a baseline component is registered.
            inv_eval: Stored as ``self.inv_eval``.
            decoder_hidden_dim: Input size of the fallback baseline
                ``Linear`` layer.
            baseline: Passed to ``add_serializable_component`` under the
                name ``"baseline"`` when ``use_baseline`` is true; the
                fallback factory builds a ``Linear`` with ``output_dim=1``.
        """
        self.use_baseline = use_baseline
        self.inv_eval = inv_eval
        if evaluation_metric is None:
            self.evaluation_metric = xnmt.evaluator.FastBLEUEvaluator(ngram=4,
                                                                      smooth=1)
        else:
            self.evaluation_metric = evaluation_metric

        if self.use_baseline:
            self.baseline = self.add_serializable_component(
                "baseline", baseline,
                lambda: Linear(input_dim=decoder_hidden_dim, output_dim=1))
        else:
            # Always define the attribute so that reading ``self.baseline``
            # with the baseline disabled yields None instead of raising
            # AttributeError.
            self.baseline = None
示例#5
0
    def __init__(self,
                 word_vocab=None,
                 ngram_size=4,
                 src_vocab=Ref(Path("model.src_reader.vocab")),
                 hidden_dim=Ref("exp_global.default_layer_dim"),
                 word_ngram=None,
                 vocab_size=None):
        """Prepare the word vocabulary and register the n-gram projection.

        Args:
            word_vocab: When given, it is frozen, its unknown token is set
                to ``word_vocab.UNK_STR`` and its length becomes the
                projection's input size. When ``None``, a fresh ``Vocab()``
                is created and ``vocab_size`` is used as the input size.
            ngram_size: Stored as ``self.ngram_size``.
            src_vocab: Stored as ``self.src_vocab``.
            hidden_dim: Output size of the fallback ``Linear`` layer.
            word_ngram: Passed to ``add_serializable_component`` under the
                name ``"word_ngram"``.
            vocab_size: Input size used only when ``word_vocab`` is
                ``None``.
        """
        super().__init__()

        if word_vocab is not None:
            # A supplied vocabulary is finalised before its size is taken.
            word_vocab.freeze()
            word_vocab.set_unk(word_vocab.UNK_STR)
            dict_entry = len(word_vocab)
        else:
            word_vocab = Vocab()
            dict_entry = vocab_size

        def _default_word_ngram():
            return Linear(input_dim=dict_entry, output_dim=hidden_dim)

        self.dict_entry = dict_entry
        self.src_vocab = src_vocab
        self.word_vocab = word_vocab
        self.ngram_size = ngram_size
        self.word_ngram = self.add_serializable_component(
            "word_ngram", word_ngram, _default_word_ngram)
示例#6
0
    def __init__(self,
                 input_dim: int = Ref("exp_global.default_layer_dim"),
                 vocab_size: Optional[int] = None,
                 vocab: Optional[vocab.Vocab] = None,
                 trg_reader: Optional[input_reader.InputReader] = Ref(
                     "model.trg_reader", default=None),
                 label_smoothing: float = 0.0,
                 param_init: ParamInitializer = Ref(
                     "exp_global.param_init", default=bare(GlorotInitializer)),
                 bias_init: ParamInitializer = Ref(
                     "exp_global.bias_init", default=bare(ZeroInitializer)),
                 output_projector: Linear = None) -> None:
        """Set up the dimensions and register the output projection.

        Args:
            input_dim: Stored as ``self.input_dim``; input size of the
                fallback projector.
            vocab_size: Forwarded to ``self._choose_vocab_size``.
            vocab: Forwarded to ``self._choose_vocab_size``.
            trg_reader: Forwarded to ``self._choose_vocab_size``.
            label_smoothing: Stored as ``self.label_smoothing``.
            param_init: Forwarded to the fallback ``Linear`` projector.
            bias_init: Forwarded to the fallback ``Linear`` projector.
            output_projector: Passed to ``add_serializable_component``
                under the name ``"output_projector"``.
        """
        self.param_col = ParamManager.my_params(self)
        self.input_dim = input_dim
        # The output size is whatever _choose_vocab_size returns for the
        # three vocabulary-related arguments.
        resolved_size = self._choose_vocab_size(vocab_size, vocab, trg_reader)
        self.output_dim = resolved_size
        self.label_smoothing = label_smoothing

        def _projector_factory():
            # A truthy supplied projector wins; otherwise build a new one.
            if output_projector:
                return output_projector
            return Linear(input_dim=self.input_dim,
                          output_dim=self.output_dim,
                          param_init=param_init,
                          bias_init=bias_init)

        self.output_projector = self.add_serializable_component(
            "output_projector", output_projector, _projector_factory)
示例#7
0
    def setUp(self):
        """Build a translator with a segmenting encoder and one packed batch.

        Seeds the ``numpy`` and ``random`` generators, resets xnmt events and
        the parameter collection, assembles a ``DefaultTranslator`` whose
        encoder is a ``SegmentingSeqTransducer`` driven by a
        ``PolicyGradient`` learner, reads the two ``examples/data/head.*``
        files, packs them into ``self.src`` / ``self.trg`` and starts a fresh
        DyNet computation graph.
        """
        # Fix both RNG seeds before anything else is constructed.
        numpy.random.seed(2)
        random.seed(2)
        # Single dimension shared by every layer built below.
        layer_dim = 64
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim,
                                                          hidden_dim=layer_dim)
        self.segment_composer = SumComposer()
        self.src_reader = CharFromWordTextReader()
        self.trg_reader = PlainTextReader()
        self.loss_calculator = AutoRegressiveMLELoss()

        # The baseline and policy network are built explicitly (baseline
        # first) and handed to PolicyGradient instead of using its defaults.
        baseline = Linear(input_dim=layer_dim, output_dim=1)
        policy_network = Linear(input_dim=layer_dim, output_dim=2)
        self.poisson_prior = PoissonPrior(mu=3.3)
        # eps_prob=0.0 -- presumably disables random exploration; confirm
        # against EpsilonGreedy.
        self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
        self.conf_penalty = ConfidencePenalty()
        self.policy_gradient = PolicyGradient(input_dim=layer_dim,
                                              output_dim=2,
                                              baseline=baseline,
                                              policy_network=policy_network,
                                              z_normalization=True,
                                              conf_penalty=self.conf_penalty,
                                              sample=5)
        self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)
        # The segmenting encoder combines the pieces created above; its
        # final transducer is a second, separately constructed BiLSTM.
        self.segmenting_encoder = SegmentingSeqTransducer(
            embed_encoder=self.segment_encoder_bilstm,
            segment_composer=self.segment_composer,
            final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim,
                                                 hidden_dim=layer_dim),
            policy_learning=self.policy_gradient,
            eps_greedy=self.eps_greedy,
            length_prior=self.length_prior,
        )

        # Full translator: embedders and scorer use a vocabulary size of 100.
        self.model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=self.segmenting_encoder,
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="decoder"),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                scorer=Softmax(vocab_size=100, input_dim=layer_dim),
                trg_embed_dim=layer_dim,
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        self.model.set_train(True)

        self.layer_dim = layer_dim
        # Load the source/target sentences through the model's own readers.
        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))
        # Pack everything that was read using a batch size of 3.
        my_batcher = xnmt.batcher.TrgBatcher(batch_size=3,
                                             src_pad_token=1,
                                             trg_pad_token=2)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        # Start a fresh computation graph with immediate computation and
        # validity checking switched on.
        dy.renew_cg(immediate_compute=True, check_validity=True)