def __init__(self, config, **kwargs):
    super().__init__(config, **kwargs)
    self.config = config

    # Reuse the shared embedding table; project it if the word embedding
    # dimension differs from the extractor's hidden size.
    self.embedding_layer = EmbeddingSharedWeights.get_from_graph()
    if config.editor.word_dim != config.editor.edit_encoder.extractor.hidden_size:
        self.embedding_layer = self.embedding_layer.get_projected(
            config.editor.edit_encoder.extractor.hidden_size)

    # Projections for the per-token micro edit vectors and the final edit vector.
    self.micro_ev_projection = tf.layers.Dense(
        config.editor.edit_encoder.micro_ev_dim,
        activation=config.editor.edit_encoder.get('mev_proj_activation_fn', None),
        use_bias=True,
        name='micro_ev_proj')

    self.edit_vector_projection = tf.layers.Dense(
        config.editor.edit_encoder.edit_dim,
        activation=config.editor.edit_encoder.get(
            'edit_vector_proj_activation_fn', None),
        use_bias=False,
        name='encoder_ev')

    # The extractor shares the base transformer settings, overridden by its
    # own sub-config.
    extractor_config = Config.merge_to_new(
        [config.editor.transformer, config.editor.edit_encoder.extractor])
    extractor_config.put('save_attentions',
                         config.get('eval.save_attentions', False))

    self.mev_extractor = TransformerMicroEditExtractor(
        self.embedding_layer,
        self.micro_ev_projection,
        extractor_config)
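
# --- Standalone sketch (not from the source): how the two projections above
# transform extractor outputs. All shapes, sizes, and the mean-pooling choice
# here are assumptions for illustration only.
import tensorflow as tf

hidden = tf.placeholder(tf.float32, [None, 20, 256])       # [batch, tokens, hidden]
micro_ev_proj = tf.layers.Dense(64, use_bias=True, name='micro_ev_proj_demo')
edit_vector_proj = tf.layers.Dense(128, use_bias=False, name='encoder_ev_demo')

micro_evs = micro_ev_proj(hidden)                          # per-token micro edit vectors
edit_vector = edit_vector_proj(tf.reduce_mean(micro_evs, axis=1))  # [batch, 128]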
def __init__(self, config, **kwargs):
    super().__init__(**kwargs)
    self.config = Config.merge_to_new(
        [config.editor.transformer, config.editor.decoder])

    # Widen the transformer's hidden_size so it matches the input once the
    # edit vector is concatenated to it; keep the original size around.
    self.config.put('orig_hidden_size', self.config.hidden_size)
    self.config.put(
        'hidden_size',
        self.config.hidden_size + config.editor.edit_encoder.edit_dim)

    # Project the embeddings to the transformer's hidden size if needed.
    embedding_layer = EmbeddingSharedWeights.get_from_graph()
    self.vocab_size = embedding_layer.vocab_size
    if config.editor.word_dim != self.config.orig_hidden_size:
        self.embedding_layer = embedding_layer.get_projected(
            self.config.orig_hidden_size)
    else:
        self.embedding_layer = embedding_layer

    # Since EmbeddingSharedWeights supports a linear projection on the
    # embeddings, we reuse it to compute the model's logits: decoder states
    # are projected back to word_dim, then mapped to the vocabulary.
    self.project_back = tf.layers.Dense(config.editor.word_dim,
                                        activation=None,
                                        name='project_back')
    self.vocab_projection = embedding_layer

    # Transformer stack
    self.decoder_stack = MultiSourceDecoderStack(self.config.to_json(),
                                                 graph_utils.is_training())
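
# --- Standalone sketch (not from the source): the weight-tied logits path the
# comment above describes. The augmented decoder state is projected back to
# word_dim and then matched against the shared embedding matrix. All sizes are
# assumptions for illustration.
import tensorflow as tf

vocab_size, word_dim, aug_hidden = 32000, 512, 640         # hidden_size + edit_dim
embedding = tf.get_variable('shared_embedding_demo', [vocab_size, word_dim])
decoder_output = tf.placeholder(tf.float32, [None, None, aug_hidden])

project_back = tf.layers.Dense(word_dim, activation=None, name='project_back_demo')
word_states = project_back(decoder_output)                 # [batch, len, word_dim]
logits = tf.tensordot(word_states, embedding, [[2], [1]])  # [batch, len, vocab_size]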
def __init__(self, config: Config):
    self.config = config

    encoder_config = Config.merge_to_new(
        [config.editor.transformer, config.editor.encoder])
    self.encoder = TransformerEncoder(encoder_config, name='encoder')

    self.edit_encoder = EditEncoder(config)
    self.decoder = Decoder(config)
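
# --- Usage sketch (not from the source): wiring the model together. The class
# name `NeuralEditor` and the config path are hypothetical; Config.from_file
# is taken from the experiment runner further below.
config = Config.from_file('configs/editor.yml')  # hypothetical path
model = NeuralEditor(config)                     # hypothetical class name
# model.encoder, model.edit_encoder and model.decoder are then built as above.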
def __init__(self, config, sent_encoder, **kwargs):
    super().__init__(config, **kwargs)

    # Same extractor setup as the parent class, but with a shared sentence
    # encoder injected into the micro edit extractor.
    extractor_config = Config.merge_to_new(
        [config.editor.transformer, config.editor.edit_encoder.extractor])
    extractor_config.put('save_attentions',
                         config.get('eval.save_attentions', False))

    self.mev_extractor = TransformerMicroEditExtractor(
        self.embedding_layer,
        self.micro_ev_projection,
        sent_encoder,
        extractor_config)
def __init__(self, config, data_dir, checkpoint=None):
    self._data_dir = Path(data_dir)
    self._checkpoint = checkpoint
    self._config = Config.from_file(config)
    self._path_model_dir()
    self._put_epoch_num()
    self._config.put('local_data_dir', self._data_dir)

    tf.logging.info("Model: " + self.model.NAME)
    tf.logging.info("Config:")
    tf.logging.info(str(self._config))
    print()
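
# --- Usage sketch (not from the source): launching an experiment. The class
# name `Experiment` and the paths are hypothetical; the constructor signature
# matches the __init__ above.
experiment = Experiment('configs/editor.yml', data_dir='data', checkpoint=None)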
def __init__(self, embedding_layer, mev_projection, params, **kwargs):
    super().__init__(**kwargs)
    self.params = params
    is_training = graph_utils.is_training()

    encoder_config = Config.merge_to_new([params, params.encoder])
    decoder_config = Config.merge_to_new([params, params.decoder])

    self.target_encoder = EncoderStack(encoder_config.to_json(), is_training,
                                       params.save_attentions)
    self.mev_decoder = DecoderStack(decoder_config.to_json(), is_training,
                                    params.save_attentions)

    self.embedding_layer = embedding_layer
    self.mev_projection = mev_projection

    # Learned embedding for a prepended CLS-style token, plus a tanh pooling
    # layer over its final hidden state.
    self.cls_tok_embedding = self.add_weight('cls_tok_embedding',
                                             (self.params.hidden_size, ),
                                             dtype=tf.float32,
                                             trainable=True)
    self.pooling_layer = tf.layers.Dense(self.params.hidden_size,
                                         activation='tanh',
                                         name='pooling_layer')
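
# --- Standalone sketch (not from the source): the BERT-style pooling implied
# by cls_tok_embedding and pooling_layer above; a learned CLS vector is
# prepended to the sequence and its final hidden state is pooled with tanh.
# Whether the extractor pools exactly this way is an assumption.
import tensorflow as tf

hidden_size = 256
encoded = tf.placeholder(tf.float32, [None, None, hidden_size])  # encoder output
pooling_layer = tf.layers.Dense(hidden_size, activation='tanh', name='pooling_demo')

cls_state = encoded[:, 0, :]         # hidden state at the prepended CLS position
pooled = pooling_layer(cls_state)    # [batch, hidden_size]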