Example 1
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict,
                                      task.target_dictionary)

        out_channels = speech_utils.eval_str_nested_list_or_tuple(
            args.encoder_conv_channels, type=int)
        kernel_sizes = speech_utils.eval_str_nested_list_or_tuple(
            args.encoder_conv_kernel_sizes, type=int)
        strides = speech_utils.eval_str_nested_list_or_tuple(
            args.encoder_conv_strides, type=int)
        logger.info('input feature dimension: {}, channels: {}'.format(
            task.feat_dim, task.feat_in_channels))
        assert task.feat_dim % task.feat_in_channels == 0
        conv_layers = ConvBNReLU(
            out_channels,
            kernel_sizes,
            strides,
            in_channels=task.feat_in_channels,
        ) if out_channels is not None else None

        fconv_encoder_input_size = task.feat_dim // task.feat_in_channels
        if conv_layers is not None:
            for stride in strides:
                if isinstance(stride, (list, tuple)):
                    assert len(stride) > 0
                    s = stride[1] if len(stride) > 1 else stride[0]
                else:
                    assert isinstance(stride, int)
                    s = stride
                fconv_encoder_input_size = (fconv_encoder_input_size + s - 1) // s
            fconv_encoder_input_size *= out_channels[-1]

        encoder = SpeechFConvEncoder(
            conv_layers_before=conv_layers,
            input_size=fconv_encoder_input_size,
            embed_dim=args.encoder_embed_dim,
            convolutions=eval(args.encoder_layers),
            dropout=args.dropout,
        )
        decoder = SpeechFConvDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            embed_dict=decoder_embed_dict,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.max_target_positions,
            share_embed=args.share_input_output_embed,
            positional_embeddings=args.decoder_positional_embed,
        )
        return cls(encoder, decoder)
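The loop above computes the encoder's flattened input size: each conv layer's frequency-axis stride shrinks the feature dimension by ceiling division, and the last layer's channel count multiplies the result. A minimal sketch of the same arithmetic, with hypothetical values:

# Hypothetical values; mirrors the stride loop in build_model above.
feat_dim, in_channels = 80, 1        # e.g. 80 log-Mel features, 1 channel
out_channels = [64, 128]
strides = [(2, 2), (2, 2)]           # per-layer (time, frequency) strides

size = feat_dim // in_channels       # 80
for stride in strides:
    s = stride[1] if isinstance(stride, (list, tuple)) else stride
    size = (size + s - 1) // s       # ceiling division: 80 -> 40 -> 20
size *= out_channels[-1]             # 20 * 128 = 2560
print(size)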
Example 2
 def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
     embed_dict = utils.parse_embedding(embed_path)
     utils.print_embed_overlap(embed_dict, dictionary)
     return utils.load_embedding(embed_dict, dictionary, embed_tokens)
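A hedged usage sketch (the path and dimension are hypothetical; `Embedding` is assumed to be the padding-aware helper fairseq defines around `nn.Embedding`):

# Hypothetical call: the dictionary comes from the task, and the file is a
# word2vec-style text file that utils.parse_embedding can read.
embed_tokens = load_pretrained_embedding_from_file(
    embed_path='embeddings/glove.300d.txt',   # hypothetical path
    dictionary=task.target_dictionary,
    embed_dim=300,
)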
Example 3
    def build_model(cls, args, src_dict, dst_dict):
        """Build a new model instance."""

        encoder_embed_dict = None
        if args.encoder_embed_path:
            encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
            utils.print_embed_overlap(encoder_embed_dict, src_dict)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict, dst_dict)

        encoder = LSTMEncoder(
            src_dict,
            embed_dim=args.encoder_embed_dim,
            embed_dict=encoder_embed_dict,
            num_layers=args.encoder_layers,
            dropout_in=args.encoder_dropout_in,
            dropout_out=args.encoder_dropout_out,
        )
        decoder = LSTMDecoder(
            dst_dict,
            encoder_embed_dim=args.encoder_embed_dim,
            embed_dim=args.decoder_embed_dim,
            embed_dict=decoder_embed_dict,
            out_embed_dim=args.decoder_out_embed_dim,
            num_layers=args.decoder_layers,
            attention=bool(eval(args.decoder_attention)),
            dropout_in=args.decoder_dropout_in,
            dropout_out=args.decoder_dropout_out,
        )
        return cls(encoder, decoder)
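Note that `bool(eval(args.decoder_attention))` executes whatever string the flag carries. If the flag only ever holds literals such as 'True' or '1', `ast.literal_eval` is a safer drop-in; a sketch, not part of the original example:

import ast

# Accepts only Python literals ('True', '1', '[True, False]'),
# never arbitrary expressions, unlike eval().
attention = bool(ast.literal_eval(args.decoder_attention))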
Example 4
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        encoder_embed_dict = None
        if args.encoder_embed_path:
            encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
            utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

        encoder = FConvEncoder(
            dictionary=task.source_dictionary,
            embed_dim=args.encoder_embed_dim,
            embed_dict=encoder_embed_dict,
            convolutions=eval(args.encoder_layers),
            dropout=args.dropout,
            max_positions=args.max_source_positions,
        )
        decoder = FConvDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            embed_dict=decoder_embed_dict,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.max_target_positions,
            share_embed=args.share_input_output_embed,
        )
        return FConvModel(encoder, decoder)
Example 5
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        encoder_embed_dict = None
        if args.encoder_embed_path:
            encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
            utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

        kw_embed_dict = None
        if args.keywords_embed_path:
            kw_embed_dict = utils.parse_embedding(args.keywords_embed_path)
            print(">> Keywords embeddings loaded!")

        docEncoder = ExtendedEncoder(
            args,
            FConvEncoder(
                dictionary=task.source_dictionary,
                embed_dim=args.encoder_embed_dim,
                embed_dict=encoder_embed_dict,
                convolutions=eval(args.encoder_layers),
                dropout=args.dropout,
                max_positions=args.max_source_positions,
                normalization_constant=args.normalization_constant,
            ),
        )
        decoder = CondFConvDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            embed_dict=decoder_embed_dict,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.max_tgt_sentence_length,
            share_embed=args.share_input_output_embed,
            normalization_constant=args.normalization_constant,
        )

        docDecoder = LDocumentDecoder(
            args,
            decoder,
            embed_dim=args.decoder_embed_dim,
            hidden_size=args.decoder_embed_dim,
            out_embed_dim=args.decoder_out_embed_dim,
            encoder_embed_dim=args.encoder_embed_dim,
            encoder_output_units=args.encoder_embed_dim,
            num_layers=1,
            attention=eval(args.decoder_attention),
            dropout_in=0.1,
            dropout_out=0.1,
            pretrained_embed=None,
        )

        return FEncFairseqModel(docEncoder, docDecoder)
Example 6
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        encoder_embed_dict = None
        if args.encoder_embed_path:
            encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
            utils.print_embed_overlap(encoder_embed_dict,
                                      task.source_dictionary)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict,
                                      task.target_dictionary)

        conv_encoder = FConvEncoder3(
            dictionary=task.source_dictionary,
            embed_dim=args.encoder_embed_dim,
            embed_dict=encoder_embed_dict,
            convolutions=eval(args.encoder_layers),
            dropout=args.dropout,
            max_positions=args.max_source_positions,
            normalization_constant=args.normalization_constant,
        )
        gcn_encoder = GCNEncoder2(
            dictionary=task.source_dictionary,
            dropout=args.dropout,
            num_inputs=args.gcn_num_inputs,
            num_units=args.gcn_num_units,
            num_labels=args.gcn_num_labels,
            num_layers=args.gcn_num_layers,
            in_arcs=args.gcn_in_arcs,
            out_arcs=args.gcn_out_arcs,
            batch_first=args.gcn_batch_first,
            residual=args.gcn_residual,
            use_gates=args.gcn_use_gates,
            use_glus=args.gcn_use_glus,
            normalization_constant=args.normalization_constant,
        )
        encoder = FConvGCNOnTopEncoder(task.source_dictionary, conv_encoder,
                                       gcn_encoder)
        decoder = FConvDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            embed_dict=decoder_embed_dict,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.max_target_positions,
            share_embed=args.share_input_output_embed,
            normalization_constant=args.normalization_constant,
        )
        return FConvGCNOnTopModel(encoder, decoder)
Example 7
 def build_embedding(dictionary, embed_dim, path=None):
     num_embeddings = len(dictionary)
     padding_idx = dictionary.pad()
     emb = Embedding(num_embeddings, embed_dim, padding_idx)
     # if provided, load from preloaded dictionaries
     if path:
         embed_dict = utils.parse_embedding(path)
         utils.load_embedding(embed_dict, dictionary, emb)
         utils.print_embed_overlap(embed_dict, dictionary)
     return emb
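For reference, fairseq's `utils.parse_embedding` reads a word2vec-style text file: it skips a header line (vocabulary size and dimension) and then expects one token followed by its vector components per line. A tiny hypothetical file and call:

# Write a toy embedding file in the format parse_embedding expects
# (header line, then 'token v1 v2 ...' per line); all values are made up.
with open('toy.vec', 'w') as f:
    f.write('2 4\n')
    f.write('the 0.1 0.2 0.3 0.4\n')
    f.write('cat -0.5 0.0 0.7 0.1\n')

emb = build_embedding(dictionary, embed_dim=4, path='toy.vec')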
Example 8
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embedding = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embedding, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    # embed_keys = set(embed_dict.keys())
    # vocab_keys = set(dictionary.symbols)
    # print(vocab_keys - embed_keys)
    return utils.load_embedding(embed_dict, dictionary,
                                embed_tokens), embed_dict
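The commented-out lines sketch a vocabulary-coverage check. A hedged version of that idea (it assumes the fairseq Dictionary exposes a `symbols` list, as the comment itself does):

def embedding_coverage(embed_dict, dictionary):
    # Fraction of dictionary symbols covered by pretrained vectors,
    # plus the symbols missing from the embedding file.
    vocab_keys = set(dictionary.symbols)
    embed_keys = set(embed_dict.keys())
    missing = sorted(vocab_keys - embed_keys)
    return len(vocab_keys & embed_keys) / len(vocab_keys), missing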
Example 9
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        if args.merge_mode not in ['concat', 'sum']:
            raise ValueError('Invalid merge mode. '
                             'Merge mode should be one of '
                             '{"sum", "concat"}')
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise ValueError('encoder embedding dimension '
                             'should be equal to '
                             'decoder embedding dimension')
        if args.merge_mode == 'sum' and args.token_embed_dim != args.encoder_embed_dim:
            raise ValueError('token embedding dimension '
                             'should be equal to '
                             'encoder embedding dimension')

        encoder_embed_dict = None
        if args.encoder_embed_path:
            encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
            utils.print_embed_overlap(encoder_embed_dict,
                                      task.source_dictionary)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict,
                                      task.target_dictionary)

        encoder = FConvEncoder(
            dictionary=task.source_dictionary,
            args=args,
            encoder_embed_dim=args.encoder_embed_dim,
            embed_dict=encoder_embed_dict,
            convolutions=eval(args.encoder_layers),
            dropout=args.dropout,
            max_positions=args.max_source_positions,
        )
        decoder = FConvDecoder(
            dictionary=task.target_dictionary,
            args=args,
            decoder_embed_dim=args.decoder_embed_dim,
            embed_dict=decoder_embed_dict,
            convolutions=eval(args.decoder_layers),
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout=args.dropout,
            max_positions=args.max_target_positions,
            share_embed=args.share_input_output_embed,
        )
        return FConvModel(encoder, decoder)
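The dimension checks above exist because 'sum' adds token and encoder embeddings elementwise, while 'concat' stacks them along the feature axis. A minimal sketch with hypothetical tensors:

import torch

token = torch.zeros(8, 10, 256)            # (batch, time, token_embed_dim)
enc = torch.zeros(8, 10, 256)              # (batch, time, encoder_embed_dim)

summed = token + enc                       # 'sum' needs equal dims -> (8, 10, 256)
concat = torch.cat([token, enc], dim=-1)   # 'concat' tolerates unequal dims -> (8, 10, 512)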
Example 10
def copy_prev_embedding(embed_path, dictionary, embed_dim, prev_embedded_tokens_path, prev_dict):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = nn.Embedding(num_embeddings, embed_dim, padding_idx)
    prev_embedded_tokens = load_random_embedding(prev_embedded_tokens_path)
    # Start at index 5, presumably skipping the dictionary's special symbols;
    # copy the previous run's vector for every token both vocabularies share.
    for i in range(5, num_embeddings):
        if prev_dict.index(dictionary.symbols[i]) != prev_dict.unk() and i != dictionary.unk():
            embed_tokens.weight.data[i] = prev_embedded_tokens[prev_dict.index(dictionary.symbols[i])]

    # embed_tokens.weight = nn.Parameter(prev_embedded_tokens)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
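A hedged usage sketch; every path and name below is hypothetical, and `load_random_embedding` is whatever helper the example assumes for reading the previous run's weights:

embed_tokens = copy_prev_embedding(
    embed_path='embeddings/new.vec',                         # hypothetical
    dictionary=task.target_dictionary,
    embed_dim=512,
    prev_embedded_tokens_path='checkpoints/prev_embed.pt',   # hypothetical
    prev_dict=prev_dictionary,                               # vocabulary of the earlier run
)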
Example 11
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        attention_layer = {
            'dot': DotAttentionLayer,
            'general': GeneralAttentionLayer,
            'multi-head': MultiheadAttention,
            'mlp': MLPAttentionLayer,
        }

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict,
                                      task.target_dictionary)

        # replaced source_dict with target_dict
        encoder = ASTS2SEncoder(
            args=args,
            linear_dim=args.encoder_embed_dim,
            convolutions=eval(args.encoder_convolutions),
            layers=args.encoder_layers,
            dropout=args.dropout,
            max_positions=args.max_source_positions,
            normalization_constant=args.normalization_constant,
            last_state=args.encoder_state,
            weight_norm=args.weight_norm,
            learn_initial=args.learn_initial_state,
            conv_1d=args.conv_1d,
            audio_features=task.audio_features if not args.wav2vec else 512,
        )
        decoder = CLSTMDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            dropout_in=args.dropout,
            dropout_out=args.dropout,
            num_layers=args.decoder_layers,
            attention_layer=attention_layer[args.attention_type],
            initial_state=args.decoder_initial_state,
            weight_norm=args.weight_norm,
            attention_function=args.attention_function,
            max_positions=args.max_target_positions,
            scale_norm=args.scale_norm,
            scale=args.scale,
        )
        return ASTS2SModel(encoder, decoder)
Example 12
    def build_model(cls, args, src_dict, dst_dict):
        """Build a new model instance."""
        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = args.max_positions
            args.max_target_positions = args.max_positions
        if not hasattr(args, 'share_input_output_embed'):
            args.share_input_output_embed = False
        if not hasattr(args, 'encoder_embed_path'):
            args.encoder_embed_path = None
        if not hasattr(args, 'decoder_embed_path'):
            args.decoder_embed_path = None

        encoder_embed_dict = None
        if args.encoder_embed_path:
            encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
            utils.print_embed_overlap(encoder_embed_dict, src_dict)

        decoder_embed_dict = None
        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict, dst_dict)

        encoder = FConvEncoder(
            src_dict,
            embed_dim=args.encoder_embed_dim,
            embed_dict=encoder_embed_dict,
            convolutions=eval(args.encoder_layers),
            dropout=args.dropout,
            max_positions=args.max_source_positions,
        )
        decoder = FConvDecoder(dst_dict,
                               embed_dim=args.decoder_embed_dim,
                               embed_dict=decoder_embed_dict,
                               convolutions=eval(args.decoder_layers),
                               out_embed_dim=args.decoder_out_embed_dim,
                               attention=eval(args.decoder_attention),
                               dropout=args.dropout,
                               max_positions=args.max_target_positions,
                               share_embed=args.share_input_output_embed)
        return FConvModel(encoder, decoder)
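The `hasattr` block back-fills defaults for checkpoints saved before these flags existed. A tighter sketch using `getattr`, shown as a stylistic alternative rather than the original code (it treats each flag independently, whereas the original only back-fills max_target_positions when max_source_positions is also missing):

args.max_source_positions = getattr(args, 'max_source_positions', args.max_positions)
args.max_target_positions = getattr(args, 'max_target_positions', args.max_positions)
args.share_input_output_embed = getattr(args, 'share_input_output_embed', False)
args.encoder_embed_path = getattr(args, 'encoder_embed_path', None)
args.decoder_embed_path = getattr(args, 'decoder_embed_path', None)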
Example 13
    def __init__(self,
                 # new approach
                 params: Params = None,
                 vocab: Dictionary = None,
                 # old approach
                 args=None,
                 task=None):

        if params is not None and args is not None:
            raise ConfigurationError("you cannot define both, params and args, you have to device which one to use (just one way is allowed)")

        if params is not None:
            encoder = FConvEncoder(
                dictionary=vocab
            )
            decoder = FConvDecoder(
                dictionary=vocab
            )
        else:
            if args is not None and task is not None:

                # Create and initialize encoder and decoder here
                encoder_embed_dict = None
                if args.encoder_embed_path:
                    encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
                    utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

                decoder_embed_dict = None
                if args.decoder_embed_path:
                    decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
                    utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

                encoder = FConvEncoder(
                    dictionary=task.source_dictionary,
                    embed_dim=args.encoder_embed_dim,
                    embed_dict=encoder_embed_dict,
                    convolutions=eval(args.encoder_layers),
                    dropout=args.dropout,
                    max_positions=args.max_source_positions,
                    batch_norm=args.batch_norm,
                    use_linear_se=True,  # args.use_linear_se
                )
                decoder = FConvDecoder(
                    dictionary=task.target_dictionary,
                    embed_dim=args.decoder_embed_dim,
                    embed_dict=decoder_embed_dict,
                    convolutions=eval(args.decoder_layers),
                    out_embed_dim=args.decoder_out_embed_dim,
                    attention=eval(args.decoder_attention),
                    dropout=args.dropout,
                    max_positions=args.max_target_positions,
                    share_embed=args.share_input_output_embed,
                    use_linear_se=False,  # args.use_linear_se
                )
            else:
                # We have a problem!
                raise ConfigurationError("params and (args, task) are all None, something is wrong here.")

        # Call the super class
        super(FConvModel, self).__init__(encoder, decoder)
        # Correctly set the number of attention layers
        self.encoder.num_attention_layers = sum(layer is not None for layer in decoder.attention)
Example 14
    def build_model(cls, args, task):
        """Build a new model instance."""

        # make sure all arguments are present in older models
        base_architecture(args)

        attention_layer = {
            'dot': DotAttentionLayer,
            'general': GeneralAttentionLayer,
            'multi-head': MultiheadAttention,
            'mlp': MLPAttentionLayer,
        }

        if args.decoder_embed_path:
            decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
            utils.print_embed_overlap(decoder_embed_dict,
                                      task.target_dictionary)

        if not hasattr(args, 'max_source_positions'):
            args.max_source_positions = 1024
        if not hasattr(args, 'max_target_positions'):
            args.max_target_positions = 1024

        src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

        def build_embedding(dictionary, embed_dim, path=None):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            emb = Embedding(num_embeddings, embed_dim, padding_idx)
            # if provided, load from preloaded dictionaries
            if path:
                embed_dict = utils.parse_embedding(path)
                utils.load_embedding(embed_dict, dictionary, emb)
            return emb

        def build_audio_embedding(embed_dim, dropout):
            m = nn.Linear(task.audio_features, embed_dim)
            nn.init.normal_(m.weight,
                            mean=0,
                            std=math.sqrt((1 - dropout) / task.audio_features))
            nn.init.constant_(m.bias, 0)
            return m

        encoder_embed_tokens = build_audio_embedding(
            2 * args.encoder_embed_dim, args.dropout)

        encoder = ProxyEncoder(
            args,
            tgt_dict,
            encoder_embed_tokens,
            audio_features=task.audio_features,
        )
        decoder = CLSTMDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            out_embed_dim=args.decoder_out_embed_dim,
            attention=eval(args.decoder_attention),
            hidden_size=args.hidden_size,
            dropout_in=args.dropout,
            dropout_out=args.dropout,
            num_layers=args.decoder_layers,
            attention_layer=attention_layer[args.attention_type],
            initial_state=args.decoder_initial_state,
            weight_norm=args.weight_norm,
            attention_function=args.attention_function,
            scale_norm=args.scale_norm,
        )
        return Transf2BerModel(encoder, decoder)
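`build_audio_embedding` draws weights from N(0, (1 - dropout) / fan_in), the fconv-style scheme fairseq uses so that, once dropout's rescaling is active at training time, layer outputs keep roughly unit variance. A quick numeric check with hypothetical sizes:

import math
import torch
import torch.nn as nn

audio_features, dropout = 512, 0.1           # hypothetical values
m = nn.Linear(audio_features, 256)
nn.init.normal_(m.weight, mean=0, std=math.sqrt((1 - dropout) / audio_features))
nn.init.constant_(m.bias, 0)

x = torch.randn(1000, audio_features)        # unit-variance inputs
print(m(x).var().item())                     # ~= 1 - dropout = 0.9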