def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

    out_channels = speech_utils.eval_str_nested_list_or_tuple(
        args.encoder_conv_channels, type=int)
    kernel_sizes = speech_utils.eval_str_nested_list_or_tuple(
        args.encoder_conv_kernel_sizes, type=int)
    strides = speech_utils.eval_str_nested_list_or_tuple(
        args.encoder_conv_strides, type=int)
    logger.info('input feature dimension: {}, channels: {}'.format(
        task.feat_dim, task.feat_in_channels))
    assert task.feat_dim % task.feat_in_channels == 0
    conv_layers = ConvBNReLU(
        out_channels, kernel_sizes, strides, in_channels=task.feat_in_channels,
    ) if out_channels is not None else None

    fconv_encoder_input_size = task.feat_dim // task.feat_in_channels
    if conv_layers is not None:
        for stride in strides:
            if isinstance(stride, (list, tuple)):
                assert len(stride) > 0
                s = stride[1] if len(stride) > 1 else stride[0]
            else:
                assert isinstance(stride, int)
                s = stride
            fconv_encoder_input_size = (fconv_encoder_input_size + s - 1) // s
        fconv_encoder_input_size *= out_channels[-1]

    encoder = SpeechFConvEncoder(
        conv_layers_before=conv_layers,
        input_size=fconv_encoder_input_size,
        embed_dim=args.encoder_embed_dim,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
    )
    decoder = SpeechFConvDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_target_positions,
        share_embed=args.share_input_output_embed,
        positional_embeddings=args.decoder_positional_embed,
    )
    return cls(encoder, decoder)
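# Worked example for the encoder input-size computation above (illustrative
# numbers, not from any particular config): with task.feat_dim = 80,
# task.feat_in_channels = 1, strides = [2, 2], and out_channels = [64, 128],
# the per-channel feature dimension starts at 80 // 1 = 80, and each stride s
# shrinks it by ceiling division, (size + s - 1) // s:
#     ceil(80 / 2) = 40, then ceil(40 / 2) = 20
# so the flattened input to the FConv encoder is 20 * 128 = 2560. Note that
# for tuple strides only the feature-axis component (stride[1] if present,
# otherwise stride[0]) enters this calculation.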
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
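# A minimal usage sketch for the helper above (hypothetical path and
# dimension; `task` is assumed to expose a fairseq-style dictionary):
#
# embed_tokens = load_pretrained_embedding_from_file(
#     embed_path='embeddings/word_vectors.txt',  # hypothetical path
#     dictionary=task.target_dictionary,
#     embed_dim=512,
# )
#
# The returned nn.Embedding keeps its random initialization for words missing
# from the pretrained file; rows for overlapping words are overwritten with
# the pretrained vectors by utils.load_embedding.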
def build_model(cls, args, src_dict, dst_dict):
    """Build a new model instance."""
    encoder_embed_dict = None
    if args.encoder_embed_path:
        encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
        utils.print_embed_overlap(encoder_embed_dict, src_dict)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, dst_dict)

    encoder = LSTMEncoder(
        src_dict,
        embed_dim=args.encoder_embed_dim,
        embed_dict=encoder_embed_dict,
        num_layers=args.encoder_layers,
        dropout_in=args.encoder_dropout_in,
        dropout_out=args.encoder_dropout_out,
    )
    decoder = LSTMDecoder(
        dst_dict,
        encoder_embed_dim=args.encoder_embed_dim,
        embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        out_embed_dim=args.decoder_out_embed_dim,
        num_layers=args.decoder_layers,
        attention=bool(eval(args.decoder_attention)),
        dropout_in=args.decoder_dropout_in,
        dropout_out=args.decoder_dropout_out,
    )
    return cls(encoder, decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    encoder_embed_dict = None
    if args.encoder_embed_path:
        encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
        utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

    encoder = FConvEncoder(
        dictionary=task.source_dictionary,
        embed_dim=args.encoder_embed_dim,
        embed_dict=encoder_embed_dict,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
        max_positions=args.max_source_positions,
    )
    decoder = FConvDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_target_positions,
        share_embed=args.share_input_output_embed,
    )
    return FConvModel(encoder, decoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    encoder_embed_dict = None
    if args.encoder_embed_path:
        encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
        utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

    kw_embed_dict = None
    if args.keywords_embed_path:
        kw_embed_dict = utils.parse_embedding(args.keywords_embed_path)
        print(">> Keywords embeddings loaded!")

    docEncoder = ExtendedEncoder(args, FConvEncoder(
        dictionary=task.source_dictionary,
        embed_dim=args.encoder_embed_dim,
        embed_dict=encoder_embed_dict,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
        max_positions=args.max_source_positions,
        normalization_constant=args.normalization_constant,
    ))
    decoder = CondFConvDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_tgt_sentence_length,
        share_embed=args.share_input_output_embed,
        normalization_constant=args.normalization_constant,
    )
    docDecoder = LDocumentDecoder(
        args,
        decoder,
        embed_dim=args.decoder_embed_dim,
        hidden_size=args.decoder_embed_dim,
        out_embed_dim=args.decoder_out_embed_dim,
        encoder_embed_dim=args.encoder_embed_dim,
        encoder_output_units=args.encoder_embed_dim,
        num_layers=1,
        attention=eval(args.decoder_attention),
        dropout_in=0.1,
        dropout_out=0.1,
        pretrained_embed=None,
    )
    return FEncFairseqModel(docEncoder, docDecoder)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    encoder_embed_dict = None
    if args.encoder_embed_path:
        encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
        utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

    conv_encoder = FConvEncoder3(
        dictionary=task.source_dictionary,
        embed_dim=args.encoder_embed_dim,
        embed_dict=encoder_embed_dict,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
        max_positions=args.max_source_positions,
        normalization_constant=args.normalization_constant,
    )
    gcn_encoder = GCNEncoder2(
        dictionary=task.source_dictionary,
        dropout=args.dropout,
        num_inputs=args.gcn_num_inputs,
        num_units=args.gcn_num_units,
        num_labels=args.gcn_num_labels,
        num_layers=args.gcn_num_layers,
        in_arcs=args.gcn_in_arcs,
        out_arcs=args.gcn_out_arcs,
        batch_first=args.gcn_batch_first,
        residual=args.gcn_residual,
        use_gates=args.gcn_use_gates,
        use_glus=args.gcn_use_glus,
        normalization_constant=args.normalization_constant,
    )
    encoder = FConvGCNOnTopEncoder(task.source_dictionary, conv_encoder, gcn_encoder)
    decoder = FConvDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_target_positions,
        share_embed=args.share_input_output_embed,
        normalization_constant=args.normalization_constant,
    )
    return FConvGCNOnTopModel(encoder, decoder)
def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
        utils.print_embed_overlap(embed_dict, dictionary)
    return emb
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    # embed_keys = set(embed_dict.keys())
    # vocab_keys = set(dictionary.symbols)
    # print(vocab_keys - embed_keys)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens), embed_dict
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    if args.merge_mode not in ['concat', 'sum']:
        raise ValueError('Invalid merge mode. Merge mode should be one of {"sum", "concat"}')
    if args.encoder_embed_dim != args.decoder_embed_dim:
        raise ValueError('encoder embedding dimension should be equal to decoder embedding dimension')
    if args.merge_mode == 'sum' and args.token_embed_dim != args.encoder_embed_dim:
        raise ValueError('token embedding dimension should be equal to encoder embedding dimension')

    encoder_embed_dict = None
    if args.encoder_embed_path:
        encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
        utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

    encoder = FConvEncoder(
        dictionary=task.source_dictionary,
        args=args,
        encoder_embed_dim=args.encoder_embed_dim,
        embed_dict=encoder_embed_dict,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
        max_positions=args.max_source_positions,
    )
    decoder = FConvDecoder(
        dictionary=task.target_dictionary,
        args=args,
        decoder_embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_target_positions,
        share_embed=args.share_input_output_embed,
    )
    return FConvModel(encoder, decoder)
def copy_prev_embedding(embed_path, dictionary, embed_dim, prev_embedded_tokens_path, prev_dict):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = nn.Embedding(num_embeddings, embed_dim, padding_idx)
    prev_embedded_tokens = load_random_embedding(prev_embedded_tokens_path)
    # start at index 5, presumably to skip the dictionary's special symbols;
    # copy the previous embedding row for every word that prev_dict also knows
    for i in range(5, num_embeddings):
        if prev_dict.index(dictionary.symbols[i]) != prev_dict.unk() and i != dictionary.unk():
            embed_tokens.weight.data[i] = prev_embedded_tokens[prev_dict.index(dictionary.symbols[i])]
    # embed_tokens.weight = nn.Parameter(prev_embedded_tokens)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
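# A hypothetical call to the helper above (all paths and names illustrative):
#
# embed_tokens = copy_prev_embedding(
#     embed_path='embeddings/vectors.txt',             # hypothetical path
#     dictionary=new_dict,
#     embed_dim=512,
#     prev_embedded_tokens_path='prev_embed.weights',  # hypothetical path
#     prev_dict=old_dict,
# )
#
# Words shared between `new_dict` and `old_dict` inherit their rows from the
# previous model's embedding; the pretrained vectors from `embed_path` are
# then applied on top, overriding any overlapping rows.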
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    attention_layer = {
        'dot': DotAttentionLayer,
        'general': GeneralAttentionLayer,
        'multi-head': MultiheadAttention,
        'mlp': MLPAttentionLayer,
    }

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)  # replaced source_dict with target_dict

    encoder = ASTS2SEncoder(
        args=args,
        linear_dim=args.encoder_embed_dim,
        convolutions=eval(args.encoder_convolutions),
        layers=args.encoder_layers,
        dropout=args.dropout,
        max_positions=args.max_source_positions,
        normalization_constant=args.normalization_constant,
        last_state=args.encoder_state,
        weight_norm=args.weight_norm,
        learn_initial=args.learn_initial_state,
        conv_1d=args.conv_1d,
        audio_features=task.audio_features if not args.wav2vec else 512,
    )
    decoder = CLSTMDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout_in=args.dropout,
        dropout_out=args.dropout,
        num_layers=args.decoder_layers,
        attention_layer=attention_layer[args.attention_type],
        initial_state=args.decoder_initial_state,
        weight_norm=args.weight_norm,
        attention_function=args.attention_function,
        max_positions=args.max_target_positions,
        scale_norm=args.scale_norm,
        scale=args.scale,
    )
    return ASTS2SModel(encoder, decoder)
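# The `attention_layer` dict above is a plain string-to-class dispatch:
# `attention_layer[args.attention_type]` selects the attention implementation
# that CLSTMDecoder instantiates internally. Assuming the corresponding CLI
# flag is named --attention-type, passing 'mlp' selects MLPAttentionLayer and
# 'multi-head' selects MultiheadAttention; any other value raises a KeyError.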
def build_model(cls, args, src_dict, dst_dict):
    """Build a new model instance."""
    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = args.max_positions
        args.max_target_positions = args.max_positions
    if not hasattr(args, 'share_input_output_embed'):
        args.share_input_output_embed = False
    if not hasattr(args, 'encoder_embed_path'):
        args.encoder_embed_path = None
    if not hasattr(args, 'decoder_embed_path'):
        args.decoder_embed_path = None

    encoder_embed_dict = None
    if args.encoder_embed_path:
        encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
        utils.print_embed_overlap(encoder_embed_dict, src_dict)

    decoder_embed_dict = None
    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, dst_dict)

    encoder = FConvEncoder(
        src_dict,
        embed_dim=args.encoder_embed_dim,
        embed_dict=encoder_embed_dict,
        convolutions=eval(args.encoder_layers),
        dropout=args.dropout,
        max_positions=args.max_source_positions,
    )
    decoder = FConvDecoder(
        dst_dict,
        embed_dim=args.decoder_embed_dim,
        embed_dict=decoder_embed_dict,
        convolutions=eval(args.decoder_layers),
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        dropout=args.dropout,
        max_positions=args.max_target_positions,
        share_embed=args.share_input_output_embed,
    )
    return FConvModel(encoder, decoder)
def __init__(
    self,
    # new approach
    params: Params = None,
    vocab: Dictionary = None,
    # old approach
    args=None,
    task=None,
):
    if params is not None and args is not None:
        raise ConfigurationError(
            "you cannot define both params and args; you have to decide which one to use (only one is allowed)"
        )
    if params is not None:
        encoder = FConvEncoder(
            dictionary=vocab,
        )
        decoder = FConvDecoder(
            dictionary=vocab,
        )
    else:
        if args is not None and task is not None:
            # Create and initialize encoder and decoder here
            encoder_embed_dict = None
            if args.encoder_embed_path:
                encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
                utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)

            decoder_embed_dict = None
            if args.decoder_embed_path:
                decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
                utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

            encoder = FConvEncoder(
                dictionary=task.source_dictionary,
                embed_dim=args.encoder_embed_dim,
                embed_dict=encoder_embed_dict,
                convolutions=eval(args.encoder_layers),
                dropout=args.dropout,
                max_positions=args.max_source_positions,
                batch_norm=args.batch_norm,
                use_linear_se=True,  # args.use_linear_se
            )
            decoder = FConvDecoder(
                dictionary=task.target_dictionary,
                embed_dim=args.decoder_embed_dim,
                embed_dict=decoder_embed_dict,
                convolutions=eval(args.decoder_layers),
                out_embed_dim=args.decoder_out_embed_dim,
                attention=eval(args.decoder_attention),
                dropout=args.dropout,
                max_positions=args.max_target_positions,
                share_embed=args.share_input_output_embed,
                use_linear_se=False,  # args.use_linear_se
            )
        else:
            # We have a problem!
            raise ConfigurationError("params and (args, task) are all None; something is wrong here.")

    # Call the super class
    super(FConvModel, self).__init__(encoder, decoder)

    # Correctly set the number of attention layers
    self.encoder.num_attention_layers = sum(layer is not None for layer in decoder.attention)
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure all arguments are present in older models
    base_architecture(args)

    attention_layer = {
        'dot': DotAttentionLayer,
        'general': GeneralAttentionLayer,
        'multi-head': MultiheadAttention,
        'mlp': MLPAttentionLayer,
    }

    if args.decoder_embed_path:
        decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
        utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)

    if not hasattr(args, 'max_source_positions'):
        args.max_source_positions = 1024
    if not hasattr(args, 'max_target_positions'):
        args.max_target_positions = 1024

    src_dict, tgt_dict = task.source_dictionary, task.target_dictionary

    def build_embedding(dictionary, embed_dim, path=None):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
        return emb

    def build_audio_embedding(embed_dim, dropout):
        m = nn.Linear(task.audio_features, embed_dim)
        nn.init.normal_(m.weight, mean=0, std=math.sqrt((1 - dropout) / task.audio_features))
        nn.init.constant_(m.bias, 0)
        return m

    encoder_embed_tokens = build_audio_embedding(2 * args.encoder_embed_dim, args.dropout)

    encoder = ProxyEncoder(
        args,
        tgt_dict,
        encoder_embed_tokens,
        audio_features=task.audio_features,
    )
    decoder = CLSTMDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        out_embed_dim=args.decoder_out_embed_dim,
        attention=eval(args.decoder_attention),
        hidden_size=args.hidden_size,
        dropout_in=args.dropout,
        dropout_out=args.dropout,
        num_layers=args.decoder_layers,
        attention_layer=attention_layer[args.attention_type],
        initial_state=args.decoder_initial_state,
        weight_norm=args.weight_norm,
        attention_function=args.attention_function,
        scale_norm=args.scale_norm,
    )
    return Transf2BerModel(encoder, decoder)