Example #1
    def __init__(self, args):
        super().__init__(None)
        self.w2v_encoder = Wav2VecEncoder(args)
        # The legacy (v0) architecture skips the extra LayerNorm on the encoder projection.
        self.is_v0_arch = not args.adaptor_proj
        self.w2v_proj_ln = None
        if not self.is_v0_arch and self.w2v_encoder.proj is not None:
            self.w2v_proj_ln = LayerNorm(args.decoder_embed_dim)
        self.adaptor = self.build_adaptor(args)

        self.num_updates = 0
        self.freezing_updates = args.w2v_freezing_updates
        self.finetuning_params = args.finetune_w2v_params
        # Keep only the parameters selected by finetune_w2v_params trainable.
        for k, p in self.w2v_encoder.w2v_model.named_parameters():
            p.requires_grad = need_finetuning(self.finetuning_params, k)
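
For reference, need_finetuning decides, per parameter name, whether a wav2vec parameter stays trainable. Below is a minimal sketch of such a helper, assuming finetune_w2v_params is either the string "all" or a comma-separated list of name fragments; the exact matching rule of the original helper is an assumption here.

def need_finetuning(ft_params: str, param_name: str) -> bool:
    # Hypothetical sketch; the real helper may differ in details.
    if ft_params == "all":
        return True
    # A parameter stays trainable if any listed fragment occurs in its name.
    return any(fragment in param_name for fragment in ft_params.split(","))

For example, need_finetuning("encoder.layers.11", "encoder.layers.11.self_attn.k_proj.weight") would return True, while every parameter that matches no fragment is left with requires_grad = False by the loop above.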
Example #2
    def __init__(self, args):
        super().__init__(None)
        self.w2v_encoder = Wav2VecEncoder(args)
        encoder_out_dim = self.w2v_encoder.w2v_model.encoder.embedding_dim
        # Projection + 8x shrinking
        self.adaptor = Conv1dAdaptor(
            encoder_out_dim,
            args.decoder_embed_dim,
            n_layers=args.adaptor_n_layers,
            kernel_size=args.adaptor_kernel_size,
            stride=args.adaptor_stride,
            add_layernorm=args.adaptor_layernorm,
        )
        for k, p in self.w2v_encoder.w2v_model.named_parameters():
            # Freeze pretrained models by default
            if safe_hasattr(args, "finetune_w2v_params") and XMTransformerModel.finetune_params(
                args.finetune_w2v_params, k
            ):
                p.requires_grad = True
            else:
                p.requires_grad = False
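
The "Projection + 8x shrinking" comment refers to downsampling along the time axis: each convolutional layer in the adaptor reduces the sequence length by its stride, so with adaptor_stride=2 and adaptor_n_layers=3 the overall reduction is 2^3 = 8. The snippet below is a minimal, hypothetical stride-2 convolution stack that illustrates this effect; it is not the fairseq Conv1dAdaptor, and the GELU activation and padding choice are assumptions made for the illustration.

import torch
import torch.nn as nn

class TinyConvAdaptor(nn.Module):
    # Hypothetical illustration of stride**n_layers time downsampling.
    def __init__(self, in_dim, out_dim, n_layers=3, kernel_size=3, stride=2):
        super().__init__()
        layers = []
        for i in range(n_layers):
            layers.append(
                nn.Conv1d(
                    in_dim if i == 0 else out_dim,
                    out_dim,
                    kernel_size,
                    stride=stride,
                    padding=kernel_size // 2,
                )
            )
            layers.append(nn.GELU())
        self.net = nn.Sequential(*layers)

    def forward(self, x):  # x: (batch, time, in_dim)
        y = self.net(x.transpose(1, 2))  # convolve over the time axis
        return y.transpose(1, 2)  # (batch, roughly time / stride**n_layers, out_dim)

x = torch.randn(2, 800, 768)
print(TinyConvAdaptor(768, 512)(x).shape)  # torch.Size([2, 100, 512])

With 800 input frames and three stride-2 layers, the output keeps 100 frames, matching the 8x reduction the decoder-side comment advertises.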