Example #1
    def __init__(self, options, inp_dim):
        super(MOCKINGJAY, self).__init__()

        all_states = torch.load(options["ckpt_file"], map_location='cpu')
        self.config = all_states['Settings']['Config']
        self.no_grad = bool(strtobool(options["no_grad"]))

        # increase dropout
        if str(options['dropout']) != 'default':
            self.config['mockingjay']['hidden_dropout_prob'] = float(
                options['dropout'])
            self.config['mockingjay']['attention_probs_dropout_prob'] = float(
                options['dropout'])

        # Model Config
        self.model_config = MockingjayConfig(self.config)
        self.dr = self.model_config.downsample_rate
        self.hidden_size = self.model_config.hidden_size

        # Build model
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model = MockingjayModel(self.model_config,
                                     inp_dim).to(self.device)

        # Load from a PyTorch state_dict
        load = bool(strtobool(options["load_pretrain"]))
        if load:
            self.load_model(all_states['Mockingjay'])
            print('[Mockingjay] - Number of parameters: ' + str(
                sum(p.numel()
                    for p in self.model.parameters() if p.requires_grad)))

        self.out_dim = 768  # This attribute is for pytorch-kaldi
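
A minimal usage sketch for the wrapper above (not from the source repo): the option keys mirror those read in the constructor; the checkpoint path and inp_dim are hypothetical placeholders.

    # Hedged sketch: 'ckpt_file' and inp_dim are placeholders, not real values.
    options = {
        'ckpt_file': 'path/to/mockingjay_ckpt.ckpt',  # hypothetical checkpoint path
        'no_grad': 'True',        # flags are strtobool-style strings
        'dropout': 'default',     # or e.g. '0.1' to override both dropout probabilities
        'load_pretrain': 'True',
    }
    upstream = MOCKINGJAY(options, inp_dim=160)  # inp_dim: acoustic feature size (assumed)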
Example #2
    def set_model(self, inference=False, with_head=False, from_path=None, output_attention=False):
        self.verbose('Initializing Mockingjay model.')
        
        # Build the Mockingjay model with speech prediction head
        self.model_config = MockingjayConfig(self.config)
        self.dr = self.model_config.downsample_rate
        self.hidden_size = self.model_config.hidden_size
        self.output_attention = output_attention
        
        if not inference or with_head:
            self.model = MockingjayForMaskedAcousticModel(self.model_config, self.input_dim, self.output_dim, self.output_attention).to(self.device)
            self.verbose('Number of parameters: ' + str(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))
            self.mockingjay = self.model.Mockingjay

        if inference and not with_head:
            self.mockingjay = MockingjayModel(self.model_config, self.input_dim, self.output_attention).to(self.device)
            self.verbose('Number of parameters: ' + str(sum(p.numel() for p in self.mockingjay.parameters() if p.requires_grad)))
            self.mockingjay.eval()
        elif inference and with_head:
            self.model.eval()
        elif not inference:
            self.model.train()

            # Setup optimizer
            param_optimizer = list(self.model.named_parameters())

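            # BERT-style grouping: bias and LayerNorm parameters are exempt from weight decay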
            no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
                {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
            ]
            num_train_optimization_steps = self.total_steps // self.gradient_accumulation_steps

            if self.apex:
                try:
                    from apex.optimizers import FP16_Optimizer
                    from apex.optimizers import FusedAdam
                except ImportError:
                    raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

                optimizer = FusedAdam(optimizer_grouped_parameters,
                                      lr=self.learning_rate,
                                      bias_correction=False,
                                      max_grad_norm=1.0)
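                # loss_scale == 0 selects apex's dynamic loss scaling; any other value is used as a static scale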
                if self.config['optimizer']['loss_scale'] == 0:
                    self.optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
                else:
                    self.optimizer = FP16_Optimizer(optimizer, static_loss_scale=self.config['optimizer']['loss_scale'])
                self.warmup_linear = WarmupLinearSchedule(warmup=self.warmup_proportion,
                                                          t_total=num_train_optimization_steps)
            else:
                self.optimizer = BertAdam(optimizer_grouped_parameters,
                                          lr=self.learning_rate,
                                          warmup=self.warmup_proportion,
                                          t_total=num_train_optimization_steps)
        else:
            raise NotImplementedError('Invalid Arguments!')

        if self.load: # This will be set to True by default when Tester is running set_model()
            self.load_model(inference=inference, with_head=with_head, from_path=from_path)
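
A sketch (under assumed context) of the three argument combinations set_model accepts; 'solver' stands for a hypothetical Trainer/Tester instance that already provides the config, device, and optimizer hyperparameters used above.

    # Hedged sketch: 'solver' is a placeholder object, not an API from the repo.
    solver.set_model(inference=False)                  # training: builds the masked-acoustic model and an optimizer
    solver.set_model(inference=True, with_head=True)   # inference with the prediction head, eval mode
    solver.set_model(inference=True, with_head=False)  # inference on the encoder only (MockingjayModel)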
Example #3
    def __init__(self, options, inp_dim):
        super(MOCKINGJAY, self).__init__()
        
        all_states = torch.load(options["ckpt_file"], map_location='cpu')
        self.config = all_states['Settings']['Config']
        self.no_grad = bool(strtobool(options['no_grad']))
        self.spec_aug = bool(strtobool(options['spec_aug']))
        self.spec_aug_prev = bool(strtobool(options['spec_aug_prev']))
        self.weighted_sum = bool(strtobool(options['weighted_sum']))
        self.select_layer = int(options['select_layer'])
        # 'no_grad' and 'spec_aug_prev' cannot both be False
        if (not self.no_grad) and (not self.spec_aug_prev):
            raise RuntimeError("At most one of 'no_grad' and 'spec_aug_prev' can be False!")
        
        # increase dropout
        if str(options['dropout']) != 'default':
            self.config['mockingjay']['hidden_dropout_prob'] = float(options['dropout'])
            self.config['mockingjay']['attention_probs_dropout_prob'] = float(options['dropout'])

        # Model Config
        self.model_config = MockingjayConfig(self.config)
        self.dr = self.model_config.downsample_rate
        self.hidden_size = self.model_config.hidden_size
        self.num_layers = self.model_config.num_hidden_layers
        if self.select_layer not in range(-1, self.num_layers):
            raise RuntimeError("Out of range int for 'select_layer'!")

        # use weighted sum from all layers
        if self.weighted_sum:
            self.weight = nn.Parameter(torch.ones(self.num_layers) / self.num_layers)

        # Build model
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model = MockingjayModel(self.model_config, inp_dim).to(self.device)
        if self.no_grad:
            self.model.eval()
        else:
            self.model.train()
        
        # Load from a PyTorch state_dict
        load = bool(strtobool(options["load_pretrain"]))
        if load: 
            self.load_model(all_states['Mockingjay'])
            print('[Mockingjay] - Number of parameters: ' + str(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))
        
        self.out_dim = self.hidden_size  # 768; this attribute is for pytorch-kaldi
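
A usage sketch for this extended wrapper (same assumptions as before): the extra keys control SpecAugment placement, the learned layer-weighted sum, and which hidden layer to output; all values are illustrative.

    # Hedged sketch: paths and dimensions are placeholders.
    options = {
        'ckpt_file': 'path/to/mockingjay_ckpt.ckpt',  # hypothetical checkpoint path
        'no_grad': 'True',
        'spec_aug': 'False',
        'spec_aug_prev': 'True',   # False together with no_grad=False would raise above
        'weighted_sum': 'True',    # learn per-layer weights over all hidden layers
        'select_layer': '-1',      # -1 = last layer; must lie in range(-1, num_layers)
        'dropout': 'default',
        'load_pretrain': 'True',
    }
    upstream = MOCKINGJAY(options, inp_dim=160)  # inp_dim: acoustic feature size (assumed)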