def __init__(self, datarc, upstream_config, device='cuda', multi_gpu=False, **kwargs):
    super(UpstreamPretrainExpert, self).__init__()

    self.datarc = datarc
    self.device = device
    self.multi_gpu = multi_gpu

    # Load the upstream config from YAML
    with open(upstream_config, 'r') as f:
        self.upstream_config = yaml.load(f, Loader=yaml.FullLoader)
    print('[UpstreamPretrainExpert] - Using upstream config from:', upstream_config)

    # Pick the input pipeline: on-the-fly extraction (kaldi or online
    # preprocessor) when a LibriSpeech root is given, otherwise
    # pre-extracted features read from disk.
    if 'libri_root' in self.datarc and 'kaldi' in self.upstream_config['audio']:
        print('[UpstreamPretrainExpert] - Using kaldi feature extracter, on-the-fly feature extraction')
        extracter, input_dim = get_extracter(self.upstream_config['audio'])
        output_dim = None
    elif 'libri_root' in self.datarc:
        print('[UpstreamPretrainExpert] - Using online preprocessor, on-the-fly feature extraction')
        extracter, input_dim, output_dim = get_preprocessor(self.upstream_config['audio'])
    else:
        print('[UpstreamPretrainExpert] - Using features pre-extracted and saved')
        extracter, input_dim = None, self.upstream_config['transformer']['input_dim']
        output_dim = None
    print('[UpstreamPretrainExpert] - Input dim:', input_dim)

    self.dataloader = self._get_train_dataloader(extracter)

    # Build the masked acoustic model from the transformer section of the config
    print('[UpstreamPretrainExpert] - Initializing model...')
    model_config = TransformerConfig(self.upstream_config['transformer'])
    setattr(model_config, 'loss', self.upstream_config['task']['loss'])
    self.model = TransformerForMaskedAcousticModel(model_config, input_dim, output_dim=output_dim)

    if self.multi_gpu:
        self.model = torch.nn.DataParallel(self.model)
        print('[UpstreamPretrainExpert] - Multi-GPU training enabled: ' + str(torch.cuda.device_count()))
    print('[UpstreamPretrainExpert] - Number of parameters: '
          + str(sum(p.numel() for p in self.model.parameters() if p.requires_grad)))
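# ---------------------------------------------------------------------
# Usage sketch (hypothetical): without 'libri_root' in `datarc`, the
# constructor above takes the pre-extracted-features branch, so only
# upstream_config['transformer']['input_dim'] and
# upstream_config['task']['loss'] are dereferenced here. The values and
# file name below are illustrative placeholders; a real 'transformer'
# section must also carry every field TransformerConfig reads
# (hidden_size, num_hidden_layers, ...), elided in this sketch.
# ---------------------------------------------------------------------
import yaml

cfg = {
    'transformer': {'input_dim': 80},  # dim of the pre-extracted features
    'task': {'loss': 'L1'},            # attached to the model config as `loss`
}
with open('upstream_config.yaml', 'w') as f:
    yaml.dump(cfg, f)

expert = UpstreamPretrainExpert(
    datarc={},                # dataset settings consumed by _get_train_dataloader
    upstream_config='upstream_config.yaml',
    device='cpu',
    multi_gpu=False,
)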
def __init__(self, options, inp_dim=-1, config=None, on_the_fly_config=None, verbose=False):
    super(TransformerBuilder, self).__init__()

    # Read the model config, either from a YAML file or from a saved checkpoint
    if config is not None:
        with open(config, 'r') as f:
            self.config = yaml.load(f, Loader=yaml.FullLoader)
    else:
        self.all_states = torch.load(options["ckpt_file"], map_location='cpu')
        self.config = self.all_states['Config']

    # Parse the options dict (flags arrive as strings, hence strtobool)
    self.load = bool(strtobool(options["load_pretrain"]))
    self.no_grad = bool(strtobool(options['no_grad']))
    self.spec_aug = bool(strtobool(options['spec_aug']))
    self.spec_aug_prev = bool(strtobool(options['spec_aug_prev']))
    self.weighted_sum = bool(strtobool(options['weighted_sum']))
    self.select_layer = int(options['select_layer'])
    self.permute_input = bool(strtobool(options['permute_input']))
    if (not self.no_grad) and (not self.spec_aug_prev):
        raise RuntimeError("Only one of 'no_grad' and 'spec_aug_prev' can be set to False!")
    if str(options['dropout']) != 'default':
        # Override both dropout probabilities if specified
        self.config['transformer']['hidden_dropout_prob'] = float(options['dropout'])
        self.config['transformer']['attention_probs_dropout_prob'] = float(options['dropout'])

    # Set model config
    self.model_config = TransformerConfig(self.config['transformer'])
    self.hidden_size = self.model_config.hidden_size
    self.num_layers = self.model_config.num_hidden_layers
    self.max_input_length = self.config['task']['sequence_length']

    # Decide the input pipeline: on-the-fly extraction vs. a fixed input dim
    if on_the_fly_config is not None:
        with open(on_the_fly_config, 'r') as f:
            self.config['audio'] = yaml.load(f, Loader=yaml.FullLoader)
    if 'audio' in self.config:
        if 'kaldi' in self.config['audio']:
            self.extracter, self.inp_dim = get_extracter(self.config['audio'])
            self.spec_dim = self.inp_dim
        else:
            self.extracter, self.inp_dim, self.spec_dim = get_preprocessor(self.config['audio'], process_input_only=True)
            self.target_level = self.config['audio']['target_level']
    elif inp_dim != -1:
        self.extracter, self.inp_dim, self.spec_dim = None, inp_dim, inp_dim
    else:
        self.extracter, self.inp_dim, self.spec_dim = None, self.config['transformer']['input_dim'], self.config['transformer']['input_dim']

    if self.max_input_length > 0 and verbose:
        print('[Transformer] - Maximum input length:', self.max_input_length)
    if self.select_layer not in range(-1, self.num_layers):
        raise RuntimeError("Out of range int for 'select_layer'!")
    if self.weighted_sum:
        # Learnable per-layer weights for weighted-sum feature aggregation
        self.weight = nn.Parameter(torch.ones(self.num_layers) / self.num_layers)
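# ---------------------------------------------------------------------
# Usage sketch (hypothetical): every flag in `options` arrives as a
# string and is routed through strtobool above, so a dict that satisfies
# the parsing looks like the following. The checkpoint path is a
# placeholder; the key names are exactly those the constructor reads.
# ---------------------------------------------------------------------
options = {
    'ckpt_file': 'path/to/states.ckpt',  # only read when `config` is None
    'load_pretrain': 'True',
    'no_grad': 'True',
    'dropout': 'default',      # any float-like string overrides both dropout probs
    'spec_aug': 'False',
    'spec_aug_prev': 'True',   # at least one of no_grad / spec_aug_prev must be True
    'weighted_sum': 'False',
    'select_layer': '-1',      # must lie in range(-1, num_hidden_layers)
    'permute_input': 'True',
}
builder = TransformerBuilder(options, inp_dim=-1, config=None, verbose=True)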