def __init__(self, ckpt, **kwargs):
    super().__init__(**kwargs)
    # fairseq > 0.10.2 checkpoints need the architecture defaults filled in
    # before the model can be rebuilt; 0.10.2 checkpoints load directly.
    if version.parse(fairseq.__version__) > version.parse("0.10.2"):
        cp = torch.load(ckpt)
        args = cp["args"]
        base_wav2vec_architecture(args)
        self.model = Wav2VecModel.build_model(args, task=None)
        self.model.load_state_dict(cp["model"])
    elif version.parse(fairseq.__version__) == version.parse("0.10.2"):
        cp = torch.load(ckpt)
        self.model = Wav2VecModel.build_model(cp["args"], task=None)
        self.model.load_state_dict(cp["model"])
    else:
        raise NotImplementedError
    if len(self.hooks) == 0:
        # Expose (batch, seqlen, feat_dim) features from the extractor, the
        # aggregator, and every intermediate aggregator conv layer.
        self.add_hook(
            "self.model.feature_extractor",
            lambda input, output: output.transpose(1, 2),
        )
        self.add_hook(
            "self.model.feature_aggregator",
            lambda input, output: output.transpose(1, 2),
        )
        module_name = "self.model.feature_aggregator.conv_layers"
        for conv_id in range(len(eval(module_name)) - 1):
            self.add_hook(
                f"{module_name}[{conv_id + 1}]",
                lambda input, output: input[0].transpose(1, 2),
            )
def __init__(self, checkpoint_path):
    super(VQ_Wav2Vec, self).__init__()
    encoder_path = utils.to_absolute_path(checkpoint_path)
    checkpoint = torch.load(encoder_path, map_location=lambda storage, loc: storage)
    self.encoder = Wav2VecModel.build_model(checkpoint['args'], task=None)
    self.encoder.load_state_dict(checkpoint['model'])
def __init__(self, logfile='_logs/_logs_experiment29bigLR.txt',
             save_name_model='convnet/convnet_experiment29bigLR.pt', batch_size=8,
             slice_fn=Data.wav2vec_extraction, scorer=Data.compute_scores,
             multi_head=True, decay_factor=0,
             metadata_file='_Data_metadata_letters_wav2vec.pk'):
    convnet_config = {'emb_dim': 384, 'hid_dim': 512}
    cp = torch.load('wav2vec_large.pt')
    wav2vec_model = Wav2VecModel.build_model(cp['args'], task=None)
    wav2vec_model.load_state_dict(cp['model'])
    wav2vec_model.eval()
    super().__init__(logfile=logfile, save_name_model=save_name_model, slice_fn=slice_fn,
                     batch_size=batch_size, scorer=scorer, multi_head=multi_head,
                     metadata_file=metadata_file, convnet_config=convnet_config,
                     wav2vec_model=wav2vec_model, save_features=True,
                     decay_factor=decay_factor, lr=1e-4)
    self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        self.optimizer, 200, eta_min=1e-6, last_epoch=-1)
    u.load_model(self.model, self.save_name_model, restore_only_similars=True)
def create_wav2vec(self, weight_path):
    cp = torch.load(weight_path)
    wav2vec = Wav2VecModel.build_model(cp['args'], task=None)
    wav2vec.load_state_dict(cp['model'])
    wav2vec.eval()
    for param in wav2vec.parameters():
        param.requires_grad = False
    return wav2vec
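# Hedged usage sketch (not from any single snippet here): nearly every loader in
# this section follows the same torch.load -> build_model -> load_state_dict
# recipe, after which the model serves as a frozen feature extractor. The
# checkpoint path and variable names are illustrative.
import torch
from fairseq.models.wav2vec import Wav2VecModel

cp = torch.load('wav2vec_large.pt', map_location='cpu')
model = Wav2VecModel.build_model(cp['args'], task=None)
model.load_state_dict(cp['model'])
model.eval()

wav = torch.randn(1, 16000)                    # (batch, samples): 1 s of 16 kHz audio
with torch.no_grad():
    z = model.feature_extractor(wav)           # latent features, (batch, 512, frames)
    c = model.feature_aggregator(z)            # context features, (batch, 512, frames)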
def __init__(self, fname):
    super().__init__()
    device = torch.device('cpu')
    checkpoint = torch.load(fname, map_location=device)
    self.args = checkpoint["args"]
    model = Wav2VecModel.build_model(self.args, None)
    model.load_state_dict(checkpoint["model"])
    model.eval()
    self.model = model
def post_init(self):
    import torch
    from fairseq.models.wav2vec import Wav2VecModel
    cp = torch.load(self.model_path, map_location=torch.device('cpu'))
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    self.to_device(self.model)
    self._sess_func = None
    self._tensor_func = torch.tensor
def __init__(self, fname):
    super().__init__()
    checkpoint = torch.load(fname)
    self.args = checkpoint["args"]
    model = Wav2VecModel.build_model(self.args, None)
    model.load_state_dict(checkpoint["model"])
    model.eval()
    self.model = model
def load_model(self, modelpath):
    if not modelpath.endswith('.pt'):
        print('The model path is not valid. Try again with a valid .pt checkpoint.\n'
              'Given model path:', modelpath)
        return
    print("Loading wav2vec model ... ", end='')
    tload = torch.load(modelpath)
    self.model = Wav2VecModel.build_model(tload['args'], task=None)
    self.model.load_state_dict(tload['model'])
    self.model.eval()
    print(" ## Model loaded ##")
def __init__(self,
             wav2vec_model='/home/michael/Documents/Cogmaster/M1/S1/stage/vq-wav2vec.pt',
             cache_file='/home/michael/Documents/Cogmaster/M1/S1/stage/model_caches/vq_wav2vec.ft',
             max_files=None, **kwargs):
    cp = torch.load(wav2vec_model, map_location=torch.device('cpu'))
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    super().__init__(cache_file, max_files=max_files, **kwargs)
def __init__(self, ckpt, feature_selection, **kwargs):
    super(UpstreamExpert, self).__init__()
    cp = torch.load(ckpt)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)  # z: (batch_size, feat_dim, seqlen)
    c = self.model.feature_aggregator(z)  # c: (batch_size, feat_dim, seqlen)
    self.feature_selection = feature_selection  # 'z' or 'c'
    self.output_dim = eval(self.feature_selection).transpose(1, 2).size(-1)
def __init__(self, ckpt, **kwargs):
    super(UpstreamExpert, self).__init__()
    cp = torch.load(ckpt)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)  # z: (batch_size, feat_dim, seqlen)
    pseudo_features = z.transpose(1, 2)
    self.output_dim = pseudo_features.size(-1)
def __init__(self, checkpoint_path):
    super(VQwav2vec, self).__init__()
    self.cp = torch.load(checkpoint_path)
    # Initialize architecture
    self.model = Wav2VecModel.build_model(self.cp['args'], task=None)
    # Load weights
    self.model.load_state_dict(self.cp['model'])
    self.gEncoder = self.model._modules['feature_extractor']
    self.gAR = self.model._modules['feature_aggregator']
    self.gVQ = self.model._modules['vector_quantizer']
    self.gEncoder.DOWNSAMPLING = 160
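# Hedged usage sketch for the wrapper above (checkpoint path and variable names
# are assumptions): the three exposed submodules map a waveform to dense and
# discrete features; DOWNSAMPLING = 160 matches the ~10 ms hop at 16 kHz.
import torch

vq = VQwav2vec('vq-wav2vec.pt')
wav = torch.randn(1, 16000)            # 1 s of 16 kHz audio
with torch.no_grad():
    z = vq.gEncoder(wav)               # (1, 512, frames), frames ~ samples / 160
    c = vq.gAR(z)                      # aggregated context features
    _, idxs = vq.gVQ.forward_idx(z)    # discrete codebook indices per frame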
def __init__(self, model_type='wav2vec', PRETRAINED_MODEL_PATH='/path/to/wav2vec_large.pt'):
    super().__init__()
    self.model_type = model_type
    if model_type == 'wav2vec':
        ckpt = torch.load(PRETRAINED_MODEL_PATH)
        self.model = Wav2VecModel.build_model(ckpt['args'], task=None)
        self.model.load_state_dict(ckpt['model'])
        self.model = self.model.feature_extractor
        self.model.eval()
    else:
        print('Please assign a loss model')
        sys.exit()
def post_init(self):
    super().post_init()
    if self.model_path and os.path.exists(self.model_path):
        import torch
        from fairseq.models.wav2vec import Wav2VecModel
        cp = torch.load(self.model_path, map_location=torch.device('cpu'))
        self.model = Wav2VecModel.build_model(cp['args'], task=None)
        self.model.load_state_dict(cp['model'])
        self.model.eval()
        self.to_device(self.model)
        self._tensor_func = torch.tensor
    else:
        raise PretrainedModelFileDoesNotExist(
            f'model at {self.model_path} does not exist')
def __init__(self):
    cp = torch.load(
        '/hpc/gsir059/INTERSPEECH/MOSI-SEMI/trained_ssl/wav2vec/vq-wav2vec-Kmeans/vq-wav2vec_kmeans.pt'
    )
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    # Roberta wav2vec
    self.roberta = RobertaModel.from_pretrained(
        '/hpc/gsir059/INTERSPEECH/MOSI-SEMI/trained_ssl/wav2vec/vq-wav2vec-Kmeans-Roberta',
        checkpoint_file='bert_kmeans.pt')
    self.roberta.eval()
def __init__(self, input_feat_per_channel, vggblock_config=DEFAULT_ENC_VGGBLOCK_CONFIG,
             transformer_config=DEFAULT_ENC_TRANSFORMER_CONFIG, encoder_output_dim=512,
             in_channels=1, transformer_context=None, transformer_sampling=None):
    super().__init__(input_feat_per_channel, vggblock_config, transformer_config,
                     encoder_output_dim, in_channels, transformer_context,
                     transformer_sampling)
    wav2vec_checkpoint = HOME + '/data/fairseq-data/wav2vec_models/checkpoint_last.pt'
    # wav2vec_checkpoint = '/tmp/checkpoint_last.pt'
    cp = checkpoint_utils.load_checkpoint_to_cpu(wav2vec_checkpoint)
    model = Wav2VecModel.build_model(cp['args'], task=None)
    model.load_state_dict(cp['model'])
    freeze_module_params(model)
    self.wav2vec_model = model
def __init__(self):
    cp = torch.load(
        '/home/gsir059/Documents/EMOTION-FINE/pretrained_ssl_models/vq-wav2vec-kmeans/vq-wav2vec_kmeans.pt'
    )
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    # Roberta wav2vec
    self.roberta = RobertaModel.from_pretrained(
        '/home/gsir059/Documents/EMOTION-FINE/pretrained_ssl_models/wav2vec-roberta',
        checkpoint_file='bert_kmeans.pt')
    self.roberta.eval()
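# Hedged sketch (an assumption, not from the source): how the vq-wav2vec +
# RoBERTa pair loaded above is typically chained, following fairseq's
# vq-wav2vec featurize example. The "g0-g1" per-frame token format is an
# assumption about the vocabulary bert_kmeans.pt was trained on.
import torch

def audio_to_roberta_features(model, roberta, wav):
    # wav: (1, samples) float tensor of 16 kHz audio
    with torch.no_grad():
        z = model.feature_extractor(wav)                 # (1, 512, frames)
        _, idxs = model.vector_quantizer.forward_idx(z)  # (1, frames, groups)
        # One token per frame, codebook-group indices joined with '-'
        line = ' '.join('-'.join(map(str, frame.tolist())) for frame in idxs.squeeze(0))
        tokens = roberta.task.source_dictionary.encode_line(
            line, add_if_not_exist=False, append_eos=True).long()
        return roberta.extract_features(tokens)          # contextual features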
def load_model(self):
    cp = torch.load(self.checkpoint, map_location=lambda x, _: x)
    model = Wav2VecModel.build_model(cp["args"], None)
    # Older checkpoints may not record where quantization happens; default to "encoder".
    self.quantize_location = getattr(cp["args"], "vq", "encoder")
    model.load_state_dict(cp["model"])
    model.eval().float()
    model.cuda()
    if self.data_parallel:
        model = nn.DataParallel(model)
    return model
def __init__(self, ckpt, feature_selection, **kwargs):
    super(UpstreamExpert, self).__init__()
    self.feature_selection = feature_selection
    cp = torch.load(ckpt)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)  # z: (batch_size, feat_dim, seqlen)
    if self.feature_selection == 'codewords':
        # codewords: (batch_size, feat_dim, seqlen) in torch.FloatTensor
        codewords, _ = self.model.vector_quantizer.forward_idx(z)
    pseudo_features = eval(self.feature_selection).transpose(1, 2)
    self.output_dim = pseudo_features.size(-1)
def __init__(self, fname: str):
    """Load a checkpointed wav2vec model and use it as an nn.Module.

    Implements feature extraction as a forward pass.

    Args:
        fname (str): path to the model checkpoint
    """
    # Example taken from
    # https://github.com/pytorch/fairseq/blob/master/examples/wav2vec/wav2vec_featurize.py#L35
    super().__init__()
    # Load the checkpoint
    checkpoint = torch.load(fname)
    # Build the model and restore its weights
    self.args = checkpoint["args"]
    model = Wav2VecModel.build_model(self.args, None)
    model.load_state_dict(checkpoint["model"])
    # Switch to eval mode
    model.eval()
    self.model = model
def load_wav2vec(self, wav2vecpath):
    if not wav2vecpath:
        return None
    print("LOADING WAV2VEC....")
    cp = torch.load(wav2vecpath, map_location=torch.device('cpu'))
    # 'w2v2-*' methods use a wav2vec 2.0 checkpoint; anything else uses wav2vec.
    if self.method.split('-')[0] == 'w2v2':
        model = Wav2Vec2Model.build_model(cp['args'], task=None)
    else:
        model = Wav2VecModel.build_model(cp['args'], task=None)
    model.load_state_dict(cp['model'])
    model = model.eval()
    if torch.cuda.is_available():
        print('moving WAV2VEC to CUDA')
        model.cuda()
    return model
def __init__(self, ckpt, feature_selection='z', **kwargs):
    super(UpstreamExpert, self).__init__()
    self.feature_selection = feature_selection or 'z'
    # fairseq > 0.10.2 ships a checkpoint loader; 0.10.2 rebuilds from saved args.
    if version.parse(fairseq.__version__) > version.parse("0.10.2"):
        model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [ckpt])
        self.model = model[0]
        self.model.eval()
    elif version.parse(fairseq.__version__) == version.parse("0.10.2"):
        cp = torch.load(ckpt)
        self.model = Wav2VecModel.build_model(cp['args'], task=None)
        self.model.load_state_dict(cp['model'])
    else:
        raise NotImplementedError
    # Probe the output dimension with a dummy waveform.
    pseudo_input = torch.randn(SAMPLE_RATE * EXAMPLE_SEC)
    pseudo_output = self.forward([pseudo_input])
    self.output_dim = pseudo_output[0].size(-1)
def __init__(self, ckpt, feature_selection, **kwargs):
    super(UpstreamExpert, self).__init__()
    if version.parse(fairseq.__version__) > version.parse("0.10.2"):
        model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [ckpt])
        self.model = model[0]
        self.model.eval()
    elif version.parse(fairseq.__version__) == version.parse("0.10.2"):
        cp = torch.load(ckpt)
        self.model = Wav2VecModel.build_model(cp['args'], task=None)
        self.model.load_state_dict(cp['model'])
    else:
        raise NotImplementedError
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)
    c = self.model.feature_aggregator(z)
    self.feature_selection = feature_selection
    self.output_dim = eval(self.feature_selection).transpose(1, 2).size(-1)
def __init__(self, out_channels, sample_rate, preemphasis=0.0,
             use_context_features=True, extra_args=None, **kwargs):
    from fairseq.models.wav2vec import Wav2VecModel
    assert sample_rate == extra_args.sample_rate, \
        f'Sample rate {sample_rate} is not equal to frontend sample rate ' \
        f'{extra_args.sample_rate}, use --sample-rate {extra_args.sample_rate}'
    if extra_args.aggregator == 'cnn':
        agg_layers = eval(extra_args.conv_aggregator_layers)
        agg_dim = agg_layers[-1][0]
        assert out_channels == agg_dim, \
            f'Out channels {out_channels} is not equal to frontend output dim ' \
            f'{agg_dim}, use --num-input-features {agg_dim}'
    elif extra_args.aggregator == 'gru':
        assert out_channels == extra_args.gru_dim, \
            f'Out channels {out_channels} is not equal to frontend output dim ' \
            f'{extra_args.gru_dim}, use --num-input-features {extra_args.gru_dim}'
    else:
        raise RuntimeError(f'Wrong wav2vec aggregator {extra_args.aggregator}. Use cnn or gru instead.')
    super().__init__()
    self.fairseq_args = extra_args
    self.preemphasis = preemphasis
    self.use_context_features = use_context_features
    self.model = Wav2VecModel.build_model(extra_args, None).eval()
def __init__(self, weight_path=None, use_cpu=True):
    """
    Initialize an embeddor that uses the wav2vec model.

    Inputs:
        weight_path - path to a .pt file for the wav2vec_large model
        use_cpu - boolean, whether to use the CPU or the GPU
    """
    if weight_path is None:
        print('Downloading wav2vec model')
        if not os.path.exists('models'):
            os.makedirs('models')
        url = 'https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_large.pt'
        wget.download(url, 'models/wav2vec_large.pt')
        weight_path = 'models/wav2vec_large.pt'
    if use_cpu:
        cp = torch.load(weight_path, map_location=torch.device('cpu'))
    else:
        cp = torch.load(weight_path)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
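# Hedged usage example (illustrative): `Wav2VecEmbeddor` is a hypothetical name
# for the class whose __init__ appears above, and librosa is assumed for audio I/O.
import librosa
import torch

embeddor = Wav2VecEmbeddor(use_cpu=True)       # downloads wav2vec_large.pt if needed
y, sr = librosa.load('speech.wav', sr=16000)   # wav2vec expects 16 kHz mono audio
with torch.no_grad():
    z = embeddor.model.feature_extractor(torch.from_numpy(y).unsqueeze(0))
    c = embeddor.model.feature_aggregator(z)   # (1, 512, frames) context embedding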
import torch
from fairseq.models.wav2vec import Wav2VecModel
import librosa
import numpy as np
import pickle

cp = torch.load('/content/drive/My Drive/EmotionRNN2/wav2vec_large.pt',
                map_location=torch.device('cpu'))
model = Wav2VecModel.build_model(cp['args'], task=None)
model.load_state_dict(cp['model'])
model.eval()

data = pickle.load(open('/content/drive/My Drive/Emotion RNN/IEMOCAP_features_raw.pkl', 'rb'),
                   encoding="latin1")
videoIDs, videoSpeakers, videoLabels, videoText, videoAudio, videoVisual, \
    videoSentence, trainVid, testVid = data

base = '/content/drive/My Drive/FYP/IEMOCAP_full_release/Session'
dataset_for_experiment = {}
for i in videoIDs:
    file = base + i[4] + '/sentences/wav/' + i + '/'
    data = []
    for j in videoIDs[i]:
        y, sr = librosa.load(file + j + '.wav', sr=16000)  # y -> (t)
        b = torch.from_numpy(y).unsqueeze(0)  # b -> (1, t)
        z = model.feature_extractor(b)  # z -> (1, 512, t)
        z = model.feature_aggregator(z).squeeze(0)  # z -> (1, 512, t) -> (512, t)
        start = 0
def load_weights(self):
    cp = torch.load(os.path.join(self.wav2vec_dir, self.weights_fn),
                    map_location=self.device)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
def init(self, model_url: str):
    self.model_url = model_url
    self.model_name = self.model_url.replace(
        'https://dl.fbaipublicfiles.com/fairseq/', '').replace('/', '_')
    torch_model = torch.hub.load_state_dict_from_url(self.model_url)
    self.model = Wav2VecModel.build_model(torch_model['args'], task=None)
    self.model.load_state_dict(torch_model['model'])  # restore the pretrained weights
def load_wav2vec(path, map_location):
    cp = torch.load(path, map_location)
    model = Wav2VecModel.build_model(cp['args'], task=None)
    model.load_state_dict(cp['model'], strict=True)
    return model
def load_model() -> torch.nn.Module:
    cp = torch.load(WAV2VEC_PATH)
    model = Wav2VecModel.build_model(cp["args"], task=None)
    model.load_state_dict(cp["model"])
    model.eval()
    return model.to(DEVICE)
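# Hedged call-site sketch for load_model above: WAV2VEC_PATH and DEVICE are
# module-level constants in the source; the values here are assumptions.
WAV2VEC_PATH = 'wav2vec_large.pt'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = load_model()
wav = torch.randn(1, 16000, device=DEVICE)     # 1 s of 16 kHz audio
with torch.no_grad():
    feats = model.feature_aggregator(model.feature_extractor(wav))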