def __init__(self, ckpt, **kwargs):
    super().__init__(**kwargs)
    # fairseq > 0.10.2 checkpoints need the architecture defaults filled in
    # before the model can be rebuilt; 0.10.2 checkpoints load directly.
    if version.parse(fairseq.__version__) > version.parse("0.10.2"):
        cp = torch.load(ckpt)
        args = cp["args"]
        base_wav2vec_architecture(args)
        self.model = Wav2VecModel.build_model(args, task=None)
        self.model.load_state_dict(cp["model"])
    elif version.parse(fairseq.__version__) == version.parse("0.10.2"):
        cp = torch.load(ckpt)
        self.model = Wav2VecModel.build_model(cp["args"], task=None)
        self.model.load_state_dict(cp["model"])
    else:
        raise NotImplementedError
    if len(self.hooks) == 0:
        # Expose (batch, seqlen, feat_dim) features from the extractor, the
        # aggregator, and every intermediate aggregator conv layer.
        self.add_hook(
            "self.model.feature_extractor",
            lambda input, output: output.transpose(1, 2),
        )
        self.add_hook(
            "self.model.feature_aggregator",
            lambda input, output: output.transpose(1, 2),
        )
        module_name = "self.model.feature_aggregator.conv_layers"
        for conv_id in range(len(eval(module_name)) - 1):
            self.add_hook(
                f"{module_name}[{conv_id + 1}]",
                lambda input, output: input[0].transpose(1, 2),
            )
def __init__(self, checkpoint_path):
    super(VQ_Wav2Vec, self).__init__()
    encoder_path = utils.to_absolute_path(checkpoint_path)
    checkpoint = torch.load(encoder_path, map_location=lambda storage, loc: storage)
    self.encoder = Wav2VecModel.build_model(checkpoint['args'], task=None)
    self.encoder.load_state_dict(checkpoint['model'])
def __init__(self, logfile='_logs/_logs_experiment29bigLR.txt',
             save_name_model='convnet/convnet_experiment29bigLR.pt', batch_size=8,
             slice_fn=Data.wav2vec_extraction, scorer=Data.compute_scores,
             multi_head=True, decay_factor=0,
             metadata_file='_Data_metadata_letters_wav2vec.pk'):
    convnet_config = {'emb_dim': 384, 'hid_dim': 512}
    cp = torch.load('wav2vec_large.pt')
    wav2vec_model = Wav2VecModel.build_model(cp['args'], task=None)
    wav2vec_model.load_state_dict(cp['model'])
    wav2vec_model.eval()
    super().__init__(logfile=logfile, save_name_model=save_name_model, slice_fn=slice_fn,
                     batch_size=batch_size, scorer=scorer, multi_head=multi_head,
                     metadata_file=metadata_file, convnet_config=convnet_config,
                     wav2vec_model=wav2vec_model, save_features=True,
                     decay_factor=decay_factor, lr=1e-4)
    self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        self.optimizer, 200, eta_min=1e-6, last_epoch=-1)
    u.load_model(self.model, self.save_name_model, restore_only_similars=True)
def create_wav2vec(self, weight_path):
    cp = torch.load(weight_path)
    wav2vec = Wav2VecModel.build_model(cp['args'], task=None)
    wav2vec.load_state_dict(cp['model'])
    wav2vec.eval()
    for param in wav2vec.parameters():
        param.requires_grad = False
    return wav2vec
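# Hedged usage sketch (not from any single snippet here): nearly every loader in
# this section follows the same torch.load -> build_model -> load_state_dict
# recipe, after which the model serves as a frozen feature extractor. The
# checkpoint path and variable names are illustrative.
import torch
from fairseq.models.wav2vec import Wav2VecModel

cp = torch.load('wav2vec_large.pt', map_location='cpu')
model = Wav2VecModel.build_model(cp['args'], task=None)
model.load_state_dict(cp['model'])
model.eval()

wav = torch.randn(1, 16000)                    # (batch, samples): 1 s of 16 kHz audio
with torch.no_grad():
    z = model.feature_extractor(wav)           # latent features, (batch, 512, frames)
    c = model.feature_aggregator(z)            # context features, (batch, 512, frames)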
def __init__(self, fname):
    super().__init__()
    device = torch.device('cpu')
    checkpoint = torch.load(fname, map_location=device)
    self.args = checkpoint["args"]
    model = Wav2VecModel.build_model(self.args, None)
    model.load_state_dict(checkpoint["model"])
    model.eval()
    self.model = model
def post_init(self):
    import torch
    from fairseq.models.wav2vec import Wav2VecModel
    cp = torch.load(self.model_path, map_location=torch.device('cpu'))
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    self.to_device(self.model)
    self._sess_func = None
    self._tensor_func = torch.tensor
def __init__(self, fname):
    super().__init__()
    checkpoint = torch.load(fname)
    self.args = checkpoint["args"]
    model = Wav2VecModel.build_model(self.args, None)
    model.load_state_dict(checkpoint["model"])
    model.eval()
    self.model = model
def load_model(self, modelpath):
    if not modelpath.endswith('.pt'):
        print('The model path is not valid. Try again with a valid .pt checkpoint.\n'
              'Given model path:', modelpath)
        return
    print("Loading wav2vec model ... ", end='')
    tload = torch.load(modelpath)
    self.model = Wav2VecModel.build_model(tload['args'], task=None)
    self.model.load_state_dict(tload['model'])
    self.model.eval()
    print(" ## Model loaded ##")
def __init__(self,
             wav2vec_model='/home/michael/Documents/Cogmaster/M1/S1/stage/vq-wav2vec.pt',
             cache_file='/home/michael/Documents/Cogmaster/M1/S1/stage/model_caches/vq_wav2vec.ft',
             max_files=None, **kwargs):
    cp = torch.load(wav2vec_model, map_location=torch.device('cpu'))
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    super().__init__(cache_file, max_files=max_files, **kwargs)
def __init__(self, ckpt, feature_selection, **kwargs):
    super(UpstreamExpert, self).__init__()
    cp = torch.load(ckpt)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)  # z: (batch_size, feat_dim, seqlen)
    c = self.model.feature_aggregator(z)  # c: (batch_size, feat_dim, seqlen)
    self.feature_selection = feature_selection  # 'z' or 'c'
    self.output_dim = eval(self.feature_selection).transpose(1, 2).size(-1)
def __init__(self, ckpt, **kwargs):
    super(UpstreamExpert, self).__init__()
    cp = torch.load(ckpt)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)  # z: (batch_size, feat_dim, seqlen)
    pseudo_features = z.transpose(1, 2)
    self.output_dim = pseudo_features.size(-1)
def __init__(self, checkpoint_path):
    super(VQwav2vec, self).__init__()
    self.cp = torch.load(checkpoint_path)
    # Initialize architecture
    self.model = Wav2VecModel.build_model(self.cp['args'], task=None)
    # Load weights
    self.model.load_state_dict(self.cp['model'])
    self.gEncoder = self.model._modules['feature_extractor']
    self.gAR = self.model._modules['feature_aggregator']
    self.gVQ = self.model._modules['vector_quantizer']
    self.gEncoder.DOWNSAMPLING = 160
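# Hedged usage sketch for the wrapper above (checkpoint path and variable names
# are assumptions): the three exposed submodules map a waveform to dense and
# discrete features; DOWNSAMPLING = 160 matches the ~10 ms hop at 16 kHz.
import torch

vq = VQwav2vec('vq-wav2vec.pt')
wav = torch.randn(1, 16000)            # 1 s of 16 kHz audio
with torch.no_grad():
    z = vq.gEncoder(wav)               # (1, 512, frames), frames ~ samples / 160
    c = vq.gAR(z)                      # aggregated context features
    _, idxs = vq.gVQ.forward_idx(z)    # discrete codebook indices per frame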
def __init__(self, model_type='wav2vec', PRETRAINED_MODEL_PATH='/path/to/wav2vec_large.pt'):
    super().__init__()
    self.model_type = model_type
    if model_type == 'wav2vec':
        ckpt = torch.load(PRETRAINED_MODEL_PATH)
        self.model = Wav2VecModel.build_model(ckpt['args'], task=None)
        self.model.load_state_dict(ckpt['model'])
        self.model = self.model.feature_extractor
        self.model.eval()
    else:
        print('Please assign a loss model')
        sys.exit()
def post_init(self):
    super().post_init()
    if self.model_path and os.path.exists(self.model_path):
        import torch
        from fairseq.models.wav2vec import Wav2VecModel
        cp = torch.load(self.model_path, map_location=torch.device('cpu'))
        self.model = Wav2VecModel.build_model(cp['args'], task=None)
        self.model.load_state_dict(cp['model'])
        self.model.eval()
        self.to_device(self.model)
        self._tensor_func = torch.tensor
    else:
        raise PretrainedModelFileDoesNotExist(
            f'model at {self.model_path} does not exist')
def __init__(self):
    cp = torch.load(
        '/hpc/gsir059/INTERSPEECH/MOSI-SEMI/trained_ssl/wav2vec/vq-wav2vec-Kmeans/vq-wav2vec_kmeans.pt'
    )
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    # Roberta wav2vec
    self.roberta = RobertaModel.from_pretrained(
        '/hpc/gsir059/INTERSPEECH/MOSI-SEMI/trained_ssl/wav2vec/vq-wav2vec-Kmeans-Roberta',
        checkpoint_file='bert_kmeans.pt')
    self.roberta.eval()
def __init__(self, input_feat_per_channel, vggblock_config=DEFAULT_ENC_VGGBLOCK_CONFIG,
             transformer_config=DEFAULT_ENC_TRANSFORMER_CONFIG, encoder_output_dim=512,
             in_channels=1, transformer_context=None, transformer_sampling=None):
    super().__init__(input_feat_per_channel, vggblock_config, transformer_config,
                     encoder_output_dim, in_channels, transformer_context,
                     transformer_sampling)
    wav2vec_checkpoint = HOME + '/data/fairseq-data/wav2vec_models/checkpoint_last.pt'
    # wav2vec_checkpoint = '/tmp/checkpoint_last.pt'
    cp = checkpoint_utils.load_checkpoint_to_cpu(wav2vec_checkpoint)
    model = Wav2VecModel.build_model(cp['args'], task=None)
    model.load_state_dict(cp['model'])
    freeze_module_params(model)
    self.wav2vec_model = model
def __init__(self):
    cp = torch.load(
        '/home/gsir059/Documents/EMOTION-FINE/pretrained_ssl_models/vq-wav2vec-kmeans/vq-wav2vec_kmeans.pt'
    )
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
    # Roberta wav2vec
    self.roberta = RobertaModel.from_pretrained(
        '/home/gsir059/Documents/EMOTION-FINE/pretrained_ssl_models/wav2vec-roberta',
        checkpoint_file='bert_kmeans.pt')
    self.roberta.eval()
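# Hedged sketch (an assumption, not from the source): how the vq-wav2vec +
# RoBERTa pair loaded above is typically chained, following fairseq's
# vq-wav2vec featurize example. The "g0-g1" per-frame token format is an
# assumption about the vocabulary bert_kmeans.pt was trained on.
import torch

def audio_to_roberta_features(model, roberta, wav):
    # wav: (1, samples) float tensor of 16 kHz audio
    with torch.no_grad():
        z = model.feature_extractor(wav)                 # (1, 512, frames)
        _, idxs = model.vector_quantizer.forward_idx(z)  # (1, frames, groups)
        # One token per frame, codebook-group indices joined with '-'
        line = ' '.join('-'.join(map(str, frame.tolist())) for frame in idxs.squeeze(0))
        tokens = roberta.task.source_dictionary.encode_line(
            line, add_if_not_exist=False, append_eos=True).long()
        return roberta.extract_features(tokens)          # contextual features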
def load_model(self):
    cp = torch.load(self.checkpoint, map_location=lambda x, _: x)
    model = Wav2VecModel.build_model(cp["args"], None)
    # Older checkpoints may not record where quantization happens; default to "encoder".
    self.quantize_location = getattr(cp["args"], "vq", "encoder")
    model.load_state_dict(cp["model"])
    model.eval().float()
    model.cuda()
    if self.data_parallel:
        model = nn.DataParallel(model)
    return model
def __init__(self, ckpt, feature_selection, **kwargs):
    super(UpstreamExpert, self).__init__()
    self.feature_selection = feature_selection
    cp = torch.load(ckpt)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)  # z: (batch_size, feat_dim, seqlen)
    if self.feature_selection == 'codewords':
        # codewords: (batch_size, feat_dim, seqlen) in torch.FloatTensor
        codewords, _ = self.model.vector_quantizer.forward_idx(z)
    pseudo_features = eval(self.feature_selection).transpose(1, 2)
    self.output_dim = pseudo_features.size(-1)
def __init__(self, fname: str):
    """Load a checkpointed wav2vec model and use it as an nn.Module.

    Implements feature extraction as a forward pass.

    Args:
        fname (str): path to the model checkpoint
    """
    # Example taken from
    # https://github.com/pytorch/fairseq/blob/master/examples/wav2vec/wav2vec_featurize.py#L35
    super().__init__()
    # Load the checkpoint
    checkpoint = torch.load(fname)
    # Build the model and restore its weights
    self.args = checkpoint["args"]
    model = Wav2VecModel.build_model(self.args, None)
    model.load_state_dict(checkpoint["model"])
    # Switch to eval mode
    model.eval()
    self.model = model
def load_wav2vec(self, wav2vecpath):
    if not wav2vecpath:
        return None
    print("LOADING WAV2VEC....")
    cp = torch.load(wav2vecpath, map_location=torch.device('cpu'))
    # 'w2v2-*' methods use a wav2vec 2.0 checkpoint; anything else uses wav2vec.
    if self.method.split('-')[0] == 'w2v2':
        model = Wav2Vec2Model.build_model(cp['args'], task=None)
    else:
        model = Wav2VecModel.build_model(cp['args'], task=None)
    model.load_state_dict(cp['model'])
    model = model.eval()
    if torch.cuda.is_available():
        print('moving WAV2VEC to CUDA')
        model.cuda()
    return model
def __init__(self, ckpt, feature_selection='z', **kwargs):
    super(UpstreamExpert, self).__init__()
    self.feature_selection = feature_selection or 'z'
    # fairseq > 0.10.2 ships a checkpoint loader; 0.10.2 rebuilds from saved args.
    if version.parse(fairseq.__version__) > version.parse("0.10.2"):
        model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [ckpt])
        self.model = model[0]
        self.model.eval()
    elif version.parse(fairseq.__version__) == version.parse("0.10.2"):
        cp = torch.load(ckpt)
        self.model = Wav2VecModel.build_model(cp['args'], task=None)
        self.model.load_state_dict(cp['model'])
    else:
        raise NotImplementedError
    # Probe the output dimension with a dummy waveform.
    pseudo_input = torch.randn(SAMPLE_RATE * EXAMPLE_SEC)
    pseudo_output = self.forward([pseudo_input])
    self.output_dim = pseudo_output[0].size(-1)
def __init__(self, ckpt, feature_selection, **kwargs):
    super(UpstreamExpert, self).__init__()
    if version.parse(fairseq.__version__) > version.parse("0.10.2"):
        model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [ckpt])
        self.model = model[0]
        self.model.eval()
    elif version.parse(fairseq.__version__) == version.parse("0.10.2"):
        cp = torch.load(ckpt)
        self.model = Wav2VecModel.build_model(cp['args'], task=None)
        self.model.load_state_dict(cp['model'])
    else:
        raise NotImplementedError
    pseudo_input = torch.randn(1, SAMPLE_RATE * EXAMPLE_SEC)
    z = self.model.feature_extractor(pseudo_input)
    c = self.model.feature_aggregator(z)
    self.feature_selection = feature_selection
    self.output_dim = eval(self.feature_selection).transpose(1, 2).size(-1)
def __init__(self, out_channels, sample_rate, preemphasis=0.0,
             use_context_features=True, extra_args=None, **kwargs):
    from fairseq.models.wav2vec import Wav2VecModel
    assert sample_rate == extra_args.sample_rate, \
        f'Sample rate {sample_rate} is not equal to frontend sample rate ' \
        f'{extra_args.sample_rate}, use --sample-rate {extra_args.sample_rate}'
    if extra_args.aggregator == 'cnn':
        agg_layers = eval(extra_args.conv_aggregator_layers)
        agg_dim = agg_layers[-1][0]
        assert out_channels == agg_dim, \
            f'Out channels {out_channels} is not equal to frontend output dim ' \
            f'{agg_dim}, use --num-input-features {agg_dim}'
    elif extra_args.aggregator == 'gru':
        assert out_channels == extra_args.gru_dim, \
            f'Out channels {out_channels} is not equal to frontend output dim ' \
            f'{extra_args.gru_dim}, use --num-input-features {extra_args.gru_dim}'
    else:
        raise RuntimeError(f'Wrong wav2vec aggregator {extra_args.aggregator}. Use cnn or gru instead.')
    super().__init__()
    self.fairseq_args = extra_args
    self.preemphasis = preemphasis
    self.use_context_features = use_context_features
    self.model = Wav2VecModel.build_model(extra_args, None).eval()
def __init__(self, weight_path=None, use_cpu=True):
    """
    Initialize an embeddor that uses the wav2vec model.

    Inputs:
        weight_path - path to a .pt file for the wav2vec_large model
        use_cpu - boolean, whether to use the CPU or the GPU
    """
    if weight_path is None:
        print('Downloading wav2vec model')
        if not os.path.exists('models'):
            os.makedirs('models')
        url = 'https://dl.fbaipublicfiles.com/fairseq/wav2vec/wav2vec_large.pt'
        wget.download(url, 'models/wav2vec_large.pt')
        weight_path = 'models/wav2vec_large.pt'
    if use_cpu:
        cp = torch.load(weight_path, map_location=torch.device('cpu'))
    else:
        cp = torch.load(weight_path)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
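# Hedged usage example (illustrative): `Wav2VecEmbeddor` is a hypothetical name
# for the class whose __init__ appears above, and librosa is assumed for audio I/O.
import librosa
import torch

embeddor = Wav2VecEmbeddor(use_cpu=True)       # downloads wav2vec_large.pt if needed
y, sr = librosa.load('speech.wav', sr=16000)   # wav2vec expects 16 kHz mono audio
with torch.no_grad():
    z = embeddor.model.feature_extractor(torch.from_numpy(y).unsqueeze(0))
    c = embeddor.model.feature_aggregator(z)   # (1, 512, frames) context embedding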
import torch
from fairseq.models.wav2vec import Wav2VecModel
import librosa
import numpy as np
import pickle

cp = torch.load('/content/drive/My Drive/EmotionRNN2/wav2vec_large.pt',
                map_location=torch.device('cpu'))
model = Wav2VecModel.build_model(cp['args'], task=None)
model.load_state_dict(cp['model'])
model.eval()

data = pickle.load(open('/content/drive/My Drive/Emotion RNN/IEMOCAP_features_raw.pkl', 'rb'),
                   encoding="latin1")
videoIDs, videoSpeakers, videoLabels, videoText, videoAudio, videoVisual, \
    videoSentence, trainVid, testVid = data

base = '/content/drive/My Drive/FYP/IEMOCAP_full_release/Session'
dataset_for_experiment = {}
for i in videoIDs:
    file = base + i[4] + '/sentences/wav/' + i + '/'
    data = []
    for j in videoIDs[i]:
        y, sr = librosa.load(file + j + '.wav', sr=16000)  # y -> (t)
        b = torch.from_numpy(y).unsqueeze(0)  # b -> (1, t)
        z = model.feature_extractor(b)  # z -> (1, 512, t)
        z = model.feature_aggregator(z).squeeze(0)  # z -> (1, 512, t) -> (512, t)
        start = 0
def load_weights(self):
    cp = torch.load(os.path.join(self.wav2vec_dir, self.weights_fn),
                    map_location=self.device)
    self.model = Wav2VecModel.build_model(cp['args'], task=None)
    self.model.load_state_dict(cp['model'])
    self.model.eval()
def init(self, model_url: str):
    self.model_url = model_url
    self.model_name = self.model_url.replace(
        'https://dl.fbaipublicfiles.com/fairseq/', '').replace('/', '_')
    torch_model = torch.hub.load_state_dict_from_url(self.model_url)
    self.model = Wav2VecModel.build_model(torch_model['args'], task=None)
    self.model.load_state_dict(torch_model['model'])  # restore the pretrained weights
def load_wav2vec(path, map_location):
    cp = torch.load(path, map_location)
    model = Wav2VecModel.build_model(cp['args'], task=None)
    model.load_state_dict(cp['model'], strict=True)
    return model
def load_model() -> torch.nn.Module:
    cp = torch.load(WAV2VEC_PATH)
    model = Wav2VecModel.build_model(cp["args"], task=None)
    model.load_state_dict(cp["model"])
    model.eval()
    return model.to(DEVICE)
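# Hedged call-site sketch for load_model above: WAV2VEC_PATH and DEVICE are
# module-level constants in the source; the values here are assumptions.
WAV2VEC_PATH = 'wav2vec_large.pt'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = load_model()
wav = torch.randn(1, 16000, device=DEVICE)     # 1 s of 16 kHz audio
with torch.no_grad():
    feats = model.feature_aggregator(model.feature_extractor(wav))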