def load_vectors(self, vectors):
    """Build ``self.vectors`` from one or more embedding sources.

    Args:
        vectors: a ``Vectors`` instance, a string key of
            ``pretrained_aliases``, or a list mixing both. A list passed
            by the caller is left untouched.

    Raises:
        ValueError: if a string is not a key of ``pretrained_aliases``,
            or an entry is neither a string nor a ``Vectors`` instance.
    """
    if not isinstance(vectors, list):
        vectors = [vectors]

    # Resolve into a fresh list: writing the loaded objects back into
    # ``vectors`` would silently rewrite the caller's own list.
    resolved = []
    for vector in vectors:
        if six.PY2 and isinstance(vector, str):
            # Normalise Python 2 byte strings to text before the lookup.
            vector = six.text_type(vector)
        if isinstance(vector, six.string_types):
            # Convert the string pretrained vector identifier
            # to a Vectors object
            if vector not in pretrained_aliases:
                raise ValueError(
                    "Got string input vector {}, but allowed pretrained "
                    "vectors are {}".format(
                        vector, list(pretrained_aliases.keys())))
            vector = pretrained_aliases[vector]()
        elif not isinstance(vector, Vectors):
            raise ValueError(
                "Got input vectors of type {}, expected str or "
                "Vectors object".format(type(vector)))
        resolved.append(vector)

    tot_dim = sum(v.dim for v in resolved)
    # Uniform values in [-0.08, 0.08); rows indexed by ``itos`` are then
    # overwritten with the concatenated embeddings.
    self.vectors = torch.rand(len(self), tot_dim) * .08 * 2 - .08
    for i, token in enumerate(self.itos):
        start_dim = 0
        for v in resolved:
            end_dim = start_dim + v.dim
            # Each source fills its own contiguous slice of the row.
            self.vectors[i][start_dim:end_dim] = v[token.strip()]
            start_dim = end_dim
        # Internal invariant: the slices must tile the full row width.
        assert (start_dim == tot_dim)
def get_embedding_alias(embedding_type):
    """Look up the entry of ``pretrained_aliases`` for an embedding name.

    :param embedding_type: a string naming a type of pre-trained
        embeddings. Either an exact key of ``pretrained_aliases`` or a
        prefix of one (e.g. the leading part of a full alias).
    :return: the ``pretrained_aliases`` value for the exact key if there
        is one, otherwise the value for the first key (in the mapping's
        iteration order) that starts with ``embedding_type``; ``False``
        if no key matches.
    """
    if embedding_type in pretrained_aliases:
        return pretrained_aliases[embedding_type]

    # Single pass, first match wins. The search prefix is never rebound,
    # so the result does not depend on which aliases happen to extend
    # one another.
    match = next(
        (alias for alias in pretrained_aliases
         if alias.startswith(embedding_type)),
        None)
    if match is None:
        return False
    return pretrained_aliases[match]
def load_vectors(self, vectors, **kwargs):
    """Build ``self.vectors`` from one or more embedding sources.

    Args:
        vectors: one of or a list containing instantiations of the
            GloVe, CharNGram, or Vectors classes. Alternatively, one
            of or a list of available pretrained vectors:

            charngram.100d
            fasttext.en.300d
            fasttext.simple.300d
            glove.42B.300d
            glove.840B.300d
            glove.twitter.27B.25d
            glove.twitter.27B.50d
            glove.twitter.27B.100d
            glove.twitter.27B.200d
            glove.6B.50d
            glove.6B.100d
            glove.6B.200d
            glove.6B.300d

            A list passed by the caller is left untouched.
        Remaining keyword arguments: Passed to the constructor of
            Vectors classes.

    Raises:
        ValueError: if a string is not a key of ``pretrained_aliases``,
            or an entry is neither a string nor a ``Vectors`` instance.
    """
    if not isinstance(vectors, list):
        vectors = [vectors]

    # Resolve into a fresh list: writing the loaded objects back into
    # ``vectors`` would silently rewrite the caller's own list.
    resolved = []
    for vector in vectors:
        if isinstance(vector, str):
            # Convert the string pretrained vector identifier
            # to a Vectors object
            if vector not in pretrained_aliases:
                raise ValueError(
                    "Got string input vector {}, but allowed pretrained "
                    "vectors are {}".format(vector, list(pretrained_aliases.keys()))
                )
            vector = pretrained_aliases[vector](**kwargs)
        elif not isinstance(vector, Vectors):
            raise ValueError(
                "Got input vectors of type {}, expected str or "
                "Vectors object".format(type(vector))
            )
        resolved.append(vector)

    tot_dim = sum(v.dim for v in resolved)
    # Allocated without initialisation; each row indexed by ``itos`` is
    # fully overwritten by the loop below.
    self.vectors = torch.Tensor(len(self), tot_dim)
    for i, token in enumerate(self.itos):
        start_dim = 0
        for v in resolved:
            end_dim = start_dim + v.dim
            # Each source fills its own contiguous slice of the row.
            self.vectors[i][start_dim:end_dim] = v[token.strip()]
            start_dim = end_dim
        # Internal invariant: the slices must tile the full row width.
        assert start_dim == tot_dim