def load_embeddings(self, words, embedding_file):
    """Load pretrained embeddings for a given list of words, if they exist.

    Copies (or averages, for duplicates) vectors from a text embedding file
    directly into the network's embedding weight matrix, in place.

    Args:
        words: iterable of tokens. Only those that are indexed in the
            dictionary are kept.
        embedding_file: path to text file of embeddings, space separated.

    Raises:
        RuntimeError: if a line in the embedding file does not have the
            expected number of columns for this network's embedding size.
    """
    # Restrict to words the dictionary actually indexes.
    words = {w for w in words if w in vocab}
    logger.info('Loading pre-trained embeddings for %d words from %s' %
                (len(words), embedding_file))
    embedding = self.network.embedding.weight.data

    # When normalized, some words are duplicated. (Average the embeddings.)
    vec_counts = {}
    with open(embedding_file, encoding='utf-8') as f:
        for line in f:
            parsed = line.rstrip().split(' ')
            # Validate with a real exception: `assert` is stripped under -O,
            # and this checks external file contents, not internal invariants.
            if len(parsed) != embedding.size(1) + 1:
                raise RuntimeError(
                    'Embedding file line has %d columns, expected %d' %
                    (len(parsed), embedding.size(1) + 1))
            w = vocab.normalize(parsed[0])
            if w in words:
                vec = torch.Tensor([float(i) for i in parsed[1:]])
                if w not in vec_counts:
                    vec_counts[w] = 1
                    embedding[vocab[w]].copy_(vec)
                else:
                    # Sum now; average once all occurrences are seen.
                    logger.warning('WARN: Duplicate embedding found for %s' % w)
                    vec_counts[w] = vec_counts[w] + 1
                    embedding[vocab[w]].add_(vec)

    # Average only the rows that accumulated more than one vector.
    for w, c in vec_counts.items():
        if c > 1:
            embedding[vocab[w]].div_(c)

    # Guard against division by zero when no requested words were indexed.
    coverage = 100 * len(vec_counts) / len(words) if words else 0.0
    logger.info('Loaded %d embeddings (%.2f%%)' %
                (len(vec_counts), coverage))
def load_embeddings(self, words, embedding_file, dim=300):
    """Load pretrained embeddings for a given list of words, if they exist.

    Builds and returns a fresh ``(len(vocab), dim)`` numpy matrix; rows for
    words not found in the file are left as zeros. Duplicate entries (which
    can appear after normalization) are averaged.

    Args:
        words: iterable of tokens. Only those that are indexed in the
            dictionary are kept.
        embedding_file: path to text file of embeddings, space separated.
        dim: dimensionality of the embedding vectors (default: 300).

    Returns:
        numpy array of shape ``(len(vocab), dim)``.

    Raises:
        RuntimeError: if a line in the embedding file does not have
            ``dim + 1`` columns (token followed by ``dim`` floats).
    """
    # Restrict to words the dictionary actually indexes.
    words = {w for w in words if w in vocab}
    logger.info('Loading pre-trained embeddings for %d words from %s' %
                (len(words), embedding_file))
    embedding = np.zeros((len(vocab), dim))

    # When normalized, some words are duplicated. (Average the embeddings.)
    vec_counts = {}
    with open(embedding_file, encoding='utf-8') as f:
        for line in f:
            parsed = line.rstrip().split(' ')
            # Fail loudly with a clear message instead of an opaque
            # broadcast error on the assignment below.
            if len(parsed) != dim + 1:
                raise RuntimeError(
                    'Embedding file line has %d columns, expected %d' %
                    (len(parsed), dim + 1))
            w = vocab.normalize(parsed[0])
            if w in words:
                vec = np.array([float(i) for i in parsed[1:]],
                               dtype='float32')
                if w not in vec_counts:
                    vec_counts[w] = 1
                    embedding[vocab[w]] = vec
                else:
                    # Sum now; average once all occurrences are seen.
                    logger.warning('WARN: Duplicate embedding found for %s' % w)
                    vec_counts[w] = vec_counts[w] + 1
                    embedding[vocab[w]] += vec

    # Average only the rows that accumulated more than one vector.
    for w, c in vec_counts.items():
        if c > 1:
            embedding[vocab[w]] /= c

    # Guard against division by zero when no requested words were indexed.
    coverage = 100 * len(vec_counts) / len(words) if words else 0.0
    logger.info('Loaded %d embeddings (%.2f%%)' %
                (len(vec_counts), coverage))
    return embedding