Exemplos de Index.getWord em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: utils

Classe / Tipo: Index

Método / Função: getWord

Exemplos em hotexamples.com: 2

Index.getWord em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de utils.Index.getWord em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

Index(10)

add(4)

hasWord(4)

getId(3)

freeze(2)

getWord(2)

vocabulary(2)

vocbulary(2)

fromfile(1)

load(1)

Métodos Frequentes

Index (10)

add (4)

hasWord (4)

getId (3)

freeze (2)

getWord (2)

vocabulary (2)

vocbulary (2)

fromfile (1)

load (1)

Exemplo n.º 1

0

Exibir arquivo

class RandomEmbedding(Embedding):
    """Embedding that hands out a random unit-length vector per word.

    A vector is drawn the first time a word is requested and appended to
    ``self.data``; later requests for the same word return the stored row.
    A faiss ``IndexFlatL2`` over ``self.data`` is built lazily by
    ``search`` and discarded whenever a new vector is added.
    """

    def __init__(self, vectordim = 300):
        """Create an empty embedding whose vectors have ``vectordim`` entries."""
        self.index = Index()
        self.vdim = vectordim
        # One row per known word; starts with zero rows.
        self.data = np.zeros((0, self.vdim), dtype = np.float32)
        # Lazily built faiss index over self.data (None = needs building).
        self.invindex = None

    def getVector(self, word):
        """Return the vector for ``word``, creating a random one if unseen."""
        if self.index.hasWord(word):
            idx = self.index.getId(word)
            return self.data[idx]

        # create random vector
        v = np.random.rand(self.vdim).astype(np.float32)
        # normalize (guard against dividing by an all-zero draw)
        length = np.linalg.norm(v)
        if length == 0:
            length += 1e-6
        v = v / length
        # Register the *word* itself. The previous code passed
        # ``self.id2w``, an attribute this class never defines, so every
        # first lookup of a word raised AttributeError.
        idx = self.index.add(word)
        self.data = np.vstack((self.data, v))
        # NOTE(review): this invariant is kept exactly as written; it holds
        # only if Index.add returns the number of words after insertion.
        # Index is not visible here -- confirm against utils.Index.
        assert idx == len(self.data)
        # The search index no longer covers all rows; rebuild on next search.
        self.invindex = None
        return v

    def search(self, q, topk = 4):
        """Find the ``topk`` stored vectors nearest to the query vector(s).

        ``q`` is either a single vector (1-D) or a 2-D array with one query
        per row. Returns ``(I, D)`` -- indices and distances as produced by
        the faiss index -- or ``None`` (after printing to stderr) when the
        query dimensionality does not match ``self.vdim``.
        """
        if self.invindex is None:
            print('Building faiss index...')
            self.invindex = faiss.IndexFlatL2(self.vdim)
            self.invindex.add(self.data)
            print('Faiss index built:', self.invindex.is_trained)
        if len(q.shape) == 1:
            # Promote a single vector to a 1-row batch. A plain ndarray view
            # is used instead of np.matrix, which NumPy no longer recommends.
            q = q[np.newaxis, :]
        if q.shape[1] != self.vdim:
            print('Wrong shape, expected %d dimensions but got %d.' % (self.vdim, q.shape[1]), file = sys.stderr)
            return
        D, I = self.invindex.search(q, topk) # D = distances, I = indices
        return ( I, D )

    def wordForVec(self, v):
        """Return ``(word, sim)`` for the stored vector closest to ``v``.

        ``sim`` is computed as ``1 - distance`` of the best match. Fails
        when ``search`` returns ``None`` (dimension mismatch).
        """
        idx, dist = self.search(v, topk=1)
        idx = idx[0,0]
        dist = dist[0,0]
        sim = 1. - dist
        word = self.index.getWord(idx)
        return word, sim

    def containsWord(self, word):
        """Always True: ``getVector`` creates vectors for unseen words."""
        return True

    def vocabulary(self):
        """Return the vocabulary as reported by the underlying index."""
        # NOTE(review): 'vocbulary' spelling kept as in the original call;
        # the Index API is not visible here -- confirm the method name.
        return self.index.vocbulary()

    def dim(self):
        """Return the dimensionality of the vectors."""
        return self.vdim

Exemplo n.º 2

0

Exibir arquivo

Arquivo: embedding.py Projeto: uhh-lt/lttc

class TextEmbedding(Embedding):
    """Embedding read from a text file with one ``word v1 v2 ...`` entry per line.

    Call ``load`` before using any lookup method; it fills ``self.index``
    (word <-> id) and ``self.data`` (one float32 row per word).
    """

    def __init__(self, txtfile, sep = ' ', vectordim = 300):
        """Remember the file location and format; nothing is read yet."""
        self.file = txtfile
        self.vdim = vectordim
        self.separator = sep
        # Lazily built faiss index over self.data (None = needs building).
        # Previously this attribute was never assigned, so ``search`` raised
        # AttributeError on first use.
        self.invindex = None

    def load(self, skipheader = True, nlines = sys.maxsize, normalize = False):
        """Read up to ``nlines`` entries from the file and return ``self``.

        skipheader: discard the first line of the file.
        nlines:     maximum number of data lines to consume.
        normalize:  scale every vector to unit length.

        Lines whose word is already known, or whose vector does not have
        exactly ``self.vdim`` entries, are skipped. Lines that raise while
        being processed are reported on stderr and skipped.
        """
        self.index = Index()
        print('Loading embedding from %s' % self.file)
        data_ = []
        with open(self.file, 'r', encoding='utf-8', errors='ignore') as f:
            if skipheader:
                f.readline()
            for i, line in enumerate(f):
                if i >= nlines:
                    break
                try:
                    splits = line.strip().split(self.separator)
                    word = splits[0]
                    if self.index.hasWord(word):
                        continue
                    coefs = np.array(splits[1:self.vdim+1], dtype=np.float32)
                    # Reject short rows before spending time normalizing them.
                    if coefs.shape != (self.vdim,):
                        continue
                    if normalize:
                        length = np.linalg.norm(coefs)
                        if length == 0:
                            length += 1e-6
                        coefs = coefs / length
                    idx = self.index.add(word)
                    data_.append(coefs)
                    # NOTE(review): invariant kept exactly as written; it
                    # holds only if Index.add returns the word count after
                    # insertion. Index is not visible here -- confirm.
                    assert idx == len(data_)
                except Exception as err:
                    # Best effort: report the bad line and keep loading.
                    print('Error in line %d' % i, sys.exc_info()[0], file = sys.stderr)
                    print(' ', err, file = sys.stderr)
                    continue
        # reshape keeps the (n, vdim) layout even when no line was accepted
        # (np.array([]) alone would have shape (0,)).
        self.data = np.array(data_, dtype = np.float32).reshape(-1, self.vdim)
        del data_
        # Any previously built search index refers to the old data.
        self.invindex = None
        return self

    def getVector(self, word):
        """Return the stored vector for ``word``.

        Unknown words are reported on stderr and mapped to the first unit
        vector ``(1, 0, ..., 0)``.
        """
        if not self.containsWord(word):
            print("'%s' is unknown." % word, file = sys.stderr)
            # float32 so the fallback has the same dtype as the stored rows.
            v = np.zeros(self.vdim, dtype = np.float32)
            v[0] = 1
            return v
        idx = self.index.getId(word)
        return self.data[idx]

    def search(self, q, topk = 4):
        """Find the ``topk`` stored vectors nearest to the query vector(s).

        ``q`` is either a single vector (1-D) or a 2-D array with one query
        per row. Returns ``(I, D)`` -- indices and distances as produced by
        the faiss index -- or ``None`` (after printing to stderr) when the
        query dimensionality does not match ``self.vdim``.
        """
        # getattr: tolerate instances whose __init__ was bypassed or
        # overridden without setting the attribute.
        if getattr(self, 'invindex', None) is None:
            # Local import keeps this block self-contained; faiss is the
            # same library RandomEmbedding uses for its search index.
            import faiss
            print('Building faiss index...')
            self.invindex = faiss.IndexFlatL2(self.vdim)
            self.invindex.add(self.data)
            print('Faiss index built:', self.invindex.is_trained)
        if len(q.shape) == 1:
            # Promote a single vector to a 1-row batch. A plain ndarray view
            # is used instead of np.matrix, which NumPy no longer recommends.
            q = q[np.newaxis, :]
        if q.shape[1] != self.vdim:
            print('Wrong shape, expected %d dimensions but got %d.' % (self.vdim, q.shape[1]), file = sys.stderr )
            return
        D, I = self.invindex.search(q, topk) # D = distances, I = indices
        return ( I, D )

    def wordForVec(self, v):
        """Return ``(word, sim)`` for the stored vector closest to ``v``.

        ``sim`` is computed as ``1 - distance`` of the best match. Fails
        when ``search`` returns ``None`` (dimension mismatch).
        """
        idx, dist = self.search(v, topk=1)
        idx = idx[0,0]
        dist = dist[0,0]
        sim = 1. - dist
        word = self.index.getWord(idx)
        return word, sim

    def containsWord(self, word):
        """Return whether ``word`` was loaded from the file."""
        return self.index.hasWord(word)

    def vocabulary(self):
        """Return the vocabulary as reported by the underlying index."""
        return self.index.vocabulary()

    def dim(self):
        """Return the dimensionality of the vectors."""
        return self.vdim