示例#1
0
class Text2W2VEncoder:
    """Encode a list of words as the mean of their stored word vectors.

    The vectors are looked up in a ``BigFile`` store built from
    ``data_path``; the result is returned as a ``torch.Tensor``.
    """

    def __init__(self, data_path):
        """Create the vector store and remember its dimensionality.

        Args:
            data_path: Passed unchanged to ``BigFile``.
        """
        self.w2v = BigFile(data_path)
        # shape() yields a pair; the second item is kept as the vector size.
        n_words, n_dims = self.w2v.shape()
        self.ndims = n_dims
        print("Text2W2VEncoder", "vocab_size", n_words, "dim", self.ndims)

    def encode(self, words):
        """Return the averaged vector of ``words`` as a ``torch.Tensor``.

        Args:
            words: Words handed to the store's ``read`` method.

        Returns:
            The mean over axis 0 of the vectors returned by the store, or a
            zero vector of length ``self.ndims`` when no vectors come back.
        """
        # read() returns a pair; only the vectors are needed here.
        # Presumably the first item lists the words that were actually
        # found in the store -- confirm against BigFile.read.
        _, found = self.w2v.read(words)

        if len(found) == 0:
            return torch.Tensor(np.zeros([self.ndims]))

        averaged = np.mean(np.array(found), axis=0)
        return torch.Tensor(averaged)
示例#2
0
class W2Vec(Txt2Vec):
    """Text encoder that averages stored word vectors from a ``BigFile``."""

    def __init__(self, data_path, norm=0, clean=True):
        """Initialise the base encoder and create the vector store.

        Args:
            data_path: Forwarded to ``Txt2Vec`` and used to build ``BigFile``.
            norm: Forwarded unchanged to ``Txt2Vec``.
            clean: Forwarded unchanged to ``Txt2Vec``.
        """
        super(W2Vec, self).__init__(data_path, norm, clean)
        self.w2v = BigFile(data_path)
        # shape() yields a pair; the second item is kept as the vector size.
        n_words, n_dims = self.w2v.shape()
        self.ndims = n_dims
        summary = 'vob size: %d, vec dim: %d' % (n_words, self.ndims)
        logger.info(summary)

    def _encoding(self, words):
        """Return the mean vector of ``words`` as a NumPy array.

        Args:
            words: Words handed to the store's ``read`` method.

        Returns:
            The mean over axis 0 of the vectors returned by the store, or a
            zero vector of length ``self.ndims`` when no vectors come back.
        """
        # read() returns a pair; only the vectors are needed here.
        _, found = self.w2v.read(words)

        if len(found) == 0:
            return np.zeros(self.ndims, )

        return np.mean(np.array(found), axis=0)