Exemplo n.º 1
0
    def getContextsShape(self):
        """Read the three header values at the start of the contexts file.

        The file is opened in binary mode because its content is consumed
        through ``binary.readi``; text mode could translate newlines or try
        to decode the raw bytes.

        Returns:
            A ``(contextsCount, contextSize, negative)`` tuple, with the
            values read from the file in that order.
        """
        with open(self.contextsFilePath, 'rb') as contextsFile:
            contextsCount = binary.readi(contextsFile)
            contextSize = binary.readi(contextsFile)
            negative = binary.readi(contextsFile)

        return contextsCount, contextSize, negative
Exemplo n.º 2
0
def loadEmbeddings(embeddingsFilePath):
    """Load an embeddings matrix from a binary file.

    The file starts with two values read via ``binary.readi`` (the number of
    embeddings, then the size of each embedding), followed by the embeddings
    themselves, each read with ``binary.readf``.

    Args:
        embeddingsFilePath: Path of the embeddings file to read.

    Returns:
        A ``float32`` numpy array of shape
        ``(embeddingsCount, embeddingSize)``.
    """
    with open(embeddingsFilePath, 'rb') as embeddingsFile:
        embeddingsCount = binary.readi(embeddingsFile)
        embeddingSize = binary.readi(embeddingsFile)

        # Allocate as float32 directly: creating the default float64 array
        # and converting with astype() would build a throwaway buffer twice
        # the size of the final one.
        embeddings = numpy.empty((embeddingsCount, embeddingSize), dtype='float32')

        for embeddingIndex in range(embeddingsCount):
            embeddings[embeddingIndex] = binary.readf(embeddingsFile, embeddingSize)

            log.progress('Loading embeddings: {0:.3f}%.', embeddingIndex + 1, embeddingsCount)

        log.info('Loading embeddings complete. {0} embeddings loaded.', embeddingsCount)

    return embeddings
Exemplo n.º 3
0
def loadMap(indexMapFilePath, inverse=False):
    """Read a word/index map from a binary file into an ordered dictionary.

    The file holds an item count followed, for every item, by the word
    length, the word itself and its index.

    Args:
        indexMapFilePath: Path of the map file to read.
        inverse: When true the result maps index -> word; otherwise it maps
            word -> index.

    Returns:
        A ``collections.OrderedDict`` filled in file order.
    """
    mapping = collections.OrderedDict()

    with open(indexMapFilePath, 'rb') as mapFile:
        total = binary.readi(mapFile)

        for position in range(total):
            length = binary.readi(mapFile)
            text = binary.reads(mapFile, length)
            number = binary.readi(mapFile)

            # Pick the direction of the mapping for this entry.
            key, value = (number, text) if inverse else (text, number)
            mapping[key] = value

            log.progress('Loading word map: {0:.3f}%.', position + 1, total)

        log.info('Loading word map complete. {0} words loaded.', total)

    return mapping
Exemplo n.º 4
0
    def getContexts(self, start, stop, step):
        """Return the contexts at ``range(start, stop, step)`` as an int32 array.

        When ``step == 1`` the requested contexts are contiguous in the
        contexts file, so they are fetched with one seek and one bulk read.
        For any other step each context is taken from ``self[index][0]``.

        Args:
            start: Index of the first context.
            stop: Index one past the last context.
            step: Stride between consecutive contexts.

        Returns:
            A numpy ``int32`` array. In the contiguous case its shape is
            ``(stop - start, windowSize + negative)``.
        """
        if step == 1:
            count = stop - start
            contextSize = self.windowSize + self.negative

            # Every stored value is treated as 4 bytes wide when computing
            # the byte offset of a context.
            contextBufferSize = contextSize * 4

            # 12 for sizeof(contextsCount) + sizeof(contextSize) + sizeof(negative)
            startPosition = start * contextBufferSize + 12

            # Binary mode: the file is addressed by byte offset and read as
            # raw values, which text mode does not guarantee.
            with open(self.contextsFilePath, 'rb') as contextsFile:
                contextsFile.seek(startPosition, io.SEEK_SET)
                contexts = binary.readi(contextsFile, count * contextSize)

            contexts = numpy.reshape(contexts, (count, contextSize))
        else:
            # range (rather than xrange) keeps this branch usable on Python 3.
            contexts = [self[contextIndex][0]
                        for contextIndex in range(start, stop, step)]

        return numpy.asarray(contexts, dtype='int32')