def getContextsShape(self):
    """Read the three header integers stored at the start of the contexts file.

    The values are read, in order, from ``self.contextsFilePath`` using
    ``binary.readi``.

    Returns:
        A tuple ``(contextsCount, contextSize, negative)``.
    """
    # The header is raw binary data, so the file must be opened in binary
    # mode: text mode applies newline translation on some platforms and
    # yields str instead of bytes on Python 3.
    with open(self.contextsFilePath, 'rb') as contextsFile:
        contextsCount = binary.readi(contextsFile)
        contextSize = binary.readi(contextsFile)
        negative = binary.readi(contextsFile)

    return contextsCount, contextSize, negative
def loadEmbeddings(embeddingsFilePath):
    """Load an embeddings matrix from a binary embeddings file.

    The file is read as two integers (the number of embeddings and the size
    of each embedding) followed by one record of ``embeddingSize`` floats per
    embedding, read with ``binary.readf``.

    Args:
        embeddingsFilePath: Path of the binary embeddings file.

    Returns:
        A ``float32`` numpy array of shape ``(embeddingsCount, embeddingSize)``.
    """
    with open(embeddingsFilePath, 'rb') as embeddingsFile:
        embeddingsCount = binary.readi(embeddingsFile)
        embeddingSize = binary.readi(embeddingsFile)

        # Allocate the target dtype directly; going through the default
        # float64 and then astype() would allocate twice and copy once.
        embeddings = numpy.empty((embeddingsCount, embeddingSize), dtype='float32')

        for embeddingIndex in range(embeddingsCount):
            embeddings[embeddingIndex] = binary.readf(embeddingsFile, embeddingSize)

            log.progress('Loading embeddings: {0:.3f}%.', embeddingIndex + 1, embeddingsCount)

        log.info('Loading embeddings complete. {0} embeddings loaded.', embeddingsCount)

    return embeddings
def loadMap(indexMapFilePath, inverse=False):
    """Load a word/index map from a binary index-map file.

    The file is read as an item count followed by, for each item, the word
    length, the word itself, and its index.

    Args:
        indexMapFilePath: Path of the binary index-map file.
        inverse: When truthy, the result maps index -> word; otherwise it
            maps word -> index.

    Returns:
        A ``collections.OrderedDict`` whose entries are inserted in the order
        the items are read from the file.
    """
    mapping = collections.OrderedDict()

    with open(indexMapFilePath, 'rb') as source:
        total = binary.readi(source)

        # `loaded` is the 1-based count of items read so far, which is what
        # the progress logger is given.
        for loaded in range(1, total + 1):
            length = binary.readi(source)
            word = binary.reads(source, length)
            index = binary.readi(source)

            key, value = (index, word) if inverse else (word, index)
            mapping[key] = value

            log.progress('Loading word map: {0:.3f}%.', loaded, total)

        log.info('Loading word map complete. {0} words loaded.', total)

    return mapping
def getContexts(self, start, stop, step):
    """Return the contexts in the half-open index range ``[start, stop)``.

    When ``step == 1`` the requested contexts are read from
    ``self.contextsFilePath`` with a single seek and a single bulk read.
    For any other step the contexts are fetched one at a time through
    ``self[index][0]``.

    Args:
        start: Index of the first context to return.
        stop: Index one past the last context to return.
        step: Stride between returned contexts.

    Returns:
        For ``step == 1``, the bulk-read values reshaped to
        ``(stop - start, self.windowSize + self.negative)``. Otherwise an
        ``int32`` numpy array built from the individually fetched contexts.
    """
    if step == 1:
        count = stop - start
        contextSize = self.windowSize + self.negative
        # Size of one stored context in bytes (4 bytes per element).
        contextBufferSize = contextSize * 4
        # 12 for sizeof(contextsCount) + sizeof(contextSize) + sizeof(negative)
        startPosition = start * contextBufferSize + 12

        # Binary mode is required: the file holds raw integers, and text
        # mode would translate newlines / decode bytes.
        with open(self.contextsFilePath, 'rb') as contextsFile:
            contextsFile.seek(startPosition, io.SEEK_SET)
            contexts = binary.readi(contextsFile, count * contextSize)

        return numpy.reshape(contexts, (count, contextSize))

    # `range` instead of `xrange`: works on both Python 2 and Python 3 and
    # matches the other loaders in this file.
    contexts = [self[contextIndex][0] for contextIndex in range(start, stop, step)]

    return numpy.asarray(contexts, dtype='int32')