def __init__(self, source_path):
    '''
    Set up the Neighborhood used for nearest-neighbor lookups.

    Args:
        source_path (string): path to a bcolz database with three carray
            columns: 'id', 'vector' and 'norm'
    '''
    self.source_path = source_path

    # Locations of the per-column stores underneath the database root.
    vector_dir = source_path + "/vector"
    norm_dir = source_path + "/norm"

    # Open the bcolz datastores: the two column arrays, then the whole table.
    vector_store = bvec.carray(rootdir=vector_dir)
    self.vectors = vector_store
    norm_store = bvec.carray(rootdir=norm_dir)
    self.norms = norm_store
    table = bcolz.ctable(rootdir=source_path)
    self.source_table = table

    # Similarity object built from the vectors and their norms.
    self.similarity = sim.Similarity(vector_store, norm_store)

    # Domain <-> index maps, both derived from the source table:
    # ids to indices, and indices back to ids.
    self.id_index_map = self._create_id_index_map(table)
    self.index_id_map = self._create_index_id_map(table)
def empty_like(self, shape=None, chunklen=None, cparams=None, rootdir=None):
    '''
    Create an empty bvec.carray container matching this one, with optional modifications.

    Args:
        shape (tuple): shape to model the new container on; defaults to
            self.shape. Its first entry is passed on as `expectedlen`, and
            for a two-dimensional shape the second entry becomes the row
            width of the empty template array.
        chunklen: forwarded unchanged to bvec.carray.
        cparams: forwarded to bvec.carray; defaults to self.cparams.
        rootdir: forwarded unchanged to bvec.carray.

    Returns:
        The bvec.carray built from a zero-length template array of
        dtype self.dtype.

    Raises:
        ValueError: if the requested shape is not one- or two-dimensional.
    '''
    if shape is None:
        shape = self.shape
    if cparams is None:
        cparams = self.cparams

    # Validate the *requested* shape (not self.shape), so an explicit
    # `shape` argument is checked on its own dimensionality.
    ndim = len(shape)
    if ndim == 1:
        template_shape = (0,)
    elif ndim == 2:
        template_shape = (0, shape[1])
    else:
        raise ValueError("Can't create a carray like that. Only one and two dimensions supported.")

    # Zero-row template: carries only the dtype and row width.
    result_template = np.ndarray(shape=template_shape, dtype=self.dtype)
    return bvec.carray(result_template, expectedlen=shape[0], chunklen=chunklen, cparams=cparams, rootdir=rootdir)