def __init__(self, data, depth, n_trees, projection_sparsity='auto',
             shape=None, mmap=False):
    """
    Initializes an MRPT index object.

    :param data: Input data either as a NxDim numpy ndarray or as a filepath
                 to a binary file containing the data
    :param depth: The depth of the trees
    :param n_trees: The number of trees used in the index
    :param projection_sparsity: Expected ratio of non-zero components in a
                 projection matrix. ``'auto'`` selects ``1 / sqrt(dim)`` and
                 ``None`` selects a ratio of 1.
    :param shape: Shape of the data as a tuple (N, dim). Needs to be specified
                 only if loading the data from a file.
    :param mmap: If true, the data is mapped into memory. Has effect only if
                 the data is loaded from a file.
    :raises ValueError: If the data, shape, depth, number of trees or sparsity
                 is invalid, or if memory mapping is requested on Windows.
    :return:
    """
    if isinstance(data, np.ndarray):
        # Check the rank before anything else: len() is undefined for 0-d
        # arrays and would raise TypeError instead of a validation error.
        # ``size == 0`` also rejects matrices with zero columns.
        if data.ndim != 2 or data.size == 0:
            raise ValueError(
                "The data matrix should be non-empty and two-dimensional")
        if data.dtype != np.float32:
            raise ValueError("The data matrix should have type float32")
        if not data.flags['C_CONTIGUOUS'] or not data.flags['ALIGNED']:
            raise ValueError(
                "The data matrix has to be C_CONTIGUOUS and ALIGNED")
        n_samples, dim = data.shape
    elif isinstance(data, str):
        if not isinstance(shape, tuple) or len(shape) != 2:
            raise ValueError(
                "You must specify the shape of the data as a tuple (N, dim) "
                "when loading data from a binary file")
        n_samples, dim = shape
        # A non-positive N would make log2() below yield -inf/nan, which
        # then fails inside the "%d" formatting with a misleading error.
        if n_samples < 1 or dim < 1:
            raise ValueError(
                "The shape of the data (N, dim) must contain only "
                "positive values")
    else:
        raise ValueError("Data must be either an ndarray or a filepath")

    # Upper bound for the depth allowed by the validation below.
    max_depth = np.ceil(np.log2(n_samples))
    if not 1 <= depth <= max_depth:
        raise ValueError("Depth should be in range [1, %d]" % max_depth)

    if n_trees < 1:
        raise ValueError("Number of trees must be positive")

    if projection_sparsity == 'auto':
        projection_sparsity = 1. / np.sqrt(dim)
    elif projection_sparsity is None:
        projection_sparsity = 1
    elif not 0 < projection_sparsity <= 1:
        raise ValueError("Sparsity should be in (0, 1]")

    if mmap and os.name == 'nt':
        raise ValueError("Memory mapping is not available on Windows")

    self.index = mrptlib.MrptIndex(data, n_samples, dim, depth, n_trees,
                                   projection_sparsity, mmap)
    self.built = False
def __init__(self, data, shape=None, mmap=False):
    """
    Initializes an MRPT index object.

    :param data: Input data either as a NxDim numpy ndarray or as a filepath
                 to a binary file containing the data. ``None`` is also
                 accepted, in which case no underlying index is created here.
    :param shape: Shape of the data as a tuple (N, dim). Needs to be specified
                 only if loading the data from a file.
    :param mmap: If true, the data is mapped into memory. Has effect only if
                 the data is loaded from a file.
    :raises ValueError: If the data or shape is invalid, or if memory mapping
                 is requested on Windows.
    :return:
    """
    if isinstance(data, np.ndarray):
        # Check the rank before anything else: len() is undefined for 0-d
        # arrays and would raise TypeError instead of a validation error.
        # ``size == 0`` also rejects matrices with zero columns.
        if data.ndim != 2 or data.size == 0:
            raise ValueError(
                "The data matrix should be non-empty and two-dimensional")
        if data.dtype != np.float32:
            raise ValueError("The data matrix should have type float32")
        if not data.flags['C_CONTIGUOUS'] or not data.flags['ALIGNED']:
            raise ValueError(
                "The data matrix has to be C_CONTIGUOUS and ALIGNED")
        n_samples, dim = data.shape
    elif isinstance(data, str):
        if not isinstance(shape, tuple) or len(shape) != 2:
            raise ValueError(
                "You must specify the shape of the data as a tuple (N, dim) "
                "when loading data from a binary file")
        n_samples, dim = shape
        # Reject nonsensical sizes here rather than handing them to the
        # native index.
        if n_samples < 1 or dim < 1:
            raise ValueError(
                "The shape of the data (N, dim) must contain only "
                "positive values")
    elif data is not None:
        raise ValueError("Data must be either an ndarray or a filepath")

    if mmap and os_name == 'nt':
        raise ValueError("Memory mapping is not available on Windows")

    # n_samples and dim are bound only when data was given, so the native
    # index and self.dim are set only in that case.
    if data is not None:
        self.index = mrptlib.MrptIndex(data, n_samples, dim, mmap)
        self.dim = dim

    self.built = False
    self.autotuned = False