示例#1
0
    def nn_radius(self, query, radius, **kwargs):
        """
        Search the current index for the neighbors of query within radius.

        Keyword arguments are merged into the stored FLANN parameters
        before the search is run.

        Returns a tuple (indices, dists), each truncated to the count
        reported by the underlying radius search.

        Raises FLANNException when no index is available, when the dtype
        of query is not in allowed_types, or when it differs from the
        dtype of the index.
        """
        if self.__curindex is None:
            raise FLANNException(
                'build_index(...) method not called first or current index deleted.')

        query_type = query.dtype.type
        if query_type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % query.dtype)
        if self.__curindex_type != query_type:
            raise FLANNException('Index and query must have the same type')

        npts, dim = self.__curindex_data.shape
        assert query.shape[0] == dim, 'data and query must have the same dims'

        # Output buffers hold npts entries, the maximum count handed to the
        # search call below.
        if self.__curindex_type == np.float64:
            dist_dtype = np.float64
        else:
            dist_dtype = np.float32
        result = np.empty(npts, dtype=index_type)
        dists = np.empty(npts, dtype=dist_dtype)

        self.__flann_parameters.update(kwargs)

        search = flann.radius_search[self.__curindex_type]
        nn = search(self.__curindex, query, result, dists, npts, radius,
                    pointer(self.__flann_parameters))

        return (result[:nn], dists[:nn])
示例#2
0
    def execute(self):
        """
        Build an index on the input dataset, look up the nearest neighbors
        of every test point and write the matches in "dat" format.

        Reads input_file, algorithm, test_file, output_file, log_level,
        trees, branching, max_iterations, centers_init, nn and checks from
        self.options.

        Raises FLANNException if the input file, algorithm, test file or
        output file option is None.
        """
        options = self.options

        if options.input_file is None:
            raise FLANNException("No input file given.")
        if options.algorithm is None:
            raise FLANNException("No algorithm specified")
        if options.test_file is None:
            raise FLANNException("No test file given.")
        if options.output_file is None:
            raise FLANNException("No output file given.")

        # A single pre-formatted argument makes print(...) produce the same
        # output whether print is a statement (Python 2) or a function.
        print('Reading input dataset from %s' % (options.input_file,))
        dataset = read(options.input_file)

        flann = FLANN(log_level=options.log_level)
        flann.build_index(dataset,
                          algorithm=options.algorithm,
                          trees=options.trees,
                          branching=options.branching,
                          iterations=options.max_iterations,
                          centers_init=options.centers_init)

        print('Reading test dataset from %s' % (options.test_file,))
        testset = read(options.test_file)

        print("Searching for nearest neighbors")
        # The distances are returned by the search but only matches are saved.
        matches, dists = flann.nn_index(testset,
                                        options.nn,
                                        checks=options.checks)

        print("Writing matches to %s" % (options.output_file,))
        write(matches, options.output_file, format="dat")
示例#3
0
    def execute(self):
        """
        Build an index for the requested precision and write the parameters
        returned by build_index as a 'params' config section.

        The section is written to options.params_file when that option is
        set, otherwise to sys.stdout.  The FLANN object and the dataset are
        kept on self as self.nn and self.dataset.

        Raises FLANNException if no input file is given or if
        options.precision is outside [0, 1].
        """
        self.nn = FLANN(log_level=self.options.log_level)

        if self.options.input_file is None:
            raise FLANNException("No input file given.")
        # A single pre-formatted argument makes print(...) produce the same
        # output whether print is a statement (Python 2) or a function.
        print('Reading input dataset from %s' % (self.options.input_file,))
        self.dataset = read(self.options.input_file)

        if self.options.precision < 0 or self.options.precision > 1:
            raise FLANNException(
                "The precision argument must be between 0 and 1.")
        params = self.nn.build_index(
            self.dataset,
            target_precision=self.options.precision,
            build_weight=self.options.build_weight,
            memory_weight=self.options.memory_weight,
            sample_fraction=self.options.sample_fraction)

        configdict = ConfigParser()
        configdict.add_section('params')
        for (k, v) in params.items():
            # Stringify explicitly: stricter ConfigParser variants reject
            # non-string option values.
            configdict.set('params', k, str(v))

        if self.options.params_file is not None:
            # The with-block closes the file even if write() fails.
            with open(self.options.params_file, "w") as params_stream:
                configdict.write(params_stream)
        else:
            configdict.write(sys.stdout)
示例#4
0
    def load_index(self, filename, pts):
        """
        Load an index previously saved to disk and attach it to pts.

        An index already held by this object is freed first and its
        bookkeeping is cleared.

        Raises FLANNException if the dtype of pts is not in allowed_types
        or if the loader returns no index.
        """
        if pts.dtype.type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % pts.dtype)

        pts = ensure_2d_array(pts, default_flags)
        npts, dim = pts.shape

        previous = self.__curindex
        if previous is not None:
            release = flann.free_index[self.__curindex_type]
            release(previous, pointer(self.__flann_parameters))
            self.__curindex = None
            self.__curindex_data = None
            self.__added_data = []
            self.__curindex_type = None

        loader = flann.load_index[pts.dtype.type]
        self.__curindex = loader(c_char_p(to_bytes(filename)), pts, npts, dim)

        if self.__curindex is None:
            message = ('Error loading the FLANN index with filename=%r.'
                       ' C++ may have thrown more detailed errors')
            raise FLANNException(message % (filename,))

        # Record the new index state only after a successful load.
        self.__curindex_data = pts
        self.__added_data = []
        self.__removed_ids = []
        self.__curindex_type = pts.dtype.type
示例#5
0
    def execute(self):
        """
        Compute ground-truth nearest neighbors for a test set and write
        them to options.match_file in "dat" format.

        If options.test_file is not an existing file, a test set of
        options.count points is sampled from the input dataset.  The
        truncated dataset is then written next to the input file under a
        "new_" prefixed name and the test set is written to
        options.test_file.

        Raises FLANNException if the input file or test file option is None.
        """
        import os.path

        opts = self.options

        if opts.input_file is None:
            raise FLANNException("Need an input file")
        print("Reading input data from file " + opts.input_file)
        dataset = read(opts.input_file)

        if opts.test_file is None:
            raise FLANNException("Need a test file")
        if isfile(opts.test_file):
            print("Reading test data from file " + opts.test_file)
            testset = read(opts.test_file)
        else:
            print("Sampling test file")
            testset = sample_dataset(dataset, opts.count, remove=True)
            dataset = dataset[0:dataset.shape[0] - opts.count]
            print("Writing new dataset file")
            # Prefix only the file name; prefixing the whole path would
            # corrupt any directory component of input_file.
            directory, basename = os.path.split(opts.input_file)
            write(dataset, os.path.join(directory, "new_" + basename))
            print("Writing testset file")
            write(testset, opts.test_file)

        print("Computing ground truth")

        # time.clock() was removed in Python 3.8; use perf_counter when it
        # exists and fall back to clock on older interpreters.
        if hasattr(time, "perf_counter"):
            timer = time.perf_counter
        else:
            timer = time.clock

        start = timer()
        match = compute_ground_truth(dataset, testset, opts.nn)
        print("It took %g seconds" % (timer() - start))

        print("Writing match file")
        write(match, opts.match_file, format="dat")
示例#6
0
def save(dataset, filename):
    """
    Save dataset to filename in NumPy's .npy format.

    Raises FLANNException if dataset is not a numpy.ndarray, if the
    installed numpy has no numpy.save, or if writing the file fails.
    """
    if not isinstance(dataset, numpy.ndarray):
        raise FLANNException("Dataset must be in numpy format")
    if not hasattr(numpy, "save"):
        raise FLANNException(
            "Format not supported. You need at least numpy version 1.1")
    try:
        numpy.save(filename, dataset)
    except Exception as e:
        # Still a FLANNException for callers, but carrying the real cause
        # instead of blaming the numpy version for every failure.
        raise FLANNException(e)
示例#7
0
def write(dataset, filename):
    """
    Write dataset to filename in NumPy's .npy format.

    Raises FLANNException if dataset is not a numpy.ndarray, if the
    installed numpy has no numpy.save, or if writing the file fails.
    """
    if not isinstance(dataset, numpy.ndarray):
        raise FLANNException("Can only save numpy arrays")
    if not hasattr(numpy, "save"):
        raise FLANNException(
            "Format not supported. You need at least numpy version 1.1")

    try:
        numpy.save(filename, dataset)
    except Exception as e:
        # Still a FLANNException for callers, but carrying the real cause
        # instead of blaming the numpy version for every failure.
        raise FLANNException(e)
示例#8
0
 def execute(self):
     """
     Read the input dataset with the requested dtype and write it to the
     output file in the requested format.

     Raises FLANNException if the input or output file option is None.
     """
     if self.options.input_file is None:
         raise FLANNException("Need an input file")
     if self.options.output_file is None:
         raise FLANNException("Need an output file")
     # Single-argument print(...) gives the same output whether print is a
     # statement (Python 2) or a function (Python 3).
     print("Reading input data from file " + self.options.input_file)
     dataset = read(self.options.input_file,
                    dtype=numpy.dtype(self.options.dtype))
     print("Writing to file %s" % self.options.output_file)
     write(dataset, self.options.output_file, format=self.options.format)
         
示例#9
0
    def nn_index(self, qpts, num_neighbors=1, **kwargs):
        """
        For each point in qpts (which may be a single point), return the
        num_neighbors nearest points in the index built by build_index.

        Keyword arguments are merged into the stored FLANN parameters.

        Returns a tuple (indices, dists).  Both arrays have shape
        (nqpts, num_neighbors), flattened to (nqpts,) when
        num_neighbors == 1.

        Raises FLANNException when no index is available, when the dtype
        of qpts is not in allowed_types, or when it differs from the dtype
        of the index.
        """

        if self.__curindex is None:
            raise FLANNException(
                'build_index(...) method not called first or current index deleted.'
            )

        if qpts.dtype.type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % qpts.dtype)

        if self.__curindex_type != qpts.dtype.type:
            raise FLANNException('Index and query must have the same type')

        qpts = ensure_2d_array(qpts, default_flags)

        npts, dim = self.get_indexed_shape()

        if qpts.size == dim:
            # reshape() returns a new array instead of modifying qpts, so
            # the result must be rebound for a lone point to become one row.
            qpts = qpts.reshape(1, dim)

        nqpts = qpts.shape[0]

        assert qpts.shape[1] == dim, 'data and query must have the same dims'
        assert npts >= num_neighbors, 'more neighbors than there are points'

        result = np.empty((nqpts, num_neighbors), dtype=index_type)
        if self.__curindex_type == np.float64:
            dists = np.empty((nqpts, num_neighbors), dtype=np.float64)
        else:
            dists = np.empty((nqpts, num_neighbors), dtype=np.float32)

        self.__flann_parameters.update(kwargs)

        flann.find_nearest_neighbors_index[self.__curindex_type](
            self.__curindex,
            qpts,
            nqpts,
            result,
            dists,
            num_neighbors,
            pointer(self.__flann_parameters),
        )

        if num_neighbors == 1:
            return (result.reshape(nqpts), dists.reshape(nqpts))
        else:
            return (result, dists)
示例#10
0
 def add_points(self, new_pts, rebuild_threshold=2):
     """
     Add new_pts to the current index and remember them in the list of
     added data.

     rebuild_threshold is forwarded unchanged to the underlying
     add_points call.

     Raises FLANNException if the dtype of new_pts is not in allowed_types
     or does not match the dtype of the index.
     """
     point_type = new_pts.dtype.type
     if point_type not in allowed_types:
         raise FLANNException('Cannot handle type: %s' % new_pts.dtype)
     if new_pts.dtype != self.__curindex_type:
         raise FLANNException('New points must have the same type')

     new_pts = ensure_2d_array(new_pts, default_flags)
     count = new_pts.shape[0]
     add_fn = flann.add_points[self.__curindex_type]
     add_fn(self.__curindex, new_pts, count, rebuild_threshold)
     self.__added_data.append(new_pts)
示例#11
0
    def nn(self, pts, qpts, num_neighbors=1, **kwargs):
        """
        Return the num_neighbors nearest points in pts for each point in
        qpts, without keeping an index on this object.

        Keyword arguments are merged into the stored FLANN parameters.

        Returns a tuple (indices, dists).  Both arrays have shape
        (nqpts, num_neighbors), flattened to (nqpts,) when
        num_neighbors == 1.

        Raises FLANNException if either dtype is not in allowed_types or
        if the two dtypes differ.
        """

        if pts.dtype.type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % pts.dtype)

        if qpts.dtype.type not in allowed_types:
            # Report the dtype that actually failed the check (qpts).
            raise FLANNException('Cannot handle type: %s' % qpts.dtype)

        if pts.dtype != qpts.dtype:
            raise FLANNException('Data and query must have the same type')

        pts = ensure_2d_array(pts, default_flags)
        qpts = ensure_2d_array(qpts, default_flags)

        npts, dim = pts.shape
        nqpts = qpts.shape[0]

        assert qpts.shape[1] == dim, 'data and query must have the same dims'
        assert npts >= num_neighbors, 'more neighbors than there are points'

        result = np.empty((nqpts, num_neighbors), dtype=index_type)
        if pts.dtype == np.float64:
            dists = np.empty((nqpts, num_neighbors), dtype=np.float64)
        else:
            dists = np.empty((nqpts, num_neighbors), dtype=np.float32)

        self.__flann_parameters.update(kwargs)

        flann.find_nearest_neighbors[pts.dtype.type](
            pts,
            npts,
            dim,
            qpts,
            nqpts,
            result,
            dists,
            num_neighbors,
            pointer(self.__flann_parameters),
        )

        if num_neighbors == 1:
            return (result.reshape(nqpts), dists.reshape(nqpts))
        else:
            return (result, dists)
示例#12
0
    def kmeans(self, pts, num_clusters, max_iterations=None,
               dtype=None, **kwargs):
        """
        Runs kmeans on pts with num_clusters centroids.  Returns a
        numpy array of size num_clusters x dim.

        If max_iterations is not None, it is forwarded to
        hierarchical_kmeans as the iteration limit.

        If dtype is None (the default) or equal to pts.dtype, the result
        is returned as computed.  Otherwise the returned array is converted
        to dtype, which may be a scalar type, a numpy dtype instance or a
        dtype string.

        Raises FLANNException if num_clusters is not an integer >= 1.
        """

        if int(num_clusters) != num_clusters or num_clusters < 1:
            raise FLANNException('num_clusters must be an integer >= 1')

        if num_clusters == 1:
            # A single cluster is just the mean of all points.
            centroid = np.mean(pts, 0).reshape(1, pts.shape[1])
            if dtype is None or dtype == pts.dtype:
                return centroid
            # astype() accepts every dtype spelling; calling dtype(...)
            # only works when dtype is a scalar type object.
            return centroid.astype(dtype)

        return self.hierarchical_kmeans(pts, int(num_clusters), 1,
                                        max_iterations,
                                        dtype, **kwargs)
示例#13
0
def load(filename, rows=-1, cols=-1, dtype=numpy.float32):
    """
    Load an array from filename with numpy.load and return it.

    rows, cols and dtype are accepted but not used by this loader.

    Raises FLANNException if the installed numpy has no numpy.save, which
    this module uses as its probe for .npy support.
    """
    # Explicit capability check instead of a bare except around an
    # attribute access.
    if not hasattr(numpy, "save"):
        raise FLANNException(
            "Format not supported. You need at least numpy version 1.1")
    return numpy.load(filename)
示例#14
0
文件: dataset.py 项目: songyining/rcc
def write(dataset, filename, format="bin"):
    """
    Write dataset to filename using the writer selected by format.

    "bin" selects binary_dataset, "dat" selects dat_dataset and "npy"
    selects npy_dataset.  Any other value raises FLANNException.
    """
    if format == "bin":
        writer = binary_dataset
    elif format == "dat":
        writer = dat_dataset
    elif format == "npy":
        writer = npy_dataset
    else:
        raise FLANNException("Error: Unknown dataset format")
    writer.write(dataset, filename)
示例#15
0
def save(dataset, filename, format=None, **kwargs):
    """
    Save dataset to filename through the handler registered for format in
    dataset_formats.

    When format is None it is taken from the extension of filename, without
    the leading dot.  Extra keyword arguments are passed to the handler's
    save().  Any exception raised along the way is re-raised wrapped in a
    FLANNException.
    """
    try:
        chosen = format
        if chosen is None:
            extension = os.path.splitext(filename)[1]
            chosen = extension[1:]
        dataset_formats[chosen].save(dataset, filename, **kwargs)
    except Exception as err:
        raise FLANNException(err)
示例#16
0
 def save(dataset, filename, **kwargs):
     """
     Store dataset in the HDF5 file filename.

     The dataset is created under kwargs['dataset_name'], defaulting to
     'dataset'.  A 'title' keyword is accepted but not used.

     Raises FLANNException if dataset is not a numpy.ndarray or if any
     h5py operation fails.
     """
     if not isinstance(dataset, numpy.ndarray):
         raise FLANNException("Dataset must be in numpy format")
     dataset_name = kwargs.get('dataset_name', 'dataset')
     h5file = None
     try:
         # 'a' (read/write, create if missing) is spelled out because it
         # stopped being h5py's default mode in h5py 3.0.
         h5file = h5py.File(filename, 'a')
         h5file.create_dataset(dataset_name, data=dataset)
     except Exception as e:
         raise FLANNException(e)
     finally:
         # Close only a file that was actually opened; if h5py.File itself
         # failed there is no handle to close.
         if h5file is not None:
             h5file.close()
示例#17
0
def save(dataset, filename):
    """
    Dump dataset as raw bytes to filename and describe it in a side file.

    The side file filename + ".meta" holds four lines: the word BINARY,
    dataset.shape[0], dataset.shape[1] and the dtype name.

    Raises FLANNException if dataset is not a numpy.ndarray.
    """
    if not isinstance(dataset, numpy.ndarray):
        raise FLANNException("Dataset must be in numpy format")

    meta_path = filename + ".meta"
    with open(meta_path, 'w') as meta_file:
        description = "BINARY\n%d\n%d\n%s" % (
            dataset.shape[0], dataset.shape[1], dataset.dtype.name)
        meta_file.write(description)

    dataset.tofile(filename)
示例#18
0
    def add_points(self, pts, rebuild_threshold=2):
        """
        Adds points to pre-built index.

        Params:
            pts: 2D numpy array of points.
            rebuild_threshold: reallocs index when it grows by factor of
                `rebuild_threshold`. A smaller value is more space
                efficient but less computationally efficient. Must be
                greater than 1.

        Raises FLANNException if the dtype of pts is not in allowed_types
        or does not match the dtype of the index.
        """
        if pts.dtype.type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % pts.dtype)
        if pts.dtype.type != self.__curindex_type:
            raise FLANNException('New points must have the same type')
        pts = ensure_2d_array(pts, default_flags)

        npts = pts.shape[0]
        flann.add_points[self.__curindex_type](self.__curindex, pts, npts,
                                               rebuild_threshold)
        # np.vstack is the function np.row_stack aliased; row_stack itself
        # is deprecated in NumPy 2.0.
        self.__curindex_data = np.vstack((self.__curindex_data, pts))
        self.__added_data.append(pts)
示例#19
0
def write(dataset, filename):
    """
    Write dataset as a text header file plus a raw ".bin" data file.

    The header goes to filename and lists, one per line: the word BINARY,
    the data file name, dataset.shape[1], dataset.shape[0] and the dtype
    name.  The array contents go to filename + ".bin" through fwrite.

    Raises FLANNException if dataset is not a numpy.ndarray.
    """
    if not isinstance(dataset, numpy.ndarray):
        raise FLANNException("Can only save numpy arrays")

    with open(filename, 'w') as header_file:
        header = "BINARY\n%s\n%d\n%d\n%s" % (
            filename + ".bin",
            dataset.shape[1],
            dataset.shape[0],
            dataset.dtype.name,
        )
        header_file.write(header)

    with open(filename + ".bin", 'wb') as data_file:
        fwrite(data_file, dataset.size, dataset)
示例#20
0
 def execute(self):
     if self.options.count > 0 and self.options.length > 0 and self.options.filename != None:
         print "Saving a random (%d,%d) matrix in file %s... " % (
             self.options.count, self.options.length,
             self.options.filename),
         stdout.flush()
         data = float32(random((self.options.count, self.options.length)))
         write(data, self.options.filename)
         print "done"
     else:
         raise FLANNException(
             "Error: Incorrect arguments specified (a filename must be given and the count and length must be positive)"
         )
示例#21
0
    def execute(self):
        """
        Read the input dataset and, when options.count is positive, write
        a sample of that many features to options.save_file in
        options.format.

        Raises FLANNException if no input file is given.
        """
        opts = self.options

        if opts.input_file is None:
            raise FLANNException("Need an input file")
        # Single-argument print(...) gives the same output whether print is
        # a statement (Python 2) or a function (Python 3).
        print("Reading input data from file " + opts.input_file)
        dataset = read(opts.input_file)

        if opts.count > 0:
            print("Sampling %d features" % opts.count)
            sampledset = sample_dataset(dataset, opts.count)

            print("Writing sampled dataset to file %s" % opts.save_file)
            write(sampledset,
                  opts.save_file,
                  format=opts.format)
示例#22
0
文件: dataset.py 项目: songyining/rcc
def read(filename, dtype=float32):
    """
    Read a dataset from filename, picking the reader from the first bytes
    of the file.

    A header starting with "BINARY" selects binary_dataset, one with
    "NUMPY" at offset 1 selects npy_dataset, and a header whose first
    whitespace-separated token parses as a float selects dat_dataset.

    Raises FLANNException when the format is not recognised or when the
    dat reader fails.
    """
    with open(filename, "rb") as fd:
        header = fd.read(10)

    # The file is opened in binary mode, so compare against bytes literals;
    # comparing bytes with str is always False on Python 3.
    if header[0:6] == b"BINARY":
        return binary_dataset.read(filename, dtype)
    if header[1:6] == b"NUMPY":
        return npy_dataset.read(filename, dtype)

    try:
        # Probe only: a leading numeric token means plain-text data.
        float(header.split()[0])
        return dat_dataset.read(filename, dtype)
    except Exception:
        raise FLANNException("Error: Unknown dataset format")
示例#23
0
    def execute(self):
        """
        Cluster the input dataset with hierarchical k-means and write the
        cluster centers to options.clusters_file in "dat" format.

        The number of branchings is derived from options.clusters and
        options.branching as (clusters - 1) // (branching - 1); both are
        assumed to be integers.

        Raises FLANNException if the input file or clusters file option is
        None.
        """
        opts = self.options

        if opts.input_file is None:
            raise FLANNException("No input file given.")
        if opts.clusters_file is None:
            raise FLANNException("No clusters file given.")

        # A single pre-formatted argument makes print(...) produce the same
        # output whether print is a statement (Python 2) or a function.
        print('Reading input dataset from %s' % (opts.input_file,))
        dataset = read(opts.input_file)
        print("Computing clusters")

        flann = FLANN(log_level=opts.log_level)
        num_clusters = opts.clusters
        branching = opts.branching
        # Floor division keeps the integer result that "/" gave on
        # Python 2; true division would produce a float on Python 3.
        num_branches = (num_clusters - 1) // (branching - 1)
        clusters = flann.hierarchical_kmeans(
            dataset,
            branching,
            num_branches,
            opts.max_iterations,
            centers_init=opts.centers_init)

        print("Saving %d clusters to file %s" % (clusters.shape[0],
                                                 opts.clusters_file))
        write(clusters, opts.clusters_file, format="dat")
示例#24
0
    def execute(self):
        """
        Build an index on the input dataset and evaluate it against
        ground-truth matches.

        When options.precision is positive the index is tested with
        test_with_precision, otherwise with test_with_checks.  The values
        returned by those helpers are not used here.

        Raises FLANNException if the input file, algorithm, test file or
        match file option is None.
        """
        opts = self.options

        if opts.input_file is None:
            raise FLANNException("No input file given.")
        if opts.algorithm is None:
            raise FLANNException("No algorithm specified")
        if opts.test_file is None:
            raise FLANNException("No test file given.")
        if opts.match_file is None:
            raise FLANNException("No match file given.")

        # A single pre-formatted argument makes print(...) produce the same
        # output whether print is a statement (Python 2) or a function.
        print('Reading input dataset from %s' % (opts.input_file,))
        dataset = read(opts.input_file)

        flann = FLANN(log_level=opts.log_level)
        flann.build_index(dataset,
                          algorithm=opts.algorithm,
                          trees=opts.trees,
                          branching=opts.branching,
                          iterations=opts.max_iterations,
                          centers_init=opts.centers_init)

        print('Reading test dataset from %s' % (opts.test_file,))
        testset = read(opts.test_file)

        # Report the file that is actually read here (the match file).
        print('Reading ground truth matches from %s' % (opts.match_file,))
        matches = read(opts.match_file, dtype=int)

        # Results are not bound to names: nothing below uses them, and a
        # local called "time" would shadow the time module.
        if opts.precision > 0:
            test_with_precision(flann, dataset, testset,
                                matches, opts.precision,
                                opts.nn)
        else:
            test_with_checks(flann, dataset, testset,
                             matches, opts.checks,
                             opts.nn)
示例#25
0
    def load(filename, rows=-1, cols=-1, dtype=numpy.float32, **kwargs):
        """
        Load a dataset from the HDF5 file filename as a numpy array.

        The dataset read is the top-level entry named
        kwargs['dataset_name'], defaulting to 'dataset'.  rows, cols and
        dtype are accepted but not used by this loader.

        Raises FLANNException if the file cannot be opened, if no such
        dataset exists, or if reading it fails.
        """
        dataset_name = kwargs.get('dataset_name', 'dataset')
        h5file = None
        try:
            # Loading never needs write access, so open read-only.
            h5file = h5py.File(filename, 'r')
            if not any(node == dataset_name for node in h5file.keys()):
                raise KeyError(
                    "no dataset named %r in %s" % (dataset_name, filename))
            return numpy.array(h5file[dataset_name])
        except Exception as e:
            raise FLANNException(e)
        finally:
            # Close only a file that was actually opened; if h5py.File
            # itself failed there is no handle to close.
            if h5file is not None:
                h5file.close()
示例#26
0
    def add_points(self, pts, rebuild_threshold=2.0):
        """
        Adds points to pre-built index.

        Params:
            pts: 2D numpy array of points.
            rebuild_threshold: reallocs index when it grows by factor of
                `rebuild_threshold`. A smaller value is more space
                efficient but less computationally efficient. Must be
                greater than 1.

        Raises FLANNException if the dtype of pts is not in allowed_types.
        """
        if pts.dtype.type not in allowed_types:
            raise FLANNException("Cannot handle type: %s"%pts.dtype)
        pts = ensure_2d_array(pts, default_flags)
        npts, dim = pts.shape
        flann.add_points[self.__curindex_type](
            self.__curindex, pts, npts, dim, rebuild_threshold)
        # np.vstack is the function np.row_stack aliased; row_stack itself
        # is deprecated in NumPy 2.0.
        self.__curindex_data = np.vstack((self.__curindex_data, pts))
示例#27
0
    def nn_index(self, qpts, num_neighbors=1, **kwargs):
        """
        For each point in qpts (which may be a single point), return the
        num_neighbors nearest points in the index built by build_index.

        Keyword arguments are merged into the stored FLANN parameters; the
        'checks' entry of those parameters is passed to the search.

        Returns a tuple (indices, dists).  Both arrays have shape
        (nqpts, num_neighbors), flattened to (nqpts,) when
        num_neighbors == 1.

        Raises FLANNException when no index is available.
        """

        if self.__curindex is None:
            raise FLANNException(
                "build_index(...) method not called first or current index deleted."
            )

        npts, dim = self.__curindex_data.shape

        if qpts.size == dim:
            # reshape() returns a new array instead of modifying qpts, so
            # the result must be rebound for a lone point to become one row.
            qpts = qpts.reshape(1, dim)

        qpts = ensure_2d_array(qpts, float32, default_flags)

        nqpts = qpts.shape[0]

        assert (qpts.shape[1] == dim)
        assert (npts >= num_neighbors)

        result = empty((nqpts, num_neighbors), dtype=index_type)
        dists = empty((nqpts, num_neighbors), dtype=float32)

        self.__flann_parameters.update(kwargs)
        checks = self.__flann_parameters['checks']

        flann.flann_find_nearest_neighbors_index(
            self.__curindex, qpts, nqpts, result, dists, num_neighbors, checks,
            pointer(self.__flann_parameters))

        if num_neighbors == 1:
            return (result.reshape(nqpts), dists.reshape(nqpts))
        else:
            return (result, dists)
示例#28
0
    def nn_radius(self, query, radius, checks, **kwargs):
        """
        Search the current index for the neighbors of query within radius.

        query is converted with require(query, float32, default_flags).
        checks is passed straight to the radius search, and keyword
        arguments are merged into the stored FLANN parameters.

        Returns a tuple (indices, dists), each truncated to the count
        reported by the underlying radius search.

        Raises FLANNException when no index is available.
        """

        # Identity test: None is a singleton, so "is" is the right check.
        if self.__curindex is None:
            raise FLANNException(
                "build_index(...) method not called first or current index deleted."
            )

        npts, dim = self.__curindex_data.shape

        query = require(query, float32, default_flags)
        assert (query.shape[0] == dim)

        # Output buffers hold npts entries, the maximum count handed to the
        # search call below.
        result = empty(npts, dtype=index_type)
        dists = empty(npts, dtype=float32)

        self.__flann_parameters.update(kwargs)

        nn = flann.flann_radius_search(self.__curindex, query, result, dists,
                                       npts, radius, checks,
                                       pointer(self.__flann_parameters))

        return (result[0:nn], dists[0:nn])
示例#29
0
    def load_index(self, filename, pts):
        """
        Loads an index previously saved to disk.

        An index already held by this object is freed first.

        Raises FLANNException if the dtype of pts is not in allowed_types
        or if the loader returns no index.
        """

        if pts.dtype.type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % pts.dtype)

        pts = ensure_2d_array(pts, default_flags)
        npts, dim = pts.shape

        if self.__curindex is not None:
            flann.free_index[self.__curindex_type](
                self.__curindex, pointer(self.__flann_parameters))
            self.__curindex = None
            self.__curindex_data = None
            self.__curindex_type = None

        self.__curindex = flann.load_index[pts.dtype.type](
            c_char_p(to_bytes(filename)), pts, npts, dim)

        # Fail at load time rather than recording data and type for an
        # index that was never created.
        if self.__curindex is None:
            raise FLANNException(
                ('Error loading the FLANN index with filename=%r.'
                 ' C++ may have thrown more detailed errors') % (filename,))

        self.__curindex_data = pts
        self.__curindex_type = pts.dtype.type
示例#30
0
    def build_index(self, pts, **kwargs):
        """
        Build an index over pts and keep it on this object for later
        queries with nn_index(...).

        Any index already held is freed first, so use separate instances
        of this class to keep several indices alive at once.

        pts must have a dtype listed in allowed_types; it is passed through
        ensure_2d_array before indexing.  Keyword arguments are merged into
        the stored FLANN parameters.

        Returns a dict copy of the FLANN parameters with an extra
        'speedup' entry holding the value filled in by the build call.

        Raises FLANNException if the dtype of pts is not in allowed_types.
        """
        if pts.dtype.type not in allowed_types:
            raise FLANNException('Cannot handle type: %s' % pts.dtype)

        pts = ensure_2d_array(pts, default_flags)
        npts, dim = pts.shape

        self.__ensureRandomSeed(kwargs)
        self.__flann_parameters.update(kwargs)

        stale_index = self.__curindex
        if stale_index is not None:
            release = flann.free_index[self.__curindex_type]
            release(stale_index, pointer(self.__flann_parameters))
            self.__curindex = None

        # The build call writes its speedup value into this float by
        # reference.
        speedup = c_float(0)
        builder = flann.build_index[pts.dtype.type]
        self.__curindex = builder(
            pts, npts, dim, byref(speedup), pointer(self.__flann_parameters))
        self.__curindex_data = pts
        self.__curindex_type = pts.dtype.type

        summary = dict(self.__flann_parameters)
        summary['speedup'] = speedup.value
        return summary