def run_test(self, query_func, npoints=1e6, points=None):
    """
        Benchmark a query function by cross matching the catalog with a set
        of points on the sphere.

        Parameters:
        -----------

            query_func: callable
                function that is used to query the catalog. It is called
                once per point, with keyword arguments only:
                    objs = query_func(ra=ra, dec=dec, coll=self.coll)
                Every call that returns something other than None is
                counted as a successful match.

            npoints: `int`
                number of MC generated points to be used for the matching.
                Ignored when points is given.

            points: `None` or list/array-like
                use this argument to provide points to be used for the
                testing, skipping the MC generation. points should be
                array like and have this structure:
                    [[ra1, dec1], [ra2, dec2], ...]

        Returns:
        --------

            av_query_time: `float`
                average query time in seconds, measured as
                (stop - start) / npoints. It is 0.0 if there are no points
                to query.
    """

    # method specific imports
    import time

    try:
        from tqdm import tqdm as progress
    except ImportError:
        # the progress bar is purely cosmetic: a missing tqdm must not
        # prevent the benchmark from running.
        def progress(iterable):
            return iterable

    # generate the random sample of points, unless the user provided them
    if points is None:
        from extcats.catquery_utils import random_point_sphere
        npoints = int(npoints)
        points = random_point_sphere(npoints)
        self.logger.info(
            "running test queries using %d random points", npoints)
    else:
        npoints = len(points)
        self.logger.info(
            "running test queries using %d user defined points", npoints)

    # measure query time
    tot_found = 0
    start = time.time()
    for pp in progress(points):
        buff = query_func(ra=pp[0], dec=pp[1], coll=self.coll)
        if buff is not None:
            tot_found += 1
    total = time.time() - start

    # npoints is compared to zero rather than testing `points` for
    # truthiness, which is ambiguous for array-like inputs.
    av_query_time = total / float(npoints) if npoints else 0.0

    self.logger.info("Total document found for query: %d", tot_found)
    self.logger.info(
        "Took %.2e sec for %d random queries. Average query time: %.3e sec",
        total, npoints, av_query_time)
    return av_query_time
def test_queries(self, query_type, method, rs_arcsec, npoints=1e4, points=None, rnd_seed=42, **qfunc_args): """ run test queries using a set of uniformly distributed points on a sphere as targets. Parameters: ----------- query_type: `str` identifier for the type of query you want to test. Can either be 'within' (for the findwithin query), 'closest' (for the findclosest), or 'binary' (for the isanything query). method: `str` how to query the database. If - healpix: use the healpix index of the catalog (findwithin_HEALPix) - 2dsphere: use mongodb searches in spherical geometry (findwithin_2Dsphere). In this case a value for the sphere2d_key has to be passed to the function. - raw: run a javascript function to compute the angular distances (use findwithin_RAW) rs_arcsec: `float` search radius in arcseconds. npoints: `int` number of MC generated points to be used for the macthing. points: `None` or list/array-like use this argument to provide points to be used for the testing, skipping the MC generation. points should be array like and have this structure: [[ra1, dec1], [ra2, dec2], ...] rnd_seed: `float` or None if not None, this seed will be passed to np.random. Returns: -------- av_query_time: `float` average query time for npoints queries measured as (start-stop)/npoints """ # method specific imports import time, tqdm, astropy # generate the random sample of points if none is given if points is None: from extcats.catquery_utils import random_point_sphere points = random_point_sphere(int(npoints), rnd_seed) self.logger.info("running test queries using %d random points" % (npoints)) else: npoints = len(points) self.logger.info( "running test queries using %d user defined points" % (npoints)) # set up the query function if 'within' in query_type: qfunc = self.findwithin elif 'closest' in query_type: qfunc = self.findclosest elif 'binary' in query_type: qfunc = self.binaryserach else: raise ValueError( "illegal value for parameter 'query_type': %s. 
Allowed: 'within', 'closest', 'binary'." % query_type) self.logger.info( "running %d test queries using function: %s and method: %s" % (npoints, qfunc.__name__, method)) # measure query time tot_found = 0 start = time.time() for pp in tqdm.tqdm(points): buff = qfunc(pp[0], pp[1], rs_arcsec, method, **qfunc_args) if (type(buff) == tuple and buff == (None, None)) or (buff is None) or (not buff): continue tot_found += 1 end = time.time() total = end - start av_query_time = total / float(npoints) self.logger.info("Total document found in queries: %d" % tot_found) self.logger.info( "Took %.2e sec for %d random queries. Average query time: %.3e sec\n" % (total, int(npoints), av_query_time))