def import_from_sweeps(db, sdss_tabname, sweep_files, create=False, all=False):
    """ Import an SDSS catalog from a collection of SDSS sweep files.

        Note: Assumes underlying shared storage for all output table
              cells (i.e., any worker is able to write to any cell).
    """
    if create:
        # Create the new database
        sdss_table = db.create_table(sdss_tabname, sdss_table_def)
    else:
        sdss_table = db.table(sdss_tabname)

    t0 = time.time()
    at = 0
    ntot = 0
    pool = pool2.Pool()
    for (file, nloaded, nin) in pool.imap_unordered(sweep_files, import_from_sweeps_aux, (db, sdss_tabname, all), progress_callback=pool2.progress_pass):
        at = at + 1
        ntot = ntot + nloaded
        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(sweep_files)
        sfile = "..." + file[-67:] if len(file) > 70 else file
        print(' ===> Imported %-70s [%d/%d, %5.2f%%] +%-6d %9d (%.0f/%.0f min.)' % (sfile, at, len(sweep_files), 100 * float(at) / len(sweep_files), nloaded, ntot, time_pass, time_tot))

    del pool
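# --- Hypothetical usage sketch (not part of the original module) ---
# Shows how import_from_sweeps() above might be driven. The database handle,
# the 'sdss' table name and the sweep-file path are illustrative assumptions;
# only the import_from_sweeps() signature comes from the code above.
def _example_import_from_sweeps(db):
    import glob
    # Collect the sweep files to load (path is made up)
    sweep_files = sorted(glob.glob('/data/sdss/sweeps/*.fits.gz'))
    # Create the table on first import, then load every file in parallel
    import_from_sweeps(db, 'sdss', sweep_files, create=True)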
def get_cells(self, bounds=None, return_bounds=False, include_cached=True):
    """ Return the cells completely covering the requested bounds.

        bounds must be a list of (Polygon, intervalset) tuples.

        With return_bounds=False (the default) the output is a list of
        cell_ids; with return_bounds=True it is a dict mapping each
        cell_id to a list of (xybounds, tbounds) tuples.
    """
    self._include_cached = include_cached

    # Special case of bounds=None (all sky)
    if bounds is None:
        bounds = [(bn.ALLSKY, intervalset((-np.inf, np.inf)))]

    # Find all existing cells satisfying the bounds
    cells = defaultdict(dict)
    if False:
        # Single-threaded implementation
        for bounds_xy, bounds_t in bounds:
            self._get_cells_recursive(cells, bounds_xy, bounds_t, 0, 0, 1, 1.)
            self._get_cells_recursive(cells, bounds_xy, bounds_t, 0, 1, 1, 1.)
            self._get_cells_recursive(cells, bounds_xy, bounds_t, 1, 0, 1, 1.)
            self._get_cells_recursive(cells, bounds_xy, bounds_t, 1, 1, 1, 1.)
    else:
        # Multi-process implementation (appears to be as good or better than
        # a single thread in nearly all cases of interest)
        pool = pool2.Pool()
        lev = min(4, self._pix.level)
        ij = np.indices((2**lev, 2**lev)).reshape(2, -1).T  # List of i,j coordinates
        for cells_ in pool.imap_unordered(ij, _get_cells_kernel, (lev, self, bounds), progress_callback=pool2.progress_pass):
            for cell_id, b in cells_.iteritems():
                for xyb, tb in b.iteritems():
                    _add_bounds(cells, cell_id, xyb, tb)
        del pool

    if len(cells):
        # Transform (x, y, t) tuples to cell_ids
        xyt = np.array(cells.keys()).transpose()
        cell_ids = self._pix._cell_id_for_xyt(xyt[0], xyt[1], xyt[2])

        # Reorder cells to be a dict of cell: [(poly, time), (poly, time)] entries
        cells = dict(((cell_id, v.items()) for (cell_id, (k, v)) in izip(cell_ids, cells.iteritems())))

    if not return_bounds:
        return cells.keys()
    else:
        return cells
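# --- Hypothetical usage sketch (not part of the original module) ---
# Illustrates the two return modes of get_cells() above. `table` stands for an
# instance of the class this method belongs to; bn, intervalset and np are the
# same modules already used in the method body.
def _example_get_cells(table):
    # All-sky, all-time query; the default bounds=None does the same thing
    allsky = [(bn.ALLSKY, intervalset((-np.inf, np.inf)))]

    # return_bounds=False (the default): a flat list of cell_ids
    cell_ids = table.get_cells(bounds=allsky)

    # return_bounds=True: dict of cell_id -> [(xybounds, tbounds), ...]
    cells = table.get_cells(bounds=allsky, return_bounds=True)
    return cell_ids, cells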
def compute_counts_aux(db, tabname, cells, progress_callback=None):
    ntotal = 0
    pool = pool2.Pool()
    for nobjects in pool.map_reduce_chain(cells, [(ls_mapper, db, tabname)], progress_callback=progress_callback):
        ntotal += nobjects
    return ntotal
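# --- Hypothetical usage sketch (not part of the original module) ---
# Ties compute_counts_aux() to the cell enumeration above: count the rows of a
# table over all of its populated cells. The 'sdss' table name is illustrative.
def _example_compute_counts(db):
    cells = db.table('sdss').get_cells()      # flat list of cell_ids
    ntotal = compute_counts_aux(db, 'sdss', cells)
    return ntotal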
def import_from_dvo(catdir, dvo_files, create=False):
    """ Import a PS1 catalog from DVO.

        Note: Assumes underlying shared storage for all catalog cells
              (i.e., any worker is able to write to any cell).
    """
    if create:
        # Create the new database
        cat = catalog.Table(catdir, name='ps1', mode='c')
        cat.create_cgroup('astrometry', {
            'columns': to_dtype(astromCols),
            'primary_key': 'id',
            'spatial_keys': ('ra', 'dec'),
            'cached_flag': 'cached'
        })
        cat.create_cgroup('photometry', {
            'columns': to_dtype(photoCols)
        })
        cat.create_cgroup('import', {
            'columns': [
                ('file_id', 'a20'),
                ('hdr', 'i8'),
                ('cksum', 'a32'),
                ('imageid', '64i8'),
                ('blobarr', '64i8'),
            ],
            'blobs': ['hdr', 'blobarr']
        })
    else:
        cat = catalog.Table(catdir)

    t0 = time.time()
    at = 0
    ntot = 0
    pool = pool2.Pool()
    for (file, nloaded, nin) in pool.imap_unordered(dvo_files, import_from_dvo_aux, (cat,)):
    #for (file, nloaded, nin) in imap(lambda file: import_from_dvo_aux(file, cat), dvo_files):
        at = at + 1
        ntot = ntot + nloaded
        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(dvo_files)
        print(' ===> Imported %s [%d/%d, %5.2f%%] +%-6d %9d (%.0f/%.0f min.)' % (file, at, len(dvo_files), 100 * float(at) / len(dvo_files), nloaded, ntot, time_pass, time_tot))
def _update(self, table_path, snapid):
    # Find what we already have loaded
    prevsnap = np.max(self._leaves['snapid']) if len(self._leaves) > 2 else None
    assert prevsnap <= snapid, "Cannot update a catalog to an older snapshot"

    # Enumerate all existing snapshots older or equal to snapid, and newer
    # than prevsnap, and sort them, newest first
    self.__snapshots = sorted(isnapshots(table_path, first=prevsnap, last=snapid, no_first=True, return_path=True), reverse=True)

    # Add the snapid snapshot by hand, if not in the list of committed snapshots
    # (because it is permissible to update to a yet-uncommitted snapshot)
    if len(self.__snapshots) == 0 or self.__snapshots[0][0] != snapid:
        self.__snapshots.insert(0, (snapid, get_snapshot_path(table_path, snapid)))

    # Recursively scan, in parallel
    w = bhpix.width(self._pix.level)
    bmap = np.zeros((w, w), dtype=object)

    lev = min(4, self._pix.level)
    dx = bhpix.pix_size(lev)
    i, j = np.indices((2**lev, 2**lev)).reshape(2, -1)  # List of i,j coordinates
    w2 = 1 << (lev - 1)
    x, y = (i - w2 + 0.5) * dx, (j - w2 + 0.5) * dx

    pool = pool2.Pool()
    for bmap2 in pool.imap_unordered(zip(x, y), _scan_recursive_kernel, (lev, self)):
        assert not np.any((bmap != 0) & (bmap2 != 0))
        mask = bmap2 != 0
        bmap[mask] = bmap2[mask]
    del pool

    # Add data about cells that were not touched by this update
    bmap_cur = self._bmaps[self._pix.level]
    mask_cur = (bmap_cur != 0) & (bmap == 0)
    lists_cur = [[(mjd, snapid, cell_id, next > 0) for (mjd, snapid, cell_id, next) in iter_siblings(self._leaves, offs)] for offs in bmap_cur[mask_cur]]
    try:
        bmap[mask_cur] = lists_cur
    except ValueError:
        # Workaround for a numpy 1.6.0 bug (http://projects.scipy.org/numpy/ticket/1870)
        coords = np.mgrid[0:w, 0:w][:, mask_cur].T  # List of coordinates corresponding to True entries in mask_cur
        for (ii, jj), vv in izip(coords, lists_cur):
            bmap[ii, jj] = vv

    # Repack the temporal siblings to a single numpy array, emulating a linked list
    lists = bmap[bmap != 0]
    llens = np.fromiter((len(l) for l in lists), dtype=np.int32)
    leaves = np.empty(np.sum(llens) + 2, dtype=[('mjd', 'f4'), ('snapid', object), ('cell_id', 'u8'), ('next', 'i4')])
    # We start with two dummy entries, so that offs=0 and 1 are invalid and can take other meanings.
    leaves[:2] = [(np.inf, 0, 0, END_MARKER)] * 2
    seen = dict()
    at = 2
    for l in lists:
        last_i = len(l) - 1
        for (i, (mjd, snapid, cell_id, has_data)) in enumerate(l):
            # Make equal strings refer to the same string object
            try:
                snapid = seen[snapid]
            except KeyError:
                seen[snapid] = snapid

            next = 1 if has_data else -1
            if i == last_i:
                next *= END_MARKER
            leaves[at] = (mjd, snapid, cell_id, next)
            at += 1

    # Construct bmap that has offsets to head of the linked list of siblings
    offs = np.zeros(len(llens), dtype=np.int64)
    offs[1:] = np.cumsum(llens)[:-1]
    offs += 2  # Pointers to beginnings of individual lists
    assert np.all(abs(leaves['next'][offs - 1]) == END_MARKER)
    obmap = np.zeros(bmap.shape, dtype=np.int32)
    obmap[bmap != 0] = offs

    # Recompute mipmaps
    bmaps = self._compute_mipmaps(obmap)
    return bmaps, leaves
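# --- Hypothetical sketch (not part of the original module) ---
# Walks the packed 'leaves' array built by _update() above, following the
# conventions it establishes: siblings of one spatial cell are stored
# contiguously starting at the offset recorded in the mipmap, the sign of
# 'next' says whether that snapshot holds data, and abs(next) == END_MARKER
# terminates the list. The real traversal lives in iter_siblings(); this is
# only an illustration of the layout.
def _example_walk_siblings(leaves, offs):
    while True:
        mjd, snapid, cell_id, next = leaves[offs]
        yield mjd, snapid, cell_id, next > 0    # next > 0 <=> has_data
        if abs(next) == END_MARKER:
            break
        offs += 1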
def make_object_catalog(db, obj_tabname, det_tabname, exp_tabname, radius=1./3600., explist=None, oldexps=None, fovradius=None):
    """ Create the object catalog """
    # For debugging -- a simple check to see if matching works is to rerun
    # the match across a just-matched table. In this case, we expect
    # all detections to be matched to existing objects, and no new ones added.
    _rematching = int(os.getenv('REMATCHING', False))

    o2d_tabname = '_%s_to_%s' % (obj_tabname, det_tabname)
    det_table = db.table(det_tabname)
    obj_table = db.table(obj_tabname)
    o2d_table = db.table(o2d_tabname)

    # Fetch all non-empty cells with detections. Group them by the same spatial
    # cell ID. This will be the list over which the first kernel will map.
    if explist is None:
        det_cells = det_table.get_cells()
    else:
        # Fetch only those cells that can contain data from the given exposure list
        print >> sys.stderr, "Enumerating cells with new detections: ",
        det_cells = get_cells_with_dets_from_exps(db, explist, exp_tabname, det_tabname, fovradius)
        print >> sys.stderr, "%d cells to process." % (len(det_cells))
    det_cells_grouped = det_table.pix.group_cells_by_spatial(det_cells).items()

    t0 = time.time()
    pool = pool2.Pool()
    ntot = 0
    ntotobj = 0
    at = 0
    for (nexp, nobj, ndet, nnew, nmatch, ndetnc) in pool.map_reduce_chain(
            det_cells_grouped,
            [(_obj_det_match, db, obj_tabname, det_tabname, o2d_tabname, radius, explist, _rematching)],
            progress_callback=pool2.progress_pass):
        at += 1
        if nexp is None:
            continue

        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(det_cells)

        ntot += nmatch
        ntotobj += nnew
        nobjnew = nobj + nnew
        pctnew = 100. * nnew / nobjnew if nobjnew else 0.
        pctmatch = 100. * nmatch / ndetnc if ndetnc else 0.
        print " match %7d det to %7d obj (%3d exps): %7d new (%6.2f%%), %7d matched (%6.2f%%) [%.0f/%.0f min.]" % (ndet, nobj, nexp, nnew, pctnew, nmatch, pctmatch, time_pass, time_tot)

    # Save the list of exposures that have been matched to the object table
    if oldexps is not None and len(explist):
        allexps = set(oldexps) | set(explist)

        uri = "lsd:%s:cache:all_matched_exposures.txt" % obj_tabname
        with db.open_uri(uri, mode='w') as f:
            for exp in allexps:
                f.write("%d\n" % exp)

    print "Matched a total of %d sources." % (ntot)
    print "Total of %d objects added." % (ntotobj)
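# --- Hypothetical usage sketch (not part of the original module) ---
# Shows a minimal invocation of make_object_catalog() above with the default
# 1 arcsec match radius. The table names are illustrative; only the function
# signature comes from the code above.
def _example_make_object_catalog(db, new_exp_ids=None):
    # Full (re)match over every detection cell
    make_object_catalog(db, 'ps1_obj', 'ps1_det', 'ps1_exp')
    # Incremental match restricted to cells touched by a list of exposure IDs
    if new_exp_ids is not None:
        make_object_catalog(db, 'ps1_obj', 'ps1_det', 'ps1_exp', explist=new_exp_ids)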
def import_from_smf(db, det_tabname, exp_tabname, smf_files, survey, create=False):
    """ Import a PS1 table from a collection of SMF files.

        Note: Assumes underlying shared storage for all table cells
              (i.e., any worker is able to write to any cell).
    """
    with locking.lock(db.path[0] + "/.__smf-import-lock.lock"):
        if not db.table_exists(det_tabname) and create:
            # Set up commit hooks
            exp_table_def['commit_hooks'] = [('Updating neighbors', 1, 'lsd.smf', 'make_image_cache', [det_tabname])]

            # Create new tables
            det_table = db.create_table(det_tabname, det_table_def)
            exp_table = db.create_table(exp_tabname, exp_table_def)

            # Set up a one-to-X join relationship between the two tables
            # (join det_table:exp_id->exp_table:exp_id)
            db.define_default_join(det_tabname, exp_tabname,
                                   type='indirect',
                                   m1=(det_tabname, "det_id"),
                                   m2=(det_tabname, "exp_id"),
                                   _overwrite=create)
        else:
            det_table = db.table(det_tabname)
            exp_table = db.table(exp_tabname)

    det_c2f = gen_tab2fits(det_table_def)
    exp_c2f = gen_tab2fits(exp_table_def)

    t0 = time.time()
    at = 0
    ntot = 0
    pool = pool2.Pool()
    smf_fns = []
    exp_ids = []
    for (file, exp_id, smf_fn, nloaded, nin) in pool.imap_unordered(smf_files, import_from_smf_aux, (det_table, exp_table, det_c2f, exp_c2f, survey), progress_callback=pool2.progress_pass):
        smf_fns.append(smf_fn)
        exp_ids.append(exp_id)
        at = at + 1
        ntot = ntot + nloaded
        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(smf_files)
        print >> sys.stderr, ' ===> Imported %s [%d/%d, %5.2f%%] +%-6d %9d (%.0f/%.0f min.)' % (file, at, len(smf_files), 100 * float(at) / len(smf_files), nloaded, ntot, time_pass, time_tot)
    del pool

    ret = colgroup.ColGroup()
    ret._EXP = np.array(exp_ids, dtype=np.uint64)
    ret.smf_fn = np.array(smf_fns, dtype='a40')
    return ret
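# --- Hypothetical usage sketch (not part of the original module) ---
# Imports a list of .smf files and collects the exposure IDs that were loaded.
# The paths, table names and survey label are illustrative assumptions; the
# function signature and the returned ColGroup fields come from the code above.
def _example_import_from_smf(db):
    import glob
    smf_files = sorted(glob.glob('/data/ps1/smf/*.smf'))
    ret = import_from_smf(db, 'ps1_det', 'ps1_exp', smf_files, survey='3pi', create=True)
    return ret._EXP, ret.smf_fn   # uint64 exposure IDs and the matching SMF filenames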