def not_replay_duplicate(
        replay: sc2reader.resources.Replay,
        collection_: pymongo.collection.Collection = replays_info) -> bool:
    '''
    Report whether a replay is absent from a collection.

    The lookup matches documents whose ``replay_name`` field equals
    ``replay.filename``. A short status message is printed in both cases.

    *Args:*
        - replay (sc2reader.resources.Replay):
            The replay being checked; only its ``filename`` attribute is read.
        - collection_ (pymongo.collection.Collection):
            The collection in which the existence check is performed.

    *Returns:*
        - bool:
            True if no matching document exists, False otherwise.
    '''
    # limit=1 lets the server stop counting at the first match; only
    # presence/absence matters here.
    already_stored = collection_.count_documents(
        {'replay_name': replay.filename}, limit=1)

    if already_stored:
        print(
            f'{replay.filename} already exists in the replay_info collection.')
        return False

    print(
        f'New replay found: {Path(replay.filename).name} \n adding to replay_info collection.'
    )
    return True
def count_items(collection: pymongo.collection.Collection,
                *,
                query: dict = None) -> int:
    """
    Count the documents in a collection that match a query.

    :param collection: Collection to query
    :param query: Filter passed to ``count_documents``; when None, an empty
        filter is used so every document is counted
    :return: Whatever ``collection.count_documents`` returns for the filter
        (the number of matching documents)
    """
    if query is None:
        # A fresh dict per call, so no mutable state is shared between calls.
        query = {}
    return collection.count_documents(query)
def do_skip(coll_tables: pymongo.collection.Collection,
            raw_pdf_name: str,
            page_num: str,
            coords: str) -> bool:
    """
    Tell whether an entry for this PDF region is already stored.

    Returns True (i.e. "skip it") when ``coll_tables`` holds at least one
    document whose ``pdf_name``, ``page_num`` and ``coords`` fields equal the
    given values, and False otherwise.
    """
    lookup = {
        'pdf_name': raw_pdf_name,
        'page_num': page_num,
        'coords': coords,
    }
    # limit=1: only existence matters, so counting can stop at the first hit.
    hits = coll_tables.count_documents(lookup, limit=1)
    return hits != 0
def count(collection: pymongo.collection.Collection,
          query: Dict[Text, Any]) -> int:
    """Return the result of ``collection.count_documents(query)``."""
    matching = collection.count_documents(query)
    return matching
def exploreChunks(self,
                  ilon_chunk: int,
                  ilat_chunk: int,
                  delta: int,
                  mask_query: Union[dict, None],
                  retrn: str,
                  col_grid: pymongo.collection.Collection
                  ) -> Union[dict, pymongo.cursor.Cursor]:
    '''
    Explore a square chunk of the grid and return either the number of grid
    cells it contains or the grid cells themselves.

    The chunk is described by a GeoJSON polygon whose corners are
    (lon, lat), (lon + delta, lat), (lon + delta, lat + delta) and
    (lon, lat + delta), where both longitudes are passed through
    ``self._shiftlon`` before being used.

    Parameters
    ----------
    ilon_chunk : int
        Longitude of the corner the box starts from (value before the
        ``_shiftlon`` adjustment).
    ilat_chunk : int
        Latitude of the corner the box starts from.
    delta : int
        Width and height of the bounding box (in degrees).
    mask_query : Union[dict, None]
        If all grid cells need to be considered, set mask_query=None.
        Otherwise, a MongoDB filter merged into the geographic query
        (top-level keys are added with ``dict.update``, so a ``'loc'`` key
        would replace the geographic condition).
        Example: for only land grid cells (i.e., excluding oceans)
        mask_query = {'lsm': {'$gt': 0.6}}.
        Land-sea mask (lsm) has fractional values in the range 0 (sea) to 1
        (land).
    retrn : str
        What to return:
        * either 'ndocs' for the number of grid cells inside the chunk
        * or 'docs' for the grid cells inside the chunk.
    col_grid : pymongo.collection.Collection
        Mongo connection to the grid collection.

    Returns
    -------
    Union[dict, pymongo.cursor.Cursor]
        For 'ndocs': a dict with keys ``ilon_chunk`` (the longitude as
        given, before shifting), ``ilat_chunk`` and ``n`` (the count).
        For 'docs': the cursor returned by ``col_grid.find``, projecting
        ``id_grid`` and ``loc`` and excluding ``_id``.

    Raises
    ------
    ValueError
        If ``retrn`` is neither 'ndocs' nor 'docs'.
    '''
    # Reject unknown modes up front; previously this fell through to an
    # unbound local and surfaced as an opaque UnboundLocalError.
    if retrn not in ('ndocs', 'docs'):
        raise ValueError(
            f"retrn must be 'ndocs' or 'docs', got {retrn!r}")

    # Keep the caller's longitude for the result; the query uses the
    # shifted values.
    ilon_orig = int(ilon_chunk)
    ilon_chunk = int(self._shiftlon(x=ilon_chunk))
    ilon_plus = int(self._shiftlon(x=ilon_chunk + delta))
    ilat_chunk = int(ilat_chunk)

    # Closed ring: the first corner is repeated as the last point.
    ring = [[ilon_chunk, ilat_chunk],
            [ilon_plus, ilat_chunk],
            [ilon_plus, ilat_chunk + delta],
            [ilon_chunk, ilat_chunk + delta],
            [ilon_chunk, ilat_chunk]]
    geoqry = {
        'loc': {
            '$geoWithin': {
                '$geometry': {
                    'type': 'Polygon',
                    'coordinates': [ring]
                }
            }
        }
    }
    if mask_query is not None:
        geoqry.update(mask_query)

    if retrn == 'ndocs':
        # How many grid cells in this chunk?
        return {
            'ilon_chunk': ilon_orig,
            'ilat_chunk': ilat_chunk,
            'n': col_grid.count_documents(filter=geoqry)
        }

    # retrn == 'docs'
    return col_grid.find(geoqry, {'id_grid': 1, 'loc': 1, '_id': 0})