Пример #1
0
def not_replay_duplicate(
        replay: sc2reader.resources.Replay,
        collection_: pymongo.collection.Collection = replays_info) -> bool:
    '''
    Report whether a replay is absent from a collection.

    The lookup counts documents whose ``replay_name`` field equals
    ``replay.filename``. A message describing the outcome is printed in
    both cases; the messages always name the "replay_info" collection,
    whichever collection is actually passed in.

    *Args:*
        - replay (sc2reader.resources.Replay):
            The replay being checked.
        - collection_ (pymongo.collection.Collection):
            The collection in which the existence check is performed.

    *Returns:*
        - bool:
            True if the replay is not in the collection, False if it is.
    '''
    # limit=1: only presence matters, so one match is enough.
    hits = collection_.count_documents({'replay_name': replay.filename},
                                       limit=1)
    if hits:
        print(
            f'{replay.filename} already exists in the replay_info collection.')
        return False

    print(
        f'New replay found: {Path(replay.filename).name} \n adding to replay_info collection.'
    )
    return True
Пример #2
0
def count_items(collection: pymongo.collection.Collection,
                *,
                query: dict = None) -> int:
    """
    Count the documents in a collection that match a query.

    :param collection: Collection whose documents are counted
    :param query: Keyword-only filter handed to ``count_documents``.
        When None, an empty filter is used so every document is counted
    :return: The count reported by ``collection.count_documents``
    """
    if query is None:
        # No filter supplied: count everything.
        return collection.count_documents({})
    return collection.count_documents(query)
Пример #3
0
def do_skip(coll_tables: pymongo.collection.Collection, raw_pdf_name: str,
            page_num: str, coords: str) -> bool:
    """
    Tell whether an entry was already stored and can therefore be skipped.

    Returns True when ``coll_tables`` holds at least one document whose
    ``pdf_name``, ``page_num`` and ``coords`` fields equal the given
    values, and False otherwise.
    """
    lookup = {
        'pdf_name': raw_pdf_name,
        'page_num': page_num,
        'coords': coords
    }
    # limit=1: only presence matters, so one match is enough.
    already_stored = coll_tables.count_documents(lookup, limit=1)
    return already_stored != 0
Пример #4
0
def count(
        collection: pymongo.collection.Collection,
        query: Dict[Text, Any]) -> int:
    """Return the number of documents in ``collection`` matching ``query``."""
    matched = collection.count_documents(query)
    return matched
Пример #5
0
    def exploreChunks(self,
                      ilon_chunk: int,
                      ilat_chunk: int,
                      delta: int,
                      mask_query: Union[dict, None],
                      retrn: str,
                      col_grid: pymongo.collection.Collection)\
            -> Union[dict, pymongo.cursor.Cursor]:
        '''
        Explore a square chunk of the grid and return either the number
        of grid cells inside it or the grid cells themselves.

        The chunk is expressed as a GeoJSON polygon and matched against
        the ``loc`` field of the grid collection with ``$geoWithin``.

        Parameters
        ----------
        ilon_chunk : int
            Longitude of the bounding box corner the box is built from.
            It is passed through ``self._shiftlon`` before being used in
            the query; the unshifted value is the one reported back.
        ilat_chunk : int
            Latitude of the same corner. The box spans
            ``ilat_chunk`` to ``ilat_chunk + delta``.
        delta : int
            Width and height of the bounding box (in degrees).
        mask_query : Union[dict, None]
            If all grid cells need to be considered, set mask_query=None.
            If only certain grid cells need to be considered, filter
            with this query; it is merged into the geographic filter.
            Example: for only land grid cells (i.e., excluding oceans)
            mask_query = {'lsm': {'$gt': 0.6}}.
            Land-sea mask (lsm) has fractional values in the range
            0 (sea) to 1 (land).
        retrn : str
            What to return:
            * either 'ndocs' for the number of grid cells inside the chunk
            * or 'docs' for the grid cells inside the chunk.
        col_grid : pymongo.collection.Collection
            Mongo connection to the grid collection.

        Returns
        -------
        Union[dict, pymongo.cursor.Cursor]
            For ``retrn='ndocs'``: a dict with keys ``'ilon_chunk'``
            (the longitude as passed in, converted to int),
            ``'ilat_chunk'`` and ``'n'`` (the document count).
            For ``retrn='docs'``: the result of ``col_grid.find`` with a
            projection keeping ``id_grid`` and ``loc`` and dropping ``_id``.

        Raises
        ------
        ValueError
            If ``retrn`` is neither 'ndocs' nor 'docs'.
        '''
        # Fail early with a clear message instead of reaching the return
        # statement with no result bound.
        if retrn not in ('ndocs', 'docs'):
            raise ValueError(
                f"retrn must be 'ndocs' or 'docs', got {retrn!r}")

        ilon_orig = int(ilon_chunk)
        ilon_chunk = int(self._shiftlon(x=ilon_chunk))
        # The far edge is computed from the already shifted longitude and
        # shifted again.
        ilon_plus = int(self._shiftlon(x=ilon_chunk + delta))
        ilat_chunk = int(ilat_chunk)

        # NOTE(review): the ring runs from ilat_chunk to ilat_chunk + delta,
        # so for a positive delta the (ilon_chunk, ilat_chunk) vertex is the
        # lowest-latitude corner -- confirm this matches the callers' intent.
        # The first vertex is repeated at the end to close the ring.
        geoqry = {
            'loc': {
                '$geoWithin': {
                    '$geometry': {
                        'type':
                        'Polygon',
                        'coordinates': [[[ilon_chunk, ilat_chunk],
                                         [ilon_plus, ilat_chunk],
                                         [ilon_plus, ilat_chunk + delta],
                                         [ilon_chunk, ilat_chunk + delta],
                                         [ilon_chunk, ilat_chunk]]]
                    }
                }
            }
        }
        if mask_query is not None:
            # Merge the extra conditions into the filter (mask_query itself
            # is left unmodified).
            geoqry.update(mask_query)

        if retrn == 'ndocs':
            # How many grid cells in this chunk ?
            return {
                'ilon_chunk': ilon_orig,
                'ilat_chunk': ilat_chunk,
                'n': col_grid.count_documents(filter=geoqry)
            }

        # retrn == 'docs' (validated above).
        return col_grid.find(geoqry, {'id_grid': 1, 'loc': 1, '_id': 0})