Exemplo n.º 1
0
async def insert_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    """Insert a new dataset or refresh an already registered one.

    The dataset file is opened through ``H5Dataset`` and looked up in
    ``collection`` twice: by its ``selfPath`` and by its ``token``.

    * known path -> ``update_dataset`` is called with the stored record;
    * unknown path and unknown token -> ``put_dataset`` is called;
    * unknown path but known token -> a "duplicated token" error is logged.

    ``modules_merger`` is awaited afterwards in all three cases.

    An ``H5DatasetInvalidException`` is only logged. Any other exception is
    logged as well, after the dataset's conversion folder
    (``out_dir/<token>``) has been removed, provided the failure happened
    after that folder path was computed.
    """
    token = None
    convert_folder = None
    try:
        h5_dataset = H5Dataset(path_to_dataset)
        token = h5_dataset.dataset["token"]
        convert_folder = os.path.join(out_dir, token)

        same_path_record = collection.find_one({"selfPath": path_to_dataset})
        same_token_record = collection.find_one({"token": token})

        if same_path_record is not None:
            # The path is already registered: refresh the stored record.
            update_dataset(same_path_record, h5_dataset, convert_folder,
                           collection)
        elif same_token_record is None:
            # Neither the path nor the token is known: brand new dataset.
            put_dataset(h5_dataset, convert_folder, collection)
        else:
            # New path, but another record already owns this token.
            logging.error(f"Duplicated token in dataset {h5_dataset.token}")

        await modules_merger(out_dir, gmt_dir)
    except H5DatasetInvalidException as e:
        logging.error(f"Invalid dataset {path_to_dataset}: {e}")
    except Exception as e:
        # Drop the conversion folder so no partial output is left behind.
        if convert_folder is not None:
            rm_rf(convert_folder)
        logging.error(f"Failed to update dataset {token} due to: {e}")
Exemplo n.º 2
0
def get_user(coll: pymongo.collection.Collection, user_id: ObjectId):
    """Return the document whose ``_id`` equals ``user_id``.

    ``user_id`` must be an ``ObjectId`` (checked with ``assert``).
    Returns ``None`` when ``find_one`` yields nothing (or a falsy value).
    """
    assert isinstance(user_id, ObjectId)

    # Any falsy lookup result is normalised to None.
    return coll.find_one({'_id': user_id}) or None
Exemplo n.º 3
0
def find_one(col: pymongo.collection.Collection,
             filter_dict: dict,
             include_id: bool = False) -> dict or None:
    """Fetch a single document matching ``filter_dict`` as a plain ``dict``.

    :param col: collection whose ``find_one`` is called
    :param filter_dict: query filter handed to ``find_one``
    :param include_id: when false (default) the ``_id`` field is excluded
        through the projection ``{"_id": 0}``; when true no projection is
        applied, so the complete document including ``_id`` is returned
    :return: a ``dict`` copy of the matching document, or ``None`` when
        ``find_one`` returns ``None``
    """
    # {"_id": 1} would be an *inclusion* projection and return nothing but
    # the _id field, so "include the id" must mean "do not project at all".
    projection = None if include_id else {"_id": 0}

    result = col.find_one(filter_dict, projection)
    if result is None:
        return None
    return dict(result)
Exemplo n.º 4
0
def get_item(collection: pymongo.collection.Collection,
             model: Union[Type[BaseModel], Type[dict]],
             item_id: Union[UUID, str],
             *,
             id_key: str = "id_",
             query: dict = None,
             projection: dict = None,
             raise_exc: bool = True):
    """
    Retrieve a single item from a collection

    :param collection: Collection to query
    :param model: Class which the JSON in the collection represents; pass
        ``dict`` to get the raw document back
    :param item_id: UUID or name of desired item
    :param id_key: If the UUID is stored outside of id_, specify here
    :param query: Extra filter conditions, merged with the filter built by
        ``mongo_filter``. The caller's dict is left untouched.
    :param projection: Projection handed to ``find_one``; the entries of
        ``ignore_mongo_id`` are added on top. The caller's dict is left
        untouched.
    :param raise_exc: Whether to raise exception if item is not found.
    :return: ``model(**document)``, the raw document when ``model`` is
        ``dict``, or ``None`` when nothing matches and ``raise_exc`` is false
    :raises problems.DoesNotExistException: when nothing matches and
        ``raise_exc`` is true
    """
    item_filter = mongo_filter(model, item_id, id_key=id_key)

    # Work on copies: updating the dicts passed in by the caller would leak
    # the id filter / projection entries into whatever the caller does with
    # those dicts afterwards (e.g. reusing them for the next lookup).
    projection = {} if projection is None else dict(projection)
    projection.update(ignore_mongo_id)

    query = {} if query is None else dict(query)
    query.update(item_filter)

    item_json = collection.find_one(query, projection=projection)

    if item_json is None:
        if raise_exc:
            raise problems.DoesNotExistException("read", model.__name__,
                                                 item_filter)
        return None
    if model is dict:
        return item_json
    return model(**item_json)
Exemplo n.º 5
0
async def remove_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    """Remove the dataset registered under ``path_to_dataset``.

    Does nothing when no record with that ``selfPath`` exists. Otherwise the
    record is deleted from ``collection``, its conversion folder
    (``out_dir/<token>``) is removed with ``rm_rf``, ``modules_merger`` is
    awaited and the removal is logged.
    """
    path_filter = {'selfPath': path_to_dataset}

    dataset = collection.find_one(path_filter)
    if dataset is None:
        return

    # Resolve the folder before deleting the record it is derived from.
    convert_folder = os.path.join(out_dir, dataset["token"])
    collection.delete_one({'selfPath': path_to_dataset})
    rm_rf(convert_folder)

    await modules_merger(out_dir, gmt_dir)
    logging.info(f"Successfully removed dataset {dataset['token']}")
Exemplo n.º 6
0
def get_user_document_owncloud_id(coll: pymongo.collection.Collection,
                                  user_id: ObjectId, document_id: ObjectId):
    """Return the ``file`` value of one embedded document of a user.

    The user is selected by ``_id`` and the embedded document by
    ``documents._id``; the positional projection ``documents.$`` limits the
    returned array to the matching element.

    Both ids must be ``ObjectId`` instances (checked with ``assert``).

    Returns ``None`` when no user/document pair matches. A matching document
    without a ``file`` key still raises ``KeyError``.
    """
    assert isinstance(user_id, ObjectId)
    assert isinstance(document_id, ObjectId)

    document_mdoc = coll.find_one(
        {
            "_id": user_id,
            "documents._id": document_id
        }, {"documents.$": 1})

    # find_one returns None when nothing matches; subscripting that result
    # would fail with an unhelpful TypeError.
    if not document_mdoc:
        return None

    return document_mdoc["documents"][0]["file"]
Exemplo n.º 7
0
def paragraph_generator(col_query: pymongo.collection.Collection,
                        df: dd.DataFrame, start_index: int, end_index: int,
                        max_num: int):
    """Yield the paragraph documents referenced by a positional slice of ``df``.

    ``len(df)`` must equal ``max_num`` (checked with ``assert``). Rows at
    positions ``start_index`` (inclusive) to ``end_index`` (exclusive) are
    visited; for each one the value at ``record['paragraph_id']['$oid']`` is
    wrapped in ``ObjectId`` and looked up by ``_id`` in ``col_query``. The
    result of every lookup is yielded as is, so a missing paragraph yields
    whatever ``find_one`` returns for it.
    """
    assert len(df) == max_num, (
        'len(df) {} != max_num from iter_index {}'.format(len(df), max_num))

    # Rows are addressed by position, not by the frame's own index: the df
    # index from dask is not consecutive because it reads data by chunk.
    numbered_rows = zip(range(max_num), df.iterrows())
    selected_rows = itertools.islice(numbered_rows, start_index, end_index)

    for _, (_, record) in selected_rows:
        fields = record.to_dict()
        paragraph_oid = ObjectId(fields['paragraph_id']['$oid'])
        yield col_query.find_one({'_id': paragraph_oid})
Exemplo n.º 8
0
    def get_variable_list(
            collection_name: str,
            collection: pm.collection.Collection) -> Sequence[str]:
        '''Returns the variable names derived from one document of the collection

        A single document is fetched from ``collection`` with an empty filter
        and handed, together with ``collection_name``, to
        ``DataUtils.get_flattened_variable_names``; the helper's result is
        returned unchanged.

        Keyword arguments:
        @param collection_name -- name of a MongoDB collection (passed to the
                                  helper; presumably prepended to the variable
                                  names there -- verify against the helper)
        @param collection -- a MongoDB collection

        '''
        sample_document = collection.find_one({})
        return DataUtils.get_flattened_variable_names(
            sample_document, collection_name)
Exemplo n.º 9
0
def get_tops_for_index(
        xyz: np.ndarray,
        collection: pymongo.collection.Collection) -> List[np.ndarray]:
    """Return the ``tops`` stored for the index ``xyz`` as numpy arrays.

    The document whose ``index`` equals ``{'x': xyz[0], 'y': xyz[1],
    'z': xyz[2]}`` is fetched with ``_id`` and ``index`` projected away.
    Every entry of its ``tops`` list becomes ``np.array([x, y, z])``.
    An empty list is returned when no such document exists.
    """
    index_filter = {'index': {'x': xyz[0], 'y': xyz[1], 'z': xyz[2]}}
    excluded_fields = {'_id': 0, 'index': 0}

    document = collection.find_one(index_filter, excluded_fields)
    if document is None:
        return []

    tops = []
    for top in document['tops']:
        tops.append(np.array([top['x'], top['y'], top['z']]))
    return tops
def add_mdoc(coll: pymongo.collection.Collection, mdoc: dict):
    """Insert ``mdoc`` into ``coll``, asking before replacing an existing one.

    When a document with the same ``_id`` is already stored, it is printed
    and the user is prompted on stdin. Only the answers "a", "ano", "y" or
    "yes" (case-insensitive) lead to the stored document being deleted and
    ``mdoc`` inserted in its place; any other answer keeps the stored
    document and returns without writing.
    """
    mdoc_filter = {"_id": mdoc["_id"]}
    current_mdoc_in_db = coll.find_one(mdoc_filter)
    already_stored = current_mdoc_in_db is not None

    if already_stored:
        print(f"Stávanící mdokument '{mdoc['_id']}': {current_mdoc_in_db}")
        rewrite = input(f"'{mdoc['_id']}' mdokument už v kolekci existuje, mám ho přepsat (a/N)? ")

        confirmed = rewrite.lower() in ("a", "ano", "y", "yes")
        if not confirmed:
            print("Zachovávám původní mdokument.")
            return

        # Make room for the replacement inserted below.
        coll.delete_one(mdoc_filter)

    coll.insert_one(mdoc)
    print(f"Přidán mdokument {mdoc['_id']}")
Exemplo n.º 11
0
    def check_jji(self,
                  collection: pymongo.collection.Collection,
                  driver: webdriver.Chrome,
                  new_only: bool = False):
        """Walk the JustJoin.it Atom feed and update the offers found there.

        Every feed entry is looked up in ``collection`` by its ``id`` and then
        passed to ``self.update_single_offer`` together with the stored record
        (``None`` when the offer is not stored yet).

        Args:
            collection (pymongo.collection.Collection): MongoDB collection to be updated
            driver (webdriver.Chrome): Selenium webdriver, forwarded to ``update_single_offer``
            new_only (bool, optional): If true, entries that already have a stored record are skipped. Defaults to False.
        """
        feed = feedparser.parse('https://justjoin.it/feed.atom')
        entries = tqdm.tqdm(feed['entries'], desc="Getting new offers")

        for offer in entries:
            stored_record = collection.find_one({"id": offer['id']})

            # In new-only mode, offers that are already stored are left alone.
            if stored_record and new_only:
                continue

            self.update_single_offer(collection, driver, offer,
                                     stored_record)
Exemplo n.º 12
0
def get_user_active_document(coll: pymongo.collection.Collection,
                             user_id,
                             document_type,
                             date: datetime = None):
    """Return the user's document of ``document_type`` that is active on ``date``.

    An embedded document counts as active when ``valid_from <= date <=
    valid_until``, its ``type`` equals ``document_type`` and it either has no
    ``invalidation_date`` or one later than ``date``.

    ``user_id`` must be an ``ObjectId`` (checked with ``assert``). A falsy
    ``date`` is replaced by ``datetime.now()``.

    Returns the first matching embedded document, or ``None`` when the lookup
    finds nothing.
    """
    assert isinstance(user_id, ObjectId)

    date = date or datetime.now()

    not_invalidated = [
        {"invalidation_date": {"$exists": False}},
        {"invalidation_date": {"$gt": date}},
    ]
    active_document = {
        "valid_from": {"$lte": date},
        "valid_until": {"$gte": date},
        "type": document_type,
        "$or": not_invalidated,
    }
    user_filter = {
        "_id": user_id,
        "documents": {"$elemMatch": active_document},
    }

    # The positional projection keeps only the first matching array element.
    mdoc = coll.find_one(user_filter, {"documents.$": 1})
    if not mdoc:
        return None

    documents = mdoc.get("documents", None)
    if not documents:
        return None
    return documents[0]
Exemplo n.º 13
0
def get_user_active_contract(coll: pymongo.collection.Collection,
                             user_id: ObjectId,
                             date: datetime = None,
                             contract_type: str = "dpp"):
    """Return the user's contract of ``contract_type`` that is active on ``date``.

    An embedded contract counts as active when ``valid_from <= date <=
    valid_until``, its ``type`` equals ``contract_type`` and it either has no
    ``invalidation_date`` or one later than ``date``.

    :param coll: collection holding the user documents
    :param user_id: ``_id`` of the user; must be an ``ObjectId`` (asserted)
    :param date: moment to evaluate; a falsy value means ``datetime.now()``
    :param contract_type: value the contract's ``type`` field must have;
        defaults to ``"dpp"``, the only type that was supported before this
        parameter existed
    :return: the first matching embedded contract, or ``None`` when the
        lookup finds nothing
    """
    assert isinstance(user_id, ObjectId)

    if not date:
        date = datetime.now()

    mdoc = coll.find_one(
        {
            "_id": user_id,
            "contracts": {
                "$elemMatch": {
                    "valid_from": {
                        "$lte": date
                    },
                    "valid_until": {
                        "$gte": date
                    },
                    "$or": [
                        {
                            "invalidation_date": {
                                "$exists": False
                            }
                        },
                        {
                            "invalidation_date": {
                                "$gt": date
                            }
                        },
                    ],
                    "type": contract_type
                }
            }
        },
        # The positional projection keeps only the first matching element.
        {"contracts.$": 1})

    if not mdoc:
        return None

    contracts = mdoc.get("contracts", None)

    return contracts[0] if contracts else None
Exemplo n.º 14
0
def get_consecutive(collection: pymongo.collection.Collection, field: str,
                    n: int) -> pd.Series:
    """
    Build the ``n`` consecutive values that follow the largest stored
    value of ``field``.

    Parameters
    ----------
    collection
        Collection queried for the document with the highest ``field``
        value (``find_one`` sorted by ``field`` in descending order).
    field
        Name of the field whose maximum is looked up.
    n
        Number of consecutive values to produce.

    Returns
    -------
    pd.Series
        The values ``field_max + 1`` up to and including ``field_max + n``.

    Notes
    -----
    An empty collection makes ``find_one`` return ``None``, in which case
    the ``.get`` call fails with ``AttributeError``.
    """
    descending_by_field = [(field, pymongo.DESCENDING)]
    top_document = collection.find_one(sort=descending_by_field)
    field_max = top_document.get(field)

    # range() starts at the current maximum; the shift by one moves the
    # whole series past it.
    following = pd.Series(range(field_max, field_max + n))
    return following + 1
Exemplo n.º 15
0
def update_user_address(coll: pymongo.collection.Collection, user_id: ObjectId,
                        address: dict):
    """Create, update or remove one address (identified by its type) of a user.

    ``address`` must contain a ``"type"`` key. It is split by
    ``get_mdocument_set_unset_dicts`` into values to set and keys to unset.

    * If the user already has an address of that type, the set/unset
      operations are applied to it. Afterwards every stored address of that
      type that consists of nothing but ``"type"`` is removed through
      ``delete_user_address``.
    * Otherwise the address is appended to ``addresses`` through
      ``add_embedded_mdoc_to_mdoc_array``, but only when it carries more than
      just ``"type"``.

    ``user_id`` must be an ``ObjectId`` (checked with ``assert``).
    """
    assert isinstance(user_id, ObjectId)

    address_type = address["type"]

    to_set, to_unset = get_mdocument_set_unset_dicts(address)

    address_filter = {"_id": user_id, "addresses.type": address_type}

    # if this address type is already in the database
    user_mdoc = coll.find_one(address_filter)
    if user_mdoc:

        operation_dict = {}
        if len(to_set) > 1:  # there is something besides "type"
            operation_dict["$set"] = {
                f"addresses.$.{key}": value
                for key, value in to_set.items()
            }
        if to_unset:
            operation_dict["$unset"] = {
                f"addresses.$.{key}": value
                for key, value in to_unset.items()
            }

        # pymongo rejects an empty update document with ValueError, which
        # used to happen when the address carried nothing but "type" and
        # nothing to unset. In that case nothing changes in the database and
        # the document fetched above is already the current state.
        if operation_dict:
            user_mdoc = coll.find_one_and_update(
                address_filter,
                operation_dict,
                return_document=ReturnDocument.AFTER)

        # delete the address from "addresses" if it contains only "type"
        # after the update
        for stored_address in user_mdoc["addresses"]:
            if (stored_address["type"] == address_type
                    and len(stored_address) <= 1):
                delete_user_address(coll, user_id, address_type)

    # otherwise add the address to the database if it contains more than
    # just "type"
    elif len(address) > 1:
        add_embedded_mdoc_to_mdoc_array(coll,
                                        user_id,
                                        "addresses",
                                        address,
                                        filter_values=None)
Exemplo n.º 16
0
def insert_id_if_not_exist(col: pymongo.collection.Collection, key_name: str, value):
    """Create the document ``{"_id": key_name, "sequence_value": value}`` once.

    Nothing is written when a document with ``_id == key_name`` already
    exists; its stored ``sequence_value`` is left as it is.
    """
    already_present = col.find_one({"_id": key_name}) is not None
    if already_present:
        return
    col.insert_one({"_id": key_name, "sequence_value": value})
Exemplo n.º 17
0
def get_api(apis_collection: pymongo.collection.Collection,
            api_provider: str) -> dict:
    """Look up one API document by its ``provider`` field.

    Returns whatever ``find_one`` yields for ``{"provider": api_provider}``,
    i.e. ``None`` when no such document is stored.
    """
    provider_filter = {"provider": api_provider}
    return apis_collection.find_one(provider_filter)
Exemplo n.º 18
0
 def fetch(self, db_collection: pymongo.collection.Collection):
     """Load the document for this object's symbol and return ``self``.

     The result of looking up ``{"symbol": self._symbol}`` in
     ``db_collection`` is stored in ``self._document`` (``None`` when
     nothing matches). Returning ``self`` allows call chaining.
     """
     symbol_filter = {"symbol": self._symbol}
     self._document = db_collection.find_one(symbol_filter)
     return self
Exemplo n.º 19
0
def get_meme(memes_collection: pymongo.collection.Collection,
             meme_name: str) -> dict:
    """Get single meme based on its name.

    Returns whatever ``find_one`` yields for ``{"name": meme_name}``, i.e.
    ``None`` when no meme with that name is stored.
    """
    name_filter = {"name": meme_name}
    return memes_collection.find_one(name_filter)
Exemplo n.º 20
0
def insert_id_if_not_exist(col: pymongo.collection.Collection, key_name: str,
                           value):
    """Seed the document ``{"_id": key_name, "sequence_value": value}``.

    The insert only happens when no document with ``_id == key_name`` is
    found; an existing document is left unchanged.
    """
    id_filter = {"_id": key_name}
    if col.find_one(id_filter) is not None:
        return
    col.insert_one({"_id": key_name, "sequence_value": value})