async def insert_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    """Insert or update one H5 dataset record in the collection.

    The dataset is looked up both by its source path and by its token:
    a known path is updated, an unknown path with an unknown token is
    inserted, and an unknown path whose token already exists is rejected
    as a duplicate.  After a successful change the module merger is
    re-run over the output directory.
    """
    convert_folder = None
    token = None
    try:
        h5_dataset = H5Dataset(path_to_dataset)
        dataset = h5_dataset.dataset
        token = dataset["token"]
        convert_folder = os.path.join(out_dir, token)

        path_find_res = collection.find_one({"selfPath": path_to_dataset})
        token_find_res = collection.find_one({"token": token})

        if path_find_res is not None:
            update_dataset(path_find_res, h5_dataset, convert_folder, collection)
        elif token_find_res is not None:
            # Same token under a different path — refuse the insert.
            logging.error(f"Duplicated token in dataset {h5_dataset.token}")
        else:
            put_dataset(h5_dataset, convert_folder, collection)
        await modules_merger(out_dir, gmt_dir)
    except H5DatasetInvalidException as e:
        logging.error(f"Invalid dataset {path_to_dataset}: {e}")
    except Exception as e:
        # Best-effort cleanup of any partially written conversion output.
        if convert_folder is not None:
            rm_rf(convert_folder)
        logging.error(f"Failed to update dataset {token} due to: {e}")
def get_user(coll: pymongo.collection.Collection, user_id: ObjectId):
    """Fetch the user document with the given ``_id``, or ``None``."""
    assert isinstance(user_id, ObjectId)
    user_mdoc = coll.find_one({'_id': user_id})
    return user_mdoc if user_mdoc else None
def find_one(col: pymongo.collection.Collection, filter_dict: dict,
             include_id: bool = False) -> Union[dict, None]:
    """Return the first document matching ``filter_dict`` as a plain dict.

    :param col: collection to query
    :param filter_dict: Mongo filter passed straight to ``find_one``
    :param include_id: when True, keep the ``_id`` field in the result;
        when False, strip it via a ``{"_id": 0}`` projection
    :return: the matching document as a ``dict``, or ``None`` if no match
    """
    # BUG FIX: the previous projection used {"_id": 1} for include_id=True,
    # which asks Mongo for ONLY the _id field and drops everything else.
    # To keep _id alongside the rest of the document, omit the projection.
    projection = None if include_id else {"_id": 0}
    result = col.find_one(filter_dict, projection)
    return None if result is None else dict(result)
def get_item(collection: pymongo.collection.Collection,
             model: Union[Type[BaseModel], Type[dict]],
             item_id: Union[UUID, str],
             *,
             id_key: str = "id_",
             query: dict = None,
             projection: dict = None,
             raise_exc: bool = True):
    """
    Retrieve a single item from a collection

    :param collection: Collection to query
    :param model: Class which the JSON in the collection represents
    :param item_id: UUID or name of desired item
    :param id_key: If the UUID is stored outside of id_, specify here
    :param query: Return only objects that contain the query
    :param projection: Filter to exclude from mongo query result
    :param raise_exc: Whether to raise exception if item is not found.
    :return: Requested object from collection
    """
    # BUG FIX: copy the caller-supplied dicts — the previous version
    # update()d them in place, mutating the caller's arguments.
    projection = {} if projection is None else dict(projection)
    projection.update(ignore_mongo_id)
    query = {} if query is None else dict(query)
    query.update(mongo_filter(model, item_id, id_key=id_key))
    item_json = collection.find_one(query, projection=projection)
    if item_json is None and raise_exc:
        raise problems.DoesNotExistException(
            "read", model.__name__, mongo_filter(model, item_id, id_key=id_key))
    if model is dict or item_json is None:
        return item_json
    return model(**item_json)
async def remove_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    """Delete a dataset record, its converted files, and re-merge modules.

    Does nothing when no dataset is registered under ``path_to_dataset``.
    """
    dataset = collection.find_one({'selfPath': path_to_dataset})
    if dataset is None:
        return
    convert_folder = os.path.join(out_dir, dataset["token"])
    collection.delete_one({'selfPath': path_to_dataset})
    rm_rf(convert_folder)
    await modules_merger(out_dir, gmt_dir)
    logging.info(f"Successfully removed dataset {dataset['token']}")
def get_user_document_owncloud_id(coll: pymongo.collection.Collection,
                                  user_id: ObjectId, document_id: ObjectId):
    """Return the ``file`` field of one embedded document of a user.

    NOTE(review): if no matching user/document exists, ``find_one``
    returns ``None`` and the subscript below raises ``TypeError`` —
    confirm callers guarantee the document exists.
    """
    assert isinstance(user_id, ObjectId)
    assert isinstance(document_id, ObjectId)
    # The "documents.$" projection keeps only the matched array element.
    mdoc = coll.find_one(
        {"_id": user_id, "documents._id": document_id},
        {"documents.$": 1},
    )
    return mdoc["documents"][0]["file"]
def paragraph_generator(col_query: pymongo.collection.Collection,
                        df: dd.DataFrame, start_index: int, end_index: int,
                        max_num: int):
    """Yield paragraph documents for rows [start_index, end_index) of df."""
    assert len(df) == max_num, (
        'len(df) {} != max_num from iter_index {}'.format(len(df), max_num))
    numbered_rows = zip(range(max_num), df.iterrows())
    # df index from dask is not consecutive because it reads data by chunk,
    # so slice by position rather than by the frame's own index.
    for _, (_, record) in itertools.islice(numbered_rows, start_index,
                                           end_index):
        row = record.to_dict()
        yield col_query.find_one(
            {'_id': ObjectId(row['paragraph_id']['$oid'])})
def get_variable_list(collection_name: str,
                      collection: pm.collection.Collection) -> Sequence[str]:
    '''Returns a list of the names of all variables stored in the input collection

    Keyword arguments:
    @param collection_name -- name of a MongoDB collection (prepended to the variable names)
    @param collection -- a MongoDB collection

    '''
    # One arbitrary document is taken as representative of the schema.
    sample_doc = collection.find_one({})
    return DataUtils.get_flattened_variable_names(sample_doc, collection_name)
def get_tops_for_index(
        xyz: np.ndarray,
        collection: pymongo.collection.Collection) -> List[np.ndarray]:
    """Return the 'tops' entries of the document indexed by ``xyz``.

    Each top is converted to a 3-element ``np.array``; an empty list is
    returned when no document matches the index.
    """
    # Note: find_one returns a single document (or None), not a cursor.
    doc = collection.find_one(
        {'index': {
            'x': xyz[0],
            'y': xyz[1],
            'z': xyz[2]
        }}, {
            '_id': 0,
            'index': 0
        })
    if doc is None:
        return []
    return [np.array([top['x'], top['y'], top['z']]) for top in doc['tops']]
def add_mdoc(coll: pymongo.collection.Collection, mdoc: dict):
    """Insert ``mdoc``, interactively asking before overwriting.

    When a document with the same ``_id`` already exists, the user is
    asked on stdin (prompt is in Czech) whether to replace it; any
    answer other than an affirmative keeps the existing document.
    """
    mdoc_filter = {"_id": mdoc["_id"]}
    current_mdoc_in_db = coll.find_one(mdoc_filter)
    if current_mdoc_in_db is not None:
        print(f"Stávanící mdokument '{mdoc['_id']}': {current_mdoc_in_db}")
        rewrite = input(f"'{mdoc['_id']}' mdokument už v kolekci existuje, mám ho přepsat (a/N)? ")
        if rewrite.lower() not in ["a", "ano", "y", "yes"]:
            print("Zachovávám původní mdokument.")
            return
        coll.delete_one(mdoc_filter)
    coll.insert_one(mdoc)
    print(f"Přidán mdokument {mdoc['_id']}")
def check_jji(self, collection: pymongo.collection.Collection, driver: webdriver.Chrome, new_only: bool = False):
    """Run a check for new offers in JustJoin.it RSS feed

    Args:
        collection (pymongo.collection.Collection): MongoDB collection to be updated
        driver (webdriver.Chrome): Selenium webdriver for getting offer details
        new_only (bool, optional): If passed we only get new offers and ignore
            any updates on those in the system. Defaults to False.
    """
    feed = feedparser.parse('https://justjoin.it/feed.atom')
    for entry in tqdm.tqdm(feed['entries'], desc="Getting new offers"):
        existing_record = collection.find_one({"id": entry['id']})
        # Skip only when the offer is already known AND we were asked to
        # process new offers exclusively.
        if existing_record and new_only:
            continue
        self.update_single_offer(collection, driver, entry, existing_record)
def get_user_active_document(coll: pymongo.collection.Collection, user_id,
                             document_type, date: datetime = None):
    """Return the user's document of ``document_type`` active at ``date``.

    A document is active when ``valid_from <= date <= valid_until`` and
    it either has no ``invalidation_date`` or one still in the future.
    ``date`` defaults to now.  Returns ``None`` when the user or a
    matching document does not exist.
    """
    assert isinstance(user_id, ObjectId)
    if not date:
        date = datetime.now()

    active_match = {
        "valid_from": {"$lte": date},
        "valid_until": {"$gte": date},
        "type": document_type,
        "$or": [
            {"invalidation_date": {"$exists": False}},
            {"invalidation_date": {"$gt": date}},
        ],
    }
    # "documents.$" projects only the array element matched by $elemMatch.
    mdoc = coll.find_one(
        {"_id": user_id, "documents": {"$elemMatch": active_match}},
        {"documents.$": 1})
    if not mdoc:
        return None
    documents = mdoc.get("documents", None)
    return documents[0] if documents else None
def get_user_active_contract(coll: pymongo.collection.Collection,
                             user_id: ObjectId, date: datetime = None):
    """Return the user's contract active at ``date`` (default: now).

    A contract is active when ``valid_from <= date <= valid_until`` and
    it either has no ``invalidation_date`` or one still in the future.
    Returns ``None`` when the user or a matching contract is missing.
    """
    assert isinstance(user_id, ObjectId)
    if not date:
        date = datetime.now()

    active_match = {
        "valid_from": {"$lte": date},
        "valid_until": {"$gte": date},
        "$or": [
            {"invalidation_date": {"$exists": False}},
            {"invalidation_date": {"$gt": date}},
        ],
        # TODO: make the contract type configurable (hard-coded to "dpp")
        "type": "dpp",
    }
    # "contracts.$" projects only the array element matched by $elemMatch.
    mdoc = coll.find_one(
        {"_id": user_id, "contracts": {"$elemMatch": active_match}},
        {"contracts.$": 1})
    if not mdoc:
        return None
    contracts = mdoc.get("contracts", None)
    return contracts[0] if contracts else None
def get_consecutive(collection: pymongo.collection.Collection, field: str,
                    n: int) -> pd.Series:
    """Return the next ``n`` consecutive values after the maximum of ``field``.

    The document with the highest ``field`` value supplies the current
    maximum; the result is the Series ``[max + 1, ..., max + n]``.

    Parameters
    ----------
    collection : collection to query; must contain at least one document
        carrying ``field``
    field : name of the numeric field to continue
    n : how many consecutive values to generate

    Returns
    -------
    pd.Series of ``n`` consecutive integers starting at ``max + 1``

    Raises
    ------
    ValueError
        If the collection is empty, or ``field`` is absent from the
        top-sorted document (previously these crashed with
        AttributeError / TypeError).
    """
    top_doc = collection.find_one(sort=[(field, pymongo.DESCENDING)])
    if top_doc is None:
        raise ValueError("cannot get consecutive values: collection is empty")
    field_max = top_doc.get(field)
    if field_max is None:
        raise ValueError(f"field {field!r} not found in collection")
    return pd.Series(range(field_max + 1, field_max + n + 1))
def update_user_address(coll: pymongo.collection.Collection,
                        user_id: ObjectId, address: dict):
    """Create, update, or delete one of a user's embedded addresses.

    ``address`` must contain a "type" key identifying which address to
    touch.  An existing address of that type is updated field-by-field
    ($set / $unset); if only "type" remains afterwards, the address is
    removed entirely.  A new address is only added when it carries more
    than just "type".
    """
    assert isinstance(user_id, ObjectId)
    address_type = address["type"]
    to_set, to_unset = get_mdocument_set_unset_dicts(address)
    # If this address type is already stored for the user, update it.
    if coll.find_one({"_id": user_id, "addresses.type": address_type}):
        operation_dict = {}
        if len(to_set) > 1:  # something besides "type"
            operation_dict["$set"] = {
                f"addresses.$.{key}": value
                for key, value in to_set.items()
            }
        if to_unset:
            operation_dict["$unset"] = {
                f"addresses.$.{key}": value
                for key, value in to_unset.items()
            }
        # BUG FIX: with nothing to set or unset, find_one_and_update({})
        # would raise — there is nothing to change, so return early.
        if not operation_dict:
            return
        updated = coll.find_one_and_update(
            {
                "_id": user_id,
                "addresses.type": address_type
            },
            operation_dict,
            return_document=ReturnDocument.AFTER)
        # Drop the address if only "type" remains after the update.
        # (Loop variable renamed — the original shadowed the ``address``
        # parameter.)
        for stored_address in updated["addresses"]:
            if (stored_address["type"] == address_type
                    and len(stored_address) <= 1):
                delete_user_address(coll, user_id, address_type)
    # Otherwise insert it, provided it holds more than just "type".
    elif len(address) > 1:
        add_embedded_mdoc_to_mdoc_array(coll, user_id, "addresses", address,
                                        filter_values=None)
def insert_id_if_not_exist(col: pymongo.collection.Collection,
                           key_name: str, value):
    """Insert ``{"_id": key_name, "sequence_value": value}`` unless it exists.

    BUG FIX: the previous find_one + insert_one pair could raise
    DuplicateKeyError when two clients raced between the check and the
    insert.  A single atomic upsert with $setOnInsert inserts only when
    the _id is absent and never modifies an existing document.
    """
    col.update_one(
        {"_id": key_name},
        {"$setOnInsert": {"sequence_value": value}},
        upsert=True,
    )
def get_api(apis_collection: pymongo.collection.Collection,
            api_provider: str) -> dict:
    """Look up the API document registered for ``api_provider``."""
    return apis_collection.find_one({"provider": api_provider})
def fetch(self, db_collection: pymongo.collection.Collection):
    """Load this symbol's document from the collection.

    Caches the (possibly ``None``) document on the instance and returns
    ``self`` so calls can be chained fluently.
    """
    self._document = db_collection.find_one({"symbol": self._symbol})
    return self
def get_meme(memes_collection: pymongo.collection.Collection,
             meme_name: str) -> dict:
    """Get single meme based on its name."""
    return memes_collection.find_one({"name": meme_name})
def insert_id_if_not_exist(col: pymongo.collection.Collection,
                           key_name: str, value):
    """Insert ``{"_id": key_name, "sequence_value": value}`` unless it exists.

    BUG FIX: the previous find_one + insert_one pair could raise
    DuplicateKeyError when two clients raced between the check and the
    insert.  A single atomic upsert with $setOnInsert inserts only when
    the _id is absent and never modifies an existing document.
    """
    col.update_one(
        {"_id": key_name},
        {"$setOnInsert": {"sequence_value": value}},
        upsert=True,
    )