Пример #1
0
def update_user_email_is_validated_status(coll: pymongo.collection.Collection,
                                          user_id: ObjectId,
                                          yes=False,
                                          no=False,
                                          token=""):
    """Record the e-mail validation state on a single user document.

    At most one update is issued, selected by the first truthy argument in
    the order ``token``, ``yes``, ``no``:

    * ``token`` -- set ``email_validated`` to ``"pending"`` and store the token.
    * ``yes``   -- set ``email_validated`` to ``"yes"`` and unset the token.
    * ``no``    -- set ``email_validated`` to ``"no"`` and unset the token.

    When all three are falsy the collection is not touched.
    """
    assert isinstance(user_id, ObjectId)

    if token:
        change = {
            "$set": {
                "email_validated": "pending",
                "email_validation_token": token,
            },
        }
    elif yes or no:
        # ``yes`` wins over ``no`` when both are given.
        change = {
            "$set": {"email_validated": "yes" if yes else "no"},
            "$unset": {"email_validation_token": ""},
        }
    else:
        return

    coll.update_one({"_id": user_id}, change)
Пример #2
0
async def insert_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    """Insert or refresh the dataset located at ``path_to_dataset``.

    The collection is probed by ``selfPath`` and by the dataset token:

    * a document with the same path exists -> it is updated;
    * no document with that path or token  -> the dataset is inserted;
    * only the token is already present    -> a duplicate-token error is logged.

    ``modules_merger`` is awaited afterwards in all three cases. An
    ``H5DatasetInvalidException`` is logged; any other exception is logged
    after removing the conversion folder (if its path was already known).
    """
    token = None
    convert_folder = None
    try:
        h5_dataset = H5Dataset(path_to_dataset)
        token = h5_dataset.dataset["token"]
        convert_folder = os.path.join(out_dir, token)

        same_path = collection.find_one({"selfPath": path_to_dataset})
        same_token = collection.find_one({"token": token})

        if same_path is not None:
            update_dataset(same_path, h5_dataset, convert_folder,
                           collection)
        elif same_token is None:
            put_dataset(h5_dataset, convert_folder, collection)
        else:
            # Another path already registered this token.
            logging.error(f"Duplicated token in dataset {h5_dataset.token}")

        await modules_merger(out_dir, gmt_dir)
    except H5DatasetInvalidException as e:
        logging.error(f"Invalid dataset {path_to_dataset}: {e}")
    except Exception as e:
        if convert_folder is not None:
            rm_rf(convert_folder)
        logging.error(f"Failed to update dataset {token} due to: {e}")
Пример #3
0
def set_inserted_column(c: pymongo.collection.Collection, ids: List[int], time: datetime.datetime):
    """Set the ``inserted`` field to ``time`` on every document whose ``id`` is in ``ids``."""
    selector = {'id': {'$in': ids}}
    change = {'$set': {'inserted': time}}
    c.update_many(selector, change)
Пример #4
0
def write_contract_to_mongo(reqId, contract: Contract,
                            symbols: pymongo.collection.Collection):
    """Write a contract to the symbols collection with a ``validFrom`` attribute.

    The stored document is a shallow copy of the contract's instance
    attributes, extended with ``reqId`` and ``validFrom`` (the current local
    time as returned by ``datetime.datetime.now()``).

    Args:
        reqId: Request identifier stored alongside the contract.
        contract: Object whose instance attributes form the document.
        symbols: Collection the document is inserted into.
    """
    # vars() hands back the object's live __dict__; copy it so the extra keys
    # added here (and the _id PyMongo adds on insert) do not become attributes
    # of the caller's contract object.
    document = dict(vars(contract))
    document["reqId"] = reqId
    document["validFrom"] = datetime.datetime.now()
    symbols.insert_one(document)
Пример #5
0
def construct_schema_pymongo(
    collection: pymongo.collection.Collection,
    delimiter: str,
    sample_size: Optional[int] = None,
) -> Dict[Tuple[str, ...], SchemaDescription]:
    """
    Run construct_schema over the documents of a PyMongo collection.

    The returned schema is keyed by tuples of nested field names; each value
    carries 'types', 'count', 'nullable', 'delimited_name', and 'type' attributes.

    Parameters
    ----------
        collection:
            the PyMongo collection to read from
        delimiter:
            string used to concatenate field names
        sample_size:
            number of documents to sample from the collection
            (the whole collection is read when this is not provided)
    """

    if not sample_size:
        # no sample size given: read every document in the collection
        cursor = collection.find({})
    else:
        # draw a sample of the requested size
        sample_stage = {"$sample": {"size": sample_size}}
        cursor = collection.aggregate([sample_stage], allowDiskUse=True)

    documents = list(cursor)
    return construct_schema(documents, delimiter)
Пример #6
0
def build_replay_info(
        rp_gen: Generator = REPLAY_GEN,
        db_collection: pymongo.collection.Collection = replays_info) -> bool:
    '''
    Triggers the search for new replays at CONFIG.replay_path. Adds the
    description of each replay to a data collection within a MongoDB
    database, if it is not in the database already.

    *Args:*
        - rp_gen (Generator = REPLAY_GEN):
            sc2reader.resources.Replay generator that yields the replays found
            in the CONFIG.replay_path.
        - db_collection (pymongo.collection.Collection = replays_info):
            the collection where the function adds the new documents.

    *Returns:*
        - bool:
            True if new replays were found and added to the collection,
            False otherwise.
    '''
    # Entries yielded as None are skipped; identity is the right test for None.
    replays_data_set = [
        asdict(replay_data)
        for replay_data in get_replays_data_set(rp_gen, db_collection)
        if replay_data is not None
    ]
    if not replays_data_set:
        print(f'No new replays at {CONFIG.replay_path}')
        return False

    db_collection.insert_many(replays_data_set)
    return True
Пример #7
0
    def update_single_offer(self,
                            collection: pymongo.collection.Collection,
                            driver: webdriver.Chrome,
                            offer: dict,
                            old_db_corresponding_record=None):
        """Parse a single offer and write the resulting document to MongoDB.

        The offer summary is parsed for location and salary, the page at
        ``offer['id']`` is loaded with the driver to obtain the full
        description, and the document is then updated in or inserted into
        ``collection``. ``offer`` gets a ``full_description`` key as a side
        effect, and an unseen city is appended to ``self.cities``.

        Args:
            collection (pymongo.collection.Collection): MongoDB collection to run the update against
            driver (webdriver.Chrome): Selenium webdriver to get offer details
            offer (dict): A single offer; it is also read through its ``summary`` attribute
            old_db_corresponding_record (Dict, optional): Corresponding record in MongoDB, if exists.
                When truthy the record with the same ``id`` is updated, otherwise
                a new document is inserted. Defaults to None.
        """

        doc = {
            "id": offer['id'],
            "date": offer['published'],
            "title": offer['title'],
            "position": offer['title'].split('@')[0],
            "author": offer['author'],
            "link": offer['link'],
        }
        # The summary is HTML; line 4 of its text holds the salary and line 5
        # the location.
        offer_soup = BeautifulSoup(offer.summary, 'html.parser')
        text = offer_soup.text.split('\n')
        address = text[5].split('Location: ', 1)[1]
        city = address.split(',')[-1].strip()
        address = ','.join(address.split(',')[:-1])
        salaries_raw = text[4].split('Salary: ')[1]
        # A parenthesised group that itself contains a comma means the raw
        # string must be kept whole instead of being split on commas.
        if re.findall(r"\(.{1,17},.{1,17}\)", salaries_raw):
            salaries = [salaries_raw]
        else:
            salaries = salaries_raw.split(',')
        salary_ranges = [
            self.get_salary_details(salary.replace(" ", ""), salaries_raw)
            for salary in salaries
        ]

        if city in self.cities_translations:
            city = self.cities_translations[city]
        # The offer id is used as the URL of the offer page.
        driver.get(offer['id'])
        page_text = driver.find_element_by_id('root')

        doc["address"] = address
        doc["city"] = city
        doc["salary"] = salary_ranges
        doc["raw_salary"] = salaries_raw
        doc["full_description"] = page_text.text
        # parse_single_description receives the offer, so it needs the text too.
        offer["full_description"] = page_text.text
        try:
            description_details = self.parse_single_description(offer)
            doc.update(description_details)
        except IndexError as e:
            print(f'Error parsing description for offer {offer["id"]} : {e}')
        if city not in self.cities:
            self.cities.append(city)
        if old_db_corresponding_record:
            collection.update_one({"id": offer['id']}, {"$set": doc})
        else:
            collection.insert_one(doc)
Пример #8
0
def import_new_records(base_id: str,
                       table: str,
                       mongo_table: pymongo.collection.Collection,
                       view: Optional[str] = None) -> None:
    """Import new records from Airtable to MongoDB.

    Records that already carry a ``mongo_id`` field are updated in MongoDB and
    their review flags are refreshed in Airtable; all other records are
    inserted and their new MongoDB id is written back to Airtable. The number
    of updated and added documents is printed at the end.

    Raises:
        ValueError: if no Airtable API key is configured.
    """

    if not _AIRTABLE_API_KEY:
        raise ValueError(
            'No API key found. Create an airtable API key at '
            'https://airtable.com/account and set it in the AIRTABLE_API_KEY '
            'env var.')
    client = airtable.Airtable(base_id, _AIRTABLE_API_KEY)
    records = client.iterate(table, view=view)

    converter = airtable_to_protos.ProtoAirtableConverter(
        proto_type=review_pb2.DocumentToReview,
        id_field=None,
        required_fields=('anonymized_url', ))

    num_updated = 0
    num_inserted = 0
    for record in records:
        mongo_id = record.get('fields', {}).get('mongo_id')

        proto_data = converter.convert_record(record)
        airtable_id = proto_data.pop('_id')
        anonymized_url = record['fields'].get('anonymized_url')
        if anonymized_url:
            proto_data['anonymizedUrl'] = anonymized_url[0]['url']

        if not mongo_id:
            # Not in MongoDB yet: insert it and link the Airtable row to it.
            result = mongo_table.insert_one(proto_data)
            client.update(table, airtable_id, {
                'mongo_id': str(result.inserted_id),
                'Bayes help needed': True
            })
            num_inserted += 1
        else:
            # Already added, let's update it.
            document_json = mongo_table.find_one_and_update(
                {'_id': objectid.ObjectId(mongo_id)},
                {'$set': proto_data},
            )
            has_review = bool(
                document_json.get('numPendingReviews', 0)
                or document_json.get('numDoneReviews', 0))
            timed_out = [
                review for review in document_json.get('reviews', [])
                if review.get('status') == 'REVIEW_TIME_OUT'
            ]
            client.update(
                table, airtable_id, {
                    'Bayes help needed': not has_review,
                    'review_timeouts': len(timed_out),
                })
            num_updated += 1

    print(f'{num_updated:d} documents updated.')
    print(f'{num_inserted:d} documents added.')
Пример #9
0
def insertion(collection: pymongo.collection.Collection, candles: List[Dict]):
    """Bulk-insert ``candles``, tolerating documents that already exist.

    The insert is unordered, so every document is attempted. Write errors
    with code 11000 (MongoDB's duplicate-key error) are ignored; if any other
    write error is reported, the ``BulkWriteError`` is re-raised.

    Args:
        collection: Target collection.
        candles: Documents to insert; an empty list is a no-op.

    Raises:
        pymongo.errors.BulkWriteError: when at least one write error is not a
            duplicate-key error.
    """
    if not candles:
        # insert_many rejects an empty document list, and there is nothing to
        # write anyway.
        return

    try:
        collection.insert_many(candles, ordered=False)
    except pymongo.errors.BulkWriteError as e:
        if any(err['code'] != 11000 for err in e.details['writeErrors']):
            raise
Пример #10
0
def update_api(
    apis_collection: pymongo.collection.Collection,
    api_provider: str,
    number_of_calls: int,
):
    """Set ``number_of_calls`` on the document whose ``provider`` is ``api_provider``."""
    selector = {"provider": api_provider}
    change = {"$set": {"number_of_calls": number_of_calls}}
    apis_collection.update_one(selector, change)
Пример #11
0
def put_dataset(h5_dataset: H5Dataset, folder: str,
                collection: pymongo.collection.Collection):
    """Convert ``h5_dataset`` into ``folder`` and insert its document.

    On a duplicate-key error the folder is removed again and the error is
    logged; other exceptions propagate to the caller.
    """
    try:
        h5_dataset.convert(folder)
        collection.insert_one(h5_dataset.dataset)
    except pymongo.errors.DuplicateKeyError:
        # Undo the conversion: the dataset was not stored.
        rm_rf(folder)
        logging.error(f"Duplicated token in dataset {h5_dataset.token}")
    else:
        logging.info(f"Successfully inserted dataset {h5_dataset.token}")
Пример #12
0
def delete_user_address(coll: pymongo.collection.Collection, user_id: ObjectId,
                        address_type: str):
    """Pull every entry with the given ``type`` out of the user's ``addresses`` array."""
    assert isinstance(user_id, ObjectId)

    selector = {"_id": user_id}
    change = {"$pull": {"addresses": {"type": address_type}}}
    coll.update_one(selector, change)
Пример #13
0
async def remove_dataset(path_to_dataset: str, out_dir: str, gmt_dir: str,
                         collection: pymongo.collection.Collection):
    """Remove the dataset registered under ``path_to_dataset``, if there is one.

    Deletes the document, removes the dataset's conversion folder under
    ``out_dir``, awaits ``modules_merger`` and logs the removal. Does nothing
    when no document with that ``selfPath`` exists.
    """
    selector = {'selfPath': path_to_dataset}
    dataset = collection.find_one(selector)
    if dataset is None:
        return

    token = dataset["token"]
    convert_folder = os.path.join(out_dir, token)
    collection.delete_one(selector)
    rm_rf(convert_folder)
    await modules_merger(out_dir, gmt_dir)
    logging.info(f"Successfully removed dataset {token}")
Пример #14
0
def add_user_contract_scan(coll: pymongo.collection.Collection,
                           user_id: ObjectId, contract_id: ObjectId, file_id):
    """Store ``file_id`` as ``scan_file`` on the user's contract with ``contract_id``."""
    assert isinstance(user_id, ObjectId)
    assert isinstance(contract_id, ObjectId)

    # The positional ``$`` refers to the contract matched by the selector.
    selector = {"_id": user_id, "contracts._id": contract_id}
    change = {"$set": {"contracts.$.scan_file": file_id}}
    coll.update_one(selector, change)
Пример #15
0
def write_to_fomo(id: int, id_cursor: pymongo.collection.Collection, source: str):
    """Insert, refresh or delete the record for ``id`` depending on ``source``.

    * No record yet: insert one stamped with the current UTC time.
    * Record exists, ``source == 'file'``: remember the id in ``deleted_ids``
      and delete the record.
    * Record exists, ``source == 'api'``: refresh ``updated`` and return the
      update result.

    Every path other than the ``'api'`` update returns ``None``.
    """
    # check if it exists first
    if not exists_on_fomo(id, id_cursor):
        id_cursor.insert_one({"id": id, "updated": datetime.datetime.utcnow()})
        return None

    if source == 'file':
        deleted_ids.append(id)
        id_cursor.delete_one({'id': id})
    elif source == 'api':
        return id_cursor.update_one({'id': id}, {'$set': {'updated': datetime.datetime.utcnow()}})
    return None
Пример #16
0
def delete_user_document(coll: pymongo.collection.Collection,
                         user_id: ObjectId, document_id: ObjectId):
    """Pull the entry with ``_id == document_id`` out of the user's ``documents`` array."""
    assert isinstance(user_id, ObjectId)
    assert isinstance(document_id, ObjectId)

    selector = {"_id": user_id}
    change = {"$pull": {"documents": {"_id": document_id}}}
    coll.update_one(selector, change)
Пример #17
0
def update_tree(collection: pymongo.collection.Collection,
                tree: MondrianTree, previous_leaves: Set[str],
                partitions: Dict[UUID, Partition]):
    """Write the tree (or only its changed nodes) to ``collection`` in one bulk write.

    With no previous leaves the whole tree is written non-incrementally.
    Otherwise ``diff_tree`` is handed an empty list to collect the new nodes
    (presumably filling it in place -- confirm against ``diff_tree``) and the
    write is flagged as incremental.
    """
    is_incremental = bool(previous_leaves)
    if is_incremental:
        new_nodes = []
        diff_tree(tree, previous_leaves, new_nodes)
    else:
        new_nodes = [tree]

    requests = produce_updates(new_nodes, is_incremental, partitions)
    collection.bulk_write(requests, ordered=False,
                          bypass_document_validation=True)
Пример #18
0
def invalidate_user_contract(coll: pymongo.collection.Collection,
                             user_id: ObjectId, contract_id: ObjectId,
                             invalidation_date: datetime):
    """Set ``invalidation_date`` on the user's contract with ``contract_id``."""
    assert isinstance(user_id, ObjectId)
    assert isinstance(contract_id, ObjectId)

    # The positional ``$`` refers to the contract matched by the selector.
    selector = {"_id": user_id, "contracts._id": contract_id}
    change = {"$set": {"contracts.$.invalidation_date": invalidation_date}}
    coll.update_one(selector, change)
Пример #19
0
def invalidate_user_document(coll: pymongo.collection.Collection,
                             user_id: ObjectId, document_id: ObjectId,
                             invalidation_date: datetime):
    """Set ``invalidation_date`` on the user's document with ``document_id``."""
    assert isinstance(user_id, ObjectId)
    assert isinstance(document_id, ObjectId)

    # The positional ``$`` refers to the document matched by the selector.
    selector = {"_id": user_id, "documents._id": document_id}
    change = {"$set": {"documents.$.invalidation_date": invalidation_date}}
    coll.update_one(selector, change)
Пример #20
0
def insert_meme(
    memes_collection: pymongo.collection.Collection,
    meme_name: str,
    meme_url: str,
    meme_description: str = "*new meme*",
):
    """Insert a new meme document with a usage counter starting at zero."""
    memes_collection.insert_one(
        dict(
            name=meme_name,
            description=meme_description,
            times_used=0,
            url=meme_url,
        )
    )
Пример #21
0
def add_users(coll: pymongo.collection.Collection, ids: list):
    """Insert a default user document for every ``ObjectId`` in ``ids``.

    Entries of ``ids`` that are not ``ObjectId`` instances are skipped. Each
    inserted document uses the id as both ``_id`` and ``user``, starts with
    empty ``role`` / ``workpans`` lists, an ``email_validated`` state of
    ``"no"`` and a creation time truncated to whole seconds.

    Args:
        coll: Collection the user documents are inserted into.
        ids: Candidate user ids.

    Returns:
        The ``ids`` argument, unchanged (including any skipped entries).
    """
    users = [{
        "_id": _id,
        "user": _id,
        "role": [],
        "workpans": [],
        "created": datetime.now().replace(microsecond=0),
        "type": "user",
        "email_validated": "no",
    } for _id in ids if isinstance(_id, ObjectId)]

    # insert_many rejects an empty document list, so skip the call when there
    # is nothing valid to insert.
    if users:
        coll.insert_many(users)

    return ids
Пример #22
0
    def fix_all_descriptions(self, collection: pymongo.collection.Collection):
        """One time function to fix all descriptions

        Re-parses the description of every document in the collection and
        writes the parsed details back to the document with the same ``id``.
        Offers whose description raises ``IndexError`` while being processed
        are reported and skipped.

        Args:
            collection (pymongo.collection.Collection): MongoDB collection to be updated
        """
        offers = collection.find()
        for offer in tqdm.tqdm(offers, desc="Fixing the descriptions"):
            try:
                description_details = self.parse_single_description(offer)
                collection.update_one({"id": offer['id']},
                                      {"$set": description_details})
            except IndexError as e:
                # Name the failing offer so the record can be found afterwards.
                print(f'Error parsing description for offer {offer.get("id")} : {e}')
Пример #23
0
def store_quotes(quotes: list, collection: pymongo.collection.Collection):
    """ Creates entries in the database for the provided quotes.

    ``None`` entries are dropped first. A bid/ask summary of the remaining
    quotes is pretty-printed, the matching documents in ``INDEX_COL`` are
    updated with tradability info, and the quotes themselves are inserted
    into ``collection``. Duplicate-key write errors on insert are ignored;
    other write errors are printed.

    Args:
        quotes: Quote dicts; entries may be ``None``.
        collection: Collection the quote documents are inserted into.
    """
    def map_quote(quote: dict) -> dict:
        instrument_id = parse_instrument_url(quote["instrument"])

        plucked = {
            "instrument_id": instrument_id,
            **pluck(DESIRED_QUOTE_KEYS, quote)
        }
        plucked["updated_at"] = parse_updated_at(plucked["updated_at"])
        return plucked

    quotes = [quote for quote in quotes if quote is not None]

    def format_quote(quote: dict) -> dict:
        return {
            "symbol": quote["symbol"],
            "bid": quote["bid_price"],
            "ask": quote["ask_price"]
        }

    pprint([format_quote(quote) for quote in quotes])

    if not quotes:
        # PyMongo rejects empty request/document lists in bulk_write and
        # insert_many; with no quotes there is nothing to store.
        return

    # Update the index collection with up-to-date tradability info
    timestamp = datetime.datetime.utcnow()

    def update_index_symbol(datum: dict) -> pymongo.operations.UpdateOne:
        data = {
            "timestamp": timestamp,
            "has_traded": datum.get("has_traded"),
            "updated_at": parse_updated_at(datum.get("updated_at")),
            "trading_halted": datum.get("trading_halted"),
        }
        instrument_id = parse_instrument_url(datum["instrument"])

        return pymongo.operations.UpdateOne({"instrument_id": instrument_id},
                                            {"$set": data})

    ops = [update_index_symbol(quote) for quote in quotes]
    INDEX_COL.bulk_write(ops, ordered=False)

    documents = [map_quote(quote) for quote in quotes]
    try:
        collection.insert_many(documents, ordered=False)
    except BulkWriteError as bwe:
        for err in bwe.details["writeErrors"]:
            if "duplicate key" not in err["errmsg"]:
                print("ERROR: Unhandled exception occured during batch write:")
                pprint(err)
0
def insert_doc(doc: dict, col: pymongo.collection.Collection):
    """Insert ``doc`` unless a document with the same key field already exists.

    The duplicate check uses ``danmaku`` when that value is truthy, otherwise
    ``uid`` when that value is truthy; when both are falsy the document is
    inserted without a check. An inserted document is echoed to stdout.
    """
    if doc["danmaku"]:
        lookup = {"danmaku": doc["danmaku"]}
    elif doc["uid"]:
        lookup = {"uid": doc["uid"]}
    else:
        lookup = None

    # One matching document is enough to count as a duplicate.
    already_stored = lookup is not None and any(True for _ in col.find(lookup))
    if already_stored:
        return

    col.insert_one(doc)
    print("insert: " + str(doc))
Пример #25
0
def get_all(collection: pymongo.collection.Collection) -> List[str]:
    """Return the ``symbole`` value of every document in the collection."""
    return [document['symbole'] for document in collection.find({})]
Пример #26
0
def get_next_id(col: pymongo.collection.Collection, id_name: str):
    """Increment the ``sequence_value`` of the counter document ``id_name`` and return it.

    The value is read from the document handed back by
    ``find_one_and_update``.
    """
    # NOTE(review): ``new=True`` is presumably there so the post-increment
    # document is returned -- confirm against the PyMongo version in use.
    counter = col.find_one_and_update(
        {"_id": id_name},
        {"$inc": {"sequence_value": 1}},
        new=True,
    )
    return counter["sequence_value"]
Пример #27
0
def get_all_items(collection: pymongo.collection.Collection,
                  model: Type[BaseModel],
                  *,
                  page: int = 1,
                  num_per_page: int = 20,
                  query: dict = None,
                  projection: dict = None):
    """
    Retrieve one page of items from a collection

    :param collection: Collection to query
    :param model: Class which the JSON in the collection represents
    :param page: Page number to retrieve.  #ToDo: implement correct server-side pagination
    :param num_per_page: Number of items per page to retrieve. Defaults to 20.
    :param query: Return only objects that contain the query
    :param projection: Filter to exclude keys from each result. The caller's
        dict is not modified.
    :return: List of ``model`` instances built from the matching documents
    """

    # Work on a copy so that merging ignore_mongo_id does not leak into a
    # dict the caller may reuse.
    projection = {} if projection is None else dict(projection)
    projection.update(ignore_mongo_id)

    # Pages are 1-based: page 1 skips nothing.
    offset = (page - 1) * num_per_page
    cursor = (collection.find(filter=query, projection=projection)
              .skip(offset)
              .limit(num_per_page))

    return [model(**item_json) for item_json in cursor]
Пример #28
0
def update_dataset(old_dataset: Dict, h5_dataset: H5Dataset, folder: str,
                   collection: pymongo.collection.Collection):
    """Re-convert ``h5_dataset`` into ``folder`` and update its stored document.

    The folder is cleared before the conversion. On any exception the folder
    is removed again and an error is logged; a duplicate-key error gets its
    own message. No exception derived from ``Exception`` is propagated.
    """
    try:
        rm_rf(folder)
        h5_dataset.convert(folder)
        collection.update_one({'_id': old_dataset['_id']},
                              {'$set': h5_dataset.dataset})
        logging.info(f"Successfully updated dataset {h5_dataset.token}")
    except Exception as e:
        # Whatever failed, do not leave a partial conversion behind.
        rm_rf(folder)
        if isinstance(e, pymongo.errors.DuplicateKeyError):
            logging.error(f"Duplicated token in dataset {h5_dataset.token}")
        else:
            logging.error(
                f"Failed to update dataset {h5_dataset.token} due to: {e}")
Пример #29
0
def get_item(collection: pymongo.collection.Collection,
             model: Union[Type[BaseModel], Type[dict]],
             item_id: Union[UUID, str],
             *,
             id_key: str = "id_",
             query: dict = None,
             projection: dict = None,
             raise_exc: bool = True):
    """
    Retrieve a single item from a collection

    :param collection: Collection to query
    :param model: Class which the JSON in the collection represents
    :param item_id: UUID or name of desired item
    :param id_key: If the UUID is stored outside of id_, specify here
    :param query: Return only objects that contain the query. The caller's
        dict is not modified.
    :param projection: Filter to exclude from mongo query result. The caller's
        dict is not modified.
    :param raise_exc: Whether to raise exception if item is not found.
    :return: Requested object from collection: the raw document when ``model``
        is ``dict``, otherwise a ``model`` instance; ``None`` when nothing was
        found and ``raise_exc`` is false.
    :raises problems.DoesNotExistException: if nothing was found and
        ``raise_exc`` is true.
    """
    # Copy both dicts before merging into them, so arguments the caller
    # reuses across calls are not modified.
    projection = {} if projection is None else dict(projection)
    projection.update(ignore_mongo_id)

    id_filter = mongo_filter(model, item_id, id_key=id_key)
    query = {} if query is None else dict(query)
    query.update(id_filter)

    item_json = collection.find_one(query, projection=projection)

    if item_json is None:
        if raise_exc:
            raise problems.DoesNotExistException("read", model.__name__, id_filter)
        return None
    if model is dict:
        return item_json
    return model(**item_json)
def add_mdoc(coll: pymongo.collection.Collection, mdoc: dict):
    """Insert ``mdoc``, asking on stdin before replacing an existing one.

    When a document with the same ``_id`` is already stored, it is printed
    and the user is prompted. Only the answers "a", "ano", "y" or "yes"
    (case-insensitive) replace it by delete-then-insert; any other answer
    keeps the stored document and returns without writing.
    """
    mdoc_id = mdoc["_id"]
    selector = {"_id": mdoc_id}

    stored = coll.find_one(selector)
    if stored is not None:
        print(f"Stávanící mdokument '{mdoc_id}': {stored}")
        answer = input(f"'{mdoc_id}' mdokument už v kolekci existuje, mám ho přepsat (a/N)? ")
        if answer.lower() in ("a", "ano", "y", "yes"):
            coll.delete_one(selector)
        else:
            print("Zachovávám původní mdokument.")
            return

    coll.insert_one(mdoc)
    print(f"Přidán mdokument {mdoc_id}")