Exemplo n.º 1
0
def test_bson(everything: Everything):
    """Round-trip ``everything`` through BSON and check it survives intact.

    The value is unstructured with the BSON converter, encoded to bytes,
    decoded again and structured back into ``Everything``; the result must
    compare equal to the input.
    """
    from bson import decode as bson_loads
    from bson import encode as bson_dumps

    converter = bson_make_converter()

    # Both directions use tz-aware codec options.
    payload = converter.unstructure(everything)
    raw = bson_dumps(payload, codec_options=CodecOptions(tz_aware=True))
    decoded = bson_loads(raw, codec_options=CodecOptions(tz_aware=True))

    restored = converter.structure(decoded, Everything)
    assert restored == everything
Exemplo n.º 2
0
def get_mongodb_connection(collection=None):
    """Return a handle on the configured MongoDB database or one collection.

    Connects using the ``MONGO_HOST`` / ``MONGO_PORT`` settings, selects
    ``MONGO_DBNAME`` and, when both ``MONGO_DBUSER`` and ``MONGO_DBPASS`` are
    truthy, authenticates against that database.

    :param collection: name of the collection to return; when ``None`` the
        database object itself is returned.
    :return: the database, or the named collection opened with tz-aware
        codec options.
    :raises ValueError: if connecting or authenticating fails; the driver
        exception is attached as ``__cause__``.
    """
    try:
        connection = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    # ServerSelectionTimeoutError derives from ConnectionFailure, so it must
    # be handled first or its more specific message can never be produced.
    except errors.ServerSelectionTimeoutError as err:
        raise ValueError("Database Server is down. Please start it") from err
    except errors.ConnectionFailure as err:
        raise ValueError('Connection to server failed.') from err

    db = connection.get_database(MONGO_DBNAME)
    if MONGO_DBUSER and MONGO_DBPASS:
        # NOTE(review): Database.authenticate and Collection.ensure_index
        # (used below) no longer exist in PyMongo 4 - confirm the pinned
        # driver version before upgrading.
        try:
            db.authenticate(name=MONGO_DBUSER, password=MONGO_DBPASS)
        except errors.OperationFailure as err:
            raise ValueError('Authentication to database {} failed'.format(
                MONGO_DBNAME)) from err

    if collection is None:
        return db

    options = CodecOptions(tz_aware=True)
    col = db.get_collection(collection, codec_options=options)
    col.ensure_index('_id')
    return col
    def find_or_create_data_key(self):
        """Look up the data key by its alternate name, creating it if absent.

        As a side effect a ``ClientEncryption`` helper bound to the key vault
        is stored on ``self.client_encryption``.

        :return: tuple ``(key_id, key_id_b64)`` - the key id and the base64
            text of its raw bytes.
        """
        vault_client = MongoClient(self.connection_string)
        vault = vault_client[self.key_db][self.key_coll]
        self.ensure_unique_index_on_key_vault(vault)

        existing = vault.find_one({"keyAltNames": self.key_alt_name})

        self.client_encryption = ClientEncryption(
            self.kms_providers,
            self.key_vault_namespace,
            vault_client,
            CodecOptions(uuid_representation=STANDARD),
        )

        if existing is not None:
            key_id = existing["_id"]
        else:
            # No key stored under this alternate name yet: create one with the
            # "local" KMS provider and wrap the returned bytes in a UUID.
            created = self.client_encryption.create_data_key(
                "local", key_alt_names=[self.key_alt_name])
            key_id = UUID(bytes=created)

        encoded = base64.b64encode(key_id.bytes)
        return key_id, encoded.decode("utf-8")
Exemplo n.º 4
0
def process_article_summaries(db, override=False):
    """Generate and store summaries for documents in the ``Article`` collection.

    :param db: database handle exposing an ``Article`` collection.
    :param override: when true, re-summarize every article; otherwise only
        articles whose ``summary`` is missing or empty and whose ``body`` is
        not the empty string.
    :return: dict with the number of articles ``summarized`` and ``skipped``
        (``skipped`` stays 0 when ``override`` is true).
    """
    base_col = db.Article

    # Decode with the 'ignore' unicode error handler.
    opts = CodecOptions(unicode_decode_error_handler='ignore')
    col = base_col.with_options(codec_options=opts)

    skipped = 0
    if override:
        articles = col.find()
    else:
        needs_summary = {
            "$or": [
                {"summary": {"$size": 0}},
                {"summary": {"$exists": False}},
            ],
            "body": {"$ne": ""},
        }
        articles = col.find(needs_summary)

        # Articles that already carry a non-empty summary count as skipped.
        has_summary = {
            "summary": {
                "$not": {"$size": 0},
                "$exists": True,
            }
        }
        skipped = col.find(has_summary).count()

    summarized = 0
    for article in articles:
        summary = summarize(article['headline'], article['body'])
        selector = {'_id': article['_id']}
        col.update(selector, {'$set': {'summary': summary}})
        summarized += 1

    return {'summarized': summarized, 'skipped': skipped}
Exemplo n.º 5
0
    def test_sub_collection(self):
        """A dotted-name sub-collection inherits its parent's options."""
        options = dict(
            write_concern=WriteConcern(w=2, j=True),
            read_concern=ReadConcern("majority"),
            read_preference=Secondary([{"dc": "sf"}]),
            codec_options=CodecOptions(tz_aware=True,
                                       uuid_representation=JAVA_LEGACY),
        )
        parent = self.db.get_collection("test", **options)

        # Attribute access and item access are both checked.
        by_attribute = parent.subcollection
        by_item = parent["subcollection"]

        for coll in (parent, by_attribute, by_item):
            for option_name, expected in options.items():
                self.assertEqual(expected, getattr(coll, option_name))
Exemplo n.º 6
0
def main():
    """Build fuel reports for trips of vehicles matching ``^adl_metro``.

    Reads ``mongo_uri`` from the ``app:webcan`` section of
    ``../../development.ini``, connects to the ``webcan`` database and groups
    ``rpi_readings`` by ``trip_key``. Every trip without an existing
    ``trip_summary`` document is handed to ``fuel_report_trip`` in a worker
    pool; each result is inserted into ``trip_summary`` and, when its
    ``Distance (km)`` is at least 10, kept for the table printed at the end.
    """
    conf = configparser.ConfigParser()
    conf.read('../../development.ini')
    uri = conf['app:webcan']['mongo_uri']

    conn = MongoClient(uri)['webcan']
    # Trip ids listed in webcan_trip_filters are excluded from the report.
    # NOTE(review): if pluck returns a lazy iterator it is exhausted by the
    # set() below and is always truthy - confirm it returns a list.
    filtered_trips = pluck(conn.webcan_trip_filters.find(), 'trip_id')
    # vid_re = 'rocco_phev'
    vid_re = '^adl_metro'
    # Number of distinct (third '_'-separated field of) trip ids that are not
    # filtered out; only used as the progress bar total.
    num_trips = len(set(x.split('_')[2] for x in
                        conn.rpi_readings.distinct('trip_id', {'vid': {'$regex': vid_re}}))
                    - set(x.split('_')[2] for x in filtered_trips))
    # generate a fuel consumption report
    query = {
        # 'vid': vid_re,
        'vid': {'$regex': vid_re},
    }
    if filtered_trips:
        query['trip_id'] = {'$nin': filtered_trips}
    # Collection view with tz-aware options for Australia/Adelaide.
    # NOTE: the name `readings` is rebound inside the loop below.
    readings = conn.rpi_readings.with_options(
        codec_options=CodecOptions(tz_aware=True, tzinfo=pytz.timezone('Australia/Adelaide')))
    # Sorted by trip_key so that groupby below sees each trip as one run.
    cursor = readings.find(query).sort([('trip_key', 1)])
    report = []

    prog = tqdm.tqdm(desc='Trip Reports: ', total=num_trips, unit=' trips')

    def on_complete(r):
        """Pool callback: store the trip report and keep it if long enough."""
        # put this in the db
        # print(r)
        conn.trip_summary.insert_one({k: parse(v) for k, v in r.items()})
        if r['Distance (km)'] >= 10:
            report.append(r)
        prog.update()

    def summary_exists(trip_key):
        """Return True when trip_summary already has a document for trip_key."""
        return conn.trip_summary.find_one({'trip_key': trip_key}) is not None

    pool = Pool()
    # NOTE(review): `i` is incremented below but never read.
    i = 0

    for trip_id, readings in groupby(cursor, key=lambda x: x['trip_key']):

        if summary_exists(trip_id):
            continue
        # Materialize the group before handing it to the pool.
        readings = list(readings)
        # on_complete(fuel_report_trip(trip_id, readings))
        pool.apply_async(fuel_report_trip, args=(trip_id, readings), callback=on_complete)
        i += 1

    # Wait for all scheduled reports to finish before printing.
    pool.close()
    pool.join()
    prog.close()
    print(tabulate.tabulate(report, headers='keys'))
    # NOTE(review): exit() ends the program here, so the CSV export below is
    # never reached.
    exit()
    import csv
    with open('adl_metro_report_phev.csv', 'w') as out:
        writer = csv.DictWriter(out, fieldnames=list(report[0].keys()))
        writer.writeheader()
        writer.writerows(report)
Exemplo n.º 7
0
def getColTZ(col):
    """
    Add timezone handling to queries made through a collection.

    :param col: collection to derive the new view from
    :return: result of ``col.with_options`` using tz-aware codec options
        whose ``tzinfo`` is ``LOCAL_TIMEZONE``
    """
    local_tz_options = CodecOptions(tz_aware=True, tzinfo=LOCAL_TIMEZONE)
    return col.with_options(codec_options=local_tz_options)
Exemplo n.º 8
0
def get_collections(mongo):
    """Return the labels, translations and elements collections, in that order.

    The labels and elements collections use ``SON`` as their document class;
    the translations collection is returned with its default options.
    """
    as_son = CodecOptions(document_class=SON)

    labels = mongo.db.labels.with_options(codec_options=as_son)
    translations = mongo.db.translations
    elements = mongo.db.elements.with_options(codec_options=as_son)

    return labels, translations, elements
Exemplo n.º 9
0
 def __init__(self):
     """
     Default constructor of DataEngine; prepares the database handles.

     Creates a ``MongoClient`` for 127.0.0.1:27017 with ``connect=False``,
     selects the ``threat`` database and stores SON codec options.
     """
     client = MongoClient('127.0.0.1', 27017, connect=False)
     self.connection = client
     self.db = client.threat
     self.options = CodecOptions(document_class=SON)
Exemplo n.º 10
0
    def __init__(self, collection: str, model_cls: Type[BaseModel]):
        """Bind this object to ``collection`` and remember ``model_cls``.

        The collection is opened with a type registry containing a
        ``DecimalCodec`` and a unique index on ``id`` is created on it.
        """
        self._db = get_db()

        registry = TypeRegistry([DecimalCodec()])
        decimal_options = CodecOptions(type_registry=registry)
        handle = self._db.get_collection(collection,
                                         codec_options=decimal_options)
        self._collection = handle
        handle.create_index("id", unique=True)

        self._model_cls = model_cls
Exemplo n.º 11
0
def bson_numpy_func(use_large):
    """Feed every raw BSON document of a collection to ``sequence_to_ndarray``.

    :param use_large: key selecting the entry of ``collection_names`` and
        ``dtypes`` to use.

    The result of the conversion is not returned.
    """
    name = collection_names[use_large]
    raw_options = CodecOptions(document_class=RawBSONDocument)
    raw_coll = db.get_collection(name, codec_options=raw_options)

    cursor = raw_coll.find()
    dtype = dtypes[use_large]
    raw_docs = (doc.raw for doc in cursor)
    bsonnumpy.sequence_to_ndarray(raw_docs, dtype, raw_coll.count())
Exemplo n.º 12
0
 def __new__(cls) -> "MongoDBClient":
     """Return the shared instance, creating and configuring it on first use.

     On first use the instance receives tz-aware ``codec_options`` built from
     ``get_timezone()`` and the ``mongodb`` attribute of the current app.

     :return: the single shared instance of this class.
     """
     if cls.__instance is None:
         # Configure the object completely before publishing it on the class:
         # if get_current_app() or get_timezone() raises, no half-initialised
         # instance is cached and the next call simply retries.
         instance = object.__new__(cls)
         app = get_current_app()
         tzinfo = get_timezone()
         instance.codec_options = CodecOptions(tz_aware=True, tzinfo=tzinfo)
         instance.mongodb = app.mongodb
         cls.__instance = instance
     return cls.__instance
Exemplo n.º 13
0
def build_split_distribution(db, ns, no_timeout):
    """Append one entry per chunk or split of ``ns`` to ``final_list``.

    Chunks are read from ``db['chunks']`` in ascending ``min`` order. For each
    chunk ``find_split`` is consulted: when it yields a split, that split is
    appended and ``split['splits']`` further chunks are skipped on the cursor;
    otherwise the chunk itself is appended with ``splits`` set to 0.

    Relies on names not defined in this function: ``no_progressbar``,
    ``list_splits``, ``final_list`` and ``find_split`` (presumably
    module-level - verify).

    :param db: database handle exposing a ``chunks`` collection.
    :param ns: value matched against the ``ns`` field of chunk documents.
    :param no_timeout: forwarded to ``find`` as ``no_cursor_timeout``.
    """

    print('Building split distribution...')

    # Same collection, but documents are decoded as SON.
    chunks_son = db['chunks'].with_options(codec_options=CodecOptions(
        document_class=SON))
    # NOTE(review): i_splits is never read in this function.
    i_splits = 0

    # Only used as the progress bar maximum.
    chunks_count = db['chunks'].count({'ns': ns})

    if (no_progressbar == False):
        pbar = ProgressBar(widgets=[Percentage(), Bar()],
                           maxval=chunks_count).start()
        bar_i = 0

    # Position returned by find_split and passed back in on the next call.
    bookmark = 0
    # Project only the 'min' and 'max' fields of each chunk.
    chunks_cursor = chunks_son.find({
        'ns': ns
    }, {
        '_id': 0,
        'min': 1,
        'max': 1
    },
                                    no_cursor_timeout=no_timeout).sort([
                                        ('min', pymongo.ASCENDING)
                                    ])
    # The cursor is advanced manually so the inner loop below can skip ahead
    # on the same cursor.
    while (1):
        try:
            chunk_son = chunks_cursor.next()
        except StopIteration:
            break

        chunk = collections.OrderedDict()
        chunk['min'] = chunk_son['min']
        chunk['max'] = chunk_son['max']
        (split, bookmark) = find_split(list_splits, bookmark, chunk)
        if (split != None):
            # Insert the split, offset by split count
            final_list.append(split)
            try:
                # Consume split['splits'] more chunks from the cursor.
                for skip in range(split['splits']):
                    chunks_cursor.next()
            except StopIteration:
                print('Warning: unexpected end of iteration')
                print('skip ' + str(skip) + ' of ' + str(split['splits']))
                break
        else:
            # insert chunk
            chunk['splits'] = 0
            final_list.append(chunk)

        # Advanced once per outer iteration; chunks skipped above are not
        # counted.
        if (no_progressbar == False):
            bar_i = bar_i + 1
            pbar.update(bar_i)

    if (no_progressbar == False):
        pbar.finish()
Exemplo n.º 14
0
def get_db_collection(connection_str: str, db_name: str,
                      collection_name: str) -> Collection:
    """Open a MongoDB collection whose document class is ``SON``.

    :param connection_str: connection string passed to ``MongoClient``
    :param db_name: name of the database to select
    :param collection_name: name of the collection to select
    :return: MongoDB Collection object
    """
    database = MongoClient(connection_str)[db_name]
    son_options = CodecOptions(document_class=SON)
    collection = database[collection_name]
    return collection.with_options(codec_options=son_options)
Exemplo n.º 15
0
 def _init_collection(
         self,
         col_name: str,
         indexes: Optional[list[IndexModel]] = None) -> Collection:
     """Return collection ``col_name`` with Decimal support, creating indexes.

     :param col_name: name of the collection in ``self._database``.
     :param indexes: index models to create; skipped when empty or ``None``.
     :return: the collection, opened with a ``DecimalCodec`` type registry.
     """
     registry = TypeRegistry([DecimalCodec()])
     collection = self._database.get_collection(
         col_name, CodecOptions(type_registry=registry))
     if not indexes:
         return collection
     collection.create_indexes(indexes)
     return collection
Exemplo n.º 16
0
    def test_get_database(self):
        """``get_database`` applies the options passed positionally."""
        codec_options = CodecOptions(tz_aware=True)
        write_concern = WriteConcern(w=2, j=True)
        db = self.cx.get_database(
            "foo", codec_options, ReadPreference.SECONDARY, write_concern)

        self.assertIsInstance(db, motor.MotorDatabase)
        expectations = (
            ("name", "foo"),
            ("codec_options", codec_options),
            ("read_preference", ReadPreference.SECONDARY),
            ("write_concern", write_concern),
        )
        for attribute, expected in expectations:
            self.assertEqual(expected, getattr(db, attribute))
    def __init__(self, connection_string, database: str, collection: str):
        """Connect to MongoDB and open ``collection`` inside ``database``.

        The collection is opened with the standard UUID representation.
        """
        logger.info(
            f'Initialize. Database: "{database}". Collection: "{collection}"')

        # See the PyMongo tutorial:
        # http://api.mongodb.com/python/current/tutorial.html
        client = MongoClient(connection_string)
        self.client = client
        self.db = client[database]

        uuid_options = CodecOptions(uuid_representation=binary.STANDARD)
        self.collection = self.db.get_collection(collection, uuid_options)
Exemplo n.º 18
0
def get_db(collection=None):
    """Return the ``online_shopping`` database or one of its collections.

    The database handle is created on first use and cached on ``g.db``.
    Only the database is cached, so the kind of object returned depends on
    the ``collection`` argument of the current call and not on whichever
    call happened to run first.

    :param collection: optional collection name.
    :return: the database when ``collection`` is ``None``; otherwise the named
        collection opened with tz-aware codec options for ``Asia/Tehran``.
    """
    if 'db' not in g:
        client = pymongo.MongoClient(
            current_app.config["CONNECTION_STRING_MDB"])
        g.db = client.online_shopping

    if collection is None:
        return g.db

    codec_options = CodecOptions(tz_aware=True,
                                 tzinfo=pytz.timezone("Asia/Tehran"))
    return g.db.get_collection(collection, codec_options=codec_options)
Exemplo n.º 19
0
    def test_get_database(self):
        """``get_database`` applies the options passed positionally."""
        codec_options = CodecOptions(tz_aware=True)
        write_concern = WriteConcern(w=2, j=True)
        read_preference = ReadPreference.SECONDARY

        db = self.cx.get_database(
            'foo', codec_options, read_preference, write_concern)

        assert isinstance(db, motor_asyncio.AsyncIOMotorDatabase)
        expectations = (
            ('name', 'foo'),
            ('codec_options', codec_options),
            ('read_preference', read_preference),
            # Compared against the write concern's document form.
            ('write_concern', write_concern.document),
        )
        for attribute, expected in expectations:
            self.assertEqual(expected, getattr(db, attribute))
Exemplo n.º 20
0
 def write(self, kind: FileType) -> None:
     """Serialize the tree to ``self._path(kind)`` in the format for ``kind``.

     :param kind: target format; one of the ``FileType`` members handled
         below. Any other value writes nothing.
     """
     fn = self._path(kind)
     if kind == FileType.PICKLE:
         # serialize as TreeNode
         with open(fn, "wb") as f:
             pickle.dump(self.treenode, f, protocol=-1)
     elif kind == FileType.CSV:
         # serialize as id_dict
         # newline="" leaves line endings to the csv module; without it the
         # writer's "\r\n" terminator becomes "\r\r\n" on Windows.
         with open(fn, "w", newline="") as f:
             w = csv.DictWriter(f, Node._fields)
             w.writeheader()
             for item in self.treenode.node_iter():
                 w.writerow(item._asdict())
     elif kind == FileType.MSGPACK:
         # https://msgpack-python.readthedocs.io/en/latest/api.html
         with open(fn, "wb") as f:
             # Doesn't improve speed
             # msgpack.pack(self._to_dict(), f, use_bin_type=True)
             msgpack.pack(self.to_dict_list(), f)
     elif kind == FileType.JSON:
         self._json_dump(fn, json.dump)
     elif kind == FileType.UJSON:
         self._json_dump(fn, ujson.dump)
     elif kind == FileType.SIMPLEJSON:
         # NOTE: simplejson includes key names when serializing NamedTuples
         with open(fn, "w") as f:
             if self.json_dict_list:
                 simplejson.dump(list(self.id_dict.values()), f, ensure_ascii=True)
             else:
                 simplejson.dump(self.id_dict, f, ensure_ascii=True)
     elif kind == FileType.CBOR2:
         with open(fn, "wb") as f:
             cbor2.dump(self.to_dict_list(), f)
     elif kind == FileType.CBOR:
         with open(fn, "wb") as f:
             cbor.dump(self.to_dict_list(), f)
     elif kind == FileType.RAPIDJSON:
         # https://python-rapidjson.readthedocs.io/en/latest/benchmarks.html
         # TODO: See this example for possible speed improvement - deeper integration with Node
         #  https://python-rapidjson.readthedocs.io/en/latest/encoder.html
         # NOTE: can't use id_dict - keys must be strings
         #       can't use self.id_dict.values() - not serializable
         #       list(self.id_dict.values()) produces a list of lists - no keys - very fragile
         with open(fn, "w") as f:
             # Both branches use the same encoder settings; build it once.
             encode = rapidjson.Encoder(number_mode=rapidjson.NM_NATIVE, ensure_ascii=False)
             if self.json_dict_list:
                 encode(self.to_dict_list(), f)
             else:
                 encode(list(self.id_dict.values()), f)
     elif kind == FileType.BSON:
         # One BSON document per node, written back to back.
         with open(fn, "wb") as f:
             co = CodecOptions(document_class=RawBSONDocument)
             for node in self.treenode.node_iter():
                 f.write(BSON.encode(node._asdict(), codec_options=co))
Exemplo n.º 21
0
 def get_db_collection(cls,
                       type_codecs: [CoreEntityCodec],
                       document_class: MutableMapping = AttributeDict):
     """
     Provide this class's collection configured with custom codec options.

     :param type_codecs: codecs placed in the collection's type registry
     :param document_class: class used for documents of the collection
     :return: Collection for the current database connection
     """
     database: Database = cls.get_database()
     registry = TypeRegistry(type_codecs)
     options = CodecOptions(document_class=document_class,
                            type_registry=registry)
     return database.get_collection(name=cls._collection_name,
                                    codec_options=options)
Exemplo n.º 22
0
 def _get_collection(
     self,
     collection: Optional[str] = None,
     tz_aware=False,
 ) -> pymongo.collection.Collection:
     """Parses codec options and returns the MongoDB collection object.

     ``None`` selects ``self.default_collection``. A name that is not in
     ``self.collections`` gets its indexes made first, and this is logged.
     """
     is_known = collection is None or collection in self.collections
     if not is_known:
         self._make_indexes(collection)
         self.logger.info(f'Making new collection: {collection}')

     target = self.default_collection if collection is None else collection
     codec_options = CodecOptions(tz_aware=tz_aware)
     return self.db.get_collection(target).with_options(codec_options)
def find_doc_by_uuid(collection: str, uuid_str: str):
    """Fetch the non-update document whose ``uuid.uuid`` equals ``uuid_str``.

    :param collection: name of the collection to search in ``DB``.
    :param uuid_str: textual UUID to look for.
    :return: the ``find_one`` result; when it is falsy an error is logged
        before it is returned.
    """
    # need to use java legacy uuid representation
    # https://stackoverflow.com/questions/26712600/mongo-uuid-python-vs-java-format/31061472
    legacy_options = CodecOptions(uuid_representation=JAVA_LEGACY)
    db_collection = DB.get_collection(collection, legacy_options)

    query = {'uuid.uuid': uuid.UUID(uuid_str), 'isUpdate': False}
    doc = db_collection.find_one(query)

    if doc:
        return doc

    LOGGER.error(f'No doc found for {uuid_str}')
    return doc
Exemplo n.º 24
0
    def test_get_collection(self):
        """``get_collection`` uses explicit options and inherits the rest."""
        codec_options = CodecOptions(tz_aware=True,
                                     uuid_representation=JAVA_LEGACY)
        write_concern = WriteConcern(w=2, j=True)
        explicit = self.db.get_collection(
            'foo', codec_options, ReadPreference.SECONDARY, write_concern)

        self.assertIsInstance(explicit, motor.MotorCollection)
        expectations = (
            ('name', 'foo'),
            ('codec_options', codec_options),
            ('read_preference', ReadPreference.SECONDARY),
            ('write_concern', write_concern),
        )
        for attribute, expected in expectations:
            self.assertEqual(expected, getattr(explicit, attribute))

        # Only the read preference is overridden here; the remaining options
        # are compared with the database's own.
        pref = Secondary([{"dc": "sf"}])
        inherited = self.db.get_collection('foo', read_preference=pref)
        self.assertEqual(pref, inherited.read_preference)
        self.assertEqual(self.db.codec_options, inherited.codec_options)
        self.assertEqual(self.db.write_concern, inherited.write_concern)
Exemplo n.º 25
0
    def test_with_options(self):
        """``Collection.with_options`` overrides given options, keeps others."""
        source = self.db.test
        codec_options = CodecOptions(tz_aware=True,
                                     uuid_representation=JAVA_LEGACY)
        write_concern = WriteConcern(w=2, j=True)

        derived = source.with_options(
            codec_options, ReadPreference.SECONDARY, write_concern)

        self.assertIsInstance(derived, motor.MotorCollection)
        self.assertEqual(codec_options, derived.codec_options)
        self.assertEqual(Secondary(), derived.read_preference)
        self.assertEqual(write_concern, derived.write_concern)

        # Overriding only the read preference leaves the other options equal
        # to those of the source collection.
        pref = Secondary([{"dc": "sf"}])
        derived = source.with_options(read_preference=pref)
        self.assertEqual(pref, derived.read_preference)
        for attribute in ('codec_options', 'write_concern'):
            self.assertEqual(getattr(source, attribute),
                             getattr(derived, attribute))
Exemplo n.º 26
0
    def test_with_options(self):
        """``Database.with_options`` overrides given options, keeps others."""
        source = self.db
        codec_options = CodecOptions(tz_aware=True,
                                     uuid_representation=JAVA_LEGACY)
        write_concern = WriteConcern(w=2, j=True)

        derived = source.with_options(
            codec_options, ReadPreference.SECONDARY, write_concern)

        self.assertIsInstance(derived, motor.MotorDatabase)
        self.assertEqual(codec_options, derived.codec_options)
        self.assertEqual(Secondary(), derived.read_preference)
        self.assertEqual(write_concern, derived.write_concern)

        # Overriding only the read preference leaves the other options equal
        # to those of the source database.
        pref = Secondary([{"dc": "sf"}])
        derived = source.with_options(read_preference=pref)
        self.assertEqual(pref, derived.read_preference)
        for attribute in ('codec_options', 'write_concern'):
            self.assertEqual(getattr(source, attribute),
                             getattr(derived, attribute))
Exemplo n.º 27
0
 def __init__(
     self,
     model_class: Type[T],
     database: Database,
     col_name: str,
     indexes: Optional[list[Union[IndexModel, str]]] = None,
     wrap_object_str_id=True,
 ):
     """Open ``col_name`` in ``database`` for documents of ``model_class``.

     :param model_class: model whose ``id`` field type decides id wrapping.
     :param database: database holding the collection.
     :param col_name: name of the collection.
     :param indexes: index models, or strings that are converted with
         ``parse_str_index_model``; created when non-empty.
     :param wrap_object_str_id: combined with the ``id`` field type check to
         set ``self.wrap_object_id``.
     """
     registry = TypeRegistry([DecimalCodec()])
     self.collection = database.get_collection(
         col_name, CodecOptions(type_registry=registry))

     if indexes:
         index_models = []
         for index in indexes:
             if isinstance(index, str):
                 index = parse_str_index_model(index)
             index_models.append(index)
         self.collection.create_indexes(index_models)

     self.model_class = model_class
     id_is_object_id_str = model_class.__fields__["id"].type_ == ObjectIdStr
     self.wrap_object_id = id_is_object_id_str and wrap_object_str_id
Exemplo n.º 28
0
 def verify_output(self,
                   bson_metrics_file_name,
                   expected_results,
                   check_last_row_only=False):
     """
     Compare the documents of a BSON metrics file with the expected rows.

     :param bson_metrics_file_name: path of the BSON file to decode.
     :param expected_results: the expected last document when
     ``check_last_row_only`` is true, otherwise a sequence indexed in step
     with the decoded documents.
     :param check_last_row_only: Check that the last row is correct. Since the results are
     cumulative, this likely means previous rows are all correct as well.
     :return:
     """
     with open(bson_metrics_file_name, 'rb') as f:
         ordered = CodecOptions(document_class=OrderedDict)
         if not check_last_row_only:
             for position, doc in enumerate(decode_file_iter(f, ordered)):
                 self.assertEqual(doc, expected_results[position])
         else:
             decoded_bson = list(decode_file_iter(f, ordered))
             self.assertEqual(expected_results, decoded_bson[-1])
Exemplo n.º 29
0
    def test_with_options(self):
        """``with_options`` overrides the given options and keeps the others."""
        source = self.db.test
        codec_options = CodecOptions(
            tz_aware=True, uuid_representation=JAVA_LEGACY)
        write_concern = WriteConcern(w=2, j=True)

        derived = source.with_options(
            codec_options, ReadPreference.SECONDARY, write_concern)

        self.assertIsInstance(derived, AsyncIOMotorCollection)
        expectations = (
            ('codec_options', codec_options),
            ('uuid_subtype', JAVA_LEGACY),
            ('read_preference', ReadPreference.SECONDARY),
            ('write_concern', write_concern.document),
        )
        for attribute, expected in expectations:
            self.assertEqual(expected, getattr(derived, attribute))

        # Override the read preference only; mode and tag sets are checked
        # separately, everything else must equal the source collection's.
        pref = Secondary([{"dc": "sf"}])
        derived = source.with_options(read_preference=pref)
        self.assertEqual(pref.mode, derived.read_preference)
        self.assertEqual(pref.tag_sets, derived.tag_sets)
        for attribute in ('codec_options', 'uuid_subtype', 'write_concern'):
            self.assertEqual(getattr(source, attribute),
                             getattr(derived, attribute))
Exemplo n.º 30
0
def db_collection(db):
    """Return the ``pymongo_migrate`` collection with tz-aware UTC options."""
    utc_options = CodecOptions(tz_aware=True, tzinfo=timezone.utc)
    return db.get_collection("pymongo_migrate", codec_options=utc_options)