def test_bson(everything: Everything):
    """Round-trip ``everything`` through BSON and check it comes back equal."""
    from bson import decode as bson_loads
    from bson import encode as bson_dumps

    converter = bson_make_converter()

    # Encode the unstructured form with timezone-aware codec options ...
    unstructured = converter.unstructure(everything)
    raw = bson_dumps(unstructured, codec_options=CodecOptions(tz_aware=True))

    # ... and decode it with equivalent options before structuring it again.
    decoded = bson_loads(raw, codec_options=CodecOptions(tz_aware=True))
    restored = converter.structure(decoded, Everything)
    assert restored == everything
def get_mongodb_connection(collection=None):
    """Return the configured MongoDB database, or one of its collections.

    Connects using ``MONGO_HOST``/``MONGO_PORT``, selects ``MONGO_DBNAME``
    and, when both ``MONGO_DBUSER`` and ``MONGO_DBPASS`` are set,
    authenticates against that database.

    :param collection: name of the collection to return; when ``None`` the
        database object itself is returned.
    :return: the database, or the named collection opened with
        timezone-aware codec options.
    :raises ValueError: if connecting or authenticating fails.
    """
    try:
        connection = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    # ServerSelectionTimeoutError derives from ConnectionFailure, so it must
    # be handled first; listed second, its handler could never run.
    except errors.ServerSelectionTimeoutError as exc:
        raise ValueError("Database Server is down. Please start it") from exc
    except errors.ConnectionFailure as exc:
        raise ValueError('Connection to server failed.') from exc

    db = connection.get_database(MONGO_DBNAME)
    if MONGO_DBUSER and MONGO_DBPASS:
        try:
            db.authenticate(name=MONGO_DBUSER, password=MONGO_DBPASS)
        except errors.OperationFailure as exc:
            raise ValueError('Authentication to database {} failed'.format(
                MONGO_DBNAME)) from exc

    if collection is None:
        return db

    options = CodecOptions(tz_aware=True)
    col = db.get_collection(collection, codec_options=options)
    # NOTE(review): Database.authenticate and Collection.ensure_index are
    # deprecated PyMongo APIs — confirm the pinned PyMongo still ships them.
    col.ensure_index('_id')
    return col
def find_or_create_data_key(self):
    """Look up the data key by its alternate name, creating it if missing.

    As a side effect, a ``ClientEncryption`` bound to the key vault client
    is stored on ``self.client_encryption``.

    :return: ``(key_id, key_id_b64)`` where ``key_id`` is the key's id (a
        ``UUID`` when the key was just created, otherwise the stored
        ``_id``) and ``key_id_b64`` is the base64 text of ``key_id.bytes``.
    """
    vault_client = MongoClient(self.connection_string)
    vault_coll = vault_client[self.key_db][self.key_coll]
    self.ensure_unique_index_on_key_vault(vault_coll)

    existing = vault_coll.find_one({"keyAltNames": self.key_alt_name})

    self.client_encryption = ClientEncryption(
        self.kms_providers,
        self.key_vault_namespace,
        vault_client,
        CodecOptions(uuid_representation=STANDARD),
    )

    if existing is not None:
        key_id = existing["_id"]
    else:
        created = self.client_encryption.create_data_key(
            "local", key_alt_names=[self.key_alt_name])
        key_id = UUID(bytes=created)

    encoded = base64.b64encode(key_id.bytes)
    return key_id, encoded.decode("utf-8")
def process_article_summaries(db, override=False):
    """Generate and store summaries for the documents in ``db.Article``.

    :param db: database handle exposing an ``Article`` collection.
    :param override: when true, every article is re-summarized; otherwise
        only articles with a non-empty ``body`` whose ``summary`` is missing
        or an empty array.
    :return: dict with the number of articles ``summarized`` and
        ``skipped``. ``skipped`` is only counted when ``override`` is false
        and stays 0 otherwise.
    """
    # Drop undecodable bytes instead of failing on malformed strings.
    opts = CodecOptions(unicode_decode_error_handler='ignore')
    col = db.Article.with_options(codec_options=opts)

    skipped = 0
    if override:
        articles = col.find()
    else:
        articles = col.find({
            "$or": [
                {"summary": {"$size": 0}},
                {"summary": {"$exists": False}},
            ],
            "body": {"$ne": ""},
        })
        # count_documents replaces Cursor.count(), which PyMongo 4 removed.
        skipped = col.count_documents({
            "summary": {"$not": {"$size": 0}, "$exists": True},
        })

    summarized = 0
    for article in articles:
        summary = summarize(article['headline'], article['body'])
        # update_one replaces Collection.update(), which PyMongo 4 removed;
        # with a $set document and default options both change one document.
        col.update_one({'_id': article['_id']},
                       {'$set': {'summary': summary}})
        summarized += 1

    return {'summarized': summarized, 'skipped': skipped}
def test_sub_collection(self):
    """A collection with a dotted name inherits its parent's options."""
    expected = {
        "write_concern": WriteConcern(w=2, j=True),
        "read_concern": ReadConcern("majority"),
        "read_preference": Secondary([{"dc": "sf"}]),
        "codec_options": CodecOptions(tz_aware=True,
                                      uuid_representation=JAVA_LEGACY),
    }
    parent = self.db.get_collection("test", **expected)
    # Both attribute access and item access must yield inheriting children.
    via_attribute = parent.subcollection
    via_item = parent["subcollection"]

    for coll in (parent, via_attribute, via_item):
        for option_name, option_value in expected.items():
            self.assertEqual(option_value, getattr(coll, option_name))
def main():
    """Generate fuel consumption summaries for the ``^adl_metro`` vehicles.

    Reads the Mongo URI from ``../../development.ini``, groups the
    ``rpi_readings`` documents by ``trip_key`` and hands every trip that has
    no ``trip_summary`` entry yet to ``fuel_report_trip`` in a worker pool.
    Each finished summary is stored in ``trip_summary``; those covering at
    least 10 km are also collected and printed as a table.
    """
    config = configparser.ConfigParser()
    config.read('../../development.ini')
    mongo_uri = config['app:webcan']['mongo_uri']
    conn = MongoClient(mongo_uri)['webcan']
    filtered_trips = pluck(conn.webcan_trip_filters.find(), 'trip_id')

    # vid_re = 'rocco_phev'
    vid_re = '^adl_metro'

    # Progress total: distinct trip suffixes for the vehicles minus the
    # suffixes of the filtered trips.
    vehicle_trip_ids = conn.rpi_readings.distinct(
        'trip_id', {'vid': {'$regex': vid_re}})
    wanted_suffixes = {x.split('_')[2] for x in vehicle_trip_ids}
    filtered_suffixes = {x.split('_')[2] for x in filtered_trips}
    num_trips = len(wanted_suffixes - filtered_suffixes)

    # generate a fuel consumption report
    query = {
        # 'vid': vid_re,
        'vid': {'$regex': vid_re},
    }
    if filtered_trips:
        query['trip_id'] = {'$nin': filtered_trips}

    adelaide = pytz.timezone('Australia/Adelaide')
    tz_readings = conn.rpi_readings.with_options(
        codec_options=CodecOptions(tz_aware=True, tzinfo=adelaide))
    cursor = tz_readings.find(query).sort([('trip_key', 1)])

    report = []
    prog = tqdm.tqdm(desc='Trip Reports: ', total=num_trips, unit=' trips')

    def on_complete(r):
        # put this in the db
        # print(r)
        conn.trip_summary.insert_one({k: parse(v) for k, v in r.items()})
        if r['Distance (km)'] >= 10:
            report.append(r)
        prog.update()

    def summary_exists(trip_key):
        return conn.trip_summary.find_one({'trip_key': trip_key}) is not None

    pool = Pool()
    # The cursor is sorted by trip_key, so groupby yields one group per trip.
    for trip_id, trip_rows in groupby(cursor, key=lambda x: x['trip_key']):
        if summary_exists(trip_id):
            continue
        rows = list(trip_rows)
        # on_complete(fuel_report_trip(trip_id, rows))
        pool.apply_async(fuel_report_trip, args=(trip_id, rows),
                         callback=on_complete)
    pool.close()
    pool.join()
    prog.close()

    print(tabulate.tabulate(report, headers='keys'))
    # NOTE(review): exit() stops here, so the CSV export below never runs —
    # confirm whether that is intentional.
    exit()
    import csv
    with open('adl_metro_report_phev.csv', 'w') as out:
        writer = csv.DictWriter(out, fieldnames=list(report[0].keys()))
        writer.writeheader()
        writer.writerows(report)
def getColTZ(col):
    """
    Add a time zone to queries made through the collection.

    :param col: collection to derive the time-zone-aware view from
    :return: result of ``col.with_options`` using timezone-aware codec
        options set to ``LOCAL_TIMEZONE``
    """
    tz_options = CodecOptions(tz_aware=True, tzinfo=LOCAL_TIMEZONE)
    return col.with_options(codec_options=tz_options)
def get_collections(mongo):
    """Return the labels, translations and elements collections of ``mongo.db``.

    The labels and elements collections are opened with ``SON`` as their
    document class; the translations collection keeps its default options.
    """
    son_options = CodecOptions(document_class=SON)

    def _as_son(coll):
        # Same collection, but with the SON codec options applied.
        return coll.with_options(codec_options=son_options)

    labels = _as_son(mongo.db.labels)
    translations = mongo.db.translations
    elements = _as_son(mongo.db.elements)
    return labels, translations, elements
def __init__(self):
    """
    Default constructor of DataEngine.

    Creates the MongoDB client for 127.0.0.1:27017 (constructed with
    ``connect=False``), selects the ``threat`` database and prepares codec
    options that use ``SON`` as the document class.
    """
    # establish the database connection
    client = MongoClient('127.0.0.1', 27017, connect=False)  # server.local_bind_port)
    self.connection = client
    self.db = client.threat
    self.options = CodecOptions(document_class=SON)
def __init__(self, collection: str, model_cls: Type[BaseModel]):
    """Open ``collection`` with ``DecimalCodec`` support and keep ``model_cls``.

    :param collection: name of the collection to open on ``get_db()``.
    :param model_cls: model class stored on the instance as ``_model_cls``.
    """
    self._db = get_db()

    decimal_registry = TypeRegistry([DecimalCodec()])
    decimal_options = CodecOptions(type_registry=decimal_registry)
    self._collection = self._db.get_collection(
        collection, codec_options=decimal_options)

    # Request a unique index on the "id" field.
    self._collection.create_index("id", unique=True)
    self._model_cls = model_cls
def bson_numpy_func(use_large):
    """Convert a whole collection to an ndarray with ``bsonnumpy``.

    :param use_large: key used to pick the collection name from
        ``collection_names`` and the matching dtype from ``dtypes``.
    :return: ``None``; the converted array is discarded.
    """
    raw_coll = db.get_collection(
        collection_names[use_large],
        codec_options=CodecOptions(document_class=RawBSONDocument))

    cursor = raw_coll.find()
    dtype = dtypes[use_large]
    # Collection.count() was removed in PyMongo 4; without a filter its
    # documented replacement is estimated_document_count().
    bsonnumpy.sequence_to_ndarray(
        (doc.raw for doc in cursor), dtype,
        raw_coll.estimated_document_count())
def __new__(cls) -> "MongoDBClient":
    """Return the shared instance, creating and configuring it on first use.

    On first construction the instance receives timezone-aware
    ``codec_options`` built from ``get_timezone()`` and the ``mongodb``
    attribute of ``get_current_app()``.
    """
    if cls.__instance is None:
        instance = object.__new__(cls)
        app = get_current_app()
        tzinfo = get_timezone()
        instance.codec_options = CodecOptions(tz_aware=True, tzinfo=tzinfo)
        instance.mongodb = app.mongodb
        # Publish only after setup succeeded, so a failure above cannot leave
        # a half-configured singleton cached for later callers.
        cls.__instance = instance
    return cls.__instance
def build_split_distribution(db, ns, no_timeout):
    """Merge the chunks of ``ns`` with the known splits into ``final_list``.

    Walks ``db['chunks']`` for ``ns`` in ascending ``min`` order. For every
    chunk ``find_split`` is consulted: when it returns a split, that split is
    appended and the next ``split['splits']`` chunks are skipped; otherwise
    the chunk itself is appended with ``splits`` set to 0. Results go to the
    externally defined ``final_list``; ``list_splits`` and ``no_progressbar``
    are likewise read from outside this function.

    :param db: database holding the ``chunks`` collection.
    :param ns: namespace whose chunks are processed.
    :param no_timeout: forwarded as ``no_cursor_timeout`` to ``find``.
    """
    print('Building split distribution...')
    chunks_son = db['chunks'].with_options(
        codec_options=CodecOptions(document_class=SON))

    # Collection.count(filter) was removed in PyMongo 4; count_documents is
    # its replacement.
    chunks_count = db['chunks'].count_documents({'ns': ns})

    show_progress = not no_progressbar
    if show_progress:
        pbar = ProgressBar(widgets=[Percentage(), Bar()],
                           maxval=chunks_count).start()
    bar_i = 0
    bookmark = 0

    chunks_cursor = chunks_son.find(
        {'ns': ns},
        {'_id': 0, 'min': 1, 'max': 1},
        no_cursor_timeout=no_timeout,
    ).sort([('min', pymongo.ASCENDING)])

    # The cursor is its own iterator, so the next() calls below advance the
    # same stream this loop is consuming.
    for chunk_son in chunks_cursor:
        chunk = collections.OrderedDict()
        chunk['min'] = chunk_son['min']
        chunk['max'] = chunk_son['max']

        split, bookmark = find_split(list_splits, bookmark, chunk)
        if split is not None:
            # Insert the split, offset by split count
            final_list.append(split)
            try:
                for skip in range(split['splits']):
                    next(chunks_cursor)
            except StopIteration:
                print('Warning: unexpected end of iteration')
                print('skip ' + str(skip) + ' of ' + str(split['splits']))
                break
        else:
            # insert chunk
            chunk['splits'] = 0
            final_list.append(chunk)

        if show_progress:
            bar_i += 1
            pbar.update(bar_i)

    if show_progress:
        pbar.finish()
def get_db_collection(connection_str: str, db_name: str,
                      collection_name: str) -> Collection:
    """Open a MongoDB collection that uses ``SON`` as its document class.

    :param connection_str: connection string passed to ``MongoClient``.
    :param db_name: name of the database to select.
    :param collection_name: name of the collection to select.
    :return: MongoDB Collection object
    """
    database = MongoClient(connection_str)[db_name]
    son_options = CodecOptions(document_class=SON)
    collection = database[collection_name]
    return collection.with_options(codec_options=son_options)
def _init_collection(
        self,
        col_name: str,
        indexes: Optional[list[IndexModel]] = None) -> Collection:
    """Open ``col_name`` with ``DecimalCodec`` support and create its indexes.

    :param col_name: name of the collection on ``self._database``.
    :param indexes: index models to create; nothing is created when this is
        empty or ``None``.
    :return: the opened collection.
    """
    registry = TypeRegistry([DecimalCodec()])
    collection = self._database.get_collection(
        col_name, CodecOptions(type_registry=registry))
    if not indexes:
        return collection
    collection.create_indexes(indexes)
    return collection
def test_get_database(self):
    """``get_database`` returns a MotorDatabase carrying the given options."""
    options = CodecOptions(tz_aware=True)
    concern = WriteConcern(w=2, j=True)

    database = self.cx.get_database(
        "foo", options, ReadPreference.SECONDARY, concern)

    self.assertTrue(isinstance(database, motor.MotorDatabase))
    self.assertEqual("foo", database.name)
    self.assertEqual(options, database.codec_options)
    self.assertEqual(ReadPreference.SECONDARY, database.read_preference)
    self.assertEqual(concern, database.write_concern)
def __init__(self, connection_string, database: str, collection: str):
    """Connect to MongoDB and open ``collection`` in ``database``.

    The collection is opened with the standard UUID representation.

    :param connection_string: connection string passed to ``MongoClient``.
    :param database: name of the database to select.
    :param collection: name of the collection to open.
    """
    logger.info(
        f'Initialize. Database: "{database}". Collection: "{collection}"')

    # http://api.mongodb.com/python/current/tutorial.html?_ga=1.114535310.822912736.1490913716
    self.client = MongoClient(connection_string)
    self.db = self.client[database]

    standard_uuid = CodecOptions(uuid_representation=binary.STANDARD)
    self.collection = self.db.get_collection(
        collection, codec_options=standard_uuid)
def get_db(collection=None):
    """Return the ``online_shopping`` database or one of its collections.

    The database handle is created once and cached on ``g.db``. Collections
    are derived from that handle on every call. Previously the first call's
    result (database or collection) was cached under the same key, so later
    calls got that object back regardless of ``collection``.

    :param collection: name of the collection to return; when ``None`` the
        database itself is returned.
    :return: the database, or the named collection opened with
        timezone-aware codec options for ``Asia/Tehran``.
    """
    if 'db' not in g:
        g.db = pymongo.MongoClient(
            current_app.config["CONNECTION_STRING_MDB"]).online_shopping

    if collection is None:
        return g.db

    codec_options = CodecOptions(tz_aware=True,
                                 tzinfo=pytz.timezone("Asia/Tehran"))
    return g.db.get_collection(collection, codec_options=codec_options)
def test_get_database(self):
    """``get_database`` returns an AsyncIOMotorDatabase with the given options."""
    options = CodecOptions(tz_aware=True)
    concern = WriteConcern(w=2, j=True)

    database = self.cx.get_database(
        'foo', options, ReadPreference.SECONDARY, concern)

    assert isinstance(database, motor_asyncio.AsyncIOMotorDatabase)
    self.assertEqual('foo', database.name)
    self.assertEqual(options, database.codec_options)
    self.assertEqual(ReadPreference.SECONDARY, database.read_preference)
    # Here the database's write concern is compared with the concern's
    # document form.
    self.assertEqual(concern.document, database.write_concern)
def write(self, kind: FileType) -> None:
    """Serialize the tree to the file returned by ``self._path(kind)``.

    The on-disk format is selected by ``kind``. Nothing is written for a
    kind that is not handled below.

    :param kind: the ``FileType`` member selecting the serialization format.
    """
    fn = self._path(kind)
    if kind == FileType.PICKLE:
        # serialize as TreeNode
        with open(fn, "wb") as f:
            pickle.dump(self.treenode, f, protocol=-1)
    elif kind == FileType.CSV:
        # serialize as id_dict
        # newline="" lets the csv module control line endings itself, as its
        # documentation requires; otherwise extra blank rows appear on
        # platforms that translate "\n".
        with open(fn, "w", newline="") as f:
            w = csv.DictWriter(f, Node._fields)
            w.writeheader()
            for item in self.treenode.node_iter():
                w.writerow(item._asdict())
    elif kind == FileType.MSGPACK:
        # https://msgpack-python.readthedocs.io/en/latest/api.html
        with open(fn, "wb") as f:
            # Doesn't improve speed
            # msgpack.pack(self._to_dict(), f, use_bin_type=True)
            msgpack.pack(self.to_dict_list(), f)
    elif kind == FileType.JSON:
        self._json_dump(fn, json.dump)
    elif kind == FileType.UJSON:
        self._json_dump(fn, ujson.dump)
    elif kind == FileType.SIMPLEJSON:
        # NOTE: simplejson includes key names when serializing NamedTuples
        with open(fn, "w") as f:
            if self.json_dict_list:
                simplejson.dump(list(self.id_dict.values()), f,
                                ensure_ascii=True)
            else:
                simplejson.dump(self.id_dict, f, ensure_ascii=True)
    elif kind == FileType.CBOR2:
        with open(fn, "wb") as f:
            cbor2.dump(self.to_dict_list(), f)
    elif kind == FileType.CBOR:
        with open(fn, "wb") as f:
            cbor.dump(self.to_dict_list(), f)
    elif kind == FileType.RAPIDJSON:
        # https://python-rapidjson.readthedocs.io/en/latest/benchmarks.html
        # TODO: See this example for possible speed improvement - deeper
        #       integration with Node
        # https://python-rapidjson.readthedocs.io/en/latest/encoder.html
        # NOTE: can't use id_dict - keys must be strings
        #       can't use self.id_dict.values() - not serializable
        #       list(self.id_dict.values()) produces a list of lists - no
        #       keys - very fragile
        with open(fn, "w") as f:
            # Both payload shapes use the same encoder settings.
            encode = rapidjson.Encoder(number_mode=rapidjson.NM_NATIVE,
                                       ensure_ascii=False)
            if self.json_dict_list:
                encode(self.to_dict_list(), f)
            else:
                encode(list(self.id_dict.values()), f)
    elif kind == FileType.BSON:
        with open(fn, "wb") as f:
            co = CodecOptions(document_class=RawBSONDocument)
            # One BSON document per node, written back to back.
            for node in self.treenode.node_iter():
                f.write(BSON.encode(node._asdict(), codec_options=co))
def get_db_collection(cls,
                      type_codecs: [CoreEntityCodec],
                      document_class: MutableMapping = AttributeDict):
    """
    Provide the class's collection configured with custom codec options.

    :param type_codecs: codecs registered in the collection's type registry
    :param document_class: mapping type passed to ``CodecOptions`` as the
        document class
    :return: Collection for the current database connection
    """
    db: Database = cls.get_database()
    registry = TypeRegistry(type_codecs)
    options = CodecOptions(document_class=document_class,
                           type_registry=registry)
    return db.get_collection(name=cls._collection_name,
                             codec_options=options)
def _get_collection(
    self,
    collection: Optional[str] = None,
    tz_aware=False,
) -> pymongo.collection.Collection:
    """Build codec options and return the MongoDB collection object.

    :param collection: collection name; ``self.default_collection`` is used
        when ``None``. For a name not in ``self.collections``,
        ``self._make_indexes`` is called first.
    :param tz_aware: forwarded to ``CodecOptions``.
    :return: the collection with the codec options applied.
    """
    if collection is None:
        collection = self.default_collection
    else:
        known = collection in self.collections
        if not known:
            self._make_indexes(collection)
            self.logger.info(f'Making new collection: {collection}')

    codec = CodecOptions(tz_aware=tz_aware)
    base = self.db.get_collection(collection)
    return base.with_options(codec)
def find_doc_by_uuid(collection: str, uuid_str: str):
    """Find the non-update document whose ``uuid.uuid`` equals ``uuid_str``.

    :param collection: name of the collection to search in ``DB``.
    :param uuid_str: textual UUID to look for.
    :return: the matching document, or the falsy ``find_one`` result when
        nothing matched (an error is logged in that case).
    """
    # need to use java legacy uuid representation
    # https://stackoverflow.com/questions/26712600/mongo-uuid-python-vs-java-format/31061472
    legacy_options = CodecOptions(uuid_representation=JAVA_LEGACY)
    db_collection = DB.get_collection(collection, legacy_options)

    criteria = {
        'uuid.uuid': uuid.UUID(uuid_str),
        'isUpdate': False,
    }
    found = db_collection.find_one(criteria)
    if not found:
        LOGGER.error(f'No doc found for {uuid_str}')
    return found
def test_get_collection(self):
    """``get_collection`` applies explicit options and inherits the rest."""
    options = CodecOptions(tz_aware=True, uuid_representation=JAVA_LEGACY)
    concern = WriteConcern(w=2, j=True)

    # All options given explicitly.
    explicit = self.db.get_collection(
        'foo', options, ReadPreference.SECONDARY, concern)
    self.assertTrue(isinstance(explicit, motor.MotorCollection))
    self.assertEqual('foo', explicit.name)
    self.assertEqual(options, explicit.codec_options)
    self.assertEqual(ReadPreference.SECONDARY, explicit.read_preference)
    self.assertEqual(concern, explicit.write_concern)

    # Only the read preference given: the rest comes from the database.
    tagged = Secondary([{"dc": "sf"}])
    inherited = self.db.get_collection('foo', read_preference=tagged)
    self.assertEqual(tagged, inherited.read_preference)
    self.assertEqual(self.db.codec_options, inherited.codec_options)
    self.assertEqual(self.db.write_concern, inherited.write_concern)
def test_with_options(self):
    """``Collection.with_options`` overrides given options, keeps the rest."""
    base = self.db.test
    options = CodecOptions(tz_aware=True, uuid_representation=JAVA_LEGACY)
    concern = WriteConcern(w=2, j=True)

    # Every option overridden.
    overridden = base.with_options(
        options, ReadPreference.SECONDARY, concern)
    self.assertTrue(isinstance(overridden, motor.MotorCollection))
    self.assertEqual(options, overridden.codec_options)
    self.assertEqual(Secondary(), overridden.read_preference)
    self.assertEqual(concern, overridden.write_concern)

    # Only the read preference overridden.
    tagged = Secondary([{"dc": "sf"}])
    partial = base.with_options(read_preference=tagged)
    self.assertEqual(tagged, partial.read_preference)
    self.assertEqual(base.codec_options, partial.codec_options)
    self.assertEqual(base.write_concern, partial.write_concern)
def test_with_options(self):
    """``Database.with_options`` overrides given options, keeps the rest."""
    base = self.db
    options = CodecOptions(tz_aware=True, uuid_representation=JAVA_LEGACY)
    concern = WriteConcern(w=2, j=True)

    # Every option overridden.
    overridden = base.with_options(
        options, ReadPreference.SECONDARY, concern)
    self.assertTrue(isinstance(overridden, motor.MotorDatabase))
    self.assertEqual(options, overridden.codec_options)
    self.assertEqual(Secondary(), overridden.read_preference)
    self.assertEqual(concern, overridden.write_concern)

    # Only the read preference overridden.
    tagged = Secondary([{"dc": "sf"}])
    partial = base.with_options(read_preference=tagged)
    self.assertEqual(tagged, partial.read_preference)
    self.assertEqual(base.codec_options, partial.codec_options)
    self.assertEqual(base.write_concern, partial.write_concern)
def __init__(
    self,
    model_class: Type[T],
    database: Database,
    col_name: str,
    indexes: Optional[list[Union[IndexModel, str]]] = None,
    wrap_object_str_id=True,
):
    """Open the backing collection and remember the model class.

    :param model_class: model class stored as ``self.model_class``.
    :param database: database the collection is taken from.
    :param col_name: name of the collection.
    :param indexes: index models to create; string entries are converted
        with ``parse_str_index_model`` first.
    :param wrap_object_str_id: combined with whether the model's ``id``
        field type is ``ObjectIdStr`` to set ``self.wrap_object_id``.
    """
    registry = TypeRegistry([DecimalCodec()])
    self.collection = database.get_collection(
        col_name, CodecOptions(type_registry=registry))

    if indexes:
        models = []
        for entry in indexes:
            if isinstance(entry, str):
                entry = parse_str_index_model(entry)
            models.append(entry)
        self.collection.create_indexes(models)

    self.model_class = model_class
    id_is_object_id_str = model_class.__fields__["id"].type_ == ObjectIdStr
    self.wrap_object_id = id_is_object_id_str and wrap_object_str_id
def verify_output(self, bson_metrics_file_name, expected_results,
                  check_last_row_only=False):
    """
    Compare the documents of a BSON file with the expected results.

    :param bson_metrics_file_name: path of the BSON file to decode
    :param expected_results: the expected documents indexed by row position,
        or, with ``check_last_row_only``, the expected last document itself
    :param check_last_row_only: Check that the last row is correct. Since
        the results are cumulative, this likely means previous rows are all
        correct as well.
    :return:
    """
    with open(bson_metrics_file_name, 'rb') as f:
        options = CodecOptions(document_class=OrderedDict)
        if check_last_row_only:
            decoded_bson = list(decode_file_iter(f, options))
            self.assertEqual(expected_results, decoded_bson[-1])
            return
        for index, doc in enumerate(decode_file_iter(f, options)):
            self.assertEqual(doc, expected_results[index])
def test_with_options(self):
    """``with_options`` on an AsyncIOMotorCollection overrides what is given."""
    base = self.db.test
    options = CodecOptions(tz_aware=True, uuid_representation=JAVA_LEGACY)
    concern = WriteConcern(w=2, j=True)

    # Every option overridden.
    overridden = base.with_options(
        options, ReadPreference.SECONDARY, concern)
    self.assertTrue(isinstance(overridden, AsyncIOMotorCollection))
    self.assertEqual(options, overridden.codec_options)
    self.assertEqual(JAVA_LEGACY, overridden.uuid_subtype)
    self.assertEqual(ReadPreference.SECONDARY, overridden.read_preference)
    self.assertEqual(concern.document, overridden.write_concern)

    # Only the read preference overridden.
    tagged = Secondary([{"dc": "sf"}])
    partial = base.with_options(read_preference=tagged)
    self.assertEqual(tagged.mode, partial.read_preference)
    self.assertEqual(tagged.tag_sets, partial.tag_sets)
    self.assertEqual(base.codec_options, partial.codec_options)
    self.assertEqual(base.uuid_subtype, partial.uuid_subtype)
    self.assertEqual(base.write_concern, partial.write_concern)
def db_collection(db):
    """Return the ``pymongo_migrate`` collection of ``db``.

    The collection is opened with timezone-aware codec options whose
    ``tzinfo`` is ``timezone.utc``.
    """
    utc_options = CodecOptions(tz_aware=True, tzinfo=timezone.utc)
    return db.get_collection("pymongo_migrate", codec_options=utc_options)