def test_aggregate_out(self):
    """Check whether ``readConcern`` is sent with an aggregate ending in ``$out``."""
    local_coll = self.db.get_collection(
        'coll', read_concern=ReadConcern('local'))
    pipeline = [
        {'$match': {'field': 'value'}},
        {'$out': 'output_collection'},
    ]
    # Drain the cursor so the command is sent and captured by the listener.
    tuple(local_coll.aggregate(pipeline))

    # Aggregate with $out supports readConcern MongoDB 4.2 onwards
    # (the gate below compares the server version against (4, 1)).
    if client_context.version >= (4, 1):
        check = self.assertIn
    else:
        check = self.assertNotIn
    check('readConcern', self.listener.results['started'][0].command)
def update_ss_with_new_rs(old_rs, new_rs):
    """Replace ``old_rs`` with ``new_rs`` in both submitted-variant collections.

    The dbSNP collection is updated first, then the EVA collection. Each
    update runs with majority read/write concern against the primary.

    :param old_rs: value currently stored in the ``rs`` field
    :param new_rs: value to store in the ``rs`` field instead
    """
    dbsnp_sve_collection = mongo_source.mongo_handle[
        mongo_source.db_name][DBSNP_SUBMITTED_VARIANT_ENTITY]
    eva_sve_collection = mongo_source.mongo_handle[
        mongo_source.db_name][EVA_SUBMITTED_VARIANT_ENTITY]
    logger.info(
        f"updating submittedVariantEntity with old_rs: {old_rs} to new_rs: {new_rs}"
    )

    matching_old = {'rs': old_rs}
    assign_new = {'$set': {'rs': new_rs}}
    for sve_collection in (dbsnp_sve_collection, eva_sve_collection):
        strict_collection = sve_collection.with_options(
            read_concern=ReadConcern("majority"),
            read_preference=pymongo.ReadPreference.PRIMARY,
            write_concern=WriteConcern("majority"))
        strict_collection.update_many(matching_old, assign_new)
def test_get_collection(self):
    """``get_collection`` must keep every option passed to it positionally."""
    expected_codec = CodecOptions(tz_aware=True)
    expected_wc = WriteConcern(w=2, j=True)
    expected_rc = ReadConcern('majority')

    coll = self.client.pymongo_test.get_collection(
        'foo',
        expected_codec,
        ReadPreference.SECONDARY,
        expected_wc,
        expected_rc)

    # Same assertion order as the positional arguments above.
    self.assertEqual('foo', coll.name)
    self.assertEqual(expected_codec, coll.codec_options)
    self.assertEqual(ReadPreference.SECONDARY, coll.read_preference)
    self.assertEqual(expected_wc, coll.write_concern)
    self.assertEqual(expected_rc, coll.read_concern)
def parse_opts(opts):
    """Convert spec-style option keys in ``opts`` into driver keyword arguments.

    Recognised keys (``readPreference``, ``writeConcern``, ``readConcern``)
    are popped from ``opts`` and returned, converted, in a new dict under
    their snake_case names. Other keys are left in ``opts`` untouched.

    :param opts: mutable mapping of options; recognised keys are removed
    :return: new dict holding only the converted options
    """
    # Lambdas keep the converter names lazily resolved, exactly as the
    # branch-per-key form did.
    converters = (
        ('readPreference', 'read_preference',
         lambda raw: parse_read_preference(raw)),
        ('writeConcern', 'write_concern',
         lambda raw: WriteConcern(**raw)),
        ('readConcern', 'read_concern',
         lambda raw: ReadConcern(**raw)),
    )
    parsed = {}
    for spec_key, kwarg_name, convert in converters:
        if spec_key in opts:
            parsed[kwarg_name] = convert(opts.pop(spec_key))
    return parsed
def parse_options(opts):
    """Rewrite spec-style option keys of ``opts`` in place.

    ``readPreference``, ``writeConcern`` and ``readConcern`` are replaced by
    ``read_preference``, ``write_concern`` and ``read_concern`` holding the
    converted values. The concern sub-documents are copied with ``dict``
    before being expanded into keyword arguments.

    :param opts: mutable mapping of options, modified in place
    :return: the same ``opts`` object
    """
    def _concern(factory_name):
        # Resolve the class at call time, as the original inline calls did.
        return lambda raw: globals()[factory_name](**dict(raw))

    rewrites = (
        ('readPreference', 'read_preference',
         lambda raw: parse_read_preference(raw)),
        ('writeConcern', 'write_concern', _concern('WriteConcern')),
        ('readConcern', 'read_concern', _concern('ReadConcern')),
    )
    for old_key, new_key, convert in rewrites:
        if old_key not in opts:
            continue
        opts[new_key] = convert(opts.pop(old_key))
    return opts
def test_transaction_options_validation(self):
    """TransactionOptions defaults to None and rejects invalid concerns."""
    defaults = TransactionOptions()
    self.assertIsNone(defaults.read_concern)
    self.assertIsNone(defaults.write_concern)

    # Proper concern objects must be accepted without raising.
    TransactionOptions(
        read_concern=ReadConcern(),
        write_concern=WriteConcern())

    # A plain dict is not an acceptable concern object.
    wrong_type_cases = (
        ("read_concern must be ", {'read_concern': {}}),
        ("write_concern must be ", {'write_concern': {}}),
    )
    for message, bad_kwargs in wrong_type_cases:
        with self.assertRaisesRegex(TypeError, message):
            TransactionOptions(**bad_kwargs)

    unacknowledged = WriteConcern(w=0)
    with self.assertRaisesRegex(
            ConfigurationError,
            "transactions must use an acknowledged write concern"):
        TransactionOptions(write_concern=unacknowledged)
def get_balances_in_txn(conn, colls_list):
    """Read every account balance from ``colls_list`` inside one snapshot transaction.

    The session and the transaction are managed with ``with`` blocks: the
    transaction is committed when the reads finish, aborted if any read
    raises, and the session is always ended. (Previously the session was
    never ended and a failure left the transaction open.)

    :param conn: client object providing ``start_session()``
    :param colls_list: iterable of collections whose documents carry a
        ``balance`` field
    :return: list of ``balance`` values, in collection then cursor order
    """
    balances_list = []
    with conn.start_session() as session:
        with session.start_transaction(read_concern=ReadConcern('snapshot')):
            for coll in colls_list:
                # The session must be passed so the read joins the transaction.
                balances_list.extend(
                    acct['balance']
                    for acct in coll.find({}, session=session))
    return balances_list
def __init__(self, client, key_vault_coll, mongocryptd_client, opts):
    """Internal class to perform I/O on behalf of pymongocrypt.

    :param client: client to reference weakly, or None
    :param key_vault_coll: key vault collection; a copy reconfigured with
        the key-vault codec options and majority read/write concern is kept
    :param mongocryptd_client: stored as-is
    :param opts: stored as-is
    """
    # Use a weak ref to break reference cycle.
    self.client_ref = None if client is None else weakref.ref(client)

    majority_options = dict(
        codec_options=_KEY_VAULT_OPTS,
        read_concern=ReadConcern(level='majority'),
        write_concern=WriteConcern(w='majority'))
    self.key_vault_coll = key_vault_coll.with_options(**majority_options)

    self.mongocryptd_client = mongocryptd_client
    self.opts = opts
    self._spawned = False
def extract_ids_to_file(mongo_source, collections, filter_criteria, output_dir):
    """Dump the ``_id`` of every matching document to one text file per collection.

    :param mongo_source: object exposing ``mongo_handle`` and ``db_name``
    :param collections: iterable of collection names to search
    :param filter_criteria: query filter passed to ``find``
    :param output_dir: directory prefix for the output files
    :return: list of the file paths written, in ``collections`` order
    """
    file_names = []
    for collection_name in collections:
        # NOTE(review): ``assembly`` is not a parameter of this function; it
        # must be supplied by the enclosing scope - confirm this is intended.
        file_name = f"{output_dir}/{collection_name}_{assembly}.txt"
        file_names.append(file_name)
        logger.info(f'Searching in collections {collection_name}')
        collection = mongo_source.mongo_handle[mongo_source.db_name][collection_name]
        cursor = collection \
            .with_options(read_concern=ReadConcern("majority")) \
            .find(filter_criteria, no_cursor_timeout=True)
        # A cursor opened with no_cursor_timeout is not reaped by the server,
        # so close it explicitly even when writing fails part-way through.
        try:
            with open(file_name, "w") as file:
                for variant in cursor:
                    file.write(str(variant['_id']) + '\n')
        finally:
            cursor.close()
    return file_names
def test_inline_map_reduce(self):
    """An inline map-reduce must carry the collection's readConcern."""
    map_js = 'function() { emit(this._id, this.value); }'
    reduce_js = 'function(key, values) { return 42; }'
    local_coll = self.db.get_collection(
        'coll', read_concern=ReadConcern('local'))

    try:
        tuple(local_coll.inline_map_reduce(map_js, reduce_js))
    except OperationFailure:
        # "ns doesn't exist"
        pass

    sent_command = self.listener.results['started'][0].command
    self.assertEqual({'level': 'local'}, sent_command['readConcern'])
def detect_discordant_cluster_variant(mongo_source, assemblies, batch_size=1000):
    """Compare clustered variants with the positions of their submitted variants.

    Clustered variants are read in batches of ``batch_size``. For each batch,
    the submitted variants that reference the same rs ids are fetched and
    their ``contig:start`` positions are grouped per rs, then handed to
    ``compare`` together with the batch. A running total is logged per batch.

    :param mongo_source: object exposing ``mongo_handle`` and ``db_name``
    :param assemblies: assemblies to restrict both queries to; a falsy value
        means no restriction
    :param batch_size: cursor batch size and number of clustered variants
        processed per round
    """
    dbsnp_cve_collection = mongo_source.mongo_handle[
        mongo_source.db_name]["dbsnpClusteredVariantEntity"]
    dbsnp_sve_collection = mongo_source.mongo_handle[
        mongo_source.db_name]["dbsnpSubmittedVariantEntity"]
    cve_filter_criteria = {}
    sve_filter_criteria = {}
    if assemblies:
        cve_filter_criteria = {'asm': {"$in": assemblies}}
        sve_filter_criteria = {'seq': {"$in": assemblies}}
    cursor = dbsnp_cve_collection.with_options(read_concern=ReadConcern("majority"))\
        .find(cve_filter_criteria, no_cursor_timeout=True)
    # A no_cursor_timeout cursor is not reaped by the server, so it must be
    # closed explicitly, including when a batch fails.
    try:
        cursor.batch_size(batch_size)
        projection = {'contig': 1, 'start': 1, 'rs': 1}
        nb_clustered_variants = 0
        nb_error = 0
        for clustered_variants in grouper(batch_size, cursor):
            # Falsy entries are skipped; presumably ``grouper`` pads the last
            # batch - confirm against its definition.
            rsids = [
                clustered_variant.get('accession')
                for clustered_variant in clustered_variants
                if clustered_variant
            ]
            nb_clustered_variants += len(rsids)
            sve_filtering = sve_filter_criteria.copy()
            sve_filtering['rs'] = {'$in': rsids}
            sve_cursor = dbsnp_sve_collection.with_options(read_concern=ReadConcern("majority"))\
                .find(sve_filtering, projection)
            submitted_variant_position_per_rs = defaultdict(set)
            for sve in sve_cursor:
                submitted_variant_position_per_rs[sve.get('rs')].add(
                    f"{sve.get('contig')}:{sve.get('start')}")
            nb_error += compare(clustered_variants,
                                submitted_variant_position_per_rs)
            # Progress report after each batch.
            logger.info(
                f'Processed {nb_clustered_variants}: Found {nb_error} errors')
    finally:
        cursor.close()
def test_command_cursor(self):
    """Aggregate sends readConcern only when the collection sets one."""
    def run_aggregate(target):
        # Drain the cursor so the command reaches the listener.
        tuple(target.aggregate([{'$match': {'field': 'value'}}]))

    def first_started_command():
        return self.listener.results['started'][0].command

    # readConcern not sent in command if not specified.
    run_aggregate(self.db.coll)
    self.assertNotIn('readConcern', first_started_command())

    self.listener.results.clear()

    # Explicitly set readConcern to 'local'.
    run_aggregate(
        self.db.get_collection('coll', read_concern=ReadConcern('local')))
    self.assertEqual(
        {'level': 'local'}, first_started_command()['readConcern'])
def find_rs_entity_not_exist_in_collection(mongo_db, collection_name, rsid_list, assembly_accession):
    """Return the rs ids from ``rsid_list`` that have no document in the collection.

    :param mongo_db: object exposing ``mongo_handle`` and ``db_name``
    :param collection_name: name of the collection to search
    :param rsid_list: accessions to look for
    :param assembly_accession: value the ``asm`` field must match
    :return: list of the accessions that were not found (order unspecified)
    """
    cve_collection = mongo_db.mongo_handle[mongo_db.db_name][collection_name]
    filter_criteria = {
        'asm': assembly_accession,
        'accession': {
            '$in': rsid_list
        }
    }
    projection = {'accession': 1}
    cursor = cve_collection.with_options(read_concern=ReadConcern("majority"))\
        .find(filter_criteria, projection, no_cursor_timeout=True)
    # A no_cursor_timeout cursor is not reaped by the server; close it
    # explicitly so a failure during iteration cannot leak it.
    try:
        accession_found = [record['accession'] for record in cursor]
    finally:
        cursor.close()
    remaining_rsids = set(rsid_list).difference(accession_found)
    return list(remaining_rsids)
def set_read_write_concern(collection, write_concerns, read_concerns):
    """Yield a copy of ``collection`` with overridden read/write concern fields.

    Each concern starts from the collection's current concern document and
    is updated with the given overrides when they are not None.

    :param collection: collection whose concerns form the baseline
    :param write_concerns: mapping of write-concern overrides, or None
    :param read_concerns: mapping of read-concern overrides, or None
    """
    def _merged(concern, overrides):
        # Copy the current settings so the collection's own document is
        # never modified.
        settings = dict(concern.document.items())
        if overrides is not None:
            settings.update(overrides)
        return settings

    write_settings = _merged(collection.write_concern, write_concerns)
    read_settings = _merged(collection.read_concern, read_concerns)
    yield collection.with_options(
        write_concern=WriteConcern(**write_settings),
        read_concern=ReadConcern(**read_settings),
    )
async def test_basic(self):
    """A document inserted through ``with_transaction`` is visible afterwards."""
    # Create the collection.
    await self.collection.insert_one({})

    async def insert_in_txn(session):
        await self.collection.insert_one({'_id': 1}, session=session)

    async with await self.cx.start_session() as s:
        txn_options = {
            'read_concern': ReadConcern('local'),
            'write_concern': WriteConcern('majority'),
            'read_preference': ReadPreference.PRIMARY,
            'max_commit_time_ms': 30000,
        }
        await s.with_transaction(insert_in_txn, **txn_options)

    stored = await self.collection.find_one({'_id': 1})
    self.assertEqual(stored, {'_id': 1})
def find_rs_references_in_ss_collection(mongo_db, collection_name, assembly_accession, batch_size=1000):
    """Yield the ``rs`` values referenced by submitted variants, in batches.

    Every yielded list except the last holds exactly ``batch_size`` values.
    The last list holds the remainder and may be empty.

    :param mongo_db: object exposing ``mongo_handle`` and ``db_name``
    :param collection_name: name of the submitted-variant collection
    :param assembly_accession: value the ``seq`` field must match
    :param batch_size: number of rs values per yielded list
    """
    sve_collection = mongo_db.mongo_handle[mongo_db.db_name][collection_name]
    filter_criteria = {'seq': assembly_accession, 'rs': {'$exists': True}}
    projection = {'rs': 1}
    cursor = sve_collection.with_options(read_concern=ReadConcern("majority"))\
        .find(filter_criteria, projection, no_cursor_timeout=True)
    rs_list = []
    # A no_cursor_timeout cursor is not reaped by the server. The finally
    # clause also runs when the consumer abandons the generator early, so
    # the cursor cannot leak.
    try:
        for record in cursor:
            rs_list.append(record['rs'])
            if len(rs_list) == batch_size:
                yield rs_list
                rs_list = []
    finally:
        cursor.close()
    # Trailing partial batch (possibly empty), as callers have always received.
    yield rs_list
def parse_spec_options(opts):
    """Translate spec-test option names in ``opts`` into driver keyword names.

    ``opts`` is modified in place and a shallow ``dict`` copy is returned.
    Handled keys: ``readPreference``, ``writeConcern``, ``readConcern``,
    ``maxTimeMS``, ``maxCommitTimeMS``, ``hint`` and ``requests``.
    Non-string hints are turned into a list of ``(key, value)`` pairs.

    :param opts: mutable mapping of spec options
    :return: new dict with the translated options
    """
    def _normalize_hint(holder):
        # Re-insert 'hint' so a mapping hint becomes a list of pairs.
        if 'hint' not in holder:
            return
        raw_hint = holder.pop('hint')
        if not isinstance(raw_hint, str):
            raw_hint = list(raw_hint.items())
        holder['hint'] = raw_hint

    if 'readPreference' in opts:
        opts['read_preference'] = parse_read_preference(
            opts.pop('readPreference'))

    if 'writeConcern' in opts:
        opts['write_concern'] = WriteConcern(**dict(opts.pop('writeConcern')))

    if 'readConcern' in opts:
        opts['read_concern'] = ReadConcern(**dict(opts.pop('readConcern')))

    # Plain renames, applied in a fixed order.
    for spec_name, kwarg_name in (('maxTimeMS', 'max_time_ms'),
                                  ('maxCommitTimeMS', 'max_commit_time_ms')):
        if spec_name in opts:
            opts[kwarg_name] = opts.pop(spec_name)

    _normalize_hint(opts)

    # Properly format 'hint' arguments for the Bulk API tests.
    if 'requests' in opts:
        requests = opts.pop('requests')
        for request in requests:
            if 'name' in request:
                # CRUD v2 format
                arguments = request.pop('arguments', {})
                _normalize_hint(arguments)
                request['arguments'] = arguments
            else:
                # Unified test format
                _, model_spec = next(iter(request.items()))
                _normalize_hint(model_spec)
        opts['requests'] = requests

    return dict(opts)
def test_find_command(self):
    """``find`` sends readConcern only when the collection sets one."""
    def first_started_command():
        return self.listener.results['started'][0].command

    # readConcern not sent in command if not specified.
    default_coll = self.db.coll
    tuple(default_coll.find({'field': 'value'}))
    self.assertNotIn('readConcern', first_started_command())

    self.listener.results.clear()

    # Explicitly set readConcern to 'local'.
    local_coll = self.db.get_collection(
        'coll', read_concern=ReadConcern('local'))
    tuple(local_coll.find({'field': 'value'}))
    expected_command = SON([
        ('find', 'coll'),
        ('filter', {'field': 'value'}),
        ('readConcern', {'level': 'local'}),
    ])
    self.assertEqualCommand(expected_command, first_started_command())
def test_map_reduce_out(self):
    """map_reduce omits readConcern when writing out and sends it for inline output."""
    map_js = 'function() { emit(this._id, this.value); }'
    reduce_js = 'function(key, values) { return 42; }'
    local_coll = self.db.get_collection(
        'coll', read_concern=ReadConcern('local'))

    # Output to a collection: no readConcern expected in the command.
    local_coll.map_reduce(map_js, reduce_js, out='output_collection')
    self.assertNotIn(
        'readConcern', self.listener.results['started'][0].command)

    if not client_context.version.at_least(3, 1, 9, -1):
        return

    # Inline output: the configured readConcern must be sent.
    self.listener.results.clear()
    local_coll.map_reduce(map_js, reduce_js, out={'inline': 1})
    inline_command = self.listener.results['started'][0].command
    self.assertEqual({'level': 'local'}, inline_command['readConcern'])
def find_documents(mongo_source, collection_name, filter_criteria):
    """Return every document of ``collection_name`` matching ``filter_criteria``.

    Reads use majority read concern against the primary. Any error raised
    while iterating is logged and re-raised. The cursor is always closed.

    :param mongo_source: object exposing ``mongo_handle`` and ``db_name``
    :param collection_name: name of the collection to query
    :param filter_criteria: query filter passed to ``find``
    :return: list of matching documents
    """
    database = mongo_source.mongo_handle[mongo_source.db_name]
    primary_majority = database[collection_name].with_options(
        read_concern=ReadConcern("majority"),
        read_preference=pymongo.ReadPreference.PRIMARY)
    cursor = primary_majority.find(filter_criteria, no_cursor_timeout=True)
    try:
        records = list(cursor)
    except Exception as e:
        logger.exception(traceback.format_exc())
        raise e
    finally:
        cursor.close()
    return records
def update_employee_info(session):
    """Mark employee 3 inactive and record the change, in one transaction.

    Both writes carry ``session`` so they belong to the transaction, which
    is then committed through ``commit_with_retry``.

    :param session: client session used for the transaction
    """
    client = session.client
    employees_coll = client.hr.employees
    events_coll = client.reporting.events

    status_change_event = {
        "employee": 3,
        "status": {"new": "Inactive", "old": "Active"},
    }
    txn = session.start_transaction(
        read_concern=ReadConcern("snapshot"),
        write_concern=WriteConcern(w="majority"),
        read_preference=ReadPreference.PRIMARY)
    with txn:
        employees_coll.update_one(
            {"employee": 3},
            {"$set": {"status": "Inactive"}},
            session=session)
        events_coll.insert_one(status_change_event, session=session)

        commit_with_retry(session)
def test_aggregate_out(self):
    """An aggregate ending in ``$out`` must not carry readConcern."""
    local_coll = self.db.get_collection(
        'coll', read_concern=ReadConcern('local'))
    pipeline = [
        {'$match': {'field': 'value'}},
        {'$out': 'output_collection'},
    ]
    try:
        tuple(local_coll.aggregate(pipeline))
    except OperationFailure:
        # "ns doesn't exist"
        pass

    sent_command = self.listener.results['started'][0].command
    self.assertNotIn('readConcern', sent_command)
def wrapper(session, *args, **kwargs):
    """Run ``txn_func`` in a snapshot transaction, retrying on transient errors.

    A connection or operation failure labelled ``TransientTransactionError``
    restarts the transaction. Any other such failure is re-raised as
    ``ErrorDataDB``.

    :return: whatever ``txn_func(session, *args, **kwargs)`` returns
    """
    while True:
        try:
            with session.start_transaction(
                read_concern=ReadConcern(level="snapshot"),
                write_concern=WriteConcern(w="majority"),
                read_preference=ReadPreference.PRIMARY,
            ):
                result = txn_func(session, *args, **kwargs)
                return result
        except (ConnectionFailure, OperationFailure) as ex:
            is_transient = ex.has_error_label("TransientTransactionError")
            if not is_transient:
                raise ErrorDataDB("Ошибка при попытке транзакции")
            # Transient failure: report it and go round the loop again.
            print(
                "INFO: TransientTransactionError,"
                "Повторная попытка транзакции ..."
            )
def atomic_assign_data(self, doc, mongoClient, db_from, col_from, db_to, col_to):
    """
    atomic transaction on mongodb to allocate task to worker

    Inside one transaction the packet is deleted from the source collection,
    upserted into the destination collection, and a record is added to
    ``log.controller_log``. The session and transaction are managed by
    ``with`` blocks: the transaction is committed on success, aborted on
    error, and the session is always ended.

    :param doc: packet document; gains an ``assignment_time`` field (mutated
        in place) and must contain ``_id``
    :param mongoClient: client providing the databases and the session
    :param db_from: source database name
    :param col_from: source collection name
    :param db_to: destination database name
    :param col_to: destination collection name
    """
    import datetime
    from pymongo.read_concern import ReadConcern
    from pymongo.write_concern import WriteConcern

    wc_majority = WriteConcern("majority", wtimeout=2000)

    doc['assignment_time'] = datetime.datetime.now()

    collection_from = mongoClient[db_from][col_from]
    collection_to = mongoClient[db_to][col_to]

    # NOTE(review): ``worker_name`` is not defined in this function; it must
    # come from the enclosing module scope - confirm.
    logging_info = ('packet id' + str(doc['_id']) + ' assigned to worker ' +
                    str(worker_name))

    with mongoClient.start_session() as session:
        logging.info(logging_info + ' start')
        with session.start_transaction(read_concern=ReadConcern('local'),
                                       write_concern=wc_majority):
            # Important:: You must pass the session to the operations.
            collection_from.find_one_and_delete({'_id': doc['_id']},
                                                session=session)
            collection_to.replace_one({'_id': doc['_id']},
                                      doc,
                                      upsert=True,
                                      session=session)
            mongoClient['log']['controller_log'].insert_one(
                {
                    'info': logging_info,
                    "utctime": datetime.datetime.utcnow()
                },
                session=session)
    logging.info(logging_info + ' end')
def verify_outcome(self, spec):
    """Assert that each collection named in ``spec`` holds the expected documents.

    Collections are read from the primary with ``local`` read concern and
    compared in ``_id`` order. Entries with no expected documents are not
    compared.

    :param spec: iterable of dicts with ``collectionName``,
        ``databaseName`` and ``documents`` keys
    """
    for entry in spec:
        coll_name = entry['collectionName']
        db_name = entry['databaseName']
        expected_documents = entry['documents']

        database = self.client.get_database(db_name)
        coll = database.get_collection(
            coll_name,
            read_preference=ReadPreference.PRIMARY,
            read_concern=ReadConcern(level='local'))

        if not expected_documents:
            continue

        expected_in_order = sorted(
            expected_documents, key=lambda doc: doc['_id'])
        found_in_order = list(coll.find({}, sort=[('_id', ASCENDING)]))
        self.assertListEqual(expected_in_order, found_in_order)
def most_active_commenters():
    """
    Returns a list of the top 20 most frequent commenters.

    Ticket: User Report

    Counts comments per ``email``, sorts by that count and keeps the 20
    documents with the highest values. No field projection is applied. The
    aggregation reads with ``majority`` read concern.
    """
    top_commenters_pipeline = [
        {'$sortByCount': '$email'},
        {"$limit": 20},
    ]
    majority_comments = db.comments.with_options(
        read_concern=ReadConcern('majority'))
    return list(majority_comments.aggregate(top_commenters_pipeline))
def _bootstrap(self, db_name, coll_name):
    """Read the historical data of a collection and yield it as events.

    First yields a ``bootstrap-start`` marker, then one ``bootstrap-insert``
    event per document whose ``_id`` is at most the largest ``_id`` found
    when the read started. An empty collection yields only the start marker
    (it previously raised ``AttributeError``).

    :param db_name: database name
    :param coll_name: collection name
    """
    # Emit the start marker.
    yield {
        'operationType': 'bootstrap-start',
        'ns': {
            'db': db_name,
            'coll': coll_name
        },
        'topic': f'{db_name}-{coll_name}',
    }

    db = self.conn.client[db_name]
    coll = db[coll_name]

    # Open a session and get the largest _id at the current time.
    with self.conn.client.start_session() as session:
        with session.start_transaction(
                read_concern=ReadConcern('snapshot')):
            # NOTE(review): ``session`` is not passed to the reads below, so
            # they presumably run outside this snapshot transaction -
            # confirm whether that is intended before changing it.
            # NOTE(review): ``ts`` is not used in the visible part of this
            # function.
            ts = Timestamp(datetime.utcnow(), 0)
            max_obj = coll.find_one(sort=[('_id', DESCENDING)])
            # find_one returns None for an empty collection; treat that as
            # "nothing to bootstrap" instead of failing on ``None.get``.
            max_id = max_obj.get('_id') if max_obj is not None else None

            if max_id:
                domain = {'_id': {'$lte': max_id}}
                values = coll.find(domain)
                index = 1
                for val in values:
                    data = {
                        'operationType': 'bootstrap-insert',
                        'fullDocument': val,
                        'documentKey': val['_id'],
                        'ns': {
                            'db': db_name,
                            'coll': coll_name
                        },
                        'topic': f'{db_name}-{coll_name}',
                    }
                    yield data

                    # Log a sampled record now and then.
                    index += 1
                    if index % 50_000 == 0:
                        logger.info(f"MongoDB index = {index}, data={data}")
def test_transaction_options_validation(self):
    """TransactionOptions defaults to None and rejects invalid option values."""
    defaults = TransactionOptions()
    self.assertIsNone(defaults.read_concern)
    self.assertIsNone(defaults.write_concern)
    self.assertIsNone(defaults.read_preference)

    # A fully specified, valid set of options must be accepted.
    TransactionOptions(
        read_concern=ReadConcern(),
        write_concern=WriteConcern(),
        read_preference=ReadPreference.PRIMARY)

    # (exception type, message pattern, offending keyword arguments)
    rejected = (
        (TypeError,
         "read_concern must be ",
         lambda: {'read_concern': {}}),
        (TypeError,
         "write_concern must be ",
         lambda: {'write_concern': {}}),
        (ConfigurationError,
         "transactions do not support unacknowledged write concern",
         lambda: {'write_concern': WriteConcern(w=0)}),
        (TypeError,
         "is not valid for read_preference",
         lambda: {'read_preference': {}}),
    )
    for error_type, pattern, make_kwargs in rejected:
        with self.assertRaisesRegex(error_type, pattern):
            TransactionOptions(**make_kwargs())
def lambda_handler(event, context):
    """Create and store a new simulation for a user (POST only).

    The JSON request body supplies ``username``, ``simname`` and the
    simulation parameters. Percentages are converted to fractions, the
    simulation is run, and the result is stored inside a transaction. The
    MongoDB client created for the request is always closed before
    returning.

    :param event: request event with ``httpMethod`` and a JSON ``body``
    :param context: invocation context (unused)
    :return: response dict with ``statusCode`` and ``body`` for POST;
        ``None`` for any other method
    """
    operation = event['httpMethod']

    # Get the parameters from the UI by parsing the json file (body)
    parameters = json.loads(event['body'])

    if operation != 'POST':
        # NOTE(review): other methods have never produced a response;
        # confirm whether an explicit error response is wanted instead.
        return None

    client = MongoClient(MONGO_CLUSTER)
    try:
        # Check the validity of the simulation name
        user_id = parameters['username']
        simulation_name = parameters['simname']

        if simulationExists(client, user_id, simulation_name):
            return {
                'statusCode': '401',
                'body': "Simulation name already exists for this user. Please provide a different simulation name."
            }

        wc_majority = WriteConcern("majority", wtimeout=1000)

        # Run a new simulation; UI percentages become fractions.
        parameters2 = dict()
        parameters2['population_size'] = int(parameters['population'])
        parameters2['statuses_weights'] = [
            float(parameters['susceptibility'])/100,
            float(parameters['infectious'])/100,
            float(parameters['contagious'])/100,
            float(parameters['treatment'])/100,
            float(parameters['cure'])/100,
            .0,
        ]
        parameters2['mortality_rate'] = float(parameters['mortality_rate'])/100
        parameters2['infection_rate'] = float(parameters['infection_rate'])/100
        parameters2['days'] = int(parameters['days'])

        sim_statistics = run_simulation(parameters2)

        # Open a transaction to perform the following actions
        # Create a new simulation
        with client.start_session() as session:
            sim_id = session.with_transaction(
                lambda s: callback(s, user_id, simulation_name, sim_statistics),
                read_concern=ReadConcern('local'),
                write_concern=wc_majority,
                read_preference=ReadPreference.PRIMARY)

        return {
            # The former expression ``'400' if None else '200'`` could only
            # ever produce '200'; the literal keeps that behaviour.
            'statusCode': '200',
            'body': "Successfully created the new simulation " + str(sim_id)
        }
    finally:
        # Release the connections opened for this invocation.
        client.close()
def wrapper(session, *args, **kwargs):
    """Run ``txn_func`` in a snapshot transaction, retrying on transient errors.

    A connection or operation failure labelled ``TransientTransactionError``
    restarts the transaction. Any other such failure is re-raised as
    ``ErrorDataDB``.

    :return: whatever ``txn_func(session, *args, **kwargs)`` returns
    """
    while True:
        try:
            transaction = session.start_transaction(
                read_concern=ReadConcern(level="snapshot"),
                write_concern=WriteConcern(w="majority"),
                read_preference=ReadPreference.PRIMARY,
            )
            with transaction:
                # The transaction ended successfully with a commit
                # and the function successfully returned its result
                return txn_func(session, *args, **kwargs)
        except (ConnectionFailure, OperationFailure) as ex:
            if not ex.has_error_label("TransientTransactionError"):
                raise ErrorDataDB("O.o Что-то страшное при попытке транзакции")
            # Transient failure: report it, then loop to retry.
            print(
                "INFO: TransientTransactionError,"
                "повторная попытка транзакции ..."
            )