def test_bulk_write(self):
    self.db.test.collection.bulk_write([
        DeleteOne({'noCollation': 42}),
        DeleteMany({'noCollation': 42}),
        DeleteOne({'foo': 42}, collation=self.collation),
        DeleteMany({'foo': 42}, collation=self.collation),
        ReplaceOne({'noCollation': 24}, {'bar': 42}),
        UpdateOne({'noCollation': 84}, {'$set': {'bar': 10}}, upsert=True),
        UpdateMany({'noCollation': 45}, {'$set': {'bar': 42}}),
        ReplaceOne({'foo': 24}, {'foo': 42}, collation=self.collation),
        UpdateOne({'foo': 84}, {'$set': {'foo': 10}}, upsert=True,
                  collation=self.collation),
        UpdateMany({'foo': 45}, {'$set': {'foo': 42}},
                   collation=self.collation)
    ])
    delete_cmd = self.listener.results['started'][0].command
    update_cmd = self.listener.results['started'][1].command

    def check_ops(ops):
        for op in ops:
            if 'noCollation' in op['q']:
                # Ops created without a collation must not send one.
                self.assertNotIn('collation', op)
            else:
                self.assertEqual(self.collation.document, op['collation'])

    check_ops(delete_cmd['deletes'])
    check_ops(update_cmd['updates'])
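# For context: `self.collation` above is a pymongo Collation document passed
# per operation. A minimal sketch of constructing one; the en_US locale and
# strength=2 (case-insensitive comparison) are illustrative assumptions:
from pymongo.collation import Collation

collation = Collation(locale='en_US', strength=2)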
def saveAll(self, exchanger, unit, sums):
    reqs = []
    for s in sums:
        item = s.toDict()
        # Key each replacement on 'datetime' so re-saving the same period
        # overwrites the existing document instead of duplicating it.
        condition = {'datetime': item['datetime']}
        reqs.append(ReplaceOne(condition, item, upsert=True))
    if reqs:
        self.getCollection(exchanger, unit).bulk_write(reqs)
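# A self-contained sketch of the upsert pattern above, runnable against a
# local MongoDB server. The 'mongodb://localhost:27017' URI and the
# 'market.sums_btc_1m' namespace are assumptions for illustration; re-running
# the same batch replaces rather than duplicates documents because each
# ReplaceOne is keyed on 'datetime' with upsert=True.
from datetime import datetime

from pymongo import MongoClient, ReplaceOne

client = MongoClient('mongodb://localhost:27017')  # assumed local server
coll = client['market']['sums_btc_1m']             # assumed namespace
batch = [
    {'datetime': datetime(2024, 1, 1, 0, 0), 'open': 42000, 'close': 42100},
    {'datetime': datetime(2024, 1, 1, 0, 1), 'open': 42100, 'close': 42050},
]
coll.bulk_write([ReplaceOne({'datetime': d['datetime']}, d, upsert=True)
                 for d in batch])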
def test_ReplaceOne(self):
    result = yield self.coll.bulk_write([
        ReplaceOne({'x': 42}, {'j': 5}),
        ReplaceOne({'x': 555}, {'k': 5}, upsert=True),
    ])

    docs = yield self.coll.find(fields={"_id": 0})
    self.assertEqual(len(docs), 4)
    self.assertIn({'j': 5}, docs)
    self.assertIn({'y': 123}, docs)
    self.assertIn({'z': 321}, docs)
    self.assertIn({'k': 5}, docs)

    self.assertIsInstance(result, BulkWriteResult)
    self.assertEqual(result.matched_count, 1)
    self.assertEqual(result.modified_count, 1)
    self.assertEqual(set(result.upserted_ids), {1})
def retryable_single_statement_ops(coll):
    return [
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]], {}),
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]], {'ordered': False}),
        (coll.bulk_write, [[ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[ReplaceOne({}, {}), ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[UpdateOne({}, {'$set': {'a': 1}}),
                            UpdateOne({}, {'$set': {'a': 1}})]], {}),
        (coll.bulk_write, [[DeleteOne({})]], {}),
        (coll.bulk_write, [[DeleteOne({}), DeleteOne({})]], {}),
        (coll.insert_one, [{}], {}),
        (coll.insert_many, [[{}, {}]], {}),
        (coll.replace_one, [{}, {}], {}),
        (coll.update_one, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_one, [{}], {}),
        (coll.find_one_and_replace, [{}, {'a': 3}], {}),
        (coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_one_and_delete, [{}, {}], {}),
    ]
import os
from pprint import pprint

import pymongo
from pymongo import ReplaceOne, UpdateOne
from pymongo.errors import BulkWriteError
from pymongo.server_api import ServerApi


def send_to_db(username, display_name, user_ratings):
    database_url = os.getenv('DATABASE_URL', None)
    if database_url:
        client = pymongo.MongoClient(database_url, server_api=ServerApi('1'))
        db = client["letterboxd"]
        users = db.users
        ratings = db.ratings
        movies = db.movies

        user = {
            "username": username,
            "display_name": display_name,
            "num_reviews": len(user_ratings)
        }
        users.update_one({"username": user["username"]}, {"$set": user},
                         upsert=True)

        upsert_ratings_operations = []
        upsert_movies_operations = []
        for rating in user_ratings:
            # Replace the user's rating for this movie, inserting it if absent.
            upsert_ratings_operations.append(
                ReplaceOne({
                    "user_id": username,
                    "movie_id": rating["movie_id"]
                }, rating, upsert=True)
            )
            # Ensure a stub document exists for the movie itself.
            upsert_movies_operations.append(
                UpdateOne({
                    "movie_id": rating["movie_id"]
                }, {
                    "$set": {
                        "movie_id": rating["movie_id"]
                    }
                }, upsert=True)
            )

        try:
            if upsert_ratings_operations:
                ratings.bulk_write(upsert_ratings_operations, ordered=False)
            if upsert_movies_operations:
                movies.bulk_write(upsert_movies_operations, ordered=False)
        except BulkWriteError as bwe:
            pprint(bwe.details)
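# How the BulkWriteError branch above surfaces failures: with ordered=False,
# PyMongo attempts every operation before raising, and bwe.details lists each
# failure under 'writeErrors'. A hedged sketch of drilling into them, where
# `collection` and `operations` stand in for the ratings upserts above:
from pymongo.errors import BulkWriteError

def report_bulk_errors(collection, operations):
    try:
        collection.bulk_write(operations, ordered=False)
    except BulkWriteError as bwe:
        for err in bwe.details.get('writeErrors', []):
            print(err['index'], err['code'], err['errmsg'])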
def saveAll(self, exchanger, ticks):
    batch_size = 2048
    collection = self.collections[exchanger]
    reqs = []
    for t in ticks:
        item = t.toDict()
        condition = {'datetime': item['datetime']}
        reqs.append(ReplaceOne(condition, item, upsert=True))
        # Flush in fixed-size batches so a long stream of ticks doesn't
        # accumulate an unbounded request list.
        if len(reqs) >= batch_size:
            collection.bulk_write(reqs, ordered=False)
            reqs = []
    if reqs:
        collection.bulk_write(reqs, ordered=False)
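# An alternative sketch of the same batching idea using itertools.islice to
# pull fixed-size chunks from a generator; `chunked_bulk_write` is a
# hypothetical helper, not part of the code above.
from itertools import islice

from pymongo import ReplaceOne

def chunked_bulk_write(collection, items, batch_size=2048):
    requests = (ReplaceOne({'datetime': d['datetime']}, d, upsert=True)
                for d in items)
    while True:
        chunk = list(islice(requests, batch_size))
        if not chunk:
            break
        collection.bulk_write(chunk, ordered=False)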
def retryable_single_statement_ops(coll):
    return [
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]], {}),
        (coll.bulk_write, [[InsertOne({}), InsertOne({})]], {'ordered': False}),
        (coll.bulk_write, [[ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[ReplaceOne({}, {}), ReplaceOne({}, {})]], {}),
        (coll.bulk_write, [[UpdateOne({}, {'$set': {'a': 1}}),
                            UpdateOne({}, {'$set': {'a': 1}})]], {}),
        (coll.bulk_write, [[DeleteOne({})]], {}),
        (coll.bulk_write, [[DeleteOne({}), DeleteOne({})]], {}),
        (coll.insert_one, [{}], {}),
        (coll.insert_many, [[{}, {}]], {}),
        (coll.replace_one, [{}, {}], {}),
        (coll.update_one, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_one, [{}], {}),
        (coll.find_one_and_replace, [{}, {'a': 3}], {}),
        (coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_one_and_delete, [{}, {}], {}),
        # Deprecated methods.
        # Insert with single or multiple documents.
        (coll.insert, [{}], {}),
        (coll.insert, [[{}]], {}),
        (coll.insert, [[{}, {}]], {}),
        # Save with and without an _id.
        (coll.save, [{}], {}),
        (coll.save, [{'_id': ObjectId()}], {}),
        # Non-multi update.
        (coll.update, [{}, {'$set': {'a': 1}}], {}),
        # Non-multi remove.
        (coll.remove, [{}], {'multi': False}),
        # Replace.
        (coll.find_and_modify, [{}, {'a': 3}], {}),
        # Update.
        (coll.find_and_modify, [{}, {'$set': {'a': 1}}], {}),
        # Delete.
        (coll.find_and_modify, [{}, {}], {'remove': True}),
    ]
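# A sketch of how a retryable-writes test harness might consume the
# (method, args, kwargs) triples returned above; this driver loop is an
# illustrative assumption, not part of PyMongo's test suite:
def run_retryable_ops(coll):
    for method, args, kwargs in retryable_single_statement_ops(coll):
        method(*args, **kwargs)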
def test_ReplaceOneNotEquals(self):
    self.assertNotEqual(ReplaceOne({'foo': 42}, {'bar': 42}, upsert=False),
                        ReplaceOne({'foo': 42}, {'bar': 42}, upsert=True))
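# The equality being exercised above, as a standalone check: two ReplaceOne
# requests compare equal only when filter, replacement, and options all match.
# A minimal sketch assuming only pymongo is installed.
from pymongo import ReplaceOne

assert ReplaceOne({'foo': 42}, {'bar': 42}) == ReplaceOne({'foo': 42}, {'bar': 42})
assert ReplaceOne({'foo': 42}, {'bar': 42}, upsert=False) != \
       ReplaceOne({'foo': 42}, {'bar': 42}, upsert=True)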
def copy_indexes(*index_names, batch_size: int = 50, rewrite: bool = False,
                 request_timeout: int = 60) -> None:
    """
    Read the data from D3M's database and store it in the lab's database.
    We mirror just a subset of indexes and fields.

    Parameters
    ----------
    *index_names : str
        The names of the specific indexes to read from D3M's db.
        If not provided, all indexes will be read.
    batch_size : int
        The number of records to retrieve from D3M's db with each
        network request.
    rewrite : bool
        If `True`, deletes the collections and rereads them from scratch.
        If `False`, only new records will be copied down.
    request_timeout : int
        Number of seconds to wait for a response from Elasticsearch.
    """
    d3m_db = D3MDB()
    aml_db = AMLDB()

    if len(index_names) == 0:
        # Copy all by default; iterating the Index enum yields every member.
        to_copy = Index
    else:
        to_copy = {Index(name) for name in index_names}

    for index in to_copy:
        index_name = index.value
        aml_collection = aml_db.db[index_name]

        if rewrite:
            print(f"Removing all records in the '{index_name}' collection...")
            aml_collection.delete_many({})

        # Only copy over documents we don't have yet.
        print(f"Determining which documents to copy from index '{index_name}'...")
        d3m_ids = d3m_db.get_all_ids(index_name)
        aml_ids = aml_db.get_all_ids(index_name)
        ids_of_docs_to_copy = list(d3m_ids - aml_ids)
        num_docs_to_copy = len(ids_of_docs_to_copy)
        print((f"Now copying subset of index '{index_name}' ({num_docs_to_copy} documents) "
               f"to the AML database..."))

        # We'll write the data to the lab db in batches.
        write_buffer = MongoWriteBuffer(aml_collection, batch_size)

        # Iterate over this index in batches, only querying the subset of
        # fields we care about.
        for id_chunk in chunk(ids_of_docs_to_copy, batch_size, show_progress=True):
            hits = (d3m_db.search(index=index_name)
                    .query("ids", values=list(id_chunk))
                    .source(elasticsearch_fields[index])
                    .params(size=batch_size, request_timeout=request_timeout)
                    .execute())
            for hit in hits:
                doc = hit.to_dict()
                # MongoDB will use the same primary key Elasticsearch does.
                doc["_id"] = hit.meta.id
                write_buffer.queue(
                    # Insert the doc, or if another document already exists
                    # with the same _id, replace it.
                    ReplaceOne(filter={"_id": doc["_id"]}, replacement=doc,
                               upsert=True))

        # Write and flush any leftovers for this index.
        write_buffer.flush()
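# MongoWriteBuffer is not defined above. A minimal sketch of what such a
# buffer could look like, assuming it only needs the queue() and flush()
# methods used in copy_indexes; the auto-flush-when-full behavior is an
# assumption consistent with the final "leftovers" flush:
from pymongo.collection import Collection

class MongoWriteBuffer:
    """Queue write requests and flush them in fixed-size bulk batches."""

    def __init__(self, collection: Collection, batch_size: int):
        self._collection = collection
        self._batch_size = batch_size
        self._requests = []

    def queue(self, request) -> None:
        self._requests.append(request)
        if len(self._requests) >= self._batch_size:
            self.flush()

    def flush(self) -> None:
        if self._requests:
            self._collection.bulk_write(self._requests, ordered=False)
            self._requests = []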