def delete_expired_tokens(self):
    """Delete expired tokens. Also, remove docs with no tokens."""
    now = datetime.utcnow()
    bulk_requests = []
    docs = list(self.mgdb.tokens.find(
        {"$or": [{"link.expires": {"$lte": now}},
                 {"fetch.expires": {"$lte": now}},
                 {"link": [], "fetch": []}]}))
    for d in docs:
        if not d["link"] and not d["fetch"]:
            bulk_requests.append(DeleteOne(dict(_id=d["_id"])))
            continue
        link = []
        for t in d["link"]:
            if t["expires"] > now:
                link.append(t)
        fetch = []
        for t in d["fetch"]:
            if t["expires"] > now:
                fetch.append(t)
        if not link and not fetch:
            bulk_requests.append(DeleteOne(dict(_id=d["_id"])))
        else:
            bulk_requests.append(ReplaceOne(
                dict(_id=d["_id"]),
                dict(email=d["email"], link=link, fetch=fetch)
            ))
    if bulk_requests:
        self.mgdb.tokens.bulk_write(bulk_requests)
def execute(self):
    config = self.config
    coll = config.tests.test_collection
    self.set_source("dirname/test.txt")

    coll.insert_one({"hello": "world"})
    data = []
    for doc in coll.find():
        data.append(doc)
    assert len(data) == 1
    assert data[0]["_job_id"] == self._id
    assert data[0]["_src"] == "test.txt"

    coll.insert_many([
        {"hello": 1},
        {"hello": 2},
    ])

    from pymongo import InsertOne, DeleteOne, ReplaceOne, UpdateOne, \
        UpdateMany, DeleteMany
    requests = [InsertOne({'hello': 3}),
                DeleteOne({'x': 1}),
                ReplaceOne({'hello': 1}, {'hello': 1.234}, upsert=True),
                UpdateOne({'hello': 2}, {'$set': {'hello': 2.234}}, upsert=True),
                UpdateMany({'hello': 3}, {'$set': {'hello': 3.234}}, upsert=True),
                DeleteMany({'x': 1})]
    coll.bulk_write(requests)

    requests = [InsertOne({'hello': 4}),
                DeleteOne({'x': 1}),
                ReplaceOne({'hello': 1.234}, {'zz': 1}, upsert=True),
                UpdateOne({'hello': 2.234}, {'$set': {'zz': 2}}, upsert=True),
                UpdateMany({'hello': 3.234}, {'$set': {'zz': 3}}, upsert=True),
                DeleteMany({'x': 1})]
    coll.bulk_write(requests)

    coll.update_one({"hello": 5}, {"$set": {"ua": 1}}, upsert=True)
    coll.update_many({"hello": "5"}, {"$set": {"ua": 2}}, upsert=True)

    data = []
    for doc in coll.find():
        data.append(doc)
    assert len(data) == 7
    assert set([d["_job_id"] for d in data]) == {self._id}
    assert set([d["_src"] for d in data]) == {"test.txt"}
async def delete(self, models: ModuleType, indexer: str = "slug"):
    children = {}
    parent = await self.ancestors(models, True)
    if parent:
        self_class = self.__class__.__name__
        for field in fields(parent):
            if "model" in field.metadata and field.metadata["model"] == self_class:
                childs = getattr(parent, field.name)
                childs.remove(
                    getattr(
                        self,
                        "_id" if field.type == List[ObjectId] else indexer))
                children[field.name] = childs
    actions = [
        DeleteOne({"_id": self._id}),
        DeleteMany({"path": {"$regex": f"^{self.get_url()}"}})
    ]
    if children:
        actions.append(UpdateOne({"_id": parent._id}, {"$set": children}))
    async with await self._table.database.client.start_session() as s:
        async with s.start_transaction():
            await self._table.bulk_write(actions)
    self.id_ = None
def delete_asset_entry(asset, archive=False, **kwargs):
    """
    Delete an asset entry in our database

    :param asset: BaseAsset asset we want to delete from database
    :param archive: bool soft deletion of asset (move to archive database/location)
    """
    return DeleteOne(AssetOperations.unpack_asset_info(asset, **kwargs))
async def delete_by_ids(cls, ids, session=None, **kwargs):
    """
    Delete records by their IDs.

    :param ids: list of IDs
    :param session: client session
    :return:
    """
    if not isinstance(ids, (list, tuple)):
        raise TypeError("'ids' must be a list or tuple.")
    bulk_requests, tmp_id_list = [], []
    for oid in ids:
        tmp_id_list.append(str(oid))
        bulk_requests.append(
            DeleteOne(
                {'_id': ObjectId(oid) if isinstance(oid, str) else oid}))
    if bulk_requests:
        clazz = cls()
        result = await clazz.get_async_collection().bulk_write(
            bulk_requests, session=session, **kwargs)
        if result:
            # Clear cached entries for the deleted IDs
            if clazz.__cached:
                CacheEngine.delete_many(tmp_id_list)
            return result.deleted_count
    return 0
def func2():
    """
    Bulk-insert documents.
    """
    client = MongoClient('mongodb://127.0.0.1:27017/')  # open a connection
    collection = client['blogdb'].get_collection(
        'posts', write_concern=WriteConcern(w=1, j=True, wtimeout=1))  # select the collection
    # write_concern controls when the write is acknowledged:
    # w=1: mongod acknowledges once the write has been applied in memory
    # w=1 with journal=True: mongod acknowledges once the write is in memory and in the journal
    # w=2: only meaningful in a replica set; acknowledges once a secondary has replicated from the primary
    try:
        insertData = [InsertOne({'title': i}) for i in range(4)]  # documents to insert
        otherData = [
            DeleteMany({}),  # Remove all documents.
            InsertOne({'_id': 1}),
            InsertOne({'_id': 2}),
            InsertOne({'_id': 3}),
            UpdateOne({'_id': 1}, {'$set': {'foo': 'bar'}}),
            UpdateOne({'_id': 4}, {'$inc': {'j': 1}}, upsert=True),
            ReplaceOne({'j': 1}, {'j': 2}),
            DeleteOne({'_id': 2})
        ]
        collection.bulk_write(otherData + insertData, ordered=True)
    except BulkWriteError as bwe:
        print(bwe.details)
def upsert(obj, store, name, chunksize=1):  # noqa
    from pymongo import DeleteOne, UpdateOne
    ops_updates = []
    ops_deletions = []
    for i, row in tqdm(obj.iterrows(), total=len(obj)):
        data = row.to_dict()
        # om rowid is re-added on insert only to preserve existing row id
        del data['_om#rowid']
        updates = {
            '$set': data,
            '$setOnInsert': {
                '_om#rowid': row['_om#rowid']
            }
        }
        key = {k: v for k, v in data.items() if k.startswith('_idx#')}
        if data.get('_delete_', delete):
            # collection.delete_one(key)
            ops_deletions.append(DeleteOne(key))
        else:
            # collection.update_one(key, updates, upsert=True)
            ops_updates.append(UpdateOne(key, updates, upsert=True))
    # do all updates in one step
    collection.bulk_write(ops_deletions + ops_updates)
def dedupe_pid(self):
    from pymongo import MongoClient
    from pymongo import DeleteOne
    client = MongoClient()
    db = client.NGA_multi
    cursor = db.NGA_replies.aggregate([{
        "$group": {
            "_id": {"tid": "$tid", "pid": "$pid"},
            "unique_ids": {"$addToSet": "$_id"},
            "count": {"$sum": 1}
        }
    }, {
        "$match": {"count": {"$gte": 2}}
    }], allowDiskUse=True)
    response = []
    for doc in cursor:
        del doc["unique_ids"][0]
        for id in doc["unique_ids"]:
            response.append(DeleteOne({'_id': id}))
    if len(response) >= 1:
        db.NGA_replies.bulk_write(response)
    cursor.close()
def remove_job_by_id(self, jid):
    """
    Remove job from schedule
    """
    self.logger.info("Remove job %s", jid)
    with self.bulk_lock:
        self.bulk += [DeleteOne({Job.ATTR_ID: jid})]
def delete_variants(self, variants):
    """Delete observations in database

    Given a list of variants, the write operation for each of the variants
    is given as a bulk to mongodb.

    Args:
        variants (list(Variant)): a list of variants
    """
    variant_id_dict = {variant["_id"]: variant for variant in variants}
    # Look up all variants at the same time to reduce number of operations
    # done on the database
    query = self.db.variant.find(
        {"_id": {"$in": list(variant_id_dict.keys())}})
    operations = []
    for mongo_variant in query:
        variant = variant_id_dict.get(mongo_variant["_id"])
        if variant is None:
            continue
        if mongo_variant["observations"] == 1:
            operations.append(DeleteOne({"_id": variant["_id"]}))
            continue
        update = self._get_update_delete(variant)
        operations.append(
            UpdateOne({"_id": variant["_id"]}, update, upsert=False))
    # Make the accumulated write operations
    if operations:
        self.db.variant.bulk_write(operations, ordered=False)
async def handle_pending_tx(pending, actions_list):
    LOGGER.info("%s Handling TX in block %s"
                % (pending['context']['chain_name'], pending['context']['height']))
    messages = await get_chaindata_messages(pending['content'], pending['context'])
    if isinstance(messages, list):
        message_actions = list()
        for i, message in enumerate(messages):
            message['time'] = pending['context']['time'] + (i / 1000)  # force order

            message = await check_message(message, trusted=True)  # we don't check signatures yet.
            if message is None:
                continue

            # we add it to the message queue... bad idea? should we process it asap?
            message_actions.append(InsertOne({
                'message': message,
                'source': dict(
                    chain_name=pending['context']['chain_name'],
                    tx_hash=pending['context']['tx_hash'],
                    height=pending['context']['height'],
                    check_message=True  # should we store this?
                )
            }))
            await asyncio.sleep(0)

        if message_actions:
            await PendingMessage.collection.bulk_write(message_actions)

    if messages is not None:
        # bogus or handled, we remove it.
        actions_list.append(DeleteOne({'_id': pending['_id']}))
def update_for_model(cls, model):
    """
    Update connection cache for object model
    :param model: ObjectModel instance
    :return:
    """
    cache = {}
    collection = ModelConnectionsCache._get_collection()
    for cc in ModelConnectionsCache.objects.filter(model=model.id):
        cache[(cc.type, cc.gender, cc.model, cc.name)] = cc.id
    bulk = []
    for c in model.connections:
        k = (c.type.id, c.gender, model.id, c.name)
        if k in cache:
            del cache[k]
            continue
        bulk += [
            InsertOne({
                "type": c.type.id,
                "gender": c.gender,
                "model": model.id,
                "name": c.name
            })
        ]
    if cache:
        bulk += [DeleteOne({"_id": x}) for x in six.itervalues(cache)]
    if bulk:
        collection.bulk_write(bulk)
def fix():
    bulk = []
    # Model
    ex_tags = []
    print("Update models....")
    for m in [ManagedObject]:
        tags = set()
        for s in m.objects.filter(is_managed=True).exclude(
                tags=None).values_list('tags', flat=True).distinct():
            tags.update(s)
        for t in tags:
            bulk += [
                UpdateOne({"tag": t}, {
                    "$addToSet": {"models": repr(m)},
                    "$inc": {"count": m.objects.filter(tags__in=["{%s}" % t]).count()}
                }, upsert=True)
            ]
        ex_tags += [t.decode("utf8") for t in tags]
    # Documents
    print("Fixing documents....")
    for m in [NetworkSegment]:
        tags = set(
            t[0] for t in m.objects.filter(tags__exists=True).values_list('tags')
            if t)
        ex_tags += list(tags)
        for t in tags:
            bulk += [
                UpdateOne({"tag": t}, {
                    "$addToSet": {"models": repr(m)},
                    "$inc": {"count": m.objects.filter(tags__in=[t]).count()}
                }, upsert=True)
            ]
    delete_tags = set(Tag.objects.values_list("tag")) - set(ex_tags)
    print("Clean tags: %s" % delete_tags)
    for t in delete_tags:
        bulk += [DeleteOne({"tag": t})]
    if bulk:
        print("Committing changes to database")
        try:
            r = Tag._get_collection().bulk_write(bulk)
            print("Database has been synced")
            print("Inserted: %d, Modify: %d, Deleted: %d"
                  % (r.inserted_count + r.upserted_count, r.modified_count, r.deleted_count))
        except BulkWriteError as e:
            print("Bulk write error: '%s'" % e.details)
            print("Stopping check")
def bulk():
    from pymongo import InsertOne, DeleteOne, ReplaceOne
    requests = [
        InsertOne({'y': 1}),
        DeleteOne({'x': 1}),
        ReplaceOne({'w': 1}, {'z': 1}, upsert=True)
    ]
    result = db.my.bulk_write(requests)
def delete(self, coll, recs, keyflds=None):
    if recs is None or len(recs) == 0:
        return 0
    filter_arr = []
    keyflds = keyflds or []
    if '_id' in keyflds:
        reqs = [DeleteOne({'_id': o['_id']}) for i, o in enumerate(recs)]
    else:
        for rec in recs:
            conds = []
            for fld in keyflds:
                conds.append({fld: rec[fld]})
            filter_arr.append(conds)
        reqs = [
            DeleteOne({'$and': filter_arr[i]}) for i, o in enumerate(recs)
        ]
    result = self._db[coll].bulk_write(reqs)
    return result.deleted_count
async def release_schedules(self, scheduler_id: str, schedules: List[Schedule]) -> None:
    updated_schedules: List[Tuple[str, datetime]] = []
    finished_schedule_ids: List[str] = []
    async with await self.client.start_session() as s, s.start_transaction():
        # Update schedules that have a next fire time
        requests = []
        for schedule in schedules:
            filters = {'_id': schedule.id, 'acquired_by': scheduler_id}
            if schedule.next_fire_time is not None:
                try:
                    serialized_data = self.serializer.serialize(schedule)
                except SerializationError:
                    self._logger.exception('Error serializing schedule %r -- '
                                           'removing from data store', schedule.id)
                    requests.append(DeleteOne(filters))
                    finished_schedule_ids.append(schedule.id)
                    continue

                update = {
                    '$unset': {
                        'acquired_by': True,
                        'acquired_until': True,
                    },
                    '$set': {
                        'next_fire_time': schedule.next_fire_time,
                        'serialized_data': serialized_data
                    }
                }
                requests.append(UpdateOne(filters, update))
                updated_schedules.append((schedule.id, schedule.next_fire_time))
            else:
                requests.append(DeleteOne(filters))
                finished_schedule_ids.append(schedule.id)

        if requests:
            await self._schedules.bulk_write(requests, ordered=False)

            now = datetime.now(timezone.utc)
            for schedule_id, next_fire_time in updated_schedules:
                await self.publish(ScheduleUpdated(now, schedule_id, next_fire_time))

    now = datetime.now(timezone.utc)
    for schedule_id in finished_schedule_ids:
        await self.publish(ScheduleRemoved(now, schedule_id))
def del_charts(charts_num, charts_del_keepnum):
    '''Used by the /console/ admin page to delete rows from the charts collection'''
    arr = []
    if charts_del_keepnum < 0:
        charts_del_keepnum = 0
    charts_num -= charts_del_keepnum
    if charts_num > 0:
        for _ in range(charts_num):
            arr.append(DeleteOne({}))
        coll.bulk_write(arr)
def del_logs(logs_num, logs_del_keepnum):
    '''Delete logs for the /console/ admin page'''
    arr = []
    if logs_del_keepnum < 0:
        logs_del_keepnum = 0
    logs_num -= logs_del_keepnum
    if logs_num > 0:
        for _ in range(logs_num):
            arr.append(DeleteOne({}))
        coll.bulk_write(arr)
def update_items(insert, edit, delete):
    requests = []
    for i in insert:
        requests.append(InsertOne(i))
    for i in edit:
        requests.append(UpdateOne({'id': i['id']},
                                  {'$set': {'description': i['description'],
                                            'index': i['index'],
                                            'file_path': i['file_path']}}))
    for i in delete:
        requests.append(DeleteOne({'id': i['id']}))
    result = Persistence.db[Persistence.COLLECTION_ITEMS].bulk_write(requests)
    return result.acknowledged
def delete(self, col_name, query_builder, count, is_finish=False):
    col = self.db[col_name]
    if not is_finish:
        self.bulk[col_name].append(DeleteOne(query_builder))
    if len(self.bulk[col_name]) >= self.MONGOBULK or is_finish:
        s = time.time()
        col.bulk_write(self.bulk[col_name])
        e = time.time()
        # reset only this collection's buffer so other collections keep their pending ops
        self.bulk[col_name] = []
        print("***%s***, removed %s docs, %s processed so far" % (e - s, self.MONGOBULK, count))
def delete_irrelevant_disambiguator_links(dryrun=True):
    """
    After a while, disambiguator links can break if segmentation changes.
    Check that for each existing disambiguator link, there still exists an inline citation to back it.
    """
    def make_tc(tref, oref):
        global _tc_cache
        tc = oref.text('he')
        _tc_cache[tref] = tc
        if len(_tc_cache) > 5000:
            _tc_cache = {}
        return tc

    def normalize(s):
        return re.sub(r"<[^>]+>", "", strip_cantillation(s, strip_vowels=True))

    irrelevant_links = []
    ls = LinkSet({"generated_by": "link_disambiguator", "auto": True})
    for link in tqdm(ls, total=ls.count()):
        source_tref, quoted_tref = link.refs if Ref(
            link.refs[1]).primary_category == 'Talmud' else reversed(link.refs)
        source_oref = Ref(source_tref)
        quoted_oref = Ref(quoted_tref)
        if quoted_oref.primary_category != 'Talmud':
            continue
        source_tc = _tc_cache.get(source_tref, make_tc(source_tref, source_oref))
        if len(source_tc.text) == 0 or isinstance(source_tc.text, list):
            snippets = None
        else:
            snippets = get_snippet_by_seg_ref(source_tc, quoted_oref.section_ref(),
                                              must_find_snippet=True)
        if snippets is None:
            irrelevant_links += [{
                "ID": link._id,
                "Source": source_tref,
                "Quoted": quoted_tref,
                "Source Text": normalize(source_tc.ja().flatten_to_string())
            }]

    with open(ROOT + '/irrelevant_links.csv', 'w') as fout:
        c = csv.DictWriter(fout, ['ID', 'Source', 'Quoted', 'Source Text'])
        c.writeheader()
        c.writerows(irrelevant_links)

    if not dryrun:
        from sefaria.system.database import db
        from pymongo import DeleteOne
        # irrelevant_links holds dicts, so delete by their stored "ID" field
        db.links.bulk_write(
            [DeleteOne({"_id": row["ID"]}) for row in irrelevant_links])
def update(cls):
    import requests

    # Get new values
    new = {}
    logger.info("Fetching new items from %s", cls.DOWNLOAD_URL)
    r = requests.get(cls.DOWNLOAD_URL)
    assert r.status_code == 200
    for l in r.text.splitlines():
        if "(hex)" in l:
            oui, vendor = l.split("(hex)")
            oui = oui.strip().replace("-", "").upper()
            vendor = vendor.strip()
            new[oui] = vendor
    # Get old values
    old = {
        d["_id"]: d["vendor"]
        for d in MACVendor._get_collection().find()
    }
    # Compare
    collection = MACVendor._get_collection()
    bulk = []
    for oui, vendor in new.items():
        if oui in old:
            if vendor != old[oui]:
                logger.info("[%s] %s -> %s", oui, old[oui], vendor)
                bulk += [
                    UpdateOne({"_id": oui}, {"$set": {"vendor": vendor}})
                ]
        else:
            logger.info("[%s] Add %s", oui, vendor)
            bulk += [InsertOne({"_id": oui, "vendor": vendor})]
    for oui in set(old) - set(new):
        logger.info("[%s] Delete", oui)
        bulk += [DeleteOne({"_id": oui})]
    if bulk:
        logger.info("Committing changes to database")
        try:
            r = collection.bulk_write(bulk, ordered=False)
            logger.info("Database has been synced")
            if r.acknowledged:
                logger.info(
                    "Inserted: %d, Modify: %d, Deleted: %d",
                    r.inserted_count + r.upserted_count,
                    r.modified_count,
                    r.deleted_count,
                )
        except BulkWriteError as e:
            logger.error("Bulk write error: '%s'", e.details)
            logger.error("Stopping check")
def remove_duplicates(self):
    try:
        logger.info('====== Removing duplicates =======')
        buffer = []
        for document in self.collection.aggregate(self.pipeline):
            it = iter(document['uniqueIds'])
            next(it)  # keep the first occurrence, queue the rest for deletion
            for id in it:
                buffer.append(DeleteOne({'_id': id}))
        self.collection.bulk_write(buffer)
    except Exception as x:
        logger.error('Error when removing duplicates. Details: {}'.format(x))
def Delete(self, id):
    coll = self.coll
    if coll:
        req = DeleteOne({'_id': ObjectId(id)})
        res = coll.bulk_write([req])
        found = int(res.bulk_api_result['nRemoved']) > 0
        deleted = res.deleted_count > 0
        return found
    return False
def remove(db, uuids, **kwargs):
    """Remove data from DB.

    Args:
        db (Collection): DB connection
        uuids (list): A list of unique IDs
    """
    ops = [
        DeleteOne({'_uuid': uuid})
        for uuid in uuids
    ]
    if len(ops) > 0:
        db.bulk_write(ops)
def rebuild_for_object(cls, object):
    # Stored data
    old = {}  # selector -> doc
    for d in SelectorCache._get_collection().find({"object": object.id}):
        old[d["selector"]] = d
    # Refreshed data
    vcdomain = object.vc_domain.id if object.vc_domain else None
    collection = SelectorCache._get_collection().with_options(
        write_concern=WriteConcern(w=0))
    bulk = []
    for s in cls.selectors_for_object(object):
        sdata = old.get(s)
        if sdata:
            # Cache record exists
            if sdata.get("vc_domain") != vcdomain:
                # VC Domain changed
                logger.debug("[%s] Changing VC Domain to %s", object.name, vcdomain)
                bulk += [
                    UpdateOne({"_id": sdata["_id"]}, {"$set": {"vc_domain": vcdomain}})
                ]
            del old[s]
        else:
            # New record
            logging.debug("[%s] Add to selector %s", object.name, s)
            bulk += [
                InsertOne({
                    "object": object.id,
                    "selector": s,
                    "vc_domain": vcdomain
                })
            ]
    # Delete stale records
    for sdata in old.values():
        logging.debug("[%s] Remove from selector %s", object.name, sdata["_id"])
        bulk += [DeleteOne({"_id": sdata["_id"]})]
    # Apply changes
    if bulk:
        logging.info("[%s] Committing changes to database", object.name)
        try:
            collection.bulk_write(bulk, ordered=False)
            logging.info("Database has been synced")
        except BulkWriteError as e:
            logging.error("Bulk write error: '%s'", e.details)
            logging.error("Stopping check")
async def handle_pending_message(pending, seen_ids, actions_list, messages_actions_list):
    result = await incoming(
        pending['message'],
        chain_name=pending['source'].get('chain_name'),
        tx_hash=pending['source'].get('tx_hash'),
        height=pending['source'].get('height'),
        seen_ids=seen_ids,
        check_message=pending['source'].get('check_message', True),
        retrying=True,
        bulk_operation=True)

    if result is not None:  # message handled (valid or not, we don't care)
        # Is it valid to add to a list passed this way? to be checked.
        if result is not True:
            messages_actions_list.append(result)
        actions_list.append(DeleteOne({'_id': pending['_id']}))
def _delete_docs(self, docs):
    """
    Delete a certain set of documents from train

    :param docs: The documents to remove
    :return: The documents *not* removed
    """
    requests = [DeleteOne(doc) for doc in docs]
    try:
        result = self.train.bulk_write(requests)
    except BulkWriteError as bwe:
        num_removed = bwe.details['nRemoved']
    else:
        num_removed = result.bulk_api_result['nRemoved']
    logging.info("Deleted {} docs from train".format(num_removed))
    remaining = docs[num_removed:]
    return remaining
def migrate_handler_ids(self):
    h_coll = self.mongo_db["handlers"]
    bulk = []
    for doc in h_coll.find({}):
        doc["handler"] = doc["_id"]
        doc["_id"] = bson.ObjectId()
        bulk += [InsertOne(doc), DeleteOne({"_id": doc["handler"]})]
        self.db.execute(
            """
            UPDATE sa_managedobjectprofile
            SET resolver_handler = %s
            WHERE resolver_handler = %s
            """,
            [str(doc["_id"]), doc["handler"]],
        )
    if bulk:
        h_coll.bulk_write(bulk, ordered=True)
def clear_cache():
    rs = cache_table.find({}, ('_id',))
    urls = []
    for r in rs:
        url = r['_id']
        # if url.find('/series?active') != -1:
        #     urls.append(url)
        p = url.find('/series/')
        if p != -1 and len(url[p:].split('/')) != 3:
            urls.append(url)
    print('ready to clean # of urls = %d' % len(urls))
    print(urls[:10])

    from pymongo import DeleteOne
    ops = [DeleteOne({'_id': url}) for url in urls]
    cache_table.bulk_write(ops, ordered=False)
    print('clean OK')
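# A minimal, self-contained sketch of the pattern shared by the snippets above:
# build a list of DeleteOne requests and submit them in a single bulk_write call.
# This is not taken from any of the projects above; the database, collection and
# function names ("mydb", "items", delete_batch) are hypothetical placeholders.
from pymongo import MongoClient, DeleteOne
from pymongo.errors import BulkWriteError


def delete_batch(ids):
    coll = MongoClient()["mydb"]["items"]
    requests = [DeleteOne({"_id": i}) for i in ids]
    if not requests:
        return 0  # bulk_write raises on an empty request list
    try:
        # ordered=False lets the remaining deletes proceed if one fails
        result = coll.bulk_write(requests, ordered=False)
    except BulkWriteError as exc:
        # per-operation failures are reported in exc.details
        raise
    return result.deleted_count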