def reindex_model_catalog(dmd, root="/zport", idxs=None, types=()):
    """Soft-reindex the model catalog in a single thread.

    Fetches every uid currently known to the index under ``root`` (optionally
    filtered by ``types``), re-indexes the objects that still resolve in ZODB,
    and unindexes uids whose objects are gone. Updates are sent to Solr in
    batches of 1000, with a single commit at the end.
    """
    started = time.time()
    log.info(
        "Performing soft reindex on model_catalog. Params = root:'{}' / idxs:'{}' / types:'{}'"
        .format(root, idxs, types))
    modelindex = init_model_catalog()
    uids = get_uids(modelindex, root=root, types=types)
    if uids:
        batch = []
        for uid in uids:
            try:
                target = dmd.unrestrictedTraverse(uid)
            except (KeyError, NotFound):
                # uid exists in Solr but no longer in ZODB: schedule removal
                log.warn("Stale object found in Solr: {}".format(uid))
                batch.append(IndexUpdate(None, op=UNINDEX, uid=uid))
            else:
                batch.append(IndexUpdate(target, op=INDEX, idxs=idxs))
            # flush every 1000 accumulated updates, deferring the commit
            if len(batch) % 1000 == 0:
                modelindex.process_batched_updates(batch, commit=False)
                batch = []
        if batch:
            # the final partial batch carries the commit
            modelindex.process_batched_updates(batch, commit=True)
        else:
            # everything was flushed exactly on a batch boundary; just commit
            modelindex.commit()
    log.info("Reindexing took {} seconds.".format(time.time() - started))
def batch_update(self, batch, commit=False):
    """Send one INDEX update per node in ``batch`` to the Solr index client.

    Each update carries this instance's configured ``fields`` as the set of
    indexes to refresh; ``commit`` is passed straight through to the client.
    """
    updates = []
    for node in batch:
        updates.append(IndexUpdate(node, op=INDEX, idxs=self.fields))
    self.index_client.process_batched_updates(updates, commit=commit)
def get_pending_updates(self):
    """Return updates that have not yet been sent to the index.

    Converts each ObjectUpdate buffered in ``self.pending_updates`` into an
    IndexUpdate, returned as a dict keyed by the update's uid.
    """
    return {
        pending.uid: IndexUpdate(
            pending.obj, op=pending.op, idxs=pending.idxs, uid=pending.uid)
        for pending in self.pending_updates.values()
    }
def _process_pending_updates(self, tx_state):
    """Index all pending updates during a mid-transaction commit.

    Every pending update is rewritten so its MODEL_INDEX_UID_FIELD becomes a
    temporary, transaction-scoped uid and its TX_STATE_FIELD is set to the
    current tid, then the whole batch is sent (and committed) to Solr.

    Side effects on ``tx_state``: its pending updates are marked as indexed
    via mark_pending_updates_as_indexed(), split into indexed and deleted uids.

    NOTE(review): UNINDEX updates are NOT sent to Solr here — they are only
    collected into ``deleted_uids`` so search results can blacklist them.
    """
    updates = tx_state.get_pending_updates()
    # we are going to index all pending updates, setting the
    # MODEL_INDEX_UID_FIELD to a temporary per-transaction uid (derived from
    # OBJECT_UID_FIELD plus the tid) and setting the tx_state field to the
    # current tid
    tweaked_updates = []
    indexed_uids = set()
    deleted_uids = set()
    for update in updates.itervalues():
        tid = tx_state.tid
        if update.op == UNINDEX:
            # don't index anything for unindexed objects; just record them so
            # they can be blacklisted from search results
            deleted_uids.add(update.uid)
        else:
            # index the object under a special, transaction-scoped uid
            temp_uid = self._mid_transaction_uid(update.uid, tx_state)
            # The first time we index a mid-transaction atomic (partial)
            # update, we must index the whole object — otherwise the temp
            # document would only contain the mandatory fields plus whatever
            # fields the partial update happens to carry.
            if update.spec.partial_spec and update.uid not in tx_state.temp_indexed_uids:
                original_update = update
                # re-load the full object and replace the partial update with
                # a full-object IndexUpdate of the same op/uid
                obj = self.context.dmd.unrestrictedTraverse(
                    original_update.uid)
                update = IndexUpdate(obj, op=original_update.op,
                                     uid=original_update.uid)
            update.spec.set_field_value(MODEL_INDEX_UID_FIELD, temp_uid)
            update.spec.set_field_value(TX_STATE_FIELD, tid)
            indexed_uids.add(update.uid)
            tweaked_updates.append(update)
    # send and commit the indexed docs to solr
    self.model_index.process_batched_updates(tweaked_updates)
    # mark docs as indexed on the transaction state
    tx_state.mark_pending_updates_as_indexed(updates, indexed_uids,
                                             deleted_uids)
def uncatalog_object(self, uid):
    """Queue an UNINDEX update for ``uid``, flushing every 1000 queued updates."""
    removal = IndexUpdate(None, op=UNINDEX, uid=uid)
    self.updates.append(removal)
    queued = len(self.updates)
    if queued % 1000 == 0:
        self.commit()