def process_message(self, pid, method):
    '''Process an update message from Fedora for a single object.

    :param pid: pid of the object the message refers to
    :param method: Fedora API method name from the message; ``purgeObject``
        triggers removal from all configured indexes, anything else
        (ingest, modify object, modify datastream) is treated as a
        possible (re)index request.
    '''
    if method == 'purgeObject':
        # when an object is purged from fedora, remove it from the index.
        # since we don't know which index (if any) this object was indexed
        # in, delete it from all configured indexes
        for site, index in self.indexes.items():
            try:
                index.delete_item(pid)
            except Exception as e:
                logger.error("Failed to purge %s (%s): %s", pid, site, e)
                # Add a prefix to the detail error message if we
                # can identify what type of error this is.
                detail_type = 'Solr Error: ' if isinstance(e, SolrError) else ''
                msg = '%sPurge: %s' % (detail_type, e)
                err = IndexError(object_id=pid, site=site, detail=msg)
                err.save()
        logger.info('Deleting %s from all configured Solr indexes', pid)
        # NOTE(review): no explicit commit is issued here; deletes rely on
        # each index's own commit behavior -- confirm this is intended

    else:
        # ingest, modify object, or modify datastream: queue for indexing.
        # if the object isn't already queued to be indexed, check if it
        # should be
        if pid not in self.to_index:
            # get content models from the resource index; may include
            # generic content models, but that should not be a problem
            obj_cmodels = list(self.repo.risearch.get_objects(
                'info:fedora/%s' % pid, modelns.hasModel))
            # NOTE(review): the get_object/get_models calls below look like
            # debug leftovers -- obj_cmodels2 is only ever logged
            sample_obj = self.repo.get_object(pid)
            obj_cmodels2 = sample_obj.get_models()
            logger.debug('Logging object cmodels %s', modelns.hasModel)
            logger.debug('Logging object cmodels %s', obj_cmodels)
            logger.debug('Logging object cmodels %s', obj_cmodels2)

            # find which configured site(s) index the item
            for site, index in self.indexes.items():
                if index.indexes_item(obj_cmodels):
                    if pid not in self.to_index:
                        # first site found - create a queue item and
                        # add it to the list
                        self.to_index[pid] = QueueItem(site)
                    else:
                        # subsequent site - add the site to the existing
                        # queue item
                        self.to_index[pid].add_site(site)
def reindex_object(self, site, pid):
    '''Reindex a single object in one configured site index, recording
    any failure as an :class:`IndexError` database entry.

    :param site: name of the configured site index to use
    :param pid: pid of the object to index
    :returns: status message string indicating success or failure
    '''
    indexes = load_indexes()
    site_index = indexes[site]
    try:
        site_index.index_item(pid)
    except Exception as e:
        logger.error("Failed to index %s (%s): %s", pid, site, e)
        # Add a prefix to the detail error message if we
        # can identify what type of error this is.
        detail_type = 'Solr Error: ' if isinstance(e, SolrError) else ''
        msg = '%s%s' % (detail_type, e)
        err = IndexError(object_id=pid, site=site, detail=msg)
        err.save()
        # bug fix: previously this reported success even when the index
        # attempt raised and an IndexError record was saved
        return 'Failed to index pid %s' % pid

    # success path: no exception was raised by the index attempt
    return 'Indexed pid %s' % pid
def index_item(self, pid, queueitem, site):
    '''Index an item in a single configured site index and handle any
    errors, updating the queued item's retry count and marking sites as
    indexed according to success or any errors.

    :param pid: pid for the item to be indexed
    :param queueitem: :class:`QueueItem` (currently unused; queue state
        is looked up via ``self.to_index[pid]`` instead)
    :param site: name of the site index to use
    '''
    try:
        # tell the site index to index the item - returns True on success
        if self.indexes[site].index_item(pid):
            # mark the site index as complete on the queued item
            self.to_index[pid].site_complete(site)
    except RecoverableIndexError as rie:
        # If the index attempt resulted in an error that we can
        # potentially recover from, keep the item in the queue and
        # attempt to index it again.
        # Increase the count of index attempts, so we know when to stop.
        self.to_index[pid].tries += 1
        if self.to_index[pid].tries >= self.index_max_tries:
            # quit when we reach the configured number of index attempts
            logger.error("Failed to index %s (%s) after %d tries: %s",
                         pid, site, self.to_index[pid].tries, rie)
            err = IndexError(object_id=pid, site=site,
                             detail='Failed to index after %d attempts: %s' % \
                                 (self.to_index[pid].tries, rie))
            err.save()
            # we've hit the index retry limit, so set the site as
            # complete on the queue item
            self.to_index[pid].site_complete(site)
        else:
            # use the module logger and .warning(): logging.warn is a
            # deprecated alias, and the rest of this module logs via logger
            logger.warning("Recoverable error attempting to index "
                           "%s (%s), %d tries: %s",
                           pid, site, self.to_index[pid].tries, rie)
            # update the index time - wait the configured index delay
            # before attempting to reindex again
            self.to_index[pid].time = datetime.now()
    except Exception as e:
        logger.error("Failed to index %s (%s): %s", pid, site, e)
        # Add a prefix to the detail error message if we
        # can identify what type of error this is.
        detail_type = 'Solr Error: ' if isinstance(e, SolrError) else ''
        msg = '%s%s' % (detail_type, e)
        err = IndexError(object_id=pid, site=site, detail=msg)
        err.save()
        # any exception not caught in the recoverable error block should
        # not be attempted again - set site as complete on the queue item
        self.to_index[pid].site_complete(site)
def index_object(self, pid, site):
    '''Celery task: index a single object in one configured site index,
    retrying recoverable errors up to the task's retry limit and
    recording unrecoverable failures as :class:`IndexError` entries.

    :param pid: pid of the object to index
    :param site: name of the configured site index to use
    :returns: status message string
    '''
    indexes = load_indexes()
    try:
        # tell the site index to index the item
        indexes[site].index_item(pid)
    except RecoverableIndexError as rie:
        # Bug fix: Task.retry() raises (Retry, or MaxRetriesExceededError
        # once exhausted), so any code placed after it never runs.  The
        # previous ordering made the max-retries logging and the
        # IndexError record unreachable; check the retry count first.
        if index_object.request.retries >= index_object.max_retries:
            logger.error("Failed to index %s (%s) after %d tries: %s",
                         pid, site, index_object.request.retries, rie)
            err = IndexError(object_id=pid, site=site,
                             detail='Failed to index after %d attempts: %s' % \
                                 (index_object.request.retries, rie))
            err.save()
            return 'Failed to index pid %s' % pid
        # recoverable and retries remain: re-queue this task for another
        # attempt after a short delay (raises Retry; does not return)
        logger.warning("Recoverable error attempting to index "
                       "%s (%s), %d tries: %s",
                       pid, site, index_object.request.retries, rie)
        self.retry(countdown=2, exc=rie)
    except Exception as e:
        logger.error("Failed to index %s (%s): %s", pid, site, e)
        # Add a prefix to the detail error message if we
        # can identify what type of error this is.
        detail_type = 'Solr Error: ' if isinstance(e, SolrError) else ''
        msg = '%s%s' % (detail_type, e)
        err = IndexError(object_id=pid, site=site, detail=msg)
        err.save()
        # any exception not caught in the recoverable error block should
        # not be attempted again; report the failure instead of the
        # previous unconditional success message
        return 'Failed to index pid %s' % pid
    return 'Indexed pid %s' % pid