Пример #1
0
    def process_message(self, pid, method):
        '''Process an update message from Fedora for a single object.

        A ``purgeObject`` message removes the object from every
        configured index, recording an error for each index where the
        delete fails.  Any other method (ingest, modify object, modify
        datastream) queues the object in ``self.to_index`` for each
        configured site whose index reports that it indexes the
        object's content models.  Objects already in the queue are
        left untouched.

        :param pid: pid of the object the message refers to
        :param method: name of the Fedora API method from the message
        '''
        # when an object is purged from fedora, remove it from the index
        if method == 'purgeObject':
            # since we don't know which index (if any) this object was
            # indexed in, delete it from all configured indexes
            logger.info('Deleting %s from all configured Solr indexes', pid)
            for site, index in self.indexes.items():
                try:
                    index.delete_item(pid)
                except Exception as e:
                    # best-effort: a failure on one index must not stop
                    # the purge from being attempted on the others
                    logger.error("Failed to purge %s (%s): %s",
                                 pid, site, e)

                    # Add a prefix to the detail error message if we
                    # can identify what type of error this is.
                    detail_type = ''
                    if isinstance(e, SolrError):
                        detail_type = 'Solr Error: '
                    msg = '%s%s%s' % (detail_type, 'Purge: ', e)
                    err = IndexError(object_id=pid, site=site, detail=msg)
                    err.save()
            # commit?
            return

        # ingest, modify object or modify datastream:
        # nothing to do if the object is already queued to be indexed
        if pid in self.to_index:
            return

        # get content models from resource index
        # (may include generic content models, but should not be a problem)
        obj_cmodels = list(self.repo.risearch.get_objects('info:fedora/%s' % pid,
                                                          modelns.hasModel))
        logger.debug('Logging object cmodels %s', obj_cmodels)

        # find which configured site(s) index the item
        for site, index in self.indexes.items():
            if not index.indexes_item(obj_cmodels):
                continue
            if pid not in self.to_index:
                # first site found - create a queue item and add to the list
                self.to_index[pid] = QueueItem(site)
            else:
                # subsequent site - add the site to the existing queue item
                self.to_index[pid].add_site(site)
Пример #2
0
def reindex_object(self, site, pid):
    '''Index a single object in one configured site index.

    Any exception raised while indexing is logged and stored as an
    ``IndexError`` record rather than propagated.

    :param self: unused here; presumably the bound task instance
        (the decorator is not visible from this function - confirm)
    :param site: name of the site index to use
    :param pid: pid for the item to be indexed
    :returns: a status string reporting whether the pid was indexed
        or the attempt failed
    '''
    indexes = load_indexes()
    site_index = indexes[site]
    try:
        site_index.index_item(pid)
    except Exception as e:
        logging.error("Failed to index %s (%s): %s",
                      pid, site, e)

        # Add a prefix to the detail error message if we
        # can identify what type of error this is.
        detail_type = ''
        if isinstance(e, SolrError):
            detail_type = 'Solr Error: '
        msg = '%s%s' % (detail_type, e)
        err = IndexError(object_id=pid, site=site,
                         detail=msg)
        err.save()
        # report the failure instead of claiming the pid was indexed
        return 'Failed to index pid %s' % pid
    return 'Indexed pid %s' % pid
Пример #3
0
    def index_item(self, pid, queueitem, site):
        '''Index an item in a single configured site index and handle
        any errors, updating the queued item's retry count and marking
        sites as indexed according to success or any errors.

        The queue entry that gets updated is looked up as
        ``self.to_index[pid]``; the ``queueitem`` argument is accepted
        but not read by this method.

        :param pid: pid for the item to be indexed
        :param queueitem: :class:`QueueItem` (currently unused here)
        :param site: name of the site index to use
        '''
        try:
            # tell the site index to index the item - returns True on success
            if self.indexes[site].index_item(pid):
                # mark the site index as complete on the queued item
                self.to_index[pid].site_complete(site)

        except RecoverableIndexError as rie:
            # If the index attempt resulted in error that we
            # can potentially recover from, keep the item in
            # the queue and attempt to index it again.

            # Increase the count of index attempts, so we know when to stop.
            self.to_index[pid].tries += 1
            tries = self.to_index[pid].tries

            # quit when we reached the configured number of index attempts
            if tries >= self.index_max_tries:
                logger.error("Failed to index %s (%s) after %d tries: %s",
                             pid, site, tries, rie)

                err = IndexError(object_id=pid, site=site,
                                 detail='Failed to index after %d attempts: %s' %
                                 (tries, rie))
                err.save()
                # we've hit the index retry limit, so set site as complete on the queue item
                self.to_index[pid].site_complete(site)

            else:
                logger.warning("Recoverable error attempting to index %s (%s), %d tries: %s",
                               pid, site, tries, rie)

                # update the index time - wait the configured index delay before
                # attempting to reindex again
                self.to_index[pid].time = datetime.now()

        except Exception as e:
            logger.error("Failed to index %s (%s): %s",
                         pid, site, e)

            # Add a prefix to the detail error message if we
            # can identify what type of error this is.
            detail_type = ''
            if isinstance(e, SolrError):
                detail_type = 'Solr Error: '
            msg = '%s%s' % (detail_type, e)
            err = IndexError(object_id=pid, site=site,
                             detail=msg)
            err.save()

            # any exception not caught in the recoverable error block
            # should not be attempted again - set site as complete on queue item
            self.to_index[pid].site_complete(site)
Пример #4
0
def index_object(self, pid, site):
    '''Index a single object in one configured site index, retrying
    on recoverable errors.

    On a :class:`RecoverableIndexError` the task is retried (after a
    2 second countdown) until the task's retry limit is reached; at
    that point the failure is logged, stored as an ``IndexError``
    record, and the original exception is re-raised.  Any other
    exception is logged and stored as an ``IndexError`` record without
    being retried or propagated.

    :param self: the task instance; this function relies on
        ``self.retry``, ``self.request.retries`` and
        ``self.max_retries`` (presumably a bound Celery task - the
        decorator is not visible from this function, confirm)
    :param pid: pid for the item to be indexed
    :param site: name of the site index to use
    :returns: a status string reporting whether the pid was indexed
        or the attempt failed
    '''
    indexes = load_indexes()

    try:
        # tell the site index to index the item
        indexes[site].index_item(pid)

    except RecoverableIndexError as rie:
        # If the index attempt resulted in error that we
        # can potentially recover from, attempt to index it again.
        retries = self.request.retries
        max_retries = self.max_retries

        # The retry limit has to be checked *before* calling retry():
        # retry() raises, so nothing placed after it would ever run.
        # A max_retries of None means there is no limit.
        if max_retries is not None and retries >= max_retries:
            logger.error("Failed to index %s (%s) after %d tries: %s",
                         pid, site, retries, rie)

            err = IndexError(object_id=pid, site=site,
                             detail='Failed to index after %d attempts: %s' %
                             (retries, rie))
            err.save()
            # out of retries - let the original error propagate
            raise

        logger.warning("Recoverable error attempting to index %s (%s), %d tries: %s",
                       pid, site, retries, rie)
        raise self.retry(countdown=2, exc=rie)

    except Exception as e:
        logger.error("Failed to index %s (%s): %s",
                     pid, site, e)
        # Add a prefix to the detail error message if we
        # can identify what type of error this is.
        detail_type = ''
        if isinstance(e, SolrError):
            detail_type = 'Solr Error: '
        msg = '%s%s' % (detail_type, e)
        err = IndexError(object_id=pid, site=site,
                         detail=msg)
        err.save()

        # any exception not caught in the recoverable error block
        # should not be attempted again; report the failure instead
        # of claiming the pid was indexed
        return 'Failed to index pid %s' % pid

    return 'Indexed pid %s' % pid