def reindex_uuids(self, uuids): """ reindexes a list of uuids """ if isinstance(uuids, list): crawler = Crawler() crawler.index_document_list(uuids) return len(uuids) else: return False
def reindex_uuids(self, uuids): """ reindexes a list of uuids """ self.clear_caches() if isinstance(uuids, list): crawler = Crawler() if isinstance(self.max_geo_zoom, int): if self.max_geo_zoom > 5: # only positive integers crawler.max_geo_zoom = self.max_geo_zoom if isinstance(self.human_remains, int): if self.human_remains > 0: # we're reindexing sensitive human remains crawler.human_remains = self.human_remains crawler.index_document_list(uuids, self.list_size) return len(uuids) else: return False
def reindex(self): """ Reindexes items in Solr, with item UUIDs coming from a given source """ self.clear_caches() self.iteration += 1 print('Iteration: ' + str(self.iteration)) if self.iteration <= self.max_iterations: uuids = [] if self.solr_direct_url is not False: print('Get uuids from solr: ' + str(self.solr_direct_url)) uuids = self.get_uuids_solr_direct(self.solr_direct_url) elif self.oc_url is not False: # now validate to make sure we're asking for uuids if 'response=uuid' in self.oc_url \ and '.json' in self.oc_url: print('Get uuids from OC-API: ' + str(self.oc_url)) uuids = self.get_uuids_oc_url(self.oc_url) elif isinstance(self.project_uuids, list) \ and self.annotated_after is False \ and self.skip_indexed_after is False: # now validate to make sure we're asking for uuids print('Getting uuids for: ' + str(len(self.project_uuids)) + ' projects') uuids = [] raw_uuids = Manifest.objects\ .filter(project_uuid__in=self.project_uuids)\ .values_list('uuid', flat=True) for raw_uuid in raw_uuids: uuids.append(str(raw_uuid)) elif isinstance(self.project_uuids, list)\ and self.annotated_after is False\ and self.skip_indexed_after is not False: # index items from projects, but not items indexed after a certain # datetime uuids = [] raw_uuids = Manifest.objects\ .filter(project_uuid__in=self.project_uuids)\ .exclude(indexed__gte=self.skip_indexed_after)\ .values_list('uuid', flat=True) for raw_uuid in raw_uuids: uuids.append(str(raw_uuid)) elif self.annotated_after is not False: self.max_iterations = 1 uuids = [] anno_list = [] if self.project_uuids is not False: if not isinstance(self.project_uuids, list): project_uuids = [self.project_uuids] else: project_uuids = self.project_uuids anno_list = LinkAnnotation.objects\ .filter(project_uuid__in=project_uuids, updated__gte=self.annotated_after) else: anno_list = LinkAnnotation.objects\ .filter(updated__gte=self.annotated_after) for anno in anno_list: print('Index annotation: ' + anno.subject + ' :: ' + anno.predicate_uri + ' :: ' + anno.object_uri) if(anno.subject_type in (item[0] for item in settings.ITEM_TYPES)): # make sure it's an Open Context item that can get indexed if anno.subject not in uuids: uuids.append(anno.subject) if anno.subject_type == 'types' and self.related_annotations: # get the # subjects item used with this type, we need to do a lookup # on the assertions table assertions = Assertion.objects\ .filter(object_uuid=geo_anno.subject) for ass in assertions: if ass.uuid not in uuids: uuids.append(ass.uuid) if isinstance(uuids, list): print('Ready to index ' + str(len(uuids)) + ' items') crawler = Crawler() if isinstance(self.max_geo_zoom, int): if self.max_geo_zoom > 5: # only positive integers crawler.max_geo_zoom = self.max_geo_zoom if isinstance(self.human_remains, int): if self.human_remains > 0: # we're reindexing sensitive human remains crawler.human_remains = self.human_remains crawler.index_document_list(uuids, self.list_size) self.reindex() else: print('Problem with: ' + str(uuids))
def reindex(self): """ Reindexes items in Solr, with item UUIDs coming from a given source """ self.iteration += 1 print('Iteration: ' + str(self.iteration)) if self.iteration <= self.max_iterations: uuids = [] if self.solr_direct_url is not False: print('Get uuids from solr: ' + str(self.solr_direct_url)) uuids = self.get_uuids_solr_direct(self.solr_direct_url) elif self.oc_url is not False: # now validate to make sure we're asking for uuids if 'response=uuid' in self.oc_url \ and '.json' in self.oc_url: print('Get uuids from OC-API: ' + str(self.oc_url)) uuids = self.get_uuids_oc_url(self.oc_url) elif isinstance(self.project_uuids, list) \ and self.annotated_after is False \ and self.skip_indexed_after is False: # now validate to make sure we're asking for uuids uuids = [] raw_uuids = Manifest.objects\ .filter(project_uuid__in=self.project_uuids)\ .values_list('uuid', flat=True) for raw_uuid in raw_uuids: uuids.append(str(raw_uuid)) elif isinstance(self.project_uuids, list)\ and self.annotated_after is False\ and self.skip_indexed_after is not False: # index items from projects, but not items indexed after a certain # datetime uuids = [] raw_uuids = Manifest.objects\ .filter(project_uuid__in=self.project_uuids)\ .exclude(indexed__gte=self.skip_indexed_after)\ .values_list('uuid', flat=True) for raw_uuid in raw_uuids: uuids.append(str(raw_uuid)) elif self.annotated_after is not False: self.max_iterations = 1 uuids = [] anno_list = [] if self.project_uuids is not False: if not isinstance(self.project_uuids, list): project_uuids = [self.project_uuids] else: project_uuids = self.project_uuids anno_list = LinkAnnotation.objects\ .filter(project_uuid__in=project_uuids, updated__gte=self.annotated_after) else: anno_list = LinkAnnotation.objects\ .filter(updated__gte=self.annotated_after) for anno in anno_list: print('Index annotation: ' + anno.subject + ' :: ' + anno.predicate_uri + ' :: ' + anno.object_uri) if(anno.subject_type in (item[0] for item in settings.ITEM_TYPES)): # make sure it's an Open Context item that can get indexed if anno.subject not in uuids: uuids.append(anno.subject) if anno.subject_type == 'types' and self.related_annotations: # get the # subjects item used with this type, we need to do a lookup # on the assertions table assertions = Assertion.objects\ .filter(object_uuid=geo_anno.subject) for ass in assertions: if ass.uuid not in uuids: uuids.append(ass.uuid) if isinstance(uuids, list): print('Ready to index ' + str(len(uuids)) + ' items') crawler = Crawler() crawler.index_document_list(uuids) self.reindex() else: print('Problem with: ' + str(uuids))