def find_record_by_identifier(identifiers, collection, owner=None,
                              ignore_suffix=False, suffix_regex=r'[-_]\d+$'):
    """Find records in `collection` whose identifier field matches.

    :param identifiers: a single identifier value or a list/tuple of values
    :param collection: collection to restrict the search to
    :param owner: record owner to filter on (None matches unowned records)
    :param ignore_suffix: when True, ALSO search for each identifier with a
        trailing numeric suffix (e.g. "-1", "_02") removed
    :param suffix_regex: pattern describing the suffix to strip
    :returns: queryset of matching Record objects

    Fix: results are now de-duplicated with ``distinct()`` — when both an
    identifier and its suffix-stripped form match the same record, the join
    used by ``by_fieldvalue`` can otherwise yield that record twice.  This
    brings the function in line with the other variant in this file that
    already calls ``distinct()``.
    """
    idfields = standardfield_ids('identifier', equiv=True)
    # Normalize to a fresh list so we never mutate the caller's sequence
    if not isinstance(identifiers, (list, tuple)):
        identifiers = [identifiers]
    else:
        identifiers = list(identifiers)
    if ignore_suffix:
        # The comprehension is materialized BEFORE extend() runs, so the
        # list is not iterated while it is being grown.
        # (Loop variable renamed from `id` to avoid shadowing the builtin.)
        identifiers.extend(
            [re.sub(suffix_regex, '', value) for value in identifiers])
    records = Record.by_fieldvalue(idfields, identifiers).filter(
        collection=collection, owner=owner).distinct()
    return records
def find_record_by_identifier(identifiers, collection, owner=None,
                              ignore_suffix=False, suffix_regex=r'[-_]\d+$'):
    """Find records in `collection` matching the given identifier value(s).

    Unlike the eager variant, this first searches for the identifiers as
    given and only falls back to suffix-stripped identifiers (per
    `suffix_regex`, e.g. "img-1" -> "img") when nothing matched and
    `ignore_suffix` is set.

    :returns: queryset of matching Record objects

    Fixes: both queries now apply ``distinct()`` (consistent with the other
    variant in this file; avoids duplicate rows when several identifier
    values resolve to the same record), and the stripped identifiers are
    materialized in a list instead of a one-shot generator, which would be
    exhausted if the ORM helper iterated it more than once.
    """
    idfields = standardfield_ids('identifier', equiv=True)
    records = Record.by_fieldvalue(idfields, identifiers) \
        .filter(collection=collection, owner=owner).distinct()
    if not records and ignore_suffix:
        if not isinstance(identifiers, (list, tuple)):
            identifiers = [identifiers]
        # List, not generator: safe to iterate repeatedly.
        identifiers = [
            re.sub(suffix_regex, '', value) for value in identifiers]
        records = Record.by_fieldvalue(idfields, identifiers) \
            .filter(collection=collection, owner=owner).distinct()
    return records
def find_record_by_identifier(
        identifiers, collection, owner=None, ignore_suffix=False,
        suffix_regex=r'[-_]\d+$'):
    """Return distinct records in `collection` matching any identifier.

    `identifiers` may be a single value or a list/tuple.  With
    `ignore_suffix`, each identifier is additionally tried with a trailing
    numeric suffix (per `suffix_regex`) stripped off.
    """
    field_ids = standardfield_ids('identifier', equiv=True)
    # Work on a private list so the caller's argument is never mutated
    if isinstance(identifiers, (list, tuple)):
        candidates = list(identifiers)
    else:
        candidates = [identifiers]
    if ignore_suffix:
        stripped = [re.sub(suffix_regex, '', value) for value in candidates]
        candidates += stripped
    matches = Record.by_fieldvalue(field_ids, candidates)
    return matches.filter(collection=collection, owner=owner).distinct()
def handle(self, from_collection, to_collections, commit, *args, **options):
    """Re-point presentation items from one collection to another.

    Records are matched across collections by their identifier field
    values.  Without `commit` this is a dry run that only reports what
    would change; errors (missing or ambiguous identifiers) are collected,
    de-duplicated and printed at the end.
    """
    if not from_collection or not to_collections:
        print("Error: Must specify --from and --to arguments")
        return
    print("Mapping presentation items from collection %s to "
          "collection(s) %s" % (from_collection, to_collections))
    idfields = standardfield_ids('identifier', equiv=True)

    def identifier_pairs(**filters):
        # (value, record id) pairs for unowned, visible identifier values
        return FieldValue.objects.filter(
            field__in=idfields, owner=None, context_type=None,
            hidden=False, **filters).values_list('value', 'record')

    print("Fetching identifiers")
    identifiers_by_record = {}
    for value, record in identifier_pairs(
            record__collectionitem__collection=from_collection):
        identifiers_by_record.setdefault(record, []).append(value)

    print("Fetching target records")
    records_by_identifier = {}
    for value, record in identifier_pairs(
            record__collectionitem__collection__in=to_collections):
        records_by_identifier.setdefault(value, []).append(record)

    print("Mapping presentation items")
    remapped = 0
    errors = []
    items = PresentationItem.objects.filter(
        record__collectionitem__collection=from_collection)
    progress = ProgressBar(len(items))
    for index, item in enumerate(items):
        identifiers = identifiers_by_record.get(item.record_id)
        if not identifiers:
            errors.append(
                "No identifier found for record %s" % item.record_id)
            progress.update(index)
            continue
        for identifier in identifiers:
            targets = records_by_identifier.get(identifier)
            if not targets:
                errors.append(
                    "No record with identifier '%s' found in "
                    "collection %s" % (identifier, to_collections))
            elif len(targets) == 1:
                # Unambiguous match: remap and stop trying identifiers
                remapped += 1
                if commit:
                    item.record_id = targets[0]
                    item.save()
                break
            else:
                errors.append(
                    "Multiple matching records with identifier "
                    "'%s' found in collection %s: %s" % (
                        identifier, to_collections, sorted(targets)))
        progress.update(index)
    progress.done()
    errors = sorted(set(errors))
    if commit:
        print("Remapped %s items" % remapped)
    else:
        print("Would have remapped %s items - rerun with --commit" %
              remapped)
    if errors:
        print("%s unique errors occurred:" % len(errors))
        print('\n'.join(errors))
def handle(self, collection, from_date, until_date, file_format, file_name,
           include_events, exclude_events, list_events, *args, **options):
    # Export accumulated activity statistics for a collection as CSV.
    # NOTE: Python 2 code (print statements, `except Exception, e`,
    # `dict.has_key`).
    # NOTE(review): the visible body ends inside the export loop —
    # `media_id`/`storage` are computed but no row is written here;
    # presumably the writerow call follows in the original file — confirm.
    events = list(
        Activity.objects.distinct().order_by('event').values_list(
            'event', flat=True))
    # --include replaces the event list entirely; --exclude filters it
    if include_events:
        events = include_events
    for event in exclude_events:
        if event in events:
            events.remove(event)
    if list_events:
        print "Events (not all may apply to date range or collection):"
        print '\n'.join(events)
        return
    if not from_date:
        print "Please specify a start date"
        return
    if not collection:
        print "Please specify a collection"
        return
    # Accept either a collection name or a numeric collection id
    try:
        collection = Collection.objects.get(name=collection)
    except Collection.DoesNotExist:
        try:
            collection = Collection.objects.get(id=collection)
        except (Collection.DoesNotExist, ValueError):
            print "Cannot find specified collection: %s" % collection
            return

    def accumulation_status(date, event, step, numsteps):
        # Progress callback; writes to stderr so it does not pollute the
        # CSV when output goes to stdout
        print >> sys.stderr, "Accumulating data for event %s on %s... (%d/%d)" % (
            event, date, step + 1, numsteps)

    assure_accumulation(from_date, until_date, events,
                        callback=accumulation_status)
    activity = AccumulatedActivity.objects.filter(object_id__isnull=False,
                                                  date__gte=from_date)
    if until_date:
        # until_date is exclusive (date__lt)
        activity = activity.filter(date__lt=until_date)
    # Unowned records belonging to the selected collection
    record_ids = CollectionItem.objects.filter(
        collection=collection, record__owner__isnull=True).values('record')
    media = Media.objects.filter(
        record__in=record_ids).select_related('storage')
    # media id -> (record id, storage name) lookup used in the loop below
    media_dict = dict((id, (record, name))
                      for id, record, name in media.values_list(
                          'id', 'record', 'storage__name'))
    record_type = ContentType.objects.get_for_model(Record)
    media_type = ContentType.objects.get_for_model(Media)
    # Keep only activity attached to these records or their media
    activity = activity.filter(
        (Q(content_type=record_type, object_id__in=record_ids) |
         Q(content_type=media_type, object_id__in=media.values('id'))))
    # record id -> (identifier, title) cache to avoid repeated queries
    records = dict()
    identifier_field = standardfield_ids('identifier', equiv=True)
    title_field = standardfield_ids('title', equiv=True)
    if file_name:
        output = open(file_name, 'wb')
    else:
        output = sys.stdout
    writer = csv.writer(output, dialect='excel')
    writer.writerow((
        'Date', '', 'Record', 'Title', 'Media', 'Storage', 'Event', 'Count',
    ))
    for entry in activity.select_related('content_type').order_by(
            'date', 'event'):
        if entry.content_type == media_type:
            # Media-level activity: resolve to its record and storage
            record_id, storage = media_dict[entry.object_id]
            media_id = entry.object_id
        else:
            record_id = entry.object_id
            media_id = None
            storage = None
        if records.has_key(record_id):
            identifier, title = records[record_id]
        else:
            # First time we see this record: look up identifier and title,
            # tolerating missing values (logged to stderr)
            try:
                identifier = FieldValue.objects.filter(
                    record=record_id,
                    field__in=identifier_field,
                ).order_by('order')[0].value
                identifier = identifier.encode('utf-8')
            except Exception, e:
                print >> sys.stderr, e
                identifier = None
            try:
                title = FieldValue.objects.filter(
                    record=record_id,
                    field__in=title_field,
                ).order_by('order')[0].value
                title = title.encode('utf-8')
            except Exception, e:
                print >> sys.stderr, e
                title = None
            records[record_id] = (identifier, title)
def title_from_fieldvalues(self, fieldvalues):
    """Return the value of the first title-field entry, or None.

    Scans `fieldvalues` in order and picks the first one whose field is a
    title field (including equivalents).
    """
    title_ids = standardfield_ids('title', equiv=True)
    return next(
        (value.value for value in fieldvalues
         if value.field_id in title_ids),
        None)
def title(self):
    """Return this record's first title field value, or None."""
    title_ids = standardfield_ids('title', equiv=True)
    values = self.get_fieldvalues(q=Q(field__in=title_ids))
    if not values:
        return None
    return values[0].value
def handle(self, *args, **kwargs): mapping_file = kwargs.get('mapping_file') collections = map(int, kwargs.get('collections') or list()) if not mapping_file or not collections: print "--collection and --mapping are required parameters" return works = dict() with open(mapping_file, 'rU') as mappings: reader = csv.DictReader(mappings) for row in reader: identifier = row['ImageFileName'] work = row['fk_WorkID'] works.setdefault(work, []).append(identifier) # Clean out old relations FieldValue.objects.filter( record__collection__in=collections, field__standard__prefix='dc', field__name='relation', refinement='IsPartOf', ).delete() related_field = Field.objects.get( standard__prefix='dc', name='relation', ) id_fields = standardfield_ids('identifier', equiv=True) print "Caching record identifiers" identifiers = dict() values = FieldValue.objects.select_related('record').filter( record__collection__in=collections, field__in=id_fields) for fv in values: identifiers[fv.value] = fv.record.id pb = ProgressBar(len(works)) # Insert new relations for count, work in enumerate(works.itervalues()): primary = work[0] items = work[1:] for item in items: options = [item] if item.lower().endswith('.jpg'): options.append(item[:-4]) record = None for option in options: record = identifiers.get(option) if record: break else: continue FieldValue.objects.create(record=Record.objects.get(id=record), field=related_field, refinement='IsPartOf', value=primary) pb.update(count) pb.done()
def handle(self, *args, **kwargs):
    # Load a CSV mapping of record identifiers to works and rewrite the
    # hidden "IsPartOf" relation values, plus a hidden system-field marker
    # on the primary record of each work.
    # NOTE: Python 2 code (print statements; `map` returns a list).
    system_field = get_system_field()
    collections = map(int, kwargs.get('collections') or list())
    mapping_file = kwargs.get('mapping_file')
    if not collections:
        print "--collection is a required parameter"
        return
    if not mapping_file:
        print "--mapping is a required parameter"
        return
    # identifier -> (work id, "True"/"False" primary flag), from the CSV
    mappings = dict()
    with open(mapping_file, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            mappings[row['Identifier']] = (row['Work'], row['Primary'])
    related_field = Field.objects.get(
        standard__prefix='dc',
        name='relation',
    )
    existing_works = FieldValue.objects.filter(
        record__collection__in=collections,
        field=related_field,
        refinement='IsPartOf',
    )
    # Clean out old relations
    print "Deleting old works info"
    existing_works.delete()
    id_fields = standardfield_ids('identifier', equiv=True)
    print "Fetching records"
    identifiers = FieldValue.objects.select_related('record').filter(
        record__collection__in=collections,
        field__in=id_fields,
    )
    pb = ProgressBar(identifiers.count())
    # Insert new relations
    for count, identifier in enumerate(identifiers):
        work, isprimary = mappings.get(identifier.value, (None, False))
        # CSV stores the flag as text; only the exact string 'True' counts
        isprimary = isprimary == 'True'
        if not work:
            print "Warning: no entry found for identifier '%s'" % \
                identifier.value
            continue
        FieldValue.objects.create(record=identifier.record,
                                  field=related_field,
                                  refinement='IsPartOf',
                                  value=work,
                                  hidden=True)
        # Any existing hidden marker flagging this record as the primary
        # record of its work
        fv = list(
            FieldValue.objects.filter(record=identifier.record,
                                      field=system_field,
                                      label='primary-work-record'))
        if len(fv) > 0:
            if not isprimary:
                # No longer primary: remove stale marker(s)
                for f in fv:
                    f.delete()
        elif isprimary:
            # Newly primary: create the marker
            FieldValue.objects.create(
                record=identifier.record,
                field=system_field,
                label='primary-work-record',
                value=work,
                hidden=True,
            )
        pb.update(count)
    pb.done()
def handle(self, collection, from_date, until_date, file_format, file_name,
           include_events, exclude_events, list_events, *args, **options):
    # Export accumulated activity statistics for a collection as CSV.
    # NOTE: Python 2 code (print statements, `except Exception, e`,
    # `dict.has_key`).
    # NOTE(review): the visible body ends inside the export loop —
    # `media_id`/`storage` are computed but no row is written here;
    # presumably the writerow call follows in the original file — confirm.
    events = list(Activity.objects.distinct()
                  .order_by('event').values_list('event', flat=True))
    # --include replaces the event list entirely; --exclude filters it
    if include_events:
        events = include_events
    for event in exclude_events:
        if event in events:
            events.remove(event)
    if list_events:
        print "Events (not all may apply to date range or collection):"
        print '\n'.join(events)
        return
    if not from_date:
        print "Please specify a start date"
        return
    if not collection:
        print "Please specify a collection"
        return
    # Accept either a collection name or a numeric collection id
    try:
        collection = Collection.objects.get(name=collection)
    except Collection.DoesNotExist:
        try:
            collection = Collection.objects.get(id=collection)
        except (Collection.DoesNotExist, ValueError):
            print "Cannot find specified collection: %s" % collection
            return

    def accumulation_status(date, event, step, numsteps):
        # Progress callback; writes to stderr so it does not pollute the
        # CSV when output goes to stdout
        print >> sys.stderr, "Accumulating data for event %s on %s... (%d/%d)" % (
            event, date, step + 1, numsteps)

    assure_accumulation(from_date, until_date, events,
                        callback=accumulation_status)
    activity = AccumulatedActivity.objects.filter(object_id__isnull=False,
                                                  date__gte=from_date)
    if until_date:
        # until_date is exclusive (date__lt)
        activity = activity.filter(date__lt=until_date)
    # Unowned records belonging to the selected collection
    record_ids = CollectionItem.objects.filter(
        collection=collection, record__owner__isnull=True).values('record')
    media = Media.objects.filter(
        record__in=record_ids).select_related('storage')
    # media id -> (record id, storage name) lookup used in the loop below
    media_dict = dict((id, (record, name))
                      for id, record, name in media.values_list(
                          'id', 'record', 'storage__name'))
    record_type = ContentType.objects.get_for_model(Record)
    media_type = ContentType.objects.get_for_model(Media)
    # Keep only activity attached to these records or their media
    activity = activity.filter(
        (Q(content_type=record_type, object_id__in=record_ids) |
         Q(content_type=media_type, object_id__in=media.values('id')))
    )
    # record id -> (identifier, title) cache to avoid repeated queries
    records = dict()
    identifier_field = standardfield_ids('identifier', equiv=True)
    title_field = standardfield_ids('title', equiv=True)
    if file_name:
        output = open(file_name, 'wb')
    else:
        output = sys.stdout
    writer = csv.writer(output, dialect='excel')
    writer.writerow((
        'Date', '', 'Record', 'Title', 'Media', 'Storage', 'Event', 'Count',
    ))
    for entry in activity.select_related('content_type').order_by('date',
                                                                  'event'):
        if entry.content_type == media_type:
            # Media-level activity: resolve to its record and storage
            record_id, storage = media_dict[entry.object_id]
            media_id = entry.object_id
        else:
            record_id = entry.object_id
            media_id = None
            storage = None
        if records.has_key(record_id):
            identifier, title = records[record_id]
        else:
            # First time we see this record: look up identifier and title,
            # tolerating missing values (logged to stderr)
            try:
                identifier = FieldValue.objects.filter(
                    record=record_id,
                    field__in=identifier_field,
                ).order_by('order')[0].value
                identifier = identifier.encode('utf-8')
            except Exception, e:
                print >> sys.stderr, e
                identifier = None
            try:
                title = FieldValue.objects.filter(
                    record=record_id,
                    field__in=title_field,
                ).order_by('order')[0].value
                title = title.encode('utf-8')
            except Exception, e:
                print >> sys.stderr, e
                title = None
            records[record_id] = (identifier, title)
def handle(self, *args, **kwargs):
    # Load a CSV mapping of record identifiers to works and rewrite the
    # hidden "IsPartOf" relation values, plus a hidden system-field marker
    # on the primary record of each work.
    # NOTE: Python 2 code (print statements; `map` returns a list).
    system_field = get_system_field()
    collections = map(int, kwargs.get('collections') or list())
    mapping_file = kwargs.get('mapping_file')
    if not collections:
        print "--collection is a required parameter"
        return
    if not mapping_file:
        print "--mapping is a required parameter"
        return
    # identifier -> (work id, "True"/"False" primary flag), from the CSV
    mappings = dict()
    with open(mapping_file, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            mappings[row['Identifier']] = (row['Work'], row['Primary'])
    related_field = Field.objects.get(
        standard__prefix='dc',
        name='relation',
    )
    existing_works = FieldValue.objects.filter(
        record__collection__in=collections,
        field=related_field,
        refinement='IsPartOf',
    )
    # Clean out old relations
    print "Deleting old works info"
    existing_works.delete()
    id_fields = standardfield_ids('identifier', equiv=True)
    print "Fetching records"
    identifiers = FieldValue.objects.select_related('record').filter(
        record__collection__in=collections,
        field__in=id_fields,
    )
    pb = ProgressBar(identifiers.count())
    # Insert new relations
    for count, identifier in enumerate(identifiers):
        work, isprimary = mappings.get(identifier.value, (None, False))
        # CSV stores the flag as text; only the exact string 'True' counts
        isprimary = isprimary == 'True'
        if not work:
            print "Warning: no entry found for identifier '%s'" % \
                identifier.value
            continue
        FieldValue.objects.create(
            record=identifier.record,
            field=related_field,
            refinement='IsPartOf',
            value=work,
            hidden=True
        )
        # Any existing hidden marker flagging this record as the primary
        # record of its work
        fv = list(FieldValue.objects.filter(
            record=identifier.record,
            field=system_field,
            label='primary-work-record'
        ))
        if len(fv) > 0:
            if not isprimary:
                # No longer primary: remove stale marker(s)
                for f in fv:
                    f.delete()
        elif isprimary:
            # Newly primary: create the marker
            FieldValue.objects.create(
                record=identifier.record,
                field=system_field,
                label='primary-work-record',
                value=work,
                hidden=True,
            )
        pb.update(count)
    pb.done()