def test_agents(self):
    """The 'AGENTS' entry has one item per agent linked to the IP."""
    package = InformationPackage.objects.create()

    # Nothing has been linked yet, so the entry is empty.
    agents_before = fill_specification_data(ip=package)['AGENTS']
    self.assertEqual(len(agents_before), 0)

    package.agents.add(Agent.objects.create())

    agents_after = fill_specification_data(ip=package)['AGENTS']
    self.assertEqual(len(agents_after), 1)
def test_content_path(self):
    """'CONTENTPATH' is the object path, extended once a SIP profile is locked.

    Before a submission agreement is attached the value equals the IP's
    object path; after locking an agreement whose SIP profile marks the
    'data' folder as content, the value is 'foo/data'.
    """
    user = User.objects.create()
    package = InformationPackage.objects.create(
        object_path="foo",
        package_type=InformationPackage.SIP,
    )
    self.assertEqual(fill_specification_data(ip=package)['CONTENTPATH'], 'foo')

    content_folder = {
        'type': 'folder',
        'name': 'data',
        'use': 'content',
    }
    sip_profile = Profile.objects.create(
        profile_type='sip',
        structure=[content_folder],
    )
    cache_storage = StorageMethod.objects.create()
    ingest_path = Path.objects.create()
    policy = StoragePolicy.objects.create(
        cache_storage=cache_storage,
        ingest_path=ingest_path,
    )
    agreement = SubmissionAgreement.objects.create(
        profile_sip=sip_profile,
        policy=policy,
    )

    package.submission_agreement = agreement
    package.save()
    agreement.lock_to_information_package(package, user)

    content_path = fill_specification_data(ip=package)['CONTENTPATH']
    self.assertEqual(content_path, 'foo/data')
def generate_content_metadata(ip):
    """Generate the content METS (and PREMIS, when locked) for ``ip``.

    A PREMIS file is added to the generation job only when the IP's
    'preservation_metadata' profile is locked. Both files are produced by a
    single ``XMLGenerator.generate`` call, after which the METS file's path,
    creation date, size, digest algorithm and digest are stored on ``ip``
    and saved.
    """
    targets = {}

    if ip.profile_locked('preservation_metadata'):
        premis_rel = ip.get_profile_rel('preservation_metadata')
        premis_data = fill_specification_data(
            ip.get_profile_data('preservation_metadata'), ip=ip)
        premis_relative = parseContent(ip.get_premis_file_path(), premis_data)
        premis_target = os.path.join(ip.object_path, premis_relative)
        targets[premis_target] = {
            'spec': premis_rel.profile.specification,
            'data': premis_data,
        }

    mets_path = ip.get_content_mets_file_path()
    full_mets_path = os.path.join(ip.object_path, mets_path)

    # The profile type is the lower-cased display name of the package type.
    profile_type = ip.get_package_type_display().lower()
    profile_rel = ip.get_profile_rel(profile_type)
    profile_data = ip.get_profile_data(profile_type)
    targets[full_mets_path] = {
        'spec': profile_rel.profile.specification,
        'data': fill_specification_data(profile_data, ip=ip),
    }

    rel_data = profile_rel.data
    parsed_files = rel_data.parsed_files
    extra_paths_to_parse = rel_data.extra_paths_to_parse
    algorithm = ip.get_checksum_algorithm()

    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(
        targets,
        folderToParse=ip.object_path,
        algorithm=algorithm,
        parsed_files=parsed_files,
        extra_paths_to_parse=extra_paths_to_parse,
    )

    # Record facts about the freshly written METS file on the IP.
    ip.content_mets_path = mets_path
    mets_created = timestamp_to_datetime(creation_date(full_mets_path))
    ip.content_mets_create_date = mets_created.isoformat()
    ip.content_mets_size = os.path.getsize(full_mets_path)
    digest_key = algorithm.upper()
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[digest_key]
    ip.content_mets_digest = calculate_checksum(full_mets_path, algorithm=algorithm)
    ip.save()
def create_profile_rels(self, profile_types, user):
    """Create a ``ProfileIP`` with initial data for each given profile type.

    For every type in ``profile_types`` the attribute ``profile_<type>`` is
    looked up on the submission agreement; types without a profile are
    skipped. Initial data is built from the profile template: a field's
    ``defaultValue`` is replaced by the matching specification-data value
    when it names one, otherwise it is used as is.
    """
    agreement = self.submission_agreement
    spec_data = fill_specification_data(ip=self, sa=agreement)

    for profile_type in profile_types:
        profile = getattr(agreement, 'profile_%s' % profile_type, None)
        if profile is None:
            continue

        relation = ProfileIP.objects.create(ip=self, profile=profile)

        initial = {}
        for field in relation.profile.template:
            try:
                default = field['defaultValue']
                if default in spec_data:
                    resolved = spec_data[default]
                else:
                    resolved = default
                initial[field['key']] = resolved
            except KeyError:
                # Fields lacking 'defaultValue' or 'key' contribute nothing.
                pass

        relation.data = ProfileIPData.objects.create(
            relation=relation,
            data=initial,
            version=0,
            user=user,
        )
        relation.save()
def get_content_type_file(self):
    """Return the parsed content type specification path, or ``None``.

    ``None`` is returned when the structure has no
    'content_type_specification' destination.
    """
    directory, filename = find_destination(
        'content_type_specification', self.get_structure())

    if directory is not None:
        unparsed = os.path.join(directory, filename)
        return parseContent(unparsed, fill_specification_data(ip=self))

    return None
def generate_package_mets(ip):
    """Generate the package METS file next to ``ip.object_path``.

    The profile used is 'submit_description' for SIPs and 'aip_description'
    for AIPs. The XML is written to the object path with its extension
    replaced by '.xml', and the resulting file's path, creation date, size,
    digest algorithm and digest are stored on ``ip`` and saved.

    Raises:
        ValueError: if the package type is neither SIP nor AIP.
    """
    sa = ip.submission_agreement

    profile_type_by_package = {
        InformationPackage.SIP: 'submit_description',
        InformationPackage.AIP: 'aip_description',
    }
    profile_type = profile_type_by_package.get(ip.package_type)
    if profile_type is None:
        raise ValueError(
            'Cannot create package mets for IP of type {package_type}'.format(
                package_type=ip.package_type
            )
        )

    profile_rel = ip.get_profile_rel(profile_type)
    profile_data = ip.get_profile_data(profile_type)

    base_path, _extension = os.path.splitext(ip.object_path)
    xmlpath = base_path + '.xml'

    data = fill_specification_data(profile_data, ip=ip, sa=sa)
    object_created = timestamp_to_datetime(creation_date(ip.object_path))
    data["_IP_CREATEDATE"] = object_created.isoformat()

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(
        {xmlpath: {'spec': profile_rel.profile.specification, 'data': data}},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )

    # Record facts about the freshly written package METS on the IP.
    ip.package_mets_path = normalize_path(xmlpath)
    xml_created = timestamp_to_datetime(creation_date(xmlpath))
    ip.package_mets_create_date = xml_created.isoformat()
    ip.package_mets_size = os.path.getsize(xmlpath)
    ip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.package_mets_digest = calculate_checksum(xmlpath, algorithm=algorithm)
    ip.save()
def run(self, structure=None, root=""):
    """
    Creates the IP physical model based on a logical model.

    Directories that already exist are left untouched. If any step fails,
    only the directories created by this call are removed again before the
    exception is re-raised; pre-existing directories are never deleted.

    Args:
        structure: A dict specifying the logical model. Falls back to the
            IP's own structure when empty or ``None``.
        root: The root directory to be used. Falls back to the IP's object
            path when empty.

    Raises:
        OSError: if a directory cannot be created for a reason other than
            already existing, or cannot be removed during rollback for a
            reason other than being missing.
    """
    ip = self.get_information_package()
    data = fill_specification_data(ip=ip, sa=ip.submission_agreement)
    structure = structure or ip.get_structure()
    root = root or ip.object_path

    # Only directories that os.makedirs actually created are tracked, so a
    # rollback cannot wipe directories (and their content) that were
    # already present before this task ran.
    created = []
    try:
        for dirname in self.get_dirs(structure, data, root):
            try:
                os.makedirs(dirname)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
            else:
                created.append(dirname)
    except Exception:
        for dirname in created:
            try:
                shutil.rmtree(dirname)
            except OSError as e:
                # A parent removed earlier in this loop may already have
                # taken this directory with it.
                if e.errno != errno.ENOENT:
                    raise
        raise

    self.set_progress(1, total=1)
def run(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None,
        parsed_files=None, algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    Each entry's 'data' is first passed through ``fill_specification_data``
    together with the task's IP (looked up by ``self.ip``) and that IP's
    submission agreement, when such an IP exists.
    """
    filesToCreate = {} if filesToCreate is None else filesToCreate
    extra_paths_to_parse = [] if extra_paths_to_parse is None else extra_paths_to_parse
    parsed_files = [] if parsed_files is None else parsed_files

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = ip.submission_agreement if ip is not None else None

    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    XMLGenerator().generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )
def run(self, workarea, validators, stop_at_failure=True):
    """Validate a workarea's path and record the outcome per validator.

    Args:
        workarea: primary key of the ``Workarea`` to validate.
        validators: validator keys; each must be a key of
            ``validation.AVAILABLE_VALIDATORS``.
        stop_at_failure: forwarded to ``validation.validate_path``.

    A notification is created whether validation passes or raises
    ``ValidationError``; any other exception propagates, but the
    per-validator results are still stored.
    """
    workarea = Workarea.objects.get(pk=workarea)

    # Reset every requested validator to "unknown" before running.
    workarea.successfully_validated = {name: None for name in validators}
    workarea.save(update_fields=['successfully_validated'])

    ip = workarea.ip
    sa = ip.submission_agreement
    validation_profile = ip.get_profile('validation')
    profile_data = fill_specification_data(
        data=ip.get_profile_data('validation'),
        sa=sa,
        ip=ip,
    )
    responsible = User.objects.get(pk=self.responsible)

    try:
        validation.validate_path(
            workarea.path,
            validators,
            validation_profile,
            data=profile_data,
            ip=ip,
            task=self.task_id,
            stop_at_failure=stop_at_failure,
            responsible=responsible,
        )
    except ValidationError:
        self.create_notification(ip)
    else:
        self.create_notification(ip)
    finally:
        failed_validators = (
            ip.validation_set.all()
            .values('validator')
            .filter(passed=False, required=True)
            .values_list('validator', flat=True)
        )

        # Only existing keys are reassigned, so iterating the dict is safe.
        for key in workarea.successfully_validated:
            dotted_path = validation.AVAILABLE_VALIDATORS[key]
            class_name = dotted_path.split('.')[-1]
            workarea.successfully_validated[key] = class_name not in failed_validators

        workarea.save(update_fields=['successfully_validated'])
def generate_content_mets(ip):
    """Generate the content METS file for ``ip`` and record it on the IP.

    The METS file is written below ``ip.object_path``. Afterwards its path,
    creation date, size, digest algorithm and digest are stored on ``ip``
    and saved.
    """
    mets_path = ip.get_content_mets_file_path()
    full_mets_path = os.path.join(ip.object_path, mets_path)

    # The profile type is the lower-cased display name of the package type.
    profile_type = ip.get_package_type_display().lower()
    relation = ip.get_profile_rel(profile_type)
    filled = fill_specification_data(ip.get_profile_data(profile_type), ip=ip)

    targets = {
        full_mets_path: {
            'spec': relation.profile.specification,
            'data': filled,
        },
    }

    algorithm = ip.get_checksum_algorithm()
    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(targets, folderToParse=ip.object_path, algorithm=algorithm)

    ip.content_mets_path = mets_path
    created_at = timestamp_to_datetime(creation_date(full_mets_path))
    ip.content_mets_create_date = created_at.isoformat()
    ip.content_mets_size = os.path.getsize(full_mets_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.content_mets_digest = calculate_checksum(full_mets_path, algorithm=algorithm)
    ip.save()
def GeneratePremis(self):
    """Generate PREMIS for the task's IP and log a success event.

    The event message names the parsed PREMIS file path.
    """
    generate_premis(self.get_information_package())

    # The IP is fetched again after generation, as in the original flow.
    ip = self.get_information_package()
    spec_data = fill_specification_data(ip=ip)
    unparsed_path = ip.get_premis_file_path()
    premis_path = parseContent(unparsed_path, spec_data)

    self.create_success_event('Generated {xml}'.format(xml=premis_path))
def run(self):
    """Generate the content METS for the task's IP and record it on the IP.

    The path returned by ``get_content_mets_file_path`` is used directly as
    the output file. Afterwards the file's path, creation date, size,
    digest algorithm and digest are stored on the IP and saved.
    """
    ip = self.get_information_package()
    mets_path = ip.get_content_mets_file_path()

    # The profile type is the lower-cased display name of the package type.
    profile_type = ip.get_package_type_display().lower()
    relation = ip.get_profile_rel(profile_type)
    filled = fill_specification_data(ip.get_profile_data(profile_type), ip=ip)

    targets = {
        mets_path: {
            'spec': relation.profile.specification,
            'data': filled,
        },
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(targets, folderToParse=ip.object_path, algorithm=algorithm)

    ip.content_mets_path = mets_path
    created_at = timestamp_to_datetime(creation_date(mets_path))
    ip.content_mets_create_date = created_at.isoformat()
    ip.content_mets_size = os.path.getsize(mets_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.content_mets_digest = calculate_checksum(mets_path, algorithm=algorithm)
    ip.save()
def ValidateWorkarea(self, workarea, validators, stop_at_failure=True):
    """Validate a workarea's path, notify the user and store the results.

    Args:
        workarea: primary key of the ``Workarea`` to validate.
        validators: validator keys; each must be a key of
            ``validation.AVAILABLE_VALIDATORS``.
        stop_at_failure: forwarded to ``validation.validate_path``.

    A notification is created whether validation passes or raises
    ``ValidationError``; any other exception propagates, but the
    per-validator results are still stored.
    """

    def create_notification(ip):
        """Notify the responsible user about the validation outcome."""
        errcount = Validation.objects.filter(
            information_package=ip, passed=False, required=True).count()

        if errcount:
            level = logging.ERROR
            message = 'Validation of "{ip}" failed with {errcount} error(s)'.format(
                ip=ip.object_identifier_value, errcount=errcount)
        else:
            level = logging.INFO
            message = '"{ip}" was successfully validated'.format(
                ip=ip.object_identifier_value)

        Notification.objects.create(
            message=message,
            level=level,
            user_id=self.responsible,
            refresh=True,
        )

    workarea = Workarea.objects.get(pk=workarea)

    # Reset every requested validator to "unknown" before running.
    workarea.successfully_validated = {name: None for name in validators}
    workarea.save(update_fields=['successfully_validated'])

    ip = workarea.ip
    sa = ip.submission_agreement
    validation_profile = ip.get_profile('validation')
    profile_data = fill_specification_data(
        data=ip.get_profile_data('validation'),
        sa=sa,
        ip=ip,
    )
    responsible = User.objects.get(pk=self.responsible)

    try:
        validation.validate_path(
            workarea.path,
            validators,
            validation_profile,
            data=profile_data,
            ip=ip,
            task=self.get_processtask(),
            stop_at_failure=stop_at_failure,
            responsible=responsible,
        )
    except ValidationError:
        create_notification(ip)
    else:
        create_notification(ip)
    finally:
        failed_validators = (
            ip.validation_set.all()
            .values('validator')
            .filter(passed=False, required=True)
            .values_list('validator', flat=True)
        )

        # Only existing keys are reassigned, so iterating the dict is safe.
        for key in workarea.successfully_validated:
            dotted_path = validation.AVAILABLE_VALIDATORS[key]
            class_name = dotted_path.split('.')[-1]
            workarea.successfully_validated[key] = class_name not in failed_validators

        workarea.save(update_fields=['successfully_validated'])
def get_content_mets_file_path(self):
    """Return the normalized content METS path below ``self.object_path``.

    The relative part comes from the structure's "mets_file" destination,
    parsed with the IP's specification data; without such a destination it
    defaults to 'mets.xml'.
    """
    mets_dir, mets_name = find_destination("mets_file", self.get_structure())

    if mets_dir is None:
        relative = 'mets.xml'
    else:
        unparsed = os.path.join(mets_dir, mets_name)
        relative = parseContent(unparsed, fill_specification_data(ip=self))

    return normalize_path(os.path.join(self.object_path, relative))
def get_premis_file_path(self):
    """Return the normalized PREMIS file path below ``self.object_path``.

    The relative part comes from the structure's
    "preservation_description_file" destination, parsed with the IP's
    specification data; without such a destination it defaults to
    'metadata/premis.xml'.
    """
    structure = self.get_structure()
    premis_dir, premis_name = find_destination(
        "preservation_description_file", structure)

    if premis_dir is None:
        relative = 'metadata/premis.xml'
    else:
        unparsed = os.path.join(premis_dir, premis_name)
        relative = parseContent(unparsed, fill_specification_data(ip=self))

    return normalize_path(os.path.join(self.object_path, relative))
def generate_events_xml(ip):
    """Generate the events XML file for ``ip``.

    The file is written to the IP's events file path joined onto
    ``ip.object_path``, using the spec returned by ``get_event_spec``.
    """
    target = os.path.join(ip.object_path, ip.get_events_file_path())
    job = {
        'spec': get_event_spec(),
        'data': fill_specification_data(ip=ip),
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate({target: job}, algorithm=algorithm)
def get_template(self, obj):
    """Return ``obj.template`` with known default values substituted.

    A field whose ``defaultValue`` names an entry in the specification data
    gets that entry's value instead. The fields are updated in place, so
    the returned list is ``obj.template`` itself.
    """
    spec_data = fill_specification_data()

    for field in obj.template:
        try:
            placeholder = field['defaultValue']
            if placeholder in spec_data:
                field['defaultValue'] = spec_data[placeholder]
        except KeyError:
            # Fields without a default value are left as they are.
            continue

    return obj.template
def run(self):
    """Generate the events XML file for the task's information package.

    The file is written to the IP's events file path joined onto its
    object path, using the spec returned by ``get_event_spec``.
    """
    ip = self.get_information_package()
    target = os.path.join(ip.object_path, ip.get_events_file_path())

    job = {
        'spec': get_event_spec(),
        'data': fill_specification_data(ip=ip),
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate({target: job}, algorithm=algorithm)
def create(self, template, destination, outcome, short_message, message,
           date=None, ip=None, task=None, **kwargs):
    """Render ``template`` and send it as an email receipt.

    Args:
        template: template name passed to ``render_to_string``.
        destination: recipient address; when ``None`` and a task is given,
            the task's responsible user's email is used.
        outcome, message: exposed to the template under the same names.
        short_message: used as the email subject.
        date: exposed to the template; defaults to the current time.
        ip: optional IP whose specification data seeds the template context.
        task: optional task whose traceback, exception and validations are
            added to the template context.
        **kwargs: ``attachments`` may hold file paths to attach.

    Raises:
        NoEmailRecipientError: if no recipient could be determined.
        NoEmailSentError: if the send call reports zero sent messages.
    """
    if destination is None and task is not None:
        destination = task.responsible.email

    if not destination:
        msg = 'No recipient set for email'
        logger.error(msg)
        raise NoEmailRecipientError(msg)

    logger.debug('Sending receipt email to {}'.format(destination))

    context = {}
    if ip is not None:
        context = fill_specification_data(data=context, ip=ip).to_dict()

    context['outcome'] = outcome
    context['message'] = message
    context['date'] = date or timezone.now()

    if task is not None:
        context['task_traceback'] = task.traceback
        context['task_exception'] = task.exception
        context['validations'] = Validation.objects.filter(
            task=task).order_by('time_started')

    body = render_to_string(template, context)
    email = EmailMessage(short_message, body, None, [destination])

    for attachment in kwargs.get('attachments', []):
        email.attach_file(attachment)

    sent_count = email.send(fail_silently=False)
    logger.debug(
        '{} emails sent (including cc and bcc entries)'.format(sent_count))

    if not sent_count:
        raise NoEmailSentError('No emails sent')

    logger.info('Email receipt sent to {}'.format(destination))
def generate_premis(ip):
    """Generate the PREMIS file for ``ip``.

    The file is written to the path returned by
    ``ip.get_premis_file_path()`` using the IP's 'preservation_metadata'
    profile specification and data.
    """
    premis_path = ip.get_premis_file_path()
    relation = ip.get_profile_rel('preservation_metadata')
    raw_data = ip.get_profile_data('preservation_metadata')

    job = {
        'spec': relation.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(
        {premis_path: job},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )
def create(self, template, destination, outcome, short_message, message,
           date=None, ip=None, task=None):
    """Create an XML receipt at ``destination``.

    Args:
        template: name of the template whose source is a JSON XML spec.
        destination: path of the XML file to generate.
        outcome, message: stored in the receipt data under the same names.
        short_message: accepted for interface parity; not used here.
        date: stored in the receipt data; defaults to the current time.
        ip: optional IP; seeds the data and supplies the content type file
            from which 'ArkivobjektArende' elements are collected.
        task: optional task whose validations are serialized into the data.
    """
    logger.debug(u'Creating XML receipt: {}'.format(destination))
    spec = json.loads(get_template(template).template.source)

    data = {}
    if ip is not None:
        data = fill_specification_data(data=data, ip=ip)

    data['outcome'] = outcome
    data['message'] = message
    data['date'] = date or timezone.now()

    if task is not None:
        validations = Validation.objects.filter(
            task=task).order_by('time_started')
        data['validations'] = ValidationSerializer(validations, many=True).data

    data[u'ärenden'] = []

    cts = ip.get_content_type_file() if ip is not None else None
    if cts is not None:
        tree = etree.parse(ip.open_file(cts))
        cases = tree.xpath("//*[local-name()='ArkivobjektArende']")

        for arende in cases:
            id_nodes = arende.xpath("*[local-name()='ArkivobjektID']")
            arende_id = id_nodes[0].text
            a_data = {'ArkivobjektID': arende_id}

            try:
                # Attach the search index id of the matching component; the
                # whole lookup stays inside the try, as before.
                must = [
                    Q('term', type=u"Ärende"),
                    Q('term', **{'reference_code.keyword': arende_id}),
                    Q('term', ip=str(ip.pk)),
                ]
                search = Search(index=['component']).filter('bool', must=must)
                a_data['id'] = search.execute().hits[0].meta.id
            except IndexError:
                # No indexed component matched; the entry carries no 'id'.
                pass

            data[u'ärenden'].append(a_data)

    XMLGenerator().generate({destination: {'spec': spec, 'data': data}})
    logger.info(u'XML receipt created: {}'.format(destination))
def get_submission_agreement_data(self, obj):
    """Return the serialized submission agreement data for ``obj``.

    When ``obj`` has no stored submission agreement data, an empty
    ``{'data': {}}`` payload is used. For every template field of the
    submission agreement whose key is present in the specification data,
    the specification value overrides the entry in ``data['data']``.
    """
    stored = obj.submission_agreement_data
    if stored is None:
        data = {'data': {}}
    else:
        data = SubmissionAgreementIPDataSerializer(stored).data

    agreement = obj.submission_agreement
    spec_data = fill_specification_data(ip=obj, sa=agreement)

    # A missing agreement (or one without a template) yields no fields.
    for field in getattr(agreement, 'template', []):
        key = field['key']
        if key in spec_data:
            data['data'][key] = spec_data[key]

    return data
def get_events_file_path(self, from_container=False):
    """Return the path of the IP's events file.

    When ``from_container`` is false and ``self.object_path`` is a file,
    the result is the object path with its extension replaced by
    '_ipevents.xml'. Otherwise the profile structure's 'events_file'
    destination is parsed and normalized, falling back to 'ipevents.xml'
    when the structure has no such destination.
    """
    if not from_container and os.path.isfile(self.object_path):
        base_path, _extension = os.path.splitext(self.object_path)
        return base_path + '_ipevents.xml'

    profile_type = self.get_package_type_display().lower()
    structure = self.get_profile(profile_type).structure
    events_dir, events_file = find_destination('events_file', structure)

    if events_dir is None:
        return 'ipevents.xml'

    unparsed = os.path.join(events_dir, events_file)
    return normalize_path(
        parseContent(unparsed, fill_specification_data(ip=self)))
def lock(self, user):
    """Lock this relation for ``user`` and fill in missing default values.

    For each template field that has a ``defaultValue`` and whose key is
    not yet in the profile's specification data, the key is set to the
    matching specification-data value when the default names one,
    otherwise to the default itself. The profile's ``specification_data``
    and this relation are then saved.
    """
    self.LockedBy = user
    spec_values = fill_specification_data(
        ip=self.ip, sa=self.ip.submission_agreement)

    for field in self.profile.template:
        if 'defaultValue' not in field:
            continue

        key = field['key']
        if key in self.profile.specification_data.keys():
            # Values that are already present are never overwritten.
            continue

        default = field['defaultValue']
        if default in spec_values:
            self.profile.specification_data[key] = spec_values[default]
        else:
            self.profile.specification_data[key] = default

    self.profile.save(update_fields=['specification_data'])
    self.save()
def get_specification_data(self, obj):
    """Return ``obj.specification_data``, filled in when a request exists.

    With a request in the serializer context, the 'sa' and 'ip' query
    parameters select a submission agreement and an IP; when no agreement
    is found but an IP is, the IP's own agreement is used. Without a
    request the stored data is returned unchanged.
    """
    data = obj.specification_data

    request = self.context.get('request')
    if not request:
        return data

    sa = SubmissionAgreement.objects.filter(
        pk=request.GET.get('sa')).first()
    ip = InformationPackage.objects.filter(
        pk=request.GET.get('ip')).first()

    if not sa and ip:
        sa = ip.submission_agreement

    return fill_specification_data(data=data, sa=sa, ip=ip)
def get_data(self, obj):
    """Return the serialized data of a profile/IP relation.

    Starts from the relation's stored data (or an empty ``{'data': {}}``
    payload), merges in the related profile data, and finally lets the
    specification data override every template field whose key it holds.
    """
    if obj.data is None:
        data = {'data': {}}
    else:
        request_context = {'request': self.context['request']}
        data = ProfileIPDataSerializer(obj.data, context=request_context).data

    data['data'].update(obj.get_related_profile_data(original_keys=True))

    spec_data = fill_specification_data(
        ip=obj.ip, sa=obj.ip.submission_agreement)

    for field in obj.profile.template:
        key = field['key']
        if key in spec_data:
            data['data'][key] = spec_data[key]

    return data
def GenerateXML(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None,
                parsed_files=None, algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    Args:
        filesToCreate: dict mapping each output path to a dict with 'spec'
            and 'data' entries; each 'data' is replaced by the result of
            ``fill_specification_data``. Defaults to an empty dict.
        folderToParse: forwarded to ``XMLGenerator.generate``.
        extra_paths_to_parse: forwarded to the generator; defaults to ``[]``.
        parsed_files: forwarded to the generator; defaults to ``[]``.
        algorithm: checksum algorithm name forwarded to the generator.

    The IP is looked up by ``self.ip``. When none is found, unknown file
    types and encrypted files are both disallowed and no submission
    agreement is passed along. A success event listing the generated paths
    is created at the end.
    """
    if filesToCreate is None:
        filesToCreate = {}
    if extra_paths_to_parse is None:
        extra_paths_to_parse = []
    if parsed_files is None:
        parsed_files = []

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = None
    allow_unknown_file_types = False
    allow_encrypted_files = False

    if ip is not None:
        sa = ip.submission_agreement
        allow_unknown_file_types = ip.get_allow_unknown_file_types()
        allow_encrypted_files = ip.get_allow_encrypted_files()

    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    generator = XMLGenerator(
        allow_unknown_file_types=allow_unknown_file_types,
        allow_encrypted_files=allow_encrypted_files,
    )
    generator.generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )

    # filesToCreate was normalised to a dict at the top, so no second
    # None check is needed before building the message.
    msg = "Generated %s" % ", ".join(filesToCreate)
    self.create_success_event(msg)
def validate(self, data):
    """Validate submitted profile data against the relation's template.

    Keys that are present in the specification data are dropped before
    validation, and ``data['data']`` is replaced by the filtered dict.

    Raises:
        serializers.ValidationError: when creating (no instance) and the
            submitted data equals the relation's current data.
    """
    relation = data['relation']
    submitted = data.get('data', {})

    creating = self.instance is None
    if creating and relation.data is not None and submitted == relation.data.data:
        raise serializers.ValidationError('No changes made')

    spec_data = fill_specification_data(
        ip=relation.ip, sa=relation.ip.submission_agreement)

    filtered_data = {
        key: value
        for key, value in submitted.items()
        if key not in spec_data
    }

    validate_template(relation.profile.template, filtered_data)
    data['data'] = filtered_data
    return data
def run(self):
    """Insert a PREMIS object element for the IP into its events XML file.

    The element is built from ``get_premis_ip_object_element_spec`` and
    inserted at index 0 of the 'premis' element, after which the file is
    written back to the same path.
    """
    ip = self.get_information_package()

    object_info = fill_specification_data(
        {
            'FIDType': "UUID",
            'FID': ip.object_identifier_value,
            'FFormatName': ip.get_container_format().upper(),
            'FLocationType': 'URI',
            'FName': ip.object_path,
        },
        ip=ip,
    ) if False else None
    # Build the raw info first, then fetch the spec, then fill it in; this
    # keeps the original call order.
    raw_info = {
        'FIDType': "UUID",
        'FID': ip.object_identifier_value,
        'FFormatName': ip.get_container_format().upper(),
        'FLocationType': 'URI',
        'FName': ip.object_path,
    }
    element_spec = get_premis_ip_object_element_spec()
    object_info = fill_specification_data(raw_info, ip=ip)

    events_xml = os.path.join(ip.object_path, ip.get_events_file_path())
    xml_generator = XMLGenerator(filepath=events_xml)
    premis_root = xml_generator.find_element('premis')
    xml_generator.insert_from_specification(
        premis_root, element_spec, data=object_info, index=0)
    xml_generator.write(events_xml)
def generate_premis(ip):
    """Generate the PREMIS file for ``ip``.

    The output path is the IP's PREMIS file path parsed with the filled-in
    'preservation_metadata' profile data. The generator honours the IP's
    settings for unknown file types and encrypted files.
    """
    relation = ip.get_profile_rel('preservation_metadata')
    raw_data = ip.get_profile_data('preservation_metadata')
    filled = fill_specification_data(raw_data, ip=ip)

    premis_path = parseContent(ip.get_premis_file_path(), filled)
    job = {
        'spec': relation.profile.specification,
        'data': filled,
    }

    algorithm = ip.get_checksum_algorithm()
    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(
        {premis_path: job},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )