def create_streams(streams, premis_file):
    """Build PREMIS bitstream objects for the audio and video streams.

    :streams: Stream dict
    :premis_file: Created PREMIS XML file for the digital object file; a
                  structural 'includes' relationship is appended to it
                  for every audio or video stream
    :returns: Dict mapping each audio/video stream key to its PREMIS
              object, or None when fewer than two streams are given
    """
    if len(streams) < 2:
        return None

    stream_objects = {}
    for key, stream_info in six.iteritems(streams):
        # Only audio and video streams get their own bitstream object.
        if stream_info['stream_type'] in ('video', 'audio'):
            stream_id = premis.identifier(
                identifier_type='UUID',
                identifier_value=six.text_type(uuid4()))
            designation = premis.format_designation(
                stream_info['mimetype'], stream_info['version'])
            characteristics = premis.object_characteristics(
                child_elements=[premis.format(child_elements=[designation])])
            bitstream = premis.object(
                stream_id,
                child_elements=[characteristics],
                bitstream=True)

            stream_objects[key] = bitstream
            relation = premis.relationship(
                'structural', 'includes', bitstream)
            premis_file.append(relation)

    return stream_objects
def create_premis_object(tree, fname, skip_inspection=None,
                         format_name=None, format_version=None,
                         digest_algorithm='MD5', message_digest=None,
                         date_created=None, charset=None):
    """Create a PREMIS object for the given file and append it to ``tree``.

    Unless ``skip_inspection`` is set, the file is run through every
    validator returned by ``iter_validators``; the result of the last
    validator supplies defaults for the format name, version and charset.

    :tree: Element the created PREMIS object is appended to
    :fname: Path of the file to describe
    :skip_inspection: If true, the file is not validated and no technical
                      metadata is detected
    :format_name: File format name; defaults to the detected mimetype
    :format_version: File format version; defaults to the detected version
                     when one was found
    :digest_algorithm: Name of the digest algorithm; None means 'MD5'
    :message_digest: Digest of the file; computed with ``md5(fname)`` when
                     not given
    :date_created: Creation date; defaults to ``creation_date(fname)``
    :charset: Character set appended to the format name; defaults to the
              detected charset when one was found
    :returns: ``tree`` with the new PREMIS object appended
    :raises: Exception if a validator reports the file as invalid.
             KeyError if ``format_name`` is not given and no technical
             metadata is available (e.g. inspection was skipped).
    """
    techmd = {}
    if not skip_inspection:
        for validator in iter_validators(fileinfo(fname)):
            validation_result = validator.result()
            if not validation_result['is_valid']:
                # Exceptions do not %-interpolate their arguments the way
                # logging calls do, so build the message explicitly.
                raise Exception(
                    'File %s is not valid: %s'
                    % (fname, validation_result['errors']))
            techmd = validation_result['result']

    if message_digest is None:
        # NOTE(review): the digest is always computed with md5(), even if
        # the caller asked for another digest_algorithm -- confirm callers
        # never pass an algorithm without a digest.
        message_digest = md5(fname)
    if digest_algorithm is None:
        digest_algorithm = 'MD5'

    if format_name is None:
        format_name = techmd['format']['mimetype']
    if format_version is None and (techmd and 'version' in techmd['format']):
        format_version = techmd['format']['version']

    # An explicitly given charset wins over the detected one.
    if charset or (techmd and 'charset' in techmd['format']):
        format_name += '; charset=' + (charset or techmd['format']['charset'])

    if date_created is None:
        date_created = creation_date(fname)

    premis_fixity = premis.fixity(message_digest, digest_algorithm)
    premis_format_des = premis.format_designation(format_name, format_version)
    premis_format = premis.format(child_elements=[premis_format_des])
    premis_date_created = premis.date_created(date_created)
    premis_create = premis.creating_application(
        child_elements=[premis_date_created])
    premis_objchar = premis.object_characteristics(
        child_elements=[premis_fixity, premis_format, premis_create])

    # Create object element
    object_identifier = premis.identifier(
        identifier_type='UUID',
        identifier_value=str(uuid4()))
    el_premis_object = premis.object(
        object_identifier, child_elements=[premis_objchar])
    tree.append(el_premis_object)

    return tree
def create_report_object(metadata_info, linking_sip_type, linking_sip_id):
    """Build the PREMIS representation object for a report.

    :metadata_info: Dict with the keys 'object_id' (itself a dict with
                    'type' and 'value') and 'filename'
    :linking_sip_type: Identifier type of the related object
    :linking_sip_id: Identifier value of the related object
    :returns: PREMIS object holding an environment with a dependency on
              the 'object_id' identifier and a structural
              'is included in' relationship to the related object
    """
    # Environment: dependency on the object named in metadata_info.
    dependency_id = premis.identifier(
        metadata_info['object_id']['type'],
        metadata_info['object_id']['value'],
        prefix='dependency')
    environment = premis.environment(
        child_elements=[premis.dependency(identifiers=[dependency_id])])

    # Relationship: this object is included in the linked object.
    linked_id = premis.identifier(
        identifier_type=linking_sip_type,
        identifier_value=linking_sip_id,
        prefix='object')
    relationship = premis.relationship(
        relationship_type='structural',
        relationship_subtype='is included in',
        related_object=linked_id)

    # The report object itself gets a freshly generated identifier.
    report_id = premis.identifier(
        'preservation-object-id', str(uuid.uuid4()))
    return premis.object(
        object_id=report_id,
        original_name=metadata_info['filename'],
        child_elements=[environment, relationship],
        representation=True)
def create_premis_object(fname, scraper, file_format=None, checksum=None,
                         date_created=None, charset=None, identifier=None,
                         format_registry=None):
    """Build the PREMIS object element describing a scraped file.

    :fname: File name of the digital object
    :scraper: Scraper whose info, mimetype, version, streams and checksum
              are used for every value not given explicitly
    :file_format: File format name and version (two items)
    :checksum: Checksum algorithm and value (two items)
    :date_created: Creation date; defaults to ``creation_date(fname)``
    :charset: Character encoding; for text streams defaults to the
              charset of the first stream
    :identifier: Identifier type and value (two items); a UUID is
                 generated when not given
    :format_registry: Two items passed on to ``premis.format_registry``
    :returns: PREMIS object element
    :raises: IOError if the first scraper info entry is a 'FileExists'
             entry with errors. ValueError if any info entry is a
             'ScraperNotFound' entry or the charset is not allowed.
    """
    unset = (None, ())

    first_info = scraper.info[0]
    if first_info['class'] == 'FileExists' and len(first_info['errors']) > 0:
        raise IOError(first_info['errors'])
    if any(entry['class'] == 'ScraperNotFound'
           for _, entry in six.iteritems(scraper.info)):
        raise ValueError('File format is not supported.')

    if checksum in unset:
        message_digest = scraper.checksum(algorithm='md5')
        digest_algorithm = 'MD5'
    else:
        digest_algorithm, message_digest = checksum[0], checksum[1]

    if file_format in unset:
        format_name = scraper.mimetype
        # Start from the default version of predefined mimetypes ...
        format_version = DEFAULT_VERSIONS.get(format_name)
        # ... let a version found by the scraper take precedence ...
        if scraper.version and scraper.version != UNKNOWN_VERSION:
            format_version = scraper.version
        # ... and blank it for formats that have no version at all.
        if format_version == NO_VERSION:
            format_version = ''
    else:
        format_name, format_version = file_format[0], file_format[1]

    if not charset and scraper.streams[0]['stream_type'] == 'text':
        charset = scraper.streams[0]['charset']

    check_metadata(format_name, format_version, scraper.streams, fname)

    if charset:
        if charset not in ALLOWED_CHARSETS:
            raise ValueError('Invalid charset.')
        format_name += '; charset={}'.format(charset)

    if date_created is None:
        date_created = creation_date(fname)

    if identifier in unset:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    else:
        id_type, id_value = identifier[0], identifier[1]
    object_identifier = premis.identifier(
        identifier_type=id_type, identifier_value=id_value)

    fixity = premis.fixity(message_digest, digest_algorithm)

    # The registry element is only added when the caller supplied one.
    format_children = [premis.format_designation(format_name, format_version)]
    if format_registry not in unset:
        format_children.append(
            premis.format_registry(format_registry[0], format_registry[1]))
    format_element = premis.format(child_elements=format_children)

    creating_app = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_app])

    # Create object element
    return premis.object(object_identifier, child_elements=[characteristics])
def create_premis_object(fname, streams, **attributes):
    """
    Build the PREMIS object element for a digital object file.

    :fname: File name of the digital object
    :streams: Streams from the Scraper
    :attributes: The following keys:
                 charset: Character encoding of a file,
                 file_format: File format and version (tuple) of a file,
                 format_registry: Format registry name and value (tuple),
                 identifier: File identifier type and value (tuple),
                 checksum: Checksum algorithm and value (tuple),
                 date_created: Creation date of a file
    :returns: PREMIS object as etree
    :raises: ValueError if character set is invalid for text files.
    """
    attributes = _attribute_values(attributes)
    if not attributes["checksum"]:
        attributes["checksum"] = ("MD5", calc_checksum(fname))
    date_created = attributes["date_created"] or creation_date(fname)

    main_stream = streams[0]

    # A character set is only recorded for text files.
    charset = None
    if main_stream['stream_type'] == 'text':
        charset = attributes["charset"] or main_stream['charset']

    given_format = attributes["file_format"]
    if given_format:
        file_format = (given_format[0], given_format[1])
    else:
        detected_version = main_stream["version"]
        if not detected_version or detected_version == UNKNOWN_VERSION:
            # Nothing usable was detected: use the mimetype's default.
            format_version = DEFAULT_VERSIONS.get(
                main_stream["mimetype"], None)
        elif detected_version == NO_VERSION:
            format_version = ''
        else:
            format_version = detected_version
        file_format = (main_stream["mimetype"], format_version)

    check_metadata(file_format[0], file_format[1], streams, fname)

    charset_mime = ""
    if charset:
        if charset not in ALLOWED_CHARSETS:
            raise ValueError('Invalid charset.')
        charset_mime = '; charset={}'.format(charset)

    given_identifier = attributes["identifier"]
    if given_identifier:
        id_type, id_value = given_identifier[0], given_identifier[1]
    else:
        id_type, id_value = 'UUID', six.text_type(uuid4())
    object_identifier = premis.identifier(
        identifier_type=id_type,
        identifier_value=id_value
    )

    # The checksum tuple is (algorithm, value); fixity takes value first.
    fixity = premis.fixity(attributes["checksum"][1],
                           attributes["checksum"][0])

    format_children = [
        premis.format_designation(file_format[0] + charset_mime,
                                  file_format[1])
    ]
    if attributes["format_registry"]:
        format_children.append(
            premis.format_registry(attributes["format_registry"][0],
                                   attributes["format_registry"][1]))
    format_element = premis.format(child_elements=format_children)

    creating_app = premis.creating_application(
        child_elements=[premis.date_created(date_created)])
    characteristics = premis.object_characteristics(
        child_elements=[fixity, format_element, creating_app])

    # Create object element
    return premis.object(object_identifier, child_elements=[characteristics])