def get_date_modified(directory): ''' Returns the date modified of a file in DD/MM/YYYY, which is the format used for the Object Entry register. yes, we should be using ISO8601 but we'll fix this later. ''' file_list = ififuncs.recursive_file_list(directory) # This will blindly use the first video file it encounters. # This is fine for this project as all the objects folders contain single files. extension = os.path.splitext(file_list[0])[1] return time.strftime('%m/%d/%Y', time.gmtime(os.path.getmtime( file_list[0]))), extension
def main(args_): # if multiple file are present, this script will treat them as a single # instantiation/representation and get aggregate metadata about the whole # package. For now, this will be a clumsy implementation - the first file # will provide most metadata. Things like duration/bitrate/filesize # will be calculated as a whole. # Although another way would be that every call is looped, and if # this could catch files that should not be in the package, eg. a 4:2:2 # file in a 4:2:0 package.. # yup - do it that way! args = parse_args(args_) all_files = ififuncs.recursive_file_list(args.input) silence = True if args.user: user = args.user else: user = ififuncs.get_user() for dirs in os.listdir(args.input): if ififuncs.validate_uuid4(dirs) is None: instantiationIdentif = dirs Accession_Number = get_accession_number(args.input) Reference_Number = get_reference_number(args.input) if args.p: for root, _, filenames in os.walk(args.input): if os.path.basename(root) == 'metadata': metadata_dir = root elif os.path.basename(root) == 'logs': logs_dir = root csv_filename = os.path.join(metadata_dir, Accession_Number + '.csv') sipcreator_log = os.path.join(logs_dir, instantiationIdentif + '_sip_log.log') ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started') ififuncs.generate_log( sipcreator_log, 'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py')) ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args) ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user) else: csv_filename = 'blaa.csv' print ' - Metadata will be stored in %s' % csv_filename for filenames in os.listdir(args.input): if '_manifest.md5' in filenames: md5_manifest = os.path.join(args.input, filenames) elif 'manifest-sha512.txt' in filenames: sha512_manifest = os.path.join(args.input, filenames) make_csv(csv_filename) ms = 0 FrameCount = 0 instantFileSize = 0 instantFileSize_gigs = 0 for source in all_files: metadata = subprocess.check_output( ['mediainfo', '--Output=PBCore2', source]) root = etree.fromstring(metadata) print(' - Analysing %s') % source pbcore_namespace = root.xpath('namespace-uri(.)') track_type = root.xpath('//ns:essenceTrackType', namespaces={'ns': pbcore_namespace}) if len(track_type) > 0: for track in track_type: if track.text == 'Video': essenceTrackEncodvid = get_metadata( "ns:essenceTrackEncoding", track.getparent(), pbcore_namespace) vcodec_attributes = get_attributes(track.getparent(), pbcore_namespace) elif track.text == 'Audio': silence = False essenceTrackEncod_au = get_metadata( "ns:essenceTrackEncoding", track.getparent(), pbcore_namespace) acodec_attributes = get_attributes(track.getparent(), pbcore_namespace) ScanType = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ScanType']", root, pbcore_namespace) matrix_coefficients = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='matrix_coefficients']", root, pbcore_namespace) transfer_characteris = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='transfer_characteristics']", root, pbcore_namespace) colour_primaries = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='color_primaries']", root, pbcore_namespace) FrameCount += int( get_metadata( "//ns:essenceTrackAnnotation[@annotationType='FrameCount']", root, pbcore_namespace)) instantFileSize += int( get_metadata("//ns:instantiationFileSize", root, pbcore_namespace)) instantDataRate = round( float( ififuncs.get_mediainfo('OverallBitRate', '--inform=General;%OverallBitRate%', source)) / 1000 / 1000, 2) ms += ififuncs.get_milliseconds(source) ColorSpace = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ColorSpace']", root, pbcore_namespace) ChromaSubsampling = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ChromaSubsampling']", root, pbcore_namespace) instantMediaty = get_metadata("//ns:instantiationMediaType", root, pbcore_namespace) essenceFrameSize = get_metadata("//ns:essenceTrackFrameSize", root, pbcore_namespace) essenceAspectRatio = get_metadata("//ns:essenceTrackAspectRatio", root, pbcore_namespace) PixelAspectRatio = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='PixelAspectRatio']", root, pbcore_namespace) instantiationStandar = get_metadata( "//ns:instantiationAnnotation[@annotationType='Format']", root, pbcore_namespace) essenceFrameRate = get_metadata("//ns:essenceTrackFrameRate", root, pbcore_namespace) essenceTrackSampling = get_metadata("//ns:essenceTrackSamplingRate", root, pbcore_namespace) Interlacement = get_metadata( "//ns:instantiationAnnotation[@annotationType='Interlacement']", root, pbcore_namespace) Compression_Mode = get_metadata( "//ns:instantiationAnnotation[@annotationType='Compression_Mode']", root, pbcore_namespace) instantiationDate_modified = get_metadata( "//ns:instantiationDate[@dateType='file modification']", root, pbcore_namespace) pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video') audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio') if not silence: audio_codecid = acodec_attributes['ref'] essenceBitDepth_au = ififuncs.get_mediainfo( 'duration', '--inform=Audio;%BitDepth%', source) else: audio_codecid = 'n/a' essenceBitDepth_au = 'n/a' essenceTrackEncod_au = 'n/a' video_codecid = vcodec_attributes['ref'] try: video_codec_version = vcodec_attributes['version'] except KeyError: video_codec_version = 'n/a' try: video_codec_profile = vcodec_attributes['annotation'][8:] except KeyError: video_codec_profile = 'n/a' tc = ififuncs.convert_millis(ms) instantiationDuratio = ififuncs.convert_timecode(25, tc) Donor = '' Edited_By = user Date_Created = '' Date_Last_Modified = '' Film_Or_Tape = 'Digital File' Date_Of_Donation = '' Habitat = '' Type_Of_Deposit = '' Depositor_Reference = '' Master_Viewing = 'Preservation Master' Language_Version = '' Condition_Rating = '' Companion_Elements = '' EditedNew = user FIO = 'In' CollectionTitle = '' Created_By = user instantiationDimensi = '' instantiationLocatio = '' instantTimeStart = '' instantFileSize_gigs = round( float(instantFileSize) / 1024 / 1024 / 1024, 3) instantColors = '' instantLanguage = '' instantAltMo = 'n/a' essenceBitDepth_vid = ififuncs.get_mediainfo('duration', '--inform=Video;%BitDepth%', source) instantiationChanCon = '' ififuncs.append_csv(csv_filename, [ Reference_Number, Donor, Edited_By, Date_Created, Date_Last_Modified, Film_Or_Tape, Date_Of_Donation, Accession_Number, Habitat, Type_Of_Deposit, Depositor_Reference, Master_Viewing, Language_Version, Condition_Rating, Companion_Elements, EditedNew, FIO, CollectionTitle, Created_By, instantiationIdentif, instantiationDate_modified, instantiationDimensi, instantiationStandar, instantiationLocatio, instantMediaty, instantFileSize, instantFileSize_gigs, instantTimeStart, instantDataRate, instantColors, instantLanguage, instantAltMo, essenceTrackEncodvid, essenceFrameRate, essenceTrackSampling, essenceBitDepth_vid, essenceFrameSize, essenceAspectRatio, essenceTrackEncod_au, essenceBitDepth_au, instantiationDuratio, instantiationChanCon, PixelAspectRatio, FrameCount, ColorSpace, ChromaSubsampling, ScanType, Interlacement, Compression_Mode, colour_primaries, transfer_characteris, matrix_coefficients, pix_fmt, audio_fmt, audio_codecid, video_codecid, video_codec_version, video_codec_profile ]) if args.p: ififuncs.generate_log( sipcreator_log, 'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore' % (csv_filename)) ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py finished') ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5') ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512') ififuncs.manifest_update(md5_manifest, csv_filename) print ' - Updating %s with %s' % (md5_manifest, csv_filename) ififuncs.sha512_update(sha512_manifest, csv_filename) print ' - Updating %s with %s' % (sha512_manifest, csv_filename)
def main(args_): # if multiple file are present, this script will treat them as a single # instantiation/representation and get aggregate metadata about the whole # package. For now, this will be a clumsy implementation - the first file # will provide most metadata. Things like duration/bitrate/filesize # will be calculated as a whole. # Although another way would be that every call is looped, and if # this could catch files that should not be in the package, eg. a 4:2:2 # file in a 4:2:0 package.. # yup - do it that way! args = parse_args(args_) all_files = ififuncs.recursive_file_list(args.input) silence = True if args.user: user = args.user else: user = ififuncs.get_user() acquisition_type = '' if args.acquisition_type: acquisition_type = ififuncs.get_acquisition_type( args.acquisition_type)[0] for dirs in os.listdir(args.input): if ififuncs.validate_uuid4(dirs) is None: instantiationIdentif = dirs Accession_Number = get_accession_number(args.input) if args.reference: Reference_Number = args.reference.upper() else: Reference_Number = get_reference_number(args.input) if args.p: for root, _, filenames in os.walk(args.input): if os.path.basename(root) == 'metadata': metadata_dir = root elif os.path.basename(root) == 'logs': logs_dir = root csv_filename = os.path.join(metadata_dir, Accession_Number + '_pbcore.csv') sipcreator_log = os.path.join(logs_dir, instantiationIdentif + '_sip_log.log') ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started') ififuncs.generate_log( sipcreator_log, 'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py')) ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args) ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user) else: csv_filename = 'blaa.csv' print ' - Metadata will be stored in %s' % csv_filename for filenames in os.listdir(args.input): if '_manifest.md5' in filenames: md5_manifest = os.path.join(args.input, filenames) elif 'manifest-sha512.txt' in filenames: sha512_manifest = os.path.join(args.input, filenames) make_csv(csv_filename) ms = 0 FrameCount = 0 instantFileSize_byte = 0 instantFileSize_gigs = 0 scan_types = [] matrix_list = [] transfer_list = [] colour_primaries_list = [] color_spaces = [] chroma = [] frame_sizes = [] par_list = [] container_list = [] fps_list = [] sample_rate_list = [] track_count_list = [] interlace_list = [] compression_list = [] pix_fmt_list = [] audio_fmt_list = [] audio_codecid_list = [] audio_codec_list = [] au_bitdepth_list = [] video_codecid_list = [] video_codec_version_list = [] video_codec_profile_list = [] timecode_list = [] channels_list = [] for source in all_files: metadata = subprocess.check_output( ['mediainfo', '--Output=PBCore2', source]) root = etree.fromstring(metadata) print(' - Analysing %s') % source pbcore_namespace = root.xpath('namespace-uri(.)') track_type = root.xpath('//ns:essenceTrackType', namespaces={'ns': pbcore_namespace}) if len(track_type) > 0: for track in track_type: if track.text == 'Video': essenceTrackEncodvid = get_metadata( "ns:essenceTrackEncoding", track.getparent(), pbcore_namespace) vcodec_attributes = get_attributes(track.getparent(), pbcore_namespace) elif track.text == 'Audio': silence = False essenceTrackEncod_au = get_metadata( "//ns:essenceTrackEncoding", track.getparent(), pbcore_namespace) audio_codec_list.append(essenceTrackEncod_au) acodec_attributes = get_attributes(track.getparent(), pbcore_namespace) try: audio_codecid = acodec_attributes['ref'] except KeyError: audio_codecid = 'n/a' essenceTrackSampling = ififuncs.get_mediainfo( 'samplerate', '--inform=Audio;%SamplingRate_String%', source) sample_rate_list.append(essenceTrackSampling) essenceBitDepth_au = get_metadata( "//ns:essenceTrackBitDepth", root, pbcore_namespace) audio_codecid_list.append(audio_codecid) au_bitdepth_list.append(essenceBitDepth_au) channels = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='Channel(s)']", track.getparent(), pbcore_namespace) channels_list.append(channels) ScanType = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ScanType']", root, pbcore_namespace) scan_types.append(ScanType) matrix_coefficients = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='matrix_coefficients']", root, pbcore_namespace) timecode_source, starting_timecode = get_timecode( pbcore_namespace, root, source) timecode_list.append(starting_timecode) matrix_list.append(matrix_coefficients) transfer_characteris = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='transfer_characteristics']", root, pbcore_namespace) transfer_list.append(transfer_characteris) colour_primaries = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='color_primaries']", root, pbcore_namespace) colour_primaries_list.append(colour_primaries) FrameCount += int( get_metadata( "//ns:essenceTrackAnnotation[@annotationType='FrameCount']", root, pbcore_namespace)) instantFileSize_byte += int( get_metadata("//ns:instantiationFileSize", root, pbcore_namespace)) instantDataRate = round( float( ififuncs.get_mediainfo('OverallBitRate', '--inform=General;%OverallBitRate%', source)) / 1000 / 1000, 2) instantTracks = ififuncs.get_number_of_tracks(source) track_count_list.append(instantTracks) ms += ififuncs.get_milliseconds(source) ColorSpace = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ColorSpace']", root, pbcore_namespace) color_spaces.append(ColorSpace) ChromaSubsampling = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ChromaSubsampling']", root, pbcore_namespace) chroma.append(ChromaSubsampling) instantMediaty = get_metadata("//ns:instantiationMediaType", root, pbcore_namespace) essenceFrameSize = get_metadata("//ns:essenceTrackFrameSize", root, pbcore_namespace) frame_sizes.append(essenceFrameSize) PixelAspectRatio = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='PixelAspectRatio']", root, pbcore_namespace) par_list.append(PixelAspectRatio) instantiationStandar = get_metadata( "//ns:instantiationAnnotation[@annotationType='Format']", root, pbcore_namespace) container_list.append(instantiationStandar) essenceFrameRate = get_metadata("//ns:essenceTrackFrameRate", root, pbcore_namespace) fps_list.append(essenceFrameRate) essenceAspectRatio = ififuncs.get_mediainfo( 'DAR', '--inform=Video;%DisplayAspectRatio_String%', source) Interlacement = get_metadata( "//ns:instantiationAnnotation[@annotationType='Interlacement']", root, pbcore_namespace) interlace_list.append(Interlacement) Compression_Mode = get_metadata( "//ns:instantiationAnnotation[@annotationType='Compression_Mode']", root, pbcore_namespace) colour_range = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='colour_range']", root, pbcore_namespace) format_version = get_metadata( "//ns:instantiationAnnotation[@annotationType='Format_Version']", root, pbcore_namespace) app_company_name = get_metadata( "//ns:instantiationAnnotation[@annotationType='Encoded_Application_CompanyName']", root, pbcore_namespace) app_name = get_metadata( "//ns:instantiationAnnotation[@annotationType='Encoded_Application_Name']", root, pbcore_namespace) app_version = get_metadata( "//ns:instantiationAnnotation[@annotationType='Encoded_Application_Version']", root, pbcore_namespace) library_name = get_metadata( "//ns:instantiationAnnotation[@annotationType='Encoded_Library_Name']", root, pbcore_namespace) library_version = get_metadata( "//ns:instantiationAnnotation[@annotationType='Encoded_Library_Version']", root, pbcore_namespace) compression_list.append(Compression_Mode) instantiationDate_mo = get_metadata( "//ns:instantiationDate[@dateType='file modification']", root, pbcore_namespace) instantDate_other = 'n/a' instantDate_type = 'n/a' pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video') pix_fmt_list.append(pix_fmt) audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio') audio_fmt_list.append(audio_fmt) if silence: audio_codecid = 'n/a' essenceBitDepth_au = 'n/a' essenceTrackEncod_au = 'n/a' essenceTrackSampling = 'n/a' video_codecid = vcodec_attributes['ref'] video_codecid_list.append(video_codecid) try: video_codec_version = vcodec_attributes['version'] except KeyError: video_codec_version = 'n/a' try: video_codec_profile = vcodec_attributes['annotation'][8:] except KeyError: video_codec_profile = 'n/a' video_codec_version_list.append(video_codec_version) video_codec_profile_list.append(video_codec_profile) metadata_error = '' metadata_list = [ scan_types, matrix_list, transfer_list, colour_primaries_list, color_spaces, chroma, frame_sizes, par_list, container_list, fps_list, sample_rate_list, track_count_list, interlace_list, compression_list, pix_fmt_list, audio_fmt_list, audio_codecid_list, audio_codec_list, au_bitdepth_list, video_codecid_list, video_codec_version_list, video_codec_profile_list, channels_list, timecode_list ] for i in metadata_list: if len(set(i)) > 1: metadata_error += 'WARNING - Your metadata values are not the same for all files: %s\n' % set( i) print metadata_error if args.p: ififuncs.generate_log( sipcreator_log, 'EVENT = Metadata mismatch - Your metadata values are not the same for all files: %s' % set(i)) tc = ififuncs.convert_millis(ms) instantiationDuratio = ififuncs.convert_timecode(25, tc) if args.donor: Donor = args.donor else: Donor = '' Edited_By = user Date_Created = '' Date_Last_Modified = '' Film_Or_Tape = 'Digital AV Object' Date_Of_Donation = '' reproduction_creator = '' if args.acquisition_type: if acquisition_type == 'Reproduction': Date_Of_Donation = instantiationDate_mo.split('T')[0] # if a reproduction, then there's no Donor/transfer of title. Donor = 'n/a' if ififuncs.find_concat_user( sipcreator_log) == 'Aoife Fitzmaurice': reproduction_creator = 'Aoife Fitzmaurice (2016)' elif ififuncs.find_concat_user( sipcreator_log) == 'Kieran O\'Leary': reproduction_creator = 'Kieran O\'Leary (2013)' Habitat = '' backup_habitat = '' Type_Of_Deposit = acquisition_type if args.depositor_reference: Depositor_Reference = args.depositor_reference else: Depositor_Reference = '' Master_Viewing = 'Preservation Object' Language_Version = '' Condition_Rating = '' Companion_Elements = '' TTape_Origin = args.parent EditedNew = user FIO = 'In' CollectionTitle = '' Created_By = user instantTimeStart = 'n/a' instantFileSize_gigs = round( float(instantFileSize_byte) / 1024 / 1024 / 1024, 3) instantColors = 'n/a' instantLanguage = 'n/a' instantAltMo = 'n/a' essenceBitDepth_vid = ififuncs.get_mediainfo('duration', '--inform=Video;%BitDepth%', source) instantiationChanCon = 'n/a' colour_range = colour_range format_version = format_version TimeCode_FirstFrame = process_mixed_values(timecode_list) TimeCode_Source = timecode_source reproduction_reason = '' dig_object_descrip = ififuncs.get_digital_object_descriptor(args.input) ififuncs.append_csv(csv_filename, [ Reference_Number, Donor, Edited_By, Date_Created, Date_Last_Modified, Film_Or_Tape, Date_Of_Donation, Accession_Number, Habitat, backup_habitat, TTape_Origin, Type_Of_Deposit, Depositor_Reference, Master_Viewing, Language_Version, Condition_Rating, Companion_Elements, EditedNew, FIO, CollectionTitle, Created_By, instantiationIdentif, instantDate_other, instantDate_type, instantiationDate_mo, instantiationStandar, instantMediaty, instantFileSize_byte, instantFileSize_gigs, instantTimeStart, instantDataRate, instantTracks, instantColors, instantLanguage, instantAltMo, essenceTrackEncodvid, essenceFrameRate, essenceTrackSampling, essenceBitDepth_vid, essenceFrameSize, essenceAspectRatio, essenceTrackEncod_au, essenceBitDepth_au, instantiationDuratio, instantiationChanCon, PixelAspectRatio, FrameCount, ColorSpace, ChromaSubsampling, ScanType, Interlacement, Compression_Mode, colour_primaries, transfer_characteris, matrix_coefficients, pix_fmt, audio_fmt, audio_codecid, video_codecid, video_codec_version, video_codec_profile, channels, colour_range, format_version, TimeCode_FirstFrame, TimeCode_Source, app_company_name, app_name, app_version, library_name, library_version, reproduction_creator, reproduction_reason, dig_object_descrip, ]) if args.p: ififuncs.generate_log( sipcreator_log, 'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore' % (csv_filename)) ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py finished') ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5') ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512') ififuncs.manifest_update(md5_manifest, csv_filename) print ' - Updating %s with %s' % (md5_manifest, csv_filename) ififuncs.sha512_update(sha512_manifest, csv_filename) print ' - Updating %s with %s' % (sha512_manifest, csv_filename) print metadata_error
def main(args_): # if multiple file are present, this script will treat them as a single # instantiation/representation and get aggregate metadata about the whole # package. For now, this will be a clumsy implementation - the first file # will provide most metadata. Things like duration/bitrate/filesize # will be calculated as a whole. # Although another way would be that every call is looped, and if # this could catch files that should not be in the package, eg. a 4:2:2 # file in a 4:2:0 package.. # yup - do it that way! args = parse_args(args_) all_files = ififuncs.recursive_file_list(args.input) silence = True audio_only = True if args.user: user = args.user else: user = ififuncs.get_user() acquisition_type = '' if args.acquisition_type: acquisition_type = ififuncs.get_acquisition_type( args.acquisition_type)[0] instantiationIdentif = '' for dirs in os.listdir(args.input): if ififuncs.validate_uuid4(dirs) is None: instantiationIdentif = dirs Accession_Number = get_accession_number(args.input) if args.reference: Reference_Number = args.reference.upper() else: Reference_Number = get_reference_number(args.input) if args.p: for root, _, filenames in os.walk(args.input): if os.path.basename(root) == 'metadata': metadata_dir = root elif os.path.basename(root) == 'logs': logs_dir = root csv_filename = os.path.join( metadata_dir, Accession_Number + '_%s_pbcore.csv' % Reference_Number) sipcreator_log = os.path.join(logs_dir, instantiationIdentif + '_sip_log.log') ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started') ififuncs.generate_log( sipcreator_log, 'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py')) ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args) ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user) else: csv_filename = 'blaa.csv' print((' - Metadata will be stored in %s' % csv_filename)) for filenames in os.listdir(args.input): if '_manifest.md5' in filenames: md5_manifest = os.path.join(args.input, filenames) elif 'manifest-sha512.txt' in filenames: sha512_manifest = os.path.join(args.input, filenames) make_csv(csv_filename) ms = 0 FrameCount = 0 instantFileSize_byte = 0 instantFileSize_gigs = 0 scan_types = [] matrix_list = [] transfer_list = [] colour_primaries_list = [] color_spaces = [] chroma = [] frame_sizes = [] par_list = [] container_list = [] fps_list = [] sample_rate_list = [] track_count_list = [] interlace_list = [] compression_list = [] pix_fmt_list = [] audio_fmt_list = [] audio_codecid_list = [] audio_codec_list = [] au_bitdepth_list = [] video_codecid_list = [] video_codec_version_list = [] video_codec_profile_list = [] timecode_list = [] channels_list = [] stl = False subtitle_check = ififuncs.get_digital_object_descriptor(args.input) if 'STL' in subtitle_check: stl = True for source in all_files: metadata = subprocess.check_output( ['mediainfo', '--Output=PBCore2', source]) new_metadata = subprocess.check_output( ['mediainfo', '--Output=XML', source]) try: root = etree.fromstring(metadata) new_root = etree.fromstring(new_metadata) except lxml.etree.XMLSyntaxError: print('Windows encoding detected - transforming into utf-8') root = etree.fromstring(metadata.decode('cp1252').encode('utf-8')) new_root = etree.fromstring( new_metadata.decode('cp1252').encode('utf-8')) print(((' - Analysing %s') % source)) pbcore_namespace = root.xpath('namespace-uri(.)') mediainfo_namespace = new_root.xpath('namespace-uri(.)') track_type = root.xpath('//ns:essenceTrackType', namespaces={'ns': pbcore_namespace}) new_track_type = new_root.xpath('//ns:track', namespaces={'ns': mediainfo_namespace}) if len(new_track_type) > 0: for track in new_track_type: if track.attrib['type'] == 'Video': audio_only = False essenceTrackEncodvid = ififuncs.get_metadata( "ns:Format", track, mediainfo_namespace) #vcodec_attributes = get_attributes(track.getparent(), pbcore_namespace) #vcodec_attributes = 'TODO' video_codecid = ififuncs.get_metadata( "ns:CodecID", track, mediainfo_namespace) video_codec_version = ififuncs.get_metadata( "ns:Format_Version", track, mediainfo_namespace) video_codec_profile = ififuncs.get_metadata( "ns:Format_Profile", track, mediainfo_namespace) video_codec_version_list.append(video_codec_version) video_codec_profile_list.append(video_codec_profile) elif track.attrib['type'] == 'Audio': silence = False essenceTrackEncod_au = ififuncs.get_metadata( "ns:Format", track, mediainfo_namespace) audio_codec_list.append(essenceTrackEncod_au) #acodec_attributes = get_attributes(track.getparent(), pbcore_namespace) audio_codecid = ififuncs.get_metadata( "ns:CodecID", track, mediainfo_namespace) essenceTrackSampling = ififuncs.get_mediainfo( 'samplerate', '--inform=Audio;%SamplingRate_String%', source) sample_rate_list.append(essenceTrackSampling) essenceBitDepth_au = ififuncs.get_metadata( "ns:BitDepth", track, mediainfo_namespace) audio_codecid_list.append(audio_codecid) au_bitdepth_list.append(essenceBitDepth_au) channels = ififuncs.get_metadata("//ns:Channels", track, mediainfo_namespace) channels_list.append(channels) if audio_only: essenceTrackEncodvid = 'n/a' video_codecid = 'n/a' video_codec_version = 'n/a' video_codec_profile = 'n/a' ScanType = ififuncs.get_metadata("//ns:ScanType", new_root, mediainfo_namespace) scan_types.append(ScanType) matrix_coefficients = ififuncs.get_metadata("//ns:matrix_coefficients", new_root, mediainfo_namespace) timecode_source, starting_timecode = get_timecode( pbcore_namespace, root, source) timecode_list.append(starting_timecode) matrix_list.append(matrix_coefficients) transfer_characteris = ififuncs.get_metadata( "//ns:transfer_characteristics", new_root, mediainfo_namespace) transfer_list.append(transfer_characteris) colour_primaries = ififuncs.get_metadata("//ns:colour_primaries", new_root, mediainfo_namespace) colour_primaries_list.append(colour_primaries) try: if audio_only: FrameCount = 'n/a' else: # increment if multiple objects are present try: FrameCount += int( ififuncs.get_metadata("//ns:FrameCount", new_root, mediainfo_namespace)) except ValueError: # don't increment if multiple values are returned as str FrameCount = ififuncs.get_metadata("//ns:FrameCount", new_root, mediainfo_namespace) except TypeError: # workaround for silent pic in DCP FrameCount = 'n/a' instantFileSize_byte += int( ififuncs.get_metadata("//ns:FileSize", new_root, mediainfo_namespace)) instantDataRate = round( float( ififuncs.get_mediainfo('OverallBitRate', '--inform=General;%OverallBitRate%', source)) / 1000 / 1000, 2) instantTracks = ififuncs.get_number_of_tracks(source) track_count_list.append(instantTracks) if stl is True: track_count_list.append('STL sidecar') ms += ififuncs.get_milliseconds(source) ColorSpace = ififuncs.get_metadata("//ns:ColorSpace", new_root, mediainfo_namespace) color_spaces.append(ColorSpace) ChromaSubsampling = get_metadata("//ns:ChromaSubsampling", new_root, mediainfo_namespace) chroma.append(ChromaSubsampling) instantMediaty = get_metadata("//ns:instantiationMediaType", root, pbcore_namespace) if audio_only: essenceFrameSize = 'n/a' else: essenceFrameSize = get_metadata("//ns:essenceTrackFrameSize", root, pbcore_namespace) frame_sizes.append(essenceFrameSize) PixelAspectRatio = ififuncs.get_metadata("//ns:PixelAspectRatio", new_root, mediainfo_namespace) par_list.append(PixelAspectRatio) general_root = new_root.xpath("//ns:track[@type='General']", namespaces={'ns': mediainfo_namespace})[0] instantiationStandar = ififuncs.get_metadata("ns:Format", general_root, mediainfo_namespace) container_list.append(instantiationStandar) essenceFrameRate = ififuncs.get_metadata("//ns:FrameRate", new_root, mediainfo_namespace) fps_list.append(essenceFrameRate) essenceAspectRatio = ififuncs.get_mediainfo( 'DAR', '--inform=Video;%DisplayAspectRatio_String%', source) Interlacement = ififuncs.get_metadata("//ns:ScanOrder", new_root, mediainfo_namespace) # FFV1/MKV seems to have this scanorder metadata here rather than Interlacement # FFV1/MKV is the only example I've seen so far that behaves like this :| # It could be that Interlacement is set at a codec level for FFV1, but others are # declared at the container level.. if Interlacement == 'n/a': Interlacement = get_metadata( "//ns:essenceTrackAnnotation[@annotationType='ScanOrder']", root, pbcore_namespace) interlace_list.append(Interlacement) Compression_Mode = ififuncs.get_metadata("//ns:Compression_Mode", new_root, mediainfo_namespace) colour_range = ififuncs.get_metadata("//ns:colour_range", new_root, mediainfo_namespace) # this needs to be clarified as it exists in general and codec format_version = ififuncs.get_metadata("ns:Format_Version", general_root, mediainfo_namespace) app_company_name = ififuncs.get_metadata( "//ns:Encoded_Application_CompanyName", new_root, mediainfo_namespace) app_name = ififuncs.get_metadata("//ns:Encoded_Application_Name", new_root, mediainfo_namespace) app_version = ififuncs.get_metadata("//ns:Encoded_Application_Version", new_root, mediainfo_namespace) library_name = ififuncs.get_metadata("//ns:Encoded_Library_Name", new_root, mediainfo_namespace) if library_name == 'n/a': library_name = ififuncs.get_metadata("//ns:Encoded_Library", general_root, mediainfo_namespace) library_version = ififuncs.get_metadata("//ns:Encoded_Library_Version", new_root, mediainfo_namespace) compression_list.append(Compression_Mode) instantiationDate_mo = get_metadata( "//ns:instantiationDate[@dateType='file modification']", root, pbcore_namespace) instantDate_other = 'n/a' instantDate_type = 'n/a' pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video') pix_fmt_list.append(pix_fmt) audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio') audio_fmt_list.append(audio_fmt) essenceBitDepth_vid = ififuncs.get_mediainfo( 'duration', '--inform=Video;%BitDepth%', source) if silence: audio_codecid = 'n/a' essenceBitDepth_au = 'n/a' essenceTrackEncod_au = 'n/a' essenceTrackSampling = 'n/a' channels = 'n/a' ''' video_codecid = vcodec_attributes['ref'] video_codecid_list.append(video_codecid) try: video_codec_version = vcodec_attributes['version'] except KeyError: video_codec_version = 'n/a' try: video_codec_profile = vcodec_attributes['annotation'][8:] except KeyError: video_codec_profile = 'n/a' ''' metadata_error = '' metadata_list = [ scan_types, matrix_list, transfer_list, colour_primaries_list, color_spaces, chroma, frame_sizes, par_list, container_list, fps_list, sample_rate_list, track_count_list, interlace_list, compression_list, pix_fmt_list, audio_fmt_list, audio_codecid_list, audio_codec_list, au_bitdepth_list, video_codecid_list, video_codec_version_list, video_codec_profile_list, channels_list, timecode_list ] for i in metadata_list: if len(set(i)) > 1: metadata_error += 'WARNING - Your metadata values are not the same for all files - but this could be a false positive if dealing with atomised audio and video as with DCP: %s\n' % set( i) if args.p: ififuncs.generate_log( sipcreator_log, 'EVENT = Metadata mismatch - Your metadata values are not the same for all files - but this could be a false positive if dealing with atomised audio and video as with DCP: %s' % set(i)) print(metadata_error) tc = ififuncs.convert_millis(ms) instantiationDuratio = ififuncs.convert_timecode(25, tc) if args.donor: Donor = args.donor else: Donor = '' Edited_By = user Date_Created = '' Date_Last_Modified = '' Film_Or_Tape = 'Digital AV Object' Date_Of_Donation = '' if args.reproduction_creator: reproduction_creator = args.reproduction_creator else: reproduction_creator = '' if args.acquisition_type: if acquisition_type == 'Reproduction': Date_Of_Donation = instantiationDate_mo.split('T')[0] # if a reproduction, then there's no Donor/transfer of title. Donor = 'n/a' else: Date_Of_Donation = args.donation_date Habitat = '' backup_habitat = '' Type_Of_Deposit = acquisition_type if args.depositor_reference: Depositor_Reference = args.depositor_reference else: Depositor_Reference = '' Master_Viewing = 'Preservation Object' Language_Version = '' Condition_Rating = '' Companion_Elements = '' TTape_Origin = args.parent EditedNew = user FIO = 'In' CollectionTitle = '' Created_By = user instantTimeStart = 'n/a' instantFileSize_gigs = round( float(instantFileSize_byte) / 1024 / 1024 / 1024, 3) instantColors = 'n/a' instantLanguage = 'n/a' instantAltMo = 'n/a' instantiationChanCon = 'n/a' ''' no idea why these are here colour_range = colour_range format_version = format_version ''' TimeCode_FirstFrame = process_mixed_values(timecode_list) pix_fmt = process_mixed_values(pix_fmt_list) audio_fmt = process_mixed_values(audio_fmt_list) instantTracks = process_mixed_values(track_count_list) TimeCode_Source = timecode_source reproduction_reason = '' dig_object_descrip = ififuncs.get_digital_object_descriptor(args.input) if 'STL' in dig_object_descrip: dig_object_descrip = 'AS-11 package' dcp_check = ififuncs.find_cpl(args.input) if dcp_check is not None: essenceFrameSize, ChromaSubsampling, ColorSpace, FrameCount, essenceAspectRatio, instantiationDuratio, PixelAspectRatio, ScanType, dig_object_descrip, instantTracks, instantDataRate, essenceBitDepth_vid, instantMediaty = check_dcp( dcp_check) ififuncs.append_csv(csv_filename, [ Reference_Number, Donor, Edited_By, Date_Created, Date_Last_Modified, Film_Or_Tape, Date_Of_Donation, Accession_Number, Habitat, backup_habitat, TTape_Origin, Type_Of_Deposit, Depositor_Reference, Master_Viewing, Language_Version, Condition_Rating, Companion_Elements, EditedNew, FIO, CollectionTitle, Created_By, instantiationIdentif, instantDate_other, instantDate_type, instantiationDate_mo, instantiationStandar, instantMediaty, instantFileSize_byte, instantFileSize_gigs, instantTimeStart, instantDataRate, instantTracks, instantColors, instantLanguage, instantAltMo, essenceTrackEncodvid, essenceFrameRate, essenceTrackSampling, essenceBitDepth_vid, essenceFrameSize, essenceAspectRatio, essenceTrackEncod_au, essenceBitDepth_au, instantiationDuratio, instantiationChanCon, PixelAspectRatio, FrameCount, ColorSpace, ChromaSubsampling, ScanType, Interlacement, Compression_Mode, colour_primaries, transfer_characteris, matrix_coefficients, pix_fmt, audio_fmt, audio_codecid, video_codecid, video_codec_version, video_codec_profile, channels, colour_range, format_version, TimeCode_FirstFrame, TimeCode_Source, app_company_name, app_name, app_version, library_name, library_version, reproduction_creator, reproduction_reason, dig_object_descrip, ]) if args.p: ififuncs.generate_log( sipcreator_log, 'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore' % (csv_filename)) ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py finished') ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5') ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512') ififuncs.manifest_update(md5_manifest, csv_filename) print((' - Updating %s with %s' % (md5_manifest, csv_filename))) ififuncs.sha512_update(sha512_manifest, csv_filename) print((' - Updating %s with %s' % (sha512_manifest, csv_filename))) print(metadata_error)