def create_metadata(source):
    '''
    Recursively finds image sequences and creates mediainfo/mediatrace in
    parent directory.

    Every directory below ``source`` is checked for ``*.dpx``, then
    ``*.tiff``, then ``*.tif`` files. For the first kind that is present, the
    first file in sorted order is passed to make_mediainfo() and
    make_mediatrace(), and the XML paths are built from the directory's
    parent plus that file name.

    The current working directory is changed to each visited directory and
    is left at the last one visited.
    '''
    # os.walk() resolves a relative top against the current directory each
    # time it descends, so it must be given an absolute path because the loop
    # body calls os.chdir().
    for root, _, _ in os.walk(os.path.abspath(source)):
        os.chdir(root)
        # glob() returns names in arbitrary order; sort so that the same
        # frame is chosen on every run.
        images = (
            sorted(glob('*.dpx'))
            or sorted(glob('*.tiff'))
            or sorted(glob('*.tif'))
        )
        if not images:
            continue
        first_image = images[0]
        parent_dir = os.path.dirname(root)
        mediainfo_xml = '%s/%s_mediainfo.xml' % (parent_dir, first_image)
        mediatrace_xml = '%s/%s_mediatrace.xml' % (parent_dir, first_image)
        print('Creating mediainfo XML for %s' % first_image)
        make_mediainfo(mediainfo_xml, 'mediaxmloutput', first_image)
        print('Creating mediatrace XML for %s' % first_image)
        make_mediatrace(mediatrace_xml, 'mediatracexmlinput', first_image)
def get_filenames(directory, log_filename_alteration):
    '''
    Describe the image sequence stored in ``directory``.

    Changes the working directory to ``directory`` and looks for ``*.dpx``
    files, falling back to ``*.tiff``. Mediainfo/mediatrace XML files for the
    first frame are created under ``<grandparent>/metadata/image`` unless
    they already exist.

    ``log_filename_alteration`` is accepted for compatibility with existing
    callers but is not used.

    Returns the string ``'none'`` when no images are found, otherwise a list
    ``[prefix, start_number, container, date_modified]`` where ``prefix`` is
    the file name up to (and including) the separator that precedes the
    frame number.
    '''
    os.chdir(directory)
    images = glob('*.dpx') or glob('*.tiff')
    if not images:
        return 'none'
    images.sort()
    first_image = images[0]
    image_date_modified = get_date_modified(first_image)
    metadata_image_dir = os.path.dirname(
        os.path.dirname(directory)) + '/metadata/image'
    mediainfo_xml = '%s/%s_mediainfo.xml' % (metadata_image_dir, first_image)
    mediatrace_xml = '%s/%s_mediatrace.xml' % (metadata_image_dir, first_image)
    if not os.path.isfile(mediainfo_xml):
        print('Creating mediainfo XML for %s' % first_image)
        make_mediainfo(mediainfo_xml, 'mediaxmloutput', first_image)
    if not os.path.isfile(mediatrace_xml):
        print('Creating mediatrace XML for %s' % first_image)
        make_mediatrace(mediatrace_xml, 'mediatracexmlinput', first_image)
    container = first_image.split(".")[-1]
    # The piece after the last underscore tells the two naming schemes apart:
    # "name_0001.dpx" gives two dot-separated parts, "name.0001.dpx" three
    # or more.
    tail_parts = first_image.split("_")[-1].split(".")
    dot_numbered = len(tail_parts) > 2
    if '864000' in first_image:
        start_number = '864000'
    elif dot_numbered:
        # The frame number is the component right before the extension. The
        # prefix below drops the last two components, so index -2 keeps both
        # in agreement even when the name contains extra dots.
        start_number = tail_parts[-2]
    else:
        start_number = tail_parts[0]
    if dot_numbered:
        image_seq_without_container = ''.join(
            part + '.' for part in first_image.split(".")[:-2])
        print(image_seq_without_container)
    else:
        image_seq_without_container = ''.join(
            part + '_' for part in first_image.split("_")[:-1])
    return [
        image_seq_without_container, start_number, container,
        image_date_modified
    ]
def extract_provenance(filename, output_folder, output_uuid):
    '''
    Create mediainfo and mediatrace XML sidecars for ``filename``.

    Both sidecars are named after ``output_uuid`` and placed in
    ``output_folder``. Returns the two paths as a tuple
    ``(mediainfo_path, mediatrace_path)``.
    '''
    name_parts = (output_folder, output_uuid)
    mediatrace_path = "%s/%s_source_mediatrace.xml" % name_parts
    mediainfo_path = "%s/%s_source_mediainfo.xml" % name_parts
    print(
        ' - Generating mediainfo xml of input file and saving it in %s'
        % mediainfo_path
    )
    ififuncs.make_mediainfo(mediainfo_path, 'mediaxmlinput', filename)
    print(
        ' - Generating mediatrace xml of input file and saving it in %s'
        % mediatrace_path
    )
    ififuncs.make_mediatrace(mediatrace_path, 'mediatracexmlinput', filename)
    return mediainfo_path, mediatrace_path
def make_sip(video_files):
    '''
    Build a metadata/logs/objects package around each file in video_files.

    For every file this creates ``<name>/metadata``, ``<name>/logs`` and
    ``<name>/objects`` (``<name>`` is the path without its extension), runs
    ffmpeg to write a framemd5, writes mediainfo and mediatrace XML, moves
    the file into ``objects`` and finally calls hashlib_manifest() for the
    package.

    os.makedirs() raises if any of the three directories already exists.
    '''
    for filename in video_files:
        filenoext = os.path.splitext(filename)[0]
        # Names placed inside the package use the bare file name. Embedding
        # the full input path would point at directories that do not exist
        # whenever the input has a directory component.
        basename = os.path.basename(filename)
        # Generate new directory names
        metadata_dir = "%s/metadata" % filenoext
        log_dir = "%s/logs" % filenoext
        data_dir = "%s/objects" % filenoext
        # Actually create the directories.
        os.makedirs(metadata_dir)
        os.makedirs(data_dir)
        os.makedirs(log_dir)
        # Generate filenames for new files.
        inputxml = "%s/%s_mediainfo.xml" % (metadata_dir, basename)
        inputtracexml = "%s/%s_mediatrace.xml" % (metadata_dir, basename)
        fmd5 = "%s/%s.framemd5" % (metadata_dir, basename)
        log = "%s/%s_log.log" % (log_dir, basename)
        generate_log(log, 'Input = %s' % filename)
        fmd5_logfile = log_dir + '/%s_framemd5.log' % basename
        fmd5_env_dict = set_environment(fmd5_logfile)
        fmd5_command = [
            'ffmpeg',  # Create decoded md5 checksums for every frame
            '-i', filename,
            '-report',
            '-f', 'framemd5', '-an',
            fmd5
        ]
        print(fmd5_command)
        subprocess.call(fmd5_command, env=fmd5_env_dict)
        generate_log(
            log,
            'makeffv1.py Framemd5 generation of output file completed'
        )
        # Keep a copy of the running script with the logs, but only when it
        # is being run as makeffv1.py.
        if os.path.basename(sys.argv[0]) == 'makeffv1.py':
            shutil.copy(sys.argv[0], log_dir)
        print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
        make_mediainfo(inputxml, 'mediaxmlinput', filename)
        print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
        make_mediatrace(inputtracexml, 'mediatracexmlinput', filename)
        source_parent_dir = os.path.dirname(os.path.abspath(filename))
        manifest = '%s/%s_manifest.md5' % (
            source_parent_dir, os.path.basename(filenoext)
        )
        if os.path.isfile(filename):
            shutil.move(filename, data_dir)
            generate_log(log, 'dvsip.py DV file moved to %s' % data_dir)
        generate_log(log, 'dvsip.py MD5 manifest started')
        hashlib_manifest(filenoext, manifest, source_parent_dir)
def make_sip(video_files):
    '''
    Build a metadata/logs/objects package around each file in video_files.

    For every file this creates ``<name>/metadata``, ``<name>/logs`` and
    ``<name>/objects`` (``<name>`` is the path without its extension), runs
    ffmpeg to write a framemd5, writes mediainfo and mediatrace XML, moves
    the file into ``objects`` and finally calls hashlib_manifest() for the
    package.

    os.makedirs() raises if any of the three directories already exists.
    '''
    for filename in video_files:
        filenoext = os.path.splitext(filename)[0]
        # Names placed inside the package use the bare file name. Embedding
        # the full input path would point at directories that do not exist
        # whenever the input has a directory component.
        basename = os.path.basename(filename)
        # Generate new directory names
        metadata_dir = '%s/metadata' % filenoext
        log_dir = '%s/logs' % filenoext
        data_dir = '%s/objects' % filenoext
        # Actually create the directories.
        os.makedirs(metadata_dir)
        os.makedirs(data_dir)
        os.makedirs(log_dir)
        # Generate filenames for new files.
        inputxml = '%s/%s_mediainfo.xml' % (metadata_dir, basename)
        inputtracexml = '%s/%s_mediatrace.xml' % (metadata_dir, basename)
        fmd5 = '%s/%s.framemd5' % (metadata_dir, basename)
        log = '%s/%s_log.log' % (log_dir, basename)
        generate_log(log, 'Input = %s' % filename)
        fmd5_logfile = log_dir + '/%s_framemd5.log' % basename
        fmd5_env_dict = set_environment(fmd5_logfile)
        fmd5_command = [
            'ffmpeg',  # Create decoded md5 checksums for every frame
            '-i', filename,
            '-report',
            '-f', 'framemd5', '-an',
            fmd5
        ]
        print(fmd5_command)
        subprocess.call(fmd5_command, env=fmd5_env_dict)
        generate_log(
            log, 'makeffv1.py Framemd5 generation of output file completed')
        # Keep a copy of the running script with the logs, but only when it
        # is being run as makeffv1.py.
        if os.path.basename(sys.argv[0]) == 'makeffv1.py':
            shutil.copy(sys.argv[0], log_dir)
        # The "%" must be applied to the string inside the call: with the
        # print() function, "print(...) % x" evaluates "None % x" and raises.
        print('Generating mediainfo xml of input file and saving it in %s'
              % inputxml)
        make_mediainfo(inputxml, 'mediaxmlinput', filename)
        print('Generating mediatrace xml of input file and saving it in %s'
              % inputtracexml)
        make_mediatrace(inputtracexml, 'mediatracexmlinput', filename)
        source_parent_dir = os.path.dirname(os.path.abspath(filename))
        manifest = '%s/%s_manifest.md5' % (
            source_parent_dir, os.path.basename(filenoext))
        if os.path.isfile(filename):
            shutil.move(filename, data_dir)
            generate_log(log, 'dvsip.py DV file moved to %s' % data_dir)
        generate_log(log, 'dvsip.py MD5 manifest started')
        hashlib_manifest(filenoext, manifest, source_parent_dir)
def check_files(input):
    '''
    Recursively find image sequences below ``input`` and create mediainfo
    and mediatrace XML for the first frame of each.

    Every directory is checked for ``*.dpx``, then ``*.tiff``, then ``*.tif``
    files. The first file in sorted order of the first kind present is passed
    to make_mediainfo() and make_mediatrace(); the XML files are written to
    the parent of the directory holding the images.

    The current working directory is changed to each visited directory and
    is left at the last one visited.
    '''
    # os.walk() resolves a relative top against the current directory each
    # time it descends, so it must be given an absolute path because the loop
    # body calls os.chdir().
    for root, _, _ in os.walk(os.path.abspath(input)):
        os.chdir(root)
        # glob() returns names in arbitrary order; sort so that the same
        # frame is chosen on every run.
        images = (
            sorted(glob('*.dpx'))
            or sorted(glob('*.tiff'))
            or sorted(glob('*.tif'))
        )
        if not images:
            continue
        first_image = images[0]
        parent_dir = os.path.dirname(root)
        mediainfo_xml = '%s/%s_mediainfo.xml' % (parent_dir, first_image)
        mediatrace_xml = '%s/%s_mediatrace.xml' % (parent_dir, first_image)
        print('Creating mediainfo XML for %s' % first_image)
        make_mediainfo(mediainfo_xml, 'mediaxmloutput', first_image)
        print('Creating mediatrace XML for %s' % first_image)
        make_mediatrace(mediatrace_xml, 'mediatracexmlinput', first_image)
def get_metadata(path, new_log_textfile):
    '''
    Recursively create mediainfos and mediatraces for AV files.
    This should probably go in ififuncs as it could be used by other scripts.

    Walks ``path``. AV files get mediainfo and mediatrace XML; image and
    document files get exiftool XML and a Siegfried JSON. All sidecars are
    written to ``<path>/metadata`` and each extraction is recorded in
    ``new_log_textfile`` via ififuncs.generate_log(). Files whose name starts
    with a dot are ignored, as are image/document files whose name contains
    one of the blacklisted tool names.
    '''
    mediainfo_version = 'mediainfo'
    try:
        mediainfo_version = subprocess.check_output([
            'mediainfo', '--Version'
        ]).rstrip()
    except subprocess.CalledProcessError as grepexc:
        # A non-zero exit still carries output; the second line of it is
        # used as the version string.
        mediainfo_version = grepexc.output.rstrip().splitlines()[1]
    av_extensions = (
        '.mov', 'MP4', '.mp4', '.mkv', '.MXF', '.mxf', '.dv', '.DV'
    )
    document_extensions = (
        '.tif', 'tiff', '.doc', '.txt', '.docx', '.pdf', '.jpg', '.jpeg',
        '.png', '.rtf', '.xml', '.odt'
    )
    blacklist = ('siegfried', 'exiftool', 'mediainfo', 'mediatrace')
    metadata_dir = os.path.join(path, 'metadata')
    for root, _, filenames in os.walk(path):
        for av_file in filenames:
            if av_file.startswith('.'):
                continue
            source_file = os.path.join(root, av_file)
            if av_file.endswith(av_extensions):
                inputxml = "%s/%s_mediainfo.xml" % (
                    metadata_dir, os.path.basename(av_file)
                )
                inputtracexml = "%s/%s_mediatrace.xml" % (
                    metadata_dir, os.path.basename(av_file)
                )
                print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
                ififuncs.make_mediainfo(inputxml, 'mediaxmlinput', source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputxml, mediainfo_version)
                )
                print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
                ififuncs.make_mediatrace(
                    inputtracexml, 'mediatracexmlinput', source_file
                )
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Mediatrace technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputtracexml, mediainfo_version)
                )
            elif av_file.endswith(document_extensions):
                # The test has to look for each blacklisted term inside the
                # file name. Iterating over the file name itself compares
                # single characters with the terms and never matches.
                # NOTE(review): skipping (rather than selecting) blacklisted
                # names is inferred from the variable name - confirm.
                if any(term in av_file for term in blacklist):
                    continue
                exiftool_version = 'exiftool'
                try:
                    exiftool_version = subprocess.check_output([
                        'exiftool', '-ver'
                    ])
                except subprocess.CalledProcessError as grepexc:
                    exiftool_version = grepexc.output.rstrip().splitlines()[1]
                siegfried_version = 'siegfried'
                try:
                    siegfried_version = subprocess.check_output([
                        'sf', '-version'
                    ])
                except subprocess.CalledProcessError as grepexc:
                    siegfried_version = grepexc.output.rstrip().splitlines()[1]
                inputxml = "%s/%s_exiftool.xml" % (
                    metadata_dir, os.path.basename(av_file)
                )
                inputtracexml = "%s/%s_siegfried.json" % (
                    metadata_dir, os.path.basename(av_file)
                )
                # Each status line now precedes the tool it describes and
                # names the right tool.
                print('Generating exiftool xml of input file and saving it in %s' % inputxml)
                ififuncs.make_exiftool(inputxml, source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via exiftool, eventOutcome=%s, agentName=%s' % (inputxml, exiftool_version)
                )
                print('Generating Siegfried json of input file and saving it in %s' % inputtracexml)
                ififuncs.make_siegfried(inputtracexml, source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Format identification - eventType=format identification, eventDetail=Format identification via PRONOM signatures using Siegfried, eventOutcome=%s, agentName=%s' % (inputtracexml, siegfried_version)
                )
def get_filenames(directory, log_filename_alteration):
    '''
    Describe the image sequence stored in ``directory``.

    Changes the working directory to ``directory`` and looks for ``*.dpx``
    files, falling back to ``*.tiff``. Mediainfo/mediatrace XML files for the
    first frame are created under ``<grandparent>/metadata/image`` unless
    they already exist.

    ``log_filename_alteration`` is accepted for compatibility with existing
    callers but is not used.

    Returns the string ``'none'`` when no images are found, otherwise a list
    ``[prefix, start_number, container, date_modified]`` where ``prefix`` is
    the file name up to (and including) the separator that precedes the
    frame number.
    '''
    os.chdir(directory)
    images = glob('*.dpx') or glob('*.tiff')
    if not images:
        return 'none'
    images.sort()
    first_image = images[0]
    image_date_modified = get_date_modified(first_image)
    metadata_image_dir = os.path.dirname(
        os.path.dirname(directory)) + '/metadata/image'
    mediainfo_xml = '%s/%s_mediainfo.xml' % (metadata_image_dir, first_image)
    mediatrace_xml = '%s/%s_mediatrace.xml' % (metadata_image_dir, first_image)
    if not os.path.isfile(mediainfo_xml):
        print('Creating mediainfo XML for %s' % first_image)
        make_mediainfo(mediainfo_xml, 'mediaxmloutput', first_image)
    if not os.path.isfile(mediatrace_xml):
        print('Creating mediatrace XML for %s' % first_image)
        make_mediatrace(mediatrace_xml, 'mediatracexmlinput', first_image)
    container = first_image.split(".")[-1]
    # The piece after the last underscore tells the two naming schemes apart:
    # "name_0001.dpx" gives two dot-separated parts, "name.0001.dpx" three
    # or more.
    tail_parts = first_image.split("_")[-1].split(".")
    dot_numbered = len(tail_parts) > 2
    if '864000' in first_image:
        start_number = '864000'
    elif dot_numbered:
        # The frame number is the component right before the extension. The
        # prefix below drops the last two components, so index -2 keeps both
        # in agreement even when the name contains extra dots.
        start_number = tail_parts[-2]
    else:
        start_number = tail_parts[0]
    if dot_numbered:
        image_seq_without_container = ''.join(
            part + '.' for part in first_image.split(".")[:-2])
        print(image_seq_without_container)
    else:
        image_seq_without_container = ''.join(
            part + '_' for part in first_image.split("_")[:-1])
    return [
        image_seq_without_container, start_number, container,
        image_date_modified
    ]
def main():
    '''
    Convert every image sequence found below the directories given on the
    command line into a ProRes mezzanine file with accompanying metadata.

    After asking the operator for confirmation, each directory is walked.
    Where get_filenames() finds a sequence, ffmpeg is run to write the
    mezzanine .mov plus image and audio framemd5 files, PREMIS XML is built,
    mediainfo/mediatrace sidecars and MD5 manifests are created, and a row
    with start and finish time is appended to a CSV report in
    ``~/Desktop/seq_csv_reports``.

    NOTE(review): the package layout (``objects/image``, ``objects/audio``,
    a sibling ``mezzanine`` folder two levels above the master package) is
    taken from the path arithmetic below - confirm against real packages.
    '''
    desktop_logdir = os.path.expanduser("~/Desktop/") + 'seq_csv_reports'
    if not os.path.isdir(desktop_logdir):
        os.makedirs(desktop_logdir)
    all_files = sys.argv[1:]
    prompt = '\n**** All image sequences within these directories will be converted the input for this script \n**** If this looks ok, please press Y, otherwise, type N\n'
    print('\n\n**** All image sequences within these directories will be converted the input for this script.\n')
    for i in all_files:
        print(i)
    permission = raw_input(prompt)
    while permission not in ('Y', 'y', 'N', 'n'):
        permission = raw_input(prompt)
    if permission in ('n', 'N'):
        print('Exiting at your command- Cheerio for now')
        sys.exit()
    print('Ok so!')
    csv_report_filename = desktop_logdir + '/seq2prores_report' + time.strftime(
        "_%Y_%m_%dT%H_%M_%S") + '.csv'
    user = get_user()
    create_csv(csv_report_filename,
               ('Sequence Name', 'Start time', 'Finish Time'))
    for source_directory in all_files:
        # The loop body changes the working directory (here and inside
        # get_filenames), so the walk and every path derived from ``root``
        # must be absolute.
        for root, _, _ in os.walk(os.path.abspath(source_directory)):
            sequence_dir = root
            remove_bad_files(sequence_dir)
            source_parent_dir = os.path.dirname(sequence_dir)
            start = datetime.datetime.now()
            info = get_filenames(sequence_dir, 'dpx_framemd5')
            if info == 'none':
                continue
            master_parent_dir = os.path.dirname(source_parent_dir)
            master_object_dir = master_parent_dir + '/objects/image'
            master_metadata_dir = master_parent_dir + '/' + 'metadata'
            mezzanine_root = os.path.dirname(
                os.path.dirname(master_parent_dir)) + '/mezzanine'
            middle = os.listdir(mezzanine_root)[0]
            mezzanine_parent_dir = mezzanine_root + '/%s' % middle
            mezzanine_object_dir = mezzanine_root + '/%s/objects' % middle
            mezzanine_metadata_dir = mezzanine_parent_dir + '/metadata'
            source_manifest = master_parent_dir + '/' + os.path.basename(
                master_parent_dir) + '_manifest.md5'
            mezzanine_manifest = mezzanine_parent_dir + '/' + os.path.basename(
                mezzanine_parent_dir) + '_manifest.md5'
            # Reset per sequence so that a package without a WAV can neither
            # reuse the previous package's audio nor fail on an unbound name.
            master_audio = None
            audio_date_modified = None
            audio_dir_list = os.listdir(master_parent_dir + '/objects/audio')
            for audio_files in audio_dir_list:
                if not audio_files[0] == '.':
                    if audio_files.endswith('.wav'):
                        master_audio = master_parent_dir + '/objects/audio/' + audio_files
                        audio_date_modified = get_date_modified(master_audio)
            if master_audio is None:
                print('No WAV file found in %s/objects/audio.. skipping.' % master_parent_dir)
                continue
            mezzanine_file = mezzanine_object_dir + '/' + os.path.basename(
                mezzanine_parent_dir) + '_mezzanine.mov'
            if os.path.isfile(mezzanine_file):
                print('Mezzanine file already exists so this script has most likely already been run.. skipping.')
                continue
            image_seq_without_container = info[0]
            start_number = info[1]
            container = info[2]
            image_date_modified = info[3]
            start_number_length = len(start_number)
            # printf-style frame counter for ffmpeg's image2 input,
            # zero-padded to the width of the first frame number.
            number_regex = "%0" + str(start_number_length) + 'd.'
            audio_dir = source_parent_dir + '/audio'
            logs_dir = mezzanine_parent_dir + '/logs'
            intellectual_entity_uuid = str(uuid.uuid4())
            source_representation_uuid = premis_description(
                master_object_dir, master_parent_dir + '/objects/audio', user,
                image_date_modified, audio_date_modified,
                intellectual_entity_uuid)
            os.chdir(audio_dir)
            audio_file_list = glob('*.wav')
            audio_file = os.path.join(audio_dir, audio_file_list[0])
            dpx_filename = image_seq_without_container + number_regex + container
            logfile = logs_dir + '/%s_prores.log' % os.path.basename(
                mezzanine_parent_dir)
            env_dict = os.environ.copy()
            # https://github.com/imdn/scripts/blob/0dd89a002d38d1ff6c938d6f70764e6dd8815fdd/ffmpy.py#L272
            logfile = "\'" + logfile + "\'"
            env_dict['FFREPORT'] = 'file={}:level=48'.format(logfile)
            # One ffmpeg run with three outputs: the mezzanine file, an
            # image framemd5 (-an) and an audio framemd5 (-vn).
            seq2prores = [
                'ffmpeg', '-y',
                '-f', 'image2',
                '-framerate', '24',
                '-start_number', start_number,
                '-i', root + '/' + dpx_filename,
                '-i', audio_file,
                '-c:v', 'prores',
                '-profile:v', '3',
                '-c:a', 'pcm_s24le',
                '-ar', '48000',
                mezzanine_file,
                '-f', 'framemd5', '-an',
                master_metadata_dir + '/image/' + os.path.basename(master_parent_dir) + '.framemd5',
                '-c:a', 'pcm_s24le',
                '-f', 'framemd5', '-vn',
                master_metadata_dir + '/audio/' + os.path.basename(master_parent_dir) + '.framemd5'
            ]
            print(seq2prores)
            subprocess.call(seq2prores, env=env_dict)
            representation_uuid = str(uuid.uuid4())
            split_list = os.path.basename(mezzanine_parent_dir).split('_')
            premisxml, premis_namespace, doc, premis = setup_xml(mezzanine_file)
            items = {
                "workflow": "seq2prores",
                "oe": 'n/a',
                "filmographic": split_list[0],
                "sourceAccession": split_list[1],
                "interventions": ['placeholder'],
                "prepList": ['placeholder'],
                "user": user
            }
            premis = doc.getroot()
            xml_info = make_premis(mezzanine_file, items, premis,
                                   premis_namespace, premisxml,
                                   representation_uuid, '????')
            sequence = xml_info[3]
            linking_representation_uuids = []
            linking_representation_uuids.append(xml_info[2])
            # the duplicate does nothing btw, they are a placeholder from a
            # hardcoded function
            linking_representation_uuids.append(xml_info[2])
            linking_representation_uuids.append(source_representation_uuid)
            create_intellectual_entity(premisxml, premis_namespace, doc, premis,
                                       items, intellectual_entity_uuid)
            create_representation(premisxml, premis_namespace, doc, premis,
                                  items, linking_representation_uuids,
                                  representation_uuid, sequence,
                                  intellectual_entity_uuid)
            doc = xml_info[0]
            premisxml = xml_info[1]
            final_sip_manifest_uuid = str(uuid.uuid4())
            prores_event_uuid = str(uuid.uuid4())
            macMiniTelecineMachineAgent_events = [
                prores_event_uuid, final_sip_manifest_uuid
            ]
            macMiniTelecineMachineAgent = make_agent(
                premis, macMiniTelecineMachineAgent_events,
                '230d72da-07e7-4a79-96ca-998b9f7a3e41')
            macMiniTelecineMachineOSAgent_events = [
                prores_event_uuid, final_sip_manifest_uuid
            ]
            macMiniTelecineOSAgent = make_agent(
                premis, macMiniTelecineMachineOSAgent_events,
                '9486b779-907c-4cc4-802c-22e07dc1242f')
            hashlib_events = [final_sip_manifest_uuid]
            hashlibAgent = make_agent(premis, hashlib_events,
                                      '9430725d-7523-4071-9063-e8a6ac4f84c4')
            ffmpegAgent_events = [prores_event_uuid]
            ffmpegAgent = make_agent(premis, ffmpegAgent_events,
                                     'ee83e19e-cdb1-4d83-91fb-7faf7eff738e')
            operatorEvents = [final_sip_manifest_uuid, prores_event_uuid]
            operatorAgent = make_agent(premis, operatorEvents, user)
            make_event(
                premis, 'creation',
                'Image Sequence and WAV re-encoded to Apple Pro Res 422 HQ with 48khz 24-bit PCM audio',
                [
                    macMiniTelecineMachineAgent, macMiniTelecineOSAgent,
                    ffmpegAgent, operatorAgent
                ], prores_event_uuid, [representation_uuid], 'outcome', 'now')
            print(premisxml)
            mezzanine_mediainfoxml = "%s/%s_mediainfo.xml" % (
                mezzanine_metadata_dir, os.path.basename(mezzanine_parent_dir))
            tracexml = "%s/%s_mediatrace.xml" % (
                mezzanine_metadata_dir, os.path.basename(mezzanine_parent_dir))
            audio_mediainfoxml = "%s/%s_mediainfo.xml" % (
                master_metadata_dir + '/audio', os.path.basename(master_audio))
            audio_mediatracexml = "%s/%s_mediatrace.xml" % (
                master_metadata_dir + '/audio', os.path.basename(master_audio))
            if not os.path.isfile(audio_mediainfoxml):
                make_mediainfo(audio_mediainfoxml, 'audiomediaxmlinput',
                               master_audio)
            if not os.path.isfile(audio_mediatracexml):
                # The trace sidecar must come from make_mediatrace; using
                # make_mediainfo here wrote a second mediainfo report under
                # the mediatrace file name.
                make_mediatrace(audio_mediatracexml, 'audiomediatraceinput',
                                master_audio)
            if not os.path.isfile(mezzanine_mediainfoxml):
                make_mediainfo(mezzanine_mediainfoxml, 'mediaxmlinput',
                               mezzanine_file)
            if not os.path.isfile(tracexml):
                make_mediatrace(tracexml, 'mediatracexmlinput', mezzanine_file)
            hashlib_manifest(master_parent_dir, source_manifest,
                             master_parent_dir)
            hashlib_manifest(mezzanine_parent_dir, mezzanine_manifest,
                             mezzanine_parent_dir)
            make_event(premis, 'message digest calculation',
                       'Checksum manifest for whole package created', [
                           macMiniTelecineMachineAgent, macMiniTelecineOSAgent,
                           operatorAgent
                       ], final_sip_manifest_uuid, [representation_uuid],
                       'source', 'now')
            write_premis(doc, premisxml)
            finish = datetime.datetime.now()
            append_csv(csv_report_filename,
                       (os.path.basename(master_parent_dir), start, finish))
def make_ffv1(video_files, csv_report_filename):
    '''
    Transcode each file in video_files to FFV1/Matroska and verify the result.

    For every file this creates ``<name>/metadata``, ``<name>/logs`` and
    ``<name>/objects`` (``<name>`` is the path without its extension),
    transcodes with ffmpeg while writing a framemd5 of the source, writes a
    second framemd5 from the FFV1 output, compares the two line by line and
    appends the verdict, file sizes and compression ratio to
    ``csv_report_filename``. Mediainfo/mediatrace XML is written for source
    and output and hashlib_manifest() is called for the package.

    os.makedirs() raises if any of the three directories already exists.
    '''
    # itertools.izip exists on Python 2 only; the built-in zip is its lazy
    # equivalent on Python 3.
    pair_lines = getattr(itertools, 'izip', zip)
    for filename in video_files:
        filenoext = os.path.splitext(filename)[0]
        # Names placed inside the package use the bare file name. Embedding
        # the full input path would point at directories that do not exist
        # whenever the input has a directory component.
        basename = os.path.basename(filename)
        # Generate new directory names
        metadata_dir = "%s/metadata" % filenoext
        log_dir = "%s/logs" % filenoext
        data_dir = "%s/objects" % filenoext
        # Actually create the directories.
        os.makedirs(metadata_dir)
        os.makedirs(data_dir)
        os.makedirs(log_dir)
        # Generate filenames for new files.
        inputxml = "%s/%s_source_mediainfo.xml" % (metadata_dir, basename)
        inputtracexml = "%s/%s_source_mediatrace.xml" % (metadata_dir, basename)
        output = "%s/%s.mkv" % (data_dir, os.path.splitext(basename)[0])
        # Generate filename of ffv1.mkv without the path.
        outputfilename = os.path.basename(output)
        outputxml = "%s/%s_mediainfo.xml" % (metadata_dir, outputfilename)
        outputtracexml = "%s/%s_mediatrace.xml" % (metadata_dir, outputfilename)
        fmd5 = "%s/%s_source.framemd5" % (metadata_dir, basename)
        fmd5ffv1 = "%s/%s_ffv1.framemd5" % (metadata_dir, outputfilename)
        log = "%s/%s_log.log" % (log_dir, basename)
        generate_log(log, 'Input = %s' % filename)
        generate_log(log, 'Output = %s' % output)
        generate_log(
            log,
            'makeffv1.py transcode to FFV1 and framemd5 generation of source started.'
        )
        ffv1_logfile = log_dir + '/%s_ffv1_transcode.log' % basename
        ffv1_env_dict = set_environment(ffv1_logfile)
        par = subprocess.check_output(
            [
                'mediainfo', '--Language=raw', '--Full',
                "--Inform=Video;%PixelAspectRatio%", filename
            ]
        ).rstrip()
        field_order = subprocess.check_output(
            [
                'mediainfo', '--Language=raw', '--Full',
                "--Inform=Video;%ScanType%", filename
            ]
        ).rstrip()
        height = subprocess.check_output(
            [
                'mediainfo', '--Language=raw', '--Full',
                "--Inform=Video;%Height%", filename
            ]
        ).rstrip()
        # Transcode video file writing frame md5 and output appropriately
        ffv1_command = [
            'ffmpeg',
            '-i', filename,
            '-c:v', 'ffv1',  # Use FFv1 codec
            '-g', '1',  # Use intra-frame only aka ALL-I aka GOP=1
            '-level', '3',  # Use Version 3 of FFv1
            '-c:a', 'copy',  # Copy and paste audio bitsream with no transcoding
            '-map', '0',
            '-dn',
            '-report',
            '-slicecrc', '1',
            '-slices', '16',
        ]
        # check for FCP7 lack of description and PAL
        if par == '1.000' and field_order == '' and height == '576':
            ffv1_command += ['-vf', 'setfield=tff, setdar=4/3']
        ffv1_command += [
            output,
            '-f', 'framemd5', '-an',  # Create decoded md5 checksums for every frame of the input. -an ignores audio
            fmd5
        ]
        print(ffv1_command)
        subprocess.call(ffv1_command, env=ffv1_env_dict)
        generate_log(
            log,
            'makeffv1.py transcode to FFV1 and framemd5 generation completed.'
        )
        generate_log(
            log,
            'makeffv1.py Framemd5 generation of output file started.'
        )
        fmd5_logfile = log_dir + '/%s_framemd5.log' % outputfilename
        fmd5_env_dict = set_environment(fmd5_logfile)
        fmd5_command = [
            'ffmpeg',  # Create decoded md5 checksums for every frame
            '-i', output,
            '-report',
            '-f', 'framemd5', '-an',
            fmd5ffv1
        ]
        print(fmd5_command)
        subprocess.call(fmd5_command, env=fmd5_env_dict)
        generate_log(
            log,
            'makeffv1.py Framemd5 generation of output file completed'
        )
        source_video_size = get_mediainfo(
            'source_video_size', "--inform=General;%FileSize%", filename
        )
        ffv1_video_size = get_mediainfo(
            'ffv1_video_size', '--inform=General;%FileSize%', output
        )
        compression_ratio = float(source_video_size) / float(ffv1_video_size)
        if os.path.basename(sys.argv[0]) == 'makeffv1.py':
            shutil.copy(sys.argv[0], log_dir)
        print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
        make_mediainfo(inputxml, 'mediaxmlinput', filename)
        print('Generating mediainfo xml of output file and saving it in %s' % outputxml)
        make_mediainfo(outputxml, 'mediaxmloutput', output)
        print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
        make_mediatrace(inputtracexml, 'mediatracexmlinput', filename)
        print('Generating mediatrace xml of output file and saving it in %s' % outputtracexml)
        make_mediatrace(outputtracexml, 'mediatracexmloutput', output)
        source_parent_dir = os.path.dirname(os.path.abspath(filename))
        manifest = '%s/%s_manifest.md5' % (
            source_parent_dir, os.path.basename(filenoext)
        )
        generate_log(log, 'makeffv1.py MD5 manifest started')
        checksum_mismatches = []
        with open(fmd5) as f1:
            with open(fmd5ffv1) as f2:
                for (_, line1), (_, line2) in pair_lines(
                        read_non_comment_lines(f1),
                        read_non_comment_lines(f2)
                ):
                    if line1 != line2:
                        if 'sar' in line1:
                            checksum_mismatches = ['sar']
                        else:
                            checksum_mismatches.append(1)
        if not checksum_mismatches:
            print('LOSSLESS')
            append_csv(
                csv_report_filename,
                (
                    output, 'LOSSLESS', source_video_size,
                    ffv1_video_size, compression_ratio
                )
            )
            generate_log(log, 'makeffv1.py Transcode was lossless')
        elif checksum_mismatches == ['sar']:
            print(
                'Image content is lossless,'
                ' Pixel Aspect Ratio has been altered.'
                ' Update ffmpeg in order to resolve the PAR issue.'
            )
            append_csv(
                csv_report_filename,
                (
                    output, 'LOSSLESS - different PAR', source_video_size,
                    ffv1_video_size, compression_ratio
                )
            )
            generate_log(
                log,
                'makeffv1.py Image content is lossless but Pixel Aspect Ratio has been altered.Update ffmpeg in order to resolve the PAR issue.'
            )
        else:
            # Any other mismatch, including exactly one differing frame,
            # means the transcode is not lossless and must be reported.
            print('NOT LOSSLESS')
            append_csv(
                csv_report_filename,
                (
                    output, 'NOT LOSSLESS', source_video_size,
                    ffv1_video_size, compression_ratio
                )
            )
            generate_log(log, 'makeffv1.py Not Lossless.')
        hashlib_manifest(filenoext, manifest, source_parent_dir)
        if filecmp.cmp(fmd5, fmd5ffv1, shallow=False):
            print("YOUR FILES ARE LOSSLESS YOU SHOULD BE SO HAPPY!!!")
        else:
            print(
                "The framemd5 text files are not completely identical."
                " This may be because of a lossy transcode,"
                " or a change in metadata, most likely pixel aspect ratio."
                " Please analyse the framemd5 files for source and output."
            )
def get_metadata(path, new_log_textfile):
    '''
    Recursively create mediainfos and mediatraces for AV files.
    This should probably go in ififuncs as it could be used by other scripts.

    Walks ``path`` (never descending into directories named ``metadata``).
    AV files get mediainfo and mediatrace XML; image and document files get
    an exiftool JSON (except ``.txt``/``.csv``) and a Siegfried JSON. All
    sidecars are written to ``<path>/metadata`` and each extraction is
    recorded in ``new_log_textfile`` via ififuncs.generate_log(). Files whose
    name starts with a dot are ignored.
    '''
    def _tool_version(command):
        # Raw output of the version command; on a non-zero exit the second
        # line of the captured output is used instead.
        try:
            return subprocess.check_output(command)
        except subprocess.CalledProcessError as grepexc:
            return grepexc.output.rstrip().splitlines()[1]

    try:
        mediainfo_version = subprocess.check_output(
            ['mediainfo', '--Version']).rstrip()
    except subprocess.CalledProcessError as grepexc:
        mediainfo_version = grepexc.output.rstrip().splitlines()[1]
    av_suffixes = (
        '.mov', 'MP4', '.mp4', '.mkv', '.MXF', '.mxf', '.dv', '.DV', '.3gp',
        '.webm', '.swf', '.avi'
    )
    document_suffixes = (
        '.tif', 'tiff', '.doc', '.txt', '.docx', '.pdf', '.jpg', '.jpeg',
        '.png', '.rtf', '.xml', '.odt', '.cr2', '.epub', '.ppt', '.pptx',
        '.xls', '.xlsx', '.gif', '.bmp', '.csv'
    )
    sidecar_dir = os.path.join(path, 'metadata')
    for root, directories, filenames in os.walk(path):
        # Prune in place so os.walk() does not descend into sidecar folders.
        directories[:] = [d for d in directories if d != 'metadata']
        for av_file in filenames:
            if av_file.startswith('.'):
                continue
            lowered = av_file.lower()
            source_file = os.path.join(root, av_file)
            sidecar_name = os.path.basename(av_file)
            if lowered.endswith(av_suffixes):
                inputxml = "%s/%s_mediainfo.xml" % (sidecar_dir, sidecar_name)
                inputtracexml = "%s/%s_mediatrace.xml" % (
                    sidecar_dir, sidecar_name)
                print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
                ififuncs.make_mediainfo(inputxml, 'mediaxmlinput', source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s'
                    % (inputxml, mediainfo_version))
                print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
                ififuncs.make_mediatrace(inputtracexml, 'mediatracexmlinput',
                                         source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Mediatrace technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s'
                    % (inputtracexml, mediainfo_version))
                continue
            if not lowered.endswith(document_suffixes):
                continue
            if not lowered.endswith(('.txt', '.csv')):
                exiftool_version = _tool_version(['exiftool', '-ver'])
                inputxml = "%s/%s_exiftool.json" % (sidecar_dir, sidecar_name)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via exiftool, eventOutcome=%s, agentName=%s'
                    % (inputxml, exiftool_version))
                print('Generating exiftool json of input file and saving it in %s' % inputxml)
                ififuncs.make_exiftool(inputxml, source_file)
            siegfried_version = _tool_version(['sf', '-version'])
            inputtracexml = "%s/%s_siegfried.json" % (
                sidecar_dir, sidecar_name)
            print('Generating Siegfried json of input file and saving it in %s' % inputtracexml)
            ififuncs.make_siegfried(inputtracexml, source_file)
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = Format identification - eventType=format identification, eventDetail=Format identification via PRONOM signatures using Siegfried, eventOutcome=%s, agentName=%s'
                % (inputtracexml, siegfried_version))
def main(): desktop_logdir = os.path.expanduser("~/Desktop/") + 'seq_csv_reports' if not os.path.isdir(desktop_logdir): os.makedirs(desktop_logdir) all_files = sys.argv[1:] permission = '' if not permission == 'y' or permission == 'Y': print '\n\n**** All image sequences within these directories will be converted the input for this script.\n' for i in all_files: print i permission = raw_input('\n**** All image sequences within these directories will be converted the input for this script \n**** If this looks ok, please press Y, otherwise, type N\n' ) while permission not in ('Y','y','N','n'): permission = raw_input('\n**** All image sequences within these directories will be converted the input for this script \n**** If this looks ok, please press Y, otherwise, type N\n') if permission == 'n' or permission == 'N': print 'Exiting at your command- Cheerio for now' sys.exit() elif permission =='y' or permission == 'Y': print 'Ok so!' csv_report_filename = desktop_logdir + '/seq2prores_report' + time.strftime("_%Y_%m_%dT%H_%M_%S") + '.csv' user = get_user() create_csv(csv_report_filename, ('Sequence Name', 'Start time', 'Finish Time')) for source_directory in all_files: for root,dirnames,filenames in os.walk(source_directory): #if "tiff_scans" in dirnames: source_directory = root# + '/tiff_scans' total_size = 0 remove_bad_files(source_directory) source_parent_dir = os.path.dirname(source_directory) normpath = os.path.normpath(source_directory) relative_path = normpath.split(os.sep)[-1] split_path = os.path.split(os.path.basename(source_directory))[1] start = datetime.datetime.now() info = get_filenames(source_directory, 'dpx_framemd5') if info == 'none': continue for files in filenames: total_size += os.path.getsize(os.path.join(root,files)) master_parent_dir = os.path.dirname(source_parent_dir) master_object_dir = master_parent_dir + '/objects/image' master_metadata_dir = master_parent_dir + '/' + 'metadata' middle = 
os.listdir(os.path.dirname(os.path.dirname(master_parent_dir)) + '/mezzanine')[0] mezzanine_object_dir = os.path.dirname(os.path.dirname(master_parent_dir)) + '/mezzanine/%s/objects' % middle mezzanine_parent_dir = os.path.dirname(os.path.dirname(master_parent_dir)) + '/mezzanine/%s' % middle mezzanine_metadata_dir = mezzanine_parent_dir + '/metadata' source_manifest = master_parent_dir + '/' + os.path.basename( master_parent_dir) + '_manifest.md5' mezzanine_manifest = mezzanine_parent_dir + '/' + os.path.basename( mezzanine_parent_dir) + '_manifest.md5' master_audio = master_parent_dir + '/objects/audio/' + os.listdir(master_parent_dir + '/objects/audio')[0] mezzanine_file = mezzanine_object_dir + '/' + os.path.basename(mezzanine_parent_dir) + '_mezzanine.mov' if os.path.isfile(mezzanine_file): print 'Mezzanine file already exists so this script has most likely already been run.. skipping.' continue image_seq_without_container = info[0] start_number = info[1] container = info[2] start_number_length = len(start_number) number_regex = "%0" + str(start_number_length) + 'd.' 
audio_dir = source_parent_dir + '/audio' logs_dir = mezzanine_parent_dir + '/logs' source_representation_uuid = premis_description(master_object_dir, master_parent_dir + '/objects/audio', user) os.chdir(audio_dir) audio_file_list = glob('*.wav') audio_file = os.path.join(audio_dir,audio_file_list[0]) dpx_filename = image_seq_without_container + number_regex + container logfile = logs_dir + '/%s_prores.log' % os.path.basename(mezzanine_parent_dir) env_dict = os.environ.copy() # https://github.com/imdn/scripts/blob/0dd89a002d38d1ff6c938d6f70764e6dd8815fdd/ffmpy.py#L272 logfile = "\'" + logfile + "\'" env_dict['FFREPORT'] = 'file={}:level=48'.format(logfile) seq2prores= ['ffmpeg','-y','-f','image2','-framerate','24', '-start_number', start_number, '-i', root + '/' + dpx_filename ,'-i', audio_file,'-c:v','prores','-profile:v', '3','-c:a','pcm_s24le', '-ar', '48000', mezzanine_object_dir + '/' + os.path.basename(mezzanine_parent_dir) + '_mezzanine.mov','-f', 'framemd5', '-an', master_metadata_dir + '/image/' + os.path.basename(master_parent_dir) + '.framemd5', '-c:a', 'pcm_s24le', '-f', 'framemd5', '-vn', master_metadata_dir + '/audio/' + os.path.basename(master_parent_dir) + '.framemd5'] print seq2prores subprocess.call(seq2prores,env=env_dict) representation_uuid = str(uuid.uuid4()) split_list = os.path.basename(mezzanine_parent_dir).split('_') premisxml, premis_namespace, doc, premis = setup_xml(mezzanine_file) items = {"workflow":"seq2prores","oe":'n/a', "filmographic":split_list[0], "sourceAccession":split_list[1], "interventions":['placeholder'], "prepList":['placeholder'], "user":user} premis = doc.getroot() xml_info = make_premis(mezzanine_file, items, premis, premis_namespace,premisxml, representation_uuid, '????') sequence = xml_info[3] linking_representation_uuids = [] linking_representation_uuids.append(xml_info[2]) linking_representation_uuids.append(xml_info[2]) # the duplicate does nothing btw, they are a placeholder from a hardcoded function 
linking_representation_uuids.append(source_representation_uuid) create_representation(premisxml, premis_namespace, doc, premis, items,linking_representation_uuids, representation_uuid,sequence ) doc = xml_info[0] premisxml = xml_info[1] final_sip_manifest_uuid = str(uuid.uuid4()) prores_event_uuid = str(uuid.uuid4()) macMiniTelecineMachineAgent_events = [prores_event_uuid,final_sip_manifest_uuid ] macMiniTelecineMachineAgent = make_agent(premis,macMiniTelecineMachineAgent_events, '230d72da-07e7-4a79-96ca-998b9f7a3e41') macMiniTelecineMachineOSAgent_events = [prores_event_uuid,final_sip_manifest_uuid ] macMiniTelecineOSAgent = make_agent(premis,macMiniTelecineMachineOSAgent_events, '9486b779-907c-4cc4-802c-22e07dc1242f') hashlib_events = [final_sip_manifest_uuid ] hashlibAgent = make_agent(premis,hashlib_events, '9430725d-7523-4071-9063-e8a6ac4f84c4') ffmpegAgent_events = [prores_event_uuid ] ffmpegAgent = make_agent(premis,ffmpegAgent_events , 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') operatorEvents = [final_sip_manifest_uuid,prores_event_uuid] operatorAgent = make_agent(premis,operatorEvents ,user) #ffmpegAgent = make_agent(premis,[framemd5_uuid ], 'ee83e19e-cdb1-4d83-91fb-7faf7eff738e') make_event(premis, 'creation', 'Image Sequence and WAV re-encoded to Apple Pro Res 422 HQ with 48khz 24-bit PCM audio', [macMiniTelecineMachineAgent ,macMiniTelecineOSAgent, ffmpegAgent, operatorAgent ],prores_event_uuid,[representation_uuid], 'outcome') print premisxml mezzanine_mediainfoxml = "%s/%s_mediainfo.xml" % (mezzanine_metadata_dir,os.path.basename(mezzanine_parent_dir) ) tracexml = "%s/%s_mediatrace.xml" % (mezzanine_metadata_dir,os.path.basename(mezzanine_parent_dir) ) audio_mediainfoxml = "%s/%s_mediainfo.xml" % (master_metadata_dir + '/audio',os.path.basename(master_audio) ) audio_mediatracexml = "%s/%s_mediatrace.xml" % (master_metadata_dir + '/audio',os.path.basename(master_audio) ) if not os.path.isfile(audio_mediainfoxml): 
make_mediainfo(audio_mediainfoxml,'audiomediaxmlinput',master_audio) if not os.path.isfile(audio_mediatracexml): make_mediainfo(audio_mediatracexml,'audiomediatraceinput',master_audio) if not os.path.isfile(mezzanine_mediainfoxml): make_mediainfo(mezzanine_mediainfoxml,'mediaxmlinput',mezzanine_object_dir + '/' + os.path.basename(mezzanine_parent_dir) + '_mezzanine.mov') if not os.path.isfile(tracexml): make_mediatrace(tracexml,'mediatracexmlinput',mezzanine_object_dir + '/' + os.path.basename(mezzanine_parent_dir) + '_mezzanine.mov') hashlib_manifest(master_parent_dir, source_manifest, master_parent_dir) hashlib_manifest(mezzanine_parent_dir, mezzanine_manifest, mezzanine_parent_dir) make_event(premis, 'message digest calculation', 'Checksum manifest for whole package created', [macMiniTelecineMachineAgent ,macMiniTelecineOSAgent, operatorAgent],final_sip_manifest_uuid,[representation_uuid], 'source') write_premis(doc, premisxml) finish = datetime.datetime.now() append_csv(csv_report_filename, (os.path.basename( master_parent_dir), start, finish)) '''
def make_ffv1(video_files, csv_report_filename): for filename in video_files: #loop all files in directory filenoext = os.path.splitext(filename)[0] # Generate new directory names metadata_dir = "%s/metadata" % filenoext log_dir = "%s/logs" % filenoext data_dir = "%s/objects" % filenoext # Actually create the directories. os.makedirs(metadata_dir) os.makedirs(data_dir) os.makedirs(log_dir) #Generate filenames for new files. inputxml = "%s/%s_source_mediainfo.xml" % (metadata_dir, os.path.basename(filename)) inputtracexml = "%s/%s_source_mediatrace.xml" % ( metadata_dir, os.path.basename(filename)) output = "%s/%s.mkv" % ( data_dir, os.path.splitext(os.path.basename(filename))[0]) # Generate filename of ffv1.mkv without the path. outputfilename = os.path.basename(output) outputxml = "%s/%s_mediainfo.xml" % (metadata_dir, outputfilename) outputtracexml = "%s/%s_mediatrace.xml" % (metadata_dir, outputfilename) fmd5 = "%s/%s_source.framemd5" % (metadata_dir, os.path.basename(filename)) fmd5ffv1 = "%s/%s_ffv1.framemd5" % (metadata_dir, outputfilename) log = "%s/%s_log.log" % (log_dir, filename) generate_log(log, 'Input = %s' % filename) generate_log(log, 'Output = %s' % output) generate_log( log, 'makeffv1.py transcode to FFV1 and framemd5 generation of source started.' 
) ffv1_logfile = log_dir + '/%s_ffv1_transcode.log' % filename ffv1_env_dict = set_environment(ffv1_logfile) par = subprocess.check_output([ 'mediainfo', '--Language=raw', '--Full', "--Inform=Video;%PixelAspectRatio%", filename ]).rstrip() field_order = subprocess.check_output([ 'mediainfo', '--Language=raw', '--Full', "--Inform=Video;%ScanType%", filename ]).rstrip() height = subprocess.check_output([ 'mediainfo', '--Language=raw', '--Full', "--Inform=Video;%Height%", filename ]).rstrip() # Transcode video file writing frame md5 and output appropriately ffv1_command = [ 'ffmpeg', '-i', filename, '-c:v', 'ffv1', # Use FFv1 codec '-g', '1', # Use intra-frame only aka ALL-I aka GOP=1 '-level', '3', # Use Version 3 of FFv1 '-c:a', 'copy', # Copy and paste audio bitsream with no transcoding '-map', '0', '-dn', '-report', '-slicecrc', '1', '-slices', '16', ] # check for FCP7 lack of description and PAL if par == '1.000': if field_order == '': if height == '576': ffv1_command += ['-vf', 'setfield=tff, setdar=4/3'] ffv1_command += [ output, '-f', 'framemd5', '-an', # Create decoded md5 checksums for every frame of the input. 
-an ignores audio fmd5 ] print ffv1_command subprocess.call(ffv1_command, env=ffv1_env_dict) generate_log( log, 'makeffv1.py transcode to FFV1 and framemd5 generation completed.') generate_log( log, 'makeffv1.py Framemd5 generation of output file started.') fmd5_logfile = log_dir + '/%s_framemd5.log' % outputfilename fmd5_env_dict = set_environment(fmd5_logfile) pix_fmt = get_ffmpeg_fmt(filename, 'video') fmd5_command = [ 'ffmpeg', # Create decoded md5 checksums for every frame '-i', output, '-report', '-pix_fmt', pix_fmt, '-f', 'framemd5', '-an', fmd5ffv1 ] print fmd5_command subprocess.call(fmd5_command, env=fmd5_env_dict) generate_log( log, 'makeffv1.py Framemd5 generation of output file completed') source_video_size = get_mediainfo('source_video_size', "--inform=General;%FileSize%", filename) ffv1_video_size = get_mediainfo('ffv1_video_size', '--inform=General;%FileSize%', output) compression_ratio = float(source_video_size) / float(ffv1_video_size) if os.path.basename(sys.argv[0]) == 'makeffv1.py': try: shutil.copy(sys.argv[0], log_dir) except IOError: pass print 'Generating mediainfo xml of input file and saving it in %s' % inputxml make_mediainfo(inputxml, 'mediaxmlinput', filename) print 'Generating mediainfo xml of output file and saving it in %s' % outputxml make_mediainfo(outputxml, 'mediaxmloutput', output) print 'Generating mediatrace xml of input file and saving it in %s' % inputtracexml make_mediatrace(inputtracexml, 'mediatracexmlinput', filename) print 'Generating mediatrace xml of output file and saving it in %s' % outputtracexml make_mediatrace(outputtracexml, 'mediatracexmloutput', output) source_parent_dir = os.path.dirname(os.path.abspath(filename)) manifest = '%s/%s_manifest.md5' % (source_parent_dir, filenoext) generate_log(log, 'makeffv1.py MD5 manifest started') checksum_mismatches = [] with open(fmd5) as f1: with open(fmd5ffv1) as f2: for (lineno1, line1), (lineno2, line2) in itertools.izip( read_non_comment_lines(f1), 
read_non_comment_lines(f2)): if line1 != line2: if 'sar' in line1: checksum_mismatches = ['sar'] else: checksum_mismatches.append(1) if len(checksum_mismatches) == 0: print 'LOSSLESS' append_csv(csv_report_filename, (output, 'LOSSLESS', source_video_size, ffv1_video_size, compression_ratio)) generate_log(log, 'makeffv1.py Transcode was lossless') elif len(checksum_mismatches) == 1: if checksum_mismatches[0] == 'sar': print 'Image content is lossless,' ' Pixel Aspect Ratio has been altered.' ' Update ffmpeg in order to resolve the PAR issue.' append_csv( csv_report_filename, (output, 'LOSSLESS - different PAR', source_video_size, ffv1_video_size, compression_ratio)) generate_log( log, 'makeffv1.py Image content is lossless but Pixel Aspect Ratio has been altered.Update ffmpeg in order to resolve the PAR issue.' ) elif len(checksum_mismatches) > 1: print 'NOT LOSSLESS' append_csv(csv_report_filename, (output, 'NOT LOSSLESS', source_video_size, ffv1_video_size, compression_ratio)) generate_log(log, 'makeffv1.py Not Lossless.') hashlib_manifest(filenoext, manifest, source_parent_dir) if filecmp.cmp(fmd5, fmd5ffv1, shallow=False): print "YOUR FILES ARE LOSSLESS YOU SHOULD BE SO HAPPY!!!" else: print "The framemd5 text files are not completely identical." " This may be because of a lossy transcode," " or a change in metadata, most likely pixel aspect ratio." " Please analyse the framemd5 files for source and output."