def log_results(manifest, log, parent_dir):
    '''
    Updates the existing log file.
    This is copy pasted from validate.py. Eventally, both functions should be
    merged and moved into ififuncs.

    Appends the contents of `log` onto the SIP's consolidated logfile (found
    via logname_check() in <parent_dir>/logs), then rewrites `manifest` so
    that the entry for that logfile carries its new md5 checksum.
    '''
    updated_manifest = []
    # Strip the manifest suffix to recover the package basename.
    basename = os.path.basename(manifest).replace('_manifest.md5', '')
    sip_dir = parent_dir
    logs_dir = os.path.join(sip_dir, 'logs')
    logname = logname_check(basename, logs_dir)
    logfile = os.path.join(logs_dir, logname)
    ififuncs.generate_log(
        log,
        'EVENT = Logs consolidation - Log from %s merged into %s' % (log, logfile)
    )
    # Only merge when the consolidated logfile already exists.
    if os.path.isfile(logfile):
        with open(log, 'r') as fo:
            validate_log = fo.readlines()
        with open(logfile, 'ab') as ba:
            for lines in validate_log:
                ba.write(lines)
    # NOTE(review): the manifest rewrite below is assumed to run even when the
    # logfile did not pre-exist — confirm against the validate.py original.
    with open(manifest, 'r') as manifesto:
        manifest_lines = manifesto.readlines()
    for lines in manifest_lines:
        if os.path.basename(logname) in lines:
            # Replace the first 32 characters (the old md5) with the freshly
            # computed md5 of the merged logfile. The convoluted
            # slice/replace keeps the path portion (from index 32) intact.
            lines = lines[:31].replace(lines[:31], ififuncs.hashlib_md5(logfile)) + lines[32:]
        updated_manifest.append(lines)
    with open(manifest, 'wb') as fo:
        for lines in updated_manifest:
            fo.write(lines)
def main():
    '''
    Launches the functions that will validate your FFV1/MKV files.

    Walks args.input recursively; for every non-hidden .mkv file, runs
    mediaconch validation, logs the pass/fail outcome and merges the
    per-file log into the package manifest.
    '''
    args = parse_args()
    source = args.input
    user = ififuncs.get_user()
    for root, _, filenames in os.walk(source):
        for filename in filenames:
            if filename[0] != '.' and filename.endswith('.mkv'):
                # Fix: call setup() once and reuse its result. The original
                # called setup() twice per file — once to test for 'skipping'
                # and again to unpack — repeating all of setup's work.
                setup_result = setup(os.path.join(root, filename), user)
                if setup_result == 'skipping':
                    continue
                (log_name_source, user, mediaconch_xmlfile,
                 manifest, full_path, parent_dir) = setup_result
                launch_mediaconch(
                    log_name_source,
                    user,
                    mediaconch_xmlfile,
                    manifest,
                    full_path,
                )
                validation_outcome = parse_mediaconch(mediaconch_xmlfile)
                print(str(validation_outcome))
                if int(validation_outcome['fail_count']) > 0:
                    print('Validation failed!')
                    event_outcome = 'fail'
                elif int(validation_outcome['fail_count']) == 0:
                    print('validation successful')
                    event_outcome = 'pass'
                ififuncs.generate_log(
                    log_name_source,
                    'EVENT = eventType=validation, eventOutcome=%s, eventDetail=%s' % (
                        event_outcome, str(validation_outcome)
                    )
                )
                log_results(manifest, log_name_source, parent_dir)
def remove_bad_files(root_dir, log_name_source=None):
    '''
    Removes unwanted system files (.DS_Store, Thumbs.db, desktop.ini)
    found anywhere under root_dir.

    log_name_source: optional path to a log file; when given, each removal
    is recorded via generate_log().

    NOTE(review): the original referenced `log_name_source` as an undefined
    module global (NameError unless a caller happened to set it globally);
    it is now an explicit optional parameter, matching the other
    remove_bad_files() variant in this codebase.
    '''
    rm_these = ['.DS_Store', 'Thumbs.db', 'desktop.ini']
    for root, dirs, files in os.walk(root_dir):
        for name in files:
            if name in rm_these:
                path = os.path.join(root, name)
                print('***********************' + 'removing: ' + path)
                if log_name_source is not None:
                    generate_log(
                        log_name_source,
                        'EVENT = Unwanted file removal - %s was removed' % path
                    )
                os.remove(path)
def diff_report(file1, file2, log_name_source):
    '''
    Reports manifest lines present in file1 that are absent from file2.

    Each missing/mismatched entry is printed and recorded in the log via
    generate_log().
    '''
    with open(file1, 'r') as source_manifest:
        source_lines = source_manifest.readlines()
    with open(file2, 'r') as dest_manifest:
        dest_lines = dest_manifest.readlines()
    for entry in source_lines:
        if entry in dest_lines:
            continue
        message = '%s was expected, but a different value was found in destination manifest' % entry.rstrip()
        print(message)
        generate_log(log_name_source, 'ERROR = ' + message)
def test_write_capabilities(directory, log_name_source):
    '''
    Checks if drives have write access.
    Also checks if source is a file or directory (no file support right now)

    A temporary file is created and immediately deleted to prove write
    access. A regular-file path or a missing path is logged and the
    script exits.
    '''
    if os.path.isdir(directory):
        handle, temp_path = tempfile.mkstemp(dir=directory, suffix='.tmp')
        os.close(handle)  # Needed for windows.
        os.remove(temp_path)
        return
    if os.path.isfile(directory):
        print('\nFile transfer is not currently supported, only directories.\n')
        generate_log(
            log_name_source,
            'Error: Attempted file transfer. Source and Destination must be a directory'
        )
    else:
        print(' %s is either not a directory or it does not exist' % directory)
        generate_log(
            log_name_source,
            ' %s is either not a directory or it does not exist' % directory
        )
    generate_log(log_name_source, 'move.py exit')
    sys.exit()
def create_manifest(source):
    '''
    Generates a master log and creates checksum manifests for all
    subdirectories.

    For each immediate subdirectory of `source` that lacks a
    *_manifest.md5, a fixity log is started, the manifest is generated
    via hashlib_manifest(), and the log is moved into the subdirectory.
    IOError failures (e.g. filenames with special characters) are
    appended to ~/Desktop/batchfixity_errors.log.
    '''
    master_log = os.path.expanduser('~/Desktop/batchfixity_errors.log')
    os.chdir(source)
    # Fix: use the next() builtin instead of the Python-2-only
    # generator.next() method; works on Python 2.6+ and Python 3.
    for dirname in next(os.walk('.'))[1]:
        full_path = os.path.join(source, dirname)
        manifest_textfile = '%s/%s_manifest.md5' % (full_path, dirname)
        if not os.path.isfile(manifest_textfile):
            log_name = '%s/%s_fixity.log' % (
                os.path.dirname(full_path), dirname
            )
            generate_log(log_name, 'batchfixity started')
            generate_log(log_name, '%s created' % manifest_textfile)
            try:
                hashlib_manifest(full_path, manifest_textfile, full_path)
                generate_log(log_name, 'manifest creation complete')
                shutil.move(log_name, full_path)
            except IOError:
                with open(master_log, 'ab') as log:
                    log.write(
                        '%s has failed probably because of special characters like a fada\n' % full_path
                    )
                generate_log(
                    log_name,
                    'manifest has failed probably because of special characters like a fada'
                )
def diff_report(file1, file2, log_name_source):
    '''
    Analyzes checksum manifests in order to find mismatches.
    Lines from file1 that do not appear verbatim in file2 are printed and
    logged as errors.
    '''
    print('Comparing manifests to verify file transfer')
    with open(file1, 'r') as first_manifest:
        expected = first_manifest.readlines()
    with open(file2, 'r') as second_manifest:
        found = second_manifest.readlines()
    for line in expected:
        if line in found:
            continue
        detail = '%s was expected, but a different value was found in destination manifest' % line.rstrip()
        print(detail)
        generate_log(log_name_source, 'ERROR = ' + detail)
def run_loop(args):
    '''
    Launches a recursive loop to process all images sequences in your
    subdirectories.

    Walks args.source_directory, converts each detected image sequence to
    FFV1 via make_ffv1(), prints per-sequence verdicts, and finally merges
    the run log into the last sipcreator log/manifest.
    '''
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    log_name_source = os.path.join(
        args.destination,
        '%s_seq2ffv1_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S")
    )
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=seq2ffv1.py %s' % ififuncs.get_script_version('seq2ffv1.py'))
    ififuncs.generate_log(
        log_name_source,
        'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        log_name_source,
        'EVENT = agentName=%s' % user
    )
    verdicts = []
    # Fix: initialise so that the merge step below cannot raise NameError
    # when no image sequences are found in the walk.
    sipcreator_log = None
    sipcreator_manifest = None
    for source_directory, _, _ in os.walk(args.source_directory):
        output_dirname = args.destination
        images = ififuncs.get_image_sequence_files(source_directory)
        if images == 'none':
            continue
        (ffmpeg_friendly_name,
         start_number, root_filename) = ififuncs.parse_image_sequence(images)
        source_abspath = os.path.join(source_directory, ffmpeg_friendly_name)
        judgement, sipcreator_log, sipcreator_manifest = make_ffv1(
            start_number,
            source_abspath,
            output_dirname,
            root_filename,
            args,
            log_name_source
        )
        verdicts.append([root_filename, judgement])
    for verdict in verdicts:
        print("%-*s : %s" % (50, verdict[0], verdict[1]))
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py finished.')
    # Only merge when at least one sequence was actually processed.
    if sipcreator_log is not None:
        ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def check_extra_files(file1, file2, log_name_source):
    '''
    Flags entries that appear in the destination manifest (file2) but not
    in the source manifest (file1). Paths are compared from character 32
    onwards, i.e. skipping the md5 checksum prefix.
    '''
    with open(file1, 'r') as src:
        source_paths = [line[32:] for line in src.readlines()]
    with open(file2, 'r') as dst:
        dest_paths = [line[32:] for line in dst.readlines()]
    for path in dest_paths:
        if path in source_paths:
            continue
        message = '%s is in your destination manifest but is not in the source manifest' % path.rstrip()
        print(message)
        generate_log(log_name_source, 'ERROR = ' + message)
def validate(manifest_dict, manifest, missing_files, log_name_source):
    '''
    Re-hashes every file listed in manifest_dict ({path: md5}) and compares
    against the stored checksum, logging each mismatch and printing a
    summary of mismatched and missing files.
    '''
    ififuncs.generate_log(
        log_name_source,
        'Validating %s ' % manifest
    )
    os.chdir(os.path.dirname(manifest))
    mismatches = []
    for path in sorted(manifest_dict.keys()):
        print('Validating %s' % path)
        current_hash = hashlib_md5(path)
        if current_hash == manifest_dict[path]:
            print('%s has validated' % path)
        else:
            detail = '%s has mismatched checksum - %s expected - %s hashed' % (
                path, manifest_dict[path], current_hash)
            print(detail)
            ififuncs.generate_log(log_name_source, detail)
            mismatches.append(detail)
    error_counter = len(mismatches)
    if error_counter > 0:
        print('\n\n*****ERRORS***********!!!!\n***********\nThe number of mismatched checksums is: %s\n***********\n' % error_counter)
        ififuncs.generate_log(
            log_name_source,
            'The number of mismatched checksums is: %s' % error_counter
        )
        print('***** List of mismatched files*****')
        for detail in mismatches:
            print(detail)
    elif error_counter == 0:
        if missing_files > 0:
            print('ERRORS - The number of missing files: %s' % missing_files)
            ififuncs.generate_log(
                log_name_source,
                'ERRORS - The number of mismatched checksums is: %s' % missing_files
            )
        elif missing_files == 0:
            print('All checksums have validated')
            ififuncs.generate_log(
                log_name_source,
                'All checksums have validated'
            )
def overwrite_check(
        destination, log_name_source,
        destination_final_path, manifest_destination
):
    '''
    Possibly redundant - this launches other overwrite functions.
    Verifies write access to the destination, then returns
    (overwrite_destination_manifest, overwrite_destination_dir).
    '''
    try:
        test_write_capabilities(destination, log_name_source)
    except OSError:
        print('You cannot write to your destination!')
        generate_log(
            log_name_source,
            'EVENT = I/O Test - Failure - No write access to destination directory.'
        )
        sys.exit()
    manifest_answer = check_overwrite(manifest_destination)
    directory_answer = check_overwrite_dir(destination_final_path)
    return manifest_answer, directory_answer
def manifest_existence(
        manifest_root, manifest_sidecar, manifest,
        source_count, file_list, log_name_source
):
    '''
    Checks for the three different kinds of source manifests:
    Sidecar, desktop and root of drive.

    Uses the first manifest found (priority: root, sidecar, desktop),
    prints its label ('1'/'2'/'3'), and if the file count disagrees with
    source_count, reports the differences, logs the failure and exits.
    Returns (proceed, count_in_manifest, manifest_files).
    '''
    count_in_manifest = 0
    manifest_files = []
    proceed = 'n'
    # First existing manifest wins, in this priority order.
    for label, candidate in (
            ('1', manifest_root),
            ('2', manifest_sidecar),
            ('3', manifest)):
        if os.path.isfile(candidate):
            print(label)
            proceed = 'y'
            manifest_info = manifest_file_count(candidate)
            count_in_manifest = manifest_info[0]
            manifest_files = manifest_info[1]
            break
    if proceed == 'y':
        if source_count != count_in_manifest:
            print('checking which files are different')
            for item in file_list:
                if item not in manifest_files:
                    print('%s is present in your source directory but not in the source manifest' % item)
            for item in manifest_files:
                if item not in file_list:
                    print('%s is present in manifest but is missing in your source files' % item)
            print('This manifest may be outdated as the number of files in your directory does not match the number of files in the manifest')
            print('There are %s files in your source directory %s in the manifest' % (source_count, count_in_manifest))
            generate_log(log_name_source, 'EVENT = Existing source manifest check - Failure - The number of files in the source directory is not equal to the number of files in the source manifest ')
            sys.exit()
    return proceed, count_in_manifest, manifest_files
def make_destination_manifest(
        overwrite_destination_manifest, log_name_source,
        rootpos, destination_final_path,
        manifest_destination, destination
):
    '''
    Um, write destination manifest.

    Generates the destination checksum manifest unless the user declined
    the overwrite ('N'/'n'), in which case unwanted system files are
    removed instead. Returns the list of files written into the manifest,
    or None when generation was declined.
    '''
    # Fix: was unbound, raising NameError on the declined-overwrite path.
    files_in_manifest = None
    if overwrite_destination_manifest not in ('N', 'n'):
        if overwrite_destination_manifest is None:
            generate_log(
                log_name_source,
                'EVENT = Generating destination manifest: status=started, eventType=message digest calculation, module=hashlib'
            )
        else:
            generate_log(
                log_name_source,
                'EVENT = Destination Manifest Overwrite - Destination manifest already exists - Overwriting.'
            )
        print('Generating destination manifest')
        # Both branches of the original `if rootpos == 'y'` were
        # byte-identical; the dead conditional has been collapsed.
        files_in_manifest = make_manifest(
            destination_final_path,
            manifest_destination, destination
        )
        generate_log(
            log_name_source,
            'EVENT = Generating destination manifest: status=completed'
        )
    else:
        generate_log(
            log_name_source,
            'EVENT = File Transfer Overwrite - Destination directory already exists - Not Overwriting.'
        )
        remove_bad_files(destination_final_path, log_name_source)
    return files_in_manifest
def check_extra_files(file1, file2, log_name_source):
    '''
    Are there any extra files in the destination directory?
    Compares the path portion (character 32 onwards, after the md5) of
    each destination manifest line against the source manifest and logs
    anything unexpected.
    '''
    with open(file1, 'r') as file1_manifest:
        sourcelist_files = [entry[32:] for entry in file1_manifest.readlines()]
    with open(file2, 'r') as file2_manifest:
        destlist_files = [entry[32:] for entry in file2_manifest.readlines()]
    unexpected = [p for p in destlist_files if p not in sourcelist_files]
    for extra in unexpected:
        report = '%s is in your destination manifest but is not in the source manifest' % extra.rstrip()
        print(report)
        generate_log(log_name_source, 'ERROR = ' + report)
def make_sip(video_files):
    '''
    Builds a SIP-style package for each file in video_files: creates
    metadata/logs/objects directories, generates framemd5 + mediainfo +
    mediatrace sidecars, moves the source file into objects/ and writes
    an md5 manifest beside the package.
    '''
    for filename in video_files:  # loop all files in directory
        filenoext = os.path.splitext(filename)[0]
        # Generate new directory names
        metadata_dir = "%s/metadata" % filenoext
        log_dir = "%s/logs" % filenoext
        data_dir = "%s/objects" % filenoext
        # Actually create the directories.
        os.makedirs(metadata_dir)
        os.makedirs(data_dir)
        os.makedirs(log_dir)
        # Generate filenames for new files.
        inputxml = "%s/%s_mediainfo.xml" % (
            metadata_dir, os.path.basename(filename)
        )
        inputtracexml = "%s/%s_mediatrace.xml" % (
            metadata_dir, os.path.basename(filename)
        )
        fmd5 = "%s/%s.framemd5" % (
            metadata_dir, os.path.basename(filename)
        )
        log = "%s/%s_log.log" % (log_dir, filename)
        generate_log(log, 'Input = %s' % filename)
        # FFREPORT-style log target for the framemd5 pass; set_environment()
        # presumably injects it into the subprocess env — confirm in helper.
        fmd5_logfile = log_dir + '/%s_framemd5.log' % filename
        fmd5_env_dict = set_environment(fmd5_logfile)
        fmd5_command = [
            'ffmpeg',  # Create decoded md5 checksums for every frame
            '-i', filename,
            '-report',
            '-f', 'framemd5', '-an',
            fmd5
        ]
        print fmd5_command
        subprocess.call(fmd5_command, env=fmd5_env_dict)
        generate_log(
            log,
            'makeffv1.py Framemd5 generation of output file completed'
        )
        # Keep a copy of the running script alongside the logs when invoked
        # as makeffv1.py.
        if os.path.basename(sys.argv[0]) == 'makeffv1.py':
            shutil.copy(sys.argv[0], log_dir)
        print 'Generating mediainfo xml of input file and saving it in %s' % inputxml
        make_mediainfo(inputxml, 'mediaxmlinput', filename)
        print 'Generating mediatrace xml of input file and saving it in %s' % inputtracexml
        make_mediatrace(inputtracexml, 'mediatracexmlinput', filename)
        source_parent_dir = os.path.dirname(os.path.abspath(filename))
        manifest = '%s/%s_manifest.md5' % (source_parent_dir, filenoext)
        # Move the source into objects/ and hash the whole package.
        if os.path.isfile(filename):
            shutil.move(filename, data_dir)
            generate_log(log, 'dvsip.py DV file moved to %s' % data_dir)
            generate_log(log, 'dvsip.py MD5 manifest started')
            hashlib_manifest(filenoext, manifest, source_parent_dir)
def remove_bad_files(root_dir, log_name_source):
    '''
    Stolen and adapted from Ben Fino-Radin. Removes annoying files.
    Deletes .DS_Store/Thumbs.db/desktop.ini anywhere under root_dir,
    logging each removal when log_name_source is provided.
    '''
    print('Checking if any .Ds_Stores/Thumbs.db or Desktop.ini files exist')
    unwanted = ('.DS_Store', 'Thumbs.db', 'desktop.ini')
    for dirpath, _, filenames in os.walk(root_dir):
        for candidate in filenames:
            if candidate not in unwanted:
                continue
            target = os.path.join(dirpath, candidate)
            print('***********************' + 'removing: ' + target)
            if log_name_source is not None:
                generate_log(
                    log_name_source,
                    'EVENT = Unwanted file removal - %s was removed' % target
                )
            try:
                os.remove(target)
            except OSError:
                print("can't delete as source is read-only")
def parse_manifest(manifest, log_name_source):
    '''
    Reads an md5 manifest and returns ({path: checksum}, missing_count).
    Paths are taken from character 34 onwards (after checksum + separator),
    normalised to forward slashes; files that no longer exist are logged
    and counted rather than included in the dict.
    '''
    missing_files = 0
    manifest_dict = {}
    os.chdir(os.path.dirname(manifest))
    with open(manifest, 'rb') as manifest_object:
        manifest_list = manifest_object.readlines()
    for entry in manifest_list:
        checksum = entry.split(' ')[0]
        path = entry[34:].replace('\r', '').replace('\n', '')
        path = path.replace('\\', '/')
        if not os.path.isfile(path):
            ififuncs.generate_log(
                log_name_source,
                '%s is missing' % path
            )
            print('%s is missing' % path)
            missing_files += 1
        elif os.path.isfile(path):
            manifest_dict[path] = checksum
    if missing_files > 0:
        print('The number of missing files: %s' % missing_files)
        ififuncs.generate_log(
            log_name_source,
            'The number of missing files is: %s' % missing_files
        )
    elif missing_files == 0:
        print('All files present')
        ififuncs.generate_log(
            log_name_source,
            'All files present'
        )
    return manifest_dict, missing_files
def consolidate_manifests(path, directory, new_log_textfile):
    '''
    Consolidates all manifests in the objects folder
    moves old manifests into logs
    renames manifest with uuid and updates paths in manifest textfile.

    Returns the path of the new consolidated manifest textfile.
    '''
    # The package directory name is expected to be a UUID.
    uuid = os.path.basename(path)
    objects_dir = os.path.join(path, directory)
    new_manifest_textfile = os.path.join(
        os.path.dirname(path), uuid + '_manifest.md5'
    )
    collective_manifest = []
    for manifest in os.listdir(objects_dir):
        if manifest.endswith('.md5'):
            if manifest[0] != '.':
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Manifest consolidation - Checksums from %s merged into %s' % (os.path.join(objects_dir, manifest), new_manifest_textfile)
                )
                with open(os.path.join(objects_dir, manifest), 'r') as fo:
                    manifest_lines = fo.readlines()
                    for i in manifest_lines:
                        # This is what appends the new path to existing paths.
                        # NOTE(review): i[34:] assumes a 32-char md5 followed
                        # by a two-character separator; confirm the separator
                        # written back below matches that width, otherwise
                        # downstream parsers using [34:] will truncate paths.
                        new_manifest_path = uuid + '/%s/' % directory + i[34:]
                        collective_manifest.append(
                            i[:32] + ' ' + new_manifest_path
                        )
                # Cut and paste old manifests into the log directory
                shutil.move(
                    objects_dir + '/' + manifest, os.path.join(path, 'logs')
                )
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Manifest movement - Manifest from %s to %s' % (objects_dir + '/' + manifest, os.path.join(path, 'logs'))
                )
    with open(new_manifest_textfile, 'ab') as manifest_object:
        for checksums in collective_manifest:
            manifest_object.write(checksums)
    return new_manifest_textfile
def control_flow(manifest_sidecar, log_name_source, manifest, rootpos, args, source):
    '''
    Chooses which source manifest to use: an existing sidecar manifest wins;
    otherwise a manifest is generated unless one already exists.
    Returns (manifest_sidecar, manifest, rootpos).
    '''
    if os.path.isfile(manifest_sidecar):
        print('Manifest Sidecar exists - Source manifest Generation will be skipped.')
        generate_log(
            log_name_source,
            'EVENT = Manifest sidecar exists - source manifest generation will be skipped'
        )
        manifest = manifest_sidecar
    elif not os.path.isfile(manifest):
        try:
            print('Generating source manifest')
            generate_log(log_name_source, 'EVENT = Generating source manifest: status=started, eventType=message digest calculation, module=hashlib')
            if rootpos == 'y':
                make_manifest(args.source, manifest, args.source)
            else:
                make_manifest(source, manifest, os.path.dirname(source))
            generate_log(log_name_source, 'EVENT = Generating source manifest: status=completed')
        except OSError:
            print('You do not have access to this directory. Perhaps it is read only, or the wrong file system\n')
            sys.exit()
    return manifest_sidecar, manifest, rootpos
def verify_copy(manifest, manifest_destination, log_name_source, overwrite_destination_manifest, files_in_manifest, destination_count, source_count):
    '''
    Compares source and destination manifests byte-for-byte and logs the
    transfer verdict. On mismatch, unless the destination-manifest
    overwrite was declined ('N'/'n'), runs diff and extra-file reports
    and logs the failure details.
    '''
    if filecmp.cmp(manifest, manifest_destination, shallow=False):
        print("Your files have reached their destination and the checksums match")
        generate_log(
            log_name_source,
            'EVENT = File Transfer Judgement - Success, eventOutcome=pass'
        )
        return
    print("***********YOUR CHECKSUMS DO NOT MATCH*************")
    if overwrite_destination_manifest not in ('N', 'n'):
        generate_log(
            log_name_source,
            'EVENT = File Transfer Outcome - Failure, eventOutcome=fail'
        )
        print(' There are: \n %s files in your destination manifest \n' % files_in_manifest)
        print(' %s files in your destination \n %s files at source' % (
            destination_count, source_count
        ))
        diff_report(manifest, manifest_destination, log_name_source)
        check_extra_files(manifest, manifest_destination, log_name_source)
        generate_log(log_name_source, 'EVENT = File Transfer Failure Explanation - %s files in your destination, %s files at source' % (destination_count, source_count))
    else:
        print(' %s files in your destination \n %s files at source' % (
            destination_count, source_count
        ))
def launch_mediaconch(log_name_source, user, mediaconch_xmlfile, manifest, full_path):
    '''
    Run mediaconch on files.
    Logs the run metadata, records the installed mediaconch version,
    generates the mediaconch XML report if absent, and registers the
    report in the package manifest.
    '''
    ififuncs.generate_log(
        log_name_source, 'EVENT = ffv1mkvvalidate.py started'
    )
    ififuncs.generate_log(
        log_name_source, 'agentName=%s' % user
    )
    script_version = ififuncs.get_script_version('ffv1mkvvalidate.py')
    ififuncs.generate_log(
        log_name_source, 'eventDetail=ffv1mkvvalidate.py %s' % script_version
    )
    version_output = subprocess.check_output(['mediaconch', '-v'])
    ififuncs.generate_log(
        log_name_source,
        'agentName=mediaconch, agentversion=%s' % version_output.rstrip()
    )
    if not os.path.isfile(mediaconch_xmlfile):
        ififuncs.make_mediaconch(full_path, mediaconch_xmlfile)
    ififuncs.manifest_update(manifest, mediaconch_xmlfile)
def copy_dir():
    '''
    Performs the platform-specific bulk copy: robocopy on Windows, rsync on
    macOS, cp on Linux.

    NOTE(review): this function takes no arguments and reads names that are
    not defined in this module scope (_platform, source, destination,
    destination_final_path, dirname, rootpos, log_name_source) — presumably
    module globals set by the calling script; confirm before reuse.
    '''
    if _platform == "win32":
        # /E recurse incl. empty dirs; /XA:SH and /XD skip hidden/system
        # entries; attribute flags strip S/H and set read-only on the copy.
        subprocess.call(['robocopy', source, destination_final_path, '/E', '/XA:SH', '/XD', '.*', '/XD', '*System Volume Information*', '/XD', '$Recycle.bin', '/a-:SH', '/a+:R'])
        generate_log(log_name_source, 'EVENT = File Transfer - Windows O.S - Software=Robocopy')
    elif _platform == "darwin":
        # https://github.com/amiaopensource/ltopers/blob/master/writelto#L51
        if rootpos == 'y':
            if not os.path.isdir(destination + '/' + dirname):
                os.makedirs(destination + '/' + dirname)
            cmd = ['rsync', '-rtv', '--exclude=.*', '--exclude=.*/', '--stats', '--progress', source, destination + '/' + dirname]
        else:
            cmd = ['rsync', '-rtv', '--exclude=.*', '--exclude=.*/', '--stats', '--progress', source, destination]
        generate_log(log_name_source, 'EVENT = File Transfer - OSX - Software=rsync')
        print cmd
        subprocess.call(cmd)
    elif _platform == "linux2":
        # https://github.com/amiaopensource/ltopers/blob/master/writelto#L51
        # -n: no-clobber, so existing destination files are never overwritten.
        cmd = ['cp', '--preserve=mode,timestamps', '-nRv', source, destination_final_path]
        generate_log(log_name_source, 'EVENT = File Transfer - Linux- Software=cp')
        subprocess.call(cmd)
def main():
    '''
    Entry point: parses arguments, creates a timestamped log on the
    desktop, then checks and validates the manifest for the input path.
    '''
    parser = make_parser()
    args = parser.parse_args()
    desktop_logs_dir = make_desktop_logs_dir()
    stamped_basename = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (desktop_logs_dir, stamped_basename)
    ififuncs.generate_log(log_name_source, 'EVENT = validate.py started')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py')
    )
    ififuncs.generate_log(log_name_source, 'Command line arguments: %s' % args)
    manifest = check_manifest(args.input, log_name_source)
    log_results(manifest, log_name_source, args)
def main(args_):
    '''
    Aggregates technical metadata for a package and writes a PBCore CSV.

    All files under args.input are treated as a single instantiation;
    per-file values are collected into lists, additive values
    (duration, filesize, framecount) are summed, and the last iteration's
    scalar values feed the CSV row. With args.p the record and logs are
    written into the SIP and its manifests updated.
    '''
    # if multiple file are present, this script will treat them as a single
    # instantiation/representation and get aggregate metadata about the whole
    # package. For now, this will be a clumsy implementation - the first file
    # will provide most metadata. Things like duration/bitrate/filesize
    # will be calculated as a whole.
    # Although another way would be that every call is looped, and if
    # this could catch files that should not be in the package, eg. a 4:2:2
    # file in a 4:2:0 package..
    # yup - do it that way!
    args = parse_args(args_)
    all_files = ififuncs.recursive_file_list(args.input)
    silence = True
    audio_only = True
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    acquisition_type = ''
    if args.acquisition_type:
        acquisition_type = ififuncs.get_acquisition_type(args.acquisition_type)[0]
    instantiationIdentif = ''
    # The package directory whose name is NOT a UUID becomes the identifier.
    for dirs in os.listdir(args.input):
        if ififuncs.validate_uuid4(dirs) is None:
            instantiationIdentif = dirs
    Accession_Number = get_accession_number(args.input)
    if args.reference:
        Reference_Number = args.reference.upper()
    else:
        Reference_Number = get_reference_number(args.input)
    if args.p:
        # Locate the SIP's metadata/ and logs/ directories.
        for root, _, filenames in os.walk(args.input):
            if os.path.basename(root) == 'metadata':
                metadata_dir = root
            elif os.path.basename(root) == 'logs':
                logs_dir = root
        csv_filename = os.path.join(metadata_dir, Accession_Number + '_%s_pbcore.csv' % Reference_Number)
        sipcreator_log = os.path.join(
            logs_dir, instantiationIdentif + '_sip_log.log'
        )
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = makepbcore.py started'
        )
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py')
        )
        ififuncs.generate_log(
            sipcreator_log,
            'Command line arguments: %s' % args
        )
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = agentName=%s' % user
        )
    else:
        csv_filename = 'blaa.csv'
    print((' - Metadata will be stored in %s' % csv_filename))
    # NOTE(review): md5_manifest / sha512_manifest stay unbound if no
    # matching manifest file exists in args.input — the args.p block at the
    # end would then raise NameError; confirm manifests always exist here.
    for filenames in os.listdir(args.input):
        if '_manifest.md5' in filenames:
            md5_manifest = os.path.join(args.input, filenames)
        elif 'manifest-sha512.txt' in filenames:
            sha512_manifest = os.path.join(args.input, filenames)
    make_csv(csv_filename)
    # Aggregate accumulators across all files in the package.
    ms = 0
    FrameCount = 0
    instantFileSize_byte = 0
    instantFileSize_gigs = 0
    scan_types = []
    matrix_list = []
    transfer_list = []
    colour_primaries_list = []
    color_spaces = []
    chroma = []
    frame_sizes = []
    par_list = []
    container_list = []
    fps_list = []
    sample_rate_list = []
    track_count_list = []
    interlace_list = []
    compression_list = []
    pix_fmt_list = []
    audio_fmt_list = []
    audio_codecid_list = []
    audio_codec_list = []
    au_bitdepth_list = []
    # NOTE(review): video_codecid_list is only appended to in the
    # commented-out block below, so it is always empty in active code.
    video_codecid_list = []
    video_codec_version_list = []
    video_codec_profile_list = []
    timecode_list = []
    channels_list = []
    for source in all_files:
        # Two mediainfo passes: PBCore2 output and raw XML output.
        metadata = subprocess.check_output(['mediainfo', '--Output=PBCore2', source])
        new_metadata = subprocess.check_output(['mediainfo', '--Output=XML', source])
        root = etree.fromstring(metadata)
        new_root = etree.fromstring(new_metadata)
        print(((' - Analysing %s') % source))
        pbcore_namespace = root.xpath('namespace-uri(.)')
        mediainfo_namespace = new_root.xpath('namespace-uri(.)')
        # NOTE(review): track_type is never used below.
        track_type = root.xpath('//ns:essenceTrackType', namespaces={'ns': pbcore_namespace})
        new_track_type = new_root.xpath('//ns:track', namespaces={'ns': mediainfo_namespace})
        if len(new_track_type) > 0:
            for track in new_track_type:
                if track.attrib['type'] == 'Video':
                    audio_only = False
                    essenceTrackEncodvid = ififuncs.get_metadata(
                        "ns:Format",
                        track, mediainfo_namespace
                    )
                    #vcodec_attributes = get_attributes(track.getparent(), pbcore_namespace)
                    #vcodec_attributes = 'TODO'
                    video_codecid = ififuncs.get_metadata(
                        "ns:CodecID",
                        track, mediainfo_namespace
                    )
                    video_codec_version = ififuncs.get_metadata(
                        "ns:Format_Version",
                        track, mediainfo_namespace
                    )
                    video_codec_profile = ififuncs.get_metadata(
                        "ns:Format_Profile",
                        track, mediainfo_namespace
                    )
                    video_codec_version_list.append(video_codec_version)
                    video_codec_profile_list.append(video_codec_profile)
                elif track.attrib['type'] == 'Audio':
                    silence = False
                    essenceTrackEncod_au = ififuncs.get_metadata(
                        "ns:Format",
                        track, mediainfo_namespace
                    )
                    audio_codec_list.append(essenceTrackEncod_au)
                    #acodec_attributes = get_attributes(track.getparent(), pbcore_namespace)
                    audio_codecid = ififuncs.get_metadata(
                        "ns:CodecID",
                        track, mediainfo_namespace
                    )
                    essenceTrackSampling = ififuncs.get_mediainfo(
                        'samplerate',
                        '--inform=Audio;%SamplingRate_String%',
                        source
                    )
                    sample_rate_list.append(essenceTrackSampling)
                    essenceBitDepth_au = ififuncs.get_metadata(
                        "ns:BitDepth",
                        track, mediainfo_namespace
                    )
                    audio_codecid_list.append(audio_codecid)
                    au_bitdepth_list.append(essenceBitDepth_au)
                    channels = ififuncs.get_metadata(
                        "//ns:Channels",
                        track, mediainfo_namespace
                    )
                    channels_list.append(channels)
        if audio_only:
            essenceTrackEncodvid = 'n/a'
            video_codecid = 'n/a'
            video_codec_version = 'n/a'
            video_codec_profile = 'n/a'
        ScanType = ififuncs.get_metadata(
            "//ns:ScanType",
            new_root, mediainfo_namespace
        )
        scan_types.append(ScanType)
        matrix_coefficients = ififuncs.get_metadata(
            "//ns:matrix_coefficients",
            new_root, mediainfo_namespace
        )
        timecode_source, starting_timecode = get_timecode(pbcore_namespace, root, source)
        timecode_list.append(starting_timecode)
        matrix_list.append(matrix_coefficients)
        transfer_characteris = ififuncs.get_metadata(
            "//ns:transfer_characteristics",
            new_root, mediainfo_namespace
        )
        transfer_list.append(transfer_characteris)
        colour_primaries = ififuncs.get_metadata(
            "//ns:colour_primaries",
            new_root, mediainfo_namespace
        )
        colour_primaries_list.append(colour_primaries)
        try:
            if audio_only:
                FrameCount = 'n/a'
                print(FrameCount)
            else:
                FrameCount += int(ififuncs.get_metadata(
                    "//ns:FrameCount",
                    new_root, mediainfo_namespace
                ))
        except TypeError:
            # workaround for silent pic in DCP
            FrameCount = 'n/a'
        instantFileSize_byte += int(ififuncs.get_metadata(
            "//ns:FileSize",
            new_root, mediainfo_namespace
        ))
        instantDataRate = round(float(ififuncs.get_mediainfo(
            'OverallBitRate', '--inform=General;%OverallBitRate%', source
        )) / 1000 / 1000, 2)
        instantTracks = ififuncs.get_number_of_tracks(source)
        track_count_list.append(instantTracks)
        ms += ififuncs.get_milliseconds(source)
        ColorSpace = ififuncs.get_metadata(
            "//ns:ColorSpace",
            new_root, mediainfo_namespace
        )
        color_spaces.append(ColorSpace)
        ChromaSubsampling = get_metadata(
            "//ns:ChromaSubsampling",
            new_root, mediainfo_namespace
        )
        chroma.append(ChromaSubsampling)
        instantMediaty = get_metadata(
            "//ns:instantiationMediaType",
            root, pbcore_namespace
        )
        if audio_only:
            essenceFrameSize = 'n/a'
        else:
            essenceFrameSize = get_metadata(
                "//ns:essenceTrackFrameSize",
                root, pbcore_namespace
            )
        frame_sizes.append(essenceFrameSize)
        PixelAspectRatio = ififuncs.get_metadata(
            "//ns:PixelAspectRatio",
            new_root, mediainfo_namespace
        )
        par_list.append(PixelAspectRatio)
        general_root = new_root.xpath("//ns:track[@type='General']", namespaces={'ns': mediainfo_namespace})[0]
        instantiationStandar = ififuncs.get_metadata(
            "ns:Format",
            general_root, mediainfo_namespace
        )
        container_list.append(instantiationStandar)
        essenceFrameRate = ififuncs.get_metadata(
            "//ns:FrameRate",
            new_root, mediainfo_namespace
        )
        fps_list.append(essenceFrameRate)
        essenceAspectRatio = ififuncs.get_mediainfo(
            'DAR', '--inform=Video;%DisplayAspectRatio_String%', source
        )
        Interlacement = ififuncs.get_metadata(
            "//ns:ScanOrder",
            new_root, mediainfo_namespace
        )
        # FFV1/MKV seems to have this scanorder metadata here rather than Interlacement
        # FFV1/MKV is the only example I've seen so far that behaves like this :|
        # It could be that Interlacement is set at a codec level for FFV1, but others are
        # declared at the container level..
        if Interlacement == 'n/a':
            Interlacement = get_metadata(
                "//ns:essenceTrackAnnotation[@annotationType='ScanOrder']",
                root, pbcore_namespace
            )
        interlace_list.append(Interlacement)
        Compression_Mode = ififuncs.get_metadata(
            "//ns:Compression_Mode",
            new_root, mediainfo_namespace
        )
        colour_range = ififuncs.get_metadata(
            "//ns:colour_range",
            new_root, mediainfo_namespace
        )
        # this needs to be clarified as it exists in general and codec
        format_version = ififuncs.get_metadata(
            "ns:Format_Version",
            general_root, mediainfo_namespace
        )
        app_company_name = ififuncs.get_metadata(
            "//ns:Encoded_Application_CompanyName",
            new_root, mediainfo_namespace
        )
        app_name = ififuncs.get_metadata(
            "//ns:Encoded_Application_Name",
            new_root, mediainfo_namespace
        )
        app_version = ififuncs.get_metadata(
            "//ns:Encoded_Application_Version",
            new_root, mediainfo_namespace
        )
        library_name = ififuncs.get_metadata(
            "//ns:Encoded_Library_Name",
            new_root, mediainfo_namespace
        )
        if library_name == 'n/a':
            library_name = ififuncs.get_metadata(
                "//ns:Encoded_Library",
                general_root, mediainfo_namespace
            )
        library_version = ififuncs.get_metadata(
            "//ns:Encoded_Library_Version",
            new_root, mediainfo_namespace
        )
        compression_list.append(Compression_Mode)
        instantiationDate_mo = get_metadata(
            "//ns:instantiationDate[@dateType='file modification']",
            root, pbcore_namespace
        )
        instantDate_other = 'n/a'
        instantDate_type = 'n/a'
        pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video')
        pix_fmt_list.append(pix_fmt)
        audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio')
        audio_fmt_list.append(audio_fmt)
        # NOTE(review): the first argument label 'duration' looks wrong —
        # the inform string actually queries the video BitDepth; confirm
        # against ififuncs.get_mediainfo (the label may be cosmetic only).
        essenceBitDepth_vid = ififuncs.get_mediainfo(
            'duration', '--inform=Video;%BitDepth%', source
        )
    # Defaults for packages with no audio tracks at all.
    if silence:
        audio_codecid = 'n/a'
        essenceBitDepth_au = 'n/a'
        essenceTrackEncod_au = 'n/a'
        essenceTrackSampling = 'n/a'
        channels = 'n/a'
    '''
    video_codecid = vcodec_attributes['ref']
    video_codecid_list.append(video_codecid)
    try:
        video_codec_version = vcodec_attributes['version']
    except KeyError:
        video_codec_version = 'n/a'
    try:
        video_codec_profile = vcodec_attributes['annotation'][8:]
    except KeyError:
        video_codec_profile = 'n/a'
    '''
    metadata_error = ''
    metadata_list = [
        scan_types,
        matrix_list,
        transfer_list,
        colour_primaries_list,
        color_spaces,
        chroma,
        frame_sizes,
        par_list,
        container_list,
        fps_list,
        sample_rate_list,
        track_count_list,
        interlace_list,
        compression_list,
        pix_fmt_list,
        audio_fmt_list,
        audio_codecid_list,
        audio_codec_list,
        au_bitdepth_list,
        video_codecid_list,
        video_codec_version_list,
        video_codec_profile_list,
        channels_list,
        timecode_list
    ]
    # Any list with more than one distinct value indicates the files in the
    # package disagree on that attribute.
    for i in metadata_list:
        if len(set(i)) > 1:
            metadata_error += 'WARNING - Your metadata values are not the same for all files - but this could be a false positive if dealing with atomised audio and video as with DCP: %s\n' % set(i)
            print(metadata_error)
            if args.p:
                ififuncs.generate_log(
                    sipcreator_log,
                    'EVENT = Metadata mismatch - Your metadata values are not the same for all files - but this could be a false positive if dealing with atomised audio and video as with DCP: %s' % set(i)
                )
    tc = ififuncs.convert_millis(ms)
    # NOTE(review): duration conversion assumes 25fps here.
    instantiationDuratio = ififuncs.convert_timecode(25, tc)
    if args.donor:
        Donor = args.donor
    else:
        Donor = ''
    Edited_By = user
    Date_Created = ''
    Date_Last_Modified = ''
    Film_Or_Tape = 'Digital AV Object'
    Date_Of_Donation = ''
    reproduction_creator = ''
    if args.acquisition_type:
        if acquisition_type == 'Reproduction':
            Date_Of_Donation = instantiationDate_mo.split('T')[0]
            # if a reproduction, then there's no Donor/transfer of title.
            Donor = 'n/a'
        else:
            Date_Of_Donation = args.donation_date
    Habitat = ''
    backup_habitat = ''
    Type_Of_Deposit = acquisition_type
    if args.depositor_reference:
        Depositor_Reference = args.depositor_reference
    else:
        Depositor_Reference = ''
    Master_Viewing = 'Preservation Object'
    Language_Version = ''
    Condition_Rating = ''
    Companion_Elements = ''
    TTape_Origin = args.parent
    EditedNew = user
    FIO = 'In'
    CollectionTitle = ''
    Created_By = user
    instantTimeStart = 'n/a'
    instantFileSize_gigs = round(
        float(instantFileSize_byte) / 1024 / 1024 / 1024, 3
    )
    instantColors = 'n/a'
    instantLanguage = 'n/a'
    instantAltMo = 'n/a'
    instantiationChanCon = 'n/a'
    '''
    no idea why these are here
    colour_range = colour_range
    format_version = format_version
    '''
    TimeCode_FirstFrame = process_mixed_values(timecode_list)
    pix_fmt = process_mixed_values(pix_fmt_list)
    TimeCode_Source = timecode_source
    reproduction_reason = ''
    dig_object_descrip = ififuncs.get_digital_object_descriptor(args.input)
    dcp_check = ififuncs.find_cpl(args.input)
    # DCPs override several whole-package values from the CPL.
    if dcp_check is not None:
        essenceFrameSize, ChromaSubsampling, ColorSpace, FrameCount, essenceAspectRatio, instantiationDuratio, PixelAspectRatio, ScanType, dig_object_descrip, instantTracks, instantDataRate, essenceBitDepth_vid = check_dcp(dcp_check)
    ififuncs.append_csv(csv_filename, [
        Reference_Number,
        Donor,
        Edited_By,
        Date_Created,
        Date_Last_Modified,
        Film_Or_Tape,
        Date_Of_Donation,
        Accession_Number,
        Habitat,
        backup_habitat,
        TTape_Origin,
        Type_Of_Deposit,
        Depositor_Reference,
        Master_Viewing,
        Language_Version,
        Condition_Rating,
        Companion_Elements,
        EditedNew,
        FIO,
        CollectionTitle,
        Created_By,
        instantiationIdentif,
        instantDate_other,
        instantDate_type,
        instantiationDate_mo,
        instantiationStandar,
        instantMediaty,
        instantFileSize_byte,
        instantFileSize_gigs,
        instantTimeStart,
        instantDataRate,
        instantTracks,
        instantColors,
        instantLanguage,
        instantAltMo,
        essenceTrackEncodvid,
        essenceFrameRate,
        essenceTrackSampling,
        essenceBitDepth_vid,
        essenceFrameSize,
        essenceAspectRatio,
        essenceTrackEncod_au,
        essenceBitDepth_au,
        instantiationDuratio,
        instantiationChanCon,
        PixelAspectRatio,
        FrameCount,
        ColorSpace,
        ChromaSubsampling,
        ScanType,
        Interlacement,
        Compression_Mode,
        colour_primaries,
        transfer_characteris,
        matrix_coefficients,
        pix_fmt,
        audio_fmt,
        audio_codecid,
        video_codecid,
        video_codec_version,
        video_codec_profile,
        channels,
        colour_range,
        format_version,
        TimeCode_FirstFrame,
        TimeCode_Source,
        app_company_name,
        app_name,
        app_version,
        library_name,
        library_version,
        reproduction_creator,
        reproduction_reason,
        dig_object_descrip,
    ])
    if args.p:
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore' % (csv_filename))
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = makepbcore.py finished')
        ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(md5_manifest, csv_filename)
        print((' - Updating %s with %s' % (md5_manifest, csv_filename)))
        ififuncs.sha512_update(sha512_manifest, csv_filename)
        print((' - Updating %s with %s' % (sha512_manifest, csv_filename)))
    print(metadata_error)
def main(args_): ''' Launch all the functions for creating an IFI SIP. ''' args = parse_args(args_) start = datetime.datetime.now() inputs = args.i print args if args.user: user = args.user else: user = ififuncs.get_user() if args.oe: if args.oe[:2] != 'oe': print 'First two characters must be \'oe\' and last four characters must be four digits' object_entry = ififuncs.get_object_entry() elif len(args.oe[2:]) != 4: print 'First two characters must be \'oe\' and last four characters must be four digits' object_entry = ififuncs.get_object_entry() elif not args.oe[2:].isdigit(): object_entry = ififuncs.get_object_entry() print 'First two characters must be \'oe\' and last four characters must be four digits' else: object_entry = args.oe else: object_entry = ififuncs.get_object_entry() sip_path = make_folder_path(os.path.join(args.o), args, object_entry) if args.u: if ififuncs.validate_uuid4(args.u) is None: uuid = args.u uuid_event = ( 'EVENT = eventType=Identifier assignement,' ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4' ) % uuid else: print 'exiting due to invalid UUID' uuid_event = ( 'EVENT = exiting due to invalid UUID supplied on the commmand line: %s' % uuid ) uuid = False else: uuid = os.path.basename(sip_path) uuid_event = ( 'EVENT = eventType=Identifier assignement,' ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4' ) % uuid new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log') ififuncs.generate_log( new_log_textfile, 'EVENT = sipcreator.py started' ) ififuncs.generate_log( new_log_textfile, 'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py') ) ififuncs.generate_log( new_log_textfile, 'Command line arguments: %s' % args ) ififuncs.generate_log( new_log_textfile, 'EVENT = agentName=%s' % user ) ififuncs.generate_log( new_log_textfile, uuid_event ) if args.u is False: sys.exit() ififuncs.generate_log( new_log_textfile, 'EVENT = eventType=Identifier assignement,' ' eventIdentifierType=object 
entry, value=%s' % object_entry ) metadata_dir = os.path.join(sip_path, 'metadata') logs_dir = os.path.join(sip_path, 'logs') log_names = move_files(inputs, sip_path, args) get_metadata(sip_path, new_log_textfile) ififuncs.hashlib_manifest( metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir ) new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile) consolidate_manifests(sip_path, 'metadata', new_log_textfile) ififuncs.hashlib_append( logs_dir, new_manifest_textfile, os.path.dirname(os.path.dirname(logs_dir)) ) ififuncs.sort_manifest(new_manifest_textfile) if not args.quiet: log_report(log_names) finish = datetime.datetime.now() print '\n', user, 'ran this script at %s and it finished at %s' % (start, finish) if args.d: content_title = create_content_title_text(args, sip_path) ififuncs.manifest_replace( new_manifest_textfile, os.path.join('objects', os.path.basename(args.i[0])).replace("\\", "/"), os.path.join('objects', content_title).replace("\\", "/") ) return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Workflow: parse arguments, determine user/object entry/UUID, create the
    SIP folder structure, move the source files in, extract metadata,
    generate and consolidate checksum manifests, and - for DCPs (-d) -
    validate and describe the package with Clairmeta.
    Returns (sip log path, md5 manifest path).
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    # Fail fast if -d (DCP) was requested but Clairmeta is not importable.
    if args.d:
        try:
            import clairmeta
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print args
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    # Object entry is skipped for special collections (-sc); otherwise -oe
    # must look like 'oe' + 4/5 digits or the user is prompted interactively.
    if not args.sc:
        if args.oe:
            if args.oe[:2] != 'oe':
                print 'First two characters must be \'oe\' and last four characters must be four digits'
                object_entry = ififuncs.get_object_entry()
            elif len(args.oe[2:]) not in range(4, 6):
                print 'First two characters must be \'oe\' and last four characters must be four digits'
                object_entry = ififuncs.get_object_entry()
            elif not args.oe[2:].isdigit():
                object_entry = ififuncs.get_object_entry()
                print 'First two characters must be \'oe\' and last four characters must be four digits'
            else:
                object_entry = args.oe
        else:
            object_entry = ififuncs.get_object_entry()
    else:
        object_entry = 'not_applicable'
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    if args.u:
        if ififuncs.validate_uuid4(args.u) is None:
            uuid = args.u
            uuid_event = (
                'EVENT = eventType=Identifier assignement,'
                ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
            ) % uuid
        else:
            print 'exiting due to invalid UUID'
            # NOTE(review): `uuid` is referenced here before assignment, which
            # raises NameError; `args.u` was presumably intended - confirm.
            uuid_event = (
                'EVENT = exiting due to invalid UUID supplied on the commmand line: %s' % uuid)
            uuid = False
    else:
        uuid = os.path.basename(sip_path)
        uuid_event = (
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4') % uuid
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    # NOTE(review): args.u is a string (or unset), never the literal False, so
    # this exit path looks unreachable - confirm intent (uuid is False?).
    if args.u is False:
        sys.exit()
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    log_names = move_files(inputs, sip_path, args)
    get_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(logs_dir, new_manifest_textfile,
                            os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = ['-i', args.supplement, '-user', user, '-new_folder', supplemental_dir, os.path.dirname(sip_path), '-copy']
        package_update.main(supplement_cmd)
    # Special collections get a DFXML file-system metadata extract and a
    # supplementary SHA512 manifest alongside the MD5 one.
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(os.path.dirname(sip_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, new_log_textfile, new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        log_report(log_names)
    finish = datetime.datetime.now()
    print '\n', user, 'ran this script at %s and it finished at %s' % (start, finish)
    if args.d:
        # DCP: rename the objects entry to the CPL content title, then run
        # Clairmeta validation and store its XML probe in metadata/.
        content_title = create_content_title_text(sip_path)
        new_dcp_path = os.path.join('objects', content_title).replace("\\", "/")
        absolute_dcp_path = os.path.join(sip_path, new_dcp_path)
        ififuncs.manifest_replace(
            new_manifest_textfile,
            os.path.join('objects', os.path.basename(args.i[0])).replace("\\", "/"),
            new_dcp_path)
        '''
        a = subprocess.check_output(['python', '-m', 'clairmeta.cli', 'check', '-type', 'dcp', absolute_dcp_path], stderr=subprocess.STDOUT)
        b = subprocess.check_output(['python', '-m', 'clairmeta.cli', 'probe', '-type', 'dcp', '-format', 'xml', absolute_dcp_path], stderr=subprocess.STDOUT)
        '''
        dcp = DCP(absolute_dcp_path)
        clairmeta_version = clairmeta.__version__
        dcp_dict = dcp.parse()
        # json_str = json.dumps(dcp_dict , sort_keys=True, indent=2, separators=(',', ': '))
        xml_str = dicttoxml.dicttoxml(dcp_dict, custom_root='ClairmetaProbe', ids=False, attr_type=False)
        xml_pretty = prettyprint_xml(xml_str)
        status, report = dcp.check()
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=validation, eventOutcome=%s, eventDetail=%s, agentName=Clairmeta version %s' % (status, report, clairmeta_version))
        clairmeta_xml = os.path.join(metadata_dir, '%s_clairmeta.xml' % content_title)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=Clairmeta DCP metadata extraction, eventOutcome=%s, agentName=Clairmeta version %s' % (clairmeta_xml, clairmeta_version))
        with open(clairmeta_xml, 'w') as fo:
            fo.write(xml_pretty)
        ififuncs.checksum_replace(new_manifest_textfile, new_log_textfile, 'md5')
        ififuncs.manifest_update(new_manifest_textfile, clairmeta_xml)
        print status
        print report
        print '\n', user, 'ran this script at %s and it finished at %s' % (start, finish)
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    This variant delegates user/object-entry/UUID determination and DCP
    handling to helpers (determine_user, get_object_entry, determine_uuid,
    process_dcp). Returns (sip log path, md5 manifest path).
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    # Fail fast if -d (DCP) was requested but Clairmeta is not importable.
    if args.d:
        try:
            import clairmeta
            clairmeta_version = clairmeta.__version__
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print(args)
    user = ififuncs.determine_user(args)
    object_entry = get_object_entry(args)
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid, uuid_event = determine_uuid(args, sip_path)
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    # content_title is only needed (and only defined) for the DCP path below.
    if args.d:
        content_title = create_content_title_text(sip_path, args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    log_names = move_files(inputs, sip_path, args)
    ififuncs.get_technical_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(logs_dir, new_manifest_textfile,
                            os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = ['-i', args.supplement, '-user', user, '-new_folder', supplemental_dir, os.path.dirname(sip_path), '-copy']
        package_update.main(supplement_cmd)
    # Special collections get a DFXML file-system metadata extract and a
    # supplementary SHA512 manifest alongside the MD5 one.
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(os.path.dirname(sip_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, new_log_textfile, new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        log_report(log_names)
    finish = datetime.datetime.now()
    # NOTE(review): this prints the tuple representation, not a joined string
    # - probably a leftover from a 2-to-3 conversion.
    print(('\n', user, 'ran this script at %s and it finished at %s' % (start, finish)))
    if args.d:
        process_dcp(sip_path, content_title, args, new_manifest_textfile, new_log_textfile, metadata_dir, clairmeta_version)
    return new_log_textfile, new_manifest_textfile
def package(objects, object_entry, uuid, source_abspath, args, log_name_source, normalisation_tool, user, rawcooked_logfiles, multi_reeler, current_dir):
    '''
    Package the MKV using sipcreator.py

    Checksums the source, verifies that the normalised objects are fully
    reversible back to the source, then builds the SIP via sipcreator.main()
    and tidies up: removes interim .md5 sidecars from logs/, moves the
    RAWcooked logs into logs/, copies the workflow documentation into
    metadata/ and deletes the temporary mediainfo/dfxml files.
    Returns (losslessness judgement, sipcreator log path, sipcreator manifest path).
    '''
    sip_dir = os.path.join(args.o, os.path.join(object_entry, uuid))
    inputxml, inputtracexml, dfxml = ififuncs.generate_mediainfo_xmls(
        source_abspath, args.o, uuid, log_name_source)
    source_manifest = os.path.join(
        args.o,
        os.path.basename(args.i) + '_manifest-md5.txt')
    ififuncs.generate_log(
        log_name_source,
        'EVENT = message digest calculation, status=started, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files'
    )
    # Multi-reel packages hash relative to the parent folder itself, single
    # reels relative to the folder above the input.
    if multi_reeler:
        ififuncs.hashlib_manifest(args.i, source_manifest, args.i)
    else:
        ififuncs.hashlib_manifest(args.i, source_manifest, os.path.dirname(args.i))
    ififuncs.generate_log(
        log_name_source,
        'EVENT = message digest calculation, status=finished, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files'
    )
    ififuncs.generate_log(
        log_name_source,
        'EVENT = losslessness verification, status=started, eventType=messageDigestCalculation, agentName=%s, eventDetail=Full reversibility of %s back to its original form, followed by checksum verification using %s ' % (normalisation_tool, objects, source_manifest))
    if args.reversibility_dir:
        reversibility_dir = args.reversibility_dir
    else:
        reversibility_dir = args.o
    judgement = reversibility_verification(objects, source_manifest, reversibility_dir)
    ififuncs.generate_log(
        log_name_source,
        'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=%s, eventDetail=Full reversibilty of %s back to its original form, followed by checksum verification using %s , eventOutcome=%s' % (normalisation_tool, objects, source_manifest, judgement))
    # The interim XMLs and source manifest travel into the SIP as
    # supplemental metadata via sipcreator's -supplement option.
    supplement_cmd = ['-supplement', inputxml, inputtracexml, dfxml, source_manifest]
    if args.supplement:
        supplement_cmd.extend(args.supplement)
    sipcreator_cmd = [
        '-i',
    ]
    for i in objects:
        sipcreator_cmd.append(i)
    sipcreator_cmd += ['-u', uuid, '-quiet', '-move', '-user', user, '-oe', object_entry, '-o', args.o]
    sipcreator_cmd.extend(supplement_cmd)
    sipcreator_log, sipcreator_manifest = sipcreator.main(sipcreator_cmd)
    logs_dir = os.path.join(sip_dir, 'logs')
    # Remove interim .md5 sidecars that end up in logs/.
    for files in os.listdir(logs_dir):
        if files.endswith('.md5'):
            deletefiles.main(['-i', os.path.join(logs_dir, files), '-uuid_path', sip_dir, '-user', user])
    for rawcooked_logfile in rawcooked_logfiles:
        # strip stray quote characters from shell-escaped log paths
        rawcooked_logfile = rawcooked_logfile.replace('\'', '')
        shutil.move(rawcooked_logfile, logs_dir)
        ififuncs.manifest_update(
            sipcreator_manifest,
            os.path.join(logs_dir, os.path.basename(rawcooked_logfile)))
    metadata_dir = os.path.join(sip_dir, 'metadata')
    os.chdir(current_dir)
    # Ship the static workflow documentation that lives next to this script.
    shutil.copy(
        os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), 'film_scan_aip_documentation.txt'),
        metadata_dir)
    ififuncs.manifest_update(
        sipcreator_manifest,
        os.path.join(metadata_dir, 'film_scan_aip_documentation.txt'))
    os.remove(dfxml)
    os.remove(inputtracexml)
    os.remove(inputxml)
    return judgement, sipcreator_log, sipcreator_manifest
def copy_dir(source, destination_final_path, log_name_source, rootpos, destination, dirname, args):
    '''
    Copy source to destination using the native tool for the host platform.

    Windows uses robocopy (or shutil.copy2 for a single file), OSX uses
    gcp (-l) or rsync, and Linux uses cp. A File Transfer event is written
    to the log when the copy starts and again when it completes.
    '''
    platform = sys.platform
    if platform == "win32":
        if os.path.isfile(source):
            # Single file: plain python copy is sufficient on Windows.
            generate_log(
                log_name_source,
                'EVENT = File Transfer, status=started, agentName=Windows, module=shutil.copy2'
            )
            print('copying file with python/shutil')
            shutil.copy2(source, destination_final_path)
        else:
            robocopy_cmd = [
                'robocopy', source, destination_final_path,
                '/E', '/XA:SH',
                '/XD', '.*',
                '/XD', '*System Volume Information*',
                '/XD', 'Seagate',
                '/XD', '$Recycle.bin',
                '/a-:SH', '/a+:R'
            ]
            subprocess.call(robocopy_cmd)
            generate_log(
                log_name_source,
                'EVENT = File Transfer, status=started, agentName=Windows O.S, agentName=Robocopy'
            )
    elif platform == "darwin":
        if args.l:
            gcp_cmd = [
                'gcp', '--preserve=mode,timestamps', '-nRv',
                source, destination_final_path
            ]
            generate_log(
                log_name_source,
                'EVENT = File Transfer, status=started, agentName=OSX - agentName=gcp'
            )
            subprocess.call(gcp_cmd)
        # https://github.com/amiaopensource/ltopers/blob/master/writelto#L51
        else:
            rsync_opts = [
                'rsync', '-rtv', '--exclude=.*', '--exclude=.*/',
                '--stats', '--progress', source
            ]
            if rootpos == 'y':
                # Copying from a filesystem root: target a named subfolder,
                # creating it if needed.
                target = destination + '/' + dirname
                if not os.path.isdir(target):
                    os.makedirs(target)
                rsync_cmd = rsync_opts + [target]
            else:
                rsync_cmd = rsync_opts + [destination]
            generate_log(
                log_name_source,
                'EVENT = File Transfer, status=started, agentName=OSX, agentName=rsync'
            )
            print(rsync_cmd)
            subprocess.call(rsync_cmd)
    elif 'linux' in platform:
        # https://github.com/amiaopensource/ltopers/blob/master/writelto#L51
        cp_cmd = [
            'cp', '--preserve=mode,timestamps', '-nRv',
            source, destination_final_path
        ]
        generate_log(
            log_name_source,
            'EVENT = File Transfer, status=started, agentName=Linux, agentName=cp'
        )
        subprocess.call(cp_cmd)
    generate_log(log_name_source, 'EVENT = File Transfer, status=completed')
def setup(args_): ''' Sets a bunch of filename variables and parses command line. some examples: if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5 then manifes_root = /home/kieranjol/fakeeeeee_manifest.md5 ''' parser = argparse.ArgumentParser( description='Copy directory with checksum comparison' 'and manifest generation.Written by Kieran O\'Leary.') parser.add_argument( 'source', help='Input directory' ) parser.add_argument( 'destination', help='Destination directory' ) parser.add_argument( '-l', '-lto', action='store_true', help='use gcp instead of rsync on osx for SPEED on LTO' ) parser.add_argument( '-move', action='store_true', help='Move files instead of copying - much faster!' ) rootpos = '' dircheck = None args = parser.parse_args(args_) if os.path.isdir(args.source): dircheck = check_for_sip(args.source) if dircheck != None: if os.path.isdir(dircheck): source = check_for_sip(args.source) destination = os.path.join(args.destination, os.path.basename(args.source)) os.makedirs(destination) else: source = args.source destination = args.destination normpath = os.path.normpath(source) #is there any benefit to this over os.path.basename dirname = os.path.split(os.path.basename(source))[1] if dirname == '': rootpos = 'y' dirname = raw_input( 'What do you want your destination folder to be called?\n' ) relative_path = normpath.split(os.sep)[-1] # or hardcode destination_final_path = os.path.join(destination, dirname) manifest_destination = destination + '/%s_manifest.md5' % dirname if os.path.isfile(manifest_destination): print 'Destination manifest already exists' manifest_filename = '%s_manifest.md5' % dirname desktop_manifest_dir = make_desktop_manifest_dir() # manifest = desktop manifest, looks like this can get rewritten later. 
manifest = os.path.join( desktop_manifest_dir, manifest_filename ) manifest_sidecar = os.path.join( os.path.dirname(source), relative_path + '_manifest.md5' ) manifest_root = source + '/%s_manifest.md5' % os.path.basename(source) log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S") desktop_logs_dir = make_desktop_logs_dir() log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename) generate_log(log_name_source, 'copyit.py started.') ififuncs.generate_log( log_name_source, 'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py')) generate_log(log_name_source, 'Source: %s' % source) generate_log(log_name_source, 'Destination: %s' % destination) return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
rootpos = 'y' dirname = raw_input('What do you want your destination folder to be called?\n') relative_path = normpath.split(os.sep)[-1] destination = args.destination # or hardcode destination_final_path = destination + '/%s' % dirname manifest_destination = destination + '/%s_manifest.md5' % dirname manifest_ = '/%s_manifest.md5' % dirname desktop_manifest_dir = make_desktop_manifest_dir() manifest = "%s/%s" % (desktop_manifest_dir, manifest_) manifest_sidecar = source_parent_dir + '/%s_manifest.md5' % relative_path manifest_root = source + '/%s_manifest.md5' % os.path.basename(source) log_name_source_ = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S") desktop_logs_dir = make_desktop_logs_dir() log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_) log_name_destination = destination + '/%s_ifi_events_log.log' % dirname generate_log(log_name_source, 'move.py started.') generate_log(log_name_source, 'Source: %s' % source) generate_log(log_name_source, 'Destination: %s' % destination) manifest_generator = '' try: test_write_capabilities(destination) except OSError: print 'You cannot write to your destination!' generate_log(log_name_source, 'EVENT = I/O Test - Failure - No write access to destination directory.') sys.exit() overwrite_destination_manifest = check_overwrite(manifest_destination) overwrite_destination_dir = check_overwrite_dir(destination_final_path) remove_bad_files(source)
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    This variant adds optional ZIP packaging (-zip, via makezip.py) and an
    accessioning workflow (-accession) that prompts for registry metadata,
    appends a row to the OE register and invokes accession.py on the
    finished SIP. Returns (sip log path, md5 manifest path).
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    # Verify the external tools needed for the detected input types exist.
    for input in inputs:
        if ififuncs.check_av_or_doc(input) == 'av':
            ififuncs.check_existence(['mediainfo'])
        elif ififuncs.check_av_or_doc(input) == 'doc':
            ififuncs.check_existence(['sf', 'exiftool'])
    # Fail fast if -d (DCP) was requested but Clairmeta is not importable.
    if args.d:
        try:
            import clairmeta
            clairmeta_version = clairmeta.__version__
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    if args.zip:
        ififuncs.check_existence(['7za'])
    print(args)
    user = ififuncs.determine_user(args)
    object_entry = get_object_entry(args)
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid, uuid_event = determine_uuid(args, sip_path)
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    # content_title is only needed (and only defined) for the DCP path below.
    if args.d:
        content_title = create_content_title_text(sip_path, args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    # Accessioning needs registry details up front - gather them interactively.
    if args.accession:
        accession_number = ififuncs.get_accession_number()
        reference_number = ififuncs.get_reference_number()
        parent = ififuncs.ask_question(
            'What is the parent record? eg MV 1234. Enter n/a if this is a born digital acquisition with no parent.'
        )
        donor = ififuncs.ask_question(
            'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
        )
        reproduction_creator = ififuncs.ask_question(
            'Who is the reproduction creator? This will not affect acquisitions. Enter n/a if not applicable'
        )
        depositor_reference = ififuncs.ask_question(
            'What is the donor/depositor number? This will not affect Reproductions.'
        )
        acquisition_type = ififuncs.get_acquisition_type('')
        donation_date = ififuncs.ask_question(
            'When was the donation date in DD/MM/YYYY format? Eg. 31/12/1999 - Unfortunately this is NOT using ISO 8601.'
        )
    # ZIP workflow: describe the source, checksum it (unless a manifest was
    # supplied), pack it via makezip.py and record whether packing was lossless.
    if args.zip:
        inputxml, inputtracexml, dfxml = ififuncs.generate_mediainfo_xmls(
            inputs[0], args.o, uuid, new_log_textfile)
        if args.manifest:
            shutil.copy(
                args.manifest,
                args.manifest.replace('_manifest.md5', '_manifest-md5.txt'))
            source_manifest = args.manifest.replace('_manifest.md5', '_manifest-md5.txt')
        else:
            source_manifest = os.path.join(
                args.o,
                os.path.basename(args.i[0]) + '_manifest-md5.txt')
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=started, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
            ififuncs.hashlib_manifest(args.i[0], source_manifest, os.path.dirname(args.i[0]))
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=finished, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=started, eventType=packing, agentName=makezip.py, eventDetail=Source object to be packed=%s' % inputs[0])
        makezip_judgement, zip_file = makezip.main([
            '-i', inputs[0],
            '-o', os.path.join(sip_path, 'objects'),
            '-basename', uuid + '.zip'
        ])
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=finished, eventType=packing, agentName=makezip.py, eventDetail=Source object packed into=%s' % zip_file)
        if makezip_judgement is None:
            judgement = 'lossless'
        else:
            judgement = makezip_judgement
        # NOTE(review): the following log entry is written twice verbatim -
        # looks like an accidental duplicate.
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s' % judgement)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s' % judgement)
    else:
        log_names = move_files(inputs, sip_path, args, user)
    ififuncs.get_technical_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile)
    if args.zip:
        # 7za may split large archives into .001/.002... parts; every part
        # must be checksummed into the manifest.
        if zip_file.endswith('.001'):
            for split_archive in os.listdir(os.path.dirname(zip_file)):
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Message Digest Calculation, status=started, eventType=message digest calculation, eventDetail=%s module=hashlib' % split_archive)
                ififuncs.manifest_update(
                    new_manifest_textfile,
                    os.path.join(os.path.dirname(zip_file), split_archive))
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Message Digest Calculation, status=finished, eventType=message digest calculation, eventDetail=%s module=hashlib' % split_archive)
        else:
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = Message Digest Calculation, status=started, eventType=message digest calculation, eventDetail=%s module=hashlib' % zip_file)
            ififuncs.manifest_update(new_manifest_textfile, zip_file)
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = Message Digest Calculation, status=finished, eventType=message digest calculation, eventDetail=%s module=hashlib' % zip_file)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(logs_dir, new_manifest_textfile,
                            os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = ['-i', args.supplement, '-user', user, '-new_folder', supplemental_dir, os.path.dirname(sip_path), '-copy']
        package_update.main(supplement_cmd)
    if args.zip:
        # The mediainfo XMLs and source manifest travel as supplemental metadata.
        os.makedirs(supplemental_dir)
        supplement_cmd = ['-i', [inputxml, inputtracexml, dfxml, source_manifest], '-user', user, '-new_folder', supplemental_dir, os.path.dirname(sip_path), '-copy']
        package_update.main(supplement_cmd)
    # Special collections get a DFXML file-system metadata extract and a
    # supplementary SHA512 manifest alongside the MD5 one.
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(os.path.dirname(sip_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, new_log_textfile, new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        # log_names only exists on the non-zip path, hence the locals() check.
        if 'log_names' in locals():
            log_report(log_names)
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' % (user, start, finish))
    if args.d:
        process_dcp(sip_path, content_title, args, new_manifest_textfile, new_log_textfile, metadata_dir, clairmeta_version)
    if args.accession:
        # Look up the filmographic title for the reference number, append a
        # row to the OE register, then hand the package over to accession.py.
        register = accession.make_register()
        filmographic_dict = ififuncs.extract_metadata(args.filmo_csv)[0]
        for filmographic_record in filmographic_dict:
            if filmographic_record['Reference Number'].lower(
            ) == reference_number.lower():
                if filmographic_record['Title'] == '':
                    title = filmographic_record[
                        'TitleSeries'] + '; ' + filmographic_record['EpisodeNo']
                else:
                    title = filmographic_record['Title']
        oe_register = make_oe_register()
        ififuncs.append_csv(
            oe_register,
            (object_entry.upper()[:2] + '-' + object_entry[2:], donation_date, '1', '', title, donor, acquisition_type[1], accession_number, 'Representation of %s|Reproduction of %s' % (reference_number, parent), ''))
        accession_cmd = [
            os.path.dirname(sip_path), '-user', user, '-force',
            '-number', accession_number,
            '-reference', reference_number,
            '-register', register,
            '-filmo_csv', args.filmo_csv,
            '-pbcore'
        ]
        if not parent.lower() == 'n/a':
            accession_cmd.extend(['-parent', parent])
        accession_cmd.extend(['-donor', donor])
        accession_cmd.extend(['-depositor_reference', depositor_reference])
        accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
        accession_cmd.extend(['-donation_date', donation_date])
        accession_cmd.extend(['-reproduction_creator', reproduction_creator])
        print(accession_cmd)
        accession.main(accession_cmd)
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launches the functions that prepare and execute the concatenation.
    '''
    # Fail early if the external tools this workflow shells out to are missing.
    ififuncs.check_existence(['ffmpeg', 'mkvpropedit', 'mediainfo'])
    uuid = ififuncs.create_uuid()
    args = parse_args(args_)
    print(args)
    log_name_source = os.path.join(
        args.o, '%s_concat_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S"))
    ififuncs.generate_log(log_name_source, 'concat.py started.')
    # Output container defaults to Matroska unless -mov was requested.
    if args.mov:
        container = 'mov'
    else:
        container = 'mkv'
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=concat.py %s' % ififuncs.get_script_version('concat.py'))
    ififuncs.generate_log(log_name_source, 'Command line arguments: %s' % args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    # Validate the object entry number: 'oe' prefix plus 4-5 digits;
    # on any failure fall back to interactive prompting.
    if args.oe:
        if args.oe[:2] != 'oe':
            print(
                'First two characters must be \'oe\' and last four characters must be four digits'
            )
            object_entry = ififuncs.get_object_entry()
        elif len(args.oe[2:]) not in range(4, 6):
            print(
                'First two characters must be \'oe\' and last four characters must be four digits'
            )
            object_entry = ififuncs.get_object_entry()
        elif not args.oe[2:].isdigit():
            object_entry = ififuncs.get_object_entry()
            print(
                'First two characters must be \'oe\' and last four characters must be four digits'
            )
        else:
            object_entry = args.oe
    else:
        object_entry = ififuncs.get_object_entry()
    ififuncs.generate_log(log_name_source, 'EVENT = agentName=%s' % user)
    source_uuid_check = ''
    if os.path.isfile(args.i[0]):
        source_uuid = ififuncs.get_source_uuid()
    elif os.path.isdir(args.i[0]):
        source_uuid_check = ififuncs.check_for_uuid(args)
        # NOTE(review): nesting reconstructed from collapsed source — the
        # check_for_uuid() fallback appears to apply only to the directory
        # branch; confirm against upstream concat.py.
        if source_uuid_check == False:
            source_uuid = ififuncs.get_source_uuid()
        else:
            source_uuid = source_uuid_check
    ififuncs.generate_log(
        log_name_source,
        'Relationship, derivation, has source=%s' % source_uuid)
    video_files = args.i
    # Temporary textfile listing the inputs, consumed by ffmpeg's concat demuxer.
    concat_file = ififuncs.get_temp_concat('concat_stuff')
    ififuncs.generate_log(log_name_source, 'concatenation file=%s' % concat_file)
    if args.r:
        video_files = recursive_file_list(video_files)
    video_files = ififuncs.sanitise_filenames(video_files)
    for source_files in video_files:
        ififuncs.generate_log(log_name_source, 'source_files = %s' % source_files)
    make_chapters(video_files)
    ififuncs.concat_textfile(video_files, concat_file)
    ififuncs.generate_log(
        log_name_source,
        'EVENT = Concatenation, status=started, eventType=Creation, agentName=ffmpeg, eventDetail=Source media concatenated into a single file output=%s' % os.path.join(args.o, '%s.%s' % (uuid, container)))
    source_bitstream_md5, fmd5_logfile = ffmpeg_concat(concat_file, args, uuid, container)
    output_file = os.path.join(args.o, '%s.%s' % (uuid, container))
    ififuncs.generate_log(
        log_name_source,
        'EVENT = Concatenation, status=finished, eventType=Creation, agentName=ffmpeg, eventDetail=Source media concatenated into a single file output=%s' % os.path.join(args.o, '%s.%s' % (uuid, container)))
    ififuncs.generate_log(
        log_name_source,
        'EVENT = losslessness verification, status=started, eventType=messageDigestCalculation, agentName=ffmpeg, eventDetail=MD5s of AV streams of output file generated for validation'
    )
    # Backslashes/colons are escaped because the path is injected into
    # FFREPORT via the environment, where ':' is a separator.
    validation_logfile = os.path.join(args.o, '%s_validation.log' % uuid).replace(
        '\\', '\\\\').replace(':', '\:')
    validation_env_dict = ififuncs.set_environment(validation_logfile)
    # Framemd5-style whole-stream MD5 of the output, to compare with the
    # digest captured from the sources during concatenation.
    output_bitstream_md5 = subprocess.check_output(
        [
            'ffmpeg', '-report', '-i', output_file, '-f', 'md5',
            '-map', '0:v', '-map', '0:a?', '-c', 'copy', '-'
        ],
        env=validation_env_dict).rstrip()
    ififuncs.generate_log(
        log_name_source,
        'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=ffmpeg, eventDetail=MD5s of AV streams of output file generated for validation'
    )
    if source_bitstream_md5 == output_bitstream_md5:
        print('process appears to be lossless')
        print(source_bitstream_md5, output_bitstream_md5)
        ififuncs.generate_log(
            log_name_source,
            'EVENT = losslessness verification, eventOutcome=pass')
    else:
        print('something went wrong - not lossless!')
        print(source_bitstream_md5, output_bitstream_md5)
        ififuncs.generate_log(
            log_name_source,
            'EVENT = losslessness verification, eventOutcome=fail')
    # Unless suppressed, write per-source chapter markers into the Matroska.
    if args.nochapters != True:
        subprocess.call(['mkvpropedit', output_file, '-c', 'chapters.txt'])
        ififuncs.generate_log(
            log_name_source,
            'EVENT = eventType=modification, agentName=mkvpropedit, eventDetail=Chapters added to file detailing start point of source clips.'
        )
    ififuncs.concat_textfile(video_files, concat_file)
    # NOTE(review): concat_lines is read but never used below.
    with open(log_name_source, 'r') as concat_log:
        concat_lines = concat_log.readlines()
    if not args.no_sip:
        # Wrap the output in a SIP, then move the ffmpeg logs into the SIP's
        # logs directory and register them in the SIP manifest.
        sipcreator_log, sipcreator_manifest = sipcreator.main([
            '-i', output_file, '-u', uuid, '-oe', object_entry, '-user', user,
            '-o', args.o
        ])
        shutil.move(fmd5_logfile, os.path.dirname(sipcreator_log))
        shutil.move(
            validation_logfile.replace('\\\\', '\\').replace('\:', ':'),
            os.path.dirname(sipcreator_log))
        logs_dir = os.path.dirname(sipcreator_log)
        ififuncs.manifest_update(
            sipcreator_manifest,
            os.path.join(logs_dir, os.path.basename(fmd5_logfile)))
        ififuncs.manifest_update(
            sipcreator_manifest,
            os.path.join(logs_dir, (os.path.basename(
                validation_logfile.replace('\\\\', '\\').replace('\:', ':')))))
        ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def get_metadata(path, new_log_textfile):
    '''
    Recursively create mediainfo and mediatrace XML sidecars for AV files
    under `path`, and exiftool XML plus siegfried JSON sidecars for
    document/image files. Each extraction is recorded as a PREMIS-style
    event in `new_log_textfile`.

    This should probably go in ififuncs as it could be used by other scripts.
    '''
    mediainfo_version = 'mediainfo'
    try:
        mediainfo_version = subprocess.check_output(
            ['mediainfo', '--Version']).rstrip()
    except subprocess.CalledProcessError as grepexc:
        # Some mediainfo builds exit non-zero for --Version; the version
        # string is still on the second line of the captured output.
        mediainfo_version = grepexc.output.rstrip().splitlines()[1]
    for root, _, filenames in os.walk(path):
        for av_file in filenames:
            if av_file.endswith(
                    ('.mov', 'MP4', '.mp4', '.mkv', '.MXF',
                     '.mxf', '.dv', '.DV')):
                if av_file[0] != '.':
                    # Sidecars always land in <path>/metadata, regardless of
                    # how deep the AV file sits in the tree.
                    inputxml = "%s/%s_mediainfo.xml" % (
                        os.path.join(path, 'metadata'), os.path.basename(av_file))
                    inputtracexml = "%s/%s_mediatrace.xml" % (
                        os.path.join(path, 'metadata'), os.path.basename(av_file))
                    print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
                    ififuncs.make_mediainfo(inputxml, 'mediaxmlinput',
                                            os.path.join(root, av_file))
                    ififuncs.generate_log(
                        new_log_textfile,
                        'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputxml, mediainfo_version))
                    print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
                    ififuncs.make_mediatrace(inputtracexml, 'mediatracexmlinput',
                                             os.path.join(root, av_file))
                    ififuncs.generate_log(
                        new_log_textfile,
                        'EVENT = Metadata extraction - eventDetail=Mediatrace technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputtracexml, mediainfo_version))
            elif av_file.endswith(
                    ('.tif', 'tiff', '.doc', '.txt', '.docx',
                     '.pdf', '.jpg', '.jpeg', '.png', '.rtf',
                     '.xml', '.odt')):
                # Skip sidecar files that this very function (or siegfried/
                # exiftool) generated on a previous run.
                blacklist = ('siegfried', 'exiftool', 'mediainfo', 'mediatrace')
                if av_file[0] != '.':
                    # BUG FIX: the original tested
                    #   any(word in blacklist for word in av_file)
                    # which iterates the *characters* of the filename, is
                    # always False, and therefore silently skipped every
                    # document/image file. The intended test is whether any
                    # blacklisted tool name appears in the filename.
                    if not any(word in av_file for word in blacklist):
                        exiftool_version = 'exiftool'
                        try:
                            exiftool_version = subprocess.check_output(
                                ['exiftool', '-ver'])
                        except subprocess.CalledProcessError as grepexc:
                            exiftool_version = grepexc.output.rstrip(
                            ).splitlines()[1]
                        siegfried_version = 'siegfried'
                        try:
                            siegfried_version = subprocess.check_output(
                                ['sf', '-version'])
                        except subprocess.CalledProcessError as grepexc:
                            siegfried_version = grepexc.output.rstrip(
                            ).splitlines()[1]
                        inputxml = "%s/%s_exiftool.xml" % (
                            os.path.join(path, 'metadata'), os.path.basename(av_file))
                        inputtracexml = "%s/%s_siegfried.json" % (
                            os.path.join(path, 'metadata'), os.path.basename(av_file))
                        ififuncs.make_siegfried(inputtracexml,
                                                os.path.join(root, av_file))
                        print('Generating exiftool xml of input file and saving it in %s' % inputxml)
                        ififuncs.generate_log(
                            new_log_textfile,
                            'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via exiftool, eventOutcome=%s, agentName=%s' % (inputxml, exiftool_version))
                        # Message fixed: this step produces the siegfried
                        # JSON, not a mediatrace XML.
                        print('Generating siegfried json of input file and saving it in %s' % inputtracexml)
                        ififuncs.make_exiftool(inputxml,
                                               os.path.join(root, av_file))
                        ififuncs.generate_log(
                            new_log_textfile,
                            'EVENT = Format identification - eventType=format identification, eventDetail=Format identification via PRONOM signatures using Siegfried, eventOutcome=%s, agentName=%s' % (inputtracexml, siegfried_version))
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Moves the files given via -i into -new_folder inside a SIP and rewrites
    the SIP's MD5 manifest so the relocated paths stay verifiable. All
    actions are logged to the SIP's _sip_log.log.
    '''
    args = parse_args(args_)
    source = args.input
    sip_path = ififuncs.check_for_sip([source])
    if sip_path is not None:
        oe_path = os.path.dirname(sip_path)
        uuid = os.path.basename(sip_path)
        sip_manifest = os.path.join(
            oe_path, uuid
        ) + '_manifest.md5'
    start = datetime.datetime.now()
    # print statements converted to print() calls for consistency with the
    # rest of this codebase; output is unchanged.
    print(args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = rearrange.py started'
    )
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=rearrange.py %s' % ififuncs.get_script_version('rearrange.py')
    )
    ififuncs.generate_log(
        new_log_textfile,
        'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = agentName=%s' % user
    )
    if not os.path.isdir(args.new_folder):
        os.makedirs(args.new_folder)
    for filename in args.i:
        # add test to see if it actually deleted - what if read only?
        shutil.move(filename, args.new_folder)
        print('%s has been moved into %s' % (filename, args.new_folder))
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=file movement,'
            ' eventOutcomeDetailNote=%s has been moved into %s'
            ' agentName=shutil.move()' % (filename, args.new_folder)
        )
        # Manifest entries are relative to the package root, so strip the
        # input prefix before rewriting the path in the manifest.
        relative_filename = filename.replace(args.input + '/', '')
        relative_new_folder = args.new_folder.replace(args.input + '/', '')
        update_manifest(
            sip_manifest,
            relative_filename,
            os.path.join(relative_new_folder, os.path.basename(relative_filename)),
            new_log_textfile
        )
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = rearrange.py finished'
    )
    # The log itself changed, so refresh its checksum in the manifest.
    ififuncs.checksum_replace(sip_manifest, new_log_textfile, 'md5')
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' % (user, start, finish))
def main(args_):
    '''
    Launches the various functions that will accession a package
    '''
    args = parse_args(args_)
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is not None:
        oe_path = os.path.dirname(uuid_directory)
        oe_number = os.path.basename(oe_path)
        if args.user:
            user = args.user
        else:
            user = ififuncs.get_user()
        # Validate the accession number: 'aaa' prefix plus four digits;
        # on any failure fall back to interactive prompting.
        if args.number:
            if args.number[:3] != 'aaa':
                print 'First three characters must be \'aaa\' and last four characters must be four digits'
                accession_number = ififuncs.get_accession_number()
            elif len(args.number[3:]) != 4:
                accession_number = ififuncs.get_accession_number()
                print 'First three characters must be \'aaa\' and last four characters must be four digits'
            elif not args.number[3:].isdigit():
                accession_number = ififuncs.get_accession_number()
                print 'First three characters must be \'aaa\' and last four characters must be four digits'
            else:
                accession_number = args.number
        else:
            accession_number = ififuncs.get_accession_number()
        # Reference number is only needed when a PBCore CSV is requested.
        if args.pbcore:
            if args.reference:
                Reference_Number = args.reference.upper()
            else:
                Reference_Number = ififuncs.get_reference_number()
        accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
        uuid = os.path.basename(uuid_directory)
        new_uuid_path = os.path.join(accession_path, uuid)
        logs_dir = os.path.join(new_uuid_path, 'logs')
        sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
        if args.force:
            proceed = 'Y'
        else:
            proceed = ififuncs.ask_yes_no(
                'Do you want to rename %s with %s' % (oe_number, accession_number))
        # Accessioning == renaming the OE directory to the accession number.
        if proceed == 'Y':
            os.rename(oe_path, accession_path)
        if args.register:
            register = args.register
        else:
            register = make_register()
        ififuncs.append_csv(register, (oe_number.upper()[:2] + '-' + oe_number[2:6],
                                       accession_number, '', '', '', '', ''))
        ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=accession.py %s' % ififuncs.get_script_version('accession.py'))
        ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = eventType=Identifier assignment,'
            ' eventIdentifierType=accession number, value=%s' % accession_number)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = eventType=accession,'
            ' eventIdentifierType=accession number, value=%s' % accession_number)
        sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
        # Generate a sidecar SHA512 manifest and fold its log into the SIP log.
        sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(
            os.path.dirname(new_uuid_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
        os.remove(sha512_log)
        print('Generating Digital Forensics XML')
        dfxml = make_dfxml(args, new_uuid_path, uuid)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
        ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
        # The log changed above, so both manifests must refresh its checksum,
        # and the new DFXML file must be added to both manifests.
        ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(sip_manifest, dfxml)
        ififuncs.sha512_update(sha512_manifest, dfxml)
        if args.pbcore:
            makepbcore.main([
                accession_path, '-p', '-user', user, '-reference', Reference_Number
            ])
    else:
        print 'not a valid package. The input should include a package that has been through Object Entry'
def main(args_):
    '''
    Aggregate technical metadata for every file in a package and append a
    single PBCore-flavoured CSV record describing the package as a whole.
    '''
    # if multiple file are present, this script will treat them as a single
    # instantiation/representation and get aggregate metadata about the whole
    # package. For now, this will be a clumsy implementation - the first file
    # will provide most metadata. Things like duration/bitrate/filesize
    # will be calculated as a whole.
    # Although another way would be that every call is looped, and if
    # this could catch files that should not be in the package, eg. a 4:2:2
    # file in a 4:2:0 package..
    # yup - do it that way!
    args = parse_args(args_)
    all_files = ififuncs.recursive_file_list(args.input)
    silence = True  # flipped to False as soon as any audio track is seen
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    # The non-UUID directory name under the input is the instantiation ID.
    for dirs in os.listdir(args.input):
        if ififuncs.validate_uuid4(dirs) is None:
            instantiationIdentif = dirs
    Accession_Number = get_accession_number(args.input)
    if args.reference:
        Reference_Number = args.reference.upper()
    else:
        Reference_Number = get_reference_number(args.input)
    if args.p:
        for root, _, filenames in os.walk(args.input):
            if os.path.basename(root) == 'metadata':
                metadata_dir = root
            elif os.path.basename(root) == 'logs':
                logs_dir = root
        csv_filename = os.path.join(metadata_dir, Accession_Number + '_pbcore.csv')
        sipcreator_log = os.path.join(logs_dir, instantiationIdentif + '_sip_log.log')
        ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py'))
        ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    else:
        csv_filename = 'blaa.csv'
    print ' - Metadata will be stored in %s' % csv_filename
    for filenames in os.listdir(args.input):
        if '_manifest.md5' in filenames:
            md5_manifest = os.path.join(args.input, filenames)
        elif 'manifest-sha512.txt' in filenames:
            sha512_manifest = os.path.join(args.input, filenames)
    make_csv(csv_filename)
    # Aggregate accumulators (summed over all files in the package).
    ms = 0
    FrameCount = 0
    instantFileSize = 0
    instantFileSize_gigs = 0
    # Per-file value lists; a list whose set has more than one member means
    # the files in the package disagree on that attribute (warned below).
    scan_types = []
    matrix_list = []
    transfer_list = []
    colour_primaries_list = []
    color_spaces = []
    chroma = []
    frame_sizes = []
    par_list = []
    container_list = []
    fps_list = []
    sample_rate_list = []
    track_count_list = []
    interlace_list = []
    compression_list = []
    pix_fmt_list = []
    audio_fmt_list = []
    audio_codecid_list = []
    audio_codec_list = []
    au_bitdepth_list = []
    video_codecid_list = []
    video_codec_version_list = []
    video_codec_profile_list = []
    for source in all_files:
        metadata = subprocess.check_output(
            ['mediainfo', '--Output=PBCore2', source])
        root = etree.fromstring(metadata)
        # NOTE(review): Python 2 parses this as print of the %-formatted
        # string; under Python 3 it would raise (None % source).
        print(' - Analysing %s') % source
        pbcore_namespace = root.xpath('namespace-uri(.)')
        # NOTE(review): this get_metadata(xpath, element, ns) helper is a
        # different function from the file-walking get_metadata elsewhere in
        # this file — confirm the intended module boundaries.
        track_type = root.xpath('//ns:essenceTrackType',
                                namespaces={'ns': pbcore_namespace})
        if len(track_type) > 0:
            for track in track_type:
                if track.text == 'Video':
                    essenceTrackEncodvid = get_metadata(
                        "ns:essenceTrackEncoding",
                        track.getparent(), pbcore_namespace)
                    vcodec_attributes = get_attributes(track.getparent(), pbcore_namespace)
                elif track.text == 'Audio':
                    silence = False
                    essenceTrackEncod_au = get_metadata(
                        "//ns:essenceTrackEncoding",
                        track.getparent(), pbcore_namespace)
                    audio_codec_list.append(essenceTrackEncod_au)
                    acodec_attributes = get_attributes(track.getparent(), pbcore_namespace)
                    audio_codecid = acodec_attributes['ref']
                    essenceTrackSampling = ififuncs.get_mediainfo(
                        'samplerate', '--inform=Audio;%SamplingRate_String%', source)
                    essenceBitDepth_au = get_metadata(
                        "//ns:essenceTrackBitDepth",
                        root, pbcore_namespace)
                    audio_codecid_list.append(audio_codecid)
                    au_bitdepth_list.append(essenceBitDepth_au)
        ScanType = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ScanType']",
            root, pbcore_namespace)
        scan_types.append(ScanType)
        matrix_coefficients = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='matrix_coefficients']",
            root, pbcore_namespace)
        matrix_list.append(matrix_coefficients)
        transfer_characteris = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='transfer_characteristics']",
            root, pbcore_namespace)
        transfer_list.append(transfer_characteris)
        colour_primaries = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='color_primaries']",
            root, pbcore_namespace)
        colour_primaries_list.append(colour_primaries)
        FrameCount += int(
            get_metadata(
                "//ns:essenceTrackAnnotation[@annotationType='FrameCount']",
                root, pbcore_namespace))
        instantFileSize += int(
            get_metadata("//ns:instantiationFileSize", root, pbcore_namespace))
        instantDataRate = round(
            float(
                ififuncs.get_mediainfo('OverallBitRate',
                                       '--inform=General;%OverallBitRate%',
                                       source)) / 1000 / 1000, 2)
        instantTracks = ififuncs.get_number_of_tracks(source)
        track_count_list.append(instantTracks)
        ms += ififuncs.get_milliseconds(source)
        ColorSpace = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ColorSpace']",
            root, pbcore_namespace)
        color_spaces.append(ColorSpace)
        ChromaSubsampling = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ChromaSubsampling']",
            root, pbcore_namespace)
        chroma.append(ChromaSubsampling)
        instantMediaty = get_metadata("//ns:instantiationMediaType",
                                      root, pbcore_namespace)
        essenceFrameSize = get_metadata("//ns:essenceTrackFrameSize",
                                        root, pbcore_namespace)
        frame_sizes.append(essenceFrameSize)
        PixelAspectRatio = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='PixelAspectRatio']",
            root, pbcore_namespace)
        par_list.append(PixelAspectRatio)
        instantiationStandar = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Format']",
            root, pbcore_namespace)
        container_list.append(instantiationStandar)
        essenceFrameRate = get_metadata("//ns:essenceTrackFrameRate",
                                        root, pbcore_namespace)
        fps_list.append(essenceFrameRate)
        essenceAspectRatio = ififuncs.get_mediainfo(
            'DAR', '--inform=Video;%DisplayAspectRatio_String%', source)
        # NOTE(review): essenceTrackSampling is only bound inside the Audio
        # branch above — a video-only first file would raise NameError here.
        sample_rate_list.append(essenceTrackSampling)
        Interlacement = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Interlacement']",
            root, pbcore_namespace)
        interlace_list.append(Interlacement)
        Compression_Mode = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Compression_Mode']",
            root, pbcore_namespace)
        compression_list.append(Compression_Mode)
        instantiationDate_modified = get_metadata(
            "//ns:instantiationDate[@dateType='file modification']",
            root, pbcore_namespace)
        pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video')
        pix_fmt_list.append(pix_fmt)
        audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio')
        audio_fmt_list.append(audio_fmt)
        if silence:
            # No audio track seen yet: fill audio fields with placeholders.
            audio_codecid = 'n/a'
            essenceBitDepth_au = 'n/a'
            essenceTrackEncod_au = 'n/a'
        video_codecid = vcodec_attributes['ref']
        video_codecid_list.append(video_codecid)
        try:
            video_codec_version = vcodec_attributes['version']
        except KeyError:
            video_codec_version = 'n/a'
        try:
            # The annotation holds e.g. 'profile=...': strip the prefix.
            video_codec_profile = vcodec_attributes['annotation'][8:]
        except KeyError:
            video_codec_profile = 'n/a'
        video_codec_version_list.append(video_codec_version)
        video_codec_profile_list.append(video_codec_profile)
    metadata_error = ''
    metadata_list = [
        scan_types, matrix_list, transfer_list, colour_primaries_list,
        color_spaces, chroma, frame_sizes, par_list, container_list,
        fps_list, sample_rate_list, track_count_list, interlace_list,
        compression_list, pix_fmt_list, audio_fmt_list, audio_codecid_list,
        audio_codec_list, au_bitdepth_list, video_codecid_list,
        video_codec_version_list, video_codec_profile_list
    ]
    # Warn when any attribute differs between files in the package.
    for i in metadata_list:
        if len(set(i)) > 1:
            metadata_error += 'WARNING - Your metadata values are not the same for all files: %s\n' % set(i)
            print metadata_error
            if args.p:
                ififuncs.generate_log(
                    sipcreator_log,
                    'EVENT = Metadata mismatch - Your metadata values are not the same for all files: %s' % set(i))
    tc = ififuncs.convert_millis(ms)
    instantiationDuratio = ififuncs.convert_timecode(25, tc)
    # Fields below are mostly blank placeholders for the catalogue record.
    Donor = ''
    Edited_By = user
    Date_Created = ''
    Date_Last_Modified = ''
    Film_Or_Tape = 'Digital File'
    Date_Of_Donation = ''
    Habitat = ''
    Type_Of_Deposit = ''
    Depositor_Reference = ''
    Master_Viewing = 'Preservation Master'
    Language_Version = ''
    Condition_Rating = ''
    Companion_Elements = ''
    EditedNew = user
    FIO = 'In'
    CollectionTitle = ''
    Created_By = user
    instantiationDimensi = ''
    instantiationLocatio = ''
    instantTimeStart = ''
    instantFileSize_gigs = round(
        float(instantFileSize) / 1024 / 1024 / 1024, 3)
    instantColors = ''
    instantLanguage = ''
    instantAltMo = 'n/a'
    # NOTE(review): first argument 'duration' is just a label passed to
    # get_mediainfo; the inform string actually fetches Video BitDepth.
    essenceBitDepth_vid = ififuncs.get_mediainfo('duration',
                                                 '--inform=Video;%BitDepth%',
                                                 source)
    instantiationChanCon = ''
    ififuncs.append_csv(csv_filename, [
        Reference_Number, Donor, Edited_By, Date_Created, Date_Last_Modified,
        Film_Or_Tape, Date_Of_Donation, Accession_Number, Habitat,
        Type_Of_Deposit, Depositor_Reference, Master_Viewing,
        Language_Version, Condition_Rating, Companion_Elements, EditedNew,
        FIO, CollectionTitle, Created_By, instantiationIdentif,
        instantiationDate_modified, instantiationDimensi,
        instantiationStandar, instantiationLocatio, instantMediaty,
        instantFileSize, instantFileSize_gigs, instantTimeStart,
        instantDataRate, instantTracks, instantColors, instantLanguage,
        instantAltMo, essenceTrackEncodvid, essenceFrameRate,
        essenceTrackSampling, essenceBitDepth_vid, essenceFrameSize,
        essenceAspectRatio, essenceTrackEncod_au, essenceBitDepth_au,
        instantiationDuratio, instantiationChanCon, PixelAspectRatio,
        FrameCount, ColorSpace, ChromaSubsampling, ScanType, Interlacement,
        Compression_Mode, colour_primaries, transfer_characteris,
        matrix_coefficients, pix_fmt, audio_fmt, audio_codecid,
        video_codecid, video_codec_version, video_codec_profile
    ])
    if args.p:
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore' % (csv_filename))
        ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py finished')
        ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(md5_manifest, csv_filename)
        print ' - Updating %s with %s' % (md5_manifest, csv_filename)
        ififuncs.sha512_update(sha512_manifest, csv_filename)
        print ' - Updating %s with %s' % (sha512_manifest, csv_filename)
    print metadata_error
def main(args_):
    '''
    Launches the functions that will safely copy and paste your files.
    '''
    manifest_temp = '--' # add two characters so that I can slice for manifest_temp[1] later.
    dircheck = None
    args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir = setup(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck != None:
        if os.path.isdir(dircheck):
            # Source is a SIP: copy from the SIP path rather than the raw input.
            source = check_for_sip(args.source)
    else:
        source = os.path.abspath(args.source)
    destination = args.destination
    overwrite_destination_manifest, overwrite_destination_dir = overwrite_check(
        destination, log_name_source,
        destination_final_path, manifest_destination)
    remove_bad_files(source, log_name_source)
    source_count, file_list = ififuncs.count_stuff(source)
    manifest_existence(
        manifest_root, manifest_sidecar, manifest,
        source_count, file_list, log_name_source)
    manifest_sidecar, manifest, rootpos = control_flow(
        manifest_sidecar, log_name_source, manifest, rootpos, args, source)
    if overwrite_destination_dir not in ('N', 'n'):
        if overwrite_destination_dir != None:
            generate_log(
                log_name_source,
                'EVENT = File Transfer Overwrite - Destination directory already exists - Overwriting.'
            )
        if not args.move:
            copy_dir(source, destination_final_path, log_name_source,
                     rootpos, destination, dirname, args)
        else:
            shutil.move(source, destination_final_path)
    else:
        generate_log(
            log_name_source,
            'EVENT = File Transfer Overwrite - Destination directory already exists - Not Overwriting.'
        )
    if args.justcopy:
        generate_log(
            log_name_source,
            'EVENT = Exiting without destination manifest or verification due to the use of -justcopy'
        )
        print(
            'Exiting without destination manifest or verification due to the use of -justcopy'
        )
        sys.exit()
    else:
        files_in_manifest = make_destination_manifest(
            overwrite_destination_manifest, log_name_source, rootpos,
            destination_final_path, manifest_destination, destination)
        destination_count = 0
        # dear god do this better, this is dreadful code!
        for _, _, filenames in os.walk(destination_final_path):
            for _ in filenames:
                destination_count += 1  #works in windows at least
        if rootpos == 'y':
            # When copying from a filesystem root there is no source folder
            # name, so rewrite each manifest line to prepend the destination
            # folder name. Assumes 'md5  path' lines: i[:33] keeps the hash
            # plus one space, i[34:] drops the second space — TODO confirm
            # against the manifest writer's exact format.
            manifest_temp = tempfile.mkstemp(dir=desktop_manifest_dir, suffix='.md5')
            os.close(manifest_temp[0])  # Needed for windows.
            with open(manifest, 'r') as fo:
                dest_manifest_list = fo.readlines()
            with open(manifest_temp[1], 'w') as temp_object:
                for i in dest_manifest_list:
                    temp_object.write(i[:33] + ' ' + os.path.basename(
                        os.path.dirname(destination_final_path)) + '/' + i[34:])
            legacy_manifest = manifest
            manifest = manifest_temp[1]
        verify_copy(manifest, manifest_destination, log_name_source,
                    overwrite_destination_manifest, files_in_manifest,
                    destination_count, source_count)
        # Archive the desktop manifest(s) into old_manifests with a timestamp.
        manifest_rename = manifest[:-4] + time.strftime(
            "_%Y_%m_%dT%H_%M_%S") + '.md5'
        if os.path.normpath(os.path.dirname(manifest)) == os.path.normpath(
                desktop_manifest_dir):
            os.rename(manifest, manifest_rename)
            shutil.move(manifest_rename,
                        os.path.join(desktop_manifest_dir, 'old_manifests'))
        if rootpos == 'y':
            legacy_manifest_rename = legacy_manifest[:-4] + time.strftime(
                "_%Y_%m_%dT%H_%M_%S") + '.md5'
            os.rename(legacy_manifest, legacy_manifest_rename)
            shutil.move(
                legacy_manifest_rename,
                os.path.join(desktop_manifest_dir, 'old_manifests'))
        # hack to also copy the sha512 manifest :(
        # Stop the temp manifest from copying
        if not os.path.basename(
                manifest_temp[1]) == os.path.basename(manifest):
            sha512_manifest = manifest.replace('_manifest.md5',
                                               '_manifest-sha512.txt')
            if os.path.isfile(sha512_manifest):
                shutil.copy2(sha512_manifest,
                             os.path.dirname(destination_final_path))
                print(('%s has been copied to %s' % (
                    sha512_manifest, os.path.dirname(destination_final_path))))
    return log_name_source
def main(args_):
    '''
    Launches the various functions that will accession a package

    Renames an Object Entry package to its accession number, regenerates
    manifests/logs, optionally inserts filmographic CSV records and builds
    PBCore records via makepbcore.
    '''
    args = parse_args(args_)
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is not None:
        oe_path = os.path.dirname(uuid_directory)
        oe_number = os.path.basename(oe_path)
        if args.user:
            user = args.user
        else:
            user = ififuncs.get_user()
        # Validate the accession number: 'aaa' prefix plus four digits;
        # on any failure fall back to interactive prompting.
        if args.number:
            if args.number[:3] != 'aaa':
                print(
                    'First three characters must be \'aaa\' and last four characters must be four digits'
                )
                accession_number = ififuncs.get_accession_number()
            elif len(args.number[3:]) != 4:
                accession_number = ififuncs.get_accession_number()
                print(
                    'First three characters must be \'aaa\' and last four characters must be four digits'
                )
            elif not args.number[3:].isdigit():
                accession_number = ififuncs.get_accession_number()
                print(
                    'First three characters must be \'aaa\' and last four characters must be four digits'
                )
            else:
                accession_number = args.number
        else:
            accession_number = ififuncs.get_accession_number()
        if args.reference:
            Reference_Number = args.reference.upper()
        else:
            Reference_Number = ififuncs.get_reference_number()
        # BUG FIX: reference_list was previously computed only inside the
        # `if args.filmo_csv:` branch, so running with -pbcore but without
        # -filmo_csv raised NameError. Compute it up front; a '+' separated
        # reference number describes multiple works in one package.
        if '+' in Reference_Number:
            reference_list = Reference_Number.split('+')
        else:
            reference_list = [Reference_Number]
        if args.acquisition_type:
            acquisition_type = ififuncs.get_acquisition_type(
                args.acquisition_type)
            print(acquisition_type)
        accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
        uuid = os.path.basename(uuid_directory)
        new_uuid_path = os.path.join(accession_path, uuid)
        logs_dir = os.path.join(new_uuid_path, 'logs')
        sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
        if args.force:
            proceed = 'Y'
        else:
            proceed = ififuncs.ask_yes_no(
                'Do you want to rename %s with %s' % (oe_number, accession_number))
        # Accessioning == renaming the OE directory to the accession number.
        if proceed == 'Y':
            os.rename(oe_path, accession_path)
        if args.register:
            register = args.register
        else:
            register = make_register()
        ififuncs.append_csv(register, (oe_number.upper()[:2] + '-' + oe_number[2:],
                                       accession_number, '', '', '', '', '', ''))
        ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=accession.py %s' % ififuncs.get_script_version('accession.py'))
        ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = eventType=Identifier assignment,'
            ' eventIdentifierType=accession number, value=%s' % accession_number)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = eventType=accession,'
            ' eventIdentifierType=accession number, value=%s' % accession_number)
        sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
        # Generate a sidecar SHA512 manifest and fold its log into the SIP log.
        sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(
            os.path.dirname(new_uuid_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
        os.remove(sha512_log)
        print('Generating Digital Forensics XML')
        dfxml_check = True
        try:
            dfxml = make_dfxml(args, new_uuid_path, uuid)
            ififuncs.generate_log(
                sipcreator_log,
                'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml))
        except UnicodeDecodeError:
            # DFXML generation can choke on non-UTF8 filenames; record the
            # failure and continue without a DFXML sidecar.
            ififuncs.generate_log(
                sipcreator_log,
                'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=FAILURE due to UnicodeDecodeError, agentName=makedfxml'
            )
            dfxml_check = False
        if args.filmo_csv:
            metadata_dir = os.path.join(new_uuid_path, 'metadata')
            # Insert one filmographic record per reference number.
            for ref in reference_list:
                package_filmographic = os.path.join(
                    metadata_dir, ref + '_filmographic.csv')
                insert_filmographic(args.filmo_csv, ref, package_filmographic)
                ififuncs.generate_log(
                    sipcreator_log,
                    'EVENT = Metadata extraction - eventDetail=Filmographic descriptive metadata added to metadata folder, eventOutcome=%s, agentName=accession.py' % (package_filmographic))
                ififuncs.manifest_update(sip_manifest, package_filmographic)
                ififuncs.sha512_update(sha512_manifest, package_filmographic)
                print(
                    'Filmographic descriptive metadata added to metadata folder'
                )
        ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
        # The log changed above, so both manifests must refresh its checksum.
        ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        if dfxml_check is True:
            ififuncs.manifest_update(sip_manifest, dfxml)
            ififuncs.sha512_update(sha512_manifest, dfxml)
        if args.pbcore:
            for ref in reference_list:
                makepbcore_cmd = [
                    accession_path, '-p', '-user', user, '-reference', ref
                ]
                if args.parent:
                    makepbcore_cmd.extend(['-parent', args.parent])
                if args.acquisition_type:
                    makepbcore_cmd.extend(
                        ['-acquisition_type', args.acquisition_type])
                if args.donor:
                    makepbcore_cmd.extend(['-donor', args.donor])
                # BUG FIX: this guard previously re-tested args.donor (a
                # copy-paste of the line above), so -depositor_reference was
                # forwarded (possibly as None) whenever a donor was supplied
                # and never otherwise.
                if args.depositor_reference:
                    makepbcore_cmd.extend(
                        ['-depositor_reference', args.depositor_reference])
                if args.reproduction_creator:
                    makepbcore_cmd.extend(
                        ['-reproduction_creator', args.reproduction_creator])
                if args.donation_date:
                    makepbcore_cmd.extend(
                        ['-donation_date', args.donation_date])
                makepbcore.main(makepbcore_cmd)
    else:
        print(
            'not a valid package. The input should include a package that has been through Object Entry'
        )
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples:
    if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    then manifes_root = /home/kieranjol/fakeeeeee_manifest.md5

    Returns a tuple of everything main() needs:
    (args, rootpos, manifest_sidecar, log_name_source, destination_final_path,
     manifest_root, manifest_destination, manifest, destination, dirname,
     desktop_manifest_dir)
    May call sys.exit() if the destination lacks free space and the user declines.
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        'and manifest generation.Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l', '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    parser.add_argument(
        '-move', action='store_true',
        help='Move files instead of copying - much faster!')
    parser.add_argument(
        '-justcopy', action='store_true',
        help='Do not generate destination manifest and verify integrity :(')
    parser.add_argument(
        '-y', action='store_true',
        help='Answers YES to the question: Not enough free space, would you like to continue?')
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    # If the input directory is a SIP-style package, check_for_sip returns the
    # inner path to use as the real source; otherwise dircheck stays None.
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck != None:
        if os.path.isdir(dircheck):
            source = check_for_sip(args.source)
            destination = os.path.join(args.destination, os.path.basename(args.source))
            os.makedirs(destination)
    else:
        source = os.path.abspath(args.source)
        destination = args.destination
    normpath = os.path.normpath(source)
    #is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    if dirname == '':
        # A trailing slash / drive root yields an empty basename; rootpos
        # flags this so manifests are named after the destination instead.
        rootpos = 'y'
        '''
        dirname = raw_input(
            'What do you want your destination folder to be called?\n'
        )
        '''
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    if rootpos == 'y':
        manifest_destination = os.path.dirname(
            destination) + '/%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    if rootpos == 'y':
        manifest_filename = '%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(
        os.path.dirname(source), relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    print('Checking total size of input folder')
    total_input_size = ififuncs.get_folder_size(os.path.abspath(args.source))
    print('Checking if enough space in destination folder')
    free_space = ififuncs.get_free_space(args.destination)
    # Warn when the destination cannot hold the source; -y pre-answers the
    # prompt so unattended runs can proceed anyway.
    if total_input_size > free_space:
        print('You do not have enough free space!')
        if args.y:
            go_forth_blindly = 'Y'
        else:
            go_forth_blindly = ififuncs.ask_yes_no(
                'Would you like to continue anyway? Press Y or N')
        if go_forth_blindly == 'Y':
            generate_log(
                log_name_source,
                'You do not have enough free space!, but the user has decided to continue anyhow')
        else:
            generate_log(log_name_source, 'You do not have enough free space! - Exiting')
            sys.exit()
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Moves (or, with -copy, copies) the files listed in args.i into
    args.new_folder inside an existing SIP, logging each movement and
    updating the SIP manifest accordingly.
    '''
    args = parse_args(args_)
    source = args.input
    sip_path = ififuncs.check_for_sip([source])
    if sip_path is not None:
        oe_path = os.path.dirname(sip_path)
        uuid = os.path.basename(sip_path)
        sip_manifest = os.path.join(oe_path, uuid) + '_manifest.md5'
    start = datetime.datetime.now()
    print args
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(new_log_textfile, 'EVENT = rearrange.py started')
    # NOTE(review): the log messages name rearrange.py but the version string
    # is read from package_update.py - confirm which script this really is.
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=rearrange.py %s' % ififuncs.get_script_version('package_update.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    if not os.path.isdir(args.new_folder):
        os.makedirs(args.new_folder)
    for filenames in args.i:
        if args.copy:
            for filename in filenames:
                copyit.main([filename, args.new_folder])
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = eventType=file movement,'
                    ' eventOutcomeDetailNote=%s has been moved into %s'
                    ' agentName=copyit.py' % (filename, args.new_folder))
                # this is hardcoded - pick this apart so that any folder can be added to.
                sipcreator.consolidate_manifests(sip_path, 'metadata/supplemental', new_log_textfile)
                log_manifest = os.path.join(
                    os.path.dirname(new_log_textfile),
                    os.path.basename(filename) + '_manifest.md5')
                ififuncs.manifest_update(sip_manifest, log_manifest)
                ififuncs.sort_manifest(sip_manifest)
        else:
            # add test to see if it actually deleted - what if read only?
            # NOTE(review): `filename` is only bound by the -copy branch's loop;
            # this move branch appears to rely on a leaked/undefined name -
            # confirm against the upstream script whether an inner
            # `for filename in filenames:` was lost here.
            shutil.move(filename, args.new_folder)
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = eventType=file movement,'
                ' eventOutcomeDetailNote=%s has been moved into %s'
                ' agentName=shutil.move()' % (filename, args.new_folder))
            print '%s has been moved into %s' % (filename, args.new_folder)
            # Manifest paths are relative to the SIP input directory.
            relative_filename = filename.replace(args.input + '/', '')
            relative_new_folder = args.new_folder.replace(args.input + '/', '')
            update_manifest(
                sip_manifest,
                relative_filename,
                os.path.join(relative_new_folder, os.path.basename(relative_filename)),
                new_log_textfile)
    ififuncs.generate_log(new_log_textfile, 'EVENT = rearrange.py finished')
    ififuncs.checksum_replace(sip_manifest, new_log_textfile, 'md5')
    finish = datetime.datetime.now()
    print '\n', user, 'ran this script at %s and it finished at %s' % (start, finish)
def run_loop(args):
    '''
    This will only process one sequence. Batch processing will come later.

    Runs the full seq2ffv1 workflow for one image sequence (or multi-reel
    set): reversibility short test, FFV1/Matroska (or zip) normalisation per
    reel, then packaging into a SIP. May call sys.exit() on failure.
    '''
    current_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    object_entry = ififuncs.get_object_entry()
    log_name_source = os.path.join(
        args.o, '%s_seq2ffv1_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S"))
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=seq2ffv1.py %s' % ififuncs.get_script_version('seq2ffv1.py'))
    ififuncs.generate_log(log_name_source, 'Command line arguments: %s' % args)
    ififuncs.generate_log(log_name_source, 'EVENT = agentName=%s' % user)
    uuid = ififuncs.create_uuid()
    verdicts = []
    multi_reeler = False
    source_directory = args.i
    images = ififuncs.get_image_sequence_files(source_directory)
    # 'none' is the sentinel used by ififuncs when no sequence is found.
    if images == 'none':
        print('no images found in directory - checking for multi-reel sequence')
        images = ififuncs.check_multi_reel(source_directory)
        multi_reeler = True
        if images == 'none':
            sys.exit()
    # this is checking for a single reeler.
    else:
        images = [source_directory]
    reel_number = 1
    objects = []
    short_test_reports = []
    rawcooked_logfiles = []
    for reel in images:
        short_test_reports.append(short_test(reel))
        # NOTE(review): this re-iterates ALL accumulated reports on every reel,
        # re-printing earlier results - presumably only the newest report was
        # intended; confirm before changing.
        for i in short_test_reports:
            print((' - 24 frame reversibility test for %s is %s' % (os.path.basename(reel), i)))
            if i == 'lossy':
                print('It appears that this sequence is not reversible - exiting')
                sys.exit()
        time.sleep(2)
        # check for a/b rolls
        if reel[-1] in ['a', 'b']:
            # a/b rolls share a reel number taken from the directory name.
            reel_number = reel[-2]
        ffv1_path, source_abspath, args, log_name_source, normalisation_tool, rawcooked_logfile = make_ffv1(
            reel, args, log_name_source, reel_number, uuid, multi_reeler)
        objects.append(ffv1_path)
        rawcooked_logfiles.append(rawcooked_logfile)
        # check for a/b rolls
        if not reel[-1] in ['a', 'b']:
            reel_number += 1
    judgement = package(objects, object_entry, uuid, source_abspath, args, log_name_source, normalisation_tool, user, rawcooked_logfiles, multi_reeler, current_dir)
    judgement, sipcreator_log, sipcreator_manifest = judgement
    verdicts.append([source_directory, judgement])
    for verdict in verdicts:
        print(("%-*s : %s" % (50, args.i, verdict[1])))
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py finished.')
    ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def make_ffv1(reel, args, log_name_source, reel_number, uuid, multi_reeler):
    '''
    This launches the image sequence to FFV1/Matroska process as well as
    framemd5 losslessness verification.

    With args.zip the sequence is packed into a zip via makezip.py instead of
    being normalised with rawcooked.
    Returns (ffv1_path, reel, args, log_name_source, normalisation_tool,
    rawcooked_logfile); note ffv1_path is the zip path in the zip workflow.
    '''
    output_dirname = args.o
    # Multi-reel packages embed the reel number (plus an a/b-roll suffix
    # where present) in the Matroska basename; single reels are <uuid>.mkv.
    if multi_reeler:
        if reel[-1] in ['a', 'b']:
            mkv_basename = uuid + '_reel%s%s.mkv' % (str(reel_number), reel[-1])
        else:
            mkv_basename = uuid + '_reel%s.mkv' % str(reel_number)
    else:
        mkv_basename = uuid + '.mkv'
    ffv1_path = os.path.join(output_dirname, mkv_basename)
    rawcooked_logfile = os.path.join(args.o, '%s_rawcooked.log' % mkv_basename)
    normalisation_tool = ififuncs.get_rawcooked_version()
    # The logfile path is wrapped in literal single quotes before being passed
    # to set_environment - NOTE(review): the quoted form is also what gets
    # returned to the caller; confirm downstream consumers expect the quotes.
    rawcooked_logfile = "\'" + rawcooked_logfile + "\'"
    env_dict = ififuncs.set_environment(rawcooked_logfile)
    rawcooked_cmd = [
        'rawcooked', reel, '--check', 'full', '-c:a', 'copy', '-o', ffv1_path
    ]
    if args.framerate:
        rawcooked_cmd.extend(['-framerate', args.framerate])
    ffv12dpx = (rawcooked_cmd)
    print(ffv12dpx)
    if args.zip:
        uuid = ififuncs.create_uuid()
        # ugly hack until i recfactor. this is the zip_path, not ffv1_path
        ffv1_path = os.path.join(output_dirname, uuid + '.zip')
        ififuncs.generate_log(
            log_name_source,
            'EVENT = packing, status=started, eventType=packing, agentName=makezip.py, eventDetail=Source object to be packed=%s' % reel)
        makezip_judgement = makezip.main([
            '-i', reel,
            '-o', output_dirname,
            '-basename', os.path.basename(ffv1_path)
        ])[0]
        ififuncs.generate_log(
            log_name_source,
            'EVENT = packing, status=finished, eventType=packing, agentName=makezip.py, Source object packed into=%s' % ffv1_path)
        # makezip returns None when the embedded crc32 validation found
        # no problems; anything else is passed through as the verdict.
        if makezip_judgement is None:
            judgement = 'lossless'
        else:
            judgement = makezip_judgement
        ififuncs.generate_log(
            log_name_source,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s' % judgement)
    if not args.zip:
        ififuncs.generate_log(
            log_name_source,
            'EVENT = normalisation, status=started, eventType=Creation, agentName=%s, eventDetail=Image sequence normalised to FFV1 in a Matroska container' % normalisation_tool)
        subprocess.call(ffv12dpx, env=env_dict)
        # NOTE: 'finshed' typo below is preserved deliberately - it is a
        # runtime log string that other tooling may already match on.
        ififuncs.generate_log(
            log_name_source,
            'EVENT = normalisation, status=finshed, eventType=Creation, agentName=%s, eventDetail=Image sequence normalised to FFV1 in a Matroska container' % normalisation_tool)
    return ffv1_path, reel, args, log_name_source, normalisation_tool, rawcooked_logfile
elif user == '3': user = '******' print 'Hi Raelene, Brian Cash must be on paternity leave' time.sleep(1) create_csv(csv_report_filename, ('Sequence Name', 'Lossless?', 'Start time', 'Finish Time')) for source_directory in all_files: for root,dirnames,filenames in os.walk(source_directory): source_directory = root if not file_check(source_directory) == 'TIFF': append_csv(csv_report_filename, (source_directory,'EMPTY DIRECTORY - SKIPPED', 'n/a', 'n/a')) continue root_dir = os.path.dirname(os.path.dirname(root)) general_log = root_dir + '/logs/image/%s_image_log.log' % os.path.basename(root_dir) generate_log(general_log, 'Input = %s' % root) remove_bad_files(source_directory) source_parent_dir = os.path.dirname(source_directory) normpath = os.path.normpath(source_directory) relative_path = normpath.split(os.sep)[-1] split_path = os.path.split(os.path.basename(source_directory))[1] start = datetime.datetime.now() source_manifest = root_dir + '/%s_manifest.md5' % relative_path generate_log(general_log, 'Generating source manifest via md5deep and storing as %s' % source_manifest) make_manifest(root_dir, root_dir, source_manifest) info = make_framemd5(source_directory, 'tiff', 'tiff_framemd5') output_dirname = info[0] source_textfile = info[1] fmd5copy = root_dir + '/metadata/image' shutil.copy(source_textfile,fmd5copy ) image_seq_without_container = info[2]
def main(args_):
    '''
    Retrospectively updates older FFV1/DV packages in order to meet our current
    packaging requirements. This should allow accession.py and makepbcore.py to run as
    expected. This script should work on files created by:
    makeffv1.py
    dvsip.py
    loopline.py

    This variant is driven by filmographic + technical CSV exports: each
    filmographic row is matched to its technical record, the matching oe####
    package found under args.input is repackaged under a fresh UUID, and a
    register row is appended for the new object entry number.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    new_object_entry = get_numbers(args)
    filmographic_csv = args.filmographic
    technical_csv = args.technical
    filmographic_oe_list = []
    filmo_csv_extraction = ififuncs.extract_metadata(filmographic_csv)
    tech_csv_extraction = ififuncs.extract_metadata(technical_csv)
    register = make_register()
    # Build one dictionary per filmographic row, enriched with the matching
    # technical-CSV record (accession number and new reference number).
    for line_item in filmo_csv_extraction[0]:
        dictionary = {}
        oe_number = line_item['Object Entry'].lower()
        dictionary['title'] = line_item['Title']
        dictionary['uppercase_dashed_oe'] = oe_number.upper()
        for tech_record in tech_csv_extraction[0]:
            if tech_record['Reference Number'] == dictionary['uppercase_dashed_oe']:
                dictionary['source_accession_number'] = tech_record['Accession Number']
                dictionary['filmographic_reference_number'] = tech_record['new_ref']
        # this transforms OE-#### to oe####
        dictionary['old_oe'] = oe_number[:2] + oe_number[3:]
        filmographic_oe_list.append(dictionary)
    for oe_package in filmographic_oe_list:
        for root, _, filenames in os.walk(args.input):
            if os.path.basename(root) == oe_package['old_oe']:
                old_oe_path = root
                old_oe = os.path.basename(root)
                log_dir = os.path.join(root, 'logs')
                # Use the (last matching) .mov log as the package log.
                for files in os.listdir(log_dir):
                    if '.mov_log.log' in files:
                        log = os.path.join(log_dir, files)
                manifest = os.path.join(os.path.dirname(root), old_oe + '_manifest.md5')
                uuid = ififuncs.create_uuid()
                # NOTE: 'assignement' spelling preserved - runtime log string.
                uuid_event = (
                    'EVENT = eventType=Identifier assignement,'
                    ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
                ) % uuid
                ififuncs.generate_log(log, 'EVENT = loopline_repackage.py started')
                ififuncs.generate_log(
                    log,
                    'eventDetail=loopline_repackage.py %s' % ififuncs.get_script_version('loopline_repackage.py'))
                ififuncs.generate_log(log, 'Command line arguments: %s' % args)
                ififuncs.generate_log(log, 'EVENT = agentName=%s' % user)
                ififuncs.generate_log(log, uuid_event)
                ififuncs.generate_log(
                    log,
                    'EVENT = eventType=Identifier assignement,'
                    ' eventIdentifierType=object entry, value=%s' % new_object_entry)
                oe_package['new_object_entry'] = new_object_entry
                ififuncs.generate_log(
                    log,
                    'Relationship, derivation, has source=%s' % oe_package['source_accession_number'])
                old_uuid_path = os.path.join(os.path.dirname(root), uuid)
                new_oe_path, new_uuid_path = move_files(
                    root, new_object_entry, old_oe_path, old_uuid_path, uuid)
                # Rewrite manifest entries to reflect the oe#### -> uuid rename.
                updated_lines = update_manifest(manifest, old_oe, uuid)
                new_manifest = os.path.join(new_oe_path, uuid) + '_manifest.md5'
                shutil.move(manifest, new_manifest)
                with open(new_manifest, 'w') as fo:
                    for lines in updated_lines:
                        fo.write(lines)
                # Re-locate the log inside the moved package before renaming.
                new_logs_path = os.path.join(new_uuid_path, 'logs')
                for files in os.listdir(new_logs_path):
                    if '.mov_log.log' in files:
                        log = os.path.join(new_logs_path, files)
                logname = rename_files(new_uuid_path, old_oe, uuid, new_manifest, log)
                provenance_string = 'Reproduction of %s' % oe_package['source_accession_number']
                # Register row: OE-#### form of the new object entry plus
                # placeholder acquisition fields.
                ififuncs.append_csv(
                    register,
                    (oe_package['new_object_entry'].upper()[:2] + '-' + oe_package['new_object_entry'][2:],
                     'date_received', '1', '', oe_package['title'], 'contact_name',
                     'Reproduction', provenance_string, '', ''))
                ififuncs.generate_log(logname, 'EVENT = loopline_repackage.py finished')
                ififuncs.checksum_replace(new_manifest, logname, 'md5')
                # Advance the object entry number for the next package.
                oe_digits = int(os.path.basename(new_oe_path)[2:]) + 1
                new_object_entry = 'oe' + str(oe_digits)
def main(args_):
    '''
    Launches the functions that will safely copy and paste your files.

    Orchestrates the copyit workflow: setup, overwrite checks, manifest
    existence/creation, the actual copy or move, destination manifest
    generation, and source/destination verification.
    Returns the path of the log file for this run.
    '''
    dircheck = None
    args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir = setup(args_)
    # Re-resolve the true source in case the input is a SIP-style package.
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck != None:
        if os.path.isdir(dircheck):
            source = check_for_sip(args.source)
    else:
        source = args.source
        destination = args.destination
    overwrite_destination_manifest, overwrite_destination_dir = overwrite_check(
        destination, log_name_source, destination_final_path, manifest_destination)
    remove_bad_files(source, log_name_source)
    source_count, file_list = count_stuff(source)
    manifest_existence(
        manifest_root, manifest_sidecar, manifest,
        source_count, file_list, log_name_source)
    manifest_sidecar, manifest, rootpos = control_flow(
        manifest_sidecar, log_name_source, manifest, rootpos, args, source)
    # Proceed unless the user explicitly declined to overwrite ('N'/'n').
    if overwrite_destination_dir not in ('N', 'n'):
        if overwrite_destination_dir != None:
            generate_log(
                log_name_source,
                'EVENT = File Transfer Overwrite - Destination directory already exists - Overwriting.')
        if not args.move:
            copy_dir(
                source, destination_final_path, log_name_source,
                rootpos, destination, dirname, args)
        else:
            shutil.move(source, destination_final_path)
    else:
        generate_log(
            log_name_source,
            'EVENT = File Transfer Overwrite - Destination directory already exists - Not Overwriting.')
    files_in_manifest = make_destination_manifest(
        overwrite_destination_manifest, log_name_source, rootpos,
        destination_final_path, manifest_destination, destination)
    destination_count = 0
    # dear god do this better, this is dreadful code!
    for _, _, filenames in os.walk(destination_final_path):
        for _ in filenames:
            destination_count += 1 #works in windows at least
    if rootpos == 'y':
        # Root-level copy: rewrite the manifest so each path gains the
        # destination dirname prefix (md5 lines = 32-hex checksum + 2 chars).
        manifest_temp = tempfile.mkstemp(dir=desktop_manifest_dir, suffix='.md5')
        os.close(manifest_temp[0])  # Needed for windows.
        with open(manifest, 'r') as fo:
            dest_manifest_list = fo.readlines()
        with open(manifest_temp[1], 'wb') as temp_object:
            for i in dest_manifest_list:
                temp_object.write(i[:33] + ' ' + dirname + '/' + i[34:])
        manifest = manifest_temp[1]
    verify_copy(
        manifest, manifest_destination, log_name_source,
        overwrite_destination_manifest, files_in_manifest,
        destination_count, source_count)
    # Archive the desktop manifest with a timestamped name.
    manifest_rename = manifest[:-4] + time.strftime("_%Y_%m_%dT%H_%M_%S") + '.md5'
    if os.path.dirname(manifest) == desktop_manifest_dir:
        os.rename(manifest, manifest_rename)
        shutil.move(manifest_rename, os.path.join(desktop_manifest_dir, 'old_manifests'))
    return log_name_source
def main(args_):
    '''
    Retrospectively updates older FFV1/DV packages in order to meet our current
    packaging requirements. This should allow accession.py and makepbcore.py to run as
    expected. This script should work on files created by:
    makeffv1.py
    dvsip.py
    loopline.py

    This variant walks args.input for oe#### directories directly (no CSV
    matching) and repackages each one under a fresh UUID.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    new_object_entry = get_numbers(args)
    for root, _, filenames in os.walk(args.input):
        # Only process directories named like oe#### (two letters + 4 digits).
        if os.path.basename(root)[:2] == 'oe':
            if len(os.path.basename(root)[2:]) == 4:
                log_dir = os.path.join(root, 'logs')
                # Use the (last matching) .mov log as the package log.
                for files in os.listdir(log_dir):
                    if '.mov_log.log' in files:
                        log = os.path.join(log_dir, files)
                old_oe_path = root
                old_oe = os.path.basename(root)
                manifest = os.path.join(os.path.dirname(root), old_oe + '_manifest.md5')
                uuid = ififuncs.create_uuid()
                # NOTE: 'assignement' spelling preserved - runtime log string.
                uuid_event = (
                    'EVENT = eventType=Identifier assignement,'
                    ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
                ) % uuid
                ififuncs.generate_log(log, 'EVENT = loopline_repackage.py started')
                ififuncs.generate_log(
                    log,
                    'eventDetail=loopline_repackage.py %s' % ififuncs.get_script_version('loopline_repackage.py'))
                ififuncs.generate_log(log, 'Command line arguments: %s' % args)
                ififuncs.generate_log(log, 'EVENT = agentName=%s' % user)
                ififuncs.generate_log(log, uuid_event)
                ififuncs.generate_log(
                    log,
                    'EVENT = eventType=Identifier assignement,'
                    ' eventIdentifierType=object entry, value=%s' % new_object_entry)
                old_uuid_path = os.path.join(os.path.dirname(root), uuid)
                new_oe_path, new_uuid_path = move_files(
                    root, new_object_entry, old_oe_path, old_uuid_path, uuid)
                # Rewrite manifest entries to reflect the oe#### -> uuid rename.
                updated_lines = update_manifest(manifest, old_oe, uuid)
                new_manifest = os.path.join(new_oe_path, uuid) + '_manifest.md5'
                shutil.move(manifest, new_manifest)
                with open(new_manifest, 'w') as fo:
                    for lines in updated_lines:
                        fo.write(lines)
                # Re-locate the log inside the moved package before renaming.
                new_logs_path = os.path.join(new_uuid_path, 'logs')
                for files in os.listdir(new_logs_path):
                    if '.mov_log.log' in files:
                        log = os.path.join(new_logs_path, files)
                logname = rename_files(new_uuid_path, old_oe, uuid, new_manifest, log)
                ififuncs.generate_log(logname, 'EVENT = loopline_repackage.py finished')
                ififuncs.checksum_replace(new_manifest, logname, 'md5')
                # Advance the object entry number for the next package.
                oe_digits = int(os.path.basename(new_oe_path)[2:]) + 1
                new_object_entry = 'oe' + str(oe_digits)
def make_ffv1(video_files, csv_report_filename):
    '''
    Transcodes each video file in video_files to FFV1/Matroska inside a new
    package layout (objects/logs/metadata), verifies losslessness by comparing
    source and output framemd5 reports, writes mediainfo/mediatrace sidecars,
    generates an md5 manifest, and appends a verdict row per file to
    csv_report_filename.
    '''
    for filename in video_files: #loop all files in directory
        filenoext = os.path.splitext(filename)[0]
        # Generate new directory names
        metadata_dir = "%s/metadata" % filenoext
        log_dir = "%s/logs" % filenoext
        data_dir = "%s/objects" % filenoext
        # Actually create the directories.
        os.makedirs(metadata_dir)
        os.makedirs(data_dir)
        os.makedirs(log_dir)
        #Generate filenames for new files.
        inputxml = "%s/%s_source_mediainfo.xml" % (
            metadata_dir, os.path.basename(filename)
        )
        inputtracexml = "%s/%s_source_mediatrace.xml" % (
            metadata_dir, os.path.basename(filename)
        )
        output = "%s/%s.mkv" % (
            data_dir, os.path.splitext(os.path.basename(filename))[0]
        )
        # Generate filename of ffv1.mkv without the path.
        outputfilename = os.path.basename(output)
        outputxml = "%s/%s_mediainfo.xml" % (metadata_dir, outputfilename)
        outputtracexml = "%s/%s_mediatrace.xml" % (metadata_dir, outputfilename)
        fmd5 = "%s/%s_source.framemd5" % (
            metadata_dir, os.path.basename(filename)
        )
        fmd5ffv1 = "%s/%s_ffv1.framemd5" % (metadata_dir, outputfilename)
        # NOTE(review): uses the full `filename` (not basename) in the log
        # name - if video_files holds paths this embeds directory separators;
        # confirm callers pass bare filenames.
        log = "%s/%s_log.log" % (log_dir, filename)
        generate_log(log, 'Input = %s' % filename)
        generate_log(log, 'Output = %s' % output)
        generate_log(
            log,
            'makeffv1.py transcode to FFV1 and framemd5 generation of source started.'
        )
        ffv1_logfile = log_dir + '/%s_ffv1_transcode.log' % filename
        ffv1_env_dict = set_environment(ffv1_logfile)
        # Probe source properties needed for the FCP7/PAL workaround below.
        par = subprocess.check_output(
            [
                'mediainfo', '--Language=raw', '--Full',
                "--Inform=Video;%PixelAspectRatio%", filename
            ]
        ).rstrip()
        field_order = subprocess.check_output(
            [
                'mediainfo', '--Language=raw', '--Full',
                "--Inform=Video;%ScanType%", filename
            ]
        ).rstrip()
        height = subprocess.check_output(
            [
                'mediainfo', '--Language=raw', '--Full',
                "--Inform=Video;%Height%", filename
            ]
        ).rstrip()
        # Transcode video file writing frame md5 and output appropriately
        ffv1_command = [
            'ffmpeg',
            '-i', filename,
            '-c:v', 'ffv1',    # Use FFv1 codec
            '-g', '1',         # Use intra-frame only aka ALL-I aka GOP=1
            '-level', '3',     # Use Version 3 of FFv1
            '-c:a', 'copy',    # Copy and paste audio bitsream with no transcoding
            '-map', '0',
            '-dn',
            '-report',
            '-slicecrc', '1',
            '-slices', '16',
        ]
        # check for FCP7 lack of description and PAL
        if par == '1.000':
            if field_order == '':
                if height == '576':
                    ffv1_command += ['-vf', 'setfield=tff, setdar=4/3']
        ffv1_command += [
            output,
            # Create decoded md5 checksums for every frame of the input. -an ignores audio
            '-f', 'framemd5', '-an',
            fmd5
        ]
        print ffv1_command
        subprocess.call(ffv1_command, env=ffv1_env_dict)
        generate_log(
            log,
            'makeffv1.py transcode to FFV1 and framemd5 generation completed.'
        )
        generate_log(
            log,
            'makeffv1.py Framemd5 generation of output file started.'
        )
        fmd5_logfile = log_dir + '/%s_framemd5.log' % outputfilename
        fmd5_env_dict = set_environment(fmd5_logfile)
        fmd5_command = [
            'ffmpeg',    # Create decoded md5 checksums for every frame
            '-i', output,
            '-report',
            '-f', 'framemd5', '-an',
            fmd5ffv1
        ]
        print fmd5_command
        subprocess.call(fmd5_command, env=fmd5_env_dict)
        generate_log(
            log,
            'makeffv1.py Framemd5 generation of output file completed'
        )
        source_video_size = get_mediainfo(
            'source_video_size', "--inform=General;%FileSize%", filename
        )
        ffv1_video_size = get_mediainfo(
            'ffv1_video_size', '--inform=General;%FileSize%', output
        )
        compression_ratio = float(source_video_size) / float(ffv1_video_size)
        # Keep a copy of the script itself alongside the logs for provenance.
        if os.path.basename(sys.argv[0]) == 'makeffv1.py':
            shutil.copy(sys.argv[0], log_dir)
        print 'Generating mediainfo xml of input file and saving it in %s' % inputxml
        make_mediainfo(inputxml, 'mediaxmlinput', filename)
        print 'Generating mediainfo xml of output file and saving it in %s' % outputxml
        make_mediainfo(outputxml, 'mediaxmloutput', output)
        print 'Generating mediatrace xml of input file and saving it in %s' % inputtracexml
        make_mediatrace(inputtracexml, 'mediatracexmlinput', filename)
        print 'Generating mediatrace xml of output file and saving it in %s' % outputtracexml
        make_mediatrace(outputtracexml, 'mediatracexmloutput', output)
        source_parent_dir = os.path.dirname(os.path.abspath(filename))
        manifest = '%s/%s_manifest.md5' % (source_parent_dir, filenoext)
        generate_log(log, 'makeffv1.py MD5 manifest started')
        # Compare source and FFV1 framemd5 reports line by line. A mismatch
        # containing 'sar' is treated as a PAR-only difference; any other
        # mismatch counts as a real checksum failure.
        checksum_mismatches = []
        with open(fmd5) as f1:
            with open(fmd5ffv1) as f2:
                for (lineno1, line1), (lineno2, line2) in itertools.izip(
                        read_non_comment_lines(f1), read_non_comment_lines(f2)
                ):
                    if line1 != line2:
                        if 'sar' in line1:
                            checksum_mismatches = ['sar']
                        else:
                            checksum_mismatches.append(1)
        if len(checksum_mismatches) == 0:
            print 'LOSSLESS'
            append_csv(
                csv_report_filename,
                (output, 'LOSSLESS', source_video_size, ffv1_video_size, compression_ratio)
            )
            generate_log(log, 'makeffv1.py Transcode was lossless')
        elif len(checksum_mismatches) == 1:
            if checksum_mismatches[0] == 'sar':
                print 'Image content is lossless,' ' Pixel Aspect Ratio has been altered.' ' Update ffmpeg in order to resolve the PAR issue.'
                append_csv(
                    csv_report_filename,
                    (output, 'LOSSLESS - different PAR', source_video_size, ffv1_video_size, compression_ratio)
                )
                generate_log(
                    log,
                    'makeffv1.py Image content is lossless but Pixel Aspect Ratio has been altered.Update ffmpeg in order to resolve the PAR issue.'
                )
        elif len(checksum_mismatches) > 1:
            print 'NOT LOSSLESS'
            append_csv(
                csv_report_filename,
                (output, 'NOT LOSSLESS', source_video_size, ffv1_video_size, compression_ratio)
            )
            generate_log(log, 'makeffv1.py Not Lossless.')
        hashlib_manifest(filenoext, manifest, source_parent_dir)
        # Secondary whole-file comparison of the two framemd5 reports.
        if filecmp.cmp(fmd5, fmd5ffv1, shallow=False):
            print "YOUR FILES ARE LOSSLESS YOU SHOULD BE SO HAPPY!!!"
        else:
            print "The framemd5 text files are not completely identical." " This may be because of a lossy transcode," " or a change in metadata, most likely pixel aspect ratio." " Please analyse the framemd5 files for source and output."
def parse_manifest(manifest, log_name_source):
    '''
    Analyses the manifest to see if any files are missing.
    Returns a list of missing files and a dictionary containing checksums and paths.

    Accepts either an md5 manifest ("*_manifest.md5") or a sha512 manifest
    ("*_manifest-sha512.txt"). Changes the working directory to the manifest's
    directory so the manifest's relative paths resolve. May call sys.exit()
    if counts mismatch and the user declines to proceed.
    '''
    # Derive the package directory from the manifest filename convention.
    if manifest.endswith('md5'):
        source_dir = os.path.join(
            os.path.dirname(manifest),
            os.path.basename(manifest).replace('_manifest.md5','')
        )
    elif manifest.endswith('sha512.txt'):
        source_dir = os.path.join(
            os.path.dirname(manifest),
            os.path.basename(manifest).replace('_manifest-sha512.txt','')
        )
    source_count, file_list = ififuncs.count_stuff(source_dir)
    missing_files_list = []
    manifest_dict = {}
    paths = []
    proceed = 'Y'
    os.chdir(os.path.dirname(manifest))
    # Some legacy manifests are cp1252-encoded; retry on decode failure.
    with open(manifest, 'r', encoding='utf-8') as manifest_object:
        try:
            manifest_list = manifest_object.readlines()
        except UnicodeDecodeError:
            with open(manifest, 'r', encoding='cp1252') as manifest_object:
                manifest_list = manifest_object.readlines()
    for entries in manifest_list:
        checksum = entries.split(' ')[0]
        # Hardcoded column offsets: sha512 hex digest is 128 chars (+2
        # separator chars = 130), md5 is 32 (+2 = 34). Assumes the md5deep
        # style two-character separator - TODO confirm for all producers.
        if 'manifest-sha512.txt' in manifest:
            path = entries[130:].replace('\r', '').replace('\n', '')
        else:
            path = entries[34:].replace('\r', '').replace('\n', '')
        # Normalise Unicode composition and path separators, trying NFC
        # first and falling back to NFD for files written on macOS.
        path = unicodedata.normalize('NFC', path).replace('\\', '/')
        if not os.path.isfile(path):
            path = unicodedata.normalize('NFD', path)
        if not os.path.isfile(path):
            ififuncs.generate_log(
                log_name_source,
                '%s is missing' % path
            )
            print(('%s is missing' % path))
            missing_files_list.append(path)
        elif os.path.isfile(path):
            manifest_dict[path] = checksum
            paths.append(path)
    manifest_file_count = len(manifest_list)
    if source_count != manifest_file_count:
        print(' - There is mismatch between your file count and the manifest file count')
        print(' - checking which files are different')
        for i in file_list:
            if i not in paths:
                print((i, 'is present in your source directory but not in the source manifest'))
        proceed = ififuncs.ask_yes_no('Do you want to proceed regardless?')
    if proceed == 'N':
        print('Exiting')
        sys.exit()
    else:
        if len(missing_files_list) > 0:
            print(('The number of missing files: %s' % len(missing_files_list)))
            ififuncs.generate_log(
                log_name_source,
                'The number of missing files is: %s' % len(missing_files_list)
            )
        elif len(missing_files_list) == 0:
            print('All files present')
            ififuncs.generate_log(
                log_name_source,
                'All files present'
            )
    return manifest_dict, missing_files_list
dirname = os.path.split(os.path.basename(source))[1] relative_path = normpath.split(os.sep)[-1] if args.s: manifest = source_parent_dir + '/%s_manifest.md5' % relative_path else: manifest_ = '/%s_manifest.md5' % relative_path desktop_manifest_dir = make_desktop_manifest_dir() manifest = "%s/%s" % (desktop_manifest_dir, manifest_) log_name_source_ = os.path.basename(args.source) + time.strftime("_%Y_%m_%dT%H_%M_%S") desktop_logs_dir = make_desktop_logs_dir() log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_) generate_log(log_name_source, 'move.py started.') generate_log(log_name_source, 'Source: %s' % source) if os.path.isfile(source): print '\nFile checksum is not currently supported, only directories.\n' generate_log(log_name_source, 'Error: Attempted to generate manifest for file. Only Directories/Folders are currently supported') generate_log(log_name_source, 'move.py exit') sys.exit() elif not os.path.isdir(source): print ' %s is either not a directory or it does not exist' % source generate_log(log_name_source, ' %s is either not a directory or it does not exist' % source) generate_log(log_name_source, 'move.py exit') sys.exit() remove_bad_files(source) source_count = 0
space_counter += 1 if space_counter > 0: sys.exit() for root, dirnames, filenames in os.walk(source_directory): source_directory = root if not file_check(source_directory) == 'TIFF': append_csv( csv_report_filename, (source_directory, 'EMPTY DIRECTORY - SKIPPED', 'n/a', 'n/a')) continue root_dir = os.path.dirname(os.path.dirname(root)) general_log = root_dir + '/logs/image/%s__second_pass_image_log.log' % os.path.basename( root_dir) generate_log(general_log, 'Input = %s' % root) source_parent_dir = os.path.dirname(source_directory) normpath = os.path.normpath(source_directory) relative_path = normpath.split(os.sep)[-1] split_path = os.path.split(os.path.basename(source_directory))[1] start = datetime.datetime.now() source_manifest = root_dir + '/%s_manifest.md5' % relative_path info = make_framemd5(source_directory, 'tiff', 'tiff_framemd5') output_dirname = info[0] source_textfile = info[1] fmd5copy = root_dir + '/metadata/image' shutil.copy(source_textfile, fmd5copy) image_seq_without_container = info[2] output_parent_directory = info[3] tiff_filename = image_seq_without_container + "%06d.tiff" dpx_filename = image_seq_without_container + "%06d.dpx"
def get_metadata(path, new_log_textfile): ''' Recursively create mediainfos and mediatraces for AV files. This should probably go in ififuncs as it could be used by other scripts. ''' mediainfo_version = 'mediainfo' try: mediainfo_version = subprocess.check_output([ 'mediainfo', '--Version' ]).rstrip() except subprocess.CalledProcessError as grepexc: mediainfo_version = grepexc.output.rstrip().splitlines()[1] for root, _, filenames in os.walk(path): for av_file in filenames: if av_file.endswith( ('.mov', 'MP4', '.mp4', '.mkv', '.MXF', '.mxf', '.dv', '.DV') ): if av_file[0] != '.': inputxml = "%s/%s_mediainfo.xml" % ( os.path.join(path, 'metadata'), os.path.basename(av_file) ) inputtracexml = "%s/%s_mediatrace.xml" % ( os.path.join(path, 'metadata'), os.path.basename(av_file) ) print 'Generating mediainfo xml of input file and saving it in %s' % inputxml ififuncs.make_mediainfo( inputxml, 'mediaxmlinput', os.path.join(root, av_file) ) ififuncs.generate_log( new_log_textfile, 'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputxml, mediainfo_version) ) print 'Generating mediatrace xml of input file and saving it in %s' % inputtracexml ififuncs.make_mediatrace( inputtracexml, 'mediatracexmlinput', os.path.join(root, av_file) ) ififuncs.generate_log( new_log_textfile, 'EVENT = Metadata extraction - eventDetail=Mediatrace technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputtracexml, mediainfo_version) ) elif av_file.endswith( ('.tif', 'tiff', '.doc', '.txt', '.docx', '.pdf', '.jpg', '.jpeg', '.png', '.rtf', '.xml', '.odt') ): blacklist = ('siegfried', 'exiftool', 'mediainfo', 'mediatrace') if av_file[0] != '.': if any(word in blacklist for word in av_file): exiftool_version = 'exiftool' try: exiftool_version = subprocess.check_output([ 'exiftool', '-ver' ]) except subprocess.CalledProcessError as grepexc: exiftool_version = grepexc.output.rstrip().splitlines()[1] 
siegfried_version = 'siegfried' try: siegfried_version = subprocess.check_output([ 'sf', '-version' ]) except subprocess.CalledProcessError as grepexc: siegfried_version = grepexc.output.rstrip().splitlines()[1] inputxml = "%s/%s_exiftool.xml" % ( os.path.join(path, 'metadata'), os.path.basename(av_file) ) inputtracexml = "%s/%s_siegfried.json" % ( os.path.join(path, 'metadata'), os.path.basename(av_file) ) ififuncs.make_siegfried( inputtracexml, os.path.join(root, av_file) ) print 'Generating exiftool xml of input file and saving it in %s' % inputxml ififuncs.generate_log( new_log_textfile, 'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via exiftool, eventOutcome=%s, agentName=%s' % (inputxml, exiftool_version) ) print 'Generating mediatrace xml of input file and saving it in %s' % inputtracexml ififuncs.make_exiftool( inputxml, os.path.join(root, av_file) ) ififuncs.generate_log( new_log_textfile, 'EVENT = Format identification - eventType=format identification, eventDetail=Format identification via PRONOM signatures using Siegfried, eventOutcome=%s, agentName=%s' % (inputtracexml, siegfried_version) )
def make_ffv1(video_files, csv_report_filename): for filename in video_files: #loop all files in directory filenoext = os.path.splitext(filename)[0] # Generate new directory names metadata_dir = "%s/metadata" % filenoext log_dir = "%s/logs" % filenoext data_dir = "%s/objects" % filenoext # Actually create the directories. os.makedirs(metadata_dir) os.makedirs(data_dir) os.makedirs(log_dir) #Generate filenames for new files. inputxml = "%s/%s_source_mediainfo.xml" % (metadata_dir, os.path.basename(filename)) inputtracexml = "%s/%s_source_mediatrace.xml" % ( metadata_dir, os.path.basename(filename)) output = "%s/%s.mkv" % ( data_dir, os.path.splitext(os.path.basename(filename))[0]) # Generate filename of ffv1.mkv without the path. outputfilename = os.path.basename(output) outputxml = "%s/%s_mediainfo.xml" % (metadata_dir, outputfilename) outputtracexml = "%s/%s_mediatrace.xml" % (metadata_dir, outputfilename) fmd5 = "%s/%s_source.framemd5" % (metadata_dir, os.path.basename(filename)) fmd5ffv1 = "%s/%s_ffv1.framemd5" % (metadata_dir, outputfilename) log = "%s/%s_log.log" % (log_dir, filename) generate_log(log, 'Input = %s' % filename) generate_log(log, 'Output = %s' % output) generate_log( log, 'makeffv1.py transcode to FFV1 and framemd5 generation of source started.' 
) ffv1_logfile = log_dir + '/%s_ffv1_transcode.log' % filename ffv1_env_dict = set_environment(ffv1_logfile) par = subprocess.check_output([ 'mediainfo', '--Language=raw', '--Full', "--Inform=Video;%PixelAspectRatio%", filename ]).rstrip() field_order = subprocess.check_output([ 'mediainfo', '--Language=raw', '--Full', "--Inform=Video;%ScanType%", filename ]).rstrip() height = subprocess.check_output([ 'mediainfo', '--Language=raw', '--Full', "--Inform=Video;%Height%", filename ]).rstrip() # Transcode video file writing frame md5 and output appropriately ffv1_command = [ 'ffmpeg', '-i', filename, '-c:v', 'ffv1', # Use FFv1 codec '-g', '1', # Use intra-frame only aka ALL-I aka GOP=1 '-level', '3', # Use Version 3 of FFv1 '-c:a', 'copy', # Copy and paste audio bitsream with no transcoding '-map', '0', '-dn', '-report', '-slicecrc', '1', '-slices', '16', ] # check for FCP7 lack of description and PAL if par == '1.000': if field_order == '': if height == '576': ffv1_command += ['-vf', 'setfield=tff,setdar=4/3'] ffv1_command += [ output, '-f', 'framemd5', '-an' # Create decoded md5 checksums for every frame of the input. 
-an ignores audio , fmd5 ] print ffv1_command subprocess.call(ffv1_command, env=ffv1_env_dict) generate_log( log, 'makeffv1.py transcode to FFV1 and framemd5 generation completed.') generate_log( log, 'makeffv1.py Framemd5 generation of output file started.') fmd5_logfile = log_dir + '/%s_framemd5.log' % outputfilename fmd5_env_dict = set_environment(fmd5_logfile) fmd5_command = [ 'ffmpeg', # Create decoded md5 checksums for every frame of the ffv1 output '-i', output, '-report', '-f', 'framemd5', '-an', fmd5ffv1 ] print fmd5_command subprocess.call(fmd5_command, env=fmd5_env_dict) generate_log( log, 'makeffv1.py Framemd5 generation of output file completed') source_video_size = get_mediainfo('source_video_size', "--inform=General;%FileSize%", filename) ffv1_video_size = get_mediainfo('ffv1_video_size', '--inform=General;%FileSize%', output) compression_ratio = float(source_video_size) / float(ffv1_video_size) if os.path.basename(sys.argv[0]) == 'makeffv1.py': shutil.copy(sys.argv[0], log_dir) checksum_mismatches = [] with open(fmd5) as f1: with open(fmd5ffv1) as f2: for (lineno1, line1), (lineno2, line2) in itertools.izip( read_non_comment_lines(f1), read_non_comment_lines(f2)): if line1 != line2: if 'sar' in line1: checksum_mismatches = ['sar'] else: checksum_mismatches.append(1) if len(checksum_mismatches) == 0: print 'LOSSLESS' append_csv(csv_report_filename, (output, 'LOSSLESS', source_video_size, ffv1_video_size, compression_ratio)) generate_log(log, 'makeffv1.py Transcode was lossless') elif len(checksum_mismatches) == 1: if checksum_mismatches[0] == 'sar': print 'Image content is lossless, Pixel Aspect Ratio has been altered. Update ffmpeg in order to resolve the PAR issue.' append_csv( csv_report_filename, (output, 'LOSSLESS - different PAR', source_video_size, ffv1_video_size, compression_ratio)) generate_log( log, 'makeffv1.py Image content is lossless, but Pixel Aspect Ratio has been altered. Update ffmpeg in order to resolve the PAR issue.' 
) elif len(checksum_mismatches) > 1: print 'NOT LOSSLESS' append_csv(csv_report_filename, (output, 'NOT LOSSLESS', source_video_size, ffv1_video_size, compression_ratio)) generate_log(log, 'makeffv1.py Not Lossless.') if filecmp.cmp(fmd5, fmd5ffv1, shallow=False): print "YOUR FILES ARE LOSSLESS YOU SHOULD BE SO HAPPY!!!" else: print "The framemd5 text files are not completely identical. This may be because of a lossy transcode, or a change in metadata, most likely pixel aspect ratio. Please analyse the framemd5 files for source and output." print 'Generating mediainfo xml of input file and saving it in %s' % inputxml make_mediainfo(inputxml, 'mediaxmlinput', filename) print 'Generating mediainfo xml of output file and saving it in %s' % outputxml make_mediainfo(outputxml, 'mediaxmloutput', output) print 'Generating mediatrace xml of input file and saving it in %s' % inputtracexml make_mediatrace(inputtracexml, 'mediatracexmlinput', filename) print 'Generating mediatrace xml of output file and saving it in %s' % outputtracexml make_mediatrace(outputtracexml, 'mediatracexmloutput', output) source_parent_dir = os.path.dirname(os.path.abspath(filename)) manifest = '%s/%s_manifest.md5' % (source_parent_dir, filenoext) generate_log(log, 'makeffv1.py MD5 manifest started') hashlib_manifest(filenoext, manifest, source_parent_dir)
def main(args_): ''' Launch all the functions for creating an IFI SIP. ''' args = parse_args(args_) start = datetime.datetime.now() inputs = args.i print args if args.user: user = args.user else: user = ififuncs.get_user() if args.oe: if args.oe[:2] != 'oe': print 'First two characters must be \'oe\' and last four characters must be four digits' object_entry = ififuncs.get_object_entry() elif len(args.oe[2:]) != 4: print 'First two characters must be \'oe\' and last four characters must be four digits' object_entry = ififuncs.get_object_entry() elif not args.oe[2:].isdigit(): object_entry = ififuncs.get_object_entry() print 'First two characters must be \'oe\' and last four characters must be four digits' else: object_entry = args.oe else: object_entry = ififuncs.get_object_entry() sip_path = make_folder_path(os.path.join(args.o), args, object_entry) if args.u: if ififuncs.validate_uuid4(args.u) is None: uuid = args.u uuid_event = ( 'EVENT = eventType=Identifier assignement,' ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4' ) % uuid else: print 'exiting due to invalid UUID' uuid_event = ( 'EVENT = exiting due to invalid UUID supplied on the commmand line: %s' % uuid) uuid = False else: uuid = os.path.basename(sip_path) uuid_event = ( 'EVENT = eventType=Identifier assignement,' ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4') % uuid new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log') ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started') ififuncs.generate_log( new_log_textfile, 'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py')) ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args) ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user) ififuncs.generate_log(new_log_textfile, uuid_event) if args.u is False: sys.exit() ififuncs.generate_log( new_log_textfile, 'EVENT = eventType=Identifier assignement,' ' eventIdentifierType=object entry, 
value=%s' % object_entry) metadata_dir = os.path.join(sip_path, 'metadata') logs_dir = os.path.join(sip_path, 'logs') log_names = move_files(inputs, sip_path, args) get_metadata(sip_path, new_log_textfile) ififuncs.hashlib_manifest(metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir) new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile) consolidate_manifests(sip_path, 'metadata', new_log_textfile) ififuncs.hashlib_append(logs_dir, new_manifest_textfile, os.path.dirname(os.path.dirname(logs_dir))) ififuncs.sort_manifest(new_manifest_textfile) if not args.quiet: log_report(log_names) finish = datetime.datetime.now() print '\n', user, 'ran this script at %s and it finished at %s' % (start, finish) if args.d: content_title = create_content_title_text(args, sip_path) ififuncs.manifest_replace( new_manifest_textfile, os.path.join('objects', os.path.basename(args.i[0])).replace("\\", "/"), os.path.join('objects', content_title).replace("\\", "/")) return new_log_textfile, new_manifest_textfile
def main(args_): ififuncs.check_existence(['ffmpeg', 'mediainfo']) print('\n - Normalise.py started') args = parse_args(args_) print(args) source = args.i output_folder = args.o file_list = sorted(ififuncs.get_video_files(source)) if args.sip: if args.user: user = args.user else: user = ififuncs.get_user() if args.oe: if args.oe[:2] != 'oe': print( 'First two characters must be \'oe\' and last four characters must be four digits' ) object_entry = ififuncs.get_object_entry() elif len(args.oe[2:]) not in range(4, 6): print( 'First two characters must be \'oe\' and last four characters must be four digits' ) object_entry = ififuncs.get_object_entry() elif not args.oe[2:].isdigit(): object_entry = ififuncs.get_object_entry() print( 'First two characters must be \'oe\' and last four characters must be four digits' ) else: object_entry = args.oe else: object_entry = ififuncs.get_object_entry() if args.sip: oe_digits = int(object_entry.replace('oe', '')) for filename in file_list: log_name_source = os.path.join( args.o, '%s_normalise_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S")) ififuncs.generate_log(log_name_source, 'normalise.py started.') ififuncs.generate_log(log_name_source, 'Command line arguments: %s' % args) if args.sip: ififuncs.generate_log(log_name_source, 'EVENT = agentName=%s' % user) print('\n - Processing: %s' % filename) ififuncs.generate_log( log_name_source, 'EVENT = Normalization, status=started, eventType=Normalization, agentName=ffmpeg, eventDetail=Source object to be normalised=%s' % filename) output, output_uuid, fmd5, ffv1_logfile = normalise_process( filename, output_folder) ififuncs.generate_log( log_name_source, 'EVENT = Normalization, status=finished, eventType=Normalization, agentName=ffmpeg, eventDetail=Source object normalised into=%s' % output) inputxml, inputtracexml, dfxml = ififuncs.generate_mediainfo_xmls( filename, output_folder, output_uuid, log_name_source) fmd5_logfile, fmd5ffv1, verdict = verify_losslessness( output_folder, 
output, output_uuid, fmd5) ififuncs.generate_log( log_name_source, 'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=ffmpeg, eventDetail=MD5s of AV streams of output file generated for validation, eventOutcome=%s' % verdict) if args.sip: object_entry_complete = 'oe' + str(oe_digits) supplement_cmd = ['-supplement', inputxml, inputtracexml, dfxml] sipcreator_cmd = [ '-i', output, '-move', '-u', output_uuid, '-user', user, '-oe', object_entry_complete, '-o', args.o ] if args.supplement: supplement_cmd.extend(args.supplement) sipcreator_cmd.extend(supplement_cmd) sipcreator_log, sipcreator_manifest = sipcreator.main( sipcreator_cmd) metadata_dir = os.path.join( os.path.dirname(os.path.dirname(sipcreator_log)), 'metadata') shutil.move(fmd5, metadata_dir) shutil.move(fmd5_logfile, os.path.dirname(sipcreator_log)) shutil.move(fmd5ffv1, metadata_dir) shutil.move( ffv1_logfile.replace('\\\\', '\\').replace('\:', ':'), os.path.dirname(sipcreator_log)) logs_dir = os.path.dirname(sipcreator_log) ififuncs.manifest_update( sipcreator_manifest, os.path.join(metadata_dir, os.path.basename(fmd5))) ififuncs.manifest_update( sipcreator_manifest, os.path.join(metadata_dir, os.path.basename(fmd5ffv1))) ififuncs.manifest_update( sipcreator_manifest, os.path.join( logs_dir, os.path.basename( ffv1_logfile.replace('\\\\', '\\').replace('\:', ':')))) ififuncs.manifest_update( sipcreator_manifest, os.path.join( logs_dir, os.path.basename( fmd5_logfile.replace('\\\\', '\\').replace('\:', ':')))) ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest) os.remove(dfxml) os.remove(inputxml) os.remove(inputtracexml) print( 'The judgement above only refers to the copyit job, the losslessness judgement is: %s' % verdict) oe_digits += 1