def run_loop(args):
    '''
    Launches a recursive loop to process all images sequences in your
    subdirectories.

    Walks ``args.i``; every directory for which
    ``ififuncs.get_image_sequence_files`` finds a sequence is handed to
    ``make_ffv1`` with ``args.o`` as the output directory. One verdict line
    per sequence is printed at the end. When ``args.sip`` is set, the
    seq2ffv1 log is passed to ``ififuncs.merge_logs`` together with the log
    and manifest returned for the last processed sequence.
    '''
    user = args.user if args.user else ififuncs.get_user()
    log_name_source = os.path.join(
        args.o, '%s_seq2ffv1_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S")
    )
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=seq2ffv1.py %s' % ififuncs.get_script_version('seq2ffv1.py')
    )
    ififuncs.generate_log(
        log_name_source, 'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        log_name_source, 'EVENT = agentName=%s' % user
    )
    verdicts = []
    # These stay None until make_ffv1 actually returns a SIP log/manifest, so
    # an input tree without any image sequence no longer ends in an
    # UnboundLocalError at the merge_logs call below.
    sipcreator_log = None
    sipcreator_manifest = None
    output_dirname = args.o
    for source_directory, _, _ in os.walk(args.i):
        images = ififuncs.get_image_sequence_files(source_directory)
        # get_image_sequence_files signals "nothing here" with the string 'none'.
        if images == 'none':
            continue
        (ffmpeg_friendly_name,
         start_number,
         root_filename) = ififuncs.parse_image_sequence(images)
        if args.short_test:
            short_test(images, args)
        source_abspath = os.path.join(source_directory, ffmpeg_friendly_name)
        judgement = make_ffv1(
            start_number,
            source_abspath,
            output_dirname,
            args,
            log_name_source,
            user
        )
        # With args.sip the result is unpacked as (judgement, log, manifest).
        if args.sip:
            judgement, sipcreator_log, sipcreator_manifest = judgement
        verdicts.append([root_filename, judgement])
    for verdict in verdicts:
        print("%-*s : %s" % (50, verdict[0], verdict[1]))
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py finished.')
    if args.sip and sipcreator_log is not None:
        ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Walks ``args.input``. For every directory that
    ``ififuncs.check_for_uuid_generic`` accepts, a DFXML report and a SHA512
    manifest are generated, but only when neither of the two files exists
    yet; otherwise a message is printed and the directory is left alone.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    for root, _, _ in os.walk(args.input):
        # Look the UUID up once; False means "not a UUID directory".
        uuid = ififuncs.check_for_uuid_generic(root)
        if uuid is False:
            continue
        print(" - Processing %s" % root)
        uuid_path = root
        parent_dir = os.path.dirname(uuid_path)
        logs_dir = os.path.join(uuid_path, 'logs')
        metadata_dir = os.path.join(uuid_path, 'metadata')
        dfxml = os.path.join(metadata_dir, uuid + '_dfxml.xml')
        new_log_textfile = os.path.join(logs_dir, uuid) + '_sip_log.log'
        sha512_manifest = os.path.join(parent_dir, uuid + '_manifest-sha512.txt')
        # NOTE(review): the nesting below was reconstructed from a
        # whitespace-collapsed source; the "already has" message is assumed
        # to pair with the combined check. Please confirm.
        if os.path.isfile(dfxml) or os.path.isfile(sha512_manifest):
            print("Exiting as this package already has DFXML and SHA512")
            continue
        new_manifest_textfile = os.path.join(parent_dir, uuid) + '_manifest.md5'
        ififuncs.generate_log(new_log_textfile, 'EVENT = shadfxml.py started')
        ififuncs.generate_log(
            new_log_textfile,
            'eventDetail=shadfxml.py %s' % ififuncs.get_script_version('shadfxml.py')
        )
        ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
        ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
        if not os.path.isfile(dfxml):
            print('Generating Digital Forensics XML')
            dfxml = accession.make_dfxml(args, uuid_path, uuid)
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml)
            )
            ififuncs.manifest_update(new_manifest_textfile, dfxml)
        if not os.path.isfile(sha512_manifest):
            # manifest.main returns the path of the log it wrote; that log is
            # merged into the SIP log and then removed.
            sha512_log = manifest.main([uuid_path, '-sha512', '-s'])
            ififuncs.merge_logs_append(sha512_log, new_log_textfile, new_manifest_textfile)
            ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
            os.remove(sha512_log)
        ififuncs.sort_manifest(new_manifest_textfile)
def main():
    '''
    Parse the command line, open a timestamped fixity validation log in the
    desktop logs directory, run check_manifest on ``args.input`` and pass
    the result to log_results.
    '''
    args = make_parser().parse_args()
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source_ = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (
        desktop_logs_dir, log_name_source_
    )

    def log(message):
        # Every event of this run goes to the same log file.
        ififuncs.generate_log(log_name_source, message)

    log('EVENT = validate.py started')
    log('eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py'))
    log('Command line arguments: %s' % args)
    checked_manifest = check_manifest(args.input, log_name_source)
    log_results(checked_manifest, log_name_source, args)
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Moves every path in ``args.i`` into ``args.new_folder`` (created if
    missing), records each move in the SIP log and passes the old and new
    relative paths to ``update_manifest`` for the md5 manifest. Prints a
    message and returns without doing anything when
    ``ififuncs.check_for_sip`` finds no SIP under ``args.input``.
    '''
    args = parse_args(args_)
    source = args.input
    sip_path = ififuncs.check_for_sip([source])
    if sip_path is None:
        # Without a SIP there is no uuid, log or manifest to work with;
        # previously this fell through to a NameError further down.
        print('Exiting as no SIP could be found in %s' % source)
        return
    oe_path = os.path.dirname(sip_path)
    uuid = os.path.basename(sip_path)
    sip_manifest = os.path.join(oe_path, uuid) + '_manifest.md5'
    start = datetime.datetime.now()
    print(args)
    user = args.user if args.user else ififuncs.get_user()
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(new_log_textfile, 'EVENT = rearrange.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=rearrange.py %s' % ififuncs.get_script_version('rearrange.py')
    )
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    if not os.path.isdir(args.new_folder):
        os.makedirs(args.new_folder)
    # Manifest entries are relative to the input folder, so strip that prefix.
    input_prefix = args.input + '/'
    relative_new_folder = args.new_folder.replace(input_prefix, '')
    for filename in args.i:
        # add test to see if it actually deleted - what if read only?
        shutil.move(filename, args.new_folder)
        print('%s has been moved into %s' % (filename, args.new_folder))
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=file movement,'
            ' eventOutcomeDetailNote=%s has been moved into %s'
            ' agentName=shutil.move()' % (filename, args.new_folder)
        )
        relative_filename = filename.replace(input_prefix, '')
        update_manifest(
            sip_manifest,
            relative_filename,
            os.path.join(relative_new_folder, os.path.basename(relative_filename)),
            new_log_textfile
        )
    ififuncs.generate_log(new_log_textfile, 'EVENT = rearrange.py finished')
    ififuncs.checksum_replace(sip_manifest, new_log_textfile, 'md5')
    finish = datetime.datetime.now()
    print('\n%s ran this script at %s and it finished at %s' % (user, start, finish))
def run_loop(args):
    '''
    Launches a recursive loop to process all images sequences in your
    subdirectories.

    Walks ``args.source_directory``; every directory for which
    ``ififuncs.get_image_sequence_files`` finds a sequence is handed to
    ``make_ffv1`` with ``args.destination`` as the output directory. One
    verdict line per sequence is printed at the end, and the seq2ffv1 log is
    passed to ``ififuncs.merge_logs`` together with the log and manifest
    returned for the last processed sequence.
    '''
    user = args.user if args.user else ififuncs.get_user()
    log_name_source = os.path.join(
        args.destination,
        '%s_seq2ffv1_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S")
    )
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=seq2ffv1.py %s' % ififuncs.get_script_version('seq2ffv1.py')
    )
    ififuncs.generate_log(
        log_name_source, 'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        log_name_source, 'EVENT = agentName=%s' % user
    )
    verdicts = []
    # These stay None until make_ffv1 has run at least once, so an input tree
    # without any image sequence no longer ends in an UnboundLocalError at
    # the merge_logs call below.
    sipcreator_log = None
    sipcreator_manifest = None
    output_dirname = args.destination
    for source_directory, _, _ in os.walk(args.source_directory):
        images = ififuncs.get_image_sequence_files(source_directory)
        # get_image_sequence_files signals "nothing here" with the string 'none'.
        if images == 'none':
            continue
        (ffmpeg_friendly_name,
         start_number,
         root_filename) = ififuncs.parse_image_sequence(images)
        source_abspath = os.path.join(source_directory, ffmpeg_friendly_name)
        judgement, sipcreator_log, sipcreator_manifest = make_ffv1(
            start_number,
            source_abspath,
            output_dirname,
            root_filename,
            args,
            log_name_source
        )
        verdicts.append([root_filename, judgement])
    for verdict in verdicts:
        print("%-*s : %s" % (50, verdict[0], verdict[1]))
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py finished.')
    if sipcreator_log is not None:
        ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def launch_mediaconch(log_name_source, user, mediaconch_xmlfile, manifest, full_path):
    '''
    Run mediaconch on files.

    Writes the start, agent and version events to ``log_name_source``. If
    ``mediaconch_xmlfile`` does not exist yet, it is created for
    ``full_path`` and added to ``manifest``.
    '''
    def log(message):
        ififuncs.generate_log(log_name_source, message)

    log('EVENT = ffv1mkvvalidate.py started')
    log('agentName=%s' % user)
    log('eventDetail=ffv1mkvvalidate.py %s' % ififuncs.get_script_version('ffv1mkvvalidate.py'))
    mediaconch_version = subprocess.check_output(['mediaconch', '-v']).rstrip()
    log('agentName=mediaconch, agentversion=%s' % mediaconch_version)
    # An existing report is left untouched and not re-added to the manifest.
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the manifest update belongs under this check.
    if os.path.isfile(mediaconch_xmlfile):
        return
    ififuncs.make_mediaconch(full_path, mediaconch_xmlfile)
    ififuncs.manifest_update(manifest, mediaconch_xmlfile)
def main(args_):
    '''
    Launches all other functions when called from the command line.

    Returns the error counter produced by check_manifest. log_results is
    only called when ``args.update_log`` is set.
    '''
    args = make_parser(args_)
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source_ = os.path.basename(args.input) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (
        desktop_logs_dir, log_name_source_
    )

    def log(message):
        # Every event of this run goes to the same log file.
        ififuncs.generate_log(log_name_source, message)

    log('EVENT = validate.py started')
    log('eventDetail=validate.py %s' % ififuncs.get_script_version('validate.py'))
    log('Command line arguments: %s' % args)
    checked_manifest, error_counter = check_manifest(args, log_name_source)
    if args.update_log:
        log_results(checked_manifest, log_name_source, args)
    return error_counter
def main():
    '''
    Command line entry point: logs the start of the run, the script version
    and the parsed arguments, then runs check_manifest on ``args.input`` and
    hands the result to log_results.
    '''
    cli_parser = make_parser()
    args = cli_parser.parse_args()
    logs_directory = make_desktop_logs_dir()
    input_name = os.path.basename(args.input)
    stamped_name = input_name + time.strftime("_%Y_%m_%dT%H_%M_%S")
    log_name_source = "%s/%s_fixity_validation.log" % (logs_directory, stamped_name)
    ififuncs.generate_log(log_name_source, 'EVENT = validate.py started')
    script_version = ififuncs.get_script_version('validate.py')
    ififuncs.generate_log(
        log_name_source, 'eventDetail=validate.py %s' % script_version
    )
    ififuncs.generate_log(
        log_name_source, 'Command line arguments: %s' % args
    )
    result = check_manifest(args.input, log_name_source)
    log_results(result, log_name_source, args)
def launch_mediaconch(log_name_source, user, mediaconch_xmlfile, manifest, full_path):
    '''
    Run mediaconch on files.

    Logs the start of ffv1mkvvalidate.py, the user, the script version and
    the mediaconch version, then creates ``mediaconch_xmlfile`` for
    ``full_path`` and registers it in ``manifest`` if the file is missing.
    '''
    ififuncs.generate_log(log_name_source, 'EVENT = ffv1mkvvalidate.py started')
    ififuncs.generate_log(log_name_source, 'agentName=%s' % user)
    script_version = ififuncs.get_script_version('ffv1mkvvalidate.py')
    ififuncs.generate_log(
        log_name_source, 'eventDetail=ffv1mkvvalidate.py %s' % script_version
    )
    version_output = subprocess.check_output(['mediaconch', '-v'])
    mediaconch_version = version_output.rstrip()
    ififuncs.generate_log(
        log_name_source,
        'agentName=mediaconch, agentversion=%s' % mediaconch_version
    )
    report_exists = os.path.isfile(mediaconch_xmlfile)
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # both calls are assumed to run only when the report is missing.
    if not report_exists:
        ififuncs.make_mediaconch(full_path, mediaconch_xmlfile)
        ififuncs.manifest_update(manifest, mediaconch_xmlfile)
def main(args_):
    '''
    Launches the various functions that will accession a package

    Looks for a SIP under ``args.input``. After confirmation (or with
    ``args.force``) the object entry folder is renamed to the accession
    number, the rename is added to the register, the events are logged, a
    SHA512 manifest and a DFXML report are generated and the manifests are
    updated. With ``args.pbcore`` makepbcore is run on the accessioned
    package. Prints a message and returns when no SIP is found.
    '''
    args = parse_args(args_)
    uuid_directory = ififuncs.check_for_sip([args.input])
    if uuid_directory is None:
        print('not a valid package. The input should include a package that has been through Object Entry')
        return
    oe_path = os.path.dirname(uuid_directory)
    oe_number = os.path.basename(oe_path)
    user = args.user if args.user else ififuncs.get_user()
    number = args.number
    if not number:
        accession_number = ififuncs.get_accession_number()
    elif number[:3] != 'aaa' or len(number[3:]) != 4 or not number[3:].isdigit():
        # Explain the rejection before asking for a replacement; two of the
        # three original branches asked first and explained afterwards.
        print('First three characters must be \'aaa\' and last four characters must be four digits')
        accession_number = ififuncs.get_accession_number()
    else:
        accession_number = number
    accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
    uuid = os.path.basename(uuid_directory)
    # All paths below point into the package as it will be named after the rename.
    new_uuid_path = os.path.join(accession_path, uuid)
    logs_dir = os.path.join(new_uuid_path, 'logs')
    sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
    if args.force:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no(
            'Do you want to rename %s with %s' % (oe_number, accession_number)
        )
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source.
    # Everything below depends on the renamed paths, so it is assumed to run
    # only when the rename was confirmed. Please confirm.
    if proceed == 'Y':
        os.rename(oe_path, accession_path)
        register = make_register()
        ififuncs.append_csv(
            register,
            (oe_number.upper()[:2] + '-' + oe_number[2:6], accession_number, '', '', '', '', '')
        )
        ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=accession.py %s' % ififuncs.get_script_version('accession.py')
        )
        ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = eventType=Identifier assignment,'
            ' eventIdentifierType=accession number, value=%s' % accession_number
        )
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = eventType=accession,'
            ' eventIdentifierType=accession number, value=%s' % accession_number
        )
        sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
        # manifest.main returns the path of the log it wrote; that log is
        # merged into the SIP log and then removed.
        sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(
            os.path.dirname(new_uuid_path), uuid + '_manifest-sha512.txt'
        )
        ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
        os.remove(sha512_log)
        dfxml = make_dfxml(args, new_uuid_path, uuid)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml)
        )
        ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
        # The log has changed since it was hashed, so its checksums are replaced.
        ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(sip_manifest, dfxml)
        ififuncs.sha512_update(sha512_manifest, dfxml)
        if args.pbcore:
            makepbcore.main([accession_path, '-p', '-user', user])
def main(args_):
    '''
    Build a PBCore-style CSV record for the package at ``args.input``.

    Runs mediainfo on every file returned by
    ``ififuncs.recursive_file_list``, collects per-file values into lists so
    that mismatches between files can be reported, and appends one row to
    the CSV. With ``args.p`` the CSV is stored in the package's metadata
    folder, the run is logged in the SIP log and both manifests are updated.
    '''
    # if multiple file are present, this script will treat them as a single
    # instantiation/representation and get aggregate metadata about the whole
    # package. For now, this will be a clumsy implementation - the first file
    # will provide most metadata. Things like duration/bitrate/filesize
    # will be calculated as a whole.
    # Although another way would be that every call is looped, and if
    # this could catch files that should not be in the package, eg. a 4:2:2
    # file in a 4:2:0 package..
    # yup - do it that way!
    args = parse_args(args_)
    all_files = ififuncs.recursive_file_list(args.input)
    # Stays True until an audio track is seen in any file.
    silence = True
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    acquisition_type = ''
    if args.acquisition_type:
        acquisition_type = ififuncs.get_acquisition_type(
            args.acquisition_type)[0]
    # A directory entry that validate_uuid4 accepts (returns None) is used
    # as the instantiation identifier.
    for dirs in os.listdir(args.input):
        if ififuncs.validate_uuid4(dirs) is None:
            instantiationIdentif = dirs
    Accession_Number = get_accession_number(args.input)
    if args.reference:
        Reference_Number = args.reference.upper()
    else:
        Reference_Number = get_reference_number(args.input)
    if args.p:
        for root, _, filenames in os.walk(args.input):
            if os.path.basename(root) == 'metadata':
                metadata_dir = root
            elif os.path.basename(root) == 'logs':
                logs_dir = root
        csv_filename = os.path.join(metadata_dir, Accession_Number + '_pbcore.csv')
        sipcreator_log = os.path.join(logs_dir, instantiationIdentif + '_sip_log.log')
        ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py'))
        ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    else:
        csv_filename = 'blaa.csv'
    print(' - Metadata will be stored in %s' % csv_filename)
    for filenames in os.listdir(args.input):
        if '_manifest.md5' in filenames:
            md5_manifest = os.path.join(args.input, filenames)
        elif 'manifest-sha512.txt' in filenames:
            sha512_manifest = os.path.join(args.input, filenames)
    make_csv(csv_filename)
    # Totals accumulated over all files.
    ms = 0
    FrameCount = 0
    instantFileSize_byte = 0
    instantFileSize_gigs = 0
    # One entry per file (or per audio track); more than one distinct value
    # in any list is reported as a mismatch further down.
    scan_types = []
    matrix_list = []
    transfer_list = []
    colour_primaries_list = []
    color_spaces = []
    chroma = []
    frame_sizes = []
    par_list = []
    container_list = []
    fps_list = []
    sample_rate_list = []
    track_count_list = []
    interlace_list = []
    compression_list = []
    pix_fmt_list = []
    audio_fmt_list = []
    audio_codecid_list = []
    audio_codec_list = []
    au_bitdepth_list = []
    video_codecid_list = []
    video_codec_version_list = []
    video_codec_profile_list = []
    timecode_list = []
    channels_list = []
    for source in all_files:
        metadata = subprocess.check_output(
            ['mediainfo', '--Output=PBCore2', source])
        root = etree.fromstring(metadata)
        print(' - Analysing %s' % source)
        pbcore_namespace = root.xpath('namespace-uri(.)')
        track_type = root.xpath('//ns:essenceTrackType',
                                namespaces={'ns': pbcore_namespace})
        if len(track_type) > 0:
            for track in track_type:
                if track.text == 'Video':
                    essenceTrackEncodvid = get_metadata(
                        "ns:essenceTrackEncoding", track.getparent(),
                        pbcore_namespace)
                    vcodec_attributes = get_attributes(track.getparent(),
                                                       pbcore_namespace)
                elif track.text == 'Audio':
                    silence = False
                    essenceTrackEncod_au = get_metadata(
                        "//ns:essenceTrackEncoding", track.getparent(),
                        pbcore_namespace)
                    audio_codec_list.append(essenceTrackEncod_au)
                    acodec_attributes = get_attributes(track.getparent(),
                                                       pbcore_namespace)
                    try:
                        audio_codecid = acodec_attributes['ref']
                    except KeyError:
                        audio_codecid = 'n/a'
                    essenceTrackSampling = ififuncs.get_mediainfo(
                        'samplerate', '--inform=Audio;%SamplingRate_String%',
                        source)
                    sample_rate_list.append(essenceTrackSampling)
                    essenceBitDepth_au = get_metadata(
                        "//ns:essenceTrackBitDepth", root, pbcore_namespace)
                    audio_codecid_list.append(audio_codecid)
                    au_bitdepth_list.append(essenceBitDepth_au)
                    channels = get_metadata(
                        "//ns:essenceTrackAnnotation[@annotationType='Channel(s)']",
                        track.getparent(), pbcore_namespace)
                    channels_list.append(channels)
        ScanType = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ScanType']", root,
            pbcore_namespace)
        scan_types.append(ScanType)
        matrix_coefficients = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='matrix_coefficients']",
            root, pbcore_namespace)
        timecode_source, starting_timecode = get_timecode(
            pbcore_namespace, root, source)
        timecode_list.append(starting_timecode)
        matrix_list.append(matrix_coefficients)
        transfer_characteris = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='transfer_characteristics']",
            root, pbcore_namespace)
        transfer_list.append(transfer_characteris)
        colour_primaries = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='color_primaries']",
            root, pbcore_namespace)
        colour_primaries_list.append(colour_primaries)
        FrameCount += int(
            get_metadata(
                "//ns:essenceTrackAnnotation[@annotationType='FrameCount']",
                root, pbcore_namespace))
        instantFileSize_byte += int(
            get_metadata("//ns:instantiationFileSize", root,
                         pbcore_namespace))
        # Overall bit rate divided by 1000 twice, rounded to two decimals.
        instantDataRate = round(
            float(
                ififuncs.get_mediainfo('OverallBitRate',
                                       '--inform=General;%OverallBitRate%',
                                       source)) / 1000 / 1000, 2)
        instantTracks = ififuncs.get_number_of_tracks(source)
        track_count_list.append(instantTracks)
        ms += ififuncs.get_milliseconds(source)
        ColorSpace = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ColorSpace']",
            root, pbcore_namespace)
        color_spaces.append(ColorSpace)
        ChromaSubsampling = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ChromaSubsampling']",
            root, pbcore_namespace)
        chroma.append(ChromaSubsampling)
        instantMediaty = get_metadata("//ns:instantiationMediaType", root,
                                      pbcore_namespace)
        essenceFrameSize = get_metadata("//ns:essenceTrackFrameSize", root,
                                        pbcore_namespace)
        frame_sizes.append(essenceFrameSize)
        PixelAspectRatio = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='PixelAspectRatio']",
            root, pbcore_namespace)
        par_list.append(PixelAspectRatio)
        instantiationStandar = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Format']", root,
            pbcore_namespace)
        container_list.append(instantiationStandar)
        essenceFrameRate = get_metadata("//ns:essenceTrackFrameRate", root,
                                        pbcore_namespace)
        fps_list.append(essenceFrameRate)
        essenceAspectRatio = ififuncs.get_mediainfo(
            'DAR', '--inform=Video;%DisplayAspectRatio_String%', source)
        Interlacement = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Interlacement']",
            root, pbcore_namespace)
        interlace_list.append(Interlacement)
        Compression_Mode = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Compression_Mode']",
            root, pbcore_namespace)
        colour_range = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='colour_range']",
            root, pbcore_namespace)
        format_version = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Format_Version']",
            root, pbcore_namespace)
        app_company_name = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Encoded_Application_CompanyName']",
            root, pbcore_namespace)
        app_name = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Encoded_Application_Name']",
            root, pbcore_namespace)
        app_version = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Encoded_Application_Version']",
            root, pbcore_namespace)
        library_name = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Encoded_Library_Name']",
            root, pbcore_namespace)
        library_version = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Encoded_Library_Version']",
            root, pbcore_namespace)
        compression_list.append(Compression_Mode)
        instantiationDate_mo = get_metadata(
            "//ns:instantiationDate[@dateType='file modification']", root,
            pbcore_namespace)
        instantDate_other = 'n/a'
        instantDate_type = 'n/a'
        pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video')
        pix_fmt_list.append(pix_fmt)
        audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio')
        audio_fmt_list.append(audio_fmt)
    # NOTE(review): indentation reconstructed from a whitespace-collapsed
    # source; the block from here to the profile/version appends is assumed
    # to run once after the file loop. Please confirm.
    if silence:
        audio_codecid = 'n/a'
        essenceBitDepth_au = 'n/a'
        essenceTrackEncod_au = 'n/a'
        essenceTrackSampling = 'n/a'
        # channels is otherwise only bound in the audio branch; without this
        # a package with no audio track fails with a NameError when the CSV
        # row is built.
        channels = 'n/a'
    video_codecid = vcodec_attributes['ref']
    video_codecid_list.append(video_codecid)
    try:
        video_codec_version = vcodec_attributes['version']
    except KeyError:
        video_codec_version = 'n/a'
    try:
        # The first eight characters of the annotation are dropped.
        video_codec_profile = vcodec_attributes['annotation'][8:]
    except KeyError:
        video_codec_profile = 'n/a'
    video_codec_version_list.append(video_codec_version)
    video_codec_profile_list.append(video_codec_profile)
    metadata_error = ''
    metadata_list = [
        scan_types, matrix_list, transfer_list, colour_primaries_list,
        color_spaces, chroma, frame_sizes, par_list, container_list,
        fps_list, sample_rate_list, track_count_list, interlace_list,
        compression_list, pix_fmt_list, audio_fmt_list, audio_codecid_list,
        audio_codec_list, au_bitdepth_list, video_codecid_list,
        video_codec_version_list, video_codec_profile_list, channels_list,
        timecode_list
    ]
    for i in metadata_list:
        if len(set(i)) > 1:
            metadata_error += 'WARNING - Your metadata values are not the same for all files: %s\n' % set(
                i)
            print(metadata_error)
            if args.p:
                ififuncs.generate_log(
                    sipcreator_log,
                    'EVENT = Metadata mismatch - Your metadata values are not the same for all files: %s'
                    % set(i))
    tc = ififuncs.convert_millis(ms)
    instantiationDuratio = ififuncs.convert_timecode(25, tc)
    if args.donor:
        Donor = args.donor
    else:
        Donor = ''
    Edited_By = user
    Date_Created = ''
    Date_Last_Modified = ''
    Film_Or_Tape = 'Digital AV Object'
    Date_Of_Donation = ''
    reproduction_creator = ''
    if args.acquisition_type:
        if acquisition_type == 'Reproduction':
            Date_Of_Donation = instantiationDate_mo.split('T')[0]
            # if a reproduction, then there's no Donor/transfer of title.
            Donor = 'n/a'
            # NOTE(review): sipcreator_log is only bound when args.p is set.
            if ififuncs.find_concat_user(
                    sipcreator_log) == 'Aoife Fitzmaurice':
                reproduction_creator = 'Aoife Fitzmaurice (2016)'
            elif ififuncs.find_concat_user(
                    sipcreator_log) == 'Kieran O\'Leary':
                reproduction_creator = 'Kieran O\'Leary (2013)'
    Habitat = ''
    backup_habitat = ''
    Type_Of_Deposit = acquisition_type
    if args.depositor_reference:
        Depositor_Reference = args.depositor_reference
    else:
        Depositor_Reference = ''
    Master_Viewing = 'Preservation Object'
    Language_Version = ''
    Condition_Rating = ''
    Companion_Elements = ''
    TTape_Origin = args.parent
    EditedNew = user
    FIO = 'In'
    CollectionTitle = ''
    Created_By = user
    instantTimeStart = 'n/a'
    instantFileSize_gigs = round(
        float(instantFileSize_byte) / 1024 / 1024 / 1024, 3)
    instantColors = 'n/a'
    instantLanguage = 'n/a'
    instantAltMo = 'n/a'
    # Values read from `source` here come from the last file of the loop.
    essenceBitDepth_vid = ififuncs.get_mediainfo('duration',
                                                 '--inform=Video;%BitDepth%',
                                                 source)
    instantiationChanCon = 'n/a'
    TimeCode_FirstFrame = process_mixed_values(timecode_list)
    TimeCode_Source = timecode_source
    reproduction_reason = ''
    dig_object_descrip = ififuncs.get_digital_object_descriptor(args.input)
    ififuncs.append_csv(csv_filename, [
        Reference_Number,
        Donor,
        Edited_By,
        Date_Created,
        Date_Last_Modified,
        Film_Or_Tape,
        Date_Of_Donation,
        Accession_Number,
        Habitat,
        backup_habitat,
        TTape_Origin,
        Type_Of_Deposit,
        Depositor_Reference,
        Master_Viewing,
        Language_Version,
        Condition_Rating,
        Companion_Elements,
        EditedNew,
        FIO,
        CollectionTitle,
        Created_By,
        instantiationIdentif,
        instantDate_other,
        instantDate_type,
        instantiationDate_mo,
        instantiationStandar,
        instantMediaty,
        instantFileSize_byte,
        instantFileSize_gigs,
        instantTimeStart,
        instantDataRate,
        instantTracks,
        instantColors,
        instantLanguage,
        instantAltMo,
        essenceTrackEncodvid,
        essenceFrameRate,
        essenceTrackSampling,
        essenceBitDepth_vid,
        essenceFrameSize,
        essenceAspectRatio,
        essenceTrackEncod_au,
        essenceBitDepth_au,
        instantiationDuratio,
        instantiationChanCon,
        PixelAspectRatio,
        FrameCount,
        ColorSpace,
        ChromaSubsampling,
        ScanType,
        Interlacement,
        Compression_Mode,
        colour_primaries,
        transfer_characteris,
        matrix_coefficients,
        pix_fmt,
        audio_fmt,
        audio_codecid,
        video_codecid,
        video_codec_version,
        video_codec_profile,
        channels,
        colour_range,
        format_version,
        TimeCode_FirstFrame,
        TimeCode_Source,
        app_company_name,
        app_name,
        app_version,
        library_name,
        library_version,
        reproduction_creator,
        reproduction_reason,
        dig_object_descrip,
    ])
    if args.p:
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore'
            % (csv_filename))
        ififuncs.generate_log(sipcreator_log,
                              'EVENT = makepbcore.py finished')
        # The log has changed since it was hashed, so its checksums are
        # replaced before the new CSV is added to both manifests.
        ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(md5_manifest, csv_filename)
        print(' - Updating %s with %s' % (md5_manifest, csv_filename))
        ififuncs.sha512_update(sha512_manifest, csv_filename)
        print(' - Updating %s with %s' % (sha512_manifest, csv_filename))
        # NOTE(review): assumed to sit inside the args.p branch - confirm.
        print(metadata_error)
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Creates the SIP folder structure under ``args.o``, moves the inputs in,
    gathers metadata, consolidates the manifests and logs every step in the
    SIP log. ``args.sc`` skips the object entry number and adds DFXML and a
    SHA512 manifest; ``args.d`` additionally validates and probes a DCP with
    Clairmeta. Exits when ``args.u`` is given but is not a valid UUID.

    Returns the paths of the SIP log and of the md5 manifest.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    if args.d:
        try:
            import clairmeta
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print(args)
    user = args.user if args.user else ififuncs.get_user()
    if args.sc:
        object_entry = 'not_applicable'
    elif not args.oe:
        object_entry = ififuncs.get_object_entry()
    elif (args.oe[:2] != 'oe'
          or len(args.oe[2:]) not in range(4, 6)
          or not args.oe[2:].isdigit()):
        # Explain the rejection before asking for a replacement value.
        print('First two characters must be \'oe\' and last four characters must be four digits')
        object_entry = ififuncs.get_object_entry()
    else:
        object_entry = args.oe
    # validate_uuid4 returns None for a valid UUID. Exit here, before any
    # folder or log is created: the previous code carried on with
    # uuid = False and then failed while building the log path, and its
    # later "args.u is False" exit check could never be true.
    if args.u and ififuncs.validate_uuid4(args.u) is not None:
        print('exiting due to invalid UUID')
        sys.exit()
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid = args.u if args.u else os.path.basename(sip_path)
    uuid_event = (
        'EVENT = eventType=Identifier assignement,'
        ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
    ) % uuid
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py')
    )
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry
        )
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    log_names = move_files(inputs, sip_path, args)
    get_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(
        metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir
    )
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(
        logs_dir, new_manifest_textfile, os.path.dirname(os.path.dirname(logs_dir))
    )
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', args.supplement,
            '-user', user,
            '-new_folder', supplemental_dir,
            os.path.dirname(sip_path),
            '-copy'
        ]
        package_update.main(supplement_cmd)
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # the DFXML and SHA512 steps are assumed to belong to the args.sc branch
    # and sort_manifest to run in every case. Please confirm.
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml' % (dfxml)
        )
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        # manifest.main returns the path of the log it wrote; that log is
        # merged into the SIP log and then removed.
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(
            os.path.dirname(sip_path), uuid + '_manifest-sha512.txt'
        )
        ififuncs.merge_logs_append(sha512_log, new_log_textfile, new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        log_report(log_names)
    finish = datetime.datetime.now()
    print('\n%s ran this script at %s and it finished at %s' % (user, start, finish))
    if args.d:
        content_title = create_content_title_text(sip_path)
        new_dcp_path = os.path.join('objects', content_title).replace("\\", "/")
        absolute_dcp_path = os.path.join(sip_path, new_dcp_path)
        # Point the manifest at the DCP folder under its content-title name.
        ififuncs.manifest_replace(
            new_manifest_textfile,
            os.path.join('objects', os.path.basename(args.i[0])).replace("\\", "/"),
            new_dcp_path
        )
        dcp = DCP(absolute_dcp_path)
        clairmeta_version = clairmeta.__version__
        dcp_dict = dcp.parse()
        xml_str = dicttoxml.dicttoxml(
            dcp_dict, custom_root='ClairmetaProbe', ids=False, attr_type=False
        )
        xml_pretty = prettyprint_xml(xml_str)
        status, report = dcp.check()
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=validation, eventOutcome=%s, eventDetail=%s, agentName=Clairmeta version %s' % (status, report, clairmeta_version)
        )
        clairmeta_xml = os.path.join(metadata_dir, '%s_clairmeta.xml' % content_title)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=Clairmeta DCP metadata extraction, eventOutcome=%s, agentName=Clairmeta version %s' % (clairmeta_xml, clairmeta_version)
        )
        with open(clairmeta_xml, 'w') as fo:
            fo.write(xml_pretty)
        # The log has changed since it was hashed, so its checksum is replaced.
        ififuncs.checksum_replace(new_manifest_textfile, new_log_textfile, 'md5')
        ififuncs.manifest_update(new_manifest_textfile, clairmeta_xml)
        print(status)
        print(report)
        # Repeated so the summary is the last thing shown after the
        # Clairmeta output; `finish` still holds the time taken above.
        print('\n%s ran this script at %s and it finished at %s' % (user, start, finish))
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    args_ is the list of command line arguments that is handed to
    parse_args().
    Returns a tuple of (SIP log path, SIP manifest path).
    Exits via sys.exit() when args.u holds a value that fails validation.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    print(args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    # A supplied object entry number is only used as-is when it is 'oe'
    # followed by exactly four digits. Anything else is reported and the
    # number is obtained from ififuncs.get_object_entry() instead.
    if args.oe:
        supplied_digits = args.oe[2:]
        if args.oe[:2] == 'oe' and len(supplied_digits) == 4 and supplied_digits.isdigit():
            object_entry = args.oe
        else:
            print('First two characters must be \'oe\' and last four characters must be four digits')
            object_entry = ififuncs.get_object_entry()
    else:
        object_entry = ififuncs.get_object_entry()
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    if args.u:
        # This code treats a None result from validate_uuid4() as "valid".
        if ififuncs.validate_uuid4(args.u) is not None:
            # The log filename is derived from the UUID, so without a usable
            # UUID there is no log to write to. Previously this branch
            # referenced an unassigned variable and crashed before exiting.
            print('exiting due to invalid UUID')
            sys.exit()
        uuid = args.u
    else:
        uuid = os.path.basename(sip_path)
    uuid_event = (
        'EVENT = eventType=Identifier assignement,'
        ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
    ) % uuid
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = sipcreator.py started'
    )
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py')
    )
    ififuncs.generate_log(
        new_log_textfile,
        'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = agentName=%s' % user
    )
    ififuncs.generate_log(
        new_log_textfile,
        uuid_event
    )
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = eventType=Identifier assignement,'
        ' eventIdentifierType=object entry, value=%s'
        % object_entry
    )
    metadata_dir = os.path.join(sip_path, 'metadata')
    logs_dir = os.path.join(sip_path, 'logs')
    log_names = move_files(inputs, sip_path, args)
    get_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(
        metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir
    )
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects', new_log_textfile)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(
        logs_dir, new_manifest_textfile,
        os.path.dirname(os.path.dirname(logs_dir))
    )
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        log_report(log_names)
    finish = datetime.datetime.now()
    # Single-argument print() gives the same output on Python 2 and 3.
    print('\n%s ran this script at %s and it finished at %s' % (user, start, finish))
    if args.d:
        # Point the manifest entries at the content title instead of the
        # basename of the first input.
        content_title = create_content_title_text(args, sip_path)
        ififuncs.manifest_replace(
            new_manifest_textfile,
            os.path.join('objects', os.path.basename(args.i[0])).replace("\\", "/"),
            os.path.join('objects', content_title).replace("\\", "/")
        )
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launches the various functions that will accession a package

    args_ is the list of command line arguments that is handed to
    parse_args(). The object entry folder that holds the package is renamed
    to the accession number, the register is appended to, a sha512 manifest
    and DFXML are generated, and optionally a filmographic CSV is inserted
    and makepbcore is launched.
    Nothing is returned. If ififuncs.check_for_sip() finds no package, a
    message is printed and nothing else happens.
    '''
    args = parse_args(args_)
    source = args.input
    uuid_directory = ififuncs.check_for_sip([source])
    if uuid_directory is None:
        print('not a valid package. The input should include a package that has been through Object Entry')
        return
    oe_path = os.path.dirname(uuid_directory)
    oe_number = os.path.basename(oe_path)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    # A supplied accession number is only used as-is when it is 'aaa'
    # followed by exactly four digits.
    if args.number:
        supplied_digits = args.number[3:]
        if args.number[:3] == 'aaa' and len(supplied_digits) == 4 and supplied_digits.isdigit():
            accession_number = args.number
        else:
            print('First three characters must be \'aaa\' and last four characters must be four digits')
            accession_number = ififuncs.get_accession_number()
    else:
        accession_number = ififuncs.get_accession_number()
    # The reference number is needed by both the pbcore and the filmographic
    # (csv) steps, so it has to exist when either one is requested.
    if args.pbcore or args.csv:
        if args.reference:
            Reference_Number = args.reference.upper()
        else:
            Reference_Number = ififuncs.get_reference_number()
    if args.acquisition_type:
        acquisition_type = ififuncs.get_acquisition_type(
            args.acquisition_type)
        print(acquisition_type)
    accession_path = os.path.join(os.path.dirname(oe_path), accession_number)
    uuid = os.path.basename(uuid_directory)
    new_uuid_path = os.path.join(accession_path, uuid)
    logs_dir = os.path.join(new_uuid_path, 'logs')
    sipcreator_log = os.path.join(logs_dir, uuid) + '_sip_log.log'
    if args.force:
        proceed = 'Y'
    else:
        proceed = ififuncs.ask_yes_no('Do you want to rename %s with %s' %
                                      (oe_number, accession_number))
    if proceed != 'Y':
        return
    os.rename(oe_path, accession_path)
    if args.register:
        register = args.register
    else:
        register = make_register()
    ififuncs.append_csv(register,
                        (oe_number.upper()[:2] + '-' + oe_number[2:],
                         accession_number, '', '', '', '', '', ''))
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py started')
    ififuncs.generate_log(
        sipcreator_log, 'eventDetail=accession.py %s' %
        ififuncs.get_script_version('accession.py'))
    ififuncs.generate_log(sipcreator_log,
                          'Command line arguments: %s' % args)
    ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(
        sipcreator_log, 'EVENT = eventType=Identifier assignment,'
        ' eventIdentifierType=accession number, value=%s' % accession_number)
    ififuncs.generate_log(
        sipcreator_log, 'EVENT = eventType=accession,'
        ' eventIdentifierType=accession number, value=%s' % accession_number)
    sip_manifest = os.path.join(accession_path, uuid) + '_manifest.md5'
    sha512_log = manifest.main([new_uuid_path, '-sha512', '-s'])
    sha512_manifest = os.path.join(os.path.dirname(new_uuid_path),
                                   uuid + '_manifest-sha512.txt')
    ififuncs.merge_logs_append(sha512_log, sipcreator_log, sip_manifest)
    os.remove(sha512_log)
    print('Generating Digital Forensics XML')
    dfxml = make_dfxml(args, new_uuid_path, uuid)
    ififuncs.generate_log(
        sipcreator_log,
        'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml'
        % (dfxml))
    # this is inefficient. The script should not have to ask for reference
    # number twice if someone wants to insert the filmographic but do not
    # want to make the pbcore csv, perhaps because the latter already exists.
    if args.csv:
        metadata_dir = os.path.join(new_uuid_path, 'metadata')
        package_filmographic = os.path.join(
            metadata_dir, Reference_Number + '_filmographic.csv')
        insert_filmographic(args.csv, Reference_Number,
                            package_filmographic)
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Filmographic descriptive metadata added to metadata folder, eventOutcome=%s, agentName=accession.py'
            % (package_filmographic))
        ififuncs.manifest_update(sip_manifest, package_filmographic)
        ififuncs.sha512_update(sha512_manifest, package_filmographic)
        print('Filmographic descriptive metadata added to metadata folder')
    ififuncs.generate_log(sipcreator_log, 'EVENT = accession.py finished')
    ififuncs.checksum_replace(sip_manifest, sipcreator_log, 'md5')
    ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
    ififuncs.manifest_update(sip_manifest, dfxml)
    ififuncs.sha512_update(sha512_manifest, dfxml)
    if args.pbcore:
        makepbcore_cmd = [
            accession_path, '-p', '-user', user, '-reference',
            Reference_Number
        ]
        if args.parent:
            makepbcore_cmd.extend(['-parent', args.parent])
        if args.acquisition_type:
            makepbcore_cmd.extend(
                ['-acquisition_type', args.acquisition_type])
        if args.donor:
            makepbcore_cmd.extend(['-donor', args.donor])
        # Each optional flag is guarded by its own value; this one used to
        # be guarded by args.donor.
        if args.depositor_reference:
            makepbcore_cmd.extend(
                ['-depositor_reference', args.depositor_reference])
        if args.donation_date:
            makepbcore_cmd.extend(['-donation_date', args.donation_date])
        makepbcore.main(makepbcore_cmd)
def main(args_):
    '''
    Overly long main function that makes a sidecar manifest.
    This needs to get broken up into smaller functions.

    args_ is the list of command line arguments (source directory plus the
    optional -s, -f and -sha512 flags).
    Returns the path of the log file that was written.
    Calls sys.exit() if the source is a file, is missing, cannot be accessed,
    or if an existing manifest lists a different number of files than the
    source directory holds.
    '''
    parser = argparse.ArgumentParser(description='Generate manifest with'
                                     ' checksums for a directory'
                                     ' Written by Kieran O\'Leary.')
    parser.add_argument(
        'source',
        help='Input directory'
    )
    parser.add_argument(
        '-s', '-sidecar',
        action='store_true',
        help='Generates Sidecar'
    )
    parser.add_argument(
        '-f', '-felix',
        action='store_true',
        help='Felix Meehan workflow - places manifest inside of source directory'
    )
    parser.add_argument(
        '-sha512',
        action='store_true',
        help='Generates sha512 checksums instead of md5'
    )
    args = parser.parse_args(args_)
    source = args.source
    source_parent_dir = os.path.dirname(source)
    normpath = os.path.normpath(source)
    relative_path = normpath.split(os.sep)[-1]
    log_name_source_ = os.path.basename(
        args.source
    ) + time.strftime("_%Y_%m_%dT%H_%M_%S")
    if args.sha512:
        manifest_name = '%s_manifest-sha512.txt' % relative_path
        module = 'hashlib.sha512'
    else:
        manifest_name = '%s_manifest.md5' % relative_path
        module = 'hashlib.md5'
    if args.s:
        manifest = source_parent_dir + '/' + manifest_name
        log_name_source = source_parent_dir + '/%s.log' % log_name_source_
    elif args.f:
        # NOTE(review): only the md5 manifest is placed inside the source
        # directory here; the sha512 one goes beside it - confirm intended.
        if args.sha512:
            manifest = source_parent_dir + '/' + manifest_name
        else:
            manifest = source + '/' + manifest_name
        log_name_source = source_parent_dir + '/%s.log' % log_name_source_
    else:
        desktop_manifest_dir = make_desktop_manifest_dir()
        manifest = "%s/%s" % (desktop_manifest_dir, manifest_name)
        desktop_logs_dir = make_desktop_logs_dir()
        log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_source_)
    generate_log(log_name_source, 'manifest.py started.')
    # sys.platform is 'linux2' on Python 2 but 'linux' on Python 3.3+, so a
    # prefix test is needed to recognise Linux on both.
    if sys.platform == "win32":
        agent = 'Windows'
    elif sys.platform == "darwin":
        agent = 'OSX'
    elif sys.platform.startswith("linux"):
        agent = 'Linux'
    else:
        agent = None
    if agent is not None:
        generate_log(
            log_name_source,
            'EVENT = Generating manifest: status=started, eventType=message digest calculation, module=%s, agent=%s' % (module, agent)
        )
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=manifest.py %s' % ififuncs.get_script_version('manifest.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    if os.path.isfile(source):
        print('\nFile checksum is not currently supported, only directories.\n')
        generate_log(log_name_source, 'Error: Attempted to generate manifest for file. Only Directories/Folders are currently supported')
        generate_log(log_name_source, 'manifest.py exit')
        sys.exit()
    elif not os.path.isdir(source):
        print(' %s is either not a directory or it does not exist' % source)
        generate_log(log_name_source, ' %s is either not a directory or it does not exist' % source)
        generate_log(log_name_source, 'manifest.py exit')
        sys.exit()
    remove_bad_files(source, log_name_source)
    # Count every file below source so an existing manifest can be checked.
    source_count = sum(len(filenames) for _, _, filenames in os.walk(source))
    if os.path.isfile(manifest):
        count_in_manifest = manifest_file_count(manifest)
        if source_count != count_in_manifest:
            print('This manifest may be outdated as the number of files in your directory does not match the number of files in the manifest')
            generate_log(log_name_source, 'EVENT = Existing source manifest check - Failure - The number of files in the source directory is not equal to the number of files in the source manifest ')
            sys.exit()
    if not os.path.isfile(manifest):
        try:
            print('Generating source manifest')
            generate_log(log_name_source, 'EVENT = Generating source manifest')
            if args.f:
                if args.sha512:
                    ififuncs.sha512_manifest(source, manifest, source)
                else:
                    hashlib_manifest(source, manifest, source)
                # NOTE(review): the log is moved into source here, yet the
                # old path is still written to and returned below - confirm.
                shutil.move(log_name_source, source)
            else:
                if args.sha512:
                    ififuncs.sha512_manifest(source, manifest, source_parent_dir)
                else:
                    hashlib_manifest(source, manifest, source_parent_dir)
        except OSError:
            print('You do not have access to this directory. Perhaps it is read only, or the wrong file system\n')
            sys.exit()
    else:
        generate_log(log_name_source, 'EVENT = Existing source manifest check - Source manifest already exists. Script will exit. ')
    print('Manifest created in %s' % manifest)
    generate_log(log_name_source, 'Manifest created in %s' % manifest)
    return log_name_source
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.
    some examples:
    if manifest_sidecar = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5
    then manifest_root = /home/kieranjol/fakeeeeee_manifest.md5

    args_ is the list of command line arguments.
    Returns the tuple (args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir).
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        'and manifest generation.Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l',
        '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    rootpos = ''
    dircheck = None
    args = parser.parse_args(args_)
    if os.path.isdir(args.source):
        dircheck = check_for_sip(args.source)
    if dircheck is not None and os.path.isdir(dircheck):
        # check_for_sip() returned a directory: that directory is copied into
        # a new folder named after the source.
        source = dircheck
        destination = os.path.join(args.destination,
                                   os.path.basename(args.source))
        os.makedirs(destination)
    else:
        # Covers both "nothing found" and a result that is not a directory;
        # the latter used to leave source and destination unassigned.
        source = args.source
        destination = args.destination
    normpath = os.path.normpath(source)
    dirname = os.path.basename(source)
    if dirname == '':
        rootpos = 'y'
        # raw_input only exists on Python 2; input is its Python 3 equivalent.
        try:
            ask = raw_input
        except NameError:
            ask = input
        dirname = ask(
            'What do you want your destination folder to be called?\n')
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(os.path.dirname(source),
                                    relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
def main(args_):
    '''
    Retrospectively updates older FFV1/DV packages in order to
    meet our current packaging requirements. This should allow
    accession.py and makepbcore.py to run as expected.
    This script should work on files created by:
    makeffv1.py
    dvsip.py
    loopline.py

    args_ is the list of command line arguments handed to parse_args().
    Each object entry listed in the filmographic CSV is looked up under
    args.input, repackaged under a new UUID and object entry number, and a
    line is appended to the register. Nothing is returned.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    new_object_entry = get_numbers(args)
    filmographic_csv = args.filmographic
    technical_csv = args.technical
    filmographic_oe_list = []
    filmo_csv_extraction = ififuncs.extract_metadata(filmographic_csv)
    tech_csv_extraction = ififuncs.extract_metadata(technical_csv)
    register = make_register()
    # Build one dictionary per filmographic record, joined to the technical
    # CSV on the upper-cased object entry number.
    for line_item in filmo_csv_extraction[0]:
        dictionary = {}
        oe_number = line_item['Object Entry'].lower()
        dictionary['title'] = line_item['Title']
        if dictionary['title'] == '':
            dictionary['title'] = '%s - %s' % (line_item['TitleSeries'],
                                               line_item['EpisodeNo'])
        dictionary['uppercase_dashed_oe'] = oe_number.upper()
        for tech_record in tech_csv_extraction[0]:
            if tech_record['Reference Number'] == dictionary[
                    'uppercase_dashed_oe']:
                dictionary['source_accession_number'] = tech_record[
                    'Accession Number']
                dictionary['filmographic_reference_number'] = tech_record[
                    'new_ref']
        # this transforms OE-#### to oe####
        dictionary['old_oe'] = oe_number[:2] + oe_number[3:]
        filmographic_oe_list.append(dictionary)
    for oe_package in filmographic_oe_list:
        for root, _, _ in os.walk(args.input):
            if os.path.basename(root) != oe_package['old_oe']:
                continue
            old_oe_path = root
            old_oe = os.path.basename(root)
            log_dir = os.path.join(root, 'logs')
            for files in os.listdir(log_dir):
                if '.mov_log.log' in files:
                    log = os.path.join(log_dir, files)
            manifest = os.path.join(os.path.dirname(root),
                                    old_oe + '_manifest.md5')
            uuid = ififuncs.create_uuid()
            uuid_event = (
                'EVENT = eventType=Identifier assignement,'
                ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
            ) % uuid
            ififuncs.generate_log(log,
                                  'EVENT = loopline_repackage.py started')
            ififuncs.generate_log(
                log, 'eventDetail=loopline_repackage.py %s' %
                ififuncs.get_script_version('loopline_repackage.py'))
            ififuncs.generate_log(log,
                                  'Command line arguments: %s' % args)
            ififuncs.generate_log(log, 'EVENT = agentName=%s' % user)
            ififuncs.generate_log(log, uuid_event)
            ififuncs.generate_log(
                log, 'EVENT = eventType=Identifier assignement,'
                ' eventIdentifierType=object entry, value=%s' %
                new_object_entry)
            ififuncs.generate_log(
                log, 'EVENT = eventType=Identifier assignement,'
                ' eventIdentifierType=Filmographic reference number , value=%s'
                % oe_package['filmographic_reference_number'])
            oe_package['new_object_entry'] = new_object_entry
            print('Transforming %s into %s' %
                  (oe_package['old_oe'], oe_package['new_object_entry']))
            ififuncs.generate_log(
                log, 'Relationship, derivation, has source=%s' %
                oe_package['source_accession_number'])
            old_uuid_path = os.path.join(os.path.dirname(root), uuid)
            new_oe_path, new_uuid_path = move_files(
                root, new_object_entry, old_oe_path, old_uuid_path, uuid)
            updated_lines = update_manifest(manifest, old_oe, uuid)
            new_manifest = os.path.join(new_oe_path,
                                        uuid) + '_manifest.md5'
            shutil.move(manifest, new_manifest)
            with open(new_manifest, 'w') as fo:
                for lines in updated_lines:
                    fo.write(lines)
            # The package has moved, so the log has to be located again.
            new_logs_path = os.path.join(new_uuid_path, 'logs')
            for files in os.listdir(new_logs_path):
                if '.mov_log.log' in files:
                    log = os.path.join(new_logs_path, files)
            logname = rename_files(new_uuid_path, old_oe, uuid,
                                   new_manifest, log)
            date_modified, extension = get_date_modified(new_uuid_path)
            # This normally would be bad practise, but this project only has
            # two formats. MOV/DV and FFv1/MKV
            # NOTE(review): any other extension leaves av_format unassigned
            # or holding the previous package's value - confirm acceptable.
            if extension == '.mkv':
                av_format = 'FFV1/PCM/Matroska'
            elif extension == '.mov':
                av_format = 'DV/PCM/QuickTime'
            provenance_string = 'Reproduction of %s' % oe_package[
                'source_accession_number']
            ififuncs.append_csv(
                register,
                (oe_package['new_object_entry'].upper()[:2] + '-' +
                 oe_package['new_object_entry'][2:], date_modified, '1',
                 av_format, oe_package['title'], 'contact_name',
                 'Reproduction', '', provenance_string, '', ''))
            ififuncs.generate_log(
                logname, 'EVENT = loopline_repackage.py finished')
            ififuncs.checksum_replace(new_manifest, logname, 'md5')
            # Increment the number for the next package while keeping the
            # width of the digits, so oe0012 is followed by oe0013 and not
            # oe13.
            current_digits = os.path.basename(new_oe_path)[2:]
            oe_digits = int(current_digits) + 1
            new_object_entry = 'oe' + str(oe_digits).zfill(
                len(current_digits))
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    args_ is the list of command line arguments handed to parse_args().
    Returns a tuple of (SIP log path, SIP manifest path).
    Calls sys.exit() if args.d is set and clairmeta cannot be imported.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    if args.d:
        # clairmeta is only imported when args.d is set, and its absence is
        # fatal in that case.
        try:
            import clairmeta
            clairmeta_version = clairmeta.__version__
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print(args)
    user = ififuncs.determine_user(args)
    object_entry = get_object_entry(args)
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid, uuid_event = determine_uuid(args, sip_path)
    new_log_textfile = os.path.join(sip_path,
                                    'logs' + '/' + uuid + '_sip_log.log')
    if args.d:
        content_title = create_content_title_text(sip_path, args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile, 'eventDetail=sipcreator.py %s' %
        ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile,
                          'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile, 'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    log_names = move_files(inputs, sip_path, args)
    ififuncs.get_technical_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(metadata_dir,
                              metadata_dir + '/metadata_manifest.md5',
                              metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects',
                                                  new_log_textfile)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(logs_dir, new_manifest_textfile,
                            os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', args.supplement, '-user', user, '-new_folder',
            supplemental_dir,
            os.path.dirname(sip_path), '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml'
            % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(os.path.dirname(sip_path),
                                       uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, new_log_textfile,
                                   new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        log_report(log_names)
    finish = datetime.datetime.now()
    # Format one string: printing a parenthesised tuple shows its repr
    # (brackets, quotes and an escaped newline) instead of the message.
    print('\n%s ran this script at %s and it finished at %s' %
          (user, start, finish))
    if args.d:
        process_dcp(sip_path, content_title, args, new_manifest_textfile,
                    new_log_textfile, metadata_dir, clairmeta_version)
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    args_ is the list of command line arguments handed to parse_args().
    Every path in args.i is either copied (args.copy) or moved into
    args.new_folder, and the package log and md5 manifest are updated to
    match. Nothing is returned.
    '''
    args = parse_args(args_)
    source = args.input
    sip_path = ififuncs.check_for_sip([source])
    if sip_path is not None:
        oe_path = os.path.dirname(sip_path)
        uuid = os.path.basename(sip_path)
        sip_manifest = os.path.join(oe_path, uuid) + '_manifest.md5'
    else:
        # this is assuming that the other workflow will be the
        # special collections workflow that has the uuid as the parent.
        # some real checks should exist for this whole if/else flow.
        sip_path = args.input
        oe_path = os.path.dirname(args.input)
        uuid = os.path.basename(sip_path)
        sip_manifest = os.path.join(oe_path, uuid + '_manifest.md5')
    start = datetime.datetime.now()
    print(args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    new_log_textfile = os.path.join(sip_path,
                                    'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(new_log_textfile,
                          'EVENT = package_update.py started')
    ififuncs.generate_log(
        new_log_textfile, 'eventDetail=package_update.py %s' %
        ififuncs.get_script_version('package_update.py'))
    ififuncs.generate_log(new_log_textfile,
                          'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    if not os.path.isdir(args.new_folder):
        os.makedirs(args.new_folder)
    # args.i may arrive as a list nested inside a list; flatten one level.
    if isinstance(args.i[0], (list, )):
        args.i = args.i[0]
    input_parent = os.path.dirname(args.input)

    def relative_to_input_parent(path):
        '''
        Strips the parent folder of args.input from the front of path,
        whichever separator follows it, and returns the rest with forward
        slashes.
        '''
        for separator in ('/', '\\'):
            path = path.replace(input_parent + separator, '')
        return path.replace('\\', '/')

    for filenames in args.i:
        if args.copy:
            copyit.main([filenames, args.new_folder])
            ififuncs.generate_log(
                new_log_textfile, 'EVENT = eventType=file movement,'
                ' eventOutcomeDetailNote=%s has been moved into %s'
                ' agentName=copyit.py' % (filenames, args.new_folder))
            # this is hardcoded - pick this apart so that any folder can be added to.
            # this must be fixed in normalise.py as well.
            relative_new_path = args.new_folder.replace(sip_path, '')
            print('%s relative' % relative_new_path)
            # startswith() copes with an empty string, which indexing [0]
            # does not.
            if relative_new_path.startswith(('/', '\\')):
                relative_new_path = relative_new_path[1:].replace('\\', '/')
            sipcreator.consolidate_manifests(sip_path, relative_new_path,
                                             new_log_textfile)
            log_manifest = os.path.join(
                os.path.dirname(new_log_textfile),
                os.path.basename(filenames) + '_manifest.md5')
            ififuncs.manifest_update(sip_manifest, log_manifest)
            ififuncs.sort_manifest(sip_manifest)
        else:
            # add test to see if it actually deleted - what if read only?
            shutil.move(filenames, args.new_folder)
            ififuncs.generate_log(
                new_log_textfile, 'EVENT = eventType=file movement,'
                ' eventOutcomeDetailNote=%s has been moved into %s'
                ' agentName=shutil.move()' % (filenames, args.new_folder))
            print('%s has been moved into %s' % (filenames, args.new_folder))
            # Previously the forward-slash result was overwritten by the
            # backslash attempt, so the prefix was never removed when paths
            # use forward slashes.
            relative_filename = relative_to_input_parent(filenames)
            relative_new_folder = relative_to_input_parent(args.new_folder)
            update_manifest(
                sip_manifest, relative_filename,
                os.path.join(relative_new_folder,
                             os.path.basename(relative_filename)).replace(
                                 '\\', '/'), new_log_textfile)
    ififuncs.generate_log(new_log_textfile,
                          'EVENT = package_update.py finished')
    ififuncs.checksum_replace(sip_manifest, new_log_textfile, 'md5')
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' %
          (user, start, finish))
def main(args_):
    '''
    Retrospectively updates older FFV1/DV packages in order to
    meet our current packaging requirements. This should allow
    accession.py and makepbcore.py to run as expected.
    This script should work on files created by:
    makeffv1.py
    dvsip.py
    loopline.py

    args_ is the list of command line arguments handed to parse_args().
    Every folder under args.input whose name is 'oe' plus four characters is
    repackaged under a new UUID and the next object entry number.
    Nothing is returned.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    new_object_entry = get_numbers(args)
    for root, _, _ in os.walk(args.input):
        folder_name = os.path.basename(root)
        if folder_name[:2] != 'oe' or len(folder_name[2:]) != 4:
            continue
        log_dir = os.path.join(root, 'logs')
        for files in os.listdir(log_dir):
            if '.mov_log.log' in files:
                log = os.path.join(log_dir, files)
        old_oe_path = root
        old_oe = folder_name
        manifest = os.path.join(os.path.dirname(root),
                                old_oe + '_manifest.md5')
        uuid = ififuncs.create_uuid()
        uuid_event = (
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
        ) % uuid
        ififuncs.generate_log(log,
                              'EVENT = loopline_repackage.py started')
        ififuncs.generate_log(
            log, 'eventDetail=loopline_repackage.py %s' %
            ififuncs.get_script_version('loopline_repackage.py'))
        ififuncs.generate_log(log,
                              'Command line arguments: %s' % args)
        ififuncs.generate_log(log, 'EVENT = agentName=%s' % user)
        ififuncs.generate_log(log, uuid_event)
        ififuncs.generate_log(
            log, 'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' %
            new_object_entry)
        old_uuid_path = os.path.join(os.path.dirname(root), uuid)
        new_oe_path, new_uuid_path = move_files(
            root, new_object_entry, old_oe_path, old_uuid_path, uuid)
        updated_lines = update_manifest(manifest, old_oe, uuid)
        new_manifest = os.path.join(new_oe_path,
                                    uuid) + '_manifest.md5'
        shutil.move(manifest, new_manifest)
        with open(new_manifest, 'w') as fo:
            for lines in updated_lines:
                fo.write(lines)
        # The package has moved, so the log has to be located again.
        new_logs_path = os.path.join(new_uuid_path, 'logs')
        for files in os.listdir(new_logs_path):
            if '.mov_log.log' in files:
                log = os.path.join(new_logs_path, files)
        logname = rename_files(new_uuid_path, old_oe, uuid,
                               new_manifest, log)
        ififuncs.generate_log(
            logname, 'EVENT = loopline_repackage.py finished')
        ififuncs.checksum_replace(new_manifest, logname, 'md5')
        # Increment the number for the next package while keeping the width
        # of the digits, so oe0012 is followed by oe0013 and not oe13.
        current_digits = os.path.basename(new_oe_path)[2:]
        oe_digits = int(current_digits) + 1
        new_object_entry = 'oe' + str(oe_digits).zfill(len(current_digits))
def run_loop(args):
    '''
    This will only process one sequence. Batch processing will come later.

    args is the parsed command line namespace; args.i is the source
    directory and args.o is where the log is written.
    Each reel is tested with short_test(), encoded with make_ffv1() and the
    results are handed to package(). Nothing is returned.
    Calls sys.exit() if no images are found or if a reel's short test
    reports 'lossy'.
    '''
    current_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    object_entry = ififuncs.get_object_entry()
    log_name_source = os.path.join(
        args.o, '%s_seq2ffv1_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S")
    )
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=seq2ffv1.py %s' % ififuncs.get_script_version('seq2ffv1.py'))
    ififuncs.generate_log(
        log_name_source,
        'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        log_name_source,
        'EVENT = agentName=%s' % user
    )
    uuid = ififuncs.create_uuid()
    verdicts = []
    multi_reeler = False
    source_directory = args.i
    images = ififuncs.get_image_sequence_files(source_directory)
    if images == 'none':
        print('no images found in directory - checking for multi-reel sequence')
        images = ififuncs.check_multi_reel(source_directory)
        multi_reeler = True
        if images == 'none':
            sys.exit()
    # this is checking for a single reeler.
    else:
        images = [source_directory]
    reel_number = 1
    objects = []
    short_test_reports = []
    rawcooked_logfiles = []
    for reel in images:
        # Report only the reel that was just tested. Looping over every
        # stored report here would repeat earlier results under the current
        # reel's name.
        report = short_test(reel)
        short_test_reports.append(report)
        print(' - 24 frame reversibility test for %s is %s' % (os.path.basename(reel), report))
        if report == 'lossy':
            print('It appears that this sequence is not reversible - exiting')
            sys.exit()
        time.sleep(2)
        # check for a/b rolls
        # NOTE(review): reel_number becomes a string here and is only
        # incremented for reels that are not a/b rolls, so a mix of both
        # kinds looks unsupported - confirm.
        if reel[-1] in ['a', 'b']:
            reel_number = reel[-2]
        ffv1_path, source_abspath, args, log_name_source, normalisation_tool, rawcooked_logfile = make_ffv1(
            reel,
            args,
            log_name_source,
            reel_number,
            uuid,
            multi_reeler
        )
        objects.append(ffv1_path)
        rawcooked_logfiles.append(rawcooked_logfile)
        # check for a/b rolls
        if reel[-1] not in ['a', 'b']:
            reel_number += 1
    judgement = package(objects, object_entry, uuid, source_abspath, args, log_name_source, normalisation_tool, user, rawcooked_logfiles, multi_reeler, current_dir)
    judgement, sipcreator_log, sipcreator_manifest = judgement
    verdicts.append([source_directory, judgement])
    for verdict in verdicts:
        print("%-*s : %s" % (50, args.i, verdict[1]))
    ififuncs.generate_log(log_name_source, 'seq2ffv1.py finished.')
    ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    args_ is the list of command line arguments handed to parse_args().
    Every file in args.i is deleted, along with any file in the package's
    metadata folder whose name contains the deleted file's basename, and
    the package log and md5 manifest are updated. Nothing is returned.
    Calls sys.exit() if ififuncs.check_for_sip() finds no package.
    '''
    args = parse_args(args_)
    source = args.input
    sip_path = ififuncs.check_for_sip([source])
    if sip_path is None:
        # Without a package there is no log or manifest to update, so stop
        # before anything is deleted.
        print('Exiting as no package could be found in %s' % source)
        sys.exit()
    oe_path = os.path.dirname(sip_path)
    uuid = os.path.basename(sip_path)
    sip_manifest = os.path.join(
        oe_path, uuid
        ) + '_manifest.md5'
    start = datetime.datetime.now()
    print(args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = deletefiles.py started'
    )
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=deletefiles.py %s' % ififuncs.get_script_version('deletefiles.py')
    )
    ififuncs.generate_log(
        new_log_textfile,
        'Command line arguments: %s' % args
    )
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = agentName=%s' % user
    )
    metadata_dir = os.path.join(sip_path, 'metadata')
    for filename in args.i:
        # add test to see if it actually deleted - what if read only?
        os.remove(filename)
        print('%s has been deleted' % filename)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=deletion,'
            ' eventOutcomeDetailNote=%s has been deleted,'
            ' agentName=os.remove()'
            % filename
        )
        # Remove any metadata file whose name contains the deleted file's
        # basename.
        for metadata in os.listdir(metadata_dir):
            if os.path.basename(filename) in metadata:
                os.remove(os.path.join(metadata_dir, metadata))
                print('%s has been deleted' % os.path.join(metadata_dir, metadata))
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = eventType=deletion,'
                    ' eventOutcomeDetailNote=%s has been deleted,'
                    ' agentName=os.remove()'
                    % os.path.join(metadata_dir, metadata)
                )
        remove_from_manifest(sip_manifest, os.path.basename(filename), new_log_textfile)
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = deletefiles.py finished'
    )
    ififuncs.checksum_replace(sip_manifest, new_log_textfile, 'md5')
    finish = datetime.datetime.now()
    # Single-argument print() gives the same output on Python 2 and 3.
    print('\n%s ran this script at %s and it finished at %s' % (user, start, finish))
def main(args_):
    '''
    Removes the requested files from a package and documents each removal.

    The package is located from ``args.uuid_path``. Each file listed in
    ``args.i`` is deleted along with any file in the ``metadata`` folder
    whose name contains the deleted file's basename; the md5 manifest and
    the SIP log are updated to match.
    '''
    args = parse_args(args_)
    sip_path = ififuncs.check_for_sip([args.uuid_path])
    if sip_path is None:
        # this is assuming that the other workflow will be the
        # special collections workflow that has the uuid as the parent.
        # some real checks should exist for this whole flow.
        sip_path = args.uuid_path
    parent_dir = os.path.dirname(sip_path)
    uuid = os.path.basename(sip_path)
    sip_manifest = os.path.join(parent_dir, uuid + '_manifest.md5')
    start = datetime.datetime.now()
    print(args)
    user = args.user if args.user else ififuncs.get_user()
    new_log_textfile = os.path.join(sip_path, 'logs/%s_sip_log.log' % uuid)
    opening_events = (
        'EVENT = deletefiles.py started',
        'eventDetail=deletefiles.py %s' % ififuncs.get_script_version('deletefiles.py'),
        'Command line arguments: %s' % args,
        'EVENT = agentName=%s' % user,
    )
    for event in opening_events:
        ififuncs.generate_log(new_log_textfile, event)
    metadata_dir = os.path.join(sip_path, 'metadata')
    deletion_event = (
        'EVENT = eventType=deletion,'
        ' eventOutcomeDetailNote=%s has been deleted,'
        ' agentName=os.remove()'
    )
    for target in args.i:
        # add test to see if it actually deleted - what if read only?
        os.remove(target)
        print('%s has been deleted' % target)
        ififuncs.generate_log(new_log_textfile, deletion_event % target)
        target_name = os.path.basename(target)
        # Sidecars are any metadata files whose name contains the basename.
        sidecars = [
            os.path.join(metadata_dir, entry)
            for entry in os.listdir(metadata_dir)
            if target_name in entry
        ]
        for sidecar in sidecars:
            os.remove(sidecar)
            print('%s has been deleted' % sidecar)
            ififuncs.generate_log(new_log_textfile, deletion_event % sidecar)
        remove_from_manifest(sip_manifest, target_name, new_log_textfile)
    ififuncs.sort_manifest(sip_manifest)
    ififuncs.generate_log(new_log_textfile, 'EVENT = deletefiles.py finished')
    ififuncs.checksum_replace(sip_manifest, new_log_textfile, 'md5')
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' % (user, start, finish))
def main(args_):
    '''
    Launches the functions that prepare and execute the concatenation.

    The inputs in ``args.i`` are concatenated with ffmpeg_concat() into
    ``<uuid>.mov`` or ``<uuid>.mkv`` inside ``args.o``. The MD5 of the output
    streams is then compared against the value returned by ffmpeg_concat()
    and the outcome is logged. Unless ``args.nochapters`` is set, chapters
    are added with mkvpropedit. Unless ``args.no_sip`` is set, the output is
    packaged with sipcreator and the logs are merged into the package.

    args_ is the list of command line arguments handed to parse_args().
    '''
    uuid = ififuncs.create_uuid()
    args = parse_args(args_)
    print(args)
    log_name_source = os.path.join(
        args.o, '%s_concat_log.log' % time.strftime("_%Y_%m_%dT%H_%M_%S"))
    ififuncs.generate_log(log_name_source, 'concat.py started.')
    if args.mov:
        container = 'mov'
    else:
        container = 'mkv'
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=concat.py %s' % ififuncs.get_script_version('concat.py'))
    ififuncs.generate_log(log_name_source, 'Command line arguments: %s' % args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    if args.oe:
        # Accepted form: 'oe' followed by four or five digits. Anything else
        # falls back to asking the user.
        oe_digits = args.oe[2:]
        if args.oe[:2] == 'oe' and len(oe_digits) in (4, 5) and oe_digits.isdigit():
            object_entry = args.oe
        else:
            print('First two characters must be \'oe\' and last four characters must be four digits')
            object_entry = ififuncs.get_object_entry()
    else:
        object_entry = ififuncs.get_object_entry()
    ififuncs.generate_log(log_name_source, 'EVENT = agentName=%s' % user)
    # Only a directory input can already carry a UUID. In every other case,
    # including an input that is neither file nor directory, ask for one so
    # that source_uuid is always bound.
    source_uuid = False
    if os.path.isdir(args.i[0]):
        source_uuid = ififuncs.check_for_uuid(args)
    if source_uuid is False:
        source_uuid = ififuncs.get_source_uuid()
    ififuncs.generate_log(
        log_name_source,
        'Relationship, derivation, has source=%s' % source_uuid)
    video_files = args.i
    concat_file = ififuncs.get_temp_concat('concat_stuff')
    ififuncs.generate_log(log_name_source, 'concatenation file=%s' % concat_file)
    if args.r:
        video_files = recursive_file_list(video_files)
    video_files = ififuncs.sanitise_filenames(video_files)
    for source_files in video_files:
        ififuncs.generate_log(log_name_source, 'source_files = %s' % source_files)
    make_chapters(video_files)
    ififuncs.concat_textfile(video_files, concat_file)
    output_file = os.path.join(args.o, '%s.%s' % (uuid, container))
    ififuncs.generate_log(
        log_name_source,
        'EVENT = Concatenation, status=started, eventType=Creation, agentName=ffmpeg, eventDetail=Source media concatenated into a single file output=%s' % output_file)
    source_bitstream_md5, fmd5_logfile = ffmpeg_concat(concat_file, args, uuid, container)
    ififuncs.generate_log(
        log_name_source,
        'EVENT = Concatenation, status=finished, eventType=Creation, agentName=ffmpeg, eventDetail=Source media concatenated into a single file output=%s' % output_file)
    ififuncs.generate_log(
        log_name_source,
        'EVENT = losslessness verification, status=started, eventType=messageDigestCalculation, agentName=ffmpeg, eventDetail=MD5s of AV streams of output file generated for validation')
    validation_logfile = os.path.join(args.o, '%s_validation.log' % uuid)
    # Backslashes and colons are escaped in the copy of the path given to
    # ififuncs.set_environment(); the plain path is kept for moving the file
    # afterwards. NOTE(review): presumably the escaping is required by
    # ffmpeg's -report option - confirm against set_environment().
    escaped_validation_logfile = validation_logfile.replace(
        '\\', '\\\\').replace(':', '\\:')
    validation_env_dict = ififuncs.set_environment(escaped_validation_logfile)
    output_bitstream_md5 = subprocess.check_output([
        'ffmpeg', '-report',
        '-i', output_file,
        '-f', 'md5',
        '-map', '0:v',
        '-map', '0:a?',
        '-c', 'copy',
        '-'
    ], env=validation_env_dict).rstrip()
    ififuncs.generate_log(
        log_name_source,
        'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=ffmpeg, eventDetail=MD5s of AV streams of output file generated for validation')
    if source_bitstream_md5 == output_bitstream_md5:
        print('process appears to be lossless')
        print('%s %s' % (source_bitstream_md5, output_bitstream_md5))
        ififuncs.generate_log(
            log_name_source,
            'EVENT = losslessness verification, eventOutcome=pass')
    else:
        print('something went wrong - not lossless!')
        print('%s %s' % (source_bitstream_md5, output_bitstream_md5))
        ififuncs.generate_log(
            log_name_source,
            'EVENT = losslessness verification, eventOutcome=fail')
    if args.nochapters != True:
        # The return code of mkvpropedit is not checked.
        subprocess.call(['mkvpropedit', output_file, '-c', 'chapters.txt'])
        ififuncs.generate_log(
            log_name_source,
            'EVENT = eventType=modification, agentName=mkvpropedit, eventDetail=Chapters added to file detailing start point of source clips.')
    ififuncs.concat_textfile(video_files, concat_file)
    if not args.no_sip:
        sipcreator_log, sipcreator_manifest = sipcreator.main([
            '-i', output_file,
            '-u', uuid,
            '-oe', object_entry,
            '-user', user,
            '-o', args.o
        ])
        logs_dir = os.path.dirname(sipcreator_log)
        # Both ffmpeg reports join the package logs and are then added to
        # the package manifest.
        shutil.move(fmd5_logfile, logs_dir)
        shutil.move(validation_logfile, logs_dir)
        ififuncs.manifest_update(
            sipcreator_manifest,
            os.path.join(logs_dir, os.path.basename(fmd5_logfile)))
        ififuncs.manifest_update(
            sipcreator_manifest,
            os.path.join(logs_dir, os.path.basename(validation_logfile)))
        ififuncs.merge_logs(log_name_source, sipcreator_log, sipcreator_manifest)
def main(args_):
    '''
    Writes a single PBCore-style CSV row describing a whole package.

    Every file under ``args.input`` is analysed with mediainfo. Duration,
    frame count and file size are summed over all files; the remaining
    fields keep the value read from the last file analysed. With ``args.p``
    the CSV is written into the package's ``metadata`` folder and the
    package log and manifests are updated; otherwise ``blaa.csv`` is used.

    args_ is the list of command line arguments handed to parse_args().
    '''
    # if multiple file are present, this script will treat them as a single
    # instantiation/representation and get aggregate metadata about the whole
    # package. For now, this will be a clumsy implementation - the first file
    # will provide most metadata. Things like duration/bitrate/filesize
    # will be calculated as a whole.
    # Although another way would be that every call is looped, and if
    # this could catch files that should not be in the package, eg. a 4:2:2
    # file in a 4:2:0 package..
    # yup - do it that way!
    args = parse_args(args_)
    all_files = ififuncs.recursive_file_list(args.input)
    silence = True
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    # Stays empty when no UUID-named entry exists, so later uses are bound.
    instantiationIdentif = ''
    for dirs in os.listdir(args.input):
        if ififuncs.validate_uuid4(dirs) is None:
            instantiationIdentif = dirs
    Accession_Number = get_accession_number(args.input)
    Reference_Number = get_reference_number(args.input)
    if args.p:
        for root, _, filenames in os.walk(args.input):
            if os.path.basename(root) == 'metadata':
                metadata_dir = root
            elif os.path.basename(root) == 'logs':
                logs_dir = root
        csv_filename = os.path.join(metadata_dir, Accession_Number + '.csv')
        sipcreator_log = os.path.join(logs_dir, instantiationIdentif + '_sip_log.log')
        ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=makepbcore.py %s' % ififuncs.get_script_version('makepbcore.py'))
        ififuncs.generate_log(sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    else:
        csv_filename = 'blaa.csv'
    print(' - Metadata will be stored in %s' % csv_filename)
    for filenames in os.listdir(args.input):
        if '_manifest.md5' in filenames:
            md5_manifest = os.path.join(args.input, filenames)
        elif 'manifest-sha512.txt' in filenames:
            sha512_manifest = os.path.join(args.input, filenames)
    make_csv(csv_filename)
    ms = 0
    FrameCount = 0
    instantFileSize = 0
    instantFileSize_gigs = 0
    for source in all_files:
        metadata = subprocess.check_output(
            ['mediainfo', '--Output=PBCore2', source])
        root = etree.fromstring(metadata)
        # The format operator must sit inside the call: print() returns None
        # when it is a function, so formatting its result would fail.
        print(' - Analysing %s' % source)
        pbcore_namespace = root.xpath('namespace-uri(.)')
        track_type = root.xpath('//ns:essenceTrackType',
                                namespaces={'ns': pbcore_namespace})
        # NOTE(review): the video variables are only bound when a Video track
        # is seen, so a package without video fails further down.
        for track in track_type:
            if track.text == 'Video':
                essenceTrackEncodvid = get_metadata(
                    "ns:essenceTrackEncoding", track.getparent(),
                    pbcore_namespace)
                vcodec_attributes = get_attributes(track.getparent(),
                                                   pbcore_namespace)
            elif track.text == 'Audio':
                silence = False
                essenceTrackEncod_au = get_metadata(
                    "ns:essenceTrackEncoding", track.getparent(),
                    pbcore_namespace)
                acodec_attributes = get_attributes(track.getparent(),
                                                   pbcore_namespace)
        ScanType = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ScanType']", root,
            pbcore_namespace)
        matrix_coefficients = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='matrix_coefficients']",
            root, pbcore_namespace)
        transfer_characteris = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='transfer_characteristics']",
            root, pbcore_namespace)
        colour_primaries = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='color_primaries']",
            root, pbcore_namespace)
        # Running totals across every file in the package.
        FrameCount += int(
            get_metadata(
                "//ns:essenceTrackAnnotation[@annotationType='FrameCount']",
                root, pbcore_namespace))
        instantFileSize += int(
            get_metadata("//ns:instantiationFileSize", root,
                         pbcore_namespace))
        instantDataRate = round(
            float(
                ififuncs.get_mediainfo('OverallBitRate',
                                       '--inform=General;%OverallBitRate%',
                                       source)) / 1000 / 1000, 2)
        ms += ififuncs.get_milliseconds(source)
        ColorSpace = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ColorSpace']", root,
            pbcore_namespace)
        ChromaSubsampling = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='ChromaSubsampling']",
            root, pbcore_namespace)
        instantMediaty = get_metadata("//ns:instantiationMediaType", root,
                                      pbcore_namespace)
        essenceFrameSize = get_metadata("//ns:essenceTrackFrameSize", root,
                                        pbcore_namespace)
        essenceAspectRatio = get_metadata("//ns:essenceTrackAspectRatio", root,
                                          pbcore_namespace)
        PixelAspectRatio = get_metadata(
            "//ns:essenceTrackAnnotation[@annotationType='PixelAspectRatio']",
            root, pbcore_namespace)
        instantiationStandar = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Format']", root,
            pbcore_namespace)
        essenceFrameRate = get_metadata("//ns:essenceTrackFrameRate", root,
                                        pbcore_namespace)
        essenceTrackSampling = get_metadata("//ns:essenceTrackSamplingRate",
                                            root, pbcore_namespace)
        Interlacement = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Interlacement']",
            root, pbcore_namespace)
        Compression_Mode = get_metadata(
            "//ns:instantiationAnnotation[@annotationType='Compression_Mode']",
            root, pbcore_namespace)
        instantiationDate_modified = get_metadata(
            "//ns:instantiationDate[@dateType='file modification']", root,
            pbcore_namespace)
        pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video')
        audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio')
    # From here on `source` is the last file that was analysed.
    if not silence:
        audio_codecid = acodec_attributes['ref']
        essenceBitDepth_au = ififuncs.get_mediainfo(
            'duration', '--inform=Audio;%BitDepth%', source)
    else:
        audio_codecid = 'n/a'
        essenceBitDepth_au = 'n/a'
        essenceTrackEncod_au = 'n/a'
    video_codecid = vcodec_attributes['ref']
    try:
        video_codec_version = vcodec_attributes['version']
    except KeyError:
        video_codec_version = 'n/a'
    try:
        # The first eight characters of the annotation are dropped.
        video_codec_profile = vcodec_attributes['annotation'][8:]
    except KeyError:
        video_codec_profile = 'n/a'
    tc = ififuncs.convert_millis(ms)
    instantiationDuratio = ififuncs.convert_timecode(25, tc)
    Donor = ''
    Edited_By = user
    Date_Created = ''
    Date_Last_Modified = ''
    Film_Or_Tape = 'Digital File'
    Date_Of_Donation = ''
    Habitat = ''
    Type_Of_Deposit = ''
    Depositor_Reference = ''
    Master_Viewing = 'Preservation Master'
    Language_Version = ''
    Condition_Rating = ''
    Companion_Elements = ''
    EditedNew = user
    FIO = 'In'
    CollectionTitle = ''
    Created_By = user
    instantiationDimensi = ''
    instantiationLocatio = ''
    instantTimeStart = ''
    instantFileSize_gigs = round(
        float(instantFileSize) / 1024 / 1024 / 1024, 3)
    instantColors = ''
    instantLanguage = ''
    instantAltMo = 'n/a'
    essenceBitDepth_vid = ififuncs.get_mediainfo('duration',
                                                 '--inform=Video;%BitDepth%',
                                                 source)
    instantiationChanCon = ''
    ififuncs.append_csv(csv_filename, [
        Reference_Number, Donor, Edited_By, Date_Created, Date_Last_Modified,
        Film_Or_Tape, Date_Of_Donation, Accession_Number, Habitat,
        Type_Of_Deposit, Depositor_Reference, Master_Viewing, Language_Version,
        Condition_Rating, Companion_Elements, EditedNew, FIO, CollectionTitle,
        Created_By, instantiationIdentif, instantiationDate_modified,
        instantiationDimensi, instantiationStandar, instantiationLocatio,
        instantMediaty, instantFileSize, instantFileSize_gigs,
        instantTimeStart, instantDataRate, instantColors, instantLanguage,
        instantAltMo, essenceTrackEncodvid, essenceFrameRate,
        essenceTrackSampling, essenceBitDepth_vid, essenceFrameSize,
        essenceAspectRatio, essenceTrackEncod_au, essenceBitDepth_au,
        instantiationDuratio, instantiationChanCon, PixelAspectRatio,
        FrameCount, ColorSpace, ChromaSubsampling, ScanType, Interlacement,
        Compression_Mode, colour_primaries, transfer_characteris,
        matrix_coefficients, pix_fmt, audio_fmt, audio_codecid, video_codecid,
        video_codec_version, video_codec_profile
    ])
    if args.p:
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore'
            % (csv_filename))
        ififuncs.generate_log(sipcreator_log,
                              'EVENT = makepbcore.py finished')
        # The log changed, so its checksums are replaced first; the new CSV
        # is then added to both manifests.
        ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(md5_manifest, csv_filename)
        print(' - Updating %s with %s' % (md5_manifest, csv_filename))
        ififuncs.sha512_update(sha512_manifest, csv_filename)
        print(' - Updating %s with %s' % (sha512_manifest, csv_filename))
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.

    some examples, for a source of /home/kieranjol/fakeeeeee:
    manifest_sidecar = /home/kieranjol/fakeeeeee_manifest.md5
    manifest_root = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5

    When check_for_sip() finds a directory for the source, that directory
    becomes the source and a folder named after the original source is
    created inside the destination. The start of the copy is logged to a
    new log file in the desktop logs folder.

    Returns a tuple of (args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir).
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        ' and manifest generation. Written by Kieran O\'Leary.')
    parser.add_argument(
        'source', help='Input directory'
    )
    parser.add_argument(
        'destination',
        help='Destination directory'
    )
    parser.add_argument(
        '-l', '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO'
    )
    parser.add_argument(
        '-move',
        action='store_true',
        help='Move files instead of copying - much faster!'
    )
    rootpos = ''
    args = parser.parse_args(args_)
    # Default to the paths as given, so both names are always bound even
    # when check_for_sip() returns something that is not a directory.
    source = args.source
    destination = args.destination
    if os.path.isdir(args.source):
        sip_dir = check_for_sip(args.source)
        if sip_dir is not None and os.path.isdir(sip_dir):
            source = sip_dir
            destination = os.path.join(
                args.destination, os.path.basename(args.source))
            os.makedirs(destination)
    normpath = os.path.normpath(source)
    #is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    if dirname == '':
        # An empty basename means there is no folder name to reuse, so the
        # user has to supply one.
        rootpos = 'y'
        try:
            ask = raw_input
        except NameError:
            # raw_input() only exists on Python 2.
            ask = input
        dirname = ask(
            'What do you want your destination folder to be called?\n'
        )
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(
        os.path.dirname(source), relative_path + '_manifest.md5'
    )
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
def setup(args_):
    '''
    Sets a bunch of filename variables and parses command line.

    some examples, for a source of /home/kieranjol/fakeeeeee:
    manifest_sidecar = /home/kieranjol/fakeeeeee_manifest.md5
    manifest_root = /home/kieranjol/fakeeeeee/fakeeeeee_manifest.md5

    When check_for_sip() finds a directory for the source, that directory
    becomes the source and a folder named after the original source is
    created inside the destination. The start of the copy is logged to a
    new log file in the desktop logs folder. The size of the source is then
    compared with the free space at the destination; if there is too little
    room the user is asked whether to continue, unless -y was given.

    Returns a tuple of (args, rootpos, manifest_sidecar, log_name_source,
    destination_final_path, manifest_root, manifest_destination, manifest,
    destination, dirname, desktop_manifest_dir).
    Calls sys.exit() when space is short and the user declines to continue.
    '''
    parser = argparse.ArgumentParser(
        description='Copy directory with checksum comparison'
        ' and manifest generation. Written by Kieran O\'Leary.')
    parser.add_argument('source', help='Input directory')
    parser.add_argument('destination', help='Destination directory')
    parser.add_argument(
        '-l', '-lto',
        action='store_true',
        help='use gcp instead of rsync on osx for SPEED on LTO')
    parser.add_argument(
        '-move',
        action='store_true',
        help='Move files instead of copying - much faster!')
    parser.add_argument(
        '-justcopy',
        action='store_true',
        help='Do not generate destination manifest and verify integrity :(')
    parser.add_argument(
        '-y',
        action='store_true',
        help='Answers YES to the question: Not enough free space, would you like to continue?'
    )
    rootpos = ''
    args = parser.parse_args(args_)
    # Default to the paths as given, so both names are always bound even
    # when check_for_sip() returns something that is not a directory.
    source = os.path.abspath(args.source)
    destination = args.destination
    if os.path.isdir(args.source):
        sip_dir = check_for_sip(args.source)
        if sip_dir is not None and os.path.isdir(sip_dir):
            source = sip_dir
            destination = os.path.join(
                args.destination, os.path.basename(args.source))
            os.makedirs(destination)
    normpath = os.path.normpath(source)
    #is there any benefit to this over os.path.basename
    dirname = os.path.split(os.path.basename(source))[1]
    if dirname == '':
        # An empty basename means the source has no folder name of its own;
        # the manifest names below are then taken from the destination.
        rootpos = 'y'
    relative_path = normpath.split(os.sep)[-1]
    # or hardcode
    destination_final_path = os.path.join(destination, dirname)
    if rootpos == 'y':
        manifest_destination = os.path.dirname(
            destination) + '/%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_destination = destination + '/%s_manifest.md5' % dirname
    if os.path.isfile(manifest_destination):
        print('Destination manifest already exists')
    if rootpos == 'y':
        manifest_filename = '%s_manifest.md5' % os.path.basename(destination)
    else:
        manifest_filename = '%s_manifest.md5' % dirname
    desktop_manifest_dir = make_desktop_manifest_dir()
    # manifest = desktop manifest, looks like this can get rewritten later.
    manifest = os.path.join(desktop_manifest_dir, manifest_filename)
    manifest_sidecar = os.path.join(
        os.path.dirname(source), relative_path + '_manifest.md5')
    manifest_root = source + '/%s_manifest.md5' % os.path.basename(source)
    log_name_filename = dirname + time.strftime("_%Y_%m_%dT%H_%M_%S")
    desktop_logs_dir = make_desktop_logs_dir()
    log_name_source = "%s/%s.log" % (desktop_logs_dir, log_name_filename)
    generate_log(log_name_source, 'copyit.py started.')
    ififuncs.generate_log(
        log_name_source,
        'eventDetail=copyit.py %s' % ififuncs.get_script_version('copyit.py'))
    generate_log(log_name_source, 'Source: %s' % source)
    generate_log(log_name_source, 'Destination: %s' % destination)
    print('Checking total size of input folder')
    total_input_size = ififuncs.get_folder_size(os.path.abspath(args.source))
    print('Checking if enough space in destination folder')
    free_space = ififuncs.get_free_space(args.destination)
    if total_input_size > free_space:
        print('You do not have enough free space!')
        if args.y:
            # -y answers the question in advance.
            go_forth_blindly = 'Y'
        else:
            go_forth_blindly = ififuncs.ask_yes_no(
                'Would you like to continue anyway? Press Y or N')
        if go_forth_blindly == 'Y':
            generate_log(
                log_name_source,
                'You do not have enough free space!, but the user has decided to continue anyhow'
            )
        else:
            generate_log(log_name_source,
                         'You do not have enough free space! - Exiting')
            sys.exit()
    return args, rootpos, manifest_sidecar, log_name_source, destination_final_path, manifest_root, manifest_destination, manifest, destination, dirname, desktop_manifest_dir
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    The inputs in ``args.i`` are moved into a new package folder under
    ``args.o``, technical metadata is generated, and the objects, metadata
    and logs manifests are consolidated into one sorted package manifest.
    With ``args.d`` the manifest entry for the first input is replaced by
    the content title.

    args_ is the list of command line arguments handed to parse_args().
    Returns a tuple of (path to the SIP log, path to the package manifest).
    Calls sys.exit() when the UUID supplied with ``args.u`` is invalid.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    print(args)
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    if args.oe:
        # Accepted form: 'oe' followed by four or five digits. Anything else
        # falls back to asking the user.
        oe_digits = args.oe[2:]
        if args.oe[:2] == 'oe' and len(oe_digits) in (4, 5) and oe_digits.isdigit():
            object_entry = args.oe
        else:
            print('First two characters must be \'oe\' and last four characters must be four digits')
            object_entry = ififuncs.get_object_entry()
    else:
        object_entry = ififuncs.get_object_entry()
    if args.u and ififuncs.validate_uuid4(args.u) is not None:
        # Reject a bad UUID before any folder is created. The log file name
        # is built from the UUID, so nothing can be logged for this case.
        print('exiting due to invalid UUID')
        print('EVENT = exiting due to invalid UUID supplied on the command line: %s' % args.u)
        sys.exit()
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    if args.u:
        uuid = args.u
    else:
        uuid = os.path.basename(sip_path)
    uuid_event = (
        'EVENT = eventType=Identifier assignement,'
        ' eventIdentifierType=UUID, value=%s, module=uuid.uuid4'
    ) % uuid
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    ififuncs.generate_log(
        new_log_textfile,
        'EVENT = eventType=Identifier assignement,'
        ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    logs_dir = os.path.join(sip_path, 'logs')
    log_names = move_files(inputs, sip_path, args)
    get_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(
        metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir)
    # The objects manifest becomes the package manifest; metadata and logs
    # are then folded into it.
    new_manifest_textfile = consolidate_manifests(
        sip_path, 'objects', new_log_textfile)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(
        logs_dir, new_manifest_textfile,
        os.path.dirname(os.path.dirname(logs_dir)))
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        log_report(log_names)
    finish = datetime.datetime.now()
    print('\n%s ran this script at %s and it finished at %s' % (user, start, finish))
    if args.d:
        content_title = create_content_title_text(args, sip_path)
        # Manifest paths always use forward slashes.
        ififuncs.manifest_replace(
            new_manifest_textfile,
            os.path.join('objects', os.path.basename(args.i[0])).replace("\\", "/"),
            os.path.join('objects', content_title).replace("\\", "/"))
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Depending on the command line options the inputs are either moved into
    the package (default) or packed into a ZIP (``args.zip``). Technical
    metadata is generated and the objects, metadata and logs manifests are
    consolidated into one sorted package manifest. Optional steps add
    supplemental files (``args.supplement``), DFXML and a sha512 manifest
    (``args.sc``), DCP processing (``args.d``) and accessioning
    (``args.accession``).

    args_ is the list of command line arguments handed to parse_args().
    Returns a tuple of (path to the SIP log, path to the package manifest).
    Calls sys.exit() when ``args.d`` is used without clairmeta installed.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    if args.d:
        try:
            import clairmeta
            clairmeta_version = clairmeta.__version__
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print(args)
    user = ififuncs.determine_user(args)
    object_entry = get_object_entry(args)
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid, uuid_event = determine_uuid(args, sip_path)
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    if args.d:
        content_title = create_content_title_text(sip_path, args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    if args.accession:
        # All questions are asked up front, before the long running steps.
        accession_number = ififuncs.get_accession_number()
        reference_number = ififuncs.get_reference_number()
        parent = ififuncs.ask_question(
            'What is the parent record? eg MV 1234. Enter n/a if this is a born digital acquisition with no parent.'
        )
        donor = ififuncs.ask_question(
            'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
        )
        depositor_reference = ififuncs.ask_question(
            'What is the donor/depositor number? This will not affect Reproductions.'
        )
        acquisition_type = ififuncs.get_acquisition_type('')
        donation_date = ififuncs.ask_question(
            'When was the donation date in DD/MM/YYYY format? Eg. 31/12/1999 - Unfortunately this is NOT using ISO 8601.'
        )
    # Only set when files are moved rather than zipped.
    log_names = None
    if args.zip:
        inputxml, inputtracexml, dfxml = ififuncs.generate_mediainfo_xmls(
            inputs[0], args.o, uuid, new_log_textfile)
        if args.manifest:
            source_manifest = args.manifest.replace(
                '_manifest.md5', '_manifest-md5.txt')
            shutil.copy(args.manifest, source_manifest)
        else:
            source_manifest = os.path.join(
                args.o, os.path.basename(args.i[0]) + '_manifest-md5.txt')
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=started, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
            ififuncs.hashlib_manifest(args.i[0], source_manifest,
                                      os.path.dirname(args.i[0]))
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=finished, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=started, eventType=packing, agentName=makezip.py, eventDetail=Source object to be packed=%s'
            % inputs[0])
        makezip_judgement, zip_file = makezip.main([
            '-i', inputs[0],
            '-o', os.path.join(sip_path, 'objects'),
            '-basename', uuid + '.zip'
        ])
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=finished, eventType=packing, agentName=makezip.py, eventDetail=Source object packed into=%s'
            % zip_file)
        if makezip_judgement is None:
            judgement = 'lossless'
        else:
            judgement = makezip_judgement
        # NOTE(review): the same "status=finished" event is written twice;
        # kept as is so existing logs stay comparable.
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s'
            % judgement)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s'
            % judgement)
    else:
        log_names = move_files(inputs, sip_path, args, user)
    ififuncs.get_technical_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(
        metadata_dir, metadata_dir + '/metadata_manifest.md5', metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(
        sip_path, 'objects', new_log_textfile)
    if args.zip:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Message Digest Calculation, status=started, eventType=message digest calculation, eventDetail=%s module=hashlib'
            % zip_file)
        ififuncs.manifest_update(new_manifest_textfile, zip_file)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Message Digest Calculation, status=finished, eventType=message digest calculation, eventDetail=%s module=hashlib'
            % zip_file)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(
        logs_dir, new_manifest_textfile,
        os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        if not os.path.isdir(supplemental_dir):
            os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', args.supplement,
            '-user', user,
            '-new_folder', supplemental_dir,
            os.path.dirname(sip_path),
            '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.zip:
        # The folder may already exist when -supplement was also given, so
        # it is only created when missing.
        if not os.path.isdir(supplemental_dir):
            os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', [inputxml, inputtracexml, dfxml, source_manifest],
            '-user', user,
            '-new_folder', supplemental_dir,
            os.path.dirname(sip_path),
            '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.sc:
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml'
            % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(
            os.path.dirname(sip_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(
            sha512_log, new_log_textfile, new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        # The temporary sha512 log has been merged, so it can go.
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet and log_names is not None:
        log_report(log_names)
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' %
          (user, start, finish))
    if args.d:
        process_dcp(sip_path, content_title, args, new_manifest_textfile,
                    new_log_textfile, metadata_dir, clairmeta_version)
    if args.accession:
        register = accession.make_register()
        filmographic_dict = ififuncs.extract_metadata(args.csv)[0]
        # NOTE(review): `title` stays unbound when no record in the CSV
        # matches the reference number - confirm how that should be handled.
        for filmographic_record in filmographic_dict:
            if filmographic_record['Reference Number'].lower() == reference_number.lower():
                if filmographic_record['Title'] == '':
                    title = filmographic_record[
                        'TitleSeries'] + '; ' + filmographic_record['EpisodeNo']
                else:
                    title = filmographic_record['Title']
        oe_register = make_oe_register()
        ififuncs.append_csv(
            oe_register,
            (object_entry.upper()[:2] + '-' + object_entry[2:],
             donation_date, '1', '', title, donor, acquisition_type[1],
             accession_number,
             'Representation of %s|Reproduction of %s' % (reference_number, parent),
             ''))
        accession_cmd = [
            os.path.dirname(sip_path),
            '-user', user,
            '-f',
            '-number', accession_number,
            '-reference', reference_number,
            '-register', register,
            '-csv', args.csv,
            '-pbcore'
        ]
        if not parent.lower() == 'n/a':
            accession_cmd.extend(['-parent', parent])
        accession_cmd.extend(['-donor', donor])
        accession_cmd.extend(['-depositor_reference', depositor_reference])
        accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
        accession_cmd.extend(['-donation_date', donation_date])
        print(accession_cmd)
        accession.main(accession_cmd)
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Write one PBCore-style CSV record describing every file under args.input.

    Each file returned by ififuncs.recursive_file_list() is analysed with
    mediainfo (PBCore2 and XML outputs). File size, duration and frame count
    are accumulated across files; the other technical columns keep the value
    most recently read. Per-file values are also collected in lists so that a
    warning can be printed (and logged when args.p is set) if they differ
    between files.

    When args.p is set, the CSV is written into the package's 'metadata'
    directory, events are appended to the '<uuid>_sip_log.log' file in the
    'logs' directory, and the md5/sha512 manifests found in args.input are
    updated. Otherwise the CSV is written to 'blaa.csv'.

    args_ is the list of command line arguments handed to parse_args().
    Nothing is returned.
    '''
    # if multiple file are present, this script will treat them as a single
    # instantiation/representation and get aggregate metadata about the whole
    # package. For now, this will be a clumsy implementation - the first file
    # will provide most metadata. Things like duration/bitrate/filesize
    # will be calculated as a whole.
    # Although another way would be that every call is looped, and if
    # this could catch files that should not be in the package, eg. a 4:2:2
    # file in a 4:2:0 package..
    # yup - do it that way!
    args = parse_args(args_)
    all_files = ififuncs.recursive_file_list(args.input)
    # Both flags are cleared as soon as any file shows a matching track.
    silence = True
    audio_only = True
    if args.user:
        user = args.user
    else:
        user = ififuncs.get_user()
    acquisition_type = ''
    if args.acquisition_type:
        acquisition_type = ififuncs.get_acquisition_type(
            args.acquisition_type)[0]
    # The entry of args.input for which validate_uuid4() returns None is used
    # as the instantiation identifier.
    instantiationIdentif = ''
    for dirs in os.listdir(args.input):
        if ififuncs.validate_uuid4(dirs) is None:
            instantiationIdentif = dirs
    Accession_Number = get_accession_number(args.input)
    if args.reference:
        Reference_Number = args.reference.upper()
    else:
        Reference_Number = get_reference_number(args.input)
    if args.p:
        # NOTE(review): metadata_dir / logs_dir stay unbound when no folder
        # named 'metadata' or 'logs' exists below args.input - confirm that
        # callers guarantee both.
        for dirpath, _, _ in os.walk(args.input):
            if os.path.basename(dirpath) == 'metadata':
                metadata_dir = dirpath
            elif os.path.basename(dirpath) == 'logs':
                logs_dir = dirpath
        csv_filename = os.path.join(
            metadata_dir,
            Accession_Number + '_%s_pbcore.csv' % Reference_Number)
        sipcreator_log = os.path.join(
            logs_dir, instantiationIdentif + '_sip_log.log')
        ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py started')
        ififuncs.generate_log(
            sipcreator_log,
            'eventDetail=makepbcore.py %s'
            % ififuncs.get_script_version('makepbcore.py'))
        ififuncs.generate_log(
            sipcreator_log, 'Command line arguments: %s' % args)
        ififuncs.generate_log(sipcreator_log, 'EVENT = agentName=%s' % user)
    else:
        csv_filename = 'blaa.csv'
    print(' - Metadata will be stored in %s' % csv_filename)
    # NOTE(review): the manifests are only used in the args.p branch at the
    # end; they stay unbound if no matching file name is found here.
    for filenames in os.listdir(args.input):
        if '_manifest.md5' in filenames:
            md5_manifest = os.path.join(args.input, filenames)
        elif 'manifest-sha512.txt' in filenames:
            sha512_manifest = os.path.join(args.input, filenames)
    make_csv(csv_filename)
    # Running totals across all files.
    ms = 0
    FrameCount = 0
    instantFileSize_byte = 0
    # Per-file values, compared after the loop to detect mismatches.
    scan_types = []
    matrix_list = []
    transfer_list = []
    colour_primaries_list = []
    color_spaces = []
    chroma = []
    frame_sizes = []
    par_list = []
    container_list = []
    fps_list = []
    sample_rate_list = []
    track_count_list = []
    interlace_list = []
    compression_list = []
    pix_fmt_list = []
    audio_fmt_list = []
    audio_codecid_list = []
    audio_codec_list = []
    au_bitdepth_list = []
    video_codecid_list = []
    video_codec_version_list = []
    video_codec_profile_list = []
    timecode_list = []
    channels_list = []
    stl = False
    subtitle_check = ififuncs.get_digital_object_descriptor(args.input)
    if 'STL' in subtitle_check:
        stl = True
    for source in all_files:
        metadata = subprocess.check_output(
            ['mediainfo', '--Output=PBCore2', source])
        new_metadata = subprocess.check_output(
            ['mediainfo', '--Output=XML', source])
        try:
            root = etree.fromstring(metadata)
            new_root = etree.fromstring(new_metadata)
        except lxml.etree.XMLSyntaxError:
            # Retry after re-encoding the mediainfo output from cp1252.
            print('Windows encoding detected - transforming into utf-8')
            root = etree.fromstring(metadata.decode('cp1252').encode('utf-8'))
            new_root = etree.fromstring(
                new_metadata.decode('cp1252').encode('utf-8'))
        print(' - Analysing %s' % source)
        pbcore_namespace = root.xpath('namespace-uri(.)')
        mediainfo_namespace = new_root.xpath('namespace-uri(.)')
        new_track_type = new_root.xpath(
            '//ns:track', namespaces={'ns': mediainfo_namespace})
        for track in new_track_type:
            if track.attrib['type'] == 'Video':
                audio_only = False
                essenceTrackEncodvid = ififuncs.get_metadata(
                    "ns:Format", track, mediainfo_namespace)
                video_codecid = ififuncs.get_metadata(
                    "ns:CodecID", track, mediainfo_namespace)
                video_codec_version = ififuncs.get_metadata(
                    "ns:Format_Version", track, mediainfo_namespace)
                video_codec_profile = ififuncs.get_metadata(
                    "ns:Format_Profile", track, mediainfo_namespace)
                # Record the codec ID as well: the list takes part in the
                # mismatch check below but was previously never filled, so
                # differing video codec IDs could not be reported.
                video_codecid_list.append(video_codecid)
                video_codec_version_list.append(video_codec_version)
                video_codec_profile_list.append(video_codec_profile)
            elif track.attrib['type'] == 'Audio':
                silence = False
                essenceTrackEncod_au = ififuncs.get_metadata(
                    "ns:Format", track, mediainfo_namespace)
                audio_codec_list.append(essenceTrackEncod_au)
                audio_codecid = ififuncs.get_metadata(
                    "ns:CodecID", track, mediainfo_namespace)
                essenceTrackSampling = ififuncs.get_mediainfo(
                    'samplerate', '--inform=Audio;%SamplingRate_String%',
                    source)
                sample_rate_list.append(essenceTrackSampling)
                essenceBitDepth_au = ififuncs.get_metadata(
                    "ns:BitDepth", track, mediainfo_namespace)
                audio_codecid_list.append(audio_codecid)
                au_bitdepth_list.append(essenceBitDepth_au)
                # NOTE(review): unlike the sibling lookups this path starts
                # with '//', which presumably searches the whole document
                # rather than this track - confirm that is intended.
                channels = ififuncs.get_metadata(
                    "//ns:Channels", track, mediainfo_namespace)
                channels_list.append(channels)
        if audio_only:
            # No video track seen so far: fill the video columns with n/a.
            essenceTrackEncodvid = 'n/a'
            video_codecid = 'n/a'
            video_codec_version = 'n/a'
            video_codec_profile = 'n/a'
        ScanType = ififuncs.get_metadata(
            "//ns:ScanType", new_root, mediainfo_namespace)
        scan_types.append(ScanType)
        matrix_coefficients = ififuncs.get_metadata(
            "//ns:matrix_coefficients", new_root, mediainfo_namespace)
        timecode_source, starting_timecode = get_timecode(
            pbcore_namespace, root, source)
        timecode_list.append(starting_timecode)
        matrix_list.append(matrix_coefficients)
        transfer_characteris = ififuncs.get_metadata(
            "//ns:transfer_characteristics", new_root, mediainfo_namespace)
        transfer_list.append(transfer_characteris)
        colour_primaries = ififuncs.get_metadata(
            "//ns:colour_primaries", new_root, mediainfo_namespace)
        colour_primaries_list.append(colour_primaries)
        try:
            if audio_only:
                FrameCount = 'n/a'
            else:
                # increment if multiple objects are present
                try:
                    FrameCount += int(
                        ififuncs.get_metadata(
                            "//ns:FrameCount", new_root, mediainfo_namespace))
                except ValueError:
                    # don't increment if multiple values are returned as str
                    FrameCount = ififuncs.get_metadata(
                        "//ns:FrameCount", new_root, mediainfo_namespace)
        except TypeError:
            # workaround for silent pic in DCP
            FrameCount = 'n/a'
        instantFileSize_byte += int(
            ififuncs.get_metadata(
                "//ns:FileSize", new_root, mediainfo_namespace))
        # Overall bit rate divided by 1000 twice, rounded to two decimals.
        instantDataRate = round(
            float(
                ififuncs.get_mediainfo(
                    'OverallBitRate', '--inform=General;%OverallBitRate%',
                    source)) / 1000 / 1000, 2)
        instantTracks = ififuncs.get_number_of_tracks(source)
        track_count_list.append(instantTracks)
        if stl is True:
            track_count_list.append('STL sidecar')
        ms += ififuncs.get_milliseconds(source)
        ColorSpace = ififuncs.get_metadata(
            "//ns:ColorSpace", new_root, mediainfo_namespace)
        color_spaces.append(ColorSpace)
        ChromaSubsampling = get_metadata(
            "//ns:ChromaSubsampling", new_root, mediainfo_namespace)
        chroma.append(ChromaSubsampling)
        instantMediaty = get_metadata(
            "//ns:instantiationMediaType", root, pbcore_namespace)
        if audio_only:
            essenceFrameSize = 'n/a'
        else:
            essenceFrameSize = get_metadata(
                "//ns:essenceTrackFrameSize", root, pbcore_namespace)
        frame_sizes.append(essenceFrameSize)
        PixelAspectRatio = ififuncs.get_metadata(
            "//ns:PixelAspectRatio", new_root, mediainfo_namespace)
        par_list.append(PixelAspectRatio)
        general_root = new_root.xpath(
            "//ns:track[@type='General']",
            namespaces={'ns': mediainfo_namespace})[0]
        instantiationStandar = ififuncs.get_metadata(
            "ns:Format", general_root, mediainfo_namespace)
        container_list.append(instantiationStandar)
        essenceFrameRate = ififuncs.get_metadata(
            "//ns:FrameRate", new_root, mediainfo_namespace)
        fps_list.append(essenceFrameRate)
        essenceAspectRatio = ififuncs.get_mediainfo(
            'DAR', '--inform=Video;%DisplayAspectRatio_String%', source)
        Interlacement = ififuncs.get_metadata(
            "//ns:ScanOrder", new_root, mediainfo_namespace)
        # FFV1/MKV seems to have this scanorder metadata here rather than
        # Interlacement.
        # FFV1/MKV is the only example I've seen so far that behaves like
        # this :|
        # It could be that Interlacement is set at a codec level for FFV1,
        # but others are declared at the container level..
        if Interlacement == 'n/a':
            Interlacement = get_metadata(
                "//ns:essenceTrackAnnotation[@annotationType='ScanOrder']",
                root, pbcore_namespace)
        interlace_list.append(Interlacement)
        Compression_Mode = ififuncs.get_metadata(
            "//ns:Compression_Mode", new_root, mediainfo_namespace)
        colour_range = ififuncs.get_metadata(
            "//ns:colour_range", new_root, mediainfo_namespace)
        # this needs to be clarified as it exists in general and codec
        format_version = ififuncs.get_metadata(
            "ns:Format_Version", general_root, mediainfo_namespace)
        app_company_name = ififuncs.get_metadata(
            "//ns:Encoded_Application_CompanyName", new_root,
            mediainfo_namespace)
        app_name = ififuncs.get_metadata(
            "//ns:Encoded_Application_Name", new_root, mediainfo_namespace)
        app_version = ififuncs.get_metadata(
            "//ns:Encoded_Application_Version", new_root, mediainfo_namespace)
        library_name = ififuncs.get_metadata(
            "//ns:Encoded_Library_Name", new_root, mediainfo_namespace)
        if library_name == 'n/a':
            library_name = ififuncs.get_metadata(
                "//ns:Encoded_Library", general_root, mediainfo_namespace)
        library_version = ififuncs.get_metadata(
            "//ns:Encoded_Library_Version", new_root, mediainfo_namespace)
        compression_list.append(Compression_Mode)
        instantiationDate_mo = get_metadata(
            "//ns:instantiationDate[@dateType='file modification']",
            root, pbcore_namespace)
        instantDate_other = 'n/a'
        instantDate_type = 'n/a'
        pix_fmt = ififuncs.get_ffmpeg_fmt(source, 'video')
        pix_fmt_list.append(pix_fmt)
        audio_fmt = ififuncs.get_ffmpeg_fmt(source, 'audio')
        audio_fmt_list.append(audio_fmt)
        # NOTE(review): the first argument is 'duration' although BitDepth is
        # requested - presumably only a label; confirm against
        # ififuncs.get_mediainfo.
        essenceBitDepth_vid = ififuncs.get_mediainfo(
            'duration', '--inform=Video;%BitDepth%', source)
        if silence:
            # No audio track seen so far: fill the audio columns with n/a.
            audio_codecid = 'n/a'
            essenceBitDepth_au = 'n/a'
            essenceTrackEncod_au = 'n/a'
            essenceTrackSampling = 'n/a'
            channels = 'n/a'
    # Any list holding more than one distinct value means the files in the
    # package disagree on that property.
    metadata_error = ''
    metadata_list = [
        scan_types, matrix_list, transfer_list, colour_primaries_list,
        color_spaces, chroma, frame_sizes, par_list, container_list,
        fps_list, sample_rate_list, track_count_list, interlace_list,
        compression_list, pix_fmt_list, audio_fmt_list, audio_codecid_list,
        audio_codec_list, au_bitdepth_list, video_codecid_list,
        video_codec_version_list, video_codec_profile_list, channels_list,
        timecode_list
    ]
    for values in metadata_list:
        distinct = set(values)
        if len(distinct) > 1:
            metadata_error += 'WARNING - Your metadata values are not the same for all files - but this could be a false positive if dealing with atomised audio and video as with DCP: %s\n' % distinct
            if args.p:
                ififuncs.generate_log(
                    sipcreator_log,
                    'EVENT = Metadata mismatch - Your metadata values are not the same for all files - but this could be a false positive if dealing with atomised audio and video as with DCP: %s' % distinct)
    print(metadata_error)
    tc = ififuncs.convert_millis(ms)
    # The accumulated duration is converted with a fixed value of 25.
    instantiationDuratio = ififuncs.convert_timecode(25, tc)
    if args.donor:
        Donor = args.donor
    else:
        Donor = ''
    Edited_By = user
    Date_Created = ''
    Date_Last_Modified = ''
    Film_Or_Tape = 'Digital AV Object'
    Date_Of_Donation = ''
    if args.reproduction_creator:
        reproduction_creator = args.reproduction_creator
    else:
        reproduction_creator = ''
    if args.acquisition_type:
        if acquisition_type == 'Reproduction':
            Date_Of_Donation = instantiationDate_mo.split('T')[0]
            # if a reproduction, then there's no Donor/transfer of title.
            Donor = 'n/a'
        else:
            Date_Of_Donation = args.donation_date
    Habitat = ''
    backup_habitat = ''
    Type_Of_Deposit = acquisition_type
    if args.depositor_reference:
        Depositor_Reference = args.depositor_reference
    else:
        Depositor_Reference = ''
    Master_Viewing = 'Preservation Object'
    Language_Version = ''
    Condition_Rating = ''
    Companion_Elements = ''
    TTape_Origin = args.parent
    EditedNew = user
    FIO = 'In'
    CollectionTitle = ''
    Created_By = user
    instantTimeStart = 'n/a'
    instantFileSize_gigs = round(
        float(instantFileSize_byte) / 1024 / 1024 / 1024, 3)
    instantColors = 'n/a'
    instantLanguage = 'n/a'
    instantAltMo = 'n/a'
    instantiationChanCon = 'n/a'
    # Collapse the per-file lists into single CSV values.
    TimeCode_FirstFrame = process_mixed_values(timecode_list)
    pix_fmt = process_mixed_values(pix_fmt_list)
    audio_fmt = process_mixed_values(audio_fmt_list)
    instantTracks = process_mixed_values(track_count_list)
    TimeCode_Source = timecode_source
    reproduction_reason = ''
    dig_object_descrip = ififuncs.get_digital_object_descriptor(args.input)
    if 'STL' in dig_object_descrip:
        dig_object_descrip = 'AS-11 package'
    dcp_check = ififuncs.find_cpl(args.input)
    if dcp_check is not None:
        # When find_cpl() returns something, these columns are replaced by
        # the values returned from check_dcp().
        (essenceFrameSize, ChromaSubsampling, ColorSpace, FrameCount,
         essenceAspectRatio, instantiationDuratio, PixelAspectRatio,
         ScanType, dig_object_descrip, instantTracks, instantDataRate,
         essenceBitDepth_vid, instantMediaty) = check_dcp(dcp_check)
    ififuncs.append_csv(csv_filename, [
        Reference_Number,
        Donor,
        Edited_By,
        Date_Created,
        Date_Last_Modified,
        Film_Or_Tape,
        Date_Of_Donation,
        Accession_Number,
        Habitat,
        backup_habitat,
        TTape_Origin,
        Type_Of_Deposit,
        Depositor_Reference,
        Master_Viewing,
        Language_Version,
        Condition_Rating,
        Companion_Elements,
        EditedNew,
        FIO,
        CollectionTitle,
        Created_By,
        instantiationIdentif,
        instantDate_other,
        instantDate_type,
        instantiationDate_mo,
        instantiationStandar,
        instantMediaty,
        instantFileSize_byte,
        instantFileSize_gigs,
        instantTimeStart,
        instantDataRate,
        instantTracks,
        instantColors,
        instantLanguage,
        instantAltMo,
        essenceTrackEncodvid,
        essenceFrameRate,
        essenceTrackSampling,
        essenceBitDepth_vid,
        essenceFrameSize,
        essenceAspectRatio,
        essenceTrackEncod_au,
        essenceBitDepth_au,
        instantiationDuratio,
        instantiationChanCon,
        PixelAspectRatio,
        FrameCount,
        ColorSpace,
        ChromaSubsampling,
        ScanType,
        Interlacement,
        Compression_Mode,
        colour_primaries,
        transfer_characteris,
        matrix_coefficients,
        pix_fmt,
        audio_fmt,
        audio_codecid,
        video_codecid,
        video_codec_version,
        video_codec_profile,
        channels,
        colour_range,
        format_version,
        TimeCode_FirstFrame,
        TimeCode_Source,
        app_company_name,
        app_name,
        app_version,
        library_name,
        library_version,
        reproduction_creator,
        reproduction_reason,
        dig_object_descrip,
    ])
    if args.p:
        ififuncs.generate_log(
            sipcreator_log,
            'EVENT = Metadata extraction - eventDetail=Technical record creation using PBCore, eventOutcome=%s, agentName=makepbcore' % (csv_filename))
        ififuncs.generate_log(sipcreator_log, 'EVENT = makepbcore.py finished')
        # The log changed above, so its checksums are replaced before the new
        # CSV is added to both manifests.
        ififuncs.checksum_replace(md5_manifest, sipcreator_log, 'md5')
        ififuncs.checksum_replace(sha512_manifest, sipcreator_log, 'sha512')
        ififuncs.manifest_update(md5_manifest, csv_filename)
        print(' - Updating %s with %s' % (md5_manifest, csv_filename))
        ififuncs.sha512_update(sha512_manifest, csv_filename)
        print(' - Updating %s with %s' % (sha512_manifest, csv_filename))
        # Show the mismatch warnings again after the manifest messages.
        print(metadata_error)