def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    Optionally reads a filmographic CSV to build a list of Object Entry
    (OE) numbers, determines the starting accession number, asks
    initial_check() which packages to accession, then hands each package
    to accession.main(). When a CSV is supplied, a copy with the newly
    assigned reference numbers is written to the desktop logs directory.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # Numeric portion of e.g. 'aaa1234' - the starting accession number.
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(
        args, accession_digits, oe_list, reference_number)
    register = accession.make_register()
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        new_csv = os.path.join(desktop_logs_dir, os.path.basename(args.csv))
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        # Copy each assigned reference number into the matching
        # filmographic record. Packages are named 'oe####' on disk while
        # the CSV uses 'OE-####', hence the re-insertion of the hyphen.
        for oe_package in to_accession:
            for filmographic_record in filmographic_dict:
                if os.path.basename(oe_package).upper(
                )[:2] + '-' + os.path.basename(
                        oe_package)[2:] == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            fieldnames = headers
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i in filmographic_dict:
                writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys()):
            accession.main([
                package, '-user', user, '-p', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register
            ])
        collated_pbcore = gather_metadata(args.input)
        # FIX: these were Python 2 print statements; converted to print()
        # calls for consistency with the py3-style prints elsewhere in
        # this file and for Python 3 compatibility.
        # NOTE(review): new_csv is only bound when -csv was supplied, so
        # this summary would raise NameError otherwise - confirm -csv is
        # effectively mandatory for this workflow.
        print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % register)
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
        print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    Determines the starting accession number, asks initial_check() which
    packages should be accessioned, seeks confirmation from the user, and
    then runs accession.main() once per package in sorted order.
    '''
    args = parse_args(args_)
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # Numeric portion of e.g. 'aaa1234' - the starting accession number.
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits)
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    # Guard clause: bail out unless the user explicitly confirmed.
    if proceed != 'Y':
        return
    for package in sorted(to_accession):
        accession_args = [
            package,
            '-user', user,
            '-p',
            '-f',
            '-number', to_accession[package],
        ]
        accession.main(accession_args)
def main(args_):
    '''
    Launch all the functions for creating an IFI SIP.

    Builds the SIP folder structure, logs every step as PREMIS-style
    events, either zips the source (args.zip) or moves files into the
    SIP, consolidates/normalises checksum manifests, optionally copies
    supplemental metadata, and (with -accession) registers the package
    and invokes accession.main().

    Returns a (log_textfile, manifest_textfile) tuple.
    '''
    args = parse_args(args_)
    start = datetime.datetime.now()
    inputs = args.i
    if args.d:
        # DCP workflow requires Clairmeta; fail fast if it is missing.
        try:
            import clairmeta
            clairmeta_version = clairmeta.__version__
        except ImportError:
            print(
                'Exiting as Clairmeta is not installed. If there is a case for not using clairmeta, please let me know and i can make a workaround'
            )
            sys.exit()
    print(args)
    user = ififuncs.determine_user(args)
    object_entry = get_object_entry(args)
    sip_path = make_folder_path(os.path.join(args.o), args, object_entry)
    uuid, uuid_event = determine_uuid(args, sip_path)
    new_log_textfile = os.path.join(sip_path, 'logs' + '/' + uuid + '_sip_log.log')
    if args.d:
        content_title = create_content_title_text(sip_path, args)
    # Record the run, script version, CLI arguments, user and UUID
    # assignment in the SIP log.
    ififuncs.generate_log(new_log_textfile, 'EVENT = sipcreator.py started')
    ififuncs.generate_log(
        new_log_textfile,
        'eventDetail=sipcreator.py %s' % ififuncs.get_script_version('sipcreator.py'))
    ififuncs.generate_log(new_log_textfile, 'Command line arguments: %s' % args)
    ififuncs.generate_log(new_log_textfile, 'EVENT = agentName=%s' % user)
    ififuncs.generate_log(new_log_textfile, uuid_event)
    if not args.sc:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = eventType=Identifier assignement,'
            ' eventIdentifierType=object entry, value=%s' % object_entry)
    metadata_dir = os.path.join(sip_path, 'metadata')
    supplemental_dir = os.path.join(metadata_dir, 'supplemental')
    logs_dir = os.path.join(sip_path, 'logs')
    if args.accession:
        # Interactive prompts for the accession workflow.
        accession_number = ififuncs.get_accession_number()
        reference_number = ififuncs.get_reference_number()
        parent = ififuncs.ask_question(
            'What is the parent record? eg MV 1234. Enter n/a if this is a born digital acquisition with no parent.'
        )
        donor = ififuncs.ask_question(
            'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
        )
        depositor_reference = ififuncs.ask_question(
            'What is the donor/depositor number? This will not affect Reproductions.'
        )
        acquisition_type = ififuncs.get_acquisition_type('')
        donation_date = ififuncs.ask_question(
            'When was the donation date in DD/MM/YYYY format? Eg. 31/12/1999 - Unfortunately this is NOT using ISO 8601.'
        )
    if args.zip:
        # ZIP workflow: capture mediainfo/mediatrace/DFXML of the source,
        # ensure an MD5 manifest of the source exists, then pack it.
        inputxml, inputtracexml, dfxml = ififuncs.generate_mediainfo_xmls(
            inputs[0], args.o, uuid, new_log_textfile)
        if args.manifest:
            # A source manifest was supplied - rename its copy to the
            # expected *_manifest-md5.txt form.
            shutil.copy(
                args.manifest,
                args.manifest.replace('_manifest.md5', '_manifest-md5.txt'))
            source_manifest = args.manifest.replace('_manifest.md5',
                                                    '_manifest-md5.txt')
        else:
            # No manifest supplied - presumably generate a fresh MD5
            # manifest of the source here (TODO confirm this nesting
            # against the canonical source; indentation was lost).
            source_manifest = os.path.join(
                args.o, os.path.basename(args.i[0]) + '_manifest-md5.txt')
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=started, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
            ififuncs.hashlib_manifest(args.i[0], source_manifest,
                                      os.path.dirname(args.i[0]))
            ififuncs.generate_log(
                new_log_textfile,
                'EVENT = message digest calculation, status=finished, eventType=messageDigestCalculation, agentName=hashlib, eventDetail=MD5 checksum of source files within ZIP'
            )
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=started, eventType=packing, agentName=makezip.py, eventDetail=Source object to be packed=%s'
            % inputs[0])
        makezip_judgement, zip_file = makezip.main([
            '-i', inputs[0], '-o', os.path.join(sip_path, 'objects'),
            '-basename', uuid + '.zip'
        ])
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = packing, status=finished, eventType=packing, agentName=makezip.py, eventDetail=Source object packed into=%s'
            % zip_file)
        # makezip returns None when the embedded CRC32s all validated.
        if makezip_judgement is None:
            judgement = 'lossless'
        else:
            judgement = makezip_judgement
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s'
            % judgement)
        # NOTE(review): the losslessness event appears to be logged twice
        # with identical content - looks like unintentional duplication;
        # left in place to preserve behaviour.
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = losslessness verification, status=finished, eventType=messageDigestCalculation, agentName=makezip.py, eventDetail=embedded crc32 checksum validation, eventOutcome=%s'
            % judgement)
    else:
        # Non-zip workflow: move/copy the inputs into the SIP.
        log_names = move_files(inputs, sip_path, args, user)
    ififuncs.get_technical_metadata(sip_path, new_log_textfile)
    ififuncs.hashlib_manifest(metadata_dir,
                              metadata_dir + '/metadata_manifest.md5',
                              metadata_dir)
    if args.sc:
        normalise_objects_manifest(sip_path)
    new_manifest_textfile = consolidate_manifests(sip_path, 'objects',
                                                  new_log_textfile)
    if args.zip:
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Message Digest Calculation, status=started, eventType=message digest calculation, eventDetail=%s module=hashlib'
            % zip_file)
        ififuncs.manifest_update(new_manifest_textfile, zip_file)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Message Digest Calculation, status=finished, eventType=message digest calculation, eventDetail=%s module=hashlib'
            % zip_file)
    consolidate_manifests(sip_path, 'metadata', new_log_textfile)
    ififuncs.hashlib_append(logs_dir, new_manifest_textfile,
                            os.path.dirname(os.path.dirname(logs_dir)))
    if args.supplement:
        os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', args.supplement, '-user', user, '-new_folder',
            supplemental_dir, os.path.dirname(sip_path), '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.zip:
        # NOTE(review): if -supplement was also supplied, supplemental_dir
        # already exists and this os.makedirs will raise - confirm whether
        # the two options are mutually exclusive.
        os.makedirs(supplemental_dir)
        supplement_cmd = [
            '-i', [inputxml, inputtracexml, dfxml, source_manifest], '-user',
            user, '-new_folder', supplemental_dir, os.path.dirname(sip_path),
            '-copy'
        ]
        package_update.main(supplement_cmd)
    if args.sc:
        # Special-collections workflow: DFXML plus a SHA512 sidecar
        # manifest merged back into the main log/manifest.
        print('Generating Digital Forensics XML')
        dfxml = accession.make_dfxml(args, sip_path, uuid)
        ififuncs.generate_log(
            new_log_textfile,
            'EVENT = Metadata extraction - eventDetail=File system metadata extraction using Digital Forensics XML, eventOutcome=%s, agentName=makedfxml'
            % (dfxml))
        ififuncs.manifest_update(new_manifest_textfile, dfxml)
        sha512_log = manifest.main([sip_path, '-sha512', '-s'])
        sha512_manifest = os.path.join(
            os.path.dirname(sip_path), uuid + '_manifest-sha512.txt')
        ififuncs.merge_logs_append(sha512_log, new_log_textfile,
                                   new_manifest_textfile)
        ififuncs.checksum_replace(sha512_manifest, new_log_textfile, 'sha512')
        os.remove(sha512_log)
    ififuncs.sort_manifest(new_manifest_textfile)
    if not args.quiet:
        # log_names only exists when the non-zip branch ran move_files().
        if 'log_names' in locals():
            log_report(log_names)
    finish = datetime.datetime.now()
    print('\n- %s ran this script at %s and it finished at %s' %
          (user, start, finish))
    if args.d:
        process_dcp(sip_path, content_title, args, new_manifest_textfile,
                    new_log_textfile, metadata_dir, clairmeta_version)
    if args.accession:
        register = accession.make_register()
        filmographic_dict = ififuncs.extract_metadata(args.csv)[0]
        # Find the filmographic record matching the reference number and
        # derive a display title (series + episode when Title is empty).
        for filmographic_record in filmographic_dict:
            if filmographic_record['Reference Number'].lower(
            ) == reference_number.lower():
                if filmographic_record['Title'] == '':
                    title = filmographic_record[
                        'TitleSeries'] + '; ' + filmographic_record['EpisodeNo']
                else:
                    title = filmographic_record['Title']
        oe_register = make_oe_register()
        ififuncs.append_csv(
            oe_register,
            (object_entry.upper()[:2] + '-' + object_entry[2:], donation_date,
             '1', '', title, donor, acquisition_type[1], accession_number,
             'Representation of %s|Reproduction of %s' %
             (reference_number, parent), ''))
        accession_cmd = [
            os.path.dirname(sip_path), '-user', user, '-f', '-number',
            accession_number, '-reference', reference_number, '-register',
            register, '-csv', args.csv, '-pbcore'
        ]
        if not parent.lower() == 'n/a':
            accession_cmd.extend(['-parent', parent])
        accession_cmd.extend(['-donor', donor])
        accession_cmd.extend(['-depositor_reference', depositor_reference])
        accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
        accession_cmd.extend(['-donation_date', donation_date])
        print(accession_cmd)
        accession.main(accession_cmd)
    return new_log_textfile, new_manifest_textfile
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    Supports two workflows: an OE CSV (-oe_csv, which must be accompanied
    by -filmographic) whose rows already carry reference numbers, parents
    and donation dates, or a filmographic CSV alone. Builds the set of
    packages to accession, optionally writes an updated filmographic CSV
    to the desktop logs dir, then runs accession.main() per package.
    '''
    args = parse_args(args_)
    oe_list = []
    # FIX: oe_dicts was only bound in the -oe_csv branch but is iterated
    # unconditionally in the accession loop below, raising NameError in
    # the -filmographic-only workflow. Initialise it to an empty list.
    oe_dicts = []
    if args.oe_csv:
        if not args.filmographic:
            print(' - batchaccession.py - ERROR\n - No -filmographic argument supplied. This is mandatory when using the -oe_csv option. \n - Exiting..')
            sys.exit()
        oe_csv_extraction = ififuncs.extract_metadata(args.oe_csv)
        initial_oe_list = oe_csv_extraction[0]
        oe_dicts = process_oe_csv(oe_csv_extraction, args.input)
        # temp hack while we're performing both workflows
        helper_csv = args.oe_csv
    elif args.filmographic:
        initial_oe_list = ififuncs.extract_metadata(args.filmographic)[0]
        # temp hack while we're performing both workflows
        helper_csv = args.filmographic
    if args.oe_csv or args.filmographic:
        for line_item in ififuncs.extract_metadata(helper_csv)[0]:
            # The OE column heading differs between the two CSV flavours.
            try:
                oe_number = line_item['Object Entry'].lower()
            except KeyError:
                oe_number = line_item['OE No.'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if not args.oe_csv:
        # No need to ask for the reference number if the OE csv option is supplied.
        # The assumption here is that the OE csv contains the reference numbers though.
        if args.reference:
            reference_number = get_filmographic_number(args.reference)
        else:
            reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question('Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.')
    depositor_reference = ififuncs.ask_question('What is the donor/depositor number? This will not affect Reproductions.')
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # Numeric portion of e.g. 'aaa1234' - the starting accession number.
    accession_digits = int(accession_number[3:])
    if not args.oe_csv:
        to_accession = initial_check(args, accession_digits, oe_list, reference_number)
    else:
        # Build the accession map straight from the OE CSV rows:
        # value = [accession number, reference, parent, donation date].
        to_accession = {}
        for oe_record in oe_dicts:
            if os.path.isdir(oe_record['source_path']):
                to_accession[oe_record['source_path']] = [
                    'aaa' + str(accession_digits).zfill(4),
                    oe_record['reference number'],
                    oe_record['parent'],
                    oe_record['donation_date']
                ]
                accession_digits += 1
    for success in sorted(to_accession.keys()):
        print('%s will be accessioned as %s' % (success, to_accession[success]))
    register = accession.make_register()
    if args.filmographic:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        if args.dryrun:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY") + os.path.basename(args.filmographic)
        else:
            new_csv_filename = time.strftime("%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.filmographic)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        if not args.oe_csv:
            filmographic_dict, headers = ififuncs.extract_metadata(args.filmographic)
            # Copy each assigned reference number into the matching
            # filmographic record (packages are 'oe####', CSV uses 'OE-####').
            for oe_package in to_accession:
                for filmographic_record in filmographic_dict:
                    if os.path.basename(oe_package).upper()[:2] + '-' + os.path.basename(oe_package)[2:] == filmographic_record['Object Entry']:
                        filmographic_record['Reference Number'] = to_accession[oe_package][1]
            get_filmographic_titles(to_accession, filmographic_dict)
            with open(new_csv, 'w') as csvfile:
                fieldnames = headers
                # Removes Object Entry from headings as it's not needed in database.
                del fieldnames[1]
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for i in filmographic_dict:
                    i.pop('Object Entry', None)
                    # Only include records that have reference numbers
                    if not i['Reference Number'] == '':
                        writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no(
        'Do you want to proceed?'
    )
    if args.oe_csv:
        new_csv = args.filmographic
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user, '-f',
                '-number', to_accession[package][0],
                '-reference', to_accession[package][1],
                '-register', register, '-csv', new_csv
            ]
            # DCDM packages do not get a PBCore description.
            for oe_record in oe_dicts:
                if oe_record['source_path'] == package:
                    if not oe_record['format'].lower() == 'dcdm':
                        accession_cmd.append('-pbcore')
            if len(to_accession[package]) == 4:
                if not to_accession[package][2] == 'n/a':
                    # Has a parent record - treat as a reproduction
                    # (acquisition type 13).
                    accession_cmd.extend(['-acquisition_type', '13'])
                    if args.oe_csv:
                        accession_cmd.extend(['-parent', to_accession[package][2]])
                    else:
                        accession_cmd.extend(['-parent', order.main(package)])
                else:
                    accession_cmd.extend(['-donor', donor])
                    accession_cmd.extend(['-depositor_reference', depositor_reference])
                    accession_cmd.extend(['-acquisition_type', acquisition_type[2]])
            # FIX: the remaining prints were Python 2 print statements;
            # converted to print() calls for consistency with the
            # py3-style prints above and for Python 3 compatibility.
            print(to_accession[package][3])
            accession_cmd.extend(['-donation_date', to_accession[package][3]])
            print(accession_cmd)
            accession.main(accession_cmd)
        collated_pbcore = gather_metadata(args.input)
        sorted_filepath = ififuncs.sort_csv(register, 'accession number')
        print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
        print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)
def main(args_):
    '''
    Batch process packages by running accession.py and makepbcore.py

    Reads an optional filmographic CSV to build the OE list, gathers
    donor/acquisition details interactively, determines which packages
    to accession via initial_check(), writes a timestamped copy of the
    CSV with reference numbers filled in, then runs accession.main()
    once per package.
    '''
    args = parse_args(args_)
    oe_list = []
    if args.csv:
        for line_item in ififuncs.extract_metadata(args.csv)[0]:
            oe_number = line_item['Object Entry'].lower()
            # this transforms OE-#### to oe####
            transformed_oe = oe_number[:2] + oe_number[3:]
            oe_list.append(transformed_oe)
    if args.reference:
        reference_number = get_filmographic_number(args.reference)
    else:
        reference_number = ififuncs.get_reference_number()
    donor = ififuncs.ask_question(
        'Who is the source of acquisition, as appears on the donor agreement? This will not affect Reproductions.'
    )
    depositor_reference = ififuncs.ask_question(
        'What is the donor/depositor number? This will not affect Reproductions.'
    )
    acquisition_type = ififuncs.get_acquisition_type('')
    user = ififuncs.get_user()
    accession_number = get_number(args)
    # Numeric portion of e.g. 'aaa1234' - the starting accession number.
    accession_digits = int(accession_number[3:])
    to_accession = initial_check(args, accession_digits, oe_list, reference_number)
    register = accession.make_register()
    if args.csv:
        desktop_logs_dir = ififuncs.make_desktop_logs_dir()
        # Dry runs get a loudly-named CSV so nobody ingests it by mistake.
        if args.dryrun:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_DRYRUN_SHEET_PLEASE_DO_NOT_INGEST_JUST_IGNORE_COMPLETELY"
            ) + os.path.basename(args.csv)
        else:
            new_csv_filename = time.strftime(
                "%Y-%m-%dT%H_%M_%S_") + os.path.basename(args.csv)
        new_csv = os.path.join(desktop_logs_dir, new_csv_filename)
        filmographic_dict, headers = ififuncs.extract_metadata(args.csv)
        # Copy each assigned reference number into the matching
        # filmographic record (packages are 'oe####', CSV uses 'OE-####').
        for oe_package in to_accession:
            for filmographic_record in filmographic_dict:
                if os.path.basename(oe_package).upper(
                )[:2] + '-' + os.path.basename(
                        oe_package)[2:] == filmographic_record['Object Entry']:
                    filmographic_record['Reference Number'] = to_accession[
                        oe_package][1]
        with open(new_csv, 'w') as csvfile:
            fieldnames = headers
            # Removes Object Entry from headings as it's not needed in database.
            del fieldnames[1]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for i in filmographic_dict:
                i.pop('Object Entry', None)
                # Only include records that have reference numbers
                if not i['Reference Number'] == '':
                    writer.writerow(i)
    if args.dryrun:
        sys.exit()
    proceed = ififuncs.ask_yes_no('Do you want to proceed?')
    if proceed == 'Y':
        for package in sorted(to_accession.keys(), key=natural_keys):
            accession_cmd = [
                package, '-user', user, '-pbcore', '-f', '-number',
                to_accession[package][0], '-reference',
                to_accession[package][1], '-register', register, '-csv',
                new_csv
            ]
            # A 3-element value means a parent/order exists - treat as a
            # reproduction (acquisition type 13); otherwise attach the
            # donor details gathered above.
            if len(to_accession[package]) == 3:
                accession_cmd.extend(['-acquisition_type', '13'])
                accession_cmd.extend(['-parent', order.main(package)])
            else:
                accession_cmd.extend(['-donor', donor])
                accession_cmd.extend(
                    ['-depositor_reference', depositor_reference])
                accession_cmd.extend(
                    ['-acquisition_type', acquisition_type[2]])
            # FIX: these were Python 2 print statements; converted to
            # print() calls for Python 3 compatibility and consistency.
            print(accession_cmd)
            accession.main(accession_cmd)
        collated_pbcore = gather_metadata(args.input)
        sorted_filepath = ififuncs.sort_csv(register, 'accession number')
        # NOTE(review): new_csv is only bound when -csv was supplied;
        # confirm -csv is effectively mandatory for this workflow.
        print('\nA helper accessions register has been generated in order to help with registration - located here: %s' % sorted_filepath)
        print('\nA modified filmographic CSV has been generated with added reference numbers - located here: %s' % new_csv)
        print('\nA collated CSV consisting of each PBCore report has been generated for batch database import - located here: %s' % collated_pbcore)