def assign_uuid(
    job,
    filename,
    extracted_file_original_location,
    package_uuid,
    transfer_uuid,
    date,
    task_uuid,
    sip_directory,
    package_filename,
):
    """Register one extracted file in the transfer under a new UUID.

    The file is recorded through ``addFileToTransfer`` with an "unpacking"
    source type and an event detail naming the package it was unpacked from.
    Its size and checksum are then recorded through
    ``updateSizeAndChecksum`` and the assignment is reported on ``job``.

    ``extracted_file_original_location`` is forwarded untouched as the
    file's original location; only ``filename`` and ``package_filename`` are
    rewritten to be relative to the transfer directory.
    """

    def _transfer_relative(path):
        # Only the first occurrence of the SIP directory is swapped for the
        # transfer placeholder, so the stored path is no longer absolute.
        return path.replace(sip_directory, TRANSFER_DIRECTORY, 1)

    new_file_uuid = str(uuid.uuid4())
    stored_path = _transfer_relative(filename)
    unpacked_from = "Unpacked from: " + "{} ({})".format(
        _transfer_relative(package_filename), package_uuid
    )

    addFileToTransfer(
        stored_path,
        new_file_uuid,
        transfer_uuid,
        task_uuid,
        date,
        sourceType="unpacking",
        eventDetail=unpacked_from,
        originalLocation=extracted_file_original_location,
    )
    # A second, independent UUID is passed as the last argument (presumably
    # the identifier of the resulting event -- confirm against
    # updateSizeAndChecksum).
    updateSizeAndChecksum(new_file_uuid, filename, date, str(uuid.uuid4()))

    job.pyprint("Assigning new file UUID:", new_file_uuid, "to file", filename)
def addFile(filePath, transferPath, transferUUID, date, eventDetail="", fileUUID=None):
    """Register ``filePath`` in a transfer as an unpacked file.

    Args:
        filePath: Path of the file on disk. The first occurrence of
            ``transferPath`` in it is replaced by ``%transferDirectory%``
            before the path is stored.
        transferPath: Directory prefix to strip from ``filePath``.
        transferUUID: UUID of the transfer the file is added to.
        date: Date value forwarded to ``addFileToTransfer`` and
            ``updateSizeAndChecksum``.
        eventDetail: Optional detail text forwarded to ``addFileToTransfer``.
        fileUUID: UUID to assign to the file. When omitted (``None``) a new
            one is generated for this call.
    """
    # The default used to be computed once, when the function was defined,
    # so every call that omitted fileUUID shared one UUID. Generate it per
    # call instead.
    if fileUUID is None:
        fileUUID = str(uuid.uuid4())

    taskUUID = str(uuid.uuid4())
    filePathRelativeToSIP = filePath.replace(transferPath, "%transferDirectory%", 1)
    addFileToTransfer(
        filePathRelativeToSIP,
        fileUUID,
        transferUUID,
        taskUUID,
        date,
        sourceType="unpacking",
        eventDetail=eventDetail,
    )
    # uuid4 must be *called*; ``uuid.uuid4.__str__()`` returned the text of
    # the function object rather than a UUID.
    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
def onceExtracted(command): extractedFiles = [] print "TODO - Metadata regarding removal of extracted archive" if removeOnceExtracted: packageFileUUID = sys.argv[6].__str__() sipDirectory = sys.argv[2].__str__() os.remove(replacementDic["%inputFile%"]) currentLocation = replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1) fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation) print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"] for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")): path, directories, files = w for p in files: p = os.path.join(path, p) #print "path: ", p if os.path.isfile(p): extractedFiles.append(p) for ef in extractedFiles: fileUUID = uuid.uuid4().__str__() #print "File Extracted:", ef if True: #Add the file to the SIP #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments> sipDirectory = sys.argv[2].__str__() transferUUID = sys.argv[3].__str__() date = sys.argv[4].__str__() taskUUID = sys.argv[5].__str__() packageFileUUID = sys.argv[6].__str__() filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1) print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail) updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4.__str__()) run = sys.argv[0].__str__() + \ " \"" + transcoder.escapeForCommand(ef) + "\"" if True: #Add the file to the SIP run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \ " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \ " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \ " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \ " \"" + fileUUID + "\"" 
exitCode, stdOut, stdError = executeOrRun("command", run) print stdOut print >>sys.stderr, stdError if exitCode != 0 and command.exitCode == 0: command.exitCode = exitCode global extractedCount date = sys.argv[4].__str__().split(".", 1)[0] extractedCount = extractedCount + 1 replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
def assign_uuid(filename, package_uuid, transfer_uuid, date, task_uuid, sip_directory, package_filename):
    """Register one extracted file in the transfer under a new UUID.

    Both ``filename`` and ``package_filename`` are stored with the first
    occurrence of ``sip_directory`` replaced by ``%transferDirectory%``.
    The event detail names the package (relative path and UUID) the file
    was unpacked from.
    """

    def _transfer_relative(path):
        # Only the first occurrence of the SIP directory is replaced.
        return path.replace(sip_directory, "%transferDirectory%", 1)

    new_file_uuid = str(uuid.uuid4())
    stored_path = _transfer_relative(filename)
    unpacked_from = "Unpacked from: " + "{} ({})".format(
        _transfer_relative(package_filename), package_uuid
    )

    addFileToTransfer(
        stored_path,
        new_file_uuid,
        transfer_uuid,
        task_uuid,
        date,
        sourceType="unpacking",
        eventDetail=unpacked_from,
    )
    # A second, independent UUID is passed as the last argument (presumably
    # the identifier of the resulting event -- confirm against
    # updateSizeAndChecksum).
    updateSizeAndChecksum(new_file_uuid, filename, date, str(uuid.uuid4()))

    print('Assigning new file UUID:', new_file_uuid, 'to file', filename)
def addFile(filePath, transferPath, transferUUID, date, eventDetail="", fileUUID=None):
    """Register ``filePath`` in a transfer as an unpacked file.

    Args:
        filePath: Path of the file on disk. The first occurrence of
            ``transferPath`` in it is replaced by ``%transferDirectory%``
            before the path is stored.
        transferPath: Directory prefix to strip from ``filePath``.
        transferUUID: UUID of the transfer the file is added to.
        date: Date value forwarded to ``addFileToTransfer`` and
            ``updateSizeAndChecksum``.
        eventDetail: Optional detail text forwarded to ``addFileToTransfer``.
        fileUUID: UUID to assign to the file. When omitted (``None``) a new
            one is generated for this call.
    """
    # A UUID computed in the signature is created once, at definition time,
    # and then shared by every call that omits fileUUID. Create it here so
    # each call gets its own.
    if fileUUID is None:
        fileUUID = str(uuid.uuid4())

    taskUUID = str(uuid.uuid4())
    filePathRelativeToSIP = filePath.replace(transferPath, "%transferDirectory%", 1)
    addFileToTransfer(
        filePathRelativeToSIP,
        fileUUID,
        transferUUID,
        taskUUID,
        date,
        sourceType="unpacking",
        eventDetail=eventDetail,
    )
    # Call uuid4 to obtain a UUID; ``uuid.uuid4.__str__()`` stringified the
    # function object itself.
    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
def main(file_uuid=None, file_path='', date='', event_uuid=None, sip_directory='', sip_uuid=None, transfer_uuid=None, use='original', update_use=True):
    """Assign a UUID to a file and register it with a SIP or a transfer.

    Returns:
        0 when the file was registered, or when it already had a UUID (in
        which case only its ``filegrpuse`` is updated, and only if
        ``update_use`` is true).
        2 when ``sip_uuid`` and ``transfer_uuid`` are both set or both unset.
    """
    # The literal string "None" counts as "no UUID supplied".
    if file_uuid == "None":
        file_uuid = None

    if file_uuid:
        logger.error('File already has UUID: %s', file_uuid)
        if update_use:
            File.objects.filter(uuid=file_uuid).update(filegrpuse=use)
        return 0

    # Exactly one of the two identifiers must be provided.
    if bool(sip_uuid) == bool(transfer_uuid):
        logger.error('SIP exclusive-or Transfer UUID must be defined')
        return 2

    # Ingest: the file belongs to a SIP.
    if sip_uuid:
        file_uuid = str(uuid.uuid4())
        sip_relative_path = file_path.replace(sip_directory, "%SIPDirectory%", 1)
        addFileToSIP(sip_relative_path, file_uuid, sip_uuid, event_uuid, date, use=use)
        return 0

    # Transfer: the check above guarantees transfer_uuid is set from here on.
    transfer_relative_path = file_path.replace(sip_directory, '%transferDirectory%', 1)
    transfer = Transfer.objects.get(uuid=transfer_uuid)
    is_reingest = transfer.type == 'Archivematica AIP'

    if is_reingest:
        # For reingest, UUID, use and original path come from the METS when
        # it provides them; otherwise the current values are kept.
        info = get_file_info_from_mets(sip_directory, transfer_relative_path)
        file_uuid = info.get('uuid', file_uuid)
        use = info.get('filegrpuse', use)
        transfer_relative_path = info.get('original_path', transfer_relative_path)
    event_type = 'reingestion' if is_reingest else 'ingestion'

    if not file_uuid:
        file_uuid = str(uuid.uuid4())
        logger.info('Generated UUID for this file: %s.', file_uuid)

    addFileToTransfer(transfer_relative_path, file_uuid, transfer_uuid, event_uuid, date, use=use, sourceType=event_type)

    if is_reingest:
        # The original location was taken from the METS above; now make the
        # current location reflect what is on disk.
        print('updating current location for', file_uuid, 'with', info)
        File.objects.filter(uuid=file_uuid).update(
            currentlocation=info['current_path']
        )
    return 0
def _import_file_from_fsentry(cmd, fsentry, transfer_uuid):
    """Create a File record for ``fsentry`` and copy its PREMIS data onto it.

    The file is added to the transfer with an "ingestion" source type, dated
    with the first PREMIS "ingestion" event found (``None`` if there is
    none). Rights statements and events are then loaded, followed by
    checksum, size and PRONOM format details from the first PREMIS object.
    Nothing beyond rights and events is written when the entry has no PREMIS
    object. ``cmd`` is not used by the body.
    """
    all_events = fsentry.get_premis_events()

    # Date of the first ingestion event, if any.
    ingestion_date = next(
        (
            candidate.event_date_time
            for candidate in fsentry.get_premis_events()
            if candidate.event_type == "ingestion"
        ),
        None,
    )

    file_obj = addFileToTransfer(
        "%transferDirectory%{}".format(fsentry.path),
        fsentry.file_uuid,
        transfer_uuid,
        None,
        ingestion_date,
        sourceType="ingestion",
        use="original",
    )

    for statement in fsentry.get_premis_rights():
        load_rights(file_obj, statement)
    for premis_event in all_events:
        _load_event(file_obj, premis_event)

    try:
        premis_object = fsentry.get_premis_objects()[0]
    except IndexError:
        # No PREMIS object: there is nothing more to copy.
        return

    # Extra attributes of the File object.
    file_obj.checksum = premis_object.message_digest
    file_obj.checksumtype = _convert_checksum_algo(
        premis_object.message_digest_algorithm
    )
    file_obj.size = premis_object.size
    file_obj.save()

    # Format details are recorded only for PRONOM keys that match an active
    # FormatVersion.
    if premis_object.format_registry_name == "PRONOM":
        try:
            matched_version = FormatVersion.active.get(
                pronom_id=premis_object.format_registry_key
            )
        except FormatVersion.DoesNotExist:
            pass
        else:
            FileFormatVersion.objects.create(
                file_uuid=file_obj, format_version=matched_version
            )
            FileID.objects.create(
                file=file_obj,
                format_name=matched_version.format.description,
                format_version=matched_version.version or "",
                format_registry_name=premis_object.format_registry_name,
                format_registry_key=premis_object.format_registry_key,
            )
def main(
    job,
    file_uuid=None,
    file_path="",
    date="",
    event_uuid=None,
    sip_directory="",
    sip_uuid=None,
    transfer_uuid=None,
    use="original",
    update_use=True,
):
    """Assign a UUID to a file and register it with a SIP or a transfer.

    Returns:
        0 when the file was registered, or when it already had a UUID (in
        which case only its ``filegrpuse`` is updated, and only if
        ``update_use`` is true).
        2 when ``sip_uuid`` and ``transfer_uuid`` are both set or both unset.
    """
    # The literal string "None" counts as "no UUID supplied".
    if file_uuid == "None":
        file_uuid = None

    if file_uuid:
        logger.error("File already has UUID: %s", file_uuid)
        if update_use:
            File.objects.filter(uuid=file_uuid).update(filegrpuse=use)
        return 0

    # Exactly one of the two identifiers must be provided.
    if bool(sip_uuid) == bool(transfer_uuid):
        logger.error("SIP exclusive-or Transfer UUID must be defined")
        return 2

    # Ingest: the file belongs to a SIP.
    if sip_uuid:
        file_uuid = str(uuid.uuid4())
        sip_relative_path = file_path.replace(sip_directory, "%SIPDirectory%", 1)
        addFileToSIP(sip_relative_path, file_uuid, sip_uuid, event_uuid, date, use=use)
        return 0

    # Transfer: the check above guarantees transfer_uuid is set from here on.
    transfer_relative_path = file_path.replace(sip_directory, "%transferDirectory%", 1)
    transfer = Transfer.objects.get(uuid=transfer_uuid)
    is_reingest = transfer.type == "Archivematica AIP"

    if is_reingest:
        # For reingest, UUID, use and original path come from the METS when
        # it provides them; otherwise the current values are kept.
        info = get_file_info_from_mets(job, sip_directory, transfer_relative_path)
        file_uuid = info.get("uuid", file_uuid)
        use = info.get("filegrpuse", use)
        transfer_relative_path = info.get("original_path", transfer_relative_path)
    event_type = "reingestion" if is_reingest else "ingestion"

    if not file_uuid:
        file_uuid = str(uuid.uuid4())
        logger.info("Generated UUID for this file: %s.", file_uuid)

    addFileToTransfer(
        transfer_relative_path,
        file_uuid,
        transfer_uuid,
        event_uuid,
        date,
        use=use,
        sourceType=event_type,
    )

    if is_reingest:
        # The original location was taken from the METS above; now make the
        # current location reflect what is on disk.
        job.print_output("updating current location for", file_uuid, "with", info)
        File.objects.filter(uuid=file_uuid).update(
            currentlocation=info["current_path"])
    return 0
# Script fragment (Python 2): finishes declaring the command-line options,
# parses them, then registers the file with either a transfer or a SIP.
# NOTE(review): `parser` and the options that populate opts.fileUUID,
# opts.filePath, opts.sipDirectory, opts.sipUUID, opts.transferUUID,
# opts.eventIdentifierUUID and opts.date are defined outside this excerpt.

# -e/--use is stored in opts.use and defaults to "original".
parser.add_option("-e", "--use", action="store", dest="use", default="original")
# Passing --disable-update-filegrpuse turns opts.update_use from True to False.
parser.add_option("--disable-update-filegrpuse", action="store_false", dest="update_use", default=True)
(opts, args) = parser.parse_args()
# opts2 is only referenced by the disabled block below.
opts2 = vars(opts)
# Disabled: decoded every option value as UTF-8.
# for key, value in opts2.iteritems():
#     print type(key), key, type(value), value
#     exec 'opts.' + key + ' = value.decode("utf-8")'

fileUUID = opts.fileUUID
# An empty value or the literal string "None" means the file has no UUID yet.
if not fileUUID or fileUUID == "None":
    fileUUID = uuid.uuid4().__str__()
else:
    # The file is already known: optionally refresh its filegrpuse, then
    # stop with exit status 0 without registering it again.
    print >>sys.stderr, "File already has UUID:", fileUUID
    if opts.update_use:
        File.objects.filter(uuid=fileUUID).update(filegrpuse=opts.use)
    exit(0)

# Exactly one of sipUUID / transferUUID must be non-empty.
if opts.sipUUID == "" and opts.transferUUID != "":
    # Transfer: store the path with the first occurrence of the SIP
    # directory replaced by the %transferDirectory% placeholder.
    filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%transferDirectory%", 1)
    addFileToTransfer(filePathRelativeToSIP, fileUUID, opts.transferUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)
elif opts.sipUUID != "" and opts.transferUUID == "":
    # SIP: same substitution, with the %SIPDirectory% placeholder.
    filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP, fileUUID, opts.sipUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)
else:
    # Both or neither were given: report the error and exit with status 2.
    print >>sys.stderr, "SIP exclusive-or Transfer uuid must be defined"
    exit(2)
# Script fragment (Python 2): declares command-line options, parses them,
# then registers the file with either a transfer or a SIP.
# NOTE(review): `parser` and the option that populates opts.fileUUID are
# defined outside this excerpt.

# Every option below is a plain string that defaults to "" unless noted.
parser.add_option("-p",  "--filePath", action="store", dest="filePath", default="")
parser.add_option("-d",  "--date", action="store", dest="date", default="")
parser.add_option("-u",  "--eventIdentifierUUID", action="store", dest="eventIdentifierUUID", default="")
parser.add_option("-s",  "--sipDirectory", action="store", dest="sipDirectory", default="")
parser.add_option("-S",  "--sipUUID", action="store", dest="sipUUID", default="")
parser.add_option("-T",  "--transferUUID", action="store", dest="transferUUID", default="")
# -e/--use defaults to "original".
parser.add_option("-e",  "--use", action="store", dest="use", default="original")

(opts, args) = parser.parse_args()
# opts2 is only referenced by the disabled block below.
opts2 = vars(opts)
# Disabled: decoded every option value as UTF-8.
# for key, value in opts2.iteritems():
#     print type(key), key, type(value), value
#     exec 'opts.' + key + ' = value.decode("utf-8")'

fileUUID = opts.fileUUID
# An empty value or the literal string "None" means the file has no UUID
# yet, so one is generated. A supplied UUID is used as given.
if not fileUUID or fileUUID == "None":
    fileUUID = uuid.uuid4().__str__()

# Exactly one of sipUUID / transferUUID must be non-empty.
if opts.sipUUID == "" and opts.transferUUID != "":
    # Transfer: store the path with the first occurrence of the SIP
    # directory replaced by the %transferDirectory% placeholder.
    filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%transferDirectory%", 1)
    addFileToTransfer(filePathRelativeToSIP, fileUUID, opts.transferUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)
elif opts.sipUUID != "" and opts.transferUUID == "":
    # SIP: same substitution, with the %SIPDirectory% placeholder.
    filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP, fileUUID, opts.sipUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)
else:
    # Both or neither were given: report the error and exit with status 2.
    print >>sys.stderr, "SIP exclusive-or Transfer uuid must be defined"
    exit(2)