def assign_uuid(
    job,
    filename,
    extracted_file_original_location,
    package_uuid,
    transfer_uuid,
    date,
    task_uuid,
    sip_directory,
    package_filename,
):
    """Give one extracted file a new UUID and register it with the transfer.

    The file is added to the transfer with source type "unpacking", an event
    detail naming the package it was unpacked from, and the supplied original
    location. Its size and checksum are then recorded, and the new UUID is
    reported through ``job.pyprint``.
    """

    def _relative_to_transfer(path):
        # Only the first occurrence of the SIP directory is swapped for the
        # transfer directory placeholder.
        return path.replace(sip_directory, TRANSFER_DIRECTORY, 1)

    new_file_uuid = str(uuid.uuid4())
    file_location = _relative_to_transfer(filename)
    package_location = _relative_to_transfer(package_filename)
    unpacked_from = "Unpacked from: " + "{} ({})".format(
        package_location, package_uuid
    )

    addFileToTransfer(
        file_location,
        new_file_uuid,
        transfer_uuid,
        task_uuid,
        date,
        sourceType="unpacking",
        eventDetail=unpacked_from,
        originalLocation=extracted_file_original_location,
    )
    # The last argument is a second, independent UUID generated for this call.
    updateSizeAndChecksum(new_file_uuid, filename, date, str(uuid.uuid4()))
    job.pyprint("Assigning new file UUID:", new_file_uuid, "to file", filename)
def addFile(filePath, transferPath, transferUUID, date, eventDetail="", fileUUID=None):
    """Register an unpacked file with a transfer and record its size/checksum.

    Args:
        filePath: Path of the file on disk.
        transferPath: Transfer directory prefix; its first occurrence in
            ``filePath`` is replaced with ``%transferDirectory%``.
        transferUUID: UUID of the transfer the file belongs to.
        date: Date passed through to the registration helpers.
        eventDetail: Optional detail text for the "unpacking" event.
        fileUUID: UUID to assign to the file. When omitted (``None``) a fresh
            UUID is generated for this call.
    """
    # A ``uuid.uuid4()`` default in the signature is evaluated only once, at
    # definition time, so every caller omitting fileUUID would share one UUID.
    if fileUUID is None:
        fileUUID = str(uuid.uuid4())
    taskUUID = str(uuid.uuid4())
    filePathRelativeToSIP = filePath.replace(transferPath, "%transferDirectory%", 1)
    addFileToTransfer(
        filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail
    )
    # uuid4 has to be *called*: ``uuid.uuid4.__str__()`` yields the string form
    # of the function object rather than a UUID.
    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
Exemplo n.º 3
0
def onceExtracted(command):
    extractedFiles = []
    print "TODO - Metadata regarding removal of extracted archive"
    if removeOnceExtracted:
        packageFileUUID = sys.argv[6].__str__()
        sipDirectory = sys.argv[2].__str__()
        os.remove(replacementDic["%inputFile%"])
        currentLocation =  replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1)
        fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation)

    print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"]
    for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")):
        path, directories, files = w
        for p in files:
            p = os.path.join(path, p)
            #print "path: ", p
            if os.path.isfile(p):
                extractedFiles.append(p)
    for ef in extractedFiles:
        fileUUID = uuid.uuid4().__str__()
        #print "File Extracted:", ef
        if True: #Add the file to the SIP
            #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments>
            sipDirectory = sys.argv[2].__str__()
            transferUUID = sys.argv[3].__str__()
            date = sys.argv[4].__str__()
            taskUUID = sys.argv[5].__str__()
            packageFileUUID = sys.argv[6].__str__()

            filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1)
            print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP
            eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP
            addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail)
            updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4.__str__())


        run = sys.argv[0].__str__() + \
        " \"" + transcoder.escapeForCommand(ef) + "\""
        if True: #Add the file to the SIP
            run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \
            " \"" + fileUUID + "\""

        exitCode, stdOut, stdError = executeOrRun("command", run)
        print stdOut
        print >>sys.stderr, stdError
        if exitCode != 0 and command.exitCode == 0:
            command.exitCode = exitCode

    global extractedCount
    date = sys.argv[4].__str__().split(".", 1)[0]
    extractedCount = extractedCount + 1
    replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
Exemplo n.º 4
0
def onceExtracted(command):
    extractedFiles = []
    print "TODO - Metadata regarding removal of extracted archive"
    if removeOnceExtracted:
        packageFileUUID = sys.argv[6].__str__()
        sipDirectory = sys.argv[2].__str__()
        os.remove(replacementDic["%inputFile%"])
        currentLocation =  replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1)
        fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation)

    print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"]
    for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")):
        path, directories, files = w
        for p in files:
            p = os.path.join(path, p)
            #print "path: ", p
            if os.path.isfile(p):
                extractedFiles.append(p)
    for ef in extractedFiles:
        fileUUID = uuid.uuid4().__str__()
        #print "File Extracted:", ef
        if True: #Add the file to the SIP
            #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments>
            sipDirectory = sys.argv[2].__str__()
            transferUUID = sys.argv[3].__str__()
            date = sys.argv[4].__str__()
            taskUUID = sys.argv[5].__str__()
            packageFileUUID = sys.argv[6].__str__()

            filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1)
            print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP
            eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP
            addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail)
            updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4.__str__())


        run = sys.argv[0].__str__() + \
        " \"" + transcoder.escapeForCommand(ef) + "\""
        if True: #Add the file to the SIP
            run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \
            " \"" + fileUUID + "\""

        exitCode, stdOut, stdError = executeOrRun("command", run)
        print stdOut
        print >>sys.stderr, stdError
        if exitCode != 0 and command.exitCode == 0:
            command.exitCode = exitCode

    global extractedCount
    date = sys.argv[4].__str__().split(".", 1)[0]
    extractedCount = extractedCount + 1
    replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
Exemplo n.º 5
0
def assign_uuid(filename, package_uuid, transfer_uuid, date, task_uuid, sip_directory, package_filename):
    """Give one extracted file a new UUID and register it with the transfer.

    The file is added to the transfer with source type "unpacking" and an
    event detail naming the package it came from; its size and checksum are
    then recorded and the new UUID is printed.
    """

    def _relative_to_transfer(path):
        # Only the first occurrence of the SIP directory is replaced.
        return path.replace(sip_directory, "%transferDirectory%", 1)

    new_file_uuid = str(uuid.uuid4())
    file_location = _relative_to_transfer(filename)
    package_location = _relative_to_transfer(package_filename)
    unpacked_from = "Unpacked from: " + "{} ({})".format(package_location, package_uuid)

    addFileToTransfer(
        file_location,
        new_file_uuid,
        transfer_uuid,
        task_uuid,
        date,
        sourceType="unpacking",
        eventDetail=unpacked_from,
    )
    # The last argument is a second, independent UUID generated for this call.
    updateSizeAndChecksum(new_file_uuid, filename, date, str(uuid.uuid4()))

    print('Assigning new file UUID:', new_file_uuid, 'to file', filename)
def addFile(filePath,
            transferPath,
            transferUUID,
            date,
            eventDetail="",
            fileUUID=None):
    """Register an unpacked file with a transfer and record its size/checksum.

    Args:
        filePath: Path of the file on disk.
        transferPath: Transfer directory prefix; its first occurrence in
            ``filePath`` is replaced with ``%transferDirectory%``.
        transferUUID: UUID of the transfer the file belongs to.
        date: Date passed through to the registration helpers.
        eventDetail: Optional detail text for the "unpacking" event.
        fileUUID: UUID to assign to the file. When omitted (``None``) a fresh
            UUID is generated for this call.
    """
    # A ``uuid.uuid4()`` default in the signature is evaluated only once, at
    # definition time, so every caller omitting fileUUID would share one UUID.
    if fileUUID is None:
        fileUUID = str(uuid.uuid4())
    taskUUID = str(uuid.uuid4())
    filePathRelativeToSIP = filePath.replace(transferPath,
                                             "%transferDirectory%", 1)
    addFileToTransfer(filePathRelativeToSIP,
                      fileUUID,
                      transferUUID,
                      taskUUID,
                      date,
                      sourceType="unpacking",
                      eventDetail=eventDetail)
    # uuid4 has to be *called*: ``uuid.uuid4.__str__()`` yields the string form
    # of the function object rather than a UUID.
    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
Exemplo n.º 7
0
def main(file_uuid=None, file_path='', date='', event_uuid=None, sip_directory='', sip_uuid=None, transfer_uuid=None, use='original', update_use=True):
    """Register a file with either a transfer or a SIP.

    Returns 0 when the file was registered or already carried a UUID, and 2
    when the SIP and transfer UUIDs are both set or both missing.
    """
    # The literal string "None" counts as "no UUID supplied".
    file_uuid = None if file_uuid == "None" else file_uuid
    if file_uuid:
        logger.error('File already has UUID: %s', file_uuid)
        if update_use:
            File.objects.filter(uuid=file_uuid).update(filegrpuse=use)
        return 0

    # Exactly one of the SIP UUID and the transfer UUID has to be given.
    if bool(sip_uuid) == bool(transfer_uuid):
        logger.error('SIP exclusive-or Transfer UUID must be defined')
        return 2

    if not transfer_uuid:
        # Ingest: the check above guarantees a SIP UUID is present here.
        file_uuid = str(uuid.uuid4())
        sip_relative_path = file_path.replace(sip_directory, "%SIPDirectory%", 1)
        addFileToSIP(sip_relative_path, file_uuid, sip_uuid, event_uuid, date, use=use)
        return 0

    # Transfer
    relative_path = file_path.replace(sip_directory, '%transferDirectory%', 1)
    transfer = Transfer.objects.get(uuid=transfer_uuid)
    is_reingest = transfer.type == 'Archivematica AIP'
    source_type = 'ingestion'
    if is_reingest:
        # For reingest, UUID, use and original path come from the METS when
        # it provides them.
        info = get_file_info_from_mets(sip_directory, relative_path)
        source_type = 'reingestion'
        file_uuid = info.get('uuid', file_uuid)
        use = info.get('filegrpuse', use)
        relative_path = info.get('original_path', relative_path)
    if not file_uuid:
        file_uuid = str(uuid.uuid4())
        logger.info('Generated UUID for this file: %s.', file_uuid)
    addFileToTransfer(relative_path, file_uuid, transfer_uuid, event_uuid, date, use=use, sourceType=source_type)
    if is_reingest:
        # The original location came from the METS; the current location is
        # updated to the path reported for what is on disk.
        print('updating current location for', file_uuid, 'with', info)
        File.objects.filter(uuid=file_uuid).update(
            currentlocation=info['current_path']
        )
    return 0
Exemplo n.º 8
0
def _import_file_from_fsentry(cmd, fsentry, transfer_uuid):
    """Create a transfer file from ``fsentry`` and load its PREMIS data.

    The file is registered with source type "ingestion", dated with the first
    "ingestion" PREMIS event (or ``None`` if there is none). Rights
    statements and events are then loaded. If a PREMIS object is present, its
    checksum, checksum type and size are saved on the file, and for PRONOM
    formats with a known active format version the format records are created.
    """
    events = fsentry.get_premis_events()

    # Date of the first "ingestion" event, or None when there is no such event.
    ingested_at = next(
        (
            candidate.event_date_time
            for candidate in fsentry.get_premis_events()
            if candidate.event_type == "ingestion"
        ),
        None,
    )

    transfer_file = addFileToTransfer(
        "%transferDirectory%{}".format(fsentry.path),
        fsentry.file_uuid,
        transfer_uuid,
        None,
        ingested_at,
        sourceType="ingestion",
        use="original",
    )

    for statement in fsentry.get_premis_rights():
        load_rights(transfer_file, statement)

    for premis_event in events:
        _load_event(transfer_file, premis_event)

    # Without a PREMIS object there is nothing more to record.
    try:
        described = fsentry.get_premis_objects()[0]
    except IndexError:
        return

    # Copy checksum and size details onto the File object.
    transfer_file.checksum = described.message_digest
    transfer_file.checksumtype = _convert_checksum_algo(
        described.message_digest_algorithm
    )
    transfer_file.size = described.size
    transfer_file.save()

    # Format details are only recorded for PRONOM keys that match an active
    # format version.
    if described.format_registry_name == "PRONOM":
        try:
            matched_version = FormatVersion.active.get(
                pronom_id=described.format_registry_key
            )
        except FormatVersion.DoesNotExist:
            return
        FileFormatVersion.objects.create(
            file_uuid=transfer_file, format_version=matched_version
        )
        FileID.objects.create(
            file=transfer_file,
            format_name=matched_version.format.description,
            format_version=matched_version.version or "",
            format_registry_name=described.format_registry_name,
            format_registry_key=described.format_registry_key,
        )
Exemplo n.º 9
0
def main(
    job,
    file_uuid=None,
    file_path="",
    date="",
    event_uuid=None,
    sip_directory="",
    sip_uuid=None,
    transfer_uuid=None,
    use="original",
    update_use=True,
):
    """Register a file with either a transfer or a SIP.

    Returns 0 when the file was registered or already carried a UUID, and 2
    when the SIP and transfer UUIDs are both set or both missing.
    """
    # The literal string "None" counts as "no UUID supplied".
    file_uuid = None if file_uuid == "None" else file_uuid
    if file_uuid:
        logger.error("File already has UUID: %s", file_uuid)
        if update_use:
            File.objects.filter(uuid=file_uuid).update(filegrpuse=use)
        return 0

    # Exactly one of the SIP UUID and the transfer UUID has to be given.
    if bool(sip_uuid) == bool(transfer_uuid):
        logger.error("SIP exclusive-or Transfer UUID must be defined")
        return 2

    if not transfer_uuid:
        # Ingest: the check above guarantees a SIP UUID is present here.
        file_uuid = str(uuid.uuid4())
        sip_relative_path = file_path.replace(sip_directory, "%SIPDirectory%", 1)
        addFileToSIP(
            sip_relative_path, file_uuid, sip_uuid, event_uuid, date, use=use
        )
        return 0

    # Transfer
    relative_path = file_path.replace(sip_directory, "%transferDirectory%", 1)
    transfer = Transfer.objects.get(uuid=transfer_uuid)
    is_reingest = transfer.type == "Archivematica AIP"
    source_type = "ingestion"
    if is_reingest:
        # For reingest, UUID, use and original path come from the METS when
        # it provides them.
        info = get_file_info_from_mets(job, sip_directory, relative_path)
        source_type = "reingestion"
        file_uuid = info.get("uuid", file_uuid)
        use = info.get("filegrpuse", use)
        relative_path = info.get("original_path", relative_path)
    if not file_uuid:
        file_uuid = str(uuid.uuid4())
        logger.info("Generated UUID for this file: %s.", file_uuid)
    addFileToTransfer(
        relative_path,
        file_uuid,
        transfer_uuid,
        event_uuid,
        date,
        use=use,
        sourceType=source_type,
    )
    if is_reingest:
        # The original location came from the METS; the current location is
        # updated to the path reported for what is on disk.
        job.print_output(
            "updating current location for", file_uuid, "with", info
        )
        File.objects.filter(uuid=file_uuid).update(
            currentlocation=info["current_path"]
        )
    return 0
    # NOTE(review): this is a headless fragment. The creation of `parser` and
    # the options behind opts.fileUUID, opts.filePath, opts.sipDirectory,
    # opts.sipUUID, opts.transferUUID, opts.date and opts.eventIdentifierUUID
    # are not visible here. Presumably an optparse parser; confirm.
    parser.add_option("-e",  "--use", action="store", dest="use", default="original")
    # update_use defaults to True; with optparse's store_false action, passing
    # this flag sets it to False.
    parser.add_option("--disable-update-filegrpuse", action="store_false", dest="update_use", default=True)


    (opts, args) = parser.parse_args()
    # opts2 is only referenced by the commented-out debugging code below.
    opts2 = vars(opts)
#    for key, value in opts2.iteritems():
#        print type(key), key, type(value), value
#        exec 'opts.' + key + ' = value.decode("utf-8")'
    fileUUID = opts.fileUUID
    # Generate a UUID when none was supplied (empty value or the literal
    # string "None").
    if not fileUUID or fileUUID == "None":
        fileUUID = uuid.uuid4().__str__()
    else:
        # The file already has a UUID: report it on stderr, optionally update
        # its filegrpuse, and exit with status 0 without registering it again.
        print >>sys.stderr, "File already has UUID:", fileUUID
        if opts.update_use:
            File.objects.filter(uuid=fileUUID).update(filegrpuse=opts.use)
        exit(0) 


    # Exactly one of sipUUID / transferUUID must be non-empty.
    # Transfer UUID only: register the file with the transfer.
    if opts.sipUUID == "" and opts.transferUUID != "":
        filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%transferDirectory%", 1)
        addFileToTransfer(filePathRelativeToSIP, fileUUID, opts.transferUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)

    # SIP UUID only: register the file with the SIP.
    elif opts.sipUUID != "" and opts.transferUUID == "":
        filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%SIPDirectory%", 1)
        addFileToSIP(filePathRelativeToSIP, fileUUID, opts.sipUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)

    # Both or neither supplied: report the error and exit with status 2.
    else:
        print >>sys.stderr, "SIP exclusive-or Transfer uuid must be defined"
        exit(2)
    # NOTE(review): this is a headless fragment. The creation of `parser` and
    # the option that populates opts.fileUUID are not visible here.
    # Presumably an optparse parser; confirm.
    # Every option below is stored as given and defaults to an empty string,
    # except --use, which defaults to "original".
    parser.add_option("-p",  "--filePath",          action="store", dest="filePath", default="")
    parser.add_option("-d",  "--date",              action="store", dest="date", default="")
    parser.add_option("-u",  "--eventIdentifierUUID", action="store", dest="eventIdentifierUUID", default="")
    parser.add_option("-s",  "--sipDirectory", action="store", dest="sipDirectory", default="")
    parser.add_option("-S",  "--sipUUID", action="store", dest="sipUUID", default="")
    parser.add_option("-T",  "--transferUUID", action="store", dest="transferUUID", default="")
    parser.add_option("-e",  "--use", action="store", dest="use", default="original")


    (opts, args) = parser.parse_args()
    # opts2 is only referenced by the commented-out debugging code below.
    opts2 = vars(opts)
#    for key, value in opts2.iteritems():
#        print type(key), key, type(value), value
#        exec 'opts.' + key + ' = value.decode("utf-8")'
    fileUUID = opts.fileUUID
    # Generate a UUID when none was supplied (empty value or the literal
    # string "None"); a supplied UUID is used as given.
    if not fileUUID or fileUUID == "None":
        fileUUID = uuid.uuid4().__str__()


    # Exactly one of sipUUID / transferUUID must be non-empty.
    # Transfer UUID only: register the file with the transfer.
    if opts.sipUUID == "" and opts.transferUUID != "":
        filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%transferDirectory%", 1)
        addFileToTransfer(filePathRelativeToSIP, fileUUID, opts.transferUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)

    # SIP UUID only: register the file with the SIP.
    elif opts.sipUUID != "" and opts.transferUUID == "":
        filePathRelativeToSIP = opts.filePath.replace(opts.sipDirectory,"%SIPDirectory%", 1)
        addFileToSIP(filePathRelativeToSIP, fileUUID, opts.sipUUID, opts.eventIdentifierUUID, opts.date, use=opts.use)

    # Both or neither supplied: report the error and exit with status 2.
    else:
        print >>sys.stderr, "SIP exclusive-or Transfer uuid must be defined"
        exit(2)