def test_insert_into_files_raises_if_both_sip_and_transfer_provided(self):
    """insertIntoFiles must raise when given a SIP UUID and a transfer UUID."""
    # Both unit identifiers at once is the invalid combination under test.
    conflicting_units = {"sipUUID": "sip", "transferUUID": "transfer"}
    with pytest.raises(Exception) as raised:
        databaseFunctions.insertIntoFiles(
            "both", "both_path", **conflicting_units
        )
    error_text = str(raised.value)
    assert "both SIP and transfer UUID" in error_text
    def test_insert_into_files_with_sip(self):
        """Inserting a file attached to a SIP adds exactly one File row."""
        path = "%sharedDirectory%/no_such_file"

        def rows_at_path():
            # Re-run the lookup on every call so each assertion sees the
            # current state of the table.
            return File.objects.filter(currentlocation=path).count()

        # Precondition: nothing is stored at this location yet.
        assert rows_at_path() == 0

        databaseFunctions.insertIntoFiles(
            "uuid",
            path,
            sipUUID="0049fa6c-152f-44a0-93b0-c5e856a02292",
        )
        assert rows_at_path() == 1
Пример #3
0
def addFileToTransfer(
    filePathRelativeToSIP,
    fileUUID,
    transferUUID,
    taskUUID,
    date,
    sourceType="ingestion",
    eventDetail="",
    use="original",
    originalLocation=None,
):
    """Register a file under a transfer and record its initial events.

    The file is created through ``insertIntoFiles``, an event of type
    ``sourceType`` dated ``date`` is recorded through ``insertIntoEvents``,
    and ``addAccessionEvent`` is then called for the file and transfer.

    ``originalLocation`` falls back to ``filePathRelativeToSIP`` when it is
    empty or ``None``. ``taskUUID`` is accepted but not used by this
    function.

    Returns whatever ``insertIntoFiles`` returned for the new file.
    """
    # A falsy original location means "same as the current path".
    resolved_original = originalLocation or filePathRelativeToSIP

    created_file = insertIntoFiles(
        fileUUID,
        filePathRelativeToSIP,
        date,
        transferUUID=transferUUID,
        use=use,
        originalLocation=resolved_original,
    )

    # The event is recorded with empty outcome fields.
    event_fields = {
        "fileUUID": fileUUID,
        "eventType": sourceType,
        "eventDateTime": date,
        "eventDetail": eventDetail,
        "eventOutcome": "",
        "eventOutcomeDetailNote": "",
    }
    insertIntoEvents(**event_fields)

    addAccessionEvent(fileUUID, transferUUID, date)
    return created_file
Пример #4
0
def addFileToSIP(
    filePathRelativeToSIP,
    fileUUID,
    sipUUID,
    taskUUID,
    date,
    sourceType="ingestion",
    use="original",
):
    """Register a file under a SIP and record the event that introduced it.

    The file is created through ``insertIntoFiles`` and an event of type
    ``sourceType`` dated ``date`` is then recorded through
    ``insertIntoEvents`` with empty detail and outcome fields.

    ``taskUUID`` is accepted but not used by this function. Nothing is
    returned.
    """
    insertIntoFiles(
        fileUUID,
        filePathRelativeToSIP,
        date,
        sipUUID=sipUUID,
        use=use,
    )

    # Detail and outcome fields are deliberately left blank for this event.
    event_fields = {
        "fileUUID": fileUUID,
        "eventType": sourceType,
        "eventDateTime": date,
        "eventDetail": "",
        "eventOutcome": "",
        "eventOutcomeDetailNote": "",
    }
    insertIntoEvents(**event_fields)
    def test_insert_into_files_with_original_path(self):
        """originalLocation is stored as given, or defaults to filePath."""
        # A path as set during the extract contents microservice. Note the
        # filename contains underscores from normalization.
        current_path = (
            "%transferDirectory%objects/another_parent_directory/"
            "compressed_directory.zip"
        )

        # What the same path might look like when recorded correctly in the
        # original location field.
        pristine_path = (
            "%transferDirectory%objects/another parent "
            "directory/compressed directory.zip"
        )

        # Case 1: originalLocation is supplied. It must be stored verbatim,
        # and the current location must not have been overwritten with it.
        databaseFunctions.insertIntoFiles(
            fileUUID="e0a1fdc4-605a-4104-bf59-039859ee8238",
            filePath=current_path,
            sipUUID="0049fa6c-152f-44a0-93b0-c5e856a02292",
            originalLocation=pristine_path,
        )
        assert File.objects.filter(originallocation=pristine_path).count() == 1
        stored = File.objects.get(originallocation=pristine_path)
        assert stored.currentlocation != pristine_path

        # Case 2: originalLocation is explicitly None, so the original
        # location must default to the file path.
        fallback_uuid = "554661f1-b331-452c-a583-0c582ebcb298"
        databaseFunctions.insertIntoFiles(
            fileUUID=fallback_uuid,
            filePath=current_path,
            sipUUID="01cf9fb8-bc01-40b4-b830-feb66e912f40",
            originalLocation=None,
        )
        fallback_row = File.objects.filter(uuid=fallback_uuid)[0]
        assert fallback_row.originallocation == current_path
Пример #6
0
def compress_aip(job, compression, compression_level, sip_directory, sip_name,
                 sip_uuid):
    """Compresses AIP according to compression algorithm and level.

    job = Job object used for reporting (pyprint, write_output, write_error)
    compression = Numeric value of a ProcessingConfig.AIPCompressionAlgorithm
        member (anything accepted by int()); the "unspecified" value is
        treated as "uncompressed"
    compression_level = AIP compression level handed to the tool; "0" is
        bumped to "1"
    sip_directory = Absolute path to the directory where the SIP is. It is
        concatenated directly with "<sip_name>-<sip_uuid>", so it is expected
        to end with a path separator
    sip_name = User-provided name of the SIP
    sip_uuid = SIP UUID

    Returns 0 when the AIP is left uncompressed, 255 when the algorithm or
    program is not recognized, and otherwise the exit code of the compression
    command. The AIP file record and the compression event are written even
    when that exit code is non-zero.

    Raises ValueError if ``compression`` cannot be converted with int().
    """
    # Local import: the file's top-level import block is not touched here.
    import shlex

    if compression_level == "0":
        compression_level = "1"

    # Default is uncompressed.
    compression = int(compression)
    # Return value is discarded; presumably this call is here to reject
    # values that are not members of the enum -- TODO confirm.
    ProcessingConfig.AIPCompressionAlgorithm.Name(compression)
    if compression == ProcessingConfig.AIP_COMPRESSION_ALGORITHM_UNSPECIFIED:
        compression = ProcessingConfig.AIP_COMPRESSION_ALGORITHM_UNCOMPRESSED

    # Translation to make compress_aip happy: enum value -> (program,
    # algorithm label).
    mapping = {
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_UNCOMPRESSED: ("None", ""),
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_TAR: (
            "gzip",
            "tar.gzip",
        ),  # A3M-TODO: support
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_TAR_BZIP2:
        ("pbzip2", "pbzip2"),
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_TAR_GZIP:
        ("gzip", "tar.gzip"),
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_S7_COPY: ("7z", "copy"),
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_S7_BZIP2: ("7z", "bzip2"),
        ProcessingConfig.AIP_COMPRESSION_ALGORITHM_S7_LZMA: ("7z", "lzma"),
    }

    try:
        program, compression_algorithm = mapping[compression]
    except KeyError:
        msg = f"Invalid program-compression algorithm: {compression}"
        job.pyprint(msg, file=sys.stderr)
        return 255

    archive_path = f"{sip_name}-{sip_uuid}"
    uncompressed_location = sip_directory + archive_path

    # Even though no actual compression is taking place, the location still
    # needs to be set in the unit to ensure that the %AIPFilename% variable
    # is set appropriately. Here the unit is pointed at the uncompressed
    # location.
    if program == "None":
        update_unit(sip_uuid, uncompressed_location)
        return 0

    job.pyprint("Compressing {} with {}, algorithm {}, level {}".format(
        uncompressed_location, program, compression_algorithm,
        compression_level))

    # Everything below is handed to bash as a single script. The paths embed
    # the user-provided SIP name, so they are shell-quoted rather than just
    # wrapped in double quotes (which would still expand `$(...)`, backticks
    # and embedded quotes). The level is quoted for the same reason.
    quoted_level = shlex.quote(str(compression_level))
    quoted_sip_directory = shlex.quote(sip_directory)
    quoted_archive_path = shlex.quote(archive_path)
    quoted_uncompressed = shlex.quote(uncompressed_location)

    if program == "7z":
        compressed_location = uncompressed_location + ".7z"
        command = (
            "/usr/bin/7z a -bd -t7z -y "
            f"-m0={compression_algorithm} -mx={quoted_level} "
            "-mta=on -mtc=on -mtm=on -mmt=on "
            f"{shlex.quote(compressed_location)} {quoted_uncompressed}"
        )
        tool_info_command = (
            r'echo program="7z"\; '
            r'algorithm="{}"\; '
            'version="`7z | grep Version`"'.format(compression_algorithm))
    elif program == "pbzip2":
        compressed_location = uncompressed_location + ".tar.bz2"
        command = (
            f"/bin/tar -c --directory {quoted_sip_directory} "
            f"{quoted_archive_path} "
            f"| /usr/bin/pbzip2 --compress -{quoted_level} "
            f"> {shlex.quote(compressed_location)}"
        )
        tool_info_command = (
            r'echo program="pbzip2"\; '
            r'algorithm="{}"\; '
            'version="$((pbzip2 -V) 2>&1)"'.format(compression_algorithm))
    elif program == "gzip":
        compressed_location = uncompressed_location + ".tar.gz"
        command = (
            f"/bin/tar -c --directory {quoted_sip_directory} "
            f"{quoted_archive_path} "
            f"| /bin/gzip -{quoted_level} "
            f"> {shlex.quote(compressed_location)}"
        )
        tool_info_command = (
            r'echo program="gzip"\; '
            r'algorithm="{}"\; '
            'version="$((gzip -V) 2>&1)"'.format(compression_algorithm))
    else:
        msg = f"Program {program} not recognized, exiting script prematurely."
        job.pyprint(msg, file=sys.stderr)
        return 255

    job.pyprint("Executing command:", command)
    exit_code, std_out, std_err = executeOrRun("bashScript",
                                               command,
                                               capture_output=True)
    job.write_output(std_out)
    job.write_error(std_err)

    # Add new AIP File. The file record reuses the SIP UUID as its own UUID,
    # and the stored path has the leading SIP directory swapped for the
    # %SIPDirectory% placeholder.
    file_uuid = sip_uuid
    databaseFunctions.insertIntoFiles(
        fileUUID=file_uuid,
        filePath=compressed_location.replace(sip_directory, "%SIPDirectory%",
                                             1),
        sipUUID=sip_uuid,
        use="aip",
    )

    # Add compression event. The tool info command only interpolates the
    # algorithm label from the mapping above, never caller-supplied text.
    job.pyprint("Tool info command:", tool_info_command)
    _, tool_info, tool_info_err = executeOrRun("bashScript",
                                               tool_info_command,
                                               capture_output=True)
    job.write_output(tool_info)
    job.write_error(tool_info_err)
    tool_output = f'Standard Output="{std_out}"; Standard Error="{std_err}"'
    databaseFunctions.insertIntoEvents(
        eventType="compression",
        eventDetail=tool_info,
        eventOutcomeDetailNote=tool_output,
        fileUUID=file_uuid,
    )

    update_unit(sip_uuid, compressed_location)

    return exit_code
 def test_insert_into_files_raises_if_no_sip_or_transfer_provided(self):
     """insertIntoFiles must raise when neither unit UUID is supplied."""
     # Only the file UUID and path are passed; no sipUUID or transferUUID.
     positional_only = ("no_sip", "no_sip_path")
     with pytest.raises(Exception) as raised:
         databaseFunctions.insertIntoFiles(*positional_only)
     error_text = str(raised.value)
     assert "neither defined" in error_text