def handle_package(self, package):
    """Download an Archivematica package and record its details.

    Looks up the package identified by ``package.archivematica_identifier``
    through an ``AMClient`` built from ``settings.ARCHIVEMATICA``. When the
    package is downloadable it is downloaded and moved to
    ``<self.tmp_dir>/<uuid><extension>``, after which ``package.type`` and
    ``package.data`` are set from the returned details.

    Raises:
        Exception: if the details lookup returns an integer error code.
        RoutineError: if the package is not downloadable, or if the
            download or the move into ``self.tmp_dir`` fails.
    """
    am_client = AMClient(ss_api_key=settings.ARCHIVEMATICA['api_key'],
                         ss_user_name=settings.ARCHIVEMATICA['username'],
                         ss_url=settings.ARCHIVEMATICA['baseurl'],
                         directory=self.tmp_dir)
    am_client.package_uuid = package.archivematica_identifier

    package_data = am_client.get_package_details()
    # The client signals failure by returning an integer error code
    # instead of the details mapping.
    if isinstance(package_data, int):
        raise Exception(errors.error_lookup(package_data))

    if not self.is_downloadable(package_data):
        raise RoutineError(
            f"Package {package.archivematica_identifier} is not downloadable"
        )

    try:
        download_path = am_client.download_package(am_client.package_uuid)
        tmp_path = join(
            self.tmp_dir,
            f"{am_client.package_uuid}{self.get_extension(package_data)}"
        )
        move(download_path, tmp_path)
    except Exception as e:
        # Chain the original error so its traceback is not lost.
        raise RoutineError(f"Error downloading data: {e}") from e

    package.type = package_data['package_type'].lower()
    package.data = package_data
示例#2
0
def approve_transfer(dirname, url, am_api_key, am_user):
    """
    Approve the waiting transfer whose directory matches ``dirname``.

    :param dirname: Directory of the transfer to approve; compared against
        the ``directory`` field of each waiting transfer via ``fsencode``.
    :param url: Passed to ``AMClient`` as ``am_url``.
    :param am_api_key: Passed to ``AMClient`` as ``am_api_key``.
    :param am_user: Passed to ``AMClient`` as ``am_user_name``.
    :returns: UUID of the approved transfer or None.
    """
    LOGGER.info("Approving %s", dirname)
    # NOTE(review): presumably gives the transfer time to appear in the
    # unapproved list before querying -- confirm the 6 second delay is needed.
    time.sleep(6)
    am = AMClient(am_url=url, am_user_name=am_user, am_api_key=am_api_key)
    try:
        # Find the waiting transfers available to be approved via the am client
        # interface. A non-mapping reply (e.g. an error code) raises TypeError.
        waiting_transfers = am.unapproved_transfers()["results"]
    except (KeyError, TypeError):
        LOGGER.error(
            "Request to unapproved transfers did not return the "
            "expected response, see the request log"
        )
        return None
    if not waiting_transfers:
        LOGGER.warning("There are no waiting transfers.")
        return None

    wanted = fsencode(dirname)
    match = next(
        (
            waiting
            for waiting in waiting_transfers
            if fsencode(waiting["directory"]) == wanted
        ),
        None,
    )
    if match is None:
        LOGGER.warning(
            "Requested directory %s not found in the waiting " "transfers list", dirname
        )
        return None
    LOGGER.info("Found waiting transfer: %s", match["directory"])

    # We can reuse the existing AM Client but we didn't know all the kwargs
    # at the outset so we need to set its attributes here.
    am.transfer_type = match["type"]
    am.transfer_directory = dirname

    # Approve the transfer and return the UUID of the transfer approved.
    approved = am.approve_transfer()
    if isinstance(approved, int):
        # An integer is an error code, never a result mapping, so bail out
        # even when no message is registered for it instead of calling
        # ``.get`` on an int.
        LOGGER.error("Error approving transfer: %s", errors.error_lookup(approved))
        return None
    # Get will return None, or the UUID.
    return approved.get("uuid")
def setup_amclient():
    """Build an AMClient from the storage service settings and return it.

    Each setting falls back to an empty string; any trailing slashes are
    stripped from the URL.
    """
    api_key = get_setting("storage_service_apikey", "")
    user_name = get_setting("storage_service_user", "")
    base_url = get_setting("storage_service_url", "").rstrip("/")
    return AMClient(ss_api_key=api_key, ss_user_name=user_name, ss_url=base_url)
示例#4
0
 def get_client(self, origin):
     """Build the Archivematica client configured for a SIP origin.

     The connection options are read from
     ``settings.ARCHIVEMATICA_ORIGINS[origin]``.
     """
     origin_cfg = settings.ARCHIVEMATICA_ORIGINS[origin]
     client_kwargs = {
         "am_api_key": origin_cfg['api_key'],
         "am_user_name": origin_cfg['username'],
         "am_url": origin_cfg['baseurl'],
         "transfer_source": origin_cfg['location_uuid'],
         "processing_config": origin_cfg['processing_config'],
     }
     return AMClient(**client_kwargs)
示例#5
0
def get_am_client(config):
    """Create the AM Client object used by the rest of the script.

    Every client option is copied from ``config["connection"]``; the new
    client is then handed to ``setup_amclient`` and that call's result is
    returned.
    """
    connection = config["connection"]
    option_names = (
        "ss_url",
        "ss_user_name",
        "ss_api_key",
        "am_url",
        "am_user_name",
        "am_api_key",
        "output_mode",
    )
    client = AMClient(**{name: connection[name] for name in option_names})
    return setup_amclient(client)
示例#6
0
def approve_transfer(dirname, url, am_api_key, am_user):
    """
    Approve the waiting transfer whose directory matches ``dirname``.

    :param dirname: Directory of the transfer to approve; compared against
        the ``directory`` field of each waiting transfer via ``fsencode``.
    :param url: Passed to ``AMClient`` as ``am_url``.
    :param am_api_key: Passed to ``AMClient`` as ``am_api_key``.
    :param am_user: Passed to ``AMClient`` as ``am_user_name``.
    :returns: UUID of the approved transfer or None.
    """
    LOGGER.info("Approving %s", dirname)
    # NOTE(review): presumably gives the transfer time to appear in the
    # unapproved list before querying -- confirm the 6 second delay is needed.
    time.sleep(6)
    am = AMClient(am_url=url, am_user_name=am_user, am_api_key=am_api_key)
    try:
        # Find the waiting transfers available to be approved via the am client
        # interface. A non-mapping reply (e.g. an error code) raises TypeError.
        waiting_transfers = am.unapproved_transfers()["results"]
    except (KeyError, TypeError):
        LOGGER.error(
            "Request to unapproved transfers did not return the "
            "expected response, see the request log"
        )
        return None
    if not waiting_transfers:
        LOGGER.warning("There are no waiting transfers.")
        return None

    wanted = fsencode(dirname)
    match = next(
        (
            waiting
            for waiting in waiting_transfers
            if fsencode(waiting["directory"]) == wanted
        ),
        None,
    )
    if match is None:
        LOGGER.warning(
            "Requested directory %s not found in the waiting " "transfers list", dirname
        )
        return None
    LOGGER.info("Found waiting transfer: %s", match["directory"])

    # We can reuse the existing AM Client but we didn't know all the kwargs
    # at the outset so we need to set its attributes here.
    am.transfer_type = match["type"]
    am.transfer_directory = dirname

    # Approve the transfer and return the UUID of the transfer approved.
    approved = am.approve_transfer()
    if isinstance(approved, int):
        # An integer is an error code, never a result mapping, so bail out
        # even when no message is registered for it instead of calling
        # ``.get`` on an int.
        LOGGER.error("Error approving transfer: %s", errors.error_lookup(approved))
        return None
    # Get will return None, or the UUID.
    return approved.get("uuid")
示例#7
0
 def get_mets_via_api(self, transfer_name, sip_uuid=None, parse_xml=True):
     """Return METS once stored in an AIP.

     The METS file is extracted from the package into a temporary
     directory, read into memory, and the temporary directory is removed
     again whether or not extraction succeeds.

     :param transfer_name: Name of the transfer; with ``sip_uuid`` it forms
         the ``<name>-<uuid>/data/METS.<uuid>.xml`` path inside the package.
     :param sip_uuid: UUID of the SIP; looked up with ``self.get_sip_uuid``
         when not given.
     :param parse_xml: When true return the parsed ``etree`` element,
         otherwise the raw METS text.
     """
     import shutil  # local import: only needed for temp-dir cleanup

     if not sip_uuid:
         sip_uuid = self.get_sip_uuid(transfer_name)
     absolute_transfer_name = "{}-{}".format(transfer_name, sip_uuid)
     mets_name = "METS.{}.xml".format(sip_uuid)
     mets_path = "{}/data/{}".format(absolute_transfer_name, mets_name)
     mets_tmp_dir = tempfile.mkdtemp()
     try:
         mets_tmp_file = os.path.join(mets_tmp_dir, mets_name)
         AMClient(
             ss_api_key=self._ss_api_key,
             ss_user_name=self.ss_username,
             ss_url=self.ss_url.rstrip("/"),
             package_uuid=sip_uuid,
             relative_path=mets_path,
             saveas_filename=mets_tmp_file,
         ).extract_file()
         with open(mets_tmp_file, "r") as mets_file:
             mets = mets_file.read()
     finally:
         # Remove the whole temporary directory, not just the file, so
         # nothing is left behind on success or on failure.
         shutil.rmtree(mets_tmp_dir, ignore_errors=True)
     if parse_xml:
         return etree.fromstring(mets.encode("utf8"))
     return mets
示例#8
0
def stream_mets_from_storage_service(
    transfer_name, sip_uuid, error_message="Unexpected error: {}"
):
    """Stream a single AIP METS file from the Storage Service.

    Returns the prepared streaming response on success. A connection error
    yields a JSON error response with status 503; any non-200 reply yields
    a JSON error response carrying that reply's status code.
    """
    mets_name = "METS.{}.xml".format(sip_uuid)
    mets_path = "{}-{}/data/{}".format(transfer_name, sip_uuid, mets_name)
    # AMClient offers little debug information for now, so the download is
    # simply attempted and any failure is reported back to the caller.
    try:
        client = AMClient(
            ss_api_key=get_setting("storage_service_apikey", None),
            ss_user_name=get_setting("storage_service_user", "test"),
            ss_url=get_setting("storage_service_url", None).rstrip("/"),
            package_uuid=sip_uuid,
            relative_path=mets_path,
            stream=True,
        )
        response = client.extract_file()
    except requests.exceptions.ConnectionError as err:
        return json_response(
            {"success": False, "message": error_message.format(err)},
            status_code=503,
        )
    if response.status_code != 200:
        return json_response(
            {"success": False, "message": error_message.format(response.content)},
            status_code=response.status_code,
        )
    return _prepare_stream_response(
        payload=response,
        content_type="application/xml",
        content_disposition="attachment; filename={};".format(mets_name),
    )
示例#9
0
def get_status(
    am_url,
    am_user,
    am_api_key,
    ss_url,
    ss_user,
    ss_api_key,
    unit_uuid,
    unit_type,
    hide_on_complete=False,
    delete_on_complete=False,
):
    """
    Get status of the SIP or Transfer with unit_uuid.

    When a transfer is COMPLETE and not in the backlog, the stored unit is
    switched to its SIP and the SIP's status is fetched and returned instead.

    :param str am_url: Base URL of the Archivematica API.
    :param str am_user: Sent as the ``username`` request parameter.
    :param str am_api_key: Sent as the ``api_key`` request parameter.
    :param str ss_url: Passed to ``AMClient`` as ``ss_url``.
    :param str ss_user: Passed to ``AMClient`` as ``ss_user_name``.
    :param str ss_api_key: Passed to ``AMClient`` as ``ss_api_key``.
    :param str unit_uuid: UUID of the unit to query for.
    :param str unit_type: 'ingest' or 'transfer'
    :param bool hide_on_complete: Hide the unit in the dashboard if COMPLETE
    :param bool delete_on_complete: Remove ``unit.path`` once the SIP is
        COMPLETE and its package status is 'UPLOADED'.
    :returns: Dict with status of the unit from Archivematica, the message
        from ``errors.error_lookup`` when the status call returned an error
        code with a registered message, or whatever else the status call
        returned.
    """
    params = {"username": am_user, "api_key": am_api_key}

    # Get status
    url = "{}/api/{}/status/{}/".format(am_url, unit_type, unit_uuid)
    unit_info = utils._call_url_json(url, params)
    if isinstance(unit_info, int):
        error = errors.error_lookup(unit_info)
        if error is not None:
            return error
    # Only a mapping can carry a status; this also keeps an integer error
    # code without a registered message away from ``.get``.
    is_complete = (
        isinstance(unit_info, dict) and unit_info.get("status") == "COMPLETE"
    )

    # If complete, hide in dashboard
    if hide_on_complete and is_complete:
        LOGGER.info("Hiding %s %s in dashboard", unit_type, unit_uuid)
        url = "{}/api/{}/{}/delete/".format(am_url, unit_type, unit_uuid)
        LOGGER.debug("Method: DELETE; URL: %s; params: %s;", url, params)
        response = requests.delete(url, params=params)
        LOGGER.debug("Response: %s", response)

    # If Transfer is complete, get the SIP's status
    if (
        is_complete
        and unit_type == "transfer"
        and unit_info.get("sip_uuid") != "BACKLOG"
    ):
        sip_uuid = unit_info.get("sip_uuid")
        LOGGER.info(
            "%s is a complete transfer, fetching SIP %s status.",
            unit_uuid,
            sip_uuid,
        )
        # Update DB to refer to this one
        unit = models.retrieve_unit_by_type_and_uuid(
            uuid=unit_uuid, unit_type=unit_type
        )
        models.update_unit_type_and_uuid(
            unit=unit, unit_type="ingest", uuid=sip_uuid
        )
        # Get SIP status
        url = "{}/api/ingest/status/{}/".format(am_url, sip_uuid)
        unit_info = utils._call_url_json(url, params)
        if isinstance(unit_info, int):
            error = errors.error_lookup(unit_info)
            if error is not None:
                return error
        sip_complete = (
            isinstance(unit_info, dict) and unit_info.get("status") == "COMPLETE"
        )
        # If complete, hide in dashboard
        # NOTE(review): unit.uuid is presumably the SIP UUID after the
        # update above -- confirm against models.update_unit_type_and_uuid.
        if hide_on_complete and sip_complete:
            LOGGER.info("Hiding SIP %s in dashboard", unit.uuid)
            url = "{}/api/ingest/{}/delete/".format(am_url, unit.uuid)
            LOGGER.debug("Method: DELETE; URL: %s; params: %s;", url, params)
            response = requests.delete(url, params=params)
            LOGGER.debug("Response: %s", response)
        # If complete and SIP status is 'UPLOADED', delete transfer source
        # files
        if delete_on_complete and sip_complete:
            am = AMClient(
                ss_url=ss_url,
                ss_user_name=ss_user,
                ss_api_key=ss_api_key,
                package_uuid=unit.uuid,
            )
            details = am.get_package_details()
            if not isinstance(details, dict):
                # The client reports failures as an integer error code;
                # never delete source files on an unverified package.
                LOGGER.warning(
                    "Could not get package details for SIP %s: %s",
                    unit.uuid,
                    errors.error_lookup(details),
                )
            elif details.get("status") == "UPLOADED":
                LOGGER.info(
                    "Deleting source files for SIP %s from watched "
                    "directory",
                    unit.uuid,
                )
                try:
                    shutil.rmtree(unit.path)
                    LOGGER.info("Source files deleted for SIP %s "
                                "deleted", unit.uuid)
                except OSError as e:
                    LOGGER.warning(
                        "Error deleting source files: %s. If "
                        "running this module remotely the "
                        "script might not have access to the "
                        "transfer source",
                        e,
                    )
    return unit_info
示例#10
0
def check_am_status(self, message, step_id, archive_id, transfer_name=None):
    """
    Check the Archivematica status of a transfer and update its Step.

    The periodic task named after the step is deleted once the step reaches
    a final state (completed or failed).

    :param message: Result of the package creation: a mapping with an "id"
        key, or the integer error code 1 or 3.
    :param step_id: Primary key of the Step being tracked.
    :param archive_id: Passed on to ``finalize`` together with ``step_id``.
    :param transfer_name: Transfer name used to find the matching AIP by
        its ``current_path``.
    """
    step = Step.objects.get(pk=step_id)
    task_name = f"Archivematica status for step: {step_id}"

    # Get the current configuration
    am = AMClient()
    am.am_url = AM_URL
    am.am_user_name = AM_USERNAME
    am.am_api_key = AM_API_KEY
    am.transfer_source = AM_TRANSFER_SOURCE
    am.ss_url = AM_SS_URL
    am.ss_user_name = AM_SS_USERNAME
    am.ss_api_key = AM_SS_API_KEY

    try:
        periodic_task = PeriodicTask.objects.get(name=task_name)
        am_status = {
            'status': "PROCESSING",
            'microservice': "Waiting for upload"
        }

        try:
            am_status = am.get_unit_status(message["id"])
        except TypeError as e:
            logger.warning("Error while checking archivematica status: %s", e)

            # Package creation yields 1 when Archivematica is not connected
            # (Error 500, 502 etc) and 3 when the request itself was wrong
            # (Error 400, 404 etc). Indexing such an integer raises the
            # TypeError handled here, so the step is failed for good.
            if message in (1, 3):
                step.set_output_data({"status": 1, "message": str(e)})
                step.set_status(Status.FAILED)
                periodic_task.delete()
                # Stop here: continuing would reset the step to
                # IN_PROGRESS and overwrite the error output below.
                return

            if step.status == Status.NOT_RUN:
                # As long as the package is in queue to upload get_unit_status returns nothing so a mock response is passed
                am_status = {
                    'status': "PROCESSING",
                    'microservice': "Waiting for upload",
                    'path': '',
                    'directory': '',
                    'name': 'Pending...',
                    'uuid': 'Pending...',
                    'message': 'Waiting for upload to Archivematica'
                }

        status = am_status["status"]
        microservice = am_status["microservice"]

        logger.info(f"Status for {step_id} is: {status}")

        # Needs to validate both because just status=complete does not guarantee that aip is stored
        if status == "COMPLETE" and microservice == "Remove the processing directory":
            # Archivematica does not return the uuid of a package AIP, so to
            # find the AIP details we look through all the AIPs for the one
            # with the same name. That gives the uuid and the path needed to
            # access the AIP file.

            # Changes the :: to __ because archivematica by default does this transformation and this is needed so we can read the correct file
            transfer_name_with_underscores = transfer_name.replace("::", "__")

            # Retrieves all the AIPs (needs AM_SS_* configuration)
            aip_list = am.aips()
            path_artifact = None
            for aip in aip_list:
                # Looks for aips with the same transfer name
                if transfer_name_with_underscores in aip["current_path"]:
                    aip_path = aip["current_path"]
                    aip_uuid = aip["uuid"]

                    am_status["aip_uuid"] = aip_uuid
                    am_status["aip_path"] = aip_path

                    path_artifact = create_path_artifact(
                        "AIP", os.path.join(AIP_UPSTREAM_BASEPATH, aip_path))

            # If the path artifact is found return complete otherwise set in progress and try again
            if path_artifact:
                am_status["artifact"] = path_artifact

                finalize(
                    self=self,
                    status=states.SUCCESS,
                    retval={"status": 0},
                    task_id=None,
                    args=[archive_id, step_id],
                    kwargs=None,
                    einfo=None,
                )

                step.set_finish_date()
                step.set_status(Status.COMPLETED)

                periodic_task.delete()
            else:
                step.set_status(Status.IN_PROGRESS)

        elif status == "FAILED" and microservice == "Move to the failed directory":
            step.set_status(Status.FAILED)

            periodic_task.delete()

        elif status == "PROCESSING":
            step.set_status(Status.IN_PROGRESS)

        step.set_output_data(am_status)

    except Exception as e:
        logger.warning(
            f"Error while archiving {step.id}. Archivematica pipeline is full or settings configuration is wrong."
        )
        logger.warning(e)
        # Record the failure first so the step is marked FAILED even if the
        # periodic task lookup below raises.
        step.set_status(Status.FAILED)
        periodic_task = PeriodicTask.objects.get(name=task_name)
        periodic_task.delete()
示例#11
0
def archivematica(self, archive_id, step_id, input_data):
    """
    Send the SIP of an archive to Archivematica and schedule status checks.

    Creates an Archivematica package for the archive's SIP folder, then
    registers a periodic "check_am_status" task for the step.

    :param archive_id: Primary key of the Archive whose SIP is sent.
    :param step_id: Primary key of the Step tracking this operation.
    :param input_data: Not used by this function.
    :returns: ``{"status": 0, ...}`` on success, or ``{"status": 1, ...}``
        with an error message on failure.
    """
    archive = Archive.objects.get(pk=archive_id)
    path_to_sip = archive.path_to_sip

    logger.info(f"Starting archiving {path_to_sip}")

    current_step = Step.objects.get(pk=step_id)
    current_step.set_status(Status.IN_PROGRESS)

    # The archive attached to the step is used from here on.
    step_archive = current_step.archive

    # Set task id
    current_step.set_task(self.request.id)

    sip_folder = ntpath.basename(path_to_sip)

    # Destination folder as Archivematica "sees" it on the local system
    archivematica_dst = os.path.join(AM_REL_DIRECTORY, sip_folder)

    # Adds an _ between Archive and the id because archivematica messes up with spaces
    transfer_name = sip_folder + "::Archive_" + str(step_archive.id)

    # Get configuration from archivematica from settings
    am = AMClient()
    am.am_url = AM_URL
    am.am_user_name = AM_USERNAME
    am.am_api_key = AM_API_KEY
    am.transfer_source = AM_TRANSFER_SOURCE
    am.transfer_directory = archivematica_dst
    am.transfer_name = transfer_name
    am.processing_config = "automated"

    # Create archivematica package
    logger.info(
        f"Creating archivematica package on Archivematica instance: {AM_URL} at directory {archivematica_dst} for user {AM_USERNAME}"
    )

    try:
        package = am.create_package()
        if package == 3:
            # On a request error (Error 400, Error 404 etc) the client
            # returns the number 3 instead of the package details, which
            # tells us am.create_package did not succeed.
            logger.error(
                f"Error while archiving {current_step.id}. Check your archivematica settings configuration."
            )
            current_step.set_status(Status.FAILED)
            return {
                "status": 1,
                "message": "Wrong Archivematica configuration"
            }

        step = Step.objects.get(pk=step_id)
        step.set_status(Status.WAITING)

        # Create the scheduler (fires every 5 seconds)
        schedule = IntervalSchedule.objects.create(
            every=5, period=IntervalSchedule.SECONDS)
        # Create a periodic task that checks the status of archivematica on
        # that schedule; it expires after 600 minutes.
        PeriodicTask.objects.create(
            interval=schedule,
            name=f"Archivematica status for step: {current_step.id}",
            task="check_am_status",
            args=json.dumps(
                [package, current_step.id, step_archive.id, transfer_name]),
            expires=timezone.now() + timedelta(minutes=600),
        )

    except Exception as e:
        logger.error(
            f"Error while archiving {current_step.id}. Check your archivematica settings configuration."
        )
        current_step.set_status(Status.FAILED)
        # Store the error text, not the exception object, so the output
        # data and the task result stay serialisable.
        error_text = str(e)
        current_step.set_output_data({"status": 1, "message": error_text})
        return {"status": 1, "message": error_text}

    return {"status": 0, "message": "Uploaded to Archivematica"}
示例#12
0
def get_status(
    am_url,
    am_user,
    am_api_key,
    ss_url,
    ss_user,
    ss_api_key,
    unit_uuid,
    unit_type,
    hide_on_complete=False,
    delete_on_complete=False,
):
    """
    Get status of the SIP or Transfer with unit_uuid.

    When a transfer is COMPLETE and not in the backlog, the stored unit is
    switched to its SIP and the SIP's status is fetched and returned instead.

    :param str am_url: Base URL of the Archivematica API.
    :param str am_user: Sent as the ``username`` request parameter.
    :param str am_api_key: Sent as the ``api_key`` request parameter.
    :param str ss_url: Passed to ``AMClient`` as ``ss_url``.
    :param str ss_user: Passed to ``AMClient`` as ``ss_user_name``.
    :param str ss_api_key: Passed to ``AMClient`` as ``ss_api_key``.
    :param str unit_uuid: UUID of the unit to query for.
    :param str unit_type: 'ingest' or 'transfer'
    :param bool hide_on_complete: Hide the unit in the dashboard if COMPLETE
    :param bool delete_on_complete: Remove ``unit.path`` once the SIP is
        COMPLETE and its package status is 'UPLOADED'.
    :returns: Dict with status of the unit from Archivematica, the message
        from ``errors.error_lookup`` when the status call returned an error
        code with a registered message, or whatever else the status call
        returned.
    """
    params = {"username": am_user, "api_key": am_api_key}

    # Get status
    url = "{}/api/{}/status/{}/".format(am_url, unit_type, unit_uuid)
    unit_info = utils._call_url_json(url, params)
    if isinstance(unit_info, int):
        error = errors.error_lookup(unit_info)
        if error is not None:
            return error
    # Only a mapping can carry a status; this also keeps an integer error
    # code without a registered message away from ``.get``.
    is_complete = (
        isinstance(unit_info, dict) and unit_info.get("status") == "COMPLETE"
    )

    # If complete, hide in dashboard
    if hide_on_complete and is_complete:
        LOGGER.info("Hiding %s %s in dashboard", unit_type, unit_uuid)
        url = "{}/api/{}/{}/delete/".format(am_url, unit_type, unit_uuid)
        LOGGER.debug("Method: DELETE; URL: %s; params: %s;", url, params)
        response = requests.delete(url, params=params)
        LOGGER.debug("Response: %s", response)

    # If Transfer is complete, get the SIP's status
    if (
        is_complete
        and unit_type == "transfer"
        and unit_info.get("sip_uuid") != "BACKLOG"
    ):
        sip_uuid = unit_info.get("sip_uuid")
        LOGGER.info(
            "%s is a complete transfer, fetching SIP %s status.",
            unit_uuid,
            sip_uuid,
        )
        # Update DB to refer to this one
        unit = models.retrieve_unit_by_type_and_uuid(
            uuid=unit_uuid, unit_type=unit_type
        )
        models.update_unit_type_and_uuid(
            unit=unit, unit_type="ingest", uuid=sip_uuid
        )
        # Get SIP status
        url = "{}/api/ingest/status/{}/".format(am_url, sip_uuid)
        unit_info = utils._call_url_json(url, params)
        if isinstance(unit_info, int):
            error = errors.error_lookup(unit_info)
            if error is not None:
                return error
        sip_complete = (
            isinstance(unit_info, dict) and unit_info.get("status") == "COMPLETE"
        )
        # If complete, hide in dashboard
        # NOTE(review): unit.uuid is presumably the SIP UUID after the
        # update above -- confirm against models.update_unit_type_and_uuid.
        if hide_on_complete and sip_complete:
            LOGGER.info("Hiding SIP %s in dashboard", unit.uuid)
            url = "{}/api/ingest/{}/delete/".format(am_url, unit.uuid)
            LOGGER.debug("Method: DELETE; URL: %s; params: %s;", url, params)
            response = requests.delete(url, params=params)
            LOGGER.debug("Response: %s", response)
        # If complete and SIP status is 'UPLOADED', delete transfer source
        # files
        if delete_on_complete and sip_complete:
            am = AMClient(
                ss_url=ss_url,
                ss_user_name=ss_user,
                ss_api_key=ss_api_key,
                package_uuid=unit.uuid,
            )
            details = am.get_package_details()
            if not isinstance(details, dict):
                # The client reports failures as an integer error code;
                # never delete source files on an unverified package.
                LOGGER.warning(
                    "Could not get package details for SIP %s: %s",
                    unit.uuid,
                    errors.error_lookup(details),
                )
            elif details.get("status") == "UPLOADED":
                LOGGER.info(
                    "Deleting source files for SIP %s from watched " "directory",
                    unit.uuid,
                )
                try:
                    shutil.rmtree(unit.path)
                    LOGGER.info("Source files deleted for SIP %s " "deleted", unit.uuid)
                except OSError as e:
                    LOGGER.warning(
                        "Error deleting source files: %s. If "
                        "running this module remotely the "
                        "script might not have access to the "
                        "transfer source",
                        e,
                    )
    return unit_info