## Example #1
def get_documents_url():
    """Handle /get_documents requests.

    Resolves the API user from the request args and delegates to
    document_getter.get_documents(); on any error, the message is
    logged/emailed and returned to the caller instead of raising.
    """
    try:
        request_args = dict(flask.request.args)
        api_user = util.get_api_user(request_args)
        result = document_getter.get_documents(
            request_args, api_user, mongo_collection
        )
    except Exception as exc:
        error_text = f"ERROR: from /get_documents: {exc}"
        util.log_email(error_text, error=True)
        return error_text

    return result
## Example #2
def archive_failed_url():
    """Handle /archive_failed requests (admin-only).

    Non-admin callers receive an error message; otherwise the request
    is delegated to status_updater.archive_failed(). Any exception is
    logged/emailed and its text returned.
    """
    try:
        request_args = dict(flask.request.args)
        api_user = util.get_api_user(request_args)
        if not api_user.get("admin"):
            raise Exception("/archive_failed is only available to admins.")
        result = status_updater.archive_failed(
            request_args, api_user, mongo_collection
        )
    except Exception as exc:
        error_text = f"ERROR: from /archive_failed: {exc}"
        util.log_email(error_text, error=True)
        return error_text

    return result
## Example #3
def get_last_document_url():
    """Handle /get_last_document requests (admin-only).

    Non-admin callers receive an error message; otherwise the request
    is delegated to document_getter.get_last_document(). Any exception
    is logged/emailed and its text returned.
    """
    try:
        request_args = dict(flask.request.args)
        api_user = util.get_api_user(request_args)
        if not api_user.get("admin"):
            raise Exception("/get_last_document is only available to admins.")
        result = document_getter.get_last_document(
            request_args, api_user, mongo_collection
        )
    except Exception as exc:
        error_text = f"ERROR: from /get_last_document: {exc}"
        util.log_email(error_text, error=True)
        return error_text

    return result
def mongo_ingest(metadata, collection):
    """
    Insert (or reset for retry) an archive-request metadata document.

    ``metadata`` must be a dict containing at least an "archivedPath"
    key, which is used as the unique lookup key in ``collection``.

    Returns the stored document, which is one of:
      * the freshly inserted metadata (bookkeeping fields seeded and
        "_id" stringified) when no document exists for the path,
      * the document re-read after being reset to "ready_for_pbs" when
        the previous attempt failed exactly once,
      * the existing document unchanged when its status is a dry run.

    Raises Exception when the path is already archived (and not a dry
    run), or when any step of the insertion fails.
    """
    # Look up any prior request for the same archive destination.
    doc = collection.find_one({"archivedPath": metadata["archivedPath"]})
    try:
        if not doc:
            # First request for this path: seed status/timestamp fields
            # that later pipeline stages will fill in.
            metadata.update(
                {
                    "when_ready_for_pbs": None,
                    "when_submitted_to_pbs": None,
                    "when_archival_queued": None,
                    "when_archival_started": None,
                    "when_archival_completed": None,
                    "failed_multiple": False,
                    "archival_status": "processing_metadata",
                }
            )
            # Normalize keys via scrub_dict_keys before insertion
            # (presumably makes keys mongo-safe — see its definition).
            metadata = scrub_dict_keys(metadata)
            inserted_id = collection.insert_one(metadata).inserted_id
            log_email(f"Metadata inserted with id: {inserted_id}")
            metadata["_id"] = str(inserted_id)
            return metadata

        elif ("failed" in doc["archival_status"]) and (
            doc["failed_multiple"] is not True
        ):  # failed 1 time previously, allow this 1 retry
            mongo_set(
                "archivedPath",
                metadata["archivedPath"],
                {"archival_status": "ready_for_pbs", "failed_multiple": True},
                collection,
            )

            # Re-read so the caller sees the updated status fields.
            doc = collection.find_one({"archivedPath": metadata["archivedPath"]})
            return doc

        elif "dry_run" in doc["archival_status"]:  # do nothing, return doc
            return doc

        else:  # already archived and not a dry_run
            msg = f"{metadata['archivedPath']} already in Mongo."
            log_email(msg)
            raise Exception(msg)

    except Exception as e:
        # NOTE: this also re-wraps the "already in Mongo" exception
        # raised just above.
        raise Exception(f"Metadata insertion failed with error: {e}")
def validate_source_path(*, action: str, path: str, parent: Optional[str] = None):
    """Validate a source directory for an archive/retrieve request.

    :param action: "archive" or "retrieve"; only affects log wording.
    :param path: absolute path of an existing directory.
    :param parent: optional existing absolute directory that ``path``
        must live under.
    :returns: the validated path as a string.
    :raises AssertionError: if any validation fails.
    """
    assert path, f"{action} source path ({path}) must not be empty"
    path = Path(path)
    assert path.is_absolute(), f"{action} source path ({path}) must be an absolute path"
    assert path.is_dir(), f"{action} source path ({path}) is not a directory"
    if parent:
        parent = Path(parent)
        assert (
            parent.exists() and parent.is_absolute()
        ), f"Parent directory {parent} isn't absolute"
        # BUG FIX: Path.relative_to() raises ValueError rather than
        # returning a falsy value, so the old `assert path.relative_to(...)`
        # could never produce its message. Convert the ValueError into
        # the intended AssertionError.
        try:
            path.relative_to(parent)
        except ValueError:
            raise AssertionError(f"{path} does not begin with '{parent}'")
    if action == "archive":
        log_email(f"{path} will be archived")
    else:
        log_email(f"{path} will be retrieved from the archive")
    return str(path)
def get_mongo_client():
    """Build a pymongo MongoClient from the module-level config.

    :returns: a connected MongoClient, or None (after logging/emailing
        the error) if the connection cannot be established.
    """
    # BUG FIX: host/port must be read before the try block — previously
    # they were assigned inside it, so a failure in the credential
    # lookup/quoting raised NameError in the except handler, masking
    # the real error.
    host = config.mongo.get("host")
    port = config.mongo.get("port")
    try:
        # Percent-escape credentials so special characters survive the URI.
        user = urllib.parse.quote_plus(config.mongo.get("user"))
        passwd = urllib.parse.quote_plus(config.mongo.get("passwd"))
        uri = f"mongodb://{user}:{passwd}@{host}:{port}"
        client_obj = pymongo.MongoClient(uri, authSource=config.mongo.get("authdb"))
        # "ismaster" round-trips to the server: verifies both that the
        # client was created and that the connection is usable.
        client_obj.admin.command("ismaster")
    except Exception as e:
        log_email(f"ERROR: could not connect to '{host}:{port}': {e}")
        return None

    return client_obj
def get_mongo_collection(
    client_obj=None,
    database_name=None,
    collection_name=None,
):
    """Return a pymongo collection object, creating a client if needed.

    :param client_obj: existing MongoClient; a new one is created when
        this is not a MongoClient instance.
    :param database_name: defaults to config.mongo["db"].
    :param collection_name: defaults to config.mongo["collection"].
    :returns: the collection object, or None on any failure (logged).
    """
    # Resolve config-driven defaults at call time. Previously they were
    # evaluated once at import time (in the def line), so later config
    # changes were silently ignored.
    if database_name is None:
        database_name = config.mongo.get("db")
    if collection_name is None:
        collection_name = config.mongo.get("collection")
    if not isinstance(client_obj, pymongo.mongo_client.MongoClient):
        client_obj = get_mongo_client()
        if not client_obj:
            log_email("ERROR: get_mongo_client() failed.")
            return None
    try:
        db_obj = client_obj[database_name]
        collection_obj = db_obj[collection_name]
    except Exception as e:
        log_email(
            "ERROR: could not connect to collection "
            + f"'{database_name}.{collection_name}': {e}"
        )
        return None

    return collection_obj
def submit_to_pbs(
    source: str, dest: str, action: str, group: str = None, obj_id: str = None
):
    """Submit an archive/retrieve job to PBS via qsub.

    :param source: input path, passed to the job as IN.
    :param dest: output path, passed as OUT.
    :param action: "archive" selects the archive script; anything else
        selects the retrieve script.
    :param group: optional system group, passed as GROUP.
    :param obj_id: optional mongo document id, passed as ID.
    :returns: the PBS job id string on success, or None on failure
        (the error is logged/emailed).
    """
    # TODO: copy shell scripts to this repo and adjust curl for correct
    # name of endpoints
    script = (
        config.PBS_ARCHIVE_SCRIPT if action == "archive" else config.PBS_RETRIEVE_SCRIPT
    )

    # NOTE(review): values are interpolated into a shell=True command
    # string; a path containing quotes could break or inject into the
    # command. Consider subprocess.run([...], shell=False) with a list.
    cmd = f'/usr/local/bin/qsub -v IN="{source}",OUT="{dest}",GROUP="{group}",ID="{obj_id}" "{script}"'
    log_email(f"Submitting job: {cmd}")

    proc = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    (out, err) = proc.communicate()

    try:
        if proc.returncode == 0:
            job_id = out.decode().replace("\n", "")
        else:
            raise ValueError("error submitting job: " + err.decode())
    # BUG FIX: the handler previously bound the exception to `e`,
    # shadowing (and, in Py3, deleting) the stderr bytes also named `e`
    # that the final log line decodes. Renamed to avoid the collision.
    except Exception as exc:
        msg = f"pbs error: {exc}"
        log_email(msg)
        return None

    log_email(f"Submitted to PBS: {job_id}\nstderr:{err.decode()}\nstdout:{out.decode()}")
    return job_id
## Example #9
def get_permitted_records_list(user_dict, cursor):
    """Filter cursor records down to those the user may see.

    Admins receive every record. Other users receive the records whose
    "system_groups" list shares at least one group with the user's
    "groups_list". Records missing "system_groups", or whose value is
    not a list, are logged and skipped.

    :param user_dict: user info dict; "admin" flag and "groups_list"
        are consulted.
    :param cursor: iterable of record dicts (e.g. a pymongo cursor).
    :returns: list of permitted records.
    :raises Exception: if user_dict["groups_list"] is not a list.
    """
    if user_dict.get("admin"):
        return list(cursor)

    user_groups_list = user_dict.get("groups_list")
    if not isinstance(user_groups_list, list):
        raise Exception(
            util.gen_msg(
                f"user_dict['groups_list'] '{user_groups_list}' is not a list; user_dict: '{user_dict}'"
            )
        )

    records = []
    for record in cursor:
        if "system_groups" not in record:
            util.log_email(
                util.gen_msg(f"record has no system_groups key; record: '{record}'")
            )
            continue
        allowed_groups_list = record.get("system_groups")
        if not isinstance(allowed_groups_list, list):
            util.log_email(
                util.gen_msg(
                    f"record['system_groups'] '{allowed_groups_list}' is not a list; record: '{record}'"
                )
            )
            # BUG FIX: previously fell through and iterated the
            # non-list value, raising TypeError. Skip the record.
            continue
        if any(group in user_groups_list for group in allowed_groups_list):
            records.append(record)

    return records
def validate_destination_path(*, action: str, path: str, parent: Optional[str] = None):
    """Validate a destination path for an archive/retrieve request.

    :param action: "archive" or "retrieve". For non-retrieve actions
        the destination must not already exist.
    :param path: absolute destination path.
    :param parent: optional existing absolute directory that ``path``
        must live under.
    :returns: the validated path as a string, or False when an archive
        destination already exists (callers treat a falsy return as a
        duplicate request rather than an error).
    :raises AssertionError: on empty/relative paths or a bad parent.
    """
    assert path, f"{action} destination path ({path}) must not be empty"
    log_email(f"received path: {path}")
    path = Path(path)
    assert (
        path.is_absolute()
    ), f"{action} destination path ({path}) must be an absolute path"
    if action != "retrieve":
        # BUG FIX: an `assert not path.exists()` here made this graceful
        # branch unreachable, so callers that depend on a False return
        # (to send a duplicate-request notice) never got one. Report the
        # existing destination gracefully instead of raising.
        if path.exists():
            log_email(f"path for {action} request {path} exists")
            return False
    if parent:
        parent = Path(parent)
        assert (
            parent.exists() and parent.is_absolute()
        ), f"Parent directory {parent} isn't absolute"
        # BUG FIX: Path.relative_to() raises ValueError rather than
        # returning falsy; convert it into the intended AssertionError.
        try:
            path.relative_to(parent)
        except ValueError:
            raise AssertionError(f"{path} does not begin with '{parent}'")
    if action == "archive":
        log_email(f"archiver will deposit files at {path}")
    else:
        log_email(f"archiver will retrieve files to {path}")
    return str(path)
## Example #11
import sys

import flask
import flask_session
import markupsafe

## local imports:
import config
import util
import status_updater
import document_getter

## init mongodb collection object for config.mongo['collection'];
##   then can pass collection object to modules that need it:

mongo_collection = util.get_mongo_collection()
if not mongo_collection:
    util.log_email("ERROR: could not connect to collection.", error=True)
    ## NOTE(review): `sys` is used here but is not imported in the
    ##   visible import block — confirm an `import sys` exists, or this
    ##   exit path will raise NameError instead of exiting with code 3.
    sys.exit(3)

## initialize flask object:
app = flask.Flask(__name__.split(".")[0])

#############################################################################################################
## ROUTES:


@app.route("/archive", methods=["POST"])
def archive_url():
    url = "/archive"
    if flask.request.is_json:  ## submitted parameters thru api call w/ json
        return f"ERROR: POST reached unimplemented route '{url}'; args: '{flask.request.json}'"
    else:  ## submitted parameters thru web page form
def mongo_delete_doc(key, val, collection):
    """Delete the single document where ``key`` equals ``val``.

    When ``key`` is "_id", ``val`` is interpreted as an ObjectId string.

    :param key: field name to match.
    :param val: value to match (ObjectId string for "_id").
    :param collection: pymongo collection to delete from.
    """
    query = {"_id": ObjectId(val)} if "_id" == key else {key: val}
    collection.delete_one(query)
    # BUG FIX: the log message was a bare f"Deleting" with no
    # placeholders — it recorded nothing about what was deleted.
    log_email(f"Deleting document where {key} == {val}")
def retrieve_archived_directory(
    *, json_arg, api_user, collection, debug: bool = False
) -> str:
    """
    :description: Retrieve a number of items from the archive.

    :param json_arg: A decoded JSON string which is at its top level a
    dictionary.  Must have the following keys: requested_dirs, and
    api_key. The delivery path is inferred.

    :param api_user: dict describing the requesting user (fname,
    username, ...).

    :param collection: mongo collection holding archive metadata.

    :param debug: Cause a dry-run of submitting to pbs; the request will be
    ignored.

    :returns: a human-readable status/error message.
    """
    log_email(
        f"{api_user['fname']} ({api_user['username']}) retrieving: {json_arg['requested_dirs']}"
    )
    try:
        # An explicit "debug" key in the request overrides the parameter.
        if "debug" in json_arg.keys():
            debug = json_arg["debug"]
        number_submitted = 0
        # json_arg["requested_dirs"] is a list like [obj_id1, obj_id2]
        for obj_id in json_arg["requested_dirs"]:
            if not add_current_user(api_user, obj_id, collection):
                raise Exception(f"Could not add {api_user} to metadata for {obj_id}")
            metadata = get_document_by_objectid_1(obj_id, api_user, collection)
            source_path = metadata["archivedPath"]
            destination_path = f"/fastscratch/recovered{source_path}"
            system_groups = metadata.get("system_groups")
            if not system_groups:
                raise Exception(f"Error getting 'system_groups' for obj_id '{obj_id}'")
            # Skip (rather than fail) items the user lacks group access to.
            intersect = permitted_groups(api_user, metadata)
            if not intersect:
                log_email(
                    f"user {api_user['username']} does not have permission to retrieve {obj_id}"
                )
                continue
            if not debug:
                job_id = submit_to_pbs(
                    source_path, destination_path, "retrieve", intersect[0], obj_id
                )
                # Record this retrieval attempt on the document.
                if "retrievals" not in metadata.keys():
                    metadata["retrievals"] = []
                retrievals = metadata["retrievals"]
                if job_id:
                    next_retrieval = {
                        "job_id": job_id,
                        "retrieval_status": "submitted",
                        "when_retrieval_submitted": get_timestamp(),
                    }
                    number_submitted += 1
                else:
                    next_retrieval = {
                        "job_id": None,
                        "retrieval_status": "failed",
                        "when_retrieval_failed": get_timestamp(),
                    }
                    log_email(f"Error submitting to pbs for {obj_id}", True)
                retrievals.append(next_retrieval)
                mongo_set(
                    "archivedPath", source_path, {"retrievals": retrievals}, collection
                )
            else:
                # Dry run: stop after the first item (existing behavior).
                return f"Dry run request to retrieve {obj_id}. No submission to pbs."

        # BUG FIX: the second half of this message used to be a stray
        # `+" ..."` statement on its own line — a unary-plus expression
        # that raised TypeError at runtime and truncated the message.
        return_msg = (
            f"{number_submitted} out of {len(json_arg['requested_dirs'])}"
            + " retrieval requests successfully submitted."
        )
        log_email(return_msg)
        return return_msg

    except Exception as e:
        err_msg = f"Error processing retrieval request: {e}"
        log_email(err_msg)
        return err_msg
def archive_directory(json_arg, api_user, collection, debug: bool = False):
    """
    :param json_arg: A decoded JSON string which is at its top level a
    dictionary.  Must have the following keys: requested_dest_dir,
    source_folder, and metadata.  Additional keys are ignored.

    :param api_user: dict describing the requesting user.

    :param collection: mongo collection holding archive metadata.

    :param debug: Cause a dry-run of submitting to PBS; the result will be
    metadata ingested and no submission to pbs.

    :returns: a flask-style response — a dict, a message string, or a
    (body, status_code) tuple.
    """
    try:  # validate request
        request_error = request_invalid(json_arg)
        if request_error:
            raise ValueError(request_error)
    except Exception as e:
        log_email(f"Error processing request: {e}")
        return f"Error processing request: {e}", 400

    try:  # validate and preprocess metadata
        metadata = process_metadata(json_arg, api_user)
        metadata_error = metadata_invalid(metadata)
        if metadata_error:
            raise ValueError(metadata_error)

    except Exception as e:
        return f"Error processing metadata: {e}", 400

    log_email(
        f"{api_user['fname']} {api_user['lname']} ({api_user['username']}) requesting"
        + f" to archive {metadata['source_folder']}"
    )

    try:  # create and insert archivedPath
        metadata = insert_archived_path(metadata)
    except Exception as e:
        return (f"Error processing and/or inserting archivedPath: {e}", 400)

    try:  # insert metadata into mongoDB
        # BUG FIX: mongo_ingest requires the collection argument; it was
        # previously called with metadata only, raising TypeError.
        metadata = mongo_ingest(metadata, collection)
    except Exception as e:
        return f"Error ingesting metadata: {e}", 400

    try:  # validate tentative archivedPath
        source_path = metadata["source_folder"]
        destination_path = validate_destination_path(
            action="archive", path=metadata["archivedPath"], parent="/"
        )
        if destination_path:
            mongo_set(
                "archivedPath",
                destination_path,
                {
                    "when_ready_for_pbs": get_timestamp(),
                    "archival_status": "ready_for_pbs",
                },
                collection,
            )
        else:  # requested archivedPath not valid
            send_to_name = (
                f"{api_user['fname'].capitalize()} {api_user['lname'].capitalize()}"
            )
            send_email(api_user["email"], dup_archive_request_body(), send_to_name)
            raise Exception(
                f"archive destination path '{destination_path}' must not exist."
            )

    except Exception as e:  # destination_path already in archive
        source_path = metadata["source_folder"]
        destination_path = metadata["archivedPath"]
        status = metadata["archival_status"]
        if "completed" not in status:
            mongo_set(
                "archivedPath",
                destination_path,
                {
                    "archival_status": "failed",
                    "exception_caught": str(e),
                    "when_archival_failed": get_timestamp(),
                },
                collection,
            )
        msg = f"Error while validating tentative archivedPath: {e}"
        log_email(msg)
        return msg, 400

    try:
        # An explicit "debug" key in the request overrides the parameter.
        if "debug" in json_arg.keys():
            debug = json_arg["debug"]
        if not debug:
            if is_valid_for_pbs(destination_path):
                mongo_set(
                    "archivedPath",
                    destination_path,
                    {"archival_status": "submitting"},
                    collection,
                )
                job_id = submit_to_pbs(source_path, destination_path, "archive")
                if job_id:  # successfully submitted
                    mongo_set(
                        "archivedPath",
                        destination_path,
                        {
                            "archival_status": "submitted",
                            "when_submitted_to_pbs": get_timestamp(),
                            "job_id": job_id,
                        },
                        collection,
                    )
                    return {"id": str(metadata["_id"])}
                else:  # failed
                    mongo_set(
                        "archivedPath",
                        destination_path,
                        {
                            "archival_status": "failed",
                            "when_archival_failed": get_timestamp(),
                            "job_id": job_id,
                        },
                        collection,
                    )
                    return "Submitting to pbs failed, please see logs.", 400

            else:
                status = metadata["archival_status"]
                # BUG FIX: the second half of this message used to be a
                # stray `+f"..."` statement — a unary-plus expression
                # that raised TypeError at runtime.
                msg = (
                    f"Archive request denied. Current status of "
                    + f"{metadata['archivedPath']}: {status}"
                )
                log_email(msg)
                return msg, 400
        else:
            if "completed" not in metadata["archival_status"]:
                mongo_set(
                    "archivedPath",
                    metadata["archivedPath"],
                    {"archival_status": "dry_run"},
                    collection,
                )
                return (
                    {
                        "message": f"Dry run request, metadata for '{metadata['archivedPath']}'"
                        + " present in mongo and not archived. Request not submitted"
                    },
                    200,
                )
            return (
                f"Dry run request and {metadata['archivedPath']} previously "
                + "archived. Request not submitted.",
                200,
            )

    except Exception as e:
        return f"Failed to send to queue with error: {e}", 400