Пример #1
0
def upload_unlock(upload_id: int) -> Response:
    """
    Unlock upload workspace.

    Sets the lock field of the workspace's database entry to
    ``Upload.UNLOCKED``. Unlocking a workspace that is already unlocked is
    only logged; the request still reports success.

    Parameters
    ----------
    upload_id : int
        The unique identifier for upload workspace.

    Returns
    -------
    Standard Response tuple containing response content, HTTP status, and HTTP headers.

    Raises
    ------
    NotFound
        No database entry exists for ``upload_id``.
    InternalServerError
        An ``IOError`` or any other unexpected exception was raised while
        handling the request. The original exception is attached as the cause.

    """
    logger.info("%s: Unlock upload workspace.", upload_id)

    try:
        # Make sure we have an upload_db_data to work with
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Invalid workspace identifier
            raise NotFound(UPLOAD_NOT_FOUND)

        # Unlock upload workspace; only touch the database when the lock
        # state actually changes.
        if upload_db_data.lock == Upload.UNLOCKED:
            logger.info("%s: Unlock: Workspace is already unlocked.",
                        upload_id)
        else:
            upload_db_data.lock = Upload.UNLOCKED

            # Store in DB
            uploads.update(upload_db_data)

        response_data = {'reason': UPLOAD_UNLOCKED_WORKSPACE}
        status_code = status.OK

    except IOError as ioe:
        logger.error("%s: Unlock workspace request failed ", upload_id)
        # NOTE(review): CANT_DELETE_FILE looks copied from the delete path;
        # kept unchanged so the client-visible message does not change.
        raise InternalServerError(CANT_DELETE_FILE) from ioe
    except NotFound as nf:
        logger.info("%s: Unlock workspace: %s", upload_id, nf)
        raise
    except Exception as ue:
        logger.info(
            "Unknown error in unlock workspace. "
            " Add except clauses for '%s'. DO IT NOW!", ue)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR) from ue

    headers = {'ARXIV-OWNER': upload_db_data.owner_user_id}
    return response_data, status_code, headers
Пример #2
0
def sanitize_upload(upload_id: int,
                    file: FileStorage,
                    with_sleep: int = 15) -> Dict[str, Any]:
    """
    Process an uploaded file for an existing upload and record the outcome.

    Looks up the upload record, runs the upload processor on ``file``, then
    stores the processing warnings and the start/completion timestamps on the
    record and saves it.

    Parameters
    ----------
    upload_id : int
        Identifier used to retrieve the upload record and to construct the
        upload processor.
    file : FileStorage
        Upload file/archive to be processed.
    with_sleep : int
        Number of seconds to sleep before processing starts. Temporary; the
        sleep is slated for removal (see TODO below).

    Returns
    -------
    dict
        ``'upload_id'`` holding the given identifier and ``'result'`` holding
        ``len(upload.name)`` of the stored record. Final shape is still TBD.

    Raises
    ------
    RuntimeError
        No upload record exists for ``upload_id``.

    """
    print(f'Task: Upload task for {upload_id}')

    record: Optional[Upload] = uploads.retrieve(upload_id)
    if record is None:
        # Revisit how to handle error
        raise RuntimeError('No such thing! %s' % upload_id)

    # Timing starts before the processor is built and before the sleep.
    started_at = datetime.now(UTC)
    processor = filemanager.process.upload.Upload(upload_id)

    # TODO: Remember to get rid of this sleep statement
    time.sleep(with_sleep)

    # Process upload
    processor.process_upload(file)
    finished_at = datetime.now(UTC)

    # Collect information we want to retain
    # (storing the file list is still outstanding)
    record.lastupload_logs = str(processor.get_warnings())
    record.lastupload_start_datetime = started_at
    record.lastupload_completion_datetime = finished_at
    record.state = 'Active'

    # Save to DB
    uploads.update(record)

    print(f'Task: Completed upload task for {upload_id}')

    outcome = {'upload_id': upload_id, 'result': len(record.name)}
    return outcome
Пример #3
0
def upload_unrelease(upload_id: int) -> Response:
    """
    Unrelease returns released workspace to active state.

    Reverses previous request to release workspace.

    Note that unrelease request does NOT restore workspace that has
    already been removed from filesystem.

    Parameters
    ----------
    upload_id : int
        The unique identifier for upload workspace.

    Returns
    -------
    dict
        Response content: a ``'reason'`` entry holding
        ``UPLOAD_UNRELEASED_WORKSPACE``.
    int
        An HTTP status code.
    dict
        Some extra headers to add to the response (``ARXIV-OWNER``).

    Raises
    ------
    NotFound
        No database entry exists for ``upload_id``, or the workspace is in
        the ``Upload.DELETED`` state.
    InternalServerError
        The workspace is in a state other than deleted, active or released,
        or an ``IOError`` / other unexpected exception was raised while
        handling the request.

    """
    # Again, as with delete workspace, authentication, authorization, and
    # existence of workspace is verified in route level

    # Expect workspace to be in RELEASED state.

    logger.info("%s: Unrelease upload workspace.", upload_id)

    try:
        # Make sure we have an upload_db_data to work with
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Invalid workspace identifier
            raise NotFound(UPLOAD_NOT_FOUND)

        if upload_db_data.state == Upload.DELETED:
            # A deleted workspace cannot be brought back.
            raise NotFound(UPLOAD_WORKSPACE_ALREADY_DELETED)

        if upload_db_data.state == Upload.ACTIVE:
            # Already active: nothing to change. Should this be an error?
            logger.info("%s: Unrelease: Workspace is already active.",
                        upload_id)
        elif upload_db_data.state == Upload.RELEASED:
            logger.info("%s: Unrelease upload workspace.", upload_id)

            upload_db_data.state = Upload.ACTIVE

            # Store in DB
            uploads.update(upload_db_data)
        else:
            # Without this branch an unrecognised state left response_data
            # and status_code unbound, so the return below crashed with
            # UnboundLocalError. Fail explicitly instead.
            logger.error(
                "%s: Unrelease: Workspace is in unexpected state '%s'.",
                upload_id, upload_db_data.state)
            raise InternalServerError(UPLOAD_UNKNOWN_ERROR)

        response_data = {'reason': UPLOAD_UNRELEASED_WORKSPACE}
        status_code = status.OK

    except IOError as ioe:
        logger.error("%s: Unrelease workspace request failed.", upload_id)
        # NOTE(review): CANT_DELETE_FILE looks copied from the delete path;
        # kept unchanged so the client-visible message does not change.
        raise InternalServerError(CANT_DELETE_FILE) from ioe
    except NotFound as nf:
        logger.info("%s: Unrelease workspace: '%s'", upload_id, nf)
        raise
    except InternalServerError:
        # Already logged above; do not report it again as an unknown error.
        raise
    except Exception as ue:
        logger.info(
            "Unknown error in unrelease workspace. "
            " Add except clauses for '%s'. DO IT NOW!", ue)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR) from ue

    headers = {'ARXIV-OWNER': upload_db_data.owner_user_id}
    return response_data, status_code, headers
Пример #4
0
def upload_lock(upload_id: int) -> Response:
    """
    Lock upload workspace.

    Prohibit all client operations on upload workspace.

    Lock may indicate process is using workspace content that otherwise
    might produce unknown results if workspace is updated during this process.
    Compile and publish are examples.

    Admins will be able to unlock upload workspace.

    Locking a workspace that is already locked is only logged; the request
    still reports success.

    Parameters
    ----------
    upload_id : int
        The unique identifier for upload workspace.

    Returns
    -------
    Standard Response tuple containing response content, HTTP status, and HTTP headers.

    Raises
    ------
    NotFound
        No database entry exists for ``upload_id``.
    InternalServerError
        An ``IOError`` or any other unexpected exception was raised while
        handling the request. The original exception is attached as the cause.

    """
    logger.info("%s: Lock upload workspace.", upload_id)

    try:
        # Make sure we have an upload_db_data to work with
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Invalid workspace identifier
            raise NotFound(UPLOAD_NOT_FOUND)

        # Lock upload workspace; only touch the database when the lock
        # state actually changes.
        if upload_db_data.lock == Upload.LOCKED:
            logger.info("%s: Lock: Workspace is already locked.", upload_id)
        else:
            upload_db_data.lock = Upload.LOCKED

            # Store in DB
            uploads.update(upload_db_data)

        response_data = {'reason': UPLOAD_LOCKED_WORKSPACE}
        status_code = status.OK

    except IOError as ioe:
        logger.error("%s: Lock workspace request failed ", upload_id)
        # NOTE(review): CANT_DELETE_FILE looks copied from the delete path;
        # kept unchanged so the client-visible message does not change.
        raise InternalServerError(CANT_DELETE_FILE) from ioe
    except NotFound as nf:
        logger.info("%s: Lock: %s", upload_id, nf)
        raise
    except Exception as ue:
        logger.info(
            "Unknown error lock workspace. "
            " Add except clauses for '%s'. DO IT NOW!", ue)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR) from ue

    headers = {'ARXIV-OWNER': upload_db_data.owner_user_id}
    return response_data, status_code, headers
Пример #5
0
def upload(upload_id: Optional[int],
           file: FileStorage,
           archive: str,
           user: auth_domain.User,
           ancillary: bool = False) -> Response:
    """
    Upload individual files or compressed archive into specified workspace.

    Unpack, sanitize, and add files to upload workspace. When ``upload_id``
    is ``None`` a new workspace entry is created first and the file is
    uploaded into it.

    Parameters
    ----------
    upload_id : int or None
        The unique identifier for the upload_db_data in question. ``None``
        requests creation of a new workspace.
    file : :class:`FileStorage`
        File archive to be processed.
    archive : str
        Archive submission is targeting. Oversize thresholds are currently
        specified at the archive level. May be ``None``; a new workspace is
        then stored with an empty archive value.
    user : :class:`auth_domain.User`
        User making the request. ``user.user_id`` is stored as the owner of a
        newly created workspace.
    ancillary : bool
        If ``True``, the file is to be treated as an ancillary file. This means
        (presently) that the file is stored in a special subdirectory within
        the source package.

    Returns
    -------
    dict
        Complete summary of upload processing.
    int
        An HTTP status code.
    dict
        Some extra headers to add to the response.

    Raises
    ------
    BadRequest
        ``file`` is ``None`` or has an empty filename.
    NotFound
        No database entry exists for the workspace.
    Forbidden
        The workspace is not in the active state, or it is locked.
    InternalServerError
        An I/O, database (``TypeError``/``ValueError``) or other unexpected
        error occurred while creating the workspace or processing the upload.
    """
    # TODO: Hook up async processing (celery/redis) - doesn't work now.
    #       The intent is to schedule ``sanitize_upload`` as a task and
    #       return ACCEPTED with a status 'Location' header; this will likely
    #       be dropped if processing time is reasonable.

    # Check arguments for basic qualities like existing and such.

    # File argument is required to exist and have a name associated with it.
    # It is standard practice that if user fails to select file the filename is null.
    logger.debug('Handling upload request for %s', upload_id)
    if file is None:
        # Crash and burn...not quite...do we need info about client?
        logger.error('Upload request is missing file/archive payload.')
        raise BadRequest(UPLOAD_MISSING_FILE)

    if file.filename == '':
        # Client needs to select file, or provide name to upload payload
        logger.error(
            'Upload file is missing filename. File to upload may not be selected.'
        )
        raise BadRequest(UPLOAD_MISSING_FILENAME)

    # What about archive argument.
    if archive is None:
        # TODO: Discussion about how to treat omission of archive argument.
        # Is this an HTTP exception? Oversize limits are configured per archive.
        # Or is this a warning/error returned in upload summary?
        #
        # Most submissions can get by with default size limitations so we'll add a warning
        # message for the upload (this will appear on upload page and get logged). This
        # warning will get generated in process/upload.py and not here.
        logger.error("Upload 'archive' not specified. Oversize calculation "
                     "will use default values.")

    # If this is a new upload then we need to create a workspace and add to database.
    if upload_id is None:
        logger.debug('This is a new upload workspace.')
        try:
            logger.info(
                "Create new workspace: Upload request: "
                "file='%s' archive='%s'", file.filename, archive)
            user_id = str(user.user_id)

            # The database entry stores an empty string when no archive
            # was given.
            arch = '' if archive is None else archive

            current_time = datetime.now(UTC)
            new_upload = Upload(owner_user_id=user_id,
                                archive=arch,
                                created_datetime=current_time,
                                modified_datetime=current_time,
                                state=Upload.ACTIVE)
            # Store in DB
            uploads.store(new_upload)

            upload_id = new_upload.upload_id

        except IOError as e:
            logger.info("Error creating new workspace: %s", e)
            raise InternalServerError(f'{UPLOAD_IO_ERROR}: {e}') from e
        except (TypeError, ValueError) as dbe:
            logger.info("Error adding new workspace to database: '%s'.", dbe)
            raise InternalServerError(UPLOAD_DB_ERROR) from dbe
        except Exception as ue:
            logger.info(
                "Unknown error in upload for new workspace. "
                " Add except clauses for '%s'. DO IT NOW!", ue)
            raise InternalServerError(UPLOAD_UNKNOWN_ERROR) from ue

    # At this point we expect upload to exist in system
    try:
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Invalid workspace identifier
            raise NotFound(UPLOAD_NOT_FOUND)
        if upload_db_data.state != Upload.ACTIVE:
            # Do we log anything for these requests
            logger.debug('Forbidden, workspace not active')
            raise Forbidden(UPLOAD_NOT_ACTIVE)
        if upload_db_data.lock == Upload.LOCKED:
            logger.debug('Forbidden, workspace locked')
            raise Forbidden(UPLOAD_WORKSPACE_LOCKED)

        # Now handle upload package - process file or gzipped tar archive

        # NOTE: This will need to be migrated to task.py using Celery at
        #       some point in future. Depends on the time it takes to
        #       process uploads.
        logger.info("%s: Upload files to existing "
                    "workspace: file='%s'", upload_db_data.upload_id,
                    file.filename)

        # Keep track of how long processing upload_db_data takes
        start_datetime = datetime.now(UTC)

        # Create Upload object
        upload_workspace = UploadWorkspace(upload_id)

        # Process upload_db_data
        upload_workspace.process_upload(file, ancillary=ancillary)

        completion_datetime = datetime.now(UTC)

        # Keep track of files processed (this included deleted files)
        file_list = upload_workspace.create_file_upload_summary()

        # Determine readiness state of upload content; errors take
        # precedence over warnings.
        upload_status = Upload.READY

        if upload_workspace.has_errors():
            upload_status = Upload.ERRORS
        elif upload_workspace.has_warnings():
            upload_status = Upload.READY_WITH_WARNINGS

        # Create combined list of errors and warnings: warnings first, then
        # errors, each as [level, public_filepath, message].
        # TODO: Should I do this in Upload package?? Likely...
        # TODO: errors renamed fatal. Need to review 'errors' as to whether they are 'fatal'
        errors_and_warnings = [
            ['warn', public_filepath, message]
            for public_filepath, message in upload_workspace.get_warnings()
        ]
        errors_and_warnings += [
            ['fatal', public_filepath, message]
            for public_filepath, message in upload_workspace.get_errors()
        ]

        # Prepare upload_db_data details (DB). I'm assuming that in memory Redis
        # is not sufficient for results that may be needed in the distant future.
        upload_db_data.lastupload_logs = json.dumps(errors_and_warnings)
        upload_db_data.lastupload_start_datetime = start_datetime
        upload_db_data.lastupload_completion_datetime = completion_datetime
        upload_db_data.lastupload_file_summary = json.dumps(file_list)
        upload_db_data.lastupload_upload_status = upload_status
        upload_db_data.state = Upload.ACTIVE

        # Store in DB
        uploads.update(upload_db_data)

        logger.info(
            "%s: Processed upload. "
            "Saved to DB. Preparing upload summary.", upload_db_data.upload_id)

        # Upload action itself has very simple response
        headers = {
            'Location':
            url_for('upload_api.upload_files',
                    upload_id=upload_db_data.upload_id),
            'ARXIV-OWNER': upload_db_data.owner_user_id
        }

        status_code = status.CREATED

        response_data = _status_data(upload_db_data, upload_workspace)
        logger.info("%s: Generating upload summary.", upload_db_data.upload_id)
        logger.debug('Response data: %s', response_data)
        return response_data, status_code, headers

    except IOError as e:
        logger.error("%s: File upload_db_data request failed "
                     "for file='%s'", upload_id, file.filename)
        raise InternalServerError(f'{UPLOAD_IO_ERROR}: {e}') from e
    except (TypeError, ValueError) as dbe:
        logger.info("Error updating database: '%s'", dbe)
        raise InternalServerError(UPLOAD_DB_ERROR) from dbe
    except BadRequest as breq:
        logger.info("%s: '%s'.", upload_id, breq)
        raise
    except NotFound as nfdb:
        # Pass the exception as a logging argument; the previous message
        # contained the literal text '{nfdb}'.
        logger.info("%s: Upload: '%s'.", upload_id, nfdb)
        raise
    except Forbidden as forb:
        # Same fix as above for the literal '{forb}'.
        logger.info("%s: Upload failed: '%s'.", upload_id, forb)
        raise
    except Exception as ue:
        logger.info(
            "Unknown error with existing workspace."
            " Add except clauses for '%s'. DO IT NOW!", ue)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR) from ue
Пример #6
0
def delete_workspace(upload_id: int) -> Response:
    """
    Delete workspace.

    Removes the workspace via ``UploadWorkspace.remove_workspace`` and marks
    the database entry as ``Upload.DELETED``. The database entry itself is
    kept for historical reference.

    Parameters
    ----------
    upload_id : int
        The unique identifier for the upload workspace.

    Returns
    -------
    dict
        Response content: a ``'reason'`` entry holding
        ``UPLOAD_DELETED_WORKSPACE``.
    int
        An HTTP status code.
    dict
        Some extra headers to add to the response (empty here).

    Raises
    ------
    NotFound
        No database entry exists for ``upload_id``, or the workspace has
        already been deleted.
    InternalServerError
        An ``IOError`` or any other unexpected exception was raised while
        handling the request. The original exception is attached as the cause.

    """
    logger.info('%s: Deleting upload workspace.', upload_id)

    # Need to add several checks here

    # At this point I believe we know that caller is authorized to delete the
    # workspace. This is checked at the routes level.

    # Does workspace exist? Has it already been deleted? Generate 404:NotFound error.
    # Do we care if workspace is in ACTIVE state? And not released? NO. But log it...
    # Do we want to stash source.log somewhere?
    # Do we care if workspace was modified recently...NO. Log it

    try:
        # Make sure we have an existing upload workspace to work with
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # invalid workspace identifier
            # Note: DB entry will exist for workspace that has already been
            #       deleted
            raise NotFound(UPLOAD_NOT_FOUND)

        if upload_db_data.state == Upload.DELETED:
            logger.info(
                "%s: Workspace has already been deleted: "
                "current state is '%s'", upload_id, upload_db_data.state)
            raise NotFound(UPLOAD_WORKSPACE_NOT_FOUND)

        # Actually remove entire workspace directory structure. Log
        # everything to global log since source log is being removed!
        upload_workspace = UploadWorkspace(upload_id)

        # Call routine that will do the actual work
        upload_workspace.remove_workspace()

        # Deleting a workspace that was never released is allowed, but is
        # worth a log entry.
        if upload_db_data.state != Upload.RELEASED:
            logger.info("%s: Workspace currently in '%s' state.", upload_id,
                        upload_db_data.state)

        # Update database (but keep entry around) for historical reference.
        # Does not consume very much space. What about source log?
        upload_db_data.state = Upload.DELETED

        # Store in DB
        uploads.update(upload_db_data)

    except IOError as ioe:
        logger.error("%s: Delete workspace request failed ", upload_id)
        raise InternalServerError(CANT_DELETE_FILE) from ioe
    except NotFound as nf:
        logger.info("%s: Delete Workspace: '%s'", upload_id, nf)
        raise
    except Exception as ue:
        logger.info(
            "Unknown error in delete workspace. "
            " Add except clauses for '%s'. DO IT NOW!", ue)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR) from ue

    # API doesn't provide for returning errors resulting from delete.
    # 401-unauthorized and 403-forbidden are handled at routes level.
    # Add 404 response to openapi.yaml

    response_data = {'reason': UPLOAD_DELETED_WORKSPACE}
    status_code = status.OK
    return response_data, status_code, {}