Exemplo n.º 1
0
def test_download_file_from_url_fails(inspire_app):
    """A 404 answer for the requested file must raise ``DownloadFileError``."""
    url = "https://inspirehep.net/record/1759380/files/channelxi3.png"
    with requests_mock.Mocker() as http_mock:
        # The remote host answers "not found" for the exact URL requested.
        http_mock.get(url, status_code=404)
        with pytest.raises(DownloadFileError):
            download_file_from_url(url)
Exemplo n.º 2
0
def test_download_file_from_url_with_full_url(inspire_app):
    """An absolute URL is fetched as given and its body returned as bytes."""
    url = "https://inspirehep.net/record/1759380/files/channelxi3.png"
    payload = b"This is the file data"
    with requests_mock.Mocker() as http_mock:
        http_mock.get(url, status_code=200, content=payload)
        downloaded = download_file_from_url(url)
        assert downloaded == payload
Exemplo n.º 3
0
def test_download_file_from_url_with_relative_url(inspire_app):
    """A relative URL is fetched from the ``http://localhost:5000`` host."""
    relative_url = "/record/1759380/files/channelxi3.png"
    payload = b"This is the file data"
    with requests_mock.Mocker() as http_mock:
        # Only the absolute form is registered, so the download succeeds
        # only if the relative path is resolved to this address.
        http_mock.get(
            "http://localhost:5000/record/1759380/files/channelxi3.png",
            status_code=200,
            content=payload,
        )
        downloaded = download_file_from_url(relative_url)
        assert downloaded == payload
Exemplo n.º 4
0
    def add_file(
        app_context,
        control_number,
        uuid,
        url,
        original_url=None,
        key=None,
        filename=None,
        *args,
        **kwargs,
    ):
        """Adds files to s3.

        A ``url`` that is already an S3 URL is skipped, unless the
        ``UPDATE_S3_FILES_METADATA`` config flag is truthy. Any other file is
        downloaded and keyed by ``hash_data`` of its content; it is then
        uploaded, or only has its metadata replaced when that key already
        exists on S3.

        Args:
            app_context: Original app context should be passed here if running in separate thread
            control_number: Record identifier, only used in log entries.
            uuid: Record uuid, only used in log entries.
            url: Location of the file to add.
            original_url: When truthy, ``original_url`` is not added to the
                returned dict.
            key: Current key of the file; also the fallback for ``filename``.
                May be ``None``.
            filename: Name stored with the file. Falls back to ``key`` and
                then to the key derived for the file.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.

        Returns:
            dict: Fields to update on the file entry.
                * Skipped file whose ``key`` is part of ``url``: ``{}``.
                * Skipped file otherwise: ``key`` (last path segment of
                  ``url``) and ``filename``.
                * Processed file: ``key``, ``filename`` and ``url``, plus
                  ``original_url`` when ``url`` starts with ``http``, is not
                  an S3 URL and no ``original_url`` was passed.

        Raises:
            Whatever ``download_file_from_url`` raises when the download
            fails; it is not caught here.
        """
        with app_context.app.app_context():
            if current_s3_instance.is_s3_url(
                    url) and not current_app.config.get(
                        "UPDATE_S3_FILES_METADATA", False):
                result = {}
                # ``key`` defaults to None and ``None not in url`` raises
                # TypeError, so a missing key is handled like a key that is
                # not part of the url.
                if key is None or key not in url:
                    s3_key = url.split("/")[-1]
                    filename = filename or key or s3_key
                    key = s3_key
                    result.update({"key": key, "filename": filename})
                LOGGER.info(
                    "File already on S3 - Skipping",
                    url=url,
                    key=key,
                    recid=control_number,
                    uuid=uuid,
                    thread=threading.get_ident(),
                )
                return result
            file_data = download_file_from_url(url)
            new_key = hash_data(file_data)
            mimetype = magic.from_buffer(file_data, mime=True)
            file_data = BytesIO(file_data)
            # Never hand a None filename to S3 or back to the caller: use the
            # content key when neither a filename nor a key was supplied.
            filename = filename or key or new_key
            acl = current_app.config["S3_FILE_ACL"]
            if current_s3_instance.file_exists(new_key):
                # Same content is already stored; only refresh its metadata.
                LOGGER.info(
                    "Replacing file metadata",
                    key=new_key,
                    recid=control_number,
                    uuid=uuid,
                    thread=threading.get_ident(),
                )
                current_s3_instance.replace_file_metadata(
                    new_key, filename, mimetype, acl)
            else:
                LOGGER.info(
                    "Uploading file to s3",
                    key=new_key,
                    recid=control_number,
                    uuid=uuid,
                    thread=threading.get_ident(),
                )
                current_s3_instance.upload_file(file_data, new_key, filename,
                                                mimetype, acl)
            result = {
                "key": new_key,
                "filename": filename,
                "url": current_s3_instance.get_file_url(new_key),
            }
            # Remember where an external file came from, unless the caller
            # already tracks an original url for it.
            if (url.startswith("http")
                    and not current_s3_instance.is_s3_url(url)
                    and not original_url):
                result["original_url"] = url
            return result
Exemplo n.º 5
0
    def add_file(
        app_context,
        url,
        original_url=None,
        key=None,
        filename=None,
        *args,
        **kwargs,
    ):
        """Adds files to s3.

        A ``url`` that is an S3 URL with bucket prefix or a public URL is
        skipped, unless the ``UPDATE_S3_FILES_METADATA`` config flag is
        truthy. Any other file is downloaded, checked against the restricted
        mimetypes and keyed by ``hash_data`` of its content; it is then
        uploaded, or only has its metadata replaced when that key already
        exists on S3.

        Args:
            app_context: Original app context should be passed here if running in separate thread
            url: Location of the file to add.
            original_url: When truthy, ``original_url`` is not added to the
                returned dict.
            key: Current key of the file; also the fallback for ``filename``.
                May be ``None``.
            filename: Name stored with the file. Falls back to ``key`` and
                then to the key derived for the file.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.

        Returns:
            dict: Fields to update on the file entry.
                * Skipped file: ``key`` and ``filename`` when ``key`` is not
                  part of ``url``, and ``url`` (the public URL) when ``url``
                  is an S3 URL. May be empty.
                * Processed file: ``key``, ``filename`` and ``url``, plus
                  ``original_url`` when ``url`` starts with ``http``, is
                  neither an S3 nor a public URL and no ``original_url`` was
                  passed.

        Raises:
            UnsupportedFileError: If the detected mimetype is listed in the
                ``FILES_RESTRICTED_MIMETYPES`` config.
            Whatever ``download_file_from_url`` raises when the download
            fails; it is not caught here.
        """
        with app_context.app.app_context():
            is_s3_or_public_url = current_s3_instance.is_s3_url_with_bucket_prefix(
                url) or current_s3_instance.is_public_url(url)
            if is_s3_or_public_url and not current_app.config.get(
                    "UPDATE_S3_FILES_METADATA", False):
                result = {}
                # ``key`` defaults to None and ``None not in url`` raises
                # TypeError, so a missing key is handled like a key that is
                # not part of the url.
                if key is None or key not in url:
                    s3_key = url.split("/")[-1]
                    filename = filename or key or s3_key
                    key = s3_key
                    result.update({"key": key, "filename": filename})
                if current_s3_instance.is_s3_url(url):
                    # Hand back the public URL for files referenced by
                    # their S3 URL.
                    url = current_s3_instance.get_public_url(key)
                    result.update({"url": url})

                LOGGER.info(
                    "File already on S3 - Skipping",
                    url=url,
                    key=key,
                    thread=threading.get_ident(),
                )
                return result
            file_data = download_file_from_url(url)
            new_key = hash_data(file_data)
            mimetype = magic.from_buffer(file_data, mime=True)
            # An unset (or None) setting means "nothing is restricted";
            # without the fallback ``mimetype in None`` raises TypeError.
            restricted_mimetypes = (
                current_app.config.get("FILES_RESTRICTED_MIMETYPES") or ())
            if mimetype in restricted_mimetypes:
                LOGGER.error(
                    "Unsupported file type - Aborting",
                    key=key,
                    mimetype=mimetype,
                    thread=threading.get_ident(),
                )
                raise UnsupportedFileError(mimetype)
            file_data = BytesIO(file_data)
            # Use the content key when neither a filename nor a key was given.
            filename = filename or key or new_key
            acl = current_app.config["S3_FILE_ACL"]
            if current_s3_instance.file_exists(new_key):
                # Same content is already stored; only refresh its metadata.
                LOGGER.info(
                    "Replacing file metadata",
                    key=new_key,
                    thread=threading.get_ident(),
                )
                current_s3_instance.replace_file_metadata(
                    new_key, filename, mimetype, acl)
            else:
                LOGGER.info(
                    "Uploading file to s3",
                    key=new_key,
                    thread=threading.get_ident(),
                )
                current_s3_instance.upload_file(file_data, new_key, filename,
                                                mimetype, acl)
            result = {
                "key": new_key,
                "filename": filename,
                "url": current_s3_instance.get_public_url(new_key),
            }
            # Remember where an external file came from, unless the caller
            # already tracks an original url for it.
            if (url.startswith("http")
                    and not current_s3_instance.is_s3_url(url)
                    and not current_s3_instance.is_public_url(url)
                    and not original_url):
                result["original_url"] = url
            return result