Exemplo n.º 1
0
 def test_getbucketname(self):
     """get_bucketname extracts the bucket name from s3:// and s3:/ URLs,
     with or without a key path and with or without a trailing slash."""
     cases = [
         ("s3://my-bucket/some/path/to/x.pdf", "my-bucket"),
         ("s3:/my-bucket/some/path/to/x.pdf", "my-bucket"),
         ("s3:/my-bucket/", "my-bucket"),
         ("s3:/my-bucket", "my-bucket"),
     ]
     for url, expected in cases:
         self.assertEqual(get_bucketname(url), expected)
Exemplo n.º 2
0
def download(model_endpoint):
    """
    Download a file from S3 (shared storage) to local MEDIA_ROOT.

    model_endpoint is instance of one of:

        * pmworker.endpoint.DocumentUrl
        * pmworker.endpoint.PageUrl.

    Will download Document original file or Page associated .txt file
    from remote S3 location (shared storage) to local MEDIA_ROOT.

    This function makes sense only in case of settings.S3=True i.e.
    only if S3 media storage is enabled.

    Returns True if the file is already local or was downloaded
    successfully, False if the remote object is missing or the
    download failed.
    """
    # Nothing to do if the file already exists locally.
    if model_endpoint.exists():
        return True

    remote_abspath = model_endpoint.url(ep=Endpoint.S3)
    local_abspath = model_endpoint.url()
    local_dirname = os.path.dirname(local_abspath)

    if not os.path.exists(local_dirname):
        logger.debug(f"{local_dirname} does not exist. Creating.")
        os.makedirs(local_dirname, exist_ok=True)
    else:
        logger.debug(f"{local_dirname} already exists.")

    # Derive bucket/key from the already-computed remote URL instead of
    # rebuilding it for each call.
    bucketname = get_bucketname(remote_abspath)
    keyname = get_keyname(remote_abspath)

    # Bail out early if the object does not exist on S3.
    if not model_endpoint.exists(ep=Endpoint.S3):
        logger.info(f"Endpoint s3:/{bucketname}/{keyname} missing")
        return False

    s3_client = boto3.client('s3')

    try:
        logger.debug(f"Downloading {remote_abspath} to {local_abspath}")
        logger.debug(f"Downloading from bucket={bucketname} "
                     f"Keyname={keyname} "
                     f"Local={local_abspath}.")
        s3_client.download_file(bucketname, keyname, local_abspath)
    except botocore.exceptions.ClientError:
        # Log with traceback; the caller only needs the boolean outcome.
        logger.error(
            f"Error while downloading "
            f" {remote_abspath} to {local_abspath}"
            f" bucketname={bucketname} keyname={keyname}",
            exc_info=True)
        return False

    return True
Exemplo n.º 3
0
def remove_file(doc_url):
    """
    Delete the single S3 object referenced by doc_url
    (bucket and key are parsed out of the URL).
    """
    bucketname = get_bucketname(doc_url)
    keyname = get_keyname(doc_url)

    s3_resource = boto3.resource('s3')
    s3_resource.Bucket(bucketname).Object(keyname).delete()
Exemplo n.º 4
0
def upload_img(page_url):
    """
    Upload the page's local image file to its corresponding S3 location.
    Bucket and key are derived from the page's S3 image URL.
    """
    s3_url = page_url.img_url(ep=Endpoint.S3)
    img_url = page_url.img_url()

    bucketname, keyname = get_bucketname(s3_url), get_keyname(s3_url)

    logger.debug(f"Uploading to bucket={bucketname} "
                 f"Keyname={keyname} "
                 f"img_url={img_url} "
                 f"s3_url={s3_url}.")

    boto3.client('s3').upload_file(img_url, bucketname, keyname)
Exemplo n.º 5
0
def upload_document_to_s3(doc_ep):
    """
    Upload the document's local file to its S3 location.

    Raises ValueError if the local file does not exist.
    """
    s3_url = doc_ep.url(ep=Endpoint.S3)
    local_url = doc_ep.url()
    bucketname = get_bucketname(s3_url)
    keyname = get_keyname(s3_url)
    s3_client = boto3.client('s3')

    # Fail loudly before touching S3 if the source file is missing.
    if not os.path.exists(local_url):
        raise ValueError(f"{local_url} path does not exist")

    logger.debug(f"upload_document {local_url} to s3:/{bucketname}/{keyname}")

    s3_client.upload_file(local_url, bucketname, keyname)
Exemplo n.º 6
0
def download_hocr(page_ep):
    """
    Download the page's .hocr file from S3 to local MEDIA_ROOT.

    Returns True if the file is already local or was downloaded
    successfully, False if the remote object is missing or the
    download failed.
    """
    # Nothing to do if the hocr file already exists locally.
    if page_ep.hocr_exists():
        return True

    remote_abspath = page_ep.hocr_url(ep=Endpoint.S3)
    local_abspath = page_ep.hocr_url()
    local_dirname = os.path.dirname(local_abspath)

    if not os.path.exists(local_dirname):
        logger.debug(f"{local_dirname} does not exist. Creating.")
        os.makedirs(local_dirname, exist_ok=True)
    else:
        logger.debug(f"{local_dirname} already exists.")

    s3_client = boto3.client('s3')

    # Derive bucket/key from the already-computed remote URL instead of
    # rebuilding it for each call.
    bucketname = get_bucketname(remote_abspath)
    keyname = get_keyname(remote_abspath)

    # Bail out early if the object does not exist on S3.
    if not s3_key_exists(remote_abspath):
        logger.info(f"Endpoint s3:/{bucketname}/{keyname} missing")
        return False

    try:
        logger.debug(f"Downloading {remote_abspath} to {local_abspath}")
        logger.debug(f"Downloading from bucket={bucketname} "
                     f"Keyname={keyname} "
                     f"Local={local_abspath}.")
        s3_client.download_file(bucketname, keyname, local_abspath)
    except botocore.exceptions.ClientError:
        # Log with traceback; the caller only needs the boolean outcome.
        logger.error(
            f"Error while downloading "
            f" {remote_abspath} to {local_abspath}"
            f" bucketname={bucketname} keyname={keyname}",
            exc_info=True)
        return False

    return True