Example #1
async def init_blob_for_streaming_upload(
    container: ContainerClient,
    blob_name: str,
    content_type: str,
    content_encoding: str,
    data: Any,
    return_sas_token: bool = True,
) -> str:
    """
    Uploads the given data to a blob record.
    If a blob with the given name already exists, an error is raised.

    Returns a uri with a SAS token to access the newly created blob.
    """
    await create_container_using_client(container)
    logger.info(f"Streaming blob '{blob_name}'" +
                f"to container '{container.container_name}' on account:" +
                f"'{container.account_name}'")

    content_settings = ContentSettings(content_type=content_type,
                                       content_encoding=content_encoding)
    blob = container.get_blob_client(blob_name)
    await blob.upload_blob(data, content_settings=content_settings)
    logger.debug(f"  - blob '{blob_name}' uploaded. generating sas token.")

    if return_sas_token:
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(days=14),
        )

        uri = blob.url + "?" + sas_token
    else:
        uri = remove_sas_token(blob.url)

    logger.debug(f"  - blob access url: '{uri}'.")

    return uri
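
The snippet below is a minimal, self-contained sketch of the read-only SAS pattern that Example #1 (and most of the examples that follow) rely on; the account, key, container, and blob names are placeholders, not values from the original code.

from datetime import datetime, timedelta

from azure.storage.blob import BlobClient, BlobSasPermissions, generate_blob_sas

# Hypothetical values; substitute your own storage account, key, container and blob.
ACCOUNT_NAME = "myaccount"
ACCOUNT_KEY = "<account-key>"
CONTAINER = "mycontainer"
BLOB_NAME = "report.json"

# Create a token that grants read-only access to a single blob for one hour.
sas_token = generate_blob_sas(
    account_name=ACCOUNT_NAME,
    container_name=CONTAINER,
    blob_name=BLOB_NAME,
    account_key=ACCOUNT_KEY,
    permission=BlobSasPermissions(read=True),
    expiry=datetime.utcnow() + timedelta(hours=1),
)

# Appending the token to the blob URL yields a link that works without the account key.
read_only_url = (f"https://{ACCOUNT_NAME}.blob.core.windows.net/"
                 f"{CONTAINER}/{BLOB_NAME}?{sas_token}")
blob_client = BlobClient.from_blob_url(read_only_url)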
Example #2
    def _upload_file(self, file_path, storage_id):
        blob_service_client = BlobServiceClient.from_connection_string(conn_str=self._connection_string)
        try:
            blob_service_client.create_container(self._blob_container, public_access=PublicAccess.Container)
        except ResourceExistsError:
            # the container already exists; reuse it
            pass
        blob_client = blob_service_client.get_blob_client(container=self._blob_container, blob=storage_id)

        with open(file_path, "rb") as data:
            blob_client.upload_blob(data)

        token = generate_blob_sas(
            account_name=self._storage_name,
            account_key=self._storage_key,
            container_name=self._blob_container,
            blob_name=storage_id,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))
        return f"{blob_client.url}?{token}"
Example #3
    def create_blob_link(self, blob_folder, blob_name) -> str:
        if blob_folder:
            full_path_blob = f"{blob_folder}/{blob_name}"
        else:
            full_path_blob = blob_name
        url = f"https://{self.account_name}.blob.core.windows.net/{self.destination}/{full_path_blob}"
        sas_token = generate_blob_sas(
            account_name=self.account_name,
            account_key=self.account_key,
            container_name=self.destination,
            blob_name=full_path_blob,
            permission=BlobSasPermissions(read=True,
                                          delete_previous_version=False),
            expiry=datetime.utcnow() +
            timedelta(days=self.expiry_download_links),
        )

        url_with_sas = f"{url}?{sas_token}"
        return url_with_sas
Example #4
File: azure.py  Project: zang3tsu/dvc
    def _generate_download_url(self, path_info, expires=3600):
        from azure.storage.blob import (  # pylint:disable=no-name-in-module
            BlobSasPermissions, generate_blob_sas,
        )

        expires_at = datetime.utcnow() + timedelta(seconds=expires)

        blob_client = self.blob_service.get_blob_client(
            path_info.bucket, path_info.path)

        sas_token = generate_blob_sas(
            blob_client.account_name,
            blob_client.container_name,
            blob_client.blob_name,
            account_key=blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=expires_at,
        )
        return blob_client.url + "?" + sas_token
Example #5
    def test_append_block_from_url_with_if_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_name = self._get_blob_reference()
        destination_blob_client = self.bsc.get_blob_client(
            self.container_name, destination_blob_name)
        destination_blob_properties_on_creation = destination_blob_client.create_append_blob(
        )

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  if_match=destination_blob_properties_on_creation.get('etag'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties(
        )
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                if_match='0x111111111111111')
Example #6
    async def _test_append_block_from_url(self):
        # Arrange
        source_blob_name = self.get_resource_name("sourceblob")
        self.config.use_byte_buffer = True  # chunk upload
        source_blob_client, _ = await self._create_block_blob(blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1)
        )
        source_blob_url = source_blob_client.url + "?" + source_blob_sas

        self.config.use_byte_buffer = False
        destination_blob_client = await self._create_append_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        append_blob_prop = await destination_blob_client.append_block_from_url(source_blob_url,
                                                                               source_offset=0,
                                                                               source_length=4 * 1024,
                                                                               cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(append_blob_prop['etag'])
        self.assertIsNotNone(append_blob_prop['last_modified'])
        # TODO: verify that the swagger is correct, header wasn't added for the response
        # self.assertTrue(append_blob_prop['request_server_encrypted'])
        self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            await destination_blob_client.download_blob()

        # Act get the blob content
        blob = await destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(await blob.readall(), self.byte_data[0: 4 * 1024])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)
Example #7
    def url(self, name, expire=None):
        name = self._get_valid_path(name)

        if expire is None:
            expire = self.expiration_secs

        credential = None
        if expire:
            sas_token = generate_blob_sas(
                self.account_name,
                self.azure_container,
                name,
                account_key=self.account_key,
                permission=BlobSasPermissions(read=True),
                expiry=self._expire_at(expire))
            credential = sas_token

        container_blob_url = self.client.get_blob_client(
            filepath_to_uri(name)).url
        return BlobClient.from_blob_url(container_blob_url, credential=credential).url
Example #8
    def _generate_url(self, package: Package) -> str:
        path = self.get_path(package)

        url_params = generate_blob_sas(
            account_name=self.storage_account_name,
            container_name=self.storage_container_name,
            blob_name=path,
            account_key=self.storage_account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.now() + timedelta(seconds=self.expire_after),
            protocol="https",
        )

        url = "{}/{}/{}?{}".format(
            self.azure_storage_account_url,
            self.storage_container_name,
            path,
            url_params,
        )
        return url
Example #9
    def test_append_block_from_url_with_source_if_none_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_blob_properties = source_blob_client.get_blob_properties()
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  source_etag='0x111111111111111',
                                  source_match_condition=MatchConditions.IfModified)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceNotFoundError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          source_etag=source_blob_properties.get('etag'),
                                                          source_match_condition=MatchConditions.IfModified)
Example #10
    def _get_signed_url(self, prefix: str, oid: str, expires_in: int, filename: Optional[str] = None,
                        **permissions: bool) -> str:
        blob_name = self._get_blob_path(prefix, oid)
        permissions = BlobSasPermissions(**permissions)
        token_expires = (datetime.now(tz=timezone.utc) + timedelta(seconds=expires_in))

        extra_args = {}
        if filename:
            extra_args['content_disposition'] = f'attachment; filename="{filename}"'

        sas_token = generate_blob_sas(account_name=self.blob_svc_client.account_name,
                                      account_key=self.blob_svc_client.credential.account_key,
                                      container_name=self.container_name,
                                      blob_name=blob_name,
                                      permission=permissions,
                                      expiry=token_expires,
                                      **extra_args)

        blob_client = BlobClient(self.blob_svc_client.url, container_name=self.container_name, blob_name=blob_name,
                                 credential=sas_token)
        return blob_client.url  # type: ignore
Example #11
def get_container_uri(connection_string: str, container_name: str) -> str:
    """
    Creates and initializes a container; returns a URI with a SAS read/write token to access it.
    """
    container = create_container(connection_string, container_name)
    logger.info(
        f"Creating SAS token for container '{container_name}' on account: '{container.account_name}'"
    )

    sas_token = generate_container_sas(
        container.account_name,
        container.container_name,
        account_key=container.credential.account_key,
        permission=BlobSasPermissions(read=True,
                                      add=True,
                                      write=True,
                                      create=True),
        expiry=datetime.utcnow() + timedelta(days=14))

    uri = container.url + "?" + sas_token
    logger.debug(f"  - container url: '{uri}'.")
    return uri
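
A hedged usage sketch for the container URI returned by get_container_uri above; the connection string and container name are placeholders, and it assumes the SAS actually carries the write/create permissions requested in the example.

from azure.storage.blob import ContainerClient

# Hypothetical inputs; get_container_uri() is the function defined above.
uri = get_container_uri(
    "DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=<key>;EndpointSuffix=core.windows.net",
    "results")

# The SAS token embedded in the URI acts as the credential, so no account key is needed here.
with ContainerClient.from_container_url(uri) as container_client:
    container_client.upload_blob(name="output.json", data=b"{}")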
Example #12
    async def _setup(self):
        if not self.is_playback():
            try:
                # create source blob to be copied from
                self.source_blob_name = self.get_resource_name('srcblob')
                self.source_blob_data = self.get_random_bytes(4 * 1024)
                source_blob = self.bsc.get_blob_client(self.container_name,
                                                       self.source_blob_name)

                await self.bsc.create_container(self.container_name)
                await source_blob.upload_blob(self.source_blob_data)

                # generate a SAS so that it is accessible with a URL
                sas_token = source_blob.generate_shared_access_signature(
                    permission=BlobSasPermissions(read=True),
                    expiry=datetime.utcnow() + timedelta(hours=1),
                )
                sas_source = BlobClient.from_blob_url(source_blob.url,
                                                      credential=sas_token)
                self.source_blob_url = sas_source.url
            except:
                pass
Example #13
    async def _setup(self, storage_account, key):
        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                     credential=key,
                                     connection_data_block_size=4 * 1024,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=4 * 1024,
                                     transport=AiohttpTestTransport())
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)

        blob = self.bsc.get_blob_client(self.container_name,
                                        self.source_blob_name)

        if self.is_live:
            try:
                await self.bsc.create_container(self.container_name)
            except:
                pass
            await blob.upload_blob(self.source_blob_data, overwrite=True)

        # generate a SAS so that it is accessible with a URL
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            snapshot=blob.snapshot,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        self.source_blob_url = BlobClient.from_blob_url(
            blob.url, credential=sas_token).url
        self.source_blob_url_without_sas = blob.url
Example #14
 def generate_url(self, seconds, display_filename=None, content_type=None, inline=False):
     if content_type is None:
         content_type = self.content_type
     if display_filename is not None:
         disposition = "attachment; filename=" + display_filename
     elif inline:
         disposition = "inline"
     else:
         disposition = None
     token = generate_blob_sas(
         self.blob_client.account_name,
         self.blob_client.container_name,
         self.blob_client.blob_name,
         snapshot=self.blob_client.snapshot,
         account_key=self.blob_client.credential.account_key,
         permission=BlobSasPermissions(read=True),
         expiry=datetime.datetime.utcnow() + datetime.timedelta(seconds=seconds),
         cache_control='no-cache',
         content_disposition=disposition,
         content_type=content_type
     )
     return self.blob_client.url + '?' + token
Example #15
    def create_sas(self, blob_path, hour_exp: float, **permissions):
        permissions = BlobSasPermissions(
            read=permissions.pop('read', True),
            write=permissions.pop('write', True),
            delete=permissions.pop('delete', True),
            add=permissions.pop('add', True),
            create=permissions.pop('create', True))

        sas_token = generate_blob_sas(
            self.container_client.account_name,
            self.container_client.container_name,
            blob_name=blob_path,
            permission=permissions,
            account_key=self.container_client.credential.account_key,
            expiry=datetime.utcnow() + timedelta(hours=hour_exp),
            start=datetime.utcnow() - timedelta(minutes=1))

        blob_path = urllib.parse.quote(blob_path)
        sas_url = _make_url(self.container_url, blob_path, sas_token=sas_token)

        return sas_url
Example #16
    def build_resource_file(self,
                            file_path,
                            container_path: str,
                            duration_hours=24):
        """
        Uploads a local file to an Azure Blob storage container.

        :param str file_path: The local path to the file.
        :param str container_path: The path where the file should be placed in the container before executing the task
        :rtype: `azure.batch.models.ResourceFile`
        :return: A ResourceFile initialized with a SAS URL appropriate for Batch tasks.
        """
        # print( "Uploading file {} to container [{}]...".format( file_path, self.config.BLOB_CONTAINER_NAME)),
        blob_name = os.path.basename(file_path)
        blob_client = self.container_client.get_blob_client(blob_name)

        try:
            blob_client.delete_blob()
        except ResourceNotFoundError:
            pass

        with open(os.path.join(self.config.BATCH_DIRECTORY, file_path),
                  "rb") as data:
            blob_client.upload_blob(data, blob_type="BlockBlob")

        sas_token = generate_blob_sas(
            blob_client.account_name,
            blob_client.container_name,
            blob_client.blob_name,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.datetime.utcnow() +
            datetime.timedelta(hours=duration_hours),
            account_key=self.config.STORAGE_ACCOUNT_KEY,
        )

        return models.ResourceFile(http_url=blob_client.url + "?" + sas_token,
                                   file_path=container_path)
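
Below is a hedged sketch of how the ResourceFile returned by build_resource_file might be wired into an Azure Batch task; the uploader instance, batch_client, job id, and command line are assumptions for illustration, not part of the original code.

from azure.batch import models

# Hypothetical objects: 'uploader' exposes build_resource_file() as defined above,
# and 'batch_client' is an already-authenticated azure.batch.BatchServiceClient.
resource_file = uploader.build_resource_file("inputs/data.csv", "data.csv", duration_hours=24)

task = models.TaskAddParameter(
    id="process-data",
    command_line="python process.py data.csv",
    # Batch downloads the blob via its SAS URL into the task working directory before the task runs.
    resource_files=[resource_file],
)
batch_client.task.add(job_id="my-job", task=task)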
Example #17
    def test_set_blob_tags_using_blob_sas(self, resource_group, location, storage_account, storage_account_key):
        token = generate_account_sas(
            storage_account.name,
            storage_account_key,
            ResourceTypes(service=True, container=True, object=True),
            AccountSasPermissions(write=True, list=True, read=True, delete_previous_version=True, tag=True,
                                  filter_by_tags=True),
            datetime.utcnow() + timedelta(hours=1),
        )
        self._setup(storage_account, token)

        tags = {"year": '1000', "tag2": "secondtag", "tag3": "thirdtag", "habitat_type": 'Shallow Lowland Billabongs'}
        blob_client, _ = self._create_block_blob(tags=tags, container_name=self.container_name)
        token1 = generate_blob_sas(
            storage_account.name,
            self.container_name,
            blob_client.blob_name,
            account_key=storage_account_key,
            permission=BlobSasPermissions(delete_previous_version=True, tag=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        blob_client = BlobClient.from_blob_url(blob_client.url, token1)
        blob_client.set_blob_tags(tags=tags)
        tags_on_blob = blob_client.get_blob_tags()
        self.assertEqual(len(tags_on_blob), len(tags))

        if self.is_live:
            sleep(10)

        # To filter in a specific container use:
        # where = "@container='{}' and tag1='1000' and tag2 = 'secondtag'".format(container_name1)
        where = "\"year\"='1000' and tag2 = 'secondtag' and tag3='thirdtag'"

        blob_list = self.bsc.find_blobs_by_tags(filter_expression=where, results_per_page=2).by_page()
        first_page = next(blob_list)
        items_on_page1 = list(first_page)
        self.assertEqual(1, len(items_on_page1))
Example #18
    def setUp(self):
        super(StorageBlockBlobTest, self).setUp()
        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(url,
                                     credential=credential,
                                     connection_data_block_size=4 * 1024,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)

        blob = self.bsc.get_blob_client(self.container_name,
                                        self.source_blob_name)
        if not self.is_playback():
            self.bsc.create_container(self.container_name)
            blob.upload_blob(self.source_blob_data)

        # generate a SAS so that it is accessible with a URL
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            snapshot=blob.snapshot,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        self.source_blob_url = BlobClient.from_blob_url(
            blob.url, credential=sas_token).url
Example #19
    def test_append_block_from_url_and_validate_content_md5(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        src_md5 = StorageContentValidation.get_content_md5(source_blob_data)
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls with correct md5
        resp = destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                             source_content_md5=src_md5)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))

        # Act part 2: put block from url with wrong md5
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_content_md5=StorageContentValidation.get_content_md5(
                                                              b"POTATO"))
Example #20
    def _setup(self, bsc):
        self.container_name = self.get_resource_name('utcontainer')

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_data = self.get_random_bytes(4 * 1024)
        source_blob = bsc.get_blob_client(self.container_name, self.source_blob_name)

        if self.is_live:
            bsc.create_container(self.container_name)
            source_blob.upload_blob(self.source_blob_data)

        # generate a SAS so that it is accessible with a URL
        sas_token = generate_blob_sas(
            source_blob.account_name,
            source_blob.container_name,
            source_blob.blob_name,
            snapshot=source_blob.snapshot,
            account_key=source_blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        sas_source = BlobClient.from_blob_url(source_blob.url, credential=sas_token)
        self.source_blob_url = sas_source.url
Example #21
def aggregate_results(job_id: str,
                      model_version: str,
                      job_name: str,
                      job_submission_timestamp: str) -> str:
    log.info(f'server_job, aggregate_results starting, job_id: {job_id}')

    container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
                                                           container=api_config.STORAGE_CONTAINER_API)
    # when people download this, the timestamp will have : replaced by _
    output_file_path = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_detections_{job_name}_{job_submission_timestamp}.json'

    with ContainerClient.from_container_url(container_url,
                                            credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
        # check if the result blob has already been written (could be another instance of the API / worker thread)
        # and if so, skip aggregating and uploading the results, and just generate the SAS URL, which
        # could be needed still if the previous request_status was `problem`.
        blob_client = container_client.get_blob_client(output_file_path)
        if blob_client.exists():
            log.warning(f'The output file already exists, likely because another monitoring thread already wrote it.')
        else:
            task_outputs_dir = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_outputs/'
            generator = container_client.list_blobs(name_starts_with=task_outputs_dir)

            blobs = [i for i in generator if i.name.endswith('.json')]

            all_results = []
            for blob_props in tqdm(blobs):
                with container_client.get_blob_client(blob_props) as blob_client:
                    stream = io.BytesIO()
                    blob_client.download_blob().readinto(stream)
                    stream.seek(0)
                    task_results = json.load(stream)
                    all_results.extend(task_results)

            api_output = {
                'info': {
                    'detector': f'megadetector_v{model_version}',
                    'detection_completion_time': get_utc_time(),
                    'format_version': api_config.OUTPUT_FORMAT_VERSION
                },
                'detection_categories': api_config.DETECTOR_LABEL_MAP,
                'images': all_results
            }

            # upload the output JSON to the Job folder
            api_output_as_bytes = bytes(json.dumps(api_output, ensure_ascii=False, indent=1), encoding='utf-8')
            _ = container_client.upload_blob(name=output_file_path, data=api_output_as_bytes)

    output_sas = generate_blob_sas(
        account_name=api_config.STORAGE_ACCOUNT_NAME,
        container_name=api_config.STORAGE_CONTAINER_API,
        blob_name=output_file_path,
        account_key=api_config.STORAGE_ACCOUNT_KEY,
        permission=BlobSasPermissions(read=True, write=False),
        expiry=datetime.utcnow() + timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS)
    )
    output_sas_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API,
        blob=output_file_path,
        sas_token=output_sas
    )
    log.info(f'server_job, aggregate_results done, job_id: {job_id}')
    log.info(f'output_sas_url: {output_sas_url}')
    return output_sas_url
Example #22
    def _setup(self,
               storage_account_name,
               key,
               container_prefix='utcontainer'):
        account_url = self.account_url(storage_account_name, "blob")
        if not isinstance(account_url, str):
            account_url = account_url.encode('utf-8')
            key = key.encode('utf-8')
        self.bsc = BlobServiceClient(account_url,
                                     credential=key,
                                     connection_data_block_size=4 * 1024,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name(container_prefix)

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_name_with_special_chars = 'भारत¥test/testsubÐirÍ/' + self.get_resource_name(
            'srcÆblob')
        self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)
        self.source_blob_with_special_chars_data = self.get_random_bytes(
            SOURCE_BLOB_SIZE)

        blob = self.bsc.get_blob_client(self.container_name,
                                        self.source_blob_name)
        blob_with_special_chars = self.bsc.get_blob_client(
            self.container_name, self.source_blob_name_with_special_chars)

        if self.is_live:
            self.bsc.create_container(self.container_name)
            blob.upload_blob(self.source_blob_data)
            blob_with_special_chars.upload_blob(
                self.source_blob_with_special_chars_data)

        # generate a SAS so that it is accessible with a URL
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            snapshot=blob.snapshot,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        # generate a SAS so that it is accessible with a URL
        sas_token_for_special_chars = generate_blob_sas(
            blob_with_special_chars.account_name,
            blob_with_special_chars.container_name,
            blob_with_special_chars.blob_name,
            snapshot=blob_with_special_chars.snapshot,
            account_key=blob_with_special_chars.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        self.source_blob_url_without_sas = blob.url
        self.source_blob_url = BlobClient.from_blob_url(
            blob.url, credential=sas_token).url
        self.source_blob_url_with_special_chars = BlobClient.from_blob_url(
            blob_with_special_chars.url,
            credential=sas_token_for_special_chars).url
Example #23
    def generate_url(self,
                     blob_name: str,
                     read: bool = True,
                     add: bool = False,
                     create: bool = False,
                     write: bool = False,
                     delete: bool = False,
                     sas: bool = False,
                     access_time: int = 1) -> str:
        """
        Generates a blob URL. It can also generate a Shared Access Signature (SAS) if ``sas=True``.

        :param bool write: Write access

            .. versionadded:: 2.0

        :param bool create: Create access

            .. versionadded:: 2.0

        :param bool add: Add access

            .. versionadded:: 2.0

        :param bool read: Read access

            .. versionadded:: 2.0

        :param bool delete: Delete access

            .. versionadded:: 2.0

        :param int access_time: Time (in hours) for which the URL is valid
        :param str blob_name: Name of the blob; this can be a path
        :param bool sas: Set ``True`` to generate a SAS key
        :return: Blob URL

        **Example without ``sas``**

        >>> import os
        >>> from azblobexplorer import AzureBlobDownload
        >>> az = AzureBlobDownload('account name', 'account key', 'container name')
        >>> az.generate_url("filename.txt")
        https://containername.blob.core.windows.net/blobname/filename.txt

        **Example with ``upload_to`` and ``sas``**

        >>> import os
        >>> from azblobexplorer import AzureBlobDownload
        >>> az = AzureBlobDownload('account name', 'account key', 'container name')
        >>> az.generate_url("filename.txt", sas=True)
        https://containername.blob.core.windows.net/blobname/filename.txt?se=2019-11-05T16%3A33%3A46Z&sp=w&sv=2019-02-02&sr=b&sig=t%2BpUG2C2FQKp/Hb8SdCsmaZCZxbYXHUedwsquItGx%2BM%3D
        """

        blob = self.container_client.get_blob_client(blob_name)

        if sas:
            sas_token = generate_blob_sas(
                blob.account_name,
                blob.container_name,
                blob.blob_name,
                account_key=blob.credential.account_key,
                permission=BlobSasPermissions(read, add, create, write,
                                              delete),
                expiry=datetime.utcnow() + timedelta(hours=access_time))
            return blob.url + '?' + sas_token
        else:
            return blob.url
Example #24
    def test_put_block_from_url_and_commit_with_cpk(self, resource_group, location, storage_account, storage_account_key):
        # Arrange
        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        bsc = BlobServiceClient(
            self.account_url(storage_account.name, "blob"),
            credential=storage_account_key,
            connection_data_block_size=1024,
            max_single_put_size=1024,
            min_large_block_upload_threshold=1024,
            max_block_size=1024,
            max_page_size=1024)
        self._setup(bsc)
        # create source blob and get source blob url
        source_blob_name = self.get_resource_name("sourceblob")
        self.config.use_byte_buffer = True  # Make sure using chunk upload, then we can record the request
        source_blob_client, _ = self._create_block_blob(bsc, blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1)
        )
        source_blob_url = source_blob_client.url + "?" + source_blob_sas

        # create destination blob
        self.config.use_byte_buffer = False
        destination_blob_client, _ = self._create_block_blob(bsc, cpk=TEST_ENCRYPTION_KEY)

        # Act part 1: make put block from url calls
        destination_blob_client.stage_block_from_url(block_id=1, source_url=source_blob_url,
                                                     source_offset=0, source_length=4 * 1024,
                                                     cpk=TEST_ENCRYPTION_KEY)
        destination_blob_client.stage_block_from_url(block_id=2, source_url=source_blob_url,
                                                     source_offset=4 * 1024, source_length=4 * 1024,
                                                     cpk=TEST_ENCRYPTION_KEY)

        # Assert blocks
        committed, uncommitted = destination_blob_client.get_block_list('all')
        self.assertEqual(len(uncommitted), 2)
        self.assertEqual(len(committed), 0)

        # commit the blocks without cpk should fail
        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2')]
        with self.assertRaises(HttpResponseError):
            destination_blob_client.commit_block_list(block_list)

        # Act commit the blocks with cpk should succeed
        put_block_list_resp = destination_blob_client.commit_block_list(block_list,
                                                                        cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(put_block_list_resp['etag'])
        self.assertIsNotNone(put_block_list_resp['last_modified'])
        self.assertTrue(put_block_list_resp['request_server_encrypted'])
        self.assertEqual(put_block_list_resp['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content
        blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data[0: 8 * 1024])
        self.assertEqual(blob.properties.etag, put_block_list_resp['etag'])
        self.assertEqual(blob.properties.last_modified, put_block_list_resp['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)
        self._teardown(bsc)
Example #25
    async def test_update_page_from_url(self, storage_account_name,
                                        storage_account_key):
        # Arrange
        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        bsc = BlobServiceClient(
            self.account_url(storage_account_name, "blob"),
            storage_account_key,
            max_single_put_size=1024,
            min_large_block_upload_threshold=1024,
            max_block_size=1024,
            max_page_size=1024,
            transport=AiohttpTestTransport(connection_data_block_size=1024))
        await self._setup(bsc)
        source_blob_name = self.get_resource_name("sourceblob")
        self.config.use_byte_buffer = True  # Make sure using chunk upload, then we can record the request
        source_blob_client, _ = await self._create_block_blob(
            bsc, blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))
        source_blob_url = source_blob_client.url + "?" + source_blob_sas

        self.config.use_byte_buffer = False
        blob_client = await self._create_page_blob(bsc,
                                                   cpk=TEST_ENCRYPTION_KEY)

        # Act
        page_blob_prop = await blob_client.upload_pages_from_url(
            source_blob_url,
            offset=0,
            length=len(self.byte_data),
            source_offset=0,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        # TODO: FIX SWAGGER
        # self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            await blob_client.download_blob()

        # Act get the blob content
        blob = await blob_client.download_blob(
            offset=0,
            length=len(self.byte_data),
            cpk=TEST_ENCRYPTION_KEY,
        )

        # Assert content was retrieved with the cpk
        self.assertEqual(await blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)
Example #26
def aggregate_results(job_id, model_version, job_name,
                      job_submission_timestamp):
    log.info(f'server_job, aggregate_results starting, job_id: {job_id}')

    task_outputs_dir = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_outputs/'

    container_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API)

    all_results = []

    with ContainerClient.from_container_url(
            container_url,
            credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
        generator = container_client.list_blobs(
            name_starts_with=task_outputs_dir)

        blobs = [i for i in generator if i.name.endswith('.json')]

        for blob_props in tqdm(blobs):
            with container_client.get_blob_client(blob_props) as blob_client:
                stream = io.BytesIO()
                blob_client.download_blob().readinto(stream)
                stream.seek(0)
                task_results = json.load(stream)
                all_results.extend(task_results)

        api_output = {
            'info': {
                'detector': f'megadetector_v{model_version}',
                'detection_completion_time': get_utc_time(),
                'format_version': api_config.OUTPUT_FORMAT_VERSION
            },
            'detection_categories': api_config.DETECTOR_LABEL_MAP,
            'images': all_results
        }

        # upload the output JSON to the Job folder
        api_output_as_bytes = bytes(json.dumps(api_output,
                                               ensure_ascii=False,
                                               indent=1),
                                    encoding='utf-8')
        output_file_path = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_detections_{job_name}_{job_submission_timestamp}.json'
        _ = container_client.upload_blob(name=output_file_path,
                                         data=api_output_as_bytes)

    output_sas = generate_blob_sas(
        account_name=api_config.STORAGE_ACCOUNT_NAME,
        container_name=api_config.STORAGE_CONTAINER_API,
        blob_name=output_file_path,
        account_key=api_config.STORAGE_ACCOUNT_KEY,
        permission=BlobSasPermissions(read=True, write=False),
        expiry=datetime.utcnow() +
        timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS))
    output_sas_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API,
        blob=output_file_path,
        sas_token=output_sas)
    log.info(f'server_job, aggregate_results done, job_id: {job_id}')
    log.info(f'output_sas_url: {output_sas_url}')
    return output_sas_url
Example #27
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')
    try:

        category = req.params.get('category')
        if not category:
            try:
                req_body = req.get_json()
            except ValueError:
                pass
            else:
                category = req_body.get('category')

        if category:
            logging.info("category is something")
            connect_str: str = 'DefaultEndpointsProtocol=https;AccountName=djbvideoappsto;AccountKey=Q2w9wi3v0JbTMUIV0kMc0K0kRHtWhTciQ4S7ZgdYSHhic59ZMQk/BlQPIFYQ/fft8uPQYymym97GgYxY4dbvOg==;EndpointSuffix=core.windows.net'

            # Create the BlobServiceClient object which will be used to create a container client
            blob_service_client = BlobServiceClient.from_connection_string(
                connect_str)

            # Create a unique name for the container
            container_name = "djbtest"

            container = blob_service_client.get_container_client(
                container_name)

            logging.info("Before List Blobs")
            blobs = container.list_blobs()

            ret = []
            for blob in blobs:
                logging.info("Generate Token")
                logging.info(blob.name)
                sas_token = generate_blob_sas(
                    container.account_name,
                    container.container_name,
                    blob.name,
                    account_key=blob_service_client.credential.account_key,
                    permission=BlobSasPermissions(read=True),
                    expiry=datetime.utcnow() + timedelta(hours=1))
                logging.info("Before create video object")
                video = {
                    "Name": blob.name,
                    "Account": container.account_name,
                    "Container": container.container_name,
                    "SasToken": sas_token
                }
                logging.info("Before append to list")
                ret.append(video)

            logging.info("Converting to JSON")
            json_dump = json.dumps(ret)

            logging.info("About to return")
            print(json_dump)
            return func.HttpResponse(json_dump)

        else:
            return func.HttpResponse(
                "https://www.google.com/logos/doodles/2020/israel-kamakawiwooles-61st-birthday-6753651837108391.2-s.png",
                status_code=200)

    except Exception as ex:
        logging.exception('Exception:')
        logging.info(ex)
        logging.error(ex)
Example #28
def retrieve_transcript(identifier, language, speaker_type, service_config):
    blob_service_client = BlobServiceClient.from_connection_string(
        service_config['connection_string'])
    container_client = blob_service_client.get_container_client(identifier)
    blob_client = container_client.get_blob_client('audio.wav')
    sas_blob = generate_blob_sas(account_name=service_config['account_name'],
                                 container_name=identifier,
                                 blob_name='audio.wav',
                                 account_key=service_config['account_key'],
                                 permission=BlobSasPermissions(read=True),
                                 expiry=datetime.utcnow() +
                                 timedelta(hours=24))
    uri = blob_client.url + '?' + sas_blob
    logging.info("Starting transcription client...")

    # configure API key authorization: subscription_key
    configuration = cris_client.Configuration()
    configuration.api_key["Ocp-Apim-Subscription-Key"] = service_config[
        'subscription_key']
    configuration.host = f"https://{service_config['service_region']}.api.cognitive.microsoft.com/speechtotext/v3.0"

    # create the client object and authenticate
    client = cris_client.ApiClient(configuration)

    # create an instance of the transcription api class
    api = cris_client.DefaultApi(api_client=client)
    try:
        # Specify transcription properties by passing a dict to the properties parameter. See
        # https://docs.microsoft.com/azure/cognitive-services/speech-service/batch-transcription#configuration-properties
        # for supported parameters.
        properties = {
            "punctuationMode": "Automatic",
            "profanityFilterMode": "None",
            "wordLevelTimestampsEnabled": True,
            "diarizationEnabled": (speaker_type == "both"),
            "timeToLive": "PT1H"
        }

        # Use base models for transcription.
        transcription_definition = cris_client.Transcription(
            display_name="Simple transcription",
            description="Simple transcription description",
            locale=language,
            content_urls=[uri],
            properties=properties)

        created_transcription, status, headers = api.create_transcription_with_http_info(
            transcription=transcription_definition)

        # get the transcription Id from the location URI
        transcription_id = headers["location"].split("/")[-1]

        # Log information about the created transcription. If you should ask for support, please
        # include this information.
        logging.info(
            f"Created new transcription with id '{transcription_id}' in region {service_config['service_region']}"
        )

        logging.info("Checking status.")

        transcript = {}
        completed = False
        while not completed:
            # wait for 5 seconds before refreshing the transcription status
            time.sleep(5)

            transcription = api.get_transcription(transcription_id)
            logging.info(f"Transcriptions status: {transcription.status}")

            if transcription.status in ("Failed", "Succeeded"):
                completed = True

            if transcription.status == "Succeeded":
                pag_files = api.get_transcription_files(transcription_id)
                for file_data in _paginate(api, pag_files):
                    if file_data.kind != "Transcription":
                        continue

                    results_url = file_data.links.content_url
                    results = requests.get(results_url)
                    transcript = json.loads(results.content)
            elif transcription.status == "Failed":
                raise Exception(
                    f"Transcription failed: {transcription.properties.error.message}"
                )
    finally:
        delete_all_transcriptions(api)
    return transcript