示例#1
0
def save_text_file(data, containerName, blobName, accountName, accountKey):
    """Upload a text payload to Azure block blob storage.

    Parameters
    ----------
    data : str
        Text content to upload.
    containerName : str
        Container in the storage account that receives the blob.
    blobName : str
        Name of the blob inside the container.
    accountName : str
        Name of the storage account.
    accountKey : str
        Access key for the storage account.

    Returns
    -------
    None
    """
    # Build a client authenticated with the account's shared key.
    service = BlockBlobService(account_name=accountName,
                               account_key=accountKey)
    service.create_blob_from_text(containerName, blobName, data)
示例#2
0
def save_to(result, season, table, season_type, save_destination):
    """Write ``result`` as a semicolon-separated CSV to the chosen destination.

    The file is always named ``<season>_<table>.csv``.

    :param result: object exposing ``to_csv`` (presumably a pandas DataFrame).
    :param season: first part of the file name.
    :param table: second part of the file name.
    :param season_type: kept for interface compatibility; it is not part of
        the file name.
    :param save_destination: ``"local"`` writes below ``/wc/data/``; ``"s3"``
        uploads to the bucket named by the ``s3bucket`` environment variable
        under ``crawled_data/``; ``"wasb"`` uploads to the Azure container
        described by the ``wasbaccountname``, ``containername`` and
        ``wasbaccountkey`` environment variables. Any other value writes
        nothing.
    """
    file_name = season + '_' + table + '.csv'

    if save_destination == "local":
        local_path = '/wc/data/' + file_name
        result.to_csv(local_path, index=False, sep=';')
        # Report the path that was really written. The earlier message
        # appended season_type, which never was part of the file name.
        print('CSV was successfully saved as:' + local_path)
    elif save_destination == "s3":
        bucket = os.environ['s3bucket']
        csv_buffer = StringIO()
        result.to_csv(csv_buffer, index=False, sep=';')
        s3_key = 'crawled_data/' + file_name
        boto3.resource('s3').Object(bucket, s3_key).put(
            Body=csv_buffer.getvalue())
        print('CSV was successfully uploaded to s3: ' + s3_key)
    elif save_destination == "wasb":
        wasbaccountname = os.environ['wasbaccountname']
        containername = os.environ['containername']
        wasbaccountkey = os.environ['wasbaccountkey']
        csv_buffer = StringIO()
        result.to_csv(csv_buffer, index=False, sep=';')
        block_blob_service = BlockBlobService(account_name=wasbaccountname,
                                              account_key=wasbaccountkey)
        block_blob_service.create_blob_from_text(containername, file_name,
                                                 csv_buffer.getvalue())
        print(
            'CSV was successfully uploaded to Azure Storage Account container: '
            + containername + '/' + file_name)
示例#3
0
def publishPowerBI(blockBlobService: BlockBlobService, x: int,
                   imgByteArr: Image, folder: str, eventVideo: list, text: str,
                   caption: str):
    """Upload a train-number picture and push one row to a Power BI stream.

    The picture is stored in the container named by the
    ``remoteStorageOutputContainer`` environment variable, a read-only SAS
    token valid for one hour is generated for it, and a single JSON row is
    POSTed to the URL in ``powerBIConnectionString``. A failure of the POST
    is logged and otherwise ignored.

    :param blockBlobService: client used for the upload and SAS generation.
    :param x: index appended to the blob name.
    :param imgByteArr: picture payload handed to ``create_blob_from_text``.
    :param folder: unused.
    :param eventVideo: indexed with the string keys ``filename``,
        ``location``, ``track`` and ``timestamp`` (so a mapping, despite the
        ``list`` annotation).
    :param text: recognised train number; part of the blob name and the row.
    :param caption: caption placed in the row.
    """
    import json  # local import: the payload is now serialised with json

    container = os.environ['remoteStorageOutputContainer']
    blobOutputTrainNumber = 'trainnumber/' + eventVideo[
        "filename"] + "_" + text + "_" + str(x) + ".png"
    blockBlobService.create_blob_from_text(container, blobOutputTrainNumber,
                                           imgByteArr)
    # create SAS url such that the picture can be read from blob storage
    sasTokenRead = blockBlobService.generate_blob_shared_access_signature(
        container, blobOutputTrainNumber, BlobPermissions.READ,
        datetime.utcnow() + timedelta(hours=1))
    sasPictureTrainNumber = (os.environ['storUrl'] + container + "/" +
                             blobOutputTrainNumber + "?" + sasTokenRead)

    # Every value is sent as a string, as before. json.dumps escapes quotes
    # and backslashes, which the former str.format template did not, so a
    # caption containing '"' no longer yields invalid JSON.
    row = {
        "location": str(eventVideo["location"]),
        "track": str(eventVideo["track"]),
        "time": str(eventVideo["timestamp"]),
        "trainNumber": str(text),
        "probGraffiti": str(0.0),
        "caption": str(caption),
        "sasPictureTrainNumber": str(sasPictureTrainNumber),
        "sasPictureGraffiti": "",
    }
    streamingDataStr = json.dumps([row]).encode("utf-8")

    try:
        req = urllib2.Request(os.environ['powerBIConnectionString'],
                              streamingDataStr)
        # NOTE(review): a bare ssl.SSLContext() does not enable certificate
        # verification by default; consider ssl.create_default_context().
        # Left unchanged here - confirm whether this is intentional.
        gcontext = ssl.SSLContext()
        response = urllib2.urlopen(req, context=gcontext)
        logging.info(
            "POST request to Power BI with data:{0}".format(streamingDataStr))
        logging.info("Response: HTTP {0} {1}\n".format(response.getcode(),
                                                       response.read()))
    except Exception:
        # Best effort: the dashboard update must not break the caller, but
        # keep the traceback so the failure can be diagnosed.
        logging.info(
            "Writing to Power BI dashboard failed, no breaking error, continue",
            exc_info=True)
def import_historic_csv_files(url, account_name, account_key, container_name):
    """Copy CSV files linked from ``url`` that are not yet in Blob Storage.

    The links come from ``get_historic_csv_file_links_to_download(url)``;
    a link is skipped when its base name is already reported by
    ``get_blob_names_already_imported``. Each remaining file is downloaded,
    decoded as UTF-8 and stored as a blob named after the file.

    :param url: page passed to the link-discovery helper.
    :param account_name: storage account name.
    :param account_key: storage account key.
    :param container_name: container that receives the blobs.
    """
    block_blob_service = BlockBlobService(account_name, account_key)
    blobs_already_imported = get_blob_names_already_imported(
        block_blob_service, container_name)

    new_files = 0
    for csv_link in get_historic_csv_file_links_to_download(url):
        filename = os.path.basename(csv_link)
        if filename in blobs_already_imported:
            continue

        new_files += 1
        r = requests.get(csv_link)
        if not r.ok:
            # Do not store an HTTP error page as if it were the CSV: the
            # blob would then count as imported on every later run.
            logging.warning("Skipping '%s': download failed with HTTP %s",
                            csv_link, r.status_code)
            continue

        # change the encoding for the request content
        r.encoding = 'utf-8'
        block_blob_service.create_blob_from_text(container_name, filename,
                                                 r.text)
        logging.info(
            f"Created blob file '{filename}' in the container '{container_name}'"
        )

    if new_files == 0:
        logging.info(
            "Nothing new to import. All CSV files are already uploaded to Blob Storage."
        )
示例#5
0
def save_json(data, blob_path, filename, container_name, bbs=None):
    """Serialise ``data`` to JSON and store it as a blob.

    :param data: object passed to ``json.dumps``.
    :param blob_path: path joined in front of ``filename``.
    :param filename: final component of the blob name.
    :param container_name: target container; also handed to
        ``remove_container_name_from_blob_path`` together with the joined
        name.
    :param bbs: optional blob service client; when falsy a client is built
        from ``config["account_name"]`` and ``config["account_key"]``.
    """
    service = bbs if bbs else BlockBlobService(
        account_name=config["account_name"],
        account_key=config["account_key"])

    joined = os.path.join(blob_path, filename)
    target_name = remove_container_name_from_blob_path(joined, container_name)

    payload = json.dumps(data)
    service.create_blob_from_text(container_name, target_name, payload)
def main(config_filename, output_model_name, sample_frac):
    """Score the static image set with a trained model and upload the result.

    The configuration's ``model_source`` selects which model components are
    loaded and how predictions are produced. The ``filepath`` and
    ``pred_label`` columns are written as CSV to the blob
    ``config.predictions_filename`` in ``config.container_prediction_results``.

    :raises Exception: when ``model_source`` is neither ``'batchaitraining'``
        nor ``'mmlspark'``.
    """
    config = ConfigFile(config_filename, output_model_name)

    # Reject unknown sources before any model component is loaded.
    if config.model_source not in ('batchaitraining', 'mmlspark'):
        raise Exception('Model source not recognized')
    if config.model_source == 'batchaitraining':
        config = load_batchaitraining_model_components(config)
    else:
        config = load_mmlspark_model_components(config)

    frame = load_data(config, sample_frac)

    if config.model_source == 'batchaitraining':
        labelled = frame.withColumn('pred_label',
                                    config.label_udf('features'))
        predictions = labelled.select('filepath', 'pred_label')
    elif config.model_source == 'mmlspark':
        scored = config.mmlspark_model.transform(frame)
        predictions = config.tf.transform(scored).select(
            'filepath', 'pred_label')

    csv_text = predictions.toPandas().to_csv(index=False)
    uploader = BlockBlobService(config.storage_account_name,
                                config.storage_account_key)
    uploader.create_blob_from_text(config.container_prediction_results,
                                   config.predictions_filename,
                                   csv_text)
    def test_create_blob_with_container_sas(self):
        """A client holding a container SAS (READ + CREATE) can create a blob."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        token = self.bs.generate_container_shared_access_signature(
            self.container_name,
            permission=ContainerPermissions.READ + ContainerPermissions.CREATE,
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        bs_with_sas = BlockBlobService(
            account_name=self.settings.STORAGE_ACCOUNT_NAME,
            sas_token=token,
            protocol=self.settings.PROTOCOL)

        # Act
        test_blob_name = "testblobname"
        test_blob_content = "test-blob-content"
        bs_with_sas.create_blob_from_text(self.container_name,
                                          blob_name=test_blob_name,
                                          text=test_blob_content)
        blob_created_successfully = bs_with_sas.exists(
            container_name=self.container_name, blob_name=test_blob_name)

        # Assert
        # This message is only shown when the assertion FAILS, so it has to
        # describe the failure rather than claim success.
        self.assertTrue(
            blob_created_successfully,
            "blob: " + test_blob_name +
            " was not created with the container SAS")
示例#8
0
def write_full_transcript_to_blob(id, transcript):
    """Store ``transcript`` as a JSON blob named ``id``.

    Uses the module-level ``account_name``, ``account_key`` and
    ``corpus_container_name`` and prints progress messages before and after
    the upload.
    """
    print("saving transcript as blob...")
    service = BlockBlobService(account_name, account_key)
    payload = json.dumps(transcript)
    service.create_blob_from_text(corpus_container_name, id, payload)
    print("blob:  %s saved." % id)
示例#9
0
def WriteBlob(blob_name, txt):
    """Create a single blob and store text in it.

    The container ``log_container_name`` is created with blob-level public
    access, ``txt`` is uploaded as ``blob_name`` and the URL of that blob is
    printed. Errors are reported on stdout and not re-raised.

    :param blob_name: name of the blob to write.
    :param txt: text content of the blob.
    """
    # Name of the step in progress, so that a failure can say where it
    # happened.
    step = "BlockBlobService"
    try:
        blob_service = BlockBlobService(account_name, account_key)

        step = "create_container"
        blob_service.create_container(log_container_name,
                                      public_access=PublicAccess.Blob)

        step = "create_blob_from_text"
        blob_service.create_blob_from_text(log_container_name, blob_name, txt)

        step = "make_blob_url"
        # Print the URL of the blob written above; the previous code
        # referenced a different name (log_blob_name) here.
        print(blob_service.make_blob_url(log_container_name, blob_name))
    except Exception as exc:
        # Still best-effort, but no longer silent about what went wrong.
        print("Exception. step={0}: {1!r}".format(step, exc))
示例#10
0
def main(msg: func.QueueMessage) -> None:
    """Queue-triggered function: append one address row and drop a trigger blob.

    The queue item is logged, the address file in the ``raw`` container is
    read, extended by one record and written back, and finally an empty,
    timestamp-named blob is created in the ``adftrigger`` container.
    """
    logging.info('Python queue trigger function processed a queue item: %s',
                 msg.get_body().decode('utf-8'))

    account = "testedlstorgen"
    address_blob = ("testprivcmddataflow/WideWorldImporters-Sales/address/"
                    "SalesLTAddress.txt")

    # get bearer token and authenticate to ADLSgen2 using Managed Identity of Azure Function
    credentials = MSIAuthentication(resource='https://storage.azure.com/')
    blob_service = BlockBlobService(account, token_credential=credentials)

    # Timestamp for the trigger blob name and a key counted in seconds
    # since 2019-01-01.
    now = datetime.now()
    nowstr = datetime.now().strftime("%Y%m%dT%H%M%S%Z")
    key = round((now - datetime(2019, 1, 1)).total_seconds())
    logging.info("key: " + str(key))

    # Add record to csv file. Notice that AppendBlob is not yet supported on ADLSgen2, see https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues
    existing = blob_service.get_blob_to_text("raw", address_blob).content
    new_row = str(
        key
    ) + ",8713 Yosemite Ct.,,Bothell,Washington,United States,98011,268af621-76d7-4c78-9441-144fd139821a,2006-07-01 00:00:00.0000000"
    blob_service.create_blob_from_text("raw", address_blob,
                                       existing + "\n" + new_row)

    # Create event such that ADFv2 is triggered
    trigger_service = BlockBlobService(account, token_credential=credentials)
    trigger_service.create_blob_from_text("adftrigger",
                                          "adftrigger" + nowstr + ".txt", "")
示例#11
0
def post_review():
    """Store a posted review and answer with an empty body.

    A JSON object with a ``text`` key is annotated with ``get_emo`` output
    under ``analysis`` and inserted into the ``reviews`` collection. An
    object with an ``audio`` key has that value uploaded to the
    ``review-audio-blobs`` container under a fresh UUID name.

    :returns: ``('', 201)`` on success, ``('', 400)`` when the body is not a
        JSON object or has neither key.
    """
    data = request.get_json()

    # The parsed body may be None or a non-object value such as a list.
    # Indexing it below would raise, so treat it like any other unusable
    # request.
    if not isinstance(data, dict):
        return '', 400

    if 'text' in data:
        data["analysis"] = get_emo(data["text"])
        client = pymongo.MongoClient(os.getenv('CUSTOMCONNSTR_MONGOURL'))
        try:
            db = client['cloud-computing-homework-5-db']
            db['reviews'].insert_one(data)
        finally:
            # A client is created per request; close it so its connections
            # are not left open after the request ends.
            client.close()
    elif 'audio' in data:
        block_blob_service = BlockBlobService(
            account_name='cch5blobstorage',
            account_key=os.getenv('BLOB_STORAGE_KEY'))
        container_name = 'review-audio-blobs'
        # Blob names are strings; do not hand over the UUID object itself.
        blob_id = str(uuid.uuid1())
        block_blob_service.create_blob_from_text(container_name, blob_id,
                                                 data['audio'])
    else:
        return '', 400

    return '', 201
示例#12
0
    def store_contents(self,
                       content_type: ContentType,
                       container_name: str,
                       content_name: str,
                       content_data: str = None,
                       content_path: str = None):
        """Store content in the storage account as a blob.

        Depending on ``content_type`` the blob is created either from the
        text in ``content_data`` or from the file at ``content_path``. The
        container is created first when it does not exist yet.

        :param content_type: Content type, can be file or text
        :type content_type: enum
        :param container_name: Container name
        :type container_name: str
        :param content_name: Blob (file) name
        :type content_name: str
        :param content_data: File content as text (used for text content)
        :type content_data: str
        :param content_path: Path of the file to copy into the storage
            (used for file content)
        :type content_path: str

        :raises: :class:`CustomException` for any failure, including a
            ``content_path`` that does not exist
        """

        try:
            self._logger.info('storing contents')

            # Fetch the account key on first use only.
            if self._vdc_storage_account_key is None:
                self._vdc_storage_account_key = self.get_storage_account_key()

            block_blob_service = BlockBlobService(
                account_name=self._vdc_storage_account_name,
                account_key=self._vdc_storage_account_key)

            # Create the container if it does not exist
            if not block_blob_service.exists(container_name):
                self._logger.info(
                    'container {} does not exists, proceeding to create it'.
                    format(container_name))
                block_blob_service.create_container(container_name)

            self._logger.info('saving blob')

            if content_type == ContentType.TEXT:
                block_blob_service.create_blob_from_text(
                    container_name=container_name,
                    blob_name=content_name,
                    text=content_data)
            elif content_type == ContentType.FILE:
                if Path(content_path).exists():
                    block_blob_service.create_blob_from_path(
                        container_name=container_name,
                        blob_name=content_name,
                        file_path=content_path)
                else:
                    raise CustomException('File does not exist.')

        except Exception as ex:
            # Chain explicitly so the original traceback is reported as the
            # direct cause of the CustomException.
            raise CustomException(
                'There was an unhandled exception: {}'.format(str(ex))) from ex
示例#13
0
文件: csv_blob.py 项目: yzhu32/kedro
 def _save(self, data: pd.DataFrame) -> None:
     """Serialise ``data`` to CSV and upload it as the configured blob.

     The client is built from ``self._credentials``, the CSV text with
     ``self._save_args``, and ``self._blob_from_text_args`` is forwarded to
     the upload call.
     """
     service = BlockBlobService(**self._credentials)
     csv_text = data.to_csv(**self._save_args)
     service.create_blob_from_text(container_name=self._container_name,
                                   blob_name=self._filepath,
                                   text=csv_text,
                                   **self._blob_from_text_args)
示例#14
0
def put_file_in_azure(azure_emulator_coords: azure_utils.StorageCoordinates,
                      azure_service: blob.BlockBlobService,
                      sample_stream_content: str) -> typing.Generator:
    """Upload a sample blob, yield its name, then delete it again.

    The blob ``test_put_file.txt`` is created in the container given by
    ``azure_emulator_coords`` with ``sample_stream_content`` as its text.
    The delete runs once the generator is resumed after the ``yield``.
    """
    blob_name = 'test_put_file.txt'
    azure_service.create_blob_from_text(
        azure_emulator_coords.container, blob_name, sample_stream_content)

    yield blob_name

    # Clean-up: remove the blob created above.
    azure_service.delete_blob(azure_emulator_coords.container, blob_name)
示例#15
0
 def save_in_blob(self, df, filename):
     """Upload ``df`` as CSV text (without the index) to the image container.

     Account name, key and container come from ``self.config``. Any
     exception raised while connecting or uploading is printed and not
     propagated.
     """
     csv_text = df.to_csv(index=False, encoding='utf-8')
     try:
         settings = self.config
         service = BlockBlobService(
             account_name=settings['blob_account_name'],
             account_key=settings['blob_account_key'])
         service.create_blob_from_text(settings['blob_image_container'],
                                       filename, csv_text)
     except Exception as error:
         print(error)
示例#16
0
    def write_blob_from_text(self, sas_uri, blob_name, text):
        """Upload ``text`` through a SAS URI and return the blob's URL.

        Account, SAS key and container are all extracted from ``sas_uri``
        with this object's ``get_*_from_uri`` helpers. The returned URL
        carries the SAS key as its token.
        """
        account = self.get_account_from_uri(sas_uri)
        token = self.get_sas_key_from_uri(sas_uri)
        sas_service = BlockBlobService(account_name=account, sas_token=token)

        container_name = self.get_container_from_uri(sas_uri)

        # 'utf-8' is the fourth positional argument of the upload call
        # (presumably the text encoding).
        sas_service.create_blob_from_text(container_name, blob_name, text,
                                          'utf-8')

        return sas_service.make_blob_url(
            container_name,
            blob_name,
            sas_token=self.get_sas_key_from_uri(sas_uri))
示例#17
0
class AzureCloudStorage(CloudStorageInterface):
    """Cloud storage implementation that writes text blobs to one container."""

    def __init__(self, account, key, container):
        """Connect with the account's shared key and create ``container``."""
        service = BlockBlobService(account_name=account, account_key=key)
        self.block_blob_service = service
        self.container_name = container
        service.create_container(container)

    def storeBlob(self, path, data, content_type):
        """Log the request, then upload ``data`` as the blob ``path``.

        ``content_type`` is accepted but not used by this implementation.
        """
        logging.info("Storing to Azure [%s:%s] data: [%s]", self.container_name, path, data)
        self.block_blob_service.create_blob_from_text(
            self.container_name,
            blob_name=path,
            text=data,
        )
示例#18
0
def encrypt_and_store_aes_key(az_client, az_config, key_version, key):
    """Encrypt ``key`` with ``az_client`` and store the result as a blob.

    The key is encrypted with the ``RSA-OAEP`` algorithm using the vault and
    key name from ``az_config``. The base64-encoded result is written to the
    blob ``aes_key-<uuid>`` in the configured container.
    """
    encrypted = az_client.encrypt(az_config['key_vault'],
                                  az_config['key_name'],
                                  key_version,
                                  'RSA-OAEP',
                                  key)

    service = BlockBlobService(account_name=az_config['account_name'],
                               account_key=az_config['account_key'])

    container = az_config['container_name']
    blob_name = 'aes_key-' + az_config['uuid']
    encoded_key = base64.b64encode(encrypted.result)
    service.create_blob_from_text(container, blob_name, encoded_key)
示例#19
0
    def combine_azure(self):
        """Merge the newest per-spider feeds into ``latest.json`` and ``upcoming.json``.

        Account, key and container are parsed from the ``FEED_URI`` setting.
        Blobs are searched under the date prefix for today and, failing
        that, each of the three previous days. The JSON-lines content of the
        blobs chosen by ``get_spider_paths`` is merged and sorted by
        ``start``. ``upcoming.json`` holds only the entries whose ``start``
        is later than 24 hours ago.
        """
        from azure.storage.blob import BlockBlobService, ContentSettings

        feed_uri = self.settings.get("FEED_URI")
        feed_prefix = self.settings.get("CITY_SCRAPERS_DIFF_FEED_PREFIX",
                                        "%Y/%m/%d")

        # Everything after the first 8 characters (the scheme) up to '@' is
        # "<account>:<key>"; the container follows the '@'.
        credentials = feed_uri[8:].split("@")[0]
        account_name, account_key = credentials.split(":")
        container = feed_uri.split("@")[1].split("/")[0]
        blob_service = BlockBlobService(account_name=account_name,
                                        account_key=account_key)

        # Walk back from today until a day with at least one blob is found.
        max_days_previous = 3
        prefix_blobs = []
        for days_previous in range(max_days_previous + 1):
            day = datetime.now() - timedelta(days=days_previous)
            prefix_blobs = list(
                blob_service.list_blobs(container,
                                        prefix=day.strftime(feed_prefix)))
            if prefix_blobs:
                break

        spider_blob_names = self.get_spider_paths(
            [found.name for found in prefix_blobs])

        meetings = []
        for blob_name in spider_blob_names:
            feed_text = blob_service.get_blob_to_text(container, blob_name)
            for line in feed_text.content.split("\n"):
                if line:
                    meetings.append(json.loads(line))
        meetings.sort(key=itemgetter("start"))

        yesterday_iso = (datetime.now() - timedelta(days=1)).isoformat()[:19]
        upcoming = [item for item in meetings if item["start"] > yesterday_iso]

        for target_name, items in (("latest.json", meetings),
                                   ("upcoming.json", upcoming)):
            blob_service.create_blob_from_text(
                container,
                target_name,
                "\n".join(json.dumps(item) for item in items),
                content_settings=ContentSettings(cache_control="no-cache"),
            )
示例#20
0
    def create_blob_sas_defintion(self, storage_account_name, vault_url):
        """
        Create a service SAS definition for a blob container and exercise it.

        A template SAS token is generated for the container ``blobcontainer``
        and registered in the vault as the SAS definition ``blobcontall``.
        The token provisioned from that definition is then used to create
        one blob and to delete every blob in the container.
        """

        from azure.storage.blob import BlockBlobService, ContainerPermissions
        from azure.keyvault.models import SasTokenType, SasDefinitionAttributes
        from azure.keyvault import SecretId

        container = 'blobcontainer'

        # The SAS template URI for a service SAS definition contains the
        # storage entity URL with the template token. This builds it for a
        # blob container; the same approach works for the other storage
        # services (File, Queue, Table).

        # Do not sign the template with the real storage account key: the
        # placeholder key 00000000 is used instead.
        template_service = BlockBlobService(account_name=storage_account_name,
                                            account_key='00000000')
        template_token = template_service.generate_container_shared_access_signature(
            container_name=container,
            permission=ContainerPermissions(read=True, write=True,
                                            delete=True, list=True),
            expiry='2020-01-01')

        # Template URI for the container SAS definition.
        template_uri = template_service.make_container_url(
            container_name=container,
            protocol='https',
            sas_token=template_token)

        # Create the SAS definition in the vault.
        sas_definition = self.client.set_sas_definition(
            vault_base_url=vault_url,
            storage_account_name=storage_account_name,
            sas_definition_name='blobcontall',
            template_uri=template_uri,
            sas_type=SasTokenType.service,
            validity_period='PT2H',
            sas_definition_attributes=SasDefinitionAttributes(enabled=True))

        # The definition exposes a secret id; reading that secret from the
        # vault yields a usable SAS token.
        secret_id = SecretId(uri=sas_definition.secret_id)
        sas_token = self.client.get_secret(
            vault_base_url=secret_id.vault,
            secret_name=secret_id.name,
            secret_version=secret_id.version).value

        # Use the provisioned token to work with blobs in the container.
        sas_service = BlockBlobService(account_name=storage_account_name,
                                       sas_token=sas_token)
        sas_service.create_blob_from_text(container_name=container,
                                          blob_name='blob2',
                                          text=u'test blob2 data')

        # Materialise the listing first, then delete each listed blob.
        for existing in list(sas_service.list_blobs(container_name=container)):
            sas_service.delete_blob(container_name=container,
                                    blob_name=existing.name)
示例#21
0
class Blob():
    """Writes text payloads as uniquely named blobs into one container."""

    def __init__(self, container_name='smartbin', account_name=None,
                 account_key=None):
        """Connect to the storage account.

        SECURITY: the account key used to be hard-coded in this file. It has
        been removed; the old key must be treated as compromised and rotated.

        :param container_name: container that receives the blobs.
        :param account_name: storage account; defaults to the
            ``AZURE_STORAGE_ACCOUNT`` environment variable, then to
            ``'iotinfstore'``.
        :param account_key: account key; defaults to the
            ``AZURE_STORAGE_KEY`` environment variable.
        :raises ValueError: when no account key is supplied either way.
        """
        import os  # local import so the class does not rely on file-level imports

        account_name = account_name or os.environ.get(
            'AZURE_STORAGE_ACCOUNT', 'iotinfstore')
        account_key = account_key or os.environ.get('AZURE_STORAGE_KEY')
        if not account_key:
            raise ValueError(
                'No storage account key configured: pass account_key or set '
                'the AZURE_STORAGE_KEY environment variable')

        self.blob_service = BlockBlobService(account_name=account_name,
                                             account_key=account_key)
        self.container_name = container_name

    def write_data_to_blob(self, data):
        """Upload ``data`` as a new blob named with a random UUID."""
        self.blob_service.create_blob_from_text(self.container_name,
                                                str(uuid.uuid4()), data)
class AzureBlobWriter:
    """Reads and writes timestamp-suffixed text blobs in ``ttds-indexes``.

    Every instance targets a new blob named ``<file_path>-<epoch seconds>``.
    ``read`` loads the blob with the same ``<file_path>`` prefix and the
    highest numeric suffix.
    """

    def __init__(self, file_path="index.txt"):
        # Create a new file on every instantiation
        epoch_time = str(time.time()).split('.')[0]
        self._file_path = "{0}-{1}".format(file_path, epoch_time)
        self._blob_service = BlockBlobService(
            account_name=app.config['AZURE_BLOB_ACCOUNT'],
            account_key=app.config['AZURE_BLOB_KEY'])
        self._container_name = "ttds-indexes"

    def read(self):
        """Return the content of the most recently written matching blob.

        :returns: the blob text, or ``None`` when no blob matches or an
            error occurred (errors are printed, not raised).
        """
        content = None
        try:
            # Get the most recently written file with the specified name
            blob_name = self._file_path.rsplit("-", 1)[0]
            highest_blob = 0
            blob_to_load = None

            for blob in self._blob_service.list_blobs(self._container_name):
                prefix, sep, suffix = blob.name.rpartition("-")
                # Skip blobs that do not follow "<name>-<number>". Before,
                # a single such blob raised inside the loop and made the
                # whole read return None.
                if not sep or prefix != blob_name:
                    continue
                try:
                    blob_order = int(suffix)
                except ValueError:
                    continue
                if blob_order > highest_blob:
                    highest_blob = blob_order
                    blob_to_load = blob.name

            if blob_to_load is not None:
                blob = self._blob_service.get_blob_to_text(
                    self._container_name, blob_to_load)
                content = blob.content
        except Exception:
            print("Unexpected error at {}.read: {}".format(
                __name__, sys.exc_info()))
        else:
            print("Read content from {} by {} at {}".format(
                self._file_path, __name__,
                datetime.now().strftime(app.config['DATETIME_FORMAT'])))
        return content

    def write(self, content):
        """Upload ``content`` to this instance's blob; errors are printed."""
        try:
            self._blob_service.create_blob_from_text(self._container_name,
                                                     self._file_path, content)
        except Exception:
            print("Unexpected error at {}.write: {}".format(
                __name__, sys.exc_info()))
        else:
            print(
                "Content of size {} has been written to {} by {} at {}".format(
                    sizeof_fmt(content), self._file_path, __name__,
                    datetime.now().strftime(app.config['DATETIME_FORMAT'])))
示例#23
0
def upload_timestamp_file(
        azure_emulator_coords: azure_utils.StorageCoordinates,
        azure_service: blob.BlockBlobService,
        sample_timestamp: str) -> typing.Generator:
    """Upload the timestamp blob, yield, then delete it again.

    The blob is named ``endpoint.TIMESTAMP_FILENAME`` and holds
    ``sample_timestamp``. The delete runs once the generator is resumed
    after the ``yield``.
    """
    azure_service.create_blob_from_text(
        azure_emulator_coords.container,
        endpoint.TIMESTAMP_FILENAME,
        sample_timestamp,
    )

    yield

    # Clean-up: remove the blob created above.
    azure_service.delete_blob(
        azure_emulator_coords.container,
        endpoint.TIMESTAMP_FILENAME,
    )
def extract_transform(data):
    """Write one ``name,count,meta`` line as a blob in the landing zone.

    Credentials come from the ``STORAGE_USERNAME`` and ``STORAGE_PASSWORD``
    environment variables. The blob is named ``<name>_<uuid4>.csv``.

    :param data: mapping with the keys ``name``, ``count`` and ``meta``.
    :returns: a confirmation message that names the blob.
    """
    user = os.environ['STORAGE_USERNAME']
    password = os.environ['STORAGE_PASSWORD']
    service = BlockBlobService(account_name=user, account_key=password)

    blobname = "".join([data["name"], "_", str(uuid.uuid4()), ".csv"])
    row = ",".join([data["name"], str(data["count"]), data["meta"]])
    service.create_blob_from_text('landingzone', blobname, row)

    return "Uploaded blob {} to landing zone\n".format(blobname)
示例#25
0
def write_blob_data(file_name, content):
    """Upload ``content`` as a blob and read it back once.

    The blob is written to ``postCallContainerName`` and then downloaded
    into a temporary file.

    :returns: ``201`` when upload and read-back succeed, ``400`` when either
        step raises.
    """
    try:
        block_blob_service = BlockBlobService(account_name=accountName,
                                              account_key=accountKey)
        block_blob_service.create_blob_from_text(postCallContainerName,
                                                 file_name, content)
        # The context manager closes the temporary file as soon as the
        # read-back is done, instead of leaving that to garbage collection.
        with tempfile.NamedTemporaryFile() as tmp:
            block_blob_service.get_blob_to_stream(postCallContainerName,
                                                  file_name, tmp)
        return 201
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a 400.
        return 400
def main():
    """Filter ``image-url.tsv`` to six categories and upload the result.

    Credentials and the metadata container name are read from the
    ``credential`` section of ``config.ini``. The rows whose ``Category`` is
    one of the six selected values are written, grouped by category in the
    order listed below, to the blob ``image-url-6-classes.tsv``.
    """
    # Get credential
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    STORAGE_ACCOUNT_NAME = parser.get('credential', 'STORAGE_ACCOUNT_NAME')
    STORAGE_ACCOUNT_KEY = parser.get('credential', 'STORAGE_ACCOUNT_KEY')
    CONTAINER_NAME_METADATA = parser.get('credential',
                                         'CONTAINER_NAME_METADATA')

    # access to blob storage
    block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME,
                                          account_key=STORAGE_ACCOUNT_KEY)

    # load structured data from blob
    blob_text = block_blob_service.get_blob_to_text(CONTAINER_NAME_METADATA,
                                                    'image-url.tsv')
    # DataFrame.from_csv was removed in pandas 1.0; read_csv is the
    # replacement and takes the same arguments used here.
    df = pd.read_csv(StringIO(blob_text.content), index_col=None, sep='\t')
    print(df.shape)

    # Frequency
    df_crosstab = pd.crosstab(index=df["Category"], columns="count")
    print(df_crosstab)

    print(df.shape)
    # NOTE(review): 'entertinement' is kept exactly as spelled - presumably
    # it matches the labels in the data; confirm before correcting it.
    categories = [
        'daily snack',
        'groceries',
        'dining out',
        'clothes and accessories',
        'fuel',
        'entertinement',
    ]
    # One slice per category, concatenated in the order listed above.
    df_sub = pd.concat(
        [df.loc[df['Category'] == category] for category in categories])
    print(df_sub.shape)
    print(df_sub.Category.unique())

    # write cleaned dataframe to blob
    print("-----------write cleaned dataframe to blob------------")
    df_sub_str = df_sub.to_csv(sep='\t', index=False)

    dfblobname = 'image-url-6-classes.tsv'
    settings = ContentSettings(content_type='text/tab-separated-values')
    block_blob_service.create_blob_from_text(CONTAINER_NAME_METADATA,
                                             dfblobname,
                                             df_sub_str,
                                             content_settings=settings)
示例#27
0
def write_blob(file_name, text):
    """Upload ``text`` as the blob ``file_name`` in a public container.

    The container ``container_name`` is created and its access level set to
    ``PublicAccess.Container`` on every call. Any exception is logged through
    ``log`` and not propagated.
    """
    log("Writing blob: {}".format(file_name))
    try:
        service = BlockBlobService(account_name=account_name,
                                   account_key=account_key)
        service.create_container(container_name)
        # Set the permission so the blobs are public.
        service.set_container_acl(container_name,
                                  public_access=PublicAccess.Container)
        service.create_blob_from_text(container_name, file_name, text)
    except Exception as error:
        log("Error while writing blob..{}".format(error))
def main():
    """Build ``image-url.tsv`` from the image container and the label file.

    Credentials and container names are read from the ``credential`` section
    of ``config.ini``. The image container is made public. For the n-th
    blob listed, the n-th ``category`` of ``receipt_list_labelled.csv`` is
    paired with the blob's public URL. The table is uploaded as
    ``image-url.tsv`` to the metadata container.
    """
    # Get credential
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    STORAGE_ACCOUNT_NAME = parser.get('credential', 'STORAGE_ACCOUNT_NAME')
    STORAGE_ACCOUNT_KEY = parser.get('credential', 'STORAGE_ACCOUNT_KEY')
    CONTAINER_NAME = parser.get('credential', 'CONTAINER_NAME')
    CONTAINER_NAME_METADATA = parser.get('credential',
                                         'CONTAINER_NAME_METADATA')

    # access to blob storage
    block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME,
                                          account_key=STORAGE_ACCOUNT_KEY)
    block_blob_service.set_container_acl(CONTAINER_NAME,
                                         public_access=PublicAccess.Container)
    generator = block_blob_service.list_blobs(CONTAINER_NAME)

    # empty dataframe
    df = pd.DataFrame({'Url': [], 'Category': []})

    # get label from file
    blob_text = block_blob_service.get_blob_to_text(
        CONTAINER_NAME_METADATA, 'receipt_list_labelled.csv')
    # DataFrame.from_csv was removed in pandas 1.0; read_csv is the
    # replacement and takes the same arguments used here.
    df_label = pd.read_csv(StringIO(blob_text.content),
                           index_col=None,
                           sep=',')

    url_prefix = ("https://" + STORAGE_ACCOUNT_NAME +
                  ".blob.core.windows.net/" + CONTAINER_NAME + "/")
    # Labels are matched to blobs purely by position in the listing.
    for index, blob in enumerate(generator):
        print(blob.name)
        # populate dataframe
        df.loc[index, 'Category'] = df_label.loc[index, 'category']
        imageurl = url_prefix + blob.name
        df.loc[index, 'Url'] = imageurl
        print(imageurl)

    # write dataframe to blob
    print("-----------------------")
    df_str = df.to_csv(sep='\t', index=False)

    dfblobname = 'image-url.tsv'
    block_blob_service.create_blob_from_text(CONTAINER_NAME_METADATA,
                                             dfblobname, df_str)
示例#29
0
def process(data):
    """Move one text blob from the landing zone to staging, preprocessed.

    The blob name is the last ``/``-separated segment of
    ``data["data"]["url"]``. Its text is downloaded from the ``landingzone``
    container, passed through ``preprocessing`` and written under the same
    name to the ``staging`` container; the landing-zone blob (including its
    snapshots) is then deleted.

    The storage account name and key are read from the ``STORAGE_USERNAME``
    and ``STORAGE_PASSWORD`` environment variables.
    """
    account = os.environ['STORAGE_USERNAME']
    key = os.environ['STORAGE_PASSWORD']
    source_container, target_container = 'landingzone', 'staging'

    service = BlockBlobService(account_name=account, account_key=key)

    # Everything after the final slash of the URL is the blob name.
    blob_name = data["data"]["url"].rsplit("/", 1)[-1]

    landed = service.get_blob_to_text(source_container, blob_name)
    cleaned = preprocessing(landed.content)
    service.create_blob_from_text(target_container, blob_name, cleaned)

    # The source blob is removed only after the staged copy has been written.
    service.delete_blob(source_container,
                        blob_name,
                        delete_snapshots='include')
示例#30
0
class azureObjectStorage:
    """Put-only wrapper around an Azure ``BlockBlobService``."""

    def __init__(self, account_name, account_key):
        """Create the underlying blob service for the given storage account."""
        self.blobService = BlockBlobService(account_name=account_name,
                                            account_key=account_key)

    def put(self, container, key, value, type='text/plain'):
        """Upload *value* to blob *key* in *container*.

        Parameters
        ----------
        container: str
            name of the target container
        key: str
            name of the blob to create
        value: str or readable file-like object
            text is uploaded with ``create_blob_from_text``; any object
            exposing ``read`` is uploaded with ``create_blob_from_stream``
        type: str
            content type stored with the blob

        Raises
        ------
        TypeError
            if *value* is neither a ``str`` nor a readable stream
        """
        # ``type`` shadows the builtin inside this method (the name is kept so
        # existing keyword callers still work), so the builtin must not be
        # called here; streams are recognised by their ``read`` attribute.
        if isinstance(value, str):
            upload = self.blobService.create_blob_from_text
        elif hasattr(value, 'read'):
            upload = self.blobService.create_blob_from_stream
        else:
            raise TypeError(
                "value must be a str or a readable file-like object, "
                "got {}".format(value.__class__.__name__))

        upload(container, key, value,
               content_settings=ContentSettings(content_type=type))
示例#31
0
    def update_status_svg(self, spider, svg):
        """Upload *svg* as ``<spider.name>.svg`` to the status container.

        The account name, account key and container name are read from the
        crawler settings (``AZURE_ACCOUNT_NAME``, ``AZURE_ACCOUNT_KEY`` and
        ``CITY_SCRAPERS_STATUS_CONTAINER``). The blob is stored with content
        type ``image/svg+xml`` and a ``no-cache`` cache-control value.
        """
        from azure.storage.blob import BlockBlobService, ContentSettings

        settings = self.crawler.settings
        service = BlockBlobService(
            account_name=settings.get("AZURE_ACCOUNT_NAME"),
            account_key=settings.get("AZURE_ACCOUNT_KEY"),
        )

        container = settings.get("CITY_SCRAPERS_STATUS_CONTAINER")
        blob_name = "{}.svg".format(spider.name)
        svg_settings = ContentSettings(content_type="image/svg+xml",
                                       cache_control="no-cache")

        service.create_blob_from_text(container,
                                      blob_name,
                                      svg,
                                      content_settings=svg_settings)
示例#32
0
def _upload_with_guessed_type(client, container: str, package_name: str, file_path: str) -> None:
    """Upload *file_path* as ``<package_name>/<basename>`` with a guessed content type."""
    file_name = os.path.basename(file_path)
    content_type, content_encoding = mimetypes.guess_type(file_name)
    client.create_blob_from_path(container_name=container,
                                 blob_name=f'{package_name}/{file_name}',
                                 file_path=file_path,
                                 content_settings=ContentSettings(content_type, content_encoding))


def nightly(build: str, account: str, container: str, sas: str, **_) -> None:
    """Publish built wheels and sdists to a blob container and refresh its indexes.

    For every ``*.whl`` under ``<build>/build`` the wheel and the first
    ``<package>*.tar.gz`` found under ``<build>/source`` are uploaded to
    ``<package>/`` in *container*. The package's listing blob ``<package>/``
    is then regenerated from the blobs currently stored under that prefix,
    and finally ``index.html`` is rewritten with every package processed in
    this run. Both pages are rendered by ``generate_package_list_in_html``.

    Args:
        build: Root directory holding the ``build`` and ``source`` folders.
        account: Storage account name.
        container: Target container name.
        sas: SAS token used to authenticate against the account.
        **_: Extra keyword arguments are accepted and ignored.

    Raises:
        FileNotFoundError: If a wheel has no matching source distribution.
    """
    client = BlockBlobService(account_name=account, sas_token=sas)

    modules_list = []
    for wheel_file in glob.iglob(os.path.join(build, 'build/*.whl')):
        # Wheel file names use underscores where the package name has dashes.
        package_name = os.path.basename(wheel_file).split('-', maxsplit=1)[0].replace('_', '-')

        # NOTE(review): this prefix glob also matches a sibling package whose
        # name merely starts with package_name; the first match is taken.
        sdist_pattern = os.path.join(build, 'source', f'{package_name}*.tar.gz')
        sdist_file = next(glob.iglob(sdist_pattern), None)
        if sdist_file is None:
            # Fail with a descriptive error instead of leaking StopIteration.
            raise FileNotFoundError(f'no source distribution matching {sdist_pattern!r}')

        _upload_with_guessed_type(client, container, package_name, wheel_file)
        _upload_with_guessed_type(client, container, package_name, sdist_file)

        # Everything under the package prefix except the listing blob itself.
        package_blobs = (os.path.basename(b.name)
                         for b in client.list_blobs(container, prefix=package_name + '/')
                         if b.name != f"{package_name}/")

        client.create_blob_from_text(container_name=container,
                                     blob_name=f'{package_name}/',
                                     text=generate_package_list_in_html(f'Links for {package_name}', package_blobs),
                                     content_settings=ContentSettings('text/html'))

        modules_list.append(f"{package_name}/")

    client.create_blob_from_text(container_name=container,
                                 blob_name='index.html',
                                 text=generate_package_list_in_html('Simple Index', modules_list),
                                 content_settings=ContentSettings('text/html'))