svc_pr = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=service_principal_id,
    service_principal_password=service_principal_password,
)

ws = Workspace(ws.subscription_id, ws.resource_group, ws.name, auth=svc_pr)

print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n")

def_blob_store = ws.get_default_datastore()
print("Blobstore's name: {}".format(def_blob_store.name))

# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name, def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")

datasets = []
for blob in generator:
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets
            and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a
# published pipeline
def main(myblob: func.InputStream):
    logging.info(f"Python blob trigger function processed blob \n"
                 f"Name: {myblob.name}\n"
                 f"Blob Size: {myblob.length} bytes")

    file = myblob.read()
    logging.info(type(file))

    csvf = io.BytesIO(file)
    logging.info(type(csvf))
    # logging.info(csvf.getvalue())
    csvf = csvf.getvalue().decode('UTF-8')
    # logging.info(type(csvf))
    # logging.info(csvf)

    sniffer = csv.Sniffer()
    cnt = sniffer.sniff(csvf)
    logging.info(cnt.delimiter)

    df = pd.read_csv(io.BytesIO(file), sep=cnt.delimiter, dtype=str)
    if df.get("ttl") is not None:
        df["ttl"] = pd.to_numeric(df["ttl"])
    logging.info(df)

    results = json.loads(df.to_json(orient='records'))
    logging.info(len(results))

    out = []
    client = cosmos_client.CosmosClient(
        url_connection=config['ENDPOINT'],
        auth={'masterKey': config['PRIMARYKEY']})

    # Upload the created file, use local_file_name for the blob name
    # block_blob_service = BlockBlobService(account_name='accountname', account_key='accountkey')
    # block_blob_service.create_blob_from_path(container_name, local_file_name, full_path_to_file)

    for item in results:
        logging.info("Import")
        item['id'] = item['CONTRACT_ID']
        item = json.dumps(item).replace('null', '""')
        item = json.loads(item)
        logging.info(json.dumps(item, indent=2))
        try:
            logging.info("Try to create the data....")
            client.CreateItem(config['DBLink'], item)
            logging.info("Item was created in Cosmos")
            item['Status'] = 'Create'
            out.append(item)
        except errors.HTTPFailure as e:
            # logging.info(e.status_code)
            # logging.info(e._http_error_message)
            if e.status_code == 409:
                logging.info("We need to update this id")
                query = {
                    'query': 'SELECT * FROM c where c.id="%s"' % item['id']
                }
                options = {}
                docs = client.QueryItems(config['DBLink'], query, options)
                doc = list(docs)[0]
                # Get the document link from attribute `_self`
                doc_link = doc['_self']
                client.ReplaceItem(doc_link, item)
                item['Status'] = 'Update'
                out.append(item)
            else:
                item['Status'] = 'Error'
                out.append(item)

    # logging.info(out)
    out = json.dumps(out)
    df = pd.read_json(out)
    df.to_csv('results.csv')

    # Upload the created file, use local_file_name for the blob name
    block_blob_service = BlockBlobService(
        account_name=os.environ['account_name'],
        account_key=os.environ['account_key'])
    block_blob_service.create_blob_from_path('transferin', 'results.csv', 'results.csv')
from flask import Blueprint, Response, jsonify, request, session, flash, redirect, url_for, Flask, render_template, current_app as app
from azure.storage.blob import ContentSettings
from azure.storage.blob import BlockBlobService
import httplib, urllib, base64
from azure.storage.blob import PublicAccess
import time
import operator
import os
import cognitive_face as CF
import requests
import random

global cname
cname = "a" + str(random.randrange(1, 10000))

block_blob_service = BlockBlobService(
    account_name="spstorageone",
    account_key=
    "khLKjd9wd2xX+aUcvsDV70n1c8/r3rBRuxxZDarqlHK4JDUDpqax/tGpY0VJJxroplz8H+dXNV0iOJ0b5u4iAQ=="
)
block_blob_service.create_container(cname, public_access=PublicAccess.Container)

i = 1
j = 1
while i == 1:
    try:
        tt = "a" + str(j)
        j = j + 1
        _url = block_blob_service.create_blob_from_path(
            cname,
            tt,
            "image.jpg",
            content_settings=ContentSettings(content_type="image/jpg"))
if (response.status_code == 201):
    resjson = response.json()
    saslocator_id = str(resjson['d']['Id'])
    saslocator_baseuri = str(resjson['d']['BaseUri'])
    sto_asset_name = os.path.basename(os.path.normpath(saslocator_baseuri))
    saslocator_cac = str(resjson['d']['ContentAccessComponent'])

    print_phase_message("POST Status.............................: " + str(response.status_code))
    print_phase_message("SAS URL Locator StartTime...............: " + str(resjson['d']['StartTime']))
    print_phase_message("SAS URL Locator Id......................: " + saslocator_id)
    print_phase_message("SAS URL Locator Base URI................: " + saslocator_baseuri)
    print_phase_message("SAS URL Locator Content Access Component: " + saslocator_cac)
else:
    print_phase_message("POST Status: " + str(response.status_code) +
                        " - SAS URL Locator Creation ERROR." + str(response.content))

### Use the Block Blob service from the Azure Storage SDK.
block_blob_service = BlockBlobService(account_name=sto_account_name,
                                      sas_token=saslocator_cac[1:])


### Define a callback method to show progress of large uploads
def uploadCallback(current, total):
    if (current != None):
        print_phase_message('{0:2,f}/{1:2,.0f} MB'.format(current, total / 1024 / 1024))


### Start uploading the video file
print_phase_header("Uploading the Video File")
with open(VIDEO_PATH, mode='rb') as file:
    video_content = file.read()
    video_content_length = len(video_content)

response = block_blob_service.create_blob_from_path(
    sto_asset_name,
    VIDEO_NAME,
# In[125]:

plt.imshow(orig)
plt.show()

# In[126]:

from azure.storage.blob import BlockBlobService

try:
    account_name = '****'
    account_key = '****'
    container_name = 'handwrittenblobs'
    # blob_name = 'test'
    local_path = os.getcwd() + "/Result/"
    # local_file_name = file_name
    print(local_path + file_name)

    orig_rgb = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
    cv2.imwrite(local_path + file_name, orig_rgb)
    full_path_to_file = os.path.join(local_path, file_name)

    service = block_blob_service = BlockBlobService(account_name=account_name,
                                                    account_key=account_key)
    blob = service.create_blob_from_path(container_name, file_name, full_path_to_file)
except Exception as e:
    print(e)
            self.subscription_id = encode(conf['subscription_id'])
            self.secret_key = encode(conf['secret_key'])
            self.resource_group = encode(conf['resource_group'])
            self.storage_account_name = encode(conf['storage_account']['name'])
            self.storage_account_key = encode(conf['storage_account']['key'])
        except KeyError as err:
            raise AttributeError(
                'Please provide a value for "{0}" configuration key'.format(
                    err.args[0]))


# load the configuration data
cfg = Configuration('configuration.json')

# azure block blob service object
blob_service = BlockBlobService(cfg.storage_account_name, cfg.storage_account_key)

# container name
azure_blob_container_name = cfg.container_name

# training data container name
azure_blob_training_data_container_name = cfg.training_data_container_name

# create the container that will host the data blobs
blob_service.create_container(azure_blob_container_name, fail_on_exist=False)


# the function that loads the data from the training blob, partitions the data
# and uploads it to the container blobs
def partition_and_upload_dataset_to_blob(blob_service, azure_blob_container_name):
def main(msg: func.QueueMessage) -> None:
    logging.info('Python queue trigger function processed a queue item: %s',
                 msg.get_body().decode('utf-8'))
    queue_msg = json.dumps({
        'id': msg.id,
        'body': msg.get_body().decode('utf-8'),
        'expiration_time': (msg.expiration_time.isoformat()
                            if msg.expiration_time else None),
        'insertion_time': (msg.insertion_time.isoformat()
                           if msg.insertion_time else None),
        'time_next_visible': (msg.time_next_visible.isoformat()
                              if msg.time_next_visible else None),
        'pop_receipt': msg.pop_receipt,
        'dequeue_count': msg.dequeue_count
    })
    logging.debug(queue_msg)

    try:
        msg_json = json.loads(msg.get_body().decode('utf-8'))
        img_url = msg_json['imageUrl']
        user_name = msg_json["userName"]
        original_filename = msg_json['fileName']
        filetype = msg_json['fileExtension']
        original_file_directory = msg_json['directoryComponents']

        # Only 1 object in this list for now due to single message processing.
        image_object_list = []
        with Image.open(urlopen(img_url)) as img:
            width, height = img.size
            image = ImageInfo(original_filename, img_url, height, width)
            # Append the image object to the list
            image_object_list.append(image)

        data_access = ImageTagDataAccess(get_postgres_provider())
        user_id = data_access.create_user(user_name)

        logging.debug(
            "Add new images to the database, and retrieve a dictionary of ImageIds mapped to ImageUrls"
        )
        image_id_url_map = data_access.add_new_images(image_object_list, user_id)

        copy_destination = os.getenv('DESTINATION_CONTAINER_NAME')

        # Create a blob service for the storage account
        blob_service = BlockBlobService(
            account_name=os.getenv('STORAGE_ACCOUNT_NAME'),
            account_key=os.getenv('STORAGE_ACCOUNT_KEY'))

        # Copy images to permanent storage and get a dictionary of images for which to update URLs
        # in the DB, and a list of failures. If the list of failures contains any items, return a
        # status code other than 200.
        image_id = list(image_id_url_map.values())[0]
        new_blob_name = (str(image_id) + filetype)
        response = urlopen(img_url)
        image_bytes = response.read()

        # Per Azure notes https://docs.microsoft.com/en-us/azure/storage/blobs/storage-properties-metadata:
        # The name of your metadata must conform to the naming conventions for C# identifiers.
        # Dashes do not work. Azure blob storage also lowercases the keys.
        blob_metadata = {
            "userFilePath": original_file_directory,
            "originalFilename": original_filename,
            "uploadUser": user_name
        }
        blob_create_response = blob_service.create_blob_from_bytes(
            copy_destination, new_blob_name, image_bytes, metadata=blob_metadata)

        update_urls_dictionary = {
            image_id: blob_service.make_blob_url(copy_destination, new_blob_name)
        }

        # Otherwise, the dictionary contains permanent image URLs for each image ID that was
        # successfully copied.
        if not blob_create_response:
            logging.error(
                "ERROR: Image copy/delete operation failed. Check state of images in storage."
            )
        else:
            logging.debug("Now updating permanent URLs in the DB...")
            data_access.update_image_urls(update_urls_dictionary, user_id)
            # content = json.dumps({"imageUrls": list(update_urls_dictionary.values())})
            logging.debug("success onboarding.")
    except Exception as e:
        logging.error("Exception: " + str(e))
        raise e

    # TODO: Handle errors and exceptions on the poison queue
def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()

    if mytimer.past_due:
        logging.info('The timer is past due!')

    logging.info('Python timer trigger function ran at %s', utc_timestamp)

    # Storage account credentials
    account_name = "XXXXXXXXXXX"
    account_key = "XXXXXXXXXXX"
    blob_service = BlockBlobService(account_name=account_name, account_key=account_key)
    members = pd.read_csv(
        StringIO(blob_service.get_blob_to_text(container_name='oajustice',
                                               blob_name='members').content))  # .drop(['Unnamed: 0'], axis=1)

    # API credentials
    keys = {"MCAPIKeyPublic": "XXXXXXXXXXX", "MCAPIKeySecret": "XXXXXXXXXXX"}
    r = requests.post('http://apibeta.membercentral.com/v1/authenticate', json=keys)
    token = r.json()
    token = token["data"]["token"]
    token = {'Authorization': 'Bearer ' + token}

    members_url = 'http://apibeta.membercentral.com/v1/member'
    params = {"count": 10000}
    req = requests.get(members_url, json=params, headers=token)

    '''
    Query the member API and check whether entries have been added or updated since the last
    event trigger. If so, make a list of member IDs that were added or updated, make recursive
    calls to the API to get the updates and additions, update the member data, write out the
    CSV, and commit it to Azure Blob storage.
    '''
    # Get the last-updated timestamps for all member URIs
    last_update = pd.to_datetime(extract_values(req.json(), 'datelastupdated'),
                                 infer_datetime_format=True)
    now = datetime.now()

    # Check for updates
    if any(last_update > now.replace(tzinfo=timezone.utc) - timedelta(days=1)):
        updates = pd.DataFrame(extract_values(req.json(), 'membernumber'),
                               columns=['member_number'])
        updates['last_update'] = pd.to_datetime(extract_values(req.json(), 'datelastupdated'),
                                                infer_datetime_format=True)
        # Creates a data frame of member IDs and update timestamps for members with changes
        updates = updates.loc[updates['last_update'] >
                              now.replace(tzinfo=timezone.utc) - timedelta(days=1)]

        # Make API calls to get updates and additions
        member_updates = []
        base_url = 'http://apibeta.membercentral.com/v1/member/'
        for uri in updates['member_number']:
            member_uri = base_url + str(uri)
            response = requests.get(member_uri, headers=token)
            if response.status_code == 200:
                member_updates.append(response.json()["data"]["member"])
            else:
                print(response.text)
                print(response.status_code)

        # Update the data and commit it to Blob storage
        member_updates = json_normalize(member_updates)
        member_updates.set_index('membernumber', inplace=True)
        members.set_index(members['membernumber'], inplace=True)
        members.update(member_updates)

        output = members.to_csv(encoding="utf-8", index=False)
        # Write the updated CSV back to the 'members' blob that was read above
        # (create_blob_from_text takes container name, blob name, text).
        blob_service.create_blob_from_text('oajustice', 'members', output)
# Imports
from azure.storage.blob import BlockBlobService
import os

# Read environment variables
instanceId = str(os.environ['instanceId'])
accountName = str(os.environ['accountName'])
accountKey = str(os.environ['accountKey'])
containerName = str(os.environ['containerName'])
file_name = str(os.environ['file_name'])
print("Provided Input File Name is - " + str(file_name))

# Initialize the blob service
blobService = BlockBlobService(account_name=accountName, account_key=accountKey)

# Get the list of matching file names
content = blobService.list_blobs(containerName)
blob_list = []
for blob in content:
    name = blob.name
    if name == file_name:
        blob_list.append(blob.name)

print("The following filenames will be deleted after successful execution.")
print(blob_list)

# Delete the files one by one by looping over the list
for file_to_delete in blob_list:
    blobService.delete_blob(containerName, file_to_delete, snapshot=None)
    print("File " + str(file_to_delete) + " deleted successfully.")
def readfile():
    blob_service = BlockBlobService(account_name=blob_account_name,
                                    account_key=blob_account_key)
    blob_service.get_blob_to_path(mycontainer, myblobname, mydatafile)
    mydata = pd.read_csv(mydatafile, header=0, sep=";", dtype=dtype_dic)
    return mydata
from flask import Flask, request, session, redirect, url_for, render_template
from azure.storage.blob import BlockBlobService, PublicAccess
from azure.storage.blob import ContentSettings
import mysql.connector
from mysql.connector import errorcode
import os

app = Flask(__name__)

config = {
    'host': 'myserver-mysql-ashu.mysql.database.azure.com',
    'user': '******',
    'password': '******',
    'database': 'mysqlashudb',
    'ssl_ca': 'BaltimoreCyberTrustRoot.crt.pem'
}

block_blob_service = BlockBlobService(
    account_name='ashuazurestorage',
    account_key='HGvsHgPPFOp64gztvR6B9g+RNUUqzwhl+aNid8wpwca1uwejBMEhyVkP3oev1SKEnI5eeq4EIXWfcvzWjxAjuQ==')
block_blob_service.set_container_acl('ashu-blob-container',
                                     public_access=PublicAccess.Container)


@app.route('/')
def index():
    return redirect(url_for('login'))


@app.route('/login', methods=['POST', 'GET'])
def login():
    if request.method == 'POST':
        username = request.form['username']
        print(username)
        session['logged_in'] = True
        session['username'] = username
        return redirect(url_for('dashboard'))
    return render_template('login.html')
def execute_bes(api_key,                              # type: str
                base_url,                             # type: str
                blob_storage_account,                 # type: str
                blob_storage_apikey,                  # type: str
                blob_container,                       # type: str
                blob_path_prefix=None,                # type: str
                blob_charset=None,                    # type: str
                inputs=None,                          # type: Dict[str, pd.DataFrame]
                params=None,
                output_names=None,                    # type: List[str]
                nb_seconds_between_status_queries=5,  # type: int
                requests_session=None                 # type: requests.Session
                ):
    """
    Executes an AzureML web service in batch mode (BES: Batch Execution Service).

    Its inputs are the same as `execute_rr`, but in addition it takes information about the blob
    storage service to use. Indeed, in batch mode all inputs and outputs go through an intermediate
    blob storage.

    The AzureML job status is queried every 5 seconds by default; you may wish to change that
    number with `nb_seconds_between_status_queries`.

    :param api_key: the api key for the service to call
    :param base_url: the URL of the service to call
    :param blob_storage_account: the storage account to use to store the inputs and outputs
    :param blob_storage_apikey: the storage api key to use to store the inputs and outputs
    :param blob_container: the container in the blob storage that will be used to store the inputs
        and outputs
    :param blob_path_prefix: an optional prefix that will be used to store the blobs
    :param blob_charset: optional encoding of files used on the blob storage
    :param inputs: an optional dictionary containing the inputs, by name. Inputs should be
        DataFrames.
    :param params: an optional dictionary containing the parameters by name, or a DataFrame
        containing the parameters.
    :param output_names: an optional list of expected output names. Note that contrary to rr mode,
        no outputs will be provided if this is empty.
    :param nb_seconds_between_status_queries: nb of seconds that the engine waits between job
        status queries. By default this is set to 5.
    :param requests_session: an optional requests.Session object, for example created from
        create_session_for_proxy()
    :return: a dictionary of outputs, by name.
        Outputs are DataFrames.
    """
    # 0- create the blob service client and the generic batch mode client
    batch_client = BatchClient(requests_session=requests_session)

    # if we're here without error that means that `azure-storage` is available
    from azure.storage.blob import BlockBlobService
    from azmlclient.base_databinding_blobs import blob_refs_to_dfs

    blob_service = BlockBlobService(account_name=blob_storage_account,
                                    account_key=blob_storage_apikey,
                                    request_session=requests_session)

    # 1- Push inputs to blob storage and create output references
    print('Pushing inputs to blob storage')
    input_refs, output_refs = batch_client.push_inputs_to_blob__and__create_output_references(
        inputs,
        output_names=output_names,
        blob_service=blob_service,
        blob_container=blob_container,
        blob_path_prefix=blob_path_prefix,
        charset=blob_charset)

    # 2- Create the query body
    request_body = batch_client.create_request_body(input_refs, params, output_refs)

    # 3- Perform the call
    json_job_id = None
    try:
        # -- a) create the job
        print('Creating job')
        json_job_id = batch_client.execute_batch_createJob(base_url, api_key, request_body)

        # -- b) start the job
        print('Starting job ' + str(json_job_id))
        batch_client.execute_batch_startJob(base_url, api_key, json_job_id)
        print('Job ' + str(json_job_id) + ' started')

        # -- polling loop
        outputs_refs2 = None
        while outputs_refs2 is None:
            # -- c) poll the job status
            print('Polling job status for job ' + str(json_job_id))
            statusOrResult = batch_client.execute_batch_getJobStatusOrResult(
                base_url, api_key, json_job_id)

            # -- d) check the job status and read the response into a dictionary
            outputs_refs2 = batch_client.read_status_or_result(statusOrResult)

            # wait
            print('Waiting ' + str(nb_seconds_between_status_queries) + 's until next call.')
            time.sleep(nb_seconds_between_status_queries)
    finally:
        # -- e) delete the job
        if json_job_id is not None:
            print('Deleting job ' + str(json_job_id))
            batch_client.execute_batch_deleteJob(base_url, api_key, json_job_id)

    # 4- Retrieve the outputs
    print('Job ' + str(json_job_id) + ' completed, results: ')
    print(json.dumps(outputs_refs2, indent=4))

    print('Retrieving the outputs from the blob storage')
    # don't use the output of the job status (outputs_refs2): it does not contain the connection string
    result_dfs = blob_refs_to_dfs(output_refs, requests_session=requests_session)

    return result_dfs
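# --- Added usage sketch (not part of the original source) ---
# A minimal illustration of how execute_bes might be called. The service URL, API key,
# storage account/key, container, and the input/output names ('input1', 'output1') are all
# placeholders and assumptions about the web service contract, not values from the source.
if __name__ == '__main__':
    import pandas as pd

    example_outputs = execute_bes(
        api_key='<azureml-api-key>',
        base_url='https://<region>.services.azureml.net/workspaces/<ws-id>/services/<service-id>',
        blob_storage_account='<storage-account-name>',
        blob_storage_apikey='<storage-account-key>',
        blob_container='azml-batch-io',
        inputs={'input1': pd.DataFrame({'x': [1.0, 2.0, 3.0]})},
        output_names=['output1'])
    # Each returned output is a DataFrame keyed by the output name.
    print(example_outputs['output1'].head())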
from azure.storage import AccessPolicy
from azure.storage.blob import BlockBlobService, ContentSettings, ContainerPermissions
from datetime import datetime, timedelta

# The name of the new Shared Access policy
policy_name = 'readandlistonly'

# The Storage Account Name
storage_account_name = 'mystore'
storage_account_key = 'mykey'
storage_container_name = 'mycontainer'
example_file_path = '..\\sampledata\\sample.log'
policy_name = 'mysaspolicy'

# Create the blob service, using the name and key for your Azure Storage account
blob_service = BlockBlobService(storage_account_name, storage_account_key)

# Create the container, if it does not already exist
blob_service.create_container(storage_container_name)

# Upload an example file to the container
blob_service.create_blob_from_path(
    storage_container_name,
    'sample.log',
    example_file_path,
)

# Create a new policy that expires after a week
access_policy = AccessPolicy(permission=ContainerPermissions.READ + ContainerPermissions.LIST,
                             expiry=datetime.utcnow() + timedelta(weeks=1))
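# --- Added follow-up sketch (assumption, not in the original snippet) ---
# One plausible next step is to register the policy on the container and issue a SAS token
# bound to it; set_container_acl, generate_container_shared_access_signature and
# make_blob_url are standard BlockBlobService methods in the azure-storage-blob 2.x SDK.
blob_service.set_container_acl(storage_container_name,
                               signed_identifiers={policy_name: access_policy})
sas_token = blob_service.generate_container_shared_access_signature(
    storage_container_name, id=policy_name)
# Anyone holding this URL can read the blob until the stored policy expires.
print(blob_service.make_blob_url(storage_container_name, 'sample.log',
                                 sas_token=sas_token))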
    raise ValueError("local directory, storage resource group, storage account, and container must be specified as arguments")

# Authenticate to Azure resource manager
automation_runas_connection = automationassets.get_automation_connection("AzureRunAsConnection")
azure_credential = get_automation_runas_credential(automation_runas_connection)
subscription_id = str(automation_runas_connection["SubscriptionId"])

# Get the storage key
storage_client = azure.mgmt.storage.StorageManagementClient(azure_credential, subscription_id)
storage_keys = storage_client.storage_accounts.list_keys(storage_resource_group,
                                                         storage_account_name)
storage_account_key = storage_keys.keys[0].value

# Authenticate to the storage account
blobservice = BlockBlobService(account_name=storage_account_name,
                               account_key=storage_account_key)

# If the local directory does not exist, create it
if not os.path.exists(local_file_path):
    os.makedirs(local_file_path)

# If a blob is specified, just download that blob; otherwise download everything in the container
if blob_name is not None:
    blob = blobservice.get_blob_properties(storage_account_container_name, blob_name)
else:
    blobs = blobservice.list_blobs(storage_account_container_name)

    # Download all blobs from the container and create a local file layout to match
    for blob in blobs:
        download_blob(blob, local_file_path)
    def test_job_level_mounting(self, resource_group, location, cluster,
                                storage_account, storage_account_key):
        """Tests if it's possible to mount external file systems for a job."""
        job_name = 'job'

        # Create a file share and a container to mount on the job level
        if storage_account.name != helpers.FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            files.create_share('jobshare', fail_on_exist=False)
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            blobs.create_container('jobcontainer', fail_on_exist=False)

        job = self.client.jobs.create(
            resource_group.name,
            helpers.DEFAULT_WORKSPACE_NAME,
            helpers.DEFAULT_EXPERIMENT_NAME,
            job_name,
            parameters=models.JobCreateParameters(
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                mount_volumes=models.MountVolumes(
                    azure_file_shares=[
                        models.AzureFileShareReference(
                            account_name=storage_account.name,
                            azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
                                storage_account.name, 'jobshare'),
                            relative_mount_path='job_afs',
                            credentials=models.AzureStorageCredentialsInfo(
                                account_key=storage_account_key),
                        )
                    ],
                    azure_blob_file_systems=[
                        models.AzureBlobFileSystemReference(
                            account_name=storage_account.name,
                            container_name='jobcontainer',
                            relative_mount_path='job_bfs',
                            credentials=models.AzureStorageCredentialsInfo(
                                account_key=storage_account_key),
                        )
                    ]),
                # Put standard output on the cluster level AFS to check that the job has access to it.
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    helpers.AZURE_FILES_MOUNTING_PATH),
                # Create two output directories on the job level AFS and blobfuse.
                output_directories=[
                    models.OutputDirectory(
                        id='OUTPUT1',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
                    models.OutputDirectory(
                        id='OUTPUT2',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
                ],
                # Check that the job preparation has access to the job level file systems.
                job_preparation=models.JobPreparation(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                    'echo done'),
                # Check that the job has access to the job level file systems.
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                    'echo done'))).result()

        self.assertEqual(
            helpers.wait_for_job_completion(self.is_live, self.client,
                                            resource_group.name, job.name,
                                            helpers.MINUTE),
            models.ExecutionState.succeeded)

        job = self.client.jobs.get(resource_group.name,
                                   helpers.DEFAULT_WORKSPACE_NAME,
                                   helpers.DEFAULT_EXPERIMENT_NAME,
                                   job.name)

        # Assert job and job prep standard output is populated on the cluster level filesystem
        helpers.assert_job_files_are(
            self, self.client, resource_group.name, job.name,
            helpers.STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'done\n',
                u'stderr.txt': u'',
                u'stdout-job_prep.txt': u'done\n',
                u'stderr-job_prep.txt': u''
            })

        # Assert files are generated on the job level AFS
        helpers.assert_job_files_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT1', {
                                         u'job_afs.txt': u'afs\n',
                                         u'prep_afs.txt': u'afs\n',
                                         u'afs': None
                                     })

        # Assert files are generated on the job level blobfuse
        helpers.assert_job_files_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT2', {
                                         u'job_bfs.txt': u'bfs\n',
                                         u'prep_bfs.txt': u'bfs\n',
                                         u'bfs': None
                                     })

        # Assert subfolders are available via the API
        helpers.assert_job_files_in_path_are(self, self.client,
                                             resource_group.name, job.name,
                                             'OUTPUT1', 'afs',
                                             {u'job_afs.txt': u'afs\n'})
        helpers.assert_job_files_in_path_are(self, self.client,
                                             resource_group.name, job.name,
                                             'OUTPUT2', 'bfs',
                                             {u'job_bfs.txt': u'bfs\n'})

        # Assert that we can access the output files created on job level mount volumes directly
        # in storage using the path segment returned by the server.
        if storage_account.name != helpers.FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            self.assertTrue(
                files.exists(
                    'jobshare', job.job_output_directory_path_segment + '/' +
                    helpers.OUTPUT_DIRECTORIES_FOLDER_NAME, 'job_afs.txt'))
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            self.assertTrue(
                blobs.exists(
                    'jobcontainer', job.job_output_directory_path_segment + '/' +
                    helpers.OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt'))

        # After the job is done the filesystems should be unmounted automatically; check this by
        # submitting a new job.
        checker = self.client.jobs.create(
            resource_group.name,
            helpers.DEFAULT_WORKSPACE_NAME,
            helpers.DEFAULT_EXPERIMENT_NAME,
            'checker',
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    helpers.AZURE_FILES_MOUNTING_PATH),
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='echo job; df | grep -E "job_bfs|job_afs"'))
        ).result()

        # Check that the job failed because there are no job level mount volumes anymore
        self.assertEqual(
            helpers.wait_for_job_completion(self.is_live, self.client,
                                            resource_group.name, checker.name,
                                            helpers.MINUTE),
            models.ExecutionState.failed)

        # Check that the cluster level AFS was still mounted
        helpers.assert_job_files_are(self, self.client, resource_group.name,
                                     checker.name,
                                     helpers.STANDARD_OUTPUT_DIRECTORY_ID, {
                                         u'stdout.txt': u'job\n',
                                         u'stderr.txt': u''
                                     })
    def watchModels(self, productIds):
        # Run coinitialize for the new thread to be able to log
        pythoncom.CoInitialize()

        if ("STORAGE_ACCOUNT_NAME" in app.config and app.config["STORAGE_ACCOUNT_NAME"]) and (
                "STORAGE_ACCOUNT_KEY" in app.config and app.config["STORAGE_ACCOUNT_KEY"]):
            self.blob_service = BlockBlobService(
                account_name=app.config["STORAGE_ACCOUNT_NAME"],
                account_key=app.config["STORAGE_ACCOUNT_KEY"])
        else:
            self.loggerInstance.logHandledException(
                "modelRefreshTask",
                Exception(
                    "Failed to read storage account name and key values from configurations"
                ))
            raise Exception(
                'Failed to read storage account name and key values from configurations'
            )

        for productId in productIds:
            self.firstTime[productId] = True

        while True:
            for productId in productIds:
                self.loggerInstance.logInsights(
                    "modelRefreshTask: Running model watcher for {0}".format(productId))
                isChanged = False
                try:
                    now = datetime.datetime.now(pytz.utc)
                    if self.blob_service:
                        allblobsList = [
                            blob for blob in list(
                                self.blob_service.list_blobs(
                                    app.config["STORAGE_ACCOUNT_CONTAINER_NAME"]))
                            if blob.name.startswith("{0}/models".format(productId))
                        ]
                        if not len(allblobsList) > 0:
                            self.firstTime[productId] = False
                            continue
                        folders = list(
                            set([int(blob.name.split("/")[2]) for blob in allblobsList]))
                        latestFolder = str(max(folders))
                        downloadList = [
                            blob for blob in allblobsList
                            if blob.name.startswith("{0}/models/{1}".format(
                                productId, latestFolder))
                        ]
                        for blob in downloadList:
                            if self.firstTime[productId] or (
                                    now - blob.properties.last_modified).seconds / 60 < 5:
                                blobname = blob.name
                                dirpath = os.path.join(
                                    os.getcwd(),
                                    app.config["TRAINED_MODELS_PATH"],
                                    productId)
                                try:
                                    os.makedirs(dirpath)
                                except:
                                    pass
                                self.blob_service.get_blob_to_path(
                                    app.config["STORAGE_ACCOUNT_CONTAINER_NAME"],
                                    blobname,
                                    os.path.join(dirpath, blobname.split("/")[-1]))
                                isChanged = True
                        if self.firstTime[productId]:
                            self.firstTime[productId] = False
                except Exception as e:
                    pass
                if isChanged:
                    try:
                        self.loggerInstance.logInsights(
                            "modelRefreshTask: Models are changed for {0}. Triggering model refresh."
                            .format(productId))
                        refreshModel(productId)
                    except Exception as e:
                        self.loggerInstance.logHandledException(
                            "modelRefreshTask",
                            "Failed to refresh model: {0}".format(str(e)))
            time.sleep(5 * 60)
def classify(container_name, num_topics):
    nltk.download('wordnet')

    # List blobs in the container
    block_blob_service = BlockBlobService(
        account_name=GUTENBERG_BLOB_ACCOUNT_NAME,
        account_key=GUTENBERG_BLOB_ACCOUNT_KEY)
    logging.info("Listing blobs in the container...")
    generator = block_blob_service.list_blobs(container_name)

    data = []
    doc_map = {}
    doc_id = 1

    # First level data cleaning
    for blob in generator:
        logging.info("Blob name: " + blob.name)
        readblob = block_blob_service.get_blob_to_bytes(
            container_name,  # name of the container
            blob.name)
        if blob.name != "README":
            doc_map[doc_id] = blob.name
            blob_content = str(readblob.content)
            raw = blob_content.replace('\n', '').replace('\r', '').replace('\r\n', '')
            cleaned_raw = raw.replace('\\r\\n', '')
            data.append(cleaned_raw)
            doc_id += 1
        else:
            pass

    # Tokenizing and lemmatizing
    token_data = []
    for doc in data:
        tokens = clean_text(doc)
        token_data.append(tokens)

    pickled_token_data = pickle.dumps(token_data)
    pickled_docmap = pickle.dumps(doc_map)

    # Store token data and document map for gensim
    block_blob_service.create_blob_from_bytes(container_models, "token_data", pickled_token_data)
    block_blob_service.create_blob_from_bytes(container_models, "docmap", pickled_docmap)

    dictionary = corpora.Dictionary(token_data)
    corpus = [dictionary.doc2bow(text) for text in token_data]

    # Train LDA model
    ldamodel = gensim.models.ldamodel.LdaModel(corpus,
                                               num_topics=num_topics,
                                               id2word=dictionary,
                                               passes=10)
    pickled_ldamodel = pickle.dumps(ldamodel)
    block_blob_service.create_blob_from_bytes(container_models, 'ldamodel', pickled_ldamodel)

    # Construct LDA blob URLs
    lda_model_url = ("https://" + GUTENBERG_BLOB_ACCOUNT_NAME + ".blob.core.windows.net/"
                     + container_models + "/" + "ldamodel")
    token_data_url = ("https://" + GUTENBERG_BLOB_ACCOUNT_NAME + ".blob.core.windows.net/"
                      + container_models + "/" + "token_data")

    response = {}
    response["lda_model_url"] = lda_model_url
    response["token_data_url"] = token_data_url
    return response
from datetime import datetime
from FlaskWebProject import app, db, login
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixin
from azure.storage.blob import BlockBlobService
import string, random
from werkzeug import secure_filename
from flask import flash

blob_container = app.config['BLOB_CONTAINER']
blob_service = BlockBlobService(account_name=app.config['BLOB_ACCOUNT'],
                                account_key=app.config['BLOB_STORAGE_KEY'])


def id_generator(size=32, chars=string.ascii_uppercase + string.digits):
    return ''.join(random.choice(chars) for _ in range(size))


class User(UserMixin, db.Model):
    __tablename__ = 'users'
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), index=True, unique=True)
    password_hash = db.Column(db.String(128))

    def __repr__(self):
        return '<User {}>'.format(self.username)

    def set_password(self, password):
        self.password_hash = generate_password_hash(password)

    def check_password(self, password):
        return check_password_hash(self.password_hash, password)


@login.user_loader
    def get_conn(self):
        """Return the BlockBlobService object."""
        conn = self.get_connection(self.conn_id)
        service_options = conn.extra_dejson
        return BlockBlobService(account_name=conn.login,
                                account_key=conn.password,
                                **service_options)
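    # --- Added usage note (assumption, not from the original source) ---
    # `extra_dejson` forwards any extra JSON stored on the connection straight into
    # BlockBlobService, so a connection configured, for example, with
    #     login    = <storage account name>
    #     password = <storage account key>
    #     extra    = {"protocol": "https", "endpoint_suffix": "core.windows.net"}
    # resolves here to
    #     BlockBlobService(account_name=login, account_key=password,
    #                      protocol="https", endpoint_suffix="core.windows.net")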
def get_blob_service():
    storage_account_name = os.environ['STORAGE_ACCOUNT_NAME']
    storage_account_key = os.environ['STORAGE_ACCOUNT_KEY']
    return BlockBlobService(account_name=storage_account_name,
                            account_key=storage_account_key)
def do_recognition():
    ##################
    #### READ DATA ###
    ##################
    day_now = 0
    day_before = 1
    account_name = 'watchstorage'
    account_key = 'TJWcjsCs4aK9Xorw4DIAZGvKz0AFb2kvgSh49t+3nADR2usZ1ED14GLBQ/klJsSSrKykxu0ghCXn46+0bv2J8Q=='
    container_name_ = 'jnj'

    blob_service = BlockBlobService(account_name=account_name, account_key=account_key)
    blobs = []
    blob_date = []
    generator = blob_service.list_blobs(container_name_)
    for blob in generator:
        blobs.append(blob.name)
        blob_date.append(blob.name[:10])

    blob_table = pd.DataFrame()
    blob_table['date'] = blob_date
    blob_table['blobname'] = blobs

    today = date.today().strftime('%Y-%m-%d')
    yesterday = (date.today() - timedelta(1)).strftime('%Y-%m-%d')
    blob_table = blob_table[(blob_table['date'] == yesterday) | (blob_table['date'] == today)]

    if blob_table.shape[0] > 0:
        blob_df = pd.DataFrame()
        for blobname in blob_table['blobname']:
            blob_Class = blob_service.get_blob_to_text(
                container_name=container_name_, blob_name=blobname)
            blob_String = blob_Class.content
            for chunk in pd.read_csv(StringIO(blob_String), chunksize=10000):
                blob_df = blob_df.append(chunk)
        print("READ DATA FRAMES SIZE :", blob_df.shape[0])

        #################
        #################
        feature_list = [
            'aoa', 'ate', 'apf', 'rms', 'std', 'minimax', 'cor', 'mean', 'min', 'max'
        ]
        preserved_features = ['start']

        for watch_id in blob_df['id'].unique()[::-1]:
            print("Watch ", watch_id, " is being processed")
            df_temp = io.read_g9(blob_df[blob_df['id'] == watch_id], sort=False)
            df_temp = df_temp.drop_duplicates(keep='last')[::2].sort_index()
            print("READ DATA FRAMES SIZE AFTER CLEANING :", df_temp.shape[0])

            # Time to do analysis is specified
            start = yesterday + 'T16:00:00.0000Z'
            start_temp = np.datetime64(start)
            t = pd.Timestamp(start_temp)
            end = today + 'T16:00:00.0000Z'
            end_temp = np.datetime64(end)
            end_time = pd.Timestamp(end_temp)

            # Initialize
            whole_window_size = timedelta(minutes=5)
            window_size = timedelta(seconds=2)
            window_slide = timedelta(seconds=1)
            samples_count = []
            a = 0
            df_out = pd.DataFrame()
            t_start_list = []
            t_end_list = []
            outcome_list = []

            while (t + whole_window_size < end_time):
                label_list = []
                increment = 0
                DF = pd.DataFrame()
                t_end5min = t + whole_window_size
                print("doing time:", t, ' - ', t_end5min)
                t_start_list.append(time_to_str(t))
                t_end_list.append(time_to_str(t_end5min))

                if df_temp.between_time(t.to_pydatetime().time(),
                                        t_end5min.to_pydatetime().time(),
                                        include_start=True,
                                        include_end=False).shape[0] >= 10:
                    while (t + window_slide < t_end5min):
                        t_end = t + window_size
                        snippet_df = df_temp.between_time(
                            t.to_pydatetime().time(),
                            t_end.to_pydatetime().time(),
                            include_start=True,
                            include_end=False)
                        if snippet_df.shape[0] >= 20:
                            increment += 1
                            ser = ff.extract_features(snippet_df,
                                                      index=increment,
                                                      feature_list=feature_list,
                                                      preserved_features=preserved_features)
                            DF = DF.append(ser)
                        t = t_end
                else:
                    t = t_end5min

                if DF.shape[0] <= 11:
                    outcome = 7.0
                else:
                    df_X = DF.set_index(pd.DatetimeIndex(DF['start'])).drop('start', axis=1)
                    del DF
                    df_X.fillna(df_X.mean().fillna(0), inplace=True)
                    X_test = df_X.values
                    y_pred = logreg.predict(X_test)
                    u, c = np.unique(y_pred, return_counts=True)
                    outcome = u[np.argmax(c)]

                outcome_list.append(label_dict[int(outcome)])
                out_ser = pd.Series(outcome, name=(t - whole_window_size, t))
                df_out = df_out.append(out_ser)

            plt.plot(list(range(df_out.shape[0])), df_out[0], "*")

            ## Send predictions
            plt.show()
            dict_list = []
            for i in range(len(outcome_list)):
                payload_dict = {
                    'address': watch_id.split("-")[2],
                    'starttime': t_start_list[i],
                    'endtime': t_end_list[i],
                    'tasklocation': 'Activity',
                    'taskname': outcome_list[i],
                    'name': outcome_list[i],
                    'value': 1
                }
                dict_list.append(payload_dict)

            payload = json.dumps(dict_list)
            url = "https://colife-dashboard.silverline.mobi/uploadActivityLabelForSmartWatch"
            headers = {
                'content-type': "application/json",
                'cache-control': "no-cache",
                'postman-token': "87b2b04f-175f-4a9b-f2c8-bf31de2cae7d"
            }
            response = requests.request("POST", url, data=payload, headers=headers)
            print(response.text)

    return True
from django.db import models
import requests
from collections import namedtuple
import json
import urllib.parse
from azure.storage.blob import BlockBlobService
from azure.storage.blob import PublicAccess
from azure.storage.blob import ContentSettings
import os

block_blob_service = BlockBlobService(
    account_name='hearmes',
    account_key=
    'NZ/H80jO9ma8KsVS9pi0EFgZEj5FhYZYHaGqYg5/HKXP+EhWytB0JbUpWqktKqkfWnw89AwPp4//Q8YhZN6tqg=='
)


# Create your models here.
# TODO: Model to upload document & details: Name, Birthdate, Document
class Migrant(models.Model):
    migrant_id = models.AutoField(primary_key=True)
    first_name = models.CharField(max_length=30)
    last_name = models.CharField(max_length=30)
    destination = models.CharField(max_length=30)
    age = models.CharField(max_length=3)
    job = models.CharField(max_length=30)
    message_text = models.CharField(max_length=600, null=True)
    message_img_path = models.CharField(max_length=255, null=True)
    tags = models.CharField(max_length=255, null=True)
    date = models.CharField(max_length=30, null=True)
    anonymity = models.CharField(max_length=30, null=True)
def gen_all_data(opts):
    tftdw = TFTDWriter(opts)
    tc = TaskIDCounter()
    bbs = BlockBlobService(account_name=auth.store_name(),
                           account_key=auth.store_key())
    task_pq = PriorityQueue()
    jobs = []
    job_to_task = {}

    setattr(
        opts, 'gd_id',
        util.db_insert(table='gd_runs',
                       git_commit=util.get_commit(),
                       wait_n_secs=opts.wait_n_secs,
                       n_jobs_at_once=opts.n_jobs_at_once,
                       n_tfrs_per_file=opts.n_tfrs_per_file,
                       max_n_nodes_train=opts.max_n_nodes_train,
                       max_n_nodes_test=opts.max_n_nodes_test,
                       find_max_tries=opts.find_max_tries,
                       find_percent_to_keep=opts.find_percent_to_keep,
                       query_limit=opts.limit,
                       timeout_ms=opts.timeout_ms))

    assert (not bbs.exists(util.gd_scratch_bcname(gd_id=opts.gd_id)))
    assert (not bbs.exists(util.gd_tfr_bcname(gd_id=opts.gd_id)))
    bbs.create_container(util.gd_scratch_bcname(gd_id=opts.gd_id))
    bbs.create_container(util.gd_tfr_bcname(gd_id=opts.gd_id))

    def launch_task(task):
        job = gen_data_for.remote(opts, task)
        jobs.append(job)
        job_to_task[job] = task

    def push_task(task, prio=None):
        if prio is None:
            prio = task.id.node_id
        task_pq.put_nowait((prio, task))

    def reload_jobs():
        while not task_pq.empty() and len(jobs) < opts.n_jobs_at_once:
            launch_task(task_pq.get_nowait()[1])

    def push_problems():
        util.log(author='push_problems', msg='starting')
        problem_infos = []
        for is_train in [True, False]:
            conn = util._connect()
            try:
                with conn.cursor() as cursor:
                    cursor.execute(mk_query(opts=opts, is_train=is_train))
                    problem_infos.extend([
                        (is_train, result)
                        for result in list(cursor.fetchall_unbuffered())
                    ])
            finally:
                conn.close()
        util.log(author='push_problems', msg='found %d problems' % len(problem_infos))
        for is_train, info in problem_infos:
            with tempfile.TemporaryDirectory() as tmpdir:
                tmpfilename = os.path.join(tmpdir, "%s.dimacs" % str(uuid.uuid4()))
                bbs.get_blob_to_path(info['bcname'], info['bname'], tmpfilename)
                s = solver.Solver(solver.Context(), solver.Options())
                s.from_file(tmpfilename)
                os.system('rm %s' % tmpfilename)
                task = Task(id=tc.fresh_id(info['problem_id'], is_train=is_train),
                            bcnf=to_blob(opts, bbs, s.serialize()))
                assert (task.id.problem_id == info['problem_id'])
                push_task(task)
        util.log(author='push_problems', msg='pushed all problems')

    push_problems_thread = threading.Thread(target=push_problems, args=())
    push_problems_thread.start()

    def get_ready_job():
        while True:
            reload_jobs()
            if jobs:
                ready_jobs, _ = ray.wait(jobs, num_returns=1, timeout=opts.wait_n_secs)
                if ready_jobs:
                    job = ready_jobs[0]
                    jobs.remove(job)
                    assert (job in job_to_task)
                    task = job_to_task[job]
                    del job_to_task[job]
                    return job, task
            time.sleep(1)

    task_result_q = Queue()

    def process_task_result():
        while True:
            task, task_result = task_result_q.get()
            delete_blob(opts, bbs, task.bcnf)
            for btfd in task_result.btfds:
                tfd = from_blob(opts, bbs, btfd, delete=True)
                assert (tfd.n_vars > 0)
                assert (tfd.n_clauses > 0)
                dp_id = util.db_insert(
                    table='gd_dps',
                    gd_id=opts.gd_id,
                    problem_id=task.id.problem_id,
                    node_id=task.id.node_id,
                    node_depth=task.id.node_depth,
                    is_train=task.id.is_train,
                    n_vars=tfd.n_vars,
                    n_clauses=tfd.n_clauses,
                    n_cells=np.shape(tfd.CL_idxs)[0],
                    percent_vars_in_core=float(
                        np.mean(tfd.core_var_mask.astype(np.float32))),
                    percent_clauses_in_core=float(
                        np.mean(tfd.core_clause_mask.astype(np.float32))))
                tftdw.write_tftd(tftd=tfd_to_tftd(
                    dp_id=dp_id, is_train=task.id.is_train, tfd=tfd))

    process_results_thread = threading.Thread(target=process_task_result, args=())
    process_results_thread.start()

    try:
        while True:
            job, task = get_ready_job()
            try:
                task_result = ray.get(job)
            except Exception as e:
                tb = traceback.format_exc()
                util.log(kind='error',
                         author='remote-worker',
                         msg="TASK-ID: %s\n%s\n%s" % (str(task.id), str(e), tb))
                push_task(task, prio=1000000)
                continue
            if task_result.new_bcnfs:
                child_ids = [
                    tc.next_child_id(task.id) for _ in task_result.new_bcnfs
                ]
                for child_id, child_bcnf in zip(child_ids, task_result.new_bcnfs):
                    push_task(Task(id=child_id, bcnf=child_bcnf))
            task_result_q.put((task, task_result))
    except Exception as e:
        tb = traceback.format_exc()
        util.log(kind='error',
                 author='master',
                 msg="FAILING\n%s\n%s" % (str(e), tb))
        print("Exception: ", e)
        print("Failing...")
    finally:
        print("Finally...")
        util.log(kind='info', author='master', msg="finalizing")
        tftdw.finalize()
        util.log(kind='info', author='master', msg="deleting scratch blob container")
        bbs.delete_container(util.gd_scratch_bcname(gd_id=opts.gd_id))
        util.log(kind='info', author='master', msg="finished")
        print("All done!")
    # uncomment this if you want small files to be excluded from upload
    # if size < minFileSize:
    #     data['error'] = "File size too small. min={0} bytes".format(minFileSize)
    return data


# get file info
fileInfo = getFileInfo()
# print(fileInfo)
# print(args.container, blob_name, args.file)

# create blob service
service = BlockBlobService(account_name=args.account, sas_token=args.sas)


# upload helper
def upload_blob(path):
    blob_name = os.path.basename(path)
    try:
        service.create_blob_from_path(args.container, blob_name, path,
                                      if_unmodified_since=if_unmodified_since)
    except AzureHttpError as e:
        error = 'Upload failed: ' + e.error_code
        fileInfo['error'] = error
        print(error)


if not fileInfo['error']:
    upload_blob(filePath)
    def __init__(self, bucket_name, access_key, secret_key):
        self.service = BlockBlobService(account_name=access_key,
                                        account_key=secret_key)
        self.bucket_name = bucket_name
]

for cmd in backup_cmds:
    print(container.exec_run(cmd))

copy_from_container_cmds = [
    f'docker cp ees-mssql:/tmp/content.bak {os.path.join(os.getcwd(), "backup-data", "mssql")}',
    f'docker cp ees-mssql:/tmp/statistics.bak {os.path.join(os.getcwd(), "backup-data", "mssql")}',
]

import time

for cmd in copy_from_container_cmds:
    print(subprocess.run(cmd.split()))
    time.sleep(1)

# Backup cache blob container to backup-data/content-cache
os.makedirs(os.path.join('backup-data', 'content-cache'))
block_blob_service = BlockBlobService(is_emulated=True)
generator = block_blob_service.list_blobs('cache')
for blob in generator:
    if '/' in blob.name:
        head, tail = os.path.split(blob.name)
        os.makedirs(os.path.join(os.getcwd(), 'backup-data', 'content-cache', head),
                    exist_ok=True)
        block_blob_service.get_blob_to_path(
            'cache', blob.name,
            os.path.join(os.getcwd(), 'backup-data', 'content-cache', head, tail))
    else:
        block_blob_service.get_blob_to_path(
            'cache', blob.name,
            os.path.join(os.getcwd(), 'backup-data', 'content-cache',
# def eye_aspect(cords):
from flask import Flask
from twilio.rest import Client
from picamera import PiCamera
from datetime import datetime
import time
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings

camera = PiCamera()

block_blob_service = BlockBlobService(
    account_name='irisdriving',
    account_key=
    'xNhodNyQZdly5H/LcEVZxEUvS4e4yiXEDg+45Ybw114KxPswAz3vIHnfhfzkvwlLz2muqXl3DZI6cbXqptbb2Q=='
)


def callme():
    account_sid = "AC22bf4ab1edd875930cc2be19249fb20f"
    auth_token = "e0bf551c89039c6b299854ce2c07eb26"
    client = Client(account_sid, auth_token)

    # Make the call
    call = client.api.account.calls\
        .create(to="+19785006516",     # Any phone number
                from_="+16177185216",  # Must be a valid Twilio number
                url="http://twimlets.com/holdmusic?Bucket=com.twilio.music.ambient")
    print(call.sid)
# =============================================================================
import os
import re
import pytz
import datetime
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings
import pydocumentdb.document_client as document_client

import config
from helper import merge_roi_label

# =============================================================================
# Create containers and collections
# =============================================================================
# Establish a link to blob
block_blob_service = BlockBlobService(account_name=config.storage_account_name,
                                      account_key=config.storage_account_key)

# Create a container for saving images
if block_blob_service.create_container(config.blob_container_image):
    print("Container for images was created successfully.")
else:
    print("Container for images exists already.")

# Create a container for saving models
if block_blob_service.create_container(config.blob_container_model):
    print("Container for models was created successfully.")
else:
    print("Container for models exists already.")
storageAccount = os.environ['STORAGE_ACCOUNT']
objectName = 'db_{:%Y%m%d%H%M%S}'.format(datetime.datetime.now())
containerName = os.environ['CONTAINER_NAME']
dbUser = os.environ['DB_USER']
dbName = os.environ['DB_NAME']
dbPort = os.environ['DB_PORT']
dbHost = os.environ['DB_HOST']

# Get secrets
dbPasswordFile = open('/backupCredentials/dbPassword.txt', 'r')
dbPassword = dbPasswordFile.read()
storageKeyFile = open('/backupCredentials/storageKey.txt', 'r')
storageKey = storageKeyFile.read()

# Backup DB
print 'Backing up'
# The password placeholder is filled from the credentials file read above.
dumpCommand = 'PGPASSWORD="%s" nice -n 19 pg_dump -C -F c -h%s -U%s -p%s %s > %s' % (
    dbPassword, dbHost, dbUser, dbPort, dbName, objectName)
os.popen(dumpCommand)

# Upload to Azure Blob
print 'Uploading'
azureStorage = BlockBlobService(account_name=storageAccount, account_key=storageKey)
azureStorage.create_blob_from_path(
    containerName,
    objectName,
    objectName,
    content_settings=ContentSettings(content_type='application/octet-stream'))
def main():
    # Get credentials
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    STORAGE_ACCOUNT_NAME = parser.get('credential', 'STORAGE_ACCOUNT_NAME_9')
    STORAGE_ACCOUNT_KEY = parser.get('credential', 'STORAGE_ACCOUNT_KEY_9')
    CONTAINER_NAME = parser.get('credential', 'CONTAINER_NAME_9')
    VISION_API_KEY = parser.get('credential', 'VISION_API_KEY_9')  # need to use Agitare account
    CONTAINER_NAME_OCR = parser.get('credential', 'CONTAINER_NAME_OCR_9')
    CONTAINER_NAME_STRUCTUREDDATA = parser.get('credential', 'CONTAINER_NAME_STRUCTUREDDATA_9')

    # access to blob storage
    block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME,
                                          account_key=STORAGE_ACCOUNT_KEY)
    block_blob_service.set_container_acl(CONTAINER_NAME,
                                         public_access=PublicAccess.Container)
    generator = block_blob_service.list_blobs(CONTAINER_NAME)

    # empty dataframe
    df = pd.DataFrame({'Text': [], 'Category': [], 'ReceiptID': []})

    # get labels from file
    blob_text = block_blob_service.get_blob_to_text(CONTAINER_NAME, 'receipts_list-utf8.csv')
    # print(blob_text.content)
    df_label = pd.DataFrame.from_csv(StringIO(blob_text.content), index_col=None, sep=',')
    # print(df_label.shape); print(df_label)

    # index
    index = 0
    for blob in generator:
        if index <= 2:
            print(blob.name)
            imageurl = ("https://" + STORAGE_ACCOUNT_NAME + ".blob.core.windows.net/"
                        + CONTAINER_NAME + "/" + blob.name)
            print(imageurl)

            # OCR parameters
            params = {'language': 'en', 'detectOrientation ': 'true'}
            headers = dict()
            headers['Ocp-Apim-Subscription-Key'] = VISION_API_KEY
            headers['Content-Type'] = 'application/json'
            image_url = {'url': imageurl}
            image_file = None

            result = processRequest(image_url, image_file, headers, params)
            if result is not None:
                # print(result)
                result_str = json.dumps(result)
                # print(result_str)

                # write result into blob
                ocrblobname = blob.name[:-3] + 'json'
                block_blob_service.create_blob_from_text(CONTAINER_NAME_OCR, ocrblobname,
                                                         result_str)

                # extract text
                text = extractText(result)
                # print(text)

                # populate dataframe
                df.loc[index, 'Text'] = text
            else:
                # populate dataframe
                df.loc[index, 'Text'] = None

            df.loc[index, 'Category'] = df_label.loc[index, 'category']
            df.loc[index, 'ReceiptID'] = blob.name
        else:
            break
        index = index + 1

    # write dataframe to blob
    print("-----------------------")
    df_str = df.to_csv(sep='\t', index=False)
    dfblobname = 'dataframe.tsv'
    block_blob_service.create_blob_from_text(CONTAINER_NAME_STRUCTUREDDATA, dfblobname, df_str)

    return