def sas_auth(self):
    # With account
    account = CloudStorageAccount(account_name='<account_name>', sas_token='<sas_token>')
    client = account.create_table_service()

    # Directly
    client = TableService(account_name='<account_name>', sas_token='<sas_token>')
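# The '<sas_token>' above can be minted by any key-authenticated client. A
# minimal sketch (the table name 'mytable' and the QUERY permission are
# illustrative assumptions; the same generate_* API appears in the SAS tests
# further down):
from datetime import datetime, timedelta
from azure.cosmosdb.table import TableService, TablePermissions

key_client = TableService(account_name='<account_name>', account_key='<account_key>')
sas_token = key_client.generate_table_shared_access_signature(
    'mytable',
    TablePermissions.QUERY,
    datetime.utcnow() + timedelta(hours=1))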
def _get_table_client_from_storage_account(storage_account, session):
    primary_key = StorageUtilities.get_storage_primary_key(
        storage_account['resourceGroup'],
        storage_account['name'],
        session)
    return TableService(
        account_name=storage_account['name'],
        account_key=primary_key)
def key_auth(self):
    # With account
    account = CloudStorageAccount(account_name='<account_name>', account_key='<account_key>')
    client = account.create_table_service()

    # Directly
    client = TableService(account_name='<account_name>', account_key='<account_key>')
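# A third common option, used by several snippets below: build the client from
# a full connection string ('<connection_string>' is a placeholder).
client = TableService(connection_string='<connection_string>')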
class AzureTableConnection:
    def __init__(self, tableName):
        self.tableName = tableName
        self.tableService = TableService(
            account_name=os.environ['STORAGEACCOUNTNAME'],
            account_key=os.environ['STORAGEACCOUNTKEY'])

    def commitBatch(self, batch):
        self.tableService.commit_batch(self.tableName, batch)

    def getData(self, partitionKey, rowKey):
        # Row keys are suffixed with '_<index>'; bound the range query between
        # index 0 and 9999 for the given logical row key.
        startRowKey = '{0}_0'.format(rowKey)
        endRowKey = '{0}_9999'.format(rowKey)
        filterExpression = ("PartitionKey eq '{0}' and "
                            "RowKey gt '{1}' and "
                            "RowKey lt '{2}'").format(partitionKey, startRowKey, endRowKey)
        return self.tableService.query_entities(self.tableName, filter=filterExpression)
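# A minimal usage sketch for the class above (assumes the STORAGEACCOUNTNAME /
# STORAGEACCOUNTKEY environment variables are set and that a 'telemetry' table
# exists -- both hypothetical). All entities in one batch must share a
# PartitionKey.
from azure.cosmosdb.table import TableBatch

conn = AzureTableConnection('telemetry')
batch = TableBatch()
for i in range(3):
    batch.insert_entity({
        'PartitionKey': 'device01',
        'RowKey': 'reading_{0}'.format(i),
        'value': i,
    })
conn.commitBatch(batch)
rows = conn.getData('device01', 'reading')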
def retries(self):
    # By default, retries are performed with an exponential backoff.
    # Any custom retry logic may be used by simply defining a retry function,
    # but several easy pre-written options are available with modifiable settings.
    client = TableService(account_name='<account_name>', account_key='<account_key>')

    # Use an exponential retry, but modify the backoff settings.
    # Here, we increase the initial backoff, increase the number of retry
    # attempts and decrease the base of the exponential backoff.
    client.retry = ExponentialRetry(initial_backoff=30, increment_base=2, max_attempts=5).retry

    # Use a default linear retry policy instead
    client.retry = LinearRetry().retry

    # Turn off retries
    client.retry = no_retry
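# Sketch of the "custom retry function" mentioned above: the client invokes
# retry(context) after each failed request and sleeps for the returned number
# of seconds before retrying, or gives up when None is returned (the
# server-errors-only policy and 3-attempt cap here are illustrative choices).
def retry_server_errors_only(context):
    if context.count >= 3:
        return None  # out of attempts
    status = context.response.status if context.response is not None else None
    if status is not None and status >= 500:
        context.count += 1
        return 2  # seconds to back off before the next attempt
    return None  # don't retry client-side failures

client.retry = retry_server_errors_only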
class AzureStorage():
    def __init__(self, container=None):
        self.AZURE_STORAGE_ACCOUNT = 'logodetectionstorage'
        self.AZURE_STORAGE_KEY = 'jPJyzct+8WD1lKU5M+ZwDflWUGRu+YBpH8n/3Z6qR7WD7uc3HV2U1rtiQKesLRq2tU3jtXIe26RklAYdKzoydA=='
        self.table_service = TableService(account_name=self.AZURE_STORAGE_ACCOUNT,
                                          account_key=self.AZURE_STORAGE_KEY)
        self.blob_service = BlockBlobService(account_name=self.AZURE_STORAGE_ACCOUNT,
                                             account_key=self.AZURE_STORAGE_KEY)
        self.container = "input"
        self.table_list = []  # everything in the table for this logo
        self.logo = ""

    def query(self, tableName, partitionKey, rowKey):
        task = self.table_service.get_entity(tableName, partitionKey, rowKey)
        return task

    def retrieve_table(self, tableName):
        # tasks = table_service.query_entities(tableName, filter="PartitionKey eq 'tasksSeattle'", select='description')
        try:
            tasks = self.table_service.query_entities(tableName)
        except Exception:
            return None
        self.logo = tableName
        for task in tasks:
            self.table_list.append(task)
        self.table_list = sorted(self.table_list, key=lambda k: k['has_logo'], reverse=True)
        return self.table_list

    def download_blob(self, path, logoName):
        # Download the picture for logoName; note the path argument is
        # rebuilt here and the blob is always written to "test.jpeg".
        path = "images/" + logoName
        self.blob_service.get_blob_to_path(self.container, path, "test.jpeg")

    def exists(self, name):
        try:
            self.blob_service.get_blob_properties(self.container, name)
            return True
        except Exception:
            return False
def read_from_secondary(self):
    # If you are using RA-GRS accounts, you may want to enable reading from the
    # secondary endpoint. Note that your application will have to handle this
    # data potentially being out of date as the secondary may be behind the
    # primary.
    client = TableService(account_name='<account_name>', account_key='<account_key>')

    # The location mode is set to primary by default meaning that all requests
    # are sent to the primary endpoint. If you'd like to instead read from the
    # secondary endpoint by default, set location mode to secondary. Note that
    # writes will continue to go to primary as they are not allowed on secondary.
    client.location_mode = LocationMode.SECONDARY

    # You may also decide you want to retry to secondary. This is useful if
    # you'd like to automatically handle the primary being temporarily down.
    # Again, your application will have to handle data being potentially out
    # of date. Retry to secondary logic may be built into a custom retry policy,
    # but our retry policies have a flag to enable it. Here we use the same
    # exponential retry as by default, but allow it to retry to secondary if
    # the initial request to primary fails.
    client.location_mode = LocationMode.PRIMARY  # Reset the location_mode to start with primary
    client.retry = ExponentialRetry(retry_to_secondary=True).retry
def callbacks(self):
    # Callbacks may be used to read or modify the request and response.
    # The request_callback is called when the request is complete except for
    # adding the authentication and date headers.
    # The response_callback is called when the HTTP response is received before
    # any parsing is done.

    # Custom client request id
    client = TableService(account_name='<account_name>', account_key='<account_key>')

    def request_callback(request):
        request.headers['x-ms-client-request-id'] = '<my custom id>'

    client.request_callback = request_callback

    # View data from the response
    def response_callback(response):
        status = response.status
        headers = response.headers

    # Force an exists call to succeed by resetting the status
    client.response_callback = response_callback
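# What the "force an exists call to succeed" comment above alludes to: a
# response_callback may mutate the response before parsing, e.g. overwrite the
# status so downstream checks treat the call as successful (a sketch, not a
# recommended production pattern):
def force_success_callback(response):
    response.status = 200

client.response_callback = force_success_callback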
def clean_storage_account(connection_string):
    pool = ThreadPool(16)
    no_retry = azure.storage.common.retry.no_retry

    try:
        blob_service = BlobServiceClient.from_connection_string(
            connection_string)
        blob_service.retry = no_retry
        pool.map(
            lambda container: delete_container(blob_service, container.name),
            blob_service.list_containers(timeout=3))
    except azure.core.exceptions.ServiceRequestError:
        print("No blob service")

    try:
        file_service = ShareServiceClient.from_connection_string(
            connection_string)
        file_service.retry = no_retry
        pool.map(lambda share: delete_file_share(file_service, share.name),
                 file_service.list_shares(timeout=3))
    except azure.core.exceptions.ServiceRequestError:
        print("No file service")

    try:
        queue_service = QueueServiceClient.from_connection_string(
            connection_string)
        queue_service.retry = no_retry
        pool.map(lambda queue: delete_queue(queue_service, queue.name),
                 queue_service.list_queues(timeout=3))
    except azure.core.exceptions.ServiceRequestError:
        print("No queue service")

    try:
        table_service = TableService(connection_string=connection_string)
        table_service.retry = no_retry
        pool.map(lambda table: delete_table(table_service, table.name),
                 table_service.list_tables(timeout=3))
    except azure.common.AzureException:
        print("No table service")
def proxy(self):
    # Unauthenticated
    client = TableService(account_name='<account_name>', account_key='<account_key>')
    client.set_proxy('127.0.0.1', '8888')

    # Authenticated
    client = TableService(account_name='<account_name>', account_key='<account_key>')
    proxy_user = '******'
    proxy_password = '******'
    client.set_proxy('127.0.0.1', '8888', user=proxy_user, password=proxy_password)
def _unmerge_resource(
        table_client: azuretable.TableService, entity: dict) -> None:
    """Remove node from entity
    :param azuretable.TableService table_client: table client
    :param dict entity: entity to remove the node from
    """
    while True:
        entity = table_client.get_entity(
            _STORAGE_CONTAINERS['table_images'],
            entity['PartitionKey'], entity['RowKey'])
        # collect the VmList entries spread across the entity's properties
        evms = []
        for i in range(0, _MAX_VMLIST_PROPERTIES):
            prop = 'VmList{}'.format(i)
            if prop in entity:
                evms.extend(entity[prop].split(','))
        if _NODEID in evms:
            evms.remove(_NODEID)
        # re-pack the remaining ids into the VmList properties
        for i in range(0, _MAX_VMLIST_PROPERTIES):
            prop = 'VmList{}'.format(i)
            start = i * _MAX_VMLIST_IDS_PER_PROPERTY
            end = start + _MAX_VMLIST_IDS_PER_PROPERTY
            if end > len(evms):
                end = len(evms)
            if start < end:
                entity[prop] = ','.join(evms[start:end])
            else:
                entity[prop] = None
        etag = entity['etag']
        entity.pop('etag')
        try:
            table_client.update_entity(
                _STORAGE_CONTAINERS['table_images'],
                entity=entity, if_match=etag)
            break
        except azure.common.AzureHttpError as ex:
            # 412: etag mismatch, another writer won; re-read and retry
            if ex.status_code != 412:
                raise
def __init__(self, cred, subs_id, my_storage_rg, vmss_rg_name, vmss_name,
             storage, pan_handle, logger=None):
    self.credentials = cred
    self.subscription_id = subs_id
    self.logger = logger
    self.hub_name = vmss_rg_name
    self.storage_name = storage
    self.panorama_handler = pan_handle
    self.vmss_table_name = re.sub(self.ALPHANUM, '', vmss_name + 'vmsstable')
    self.vmss_rg_name = vmss_rg_name

    try:
        self.resource_client = ResourceManagementClient(cred, subs_id)
        self.compute_client = ComputeManagementClient(cred, subs_id)
        self.network_client = NetworkManagementClient(cred, subs_id)
        self.store_client = StorageManagementClient(cred, subs_id)
        store_keys = self.store_client.storage_accounts.list_keys(
            my_storage_rg, storage).keys[0].value
        self.table_service = TableService(account_name=storage,
                                          account_key=store_keys)
    except Exception as e:
        self.logger.error("Getting Azure Infra handlers failed %s" % str(e))
        raise e

    rg_list = self.resource_client.resource_groups.list()
    self.managed_spokes = []
    self.managed_spokes.append(vmss_rg_name)
    self.new_spokes = []
def test_sas_delete(self):
    # SAS URL is calculated from storage key, so this test runs live only
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    entity = self._insert_random_entity()
    token = self.ts.generate_table_shared_access_signature(
        self.table_name,
        TablePermissions.DELETE,
        datetime.utcnow() + timedelta(hours=1),
    )

    # Act
    service = TableService(
        account_name=self.settings.STORAGE_ACCOUNT_NAME,
        sas_token=token,
    )
    self._set_test_proxy(service, self.settings)
    service.delete_entity(self.table_name, entity.PartitionKey, entity.RowKey)

    # Assert
    with self.assertRaises(AzureMissingResourceHttpError):
        self.ts.get_entity(self.table_name, entity.PartitionKey, entity.RowKey)
def test_sas_add_outside_range(self):
    # SAS URL is calculated from storage key, so this test runs live only
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    token = self.ts.generate_table_shared_access_signature(
        self.table_name,
        TablePermissions.ADD,
        datetime.utcnow() + timedelta(hours=1),
        start_pk='test', start_rk='test1',
        end_pk='test', end_rk='test1',
    )

    # Act
    service = TableService(
        account_name=self.settings.STORAGE_ACCOUNT_NAME,
        sas_token=token,
    )
    self._set_test_proxy(service, self.settings)
    with self.assertRaises(AzureHttpError):
        entity = self._create_random_entity_dict()
        service.insert_entity(self.table_name, entity)
def test_account_sas(self):
    # SAS URL is calculated from storage key, so this test runs live only
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    table_name = self._create_table()
    entity = {
        'PartitionKey': 'test',
        'RowKey': 'test1',
        'text': 'hello',
    }
    self.ts.insert_entity(table_name, entity)

    entity['RowKey'] = 'test2'
    self.ts.insert_entity(table_name, entity)

    token = self.ts.generate_account_shared_access_signature(
        ResourceTypes.OBJECT,
        AccountPermissions.READ,
        datetime.utcnow() + timedelta(hours=1),
        datetime.utcnow() - timedelta(minutes=1),
    )

    # Act
    service = TableService(
        account_name=self.settings.STORAGE_ACCOUNT_NAME,
        sas_token=token,
    )
    self._set_test_proxy(service, self.settings)
    entities = list(service.query_entities(table_name))

    # Assert
    self.assertEqual(len(entities), 2)
    self.assertEqual(entities[0].text, 'hello')
    self.assertEqual(entities[1].text, 'hello')
class azure_table:
    def __init__(self, table_name='HemoniDataTable'):
        connection_string = "**"
        self.table_client = TableService(connection_string=connection_string)
        self.table_name = table_name
        if not self.table_client.exists(table_name):
            self.table_client.create_table(table_name=table_name)

    def delete_table(self):
        self.table_client.delete_table(table_name=self.table_name)

    def insert_entity(self, entity):
        """
        When inserting an entity into a table, you must specify values for the
        PartitionKey and RowKey system properties. Together, these properties
        form the primary key and must be unique within the table.

        Both the PartitionKey and RowKey values must be string values; each key
        value may be up to 64 KB in size. If you are using an integer value for
        the key, you should convert the integer to a fixed-width string, because
        keys are canonically sorted. For example, you should convert the value 1
        to 0000001 to ensure proper sorting.

        :param entity: The entity to insert. Could be a dict or an entity object.
            Must contain a PartitionKey and a RowKey.
        :return: None
        """
        self.table_client.insert_or_replace_entity(table_name=self.table_name,
                                                   entity=entity)

    def get_entity(self, partition, row):
        """
        Get an entity from the specified table. Throws if the entity does not exist.

        :param partition: The PartitionKey of the entity.
        :param row: The RowKey of the entity.
        :return: The entity.
        """
        return self.table_client.get_entity(self.table_name,
                                            partition_key=partition,
                                            row_key=row)
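# Worked example of the fixed-width key advice in the docstring above:
# zero-padding keeps lexicographic (table) order aligned with numeric order.
# (Assumes the "**" placeholder above is replaced with a real connection string.)
t = azure_table()
t.insert_entity({
    'PartitionKey': 'readings',
    'RowKey': str(42).zfill(7),  # '0000042' sorts before '0000100'
    'value': 3.14,
})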
def _unmerge_resources(table_client: azuretable.TableService) -> None:
    """Remove node from the image table
    :param azuretable.TableService table_client: table client
    """
    logger.debug(
        'removing node {} from the image table for container mode {}'.format(
            _NODEID, _CONTAINER_MODE.name.lower()))
    try:
        entities = table_client.query_entities(
            _STORAGE_CONTAINERS['table_images'],
            filter='PartitionKey eq \'{}\''.format(_PARTITION_KEY))
    except azure.common.AzureMissingResourceHttpError:
        entities = []
    mode_prefix = _CONTAINER_MODE.name.lower() + ':'
    for entity in entities:
        if entity['Resource'].startswith(mode_prefix):
            _unmerge_resource(table_client, entity)
    logger.info(
        'node {} removed from the image table for container mode {}'.format(
            _NODEID, _CONTAINER_MODE.name.lower()))
def distribute_global_resources(
        loop: asyncio.BaseEventLoop,
        blob_client: azureblob.BlockBlobService,
        table_client: azuretable.TableService) -> None:
    """Distribute global services/resources
    :param asyncio.BaseEventLoop loop: event loop
    :param azureblob.BlockBlobService blob_client: blob client
    :param azuretable.TableService table_client: table client
    """
    # remove node from the image table because cascade relies on it to know
    # when its work is done
    _unmerge_resources(table_client)
    # get globalresources from table
    try:
        entities = table_client.query_entities(
            _STORAGE_CONTAINERS['table_globalresources'],
            filter='PartitionKey eq \'{}\''.format(_PARTITION_KEY))
    except azure.common.AzureMissingResourceHttpError:
        entities = []
    nentities = 0
    for ent in entities:
        resource = ent['Resource']
        grtype, image = get_container_image_name_from_resource(resource)
        if grtype == _CONTAINER_MODE.name.lower():
            nentities += 1
            _DIRECTDL_QUEUE.put(resource)
            key_fingerprint = ent.get('KeyFingerprint', None)
            if key_fingerprint is not None:
                _DIRECTDL_KEY_FINGERPRINT_DICT[image] = key_fingerprint
        else:
            logger.info('skipping resource {}: '.format(resource) +
                        'not matching container mode "{}"'.format(
                            _CONTAINER_MODE.name.lower()))
    if nentities == 0:
        logger.info('no global resources specified')
        return
    logger.info('{} global resources matching container mode "{}"'.format(
        nentities, _CONTAINER_MODE.name.lower()))
    # run async func in loop
    loop.run_until_complete(
        download_monitor_async(loop, blob_client, table_client, nentities))
class EventRepository:
    events_by_date_table = "eventsByDate"
    event_duplicates_table = "eventDuplicates"

    def __init__(self, connection_string=None):
        if not connection_string:
            # Default to the local storage emulator (well-known devstore key)
            connection_string = (
                "AccountName=devstoreaccount1;"
                "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
                "DefaultEndpointsProtocol=http;"
                "BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
                "QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;"
                "TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;")
        self.table_client = TableService(connection_string=connection_string,
                                         is_emulated=True)

    def list_events_by_date(self, dt: datetime.date) -> List[dict]:
        pk = self._date_to_pk(dt)
        for event in self.table_client.query_entities(
                self.events_by_date_table,
                filter="PartitionKey eq '%s'" % (pk, )):
            if 'place' in event:
                event['place'] = json.loads(event['place'])
            if 'dates' in event:
                event['dates'] = json.loads(event['dates'])
            if 'raw_dates' in event:
                event['raw_dates'] = event['raw_dates'].split('\n')
            if 'tags' in event:
                event['tags'] = event['tags'].split(',')
            if 'type' in event:
                event['type'] = event['type'].split(',')
            if 'cost' in event:
                event['cost'] = event['cost'].split(',')
            yield event

    def remove_rows(self, dt, row_keys):
        pk = self._date_to_pk(dt)
        for key in row_keys:
            self.table_client.delete_entity(self.events_by_date_table, pk, key)

    def save_events_by_date(self, events: List[dict], dt: datetime.date,
                            table_name=events_by_date_table):
        partition_keys = set()
        for event in events:
            if 'PartitionKey' not in event:
                if dt:
                    event['PartitionKey'] = self._date_to_pk(dt)
                else:
                    event['PartitionKey'] = str(datetime.date.today().year)
            if 'RowKey' not in event:
                full_text = (event['title'] + "\n" +
                             event['short_description'] + "\n" +
                             event['description'])
                event['RowKey'] = str(hash(full_text))
            event['place'] = json.dumps(event['place'], ensure_ascii=False)
            event['dates'] = json.dumps(event['dates'])
            event['tags'] = ",".join(event['tags'])
            if 'type' in event:
                event['type'] = ",".join(event['type'])
            if "raw_dates" in event:
                event['raw_dates'] = "\n".join(event['raw_dates'])
            if 'cost' in event and event['cost']:
                event['cost'] = ",".join(str(c) for c in event['cost'])
            else:
                event['cost'] = None
            self.table_client.insert_or_replace_entity(table_name, event)
            partition_keys.add(event['PartitionKey'])
        for pk in partition_keys:
            self.table_client.insert_or_replace_entity(table_name, {
                "PartitionKey": "PARTITIONS",
                "RowKey": pk
            })

    def save_events_json(self, events: List[dict]):
        grouped_events = group_by_dates(events)
        for dt, events in grouped_events.items():
            self.save_events_by_date(events, dt)

    def save_verified_events(self, events: List[Event]):
        pk = datetime.datetime.now().timestamp() % 255
        for event in events:
            event_description = event.to_str()
            event_hash = hash(event_description)
            self.table_client.insert_or_replace_entity(
                "verifiedEvents", {
                    "PartitionKey": str(pk),
                    "RowKey": str(event_hash),
                    "Text": event_description,
                    "Labels": ",".join(event.event_tags)
                })

    @staticmethod
    def _date_to_pk(dt: datetime.date):
        return "%d_%d_%d" % (dt.year, dt.month, dt.day)
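# Note on the RowKey scheme above: Python 3 salts the builtin hash() per
# process, so str(hash(full_text)) is not stable across runs and re-saving the
# same event can produce a new row. A deterministic alternative (sketch):
import hashlib

def stable_row_key(text: str) -> str:
    return hashlib.sha1(text.encode('utf-8')).hexdigest()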
class ShoppingCartServiceCloud:
    """Shopping Cart Methods called from the API to interact with the DB."""

    def __init__(self, shards=1):
        self.shards = shards
        self.table_name = "ShoppingCartTable"
        try:
            self.db = TableService(
                endpoint_suffix="table.cosmos.azure.com",
                connection_string=os.getenv("AZURE_COSMOS_CONNECTION_STRING"),
            )
        except ValueError:
            raise Exception(
                "Please initialize $AZURE_COSMOS_CONNECTION_STRING")
        try:
            self.db.create_table(self.table_name, fail_on_exist=True)
        except AzureConflictHttpError:
            # Accept error only if already exists
            pass

    def get_product_items(self, customer_id):
        row_key = utils.hash_key(customer_id)
        partition_key = 'ShoppingCart' + str(row_key % self.shards).zfill(3)
        # Get Entity
        try:
            items = self.db.get_entity(self.table_name, partition_key,
                                       str(row_key))
            product_items = json.loads(items.ProductItems)
        except AzureMissingResourceHttpError:
            product_items = []
        return product_items

    def update_product_items(self, customer_id, product_items):
        row_key = utils.hash_key(customer_id)
        partition_key = 'ShoppingCart' + str(row_key % self.shards).zfill(3)
        product_items = [
            item for item in product_items if item["unitCount"] > 0
        ]
        # Insert or Update Items
        items = Entity()
        items.PartitionKey = partition_key
        items.RowKey = str(row_key)
        items.CustomerId = customer_id
        items.ProductItems = json.dumps(product_items)
        self.db.insert_or_replace_entity(self.table_name, items)

    def delete_shopping_cart(self, customer_id):
        row_key = utils.hash_key(customer_id)
        partition_key = 'ShoppingCart' + str(row_key % self.shards).zfill(3)
        # Get Items to Checkout before Delete
        try:
            items = self.db.get_entity(self.table_name, partition_key,
                                       str(row_key))
            checkout_items = json.loads(items.ProductItems)
        except AzureMissingResourceHttpError:
            checkout_items = []
        self.db.delete_entity(self.table_name, partition_key, str(row_key))
        return checkout_items
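# Usage sketch for the sharded-cart service above (utils.hash_key and the
# $AZURE_COSMOS_CONNECTION_STRING environment variable come from the snippet's
# own surroundings; 'customer-42' and the item shape are illustrative):
cart = ShoppingCartServiceCloud(shards=10)
cart.update_product_items('customer-42', [{'productId': 'sku-1', 'unitCount': 2}])
print(cart.get_product_items('customer-42'))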
class AzureJobStore(AbstractJobStore):
    """
    A job store that uses Azure's blob store for file storage and Table Service to store job info
    with strong consistency.
    """

    # Dots in container names should be avoided because container names are used in HTTPS bucket
    # URLs where they may interfere with the certificate common name. We use a double underscore
    # as a separator instead.
    containerNameRe = re.compile(r'^[a-z0-9][a-z0-9-]+[a-z0-9]$')

    # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx
    minContainerNameLen = 3
    maxContainerNameLen = 63
    maxNameLen = 10
    nameSeparator = 'xx'  # Table names must be alphanumeric

    # Length of a jobID - used to test if a stats file has been read already or not
    jobIDLength = len(str(uuid.uuid4()))

    def __init__(self, locator, jobChunkSize=maxAzureTablePropertySize):
        super(AzureJobStore, self).__init__()
        accountName, namePrefix = locator.split(':', 1)
        if '--' in namePrefix:
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain %s."
                             % (namePrefix, self.nameSeparator))
        if not self.containerNameRe.match(namePrefix):
            raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
                             "hyphens or lower-case letters and must not start or end in a "
                             "hyphen." % namePrefix)
        # Reserve 13 for separator and suffix
        if len(namePrefix) > self.maxContainerNameLen - self.maxNameLen - len(self.nameSeparator):
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
                             "characters." % namePrefix)
        self.locator = locator
        self.jobChunkSize = jobChunkSize
        self.accountKey = _fetchAzureAccountKey(accountName)
        self.accountName = accountName
        # Table names have strict requirements in Azure
        self.namePrefix = self._sanitizeTableName(namePrefix)
        # These are the main API entry points.
        self.tableService = TableService(account_key=self.accountKey, account_name=accountName)
        self.blobService = BlockBlobService(account_key=self.accountKey, account_name=accountName)
        # Serialized jobs table
        self.jobItems = None
        # Job<->file mapping table
        self.jobFileIDs = None
        # Container for all shared and unshared files
        self.files = None
        # Stats and logging strings
        self.statsFiles = None
        # File IDs that contain stats and logging strings
        self.statsFileIDs = None

    @property
    def keyPath(self):
        return self.config.cseKey

    def initialize(self, config):
        if self._jobStoreExists():
            raise JobStoreExistsException(self.locator)
        logger.debug("Creating job store at '%s'" % self.locator)
        self._bind(create=True)
        super(AzureJobStore, self).initialize(config)

    def resume(self):
        if not self._jobStoreExists():
            raise NoSuchJobStoreException(self.locator)
        logger.debug("Using existing job store at '%s'" % self.locator)
        self._bind(create=False)
        super(AzureJobStore, self).resume()

    def destroy(self):
        self._bind()
        for name in 'jobItems', 'jobFileIDs', 'files', 'statsFiles', 'statsFileIDs':
            resource = getattr(self, name)
            if resource is not None:
                if isinstance(resource, AzureTable):
                    resource.delete_table()
                elif isinstance(resource, AzureBlobContainer):
                    resource.delete_container()
                else:
                    assert False
                setattr(self, name, None)

    def _jobStoreExists(self):
        """
        Checks if the job store exists by querying the existence of the statsFileIDs table.
        Note that this is the last component that is deleted in :meth:`.destroy`.
        """
        for attempt in retry_azure():
            with attempt:
                try:
                    exists = self.tableService.exists(table_name=self._qualify('statsFileIDs'))
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        return False
                    else:
                        raise
                else:
                    return exists

    def _bind(self, create=False):
        table = self._bindTable
        container = self._bindContainer
        for name, binder in (('jobItems', table),
                             ('jobFileIDs', table),
                             ('files', container),
                             ('statsFiles', container),
                             ('statsFileIDs', table)):
            if getattr(self, name) is None:
                setattr(self, name, binder(self._qualify(name), create=create))

    def _qualify(self, name):
        return self.namePrefix + self.nameSeparator + name.lower()

    def jobs(self):
        # How many jobs have we done?
        total_processed = 0
        for jobEntity in self.jobItems.query_entities():
            # Process the items in the page
            yield AzureJob.fromEntity(jobEntity)
            total_processed += 1
            if total_processed % 1000 == 0:
                # Produce some feedback for the user, because this can take
                # a long time on, for example, Azure
                logger.debug("Processed %d total jobs" % total_processed)
        logger.debug("Processed %d total jobs" % total_processed)

    def create(self, jobNode):
        jobStoreID = self._newJobID()
        job = AzureJob.fromJobNode(jobNode, jobStoreID, self._defaultTryCount())
        entity = job.toEntity(chunkSize=self.jobChunkSize)
        self.jobItems.insert_entity(entity=entity)
        return job

    def exists(self, jobStoreID):
        if self.jobItems.get_entity(row_key=str(jobStoreID)) is None:
            return False
        return True

    def load(self, jobStoreID):
        jobEntity = self.jobItems.get_entity(row_key=str(jobStoreID))
        if jobEntity is None:
            raise NoSuchJobException(jobStoreID)
        return AzureJob.fromEntity(jobEntity)

    def update(self, job):
        self.jobItems.update_entity(entity=job.toEntity(chunkSize=self.jobChunkSize))

    def delete(self, jobStoreID):
        try:
            self.jobItems.delete_entity(row_key=str(jobStoreID))
        except AzureMissingResourceHttpError:
            # Job deletion is idempotent, and this job has been deleted already
            return
        filterString = "PartitionKey eq '%s'" % jobStoreID
        for fileEntity in self.jobFileIDs.query_entities(filter=filterString):
            jobStoreFileID = fileEntity.RowKey
            self.deleteFile(jobStoreFileID)

    def getEnv(self):
        return dict(AZURE_ACCOUNT_KEY=self.accountKey)

    class BlobInfo(namedtuple('BlobInfo', ('account', 'container', 'name'))):
        @property
        @memoize
        def service(self):
            return BlockBlobService(account_name=self.account,
                                    account_key=_fetchAzureAccountKey(self.account))

    @classmethod
    def getSize(cls, url):
        blob = cls._parseWasbUrl(url)
        blob = blob.service.get_blob_properties(blob.container, blob.name)
        return blob.properties.content_length

    @classmethod
    def _readFromUrl(cls, url, writable):
        blob = cls._parseWasbUrl(url)
        for attempt in retry_azure():
            with attempt:
                blob.service.get_blob_to_stream(container_name=blob.container,
                                                blob_name=blob.name,
                                                stream=writable)

    @classmethod
    def _writeToUrl(cls, readable, url):
        blob = cls._parseWasbUrl(url)
        blob.service.create_blob_from_stream(container_name=blob.container,
                                             blob_name=blob.name,
                                             max_connections=1,
                                             stream=readable)

    @classmethod
    def _parseWasbUrl(cls, url):
        """
        :param urlparse.ParseResult url: x
        :rtype: AzureJobStore.BlobInfo
        """
        assert url.scheme in ('wasb', 'wasbs')
        try:
            container, account = url.netloc.split('@')
        except ValueError:
            raise InvalidImportExportUrlException(url)
        suffix = '.blob.core.windows.net'
        if account.endswith(suffix):
            account = account[:-len(suffix)]
        else:
            raise InvalidImportExportUrlException(url)
        assert url.path[0] == '/'
        return cls.BlobInfo(account=account, container=container, name=url.path[1:])

    @classmethod
    def _supportsUrl(cls, url, export=False):
        return url.scheme.lower() in ('wasb', 'wasbs')

    def writeFile(self, localFilePath, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.updateFile(jobStoreFileID, localFilePath)
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    def updateFile(self, jobStoreFileID, localFilePath):
        with open(localFilePath, 'rb') as read_fd:
            with self._uploadStream(jobStoreFileID, self.files) as write_fd:
                while True:
                    buf = read_fd.read(self._maxAzureBlockBytes)
                    write_fd.write(buf)
                    if len(buf) == 0:
                        break

    def readFile(self, jobStoreFileID, localFilePath, symlink=False):
        try:
            with self._downloadStream(jobStoreFileID, self.files) as read_fd:
                with open(localFilePath, 'wb') as write_fd:
                    while True:
                        buf = read_fd.read(self._maxAzureBlockBytes)
                        write_fd.write(buf)
                        if not buf:
                            break
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)

    def deleteFile(self, jobStoreFileID):
        try:
            self.files.delete_blob(blob_name=str(jobStoreFileID))
            self._dissociateFileFromJob(jobStoreFileID)
        except AzureMissingResourceHttpError:
            pass

    def fileExists(self, jobStoreFileID):
        # As Azure doesn't have a blob_exists method (at least in the
        # python API) we just try to download the metadata, and hope
        # the metadata is small so the call will be fast.
        try:
            self.files.get_blob_metadata(blob_name=str(jobStoreFileID))
            return True
        except AzureMissingResourceHttpError:
            return False

    @contextmanager
    def writeFileStream(self, jobStoreID=None):
        # TODO: this (and all stream methods) should probably use the
        # Append Blob type, but that is not currently supported by the
        # Azure Python API.
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as fd:
            yield fd, jobStoreFileID
        self._associateFileWithJob(jobStoreFileID, jobStoreID)

    @contextmanager
    def updateFileStream(self, jobStoreFileID):
        with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd:
            yield fd

    def getEmptyFileStoreID(self, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as _:
            pass
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    @contextmanager
    def readFileStream(self, jobStoreFileID):
        if not self.fileExists(jobStoreFileID):
            raise NoSuchFileException(jobStoreFileID)
        with self._downloadStream(jobStoreFileID, self.files) as fd:
            yield fd

    @contextmanager
    def writeSharedFileStream(self, sharedFileName, isProtected=None):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd:
            yield fd

    @contextmanager
    def readSharedFileStream(self, sharedFileName):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        if not self.fileExists(sharedFileID):
            raise NoSuchFileException(sharedFileID)
        with self._downloadStream(sharedFileID, self.files) as fd:
            yield fd

    def writeStatsAndLogging(self, statsAndLoggingString):
        # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK
        jobStoreFileID = self._newFileID()
        encrypted = self.keyPath is not None
        if encrypted:
            statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath)
        self.statsFiles.create_blob_from_text(blob_name=str(jobStoreFileID),
                                              text=statsAndLoggingString,
                                              metadata=dict(encrypted=str(encrypted)))
        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID})

    def readStatsAndLogging(self, callback, readAll=False):
        suffix = '_old'
        numStatsFiles = 0
        for attempt in retry_azure():
            with attempt:
                for entity in self.statsFileIDs.query_entities():
                    jobStoreFileID = entity.RowKey
                    hasBeenRead = len(jobStoreFileID) > self.jobIDLength
                    if not hasBeenRead:
                        with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                            callback(fd)
                        # Mark this entity as read by appending the suffix
                        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID + suffix})
                        self.statsFileIDs.delete_entity(row_key=str(jobStoreFileID))
                        numStatsFiles += 1
                    elif readAll:
                        # Strip the suffix to get the original ID
                        jobStoreFileID = jobStoreFileID[:-len(suffix)]
                        with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                            callback(fd)
                        numStatsFiles += 1
        return numStatsFiles

    _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    def getPublicUrl(self, jobStoreFileID):
        try:
            self.files.get_blob_properties(blob_name=str(jobStoreFileID))
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)
        startTime = (datetime.utcnow() - timedelta(minutes=5))
        endTime = datetime.utcnow() + self.publicUrlExpiration
        sas_token = self.files.generate_blob_shared_access_signature(
            blob_name=str(jobStoreFileID),
            permission=BlobPermissions.READ,
            start=startTime,
            expiry=endTime)
        return self.files.make_blob_url(blob_name=str(jobStoreFileID)) + '?' + sas_token

    def getSharedPublicUrl(self, sharedFileName):
        jobStoreFileID = self._newFileID(sharedFileName)
        return self.getPublicUrl(jobStoreFileID)

    def _newJobID(self):
        # raw UUIDs don't work for Azure property names because the '-' character is disallowed.
        return str(uuid.uuid4()).replace('-', '_')

    # A dummy job ID under which all shared files are stored.
    sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')

    def _newFileID(self, sharedFileName=None):
        if sharedFileName is None:
            ret = str(uuid.uuid4())
        else:
            ret = str(uuid.uuid5(self.sharedFileJobID, sharedFileName))
        return ret.replace('-', '_')

    def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
        if jobStoreID is not None:
            self.jobFileIDs.insert_entity(
                entity={'PartitionKey': EntityProperty('Edm.String', jobStoreID),
                        'RowKey': EntityProperty('Edm.String', jobStoreFileID)})

    def _dissociateFileFromJob(self, jobStoreFileID):
        entities = list(self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID))
        if entities:
            assert len(entities) == 1
            jobStoreID = entities[0].PartitionKey
            self.jobFileIDs.delete_entity(partition_key=str(jobStoreID),
                                          row_key=str(jobStoreFileID))

    def _bindTable(self, tableName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    exists = self.tableService.exists(table_name=tableName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code != 404:
                        raise
                else:
                    if exists:
                        return AzureTable(self.tableService, tableName)
                if create:
                    self.tableService.create_table(tableName)
                    return AzureTable(self.tableService, tableName)
                else:
                    return None

    def _bindContainer(self, containerName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    self.blobService.get_container_properties(containerName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        if create:
                            self.blobService.create_container(containerName)
                        else:
                            return None
                    else:
                        raise
                return AzureBlobContainer(self.blobService, containerName)

    def _sanitizeTableName(self, tableName):
        """
        Azure table names must start with a letter and be alphanumeric. This will
        never cause a collision if uuids are used, but otherwise may not be safe.
        """
        return 'a' + ''.join([x for x in tableName if x.isalnum()])

    # Maximum bytes that can be in any block of an Azure block blob
    # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106
    _maxAzureBlockBytes = 4 * 1024 * 1024

    @contextmanager
    def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None):
        """
        :param encrypted: True to enforce encryption (will raise exception unless key is set),
               False to prevent encryption or None to encrypt if key is set.
        """
        if checkForModification:
            try:
                expectedVersion = container.get_blob_properties(
                    blob_name=str(jobStoreFileID)).properties.etag
            except AzureMissingResourceHttpError:
                expectedVersion = None

        if encrypted is None:
            encrypted = self.keyPath is not None
        elif encrypted:
            if self.keyPath is None:
                raise RuntimeError('Encryption requested but no key was provided')

        maxBlockSize = self._maxAzureBlockBytes
        if encrypted:
            # There is a small overhead for encrypted data.
            maxBlockSize -= encryption.overhead

        store = self

        class UploadPipe(WritablePipe):
            def readFrom(self, readable):
                blocks = []
                try:
                    while True:
                        buf = readable.read(maxBlockSize)
                        if len(buf) == 0:
                            # We're safe to break here even if we never read anything, since
                            # putting an empty block list creates an empty blob.
                            break
                        if encrypted:
                            buf = encryption.encrypt(buf, store.keyPath)
                        blockID = store._newFileID()
                        container.put_block(blob_name=str(jobStoreFileID),
                                            block=buf,
                                            block_id=blockID)
                        blocks.append(BlobBlock(blockID))
                except:
                    with panic(log=logger):
                        # This is guaranteed to delete any uncommitted blocks.
                        container.delete_blob(blob_name=str(jobStoreFileID))

                if checkForModification and expectedVersion is not None:
                    # Acquire a (60-second) write lock,
                    leaseID = container.acquire_blob_lease(blob_name=str(jobStoreFileID),
                                                           lease_duration=60)
                    # check for modification,
                    blob = container.get_blob_properties(blob_name=str(jobStoreFileID))
                    if blob.properties.etag != expectedVersion:
                        container.release_blob_lease(blob_name=str(jobStoreFileID),
                                                     lease_id=leaseID)
                        raise ConcurrentFileModificationException(jobStoreFileID)
                    # commit the file,
                    container.put_block_list(blob_name=str(jobStoreFileID),
                                             block_list=blocks,
                                             lease_id=leaseID,
                                             metadata=dict(encrypted=str(encrypted)))
                    # then release the lock.
                    container.release_blob_lease(blob_name=str(jobStoreFileID),
                                                 lease_id=leaseID)
                else:
                    # No need to check for modification, just blindly write over whatever
                    # was there.
                    container.put_block_list(blob_name=str(jobStoreFileID),
                                             block_list=blocks,
                                             metadata=dict(encrypted=str(encrypted)))

        with UploadPipe() as writable:
            yield writable

    @contextmanager
    def _downloadStream(self, jobStoreFileID, container):
        # The reason this is not in the writer is so we catch non-existent blobs early
        blob = container.get_blob_properties(blob_name=str(jobStoreFileID))

        encrypted = strict_bool(blob.metadata['encrypted'])
        if encrypted and self.keyPath is None:
            raise AssertionError('Content is encrypted but no key was provided.')

        outer_self = self

        class DownloadPipe(ReadablePipe):
            def writeTo(self, writable):
                chunkStart = 0
                fileSize = blob.properties.content_length
                while chunkStart < fileSize:
                    chunkEnd = chunkStart + outer_self._maxAzureBlockBytes - 1
                    buf = container.get_blob_to_bytes(blob_name=str(jobStoreFileID),
                                                      start_range=chunkStart,
                                                      end_range=chunkEnd).content
                    if encrypted:
                        buf = encryption.decrypt(buf, outer_self.keyPath)
                    writable.write(buf)
                    chunkStart = chunkEnd + 1

        with DownloadPipe() as readable:
            yield readable
        entity.CompletedTasks = len(complete_tasks)
        entity._State = get_search_state(all_tasks_complete, any_failures)
        if not incomplete_tasks:
            entity.EndTime = datetime.datetime.utcnow()
            table_service.update_entity('SearchEntity', entity)
            return
        else:
            table_service.update_entity('SearchEntity', entity)
        time.sleep(5)


if __name__ == '__main__':
    storage_account = sys.argv[1]
    storage_key = sys.argv[2]
    batch_account = sys.argv[3]
    batch_key = sys.argv[4]
    batch_url = sys.argv[5]
    job_id = sys.argv[6]
    entity_pk = sys.argv[7]
    entity_rk = sys.argv[8]

    table_service = TableService(account_name=storage_account,
                                 account_key=storage_key)
    blob_service = BlockBlobService(account_name=storage_account,
                                    account_key=storage_key)
    credentials = batchauth.SharedKeyCredentials(batch_account, batch_key)
    batch_client = batch.BatchServiceClient(credentials, base_url=batch_url)

    wait_for_tasks_to_complete(table_service, batch_client, entity_pk, entity_rk, job_id)
def coalesce_data(table_client: azuretable.TableService) -> tuple:
    """Coalesce perf data from table
    :param azure.cosmosdb.table.TableService table_client: table client
    :rtype: tuple
    :return: (timing, sizes, offer, sku)
    """
    print('graphing data from {} with pk={}'.format(
        _TABLE_NAME, _PARTITION_KEY))
    entities = table_client.query_entities(
        _TABLE_NAME, filter='PartitionKey eq \'{}\''.format(_PARTITION_KEY))
    data = {}
    # process events
    for ent in entities:
        nodeid = ent['NodeId']
        event = ent['Event']
        if nodeid not in data:
            data[nodeid] = {}
        if event not in data[nodeid]:
            data[nodeid][event] = []
        ev = {
            'timestamp': datetime.datetime.fromtimestamp(float(ent['RowKey'])),
        }
        try:
            ev['message'] = _parse_message(event, ent['Message'])
        except KeyError:
            ev['message'] = None
        data[nodeid][event].append(ev)
    del entities
    sizes = {}
    offer = None
    sku = None
    for nodeid in data:
        if offer is None:
            offer = data[nodeid]['nodeprep:start'][0]['message']['offer']
            sku = data[nodeid]['nodeprep:start'][0]['message']['sku']
        # calculate dt timings
        timing = {
            'nodeprep': _compute_delta_t(
                data, nodeid, 'nodeprep:start', 0, 'nodeprep:end', 0),
            'global_resources_loaded': _compute_delta_t(
                data, nodeid, 'cascade:start', 0, 'cascade:gr-done', 0),
        }
        try:
            timing['docker_install'] = _compute_delta_t(
                data, nodeid, 'nodeprep:start', 0, 'privateregistry:start', 0)
        except KeyError:
            # when no private registry setup exists, install time is
            # equivalent to nodeprep time
            timing['docker_install'] = timing['nodeprep']
        try:
            timing['private_registry_setup'] = _compute_delta_t(
                data, nodeid, 'privateregistry:start', 0,
                'privateregistry:end', 0)
        except KeyError:
            timing['private_registry_setup'] = 0
        try:
            timing['docker_shipyard_container_pull'] = _compute_delta_t(
                data, nodeid, 'shipyard:pull-start', 0, 'shipyard:pull-end', 0)
        except KeyError:
            timing['docker_shipyard_container_pull'] = 0
        data[nodeid]['start'] = data[nodeid][
            'nodeprep:start'][0]['timestamp'].timestamp()
        data[nodeid].pop('nodeprep:start')
        data[nodeid].pop('nodeprep:end')
        data[nodeid].pop('privateregistry:start', None)
        data[nodeid].pop('privateregistry:end', None)
        data[nodeid].pop('shipyard:pull-start', None)
        data[nodeid].pop('shipyard:pull-end', None)
        data[nodeid].pop('cascade:start')
        data[nodeid].pop('cascade:gr-done')
        for event in data[nodeid]:
            # print(event, data[nodeid][event])
            if event == 'cascade:pull-start':
                _diff_events(data, nodeid, event, 'cascade:pull-end', timing,
                             'pull:', sizes)
            elif event == 'cascade:save-start':
                _diff_events(data, nodeid, event, 'cascade:save-end', timing,
                             'save:', sizes)
            elif event == 'cascade:torrent-start':
                _diff_events(data, nodeid, event, 'cascade:load-start', timing,
                             'torrent:')
            elif event == 'cascade:load-start':
                _diff_events(data, nodeid, event, 'cascade:load-end', timing,
                             'load:', sizes)
        data[nodeid].pop('cascade:pull-start', None)
        data[nodeid].pop('cascade:pull-end', None)
        data[nodeid].pop('cascade:save-start', None)
        data[nodeid].pop('cascade:save-end', None)
        data[nodeid].pop('cascade:torrent-start', None)
        data[nodeid].pop('cascade:load-start', None)
        data[nodeid].pop('cascade:load-end', None)
        data[nodeid]['timing'] = timing
    return data, sizes, offer, sku
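# For orientation, a plausible shape for the _compute_delta_t helper used
# above (hypothetical; the real implementation lives elsewhere in this repo):
def _compute_delta_t(data, nodeid, event1, event1_pos, event2, event2_pos):
    # seconds elapsed between two recorded events on a node
    return (data[nodeid][event2][event2_pos]['timestamp'] -
            data[nodeid][event1][event1_pos]['timestamp']).total_seconds()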
from flask import Flask, render_template, request
from azure.cosmosdb.table import TableService

table_service = TableService(
    account_name='cloudshell703046314',
    account_key='fTJlnFKqcwkuu4BCIJiOnIqGEB3aNBkY/yaZ55tm7UWKtzuTv5/pdHgzL2HunOGu8IuMHEEV92nMY0wi2ZANGw=='
)

# task = {'PartitionKey': 'first', 'RowKey': '002',
#         'ID': '0002', 'address': 'India', 'stock': 40}
# table_service.update_entity('customer', task)

app = Flask(__name__)


@app.route("/")
def index():
    return "hi from home page"


@app.route("/user/<name>")
def profile(name):
    # task = {'PartitionKey': 'First', 'RowKey': '1000',
    #         'description': 'product1', 'priority': 200}
    # table_service.update_entity('product', task)
    return render_template('hello.html', name=name)


@app.route("/submit", methods=['GET', 'POST'])
def submit():
    if request.method == 'POST':
        stock = request.form['stock']
        material = request.form['material']
def custom_endpoint(self):
    # Custom endpoints are necessary for certain regions.
    # The most common usage is to connect to the China cloud.
    client = TableService(account_name='<account_name>',
                          account_key='<account_key>',
                          endpoint_suffix='core.chinacloudapi.cn')
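# The same endpoint_suffix mechanism also reaches Azure Cosmos DB Table API
# accounts (later snippets do the equivalent via connection strings):
client = TableService(account_name='<account_name>',
                      account_key='<account_key>',
                      endpoint_suffix='table.cosmos.azure.com')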
from azure.cosmosdb.table import TableService

conn_string = "DefaultEndpointsProtocol=https;AccountName=hackgt19;AccountKey=VluI82NNObOQHE95tf8333Mdkg5lKMS5ejHT6vHeITXlIrjIcPNAzENYTFI4qRNK1OaltlIx4qHozbCdpallqQ==;TableEndpoint=https://hackgt19.table.cosmos.azure.com:443/;"

# table = TableService(endpoint_suffix="table.cosmos.azure.com", connection_string=the_connection_string)
table = TableService(connection_string=conn_string)
print(table)

table.create_table("test")
# TableService has no get_table method; exists() confirms the table was created
print(table.exists("test"))
def _get_or_create_table_service(self):
    if self.table_Service is None:
        self.table_Service = TableService(
            account_name=self.storage_account_name,
            account_key=self._get_account_key())
    return self.table_Service
from azure.cosmosdb.table import TableService

the_connection_string = "DefaultEndpointsProtocol=https;AccountName=njesptable;AccountKey=SbXCFNzDS0gZGFlNMAQ6kPoIWZp1hvFFLIGAqMwdRLdgC4N7NMj6NAb8vOwOWnFA9APrPBDLXJXaskYg1Kb2hA==;TableEndpoint=https://njesptable.table.cosmosdb.azure.com:443/;"

table_service = TableService(endpoint_suffix="table.cosmosdb.azure.com",
                             connection_string=the_connection_string)