def get_tags():
    # TODO: Error checking
    # global d
    # d = {}
    clarifai_api = ClarifaiApi()
    blob_service = BlobService('calhacks', 'mm7EmY+T+MGahePBDSDU5LHpZR5tRXuh4MSco4jFrzHovOPEf06e18c89pxtPIo4NDVhhjSeaQY/FQmKNxjjyA==')
    blob_name = request.form['blob_id']
    # blob_name = blob_name.decode('utf-8')
    blob_service.get_blob_to_path('imagestore', blob_name, 'out.png')
    print("checkpoint 1")
    i = open('out.png', 'r')
    strd = ""
    for line in i:
        strd += line.strip()
    fname = 'img.png'
    with open(fname, 'wb') as f:
        f.write(base64.b64decode(strd))
    f = open(fname, 'rb')
    result = clarifai_api.tag_images(f)
    st = result['results'][0]['result']['tag']['classes'][0:6]
    print(st)
    for i in []:  # ['food', 'nobody', 'still life', 'meal', 'dish', 'plate', 'delicious', 'isolated', 'cutout', 'unhealthy', 'one', 'background']:
        while i in st:
            st.remove(i)
    js = json.dumps(search_terms(st))
    print(js)
    return js
def submit():
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)

    # Get a SAS signature (read for 24 hours) for the input container, save to a string
    inputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=INPUT_CONTAINER, permission='r')

    # Get a SAS signature (write for 24 hours) for the output container, save to a string
    outputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=OUTPUT_CONTAINER, permission='rwl')

    # List all the blobs and dump the content to a string
    blobs = blob_service.list_blobs(INPUT_CONTAINER)
    bloblist = []
    for blob in blobs:
        bloblist.append(blob.name)

    os.environ[SLURMDEMO_INPUTSIG] = inputsig
    os.environ[SLURMDEMO_OUTPUTSIG] = outputsig
    os.environ[SLURMDEMO_BLOBLIST] = json.dumps(bloblist)
    os.environ[SLURMDEMO_INPUTCONTAINER] = INPUT_CONTAINER
    os.environ[SLURMDEMO_OUTPUTCONTAINER] = OUTPUT_CONTAINER
    os.environ[SLURMDEMO_INPUTACCOUNT] = ACCOUNT_NAME
    os.environ[SLURMDEMO_OUTPUTACCOUNT] = ACCOUNT_NAME

    # Call sbatch
    cli = "sbatch -N 2 -n 2 --array=0-{nb} slurmdemo.sh".format(nb=len(bloblist))
    run(cli, showoutput=True)
def _createExternalStore():
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey

    blobService = BlobService(account_key=_fetchAzureAccountKey(AzureJobStoreTest.accountName),
                              account_name=AzureJobStoreTest.accountName)
    containerName = 'import-export-test-%s' % uuid.uuid4()
    blobService.create_container(containerName)
    return containerName
def getblob(request):
    assert isinstance(request, HttpRequest)
    blob_service = BlobService(account_name='araldrift', account_key='YOURKEYGOESHERE')
    # http://<storage-account-name>.blob.core.windows.net/<container-name>/<blob-name>
    blob_service.get_blob_to_path('flow', 'NARYN.day', './NARYN.day')
    # return HttpResponse('ok ye of little faith')
    return HttpResponse(json.dumps('i am just a wee bit of json'))
def __create_blob_container(self, storage_acc_name):
    sms = self.__get_service_mgmt_object()

    # Retrieve the primary key of your storage account
    # Maybe the secondary key works too?
    storage_acc_key = None
    accounts = sms.list_storage_accounts()
    for account in accounts:
        if account.service_name == storage_acc_name:
            storageServiceObj = sms.get_storage_account_keys(account.service_name)
            storage_acc_key = storageServiceObj.storage_service_keys.primary

    # Create a container
    blob_service = BlobService(account_name=storage_acc_name, account_key=storage_acc_key)
    container_name = namesgenerator.get_random_name()
    container_name += "container"
    blob_service.create_container(container_name)

    # This is the url to the container we just created
    container_url_template = "http://{}.blob.core.windows.net/{}"
    container_url = container_url_template.format(storage_acc_name, container_name)
    # print "Created blob container with URL ", container_url
    return container_url
def enumerate_objects(container):
    blob_service = BlobService(AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY)
    blobs = blob_service.list_blobs(container)
    items = []
    for blob in blobs:
        items.append(blob.name)
    return items
def gethydrograph(request):
    '''
    JSON return of a particular hydrograph by start / stop / station / time interval.

    In response it will generate a 404 error if the value is not found or
    return a JSON response with the requested slice.
    '''
    assert isinstance(request, HttpRequest)
    start = request.GET.get('start', None)
    end = request.GET.get('end', None)
    # Here we can thieve the time parse code from LOS and use this line:
    #   rd, rh = extract_time(request)
    # but for now this is commented out of course
    station = request.GET.get('station', None)
    interval = request.GET.get('interval', None)
    blob_service = BlobService(account_name='araldrift', account_key='XXXXXXXXXXXXXXXXXXXXXXX')
    blob_service.get_blob_to_path('flow', 'NARYN.day', './tmp.csv')
    f = open('./tmp.csv')
    h = []
    while True:
        line = f.readline()
        if line == "":
            break
        h.append(line)
    f.close()
    json_encoded_result = json.dumps(h)
    # Keep the LOS original for reference (see time formatting):
    # clean_req = {"rdidx": rdidx, "rdepth": rdepth, "rd": rd.strftime("%Y%m%d"), "rh": rh, "rparm": rparm,}
    # clean_req = {"start": start, "end": end, "station": station, "interocitor": interval,}
    # json_request = json.dumps(clean_req)
    # json_encoded_result = json_request
    try:
        #####################
        # This is the commented out LOS code (stripped out)
        #####################
        # Convert the depth to an int.
        #   depthIndex = int(rdidx)
        # Get the depth slice from the dataset.
        #   ret = getDepthSliceByIndex((rd, rh), depthIndex, rparm, rnorm)
        # Serialize out to JSON.
        #   json_encoded_result = json.dumps(ret)
        # Cache the response to blob storage. (Would be great if this was async...)
        #   cacheResult(request.GET, json_encoded_result)
        # Send the view on to the client.
        #####################
        return HttpResponse(json_encoded_result, content_type="application/json")
    except Exception as a:
        return HttpResponseNotFound(content="No dice, either the inputs were out of range, the file couldn't be retrieved, or the winds weren't in your favor.")
def gethydrograph(request):
    '''
    Returns streamflow data by start / stop / station.

    In response it will generate a 404 error if the value is not found or
    return a JSON response with the requested slice or a .csv file by default.
    '''
    assert isinstance(request, HttpRequest)
    start = request.GET.get('start', None)
    end = request.GET.get('end', None)
    station = request.GET.get('station', None)
    interval = request.GET.get('interval', None)
    jsondat = request.GET.get('jsondat', None)
    plot = request.GET.get('plot', None)

    # start blob service
    stationfile = station + '.day.new'
    downloadablefile = station + '_' + start + '_' + end + '.csv'
    blob_service = BlobService(account_name='araldrift', account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==')
    blob_service.get_blob_to_path('flow', stationfile, './tmp.csv')
    f = open('./tmp.csv')

    # read in pandas data and subsetting
    d_cols = ["DATE", "FLOW"]
    d = pd.read_csv('./tmp.csv', sep=" ", names=d_cols)
    df = d[(d.DATE >= start) & (d.DATE <= end)]
    h = df.to_json(orient='records')
    json_encoded_result = json.dumps(h)
    df.plot(x='DATE', y='FLOW', figsize=(14, 6))
    plt.savefig('./plot_test.png')
    # h = []
    # while True:
    #     line = f.readline()
    #     if line == "":
    #         break
    #     h.append(line)
    # f.close()
    try:
        if jsondat in ['TRUE']:
            response = HttpResponse(json_encoded_result, content_type="application/json")
            return response
        elif plot in ['TRUE']:
            image_data = open("./plot_test.png", "rb").read()
            response = HttpResponse(image_data, content_type='image/png')
            return response
        else:
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename=' + downloadablefile
            df.to_csv(response, index=False, lineterminator='\r\n')
            return response
    except Exception as a:
        return HttpResponseNotFound(content="No dice, either the inputs were out of range, the file couldn't be retrieved, or the winds weren't in your favor.")
def upload_from_text(container, content):
    filename = str(uuid.uuid4())
    blob_service = BlobService(account_name=config.AZURE_STORAGE_NAME,
                               account_key=config.AZURE_STORAGE_KEY)
    try:
        blob_service.put_block_blob_from_text(container, filename, content)
        return generate_blob_url(container, filename)
    except:
        return ""
def _createExternalStore(self):
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey
    from azure.storage.blob import BlobService

    blobService = BlobService(account_key=_fetchAzureAccountKey(self.accountName),
                              account_name=self.accountName)
    containerName = 'import-export-test-%s' % uuid.uuid4()
    blobService.create_container(containerName)
    return containerName
class AzureStorage(Storage):
    account_name = settings.AZURE_ACCOUNT_NAME
    account_key = settings.AZURE_ACCOUNT_KEY
    azure_container = settings.AZURE_CONTAINER

    def __init__(self, *args, **kwargs):
        super(AzureStorage, self).__init__(*args, **kwargs)
        self._connection = None

    @property
    def connection(self):
        if self._connection is None:
            # Create connection
            self._connection = BlobService(self.account_name, self.account_key)

            # Create container if needed
            containers = [c for c in self._connection.list_containers(prefix=self.azure_container)
                          if c.name == self.azure_container]
            if len(containers) == 0:
                self._connection.create_container(self.azure_container,
                                                  {'origin': 'created by Django web app'},
                                                  fail_on_exist=True)

        return self._connection

    def _open(self, name, mode="rb"):
        stream = SimpleUploadedFile(name, None)
        self.connection.get_blob_to_file(self.azure_container, name, stream)
        stream.seek(0)
        return stream

    def exists(self, name):
        try:
            self.connection.get_blob_properties(self.azure_container, name)
        except AzureMissingResourceHttpError:
            return False
        else:
            return True

    def delete(self, name):
        self.connection.delete_blob(self.azure_container, name)

    def size(self, name):
        properties = self.connection.get_blob_properties(self.azure_container, name)
        return properties["content-length"]

    def _save(self, name, content):
        self.connection.put_block_blob_from_file(self.azure_container, name, content)
        return name

    def url(self, name):
        ap = AccessPolicy(
            expiry=(timezone.datetime.utcnow() + timezone.timedelta(seconds=600)).strftime('%Y-%m-%dT%H:%M:%SZ'),
            start=(timezone.datetime.utcnow() + timezone.timedelta(seconds=-600)).strftime('%Y-%m-%dT%H:%M:%SZ'),
            permission='r')
        sap = SharedAccessPolicy(ap)
        sas = SharedAccessSignature(self.account_name, self.account_key)
        url = sas.generate_signed_query_string(path=self.azure_container + '/' + name,
                                               resource_type='b',
                                               shared_access_policy=sap)
        return self.connection.make_blob_url(self.azure_container, name) + "?" + url
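# A minimal sketch of wiring the storage backend above into a Django project.
# The module path 'azure_storage.AzureStorage' and the credential values are
# illustrative assumptions, not part of the original code; only the setting
# names must match what the class reads from django.conf.settings.
#
# settings.py
# AZURE_ACCOUNT_NAME = 'mystorageaccount'        # assumption: your account name
# AZURE_ACCOUNT_KEY = '<base64 account key>'     # assumption: your account key
# AZURE_CONTAINER = 'media'
# DEFAULT_FILE_STORAGE = 'azure_storage.AzureStorage'  # hypothetical module path
#
# With that in place, FileField uploads go through _save() and url() returns a
# SAS-signed link that expires ten minutes after issue.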
def uploadFile(sourceFile, destUrl, destKey):
    storageparts = split_storage_url(destUrl)
    blobservice = BlobService(storageparts[0], destKey)
    try:
        # Page blobs (e.g. VHDs) are binary, so open in binary mode.
        fh = open(sourceFile, "rb")
    except:
        print "No such file", sourceFile
        return
    log('uploading ' + str(sourceFile), True)
    blobservice.put_page_blob_from_file(storageparts[2], sourceFile, fh, getsize(sourceFile))
def generateImageUrl(request):
    account_name = "faceemoji"
    account_key = "kaoJiy0T7r6sXyo4wFYKCLgpAXbILKvkloeF+kFpCEUxC+bL9BxGA3WtofVxHcLPn3lMjw/UO/0sS1GCN3/AQw=="
    blob_service = BlobService(account_name, account_key)
    content = base64.b64decode(request.data)
    st = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H-%M-%S')
    blob_name = hashlib.sha224(st).hexdigest() + 'image.png'
    blob_service.put_block_blob_from_bytes('image', blob_name, content)
    img_url = blob_service.make_blob_url('image', blob_name)
    return img_url
def getblob(request):
    assert isinstance(request, HttpRequest)
    blob_service = BlobService(account_name='araldrift', account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==')
    # http://<storage-account-name>.blob.core.windows.net/<container-name>/<blob-name>
    name = 'test.txt'
    fpath = os.path.join(tempfile.gettempdir(), name)
    blob_service.get_blob_to_path('flow', 'NARYN.day', fpath)
    response = HttpResponse(content_type='text/plain')
    response['Content-Disposition'] = 'attachment; filename=' + name
    return response
def upload_file_to_azure(in_file, file_name, container_name=settings.AZURE_CONTAINER):
    try:
        blob_service = BlobService(AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY)
        blob_service.put_block_blob_from_path(
            container_name=container_name,
            blob_name=file_name,
            file_path=in_file,
            x_ms_blob_content_type='application/octet-stream'
        )
    except Exception as ex:
        print("Failed to upload blob: {0}".format(ex))
def _getUrlForTestFile(cls, size=None):
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey

    fileName = 'testfile_%s' % uuid.uuid4()
    containerName = cls._externalStore()
    url = 'wasb://%s@%s.blob.core.windows.net/%s' % (containerName, cls.accountName, fileName)
    if size is None:
        return url
    blobService = BlobService(account_key=_fetchAzureAccountKey(cls.accountName),
                              account_name=cls.accountName)
    content = os.urandom(size)
    blobService.put_block_blob_from_text(containerName, fileName, content)
    return url, hashlib.md5(content).hexdigest()
class azure_storage_writer(object):
    """Storage operation wrapper, designed for writing logs to storage."""

    def __init__(self, account_name, account_key, container, prefix):
        self._blob = BlobService(account_name=account_name, account_key=account_key)
        self._cur_path = None
        self._buf = io.StringIO()
        self._prefix = prefix
        self._container = container
        self._blob.create_container(container)
        self._logger = create_timed_rotating_log()

    def write_log(self, entity):
        path = self._get_path(entity[0])
        if self._cur_path is None:
            self._cur_path = path
        elif self._cur_path != path:
            self._dump_buf_to_storage()
            self._buf.close()
            self._buf = io.StringIO()
            self._cur_path = path
        self._buf.write(entity[1])
        self._buf.write("\n")

    def close(self):
        if self._cur_path is not None:
            self._dump_buf_to_storage()
        self._buf.close()

    def _dump_buf_to_storage(self):
        self._logger.info("Begin dump to azure blob")
        loop = 0
        while True:
            try:
                self._blob.put_block_blob_from_text(self._container, self._cur_path, self._buf.getvalue())
                break
            except AzureHttpError as e:
                self._logger.warn("Hit an AzureHttpError " + str(e))
                self._logger.warn("Retry times: {0}".format(loop))
                loop = loop + 1
                if loop >= 3:
                    raise e
            except Exception as e:
                self._logger.warn("Hit an Exception " + str(e))
                raise e
        self._logger.info("Dump to azure blob succeeded.")

    def _get_path(self, timestamp):
        # timestamp = int(timestamp)
        d = datetime.fromtimestamp(int(timestamp))
        part = str.format("logs-part-{}.txt", d.minute // 5)
        path_str = d.strftime('%Y-%m-%d/%H')
        return str.format("{}/{}/{}", self._prefix, path_str, part)
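# A short usage sketch for the writer above. The account name, key, container
# and prefix are placeholders (assumptions, not values from the original code);
# each entry is the (timestamp, text) pair that write_log() expects, and the
# buffer is a StringIO, so text should be unicode.
#
# writer = azure_storage_writer(account_name='myaccount',
#                               account_key='<key>',
#                               container='logs',
#                               prefix='worker-01')
# writer.write_log((1431000000, u"first event"))   # lands in one 5-minute part file
# writer.write_log((1431000120, u"second event"))  # same part, appended to the buffer
# writer.close()                                   # flushes the remaining buffer to blob storage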
class _BlobStorageFileHandler(object):

    def __init__(self,
                 account_name=None,
                 account_key=None,
                 protocol='https',
                 container='logs',
                 zip_compression=False,
                 max_connections=1,
                 max_retries=5,
                 retry_wait=1.0):
        self.service = BlobService(account_name, account_key, protocol)
        self.container_created = False
        hostname = gethostname()
        self.meta = {'hostname': hostname.replace('_', '-'), 'process': os.getpid()}
        self.container = (container % self.meta).lower()
        self.meta['hostname'] = hostname
        self.zip_compression = zip_compression
        self.max_connections = max_connections
        self.max_retries = max_retries
        self.retry_wait = retry_wait

    def put_file_into_storage(self, dirName, fileName):
        """
        Ship the outdated log file to the specified blob container.
        """
        if not self.container_created:
            self.service.create_container(self.container)
            self.container_created = True
        fd, tmpfile_path = None, ''
        try:
            file_path = os.path.join(dirName, fileName)
            if self.zip_compression:
                suffix, content_type = '.zip', 'application/zip'
                fd, tmpfile_path = mkstemp(suffix=suffix)
                with os.fdopen(fd, 'wb') as f:
                    with ZipFile(f, 'w', ZIP_DEFLATED) as z:
                        z.write(file_path, arcname=fileName)
                file_path = tmpfile_path
            else:
                suffix, content_type = '', 'text/plain'
            self.service.put_block_blob_from_path(self.container,
                                                  fileName + suffix,
                                                  file_path,
                                                  x_ms_blob_content_type=content_type,
                                                  max_connections=self.max_connections,
                                                  max_retries=self.max_retries,
                                                  retry_wait=self.retry_wait)
        finally:
            if self.zip_compression and fd:
                os.remove(tmpfile_path)
def main():
    # continue here
    if verbose:
        print('Copying')
        print('Storage account:', storage_account_name)
        print('Storage Key:', storage_account_key)
        print('Container Name:', storage_container_name)
        print('Input file:', inputfile)
        print('Output Blob:', outputblob)

    blob_service = BlobService(account_name=storage_account_name,
                               account_key=storage_account_key)
    # blob_service.create_container(storage_container_name, x_ms_blob_public_access='container')
    blob_service.put_block_blob_from_path(storage_container_name, outputblob, inputfile,
                                          x_ms_blob_content_type="image/jpeg")
def pushToAzureCDN(data):
    import pickle
    from azure.storage.blob import BlobService

    blob_service = BlobService(account_name=azureAccount, account_key=azureAccountKey)
    blob_service.put_block_blob_from_bytes(
        azureContainer,
        azureFile,
        pickle.dumps(data),
        content_encoding='application/octet-stream'
    )
def _prepareTestFile(self, containerName, size=None):
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey
    from azure.storage.blob import BlobService

    fileName = 'testfile_%s' % uuid.uuid4()
    url = 'wasb://%s@%s.blob.core.windows.net/%s' % (containerName, self.accountName, fileName)
    if size is None:
        return url
    blobService = BlobService(account_key=_fetchAzureAccountKey(self.accountName),
                              account_name=self.accountName)
    content = os.urandom(size)
    blobService.put_block_blob_from_text(containerName, fileName, content)
    return url, hashlib.md5(content).hexdigest()
def prepare_storage(settings):
    default_storage_account_name = settings["DEFAULT_STORAGE_ACCOUNT_NAME"]
    storage_access_key = settings["STORAGE_ACCESS_KEY"]

    blob_service = BlobService(default_storage_account_name, storage_access_key)
    blob_service.create_container('bosh')
    blob_service.create_container(
        container_name='stemcell',
        x_ms_blob_public_access='blob'
    )

    # Prepare the table for storing meta data of the storage account and stemcells
    table_service = TableService(default_storage_account_name, storage_access_key)
    table_service.create_table('stemcells')
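# A small sketch of invoking the helper above; the dict keys match what
# prepare_storage() reads, and the values are placeholders (assumptions).
#
# settings = {
#     "DEFAULT_STORAGE_ACCOUNT_NAME": "mystorageaccount",
#     "STORAGE_ACCESS_KEY": "<key>",
# }
# prepare_storage(settings)  # creates the 'bosh' and public 'stemcell' containers plus the 'stemcells' table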
def upload_azure_blob(account, account_key, container, filename, file, file_type='file/csv'):
    block_blob_service = BlobService(account_name=account, account_key=account_key)
    # block_blob_service.put_block_blob_from_path(
    #     container,
    #     blockblob,
    #     file,
    #     x_ms_blob_content_type='file/csv'
    # )
    # The legacy BlobService uploads a stream with put_block_blob_from_file
    # (create_blob_from_stream belongs to the newer BlockBlobService API).
    block_blob_service.put_block_blob_from_file(container, filename, file)
    generator = block_blob_service.list_blobs(container)
    for blob in generator:
        print(blob.name)
def upload_results():
    """
    :return: None
    """
    logger = logging.getLogger(__name__)
    results_fpath = '/data/wsdm_cup/results/results.tsv'
    logger.info('Uploading results from {0}'.format(results_fpath))
    blob_service = BlobService(account_name='wsdmcupchallenge', sas_token=Config.SAS_TOKEN)
    blob_service.put_block_blob_from_path(container_name='bletchleypark',
                                          blob_name='results.tsv',
                                          file_path=results_fpath)
    logger.info('Done uploading')
    return
def uri_get_file(creds, uri, conn=None):
    assert uri.startswith('wabs://')
    url_tup = urlparse(uri)

    if conn is None:
        conn = BlobService(creds.account_name, creds.account_key,
                           sas_token=creds.access_token, protocol='https')

    # Determine the size of the target blob
    props = conn.get_blob_properties(url_tup.netloc, url_tup.path.lstrip('/'))
    blob_size = int(props['content-length'])

    ret_size = 0
    data = io.BytesIO()
    # WABS requires large files to be downloaded in 4MB chunks
    while ret_size < blob_size:
        ms_range = 'bytes={0}-{1}'.format(ret_size, ret_size + WABS_CHUNK_SIZE - 1)
        while True:
            # Because we're downloading in chunks, catch rate limiting and
            # connection errors here instead of letting them bubble up to the
            # @retry decorator so that we don't have to start downloading the
            # whole file over again.
            try:
                part = conn.get_blob(url_tup.netloc, url_tup.path.lstrip('/'), x_ms_range=ms_range)
            except EnvironmentError as e:
                if e.errno in (errno.EBUSY, errno.ECONNRESET):
                    logger.warning(
                        msg="retrying after encountering exception",
                        detail=("Exception traceback:\n{0}".format(
                            traceback.format_exception(*sys.exc_info()))),
                        hint="")
                    gevent.sleep(30)
                else:
                    raise
            else:
                break
        length = len(part)
        ret_size += length
        data.write(part)
        if length > 0 and length < WABS_CHUNK_SIZE:
            break
        elif length == 0:
            break

    return data.getvalue()
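# A minimal sketch of calling the chunked downloader above. The credentials
# object here is a stand-in for whatever creds type the surrounding code uses
# (it only needs account_name, account_key and access_token attributes), and
# the account name, key and URI are illustrative placeholders.
#
# from collections import namedtuple
#
# WabsCreds = namedtuple('WabsCreds', ['account_name', 'account_key', 'access_token'])
# creds = WabsCreds(account_name='myaccount', account_key='<key>', access_token=None)
#
# # Downloads the whole blob in 4 MB chunks and returns its bytes.
# payload = uri_get_file(creds, 'wabs://mycontainer/backups/base_backup.tar.lzo')
# print(len(payload))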
def download_data(key):
    blob_service = BlobService(account_name='asosdsrecruiting', account_key=key)

    blobs = []
    marker = None
    while True:
        batch = blob_service.list_blobs('recruitingdata', marker=marker)
        blobs.extend(batch)
        if not batch.next_marker:
            break
        marker = batch.next_marker

    for blob in blobs:
        file_name = blob.name
        print('Downloading: ' + file_name)
        blob_service.get_blob_to_path('recruitingdata', file_name, file_name.replace('/', '_') + '.csv')
def __init__(self, accountName, namePrefix, config=None, jobChunkSize=maxAzureTablePropertySize):
    self.jobChunkSize = jobChunkSize
    self.keyPath = None
    self.account_key = _fetchAzureAccountKey(accountName)
    self.accountName = accountName
    # Table names have strict requirements in Azure
    self.namePrefix = self._sanitizeTableName(namePrefix)
    logger.debug("Creating job store with name prefix '%s'" % self.namePrefix)

    # These are the main API entry points.
    self.tableService = TableService(account_key=self.account_key, account_name=accountName)
    self.blobService = BlobService(account_key=self.account_key, account_name=accountName)

    exists = self._jobStoreExists()
    self._checkJobStoreCreation(config is not None, exists, accountName + ":" + self.namePrefix)

    # Serialized jobs table
    self.jobItems = self._getOrCreateTable(self.qualify('jobs'))
    # Job<->file mapping table
    self.jobFileIDs = self._getOrCreateTable(self.qualify('jobFileIDs'))
    # Container for all shared and unshared files
    self.files = self._getOrCreateBlobContainer(self.qualify('files'))
    # Stats and logging strings
    self.statsFiles = self._getOrCreateBlobContainer(self.qualify('statsfiles'))
    # File IDs that contain stats and logging strings
    self.statsFileIDs = self._getOrCreateTable(self.qualify('statsFileIDs'))

    super(AzureJobStore, self).__init__(config=config)

    if self.config.cseKey is not None:
        self.keyPath = self.config.cseKey
def save_image_to_azure(profile, url):
    try:
        response = request('GET', url)
        response.raise_for_status()
    except ConnectionError:
        pass
    else:
        service = BlobService(
            account_name=storagesettings.AZURE_ACCOUNT_NAME,
            account_key=storagesettings.AZURE_ACCOUNT_KEY)

        service.put_block_blob_from_bytes(
            'avatars',
            profile.id,
            response.content,
            x_ms_blob_content_type=response.headers['content-type']
        )
def connect(config=False):
    import lib.misc as misc
    from azure.storage.blob import BlobService

    global blob_service, container

    # Connect to the cloud service.
    if not config:
        config = misc.config['_private']

    container = 'streams'
    if not 'azure' in config:
        return None, None

    if not blob_service:
        blob_service = BlobService(config['azure']['storage_account_name'],
                                   config['azure']['primary_access_key'])
        blob_service.create_container(container, x_ms_blob_public_access='container')

    return blob_service, container
def __init__(self, account_name, account_key, container, prefix):
    self._blob = BlobService(account_name=account_name, account_key=account_key)
    self._cur_path = None
    self._buf = io.StringIO()
    self._prefix = prefix
    self._container = container
    self._blob.create_container(container)
    self._logger = create_timed_rotating_log()
class AzureJobStore(AbstractJobStore):
    """
    A job store that uses Azure's blob store for file storage and Table Service to store job info
    with strong consistency.
    """

    @classmethod
    def loadOrCreateJobStore(cls, locator, config=None, **kwargs):
        account, namePrefix = locator.split(':', 1)
        if '--' in namePrefix:
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain "
                             "%s." % (namePrefix, cls.nameSeparator))
        if not cls.containerNameRe.match(namePrefix):
            raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
                             "hyphens or lower-case letters and must not start or end in a "
                             "hyphen." % namePrefix)
        # Reserve 13 for separator and suffix
        if len(namePrefix) > cls.maxContainerNameLen - cls.maxNameLen - len(cls.nameSeparator):
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
                             "characters." % namePrefix)
        if '--' in namePrefix:
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain "
                             "%s." % (namePrefix, cls.nameSeparator))
        return cls(account, namePrefix, config=config, **kwargs)

    # Dots in container names should be avoided because container names are used in HTTPS bucket
    # URLs where they may interfere with the certificate common name. We use a double
    # underscore as a separator instead.
    containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$')

    # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx
    minContainerNameLen = 3
    maxContainerNameLen = 63
    maxNameLen = 10
    nameSeparator = 'xx'  # Table names must be alphanumeric

    # Do not invoke the constructor, use the factory method above.
    def __init__(self, accountName, namePrefix, config=None, jobChunkSize=maxAzureTablePropertySize):
        self.jobChunkSize = jobChunkSize
        self.keyPath = None
        self.account_key = _fetchAzureAccountKey(accountName)
        self.accountName = accountName
        # Table names have strict requirements in Azure
        self.namePrefix = self._sanitizeTableName(namePrefix)
        logger.debug("Creating job store with name prefix '%s'" % self.namePrefix)

        # These are the main API entry points.
        self.tableService = TableService(account_key=self.account_key, account_name=accountName)
        self.blobService = BlobService(account_key=self.account_key, account_name=accountName)

        exists = self._jobStoreExists()
        self._checkJobStoreCreation(config is not None, exists, accountName + ":" + self.namePrefix)

        # Serialized jobs table
        self.jobItems = self._getOrCreateTable(self.qualify('jobs'))
        # Job<->file mapping table
        self.jobFileIDs = self._getOrCreateTable(self.qualify('jobFileIDs'))
        # Container for all shared and unshared files
        self.files = self._getOrCreateBlobContainer(self.qualify('files'))
        # Stats and logging strings
        self.statsFiles = self._getOrCreateBlobContainer(self.qualify('statsfiles'))
        # File IDs that contain stats and logging strings
        self.statsFileIDs = self._getOrCreateTable(self.qualify('statsFileIDs'))

        super(AzureJobStore, self).__init__(config=config)

        if self.config.cseKey is not None:
            self.keyPath = self.config.cseKey

    # Length of a jobID - used to test if a stats file has been read already or not
    jobIDLength = len(str(uuid.uuid4()))

    def qualify(self, name):
        return self.namePrefix + self.nameSeparator + name

    def jobs(self):
        # How many jobs have we processed so far?
        total_processed = 0
        for jobEntity in self.jobItems.query_entities_auto():
            # Process the items in the page
            yield AzureJob.fromEntity(jobEntity)
            total_processed += 1
            if total_processed % 1000 == 0:
                # Produce some feedback for the user, because this can take
                # a long time on, for example, Azure
                logger.info("Processed %d total jobs" % total_processed)
        logger.info("Processed %d total jobs" % total_processed)

    def create(self, command, memory, cores, disk, preemptable, predecessorNumber=0):
        jobStoreID = self._newJobID()
        job = AzureJob(jobStoreID=jobStoreID,
                       command=command, memory=memory, cores=cores, disk=disk,
                       preemptable=preemptable,
                       remainingRetryCount=self._defaultTryCount(),
                       logJobStoreFileID=None,
                       predecessorNumber=predecessorNumber)
        entity = job.toItem(chunkSize=self.jobChunkSize)
        entity['RowKey'] = jobStoreID
        self.jobItems.insert_entity(entity=entity)
        return job

    def exists(self, jobStoreID):
        if self.jobItems.get_entity(row_key=jobStoreID) is None:
            return False
        return True

    def load(self, jobStoreID):
        jobEntity = self.jobItems.get_entity(row_key=jobStoreID)
        if jobEntity is None:
            raise NoSuchJobException(jobStoreID)
        return AzureJob.fromEntity(jobEntity)

    def update(self, job):
        self.jobItems.update_entity(row_key=job.jobStoreID,
                                    entity=job.toItem(chunkSize=self.jobChunkSize))

    def delete(self, jobStoreID):
        try:
            self.jobItems.delete_entity(row_key=jobStoreID)
        except AzureMissingResourceHttpError:
            # Job deletion is idempotent, and this job has been deleted already
            return
        filterString = "PartitionKey eq '%s'" % jobStoreID
        for fileEntity in self.jobFileIDs.query_entities(filter=filterString):
            jobStoreFileID = fileEntity.RowKey
            self.deleteFile(jobStoreFileID)

    def deleteJobStore(self):
        self.jobItems.delete_table()
        self.jobFileIDs.delete_table()
        self.files.delete_container()
        self.statsFiles.delete_container()
        self.statsFileIDs.delete_table()

    def _jobStoreExists(self):
        """
        Checks if the job store exists by querying the existence of the statsFileIDs table.
        Note that this is the last component that is deleted in deleteJobStore.
        """
        for attempt in retry_azure():
            with attempt:
                try:
                    table = self.tableService.query_tables(table_name=self.qualify('statsFileIDs'))
                    return table is not None
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        return False
                    else:
                        raise

    def getEnv(self):
        return dict(AZURE_ACCOUNT_KEY=self.account_key)

    @classmethod
    def _readFromUrl(cls, url, writable):
        blobService, containerName, blobName = cls._extractBlobInfoFromUrl(url)
        blobService.get_blob_to_file(containerName, blobName, writable)

    @classmethod
    def _writeToUrl(cls, readable, url):
        blobService, containerName, blobName = cls._extractBlobInfoFromUrl(url)
        blobService.put_block_blob_from_file(containerName, blobName, readable)
        blobService.get_blob(containerName, blobName)

    @staticmethod
    def _extractBlobInfoFromUrl(url):
        """
        :return: (blobService, containerName, blobName)
        """
        def invalidUrl():
            raise RuntimeError("The URL '%s' is invalid" % url.geturl())

        netloc = url.netloc.split('@')
        if len(netloc) != 2:
            invalidUrl()

        accountEnd = netloc[1].find('.blob.core.windows.net')
        if accountEnd == -1:
            invalidUrl()

        containerName, accountName = netloc[0], netloc[1][0:accountEnd]
        blobName = url.path[1:]  # urlparse always includes a leading '/'
        blobService = BlobService(account_key=_fetchAzureAccountKey(accountName),
                                  account_name=accountName)
        return blobService, containerName, blobName

    @classmethod
    def _supportsUrl(cls, url, export=False):
        return url.scheme.lower() == 'wasb' or url.scheme.lower() == 'wasbs'

    def writeFile(self, localFilePath, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.updateFile(jobStoreFileID, localFilePath)
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    def updateFile(self, jobStoreFileID, localFilePath):
        with open(localFilePath) as read_fd:
            with self._uploadStream(jobStoreFileID, self.files) as write_fd:
                while True:
                    buf = read_fd.read(self._maxAzureBlockBytes)
                    write_fd.write(buf)
                    if len(buf) == 0:
                        break

    def readFile(self, jobStoreFileID, localFilePath):
        try:
            with self._downloadStream(jobStoreFileID, self.files) as read_fd:
                with open(localFilePath, 'w') as write_fd:
                    while True:
                        buf = read_fd.read(self._maxAzureBlockBytes)
                        write_fd.write(buf)
                        if not buf:
                            break
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)

    def deleteFile(self, jobStoreFileID):
        try:
            self.files.delete_blob(blob_name=jobStoreFileID)
            self._dissociateFileFromJob(jobStoreFileID)
        except AzureMissingResourceHttpError:
            pass

    def fileExists(self, jobStoreFileID):
        # As Azure doesn't have a blob_exists method (at least in the
        # python API) we just try to download the metadata, and hope
        # the metadata is small so the call will be fast.
        try:
            self.files.get_blob_metadata(blob_name=jobStoreFileID)
            return True
        except AzureMissingResourceHttpError:
            return False

    @contextmanager
    def writeFileStream(self, jobStoreID=None):
        # TODO: this (and all stream methods) should probably use the
        # Append Blob type, but that is not currently supported by the
        # Azure Python API.
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as fd:
            yield fd, jobStoreFileID
        self._associateFileWithJob(jobStoreFileID, jobStoreID)

    @contextmanager
    def updateFileStream(self, jobStoreFileID):
        with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd:
            yield fd

    def getEmptyFileStoreID(self, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.files.put_blob(blob_name=jobStoreFileID, blob='', x_ms_blob_type='BlockBlob')
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    @contextmanager
    def readFileStream(self, jobStoreFileID):
        if not self.fileExists(jobStoreFileID):
            raise NoSuchFileException(jobStoreFileID)
        with self._downloadStream(jobStoreFileID, self.files) as fd:
            yield fd

    @contextmanager
    def writeSharedFileStream(self, sharedFileName, isProtected=None):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd:
            yield fd

    @contextmanager
    def readSharedFileStream(self, sharedFileName):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        if not self.fileExists(sharedFileID):
            raise NoSuchFileException(sharedFileID)
        with self._downloadStream(sharedFileID, self.files) as fd:
            yield fd

    def writeStatsAndLogging(self, statsAndLoggingString):
        # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK
        jobStoreFileID = self._newFileID()
        encrypted = self.keyPath is not None
        if encrypted:
            statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath)
        self.statsFiles.put_block_blob_from_text(blob_name=jobStoreFileID,
                                                 text=statsAndLoggingString,
                                                 x_ms_meta_name_values=dict(encrypted=str(encrypted)))
        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID})

    def readStatsAndLogging(self, callback, readAll=False):
        suffix = '_old'
        numStatsFiles = 0
        for entity in self.statsFileIDs.query_entities():
            jobStoreFileID = entity.RowKey
            hasBeenRead = len(jobStoreFileID) > self.jobIDLength
            if not hasBeenRead:
                with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                    callback(fd)
                # Mark this entity as read by appending the suffix
                self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID + suffix})
                self.statsFileIDs.delete_entity(row_key=jobStoreFileID)
                numStatsFiles += 1
            elif readAll:
                # Strip the suffix to get the original ID
                jobStoreFileID = jobStoreFileID[:-len(suffix)]
                with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                    callback(fd)
                numStatsFiles += 1
        return numStatsFiles

    _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    def getPublicUrl(self, jobStoreFileID):
        try:
            self.files.get_blob_properties(blob_name=jobStoreFileID)
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)
        # Compensate for a little bit of clock skew
        startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime(self._azureTimeFormat)
        endTime = datetime.utcnow() + self.publicUrlExpiration
        endTimeStr = endTime.strftime(self._azureTimeFormat)
        sap = SharedAccessPolicy(AccessPolicy(startTimeStr, endTimeStr,
                                              BlobSharedAccessPermissions.READ))
        sas_token = self.files.generate_shared_access_signature(blob_name=jobStoreFileID,
                                                                shared_access_policy=sap)
        return self.files.make_blob_url(blob_name=jobStoreFileID) + '?' + sas_token

    def getSharedPublicUrl(self, sharedFileName):
        jobStoreFileID = self._newFileID(sharedFileName)
        return self.getPublicUrl(jobStoreFileID)

    def _newJobID(self):
        # raw UUIDs don't work for Azure property names because the '-' character is disallowed.
        return str(uuid.uuid4()).replace('-', '_')

    # A dummy job ID under which all shared files are stored.
    sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')

    def _newFileID(self, sharedFileName=None):
        if sharedFileName is None:
            ret = str(uuid.uuid4())
        else:
            ret = str(uuid.uuid5(self.sharedFileJobID, str(sharedFileName)))
        return ret.replace('-', '_')

    def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
        if jobStoreID is not None:
            self.jobFileIDs.insert_entity(entity={'PartitionKey': jobStoreID,
                                                  'RowKey': jobStoreFileID})

    def _dissociateFileFromJob(self, jobStoreFileID):
        entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID)
        if entities:
            assert len(entities) == 1
            jobStoreID = entities[0].PartitionKey
            self.jobFileIDs.delete_entity(partition_key=jobStoreID, row_key=jobStoreFileID)

    def _getOrCreateTable(self, tableName):
        # This will not fail if the table already exists.
        for attempt in retry_azure():
            with attempt:
                self.tableService.create_table(tableName)
        return AzureTable(self.tableService, tableName)

    def _getOrCreateBlobContainer(self, containerName):
        for attempt in retry_azure():
            with attempt:
                self.blobService.create_container(containerName)
        return AzureBlobContainer(self.blobService, containerName)

    def _sanitizeTableName(self, tableName):
        """
        Azure table names must start with a letter and be alphanumeric.

        This will never cause a collision if uuids are used, but otherwise may not be safe.
        """
        return 'a' + filter(lambda x: x.isalnum(), tableName)

    # Maximum bytes that can be in any block of an Azure block blob
    # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106
    _maxAzureBlockBytes = 4 * 1024 * 1024

    @contextmanager
    def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None):
        """
        :param encrypted: True to enforce encryption (will raise exception unless key is set),
        False to prevent encryption or None to encrypt if key is set.
        """
        if checkForModification:
            try:
                expectedVersion = container.get_blob_properties(blob_name=jobStoreFileID)['etag']
            except AzureMissingResourceHttpError:
                expectedVersion = None

        if encrypted is None:
            encrypted = self.keyPath is not None
        elif encrypted:
            if self.keyPath is None:
                raise RuntimeError('Encryption requested but no key was provided')

        maxBlockSize = self._maxAzureBlockBytes
        if encrypted:
            # There is a small overhead for encrypted data.
            maxBlockSize -= encryption.overhead

        readable_fh, writable_fh = os.pipe()
        with os.fdopen(readable_fh, 'r') as readable:
            with os.fdopen(writable_fh, 'w') as writable:

                def reader():
                    blockIDs = []
                    try:
                        while True:
                            buf = readable.read(maxBlockSize)
                            if len(buf) == 0:
                                # We're safe to break here even if we never read anything, since
                                # putting an empty block list creates an empty blob.
                                break
                            if encrypted:
                                buf = encryption.encrypt(buf, self.keyPath)
                            blockID = self._newFileID()
                            container.put_block(blob_name=jobStoreFileID, block=buf, blockid=blockID)
                            blockIDs.append(blockID)
                    except:
                        # This is guaranteed to delete any uncommitted blocks.
                        container.delete_blob(blob_name=jobStoreFileID)
                        raise

                    if checkForModification and expectedVersion is not None:
                        # Acquire a (60-second) write lock,
                        leaseID = container.lease_blob(blob_name=jobStoreFileID,
                                                       x_ms_lease_action='acquire')['x-ms-lease-id']
                        # check for modification,
                        blobProperties = container.get_blob_properties(blob_name=jobStoreFileID)
                        if blobProperties['etag'] != expectedVersion:
                            container.lease_blob(blob_name=jobStoreFileID,
                                                 x_ms_lease_action='release',
                                                 x_ms_lease_id=leaseID)
                            raise ConcurrentFileModificationException(jobStoreFileID)
                        # commit the file,
                        container.put_block_list(blob_name=jobStoreFileID,
                                                 block_list=blockIDs,
                                                 x_ms_lease_id=leaseID,
                                                 x_ms_meta_name_values=dict(encrypted=str(encrypted)))
                        # then release the lock.
                        container.lease_blob(blob_name=jobStoreFileID,
                                             x_ms_lease_action='release',
                                             x_ms_lease_id=leaseID)
                    else:
                        # No need to check for modification, just blindly write over whatever
                        # was there.
                        container.put_block_list(blob_name=jobStoreFileID,
                                                 block_list=blockIDs,
                                                 x_ms_meta_name_values=dict(encrypted=str(encrypted)))

                thread = ExceptionalThread(target=reader)
                thread.start()
                yield writable
            # The writable is now closed. This will send EOF to the readable and cause that
            # thread to finish.
            thread.join()

    @contextmanager
    def _downloadStream(self, jobStoreFileID, container):
        # The reason this is not in the writer is so we catch non-existent blobs early
        blobProps = container.get_blob_properties(blob_name=jobStoreFileID)

        encrypted = strict_bool(blobProps['x-ms-meta-encrypted'])
        if encrypted and self.keyPath is None:
            raise AssertionError('Content is encrypted but no key was provided.')

        readable_fh, writable_fh = os.pipe()
        with os.fdopen(readable_fh, 'r') as readable:
            with os.fdopen(writable_fh, 'w') as writable:

                def writer():
                    try:
                        chunkStartPos = 0
                        fileSize = int(blobProps['Content-Length'])
                        while chunkStartPos < fileSize:
                            chunkEndPos = chunkStartPos + self._maxAzureBlockBytes - 1
                            buf = container.get_blob(blob_name=jobStoreFileID,
                                                     x_ms_range="bytes=%d-%d" % (chunkStartPos, chunkEndPos))
                            if encrypted:
                                buf = encryption.decrypt(buf, self.keyPath)
                            writable.write(buf)
                            chunkStartPos = chunkEndPos + 1
                    finally:
                        # Ensure readers aren't left blocking if this thread crashes.
                        # This close() will send EOF to the reading end and ultimately cause the
                        # yield to return. It also makes the implicit .close() done by the enclosing
                        # "with" context redundant but that should be ok since .close() on file
                        # objects are idempotent.
                        writable.close()

                thread = ExceptionalThread(target=writer)
                thread.start()
                yield readable
                thread.join()
def connection(self):
    if self._connection is None:
        self._connection = BlobService(self.account_name, self.account_key)
    return self._connection
def gethydrograph(request):
    '''
    Returns streamflow data by start / stop / station.

    In response it will generate a 404 error if the value is not found or
    return a JSON response with the requested slice or a .csv file by default.
    '''
    assert isinstance(request, HttpRequest)
    start = request.GET.get('start', None)
    end = request.GET.get('end', None)
    station = request.GET.get('station', None)
    interval = request.GET.get('interval', None)
    jsondat = request.GET.get('jsondat', None)
    plot = request.GET.get('plot', None)

    # start blob service
    stationfile = station + '.day.new'
    downloadablefile = station + '_' + start + '_' + end + '.csv'
    blob_service = BlobService(
        account_name='araldrift',
        account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ=='
    )
    blob_service.get_blob_to_path('flow', stationfile, './tmp.csv')
    f = open('./tmp.csv')

    # read in pandas data and subsetting
    d_cols = ["DATE", "FLOW"]
    d = pd.read_csv('./tmp.csv', sep=" ", names=d_cols)
    df = d[(d.DATE >= start) & (d.DATE <= end)]
    h = df.to_json(orient='records')
    json_encoded_result = json.dumps(h)
    df.plot(x='DATE', y='FLOW', figsize=(14, 6))
    plt.savefig('./plot_test.png')
    # h = []
    # while True:
    #     line = f.readline()
    #     if line == "":
    #         break
    #     h.append(line)
    # f.close()
    try:
        if jsondat in ['TRUE']:
            response = HttpResponse(json_encoded_result, content_type="application/json")
            return response
        elif plot in ['TRUE']:
            image_data = open("./plot_test.png", "rb").read()
            response = HttpResponse(image_data, content_type='image/png')
            return response
        else:
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename=' + downloadablefile
            df.to_csv(response, index=False, lineterminator='\r\n')
            return response
    except Exception as a:
        return HttpResponseNotFound(
            content="No dice, either the inputs were out of range, the file couldn't be retrieved, or the winds weren't in your favor."
        )
class AzureConnector():

    def __init__(self, config):
        tree = ET.parse('SharedConfig.xml')
        self.myMachineName = tree.find('.//Instance').get("id")

        self.sms = ServiceManagementService(
            subscription_id=config.get("azure", "subscription_id"),
            cert_file=config.get("azure", "cert_file"))

        self.bus_service = ServiceBusService(
            service_namespace=config.get("azure", "bus_namespace"),
            shared_access_key_name=config.get("azure", "bus_shared_access_key_name"),
            shared_access_key_value=config.get("azure", "bus_shared_access_key_value"))

        self.command_queue = config.get("azure", "commandQueuePath")
        for tries in range(1, 10):
            try:
                self.bus_service.create_queue(self.command_queue)
                break
            except:
                print "Waiting..."

        self.status_topic = config.get("azure", "statusTopicPath")
        self.bus_service.create_queue(self.status_topic)

        self.storage = BlobService(account_name=config.get("azure", "account_name"),
                                   account_key=config.get("azure", "account_key"))

        self.algo_storage_name = config.get("azure", "algorithm_storage_name")
        self.storage.create_container(self.algo_storage_name, fail_on_exist=False)

        self.proj_storage_name = config.get("azure", "project_storage_name")
        self.storage.create_container(self.proj_storage_name, fail_on_exist=False)

    def check_new_tasks(self):
        for tries in range(1, 2):
            try:
                message = self.bus_service.receive_queue_message(self.command_queue,
                                                                 peek_lock=False, timeout=60)
                break
            except:
                message = None

        if message is None or message.body is None:
            return None

        job_description = json.loads(message.body.replace('/AzureBlobStorage/', ''))

        command = CommandMetadata(
            command_id=job_description["command_id"],
            algorithm_directory=job_description["algorithm_prfx"],
            project_prfx=job_description["project_prfx"],
            project_input_files=job_description["project_input_files"],
            algorithm_executable_name=job_description["algorithm_executable_name"],
            algorithm_parameters=job_description["algorithm_parameters"],
            sent_timestamp=datetime.datetime.strptime(job_description["sent_timestamp"], "%d/%m/%Y %H:%M:%S"),
            machine_size=job_description["machine_size"])

        # Return data about the command consumed from the queue
        return command

        # There is nothing in the queue
        return None

    def list_algo_files(self, prfx):
        list = self.storage.list_blobs(container_name=self.algo_storage_name, prefix=prfx)
        result = []
        for blob in list:
            result.append(blob.name)
        return result

    def download_algo_zip(self, algorithm_bin_file, tmp_file):
        print "download_algo_zip(algorithm_bin_file=" + algorithm_bin_file + ", tmp_file=" + tmp_file + ")"
        for tries in range(1, 5):
            try:
                self.storage.get_blob_to_path(self.algo_storage_name, algorithm_bin_file, tmp_file,
                                              open_mode='wb', snapshot=None,
                                              x_ms_lease_id=None, progress_callback=None)
                break
            except Exception as e:
                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Connection error with the service. Retrying... " + e.__str__())

    def download_file_to_project(self, project_name, blob_name, dir):
        print "download_file_to_project(project_name=" + project_name + ", blob_name=" + blob_name + ", dir=" + dir + ")"
        for tries in range(1, 5):
            try:
                self.storage.get_blob_to_path(self.proj_storage_name,
                                              os.path.join(project_name, blob_name),
                                              os.path.join(dir, os.path.join(project_name, blob_name)),
                                              open_mode='wb', snapshot=None,
                                              x_ms_lease_id=None, progress_callback=None)
                break
            except Exception as e:
                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Connection error with the service. Retrying... " + e.__str__())

    def upload_proj_file(self, project_name, blob_name, dir):
        print "upload_proj_file(project_name=" + project_name + ", blob_name=" + blob_name + ", dir=" + dir + ")"
        if blob_name[0] == '/':
            blob_name = blob_name[1:]
        for tries in range(1, 5):
            try:
                self.storage.put_block_blob_from_path(self.proj_storage_name,
                                                      os.path.join(project_name, blob_name),
                                                      os.path.join(dir, os.path.join(project_name, blob_name)))
                break
            except Exception as e:
                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Connection error with the service. Retrying... " + e.__str__())

    def download_file_to_algo(self, blob_name, dir):
        print "download_file_to_algo(blob_name=" + blob_name + ", dir=" + dir + ")"
        for tries in range(1, 5):
            try:
                self.storage.get_blob_to_path(container_name=self.algo_storage_name,
                                              blob_name=os.path.join(blob_name),
                                              file_path=os.path.join(dir, blob_name),
                                              open_mode='wb', snapshot=None,
                                              x_ms_lease_id=None, progress_callback=None)
                break
            except Exception as e:
                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Connection error with the service. Retrying... " + e.__str__())

    def send_status(self, main_status):
        for tries in range(1, 5):
            try:
                self.bus_service.send_topic_message(topic_name=self.status_topic,
                                                    message=Message(main_status.encode('utf-8')))
                break
            except Exception as e:
                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Connection error with the service. Retrying... " + e.__str__())

    def shutdown_myself(self):
        # The virtual machine deletes itself from the cloud.
        print("Removing the virtual machine from the cloud...")
        for tries in range(1, 5):
            try:
                self.sms.delete_deployment(
                    service_name=self.myMachineName,
                    deployment_name=self.myMachineName,
                    delete_vhd=True)
                exit(0)
                break
            except Exception as e:
                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Connection error with the service. Retrying... " + e.__str__())
def _cleanUpExternalStore(self, containerName):
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey
    from azure.storage.blob import BlobService

    blobService = BlobService(account_key=_fetchAzureAccountKey(self.accountName),
                              account_name=self.accountName)
    blobService.delete_container(containerName)
def service(self):
    return BlobService(account_name=self.account,
                       account_key=_fetchAzureAccountKey(self.account))
def __init__(self):
    self.storage_account = getenv('STORAGE_ACCOUNT')
    self.blob_service = BlobService(self.storage_account, getenv('STORAGE_KEY'))
class BlobSource(DataSource):

    def __init__(self):
        self.storage_account = getenv('STORAGE_ACCOUNT')
        self.blob_service = BlobService(self.storage_account, getenv('STORAGE_KEY'))

    def load(self, sparkContext, container, path):
        path = ('/' if path[0] != '/' else '') + path
        uri = 'wasb://%s@%s.blob.core.windows.net%s' % (container, self.storage_account, path)
        print 'Loading from %s' % uri
        return sparkContext.textFile(uri)

    def download(self, container, path):
        print 'Downloading blob from %s/%s' % (container, path)
        self.blob_service.get_blob_to_path(container, path, path)
        print 'Downloaded blob to ' + path

    def saveAsJson(self, payload, container, path):
        path = path.lstrip('/')
        print path
        print 'Saving to %s/%s' % (container, path)
        json_string = json.dumps(payload, ensure_ascii=False).encode('utf-8')
        try:
            self.blob_service.put_blob(
                container,
                path,
                json_string,
                'BlockBlob',
                x_ms_blob_cache_control='max-age=3600',
                x_ms_blob_content_type='application/json')
        except Exception as e:
            print 'Failed to save %s/%s: %s' % (container, path, str(e))
            raise

    def saveAsText(self, rdd, container, path):
        path = path.lstrip('/')
        path = '/' + path
        print 'Saving rdd to %s%s' % (container, path)
        uri = 'wasb://%s@%s.blob.core.windows.net%s' % (container, self.storage_account, path)
        try:
            rdd.saveAsTextFile(uri)
        except Exception as e:
            print 'Failed to save %s%s: %s' % (container, path, str(e))
            raise

    def deleteAllBut(self, container, exceptFolderName):
        print 'deleteAllBut called'
        try:
            bloblistingresult = self.blob_service.list_blobs(container)
            for i in bloblistingresult:
                print i.name
                if not exceptFolderName in i.name:
                    try:
                        print 'deleting'
                        self.blob_service.delete_blob(container, i.name)
                        print 'deleted'
                    except Exception as e:
                        print 'Failed to delete %s/%s: %s' % (container, i.name, str(e))
                        raise
        except Exception as e:
            print 'Failed to list things in %s: %s' % (container, str(e))
            raise
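# For context, a small sketch of how BlobSource might be driven. The two
# environment variables are the ones __init__ reads via getenv(); the account
# name, key, container and blob path are placeholders, not values from the
# original code.
#
# import os
# os.environ['STORAGE_ACCOUNT'] = 'myaccount'
# os.environ['STORAGE_KEY'] = '<key>'
#
# source = BlobSource()
# source.download('raw-data', 'summary.json')                       # copies the blob to ./summary.json
# source.saveAsJson({'status': 'ok'}, 'results', '/daily/status.json')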
class AzureFS(LoggingMixIn, Operations): """Azure Blob Storage filesystem""" blobs = None containers = dict() # <cname, dict(stat:dict, #files:None|dict<fname, stat>) fds = dict() # <fd, (path, bytes, dirty)> fd = 0 def __init__(self, account, key): self.blobs = BlobService(account, key) self.rebuild_container_list() def convert_to_epoch(self, date): """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch""" return int(time.mktime(time.strptime(date, TIME_FORMAT))) def rebuild_container_list(self): cmap = dict() cnames = set() for c in self.blobs.list_containers(): date = c.properties.last_modified cstat = dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0, st_mtime=self.convert_to_epoch(date)) cname = c.name cmap['/' + cname] = dict(stat=cstat, files=None) cnames.add(cname) cmap['/'] = dict(files={}, stat=dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0, st_mtime=int(time.time()))) self.containers = cmap # destroys fs tree cache resistant to misses def _parse_path(self, path): # returns </dir, file(=None)> if path.count('/') > 1: # file return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:]) else: # dir pos = path.rfind('/', 1) if pos == -1: return path, None else: return str(path[:pos]), None def parse_container(self, path): base_container = path[1:] # /abc/def/g --> abc if base_container.find('/') > -1: base_container = base_container[:base_container.find('/')] return str(base_container) def _get_dir(self, path, contents_required=False): if not self.containers: self.rebuild_container_list() if path in self.containers and not (contents_required and \ self.containers[path]['files'] is None): return self.containers[path] cname = self.parse_container(path) if '/' + cname not in self.containers: raise FuseOSError(ENOENT) else: if self.containers['/' + cname]['files'] is None: # fetch contents of container log.info("------> CONTENTS NOT FOUND: %s" % cname) blobs = self.blobs.list_blobs(cname) dirstat = dict(st_mode=(S_IFDIR | 0755), st_size=0, st_uid=getuid(), st_mtime=time.time()) if self.containers['/' + cname]['files'] is None: self.containers['/' + cname]['files'] = dict() for f in blobs: blob_name = f.name blob_date = f.properties.last_modified blob_size = long(f.properties.content_length) node = dict(st_mode=(S_IFREG | 0644), st_size=blob_size, st_mtime=self.convert_to_epoch(blob_date), st_uid=getuid()) if blob_name.find('/') == -1: # file just under container self.containers['/' + cname]['files'][blob_name] = node return self.containers['/' + cname] return None def _get_file(self, path): d, f = self._parse_path(path) dir = self._get_dir(d, True) if dir is not None and f in dir['files']: return dir['files'][f] def getattr(self, path, fh=None): d, f = self._parse_path(path) if f is None: dir = self._get_dir(d) return dir['stat'] else: file = self._get_file(path) if file: return file raise FuseOSError(ENOENT) # FUSE def mkdir(self, path, mode): if path.count('/') <= 1: # create on root name = path[1:] if not 3 <= len(name) <= 63: log.error("Container names can be 3 through 63 chars long.") raise FuseOSError(ENAMETOOLONG) if name is not name.lower(): log.error("Container names cannot contain uppercase \ characters.") raise FuseOSError(EACCES) if name.count('--') > 0: log.error('Container names cannot contain consecutive \ dashes (-).') raise FuseOSError(EAGAIN) #TODO handle all "-"s must be preceded by letter or numbers #TODO starts with only letter or number, can contain letter, nr,'-' resp = self.blobs.create_container(name) if resp: 
self.rebuild_container_list() log.info("CONTAINER %s CREATED" % name) else: raise FuseOSError(EACCES) log.error("Invalid container name or container already \ exists.") else: raise FuseOSError(ENOSYS) # TODO support 2nd+ level mkdirs def rmdir(self, path): if path.count('/') == 1: c_name = path[1:] resp = self.blobs.delete_container(c_name) if resp: if path in self.containers: del self.containers[path] else: raise FuseOSError(EACCES) else: raise FuseOSError(ENOSYS) # TODO support 2nd+ level mkdirs def create(self, path, mode): node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1, st_uid=getuid(), st_mtime=time.time()) d, f = self._parse_path(path) if not f: log.error("Cannot create files on root level: /") raise FuseOSError(ENOSYS) dir = self._get_dir(d, True) if not dir: raise FuseOSError(EIO) dir['files'][f] = node return self.open(path, data='') # reusing handler provider def open(self, path, flags=0, data=None): if data == None: # download contents c_name = self.parse_container(path) f_name = path[path.find('/', 1) + 1:] try: data = self.blobs.get_blob(c_name, f_name) except AzureMissingResourceHttpError: dir = self._get_dir('/' + c_name, True) if f_name in dir['files']: del dir['files'][f_name] raise FuseOSError(ENOENT) except AzureException as e: log.error("Read blob failed HTTP %d" % e.code) raise FuseOSError(EAGAIN) self.fd += 1 self.fds[self.fd] = (path, data, False) return self.fd def flush(self, path, fh=None): if not fh: raise FuseOSError(EIO) else: if fh not in self.fds: raise FuseOSError(EIO) path = self.fds[fh][0] data = self.fds[fh][1] dirty = self.fds[fh][2] if not dirty: return 0 # avoid redundant write d, f = self._parse_path(path) c_name = self.parse_container(path) if data is None: data = '' try: if len(data) < 64 * 1024 * 1024: # 64 mb self.blobs.put_blob(c_name, f, data, 'BlockBlob') else: # divide file by blocks and upload block_size = 8 * 1024 * 1024 num_blocks = int(math.ceil(len(data) * 1.0 / block_size)) rd = str(random.randint(1, 1e8)) block_ids = list() for i in range(num_blocks): part = data[i * block_size:min((i + 1) * block_size, len(data))] block_id = base64.encodestring( '%s_%s' % (rd, (8 - len(str(i))) * '0' + str(i))) self.blobs.put_block(c_name, f, part, block_id) block_ids.append(block_id) self.blobs.put_block_list(c_name, f, block_ids) except AzureException: raise FuseOSError(EAGAIN) dir = self._get_dir(d, True) if not dir or f not in dir['files']: raise FuseOSError(EIO) # update local data dir['files'][f]['st_size'] = len(data) dir['files'][f]['st_mtime'] = time.time() self.fds[fh] = (path, data, False) # mark as not dirty return 0 def release(self, path, fh=None): if fh is not None and fh in self.fds: del self.fds[fh] def truncate(self, path, length, fh=None): return 0 # assume done, no need def write(self, path, data, offset, fh=None): if not fh or fh not in self.fds: raise FuseOSError(ENOENT) else: d = self.fds[fh][1] if d is None: d = "" self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True) return len(data) def unlink(self, path): c_name = self.parse_container(path) d, f = self._parse_path(path) try: self.blobs.delete_blob(c_name, f) _dir = self._get_dir(path, True) if _dir and f in _dir['files']: del _dir['files'][f] return 0 except AzureMissingResourceHttpError: raise FuseOSError(ENOENT) except Exception as e: raise FuseOSError(EAGAIN) def readdir(self, path, fh): if path == '/': return ['.', '..'] + [x[1:] for x in self.containers.keys() \ if x is not '/'] dir = self._get_dir(path, True) if not dir: raise FuseOSError(ENOENT) return 
['.', '..'] + dir['files'].keys() def read(self, path, size, offset, fh): if not fh or fh not in self.fds: raise FuseOSError(ENOENT) f_name = path[path.find('/', 1) + 1:] c_name = path[1:path.find('/', 1)] try: data = self.blobs.get_blob(c_name, f_name) self.fds[fh] = (self.fds[fh][0], data, False) return data[offset:offset + size] except URLError as e: if e.code == 404: raise FuseOSError(ENOENT) elif e.code == 403: raise FuseOSError(EPERM) else: log.error("Read blob failed HTTP %d" % e.code) raise FuseOSError(EAGAIN) data = self.fds[fh][1] if data is None: data = "" return data[offset:offset + size]
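The flush() method in the snippet above splits payloads larger than 64 MB into blocks before committing them with put_block_list. Below is a minimal standalone sketch of that block-upload pattern with the legacy BlobService API; the account credentials, container and blob names are placeholders.

import base64
from azure.storage.blob import BlobService

def upload_in_blocks(blobs, container, blob_name, data, block_size=4 * 1024 * 1024):
    # Split the payload into fixed-size chunks and upload each as a block.
    block_ids = []
    for i, start in enumerate(range(0, len(data), block_size)):
        # Block IDs must be base64-encoded and the same length for every block.
        block_id = base64.b64encode('block-{0:08d}'.format(i))
        blobs.put_block(container, blob_name, data[start:start + block_size], block_id)
        block_ids.append(block_id)
    # Committing the block list makes the blob visible as a single BlockBlob.
    blobs.put_block_list(container, blob_name, block_ids)

blobs = BlobService(account_name='myaccount', account_key='mykey')  # placeholders
upload_in_blocks(blobs, 'mycontainer', 'big-file.bin', 'x' * (10 * 1024 * 1024))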
def _save(self, name, content): blob_service = BlobService(account_name=accountName, account_key=accountKey) import mimetypes small_content = content content.open() content_type = None if hasattr(content.file, 'content_type'): content_type = content.file.content_type else: content_type = mimetypes.guess_type(name)[0] content_str = content.read() blob_service.put_blob( 'videos', name, content_str, x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type, x_ms_blob_cache_control= 'public, max-age=3600, s-maxage=86400' #cache in the browser for 1 hr, on the edge for 24 hrs ) content.close() # if "avatars" in name: #creating and saving thumbnail # small_image_name = name # small_image_name = string.replace(small_image_name, "avatars", "thumbnails") # thumbnail = StringIO.StringIO() # size = 22, 22 # image = small_content.file # image = Image.open(image) # small_image = image.resize(size, Image.ANTIALIAS) # small_image.save(thumbnail,'JPEG',quality=70, optimize=True) # img = InMemoryUploadedFile(thumbnail, None, 'small.jpg', 'image/jpeg', thumbnail.len, None) # small_content.file = img # small_content.open() # stream = small_content.read() # blob_service.put_blob( # 'pictures', # small_image_name, # stream, # x_ms_blob_type='BlockBlob', # x_ms_blob_content_type=content_type, # x_ms_blob_cache_control ='public, max-age=604800, s-maxage=604800' #cache in the browser and on the edge for 7 days # ) # small_content.close() # elif "photos" in name: # small_image_name = name # small_image_name = string.replace(small_image_name, "photos", "thumbnails") # thumbnail = StringIO.StringIO() # #size = 40, 40 # height = 38 # image = small_content.file # image = Image.open(image) # wpercent = (height/float(image.size[1])) # bsize = int((float(image.size[0])*float(wpercent))) # small_image = image.resize((bsize,height), PIL.Image.ANTIALIAS) # small_image.save(thumbnail,'JPEG',quality=70, optimize=True) # img = InMemoryUploadedFile(thumbnail, None, 'small.jpg', 'image/jpeg', thumbnail.len, None) # small_content.file = img # small_content.open() # stream = small_content.read() # blob_service.put_blob( # 'pictures', # small_image_name, # stream, # x_ms_blob_type='BlockBlob', # x_ms_blob_content_type=content_type, # x_ms_blob_cache_control ='public, max-age=3600, s-maxage=86400' #cache in the browser for 1 hr, on the edge for 24 hrs # ) # small_content.close() # else: # pass return name
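_save() above uploads the whole request body with a single put_blob call and sets explicit content-type and caching headers. A minimal sketch of the same pattern outside Django, assuming placeholder credentials and the same 'videos' container:

import mimetypes
from azure.storage.blob import BlobService

blob_service = BlobService(account_name='myaccount', account_key='mykey')  # placeholders

def save_bytes(name, payload):
    # Guess the MIME type from the file name so the blob is served correctly.
    content_type = mimetypes.guess_type(name)[0] or 'application/octet-stream'
    blob_service.put_blob(
        'videos', name, payload,
        x_ms_blob_type='BlockBlob',
        x_ms_blob_content_type=content_type,
        # cache in the browser for 1 hr, on the edge for 24 hrs
        x_ms_blob_cache_control='public, max-age=3600, s-maxage=86400')
    return name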
def __init__(self, account, key): self.blobs = BlobService(account, key) self.rebuild_container_list()
def Main(argv=sys.argv): logging.basicConfig(level=logging.INFO) try: argv = FLAGS(argv) # parse flags except flags.FlagsError as e: logging.error('%s\nUsage: %s ARGS\n%s', e, sys.argv[0], FLAGS) sys.exit(1) if FLAGS.bucket is None: raise ValueError('Must specify a valid bucket for this test.') logging.info('Storage provider is %s, bucket is %s, scenario is %s', FLAGS.storage_provider, FLAGS.bucket, FLAGS.scenario) host_to_connect = None if FLAGS.host is not None: logging.info('Will use user-specified host endpoint: %s', FLAGS.host) host_to_connect = FLAGS.host if FLAGS.storage_provider == 'AZURE': if FLAGS.azure_key is None or FLAGS.azure_account is None: raise ValueError('Must specify azure account and key') else: global _AZURE_BLOB_SERVICE _AZURE_BLOB_SERVICE = BlobService(FLAGS.azure_account, FLAGS.azure_key) # There are DNS lookup issues with the provider Azure when doing # "high" number of concurrent requests using multiple threads. The error # came from getaddrinfo() called by the azure python library. By reducing # the concurrent thread count to 10 or below, the issue can be mitigated. # If we lower the thread count, we need to lower the total object count # too so the time to write these object remains short global LIST_CONSISTENCY_THREAD_COUNT LIST_CONSISTENCY_THREAD_COUNT = 10 global LIST_CONSISTENCY_OBJECT_COUNT LIST_CONSISTENCY_OBJECT_COUNT = 1000 storage_schema = STORAGE_TO_SCHEMA_DICT[FLAGS.storage_provider] if FLAGS.scenario == 'OneByteRW': return OneByteRWBenchmark(storage_schema, host_to_connect) elif FLAGS.scenario == 'ListConsistency': list_latency = {} list_inconsistency_window = {} inconsistent_list_count = {} for scenario in [ LIST_AFTER_WRITE_SCENARIO, LIST_AFTER_UPDATE_SCENARIO ]: list_latency[scenario] = [] list_inconsistency_window[scenario] = [] inconsistent_list_count[scenario] = 0.0 logging.info('Running list consistency tests for %d iterations...', FLAGS.iterations) for _ in range(FLAGS.iterations): result = ListConsistencyBenchmark(storage_schema, host_to_connect) # Analyze the result for both scenarios. for scenario in [ LIST_AFTER_WRITE_SCENARIO, LIST_AFTER_UPDATE_SCENARIO ]: result_consistent = '%s%s' % (scenario, LIST_RESULT_SUFFIX_CONSISTENT) if result_consistent in result: if result[result_consistent]: list_latency[scenario].append( result['%s%s' % (scenario, LIST_RESULT_SUFFIX_LATENCY)]) else: inconsistent_list_count[scenario] += 1 list_inconsistency_window[scenario].append( result['%s%s' % (scenario, LIST_RESULT_SUFFIX_INCONSISTENCY_WINDOW)]) # All iterations completed, ready to print out final stats. logging.info('\n\nFinal stats:') for scenario in [ LIST_AFTER_WRITE_SCENARIO, LIST_AFTER_UPDATE_SCENARIO ]: logging.info( '%s consistency percentage: %f', scenario, 100 * (1 - inconsistent_list_count[scenario] / FLAGS.iterations)) if len(list_inconsistency_window[scenario]) > 0: logging.info( '%s inconsistency window: %s', scenario, json.dumps(PercentileCalculator( list_inconsistency_window[scenario]), sort_keys=True)) if len(list_latency[scenario]) > 0: logging.info( '%s latency: %s', scenario, json.dumps(PercentileCalculator(list_latency[scenario]), sort_keys=True)) return 0 elif FLAGS.scenario == 'SingleStreamThroughput': return SingleStreamThroughputBenchmark(storage_schema, host_to_connect) elif FLAGS.scenario == 'CleanupBucket': return CleanupBucket(storage_schema)
def apathetic_container_delete(container_name, *args, **kwargs): conn = BlobService(*args, **kwargs) conn.delete_container(container_name) return conn
from azure.storage.blob import BlobService source_account = BlobService( account_name='cngssd', account_key= 'DVvOtpOVW71er9ztR3mooJk4Zc3ZNovW9YV3qu4Y6bkN0eCfHutpcNVXW6gtpfolRk4CcAlmftz/+SDwm2BQag==' ) dest_account = BlobService( account_name='testcng', account_key= 'piWr6zleZ1sL8aopv5Y4NRYyrVWaW2/QrXcPpsxRec4IxtEoR1IyRmZCkbdyq50Bfu0qidF8SicQahdM+OExvg==' ) # list blobs source_blobs = [] source_marker = None while True: batch = source_account.list_blobs('vhds', marker=source_marker) source_blobs.extend(batch) if not batch.next_marker: break source_marker = batch.next_marker for blob in source_blobs: print(blob.name) dest_blobs = [] dest_marker = None while True: batch = dest_account.list_blobs('vhds', marker=dest_marker) dest_blobs.extend(batch) if not batch.next_marker: break dest_marker = batch.next_marker
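Listing both accounts is only the first half of a migration; the copy itself can be done server side. A hedged sketch using the legacy copy_blob call, assuming the source blobs are readable by the destination service (e.g. a public container, or a SAS token appended to each source URL):

# Ask the destination account to pull each blob directly from the source account.
source_base = 'https://cngssd.blob.core.windows.net/vhds/'
dest_account.create_container('vhds')
for blob in source_blobs:
    # copy_blob starts an asynchronous server-side copy; no data flows through this client.
    dest_account.copy_blob('vhds', blob.name, source_base + blob.name)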
# -*- coding: utf-8 -*- """ Created on Fri Oct 09 16:36:56 2015 @author: justin.malinchak """ # List blobs in container from azure.storage.blob import BlobService blob_service = BlobService( account_name='portalvhdss5m831rhl98hj', account_key= 'Z1MliCYE7p9Ks9kYQoGeM4V99hODtiJL82BVi/zIm06jLYh7n0tV8YaZHzITKixMwUUmjJ1Vp05XrgHG+gXFlg==' ) blobs = [] marker = None while True: batch = blob_service.list_blobs('mycontainer', marker=marker) blobs.extend(batch) if not batch.next_marker: break marker = batch.next_marker for blob in blobs: bname = blob.name print('') print(bname) print('') bpathname = 'C:\\Batches\\$Work\\' + bname blob_service.get_blob_to_path('mycontainer', bname, bpathname)
from __future__ import division import time from sklearn import metrics from sklearn import linear_model from azureml import services from azure.storage.blob import BlobService # ###Step 1. Read in the Data from blob # In[64]: #Connection String CONTAINERNAME = 'test1' STORAGEACCOUNTNAME = 'weigstoragefordsvm' STORAGEACCOUNTKEY = 'FUyNCM83pY4K2srBfZv4yDr6ru7d+BfbmHPPtucqS7EIgvUSQBG4zPkznpCuClWVOMitAQXG3aJFbvuD7mBkhQ==' BLOBNAME = 'demo_ex_9_stratified_1_1000_copy.csv' blob_service = BlobService(account_name=STORAGEACCOUNTNAME, account_key=STORAGEACCOUNTKEY) #Read in as text t1 = time.time() data = blob_service.get_blob_to_text(CONTAINERNAME, BLOBNAME).split("\n") t2 = time.time() print(("It takes %s seconds to read in " + BLOBNAME) % (t2 - t1)) #Add column names and separate columns colnames = [ 'medallion', 'hack_license', 'vendor_id', 'rate_code', 'store_and_fwd_flag', 'pickup_datetime', 'dropoff_datetime', 'passenger_count', 'trip_time_in_secs', 'trip_distance', 'pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'payment_type', 'fare_amount', 'surcharge', 'mta_tax', 'tolls_amount', 'total_amount', 'tip_amount', 'tipped', 'tip_class',
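Once the blob text is in memory, splitting lines and attaching column names by hand can be replaced with a DataFrame load. A minimal sketch, assuming pandas is available, that the file has no header row (the snippet assigns column names itself), and that colnames is the full list defined above:

import pandas as pd
from StringIO import StringIO  # Python 2; use io.StringIO on Python 3

# Re-read the blob as one string and let pandas handle parsing and column names.
raw_csv = blob_service.get_blob_to_text(CONTAINERNAME, BLOBNAME)
df = pd.read_csv(StringIO(raw_csv), header=None, names=colnames)
print(df.shape)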
class AzureJobStore(AbstractJobStore): """ A job store that uses Azure's blob store for file storage and Table Service to store job info with strong consistency. """ # Dots in container names should be avoided because container names are used in HTTPS bucket # URLs where the may interfere with the certificate common name. We use a double underscore # as a separator instead. # containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$') # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx # minContainerNameLen = 3 maxContainerNameLen = 63 maxNameLen = 10 nameSeparator = 'xx' # Table names must be alphanumeric # Length of a jobID - used to test if a stats file has been read already or not jobIDLength = len(str(uuid.uuid4())) def __init__(self, locator, jobChunkSize=maxAzureTablePropertySize): super(AzureJobStore, self).__init__() accountName, namePrefix = locator.split(':', 1) if '--' in namePrefix: raise ValueError( "Invalid name prefix '%s'. Name prefixes may not contain %s." % (namePrefix, self.nameSeparator)) if not self.containerNameRe.match(namePrefix): raise ValueError( "Invalid name prefix '%s'. Name prefixes must contain only digits, " "hyphens or lower-case letters and must not start or end in a " "hyphen." % namePrefix) # Reserve 13 for separator and suffix if len(namePrefix) > self.maxContainerNameLen - self.maxNameLen - len( self.nameSeparator): raise ValueError(( "Invalid name prefix '%s'. Name prefixes may not be longer than 50 " "characters." % namePrefix)) if '--' in namePrefix: raise ValueError( "Invalid name prefix '%s'. Name prefixes may not contain " "%s." % (namePrefix, self.nameSeparator)) self.locator = locator self.jobChunkSize = jobChunkSize self.accountKey = _fetchAzureAccountKey(accountName) self.accountName = accountName # Table names have strict requirements in Azure self.namePrefix = self._sanitizeTableName(namePrefix) # These are the main API entry points. self.tableService = TableService(account_key=self.accountKey, account_name=accountName) self.blobService = BlobService(account_key=self.accountKey, account_name=accountName) # Serialized jobs table self.jobItems = None # Job<->file mapping table self.jobFileIDs = None # Container for all shared and unshared files self.files = None # Stats and logging strings self.statsFiles = None # File IDs that contain stats and logging strings self.statsFileIDs = None @property def keyPath(self): return self.config.cseKey def initialize(self, config): if self._jobStoreExists(): raise JobStoreExistsException(self.locator) logger.debug("Creating job store at '%s'" % self.locator) self._bind(create=True) super(AzureJobStore, self).initialize(config) def resume(self): if not self._jobStoreExists(): raise NoSuchJobStoreException(self.locator) logger.debug("Using existing job store at '%s'" % self.locator) self._bind(create=False) super(AzureJobStore, self).resume() def destroy(self): self._bind() for name in 'jobItems', 'jobFileIDs', 'files', 'statsFiles', 'statsFileIDs': resource = getattr(self, name) if resource is not None: if isinstance(resource, AzureTable): resource.delete_table() elif isinstance(resource, AzureBlobContainer): resource.delete_container() else: assert False setattr(self, name, None) def _jobStoreExists(self): """ Checks if job store exists by querying the existence of the statsFileIDs table. Note that this is the last component that is deleted in :meth:`.destroy`. 
""" for attempt in retry_azure(): with attempt: try: table = self.tableService.query_tables( table_name=self._qualify('statsFileIDs')) except AzureMissingResourceHttpError as e: if e.status_code == 404: return False else: raise else: return table is not None def _bind(self, create=False): table = self._bindTable container = self._bindContainer for name, binder in (('jobItems', table), ('jobFileIDs', table), ('files', container), ('statsFiles', container), ('statsFileIDs', table)): if getattr(self, name) is None: setattr(self, name, binder(self._qualify(name), create=create)) def _qualify(self, name): return self.namePrefix + self.nameSeparator + name.lower() def jobs(self): # How many jobs have we done? total_processed = 0 for jobEntity in self.jobItems.query_entities_auto(): # Process the items in the page yield AzureJob.fromEntity(jobEntity) total_processed += 1 if total_processed % 1000 == 0: # Produce some feedback for the user, because this can take # a long time on, for example, Azure logger.debug("Processed %d total jobs" % total_processed) logger.debug("Processed %d total jobs" % total_processed) def create(self, jobNode): jobStoreID = self._newJobID() job = AzureJob.fromJobNode(jobNode, jobStoreID, self._defaultTryCount()) entity = job.toItem(chunkSize=self.jobChunkSize) entity['RowKey'] = EntityProperty('Edm.String', jobStoreID) self.jobItems.insert_entity(entity=entity) return job def exists(self, jobStoreID): if self.jobItems.get_entity(row_key=bytes(jobStoreID)) is None: return False return True def load(self, jobStoreID): jobEntity = self.jobItems.get_entity(row_key=bytes(jobStoreID)) if jobEntity is None: raise NoSuchJobException(jobStoreID) return AzureJob.fromEntity(jobEntity) def update(self, job): self.jobItems.update_entity( row_key=bytes(job.jobStoreID), entity=job.toItem(chunkSize=self.jobChunkSize)) def delete(self, jobStoreID): try: self.jobItems.delete_entity(row_key=bytes(jobStoreID)) except AzureMissingResourceHttpError: # Job deletion is idempotent, and this job has been deleted already return filterString = "PartitionKey eq '%s'" % jobStoreID for fileEntity in self.jobFileIDs.query_entities(filter=filterString): jobStoreFileID = fileEntity.RowKey self.deleteFile(jobStoreFileID) def getEnv(self): return dict(AZURE_ACCOUNT_KEY=self.accountKey) class BlobInfo(namedtuple('BlobInfo', ('account', 'container', 'name'))): @property @memoize def service(self): return BlobService(account_name=self.account, account_key=_fetchAzureAccountKey(self.account)) @classmethod def getSize(cls, url): blob = cls._parseWasbUrl(url) blobProps = blob.service.get_blob_properties(blob.container, blob.name) return int(blobProps['content-length']) @classmethod def _readFromUrl(cls, url, writable): blob = cls._parseWasbUrl(url) for attempt in retry_azure(): with attempt: blob.service.get_blob_to_file(container_name=blob.container, blob_name=blob.name, stream=writable) @classmethod def _writeToUrl(cls, readable, url): blob = cls._parseWasbUrl(url) blob.service.put_block_blob_from_file(container_name=blob.container, blob_name=blob.name, stream=readable) @classmethod def _parseWasbUrl(cls, url): """ :param urlparse.ParseResult url: x :rtype: AzureJobStore.BlobInfo """ assert url.scheme in ('wasb', 'wasbs') try: container, account = url.netloc.split('@') except ValueError: raise InvalidImportExportUrlException(url) suffix = '.blob.core.windows.net' if account.endswith(suffix): account = account[:-len(suffix)] else: raise InvalidImportExportUrlException(url) assert url.path[0] == '/' return 
cls.BlobInfo(account=account, container=container, name=url.path[1:]) @classmethod def _supportsUrl(cls, url, export=False): return url.scheme.lower() in ('wasb', 'wasbs') def writeFile(self, localFilePath, jobStoreID=None): jobStoreFileID = self._newFileID() self.updateFile(jobStoreFileID, localFilePath) self._associateFileWithJob(jobStoreFileID, jobStoreID) return jobStoreFileID def updateFile(self, jobStoreFileID, localFilePath): with open(localFilePath) as read_fd: with self._uploadStream(jobStoreFileID, self.files) as write_fd: while True: buf = read_fd.read(self._maxAzureBlockBytes) write_fd.write(buf) if len(buf) == 0: break def readFile(self, jobStoreFileID, localFilePath): try: with self._downloadStream(jobStoreFileID, self.files) as read_fd: with open(localFilePath, 'w') as write_fd: while True: buf = read_fd.read(self._maxAzureBlockBytes) write_fd.write(buf) if not buf: break except AzureMissingResourceHttpError: raise NoSuchFileException(jobStoreFileID) def deleteFile(self, jobStoreFileID): try: self.files.delete_blob(blob_name=bytes(jobStoreFileID)) self._dissociateFileFromJob(jobStoreFileID) except AzureMissingResourceHttpError: pass def fileExists(self, jobStoreFileID): # As Azure doesn't have a blob_exists method (at least in the # python API) we just try to download the metadata, and hope # the metadata is small so the call will be fast. try: self.files.get_blob_metadata(blob_name=bytes(jobStoreFileID)) return True except AzureMissingResourceHttpError: return False @contextmanager def writeFileStream(self, jobStoreID=None): # TODO: this (and all stream methods) should probably use the # Append Blob type, but that is not currently supported by the # Azure Python API. jobStoreFileID = self._newFileID() with self._uploadStream(jobStoreFileID, self.files) as fd: yield fd, jobStoreFileID self._associateFileWithJob(jobStoreFileID, jobStoreID) @contextmanager def updateFileStream(self, jobStoreFileID): with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd: yield fd def getEmptyFileStoreID(self, jobStoreID=None): jobStoreFileID = self._newFileID() with self._uploadStream(jobStoreFileID, self.files) as _: pass self._associateFileWithJob(jobStoreFileID, jobStoreID) return jobStoreFileID @contextmanager def readFileStream(self, jobStoreFileID): if not self.fileExists(jobStoreFileID): raise NoSuchFileException(jobStoreFileID) with self._downloadStream(jobStoreFileID, self.files) as fd: yield fd @contextmanager def writeSharedFileStream(self, sharedFileName, isProtected=None): assert self._validateSharedFileName(sharedFileName) sharedFileID = self._newFileID(sharedFileName) with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd: yield fd @contextmanager def readSharedFileStream(self, sharedFileName): assert self._validateSharedFileName(sharedFileName) sharedFileID = self._newFileID(sharedFileName) if not self.fileExists(sharedFileID): raise NoSuchFileException(sharedFileID) with self._downloadStream(sharedFileID, self.files) as fd: yield fd def writeStatsAndLogging(self, statsAndLoggingString): # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK jobStoreFileID = self._newFileID() encrypted = self.keyPath is not None if encrypted: statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath) self.statsFiles.put_block_blob_from_text( blob_name=bytes(jobStoreFileID), text=statsAndLoggingString, x_ms_meta_name_values=dict(encrypted=str(encrypted))) 
self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID}) def readStatsAndLogging(self, callback, readAll=False): suffix = '_old' numStatsFiles = 0 for attempt in retry_azure(): with attempt: for entity in self.statsFileIDs.query_entities(): jobStoreFileID = entity.RowKey hasBeenRead = len(jobStoreFileID) > self.jobIDLength if not hasBeenRead: with self._downloadStream(jobStoreFileID, self.statsFiles) as fd: callback(fd) # Mark this entity as read by appending the suffix self.statsFileIDs.insert_entity( entity={'RowKey': jobStoreFileID + suffix}) self.statsFileIDs.delete_entity( row_key=bytes(jobStoreFileID)) numStatsFiles += 1 elif readAll: # Strip the suffix to get the original ID jobStoreFileID = jobStoreFileID[:-len(suffix)] with self._downloadStream(jobStoreFileID, self.statsFiles) as fd: callback(fd) numStatsFiles += 1 return numStatsFiles _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ" def getPublicUrl(self, jobStoreFileID): try: self.files.get_blob_properties(blob_name=bytes(jobStoreFileID)) except AzureMissingResourceHttpError: raise NoSuchFileException(jobStoreFileID) # Compensate of a little bit of clock skew startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime( self._azureTimeFormat) endTime = datetime.utcnow() + self.publicUrlExpiration endTimeStr = endTime.strftime(self._azureTimeFormat) sap = SharedAccessPolicy( AccessPolicy(startTimeStr, endTimeStr, BlobSharedAccessPermissions.READ)) sas_token = self.files.generate_shared_access_signature( blob_name=bytes(jobStoreFileID), shared_access_policy=sap) return self.files.make_blob_url( blob_name=bytes(jobStoreFileID)) + '?' + sas_token def getSharedPublicUrl(self, sharedFileName): jobStoreFileID = self._newFileID(sharedFileName) return self.getPublicUrl(jobStoreFileID) def _newJobID(self): # raw UUIDs don't work for Azure property names because the '-' character is disallowed. return str(uuid.uuid4()).replace('-', '_') # A dummy job ID under which all shared files are stored. 
sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94') def _newFileID(self, sharedFileName=None): if sharedFileName is None: ret = bytes(uuid.uuid4()) else: ret = bytes(uuid.uuid5(self.sharedFileJobID, bytes(sharedFileName))) return ret.replace('-', '_') def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None): if jobStoreID is not None: self.jobFileIDs.insert_entity( entity={ 'PartitionKey': EntityProperty('Edm.String', jobStoreID), 'RowKey': EntityProperty('Edm.String', jobStoreFileID) }) def _dissociateFileFromJob(self, jobStoreFileID): entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID) if entities: assert len(entities) == 1 jobStoreID = entities[0].PartitionKey self.jobFileIDs.delete_entity(partition_key=bytes(jobStoreID), row_key=bytes(jobStoreFileID)) def _bindTable(self, tableName, create=False): for attempt in retry_azure(): with attempt: try: tables = self.tableService.query_tables( table_name=tableName) except AzureMissingResourceHttpError as e: if e.status_code != 404: raise else: if tables: assert tables[0].name == tableName return AzureTable(self.tableService, tableName) if create: self.tableService.create_table(tableName) return AzureTable(self.tableService, tableName) else: return None def _bindContainer(self, containerName, create=False): for attempt in retry_azure(): with attempt: try: self.blobService.get_container_properties(containerName) except AzureMissingResourceHttpError as e: if e.status_code == 404: if create: self.blobService.create_container(containerName) else: return None else: raise return AzureBlobContainer(self.blobService, containerName) def _sanitizeTableName(self, tableName): """ Azure table names must start with a letter and be alphanumeric. This will never cause a collision if uuids are used, but otherwise may not be safe. """ return 'a' + ''.join([x for x in tableName if x.isalnum()]) # Maximum bytes that can be in any block of an Azure block blob # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106 _maxAzureBlockBytes = 4 * 1024 * 1024 @contextmanager def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None): """ :param encrypted: True to enforce encryption (will raise exception unless key is set), False to prevent encryption or None to encrypt if key is set. """ if checkForModification: try: expectedVersion = container.get_blob_properties( blob_name=bytes(jobStoreFileID))['etag'] except AzureMissingResourceHttpError: expectedVersion = None if encrypted is None: encrypted = self.keyPath is not None elif encrypted: if self.keyPath is None: raise RuntimeError( 'Encryption requested but no key was provided') maxBlockSize = self._maxAzureBlockBytes if encrypted: # There is a small overhead for encrypted data. maxBlockSize -= encryption.overhead store = self class UploadPipe(WritablePipe): def readFrom(self, readable): blockIDs = [] try: while True: buf = readable.read(maxBlockSize) if len(buf) == 0: # We're safe to break here even if we never read anything, since # putting an empty block list creates an empty blob. break if encrypted: buf = encryption.encrypt(buf, store.keyPath) blockID = store._newFileID() container.put_block(blob_name=bytes(jobStoreFileID), block=buf, blockid=blockID) blockIDs.append(blockID) except: with panic(log=logger): # This is guaranteed to delete any uncommitted blocks. 
container.delete_blob(blob_name=bytes(jobStoreFileID)) if checkForModification and expectedVersion is not None: # Acquire a (60-second) write lock, leaseID = container.lease_blob( blob_name=bytes(jobStoreFileID), x_ms_lease_action='acquire')['x-ms-lease-id'] # check for modification, blobProperties = container.get_blob_properties( blob_name=bytes(jobStoreFileID)) if blobProperties['etag'] != expectedVersion: container.lease_blob(blob_name=bytes(jobStoreFileID), x_ms_lease_action='release', x_ms_lease_id=leaseID) raise ConcurrentFileModificationException( jobStoreFileID) # commit the file, container.put_block_list( blob_name=bytes(jobStoreFileID), block_list=blockIDs, x_ms_lease_id=leaseID, x_ms_meta_name_values=dict(encrypted=str(encrypted))) # then release the lock. container.lease_blob(blob_name=bytes(jobStoreFileID), x_ms_lease_action='release', x_ms_lease_id=leaseID) else: # No need to check for modification, just blindly write over whatever # was there. container.put_block_list( blob_name=bytes(jobStoreFileID), block_list=blockIDs, x_ms_meta_name_values=dict(encrypted=str(encrypted))) with UploadPipe() as writable: yield writable @contextmanager def _downloadStream(self, jobStoreFileID, container): # The reason this is not in the writer is so we catch non-existant blobs early blobProps = container.get_blob_properties( blob_name=bytes(jobStoreFileID)) encrypted = strict_bool(blobProps['x-ms-meta-encrypted']) if encrypted and self.keyPath is None: raise AssertionError( 'Content is encrypted but no key was provided.') outer_self = self class DownloadPipe(ReadablePipe): def writeTo(self, writable): chunkStart = 0 fileSize = int(blobProps['Content-Length']) while chunkStart < fileSize: chunkEnd = chunkStart + outer_self._maxAzureBlockBytes - 1 buf = container.get_blob(blob_name=bytes(jobStoreFileID), x_ms_range="bytes=%d-%d" % (chunkStart, chunkEnd)) if encrypted: buf = encryption.decrypt(buf, outer_self.keyPath) writable.write(buf) chunkStart = chunkEnd + 1 with DownloadPipe() as readable: yield readable
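_downloadStream above pulls the blob in fixed-size byte ranges rather than in one call. The same ranged-read pattern in isolation, as a minimal sketch with placeholder account, container and blob names:

from azure.storage.blob import BlobService

def download_in_ranges(blobs, container, blob_name, chunk_bytes=4 * 1024 * 1024):
    # Read the blob size first so we know when to stop issuing range requests.
    size = int(blobs.get_blob_properties(container, blob_name)['content-length'])
    chunks = []
    start = 0
    while start < size:
        end = min(start + chunk_bytes, size) - 1
        # x_ms_range asks the service for just this byte window.
        chunks.append(blobs.get_blob(container, blob_name,
                                     x_ms_range='bytes=%d-%d' % (start, end)))
        start = end + 1
    return ''.join(chunks)

blobs = BlobService(account_name='myaccount', account_key='mykey')  # placeholders
data = download_in_ranges(blobs, 'mycontainer', 'big-file.bin')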
# upload a file to Azure Blob Storage from azure.storage.blob import BlobService # Add your details here blob_service = BlobService(account_name="", account_key="") blob_service.put_block_blob_from_path("container", "remote-name.jpg", "localfile.jpg")
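For completeness, the matching download is a single call as well; a minimal sketch using the same placeholder container and blob names:

# download the same file back from Azure Blob Storage
from azure.storage.blob import BlobService

# Add your details here
blob_service = BlobService(account_name="", account_key="")
blob_service.get_blob_to_path("container", "remote-name.jpg", "downloaded-copy.jpg")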
class AzureIOStore(IOStore): """ A class that lets you get input from and send output to Azure Storage. """ def __init__(self, account_name, container_name, name_prefix=""): """ Make a new AzureIOStore that reads from and writes to the given container in the given account, adding the given prefix to keys. All paths will be interpreted as keys or key prefixes. If the name prefix does not end with a trailing slash, and is not empty, one will be added automatically. Account keys are retrieved from the AZURE_ACCOUNT_KEY environment variable or from the ~/.toilAzureCredentials file, as in Toil itself. """ # Make sure azure libraries actually loaded assert (have_azure) self.account_name = account_name self.container_name = container_name self.name_prefix = name_prefix if self.name_prefix != "" and not self.name_prefix.endswith("/"): # Make sure it has the trailing slash required. self.name_prefix += "/" # Sneak into Toil and use the same keys it uses self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey( self.account_name) # This will hold out Azure blob store connection self.connection = None def __getstate__(self): """ Return the state to use for pickling. We don't want to try and pickle an open Azure connection. """ return (self.account_name, self.account_key, self.container_name, self.name_prefix) def __setstate__(self, state): """ Set up after unpickling. """ self.account_name = state[0] self.account_key = state[1] self.container_name = state[2] self.name_prefix = state[3] self.connection = None def __connect(self): """ Make sure we have an Azure connection, and set one up if we don't. """ if self.connection is None: RealtimeLogger.debug("Connecting to account {}, using " "container {} and prefix {}".format( self.account_name, self.container_name, self.name_prefix)) # Connect to the blob service where we keep everything self.connection = BlobService(account_name=self.account_name, account_key=self.account_key) @backoff def read_input_file(self, input_path, local_path): """ Get input from Azure. """ self.__connect() RealtimeLogger.debug("Loading {} from AzureIOStore".format(input_path)) # Download the blob. This is known to be synchronous, although it can # call a callback during the process. self.connection.get_blob_to_path(self.container_name, self.name_prefix + input_path, local_path) def list_input_directory(self, input_path, recursive=False, with_times=False): """ Loop over fake /-delimited directories on Azure. The prefix may or may not not have a trailing slash; if not, one will be added automatically. Returns the names of files and fake directories in the given input fake directory, non-recursively. If with_times is specified, will yield (name, time) pairs including modification times as datetime objects. Times on directories are None. """ self.__connect() RealtimeLogger.info( "Enumerating {} from AzureIOStore".format(input_path)) # Work out what the directory name to list is fake_directory = self.name_prefix + input_path if fake_directory != "" and not fake_directory.endswith("/"): # We have a nonempty prefix, and we need to end it with a slash fake_directory += "/" # This will hold the marker that we need to send back to get the next # page, if there is one. See <http://stackoverflow.com/a/24303682> marker = None # This holds the subdirectories we found; we yield each exactly once if # we aren't recursing. subdirectories = set() while True: # Get the results from Azure. 
We don't use delimiter since Azure # doesn't seem to provide the placeholder entries it's supposed to. result = self.connection.list_blobs(self.container_name, prefix=fake_directory, marker=marker) RealtimeLogger.info("Found {} files".format(len(result))) for blob in result: # Yield each result's blob name, but directory names only once # Drop the common prefix relative_path = blob.name[len(fake_directory):] if (not recursive) and "/" in relative_path: # We found a file in a subdirectory, and we aren't supposed # to be recursing. subdirectory, _ = relative_path.split("/", 1) if subdirectory not in subdirectories: # It's a new subdirectory. Yield and remember it subdirectories.add(subdirectory) if with_times: yield subdirectory, None else: yield subdirectory else: # We found an actual file if with_times: mtime = blob.properties.last_modified if isinstance(mtime, datetime.datetime): # Make sure we're getting proper localized datetimes # from the new Azure Storage API. assert (mtime.tzinfo is not None and mtime.tzinfo.utcoffset(mtime) is not None) else: # Convert mtime from a string as in the old API. mtime = dateutil.parser.parse(mtime).replace( tzinfo=dateutil.tz.tzutc()) yield relative_path, mtime else: yield relative_path # Save the marker marker = result.next_marker if not marker: break @backoff def write_output_file(self, local_path, output_path): """ Write output to Azure. Will create the container if necessary. """ self.__connect() RealtimeLogger.debug("Saving {} to AzureIOStore".format(output_path)) try: # Make the container self.connection.create_container(self.container_name) except azure.WindowsAzureConflictError: # The container probably already exists pass # Upload the blob (synchronously) # TODO: catch no container error here, make the container, and retry self.connection.put_block_blob_from_path( self.container_name, self.name_prefix + output_path, local_path) @backoff def exists(self, path): """ Returns true if the given input or output file exists in Azure already. """ self.__connect() marker = None while True: try: # Make the container self.connection.create_container(self.container_name) except azure.WindowsAzureConflictError: # The container probably already exists pass # Get the results from Azure. result = self.connection.list_blobs(self.container_name, prefix=self.name_prefix + path, marker=marker) for blob in result: # Look at each blob if blob.name == self.name_prefix + path: # Found it return True # Save the marker marker = result.next_marker if not marker: break return False @backoff def get_mtime(self, path): """ Returns the modification time of the given blob if it exists, or None otherwise. """ self.__connect() marker = None while True: # Get the results from Azure. result = self.connection.list_blobs(self.container_name, prefix=self.name_prefix + path, marker=marker) for blob in result: # Look at each blob if blob.name == self.name_prefix + path: # Found it mtime = blob.properties.last_modified if isinstance(mtime, datetime.datetime): # Make sure we're getting proper localized datetimes # from the new Azure Storage API. assert (mtime.tzinfo is not None and mtime.tzinfo.utcoffset(mtime) is not None) else: # Convert mtime from a string as in the old API. mtime = dateutil.parser.parse(mtime).replace( tzinfo=dateutil.tz.tzutc()) return mtime # Save the marker marker = result.next_marker if not marker: break return None @backoff def get_size(self, path): """ Returns the size in bytes of the given blob if it exists, or None otherwise. 
""" self.__connect() marker = None while True: # Get the results from Azure. result = self.connection.list_blobs(self.container_name, prefix=self.name_prefix + path, marker=marker) for blob in result: # Look at each blob if blob.name == self.name_prefix + path: # Found it size = blob.properties.content_length return size # Save the marker marker = result.next_marker if not marker: break return None
class AzureStorage(Storage): account_name = settings.AZURE_ACCOUNT_NAME account_key = settings.AZURE_ACCOUNT_KEY azure_container = settings.AZURE_CONTAINER def __init__(self, *args, **kwargs): super(AzureStorage, self).__init__(*args, **kwargs) self._connection = None @property def connection(self): if self._connection is None: # Create connection self._connection = BlobService(self.account_name, self.account_key) # Create container if needed containers = [ c for c in self._connection.list_containers( prefix=self.azure_container) if c.name == self.azure_container ] if len(containers) == 0: self._connection.create_container( self.azure_container, {'origin': 'created by Django web app'}, fail_on_exist=True) return self._connection def _open(self, name, mode="rb"): stream = SimpleUploadedFile(name, None) self.connection.get_blob_to_file(self.azure_container, name, stream) stream.seek(0) return stream def exists(self, name): try: self.connection.get_blob_properties(self.azure_container, name) except AzureMissingResourceHttpError: return False else: return True def delete(self, name): self.connection.delete_blob(self.azure_container, name) def size(self, name): properties = self.connection.get_blob_properties( self.azure_container, name) return properties["content-length"] def _save(self, name, content): self.connection.put_block_blob_from_file(self.azure_container, name, content) return name def url(self, name): ap = AccessPolicy(expiry=(timezone.datetime.utcnow() + timezone.timedelta(seconds=600)).strftime('%Y-%m-%dT%H:%M:%SZ'), \ start=(timezone.datetime.utcnow() + timezone.timedelta(seconds=-600)).strftime('%Y-%m-%dT%H:%M:%SZ'), \ permission='r') sap = SharedAccessPolicy(ap) sas = SharedAccessSignature(self.account_name, self.account_key) url = sas.generate_signed_query_string(path=self.azure_container + '/' + name, resource_type='b', shared_access_policy=sap) return self.connection.make_blob_url(self.azure_container, name) + "?" + url
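A sketch of wiring this backend into a Django project; the dotted module path and the example model are assumptions, while the three AZURE_* settings are the ones the class already reads:

# settings.py
AZURE_ACCOUNT_NAME = 'myaccount'
AZURE_ACCOUNT_KEY = 'mykey'
AZURE_CONTAINER = 'media'
# Dotted path is hypothetical; point it at wherever AzureStorage is defined.
DEFAULT_FILE_STORAGE = 'myproject.storage.AzureStorage'

# models.py
from django.db import models

class Video(models.Model):
    # FileField uses DEFAULT_FILE_STORAGE, so saves go through AzureStorage._save().
    source = models.FileField(upload_to='videos/')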
def bs(self): if not self._bs: self._bs = BlobService(self.get_storage_name(), self.get_key()) return self._bs
USERID = 'ENTER DB USERID' PASSWORD = '******' DB_DRIVER = 'SQL Server Native Client 11.0' # Specify the Azure Storage Account name where you will have a private blob to copy in the CSV file STORAGEACCOUNTNAME = "ENTER AZURE STORAGE ACCOUNT NAME" # Specify the storage account key. # You can retrieve it as the "Primary Access Key" found on the Azure portal Storage account blade by clicking on the "Key" icon. # More info: https://azure.microsoft.com/en-us/documentation/articles/storage-create-storage-account/#manage-your-storage-access-keys STORAGEKEY = "ENTER STORAGE ACCOUNT KEY " #Read dataset #Dataset is read from a public blob and copied to a private blob to load it into SQL DW via Polybase f = urllib2.urlopen('https://cahandson.blob.core.windows.net/nyctaxi/nyctaxipoint1pct.csv') taxisample = f.read() blob_service = BlobService(account_name=STORAGEACCOUNTNAME, account_key=STORAGEKEY) blob_service.create_container('nyctaxinb') blob_service.put_block_blob_from_bytes( 'nyctaxinb', 'nyctaxipoint1pct.csv', taxisample ) # Construct the SQL DW Connection string driver = 'DRIVER={' + DB_DRIVER + '}' server = 'SERVER=' + SERVER_NAME database = 'DATABASE=' + DATABASE_NAME uid = 'UID=' + USERID pwd = 'PWD=' + PASSWORD CONNECTION_STRING = ';'.join([driver,server,database,uid,pwd, 'Encrypt=yes;TrustServerCertificate=no']) print(CONNECTION_STRING)
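With the connection string assembled, querying or loading the warehouse goes through pyodbc; a minimal sketch, where the table name is an example only and the external/Polybase table would be created separately:

import pyodbc

conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
# Example query only; replace with the table actually loaded via Polybase.
cursor.execute('SELECT COUNT(*) FROM nyctaxi_sample')
print(cursor.fetchone()[0])
conn.close()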
from azure.storage.blob import BlobService import datetime import string import base64 import urllib from static.app_keys import blob_account_name, blob_account_key #get accountName and accountKey from app_keys module accountName = blob_account_name accountKey = blob_account_key #create the blob_service object which connects to the Azure Storage account blob_service = BlobService(accountName, accountKey) #flag variable to verify upload uploaded = False #uploadBlob takes in the username which is used for the storage container name #file is the file to be uploaded #filename is concatenated onto the URL for user readability #token and secret are used for oAuth verification which must happen at every step. def uploadBlob(username, file, filename): #try: global uploaded username = username.lower() returnList = [] #decode base64 image string decodedFile = file.decode("base64") blob_service.create_container(username, x_ms_blob_public_access="container") #get current datetime in UTC for a completely unique identifier
import json import string from unidecode import unidecode from urllib import parse from azure.storage.blob import BlobService from datetime import datetime import animesources shows = [] with open('title-map.json') as titlemap_file: titlemap = json.load(titlemap_file) with open('multi-season.json') as multiseason_file: multiseason = json.load(multiseason_file) with open('azure.json') as azure_file: azure_storage = json.load(azure_file) azure_blob = BlobService(account_name=azure_storage['account'], account_key=azure_storage['key']) with open('proxies.json') as proxies_file: proxy_data = json.load(proxies_file) proxy = proxy_data['uk'] sources = [ animesources.Crunchyroll(titlemap, multiseason, 'uk', proxy), animesources.Funimation(titlemap, multiseason, 'gb', proxy), animesources.Netflix(titlemap, multiseason, 'uk', proxy), animesources.Daisuki(titlemap, multiseason, 'uk', proxy), animesources.Viewster(titlemap, multiseason, 'uk', proxy), animesources.Animax(titlemap, multiseason, 'uk', proxy), animesources.HiDive(titlemap, multiseason, 'uk', proxy) ] for source in sources: source.UpdateShowList(shows) print(source.GetName() + ': ' + str(len(shows)))
print "=== Converting Images ===" command = "ffmpeg -i {0} -r 25 -vf scale=640:-1 -q:v 9 -an -f image2 {1}".format( filename, image_file_name) os.system(command) # if there is no video key return if len(sys.argv) <= 2: exit() print "=== Uploading to Azure ===" video_key = sys.argv[2] # walk all files in dir and push to bucket key = raw_input("Please enter azure vidoepath blob storage key: ") blob_service = BlobService(account_name='videopathmobilefiles', account_key=key) basepath = os.path.dirname(os.path.abspath(__file__)) + "/" + output_folder container_name = video_key.lower() blob_service.create_container(container_name, x_ms_blob_public_access='container') # collect files for uploading filepaths = [] for path, subdirs, files in os.walk(basepath): for name in files: # don't upload hidden files if name[0] == ".": continue pathname = os.path.join(path, name) filepaths.append(pathname)
def do_lzop_get(creds, url, path, decrypt, do_retry=True): """ Get and decompress a WABS URL This streams the content directly to lzop; the compressed version is never stored on disk. """ assert url.endswith('.lzo'), 'Expect an lzop-compressed file' assert url.startswith('wabs://') conn = BlobService(creds.account_name, creds.account_key, sas_token=creds.access_token, protocol='https') def log_wal_fetch_failures_on_error(exc_tup, exc_processor_cxt): def standard_detail_message(prefix=''): return (prefix + ' There have been {n} attempts to fetch wal ' 'file {url} so far.'.format(n=exc_processor_cxt, url=url)) typ, value, tb = exc_tup del exc_tup # Screen for certain kinds of known-errors to retry from if issubclass(typ, socket.error): socketmsg = value[1] if isinstance(value, tuple) else value logger.info( msg='Retrying fetch because of a socket error', detail=standard_detail_message( "The socket error's message is '{0}'.".format(socketmsg))) else: # For all otherwise untreated exceptions, report them as a # warning and retry anyway -- all exceptions that can be # justified should be treated and have error messages # listed. logger.warning( msg='retrying WAL file fetch from unexpected exception', detail=standard_detail_message( 'The exception type is {etype} and its value is ' '{evalue} and its traceback is {etraceback}'.format( etype=typ, evalue=value, etraceback=''.join(traceback.format_tb(tb))))) # Help Python GC by resolving possible cycles del tb def download(): with files.DeleteOnError(path) as decomp_out: with get_download_pipeline(PIPE, decomp_out.f, decrypt) as pl: g = gevent.spawn(write_and_return_error, url, conn, pl.stdin) try: # Raise any exceptions guarded by # write_and_return_error. exc = g.get() if exc is not None: raise exc except AzureMissingResourceHttpError: # Short circuit any re-try attempts under certain race # conditions. pl.abort() logger.warning( msg=('could no longer locate object while ' 'performing wal restore'), detail=('The absolute URI that could not be ' 'located is {url}.'.format(url=url)), hint=('This can be normal when Postgres is trying ' 'to detect what timelines are available ' 'during restoration.')) decomp_out.remove_regardless = True return False logger.info( msg='completed download and decompression', detail='Downloaded and decompressed "{url}" to "{path}"'. format(url=url, path=path)) return True if do_retry: download = retry( retry_with_count(log_wal_fetch_failures_on_error))(download) return download()
class Azure(object): ''' A class used to connect to the Azure storage and upload/download files using blob storage ''' def __init__(self, params={}): ''' Constructor for the Azure object ''' if "user" in params: self.user = params["user"] else: self.user = None if "key" in params: self.key = params["key"] else: self.key = None def connect(self, host, port, user, password, secure): ''' Connect to the Azure service with given user and key @param user - username to use to connect to @param key - key to use to connect ''' kwargs = {} err = None if not host is None: kwargs["host_base"] = "." + host if not user is None: kwargs["account_name"] = user elif not self.user is None: kwargs["account_name"] = self.user if not password is None: kwargs["account_key"] = password elif not self.key is None: kwargs["account_key"] = self.key kwargs["protocol"] = "https" if secure else "http" try: self.service = BlobService(**kwargs) except Exception as e: err = e.message self.service = None if self.service is None: raise OsakaException("Failed to connect to Azure:" + ("" if err is None else err)) @classmethod def getSchemes(clazz): ''' Returns a list of schemes this handler handles Note: handling the scheme of another handler produces unknown results @returns list of handled schemes ''' return ["azure", "azures"] def close(self): ''' Close this service ''' pass def put(self, path, url): ''' Put a file up to the cloud @param path - path to upload @param url - path in cloud to upload too ''' if os.path.isdir(path): return walk(self.put, path, url) cont, blob = get_container_and_path(urlparse.urlparse(url).path) self.service.create_container(cont) self.service.put_block_blob_from_path(cont, blob, path) return True def get(self, url, dest): ''' Get file(s) from the cloud @param url - url on cloud to pull down (on cloud) @param dest - dest to download too ''' cont, blob = get_container_and_path(urlparse.urlparse(url).path) for b in self.service.list_blobs(cont, prefix=blob): destination = os.path.join(dest, os.path.relpath( b.name, blob)) if blob != b.name else dest if not os.path.exists(os.path.dirname(destination)): os.mkdir(os.path.dirname(destination)) self.service.get_blob_to_path(cont, b.name, destination) return True def rm(self, url): ''' Remove this url and all children urls @param url - url to remove ''' cont, blob = get_container_and_path(urlparse.urlparse(url).path) for b in self.service.list_blobs(cont, prefix=blob): self.service.delete_blob(cont, b.name) return True
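A usage sketch for the handler above; the account name, key and URLs are placeholders, and the URL layout (container as the first path segment) follows how get_container_and_path is used inside the class:

az = Azure({"user": "myaccount", "key": "mykey"})
# host/port normally come from the parsed URL; secure=True selects https.
az.connect(host="blob.core.windows.net", port=None, user=None, password=None, secure=True)

az.put("/tmp/report.txt", "azures://myaccount.blob.core.windows.net/mycontainer/reports/report.txt")
az.get("azures://myaccount.blob.core.windows.net/mycontainer/reports/report.txt", "/tmp/report_copy.txt")
az.rm("azures://myaccount.blob.core.windows.net/mycontainer/reports/report.txt")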
def uri_put_file(creds, uri, fp, content_type=None): assert fp.tell() == 0 assert uri.startswith('wabs://') def log_upload_failures_on_error(exc_tup, exc_processor_cxt): def standard_detail_message(prefix=''): return (prefix + ' There have been {n} attempts to upload ' 'file {url} so far.'.format(n=exc_processor_cxt, url=uri)) typ, value, tb = exc_tup del exc_tup # Screen for certain kinds of known-errors to retry from if issubclass(typ, socket.error): socketmsg = value[1] if isinstance(value, tuple) else value logger.info( msg='Retrying upload because of a socket error', detail=standard_detail_message( "The socket error's message is '{0}'.".format(socketmsg))) else: # For all otherwise untreated exceptions, report them as a # warning and retry anyway -- all exceptions that can be # justified should be treated and have error messages # listed. logger.warning( msg='retrying file upload from unexpected exception', detail=standard_detail_message( 'The exception type is {etype} and its value is ' '{evalue} and its traceback is {etraceback}'.format( etype=typ, evalue=value, etraceback=''.join(traceback.format_tb(tb))))) # Help Python GC by resolving possible cycles del tb # Because we're uploading in chunks, catch rate limiting and # connection errors which occur for each individual chunk instead of # failing the whole file and restarting. @retry(retry_with_count(log_upload_failures_on_error)) def upload_chunk(chunk, block_id): if isinstance(chunk, str): chunk = chunk.encode('utf-8') check_sum = base64.b64encode(md5(chunk).digest()).decode('utf-8') conn.put_block(url_tup.netloc, url_tup.path.lstrip('/'), chunk, block_id, content_md5=check_sum) url_tup = urlparse(uri) kwargs = dict(x_ms_blob_type='BlockBlob') if content_type is not None: kwargs['x_ms_blob_content_type'] = content_type conn = BlobService(creds.account_name, creds.account_key, sas_token=creds.access_token, protocol='https') conn.put_blob(url_tup.netloc, url_tup.path.lstrip('/'), b'', **kwargs) # WABS requires large files to be uploaded in 4MB chunks block_ids = [] length, index = 0, 0 pool_size = os.getenv('WABS_UPLOAD_POOL_SIZE', 5) p = gevent.pool.Pool(size=pool_size) while True: data = fp.read(WABS_CHUNK_SIZE) if data: length += len(data) block_id = base64.b64encode( str(index).encode('utf-8')).decode('utf-8') p.wait_available() p.spawn(upload_chunk, data, block_id) block_ids.append(block_id) index += 1 else: p.join() break conn.put_block_list(url_tup.netloc, url_tup.path.lstrip('/'), block_ids) # To maintain consistency with the S3 version of this function we must # return an object with a certain set of attributes. Currently, that set # of attributes consists of only 'size' return _Key(size=len(data))
def getanalysis(request): assert isinstance(request, HttpRequest) latstart = request.GET.get('latstart', None) latend = request.GET.get('latend', None) lonstart = request.GET.get('lonstart', None) lonend = request.GET.get('lonend', None) sea = request.GET.get('season', None) #start SSH ssh = paramiko.SSHClient() blob_service = BlobService( account_name='araldrift', account_key= 'otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==' ) blob_service.get_blob_to_path('security', 'id_rsa', './id_rsa') privkey = paramiko.RSAKey.from_private_key_file(filename='./id_rsa', password='******') ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) try: ssh.connect('40.112.209.249', username='******', password='******', allow_agent=False, pkey=None, key_filename=None, timeout=10, look_for_keys=False, compress=False) except paramiko.SSHException: return HttpResponse() quit() #stdin,stdout,stderr = ssh.exec_command("ls /etc/") cmd = '/home/araldif/anaconda3/bin/python /datadrive/from_webapp/xarray_analysis.py ' + latstart + ' ' + latend + ' ' + lonstart + ' ' + lonend + ' ' + sea #cmd = '/datadrive/from_webapp/xarray_analysis.py' #cmd = 'python /datadrive/from_webapp/test.py ' + name stdin, stdout, stderr = ssh.exec_command(cmd) h = [] for line in stderr.readlines(): h.append(line) stderr.close() ssh.close() try: imageoutfile1 = 'prec_' + str(sea) + '_' + str(latstart) + '_' + str( latend) + '_' + str(lonstart) + '_' + str(lonend) + '.png' imageoutfile2 = './' + imageoutfile1 blob_service = BlobService( account_name='araldrift', account_key= 'otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==' ) blob_service.get_blob_to_path('flow', imageoutfile1, imageoutfile2) image_data = open(imageoutfile2, "rb").read() response = HttpResponse(image_data, content_type='image/png') return response except: return HttpResponse(h, content_type='text/plain')
while not successful: try: blobService.put_blob(HEATMAP_CONTAINER, row[KEY_FIELD], json.dumps(tileTimePeriodResultSet), "BlockBlob", x_ms_blob_cache_control="max-age=3600", x_ms_blob_content_type="application/json") successful = True except: print "error putting heatmap: ", sys.exc_info()[0] continue yield None blobService = BlobService(account_name=os.environ["LOCATION_STORAGE_ACCOUNT"], account_key=os.environ["LOCATION_STORAGE_KEY"]) blobService.create_container(HEATMAP_CONTAINER) blobService.set_container_acl(HEATMAP_CONTAINER, x_ms_blob_public_access='container') def check_config(): if not "LOCATION_STORAGE_ACCOUNT" in os.environ: print "Required environment variable LOCATION_STORAGE_ACCOUNT missing." if not "LOCATION_STORAGE_KEY" in os.environ: print "Required environment variable LOCATION_STORAGE_KEY missing." if not "LOCATIONS_ROOT" in os.environ: print "Required environment variable LOCATIONS_ROOT missing."