Example #1
def get_tags():
	#TODO: Error checking
#       global d 
#       d = {}

	clarifai_api = ClarifaiApi()
	blob_service = BlobService('calhacks', 'mm7EmY+T+MGahePBDSDU5LHpZR5tRXuh4MSco4jFrzHovOPEf06e18c89pxtPIo4NDVhhjSeaQY/FQmKNxjjyA==')      

	blob_name = request.form['blob_id']
#       blob_name = blob_name.decode('utf-8')
	blob_service.get_blob_to_path('imagestore', blob_name, 'out.png')       
	print("checkpoint 1")
	with open('out.png', 'r') as i:
		strd = ""
		for line in i:
			strd += line.strip()
	fname = 'img.png'
	with open(fname, 'wb') as f:
		f.write(base64.b64decode(strd))

	with open(fname, 'rb') as f:
		result = clarifai_api.tag_images(f)
	st = result['results'][0]['result']['tag']['classes'][0:6]
	print(st)

	for i in []:#['food', 'nobody', 'still life', 'meal', 'dish', 'plate', 'delicious', 'isolated', 'cutout', 'unhealthy', 'one', 'background']: 
		while i in st:
			st.remove(i)
	js = json.dumps(search_terms(st))
	print(js)
	return js
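A note on credentials: these examples embed storage account keys inline. A minimal sketch (assuming the same legacy azure.storage.blob.BlobService API used throughout, with hypothetical environment-variable names) that keeps the key out of the source:

import os
from azure.storage.blob import BlobService  # legacy SDK, as in the examples here

# AZURE_ACCOUNT_NAME and AZURE_ACCOUNT_KEY are assumed to be set by the caller.
blob_service = BlobService(
    account_name=os.environ['AZURE_ACCOUNT_NAME'],
    account_key=os.environ['AZURE_ACCOUNT_KEY'],
)
blob_service.get_blob_to_path('imagestore', 'some-blob-name', 'out.png')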
Example #2
def getblob(request):
    assert isinstance(request, HttpRequest)
    blob_service = BlobService(account_name='araldrift', account_key='YOURKEYGOESHERE')
    # http://<storage-account-name>.blob.core.windows.net/<container-name>/<blob-name>
    blob_service.get_blob_to_path('flow', 'NARYN.day', './NARYN.day')  
    # return HttpResponse('ok ye of little faith')
    return HttpResponse(json.dumps('i am just a wee bit of json'))
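To return the downloaded blob itself rather than a placeholder string, a small variation on the view above (a sketch; the response handling is an assumption, not the original author's code):

def getblob_contents(request):
    assert isinstance(request, HttpRequest)
    blob_service = BlobService(account_name='araldrift', account_key='YOURKEYGOESHERE')
    blob_service.get_blob_to_path('flow', 'NARYN.day', './NARYN.day')
    # Read back the file that get_blob_to_path just wrote and return it.
    with open('./NARYN.day', 'rb') as f:
        return HttpResponse(f.read(), content_type='text/plain')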
Example #3
def get_tags():
	#TODO: Error checking
	global d 
	d = {}
	clarifai_api = ClarifaiApi()
	blob_service = BlobService('calhacks', 'mm7EmY+T+MGahePBDSDU5LHpZR5tRXuh4MSco4jFrzHovOPEf06e18c89pxtPIo4NDVhhjSeaQY/FQmKNxjjyA==')	

	blob_name = request.data
	blob_name = blob_name.decode('utf-8')
	blob_service.get_blob_to_path('imagestore', blob_name, 'out.png')	
	print("checkpoint 1")
	with open('out.png', 'r') as i:
		strd = ""
		for line in i:
			strd += line.strip()
	fname = 'img.png'
	with open(fname, 'wb') as f:
		f.write(base64.b64decode(strd))

	with open(fname, 'rb') as f:
		result = clarifai_api.tag_images(f)
	print(result)
	st = result['results'][0]['result']['tag']['classes'][0:6]

	for i in ['food', 'nobody', 'still life', 'meal', 'dish', 'plate', 'delicious', 'isolated', 'cutout', 'unhealthy', 'one', 'background']: 
		while i in st:
			st.remove(i)
	d = {blob_name: search_terms(st)}
	return "success!"
Example #4
def gethydrograph(request):
    '''
    JSON return of a particular hydrograph by start / stop / station / time interval

    In response it will either
        generate a 404 error if the value is not found
        or
        return a JSON response with the requested slice.
    '''
    assert isinstance(request, HttpRequest)
    
    start  = request.GET.get('start', None)
    end = request.GET.get('end', None)

    # Here we can thieve the time parse code from LOS and use this line: 
    # rd, rh = extract_time(request)
    # but for now this is commented out of course

    station = request.GET.get('station',None)
    interval  = request.GET.get('interval',None)

    blob_service = BlobService(account_name='araldrift', account_key='XXXXXXXXXXXXXXXXXXXXXXX')

    # Keep the LOS original for reference (see time formatting):
    # clean_req = {"rdidx": rdidx,"rdepth": rdepth,"rd": rd.strftime("%Y%m%d"), "rh": rh,"rparm": rparm,}
    # clean_req = {"start": start, "end": end, "station": station, "interocitor": interval,}
    # json_request = json.dumps(clean_req)
    # json_encoded_result = json_request

    try:
        # Retrieve the blob and build the response inside the try block, so a
        # failed download produces the 404 promised in the docstring.
        blob_service.get_blob_to_path('flow', 'NARYN.day', './tmp.csv')
        h = []
        with open('./tmp.csv') as f:
            for line in f:
                h.append(line)
        json_encoded_result = json.dumps(h)

        #####################
        #
        # This is the commented out LOS code (stripped out)
        #
        #####################
        # Convert the depth to an int.
        # depthIndex = int(rdidx)
        # Get the depth slice from the dataset.
        # ret = getDepthSliceByIndex((rd,rh), depthIndex, rparm, rnorm)
        # Serialize out to JSON.
        # json_encoded_result = json.dumps(ret)
        # Cache the response to blob storage. (Would be great if this was async...)
        # cacheResult(request.GET, json_encoded_result)
        # Send the view on to the client.
        #####################

        return HttpResponse(json_encoded_result, content_type="application/json")
    except Exception:
        return HttpResponseNotFound(content="No dice, either the inputs were out of range, the file couldn't be retrieved, or the winds weren't in your favor.")
Example #5
def gethydrograph(request):
    '''
    Returns streamflow data by start / stop / station.
    In response it will either
        generate a 404 error if the value is not found
        or
        return a JSON response with the requested slice or a .csv file by default
    '''
    assert isinstance(request, HttpRequest)
    
    start  = request.GET.get('start', None)
    end = request.GET.get('end', None)
    station = request.GET.get('station',None)
    interval  = request.GET.get('interval',None)
    jsondat = request.GET.get('jsondat',None)
    plot = request.GET.get('plot',None)

    #start blob service
    stationfile = station + '.day.new'
    downloadablefile = station + '_' + start + '_' + end + '.csv'
    blob_service = BlobService(account_name='araldrift', account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==')
    blob_service.get_blob_to_path('flow', stationfile, './tmp.csv')  
    
    #read in pandas data and subsetting
    d_cols = ["DATE","FLOW"]
    d = pd.read_csv('./tmp.csv', sep=" ", names=d_cols)
    df = d[(d.DATE >= start) & (d.DATE <= end)] 
    # df.to_json already returns a JSON string; wrapping it in json.dumps again
    # would double-encode it.
    json_encoded_result = df.to_json(orient='records')
    df.plot(x='DATE', y='FLOW', figsize=(14,6))
    plt.savefig('./plot_test.png')
   
   
    #h = []
    #while True:
    #    line = f.readline()
    #    if line == "": break
    #    h.append(line)
    #f.close()
    try:
        if jsondat in ['TRUE']:
            response = HttpResponse(json_encoded_result, content_type="application/json")
            return response

        elif plot in ['TRUE']:
            image_data = open("./plot_test.png", "rb").read()
            response = HttpResponse(image_data, content_type='image/png')
            return response

        else:
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename=' + downloadablefile
            df.to_csv(response, index=False, lineterminator='\r\n')
            return response
    except Exception:
        return HttpResponseNotFound(content="No dice, either the inputs were out of range, the file couldn't be retrieved, or the winds weren't in your favor.")
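A hypothetical client-side call against this view (the host name and parameter values are assumptions, not from the original):

import requests

r = requests.get(
    'http://example-app.azurewebsites.net/gethydrograph',
    params={'start': '2000-01-01', 'end': '2000-12-31',
            'station': 'NARYN', 'jsondat': 'TRUE'})
records = r.json()  # list of {"DATE": ..., "FLOW": ...} records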
Example #6
def getblob(request):
    assert isinstance(request, HttpRequest)
    blob_service = BlobService(account_name='araldrift', account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==')
    # http://<storage-account-name>.blob.core.windows.net/<container-name>/<blob-name>
    name = 'test.txt'
    fpath = os.path.join(tempfile.gettempdir(), name)
    blob_service.get_blob_to_path('flow', 'NARYN.day', fpath)
    # Read the downloaded file back and return it as an attachment.
    with open(fpath, 'rb') as f:
        response = HttpResponse(f.read(), content_type='text/plain')
    response['Content-Disposition'] = 'attachment; filename=' + name
    return response
Example #7
def getblob(request):
    assert isinstance(request, HttpRequest)
    blob_service = BlobService(
        account_name='araldrift',
        account_key=
        'otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ=='
    )
    # http://<storage-account-name>.blob.core.windows.net/<container-name>/<blob-name>
    name = 'test.txt'
    fpath = os.path.join(tempfile.gettempdir(), name)
    blob_service.get_blob_to_path('flow', 'NARYN.day', fpath)
    # Read the downloaded file back and return it as an attachment.
    with open(fpath, 'rb') as f:
        response = HttpResponse(f.read(), content_type='text/plain')
    response['Content-Disposition'] = 'attachment; filename=' + name
    return response
Example #8
def download_data(key):
    blob_service = BlobService(account_name='asosdsrecruiting', account_key=key)

    blobs = []
    marker = None
    while True:
        batch = blob_service.list_blobs('recruitingdata', marker=marker)
        blobs.extend(batch)
        if not batch.next_marker:
            break
        marker = batch.next_marker

    for blob in blobs:
        file_name = blob.name
        print('Downloading: ' + file_name)
        blob_service.get_blob_to_path('recruitingdata', file_name, file_name.replace('/', '_') + '.csv')
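A minimal invocation sketch for download_data (the environment-variable name is hypothetical):

import os

# The caller supplies the storage account key, e.g. from the environment.
download_data(os.environ['AZURE_ACCOUNT_KEY'])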
Example #9
def get_tags():
    # TODO: Error checking
    global d
    d = {}
    clarifai_api = ClarifaiApi()
    blob_service = BlobService(
        "calhacks", "mm7EmY+T+MGahePBDSDU5LHpZR5tRXuh4MSco4jFrzHovOPEf06e18c89pxtPIo4NDVhhjSeaQY/FQmKNxjjyA=="
    )

    blob_name = request.data
    blob_name = blob_name.decode("utf-8")
    blob_service.get_blob_to_path("imagestore", blob_name, "out.png")
    print("checkpoint 1")
    i = open("out.png", "r")
    strd = ""
    for line in i:
        strd += line.strip()
    fname = "img.png"
    with open(fname, "wb") as f:
        f.write(base64.b64decode(strd))
        f.close()

    f = open(fname, "rb")
    result = clarifai_api.tag_images(f)
    print(result)
    st = result["results"][0]["result"]["tag"]["classes"][0:6]

    for i in [
        "food",
        "nobody",
        "still life",
        "meal",
        "dish",
        "plate",
        "delicious",
        "isolated",
        "cutout",
        "unhealthy",
        "one",
        "background",
    ]:
        while i in st:
            st.remove(i)
    d = {blob_name: search_terms(st)}
    return "success!"
Example #10
def getanalysis(request):
    assert isinstance(request, HttpRequest)
    latstart  = request.GET.get('latstart', None)
    latend  = request.GET.get('latend', None)
    lonstart  = request.GET.get('lonstart', None)
    lonend  = request.GET.get('lonend', None)
    sea  = request.GET.get('season', None)

    #start SSH 
    ssh = paramiko.SSHClient()
    blob_service = BlobService(account_name='araldrift', account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==')
    blob_service.get_blob_to_path('security', 'id_rsa', './id_rsa')
    privkey = paramiko.RSAKey.from_private_key_file (filename='./id_rsa', password='******' )
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    try:
        ssh.connect('40.112.209.249', username='******', password='******', allow_agent=False, pkey=None, key_filename=None, timeout=10, look_for_keys=False, compress=False)
    except paramiko.SSHException:
        return HttpResponse()

    #stdin,stdout,stderr = ssh.exec_command("ls /etc/")
    cmd = '/home/araldif/anaconda3/bin/python /datadrive/from_webapp/xarray_analysis.py ' + latstart + ' ' + latend + ' ' + lonstart + ' ' + lonend + ' ' + sea 
    #cmd = '/datadrive/from_webapp/xarray_analysis.py'
    #cmd = 'python /datadrive/from_webapp/test.py ' + name 
    stdin,stdout,stderr = ssh.exec_command(cmd)
    
    h = []
    for line in stderr.readlines():
        h.append(line)
    stderr.close()
    ssh.close()
    
    try:
        imageoutfile1 = 'prec_' + str(sea) + '_' + str(latstart) + '_' + str(latend) + '_' + str(lonstart) + '_' + str(lonend) + '.png'
        imageoutfile2 = './' + imageoutfile1

        blob_service = BlobService(account_name='araldrift', account_key='otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ==')
        blob_service.get_blob_to_path('flow', imageoutfile1, imageoutfile2)
        image_data = open(imageoutfile2, "rb").read()
        response = HttpResponse(image_data, content_type='image/png')
        return response
    except Exception:
        # Fall back to whatever the remote script wrote to stderr.
        return HttpResponse(h, content_type='text/plain')
Example #11
class BlobSource(DataSource):
    def __init__(self):
        self.storage_account = getenv('STORAGE_ACCOUNT')
        self.blob_service = BlobService(self.storage_account,
                                        getenv('STORAGE_KEY'))

    def load(self, sparkContext, container, path):
        path = ('/' if path[0] != '/' else '') + path
        uri = 'wasb://%s@%s.blob.core.windows.net%s' % (
            container, self.storage_account, path)
        print('Loading from %s' % uri)
        return sparkContext.textFile(uri)

    def download(self, container, path):
        print('Downloading blob from %s/%s' % (container, path))
        self.blob_service.get_blob_to_path(container, path, path)
        print('Downloaded blob to ' + path)

    def saveAsJson(self, payload, container, path):
        path = path.lstrip('/')
        print(path)
        print('Saving to %s/%s' % (container, path))
        json_string = json.dumps(payload, ensure_ascii=False).encode('utf-8')
        try:
            self.blob_service.put_blob(
                container,
                path,
                json_string,
                'BlockBlob',
                x_ms_blob_cache_control='max-age=3600',
                x_ms_blob_content_type='application/json')
        except Exception as e:
            print('Failed to save %s/%s: %s' % (container, path, str(e)))
            raise

    def saveAsText(self, rdd, container, path):
        path = path.lstrip('/')
        path = '/' + path
        print('Saving rdd to %s%s' % (container, path))
        uri = 'wasb://%s@%s.blob.core.windows.net%s' % (
            container, self.storage_account, path)
        try:
            rdd.saveAsTextFile(uri)
        except Exception as e:
            print('Failed to save %s%s: %s' % (container, path, str(e)))
            raise

    def deleteAllBut(self, container, exceptFolderName):
        print('deleteAllBut called')
        try:
            bloblistingresult = self.blob_service.list_blobs(container)
            for i in bloblistingresult:
                print(i.name)
                if not exceptFolderName in i.name:
                    try:
                        print('deleting')
                        self.blob_service.delete_blob(container, i.name)
                        print('deleted')
                    except Exception as e:
                        print('Failed to delete %s/%s: %s' % (container,
                                                              i.name, str(e)))
                        raise
        except Exception as e:
            print('Failed to list things in %s: %s' % (container, str(e)))
            raise
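A usage sketch for BlobSource (the SparkContext setup and the container/path names are assumptions; STORAGE_ACCOUNT and STORAGE_KEY must be set in the environment, as the constructor expects):

from pyspark import SparkContext

sc = SparkContext()
source = BlobSource()  # picks up STORAGE_ACCOUNT / STORAGE_KEY from the environment
rdd = source.load(sc, 'mycontainer', '/data/input.txt')
source.saveAsJson({'lines': rdd.count()}, 'mycontainer', '/data/summary.json')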
Example #12
def getanalysis(request):
    assert isinstance(request, HttpRequest)
    latstart = request.GET.get('latstart', None)
    latend = request.GET.get('latend', None)
    lonstart = request.GET.get('lonstart', None)
    lonend = request.GET.get('lonend', None)
    sea = request.GET.get('season', None)

    #start SSH
    ssh = paramiko.SSHClient()
    blob_service = BlobService(
        account_name='araldrift',
        account_key=
        'otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ=='
    )
    blob_service.get_blob_to_path('security', 'id_rsa', './id_rsa')
    privkey = paramiko.RSAKey.from_private_key_file(filename='./id_rsa',
                                                    password='******')
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    try:
        ssh.connect('40.112.209.249',
                    username='******',
                    password='******',
                    allow_agent=False,
                    pkey=None,
                    key_filename=None,
                    timeout=10,
                    look_for_keys=False,
                    compress=False)
    except paramiko.SSHException:
        return HttpResponse()

    #stdin,stdout,stderr = ssh.exec_command("ls /etc/")
    cmd = '/home/araldif/anaconda3/bin/python /datadrive/from_webapp/xarray_analysis.py ' + latstart + ' ' + latend + ' ' + lonstart + ' ' + lonend + ' ' + sea
    #cmd = '/datadrive/from_webapp/xarray_analysis.py'
    #cmd = 'python /datadrive/from_webapp/test.py ' + name
    stdin, stdout, stderr = ssh.exec_command(cmd)

    h = []
    for line in stderr.readlines():
        h.append(line)
    stderr.close()
    ssh.close()

    try:
        imageoutfile1 = 'prec_' + str(sea) + '_' + str(latstart) + '_' + str(
            latend) + '_' + str(lonstart) + '_' + str(lonend) + '.png'
        imageoutfile2 = './' + imageoutfile1

        blob_service = BlobService(
            account_name='araldrift',
            account_key=
            'otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ=='
        )
        blob_service.get_blob_to_path('flow', imageoutfile1, imageoutfile2)
        image_data = open(imageoutfile2, "rb").read()
        response = HttpResponse(image_data, content_type='image/png')
        return response
    except Exception:
        # Fall back to whatever the remote script wrote to stderr.
        return HttpResponse(h, content_type='text/plain')
Example #13
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    
    """
    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.
        
        If the name prefix does not end with a trailing slash, and is not empty,
        one will be added automatically.
        
        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        
        """

        # Make sure azure libraries actually loaded
        assert (have_azure)

        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix

        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"

        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)

        # This will hold our Azure blob store connection
        self.connection = None

    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """

        return (self.account_name, self.account_key, self.container_name,
                self.name_prefix)

    def __setstate__(self, state):
        """
        Set up after unpickling.
        """

        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]

        self.connection = None

    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """

        if self.connection is None:
            RealtimeLogger.debug("Connecting to account {}, using "
                                 "container {} and prefix {}".format(
                                     self.account_name, self.container_name,
                                     self.name_prefix))

            # Connect to the blob service where we keep everything
            self.connection = BlobService(account_name=self.account_name,
                                          account_key=self.account_key)

    @backoff
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """

        self.__connect()

        RealtimeLogger.debug("Loading {} from AzureIOStore".format(input_path))

        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
                                         self.name_prefix + input_path,
                                         local_path)

    def list_input_directory(self,
                             input_path,
                             recursive=False,
                             with_times=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not have a trailing slash; if not, one will be added automatically.
        
        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.
        
        If with_times is specified, will yield (name, time) pairs including
        modification times as datetime objects. Times on directories are None.
        
        """

        self.__connect()

        RealtimeLogger.info(
            "Enumerating {} from AzureIOStore".format(input_path))

        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path

        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"

        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None

        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()

        while True:

            # Get the results from Azure. We don't use delimiter since Azure
            # doesn't seem to provide the placeholder entries it's supposed to.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=fake_directory,
                                                marker=marker)

            RealtimeLogger.info("Found {} files".format(len(result)))

            for blob in result:
                # Yield each result's blob name, but directory names only once

                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]

                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)

                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)

                        if with_times:
                            yield subdirectory, None
                        else:
                            yield subdirectory
                else:
                    # We found an actual file
                    if with_times:
                        mtime = blob.properties.last_modified

                        if isinstance(mtime, datetime.datetime):
                            # Make sure we're getting proper localized datetimes
                            # from the new Azure Storage API.
                            assert (mtime.tzinfo is not None and
                                    mtime.tzinfo.utcoffset(mtime) is not None)
                        else:
                            # Convert mtime from a string as in the old API.
                            mtime = dateutil.parser.parse(mtime).replace(
                                tzinfo=dateutil.tz.tzutc())

                        yield relative_path, mtime

                    else:
                        yield relative_path

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

    @backoff
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """

        self.__connect()

        RealtimeLogger.debug("Saving {} to AzureIOStore".format(output_path))

        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass

        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(
            self.container_name, self.name_prefix + output_path, local_path)

    @backoff
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        
        """

        self.__connect()

        marker = None

        while True:

            try:
                # Make the container
                self.connection.create_container(self.container_name)
            except azure.WindowsAzureConflictError:
                # The container probably already exists
                pass

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    return True

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return False

    @backoff
    def get_mtime(self, path):
        """
        Returns the modification time of the given blob if it exists, or None
        otherwise.
        
        """

        self.__connect()

        marker = None

        while True:

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    mtime = blob.properties.last_modified

                    if isinstance(mtime, datetime.datetime):
                        # Make sure we're getting proper localized datetimes
                        # from the new Azure Storage API.
                        assert (mtime.tzinfo is not None
                                and mtime.tzinfo.utcoffset(mtime) is not None)
                    else:
                        # Convert mtime from a string as in the old API.
                        mtime = dateutil.parser.parse(mtime).replace(
                            tzinfo=dateutil.tz.tzutc())

                    return mtime

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None

    @backoff
    def get_size(self, path):
        """
        Returns the size in bytes of the given blob if it exists, or None
        otherwise.
        
        """

        self.__connect()

        marker = None

        while True:

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    size = blob.properties.content_length

                    return size

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None
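A usage sketch for AzureIOStore (the account, container, and paths are hypothetical; the account key is resolved through Toil as described in the constructor docstring):

store = AzureIOStore('myaccount', 'mycontainer', name_prefix='experiment1')
store.write_output_file('local_result.txt', 'outputs/result.txt')
if store.exists('outputs/result.txt'):
    for name in store.list_input_directory('outputs'):
        print(name)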
Example #14
blob_service = BlobService(account_name="<account_name>", account_key="<account_key>")

blob_service.create_container("datacontainer")

blob_service.create_container("datacontainer", x_ms_blob_public_access="container")

blob_service.set_container_acl("datacontainer", x_ms_blob_public_access="container")


blob_service.put_block_blob_from_path(
    "datacontainer", "datablob", "StorageClientPy.py", x_ms_blob_content_type="text/x-script.phyton"
)


blobs = []
marker = None
while True:
    batch = blob_service.list_blobs("datacontainer", marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    print(blob.name)


blob_service.get_blob_to_path("datacontainer", "datablob", "out-StorageClientPy.py")


blob_service.delete_blob("datacontainer", "datablob")
Example #15
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 09 16:36:56 2015

@author: justin.malinchak
"""


# List blobs in container
from azure.storage.blob import BlobService
blob_service = BlobService(account_name='portalvhdss5m831rhl98hj', account_key='Z1MliCYE7p9Ks9kYQoGeM4V99hODtiJL82BVi/zIm06jLYh7n0tV8YaZHzITKixMwUUmjJ1Vp05XrgHG+gXFlg==')

blobs = []
marker = None
while True:
    batch = blob_service.list_blobs('mycontainer', marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    bname = blob.name
    print('')
    print(bname)
    print('')
    bpathname = 'C:\\Batches\\$Work\\' + bname
    blob_service.get_blob_to_path('mycontainer', bname, bpathname)
    print('')
    print('blob downloaded ' + bpathname)
    print('')
Example #16
class Azure(object):
    '''
    A class used to connect to the Azure storage and
    upload/download files using blob storage
    '''
    def __init__(self, params=None):
        '''
        Constructor for the Azure object

        '''
        # Avoid a mutable default argument shared across instances.
        if params is None:
            params = {}
        self.user = params.get("user")
        self.key = params.get("key")

    def connect(self, host, port, user, password, secure):
        '''
        Connect to the Azure service with given user and key
        @param user - username to use to connect to
        @param key - key to use to connect
        '''
        kwargs = {}
        err = None
        if host is not None:
            kwargs["host_base"] = "." + host
        if user is not None:
            kwargs["account_name"] = user
        elif self.user is not None:
            kwargs["account_name"] = self.user
        if password is not None:
            kwargs["account_key"] = password
        elif self.key is not None:
            kwargs["account_key"] = self.key
        kwargs["protocol"] = "https" if secure else "http"
        try:
            self.service = BlobService(**kwargs)
        except Exception as e:
            err = str(e)
            self.service = None
        if self.service is None:
            raise OsakaException("Failed to connect to Azure:" +
                                 ("" if err is None else err))

    @classmethod
    def getSchemes(clazz):
        '''
        Returns a list of schemes this handler handles
        Note: handling the scheme of another handler produces unknown results
        @returns list of handled schemes
        '''
        return ["azure", "azures"]

    def close(self):
        '''
        Close this service
        '''
        pass

    def put(self, path, url):
        '''
        Put a file up to the cloud
        @param path - path to upload
        @param url - path in cloud to upload too
        '''
        if os.path.isdir(path):
            return walk(self.put, path, url)
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        self.service.create_container(cont)
        self.service.put_block_blob_from_path(cont, blob, path)
        return True

    def get(self, url, dest):
        '''
        Get file(s) from the cloud
        @param url - url on cloud to pull down (on cloud)
        @param dest - dest to download too
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            destination = os.path.join(dest, os.path.relpath(
                b.name, blob)) if blob != b.name else dest
            if not os.path.exists(os.path.dirname(destination)):
                os.mkdir(os.path.dirname(destination))
            self.service.get_blob_to_path(cont, b.name, destination)
        return True

    def rm(self, url):
        '''
        Remove this url and all children urls
        @param url - url to remove
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            self.service.delete_blob(cont, b.name)
        return True
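A hypothetical round trip with the Azure handler above (the account, key, and URLs are placeholders):

store = Azure({'user': 'myaccount', 'key': 'mykey'})
store.connect(host=None, port=None, user=None, password=None, secure=True)
store.put('/tmp/report.txt', 'azure://mycontainer/reports/report.txt')
store.get('azure://mycontainer/reports/report.txt', '/tmp/report_copy.txt')
store.rm('azure://mycontainer/reports/report.txt')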
Example #17
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    
    """
    
    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.
        
        If the name prefix does not end with a trailing slash, and is not empty,
        one will be added automatically.
        
        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        
        """
        
        # Make sure azure libraries actually loaded
        assert(have_azure)
        
        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix
        
        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"
        
        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)
            
        # This will hold our Azure blob store connection
        self.connection = None
        
    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """
     
        return (self.account_name, self.account_key, self.container_name, 
            self.name_prefix)
        
    def __setstate__(self, state):
        """
        Set up after unpickling.
        """
        
        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]
        
        self.connection = None
        
    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """
        
        if self.connection is None:
            RealTimeLogger.get().debug("Connecting to account {}, using "
                "container {} and prefix {}".format(self.account_name,
                self.container_name, self.name_prefix))
        
            # Connect to the blob service where we keep everything
            self.connection = BlobService(
                account_name=self.account_name, account_key=self.account_key)
            
    @backoff        
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """
        
        self.__connect()
        
        
        RealTimeLogger.get().debug("Loading {} from AzureIOStore".format(
            input_path))
        
        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
            self.name_prefix + input_path, local_path)
            
    def list_input_directory(self, input_path, recursive=False,
        with_times=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not have a trailing slash; if not, one will be added automatically.
        
        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.
        
        If with_times is specified, will yield (name, time) pairs including
        modification times as datetime objects. Times on directories are None.
        
        """
        
        self.__connect()
        
        RealTimeLogger.get().info("Enumerating {} from AzureIOStore".format(
            input_path))
        
        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path
        
        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"
        
        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None
        
        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()
        
        while True:
        
            # Get the results from Azure. We don't use delimiter since Azure
            # doesn't seem to provide the placeholder entries it's supposed to.
            
            result = self.connection.list_blobs(self.container_name, 
                prefix=fake_directory, marker=marker)
                
            RealTimeLogger.get().info("Found {} files".format(len(result)))
                
            for blob in result:
                # Yield each result's blob name, but directory names only once
                
                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]
                
                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)
                    
                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)
                        
                        if with_times:
                            yield subdirectory, None
                        else:
                            yield subdirectory
                else:
                    # We found an actual file 
                    if with_times:
                        mtime = dateutil.parser.parse(
                            blob.properties.last_modified).replace(
                            tzinfo=dateutil.tz.tzutc())
                        yield relative_path, mtime
                            
                    else:
                        yield relative_path
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break
                
    @backoff
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """
        
        self.__connect()
        
        RealTimeLogger.get().debug("Saving {} to AzureIOStore".format(
            output_path))
        
        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass
        
        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(self.container_name,
            self.name_prefix + output_path, local_path)
    
    @backoff        
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        
        """
        
        self.__connect()
        
        marker = None
        
        while True:
        
            try:
                # Make the container
                self.connection.create_container(self.container_name)
            except azure.WindowsAzureConflictError:
                # The container probably already exists
                pass
            
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name, 
                prefix=self.name_prefix + path, marker=marker)
                
            for blob in result:
                # Look at each blob
                
                if blob.name == self.name_prefix + path:
                    # Found it
                    return True
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break 
        
        return False
        
        
    @backoff        
    def get_mtime(self, path):
        """
        Returns the modification time of the given blob if it exists, or None
        otherwise.
        
        """
        
        self.__connect()
        
        marker = None
        
        while True:
        
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name, 
                prefix=self.name_prefix + path, marker=marker)
                
            for blob in result:
                # Look at each blob
                
                if blob.name == self.name_prefix + path:
                    # Found it
                    return dateutil.parser.parse(
                        blob.properties.last_modified).replace(
                        tzinfo=dateutil.tz.tzutc())
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break 
        
        return None
Example #18
class AzureConnector():

    def __init__(self, config):

        tree = ET.parse('SharedConfig.xml')
        self.myMachineName = tree.find('.//Instance').get("id")

        self.sms = ServiceManagementService(
            subscription_id=config.get("azure", "subscription_id"),
            cert_file=config.get("azure", "cert_file")
        )

        self.bus_service = ServiceBusService(
            service_namespace=config.get("azure", "bus_namespace"),
            shared_access_key_name=config.get("azure", "bus_shared_access_key_name"),
            shared_access_key_value=config.get("azure", "bus_shared_access_key_value"))

        self.command_queue = config.get("azure", "commandQueuePath")
        for tries in range(1,10):
            try:
                self.bus_service.create_queue(self.command_queue)
                break
            except:
                print "Esperando"
            
        self.status_topic = config.get("azure", "statusTopicPath")
        self.bus_service.create_queue(self.status_topic)

        self.storage = BlobService(account_name=config.get("azure", "account_name"),
                                   account_key=config.get("azure", "account_key"))

        self.algo_storage_name = config.get("azure", "algorithm_storage_name")
        self.storage.create_container(self.algo_storage_name, fail_on_exist=False)

        self.proj_storage_name = config.get("azure", "project_storage_name")
        self.storage.create_container(self.proj_storage_name, fail_on_exist=False)

    def check_new_tasks(self):

        for tries in range(1,2):
            try:
                message = self.bus_service.receive_queue_message(self.command_queue, peek_lock=False, timeout=60)
                break
            except Exception:
                message = None

        if message is None or message.body is None:
            return None

        job_description = json.loads(message.body.replace('/AzureBlobStorage/', ''))

        command = CommandMetadata(
            command_id = job_description["command_id"],
            algorithm_directory = job_description["algorithm_prfx"],
            project_prfx = job_description["project_prfx"],
            project_input_files = job_description["project_input_files"],
            algorithm_executable_name = job_description["algorithm_executable_name"],
            algorithm_parameters = job_description["algorithm_parameters"],
            sent_timestamp = datetime.datetime.strptime(job_description["sent_timestamp"], "%d/%m/%Y %H:%M:%S"),
            machine_size=job_description["machine_size"])

        # Return data about the command consumed from the queue
        return command

    def list_algo_files(self, prfx):

        list = self.storage.list_blobs(container_name=self.algo_storage_name, prefix=prfx)
        result = []
        for blob in list:
            result.append(blob.name)
        return result

    def download_algo_zip(self, algorithm_bin_file, tmp_file):
        print("download_algo_zip(algorithm_bin_file=" + algorithm_bin_file + ", tmp_file=" + tmp_file + ")")
        for tries in range(1, 5):
            try:
                self.storage.get_blob_to_path(self.algo_storage_name, algorithm_bin_file, tmp_file,
                                 open_mode='wb', snapshot=None, x_ms_lease_id=None,
                                 progress_callback=None)
                break

            except Exception as e:

                # range(1, 5) ends at 4, so the final attempt is tries == 4.
                if tries == 4:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + str(e))

    def download_file_to_project(self, project_name, blob_name, dir):
        print("download_file_to_project(project_name=" + project_name + ", blob_name=" + blob_name + ", dir=" + dir + ")")
        for tries in range(1, 5):
            try:
                self.storage.get_blob_to_path(self.proj_storage_name,
                                              os.path.join(project_name, blob_name),
                                              os.path.join(dir, os.path.join(project_name, blob_name)),
                                              open_mode='wb', snapshot=None, x_ms_lease_id=None,
                                              progress_callback=None)
                break

            except Exception as e:

                if tries == 4:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + str(e))

    def upload_proj_file(self, project_name, blob_name, dir):
        print("upload_proj_file(project_name=" + project_name + ", blob_name=" + blob_name + ", dir=" + dir + ")")
        if blob_name[0] == '/':
            blob_name = blob_name[1:]
        for tries in range(1, 5):
            try:
                self.storage.put_block_blob_from_path(self.proj_storage_name,
                                              os.path.join(project_name, blob_name),
                                              os.path.join(dir, os.path.join(project_name, blob_name)))
                break

            except Exception as e:

                if tries == 4:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + str(e))

    def download_file_to_algo(self, blob_name, dir):
        print("download_file_to_algo(blob_name=" + blob_name + ", dir=" + dir + ")")

        for tries in range(1, 5):
            try:
                self.storage.get_blob_to_path(container_name=self.algo_storage_name,
                                              blob_name=os.path.join(blob_name),
                                              file_path=os.path.join(dir, blob_name),
                                              open_mode='wb', snapshot=None, x_ms_lease_id=None,
                                              progress_callback=None)
                break

            except Exception as e:

                if tries == 4:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + str(e))


    def send_status(self, main_status):
        for tries in range(1, 5):
            try:
                self.bus_service.send_topic_message(topic_name=self.status_topic,
                                                    message=Message(main_status.encode('utf-8')))
                break

            except Exception as e:

                if tries == 4:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + str(e))

    def shutdown_myself(self):

        # The virtual machine deletes itself from the cloud.
        print("Removing the virtual machine from the cloud...")
        for tries in range(1, 5):
            try:
                self.sms.delete_deployment(
                    service_name=self.myMachineName,
                    deployment_name=self.myMachineName, delete_vhd=True)
                exit(0)

            except Exception as e:

                if tries == 4:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + str(e))
Example #19
def module_impl(rm, log, params, check_mode=False):

    if not HAS_AZURE:
        raise Exception("The Azure python sdk is not installed (try 'pip install azure')")

    if not HAS_REQUESTS:
        raise Exception("The requests python module is not installed (try 'pip install requests')")

    resource_group = params.get('resource_group')
    account_name = params.get('account_name')
    container_name = params.get('container_name')
    mode = params.get('mode')
    x_ms_meta_name_values = params.get('x_ms_meta_name_values')
    x_ms_blob_public_access = params.get('x_ms_blob_public_access')
    x_ms_blob_cache_control = params.get('x_ms_blob_cache_control')
    x_ms_blob_content_encoding = params.get('x_ms_blob_content_encoding')
    x_ms_blob_content_language = params.get('x_ms_blob_content_language')
    x_ms_blob_content_type = params.get('x_ms_blob_content_type')
    prefix = params.get('prefix')
    marker = params.get('marker')
    max_results = params.get('max_results')
    blob_name = params.get('blob_name')
    file_path = params.get('file_path')
    overwrite = params.get('overwrite')
    permissions = params.get('permissions')
    hours = params.get('hours')
    days = params.get('days')
    access_token = params.get('access_token')

    results = dict(changed=False)

    storage_client = rm.storage_client
    
    if not resource_group:
        raise Exception("Parameter error: resource_group cannot be None.")
    
    if not account_name:
        raise Exception("Parameter error: account_name cannot be None.")

    if not container_name:
        raise Exception("Parameter error: container_name cannot be None.")

    if not NAME_PATTERN.match(container_name):
        raise Exception("Parameter error: container_name must consist of lowercase letters, numbers and hyphens. It must begin with " +
            "a letter or number. It may not contain two consecutive hyphens.")

    # add file path validation

    results['account_name'] = account_name
    results['resource_group'] = resource_group 
    results['container_name'] = container_name

    # Modes: put (upload), get (download), geturl (return download url, Ansible 1.3+), getstr (download object as string, 1.3+), list (list keys, 2.0+), create (bucket), delete (bucket), and delobj (delete object)
    try:
        log('Getting keys')
        keys = {}
        response = storage_client.storage_accounts.list_keys(resource_group, account_name)
        keys[KeyName.key1] = response.storage_account_keys.key1
        keys[KeyName.key2] = response.storage_account_keys.key2
    except AzureHttpError as e:
        log('Error getting keys for account %s' % account_name)
        raise Exception(str(e.message))

    try:
        log('Create blob service')
        bs = BlobService(account_name, keys[KeyName.key1])
    except Exception as e:
        log('Error creating blob service.')
        raise Exception(str(e.args[0]))

    if mode == 'create':
        container = get_container_facts(bs, container_name)
        if container is not None:
            # container exists
            results['container'] = container
            results['msg'] = "Container already exists."
            return results
        # create the container
        if not check_mode:
            log('Create container %s' % container_name)
            bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['container'] = get_container_facts(bs, container_name)
        results['msg'] = "Container created successfully."
        results['changed'] = True
        return results

    if mode == 'update':
        container = get_container_facts(bs, container_name)
        if container is None:
            # container does not exist
            if not check_mode:
                log('Create container %s' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['changed'] = True
            results['msg'] = 'Container created successfully.'
            return results     
        # update existing container
        results['msg'] = "Container not changed."
        if x_ms_meta_name_values:
            if not check_mode:
                log('Update x_ms_meta_name_values for container %s' % container_name)
                bs.set_container_metadata(container_name, x_ms_meta_name_values)
            results['changed'] = True
            results['msg'] = 'Container meta data updated successfully.'
        if x_ms_blob_public_access:
            access = x_ms_blob_public_access
            if x_ms_blob_public_access == 'private':
                access = None
            if not check_mode:
                log('Set access to %s for container %s' % (access, container_name))
                bs.set_container_acl(container_name=container_name, x_ms_blob_public_access=access)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        if permissions:
            if hours == 0 and days == 0:
                raise Exception("Parameter error: expecting hours > 0 or days > 0")
            id = "%s-%s" % (container_name, permissions) 
            si = get_identifier(id, hours, days, permissions)
            identifiers = SignedIdentifiers()
            identifiers.signed_identifiers.append(si)
            if not check_mode:
                log('Set permissions to %s for container %s' % (permissions, container_name))
                bs.set_container_acl(container_name=container_name,signed_identifiers=identifiers)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        results['container'] = get_container_facts(bs, container_name)
        return results

    if mode == 'delete':
        container = get_container_facts(bs, container_name)
        if container is None:
            results['msg'] = "Container %s could not be found." % container_name
            return results
        if not check_mode:
            log('Deleting container %s' % container_name)
            bs.delete_container(container_name)
        results['changed'] = True
        results['msg'] = 'Container deleted successfully.'
        return results

    if mode == 'delete_blob':
        if blob_name is None:
            raise Exception("Parameter error: blob_name cannot be None.")
        
        container = container_check(bs, container_name)
        blob = get_blob_facts(bs, container_name, blob_name)

        if not blob:
            results['msg'] = 'Blob %s could not be found in container %s.' % (blob_name, container_name)
            return results

        if not check_mode:
            log('Deleting %s from container %s.' % (blob_name, container_name))
            bs.delete_blob(container_name, blob_name)
        results['changed'] = True
        results['msg'] = 'Blob successfully deleted.'
        return results

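    # 'put' uploads a local file as a block blob. If the blob already exists,
    # its Content-MD5 is compared against the local file's MD5, and the
    # overwrite parameter ('always', 'different', 'never') decides whether
    # the upload proceeds.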
    if mode == 'put':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        if not file_path :
            raise Exception("Parameter error: file_path cannot be None.")

        if not path_check(file_path):
            raise Exception("File %s does not exist." % file_path)

        container = get_container_facts(bs, container_name)
        blob = None
        if container is not None:
            blob = get_blob_facts(bs, container_name, blob_name)

        if container is not None and blob is not None:
            # both container and blob already exist
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)
            results['container'] = container
            results['blob'] = blob

            if md5_local == md5_remote:
                sum_matches = True
                results['msg'] = 'File checksums match. File not uploaded.'
                if overwrite == 'always':
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
                else:
                    results['msg'] = "WARNING: Checksums do not match. Use overwrite parameter to force upload."
            return results

        if container is None:
            # container does not exist. create container and upload.
            if not check_mode:
                log('Creating container %s.' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['container'] = get_container_facts(bs, container_name)
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully created container and uploaded file.'
            return results

        if container is not None:
            # container exists. just upload.
            if not check_mode:
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully uploaded file.'
            return results

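    # 'list' returns one page of blobs from the container; prefix filters by
    # blob-name prefix, while marker and max_results page through large
    # containers.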
    if mode == 'list':
        container = container_check(bs, container_name)
        response = bs.list_blobs(
            container_name,
            prefix,
            marker,
            max_results
        )
        results['blobs'] = []
        for blob in response.blobs:
            b = dict(
                name = blob.name,
                snapshot = blob.snapshot,
                last_modified = blob.properties.last_modified,
                content_length = blob.properties.content_length,
                blob_type = blob.properties.blob_type,
            )
            results['blobs'].append(b)
        return results

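    # 'get' mirrors 'put' in reverse: the blob is downloaded unless a local
    # copy with a matching MD5 already exists, subject to the same overwrite
    # policy.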
    if mode == 'get':
        if file_path is None:
            raise Exception("Parameter error: file_path cannot be None.")
        
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        path_exists = path_check(file_path)
        
        if not path_exists or overwrite == 'always':
            if not check_mode:
                bs.get_blob_to_path(container_name, blob_name, file_path)
            results['changed'] = True
            results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
            return results

        if path_exists:
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] ="WARNING: Checksums do not match. Use overwrite parameter to force download."
        
        if sum_matches is True and overwrite == 'never':
            results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
        
        return results

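    # 'get_url' composes the blob's URL (optionally signed with a previously
    # generated SAS token) without transferring any data.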
    if mode == 'get_url':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)

        url = bs.make_blob_url(
            container_name=container_name,
            blob_name=blob_name,
            sas_token=access_token)
        results['url'] = url
        results['msg'] = "Url: %s" % url
        return results

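    # 'get_token' signs a time-limited SAS token for a single blob; its
    # expiry is derived from the hours/days parameters.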
    if mode == 'get_token':
        if hours == 0 and days == 0:
            raise Exception("Parameter error: expecting hours > 0 or days > 0")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        results['blob_name'] = blob_name
        sap = get_shared_access_policy(permissions, hours=hours, days=days)
        token = bs.generate_shared_access_signature(container_name, blob_name, sap)
        results['access_token'] = token
        return results
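
For reference, here is a minimal sketch of what the get_shared_access_policy
helper used above might look like against the same legacy SDK. The helper
itself is not shown in this example, so the import path, signature, and the
one-minute clock-skew allowance are assumptions:

from datetime import datetime, timedelta

from azure.storage import AccessPolicy, SharedAccessPolicy

def get_shared_access_policy(permission, hours=0, days=0):
    # Hypothetical reconstruction: build an AccessPolicy whose expiry lies
    # hours/days in the future and wrap it in a SharedAccessPolicy.
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    start = datetime.utcnow() - timedelta(minutes=1)  # tolerate clock skew
    expiry = start + timedelta(hours=hours, days=days)
    return SharedAccessPolicy(AccessPolicy(start.strftime(date_format),
                                           expiry.strftime(date_format),
                                           permission))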
Example #20
0
"""
@author: justin.malinchak
"""

# List blobs in container
from azure.storage.blob import BlobService

blob_service = BlobService(
    account_name='portalvhdss5m831rhl98hj',
    account_key=
    'Z1MliCYE7p9Ks9kYQoGeM4V99hODtiJL82BVi/zIm06jLYh7n0tV8YaZHzITKixMwUUmjJ1Vp05XrgHG+gXFlg=='
)

blobs = []
marker = None
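# Each list_blobs call returns a single page of results plus a continuation
# marker; keep requesting with the previous page's next_marker until it
# comes back empty.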
while True:
    batch = blob_service.list_blobs('mycontainer', marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    bname = blob.name
    bpathname = 'C:\\Batches\\$Work\\' + bname
    blob_service.get_blob_to_path('mycontainer', bname, bpathname)
    print('blob downloaded ' + bpathname)
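
One caveat in the loop above: get_blob_to_path opens the target file for
writing, so it fails if C:\Batches\$Work does not exist. A defensive variant
creates the directory first:

import os

target_dir = 'C:\\Batches\\$Work'
if not os.path.isdir(target_dir):
    os.makedirs(target_dir)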
Example #21
0
def gethydrograph(request):
    '''
    Returns streamflow data by start / stop / station 
    In response it will 
        generate a 404 error if the value is not found
        or
        return a JSON response with the requested slice or a .csv file by default
    '''
    assert isinstance(request, HttpRequest)

    start = request.GET.get('start', None)
    end = request.GET.get('end', None)
    station = request.GET.get('station', None)
    interval = request.GET.get('interval', None)
    jsondat = request.GET.get('jsondat', None)
    plot = request.GET.get('plot', None)

    #start blob service
    stationfile = station + '.day.new'
    downloadablefile = station + '_' + start + '_' + end + '.csv'
    blob_service = BlobService(
        account_name='araldrift',
        account_key=
        'otLzzkwQHQD3xFTQxwxy64PCL6eDINWGjSB7x6Ta2XVw3+3ffI5O2MhAEavf/r8qIW4G/dKrZAVg1R64nK7hDQ=='
    )
    blob_service.get_blob_to_path('flow', stationfile, './tmp.csv')

    #read in pandas data and subsetting
    d_cols = ["DATE", "FLOW"]
    d = pd.read_csv('./tmp.csv', sep=" ", names=d_cols)
    df = d[(d.DATE >= start) & (d.DATE <= end)]
    h = df.to_json(orient='records')
    json_encoded_result = json.dumps(h)
    df.plot(x='DATE', y='FLOW', figsize=(14, 6))
    plt.savefig('./plot_test.png')
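    # Note: the DATE filter above is a plain string comparison; it only acts
    # like a date range when DATE, start, and end share a lexically sortable
    # format such as YYYY-MM-DD (an assumption about the source file).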

    try:
        if jsondat in ['TRUE']:
            response = HttpResponse(json_encoded_result,
                                    content_type="application/json")
            return response

        elif plot in ['TRUE']:
            image_data = open("./plot_test.png", "rb").read()
            response = HttpResponse(image_data, content_type='image/png')
            return response

        else:
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename=' + downloadablefile
            df.to_csv(response, index=False, lineterminator='\r\n')
            return response
    except Exception:
        return HttpResponseNotFound(
            content=("No dice, either the inputs were out of range, the file "
                     "couldn't be retrieved, or the winds weren't in your favor.")
        )
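
A request against this view might look like the following (the parameter
values and date format are hypothetical):

GET /gethydrograph/?station=NARYN&start=2000-01-01&end=2000-12-31&plot=TRUE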
Example #22
0
# coding: utf-8

# In[186]:

#start the connection to Azure Blob Storage
from azure.storage.blob import BlobService
import pandas as pd

#connect to your blob storage account
blob_service = BlobService(account_name='YourAccount', account_key='YourKey')

#get the latest dataset that was scored
#we temporarily store the data in a local CSV file
blob_service.get_blob_to_path('amltest', 'output.csv', 'tmp.csv')


# In[187]:

#convert text file into Pandas DataFrame
data = pd.read_csv("tmp.csv")

#Remove the whitespace from the column names to avoid Python errors
cols = data.columns
cols = cols.map(lambda x: x.replace(' ', '_') if isinstance(x, str) else x)
data.columns = cols
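# e.g. a header such as 'Scored Labels' (a typical Azure ML output column,
# assumed here) becomes 'Scored_Labels', which also enables attribute-style
# access like data.Scored_Labels.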



# In[188]: