def spin_camera_loop(messenger, shared_mem_file):
    intervals_per_cam = dict()

    if camera_config["blob"] is not None:
        blob_service_client = BlobServiceClient.from_connection_string(
            camera_config["blob"])
        logging.info(
            f"Created blob service client: {blob_service_client.account_name}")

    while not received_twin_patch:
        for key, cam in camera_config["cameras"].items():
            if not cam["enabled"]:
                continue

            curtime = time.time()
            if key not in intervals_per_cam:
                intervals_per_cam[key] = dict()
                current_source = intervals_per_cam[key]
                current_source['timer'] = 0
                current_source['rtsp'] = cam['rtsp']
                current_source['interval'] = float(cam['interval'])
                current_source['video'] = VideoStream(cam['rtsp'],
                                                      float(cam['interval']))
                current_source['video'].start()

            # this will keep track of how long we need to wait between
            # bursts of activity
            current_source = intervals_per_cam[key]

            # find the current video streamer
            for keycurrent, camcurrent in intervals_per_cam.items():
                if camcurrent['rtsp'] == cam['rtsp']:
                    video_streamer = camcurrent['video']
            # video_streamer = current_source['video']
            # TODO fix multi video sources time issue.

            # not enough time has passed since the last collection
            if curtime - current_source['timer'] < float(cam['interval']):
                continue

            current_source['timer'] = curtime

            # block until we get something
            frame_id, img = video_streamer.get_frame_with_id()
            if img is None:
                logging.warning("No frame retrieved. Is video running?")
                continue

            logging.info(f"Grabbed frame {frame_id} from {cam['rtsp']}")

            camId = f"{cam['space']}/{key}"

            # send to blob storage and retrieve the timestamp by which we will
            # identify the video
            curtimename = None
            perf = None
            if camera_config["blob"] is not None:
                start_upload = time.time()
                curtimename, _ = send_img_to_blob(blob_service_client, img, camId)
                total_upload = time.time() - start_upload
                perf = {"upload": total_upload}

            detections = []
            if cam['detector'] is not None and cam['inference'] is not None and cam['inference']:
                start_inf = time.time()
                res = infer(cam['detector'], img, frame_id, curtimename,
                            shared_mem_file)
                total_inf = time.time() - start_inf
                detections = res["detections"]
                perf = {**perf, **res["perf"]}
                perf["imgencode"] = total_inf - perf["imgprep"] - perf["detection"]
                logging.info(f"perf: {perf}")

            # message the image capture upstream
            if curtimename is not None:
                messenger.send_image_and_detection(camId, curtimename, frame_id,
                                                   detections)
                messenger.send_perf(camId, curtimename, frame_id, perf)
                logging.info(
                    f"Notified of image upload: {cam['rtsp']} to {cam['space']}")

    # shutdown current video captures
    for key, cam in intervals_per_cam.items():
        cam['video'].stop()
from io import BytesIO
from PIL import Image
from pkcs7 import PKCS7Encoder
import _thread
import requests
import json
# from datetime import timedelta, datetime, date, time as t2
# import time
from random import randint
# cv2 and BlobServiceClient are used below
import cv2
from azure.storage.blob import BlobServiceClient

# set to your own subscription key value
subscription_key = '99d0310d30c24046a148cbf795a34121'

blob_service_client = BlobServiceClient.from_connection_string(
    "DefaultEndpointsProtocol=https;AccountName=oneteamblob;AccountKey=qcv7bSwg5vFNZRt1gY9XLPcv6OWKdKakKCj5znpUQRNQTPAOkLbhnCuZpt/1m4Gc9f5tV55x0CEzcVWjCubTaQ==;EndpointSuffix=core.windows.net"
)

# cap = cv2.VideoCapture("rtsp://*****:*****@10.76.53.16:8554/stream0/out.h264")

# Create a unique name for the container
container_name = "facedetection"

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
left_eye_cascade = cv2.CascadeClassifier('haarcascade_lefteye_2splits.xml')
right_eye_cascade = cv2.CascadeClassifier('haarcascade_righteye_2splits.xml')
frontalface_alt = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')


def storeblob(name):
    print(name)
    blob_client = blob_service_client.get_blob_client(container=container_name,
import base64
import logging
import azure.functions as func
import json
import os

from azure.storage.blob import BlobServiceClient

#
# Azure Blob Integration
#
graph_connection_string = os.environ["AzureGraphStorage"]
graph_container = os.environ["AzureGraphContainer"]
blob_service_client = BlobServiceClient.from_connection_string(
    conn_str=graph_connection_string)
graph_container_client = blob_service_client.get_container_client(
    container=graph_container)


def upload_blob(img_temp_file, target_file, properties):
    metadata = {
        "parent_document_name":
        base64.encodebytes(bytes(properties[0], 'utf8')).decode("utf-8")
    }
    blob_client = graph_container_client.upload_blob(name=target_file,
                                                     data=img_temp_file,
                                                     metadata=metadata,
                                                     overwrite=True)


def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Graphout Python HTTP trigger function processed a request.')
def run(self):
    """Performs the file validation process."""
    files_blob = BlobStorage(
        blob_storage_connection_string,
        base_imais,
        processed_imais,
        quarantined_imais,
    ).get_file_metadata_info()
    count_xml_files = len(files_blob)

    # init conditioner
    i = 0

    # loop to read all files within the received list
    # invoke function to go over xml files
    while i < count_xml_files:
        # get name of the file for processing
        file_name = files_blob[i]["file_name"]
        log.info(f"initializing file type validation: {file_name}")
        print(f"initializing file type validation: {file_name}")

        # read each file in a loop
        read_file_spec = BlobClient.from_connection_string(
            conn_str=blob_storage_connection_string,
            container_name=base_imais,
            blob_name=file_name,
        )

        # download file over stream
        # read entire file - blocking stream until completion
        # decode individual file to utf-8 - type [str]
        download_blob_file_stream = read_file_spec.download_blob()
        read_entire_file = download_blob_file_stream.readall()
        xml_file = read_entire_file.decode("utf-8")

        # parsing xml and creating object
        # finding root element
        # parsing string to xml
        tree = ET.ElementTree(ET.fromstring(xml_file))
        xml_data = tree.getroot()

        # converting to string using [utf-8]
        # converting string to dictionary
        # get cfe root element
        xml_to_str = ET.tostring(xml_data, encoding="utf-8", method="xml")
        data_dict = dict(xmltodict.parse(xml_to_str))

        # get the file type on the base containers
        # used to determine the type of the process
        get_file_type = list(data_dict.keys())[0]
        # print(get_file_type)
        log.info(f"file type: {get_file_type}")
        print(f"file type: {get_file_type}")

        ##################
        # model = CFe
        ##################
        if get_file_type != "CFe":
            # set connectivity to blob storage
            blob_service_client = BlobServiceClient.from_connection_string(
                conn_str=blob_storage_connection_string)

            # get name of the file for copy activity
            log.info(f"initializing copy of the file: {file_name}")
            print(f"initializing copy of the file: {file_name}")

            # build command to copy file []
            # concat strings to build base http address
            # container and file name
            source_blob = http_base_container + file_name
            copied_blob = blob_service_client.get_blob_client(
                quarantined_imais, file_name)
            log.info(f"destination container of copied file: {quarantined_imais}")
            print(f"destination container of copied file: {quarantined_imais}")

            ##############
            # copy started
            ##############
            start = time.time()
            copied_blob.start_copy_from_url(source_blob)
            props = copied_blob.get_blob_properties()
            status = props.copy.status
            log.info(f"time taken to copy file [secs]: {round(time.time() - start, 2)}")
            log.info("copy status: " + status)
            print(f"time taken to copy file [secs]: {round(time.time() - start, 2)}")
            print("copy status: " + status)

            if status != "success":
                props = copied_blob.get_blob_properties()
                print(props.copy.status)
                copy_id = props.copy.id
                copied_blob.abort_copy(copy_id)
                props = copied_blob.get_blob_properties()
                print(props.copy.status)

            ##############
            # delete started
            ##############
            # instantiate a container client
            container_client = blob_service_client.get_container_client(
                base_imais)
            log.info(f"base location of deletion process: {base_imais}")
            print(f"base location of deletion process: {base_imais}")

            # delete blob files
            log.info(
                f"initializing deletion of the file from base location: {file_name}")
            print(
                f"initializing deletion of the file from base location: {file_name}")
            start = time.time()
            container_client.delete_blobs(file_name)
            log.info(
                f"time taken to delete file from source in [secs]: {round(time.time() - start, 2)}")
            print(
                f"time taken to delete file from source in [secs]: {round(time.time() - start, 2)}")

        # finish entire process
        i += 1
import json
import config
import os, uuid
# spotipy is used below for the authentication token and the API client
import spotipy
import spotipy.util as util
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__

token = util.prompt_for_user_token(config.username,
                                   config.scope,
                                   client_id=config.client_id,
                                   client_secret=config.client_secret,
                                   redirect_uri=config.redirect_url)
sp = spotipy.Spotify(auth=token)

# blob client
container_name = 'spotifyusertracksforallplaylists'
blob_service_client = BlobServiceClient.from_connection_string(config.conn_str)
container_client = blob_service_client.create_container(container_name)

results = sp.user_playlists(config.username, limit=50)

playlists = []
for i, item in enumerate(results['items']):
    playlist_name = item['name']
    playlist_id = item['id']
    playlist_info = {}
    playlist_info['name'] = playlist_name
    playlist_info['id'] = playlist_id
    playlist_tracks = sp.user_playlist_tracks(config.username, playlist_id)
def __init__(self, container_name):
    self.container_name = container_name
    self.blob_service_client = BlobServiceClient.from_connection_string(
        env.CONNECTION_STRING)
def _refresh_containers_cache_file(connection_string,
                                   container,
                                   cache_file,
                                   multiple_env=False,
                                   environment="base"):
    """
    .. versionadded:: Sodium

    Downloads the entire contents of an Azure storage container to the local filesystem.

    :param connection_string: The connection string to use to access the specified
        Azure Blob Container.

    :param container: The name of the target Azure Blob Container.

    :param cache_file: The path of where the file will be cached.

    :param multiple_env: Specifies whether the pillar should interpret top level
        folders as pillar environments.

    :param environment: Specifies which environment the container represents when
        in single environment mode. This is ignored if multiple_env is set as True.
    """
    try:
        # Create the BlobServiceClient object which will be used to create a container client
        blob_service_client = BlobServiceClient.from_connection_string(
            connection_string)

        # Create the ContainerClient object
        container_client = blob_service_client.get_container_client(container)
    except Exception as exc:  # pylint: disable=broad-except
        log.error("Exception: %s", exc)
        return False

    metadata = {}

    def _walk_blobs(saltenv="base", prefix=None):
        # Walk the blobs in the container with a generator
        blob_list = container_client.walk_blobs(name_starts_with=prefix)

        # Iterate over the generator
        while True:
            try:
                blob = next(blob_list)
            except StopIteration:
                break

            log.debug("Raw blob attributes: %s", blob)

            # Directories end with "/".
            if blob.name.endswith("/"):
                # Recurse into the directory
                _walk_blobs(prefix=blob.name)
                continue

            if multiple_env:
                saltenv = "base" if (not prefix or prefix == ".") else prefix[:-1]

            if saltenv not in metadata:
                metadata[saltenv] = {}

            if container not in metadata[saltenv]:
                metadata[saltenv][container] = []

            metadata[saltenv][container].append(blob)

    _walk_blobs(saltenv=environment)

    # write the metadata to disk
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    log.debug("Writing Azure blobs pillar cache file")
    with salt.utils.files.fopen(cache_file, "wb") as fp_:
        pickle.dump(metadata, fp_)

    return metadata
def __init__(
    self,
    name: str,
    datasource_name: str,
    container: str,
    execution_engine: Optional[ExecutionEngine] = None,
    default_regex: Optional[dict] = None,
    sorters: Optional[list] = None,
    name_starts_with: str = "",
    delimiter: str = "/",
    azure_options: Optional[dict] = None,
    batch_spec_passthrough: Optional[dict] = None,
):
    """
    InferredAssetAzureDataConnector for connecting to Azure Blob Storage.

    Args:
        name (str): required name for data_connector
        datasource_name (str): required name for datasource
        container (str): container for Azure Blob Storage
        execution_engine (ExecutionEngine): optional reference to ExecutionEngine
        default_regex (dict): optional regex configuration for filtering data_references
        sorters (list): optional list of sorters for sorting data_references
        name_starts_with (str): Azure prefix
        delimiter (str): Azure delimiter
        azure_options (dict): wrapper object for **kwargs
        batch_spec_passthrough (dict): dictionary with keys that will be added directly to batch_spec
    """
    logger.debug(f'Constructing InferredAssetAzureDataConnector "{name}".')

    super().__init__(
        name=name,
        datasource_name=datasource_name,
        execution_engine=execution_engine,
        default_regex=default_regex,
        sorters=sorters,
        batch_spec_passthrough=batch_spec_passthrough,
    )

    self._container = container
    self._name_starts_with = FilePathDataConnector.sanitize_prefix(
        name_starts_with)
    self._delimiter = delimiter

    if azure_options is None:
        azure_options = {}

    # Thanks to schema validation, we are guaranteed to have one of `conn_str` or `account_url` to
    # use in authentication (but not both). If the format or content of the provided keys is invalid,
    # the assignment of `self._account_name` and `self._azure` will fail and an error will be raised.
    conn_str: Optional[str] = azure_options.get("conn_str")
    account_url: Optional[str] = azure_options.get("account_url")
    assert bool(conn_str) ^ bool(
        account_url
    ), "You must provide one of `conn_str` or `account_url` to the `azure_options` key in your config (but not both)"

    try:
        if conn_str is not None:
            self._account_name = re.search(r".*?AccountName=(.+?);.*?",
                                           conn_str).group(1)
            self._azure = BlobServiceClient.from_connection_string(
                **azure_options)
        elif account_url is not None:
            self._account_name = re.search(
                r"(?:https?://)?(.+?).blob.core.windows.net",
                account_url).group(1)
            self._azure = BlobServiceClient(**azure_options)
    except (TypeError, AttributeError):
        raise ImportError(
            "Unable to load Azure BlobServiceClient (it is required for InferredAssetAzureDataConnector). \
            Please ensure that you have provided the appropriate keys to `azure_options` for authentication."
        )
def __init__(self, container='ackbarstorage'):
    self.container = container
    self.conn_str = os.environ[
        'AML_PARAMETER_AZURE_STORAGE_CONNECTION_STRING']
    self.blob_service_client = BlobServiceClient.from_connection_string(
        self.conn_str)
def __init__(self):
    """
    Store BlobServiceClient
    """
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    self.client = BlobServiceClient.from_connection_string(connect_str)
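# A hedged sketch of a download helper that a wrapper like the one above could
# expose; the method name and the container/blob arguments are illustrative
# assumptions, not part of the original example.
def download_bytes(self, container_name, blob_name):
    # fetch a blob's content as bytes via the stored BlobServiceClient
    blob_client = self.client.get_blob_client(container=container_name,
                                              blob=blob_name)
    return blob_client.download_blob().readall()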
import logging
import sys
import uuid

from azure.storage.blob import BlobServiceClient

root = logging.getLogger()
root.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
root.addHandler(handler)

if __name__ == "__main__":
    local_file_name = str(uuid.uuid4()) + ".txt"
    root.info("file name: {}".format(local_file_name))

    blob_service_client = BlobServiceClient.from_connection_string(
        'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1O'
        'UzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;'
        'DefaultEndpointsProtocol=http;'
        'BlobEndpoint={}/devstoreaccount1'.format("http://0.0.0.0:10000"),
        logging_enable=True)

    container_client = blob_service_client.get_container_client("test")
    try:
        container_client.create_container()
    except Exception:
        # ignore errors if container exists.
        pass

    blob_client = blob_service_client.get_blob_client(container="test",
                                                      blob=local_file_name)
    data = b'a' * 4 * 1024 * 1024
    blob_client.upload_blob(data, blob_type="BlockBlob")

    list_response = container_client.list_blobs()
    for l in list_response:
def get_container_client(connectionStr, containerName):
    serviceClient = BlobServiceClient.from_connection_string(connectionStr)
    containerClient = serviceClient.get_container_client(containerName)
    return containerClient, serviceClient
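# A minimal usage sketch for the helper above, assuming the connection string
# lives in the AZURE_STORAGE_CONNECTION_STRING environment variable and a
# container named "example-container" exists (both are assumptions).
import os

conn_str = os.environ["AZURE_STORAGE_CONNECTION_STRING"]  # assumed env var
container_client, service_client = get_container_client(conn_str,
                                                         "example-container")
for blob in container_client.list_blobs():
    print(blob.name)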
def connect(self):
    log.debug(output_messages['DEBUG_CONNECTING_TO_STORAGE'] % self._storage_type,
              class_name=AZURE_STORAGE_NAME)
    try:
        self._storage = BlobServiceClient.from_connection_string(
            self._account, connection_timeout=300)
    except Exception:
        raise RuntimeError(output_messages['INFO_UNABLE_AZURE_CONNECTION'])
epoch = datetime.utcfromtimestamp(0)

auth_email = os.environ['auth_contact_email']

app = FastAPI(
    title="Work Zone Data Collection Tool Rest API",
    description='This API hosts work zone data collected by the WZDC ' +
    '(work zone data collection) tool. This data includes RSM messages, both in xml and uper (binary) formats. This API ' +
    f'requires an API key in the header. Contact <a href="mailto: {auth_email}">{auth_email}</a> for more information on how to acquire and use an API key.',
    docs_url="/",
)

storage_conn_str = os.environ['storage_connection_string']
sql_conn_str = os.environ['sql_connection_string']

blob_service_client = BlobServiceClient.from_connection_string(
    storage_conn_str)

cnxn = pyodbc.connect(sql_conn_str)
cursor = cnxn.cursor()

storedProcFindKey = os.environ['stored_procedure_find_key']
# exec create_token @token_hash = '{0}', @type = '{1}', @expires = '{2}'
storedProcCreateToken = os.environ['stored_procedure_create_token']
storedProcFindToken = os.environ['stored_procedure_find_token']

authorization_key_header = 'auth_key'

container_name = os.environ['source_container_name']

file_types_dict = {
    'rsm-xml': {
def procesaExcel():
    class TabConfigSys:
        llave_Config1 = None
        llave_Config2 = None
        llave_Config3 = None
        llave_Config4 = None
        llave_Config5 = None

    class ObjetoJava:
        Parametros = None
        NombreSp = None
        Aplicativo = None
        DataBase = None

    config = TabConfigSys()
    config.llave_Config1 = 'SERVICIO'
    config.llave_Config2 = 'CONFIGURACION'
    config.llave_Config3 = 'SERVIDOR'
    config.llave_Config4 = 'URL'
    config.llave_Config5 = 'CONECTION'

    objJava = ObjetoJava()
    objJava.Parametros = json.dumps(config.__dict__)
    objJava.NombreSp = 'Configuracion'
    objJava.Aplicativo = APLICATIVO

    data = json.dumps(objJava.__dict__)
    headers = {'content-type': 'application/json'}
    r = requests.post(url=API_ENDPOINT, data=data, headers=headers)
    if r.status_code == requests.codes.ok:
        results = json.loads(r.text)
    else:
        print('Error al consultar el api')

    conn_str = results[0]["Dato_Char1"]
    container_name = results[0]["Dato_Char2"]

    blob_service_client = BlobServiceClient.from_connection_string(
        conn_str=conn_str)
    container = blob_service_client.get_container_client(
        container=container_name)
    generator = container.list_blobs()

    class ClientesDomiciliados:
        Identificacion = None
        EstadoExclusion = None
        AreaSolicitante = None
        FechaVigenciaExclu = None
        UsrModifica = None

    for blobs in generator:
        blob_client = blob_service_client.get_blob_client(
            container=container_name, blob=blobs.name)
        df = pnd.read_excel(blob_client.download_blob().readall(),
                            sheet_name=results[0]["Dato_Char3"])
        # print(df.columns.values[1])
        for _, row in df.iterrows():
            objClientDomi = ClientesDomiciliados()
            objClientDomi.Identificacion = row['Identificacion']
            objClientDomi.EstadoExclusion = row['EstadoExclusion']
            objClientDomi.AreaSolicitante = row['AreaSolicitante']
            objClientDomi.FechaVigenciaExclu = row[
                'FechaVigenciaExclu']._date_repr
            objClientDomi.UsrModifica = 'ServProcesaExcel'
            # print(objClientDomi.Identificacion)

            objJava = ObjetoJava()
            objJava.Parametros = json.dumps(objClientDomi.__dict__)
            objJava.NombreSp = 'ExclusionDomiciliaciones'
            objJava.Aplicativo = APLICATIVO

            data = json.dumps(objJava.__dict__)
            headers = {'content-type': 'application/json'}
            r = requests.post(url=API_ENDPOINT, data=data, headers=headers)
            if r.status_code == requests.codes.ok:
                results = json.loads(r.text)
                # print(results)
            else:
                print('Error al consultar el api')
def __init__(self, parent, schema, name, endpoint=""):
    super().__init__(parent, name, schema, endpoint)
    con_string = self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING")
    if con_string:
        self.bsc = BlobServiceClient.from_connection_string(con_string)
# Define variables
container_name = "upload-test-6"
upload_folder = "/home/magnus/Downloads/upload_f"

# Read config
config = configparser.ConfigParser()
current_file_path = Path(os.path.abspath(__file__))
folder_directory = current_file_path.parent.parent
config_path = os.path.join(folder_directory, "azure-config.ini")
config.read(config_path)

# Assign variables from config
blob_account_url = config["azure_blob_storage"]["connection_string"]

# List all containers in storage account
blob_service_client = BlobServiceClient.from_connection_string(
    conn_str=blob_account_url)
all_containers = blob_service_client.list_containers()
container_list = []
print("\n\nContainers in storage account:")
for container in all_containers:
    print("\t" + container["name"])
    container_list.append(container["name"])

# List all files in upload folder
print("\nFiles in folder", upload_folder + ":")
for file_name in os.listdir(upload_folder):
    print("\t" + file_name)


# Upload files
def upload_blob(upload_file_path):
def main():
    """Main function"""
    # Get args
    args = get_args()

    # container
    container_in = args.container_in
    container_out = args.container_out

    # Azure credentials
    sas_token = args.sas
    storage_account_name = args.storage

    azure_accounts = list()
    azure_accounts.append({
        "storage": storage_account_name,
        "sas": sas_token,
        "container": container_in
    })
    azure_accounts.append({
        "storage": storage_account_name,
        "sas": sas_token,
        "container": container_out
    })

    oauth_login = args.oauth_login
    oauth_client_id = args.oauth_client_id
    oauth_client_secret = args.oauth_client_secret

    # requires hadoop 3.2+
    # azure_oauth = {
    #     "endpoint": oauth_login,
    #     "client-id": oauth_client_id,
    #     "client-secret": oauth_client_secret
    # }
    azure_oauth = False

    # VM
    cores = args.vm_cores
    ram = args.vm_ram
    shuffle_partitions = args.shuffle_partitions

    # Date, state
    country = args.country
    state = args.state

    # process config
    roam_dist_stops = args.roam_dist_stops
    roam_dist_events = args.roam_dist_events

    # Path in - path out
    blob_in = f"wasbs://{container_in}@{storage_account_name}.blob.core.windows.net/stoplocation-v8_r70-s5-a70-h6/country={country}/year=2020/"
    if azure_oauth:
        # we can leverage abfss
        blob_in = f"abfss://{container_in}@{storage_account_name}.dfs.core.windows.net/stoplocation-v8_r70-s5-a70-h6/country={country}/year=2020/"

    timezones_in = "abfss://[email protected]/utils_states_timezones/"
    path_out_distinct = f"distinct_user_clusters-v8_r70-s5-a70-h6_clustered_{roam_dist_stops}m/country={country}"
    path_out_all = f"all_user_clusters-v8_r70-s5-a70-h6_clustered_{roam_dist_stops}m/country={country}"

    # config spark
    conf = getSparkConfig(cores, ram, shuffle_partitions, azure_accounts,
                          azure_oauth)

    # set prop for handling partition columns as strings (fixes prefixes as int)
    conf.set("spark.sql.sources.partitionColumnTypeInference.enabled", "false")

    # Create spark session
    sc = SparkContext(conf=conf).getOrCreate()
    sqlContext = SQLContext(sc)
    spark = sqlContext.sparkSession

    # Init azure client
    blob_service_client = BlobServiceClient.from_connection_string(
        CONN_STRING.format(storage_account_name, sas_token))

    # build keys, date is mandatory, prefix opt
    partition_key = f"state={state}"

    print("process " + partition_key)
    start_time = time.time()
    local_dir = LOCAL_PATH + partition_key
    print("write temp to " + local_dir)

    # cleanup local if exists
    if (os.path.isdir(local_dir)):
        map(os.unlink, (os.path.join(local_dir, f)
                        for f in os.listdir(local_dir)))

    # Input dataset
    print("read dataset table")
    read_time = time.time()

    dfs = spark.read.format("parquet").load(blob_in)
    dfs_timezones = spark.read.format("parquet").load(timezones_in)

    # apply partition filter
    dfs_state = dfs.where(f"state = '{state}'")

    print("processing with spark")
    spark_time = time.time()

    w = Window().partitionBy('userId').orderBy('begin')
    dfs_state = add_distance_column(dfs_state, order_column='begin')
    dfs_state = dfs_state.fillna(0, subset=['next_travelled_distance'])
    dfs_state = dfs_state.withColumn(
        'lag_next_travelled_distance',
        F.lag(col('next_travelled_distance')).over(w))
    dfs_state = dfs_state.withColumn('lag_end', F.lag('end').over(w))
    dfs_state = dfs_state.withColumn(
        'rn',
        F.when(
            ((col('lag_next_travelled_distance') != col('prev_travelled_distance')) |
             (col('prev_travelled_distance') > 0) |
             (col('lag_next_travelled_distance') > 0) |
             (col('distance_prev') > roam_dist_events) |
             ((F.dayofyear(col('begin')) - F.dayofyear(col('lag_end')) == 1) &
              (F.hour(col('begin')) < 6))) &
            ((col('lag_end').isNull()) | (col('lag_end') < col('begin'))),
            1).otherwise(0))

    # Remove prev_travelled_distance when rn == 0 (it happens when lag_end and begin overlap)
    dfs_state = dfs_state.withColumn(
        'prev_travelled_distance',
        F.when(col('rn') == 0, 0).otherwise(col('prev_travelled_distance')))

    w = Window().partitionBy('userId').orderBy('begin').rangeBetween(
        Window.unboundedPreceding, 0)
    dfs_state = dfs_state.withColumn('group', F.sum('rn').over(w))
    dfs_state = dfs_state.groupBy('userId', 'group', 'state').agg(
        F.mean('latitude').alias('latitude'),
        F.mean('longitude').alias('longitude'),
        F.min('begin').alias('begin'),
        F.max('end').alias('end')).drop('group')

    dfs_destinations = get_destinations(dfs_state, roam_dist=roam_dist_stops)
    dfs_destinations = dfs_destinations.withColumn(
        'prefix', dfs_destinations.userId.substr(1, 2))
    dfs_destinations = dfs_destinations.withColumn(
        'dayofyear', F.dayofyear('begin'))
    dfs_destinations = dfs_destinations.withColumn('year', F.year('begin'))
    # dfs_destinations = dfs_destinations.withColumn('state', F.lit(state))

    # Local time
    dfs_destinations.createOrReplaceTempView("dfs_destinations")
    dfs_destinations = spark.sql("""
        SELECT dfs_destinations_distinct.*,
               geohash(clusterLatitude, clusterLongitude, 7) as geohash7
        from dfs_destinations
        """)
    dfs_destinations = dfs_destinations.withColumn(
        'geohash5', F.substring(col('geohash7'), 1, 5))
    dfs_destinations = dfs_destinations.join(
        F.broadcast(dfs_timezones), on='geohash5').drop('geohash5')
    dfs_destinations = dfs_destinations.withColumn(
        'local_begin', F.from_utc_timestamp(col('begin'), col('tzid')))
    dfs_destinations = dfs_destinations.withColumn(
        'offset',
        ((col('local_begin').cast('long') - col('begin').cast('long')) /
         3600).cast('int')).drop('local_begin')

    dfs_destinations.persist(StorageLevel.DISK_ONLY)

    # Write
    local_dir_all = local_dir + "/all/"
    dfs_destinations_all = dfs_destinations.select(
        'prefix', 'userId', 'clusterId', 'begin', 'end', 'offset', 'year',
        'dayofyear')
    dfs_destinations_all.repartition(256, "prefix", "year", "dayofyear").write.partitionBy(
        "prefix", "year", "dayofyear").format('parquet').mode('overwrite').save(local_dir_all)

    local_dir_distinct = local_dir + "/distinct/"
    dfs_destinations_distinct = dfs_destinations.select(
        'prefix', 'userId', 'clusterId', 'clusterLatitude', 'clusterLongitude',
        'geohash7', 'year').distinct()
    dfs_destinations_distinct.repartition(256, "prefix", "year").write.partitionBy(
        "prefix", "year").format('parquet').mode('overwrite').save(local_dir_distinct)

    dfs_destinations.unpersist()

    print("upload local data to azure")
    upload_time = time.time()

    # upload parts 1 "prefix/year"
    print(f"upload files for distinct")
    # upload with threads
    dfutures = []
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        years = [2020]
        s_key = f"state={state}"
        for fprefix in enumerate_prefixes():
            print(f"upload files for distinct: {fprefix}")
            prefix_dir = local_dir_distinct + "prefix=" + fprefix
            prefix_key = f"prefix={fprefix}"

            for fyear in years:
                f_dir = prefix_dir + "/year=" + str(fyear)
                f_key = prefix_key + "/year=" + str(fyear)

                # print(f"read files for distinct from {f_dir}")
                if (os.path.isdir(f_dir)):
                    files = [filename for filename in os.listdir(f_dir)
                             if filename.startswith("part-")]

                    if len(files) > 0:
                        for file_local in files:
                            file_path = f_dir + "/" + file_local
                            part_num = int(file_local.split('-')[1])
                            part_key = '{:05d}'.format(part_num)
                            # fix name as static hash to be reproducible
                            filename_hash = hashlib.sha1(
                                str.encode(f_key + s_key + part_key)).hexdigest()

                            blob_key = "{}/{}/{}/part-{}-{}.snappy.parquet".format(
                                path_out_distinct, f_key, s_key, part_key,
                                filename_hash)

                            # print("upload " + file_path + " to " + container_out + ":" + blob_key)
                            # upload_blob(blob_service_client, container_out, blob_key, file_path)
                            future = executor.submit(
                                upload_blob, blob_service_client,
                                container_out, blob_key, file_path)
                            dfutures.append(future)
                    # else:
                    #     print(f"no files to upload for {f_key}")
                # else:
                #     print(f"missing partition for {f_key}")

        # end of loop, wait for futures
        for future in dfutures:
            bkey = future.result()

    # ensure we wait all tasks
    # TODO check if all done
    ddone = concurrent.futures.wait(dfutures)

    # upload parts 2 "prefix/year/dayofyear"
    print(f"upload files for all")
    years = [2020]
    s_key = f"state={state}"
    # upload with threads
    afutures = []
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        for fprefix in enumerate_prefixes():
            print(f"upload files for all: {fprefix}")
            prefix_dir = local_dir_all + "prefix=" + fprefix
            prefix_key = f"prefix={fprefix}"

            for fyear in years:
                f_dir = prefix_dir + "/year=" + str(fyear)
                f_key = prefix_key + "/year=" + str(fyear)

                # print(f"read files for all from {f_dir}")
                for fday in range(1, 367):
                    d_dir = f_dir + "/dayofyear=" + str(fday)
                    d_key = f_key + "/dayofyear=" + str(fday)

                    # print(f"read files for all from {d_dir}")
                    if (os.path.isdir(d_dir)):
                        files = [filename for filename in os.listdir(d_dir)
                                 if filename.startswith("part-")]

                        if len(files) > 0:
                            for file_local in files:
                                file_path = d_dir + "/" + file_local
                                part_num = int(file_local.split('-')[1])
                                part_key = '{:05d}'.format(part_num)
                                # fix name as static hash to be reproducible
                                filename_hash = hashlib.sha1(
                                    str.encode(d_key + s_key + part_key)).hexdigest()

                                blob_key = "{}/{}/{}/part-{}-{}.snappy.parquet".format(
                                    path_out_all, d_key, s_key, part_key,
                                    filename_hash)

                                # print("upload " + file_path + " to " + container_out + ":" + blob_key)
                                # upload_blob(blob_service_client, container_out, blob_key, file_path)
                                future = executor.submit(
                                    upload_blob, blob_service_client,
                                    container_out, blob_key, file_path)
                                afutures.append(future)
                        # else:
                        #     print(f"no files to upload for {d_key}")
                    # else:
                    #     print(f"missing partition for {d_key}")

        # end of loop, wait for futures
        for future in afutures:
            bkey = future.result()

    # ensure we wait all tasks
    # TODO check if all done
    adone = concurrent.futures.wait(afutures)

    print("--- {} seconds elapsed ---".format(int(time.time() - start_time)))
    print()
    shutdown_time = time.time()
    spark.stop()

    end_time = time.time()
    print("Done in {} seconds (read:{} spark:{} upload:{} shutdown:{})".format(
        int(end_time - start_time),
        int(spark_time - read_time),
        int(upload_time - spark_time),
        int(shutdown_time - upload_time),
        int(end_time - shutdown_time)))
    print('Done.')
# Pass in a container name and get names of all blobs in the container.
import os

from dotenv import load_dotenv
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

load_dotenv()
ConnectionString = os.getenv("ConnectionString")

# """The below code prints all the blobs in the container(bucket)"""
blob_service_client = BlobServiceClient.from_connection_string(
    ConnectionString)


def blobs(container_name):
    """
    Returns blob list from azure blob storage container.

    Usage :
    ```
    for blob in blobs("container_name"):
        print("\\t" + blob.name)
    ```
    """
    container_client = blob_service_client.get_container_client(container_name)
    blob_list = container_client.list_blobs()
    return blob_list


def getData(container_name, blob_name):
logger.info("Source: " + sourceProtocol + " path:" + sourcePath) logger.info("Destination " + destinationProtocol + " path:" + destinationPath) protocolConnection = "https" if "RD_CONFIG_ACCOUNT_NAME" in os.environ: account_name = os.environ["RD_CONFIG_ACCOUNT_NAME"] if "RD_CONFIG_ACCESS_KEY" in os.environ: access_key = os.environ["RD_CONFIG_ACCESS_KEY"] if "RD_CONFIG_PROTOCOL" in os.environ: protocolConnection = os.environ["RD_CONFIG_PROTOCOL"] connection_string = "DefaultEndpointsProtocol={};AccountName={};AccountKey={};EndpointSuffix=core.windows.net".format( protocolConnection, account_name, access_key) blob_service_client = BlobServiceClient.from_connection_string(conn_str=connection_string, logging_enable=True) container_client = blob_service_client.get_container_client(args.container) try: container_client.create_container() except: logger.info("Container exists") logger.info("") if sourceProtocol == "azure": source_list = get_blobs_from_container(sourcePath) else: source_list = get_files_from_folder(sourcePath) if destinationProtocol == "azure":
def blob_connect():
    connect_string = 'DefaultEndpointsProtocol=https;AccountName=respacimages;AccountKey=ges4SuaECA10B++lZjlNfhTTorcRkqZXH9+PmyaBG6kFCWH2esd3dE5KHlp63hkHNCPw2cT7bv/bfu2TyRFJEg==;EndpointSuffix=core.windows.net'
    return BlobServiceClient.from_connection_string(connect_string)
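# A hypothetical follow-up showing how blob_connect() might be used to upload a
# local file; the container name "images" and the file path are assumptions,
# not part of the original example.
service_client = blob_connect()
blob_client = service_client.get_blob_client(container="images",
                                             blob="example.jpg")
with open("example.jpg", "rb") as data:
    blob_client.upload_blob(data, overwrite=True)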
def __init__(self):
    self.azure_service_client = BlobServiceClient.from_connection_string(
        settings.DIFFGRAM_AZURE_CONNECTION_STRING)
    self.azure_container_name = settings.DIFFGRAM_AZURE_CONTAINER_NAME
    self.azure_container_name_ml = settings.ML__DIFFGRAM_AZURE_CONTAINER_NAME
def __init__(self):
    # client for connecting to the blob container
    self.blob_service_client = BlobServiceClient.from_connection_string(
        connect_str)
    self.container_client = self.blob_service_client.get_container_client(
        BLOB_CONTAINER)
def upload_image(wine_id):
    # Credit: https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python
    # Retrieve the connection string for use with the application. The storage
    # connection string is stored in an environment variable on the machine
    # running the application called AZURE_STORAGE_CONNECTION_STRING. If the
    # environment variable is created after the application is launched in a
    # console or with Visual Studio, the shell or application needs to be
    # closed and reloaded to take the environment variable into account.
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')

    # Get the user input image file
    # Credit: https://pythonise.com/series/learning-flask/flask-uploading-files
    if request.method == "POST":
        if request.files:
            image = request.files["filename"]
            if image.filename == "":
                the_wine = mongo.db.wines.find_one({"_id": ObjectId(wine_id)})
                return render_template('image_upload.html',
                                       wine=the_wine,
                                       upload_error='No image selected',
                                       user_name='User: ' + session['username'])
            if allowed_image(image.filename):
                filename = secure_filename(image.filename)
                image.save(os.path.join(app.config["IMAGE_UPLOADS"], filename))
            else:
                print("That file extension is not allowed")
                the_wine = mongo.db.wines.find_one({"_id": ObjectId(wine_id)})
                return render_template(
                    'image_upload.html',
                    wine=the_wine,
                    upload_error='Incorrect file type selected - must be: "JPEG", "JPG", "PNG" or "GIF"',
                    user_name='User: ' + session['username'])

    # Get static file and save to upload_images directory to upload
    local_path = "./upload_images"
    local_file_name = filename
    upload_file_path = os.path.join(local_path, local_file_name)

    # Create the BlobServiceClient object which will be used to create a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)

    # Set the name for the container
    container_name = "caveduvins"
    container_client = ContainerClient.from_connection_string(
        conn_str=connect_str, container_name=container_name)

    # Set the upload file name
    upload_file_name = wine_id + str(uuid.uuid4()) + ".jpg"

    # Create a blob client using the local file name as the name for the blob
    blob_client = blob_service_client.get_blob_client(container=container_name,
                                                      blob=upload_file_name)

    # Upload the created file
    with open(upload_file_path, "rb") as data:
        blob_client.upload_blob(data)

    # Delete the file from upload_images directory
    os.remove(upload_file_path)

    # create a url for the image
    image_url = "https://mystorageacct180671.blob.core.windows.net/" + container_name + "/" + upload_file_name
    wineid = wine_id
    flash("Image uploaded")

    if 'username' in session:
        user_return = 'User: ' + session['username']
    else:
        user_return = 'Cave du Vins'

    return render_template(
        "index.html",
        update=mongo.db.wines.update(
            {'_id': ObjectId(wineid)},
            # Credit: https://stackoverflow.com/questions/10290621/
            # how-do-i-partially-update-an-object-in-mongodb-so-the-new-
            # object-will-overlay
            {"$set": {
                'photo_url': image_url
            }}),
        user_name=user_return,
        colours=mongo.db.colours.find(),
        country=mongo.db.country.find(),
        region=mongo.db.region.find(),
        grape=mongo.db.grape.find(),
        results_winename="",
        results_vintage="",
        results_colour="",
        results_country="",
        results_region="",
        results_grape="",
        results=mongo.db.wines.find({'_id': ObjectId(wineid)}))
# pip install azure-storage-blob
from azure.storage.blob import ContainerClient, BlobServiceClient, BlobClient, StandardBlobTier, PremiumPageBlobTier

cs = ""

block_service_client: BlobServiceClient = BlobServiceClient.from_connection_string(
    cs)
account_info = block_service_client.get_account_information()
print(account_info)

CONTAINER_NAME = "kontener1"
BLOB_NAME = "auto.jpg"

try:
    container_client: ContainerClient = ContainerClient.from_connection_string(
        cs, CONTAINER_NAME)
    container_client.create_container()
    container_client.set_container_metadata({"departament": "IT"})
    print(container_client.get_container_properties().metadata)
except Exception as exc:
    print(exc)

blob_client: BlobClient = BlobClient.from_connection_string(
    conn_str=cs, container_name=CONTAINER_NAME, blob_name=BLOB_NAME)

# save the file
with open("../Dzien02/images/WY3371X.jpg", "rb") as fd:
    blob_client.upload_blob(fd, overwrite=True)

# blob_client.set_standard_blob_tier(StandardBlobTier.Cool)
# blob_client.set_premium_page_blob_tier(PremiumPageBlobTier.)
import azure.functions as func
import datetime
import logging

from typing import List
from azure.storage.blob import BlobServiceClient, BlobProperties

from config import DefaultConfig

CONFIG = DefaultConfig()

blob_service_client = None
try:
    blob_service_client = BlobServiceClient.from_connection_string(
        CONFIG.STORAGE_CONNECTION)
except Exception as e:
    logging.exception(e)


def main(mytimer: func.TimerRequest) -> None:
    # should be executed once an hour (0 0 */1 * * *)
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()

    if mytimer.past_due:
        logging.info('The timer is past due!')

    delete()

    logging.info('Python timer trigger function ran at %s', utc_timestamp)
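# A hypothetical sketch of the delete() helper invoked above; it is not part of
# the original snippet. It assumes CONFIG exposes a BLOB_CONTAINER attribute and
# that blobs older than one day should be removed.
def delete() -> None:
    if blob_service_client is None:
        return
    container_client = blob_service_client.get_container_client(
        CONFIG.BLOB_CONTAINER)  # assumed config attribute
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1)
    for blob in container_client.list_blobs():
        # BlobProperties.last_modified is timezone-aware, so it compares cleanly
        if blob.last_modified < cutoff:
            container_client.delete_blob(blob.name)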
    print(
        'The environment variable AZ_BLOB_CONNECTION_STRING could not be verified, this variable is required '
        'and needs to contain the Azure blob connection string')
    sys.exit(1)

# Verify AZ_BLOB_CONTAINER env variable, this is the blob container value
if not AZ_BLOB_CONTAINER:
    print(
        'The environment variable AZ_BLOB_CONTAINER could not be verified, this variable is required '
        'and needs to contain the value for Azure blob container')
    sys.exit(1)

# Attempt creating the blob container if does not exist already
# Create the BlobServiceClient object which will be used to create a container client
blob_service_client = BlobServiceClient.from_connection_string(
    AZ_BLOB_CONNECTION_STRING)

if __name__ == "__main__":
    searchable = False

    if len(sys.argv) != 4:
        sys.exit(
            'usage: python3 AzDownloadBlob.py <container_name> <blob_name> <target_file>')

    container_name = sys.argv[1]
    print("container is %s" % container_name)
    blob_name = sys.argv[2]
    print("blob_name is %s" % blob_name)
    target_file = sys.argv[3]
all_packages = get_targets(args.target_package_list)

working_directory = os.path.abspath(args.working_folder)
download_dir = os.path.join(working_directory, "download")
unzip_directory = os.path.join(working_directory, "unzip")
upload_directory = os.path.join(working_directory, "upload")

logging.info("Targeted Packages: {}".format(all_packages))
logging.info("Targeted Working Directory: {}".format(working_directory))

logging.info("Prepping Working Environment")
prep_env([download_dir, unzip_directory, upload_directory])

# download the sdist format
for specifier in all_packages:
    download_package(specifier, download_dir)

# unzip, tar
repackage_data(download_dir, unzip_directory, upload_directory)

# instantiate blob client and upload data
service = BlobServiceClient.from_connection_string(
    conn_str=args.connection_string)
container_client = service.get_container_client(DESTINATION_CONTAINER)
results = upload_data(upload_directory, container_client,
                      service.primary_endpoint)

# output URI links for each blob
logging.info("Uploaded {} sdists.".format(len(results)))
for uri in results:
    print(uri)
def main(mytimer: func.TimerRequest, outputBlob: func.Out[str]) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()

    url = 'https://www.vegvesen.no/ws/no/vegvesen/veg/trafikkpublikasjon/vaer/2/GetMeasuredWeatherData'
    user = os.environ['Vegvesen_user']
    pwd = os.environ['Vegvesen_pwd']

    blob_service_client = BlobServiceClient.from_connection_string(
        os.environ['Blockblob'])
    f = blob_service_client.get_blob_client("actuals",
                                            'weather_observations.json')

    road_ids = {"SN79791": 80, "SN84905": 323, "SN94195": 228}

    df_out = pd.DataFrame(columns=[
        'Station_id', 'observation_time', 'air_temp', 'relative_humidity',
        'dew_point_temp', 'wind_speed', 'wind_bearing', 'min_visibility_dist',
        'precipitation_intensity', 'road_friction', 'road_temp'
    ])

    response = requests.get(url, auth=(user, pwd))
    soup = BeautifulSoup(response.content, 'xml')

    for station_id, road_id in road_ids.items():
        site = soup.find('measurementSiteReference', id=road_id).parent
        df_out = df_out.append(
            {
                "Station_id": station_id,
                'observation_time': np.nan
                if site.find('measurementTimeDefault') is None else
                parse(site.find('measurementTimeDefault').get_text()).astimezone(
                    timezone('Etc/UTC')),
                'air_temp': np.nan if site.find('airTemperature') is None
                else site.find('airTemperature').string,
                'relative_humidity': np.nan if site.find('relativeHumidity') is None
                else site.find('relativeHumidity').string,
                'dew_point_temp': np.nan if site.find('dewPointTemperature') is None
                else site.find('dewPointTemperature').string,
                'wind_speed': np.nan if site.find('windSpeed') is None
                else site.find('windSpeed').string,
                'wind_bearing': np.nan if site.find('windDirectionBearing') is None
                else site.find('windDirectionBearing').string,
                'min_visibility_dist': np.nan if site.find('minimumVisibilityDistance') is None
                else site.find('minimumVisibilityDistance').string,
                'precipitation_intensity': np.nan if site.find('precipitationIntensity') is None
                else site.find('precipitationIntensity').string,
                'road_friction': np.nan if site.find('friction') is None
                else site.find('friction').string,
                'road_temp': np.nan if site.find('roadSurfaceTemperature') is None
                else site.find('roadSurfaceTemperature').string
            },
            ignore_index=True)

    outputBlob.set(
        df_out.to_json(orient='records', force_ascii=False, indent=2))
def main(argv):
    dimhelp = 'fragment size (samples) in {} direction'
    parser = argparse.ArgumentParser(
        prog='upload',
        description='Upload cubes to oneseismic storage',
        epilog='%(prog)s relies on azure connection strings, see {}'.format(
            'https://docs.microsoft.com/azure/storage/common/storage-configure-connection-string'
        ),
    )
    parser.add_argument('meta', type=str, help='metadata json')
    parser.add_argument('input', type=str, help='input SEG-Y file')
    parser.add_argument(
        '--subcube-dim-0',
        '-i',
        type=int,
        default=120,
        metavar='I',
        help=dimhelp.format('X'),
    )
    parser.add_argument(
        '--subcube-dim-1',
        '-j',
        type=int,
        default=120,
        metavar='J',
        help=dimhelp.format('Y'),
    )
    parser.add_argument(
        '--subcube-dim-2',
        '-k',
        type=int,
        default=120,
        metavar='K',
        help=dimhelp.format('Z'),
    )
    parser.add_argument(
        '--connection-string',
        '-s',
        metavar='',
        type=str,
        help='''
            Azure connection string for blob store auth. Can also be set
            with the env-var AZURE_CONNECTION_STRING
        ''',
    )

    args = parser.parse_args(argv)

    params = {
        'subcube-dims': (
            args.subcube_dim_0,
            args.subcube_dim_1,
            args.subcube_dim_2,
        ),
    }

    if args.meta == '-':
        meta = json.load(sys.stdin)
    else:
        with open(args.meta) as f:
            meta = json.load(f)

    connection_string = os.environ.get('AZURE_CONNECTION_STRING', None)
    if args.connection_string:
        connection_string = args.connection_string

    if connection_string is None:
        problem = 'No azure connection string'
        solution = 'use --connection-string or env-var AZURE_CONNECTION_STRING'
        sys.exit('{} - {}'.format(problem, solution))

    blob = BlobServiceClient.from_connection_string(connection_string)
    with open(args.input, 'rb') as input:
        upload(params, meta, input, blob)