Example #1
def spin_camera_loop(messenger, shared_mem_file):

    intervals_per_cam = dict()

    if camera_config["blob"] is not None:
        blob_service_client = BlobServiceClient.from_connection_string(
            camera_config["blob"])
        logging.info(
            f"Created blob service client: {blob_service_client.account_name}")

    while not received_twin_patch:

        for key, cam in camera_config["cameras"].items():

            if not cam["enabled"]:
                continue

            curtime = time.time()

            if key not in intervals_per_cam:
                intervals_per_cam[key] = dict()
                current_source = intervals_per_cam[key]
                current_source['timer'] = 0
                current_source['rtsp'] = cam['rtsp']
                current_source['interval'] = float(cam['interval'])
                current_source['video'] = VideoStream(cam['rtsp'],
                                                      float(cam['interval']))
                current_source['video'].start()

            # always look up the tracking entry for *this* camera so the timer
            # check below (which controls how long we wait between bursts of
            # activity) uses the right source rather than the last one created
            current_source = intervals_per_cam[key]
            video_streamer = current_source['video']

            # not enough time has passed since the last collection
            if curtime - current_source['timer'] < current_source['interval']:
                continue

            current_source['timer'] = curtime

            # block until we get something
            frame_id, img = video_streamer.get_frame_with_id()
            if img is None:
                logging.warning("No frame retrieved. Is video running?")
                continue

            logging.info(f"Grabbed frame {frame_id} from {cam['rtsp']}")

            camId = f"{cam['space']}/{key}"

            # send to blob storage and retrieve the timestamp by which we will identify the video
            curtimename = None
            perf = {}  # start empty so inference timings can be merged below even without a blob upload
            if camera_config["blob"] is not None:
                start_upload = time.time()
                curtimename, _ = send_img_to_blob(blob_service_client, img,
                                                  camId)
                total_upload = time.time() - start_upload
                perf = {"upload": total_upload}

            detections = []

            if cam['detector'] is not None and cam[
                    'inference'] is not None and cam['inference']:
                start_inf = time.time()
                res = infer(cam['detector'], img, frame_id, curtimename,
                            shared_mem_file)
                total_inf = time.time() - start_inf

                detections = res["detections"]
                perf = {**perf, **res["perf"]}
                perf["imgencode"] = total_inf - perf["imgprep"] - perf[
                    "detection"]
                logging.info(f"perf: {perf}")

            # message the image capture upstream
            if curtimename is not None:
                messenger.send_image_and_detection(camId, curtimename,
                                                   frame_id, detections)
                messenger.send_perf(camId, curtimename, frame_id, perf)
                logging.info(
                    f"Notified of image upload: {cam['rtsp']} to {cam['space']}"
                )

    # shutdown current video captures
    for key, cam in intervals_per_cam.items():
        cam['video'].stop()
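
The helper send_img_to_blob used above is not shown in this excerpt. A minimal sketch of what it might look like, assuming OpenCV JPEG encoding, a container named "frames", and a millisecond-timestamp blob name (all of these are assumptions, not the original implementation):

import time

import cv2
from azure.storage.blob import BlobServiceClient


def send_img_to_blob(blob_service_client: BlobServiceClient, img, cam_id: str):
    """Hypothetical helper: upload a frame as JPEG and return (timestamp_name, blob_url)."""
    # timestamp string used to identify the upload (assumption)
    curtimename = str(int(time.time() * 1000))
    ok, encoded = cv2.imencode(".jpg", img)
    if not ok:
        raise RuntimeError("JPEG encoding failed")
    # assumption: a single "frames" container, camera id and timestamp in the blob name
    blob_client = blob_service_client.get_blob_client(
        container="frames", blob=f"{cam_id}/{curtimename}.jpg")
    blob_client.upload_blob(encoded.tobytes(), overwrite=True)
    return curtimename, blob_client.url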
Example #2
from io import BytesIO
from PIL import Image
from pkcs7 import PKCS7Encoder

import _thread

import cv2
from azure.storage.blob import BlobServiceClient

import requests
import json
# from datetime import  timedelta, datetime, date, time as t2
# import time
from random import randint
# set to your own subscription key value
subscription_key = '99d0310d30c24046a148cbf795a34121'

blob_service_client = BlobServiceClient.from_connection_string(
    "DefaultEndpointsProtocol=https;AccountName=oneteamblob;AccountKey=qcv7bSwg5vFNZRt1gY9XLPcv6OWKdKakKCj5znpUQRNQTPAOkLbhnCuZpt/1m4Gc9f5tV55x0CEzcVWjCubTaQ==;EndpointSuffix=core.windows.net"
)
# cap = cv2.VideoCapture("rtsp://*****:*****@10.76.53.16:8554/stream0/out.h264")

# Create a unique name for the container
container_name = "facedetection"

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
left_eye_cascade = cv2.CascadeClassifier('haarcascade_lefteye_2splits.xml')
right_eye_cascade = cv2.CascadeClassifier('haarcascade_righteye_2splits.xml')
frontalface_alt = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')


def storeblob(name):
    print(name)
    blob_client = blob_service_client.get_blob_client(container=container_name,
Example #3
import logging
import azure.functions as func
import json
import os
import base64
from azure.storage.blob import BlobServiceClient

#
# Azure Blob Integration
#
graph_connection_string = os.environ["AzureGraphStorage"]
graph_container = os.environ["AzureGraphContainer"]

blob_service_client = BlobServiceClient.from_connection_string(
    conn_str=graph_connection_string)
graph_container_client = blob_service_client.get_container_client(
    container=graph_container)


def upload_blob(img_temp_file, target_file, properties):
    metadata = {
        "parent_document_name":
        base64.encodebytes(bytes(properties[0], 'utf8')).decode("utf-8")
    }
    blob_client = graph_container_client.upload_blob(name=target_file,
                                                     data=img_temp_file,
                                                     metadata=metadata,
                                                     overwrite=True)


def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Graphout Python HTTP trigger function processed a request.')
Example #4
    def run(self):
        """Performs the file validation process."""
        files_blob = BlobStorage(
            blob_storage_connection_string,
            base_imais,
            processed_imais,
            quarantined_imais,
        ).get_file_metadata_info()
        count_xml_files = len(files_blob)

        # initialize loop counter
        i = 0

        # loop to read all files within the received list
        # invoke function to go over xml files
        while i < count_xml_files:

            # get name of the file for processing
            file_name = files_blob[i]["file_name"]
            log.info(f"initializing file type validation: {file_name}")
            print(f"initializing file type validation: {file_name}")

            # read each file in a loop
            read_file_spec = BlobClient.from_connection_string(
                conn_str=blob_storage_connection_string,
                container_name=base_imais,
                blob_name=file_name,
            )

            # download file over stream
            # read entire file - blocking stream until completion
            # decode individual file to utf-8 - type [str]
            download_blob_file_stream = read_file_spec.download_blob()
            read_entire_file = download_blob_file_stream.readall()
            xml_file = read_entire_file.decode("utf-8")

            # parsing xml and creating object
            # finding root element
            # parsing string to xml
            tree = ET.ElementTree(ET.fromstring(xml_file))
            xml_data = tree.getroot()

            # converting to string using [utf-8]
            # converting string to dictionary
            # get cfe root element
            xml_to_str = ET.tostring(xml_data, encoding="utf-8", method="xml")
            data_dict = dict(xmltodict.parse(xml_to_str))

            # get the file type on the base containers
            # used to determine the type of the process
            get_file_type = list(data_dict.keys())[0]
            # print(get_file_type)
            log.info(f"file type: {get_file_type}")
            print(f"file type: {get_file_type}")

            ##################
            # model = CFe
            ##################

            if get_file_type != "CFe":

                # set connectivity to blob storage
                blob_service_client = BlobServiceClient.from_connection_string(
                    conn_str=blob_storage_connection_string)

                # get name of the file for copy activity
                log.info(f"initializing copy of the file: {file_name}")
                print(f"initializing copy of the file: {file_name}")

                # build command to copy file []
                # concat strings to build base http address
                # container and file name
                source_blob = http_base_container + file_name

                copied_blob = blob_service_client.get_blob_client(
                    quarantined_imais, file_name)
                log.info(
                    f"destination container of copied file: {quarantined_imais}"
                )
                print(
                    f"destination container of copied file: {quarantined_imais}"
                )

                ##############
                # copy started
                ##############
                start = time.time()
                copied_blob.start_copy_from_url(source_blob)
                props = copied_blob.get_blob_properties()
                status = props.copy.status
                log.info(
                    f"time taken to copy file [secs]: {round(time.time() - start, 2)}"
                )
                log.info("copy status: " + status)
                print(
                    f"time taken to copy file [secs]: {round(time.time() - start, 2)}"
                )
                print("copy status: " + status)

                if status != "success":
                    props = copied_blob.get_blob_properties()
                    print(props.copy.status)
                    copy_id = props.copy.id
                    copied_blob.abort_copy(copy_id)
                    props = copied_blob.get_blob_properties()
                    print(props.copy.status)

                ##############
                # delete started
                ##############
                # instantiate a container client
                container_client = blob_service_client.get_container_client(
                    base_imais)
                log.info(f"base location of deletion process: {base_imais}")
                print(f"base location of deletion process: {base_imais}")

                # delete blob files
                log.info(
                    f"initializing deletion of the file from base location: {file_name}"
                )
                print(
                    f"initializing deletion of the file from base location: {file_name}"
                )
                start = time.time()
                container_client.delete_blobs(file_name)
                log.info(
                    f"time taken to delete file from source in [secs]: {round(time.time() - start, 2)}"
                )
                print(
                    f"time taken to delete file from source in [secs]: {round(time.time() - start, 2)}"
                )

            # move on to the next file
            i += 1
Example #5
import json
import config
import os, uuid
import spotipy
import spotipy.util as util
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__

token = util.prompt_for_user_token(config.username,
                                   config.scope,
                                   client_id=config.client_id,
                                   client_secret=config.client_secret,
                                   redirect_uri=config.redirect_url)

sp = spotipy.Spotify(auth=token)

# blob client
container_name = 'spotifyusertracksforallplaylists'
blob_service_client = BlobServiceClient.from_connection_string(config.conn_str)

container_client = blob_service_client.create_container(container_name)

results = sp.user_playlists(config.username, limit=50)

playlists = []
for i, item in enumerate(results['items']):
    playlist_name = item['name']
    playlist_id = item['id']

    playlist_info = {}
    playlist_info['name'] = playlist_name
    playlist_info['id'] = playlist_id

    playlist_tracks = sp.user_playlist_tracks(config.username, playlist_id)
Example #6
 def __init__(self, container_name):
     self.container_name = container_name
     self.blob_service_client = BlobServiceClient.from_connection_string(
         env.CONNECTION_STRING)
Example #7
def _refresh_containers_cache_file(connection_string,
                                   container,
                                   cache_file,
                                   multiple_env=False,
                                   environment="base"):
    """
    .. versionadded:: Sodium

    Downloads the entire contents of an Azure storage container to the local filesystem.

    :param connection_string: The connection string to use to access the specified Azure Blob Container.

    :param container: The name of the target Azure Blob Container.

    :param cache_file: The path of where the file will be cached.

    :param multiple_env: Specifies whether the pillar should interpret top level folders as pillar environments.

    :param environment: Specifies which environment the container represents when in single environment mode. This is
        ignored if multiple_env is set as True.

    """
    try:
        # Create the BlobServiceClient object which will be used to create a container client
        blob_service_client = BlobServiceClient.from_connection_string(
            connection_string)

        # Create the ContainerClient object
        container_client = blob_service_client.get_container_client(container)
    except Exception as exc:  # pylint: disable=broad-except
        log.error("Exception: %s", exc)
        return False

    metadata = {}

    def _walk_blobs(saltenv="base", prefix=None):
        # Walk the blobs in the container with a generator
        blob_list = container_client.walk_blobs(name_starts_with=prefix)

        # Iterate over the generator
        while True:
            try:
                blob = next(blob_list)
            except StopIteration:
                break

            log.debug("Raw blob attributes: %s", blob)

            # Directories end with "/".
            if blob.name.endswith("/"):
                # Recurse into the directory
                _walk_blobs(prefix=blob.name)
                continue

            if multiple_env:
                saltenv = "base" if (not prefix
                                     or prefix == ".") else prefix[:-1]

            if saltenv not in metadata:
                metadata[saltenv] = {}

            if container not in metadata[saltenv]:
                metadata[saltenv][container] = []

            metadata[saltenv][container].append(blob)

    _walk_blobs(saltenv=environment)

    # write the metadata to disk
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    log.debug("Writing Azure blobs pillar cache file")

    with salt.utils.files.fopen(cache_file, "wb") as fp_:
        pickle.dump(metadata, fp_)

    return metadata
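
A hypothetical call to the function above; the connection string, container name, and cache path are all placeholders:

# Hypothetical usage; every argument value below is a placeholder.
metadata = _refresh_containers_cache_file(
    "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net",
    "pillar-data",
    "/var/cache/salt/master/pillar_azureblob/containers.p",
    multiple_env=False,
    environment="base",
)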
Example #8
    def __init__(
        self,
        name: str,
        datasource_name: str,
        container: str,
        execution_engine: Optional[ExecutionEngine] = None,
        default_regex: Optional[dict] = None,
        sorters: Optional[list] = None,
        name_starts_with: str = "",
        delimiter: str = "/",
        azure_options: Optional[dict] = None,
        batch_spec_passthrough: Optional[dict] = None,
    ):
        """
        InferredAssetAzureDataConnector for connecting to Azure Blob Storage.

        Args:
            name (str): required name for data_connector
            datasource_name (str): required name for datasource
            container (str): container for Azure Blob Storage
            execution_engine (ExecutionEngine): optional reference to ExecutionEngine
            default_regex (dict): optional regex configuration for filtering data_references
            sorters (list): optional list of sorters for sorting data_references
            name_starts_with (str): Azure prefix
            delimiter (str): Azure delimiter
            azure_options (dict): wrapper object for **kwargs
            batch_spec_passthrough (dict): dictionary with keys that will be added directly to batch_spec
        """
        logger.debug(f'Constructing InferredAssetAzureDataConnector "{name}".')

        super().__init__(
            name=name,
            datasource_name=datasource_name,
            execution_engine=execution_engine,
            default_regex=default_regex,
            sorters=sorters,
            batch_spec_passthrough=batch_spec_passthrough,
        )

        self._container = container
        self._name_starts_with = FilePathDataConnector.sanitize_prefix(
            name_starts_with)
        self._delimiter = delimiter

        if azure_options is None:
            azure_options = {}

        # Thanks to schema validation, we are guaranteed to have one of `conn_str` or `account_url` to
        # use in authentication (but not both). If the format or content of the provided keys is invalid,
        # the assignment of `self._account_name` and `self._azure` will fail and an error will be raised.
        conn_str: Optional[str] = azure_options.get("conn_str")
        account_url: Optional[str] = azure_options.get("account_url")
        assert bool(conn_str) ^ bool(
            account_url
        ), "You must provide one of `conn_str` or `account_url` to the `azure_options` key in your config (but not both)"

        try:
            if conn_str is not None:
                self._account_name = re.search(r".*?AccountName=(.+?);.*?",
                                               conn_str).group(1)
                self._azure = BlobServiceClient.from_connection_string(
                    **azure_options)
            elif account_url is not None:
                self._account_name = re.search(
                    r"(?:https?://)?(.+?).blob.core.windows.net",
                    account_url).group(1)
                self._azure = BlobServiceClient(**azure_options)
        except (TypeError, AttributeError):
            raise ImportError(
                "Unable to load Azure BlobServiceClient (it is required for InferredAssetAzureDataConnector). \
                Please ensure that you have provided the appropriate keys to `azure_options` for authentication."
            )
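
A hypothetical instantiation of the connector above; every value is a placeholder, and exactly one of conn_str or account_url may appear in azure_options:

# Sketch only: names, container, regex and connection string are illustrative placeholders.
connector = InferredAssetAzureDataConnector(
    name="my_azure_connector",
    datasource_name="my_datasource",
    container="my-container",
    name_starts_with="data/",
    default_regex={
        "pattern": r"(.*)\.csv",
        "group_names": ["data_asset_name"],
    },
    azure_options={
        "conn_str": "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net",
    },
)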
Example #9
 def __init__(self, container='ackbarstorage'):
     self.container = container
     self.conn_str = os.environ[
         'AML_PARAMETER_AZURE_STORAGE_CONNECTION_STRING']
     self.blob_service_client = BlobServiceClient.from_connection_string(
         self.conn_str)
Example #10
 def __init__(self):
     """ Store BlobServiceClient """
     connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
     self.client = BlobServiceClient.from_connection_string(connect_str)
Example #11
import logging
import sys
import uuid

from azure.storage.blob import BlobServiceClient

root = logging.getLogger()
root.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
root.addHandler(handler)

if __name__ == "__main__":
    local_file_name = str(uuid.uuid4()) + ".txt"
    root.info("file name: {}".format(local_file_name))

    blob_service_client = BlobServiceClient.from_connection_string(
        'AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1O'
        'UzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;'
        'DefaultEndpointsProtocol=http;'
        'BlobEndpoint={}/devstoreaccount1'.format("http://0.0.0.0:10000"),
        logging_enable=True)

    container_client = blob_service_client.get_container_client("test")
    try:
        container_client.create_container()
    except Exception:
        # ignore errors if container exists.
        pass
    blob_client = blob_service_client.get_blob_client(container="test",
                                                      blob=local_file_name)
    data = b'a' * 4 * 1024 * 1024
    blob_client.upload_blob(data, blob_type="BlockBlob")
    list_response = container_client.list_blobs()
    for l in list_response:
Example #12
def get_container_client(connectionStr, containerName):
    serviceClient = BlobServiceClient.from_connection_string(connectionStr)
    containerClient = serviceClient.get_container_client(containerName)

    return containerClient, serviceClient
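
A hypothetical call to the helper above; the connection string and container name are placeholders:

containerClient, serviceClient = get_container_client(
    "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net",
    "my-container")
for blob in containerClient.list_blobs():
    print(blob.name)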
Example #13
 def connect(self):
     log.debug(output_messages['DEBUG_CONNECTING_TO_STORAGE'] % self._storage_type, class_name=AZURE_STORAGE_NAME)
     try:
         self._storage = BlobServiceClient.from_connection_string(self._account, connection_timeout=300)
     except Exception:
         raise RuntimeError(output_messages['INFO_UNABLE_AZURE_CONNECTION'])
Example #14
epoch = datetime.utcfromtimestamp(0)

auth_email = os.environ['auth_contact_email']

app = FastAPI(
    title="Work Zone Data Collection Tool Rest API",
    description='This API hosts work zone data collected by the WZDC ' +
    '(work zone data collection) tool. This data includes RSM messages, both in xml and uper (binary) formats. This API '
    +
    f'requires an API key in the header. Contact <a href="mailto: {auth_email}">{auth_email}</a> for more information on how to acquire and use an API key.',
    docs_url="/",
)

storage_conn_str = os.environ['storage_connection_string']
sql_conn_str = os.environ['sql_connection_string']
blob_service_client = BlobServiceClient.from_connection_string(
    storage_conn_str)

cnxn = pyodbc.connect(sql_conn_str)
cursor = cnxn.cursor()

storedProcFindKey = os.environ['stored_procedure_find_key']
# exec create_token @token_hash = '{0}', @type = '{1}', @expires = '{2}'
storedProcCreateToken = os.environ['stored_procedure_create_token']
storedProcFindToken = os.environ['stored_procedure_find_token']

authorization_key_header = 'auth_key'

container_name = os.environ['source_container_name']

file_types_dict = {
    'rsm-xml': {
Example #15
def procesaExcel():
    class TabConfigSys:
        llave_Config1 = None
        llave_Config2 = None
        llave_Config3 = None
        llave_Config4 = None
        llave_Config5 = None

    class ObjetoJava:
        Parametros = None
        NombreSp = None
        Aplicativo = None
        DataBase = None

    config = TabConfigSys()
    config.llave_Config1 = 'SERVICIO'
    config.llave_Config2 = 'CONFIGURACION'
    config.llave_Config3 = 'SERVIDOR'
    config.llave_Config4 = 'URL'
    config.llave_Config5 = 'CONECTION'

    objJava = ObjetoJava()
    objJava.Parametros = json.dumps(config.__dict__)
    objJava.NombreSp = 'Configuracion'
    objJava.Aplicativo = APLICATIVO
    data = json.dumps(objJava.__dict__)

    headers = {'content-type': 'application/json'}
    r = requests.post(url=API_ENDPOINT, data=data, headers=headers)
    if r.status_code == requests.codes.ok:
        results = json.loads(r.text)
    else:
        print('Error calling the API')
        # cannot continue without the configuration results
        return

    conn_str = results[0]["Dato_Char1"]
    container_name = results[0]["Dato_Char2"]
    blob_service_client = BlobServiceClient.from_connection_string(
        conn_str=conn_str)
    container = blob_service_client.get_container_client(
        container=container_name)
    generator = container.list_blobs()

    class ClientesDomiciliados:
        Identificacion = None
        EstadoExclusion = None
        AreaSolicitante = None
        FechaVigenciaExclu = None
        UsrModifica = None

    for blobs in generator:
        blob_client = blob_service_client.get_blob_client(
            container=container_name, blob=blobs.name)
        df = pnd.read_excel(blob_client.download_blob().readall(),
                            sheet_name=results[0]["Dato_Char3"])
        #print( df.columns.values[1])
        for _, row in df.iterrows():
            objClientDomi = ClientesDomiciliados()
            objClientDomi.Identificacion = row['Identificacion']
            objClientDomi.EstadoExclusion = row['EstadoExclusion']
            objClientDomi.AreaSolicitante = row['AreaSolicitante']
            objClientDomi.FechaVigenciaExclu = row[
                'FechaVigenciaExclu']._date_repr
            objClientDomi.UsrModifica = 'ServProcesaExcel'
            #print(objClientDomi.Identificacion)
            objJava = ObjetoJava()
            objJava.Parametros = json.dumps(objClientDomi.__dict__)
            objJava.NombreSp = 'ExclusionDomiciliaciones'
            objJava.Aplicativo = APLICATIVO
            data = json.dumps(objJava.__dict__)
            headers = {'content-type': 'application/json'}
            r = requests.post(url=API_ENDPOINT, data=data, headers=headers)
            if r.status_code == requests.codes.ok:
                results = json.loads(r.text)
                #print(results)
            else:
                print('Error calling the API')
Example #16
    def __init__(self, parent, schema, name, endpoint=""):
        super().__init__(parent, name, schema, endpoint)

        con_string = self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING")
        if con_string:
            self.bsc = BlobServiceClient.from_connection_string(con_string)
Example #17
import os
import configparser
from pathlib import Path

from azure.storage.blob import BlobServiceClient

# Define variables
container_name = "upload-test-6"
upload_folder = "/home/magnus/Downloads/upload_f"

# Read config
config = configparser.ConfigParser()
current_file_path = Path(os.path.abspath(__file__))
folder_directory = current_file_path.parent.parent
config_path = os.path.join(folder_directory, "azure-config.ini")
config.read(config_path)

# Assign variables from config
blob_account_url = config["azure_blob_storage"]["connection_string"]

# List all containers in storage account
blob_service_client = BlobServiceClient.from_connection_string(
    conn_str=blob_account_url)
all_containers = blob_service_client.list_containers()
container_list = []
print("\n\nContainers in storage account:")
for container in all_containers:
    print("\t" + container["name"])
    container_list.append(container["name"])

# List all files in upload folder
print("\nFiles in folder", upload_folder + ":")
for file_name in os.listdir(upload_folder):
    print("\t" + file_name)


# Upload files
def upload_blob(upload_file_path):
Example #18
def main():
    """Main function"""

    # Get args
    args = get_args()

    # container
    container_in = args.container_in
    container_out = args.container_out

    # Azure credentials
    sas_token = args.sas
    storage_account_name = args.storage
    azure_accounts = list()
    azure_accounts.append({
        "storage": storage_account_name,
        "sas": sas_token,
        "container": container_in
    })
    azure_accounts.append({
        "storage": storage_account_name,
        "sas": sas_token,
        "container": container_out
    })

    oauth_login = args.oauth_login
    oauth_client_id = args.oauth_client_id
    oauth_client_secret = args.oauth_client_secret

    # requires hadoop 3.2+
    # azure_oauth = {
    #     "endpoint": oauth_login,
    #     "client-id": oauth_client_id,
    #     "client-secret": oauth_client_secret
    # }
    azure_oauth = False

    # VM
    cores = args.vm_cores
    ram = args.vm_ram
    shuffle_partitions = args.shuffle_partitions

    # Date, state
    country = args.country
    state = args.state

    # process config
    roam_dist_stops = args.roam_dist_stops
    roam_dist_events = args.roam_dist_events

    # Path in - path out
    blob_in = f"wasbs://{container_in}@{storage_account_name}.blob.core.windows.net/stoplocation-v8_r70-s5-a70-h6/country={country}/year=2020/"
    if azure_oauth:
        # we can leverage abfss
        blob_in = f"abfss://{container_in}@{storage_account_name}.dfs.core.windows.net/stoplocation-v8_r70-s5-a70-h6/country={country}/year=2020/"
    timezones_in = "abfss://[email protected]/utils_states_timezones/"

    path_out_distinct = f"distinct_user_clusters-v8_r70-s5-a70-h6_clustered_{roam_dist_stops}m/country={country}"
    path_out_all = f"all_user_clusters-v8_r70-s5-a70-h6_clustered_{roam_dist_stops}m/country={country}"

    # config spark
    conf = getSparkConfig(cores, ram, shuffle_partitions,
                          azure_accounts, azure_oauth)

    # set prop for handling partition columns as strings (fixes prefixes as int)
    conf.set("spark.sql.sources.partitionColumnTypeInference.enabled", "false")

    # Create spark session
    sc = SparkContext(conf=conf).getOrCreate()
    sqlContext = SQLContext(sc)
    spark = sqlContext.sparkSession

    # Init azure client
    blob_service_client = BlobServiceClient.from_connection_string(
        CONN_STRING.format(storage_account_name, sas_token))

    #  build keys, date is mandatory, prefix opt
    partition_key = f"state={state}"

    print("process "+partition_key)
    start_time = time.time()
    local_dir = LOCAL_PATH+partition_key
    print("write temp to "+local_dir)

    # cleanup local if exists (map() is lazy, so delete explicitly)
    if os.path.isdir(local_dir):
        for f in os.listdir(local_dir):
            os.unlink(os.path.join(local_dir, f))

    # Input dataset
    print("read dataset table")
    read_time = time.time()

    dfs = spark.read.format("parquet").load(blob_in)
    dfs_timezones = spark.read.format("parquet").load(timezones_in)

    # apply partition filter
    dfs_state = dfs.where(f"state = '{state}'")

    print("processing with spark")
    spark_time = time.time()

    w = Window().partitionBy('userId').orderBy('begin')

    dfs_state = add_distance_column(dfs_state, order_column='begin')
    dfs_state = dfs_state.fillna(0, subset=['next_travelled_distance'])
    dfs_state = dfs_state.withColumn('lag_next_travelled_distance', F.lag(
        col('next_travelled_distance')).over(w))
    dfs_state = dfs_state.withColumn('lag_end', F.lag('end').over(w))
    dfs_state = dfs_state.withColumn('rn', F.when(((col('lag_next_travelled_distance') != col('prev_travelled_distance')) |
                                                   (col('prev_travelled_distance') > 0) |
                                                   (col('lag_next_travelled_distance') > 0) |
                                                   (col('distance_prev') > roam_dist_events) |
                                                   ((F.dayofyear(col('begin')) - F.dayofyear(col('lag_end')) == 1) &
                                                    (F.hour(col('begin')) < 6))
                                                   ) &
                                                  ((col('lag_end').isNull()) | (col('lag_end') < col('begin'))), 1).otherwise(0))
    # Remove prev_travelled distance when rn == 0 (it happens when lag_end and begin overlap)
    dfs_state = dfs_state.withColumn('prev_travelled_distance', F.when(
        col('rn') == 0, 0).otherwise(col('prev_travelled_distance')))

    w = Window().partitionBy('userId').orderBy(
        'begin').rangeBetween(Window.unboundedPreceding, 0)

    dfs_state = dfs_state.withColumn('group', F.sum('rn').over(w))

    dfs_state = dfs_state.groupBy('userId', 'group', 'state').agg(F.mean('latitude').alias('latitude'),
                                                                  F.mean('longitude').alias(
                                                                      'longitude'),
                                                                  F.min('begin').alias(
                                                                      'begin'),
                                                                  F.max('end').alias('end')).drop('group')

    dfs_destinations = get_destinations(dfs_state, roam_dist=roam_dist_stops)
    dfs_destinations = dfs_destinations.withColumn(
        'prefix', dfs_destinations.userId.substr(1, 2))
    dfs_destinations = dfs_destinations.withColumn(
        'dayofyear', F.dayofyear('begin'))
    dfs_destinations = dfs_destinations.withColumn('year', F.year('begin'))
    # dfs_destinations = dfs_destinations.withColumn('state', F.lit(state))

    # Local time
    dfs_destinations.createOrReplaceTempView("dfs_destinations")
    dfs_destinations = spark.sql("""
      SELECT dfs_destinations.*, geohash(clusterLatitude, clusterLongitude, 7) as geohash7
      from dfs_destinations
      """)
    dfs_destinations = dfs_destinations.withColumn('geohash5', F.substring(col('geohash7'), 1, 5))
    dfs_destinations = dfs_destinations.join(F.broadcast(dfs_timezones), on='geohash5').drop('geohash5')
    dfs_destinations = dfs_destinations.withColumn('local_begin', F.from_utc_timestamp(col('begin'), col('tzid')))
    dfs_destinations = dfs_destinations.withColumn('offset', (
                (col('local_begin').cast('long') - col('begin').cast('long')) / 3600).cast('int')).drop('local_begin')
    dfs_destinations.persist(StorageLevel.DISK_ONLY)

    # Write
    local_dir_all = local_dir + "/all/"
    dfs_destinations_all = dfs_destinations.select(
        'prefix', 'userId', 'clusterId', 'begin', 'end', 'offset', 'year', 'dayofyear')
    dfs_destinations_all.repartition(256, "prefix", "year", "dayofyear").write.partitionBy(
        "prefix", "year", "dayofyear").format('parquet').mode('overwrite').save(local_dir_all)

    local_dir_distinct = local_dir+"/distinct/"
    dfs_destinations_distinct = dfs_destinations.select(
        'prefix', 'userId', 'clusterId', 'clusterLatitude', 'clusterLongitude', 'geohash7', 'year').distinct()
    dfs_destinations_distinct.repartition(256, "prefix", "year").write.partitionBy(
        "prefix", "year").format('parquet').mode('overwrite').save(local_dir_distinct)

    dfs_destinations.unpersist()

    print("upload local data to azure")
    upload_time = time.time()

    # upload parts 1  "prefix/year"
    print(f"upload files for distinct")
    # upload with threads
    dfutures = []    
    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        years = [2020]
        s_key = f"state={state}"
        for fprefix in enumerate_prefixes():
            print(f"upload files for distinct: {fprefix}")
            prefix_dir = local_dir_distinct+"prefix="+fprefix
            prefix_key = f"prefix={fprefix}"

            for fyear in years:
                f_dir = prefix_dir + "/year="+str(fyear)
                f_key = prefix_key + "/year="+str(fyear)

                # print(f"read files for distinct from {f_dir}")

                if (os.path.isdir(f_dir)):
                    files = [filename for filename in os.listdir(
                        f_dir) if filename.startswith("part-")]

                    if len(files) > 0:

                        for file_local in files:
                            file_path = f_dir+"/"+file_local
                            part_num = int(file_local.split('-')[1])
                            part_key = '{:05d}'.format(part_num)
                            # fix name as static hash to be reproducible
                            filename_hash = hashlib.sha1(
                                str.encode(f_key+s_key+part_key)).hexdigest()

                            blob_key = "{}/{}/{}/part-{}-{}.snappy.parquet".format(
                                path_out_distinct, f_key, s_key, part_key, filename_hash)

                            # print("upload " + file_path + " to " + container_out+":"+blob_key)
                            # upload_blob(blob_service_client,container_out, blob_key, file_path)
                            future = executor.submit(
                                upload_blob, blob_service_client,container_out, blob_key, file_path)
                            dfutures.append(future)                                        

                    # else:
                    #    print(f"no files to upload for {f_key}")

                # else:
                #    print(f"missing partition for {f_key}")

        # end of loop, wait for futures
        for future in dfutures:
            bkey = future.result()

    # ensure we wait all tasks
    # TODO check if all done
    ddone = concurrent.futures.wait(dfutures)

    # upload parts 2 "prefix/year/dayofyear"
    print(f"upload files for all")
    years = [2020]
    s_key = f"state={state}"
    # upload with threads
    afutures = []    
    with ThreadPoolExecutor(max_workers=THREADS) as executor:    
        for fprefix in enumerate_prefixes():
            print(f"upload files for all: {fprefix}")
            prefix_dir = local_dir_all+"prefix="+fprefix
            prefix_key = f"prefix={fprefix}"

            for fyear in years:
                f_dir = prefix_dir + "/year="+str(fyear)
                f_key = prefix_key + "/year="+str(fyear)

                # print(f"read files for all from {f_dir}")

                for fday in range(1, 367):
                    d_dir = f_dir + "/dayofyear="+str(fday)
                    d_key = f_key + "/dayofyear="+str(fday)

                    # print(f"read files for all from {d_dir}")

                    if (os.path.isdir(d_dir)):
                        files = [filename for filename in os.listdir(
                            d_dir) if filename.startswith("part-")]

                        if len(files) > 0:

                            for file_local in files:
                                file_path = d_dir+"/"+file_local
                                part_num = int(file_local.split('-')[1])
                                part_key = '{:05d}'.format(part_num)
                                # fix name as static hash to be reproducible
                                filename_hash = hashlib.sha1(
                                    str.encode(d_key+s_key+part_key)).hexdigest()

                                blob_key = "{}/{}/{}/part-{}-{}.snappy.parquet".format(
                                    path_out_all, d_key, s_key, part_key, filename_hash)

                                # print("upload " + file_path + " to " + container_out+":"+blob_key)
                                # upload_blob(blob_service_client,container_out, blob_key, file_path)
                                future = executor.submit(
                                    upload_blob, blob_service_client,container_out, blob_key, file_path)
                                afutures.append(future)                                       
                        # else:
                        #     print(f"no files to upload for {d_key}")

                    # else:
                    #     print(f"missing partition for {d_key}")
        # end of loop, wait for futures
        for future in afutures:
            bkey = future.result()

    # ensure we wait all tasks
    # TODO check if all done
    adone = concurrent.futures.wait(afutures)

    print("--- {} seconds elapsed ---".format(int(time.time() - start_time)))
    print()
    shutdown_time = time.time()
    spark.stop()

    end_time = time.time()
    print("Done in {} seconds (read:{} spark:{} upload:{} shutdown:{})".format(
        int(end_time - start_time),
        int(spark_time - read_time),
        int(upload_time - spark_time),
        int(shutdown_time - upload_time),
        int(end_time - shutdown_time)
    ))
    print('Done.')
Example #19
# Pass in a container name and get names of all blobs in the container.
import os
from dotenv import load_dotenv

from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

load_dotenv()

ConnectionString = os.getenv("ConnectionString")

# Create the BlobServiceClient from the connection string; it is used by the helpers below.
blob_service_client = BlobServiceClient.from_connection_string(
    ConnectionString)


def blobs(container_name):
    """
    Returns blob list from azure blob storage container.

    Usage : 
    ```
    for blob in blobs("container_name"):
        print("\\t" + blob.name)
    ```
    """
    container_client = blob_service_client.get_container_client(container_name)
    blob_list = container_client.list_blobs()
    return blob_list


def getData(container_name, blob_name):
Example #20
logger.info("Source: " + sourceProtocol + " path:" + sourcePath)
logger.info("Destination " + destinationProtocol + " path:" + destinationPath)

protocolConnection = "https"

if "RD_CONFIG_ACCOUNT_NAME" in os.environ:
    account_name = os.environ["RD_CONFIG_ACCOUNT_NAME"]
if "RD_CONFIG_ACCESS_KEY" in os.environ:
    access_key = os.environ["RD_CONFIG_ACCESS_KEY"]
if "RD_CONFIG_PROTOCOL" in os.environ:
    protocolConnection = os.environ["RD_CONFIG_PROTOCOL"]

connection_string = "DefaultEndpointsProtocol={};AccountName={};AccountKey={};EndpointSuffix=core.windows.net".format(
    protocolConnection, account_name, access_key)
blob_service_client = BlobServiceClient.from_connection_string(conn_str=connection_string, logging_enable=True)

container_client = blob_service_client.get_container_client(args.container)

try:
    container_client.create_container()
except Exception:
    logger.info("Container exists")
    logger.info("")

if sourceProtocol == "azure":
    source_list = get_blobs_from_container(sourcePath)
else:
    source_list = get_files_from_folder(sourcePath)

if destinationProtocol == "azure":
Example #21
def blob_connect():
    connect_string = 'DefaultEndpointsProtocol=https;AccountName=respacimages;AccountKey=ges4SuaECA10B++lZjlNfhTTorcRkqZXH9+PmyaBG6kFCWH2esd3dE5KHlp63hkHNCPw2cT7bv/bfu2TyRFJEg==;EndpointSuffix=core.windows.net'
    return BlobServiceClient.from_connection_string(connect_string)
Example #22
 def __init__(self):
     self.azure_service_client = BlobServiceClient.from_connection_string(settings.DIFFGRAM_AZURE_CONNECTION_STRING)
     self.azure_container_name = settings.DIFFGRAM_AZURE_CONTAINER_NAME
     self.azure_container_name_ml = settings.ML__DIFFGRAM_AZURE_CONTAINER_NAME
Example #23
 def __init__(self):
     # class used to connect to the blob container
     self.blob_service_client = BlobServiceClient.from_connection_string(
         connect_str)
     self.container_client = self.blob_service_client.get_container_client(
         BLOB_CONTAINER)
Example #24
def upload_image(wine_id):
    # Credit: https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python

    # Retrieve the connection string for use with the application. The storage
    # connection string is stored in an environment variable on the machine
    # running the application called AZURE_STORAGE_CONNECTION_STRING. If the environment variable is
    # created after the application is launched in a console or with Visual Studio,
    # the shell or application needs to be closed and reloaded to take the
    # environment variable into account.

    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')

    # Get the user input image file. Credit: https://pythonise.com/series/learning-flask/flask-uploading-files
    if request.method == "POST":
        if request.files:
            image = request.files["filename"]
            if image.filename == "":
                the_wine = mongo.db.wines.find_one({"_id": ObjectId(wine_id)})
                return render_template('image_upload.html',
                                       wine=the_wine,
                                       upload_error='No image selected',
                                       user_name='User: '******'username'])

            if allowed_image(image.filename):
                filename = secure_filename(image.filename)
                image.save(os.path.join(app.config["IMAGE_UPLOADS"], filename))
            else:
                print("That file extension is not allowed")
                the_wine = mongo.db.wines.find_one({"_id": ObjectId(wine_id)})
                return render_template(
                    'image_upload.html',
                    wine=the_wine,
                    upload_error=
                    'Incorrect file type selected - must be: "JPEG", "JPG", "PNG" or "GIF"',
                    user_name='User: '******'username'])

    # Get static file and save to upload_images directory to upload
    local_path = "./upload_images"
    local_file_name = filename
    upload_file_path = os.path.join(local_path, local_file_name)

    # Create the BlobServiceClient object which will be used to create a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)

    # Set the name for the container
    container_name = "caveduvins"
    container_client = ContainerClient.from_connection_string(
        conn_str=connect_str, container_name=container_name)

    # Set the upload file name
    upload_file_name = wine_id + str(uuid.uuid4()) + ".jpg"

    # Create a blob client using the local file name as the name for the blob
    blob_client = blob_service_client.get_blob_client(container=container_name,
                                                      blob=upload_file_name)

    # Upload the created file
    with open(upload_file_path, "rb") as data:
        blob_client.upload_blob(data)

    # Delete the file from upload_images directory
    os.remove(upload_file_path)

    # create a url for the image
    image_url = "https://mystorageacct180671.blob.core.windows.net/" + container_name + "/" + upload_file_name
    wineid = wine_id

    flash("Image uploaded")

    if 'username' in session:
        user_return = 'User: '******'username']
    else:
        user_return = 'Cave du Vins'

    return render_template(
        "index.html",
        update=mongo.db.wines.update(
            {'_id': ObjectId(wineid)},
            # Credit: https://stackoverflow.com/questions/10290621/
            # how-do-i-partially-update-an-object-in-mongodb-so-the-new-
            # object-will-overlay
            {"$set": {
                'photo_url': image_url
            }}),
        user_name=user_return,
        colours=mongo.db.colours.find(),
        country=mongo.db.country.find(),
        region=mongo.db.region.find(),
        grape=mongo.db.grape.find(),
        results_winename="",
        results_vintage="",
        results_colour="",
        results_country="",
        results_region="",
        results_grape="",
        results=mongo.db.wines.find({'_id': ObjectId(wineid)}))
Example #25
# pip install azure-storage-blob

from azure.storage.blob import ContainerClient, BlobServiceClient, BlobClient, StandardBlobTier, PremiumPageBlobTier

cs = ""

block_service_client: BlobServiceClient = BlobServiceClient.from_connection_string(
    cs)
account_info = block_service_client.get_account_information()
print(account_info)

CONTAINER_NAME = "kontener1"
BLOB_NAME = "auto.jpg"

try:
    container_client: ContainerClient = ContainerClient.from_connection_string(
        cs, CONTAINER_NAME)
    container_client.create_container()
    container_client.set_container_metadata({"departament": "IT"})
    print(container_client.get_container_properties().metadata)
except Exception as exc:
    print(exc)

blob_client: BlobClient = BlobClient.from_connection_string(
    conn_str=cs, container_name=CONTAINER_NAME, blob_name=BLOB_NAME)
# save (upload) the file
with open("../Dzien02/images/WY3371X.jpg", "rb") as fd:
    blob_client.upload_blob(fd, overwrite=True)
    #blob_client.set_standard_blob_tier(StandardBlobTier.Cool)
    #blob_client.set_premium_page_blob_tier(PremiumPageBlobTier.)
Example #26
import azure.functions as func
import datetime
import logging

from typing import List

from azure.storage.blob import BlobServiceClient, BlobProperties

from config import DefaultConfig

CONFIG = DefaultConfig()

blob_service_client = None
try:
    blob_service_client = BlobServiceClient.from_connection_string(
        CONFIG.STORAGE_CONNECTION)
except Exception as e:
    logging.exception(e)


def main(
    mytimer: func.TimerRequest
) -> None:  # should be executed once an hour (0 0 */1 * * *)
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()

    if mytimer.past_due:
        logging.info('The timer is past due!')

    delete()
    logging.info('Python timer trigger function ran at %s', utc_timestamp)
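
The delete() helper called above is not included in this excerpt. A minimal sketch of what an hourly cleanup might look like, assuming the container name is available as CONFIG.STORAGE_CONTAINER and a 24-hour retention window applies (both are assumptions):

def delete():
    """Hypothetical cleanup: remove blobs older than 24 hours."""
    if blob_service_client is None:
        return
    # assumption: the container to clean is configured alongside STORAGE_CONNECTION
    container_client = blob_service_client.get_container_client(CONFIG.STORAGE_CONTAINER)
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=24)
    for blob in container_client.list_blobs():
        if blob.last_modified < cutoff:
            container_client.delete_blob(blob.name)
            logging.info("Deleted expired blob %s", blob.name)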
Example #27
# Verify AZ_BLOB_CONNECTION_STRING env variable, this is the blob connection string value
if not AZ_BLOB_CONNECTION_STRING:
    print(
        'The environment variable AZ_BLOB_CONNECTION_STRING could not be verified, this variable is required '
        'and needs to contain the Azure blob connection string')
    sys.exit(1)

# Verify AZ_BLOB_CONTAINER env variable, this is the blob container value
if not AZ_BLOB_CONTAINER:
    print(
        'The environment variable AZ_BLOB_CONTAINER could not be verified, this variable is required '
        'and needs to contain the value for Azure blob container')
    sys.exit(1)

# Attempt creating the blob container if does not exist already

# Create the BlobServiceClient object which will be used to create a container client
blob_service_client = BlobServiceClient.from_connection_string(
    AZ_BLOB_CONNECTION_STRING)

if __name__ == "__main__":
    searchable = False
    if len(sys.argv) != 4:
        sys.exit(
            'usage: python3 AzDownloadBlob.py <container_name> <blob_name> <target_file>'
        )

    container_name = sys.argv[1]
    print("container is %s" % container_name)

    blob_name = sys.argv[2]
    print("blob_name is %s" % blob_name)

    target_file = sys.argv[3]
Example #28
    all_packages = get_targets(args.target_package_list)
    working_directory = os.path.abspath(args.working_folder)
    download_dir = os.path.join(working_directory, "download")
    unzip_directory = os.path.join(working_directory, "unzip")
    upload_directory = os.path.join(working_directory, "upload")

    logging.info("Targeted Packages: {}".format(all_packages))
    logging.info("Targeted Working Directory: {}".format(working_directory))

    logging.info("Prepping Working Environment")
    prep_env([download_dir, unzip_directory, upload_directory])

    # download the sdist format
    for specifier in all_packages:
        download_package(specifier, download_dir)

    # unzip, tar
    repackage_data(download_dir, unzip_directory, upload_directory)

    # instantiate blob client and upload data
    service = BlobServiceClient.from_connection_string(
        conn_str=args.connection_string)
    container_client = service.get_container_client(DESTINATION_CONTAINER)
    results = upload_data(upload_directory, container_client,
                          service.primary_endpoint)

    # output URI links for each blob
    logging.info("Uploaded {} sdists.".format(len(results)))
    for uri in results:
        print(uri)
Example #29
def main(mytimer: func.TimerRequest, outputBlob: func.Out[str]) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()

    url = 'https://www.vegvesen.no/ws/no/vegvesen/veg/trafikkpublikasjon/vaer/2/GetMeasuredWeatherData'
    user = os.environ['Vegvesen_user']
    pwd = os.environ['Vegvesen_pwd']

    blob_service_client = BlobServiceClient.from_connection_string(
        os.environ['Blockblob'])
    f = blob_service_client.get_blob_client("actuals",
                                            'weather_observations.json')

    road_ids = {"SN79791": 80, "SN84905": 323, "SN94195": 228}

    df_out = pd.DataFrame(columns = \
        ['Station_id'
        ,'observation_time'
        , 'air_temp'
        , 'relative_humidity'
        , 'dew_point_temp'
        , 'wind_speed'
        , 'wind_bearing'
        , 'min_visibility_dist'
        , 'precipitation_intensity'
        , 'road_friction'
        , 'road_temp'
         ] )

    response = requests.get(url, auth=(user, pwd))
    soup = BeautifulSoup(response.content, 'xml')

    for station_id, road_id in road_ids.items():
        site = soup.find('measurementSiteReference', id=road_id).parent
        df_out = df_out.append(
            {
                "Station_id":
                station_id,
                'observation_time':
                np.nan
                if site.find('measurementTimeDefault') is None else parse(
                    site.find('measurementTimeDefault').get_text()).astimezone(
                        timezone('Etc/UTC')),
                'air_temp':
                np.nan if site.find('airTemperature') is None else
                site.find('airTemperature').string,
                'relative_humidity':
                np.nan if site.find('relativeHumidity') is None else
                site.find('relativeHumidity').string,
                'dew_point_temp':
                np.nan if site.find('dewPointTemperature') is None else
                site.find('dewPointTemperature').string,
                'wind_speed':
                np.nan if site.find('windSpeed') is None else
                site.find('windSpeed').string,
                'wind_bearing':
                np.nan if site.find('windDirectionBearing') is None else
                site.find('windDirectionBearing').string,
                'min_visibility_dist':
                np.nan if site.find('minimumVisibilityDistance') is None else
                site.find('minimumVisibilityDistance').string,
                'precipitation_intensity':
                np.nan if site.find('precipitationIntensity') is None else
                site.find('precipitationIntensity').string,
                'road_friction':
                np.nan if site.find('friction') is None else
                site.find('friction').string,
                'road_temp':
                np.nan if site.find('roadSurfaceTemperature') is None else
                site.find('roadSurfaceTemperature').string
            },
            ignore_index=True)

    outputBlob.set(
        df_out.to_json(orient='records', force_ascii=False, indent=2))
Example #30
def main(argv):
    dimhelp = 'fragment size (samples) in {} direction'
    parser = argparse.ArgumentParser(
        prog='upload',
        description='Upload cubes to oneseismic storage',
        epilog='%(prog)s relies on azure connection strings, see {}'.format(
            'https://docs.microsoft.com/azure/storage/common/storage-configure-connection-string'
        ),
    )
    parser.add_argument('meta', type=str, help='metadata json')
    parser.add_argument('input', type=str, help='input SEG-Y file')
    parser.add_argument(
        '--subcube-dim-0',
        '-i',
        type=int,
        default=120,
        metavar='I',
        help=dimhelp.format('X'),
    )
    parser.add_argument(
        '--subcube-dim-1',
        '-j',
        type=int,
        default=120,
        metavar='J',
        help=dimhelp.format('Y'),
    )
    parser.add_argument(
        '--subcube-dim-2',
        '-k',
        type=int,
        default=120,
        metavar='K',
        help=dimhelp.format('Z'),
    )
    parser.add_argument(
        '--connection-string',
        '-s',
        metavar='',
        type=str,
        help='''
            Azure connection string for blob store auth. Can also be set
            with the env-var AZURE_CONNECTION_STRING
        ''',
    )
    args = parser.parse_args(argv)

    params = {
        'subcube-dims': (
            args.subcube_dim_0,
            args.subcube_dim_1,
            args.subcube_dim_2,
        ),
    }

    if args.meta == '-':
        meta = json.load(sys.stdin)
    else:
        with open(args.meta) as f:
            meta = json.load(f)

    connection_string = os.environ.get('AZURE_CONNECTION_STRING', None)
    if args.connection_string:
        connection_string = args.connection_string

    if connection_string is None:
        problem = 'No azure connection string'
        solution = 'use --connection-string or env-var AZURE_CONNECTION_STRING'
        sys.exit('{} - {}'.format(problem, solution))

    blob = BlobServiceClient.from_connection_string(connection_string)
    with open(args.input, 'rb') as input:
        upload(params, meta, input, blob)