class FileLayer(object):
    """Convenience wrapper around Azure Block Blob storage for a single
    storage account: download/upload files, read pickled objects and JSON
    configs, list blobs, and copy blobs within or across storage accounts.
    """

    def __init__(self, storage_account_name="", storage_account_access_key=""):
        """Create the underlying BlockBlobService client.

        Args:
            storage_account_name: Azure storage account name.
            storage_account_access_key: access key for the storage account.
        """
        self.storage_account_name = storage_account_name
        self.storage_account_access_key = storage_account_access_key
        self.file_service = BlockBlobService(
            account_name=self.storage_account_name,
            account_key=self.storage_account_access_key)

    def download_blob(self, blobpath="", local_file_path="",
                      container="testing-environment"):
        """Download a blob from Azure to the local filesystem.

        Args:
            blobpath: blob name on Azure.
            local_file_path: target path on the local system; when empty,
                "/" + blobpath is used.
            container: Azure blob container name.

        Returns:
            The local path the blob was downloaded to.
        """
        # Default the local path to the blob's own path under the root.
        local_path = local_file_path if local_file_path else "/" + blobpath
        # Ensure the parent directory exists before writing.
        os.makedirs("/".join(local_path.split("/")[:-1]), exist_ok=True)
        self.file_service.get_blob_to_path(container_name=container,
                                           blob_name=blobpath,
                                           file_path=local_path)
        print("downloaded to = {}".format(local_path))
        return local_path

    def upload_to_blob(self, local_file_path="", blobpath="",
                       container="testing-environment"):
        """Upload a local file to the blob container.

        Args:
            local_file_path: path of the file on the local system.
            blobpath: blob name on Azure; when empty, derived from
                local_file_path with its first path component dropped.
            container: Azure blob container name.

        Returns:
            The blob name the file was uploaded to.
        """
        # BUG FIX: the original referenced the misspelled name `bloppath`,
        # which raised NameError whenever blobpath was left empty.
        if not blobpath:
            blob_file = "/".join(local_file_path.split("/")[1:])
        else:
            blob_file = blobpath
        self.file_service.create_blob_from_path(container_name=container,
                                                blob_name=blob_file,
                                                file_path=local_file_path)
        return blob_file

    def read_pickle(self, path_on_blob, container="testing-environment"):
        """Deserialize a pickled object stored on the blob.

        SECURITY NOTE: pickle.loads executes arbitrary code during
        deserialization — only call this on blobs from a trusted container.

        Args:
            path_on_blob: pickle file path on the blob.
            container: Azure blob container name.

        Returns:
            The unpickled object.
        """
        # BUG FIX: the original hard-coded "testing-environment" here,
        # silently ignoring the `container` argument.
        blob = self.file_service.get_blob_to_bytes(container_name=container,
                                                   blob_name=path_on_blob)
        return pickle.loads(blob.content)

    def read_config(self, path_on_blob="", container="testing-environment"):
        """Read and parse a JSON config file stored on the blob.

        Args:
            path_on_blob: JSON file path on the blob.
            container: Azure blob container name.

        Returns:
            The parsed JSON object.
        """
        config_file = self.file_service.get_blob_to_text(
            container_name=container, blob_name=path_on_blob)
        return json.loads(config_file.content)

    def list_folders_in_blob_path(self, blob_path="",
                                  container="testing-environment"):
        """List blob names under a given prefix.

        Args:
            blob_path: folder (prefix) path on the blob.
            container: Azure blob container name.

        Returns:
            De-duplicated list of blob names under the prefix.
        """
        list_generator = self.file_service.list_blobs(container_name=container,
                                                      prefix=blob_path)
        return list({pth.name for pth in list_generator})

    def copy_blob_same_storage(self, sourceblobpath="", destinationblobpath="",
                               sourcecontainer="", destinationcontainer=""):
        """Copy a blob across containers in the same storage account
        (server-side copy, no local download).

        Args:
            sourceblobpath: source blob name.
            destinationblobpath: destination blob name; defaults to
                sourceblobpath when empty.
            sourcecontainer: source container name.
            destinationcontainer: destination container name.
        """
        if not destinationblobpath:
            destinationblobpath = sourceblobpath
        source_blob_url = self.file_service.make_blob_url(
            sourcecontainer, sourceblobpath)
        self.file_service.copy_blob(destinationcontainer, destinationblobpath,
                                    source_blob_url)

    def copy_blob_across_storage(self, sourceblobpath="",
                                 destinationblobpath="", sourcecontainer="",
                                 destinationcontainer="",
                                 destination_source_account_name="",
                                 destination_source_account_key=""):
        """Copy a blob to a different storage account via a local temp file.

        Args:
            sourceblobpath: source blob name.
            destinationblobpath: destination blob name; defaults to the
                downloaded path with its first component dropped.
            sourcecontainer: source container name.
            destinationcontainer: destination container name.
            destination_source_account_name: destination storage account name.
            destination_source_account_key: destination storage account key.
        """
        destinationfileservice = BlockBlobService(
            account_name=destination_source_account_name,
            account_key=destination_source_account_key)
        local_path = self.download_blob(blobpath=sourceblobpath,
                                        container=sourcecontainer)
        if not destinationblobpath:
            destinationblobpath = "/".join(local_path.split("/")[1:])
        try:
            destinationfileservice.create_blob_from_path(
                container_name=destinationcontainer,
                blob_name=destinationblobpath,
                file_path=local_path)
        finally:
            # ROBUSTNESS FIX: always remove the temp file, even when the
            # upload raises (the original leaked it on failure).
            os.remove(local_path)
class AzureBlobFileSystem(object):
    """Filesystem-style facade (ls/mkdir/rm/mv/cp/du/...) over Azure Block
    Blob storage. Credentials fall back to environment variables when not
    supplied explicitly.
    """

    def __init__(self, account_name=None, account_key=None, sas_token=None,
                 connection_string=None, **storage_options):
        """Build the BlockBlobService connection.

        Args:
            account_name: storage account name (or AZURE_BLOB_ACCOUNT_NAME).
            account_key: storage account key (or AZURE_BLOB_ACCOUNT_KEY).
            sas_token: SAS token (or AZURE_BLOB_SAS_TOKEN).
            connection_string: full connection string
                (or AZURE_BLOB_CONNECTION_STRING).
            **storage_options: optional "protocol", "endpoint_suffix" and
                "custom_domain" overrides for the service client.
        """
        account_name = account_name or os.environ.get(
            "AZURE_BLOB_ACCOUNT_NAME")
        account_key = account_key or os.environ.get("AZURE_BLOB_ACCOUNT_KEY")
        sas_token = sas_token or os.environ.get("AZURE_BLOB_SAS_TOKEN")
        connection_string = connection_string or os.environ.get(
            "AZURE_BLOB_CONNECTION_STRING")
        # SECURITY FIX: removed print(account_name, account_key) — it leaked
        # the storage account credentials to stdout/function logs.
        self.connection = BlockBlobService(
            account_name=account_name,
            account_key=account_key,
            sas_token=sas_token,
            connection_string=connection_string,
            protocol=storage_options.get("protocol") or "https",
            endpoint_suffix=storage_options.get("endpoint_suffix"),
            custom_domain=storage_options.get("custom_domain"))
        self.sep = "/"

    def ls(self, container, pattern=None):
        """Return the unique blob names in `container`, optionally filtered
        by a glob `pattern` (fnmatch syntax). Order is unspecified."""
        names = (blob.name for blob in self.connection.list_blobs(container))
        if pattern:
            names = (name for name in names if fnmatch.fnmatch(name, pattern))
        return list(set(names))

    def mkdir(self, container, dir_name):
        """Simulate a directory by creating an empty "dir_name/" blob."""
        self.touch(container, "{dir_name}/".format(dir_name=dir_name))

    def rm(self, container, full_path):
        """Delete a blob under a short-lived blob lease.

        Raises:
            IOError: if the blob does not exist.
            Exception: storage errors during delete are propagated after the
                lease is released. (BUG FIX: the original bare `except:`
                silently swallowed every failure, so callers — including the
                rollback logic in mv() — could never observe a failed delete.)
        """
        if not self.connection.exists(container, full_path):
            raise IOError(
                "File '{file}' does not exist under container '{container}'".
                format(file=full_path, container=container))
        path_delete_lease = None
        try:
            path_delete_lease = self.connection.acquire_blob_lease(
                container, full_path)
            self.connection.delete_blob(container, full_path,
                                        lease_id=path_delete_lease)
        except Exception:
            # Release the lease so the blob is not left locked, then
            # propagate the failure.
            if path_delete_lease is not None:
                self.connection.release_blob_lease(container, full_path,
                                                   path_delete_lease)
            raise

    def touch(self, container, full_path):
        """Create an empty blob at full_path (under a container lease) and
        return its name."""
        container_lease = None
        try:
            container_lease = self.connection.acquire_container_lease(
                container)
            self.connection.create_blob_from_text(container, full_path, "")
        finally:
            if container_lease is not None:
                self.connection.release_container_lease(
                    container, container_lease)
        return full_path

    def mv(self, container, src_path, dst_path):
        """Move a blob: copy to dst_path, then delete src_path.

        Returns:
            True on success; False after rolling back (deleting the copy)
            when either step fails.
        """
        try:
            self.cp(container, src_path, dst_path)
            self.rm(container, src_path)
            return True
        except Exception:
            # Roll back the copy so a failed move leaves no partial state.
            self.rm(container, dst_path)
            return False

    def cp(self, container, full_src_path, full_dst_path):
        """Server-side copy of a blob within the same container, performed
        under a container lease."""
        copy_container_lease = None
        try:
            copy_container_lease = self.connection.acquire_container_lease(
                container)
            self.connection.copy_blob(
                container, full_dst_path,
                self.connection.make_blob_url(container, full_src_path))
        finally:
            if copy_container_lease is not None:
                self.connection.release_container_lease(
                    container, copy_container_lease)

    def du(self, container):
        """Map each blob name in the container to its size in bytes."""
        return {
            blob.name: blob.properties.content_length
            for blob in self.connection.list_blobs(container)
        }

    def last_modified(self, container, full_path):
        """Return the last-modified timestamp of a blob."""
        return self.connection.get_blob_properties(
            container, full_path).properties.last_modified

    def head(self, container, full_path, bytes_count):
        """Return the first `bytes_count` bytes of the blob."""
        return self.connection.get_blob_to_bytes(
            container, full_path, start_range=0,
            end_range=bytes_count - 1).content

    def tail(self, container, full_path, bytes_count):
        """Return the last `bytes_count` bytes of the blob."""
        size = self.connection.get_blob_properties(
            container, full_path).properties.content_length
        return self.connection.get_blob_to_bytes(
            container, full_path, start_range=size - bytes_count,
            end_range=size - 1).content

    def exists(self, container, full_path):
        """True if the blob exists in the container."""
        return self.connection.exists(container, full_path)
def main(msg: func.QueueMessage) -> None:
    """Queue-triggered Azure Function: speech-to-text for one audio blob.

    Decodes the queue message (JSON with "blob", "meeting-code" and
    "file-name" keys) and, unless this recording was already transcribed
    successfully, downloads the audio blob, sends it to the Azure
    Speech-to-Text REST API, persists the per-file result to
    TABLE_NAME_API_T2S, and merges the recognized text into the per-meeting
    tracking entity in TABLE_NAME_TRACKING.
    """
    logging.info("Processing audio analysis queue...")
    # NOTE(review): `stopwords` is loaded but never used in this function —
    # presumably related to processar_palavra_chave; confirm before removing.
    stopwords = nltk.corpus.stopwords.words("portuguese")
    input_message = msg.get_body().decode('utf-8')
    logging.info(input_message)
    input_message = json.loads(input_message)
    logging.info("Processing file " + input_message["blob"] + "...")
    table_service = TableService(account_name=ACCOUNT_NAME,
                                 account_key=ACCOUNT_KEY)
    # Idempotency guard: skip if this meeting-code already has a successful
    # transcription recorded.
    records = table_service.query_entities(
        TABLE_NAME_API_T2S,
        filter="PartitionKey eq 'recording' and RowKey eq '" +
        input_message["meeting-code"] + "' and RecognitionStatus eq 'Success'")
    if len(records.items) == 0:
        # Fetch the raw audio bytes from blob storage.
        blob_service = BlockBlobService(account_name=ACCOUNT_NAME,
                                        account_key=ACCOUNT_KEY)
        blob_entry = blob_service.get_blob_to_bytes(CONTAINER_NAME,
                                                    input_message["blob"],
                                                    timeout=60)
        audio_bytes = blob_entry.content
        # Exchange the subscription key for a short-lived bearer token.
        url_token_api = "https://" + AI_API_REGION + \
            ".api.cognitive.microsoft.com/sts/v1.0/issueToken"
        api_key = SPEECH2TEXT_API_KEY
        headers = {"Content-Length": "0",
                   "Ocp-Apim-Subscription-Key": api_key}
        start_time = datetime.now()
        api_response = requests.post(url_token_api, headers=headers)
        access_token = str(api_response.content.decode('utf-8'))
        url_stt_api = "https://" + AI_API_REGION + \
            ".stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=pt-BR"
        headers = {
            "Authorization": "Bearer {0}".format(access_token),
            "Content-Length": str(len(audio_bytes)),
            "Content-type": "audio/wav",
            "codec": "audio/pcm",
            "samplerate": "16000"
        }
        record = {}
        api_response = None
        res_json = None
        try:
            # Synchronous (short-audio) recognition call.
            api_response = requests.post(url_stt_api,
                                         headers=headers,
                                         params=None,
                                         data=audio_bytes)
            end_time = datetime.now()
            api_time = end_time - start_time
            logging.info(api_response)
            res_json = json.loads(api_response.content.decode('utf-8'))
            record["RecognitionStatus"] = res_json["RecognitionStatus"]
            record["TextConverted"] = res_json["DisplayText"]
            record["ApiResponse"] = json.dumps(res_json)
            record["ApiTimeResponseSeconds"] = api_time.seconds
            logging.info("Speech to text processed.")
        except Exception as error:
            # Request/parse failure: record the failure but continue so the
            # attempt is still persisted in the finally block below.
            record["RecognitionStatus"] = "Request Fail"
            record["Exception"] = traceback.format_exc()
            logging.error(error)
        finally:
            # Persist the per-file transcription outcome (success or failure).
            record["PartitionKey"] = input_message["meeting-code"]
            record["RowKey"] = input_message["file-name"]
            table_service.insert_or_replace_entity(TABLE_NAME_API_T2S, record)
            logging.info("Result persisted.")
        logging.info("Result:" + str(res_json))
        # A top-level "Message" field in the response means the API rejected
        # the request (bad key, quota, ...) — raise so the queue item retries.
        if res_json is not None and "Message" in res_json:
            raise Exception(res_json["Message"])
        if res_json is not None and res_json["RecognitionStatus"] == "Success":
            logging.info("Decoded speech: " + str(res_json["DisplayText"]))
            # Merge this file's text into the per-meeting tracking entity.
            # NOTE: `record` is reused here for the tracking table entity.
            records = table_service.query_entities(
                TABLE_NAME_TRACKING,
                filter="PartitionKey eq 'tracking-analysis' and RowKey eq '" +
                input_message["meeting-code"] + "'")
            texts_converted = []
            if len(records.items) > 0:
                record = records.items[0]
                if "TextConverted" in records.items[0]:
                    # Entity already holds texts: append this file's text
                    # only if not already present (dedup).
                    texts_converted = json.loads(record["TextConverted"])
                    text_converted = {
                        "file-name": input_message["file-name"],
                        "text": res_json["DisplayText"]
                    }
                    if text_converted not in texts_converted:
                        texts_converted.append(text_converted)
                        record["TextConverted"] = json.dumps(texts_converted)
                else:
                    # Entity exists but has no texts yet.
                    text_converted = {
                        "file-name": input_message["file-name"],
                        "text": res_json["DisplayText"]
                    }
                    texts_converted.append(text_converted)
                    record["TextConverted"] = json.dumps(texts_converted)
            else:
                # First file for this meeting: create the tracking entity.
                text_converted = {
                    "file-name": input_message["file-name"],
                    "text": res_json["DisplayText"]
                }
                texts_converted.append(text_converted)
                record = {
                    "PartitionKey": "tracking-analysis",
                    "RowKey": input_message["meeting-code"],
                    "TextConverted": json.dumps(texts_converted)
                }
            text_list = []
            for text_converted in texts_converted:
                text_list.append(text_converted["text"])
            logging.info("Text List: " + str(text_list))
            text_list = set(text_list)
            # Keyword/frequency analysis over the unique recognized texts.
            freq_dist = processar_palavra_chave(text_list)
            record["FreqDist"] = freq_dist
            table_service.insert_or_replace_entity(TABLE_NAME_TRACKING,
                                                   record)
            logging.info("Message processed successfully:" +
                         str(res_json["DisplayText"]))
        else:
            # NOTE(review): uses print() where every other path uses logging.
            print("Descartado por falha no reconhecimento de voz.")
            logging.info(
                "Item discarded. Bad quality or audio file corrupted.")
    else:
        logging.info("Item already processed.")