def test_batch_merge(self):
    # Arrange
    # Act
    entity = Entity()
    entity.PartitionKey = '001'
    entity.RowKey = 'batch_merge'
    entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
    entity.test2 = 'value'
    entity.test3 = 3
    entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
    entity.test5 = datetime.utcnow()
    self.ts.insert_entity(self.table_name, entity)

    entity = self.ts.get_entity(self.table_name, '001', 'batch_merge')
    self.assertEqual(3, entity.test3)

    entity = Entity()
    entity.PartitionKey = '001'
    entity.RowKey = 'batch_merge'
    entity.test2 = 'value1'

    batch = TableBatch()
    batch.merge_entity(entity)
    resp = self.ts.commit_batch(self.table_name, batch)

    # Assert
    self.assertIsNotNone(resp)
    entity = self.ts.get_entity(self.table_name, '001', 'batch_merge')
    self.assertEqual('value1', entity.test2)
    # Int64 values round-trip as EntityProperty, so compare against .value
    self.assertEqual(1234567890, entity.test4.value)
    self.assertEqual(resp[0], entity.etag)
def batch(self):
    table_name = self._create_table()

    entity = Entity()
    entity.PartitionKey = 'batch'
    entity.test = True

    # All operations in the same batch must share a partition key but have distinct row keys.
    # A batch can hold from 1 to 100 entities.
    # Batches are atomic: all operations are committed simultaneously, and if one fails they all fail.
    # Insert, update, merge, insert-or-merge, insert-or-replace, and delete entity operations are supported.

    # Context manager style
    with self.service.batch(table_name) as batch:
        for i in range(0, 5):
            entity.RowKey = 'context_{}'.format(i)
            batch.insert_entity(entity)

    # Commit style
    batch = TableBatch()
    for i in range(0, 5):
        entity.RowKey = 'commit_{}'.format(i)
        batch.insert_entity(entity)
    self.service.commit_batch(table_name, batch)

    self.service.delete_table(table_name)
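# A minimal sketch (not part of the original tests) of the same-partition-key
# rule described above: in the legacy azure-storage-table SDK, TableBatch
# validates each operation as it is added, so mixing partition keys raises
# AzureBatchValidationError before anything is sent to the service.
def batch_partition_key_mismatch_sketch():
    batch = TableBatch()

    first = Entity()
    first.PartitionKey = 'batch'
    first.RowKey = 'row-0'
    batch.insert_entity(first)

    second = Entity()
    second.PartitionKey = 'other'  # differs from 'batch'
    second.RowKey = 'row-1'
    try:
        batch.insert_entity(second)  # raises here, not at commit time
    except AzureBatchValidationError as error:
        print(error)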
def test_batch_reuse(self):
    # Arrange
    table2 = self._get_table_reference('table2')
    self.ts.create_table(table2)

    # Act
    entity = Entity()
    entity.PartitionKey = '003'
    entity.RowKey = 'batch_all_operations_together-1'
    entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
    entity.test2 = 'value'
    entity.test3 = 3
    entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
    entity.test5 = datetime.utcnow()

    batch = TableBatch()
    batch.insert_entity(entity)
    entity.RowKey = 'batch_all_operations_together-2'
    batch.insert_entity(entity)
    entity.RowKey = 'batch_all_operations_together-3'
    batch.insert_entity(entity)
    entity.RowKey = 'batch_all_operations_together-4'
    batch.insert_entity(entity)

    self.ts.commit_batch(self.table_name, batch)
    self.ts.commit_batch(table2, batch)

    batch = TableBatch()
    entity.RowKey = 'batch_all_operations_together'
    batch.insert_entity(entity)
    entity.RowKey = 'batch_all_operations_together-1'
    batch.delete_entity(entity.PartitionKey, entity.RowKey)
    entity.RowKey = 'batch_all_operations_together-2'
    entity.test3 = 10
    batch.update_entity(entity)
    entity.RowKey = 'batch_all_operations_together-3'
    entity.test3 = 100
    batch.merge_entity(entity)
    entity.RowKey = 'batch_all_operations_together-4'
    entity.test3 = 10
    batch.insert_or_replace_entity(entity)
    entity.RowKey = 'batch_all_operations_together-5'
    batch.insert_or_merge_entity(entity)

    self.ts.commit_batch(self.table_name, batch)
    resp = self.ts.commit_batch(table2, batch)

    # Assert
    self.assertEqual(6, len(resp))
    entities = list(self.ts.query_entities(
        self.table_name, "PartitionKey eq '003'", ''))
    self.assertEqual(5, len(entities))
def _create_random_entity_class(self, pk=None, rk=None):
    '''
    Creates a class-based entity with fixed values, using all
    of the supported data types.
    '''
    partition = pk if pk is not None else self.get_resource_name('pk')
    row = rk if rk is not None else self.get_resource_name('rk')
    entity = Entity()
    entity.PartitionKey = partition
    entity.RowKey = row
    entity.age = 39
    entity.sex = 'male'
    entity.name = 'John Doe'
    entity.married = True
    entity.deceased = False
    entity.optional = None
    entity.evenratio = 3.0
    entity.ratio = 3.1
    entity.large = 933311100
    entity.Birthday = datetime(1973, 10, 4)
    entity.birthday = datetime(1970, 10, 4)
    entity.binary = EntityProperty(EdmType.BINARY, b'binary')
    entity.other = EntityProperty(EdmType.INT32, 20)
    entity.clsid = EntityProperty(
        EdmType.GUID, 'c9da6455-213d-42c9-9a79-3e9149a57833')
    return entity
def test_query_entities_large(self):
    # Arrange
    table_name = self._create_query_table(0)
    total_entities_count = 1000
    entities_per_batch = 50

    for j in range(total_entities_count // entities_per_batch):
        batch = TableBatch()
        for i in range(entities_per_batch):
            entity = Entity()
            entity.PartitionKey = 'large'
            entity.RowKey = 'batch{0}-item{1}'.format(j, i)
            entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
            entity.test2 = 'hello world;' * 100
            entity.test3 = 3
            entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
            entity.test5 = datetime(2016, 12, 31, 11, 59, 59, 0)
            batch.insert_entity(entity)
        self.ts.commit_batch(table_name, batch)

    # Act
    start_time = datetime.now()
    entities = list(self.ts.query_entities(table_name))
    elapsed_time = datetime.now() - start_time

    # Assert
    print('query_entities took {0} secs.'.format(elapsed_time.total_seconds()))
    # Azure allocates 5 seconds to execute a query; if it runs slowly,
    # it will return fewer results and make the test fail
    self.assertEqual(len(entities), total_entities_count)
def _create_random_base_entity_class(self):
    '''
    Creates a class-based entity with only pk and rk.
    '''
    partition = self.get_resource_name('pk')
    row = self.get_resource_name('rk')
    entity = Entity()
    entity.PartitionKey = partition
    entity.RowKey = row
    return entity
def update_video_index_progress_table(ID, total_segments, index):
    try:
        entity = Entity()
        # Progress rows are sharded: each entity accumulates up to 200
        # 't_<index>' properties (staying under Azure's per-entity property
        # limit), so segment N lands in partition '<ID>_<N // 200>'.
        entity.PartitionKey = ID + '_' + str(int(index) // 200)
        entity.RowKey = str(total_segments)  # RowKey must be a string
        entity['t_' + str(index)] = index
        print('entity #' + str(index))
        table_service.merge_entity('VideosIndexProgress', entity)
    except Exception as e:
        print(e)
def test_batch_all_operations_together_context_manager(self):
    # Arrange
    # Act
    entity = Entity()
    entity.PartitionKey = '003'
    entity.RowKey = 'batch_all_operations_together-1'
    entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
    entity.test2 = 'value'
    entity.test3 = 3
    entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
    entity.test5 = datetime.utcnow()
    self.ts.insert_entity(self.table_name, entity)
    entity.RowKey = 'batch_all_operations_together-2'
    self.ts.insert_entity(self.table_name, entity)
    entity.RowKey = 'batch_all_operations_together-3'
    self.ts.insert_entity(self.table_name, entity)
    entity.RowKey = 'batch_all_operations_together-4'
    self.ts.insert_entity(self.table_name, entity)

    with self.ts.batch(self.table_name) as batch:
        entity.RowKey = 'batch_all_operations_together'
        batch.insert_entity(entity)
        entity.RowKey = 'batch_all_operations_together-1'
        batch.delete_entity(entity.PartitionKey, entity.RowKey)
        entity.RowKey = 'batch_all_operations_together-2'
        entity.test3 = 10
        batch.update_entity(entity)
        entity.RowKey = 'batch_all_operations_together-3'
        entity.test3 = 100
        batch.merge_entity(entity)
        entity.RowKey = 'batch_all_operations_together-4'
        entity.test3 = 10
        batch.insert_or_replace_entity(entity)
        entity.RowKey = 'batch_all_operations_together-5'
        batch.insert_or_merge_entity(entity)

    # Assert
    entities = list(self.ts.query_entities(
        self.table_name, "PartitionKey eq '003'", ''))
    self.assertEqual(5, len(entities))
def update_corpus_inverted_index():
    new_entities = table_service.query_entities(
        source_azure_table, filter="Status eq 'Unscanned'")
    for new_entity in new_entities:
        # Swap the keys: the source table is keyed (video, term), the corpus
        # index is keyed (term, video), so a term lookup becomes a partition scan.
        corpus_entity = Entity()
        corpus_entity.PartitionKey = new_entity.RowKey
        corpus_entity.RowKey = new_entity.PartitionKey
        table_service.insert_or_replace_entity('CorpusInvertedIndex', corpus_entity)
        new_entity.Status = 'Scanned'
        table_service.update_entity(source_azure_table, new_entity)
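# A sketch (hypothetical helper, not in the original) of why the keys are
# swapped above: assuming source_azure_table is the per-video index keyed as
# (vid_id, quoted term), CorpusInvertedIndex ends up keyed as (quoted term,
# vid_id), so finding every video containing a term is one partition query.
def videos_containing_term(term):
    quoted = urllib.parse.quote_plus(term)  # terms are stored URL-quoted
    matches = table_service.query_entities(
        'CorpusInvertedIndex', filter="PartitionKey eq '{0}'".format(quoted))
    return [match.RowKey for match in matches]  # RowKey holds the video id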
def insert_user(email, name, penalty, following):
    ## can insert or update
    print('Creating User')
    task = Entity()
    task.PartitionKey = email
    task.RowKey = name
    task.penalty = penalty
    task.following = following
    table_service.insert_or_replace_entity(USER_TABLE, task)
    print('Created in User: {name} with email {email}'.format(email=email, name=name))
def test_batch_too_many_ops(self):
    # Arrange
    entity = self._create_default_entity_dict('001', 'batch_negative_1')
    self.ts.insert_entity(self.table_name, entity)

    # Act
    with self.assertRaises(AzureBatchValidationError):
        batch = TableBatch()
        # 101 inserts exceeds the 100-operation batch limit
        for i in range(0, 101):
            entity = Entity()
            entity.PartitionKey = 'large'
            entity.RowKey = 'item{0}'.format(i)
            batch.insert_entity(entity)
        self.ts.commit_batch(self.table_name, batch)
def update_inverted_indexes_azure_table(vid_id, video_inverted_index):
    for term in video_inverted_index:
        try:
            entity = Entity()
            entity.PartitionKey = vid_id
            # Terms may contain characters that are invalid in row keys
            entity.RowKey = urllib.parse.quote_plus(term)
            for timestamp in video_inverted_index[term]:
                sentence = video_inverted_index[term][timestamp]
                # property name for start time 21.19 will be t_21_19
                entity['t_' + str(timestamp).replace('.', '_')] = sentence
            table_service.insert_or_merge_entity('VideosInvertedIndexes', entity)
        except Exception as e:
            print('Failed adding term', term)
            print(e)
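# A decoding sketch (hypothetical helper, not in the original): turns a stored
# VideosInvertedIndexes entity back into a {timestamp: sentence} dict, undoing
# the 't_21_19' property-name encoding used above. Entity subclasses dict in
# the legacy SDK, so its properties can be iterated directly.
def decode_inverted_index_entity(entity):
    index = {}
    for prop, sentence in entity.items():
        if prop.startswith('t_'):
            index[float(prop[2:].replace('_', '.'))] = sentence  # 't_21_19' -> 21.19
    return index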
def update_product_items(self, customer_id, product_items):
    row_key = utils.hash_key(customer_id)
    # Shard carts across fixed partitions: 'ShoppingCart000' .. 'ShoppingCart<shards-1>'
    partition_key = 'ShoppingCart' + str(row_key % self.shards).zfill(3)
    # Drop items the customer no longer holds
    product_items = [item for item in product_items if item["unitCount"] > 0]

    # Insert or Update Items
    items = Entity()
    items.PartitionKey = partition_key
    items.RowKey = str(row_key)
    items.CustomerId = customer_id
    items.ProductItems = json.dumps(product_items)
    self.db.insert_or_replace_entity(self.table_name, items)
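# utils.hash_key is not shown in this excerpt; a plausible sketch under the
# assumption that the shard choice must be deterministic across processes
# (Python's built-in hash() is randomized per run, so it cannot be used to
# pick a storage partition).
import hashlib

def hash_key(customer_id):
    digest = hashlib.md5(str(customer_id).encode('utf-8')).hexdigest()
    return int(digest, 16)  # stable non-negative integer, safe for % shards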
def insert_uploads(food, expiry, image, price, quantity, company):
    ## can insert or update
    print('Creating Uploads')
    id_upload = str(uuid.uuid1())
    task = Entity()
    task.PartitionKey = food
    task.RowKey = id_upload
    task.expiry = expiry
    task.image = image
    task.price = price
    task.quantity = quantity
    task.company = company
    table_service.insert_or_replace_entity(UPLOADS_TABLE, task)
    print('Created in Uploads: {name} with uuid {uuid}'.format(name=food, uuid=id_upload))
    return id_upload
def insert_supplier(name, location, category, phone, closing):
    ## can insert or update
    print('Creating Supplier')
    id_supplier = str(uuid.uuid1())
    task = Entity()
    task.PartitionKey = name
    task.RowKey = id_supplier
    task.location = location
    print(location)
    # Geocode the free-text location into coordinates
    location_data = geolocator.geocode(location)
    task.lat = location_data.latitude
    task.long = location_data.longitude
    task.category = category
    task.phone = phone
    task.closing = closing
    table_service.insert_or_replace_entity(SUPPLIER_TABLE, task)
    print('Created in Supplier: {name} with uuid {uuid}'.format(
        name=name, uuid=id_supplier))
def test_batch_inserts(self):
    # Arrange
    # Act
    entity = Entity()
    entity.PartitionKey = 'batch_inserts'
    entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
    entity.test2 = 'value'
    entity.test3 = 3
    entity.test4 = EntityProperty(EdmType.INT64, '1234567890')

    batch = TableBatch()
    for i in range(100):
        entity.RowKey = str(i)
        batch.insert_entity(entity)
    self.ts.commit_batch(self.table_name, batch)

    entities = list(self.ts.query_entities(
        self.table_name, "PartitionKey eq 'batch_inserts'", ''))

    # Assert
    self.assertIsNotNone(entities)
    self.assertEqual(100, len(entities))
def create_entity_class(self):
    '''
    Creates a class-based entity with fixed values, using all
    of the supported data types.
    '''
    entity = Entity()

    # Partition key and row key must be strings and are required
    entity.PartitionKey = 'pk{}'.format(str(uuid.uuid4()).replace('-', ''))
    entity.RowKey = 'rk{}'.format(str(uuid.uuid4()).replace('-', ''))

    # Some basic types are inferred
    entity.age = 39  # EdmType.INT64
    entity.large = 933311100  # EdmType.INT64
    entity.sex = 'male'  # EdmType.STRING
    entity.married = True  # EdmType.BOOLEAN
    entity.ratio = 3.1  # EdmType.DOUBLE
    entity.birthday = datetime(1970, 10, 4)  # EdmType.DATETIME

    # Binary, Int32 and GUID must be explicitly typed
    entity.binary = EntityProperty(EdmType.BINARY, b'xyz')
    entity.other = EntityProperty(EdmType.INT32, 20)
    entity.clsid = EntityProperty(EdmType.GUID, 'c9da6455-213d-42c9-9a79-3e9149a57833')
    return entity
def create_entity(station: WeatherStationTuple) -> dict:
    """ Conversion from input data to desired properties and types """
    entity = Entity()
    entity.provider = 'bom'
    entity.country = 'Australia'
    entity.country_code = 'AU'
    entity.state = station.state
    entity.site = station.site
    entity.name = station.name
    entity.start_year = station.start_year
    entity.end_year = station.end_year
    location = {
        'type': "point",
        'coordinates': [float(station.longitude), float(station.latitude)]
    }
    entity.location = json.dumps(location)
    entity.PartitionKey = f"{entity.country_code}.{entity.state}"
    entity.RowKey = entity.site
    return entity
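# WeatherStationTuple is not defined in this excerpt; a plausible definition
# inferred from the fields create_entity reads off it (an assumption, not the
# original source):
from collections import namedtuple

WeatherStationTuple = namedtuple(
    'WeatherStationTuple',
    ['state', 'site', 'name', 'start_year', 'end_year', 'longitude', 'latitude'])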
def main():
    print('Started function app')
    inputMessage = open(os.environ['inputMessage']).read()
    message_obj = json.loads(inputMessage)
    file_name = message_obj['file_name']
    vid_id = message_obj['ID']
    max_duration = float(message_obj['duration'])

    # region Debug
    # file_name = "Data_structures_Binary_Tree.wav"
    # vid_id = "Data_structures_Binary_Tree.mp4"
    # max_duration = 976.5
    # endregion

    print('Started processing file')
    audio_container_name = "audio-container"
    audio_file_url = r"https://{0}.blob.core.windows.net/{1}/{2}".format(
        storage_acc_name, audio_container_name, file_name)
    audio_obj = urlopen(audio_file_url)
    print('Finished reading file named:', file_name)

    r = sr.Recognizer()
    start = 0
    duration = 10.0
    segment_counter = 0
    global SEGMENTS_CONFIDENCE
    SEGMENTS_CONFIDENCE = []
    threads = []

    with sr.AudioFile(audio_obj) as source:
        # r.record doesn't read exactly 'duration' seconds of the audio
        # source, but a bit more: 'actual_duration'
        seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
        buffers_per_duration = math.ceil(duration / seconds_per_buffer)
        actual_duration = round(seconds_per_buffer * buffers_per_duration, 2)

        global TOTAL_SEGMENTS
        TOTAL_SEGMENTS = math.ceil(max_duration / actual_duration)

        # One progress entity per 200 segments (// is integer division)
        for i in range(int(TOTAL_SEGMENTS) // 200 + 1):
            entity = Entity()
            entity.PartitionKey = str(vid_id) + '_' + str(i)
            entity.RowKey = str(TOTAL_SEGMENTS)
            table_service.insert_entity('VideosIndexProgress', entity)
        print('Created records in VideosIndexProgress Table')

        while start < max_duration:
            # although 'duration' is passed, 'actual_duration' will be read
            audio = r.record(source, duration=min(max_duration - start, duration))
            t = Thread(target=process_segment,
                       args=(audio, vid_id, start, segment_counter, 'asr-to-parser-q'))
            threads.append(t)
            t.start()
            start += actual_duration
            print("start time of segment:", str(start))
            segment_counter += 1

    for t in threads:
        t.join()

    save_dic_to_blob(vid_id)
    delete_blob(file_name, 'audio-container')

    print('Adding confidence to VideosMetaData table')
    confidence = (sum(SEGMENTS_CONFIDENCE) / len(SEGMENTS_CONFIDENCE)
                  if len(SEGMENTS_CONFIDENCE) != 0 else 0)
    update_confidence_in_metadata(vid_id=vid_id, confidence=confidence)
    print('finished processing ' + str(len(threads)) + ' segments')
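# A polling sketch (hypothetical helper, not in the original) showing how the
# VideosIndexProgress rows created above can be consumed: each shard partition
# '<vid_id>_<i>' accumulates one 't_<n>' property per processed segment (via
# update_video_index_progress_table), and every RowKey stores the segment total.
def video_index_is_complete(vid_id, total_segments):
    processed = 0
    for shard in range(int(total_segments) // 200 + 1):
        rows = table_service.query_entities(
            'VideosIndexProgress',
            filter="PartitionKey eq '{0}_{1}'".format(vid_id, shard))
        for row in rows:
            processed += sum(1 for prop in row if prop.startswith('t_'))
    return processed >= int(total_segments)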