Пример #1
0
    def test_batch_update(self):
        # Arrange: store the entity that the batch will later update
        seeded = Entity()
        seeded.PartitionKey = '001'
        seeded.RowKey = 'batch_update'
        seeded.test = EntityProperty(EdmType.BOOLEAN, 'true')
        seeded.test2 = 'value'
        seeded.test3 = 3
        seeded.test4 = EntityProperty(EdmType.INT64, '1234567890')
        seeded.test5 = datetime.utcnow()
        self.ts.insert_entity(self.table_name, seeded)

        # Act: read it back, change one property and push the change in a batch
        fetched = self.ts.get_entity(self.table_name, '001', 'batch_update')
        self.assertEqual(3, fetched.test3)
        fetched.test2 = 'value1'

        update_batch = TableBatch()
        update_batch.update_entity(fetched)
        resp = self.ts.commit_batch(self.table_name, update_batch)

        # Assert: the new value is stored and the first batch response
        # equals the etag of the stored entity
        self.assertIsNotNone(resp)
        stored = self.ts.get_entity(self.table_name, '001', 'batch_update')
        self.assertEqual('value1', stored.test2)
        self.assertEqual(resp[0], stored.etag)
Пример #2
0
 def _create_random_base_entity_class(self):
     '''
     Build a class-style entity that carries nothing but a partition key
     and a row key, both obtained from get_resource_name.
     '''
     bare = Entity()
     bare.PartitionKey = self.get_resource_name('pk')
     bare.RowKey = self.get_resource_name('rk')
     return bare
Пример #3
0
def update_video_index_progress_table(ID, total_segments, index):
    """Record that segment ``index`` of video ``ID`` has been handled.

    Merges a ``t_<index>`` property (holding ``index``) into the
    ``VideosIndexProgress`` row whose partition key is
    ``<ID>_<index // 200>`` and whose row key is the total segment count.
    Any failure is printed rather than raised (best-effort update).

    :param ID: video identifier, used as the partition-key prefix.
    :param total_segments: total number of segments, used as the row key.
    :param index: segment number (anything ``int()`` accepts).
    """
    try:
        entity = Entity()
        # Azure Table keys must be strings: coerce both parts so numeric
        # arguments address the intended row instead of being rejected.
        entity.PartitionKey = str(ID) + '_' + str(int(index) // 200)
        entity.RowKey = str(total_segments)
        entity['t_' + str(index)] = index
        print('entity #' + str(index))
        table_service.merge_entity('VideosIndexProgress', entity)
    except Exception as e:
        # Deliberately non-fatal: progress tracking must not abort the caller.
        print(e)
Пример #4
0
    def test_query_entities_large(self):
        # Arrange: load 1000 entities into the query table, 50 per batch
        table_name = self._create_query_table(0)
        total_entities_count = 1000
        entities_per_batch = 50
        batch_total = total_entities_count // entities_per_batch

        for batch_no in range(batch_total):
            batch = TableBatch()
            for item_no in range(entities_per_batch):
                row = Entity()
                row.PartitionKey = 'large'
                row.RowKey = 'batch{0}-item{1}'.format(batch_no, item_no)
                row.test = EntityProperty(EdmType.BOOLEAN, 'true')
                row.test2 = 'hello world;' * 100
                row.test3 = 3
                row.test4 = EntityProperty(EdmType.INT64, '1234567890')
                row.test5 = datetime(2016, 12, 31, 11, 59, 59, 0)
                batch.insert_entity(row)
            self.ts.commit_batch(table_name, batch)

        # Act: time one unfiltered query over the whole table
        started = datetime.now()
        entities = list(self.ts.query_entities(table_name))
        seconds_taken = (datetime.now() - started).total_seconds()

        # Assert
        print('query_entities took {0} secs.'.format(seconds_taken))
        # Azure allows a query 5 seconds of execution; a slow run comes back
        # with fewer results, which makes the count check below fail.
        self.assertEqual(len(entities), total_entities_count)
Пример #5
0
def update_corpus_inverted_index():
    """Copy every 'Unscanned' source row into CorpusInvertedIndex and mark it 'Scanned'.

    For each row of ``source_azure_table`` whose Status is 'Unscanned', an
    entity with the two keys swapped (partition key <- row key, row key <-
    partition key) is inserted or replaced in ``CorpusInvertedIndex``; the
    source row is then updated with Status 'Scanned'.
    """
    unscanned_rows = table_service.query_entities(
        source_azure_table, filter="Status eq 'Unscanned'")
    for source_row in unscanned_rows:
        inverted = Entity()
        inverted.PartitionKey = source_row.RowKey
        inverted.RowKey = source_row.PartitionKey
        table_service.insert_or_replace_entity('CorpusInvertedIndex', inverted)

        # Flag the source row only after its mirror entry has been written.
        source_row.Status = 'Scanned'
        table_service.update_entity(source_azure_table, source_row)
Пример #6
0
    def test_batch_too_many_ops(self):
        # Arrange
        seed = self._create_default_entity_dict('001', 'batch_negative_1')
        self.ts.insert_entity(self.table_name, seed)

        # Act / Assert: building and committing a batch of 101 inserts
        # must raise a batch validation error
        with self.assertRaises(AzureBatchValidationError):
            oversized = TableBatch()
            for n in range(101):
                item = Entity()
                item.PartitionKey = 'large'
                item.RowKey = 'item{0}'.format(n)
                oversized.insert_entity(item)
            self.ts.commit_batch(self.table_name, oversized)
Пример #7
0
def update_inverted_indexes_azure_table(vid_id, video_inverted_index):
    """Insert or merge one VideosInvertedIndexes row per term of a video's index.

    :param vid_id: video identifier, used as the partition key of every row.
    :param video_inverted_index: mapping ``term -> {timestamp: sentence}``.

    Each term becomes a row keyed by the URL-quoted term, with one property
    per timestamp. A failure for one term is printed and the remaining
    terms are still processed.
    """
    for term in video_inverted_index:
        try:
            row = Entity()
            row.PartitionKey = vid_id
            row.RowKey = urllib.parse.quote_plus(term)

            occurrences = video_inverted_index[term]
            for timestamp in occurrences:
                # e.g. start time 21.19 is stored under property "t_21_19"
                property_name = 't_' + str(timestamp).replace('.', '_')
                row[property_name] = occurrences[timestamp]

            table_service.insert_or_merge_entity('VideosInvertedIndexes', row)
        except Exception as e:
            print('Failed adding term', term)
            print(e)
Пример #8
0
 def _create_base_entity_class(self):
     # PartitionKey and RowKey are mandatory and have to be strings;
     # each one is a fixed prefix followed by a random UUID without dashes.
     base = Entity()
     base['PartitionKey'] = 'pk' + uuid.uuid4().hex
     base['RowKey'] = 'rk' + uuid.uuid4().hex
     return base
Пример #9
0
def insert_supplier(name, location, category, phone, closing):
    """Geocode a supplier's location and insert or replace it in SUPPLIER_TABLE.

    A fresh ``uuid1`` is generated as the row key; ``name`` is the partition
    key. The coordinates found for ``location`` are stored as ``lat`` and
    ``long`` alongside the other fields.

    :param name: supplier name (partition key).
    :param location: free-text address passed to ``geolocator.geocode``.
    :param category: stored as-is.
    :param phone: stored as-is.
    :param closing: stored as-is.
    :returns: the generated row key (uuid string) of the supplier.
    :raises ValueError: if the geocoder finds no match for ``location``.
    """
    print('Creating Supplier')
    id_supplier = str(uuid.uuid1())
    task = Entity()
    task.PartitionKey = name
    task.RowKey = id_supplier
    task.location = location
    print(location)
    location_data = geolocator.geocode(location)
    # NOTE(review): geolocator looks like a geopy geocoder, whose geocode()
    # returns None when nothing matches - confirm. Fail with a clear message
    # instead of an AttributeError on `.latitude`.
    if location_data is None:
        raise ValueError(
            'Could not geocode supplier location: {!r}'.format(location))
    task.lat = location_data.latitude
    task.long = location_data.longitude
    task.category = category
    task.phone = phone
    task.closing = closing
    table_service.insert_or_replace_entity(SUPPLIER_TABLE, task)
    print('Created in Supplier: {name} with uuid {uuid}'.format(
        name=name, uuid=id_supplier))
    # Hand the generated key back so callers can address the row later.
    return id_supplier
Пример #10
0
def insert_uploads(food, expiry, image, price, quantity, company):
    """Insert or replace a food upload in UPLOADS_TABLE and return its row key.

    ``food`` is the partition key and a newly generated ``uuid1`` string is
    the row key; the remaining arguments are stored under their own names.
    """
    print('Creating Uploads')
    upload_id = str(uuid.uuid1())

    upload = Entity()
    upload.PartitionKey = food
    upload.RowKey = upload_id
    payload = (
        ('expiry', expiry),
        ('image', image),
        ('price', price),
        ('quantity', quantity),
        ('company', company),
    )
    for field_name, field_value in payload:
        setattr(upload, field_name, field_value)

    table_service.insert_or_replace_entity(UPLOADS_TABLE, upload)

    summary = 'Created in Uploads: {name} with uuid {uuid}'.format(
        uuid=upload_id, name=food)
    print(summary)
    return upload_id
Пример #11
0
    def batch(self):
        '''
        Show the two ways of submitting a batch: the context manager and an
        explicit TableBatch committed with commit_batch.
        '''
        table_name = self._create_table()

        template = Entity()
        template.PartitionKey = 'batch'
        template.test = True

        # Rules for batches:
        #  - every operation shares one partition key but has its own row key
        #  - a batch holds between 1 and 100 entities
        #  - a batch is atomic: all operations complete simultaneously, and
        #    if one of them fails they all fail
        #  - supported operations: insert, update, merge, insert or merge,
        #    insert or replace, and delete

        # Style 1: context manager
        with self.service.batch(table_name) as managed_batch:
            for n in range(5):
                template.RowKey = 'context_{}'.format(n)
                managed_batch.insert_entity(template)

        # Style 2: build the batch, then commit it explicitly
        explicit_batch = TableBatch()
        for n in range(5):
            template.RowKey = 'commit_{}'.format(n)
            explicit_batch.insert_entity(template)
        self.service.commit_batch(table_name, explicit_batch)

        self.service.delete_table(table_name)
Пример #12
0
 def toEntity(self, chunkSize=maxAzureTablePropertySize):
     """
     Serialize this job into an Azure table entity.

     The job is pickled, bz2-compressed and cut into pieces of at most
     ``chunkSize`` bytes. Piece number *n* is stored as an ``Edm.Binary``
     property named ``_`` followed by *n* zero-padded to three digits.
     ``RowKey`` is the job store ID and ``PartitionKey`` the default partition.

     :param chunkSize: maximum size of one chunk; must not exceed
            maxAzureTablePropertySize so each chunk fits into a property value
     :return: an Entity built from the property dictionary
     """
     assert chunkSize <= maxAzureTablePropertySize
     pickled = pickle.dumps(self, protocol=pickle.HIGHEST_PROTOCOL)
     blob = bz2.compress(pickled)

     chunkStarts = range(0, len(blob), chunkSize)
     properties = {
         '_' + str(order).zfill(3):
             EntityProperty('Edm.Binary', blob[begin:begin + chunkSize])
         for order, begin in enumerate(chunkStarts)
     }
     properties['RowKey'] = str(self.jobStoreID)
     properties['PartitionKey'] = str(AzureTable.defaultPartition)
     return Entity(properties)
Пример #13
0
def insert_user(email, name, penalty, following):
    """Insert or replace a user row in USER_TABLE.

    :param email: user e-mail, stored as the partition key.
    :param name: user name, stored as the row key.
    :param penalty: stored as-is in the ``penalty`` property.
    :param following: stored as-is in the ``following`` property.
    """
    print('Creating User')
    task = Entity()
    task.PartitionKey = email
    task.RowKey = name
    task.penalty = penalty
    task.following = following
    table_service.insert_or_replace_entity(USER_TABLE, task)
    # The previous message referenced a {uuid} field that was never passed to
    # format(), so it raised KeyError after the row had already been written.
    # Users have no uuid; report the two keys that identify the row instead.
    print('Created in User: {name} with email {email}'.format(email=email,
                                                              name=name))
Пример #14
0
    def test_batch_inserts(self):
        # Arrange: one template entity; only its RowKey changes per insert
        template = Entity()
        template.PartitionKey = 'batch_inserts'
        template.test = EntityProperty(EdmType.BOOLEAN, 'true')
        template.test2 = 'value'
        template.test3 = 3
        template.test4 = EntityProperty(EdmType.INT64, '1234567890')

        # Act: queue 100 inserts, commit them, then query the partition
        inserts = TableBatch()
        for row_number in range(100):
            template.RowKey = str(row_number)
            inserts.insert_entity(template)
        self.ts.commit_batch(self.table_name, inserts)

        stored = list(
            self.ts.query_entities(self.table_name,
                                   "PartitionKey eq 'batch_inserts'", ''))

        # Assert
        self.assertIsNotNone(stored)
        self.assertEqual(100, len(stored))
Пример #15
0
    def update_product_items(self, customer_id, product_items):
        """Insert or replace the shopping-cart row of ``customer_id``.

        Items whose ``unitCount`` is not greater than zero are dropped; the
        rest are stored as a JSON string. The row key is the hashed customer
        id and the partition key is 'ShoppingCart' plus the zero-padded
        shard number (hash modulo ``self.shards``).
        """
        row_key = utils.hash_key(customer_id)
        shard_suffix = str(row_key % self.shards).zfill(3)
        kept_items = [
            entry for entry in product_items if entry["unitCount"] > 0
        ]

        cart = Entity()
        cart.PartitionKey = 'ShoppingCart' + shard_suffix
        cart.RowKey = str(row_key)
        cart.CustomerId = customer_id
        cart.ProductItems = json.dumps(kept_items)

        self.db.insert_or_replace_entity(self.table_name, cart)
Пример #16
0
    def test_batch_reuse(self):
        # Arrange: a second table that receives the very same batches
        table2 = self._get_table_reference('table2')
        self.ts.create_table(table2)

        base_key = 'batch_all_operations_together'

        # Act
        entity = Entity()
        entity.PartitionKey = '003'
        entity.RowKey = base_key + '-1'
        entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
        entity.test2 = 'value'
        entity.test3 = 3
        entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
        entity.test5 = datetime.utcnow()

        # First batch: four inserts (-1 .. -4), committed to both tables
        seed_batch = TableBatch()
        seed_batch.insert_entity(entity)
        for suffix in ('-2', '-3', '-4'):
            entity.RowKey = base_key + suffix
            seed_batch.insert_entity(entity)

        for target in (self.table_name, table2):
            self.ts.commit_batch(target, seed_batch)

        # Second batch: one operation of every kind, again committed twice
        mixed_batch = TableBatch()
        entity.RowKey = base_key
        mixed_batch.insert_entity(entity)

        entity.RowKey = base_key + '-1'
        mixed_batch.delete_entity(entity.PartitionKey, entity.RowKey)

        entity.RowKey = base_key + '-2'
        entity.test3 = 10
        mixed_batch.update_entity(entity)

        entity.RowKey = base_key + '-3'
        entity.test3 = 100
        mixed_batch.merge_entity(entity)

        entity.RowKey = base_key + '-4'
        entity.test3 = 10
        mixed_batch.insert_or_replace_entity(entity)

        entity.RowKey = base_key + '-5'
        mixed_batch.insert_or_merge_entity(entity)

        self.ts.commit_batch(self.table_name, mixed_batch)
        resp = self.ts.commit_batch(table2, mixed_batch)

        # Assert: six responses from the second commit, five rows remaining
        self.assertEqual(6, len(resp))
        remaining = list(
            self.ts.query_entities(self.table_name, "PartitionKey eq '003'",
                                   ''))
        self.assertEqual(5, len(remaining))
Пример #17
0
def main():
    """Split one queued audio file into segments and process them in threads.

    Steps, in order:

    1. Read the JSON message whose file path is in the ``inputMessage``
       environment variable (keys ``file_name``, ``ID``, ``duration``).
    2. Open the audio blob over HTTP from the ``audio-container`` container.
    3. Insert one ``VideosIndexProgress`` row per group of 200 segments.
    4. Record the audio in ~10 second pieces and start one
       ``process_segment`` thread per piece, then wait for all of them.
    5. Call ``save_dic_to_blob``, delete the audio blob, and store the mean
       of ``SEGMENTS_CONFIDENCE`` (0 when empty) via
       ``update_confidence_in_metadata``.

    Rebinds the module globals ``SEGMENTS_CONFIDENCE`` and ``TOTAL_SEGMENTS``.
    """
    global SEGMENTS_CONFIDENCE, TOTAL_SEGMENTS

    print('Started function app')

    # Use a context manager so the message file handle is not leaked.
    with open(os.environ['inputMessage']) as message_file:
        message_obj = json.load(message_file)
    file_name = message_obj['file_name']
    vid_id = message_obj['ID']
    max_duration = float(message_obj['duration'])

    # region Debug
    # file_name = "Data_structures_Binary_Tree.wav"
    # vid_id = "Data_structures_Binary_Tree.mp4"
    # max_duration = 976.5
    # endregion

    print('Started processing file')

    audio_container_name = "audio-container"
    audio_file_url = r"https://{0}.blob.core.windows.net/{1}/{2}".format(
        storage_acc_name, audio_container_name, file_name)
    audio_obj = urlopen(audio_file_url)

    print('Finished reading file named:', file_name)

    r = sr.Recognizer()
    start = 0
    duration = 10.0
    segment_counter = 0

    SEGMENTS_CONFIDENCE = []
    threads = []
    try:
        with sr.AudioFile(audio_obj) as source:
            # r.record doesn't read exactly 'duration' seconds of the audio
            # source, but a bit more (whole buffers) = actual_duration
            seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE
            buffers_per_duration = math.ceil(duration / seconds_per_buffer)
            actual_duration = round(
                seconds_per_buffer * buffers_per_duration, 2)

            TOTAL_SEGMENTS = math.ceil(max_duration / actual_duration)
            # One progress row per block of 200 segments (// is integer div)
            for i in range(int(TOTAL_SEGMENTS) // 200 + 1):
                entity = Entity()
                entity.PartitionKey = str(vid_id) + '_' + str(i)
                entity.RowKey = str(TOTAL_SEGMENTS)
                table_service.insert_entity('VideosIndexProgress', entity)

            print('Created records in VideosIndexProgress Table')

            while start < max_duration:
                # although 'duration' is passed, 'actual_duration' will be read
                audio = r.record(
                    source, duration=min(max_duration - start, duration))
                t = Thread(target=process_segment,
                           args=(audio, vid_id, start, segment_counter,
                                 'asr-to-parser-q'))
                threads.append(t)
                t.start()
                start += actual_duration
                print("start time of segment:", str(start))
                segment_counter += 1
    finally:
        # Every segment has been recorded by now (or reading failed), so the
        # HTTP response can be released; the original left it open.
        audio_obj.close()

    for t in threads:
        t.join()
    save_dic_to_blob(vid_id)
    delete_blob(file_name, audio_container_name)

    print('Adding confidence to VideosMetaData table')

    confidence = sum(SEGMENTS_CONFIDENCE) / len(SEGMENTS_CONFIDENCE) if len(
        SEGMENTS_CONFIDENCE) != 0 else 0
    update_confidence_in_metadata(vid_id=vid_id, confidence=confidence)

    print('finished processing ' + str(len(threads)) + ' segments')
Пример #18
0
 def _create_random_entity_class(self, pk=None, rk=None):
     '''
     Build a class-style entity with fixed property values that together
     use all of the supported data types. Keys that are not supplied are
     obtained from get_resource_name.
     '''
     if pk is None:
         pk = self.get_resource_name('pk')
     if rk is None:
         rk = self.get_resource_name('rk')

     person = Entity()
     person.PartitionKey = pk
     person.RowKey = rk

     # Plain Python values
     person.age = 39
     person.sex = 'male'
     person.name = 'John Doe'
     person.married = True
     person.deceased = False
     person.optional = None
     person.evenratio = 3.0
     person.ratio = 3.1
     person.large = 933311100
     # Two properties whose names differ only in case
     person.Birthday = datetime(1973, 10, 4)
     person.birthday = datetime(1970, 10, 4)

     # Explicitly typed values
     person.binary = EntityProperty(EdmType.BINARY, b'binary')
     person.other = EntityProperty(EdmType.INT32, 20)
     person.clsid = EntityProperty(
         EdmType.GUID, 'c9da6455-213d-42c9-9a79-3e9149a57833')
     return person
Пример #19
0
    def test_batch_all_operations_together_context_manager(self):
        # Arrange: insert rows -1 .. -4 one by one, reusing a single entity
        base_key = 'batch_all_operations_together'

        entity = Entity()
        entity.PartitionKey = '003'
        entity.RowKey = base_key + '-1'
        entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
        entity.test2 = 'value'
        entity.test3 = 3
        entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
        entity.test5 = datetime.utcnow()
        self.ts.insert_entity(self.table_name, entity)
        for suffix in ('-2', '-3', '-4'):
            entity.RowKey = base_key + suffix
            self.ts.insert_entity(self.table_name, entity)

        # Act: one operation of every kind inside a context-managed batch
        with self.ts.batch(self.table_name) as batch:
            entity.RowKey = base_key
            batch.insert_entity(entity)

            entity.RowKey = base_key + '-1'
            batch.delete_entity(entity.PartitionKey, entity.RowKey)

            entity.RowKey = base_key + '-2'
            entity.test3 = 10
            batch.update_entity(entity)

            entity.RowKey = base_key + '-3'
            entity.test3 = 100
            batch.merge_entity(entity)

            entity.RowKey = base_key + '-4'
            entity.test3 = 10
            batch.insert_or_replace_entity(entity)

            entity.RowKey = base_key + '-5'
            batch.insert_or_merge_entity(entity)

        # Assert: five rows are left in the partition
        remaining = list(
            self.ts.query_entities(self.table_name, "PartitionKey eq '003'",
                                   ''))
        self.assertEqual(5, len(remaining))
Пример #20
0
    def create_entity_class(self):
        '''
        Build a sample entity, attribute by attribute, with fixed values that
        together use all of the supported data types.
        '''
        sample = Entity()

        # PartitionKey and RowKey are mandatory and have to be strings
        sample.PartitionKey = 'pk' + uuid.uuid4().hex
        sample.RowKey = 'rk' + uuid.uuid4().hex

        # Types inferred from the Python value
        sample.age = 39  # EdmType.INT64
        sample.large = 933311100  # EdmType.INT64
        sample.sex = 'male'  # EdmType.STRING
        sample.married = True  # EdmType.BOOLEAN
        sample.ratio = 3.1  # EdmType.DOUBLE
        sample.birthday = datetime(1970, 10, 4)  # EdmType.DATETIME

        # Binary, Int32 and GUID need an explicit EntityProperty
        guid_text = 'c9da6455-213d-42c9-9a79-3e9149a57833'
        sample.binary = EntityProperty(EdmType.BINARY, b'xyz')
        sample.other = EntityProperty(EdmType.INT32, 20)
        sample.clsid = EntityProperty(EdmType.GUID, guid_text)
        return sample
def create_entity(station: WeatherStationTuple) -> dict:
    """Turn one weather-station record into a table entity.

    Copies the station's state, site, name and year range, adds fixed
    provider/country fields, stores the position as a JSON string with
    coordinates ordered ``[longitude, latitude]``, and keys the entity by
    ``<country_code>.<state>`` (partition) and site (row).
    """
    coordinates = [float(station.longitude), float(station.latitude)]
    point = {'type': "point", 'coordinates': coordinates}

    row = Entity()
    row.provider = 'bom'
    row.country = 'Australia'
    row.country_code = 'AU'
    for field in ('state', 'site', 'name', 'start_year', 'end_year'):
        setattr(row, field, getattr(station, field))
    row.location = json.dumps(point)

    # Keys are derived from values already stored on the entity.
    row.PartitionKey = '{}.{}'.format(row.country_code, row.state)
    row.RowKey = row.site

    return row