def _execute_stream_bts(self,
                        identity_events: List[TimeAndRecord],
                        identity: str,
                        schema_loader: SchemaLoader,
                        old_state: Optional[Dict] = None) -> Dict[Key, Any]:
    if self._stream_bts is None:
        return {}

    stream_bts_name = schema_loader.add_schema_spec(self._stream_bts)
    stream_transformer_schema = schema_loader.get_schema_object(stream_bts_name)
    store = self._get_store(schema_loader)

    if old_state:
        for k, v in old_state.items():
            store.save(k, v)

    if identity_events:
        stream_transformer = StreamingTransformer(stream_transformer_schema, identity)
        for _, event in identity_events:
            stream_transformer.run_evaluate(event)
        stream_transformer.run_finalize()

    return self._get_store(schema_loader).get_all(identity)
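# Hedged usage sketch (illustrative, not from the source): replays two
# timestamped records for a single identity through _execute_stream_bts and
# inspects the resulting per-key state. `runner` stands in for an instance of
# the class that defines the method, and the dict events assume records are
# dict-like.
def _example_execute_stream_bts(runner) -> None:
    from datetime import datetime

    identity_events = [
        (datetime(2018, 1, 1, 1, 1, 1), {'user_id': 'user-1', 'event': 'app_launch'}),
        (datetime(2018, 1, 1, 1, 2, 1), {'user_id': 'user-1', 'event': 'purchase'}),
    ]
    state = runner._execute_stream_bts(identity_events, 'user-1', SchemaLoader())
    for key, value in state.items():
        print(key, value)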
def test_aggregate_schema_contains_identity_field(aggregate_schema_spec):
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(aggregate_schema_spec)
    aggregate_schema = MockAggregateSchema(name, schema_loader)

    assert len(aggregate_schema.nested_schema) == 2
    assert '_identity' in aggregate_schema.nested_schema
def empty_memory_store() -> MemoryStore:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec({
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY
    })
    return schema_loader.get_store(name)
def activity_aggregate_schema(activity_aggregate_schema_spec: Dict[str, Any],
                              store_spec: Dict[str, Any]) -> ActivityAggregateSchema:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(activity_aggregate_schema_spec)
    schema_loader.add_schema_spec(store_spec, name)
    return ActivityAggregateSchema(name, schema_loader)
def identity_aggregate_schema(identity_aggregate_schema_spec: Dict[str, Any],
                              store_spec: Dict[str, Any]) -> IdentityAggregateSchema:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(identity_aggregate_schema_spec)
    schema_loader.add_schema_spec(store_spec, name)
    return IdentityAggregateSchema(name, schema_loader)
def test_get_store_error_wrong_type(schema_loader: SchemaLoader) -> None:
    schema_loader.get_store('test')
    # The generator filter already guarantees the type, so no separate
    # isinstance assertion is needed.
    error = next(x for x in schema_loader.get_errors('test', False)
                 if isinstance(x, InvalidTypeError))

    assert error.reason == InvalidTypeError.Reason.INCORRECT_BASE
    assert error.expected_base_type == InvalidTypeError.BaseTypes.STORE
def window_aggregate_schema(schema_loader_with_mem_store: SchemaLoader, mem_store_name: str,
                            stream_dtc_name: str) -> WindowAggregateSchema:
    schema_loader_with_mem_store.add_schema_spec({
        'Type': Type.BLURR_AGGREGATE_BLOCK,
        'Name': 'session',
        'Store': mem_store_name,
        'Fields': [
            {
                'Name': 'events',
                'Type': Type.INTEGER,
                'Value': 'session.events + 1',
            },
        ],
    }, stream_dtc_name)
    name = schema_loader_with_mem_store.add_schema_spec({
        'Type': Type.BLURR_AGGREGATE_WINDOW,
        'Name': 'test_window_name',
        'WindowType': Type.DAY,
        'WindowValue': 1,
        'Source': stream_dtc_name + '.session',
        'Fields': [{
            'Name': 'total_events',
            'Type': Type.INTEGER,
            'Value': 'sum(source.events)'
        }]
    })
    return WindowAggregateSchema(name, schema_loader_with_mem_store)
def get_per_identity_records(self, events: Iterable, data_processor: DataProcessor
                             ) -> Generator[Tuple[str, TimeAndRecord], None, None]:
    """
    Uses the given iterable of events and the data processor to convert each
    event into a list of Records along with their identity and time.
    :param events: Iterable of events.
    :param data_processor: DataProcessor used to process each event in events.
    :return: Yields Tuple[Identity, TimeAndRecord] for all Records in events.
    """
    schema_loader = SchemaLoader()
    stream_bts_name = schema_loader.add_schema_spec(self._stream_bts)
    stream_transformer_schema: StreamingTransformerSchema = schema_loader.get_schema_object(
        stream_bts_name)
    for event in events:
        try:
            for record in data_processor.process_data(event):
                try:
                    # `identity` avoids shadowing the builtin `id`.
                    identity = stream_transformer_schema.get_identity(record)
                    time = stream_transformer_schema.get_time(record)
                    yield (identity, (time, record))
                except Exception as err:
                    logging.error('{} in parsing Record {}.'.format(err, record))
        except Exception as err:
            logging.error('{} in parsing Event {}.'.format(err, event))
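# Hedged usage sketch (illustrative, not from the source): feeds raw events
# through get_per_identity_records. `runner` and `processor` are assumptions;
# any DataProcessor whose process_data() yields Record objects works here.
def _example_per_identity_records(runner, processor: DataProcessor,
                                  raw_events: Iterable) -> None:
    for identity, (time, record) in runner.get_per_identity_records(raw_events, processor):
        # Each yielded pair groups a record under its identity and event time,
        # ready to be sorted and batched per identity downstream.
        print(identity, time, record)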
def aggregate_schema_without_store():
    schema_loader = SchemaLoader()
    aggregate_schema_spec = get_aggregate_schema_spec()
    del aggregate_schema_spec['Store']
    name = schema_loader.add_schema_spec(aggregate_schema_spec)
    return MockAggregateSchema(fully_qualified_name=name, schema_loader=schema_loader)
def _execute_window_dtc(self, identity: str, schema_loader: SchemaLoader) -> List[Dict]:
    if self._window_dtc is None:
        logging.debug('Window DTC not provided')
        return []

    stream_transformer = StreamingTransformer(
        self._get_streaming_transformer_schema(schema_loader), identity)
    all_data = self._get_store(schema_loader).get_all(identity)
    stream_transformer.run_restore(all_data)

    exec_context = Context()
    exec_context.add(stream_transformer._schema.name, stream_transformer)

    block_obj = None
    for aggregate in stream_transformer._nested_items.values():
        if not isinstance(aggregate, BlockAggregate):
            continue
        if block_obj is not None:
            # Adjacent string literals concatenate; the original passed a
            # tuple to Exception by mistake.
            raise Exception('Window operation is supported against Streaming '
                            'DTC with only one BlockAggregate')
        block_obj = aggregate

    if block_obj is None:
        raise Exception('No BlockAggregate found in the Streaming DTC file')

    window_data = []

    window_dtc_name = schema_loader.add_schema_spec(self._window_dtc)
    window_transformer_schema = schema_loader.get_schema_object(window_dtc_name)
    window_transformer = WindowTransformer(window_transformer_schema, identity, exec_context)

    logging.debug('Running Window DTC for identity {}'.format(identity))

    anchors = 0
    blocks = 0
    for key, data in all_data.items():
        if key.group != block_obj._schema.name:
            continue
        try:
            blocks += 1
            if window_transformer.run_evaluate(block_obj.run_restore(data)):
                anchors += 1
                window_data.append(window_transformer.run_flattened_snapshot)
        except PrepareWindowMissingBlocksError as err:
            logging.debug('{} with {}'.format(err, key))

    if anchors == 0:
        logging.debug('No anchors found for identity {} out of {} blocks'.format(
            identity, blocks))

    return window_data
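# Hedged usage sketch (illustrative, not from the source): assuming a
# streaming run has already populated the store for 'user-1',
# _execute_window_dtc returns one flattened snapshot dict per block that
# anchored a window. `runner` is a stand-in for an instance of the defining
# class, and the printed shape is an assumption based on the window fixture
# above.
def _example_execute_window_dtc(runner, schema_loader: SchemaLoader) -> None:
    for row in runner._execute_window_dtc('user-1', schema_loader):
        print(row)  # e.g. {'test_window_name.total_events': 3, ...}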
def test_variable_aggregate_initialization(schema_spec):
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(schema_spec)
    schema = VariableAggregateSchema(name, schema_loader)
    assert match_fields(schema._spec['Fields'])

    loader_spec = schema_loader.get_schema_spec(name)
    assert match_fields(loader_spec['Fields'])
def test_field_schema() -> MockFieldSchema:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec({
        'Name': 'max_attempts',
        'Type': Type.INTEGER,
        'Value': 5
    })
    return MockFieldSchema(name, schema_loader)
def test_get_store_success(nested_schema_spec: Dict) -> None:
    nested_schema_spec['Store'] = {'Name': 'memstore', 'Type': Type.BLURR_STORE_MEMORY}
    schema_loader = SchemaLoader()
    schema_loader.add_schema_spec(nested_schema_spec)

    assert isinstance(schema_loader.get_store('test.memstore'), MemoryStore)
def _get_store(schema_loader: SchemaLoader) -> Store:
    stores = schema_loader.get_all_stores()
    if not stores:
        # No store object has been instantiated yet; fall back to the first
        # declared DynamoDB or memory store spec.
        fq_name_and_schema = schema_loader.get_schema_specs_of_type(
            Type.BLURR_STORE_DYNAMO, Type.BLURR_STORE_MEMORY)
        return schema_loader.get_store(next(iter(fq_name_and_schema)))

    return stores[0]
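# Hedged sketch (illustrative, not from the source) of the fallback branch in
# _get_store: with only a memory-store spec registered and no store
# instantiated, the store is created from its spec. `Runner` is a hypothetical
# name for the class that defines _get_store.
def _example_get_store_fallback() -> None:
    schema_loader = SchemaLoader()
    schema_loader.add_schema_spec({'Name': 'memstore', 'Type': Type.BLURR_STORE_MEMORY})
    store = Runner._get_store(schema_loader)  # `Runner` is hypothetical
    assert isinstance(store, MemoryStore)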
def test_initialization_with_invalid_source(schema_loader_with_mem_store: SchemaLoader,
                                            window_schema_spec: Dict[str, Any]):
    name = schema_loader_with_mem_store.add_schema_spec(window_schema_spec)
    schema = WindowAggregateSchema(name, schema_loader_with_mem_store)

    assert len(schema.errors) == 0
    assert len(schema_loader_with_mem_store.get_errors()) == 0
def test_schema_init(dynamo_store_spec: Dict[str, Any]) -> None:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(dynamo_store_spec)
    store_schema = schema_loader.get_schema_object(name)

    assert store_schema.name == dynamo_store_spec['Name']
    assert store_schema.table_name == dynamo_store_spec['Table']
    assert store_schema.rcu == 5
    assert store_schema.wcu == 5
def test_schema_collection_missing_nested_attribute_adds_error(
        schema_collection_spec: Dict[str, Any]):
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(schema_collection_spec)
    schema = MockSchemaCollection(name, schema_loader, 'MissingNested')

    assert len(schema.errors) == 1
    assert isinstance(schema.errors[0], RequiredAttributeError)
    assert schema.errors[0].attribute == 'MissingNested'
def test_schema_init_with_read_write_units(dynamo_store_spec: Dict[str, Any]) -> None:
    dynamo_store_spec['ReadCapacityUnits'] = 10
    dynamo_store_spec['WriteCapacityUnits'] = 10
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(dynamo_store_spec)
    store_schema = schema_loader.get_schema_object(name)

    assert store_schema.rcu == 10
    assert store_schema.wcu == 10
def store(dynamo_store_spec: Dict[str, Any]) -> DynamoStore:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(dynamo_store_spec)
    with mock.patch(
            'blurr.store.dynamo_store.DynamoStore.get_dynamodb_resource',
            new=override_boto3_dynamodb_resource):
        dynamo_store = schema_loader.get_store(name)
        yield dynamo_store
        # Tear down the (mocked) table after the test completes.
        dynamo_store._table.delete()
def validate(spec: Dict[str, Any]) -> None:
    schema_loader = SchemaLoader()
    bts_name = schema_loader.add_schema_spec(spec)
    if not bts_name:
        raise InvalidSpecError(spec)
    schema_loader.raise_errors()

    # Constructing the schema object can surface additional errors, so
    # raise_errors() is called a second time afterwards.
    schema_loader.get_schema_object(bts_name)
    schema_loader.raise_errors()
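# Hedged usage sketch (illustrative, not from the source): validate() raises
# InvalidSpecError if the spec cannot be registered at all, and raise_errors()
# surfaces errors collected while loading the spec or constructing the schema
# object. The spec below mirrors the memory-store spec used elsewhere in these
# tests.
def _example_validate() -> None:
    validate({'Name': 'memstore', 'Type': Type.BLURR_STORE_MEMORY})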
def test_get_schema_object(schema_loader: SchemaLoader) -> None:
    assert isinstance(schema_loader.get_schema_object('test'), StreamingTransformerSchema)

    field_schema = schema_loader.get_schema_object('test.test_group.events')
    assert isinstance(field_schema, IntegerFieldSchema)

    # Assert that the same object is returned and a new one is not created.
    assert field_schema.when is None
    field_schema.when = 'True'
    assert schema_loader.get_schema_object('test.test_group.events').when == 'True'
def schema_loader_with_mem_store(stream_bts_name: str) -> SchemaLoader:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec({
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY
    }, stream_bts_name)
    store = schema_loader.get_store(stream_bts_name + '.' + name)
    init_memory_store(store)
    return schema_loader
def test_aggregate_schema_missing_attributes_adds_error(aggregate_schema_spec):
    del aggregate_schema_spec[AggregateSchema.ATTRIBUTE_FIELDS]
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(aggregate_schema_spec)
    schema = MockAggregateSchema(name, schema_loader)

    assert 1 == len(schema.errors)
    assert isinstance(schema.errors[0], RequiredAttributeError)
    assert AggregateSchema.ATTRIBUTE_FIELDS == schema.errors[0].attribute
def test_schema_collection_empty_nested_attribute_adds_error(
        schema_collection_spec: Dict[str, Any]):
    del schema_collection_spec['Fields'][0]
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(schema_collection_spec)
    schema = MockSchemaCollection(name, schema_loader, 'Fields')

    assert len(schema.errors) == 1
    assert isinstance(schema.errors[0], EmptyAttributeError)
    assert schema.errors[0].attribute == 'Fields'
def test_add_valid_simple_schema_with_parent() -> None:
    schema_loader = SchemaLoader()
    assert schema_loader.add_schema_spec({
        'Name': 'test',
        'Type': 'test_type'
    }, 'parent') == 'test'
    assert schema_loader.get_schema_spec('parent.test') == {
        'Name': 'test',
        'Type': 'test_type'
    }
def test_get_attribute(collection_schema_spec: Dict[str, Any]) -> None:
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(collection_schema_spec)
    schema_collection = MockBaseSchemaCollection(name, schema_loader,
                                                 AggregateSchema.ATTRIBUTE_FIELDS)
    item_collection = MockBaseItemCollection(schema_collection, EvaluationContext())

    # Check nested item access.
    assert item_collection.event_count == 0

    # Make sure normal properties are not broken.
    assert item_collection._schema == schema_collection
def test_evaluate_needs_evaluation_false(collection_schema_spec: Dict[str, Any]) -> None:
    schema_loader = SchemaLoader()
    collection_schema_spec['When'] = 'False'
    name = schema_loader.add_schema_spec(collection_schema_spec)
    schema_collection = MockBaseSchemaCollection(name, schema_loader,
                                                 AggregateSchema.ATTRIBUTE_FIELDS)
    item_collection = MockBaseItemCollection(schema_collection, EvaluationContext())
    item_collection.run_evaluate()

    assert item_collection.event_count == 0
def test_block_aggregate_schema_missing_split_attribute_adds_error(schema_spec, store_spec):
    del schema_spec[BlockAggregateSchema.ATTRIBUTE_SPLIT]
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(schema_spec)
    schema_loader.add_schema_spec(store_spec, name)
    schema = BlockAggregateSchema(name, schema_loader)

    assert 1 == len(schema.errors)
    assert isinstance(schema.errors[0], RequiredAttributeError)
    assert BlockAggregateSchema.ATTRIBUTE_SPLIT == schema.errors[0].attribute
def test_field_evaluate_implicit_typecast_bool():
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec({
        'Name': 'max_attempts',
        'Type': Type.BOOLEAN,
        'Value': '1+2'
    })
    field_schema = BooleanFieldSchema(name, schema_loader)
    field = Field(field_schema, EvaluationContext())
    field.run_evaluate()

    assert field._snapshot is True
def test_field_evaluate_implicit_typecast_integer():
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec({
        'Name': 'max_attempts',
        'Type': Type.INTEGER,
        'Value': '23.45'
    })
    field_schema = IntegerFieldSchema(name, schema_loader)
    field = Field(field_schema, EvaluationContext())
    field.run_evaluate()

    assert field._snapshot == 23