def _execute_window_dtc(self, identity: str, schema_loader: SchemaLoader) -> List[Dict]:
    """Run the window DTC over the blocks restored for *identity*.

    Restores all persisted state for the identity into a streaming
    transformer, locates its single BlockAggregate, then evaluates the
    window DTC once per stored block, collecting a flattened snapshot for
    every block that anchors successfully.

    :param identity: identity whose stored blocks are processed.
    :param schema_loader: loader used to resolve schemas and the store.
    :return: list of flattened window snapshots (empty if no window DTC
        was provided or no block anchored).
    :raises Exception: if the streaming DTC has zero or more than one
        BlockAggregate.
    """
    if self._window_dtc is None:
        logging.debug('Window DTC not provided')
        return []

    stream_transformer = StreamingTransformer(
        self._get_streaming_transformer_schema(schema_loader), identity)
    all_data = self._get_store(schema_loader).get_all(identity)
    stream_transformer.run_restore(all_data)

    exec_context = Context()
    exec_context.add(stream_transformer._schema.name, stream_transformer)

    # Window processing requires exactly one BlockAggregate in the
    # streaming DTC; find it and reject ambiguous schemas.
    block_obj = None
    for aggregate in stream_transformer._nested_items.values():
        if not isinstance(aggregate, BlockAggregate):
            continue
        if block_obj is not None:
            # BUG FIX: previously raised with a tuple of two strings
            # (stray comma instead of implicit string concatenation),
            # producing a tuple repr as the exception message.
            raise Exception(
                'Window operation is supported against Streaming '
                'DTC with only one BlockAggregate')
        block_obj = aggregate

    if block_obj is None:
        raise Exception('No BlockAggregate found in the Streaming DTC file')

    window_data = []

    window_dtc_name = schema_loader.add_schema_spec(self._window_dtc)
    window_transformer_schema = schema_loader.get_schema_object(window_dtc_name)
    window_transformer = WindowTransformer(window_transformer_schema, identity,
                                           exec_context)

    logging.debug('Running Window DTC for identity {}'.format(identity))

    anchors = 0
    blocks = 0
    for key, data in all_data.items():
        # Only keys belonging to the block aggregate's group are blocks.
        if key.group != block_obj._schema.name:
            continue
        try:
            blocks += 1
            if window_transformer.run_evaluate(block_obj.run_restore(data)):
                anchors += 1
                # NOTE(review): accessed without parentheses — presumably
                # `run_flattened_snapshot` is a property on
                # WindowTransformer; confirm against its definition.
                window_data.append(window_transformer.run_flattened_snapshot)
        except PrepareWindowMissingBlocksError as err:
            # Best-effort: a block without enough neighbors is skipped,
            # not fatal.
            logging.debug('{} with {}'.format(err, key))

    if anchors == 0:
        logging.debug(
            'No anchors found for identity {} out of {} blocks'.format(
                identity, blocks))

    return window_data
def test_streaming_transformer_evaluate(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    """A single evaluated record shows up in the snapshot under 'test_group'."""
    bts_name = schema_loader.add_schema_spec(schema_spec)
    xformer = StreamingTransformer(
        StreamingTransformerSchema(bts_name, schema_loader), 'user1')

    xformer.run_evaluate(Record())

    expected = {'test_group': {'_identity': 'user1', 'events': 1}}
    assert xformer._snapshot == expected
def test_streaming_transformer_evaluate_time_error(schema_loader: SchemaLoader,
                                                   schema_spec: Dict[str, Any]) -> None:
    """Without the Import section, `datetime` is undefined at evaluate time."""
    schema_spec.pop('Import')
    bts_name = schema_loader.add_schema_spec(schema_spec)
    xformer = StreamingTransformer(
        StreamingTransformerSchema(bts_name, schema_loader), 'user1')

    with pytest.raises(NameError, match="name 'datetime' is not defined"):
        assert xformer.run_evaluate(Record())
def stream_transformer(schema_loader, stream_schema_spec):
    """Build a StreamingTransformer for 'user1' restored with one dimension row."""
    bts_name = schema_loader.add_schema_spec(stream_schema_spec)
    transformer = StreamingTransformer(
        schema_loader.get_schema_object(bts_name), 'user1')

    state_key = Key(KeyType.DIMENSION, 'user1', 'state')
    transformer.run_restore({state_key: {'country': 'US'}})
    return transformer
def test_streaming_transformer_evaluate_user_mismatch(schema_loader: SchemaLoader,
                                                      schema_spec: Dict[str, Any]) -> None:
    """Evaluating a record whose identity differs from the transformer's raises."""
    bts_name = schema_loader.add_schema_spec(schema_spec)
    xformer = StreamingTransformer(
        StreamingTransformerSchema(bts_name, schema_loader), 'user2')

    mismatch_pattern = (r'Identity in transformer \(user2\) and '
                        r'new record \(user1\) do not match')
    with pytest.raises(IdentityError, match=mismatch_pattern):
        assert xformer.run_evaluate(Record())
def _execute_stream_bts(
        self,
        identity_events: List[TimeAndRecord],
        identity: str,
        schema_loader: SchemaLoader,
        old_state: Optional[Dict] = None) -> Dict[Key, Any]:
    """Replay *identity_events* through the streaming BTS and return the
    per-identity store contents afterwards.

    :param identity_events: (time, record) pairs to evaluate, in order.
    :param identity: identity the events belong to.
    :param schema_loader: loader used to resolve the BTS schema and store.
    :param old_state: previously persisted key/value state to seed the
        store with before evaluation, if any.
    :return: all store entries for the identity ({} if no stream BTS).
    """
    if self._stream_bts is None:
        return {}

    stream_bts_name = schema_loader.add_schema_spec(self._stream_bts)
    stream_transformer_schema = schema_loader.get_schema_object(stream_bts_name)

    store = self._get_store(schema_loader)

    # Seed prior state first so the transformer evaluates on top of it.
    if old_state:
        for key, value in old_state.items():
            store.save(key, value)

    if identity_events:
        stream_transformer = StreamingTransformer(stream_transformer_schema,
                                                  identity)
        # Only the record half of each TimeAndRecord pair is consumed here
        # (the timestamp was previously bound to an unused `time` local).
        for _, event in identity_events:
            stream_transformer.run_evaluate(event)
        stream_transformer.run_finalize()

    # Reuse the store handle obtained above rather than resolving it again.
    return store.get_all(identity)
def test_streaming_transformer_finalize(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    """finalize persists the snapshot only after a record has been evaluated."""
    bts_name = schema_loader.add_schema_spec(schema_spec)
    xformer = StreamingTransformer(
        StreamingTransformerSchema(bts_name, schema_loader), 'user1')
    memstore = schema_loader.get_store('test.memstore')
    group_key = Key(KeyType.DIMENSION, 'user1', 'test_group')

    # Finalizing before any evaluation writes nothing to the store.
    xformer.run_finalize()
    assert memstore.get(group_key) is None

    # After evaluating one record, finalize persists the aggregated state.
    xformer.run_evaluate(Record())
    xformer.run_finalize()
    assert memstore.get(group_key) == {'_identity': 'user1', 'events': 1}
def test_execution_error_missing_field(caplog, schema_loader: SchemaLoader) -> None:
    """Accessing an undefined attribute raises MissingAttributeError and logs it."""
    caplog.set_level(logging.DEBUG)
    transformer = StreamingTransformer(
        schema_loader.get_schema_object('test'), 'user1')
    context = Context({'test': transformer})
    eval_context = EvaluationContext(context)

    # Dotted attribute access path.
    with raises(MissingAttributeError,
                match='missing_field not defined in test_group'):
        Expression('test.test_group.missing_field').evaluate(eval_context)
    expected_log = (
        'MissingAttributeError in evaluating expression test.test_group.missing_field. '
        'Error: missing_field not defined in test_group')
    assert expected_log in caplog.text

    # Subscript access path raises the same error.
    with raises(MissingAttributeError,
                match='missing_field not defined in test_group'):
        Expression("test.test_group['missing_field']").evaluate(eval_context)
def _execute_stream_dtc(self, identity_events: List[Tuple[datetime, Record]],
                        identity: str,
                        schema_loader: SchemaLoader) -> Dict[Key, Any]:
    """Replay *identity_events* through the streaming DTC and return the
    per-identity store contents afterwards.

    :param identity_events: (timestamp, record) pairs to evaluate, in order.
    :param identity: identity the events belong to.
    :param schema_loader: loader used to resolve the DTC schema and store.
    :return: all store entries for the identity ({} if no stream DTC).
    """
    if self._stream_dtc is None:
        return {}

    stream_dtc_name = schema_loader.add_schema_spec(self._stream_dtc)
    stream_transformer_schema = schema_loader.get_schema_object(stream_dtc_name)

    stream_transformer = StreamingTransformer(stream_transformer_schema,
                                              identity)
    # Only the record half of each (timestamp, record) pair is consumed
    # (the timestamp was previously bound to an unused `time` local that
    # shadowed the stdlib module name).
    for _, event in identity_events:
        stream_transformer.run_evaluate(event)
    stream_transformer.run_finalize()

    return self._get_store(schema_loader).get_all(identity)