示例#1
0
    def _execute_window_dtc(self, identity: str,
                            schema_loader: SchemaLoader) -> List[Dict]:
        """Run the Window DTC over the stored blocks of one identity.

        All stored data for `identity` is fetched from the store and restored
        into a StreamingTransformer. That transformer is registered in a
        Context under the streaming schema's name and the Context is handed
        to the WindowTransformer. Every stored entry whose key group equals
        the name of the single BlockAggregate is then restored and evaluated.

        Args:
            identity: Identity whose stored data is processed.
            schema_loader: Loader used to obtain the streaming schema, the
                window schema and the store.

        Returns:
            One `run_flattened_snapshot` value per block for which
            `run_evaluate` returned a truthy result. An empty list is
            returned when no Window DTC is configured.

        Raises:
            Exception: If the streaming transformer contains no
                BlockAggregate, or more than one.
        """
        if self._window_dtc is None:
            logging.debug('Window DTC not provided')
            return []

        stream_transformer = StreamingTransformer(
            self._get_streaming_transformer_schema(schema_loader), identity)
        all_data = self._get_store(schema_loader).get_all(identity)
        stream_transformer.run_restore(all_data)

        exec_context = Context()
        exec_context.add(stream_transformer._schema.name, stream_transformer)

        # Locate the one and only BlockAggregate among the nested items.
        block_obj = None
        for aggregate in stream_transformer._nested_items.values():
            if not isinstance(aggregate, BlockAggregate):
                continue
            if block_obj is not None:
                # The message has to be a single string: passing a tuple of
                # fragments makes the exception render as a tuple repr.
                raise Exception(
                    'Window operation is supported against Streaming '
                    'DTC with only one BlockAggregate')
            block_obj = aggregate

        if block_obj is None:
            raise Exception(
                'No BlockAggregate found in the Streaming DTC file')

        window_dtc_name = schema_loader.add_schema_spec(self._window_dtc)
        window_transformer_schema = schema_loader.get_schema_object(
            window_dtc_name)
        window_transformer = WindowTransformer(window_transformer_schema,
                                               identity, exec_context)

        logging.debug('Running Window DTC for identity {}'.format(identity))

        window_data = []
        anchors = 0
        blocks = 0
        for key, data in all_data.items():
            # Only entries stored under the block aggregate's group are blocks.
            if key.group != block_obj._schema.name:
                continue
            blocks += 1
            try:
                if window_transformer.run_evaluate(
                        block_obj.run_restore(data)):
                    anchors += 1
                    # NOTE(review): accessed without a call, so presumably a
                    # property on WindowTransformer - confirm.
                    window_data.append(
                        window_transformer.run_flattened_snapshot)
            except PrepareWindowMissingBlocksError as err:
                # This block is skipped; the error is only logged.
                logging.debug('{} with {}'.format(err, key))

        if anchors == 0:
            logging.debug(
                'No anchors found for identity {} out of {} blocks'.format(
                    identity, blocks))

        return window_data
示例#2
0
def test_streaming_transformer_evaluate(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    """Evaluating a single record fills the snapshot of a 'user1' transformer."""
    spec_name = schema_loader.add_schema_spec(schema_spec)
    schema = StreamingTransformerSchema(spec_name, schema_loader)
    user_transformer = StreamingTransformer(schema, 'user1')

    user_transformer.run_evaluate(Record())

    snapshot = user_transformer._snapshot
    assert snapshot == {'test_group': {'_identity': 'user1', 'events': 1}}
示例#3
0
def test_streaming_transformer_evaluate_time_error(schema_loader: SchemaLoader,
                                                   schema_spec: Dict[str, Any]) -> None:
    """With the 'Import' entry removed from the spec, evaluation raises NameError."""
    # Drop the 'Import' entry before the spec is registered.
    del schema_spec['Import']

    spec_name = schema_loader.add_schema_spec(schema_spec)
    schema = StreamingTransformerSchema(spec_name, schema_loader)
    user_transformer = StreamingTransformer(schema, 'user1')

    expected_error = pytest.raises(
        NameError, match='name \'datetime\' is not defined')
    with expected_error:
        assert user_transformer.run_evaluate(Record())
示例#4
0
def stream_transformer(schema_loader, stream_schema_spec):
    """Return a 'user1' StreamingTransformer restored with a 'state' entry.

    The spec is registered with the loader, the resulting schema object is
    used to build the transformer, and a single DIMENSION key for group
    'state' holding {'country': 'US'} is restored into it.
    """
    spec_name = schema_loader.add_schema_spec(stream_schema_spec)
    schema = schema_loader.get_schema_object(spec_name)
    transformer = StreamingTransformer(schema, 'user1')

    state_key = Key(KeyType.DIMENSION, 'user1', 'state')
    transformer.run_restore({state_key: {'country': 'US'}})
    return transformer
示例#5
0
def test_streaming_transformer_evaluate_user_mismatch(schema_loader: SchemaLoader,
                                                      schema_spec: Dict[str, Any]) -> None:
    """Evaluating a record with a transformer built for 'user2' raises IdentityError.

    The expected message names 'user2' as the transformer identity and
    'user1' as the identity of the new record.
    """
    streaming_bts = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_bts, schema_loader)
    transformer = StreamingTransformer(transformer_schema, 'user2')
    with pytest.raises(
            IdentityError,
            # Raw string: `\(` and `\)` are regex escapes for literal
            # parentheses, not valid Python string escape sequences.
            match=r'Identity in transformer \(user2\) and new record \(user1\) do not match'):
        assert transformer.run_evaluate(Record())
示例#6
0
    def _execute_stream_bts(
            self,
            identity_events: List[TimeAndRecord],
            identity: str,
            schema_loader: SchemaLoader,
            old_state: Optional[Dict] = None) -> Dict[Key, Any]:
        """Run the streaming BTS over the events of one identity.

        Args:
            identity_events: (time, record) pairs for the identity.
            identity: Identity the events belong to.
            schema_loader: Loader used to register the streaming spec and to
                obtain the schema object and the store.
            old_state: Optional key/value pairs saved into the store before
                any event is evaluated.

        Returns:
            Everything the store holds for `identity` afterwards, or an empty
            dict when no streaming BTS is configured.
        """
        if self._stream_bts is None:
            return {}

        bts_name = schema_loader.add_schema_spec(self._stream_bts)
        transformer_schema = schema_loader.get_schema_object(bts_name)
        store = self._get_store(schema_loader)

        # Seed the store with any previous state before processing events.
        if old_state:
            for state_key, state_value in old_state.items():
                store.save(state_key, state_value)

        # Without events there is nothing to evaluate or finalize.
        if not identity_events:
            return self._get_store(schema_loader).get_all(identity)

        transformer = StreamingTransformer(transformer_schema, identity)
        for _, record in identity_events:
            transformer.run_evaluate(record)
        transformer.run_finalize()

        return self._get_store(schema_loader).get_all(identity)
示例#7
0
def test_streaming_transformer_finalize(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    """Finalize stores nothing before any evaluation and the group data after one."""
    spec_name = schema_loader.add_schema_spec(schema_spec)
    schema = StreamingTransformerSchema(spec_name, schema_loader)
    user_transformer = StreamingTransformer(schema, 'user1')
    mem_store = schema_loader.get_store('test.memstore')

    def stored_group():
        # Look up what the store currently holds for user1's 'test_group'.
        return mem_store.get(Key(KeyType.DIMENSION, 'user1', 'test_group'))

    # Finalizing before any record was evaluated leaves the key absent.
    user_transformer.run_finalize()
    assert stored_group() is None

    # After one record, finalizing makes the group data visible in the store.
    user_transformer.run_evaluate(Record())
    user_transformer.run_finalize()
    assert stored_group() == {
        '_identity': 'user1',
        'events': 1
    }
示例#8
0
def test_execution_error_missing_field(caplog,
                                       schema_loader: SchemaLoader) -> None:
    """Accessing an undefined field raises MissingAttributeError.

    Both attribute-style and item-style access are checked, and the
    attribute-style failure is also expected in the captured log text.
    """
    caplog.set_level(logging.DEBUG)
    test_transformer = StreamingTransformer(
        schema_loader.get_schema_object('test'), 'user1')
    context = Context({'test': test_transformer})

    # Attribute-style access to the missing field.
    with raises(MissingAttributeError,
                match='missing_field not defined in test_group'):
        attribute_expression = Expression('test.test_group.missing_field')
        attribute_expression.evaluate(EvaluationContext(context))

    logged_message = (
        'MissingAttributeError in evaluating expression test.test_group.missing_field. '
        'Error: missing_field not defined in test_group')
    assert logged_message in caplog.text

    # Item-style access to the same missing field.
    with raises(MissingAttributeError,
                match='missing_field not defined in test_group'):
        item_expression = Expression('test.test_group[\'missing_field\']')
        item_expression.evaluate(EvaluationContext(context))
示例#9
0
    def _execute_stream_dtc(self, identity_events: List[Tuple[datetime,
                                                              Record]],
                            identity: str,
                            schema_loader: SchemaLoader) -> Dict[Key, Any]:
        """Run the streaming DTC over the events of one identity.

        Args:
            identity_events: (time, record) pairs for the identity.
            identity: Identity the events belong to.
            schema_loader: Loader used to register the streaming spec and to
                obtain the schema object and the store.

        Returns:
            Everything the store holds for `identity` after all events were
            evaluated and the transformer finalized, or an empty dict when no
            streaming DTC is configured.
        """
        if self._stream_dtc is None:
            return {}

        dtc_name = schema_loader.add_schema_spec(self._stream_dtc)
        transformer_schema = schema_loader.get_schema_object(dtc_name)
        transformer = StreamingTransformer(transformer_schema, identity)

        # Only the record of each pair is evaluated; the time is unused here.
        for _, record in identity_events:
            transformer.run_evaluate(record)
        transformer.run_finalize()

        store = self._get_store(schema_loader)
        return store.get_all(identity)