Exemplo n.º 1
0
    def _execute_stream_bts(
            self,
            identity_events: List[TimeAndRecord],
            identity: str,
            schema_loader: SchemaLoader,
            old_state: Optional[Dict] = None) -> Dict[Key, Any]:
        """
        Runs the streaming BTS over the events of a single identity.

        :param identity_events: (time, record) pairs belonging to `identity`.
        :param identity: Identity the events belong to.
        :param schema_loader: Loader the streaming BTS spec is added to.
        :param old_state: Optional prior state; every entry is saved into the
            store before any event is evaluated.
        :return: Everything the store holds for `identity` after processing,
            or an empty dict when no streaming BTS is configured.
        """
        if self._stream_bts is None:
            return {}

        bts_name = schema_loader.add_schema_spec(self._stream_bts)
        bts_schema = schema_loader.get_schema_object(bts_name)
        state_store = self._get_store(schema_loader)

        # Load the prior state into the store before any event is evaluated.
        for state_key, state_value in (old_state or {}).items():
            state_store.save(state_key, state_value)

        if identity_events:
            transformer = StreamingTransformer(bts_schema, identity)
            # Only the record half of each (time, record) pair is evaluated.
            for _, record in identity_events:
                transformer.run_evaluate(record)
            transformer.run_finalize()

        return self._get_store(schema_loader).get_all(identity)
Exemplo n.º 2
0
def test_aggregate_schema_contains_identity_field(aggregate_schema_spec):
    """The aggregate's nested schema has two entries, one of them `_identity`."""
    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(aggregate_schema_spec)
    schema = MockAggregateSchema(spec_name, loader)

    assert len(schema.nested_schema) == 2
    assert '_identity' in schema.nested_schema
Exemplo n.º 3
0
def empty_memory_store() -> MemoryStore:
    """Registers a memory store spec on a fresh loader and returns that store."""
    loader = SchemaLoader()
    memstore_spec = {
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY
    }
    store_name = loader.add_schema_spec(memstore_spec)
    return loader.get_store(store_name)
def activity_aggregate_schema(
        activity_aggregate_schema_spec: Dict[str, Any],
        store_spec: Dict[str, Any]) -> ActivityAggregateSchema:
    """Builds an ActivityAggregateSchema from the given aggregate and store specs."""
    loader = SchemaLoader()
    aggregate_name = loader.add_schema_spec(activity_aggregate_schema_spec)
    # The store spec is added with the aggregate's name as the second argument.
    loader.add_schema_spec(store_spec, aggregate_name)
    return ActivityAggregateSchema(aggregate_name, loader)
Exemplo n.º 5
0
def identity_aggregate_schema(
        identity_aggregate_schema_spec: Dict[str, Any],
        store_spec: Dict[str, Any]) -> IdentityAggregateSchema:
    """Builds an IdentityAggregateSchema from the given aggregate and store specs."""
    loader = SchemaLoader()
    aggregate_name = loader.add_schema_spec(identity_aggregate_schema_spec)
    # The store spec is added with the aggregate's name as the second argument.
    loader.add_schema_spec(store_spec, aggregate_name)
    return IdentityAggregateSchema(aggregate_name, loader)
Exemplo n.º 6
0
def test_get_store_error_wrong_type(schema_loader: SchemaLoader) -> None:
    """Asking for 'test' as a store records an InvalidTypeError expecting a STORE base."""
    schema_loader.get_store('test')

    type_errors = (err for err in schema_loader.get_errors('test', False)
                   if isinstance(err, InvalidTypeError))
    # Only the first InvalidTypeError reported for 'test' is inspected.
    error = next(type_errors)

    assert isinstance(error, InvalidTypeError)
    assert error.reason == InvalidTypeError.Reason.INCORRECT_BASE
    assert error.expected_base_type == InvalidTypeError.BaseTypes.STORE
Exemplo n.º 7
0
def window_aggregate_schema(schema_loader_with_mem_store: SchemaLoader, mem_store_name: str,
                            stream_dtc_name: str) -> WindowAggregateSchema:
    """
    Builds a WindowAggregateSchema whose source is a `session` block aggregate.

    The block aggregate spec is added under `stream_dtc_name`; the window
    aggregate spec is added without a parent and refers to the block through
    its 'Source' attribute.
    """
    session_block_spec = {
        'Type': Type.BLURR_AGGREGATE_BLOCK,
        'Name': 'session',
        'Store': mem_store_name,
        'Fields': [
            {
                'Name': 'events',
                'Type': Type.INTEGER,
                'Value': 'session.events + 1',
            },
        ],
    }
    schema_loader_with_mem_store.add_schema_spec(session_block_spec, stream_dtc_name)

    window_spec = {
        'Type': Type.BLURR_AGGREGATE_WINDOW,
        'Name': 'test_window_name',
        'WindowType': Type.DAY,
        'WindowValue': 1,
        'Source': stream_dtc_name + '.session',
        'Fields': [{
            'Name': 'total_events',
            'Type': Type.INTEGER,
            'Value': 'sum(source.events)'
        }]
    }
    window_name = schema_loader_with_mem_store.add_schema_spec(window_spec)

    return WindowAggregateSchema(window_name, schema_loader_with_mem_store)
Exemplo n.º 8
0
 def get_per_identity_records(
     self, events: Iterable, data_processor: DataProcessor
 ) -> Generator[Tuple[str, TimeAndRecord], None, None]:
     """
     Converts raw events into Records and yields each with its identity and time.

     Failures are logged and skipped rather than raised: a Record that fails is
     dropped on its own, while a failure raised by the data processor ends the
     handling of that Event and iteration moves on to the next one.

     :param events: Iterable of raw events.
     :param data_processor: DataProcessor used to turn each event into Records.
     :return: Yields (identity, (time, record)) for every Record handled.
     """
     loader = SchemaLoader()
     bts_name = loader.add_schema_spec(self._stream_bts)
     transformer_schema: StreamingTransformerSchema = loader.get_schema_object(bts_name)

     for event in events:
         try:
             for record in data_processor.process_data(event):
                 try:
                     identity = transformer_schema.get_identity(record)
                     record_time = transformer_schema.get_time(record)
                     yield (identity, (record_time, record))
                 except Exception as record_err:
                     record_msg = '{} in parsing Record {}.'.format(record_err, record)
                     logging.error(record_msg)
         except Exception as event_err:
             event_msg = '{} in parsing Event {}.'.format(event_err, event)
             logging.error(event_msg)
Exemplo n.º 9
0
def aggregate_schema_without_store():
    """Builds a MockAggregateSchema from the aggregate spec with its 'Store' entry removed."""
    loader = SchemaLoader()
    spec = get_aggregate_schema_spec()
    del spec['Store']
    spec_name = loader.add_schema_spec(spec)
    return MockAggregateSchema(
        fully_qualified_name=spec_name, schema_loader=loader)
Exemplo n.º 10
0
    def _execute_window_dtc(self, identity: str,
                            schema_loader: SchemaLoader) -> List[Dict]:
        """
        Runs the Window DTC for one identity against the data held in the store.

        :param identity: Identity whose stored blocks are evaluated.
        :param schema_loader: Loader used to resolve the streaming schema and
            the store; the Window DTC spec is added to it here.
        :return: One flattened snapshot for every block whose window evaluation
            returned a truthy result; an empty list when no Window DTC is set.
        :raises Exception: If the streaming transformer holds no BlockAggregate,
            or holds more than one.
        """
        if self._window_dtc is None:
            logging.debug('Window DTC not provided')
            return []

        stream_transformer = StreamingTransformer(
            self._get_streaming_transformer_schema(schema_loader), identity)
        all_data = self._get_store(schema_loader).get_all(identity)
        stream_transformer.run_restore(all_data)

        exec_context = Context()
        exec_context.add(stream_transformer._schema.name, stream_transformer)

        # Locate the single BlockAggregate the window is evaluated against.
        block_obj = None
        for aggregate in stream_transformer._nested_items.values():
            if not isinstance(aggregate, BlockAggregate):
                continue
            if block_obj is not None:
                # Adjacent literals are concatenated into one message string;
                # a comma between them would pass a tuple to Exception instead.
                raise Exception(
                    'Window operation is supported against Streaming '
                    'DTC with only one BlockAggregate')
            block_obj = aggregate

        if block_obj is None:
            raise Exception(
                'No BlockAggregate found in the Streaming DTC file')

        window_data = []

        window_dtc_name = schema_loader.add_schema_spec(self._window_dtc)
        window_transformer_schema = schema_loader.get_schema_object(
            window_dtc_name)
        window_transformer = WindowTransformer(window_transformer_schema,
                                               identity, exec_context)

        logging.debug('Running Window DTC for identity {}'.format(identity))

        anchors = 0
        blocks = 0
        for key, data in all_data.items():
            # Only entries stored under the block aggregate's group are used.
            if key.group != block_obj._schema.name:
                continue
            try:
                blocks += 1
                if window_transformer.run_evaluate(
                        block_obj.run_restore(data)):
                    anchors += 1
                    # NOTE(review): appended without a call - assumes
                    # run_flattened_snapshot is a property; confirm.
                    window_data.append(
                        window_transformer.run_flattened_snapshot)
            except PrepareWindowMissingBlocksError as err:
                # Missing blocks only skip this entry; processing continues.
                logging.debug('{} with {}'.format(err, key))

        if anchors == 0:
            logging.debug(
                'No anchors found for identity {} out of {} blocks'.format(
                    identity, blocks))

        return window_data
Exemplo n.º 11
0
def test_variable_aggregate_initialization(schema_spec):
    """Fields match both on the schema's own spec and on the spec held by the loader."""
    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(schema_spec)
    aggregate_schema = VariableAggregateSchema(spec_name, loader)

    schema_fields = aggregate_schema._spec['Fields']
    assert match_fields(schema_fields)

    loader_fields = loader.get_schema_spec(spec_name)['Fields']
    assert match_fields(loader_fields)
Exemplo n.º 12
0
def test_field_schema() -> MockFieldSchema:
    """Builds a MockFieldSchema for an integer field `max_attempts` with value 5."""
    loader = SchemaLoader()
    field_spec = {
        'Name': 'max_attempts',
        'Type': Type.INTEGER,
        'Value': 5
    }
    field_name = loader.add_schema_spec(field_spec)
    return MockFieldSchema(field_name, loader)
Exemplo n.º 13
0
def test_get_store_success(nested_schema_spec: Dict) -> None:
    """A memory store nested in the spec is retrievable as 'test.memstore'."""
    memstore_spec = {
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY
    }
    nested_schema_spec['Store'] = memstore_spec

    loader = SchemaLoader()
    loader.add_schema_spec(nested_schema_spec)

    resolved_store = loader.get_store('test.memstore')
    assert isinstance(resolved_store, MemoryStore)
Exemplo n.º 14
0
    def _get_store(schema_loader: SchemaLoader) -> Store:
        """
        Returns the store to work with from the given loader.

        When the loader reports stores, the first one is returned. Otherwise
        the first Dynamo or memory store spec known to the loader is looked up
        by name and returned.

        NOTE(review): takes no `self`; presumably decorated as a staticmethod
        outside this view - confirm.
        """
        existing_stores = schema_loader.get_all_stores()
        if existing_stores:
            return existing_stores[0]

        store_specs = schema_loader.get_schema_specs_of_type(
            Type.BLURR_STORE_DYNAMO, Type.BLURR_STORE_MEMORY)
        first_store_name = next(iter(store_specs))
        return schema_loader.get_store(first_store_name)
Exemplo n.º 15
0
def test_initialization_with_invalid_source(
        schema_loader_with_mem_store: SchemaLoader,
        window_schema_spec: Dict[str, Any]):
    """
    Building the window schema from the spec leaves no errors on schema or loader.

    NOTE(review): the name mentions an invalid source, yet the assertions
    expect zero errors - confirm which is intended.
    """
    loader = schema_loader_with_mem_store
    window_name = loader.add_schema_spec(window_schema_spec)

    window_schema = WindowAggregateSchema(window_name, loader)
    assert len(window_schema.errors) == 0
    assert len(loader.get_errors()) == 0
Exemplo n.º 16
0
def test_schema_init(dynamo_store_spec: Dict[str, Any]) -> None:
    """The store schema mirrors the spec's name and table and reports 5 for rcu and wcu."""
    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(dynamo_store_spec)
    dynamo_schema = loader.get_schema_object(spec_name)

    assert dynamo_schema.name == dynamo_store_spec['Name']
    assert dynamo_schema.table_name == dynamo_store_spec['Table']
    assert dynamo_schema.rcu == 5
    assert dynamo_schema.wcu == 5
Exemplo n.º 17
0
def test_schema_collection_missing_nested_attribute_adds_error(
        schema_collection_spec: Dict[str, Any]):
    """Naming a nested attribute absent from the spec yields one RequiredAttributeError."""
    loader = SchemaLoader()
    collection_name = loader.add_schema_spec(schema_collection_spec)
    collection = MockSchemaCollection(collection_name, loader, 'MissingNested')

    assert len(collection.errors) == 1
    only_error = collection.errors[0]
    assert isinstance(only_error, RequiredAttributeError)
    assert only_error.attribute == 'MissingNested'
Exemplo n.º 18
0
def test_schema_init_with_read_write_units(
        dynamo_store_spec: Dict[str, Any]) -> None:
    """Capacity units set on the spec are reported by the store schema."""
    for capacity_key in ('ReadCapacityUnits', 'WriteCapacityUnits'):
        dynamo_store_spec[capacity_key] = 10

    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(dynamo_store_spec)
    dynamo_schema = loader.get_schema_object(spec_name)

    assert dynamo_schema.rcu == 10
    assert dynamo_schema.wcu == 10
Exemplo n.º 19
0
def store(dynamo_store_spec: Dict[str, Any]) -> DynamoStore:
    """
    Yields a DynamoStore created while the DynamoDB resource lookup is patched.

    After the consumer is done with the yielded store, its table is deleted.
    """
    loader = SchemaLoader()
    store_name = loader.add_schema_spec(dynamo_store_spec)

    # The patch is active only while the store is being created.
    resource_patch = mock.patch(
        'blurr.store.dynamo_store.DynamoStore.get_dynamodb_resource',
        new=override_boto3_dynamodb_resource)
    with resource_patch:
        dynamo_store = loader.get_store(store_name)

    yield dynamo_store
    dynamo_store._table.delete()
Exemplo n.º 20
0
def validate(spec: Dict[str, Any]) -> None:
    """
    Validates a spec by loading it into a fresh SchemaLoader.

    Loader errors are raised twice: once right after the spec is added and
    once more after the schema object has been built from it.

    :param spec: Spec to validate.
    :raises InvalidSpecError: If adding the spec yields no name.
    """
    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(spec)
    if not spec_name:
        raise InvalidSpecError(spec)

    loader.raise_errors()

    # Building the schema object may record further errors on the loader.
    loader.get_schema_object(spec_name)
    # NOTE(review): looks like leftover debug output; kept to preserve behavior.
    print(loader.get_errors())
    loader.raise_errors()
Exemplo n.º 21
0
def test_get_schema_object(schema_loader: SchemaLoader) -> None:
    """Schema objects have the expected types and repeated lookups return the same object."""
    transformer_schema = schema_loader.get_schema_object('test')
    assert isinstance(transformer_schema, StreamingTransformerSchema)

    events_field = schema_loader.get_schema_object('test.test_group.events')
    assert isinstance(events_field, IntegerFieldSchema)

    # A change made through one lookup must be visible through the next one,
    # which shows the same object is returned and a new one is not created.
    assert events_field.when is None
    events_field.when = 'True'
    looked_up_again = schema_loader.get_schema_object('test.test_group.events')
    assert looked_up_again.when == 'True'
Exemplo n.º 22
0
def schema_loader_with_mem_store(stream_bts_name: str) -> SchemaLoader:
    """Returns a loader holding an initialized memory store under `stream_bts_name`."""
    loader = SchemaLoader()
    memstore_spec = {
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY
    }
    store_name = loader.add_schema_spec(memstore_spec, stream_bts_name)

    # The store is looked up as '<stream_bts_name>.<returned name>'.
    mem_store = loader.get_store(stream_bts_name + '.' + store_name)
    init_memory_store(mem_store)
    return loader
Exemplo n.º 23
0
def test_aggregate_schema_missing_attributes_adds_error(aggregate_schema_spec):
    """Dropping the fields attribute from the spec yields one RequiredAttributeError."""
    del aggregate_schema_spec[AggregateSchema.ATTRIBUTE_FIELDS]

    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(aggregate_schema_spec)
    aggregate_schema = MockAggregateSchema(spec_name, loader)

    assert len(aggregate_schema.errors) == 1
    only_error = aggregate_schema.errors[0]
    assert isinstance(only_error, RequiredAttributeError)
    assert AggregateSchema.ATTRIBUTE_FIELDS == only_error.attribute
Exemplo n.º 24
0
def test_schema_collection_empty_nested_attribute_adds_error(
        schema_collection_spec: Dict[str, Any]):
    """Removing the first 'Fields' entry makes the collection report one EmptyAttributeError."""
    del schema_collection_spec['Fields'][0]

    loader = SchemaLoader()
    collection_name = loader.add_schema_spec(schema_collection_spec)
    collection = MockSchemaCollection(collection_name, loader, 'Fields')

    assert len(collection.errors) == 1
    only_error = collection.errors[0]
    assert isinstance(only_error, EmptyAttributeError)
    assert only_error.attribute == 'Fields'
Exemplo n.º 25
0
def test_add_valid_simple_schema_with_parent() -> None:
    """Adding a spec under 'parent' returns its own name and stores it as 'parent.test'."""
    loader = SchemaLoader()

    returned_name = loader.add_schema_spec({
        'Name': 'test',
        'Type': 'test_type'
    }, 'parent')
    assert returned_name == 'test'

    # Compared against a fresh literal, not the dict that was handed to the loader.
    stored_spec = loader.get_schema_spec('parent.test')
    assert stored_spec == {
        'Name': 'test',
        'Type': 'test_type'
    }
Exemplo n.º 26
0
def test_get_attribute(collection_schema_spec: Dict[str, Any]) -> None:
    """Nested items are reachable as attributes without breaking regular attributes."""
    loader = SchemaLoader()
    spec_name = loader.add_schema_spec(collection_schema_spec)
    collection_schema = MockBaseSchemaCollection(
        spec_name, loader, AggregateSchema.ATTRIBUTE_FIELDS)
    items = MockBaseItemCollection(collection_schema, EvaluationContext())

    # Access to a nested item through attribute lookup.
    assert items.event_count == 0
    # Ordinary attributes must keep working.
    assert items._schema == collection_schema
Exemplo n.º 27
0
def test_evaluate_needs_evaluation_false(
        collection_schema_spec: Dict[str, Any]) -> None:
    """With 'When' set to 'False', running evaluate leaves event_count at 0."""
    loader = SchemaLoader()
    collection_schema_spec['When'] = 'False'
    spec_name = loader.add_schema_spec(collection_schema_spec)
    collection_schema = MockBaseSchemaCollection(
        spec_name, loader, AggregateSchema.ATTRIBUTE_FIELDS)
    items = MockBaseItemCollection(collection_schema, EvaluationContext())

    items.run_evaluate()

    assert items.event_count == 0
Exemplo n.º 28
0
def test_block_aggregate_schema_missing_split_attribute_adds_error(
        schema_spec, store_spec):
    """Dropping the split attribute from the spec yields one RequiredAttributeError."""
    del schema_spec[BlockAggregateSchema.ATTRIBUTE_SPLIT]

    loader = SchemaLoader()
    block_name = loader.add_schema_spec(schema_spec)
    # The store spec is added with the block's name as the second argument.
    loader.add_schema_spec(store_spec, block_name)
    block_schema = BlockAggregateSchema(block_name, loader)

    assert len(block_schema.errors) == 1
    only_error = block_schema.errors[0]
    assert isinstance(only_error, RequiredAttributeError)
    assert BlockAggregateSchema.ATTRIBUTE_SPLIT == only_error.attribute
Exemplo n.º 29
0
def test_field_evaluate_implicit_typecast_bool():
    """A boolean field whose value expression is '1+2' evaluates to a snapshot of True."""
    loader = SchemaLoader()
    field_spec = {
        'Name': 'max_attempts',
        'Type': Type.BOOLEAN,
        'Value': '1+2'
    }
    spec_name = loader.add_schema_spec(field_spec)
    bool_field = Field(BooleanFieldSchema(spec_name, loader), EvaluationContext())

    bool_field.run_evaluate()

    assert bool_field._snapshot is True
Exemplo n.º 30
0
def test_field_evaluate_implicit_typecast_integer():
    """An integer field whose value expression is '23.45' evaluates to a snapshot of 23."""
    loader = SchemaLoader()
    field_spec = {
        'Name': 'max_attempts',
        'Type': Type.INTEGER,
        'Value': '23.45'
    }
    spec_name = loader.add_schema_spec(field_spec)
    int_field = Field(IntegerFieldSchema(spec_name, loader), EvaluationContext())

    int_field.run_evaluate()

    assert int_field._snapshot == 23