def kibana_handle_schema_change(
        tenant: str,
        alias_name: str,
        schema_old: Mapping[Any, Any],
        schema_new: Mapping[Any, Any],
        subscription: Mapping[str, Any],  # Subscription.definition
        es_index: Mapping[Any, Any],
        es_conn,
        kibana_conn):
    """Apply a schema change to Kibana when one is actually required.

    Returns ``False`` when ``schema_old`` is given and compares as identical
    to ``schema_new``, or when ``check_for_kibana_update`` reports that no
    update is needed. Otherwise returns whatever ``update_kibana_index``
    returns.
    """
    incoming_node = Node(schema_new)
    kibana_index = make_kibana_index(alias_name, incoming_node)
    schema_name = schema_new.get('name')

    if schema_old is not None:
        # A non-empty name on the previous schema wins over the new name.
        previous_name = schema_old.get('name')
        if previous_name:
            schema_name = previous_name
        if Node.compare(Node(schema_old), incoming_node) == {}:
            return False  # schema not substantially different

    needs_update = check_for_kibana_update(
        schema_name, tenant, alias_name, subscription,
        kibana_index, es_index, es_conn, kibana_conn)
    if not needs_update:
        return False

    return update_kibana_index(
        tenant, alias_name, schema_new, subscription,
        kibana_index, es_index, es_conn, kibana_conn)
def get_es_types_from_schema(schema: Node):
    """Collect ES type mappings for the typed fields found in ``schema``.

    The schema is queried once per known type, in the order basic avro
    types, logical avro types, aether types; every set of matches is handed
    to ``__handle_mapping_addition`` together with the shared ``mappings``
    dict, which is returned at the end.
    """
    # since we handle union types, we sort these in increasing importance
    # to ES's handling of them. I.E. if it can be an object or a string,
    # it's more tolerant to treat it as an object, etc.
    mappings = {}

    def _add_matches(query_kind, attribute, type_name, es_type):
        # one query per type: {<query_kind>: [{<attribute>: <type_name>}]}
        found = list(schema.find_children(
            {query_kind: [{attribute: type_name}]}))
        __handle_mapping_addition(found, mappings, type_name, es_type)

    # basic avro types
    for avro_type, es_type in config.AVRO_TYPES:
        _add_matches('attr_contains', 'avro_type', avro_type, es_type)
    # logical avro types
    for logical_type, es_type in config.AVRO_LOGICAL_TYPES:
        _add_matches('attr_contains', 'logical_type', logical_type, es_type)
    # aether types
    for aether_type, es_type in config.AETHER_TYPES:
        _add_matches('match_attr', '__extended_type', aether_type, es_type)
    return mappings
示例#3
0
def auto_visualizations(
    alias_name: str,
    alias_index: str,
    node: Node,
    subscription: Mapping[str, Any],  # Subscription.definition
    path_filters: List[Callable[[str], bool]] = __default_path_filters()
) -> Dict[str, Any]:
    """Build a visualization for every (supported type, handler, path) combo.

    For each type from ``_supported_types()`` the matching schema paths are
    looked up in ``node`` and each handler from ``_vis_for_type`` is called
    for every path that passes all ``path_filters``.

    Args:
        alias_name: alias used in the generated titles and ids.
        alias_index: value passed to the handlers as ``alias``.
        node: schema node that is searched for matching paths.
        subscription: passed through to the handlers unchanged.
        path_filters: predicates that must all accept a path; a falsy value
            disables filtering. The default list is created once, when the
            function is defined.

    Returns:
        Dict mapping the generated visualization id to the handler result.
    """
    LOG.debug(f'Getting visualizations for {alias_name}')
    title_template = '{alias} {form_name} ({field_name} -> {vis_type})'
    id_template = '{alias}_{form_name}_{field_name}_{vis_type}'
    visualizations = {}
    for _type in _supported_types():
        handlers = _vis_for_type(_type)
        # The paths depend only on the type, so resolve them once per type
        # instead of once per handler.
        if _type in AETHER_TYPES:
            paths = list(node.find_children(
                {'match_attr': [{'__extended_type': _type}]}))
        elif _type in AVRO_TYPES:
            paths = list(node.find_children(
                {'attr_contains': [{'avro_type': _type}]}))
        else:
            # A type in neither group used to leave `paths` unbound, or
            # still holding the previous type's paths. It has no paths.
            paths = []

        for vis_type, build_vis in handlers:
            for path in paths:
                if path_filters and not all(
                        accept(path) for accept in path_filters):
                    LOG.debug(f'{path} ignored for visualization (filtered).')
                    continue
                LOG.debug(f'visualizing path -> {path}')
                form_name = index_handler.get_formname(path)
                field_name = index_handler.remove_formname(path)
                title = title_template.format(alias=alias_name.capitalize(),
                                              form_name=form_name,
                                              field_name=field_name,
                                              vis_type=vis_type.capitalize())
                _id = id_template.format(alias=alias_name,
                                         form_name=form_name.lower(),
                                         field_name=field_name.lower(),
                                         vis_type=vis_type.lower())
                visualizations[_id] = build_vis(title=title,
                                                alias=alias_index,
                                                field_name=field_name,
                                                node=node.get_node(path),
                                                subscription=subscription)
    return visualizations
示例#4
0
 def find_path_in_schema(self, schema: Node, test):
     """Return the paths of children of ``schema`` whose ``name`` is ``test``.

     A leading ``"<schema.name>."`` prefix is removed from each path. An
     empty list is returned when nothing matches.
     """
     prefix = f'{schema.name}.'
     query = {'match_attr': [{'name': test}]}
     relative_paths = []
     for found in schema.find_children(query):
         if found.startswith(prefix):
             found = found[len(prefix):]
         relative_paths.append(found)
     return relative_paths
示例#5
0
def test__comparison_nested_attr(ComplexSchema):
    """A changed ``__lookup`` on a child node must appear in ``Node.compare``."""
    target = 'operator_type'
    modified = deepcopy(ComplexSchema)
    pristine = deepcopy(ComplexSchema)
    # change a node's attribute
    modified.children[target].__lookup = [{"something": "else"}]
    # make sure the two copies really are independent objects
    modified_lookup = modified.children[target].__lookup
    pristine_lookup = pristine.children[target].__lookup
    assert modified_lookup != pristine_lookup
    differences = Node.compare(pristine, modified)
    assert any(target in key for key in differences.keys())
示例#6
0
 def __init__(self,
              schema: Mapping[Any, Any] = None,
              node: Node = None,
              raw_schema: str = None):
     """Initialise from a schema dict, a ``Node`` or a raw JSON schema string.

     ``node`` is preferred over ``schema``, which is preferred over
     ``raw_schema``.

     Raises:
         ValueError: if none of the three arguments is truthy.
     """
     if not (schema or node or raw_schema):
         raise ValueError(
             'Must include one of: schema (dict) node (Node) or raw_schema (JSON)'
         )
     if node:
         resolved = node
     elif schema:
         resolved = Node(schema)
     else:
         resolved = Node(json.loads(raw_schema))
     self._base_name = resolved.name
     self.load_defaults()
     self.schema = resolved
     self.spavro_schema = parse_schema(self.schema._source)
示例#7
0
def schema_defined_visualizations(
        alias_name: str,
        alias_index: str,
        node: Node,
        subscription: Mapping[str, Any],  # Subscription.definition
) -> Dict[str, Any]:
    """Build visualizations only for nodes that ask for one.

    Every child of ``node`` that has a ``__default_visualization`` attribute
    is looked up in ``SCHEMA_VIS_MAP``; paths whose requested visualization
    has no entry there are skipped. Returns a dict mapping the generated
    visualization id to the handler result.
    """
    requested_paths = list(
        node.find_children({'has_attr': ['__default_visualization']}))
    LOG.debug(f'schemas found at paths {requested_paths}')

    visualizations = {}
    for path in requested_paths:
        target_node = node.get_node(path)
        vis_name = target_node.__default_visualization
        if vis_name not in SCHEMA_VIS_MAP:
            LOG.debug(
                f'@path: {path} has preferred type {vis_name}. No handler found'
            )
            continue
        vis_type, build_vis = SCHEMA_VIS_MAP.get(vis_name)
        LOG.debug(f'visualizing path -> {path}')
        form_name = index_handler.get_formname(path)
        field_name = index_handler.remove_formname(path)
        title = (f'{alias_name.capitalize()} {form_name} '
                 f'({field_name} -> {vis_type.capitalize()})')
        vis_id = (f'{alias_name}_{form_name.lower()}_'
                  f'{field_name.lower()}_{vis_type.lower()}')
        visualizations[vis_id] = build_vis(title=title,
                                           alias=alias_index,
                                           field_name=field_name,
                                           node=target_node,
                                           subscription=subscription)
    return visualizations
def test__process_geo_field():
    """The processor finds geopoints and adds a ``geo_point`` to the document."""
    cases = (
        (TYPE_INSTRUCTIONS, AUTOGEN_SCHEMA, SAMPLE_DOC, 'autogen'),
        (TYPE_INSTRUCTIONS, SIMPLE_SCHEMA, SAMPLE_DOC2, 'simple'),
    )
    for instructions, schema, sample, name in cases:
        processor = ESItemProcessor(name, instructions, Node(schema))
        geopoints = processor._find_geopoints()
        assert geopoints.get('lat') is not None
        processed = processor.process(sample)
        assert processed.get('geo_point').get('lon') is not None
def _format_lookups(schema: Node, default='Other', strip_form_name=True):
    """Format the lookup of every node in ``schema`` that has ``__lookup``.

    Returns a dict keyed by the node's path (passed through
    ``remove_formname`` when ``strip_form_name`` is truthy) whose values come
    from ``_format_single_lookup(node, default)``. Returns an empty dict when
    nothing matches.
    """
    found = schema.collect_matching({'has_attr': ['__lookup']})
    if not found:
        return {}
    formatted = {}
    for path, lookup_node in found:
        label = remove_formname(path) if strip_form_name else path
        formatted[label] = _format_single_lookup(lookup_node, default)
    return formatted
def _find_timestamp(schema: Node):
    """Pick the field to use as the index timestamp.

    Candidates are the ``dateTime`` fields of ``schema``, sorted by name.
    Order of preference: the configured ``es_options.index_time`` if it is a
    candidate, the first candidate containing ``'timestamp'``, the first
    candidate, and finally the configured ``es_options.auto_timestamp``.
    """
    date_time_matches = schema.collect_matching(
        {'match_attr': [{'__extended_type': 'dateTime'}]})
    candidates = [remove_formname(path) for path, _node in date_time_matches]
    candidates.sort()
    timestamp_like = [name for name in candidates if 'timestamp' in name]
    preferred = consumer_config.get('es_options', {}).get('index_time', None)

    if candidates and preferred in candidates:
        return preferred
    if timestamp_like:
        return timestamp_like[0]
    if candidates:
        return candidates[0]
    # no dateTime field at all: fall back to the configured default
    return consumer_config.get('es_options', {}).get('auto_timestamp', None)
示例#11
0
def ComplexSchema():
    """Return a ``Node`` built from ``ANNOTATED_SCHEMA``."""
    annotated_node = Node(ANNOTATED_SCHEMA)  # noqa
    return annotated_node
示例#12
0
def AutoGenSchema():
    """Return a ``Node`` built from ``AUTOGEN_SCHEMA``."""
    autogen_node = Node(AUTOGEN_SCHEMA)  # noqa
    return autogen_node
示例#13
0
def SimpleSchema():
    """Return a ``Node`` built from ``SIMPLE_SCHEMA``."""
    simple_node = Node(SIMPLE_SCHEMA)  # noqa
    return simple_node
示例#14
0
    def _update_topic(self, topic, schema: Mapping[Any, Any]):
        """Refresh the ES index, Kibana index and item processor for ``topic``.

        Logs an error and returns early when no subscription is found for
        the topic. Otherwise the ES index is updated if
        ``index_handler.es_index_changed`` says so, the schema change is
        handed to ``index_handler.kibana_handle_schema_change``, and the
        per-topic entries in ``self._indices``, ``self._doc_types``,
        ``self._processors`` and ``self._routes`` are replaced.
        """
        self.log.debug(f'{self.tenant} is updating topic: {topic}')
        subscription = self._job_subscription_for_topic(topic)
        if not subscription:
            self.log.error(f'Could not find subscription for topic {topic}')
            return
        node: Node = Node(schema)
        self.log.debug('getting index')
        es_index = index_handler.get_es_index_from_subscription(
            subscription.definition.get('es_options'),
            name=self._name_from_topic(topic),
            tenant=self.tenant.lower(),
            schema=node
        )
        self.log.debug(f'index {es_index}')
        # an explicit es_options.alias_name wins (lower-cased); otherwise the
        # alias is derived from the schema's namespace
        alias_request = subscription.definition.get('es_options', {}).get('alias_name')
        if alias_request:
            alias = f'{alias_request}'.lower()
        else:
            alias = index_handler.get_alias_from_namespace(node.namespace)
        # Try to add the indices / ES alias
        es_instance = self._job_elasticsearch().get_session()
        if index_handler.es_index_changed(es_instance, es_index, self.tenant):
            self.log.debug(f'{self.tenant} updated schema for {topic}')
            self.log.debug(f'registering ES index:\n{json.dumps(es_index, indent=2)}')
            index_handler.update_es_index(
                es_instance,
                es_index,
                self.tenant,
                alias
            )
        conn: KibanaInstance = self._job_kibana()

        # schema currently stored for this topic, if any; compared against
        # the incoming one by kibana_handle_schema_change
        old_schema = self._schemas.get(topic)
        updated_kibana = index_handler.kibana_handle_schema_change(
            self.tenant.lower(),
            alias,
            old_schema,
            schema,
            subscription.definition,
            es_index,
            es_instance,
            conn
        )

        if updated_kibana:
            self.log.info(
                f'Registered kibana index {alias} for {self.tenant}'
            )
        else:
            self.log.info(
                f'Kibana index {alias} did not need update.'
            )

        self._indices[topic] = es_index
        self.log.debug(f'{self.tenant}:{topic} | idx: {es_index}')
        # update processor for type
        # only the first (doc type, instructions) entry of the mappings is used
        doc_type, instr = list(es_index['body']['mappings'].items())[0]
        self._doc_types[topic] = doc_type
        self._processors[topic] = ESItemProcessor(topic, instr, node)
        self._routes[topic] = self._processors[topic].create_route()
示例#15
0
def test__comparison_unhandled():
    """``Node.compare_objects`` returns ``False`` for two ``datetime`` objects."""
    first = datetime.now()
    second = datetime.now()
    outcome = Node.compare_objects(first, second)
    assert outcome is False
def test__other_init_methods():
    """``SampleGenerator`` can be built from raw JSON or from a ``Node``."""
    serialized = json.dumps(EXAMPLE_SIMPLE_SCHEMA)
    from_raw = SampleGenerator(raw_schema=serialized)
    assert from_raw.make_sample() is not None

    from_node = SampleGenerator(node=Node(EXAMPLE_SIMPLE_SCHEMA))
    assert from_node.make_sample() is not None
def merge_kibana_artifacts(
    tenant: str,
    alias_name: str,
    schema: Mapping[Any, Any],
    subscription: Mapping[str, Any],  # Subscription.definition
    kibana_index: Mapping[Any, Any],  # individual kibana index contribution
    kibana_conn,
    old_artifact: Mapping[Any, Any] = None  # artifact describes multiple types
):
    """Reconcile a schema's Kibana index and visualizations with an artifact.

    Visualizations are generated according to
    ``subscription['kibana_options']['auto_visualization']``: ``'full'`` uses
    ``auto_visualizations``, ``'schema'`` uses
    ``schema_defined_visualizations``, anything else creates none.

    Returns a ``(kibana_index, artifact, visualizations)`` tuple:

    * without ``old_artifact``: the given index, a fresh artifact and all
      generated visualizations;
    * when the index hash is unchanged and no visualization hash is new:
      ``(None, None, None)``;
    * otherwise: the stored index merged with ``kibana_index``, an artifact
      built on top of ``old_artifact`` and only the new visualizations.
    """
    schema_name = schema.get('name')
    index_hash = utils.hash(kibana_index)
    # TODO
    alias_index = f'{tenant}.{alias_name}'
    kibana_options = subscription.get('kibana_options', {})
    auto_vis_flag = kibana_options.get('auto_visualization')

    if auto_vis_flag == 'full':
        LOG.info('Creating automatic visualizations')
        visualizations = auto_visualizations(
            alias_name, alias_index, Node(schema), subscription)
    elif auto_vis_flag == 'schema':
        LOG.info('Only creating vis from @aether_default_visualization')
        visualizations = schema_defined_visualizations(
            alias_name, alias_index, Node(schema), subscription)
    else:
        LOG.info('Not creating visualizations')
        visualizations = {}
    vis_hashes = {
        vis_id: utils.hash(vis) for vis_id, vis in visualizations.items()
    }

    if not old_artifact:
        # use the new one since there is no old one
        fresh_artifact = make_kibana_artifact(
            index={schema_name: index_hash}, visualization=vis_hashes)
        return kibana_index, fresh_artifact, visualizations

    old_hashes = old_artifact.get('hashes', {})
    old_index_hash = old_hashes.get('index', {}).get(schema_name)
    old_vis_hashes = old_hashes.get('visualization', {})

    # a visualization counts as updated when its hash matches no stored hash
    updated_visuals = {}
    for vis_id, vis_hash in vis_hashes.items():
        if vis_hash not in old_vis_hashes.values():
            updated_visuals[vis_id] = visualizations[vis_id]
    if updated_visuals:
        LOG.debug(f'updated visuals: {list(updated_visuals.keys())}')

    # no change, ignore
    if old_index_hash == index_hash and not updated_visuals:
        return None, None, None

    # we need to reconcile the update
    try:
        old_kibana_index = handle_kibana_artifact(
            alias_name, tenant, kibana_conn,
            mode='READ', _type='index-pattern')
    except (HTTPError, ConsumerHttpException) as her:
        LOG.info(f'Old Kibana index not found {her}')
        old_kibana_index = {}

    merged_index = utils.merge_dicts(old_kibana_index, kibana_index)
    merged_artifact = make_kibana_artifact(
        index={schema_name: index_hash},
        visualization=vis_hashes,
        old_artifact=old_artifact)
    return merged_index, merged_artifact, updated_visuals
示例#18
0
def test__comparison_none(SimpleSchema):
    """Comparing a schema with itself yields an empty result."""
    differences = Node.compare(SimpleSchema, SimpleSchema)
    assert differences == {}
示例#19
0
def PolySchemaA():
    """Return a ``Node`` built from ``POLY_SCHEMA_A``."""
    poly_node = Node(POLY_SCHEMA_A)  # noqa
    return poly_node
示例#20
0
def SimpleSchema():
    """Return a ``Node`` built from ``EXAMPLE_SIMPLE_SCHEMA``."""
    simple_node = Node(EXAMPLE_SIMPLE_SCHEMA)
    return simple_node
示例#21
0
def PolySchemaB():
    """Return a ``Node`` built from ``POLY_SCHEMA_B``."""
    poly_node = Node(POLY_SCHEMA_B)  # noqa
    return poly_node
示例#22
0
def ComplexSchema():
    """Return a ``Node`` built from ``EXAMPLE_ANNOTATED_SCHEMA``."""
    annotated_node = Node(EXAMPLE_ANNOTATED_SCHEMA)
    return annotated_node
示例#23
0
def AutoSchema():
    """Return a ``Node`` built from ``EXAMPLE_AUTOGEN_SCHEMA``."""
    autogen_node = Node(EXAMPLE_AUTOGEN_SCHEMA)
    return autogen_node
示例#24
0
def test__comparison_all(SimpleSchema):
    """Renaming the root node makes every node show up in the comparison."""
    renamed = deepcopy(SimpleSchema)
    untouched = deepcopy(SimpleSchema)
    renamed.name = 'SomethingElse'  # change root node (changes all paths)
    differences = Node.compare(renamed, untouched)
    assert len(differences) == 15  # all nodes