class Connection(base.Connection):
    """MongoDB-backed event storage driver.

    Events are stored as documents in the ``event`` collection with the
    event's message_id as the document ``_id``; traits are embedded as a
    list of sub-documents.
    """

    # Merge driver-specific capabilities over the base defaults.
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       COMMON_AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def record_events(self, event_models):
        """Write the events to database.

        Duplicate events (same message_id) are skipped with an info log.
        Any other per-event failure is logged and remembered; the last such
        error is re-raised after all events have been attempted, so one bad
        event does not prevent the rest from being written.

        :param event_models: a list of models.Event objects.
        :raises: the last non-duplicate exception encountered, if any.
        """
        error = None
        for event_model in event_models:
            traits = []
            if event_model.traits:
                for trait in event_model.traits:
                    traits.append({'trait_name': trait.name,
                                   'trait_type': trait.dtype,
                                   'trait_value': trait.value})
            try:
                self.db.event.insert_one(
                    {'_id': event_model.message_id,
                     'event_type': event_model.event_type,
                     'timestamp': event_model.generated,
                     'traits': traits,
                     'raw': event_model.raw})
            except pymongo.errors.DuplicateKeyError as ex:
                # _id collision: the event was already recorded.
                LOG.info(_LI("Duplicate event detected, skipping it: %s") % ex)
            except Exception as ex:
                LOG.exception(_LE("Failed to record event: %s") % ex)
                error = ex
        if error:
            raise error

    def get_events(self, event_filter, pagination=None):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        :param pagination: Pagination parameters (only 'limit' is honored;
                           'sort' is not supported by this driver).
        """
        limit = None
        if pagination:
            if pagination.get('sort'):
                LOG.warning(_LW('Driver does not support sort functionality'))
            limit = pagination.get('limit')
            if limit == 0:
                # A limit of zero means "no results"; short-circuit.
                return
        q = pymongo_utils.make_events_query_from_filter(event_filter)
        if limit is not None:
            results = self.db.event.find(q, limit=limit)
        else:
            results = self.db.event.find(q)

        for event in results:
            # Rebuild the trait model objects from the embedded documents.
            traits = []
            for trait in event['traits']:
                traits.append(models.Trait(name=trait['trait_name'],
                                           dtype=int(trait['trait_type']),
                                           value=trait['trait_value']))
            yield models.Event(message_id=event['_id'],
                               event_type=event['event_type'],
                               generated=event['timestamp'],
                               traits=traits,
                               raw=event.get('raw'))

    def get_event_types(self):
        """Return all event types as an iter of strings."""
        return self.db.event.distinct('event_type')

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event.
        """
        trait_names = set()
        events = self.db.event.find({'event_type': event_type})

        for event in events:
            for trait in event['traits']:
                trait_name = trait['trait_name']
                if trait_name not in trait_names:
                    # Yield each trait name only once: a given trait name is
                    # assumed to map to a single trait type.
                    trait_names.add(trait_name)
                    yield {'name': trait_name,
                           'data_type': trait['trait_type']}

    def get_traits(self, event_type, trait_name=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_name: the name of the Trait to filter by
        """
        if not trait_name:
            events = self.db.event.find({'event_type': event_type})
        else:
            # We choose events that simultaneously have event_type and
            # certain trait_name, and retrieve events containing only the
            # mentioned traits ($elemMatch projects the matching trait only).
            events = self.db.event.find(
                {'$and': [{'event_type': event_type},
                          {'traits.trait_name': trait_name}]},
                {'traits': {'$elemMatch': {'trait_name': trait_name}}})
        for event in events:
            for trait in event['traits']:
                yield models.Trait(name=trait['trait_name'],
                                   dtype=trait['trait_type'],
                                   value=trait['trait_value'])
class Connection(base.Connection):
    """Put the event data into a SQLAlchemy database.

    Tables::

        - EventType
          - event definition
          - { id: event type id
              desc: description of event
              }
        - Event
          - event data
          - { id: event id
              message_id: message id
              generated = timestamp of event
              event_type_id = event type -> eventtype.id
              }
        - TraitInt
          - int trait value
          - { event_id: event -> event.id
              key: trait name
              value: integer value
              }
        - TraitDatetime
          - datetime trait value
          - { event_id: event -> event.id
              key: trait name
              value: datetime value
              }
        - TraitText
          - text trait value
          - { event_id: event -> event.id
              key: trait name
              value: text value
              }
        - TraitFloat
          - float trait value
          - { event_id: event -> event.id
              key: trait name
              value: float value
              }
    """
    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )

    def __init__(self, url, conf):
        # Set max_retries to 0, since oslo.db in certain cases may attempt
        # to retry making the db connection retried max_retries ^ 2 times
        # in failure case and db reconnection has already been implemented
        # in storage.__init__.get_connection_from_config function
        options = dict(conf)
        options['max_retries'] = 0
        # oslo.db doesn't support options defined by Panko, so strip them
        # before handing the dict to EngineFacade.
        for opt in storage.OPTS:
            options.pop(opt.name, None)
        self._engine_facade = db_session.EngineFacade(url, **options)

    def upgrade(self):
        """Create all tables defined on the model metadata."""
        engine = self._engine_facade.get_engine()
        models.Base.metadata.create_all(engine)

    def clear(self):
        """Delete all rows, respecting FK order, and dispose the engine."""
        engine = self._engine_facade.get_engine()
        # Reverse sorted order so child tables are emptied before parents.
        for table in reversed(models.Base.metadata.sorted_tables):
            engine.execute(table.delete())
        engine.dispose()

    def _get_or_create_event_type(self, event_type, session=None):
        """Check if an event type with the supplied name already exists.

        If not, we create it and return the record. This may result in
        a flush.

        :param event_type: the event type description string.
        :param session: optional existing session; a new one is created
                        when omitted.
        :returns: the models.EventType row for ``event_type``.
        """
        try:
            if session is None:
                session = self._engine_facade.get_session()
            with session.begin(subtransactions=True):
                et = session.query(models.EventType).filter(
                    models.EventType.desc == event_type).first()
                if not et:
                    et = models.EventType(event_type)
                    session.add(et)
        except dbexc.DBDuplicateEntry:
            # Lost a race with another writer inserting the same type;
            # retry, which will now find the existing row.
            et = self._get_or_create_event_type(event_type, session)
        return et

    def record_events(self, event_models):
        """Write the events to SQL database via sqlalchemy.

        Duplicates are skipped; other failures are logged, and the last
        hard error (non-duplicate, non-KeyError) is re-raised after all
        events have been attempted.

        :param event_models: a list of model.Event objects.
        """
        session = self._engine_facade.get_session()
        error = None
        for event_model in event_models:
            event = None
            try:
                with session.begin():
                    event_type = self._get_or_create_event_type(
                        event_model.event_type, session=session)
                    event = models.Event(event_model.message_id, event_type,
                                         event_model.generated,
                                         event_model.raw)
                    session.add(event)
                    # Flush so event.id is populated for the trait rows.
                    session.flush()

                    if event_model.traits:
                        # Bucket traits by dtype so each trait table gets a
                        # single bulk insert.
                        trait_map = {}
                        for trait in event_model.traits:
                            if trait_map.get(trait.dtype) is None:
                                trait_map[trait.dtype] = []
                            trait_map[trait.dtype].append(
                                {'event_id': event.id,
                                 'key': trait.name,
                                 'value': trait.value})
                        for dtype in trait_map.keys():
                            model = TRAIT_ID_TO_MODEL[dtype]
                            session.execute(model.__table__.insert(),
                                            trait_map[dtype])
            except dbexc.DBDuplicateEntry as e:
                LOG.info(_LI("Duplicate event detected, skipping it: %s") % e)
            except KeyError as e:
                # e.g. a trait dtype with no table mapping; logged, not fatal.
                LOG.exception(_LE('Failed to record event: %s') % e)
            except Exception as e:
                LOG.exception(_LE('Failed to record event: %s') % e)
                error = e
        if error:
            raise error

    def _get_pagination_query(self, query, pagination, api_model, model):
        """Apply marker/limit/sort pagination to ``query``.

        :param query: the base SQLAlchemy query over ``model``.
        :param pagination: dict with optional 'limit', 'marker', 'sort'.
        :param api_model: API model supplying DEFAULT_SORT.
        :param model: the SQLAlchemy model being paginated.
        :raises storage.InvalidMarker: when the marker message_id does not
            match any stored event.
        """
        limit = pagination.get('limit')
        marker = None
        if pagination.get('marker'):
            # Resolve the marker message_id to a concrete event row.
            marker_filter = event_storage.EventFilter(
                message_id=pagination.get('marker'))
            markers = list(self.get_events(marker_filter))
            if markers:
                marker = markers[0]
            else:
                raise storage.InvalidMarker(
                    'Marker %s not found.' % pagination['marker'])
        if not pagination.get('sort'):
            pagination['sort'] = api_model.DEFAULT_SORT
        sort_keys = [s[0] for s in pagination['sort']]
        sort_dirs = [s[1] for s in pagination['sort']]
        return oslo_sql_utils.paginate_query(
            query, model, limit, sort_keys, sort_dirs=sort_dirs, marker=marker)

    def get_events(self, event_filter, pagination=None):
        """Return an iterable of model.Event objects.

        :param event_filter: EventFilter instance
        :param pagination: Pagination parameters.
        """
        pagination = pagination or {}
        session = self._engine_facade.get_session()
        with session.begin():
            # Build up the join conditions
            event_join_conditions = [models.EventType.id ==
                                     models.Event.event_type_id]

            if event_filter.event_type:
                event_join_conditions.append(models.EventType.desc ==
                                             event_filter.event_type)

            # Build up the where conditions
            event_filter_conditions = []
            if event_filter.message_id:
                event_filter_conditions.append(
                    models.Event.message_id == event_filter.message_id)
            if event_filter.start_timestamp:
                event_filter_conditions.append(
                    models.Event.generated >= event_filter.start_timestamp)
            if event_filter.end_timestamp:
                event_filter_conditions.append(
                    models.Event.generated <= event_filter.end_timestamp)

            trait_subq = None
            # Build trait filter: each trait filter becomes its own
            # subquery of event ids, intersected on ev_id.
            if event_filter.traits_filter:
                filters = list(event_filter.traits_filter)
                trait_filter = filters.pop()
                key = trait_filter.pop('key')
                op = trait_filter.pop('op', 'eq')
                trait_type, value = list(trait_filter.items())[0]
                trait_subq = _build_trait_query(session, trait_type,
                                                key, value, op)
                for trait_filter in filters:
                    key = trait_filter.pop('key')
                    op = trait_filter.pop('op', 'eq')
                    trait_type, value = list(trait_filter.items())[0]
                    q = _build_trait_query(session, trait_type,
                                           key, value, op)
                    trait_subq = trait_subq.filter(
                        trait_subq.subquery().c.ev_id == q.subquery().c.ev_id)
                trait_subq = trait_subq.subquery()

            query = (session.query(models.Event.id)
                     .join(models.EventType,
                           sa.and_(*event_join_conditions)))
            if trait_subq is not None:
                query = query.join(trait_subq,
                                   trait_subq.c.ev_id == models.Event.id)
            if event_filter.admin_proj:
                # Admin scope: match events that either have no project_id
                # trait at all, or whose project_id trait equals the admin
                # project.
                no_proj_q = session.query(models.TraitText.event_id).filter(
                    models.TraitText.key == 'project_id')
                admin_q = (
                    session.query(models.TraitText.event_id).filter(
                        ~sa.exists().where(
                            models.TraitText.event_id ==
                            no_proj_q.subquery().c.event_id)).union(
                        session.query(models.TraitText.event_id).filter(
                            sa.and_(
                                models.TraitText.key == 'project_id',
                                models.TraitText.value ==
                                event_filter.admin_proj,
                                models.Event.id ==
                                models.TraitText.event_id))))
                query = query.filter(sa.exists().where(
                    models.Event.id ==
                    admin_q.subquery().c.trait_text_event_id))
            if event_filter_conditions:
                query = query.filter(sa.and_(*event_filter_conditions))

            query = self._get_pagination_query(
                query, pagination, api_models.Event, models.Event)

            event_list = collections.OrderedDict()
            # get a list of all events that match filters
            for (id_, generated, message_id,
                 desc, raw) in query.add_columns(
                     models.Event.generated, models.Event.message_id,
                     models.EventType.desc, models.Event.raw).all():
                event_list[id_] = api_models.Event(
                    message_id, desc, generated, [], raw)

            # Query all traits related to events.
            # NOTE (gordc): cast is done because pgsql defaults to TEXT when
            # handling unknown values such as null.
            trait_q = (
                session.query(
                    models.TraitDatetime.event_id,
                    models.TraitDatetime.key, models.TraitDatetime.value,
                    sa.cast(sa.null(), sa.Integer),
                    sa.cast(sa.null(), sa.Float(53)),
                    sa.cast(sa.null(), sa.String(255)))
                .filter(sa.exists().where(
                    models.TraitDatetime.event_id == query.subquery().c.id))
            ).union_all(
                session.query(
                    models.TraitInt.event_id,
                    models.TraitInt.key, sa.null(),
                    models.TraitInt.value, sa.null(), sa.null())
                .filter(sa.exists().where(
                    models.TraitInt.event_id == query.subquery().c.id)),
                session.query(
                    models.TraitFloat.event_id,
                    models.TraitFloat.key, sa.null(), sa.null(),
                    models.TraitFloat.value, sa.null())
                .filter(sa.exists().where(
                    models.TraitFloat.event_id == query.subquery().c.id)),
                session.query(
                    models.TraitText.event_id,
                    models.TraitText.key, sa.null(), sa.null(), sa.null(),
                    models.TraitText.value)
                .filter(sa.exists().where(
                    models.TraitText.event_id == query.subquery().c.id)))

            # Each union row has exactly one non-null typed value column;
            # use that to recover the trait dtype.
            for id_, key, t_date, t_int, t_float, t_text in (
                    trait_q.order_by(models.TraitDatetime.key)).all():
                if t_int is not None:
                    dtype = api_models.Trait.INT_TYPE
                    val = t_int
                elif t_float is not None:
                    dtype = api_models.Trait.FLOAT_TYPE
                    val = t_float
                elif t_date is not None:
                    dtype = api_models.Trait.DATETIME_TYPE
                    val = t_date
                else:
                    dtype = api_models.Trait.TEXT_TYPE
                    val = t_text

                try:
                    trait_model = api_models.Trait(key, dtype, val)
                    event_list[id_].append_trait(trait_model)
                except KeyError:
                    # NOTE(gordc): this is expected as we do not set
                    # REPEATABLE READ (bug 1506717). if query is run while
                    # recording new event data, trait query may return more
                    # data than event query. they can be safely discarded.
                    pass

            return event_list.values()

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        session = self._engine_facade.get_session()
        with session.begin():
            query = (session.query(models.EventType.desc)
                     .order_by(models.EventType.desc))
            for name in query.all():
                # The query returns a tuple with one element.
                yield name[0]

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event
        """
        session = self._engine_facade.get_session()
        with session.begin():
            # Check each of the four typed trait tables in turn.
            for trait_model in [models.TraitText, models.TraitInt,
                                models.TraitFloat, models.TraitDatetime]:
                query = (session.query(trait_model.key)
                         .join(models.Event,
                               models.Event.id == trait_model.event_id)
                         .join(models.EventType,
                               sa.and_(models.EventType.id ==
                                       models.Event.event_type_id,
                                       models.EventType.desc == event_type))
                         .distinct())

                dtype = TRAIT_MODEL_TO_ID.get(trait_model)
                for row in query.all():
                    yield {'name': row[0], 'data_type': dtype}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """
        session = self._engine_facade.get_session()
        with session.begin():
            for trait_model in [models.TraitText, models.TraitInt,
                                models.TraitFloat, models.TraitDatetime]:
                query = (session.query(trait_model.key, trait_model.value)
                         .join(models.Event,
                               models.Event.id == trait_model.event_id)
                         .join(models.EventType,
                               sa.and_(models.EventType.id ==
                                       models.Event.event_type_id,
                                       models.EventType.desc == event_type))
                         .order_by(trait_model.key))
                if trait_type:
                    query = query.filter(trait_model.key == trait_type)

                dtype = TRAIT_MODEL_TO_ID.get(trait_model)
                for k, v in query.all():
                    yield api_models.Trait(name=k, dtype=dtype, value=v)

    def clear_expired_event_data(self, ttl):
        """Clear expired data from the backend storage system.

        Clearing occurs according to the time-to-live.

        :param ttl: Number of seconds to keep records for.
        """
        session = self._engine_facade.get_session()
        with session.begin():
            end = timeutils.utcnow() - datetime.timedelta(seconds=ttl)
            event_q = (session.query(models.Event.id)
                       .filter(models.Event.generated < end))

            event_subq = event_q.subquery()
            # Delete dependent trait rows before the events themselves.
            for trait_model in [models.TraitText, models.TraitInt,
                                models.TraitFloat, models.TraitDatetime]:
                (session.query(trait_model)
                 .filter(trait_model.event_id.in_(event_subq))
                 .delete(synchronize_session="fetch"))
            event_rows = event_q.delete()

            # remove EventType and TraitType with no corresponding
            # matching events and traits
            (session.query(models.EventType)
             .filter(~models.EventType.events.any())
             .delete(synchronize_session="fetch"))
            LOG.info(_LI("%d events are removed from database"), event_rows)
class Connection(base.Connection):
    """Put the event data into an ElasticSearch db.

    Events in ElasticSearch are indexed by day and stored by event_type.
    An example document::

      {"_index":"events_2014-10-21",
       "_type":"event_type0",
       "_id":"dc90e464-65ab-4a5d-bf66-ecb956b5d779",
       "_score":1.0,
       "_source":{"timestamp": "2014-10-21T20:02:09.274797"
                  "traits": {"id4_0": "2014-10-21T20:02:09.274797",
                             "id3_0": 0.7510790937279408,
                             "id2_0": 5,
                             "id1_0": "18c97ba1-3b74-441a-b948-a702a30cbce2"}
                 }
      }
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    # Prefix for the per-day index names ("events_YYYY-MM-DD").
    index_name = 'events'
    # NOTE(gordc): mainly for testing, data is not searchable after write,
    # it is only searchable after periodic refreshes.
    _refresh_on_write = False

    def __init__(self, url, conf):
        # NOTE(review): conf is accepted for interface parity but unused
        # here; only the netloc of the URL is needed by the ES client.
        url_split = netutils.urlsplit(url)
        self.conn = es.Elasticsearch(url_split.netloc)

    def upgrade(self):
        """Install the index template mapping traits as nested documents."""
        iclient = es.client.IndicesClient(self.conn)
        ts_template = {
            'template': '*',
            'mappings': {'_default_':
                         {'properties': {'traits': {'type': 'nested'}}}}}
        iclient.put_template(name='enable_timestamp', body=ts_template)

    def record_events(self, events):
        """Bulk-index the given events, one doc per event.

        Conflicting (already-indexed) events are skipped; any other bulk
        failure is wrapped in StorageUnknownWriteError and re-raised after
        the batch completes.
        """

        def _build_bulk_index(event_list):
            # Translate each event model into a streaming_bulk action.
            for ev in event_list:
                traits = {t.name: t.value for t in ev.traits}
                yield {'_op_type': 'create',
                       '_index': '%s_%s' % (self.index_name,
                                            ev.generated.date().isoformat()),
                       '_type': ev.event_type,
                       '_id': ev.message_id,
                       '_source': {'timestamp': ev.generated.isoformat(),
                                   'traits': traits,
                                   'raw': ev.raw}}

        error = None
        for ok, result in helpers.streaming_bulk(
                self.conn, _build_bulk_index(events)):
            if not ok:
                __, result = result.popitem()
                if result['status'] == 409:
                    # Document already exists ('create' op conflict).
                    LOG.info(_LI('Duplicate event detected, skipping it: %s')
                             % result)
                else:
                    LOG.exception(_LE('Failed to record event: %s') % result)
                    error = storage.StorageUnknownWriteError(result)

        if self._refresh_on_write:
            # Force the new docs to become searchable immediately and wait
            # for the cluster to drain pending tasks (test-only behavior).
            self.conn.indices.refresh(index='%s_*' % self.index_name)
            while self.conn.cluster.pending_tasks(local=True)['tasks']:
                pass
        if error:
            raise error

    def _make_dsl_from_filter(self, indices, ev_filter):
        """Build the kwargs for conn.search() from an EventFilter.

        Also prunes ``indices`` in place so only the per-day indices that
        can contain matches (given the timestamp bounds) are searched.

        :param indices: sorted list of candidate index names.
        :param ev_filter: storage.EventFilter instance.
        :returns: dict of search() keyword args ('index', 'body', and
                  optionally 'doc_type').
        """
        q_args = {}
        filters = []

        if ev_filter.start_timestamp:
            filters.append(
                {'range': {'timestamp':
                           {'ge': ev_filter.start_timestamp.isoformat()}}})
            # Drop indices for days entirely before the start bound.
            while indices[0] < (
                '%s_%s' % (self.index_name,
                           ev_filter.start_timestamp.date().isoformat())):
                del indices[0]
        if ev_filter.end_timestamp:
            filters.append(
                {'range': {'timestamp':
                           {'le': ev_filter.end_timestamp.isoformat()}}})
            # Drop indices for days entirely after the end bound.
            while indices[-1] > (
                '%s_%s' % (self.index_name,
                           ev_filter.end_timestamp.date().isoformat())):
                del indices[-1]
        q_args['index'] = indices

        if ev_filter.event_type:
            q_args['doc_type'] = ev_filter.event_type
        if ev_filter.message_id:
            filters.append({'term': {'_id': ev_filter.message_id}})

        if ev_filter.traits_filter or ev_filter.admin_proj:
            or_cond = []
            trait_filters = []
            for t_filter in ev_filter.traits_filter or []:
                value = None
                # The filter dict carries the value under a type-named key;
                # find whichever one is set and normalize it for the query.
                for val_type in ['integer', 'string', 'float', 'datetime']:
                    if t_filter.get(val_type):
                        value = t_filter.get(val_type)
                        if isinstance(value, six.string_types):
                            value = value.lower()
                        elif isinstance(value, datetime.datetime):
                            value = value.isoformat()
                        break
                if t_filter.get('op') in ['gt', 'ge', 'lt', 'le']:
                    # Map to ES range operators (gte/lte).
                    op = (t_filter.get('op').replace('ge', 'gte')
                          .replace('le', 'lte'))
                    trait_filters.append(
                        {'range': {"traits.%s" % t_filter['key']:
                                   {op: value}}})
                else:
                    tf = {"query": {"query_string": {
                        "query": "traits.%s: \"%s\"" % (t_filter['key'],
                                                        value)}}}
                    if t_filter.get('op') == 'ne':
                        tf = {"not": tf}
                    trait_filters.append(tf)
            if ev_filter.admin_proj:
                # Admin scope: events with no project_id trait, OR whose
                # project_id equals the admin project.
                or_cond = [{'missing': {'field': 'traits.project_id'}},
                           {'term': {'traits.project_id':
                                     ev_filter.admin_proj}}]
            filters.append(
                {'nested': {'path': 'traits',
                            'query': {'filtered': {
                                'filter': {'bool': {'must': trait_filters,
                                                    'should': or_cond}}}}}})

        q_args['body'] = {'query': {'filtered':
                                    {'filter': {'bool': {'must': filters}}}}}
        return q_args

    def get_events(self, event_filter, pagination=None):
        """Return an iter of models.Event matching the filter.

        Only 'limit' pagination is supported; 'sort' is ignored with a
        warning. Results are ordered by ascending timestamp.
        """
        limit = None
        if pagination:
            if pagination.get('sort'):
                LOG.warning(_LW('Driver does not support sort functionality'))
            limit = pagination.get('limit')
            if limit == 0:
                return
        iclient = es.client.IndicesClient(self.conn)
        indices = iclient.get_mapping('%s_*' % self.index_name).keys()
        if indices:
            filter_args = self._make_dsl_from_filter(indices, event_filter)
            if limit is not None:
                filter_args['size'] = limit
            results = self.conn.search(fields=['_id', 'timestamp',
                                               '_type', '_source'],
                                       sort='timestamp:asc',
                                       **filter_args)
            trait_mappings = {}
            for record in results['hits']['hits']:
                trait_list = []
                # Cache the trait-type mapping per event type so we only
                # fetch it once per search.
                if not record['_type'] in trait_mappings:
                    trait_mappings[record['_type']] = list(
                        self.get_trait_types(record['_type']))
                for key in record['_source']['traits'].keys():
                    value = record['_source']['traits'][key]
                    for t_map in trait_mappings[record['_type']]:
                        if t_map['name'] == key:
                            dtype = t_map['data_type']
                            break
                    else:
                        # Unknown trait name: fall back to text.
                        dtype = models.Trait.TEXT_TYPE
                    trait_list.append(models.Trait(
                        name=key, dtype=dtype,
                        value=models.Trait.convert_value(dtype, value)))
                gen_ts = timeutils.normalize_time(timeutils.parse_isotime(
                    record['_source']['timestamp']))
                yield models.Event(message_id=record['_id'],
                                   event_type=record['_type'],
                                   generated=gen_ts,
                                   traits=sorted(
                                       trait_list,
                                       key=operator.attrgetter('dtype')),
                                   raw=record['_source']['raw'])

    def get_event_types(self):
        """Return all known event types, sorted, as a list of strings."""
        iclient = es.client.IndicesClient(self.conn)
        es_mappings = iclient.get_mapping('%s_*' % self.index_name)
        seen_types = set()
        for index in es_mappings.keys():
            for ev_type in es_mappings[index]['mappings'].keys():
                seen_types.add(ev_type)
        # TODO(gordc): tests assume sorted ordering but backends are not
        # explicitly ordered.
        # NOTE: _default_ is a type that appears in all mappings but is not
        # real 'type'
        seen_types.discard('_default_')
        return sorted(list(seen_types))

    @staticmethod
    def _remap_es_types(d_type):
        """Translate an ES field type name into a Trait type name."""
        if d_type == 'string':
            d_type = 'text'
        elif d_type == 'long':
            d_type = 'int'
        elif d_type == 'double':
            d_type = 'float'
        elif d_type == 'date' or d_type == 'date_time':
            d_type = 'datetime'
        return d_type

    def get_trait_types(self, event_type):
        """Yield {'name', 'data_type'} dicts for traits of an event type.

        Each (name, type) pair is yielded at most once across all indices.
        """
        iclient = es.client.IndicesClient(self.conn)
        es_mappings = iclient.get_mapping('%s_*' % self.index_name)
        seen_types = []
        for index in es_mappings.keys():
            # if event_type exists in index and has traits
            if (es_mappings[index]['mappings'].get(event_type) and
                    es_mappings[index]['mappings'][event_type]['properties']
                    ['traits'].get('properties')):
                for t_type in (es_mappings[index]['mappings'][event_type]
                               ['properties']['traits']['properties'].keys()):
                    d_type = (es_mappings[index]['mappings'][event_type]
                              ['properties']['traits']['properties']
                              [t_type]['type'])
                    d_type = models.Trait.get_type_by_name(
                        self._remap_es_types(d_type))
                    if (t_type, d_type) not in seen_types:
                        yield {'name': t_type, 'data_type': d_type}
                        seen_types.append((t_type, d_type))

    def get_traits(self, event_type, trait_type=None):
        """Yield models.Trait instances for an event type.

        If trait_type is specified, only instances of that trait are
        yielded; nothing is yielded when the trait type is unknown.
        """
        t_types = dict((res['name'], res['data_type'])
                       for res in self.get_trait_types(event_type))
        if not t_types or (trait_type and trait_type not in t_types.keys()):
            return
        result = self.conn.search('%s_*' % self.index_name, event_type)
        for ev in result['hits']['hits']:
            if trait_type and ev['_source']['traits'].get(trait_type):
                yield models.Trait(
                    name=trait_type,
                    dtype=t_types[trait_type],
                    value=models.Trait.convert_value(
                        t_types[trait_type],
                        ev['_source']['traits'][trait_type]))
            else:
                for trait in ev['_source']['traits'].keys():
                    yield models.Trait(
                        name=trait,
                        dtype=t_types[trait],
                        value=models.Trait.convert_value(
                            t_types[trait],
                            ev['_source']['traits'][trait]))
class Connection(hbase_base.Connection, base.Connection):
    """Put the event data into a HBase database

    Collections:

    - events:

      - row_key: timestamp of event's generation + uuid of event
        in format: "%s:%s" % (ts, Event.message_id)
      - Column Families:

        f: contains the following qualifiers:

          - event_type: description of event's type
          - timestamp: time stamp of event generation
          - all traits for this event in format:

            .. code-block:: python

              "%s:%s" % (trait_name, trait_type)
    """

    CAPABILITIES = utils.update_nested(base.Connection.CAPABILITIES,
                                       AVAILABLE_CAPABILITIES)
    STORAGE_CAPABILITIES = utils.update_nested(
        base.Connection.STORAGE_CAPABILITIES,
        AVAILABLE_STORAGE_CAPABILITIES,
    )
    _memory_instance = None

    EVENT_TABLE = "event"

    def upgrade(self):
        """Create the event table with a single 'f' column family."""
        tables = [self.EVENT_TABLE]
        column_families = {'f': dict(max_versions=1)}
        with self.conn_pool.connection() as conn:
            hbase_utils.create_tables(conn, tables, column_families)

    def clear(self):
        """Drop the event table, ignoring failures at each step."""
        LOG.debug('Dropping HBase schema...')
        with self.conn_pool.connection() as conn:
            for table in [self.EVENT_TABLE]:
                # Best-effort teardown: a missing/already-disabled table
                # must not abort the clear.
                try:
                    conn.disable_table(table)
                except Exception:
                    LOG.debug('Cannot disable table but ignoring error')
                try:
                    conn.delete_table(table)
                except Exception:
                    LOG.debug('Cannot delete table but ignoring error')

    def record_events(self, event_models):
        """Write the events to Hbase.

        Per-event failures are logged and remembered; the last error is
        re-raised after all events have been attempted.

        :param event_models: a list of models.Event objects.
        """
        error = None
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            for event_model in event_models:
                # Row key consists of timestamp and message_id from
                # models.Event, for purposes of storing events sorted by
                # timestamp in the database.
                ts = event_model.generated
                row = hbase_utils.prepare_key(
                    hbase_utils.timestamp(ts, reverse=False),
                    event_model.message_id)
                event_type = event_model.event_type
                traits = {}
                if event_model.traits:
                    for trait in event_model.traits:
                        key = hbase_utils.prepare_key(trait.name, trait.dtype)
                        traits[key] = trait.value
                record = hbase_utils.serialize_entry(
                    traits, event_type=event_type, timestamp=ts,
                    raw=event_model.raw)
                try:
                    events_table.put(row, record)
                except Exception as ex:
                    LOG.exception(_LE("Failed to record event: %s") % ex)
                    error = ex
        if error:
            raise error

    def get_events(self, event_filter, pagination=None):
        """Return an iter of models.Event objects.

        :param event_filter: storage.EventFilter object, consists of filters
                             for events that are stored in database.
        :param pagination: Pagination parameters (only 'limit' is honored;
                           'sort' is not supported by this driver).
        """
        limit = None
        if pagination:
            if pagination.get('sort'):
                LOG.warning(_LW('Driver does not support sort functionality'))
            limit = pagination.get('limit')
            if limit == 0:
                return
        q, start, stop = hbase_utils.make_events_query_from_filter(
            event_filter)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)

            gen = events_table.scan(filter=q, row_start=start,
                                    row_stop=stop, limit=limit)
            for event_id, data in gen:
                traits = []
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    # Tuple keys are (trait_name, trait_dtype) pairs; plain
                    # string keys hold event metadata.
                    if isinstance(key, tuple):
                        trait_name, trait_dtype = key
                        traits.append(models.Trait(name=trait_name,
                                                   dtype=int(trait_dtype),
                                                   value=value))
                ts, mess = event_id.split(':')

                yield models.Event(
                    message_id=hbase_utils.unquote(mess),
                    event_type=events_dict['event_type'],
                    generated=events_dict['timestamp'],
                    traits=sorted(traits,
                                  key=operator.attrgetter('dtype')),
                    raw=events_dict['raw']
                )

    def get_event_types(self):
        """Return all event types as an iterable of strings."""
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan()

            event_types = set()
            for event_id, data in gen:
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if (not isinstance(key, tuple) and
                            key.startswith('event_type')):
                        if value not in event_types:
                            event_types.add(value)
                            yield value

    def get_trait_types(self, event_type):
        """Return a dictionary containing the name and data type of the trait.

        Only trait types for the provided event_type are returned.

        :param event_type: the type of the Event
        """
        q = hbase_utils.make_query(event_type=event_type)
        trait_names = set()
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
            for event_id, data in gen:
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if isinstance(key, tuple):
                        trait_name, trait_type = key
                        if trait_name not in trait_names:
                            # Here we check that our method return only unique
                            # trait types, for ex. if it is found the same
                            # trait types in different events with equal
                            # event_type, method will return only one trait
                            # type. It is proposed that certain trait name
                            # could have only one trait type.
                            trait_names.add(trait_name)
                            data_type = models.Trait.type_names[
                                int(trait_type)]
                            yield {'name': trait_name,
                                   'data_type': data_type}

    def get_traits(self, event_type, trait_type=None):
        """Return all trait instances associated with an event_type.

        If trait_type is specified, only return instances of that trait type.

        :param event_type: the type of the Event to filter by
        :param trait_type: the name of the Trait to filter by
        """
        q = hbase_utils.make_query(event_type=event_type,
                                   trait_type=trait_type)
        with self.conn_pool.connection() as conn:
            events_table = conn.table(self.EVENT_TABLE)
            gen = events_table.scan(filter=q)
            for event_id, data in gen:
                events_dict = hbase_utils.deserialize_entry(data)[0]
                for key, value in events_dict.items():
                    if isinstance(key, tuple):
                        trait_name, trait_type = key
                        yield models.Trait(name=trait_name,
                                           dtype=int(trait_type),
                                           value=value)