示例#1
0
def main():
    """Migrate alarm definitions (and optionally their change history)
    from a NoSQL storage backend into a SQL backend.

    Connection URLs and behaviour flags come from the command line via
    get_parser().  Entries already present in the SQL store raise
    DBDuplicateEntry and are skipped with a warning rather than
    aborting the whole migration.
    """
    args = get_parser().parse_args()

    # Set up logging to use the console
    console = logging.StreamHandler(sys.stderr)
    formatter = logging.Formatter(
        '[%(asctime)s] %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    root_logger.addHandler(console)
    if args.debug:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)

    # Fail fast on inconsistent/missing connection arguments.
    _validate_conn_options(args)

    # Source (NoSQL) connection, built from its own isolated option set.
    nosql_conf = cfg.ConfigOpts()
    db_options.set_defaults(nosql_conf, args.nosql_conn)
    nosql_conf.register_opts(storage.OPTS, 'database')
    nosql_conn = storage.get_connection_from_config(nosql_conf)

    # Destination (SQL) connection, likewise isolated.
    sql_conf = cfg.ConfigOpts()
    db_options.set_defaults(sql_conf, args.sql_conn)
    sql_conf.register_opts(storage.OPTS, 'database')
    sql_conn = storage.get_connection_from_config(sql_conf)

    root_logger.info(
        _LI("Starting to migrate alarms data from NoSQL to SQL..."))

    # Number of alarms actually created in the SQL store.
    count = 0
    for alarm in nosql_conn.get_alarms():
        root_logger.debug("Migrating alarm %s..." % alarm.alarm_id)
        try:
            sql_conn.create_alarm(alarm)
            count += 1
        except exception.DBDuplicateEntry:
            # Alarm itself already migrated; its history (below) may
            # still need copying, so do not skip the rest of the loop.
            root_logger.warning(_LW("Duplicated alarm %s found, skipped."),
                                alarm.alarm_id)
        if not args.migrate_history:
            continue

        # Copy every change-history record attached to this alarm.
        history_count = 0
        for history in nosql_conn.get_alarm_changes(alarm.alarm_id, None):
            history_data = history.as_dict()
            root_logger.debug("    Migrating alarm history data with"
                              " event_id %s..." % history_data['event_id'])
            try:
                sql_conn.record_alarm_change(history_data)
                history_count += 1
            except exception.DBDuplicateEntry:
                root_logger.warning(
                    _LW("    Duplicated alarm history %s found, skipped."),
                    history_data['event_id'])
        root_logger.info(_LI("    Migrated %(count)s history data of alarm "
                             "%(alarm_id)s"),
                         {'count': history_count, 'alarm_id': alarm.alarm_id})

    root_logger.info(_LI("End alarms data migration from NoSQL to SQL, %s"
                         " alarms have been migrated."), count)
示例#2
0
def conversion():
    """Interactively convert 'combination' alarms into 'composite' alarms.

    Prompts for confirmation, then for every combination alarm (or only
    the one selected with --alarm-id) creates an equivalent composite
    alarm.  Alarms that were already converted or cannot be expressed as
    composite rules are skipped with a warning.  Optionally deletes the
    original combination alarms afterwards.
    """
    confirm = moves.input("This tool is used for converting the combination "
                          "alarms to composite alarms, please type 'yes' to "
                          "confirm: ")
    if confirm != 'yes':
        print("Alarm conversion aborted!")
        return
    args = get_parser().parse_args()
    conf = service.prepare_service()
    conn = storage.get_connection_from_config(conf)
    combination_alarms = list(conn.get_alarms(alarm_type='combination',
                                              alarm_id=args.alarm_id or None))
    # Number of alarms successfully converted.
    count = 0
    for alarm in combination_alarms:
        new_name = 'From-combination: %s' % alarm.alarm_id
        # The derived name marks alarms converted by a previous run.
        n_alarm = list(conn.get_alarms(name=new_name, alarm_type='composite'))
        if n_alarm:
            LOG.warning(_LW('Alarm %(alarm)s has been already converted as '
                            'composite alarm: %(n_alarm_id)s, skipped.'),
                        {'alarm': alarm.alarm_id,
                         'n_alarm_id': n_alarm[0].alarm_id})
            continue
        try:
            composite_rule = _generate_composite_rule(conn, alarm)
        except DependentAlarmNotFound as e:
            LOG.warning(_LW('The dependent alarm %(dep_alarm)s of alarm %'
                            '(com_alarm)s not found, skipped.'),
                        {'com_alarm': e.com_alarm_id,
                         'dep_alarm': e.dependent_alarm_id})
            continue
        except UnsupportedSubAlarmType as e:
            LOG.warning(_LW('Alarm conversion from combination to composite '
                            'only support combination alarms depending '
                            'threshold alarms, the type of alarm %(alarm)s '
                            'is: %(type)s, skipped.'),
                        {'alarm': e.sub_alarm_id, 'type': e.sub_alarm_type})
            continue
        # Clone the source alarm, then overwrite the fields that differ
        # for the composite flavour.
        new_alarm = models.Alarm(**alarm.as_dict())
        new_alarm.alarm_id = str(uuid.uuid4())
        new_alarm.name = new_name
        new_alarm.type = 'composite'
        new_alarm.description = ('composite alarm converted from combination '
                                 'alarm: %s' % alarm.alarm_id)
        new_alarm.rule = composite_rule
        new_alarm.timestamp = datetime.datetime.now()
        conn.create_alarm(new_alarm)
        LOG.info(_LI('End Converting combination alarm %(s_alarm)s to '
                     'composite alarm %(d_alarm)s'),
                 {'s_alarm': alarm.alarm_id, 'd_alarm': new_alarm.alarm_id})
        count += 1
    if args.delete_combination_alarm:
        # Only reached when requested; removes the now-redundant originals.
        for alarm in combination_alarms:
            LOG.info(_LI('Deleting the combination alarm %s...'),
                     alarm.alarm_id)
            conn.delete_alarm(alarm.alarm_id)
    LOG.info(_LI('%s combination alarms have been converted to composite '
                 'alarms.'), count)
示例#3
0
    def _transition_alarm(self, alarm, state, trending_state, statistics,
                          outside_count):
        """Refresh *alarm* to the appropriate state with a reason.

        A supplied trending state only takes effect while the alarm is
        currently UNKNOWN or has repeat_actions enabled; otherwise the
        regular *state* handling applies.
        """
        is_unknown = alarm.state == evaluator.UNKNOWN
        repeating = alarm.repeat_actions

        # Trend handling: keep a known state unless we were UNKNOWN.
        if trending_state and (is_unknown or repeating):
            target = trending_state if is_unknown else alarm.state
            reason, reason_data = self._reason(alarm, statistics, target,
                                               outside_count)
            self._refresh(alarm, target, reason, reason_data)
            return

        if state == evaluator.UNKNOWN and not is_unknown:
            periods = alarm.rule['evaluation_periods']
            LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                            '%(actual)d') % {'expected': periods,
                                             'actual': len(statistics)})
            # Reason is not same as log message because we want to keep
            # consistent since thirdparty software may depend on old format.
            reason = _('%d datapoints are unknown') % periods
            last = statistics[-1] if statistics else None
            reason_data = self._reason_data('unknown', periods, last)
            self._refresh(alarm, state, reason, reason_data)
        elif state and (alarm.state != state or repeating):
            reason, reason_data = self._reason(alarm, statistics, state,
                                               outside_count)
            self._refresh(alarm, state, reason, reason_data)
示例#4
0
    def extract_my_subset(self, group_id, universal_set):
        """Filters an iterable, returning only objects assigned to this agent.

        We have a list of objects and get a list of active group members from
        `tooz`. We then hash all the objects into buckets and return only
        the ones that hashed into *our* bucket.
        """
        # No partitioning group configured: this agent owns everything.
        if not group_id:
            return universal_set
        if group_id not in self._groups:
            self.join_group(group_id)
        try:
            members = self._get_members(group_id)
            LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
            if self._my_id not in members:
                LOG.warning(_LW('Cannot extract tasks because agent failed to '
                                'join group properly. Rejoining group.'))
                # Single retry: rejoin and re-read membership before
                # giving up with an explicit error.
                self.join_group(group_id)
                members = self._get_members(group_id)
                if self._my_id not in members:
                    raise MemberNotInGroupError(group_id, members, self._my_id)
                LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
            # Consistent hashing maps each object to exactly one member.
            hr = HashRing(members)
            LOG.debug('Universal set: %s', universal_set)
            my_subset = [v for v in universal_set
                         if hr.get_node(str(v)) == self._my_id]
            LOG.debug('My subset: %s', my_subset)
            return my_subset
        except tooz.coordination.ToozError:
            # Coordination backend unreachable: claim nothing rather than
            # risk several agents processing the same objects.
            LOG.exception(_LE('Error getting group membership info from '
                              'coordination backend.'))
            return []
def setup_app(pecan_config=PECAN_CONFIG, conf=None):
    """Build and return the pecan WSGI application for the API.

    :param pecan_config: pecan configuration mapping
    :param conf: service configuration; required despite the keyword
        default (pecan forces keyword arguments on us).
    :raises RuntimeError: when *conf* is not supplied
    """
    if conf is None:
        # NOTE(jd) That sucks but pecan forces us to use kwargs :(
        raise RuntimeError("Config is actually mandatory")

    # FIXME: Replace DBHook with a hooks.TransactionHook
    storage_conn = storage.get_connection_from_config(conf)
    app_hooks = [
        hooks.ConfigHook(conf),
        hooks.DBHook(storage_conn),
        hooks.TranslationHook(),
    ]

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = conf.api.pecan_debug
    if conf.api.workers != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled, if workers is > 1, '
                        'the value is overrided with False'))

    return pecan.make_app(pecan_config['app']['root'],
                          debug=pecan_debug,
                          hooks=app_hooks,
                          wrap_app=middleware.ParsableErrorMiddleware,
                          guess_content_type_from_ext=False)
示例#6
0
 def _refresh(self, alarm, state, reason, reason_data, always_record=False):
     """Refresh alarm state.

     Persists the new state, records the change and notifies listeners
     when the state actually changed (or always_record is set); when
     unchanged, notifies only if the alarm has repeat_actions enabled.
     """
     try:
         previous = alarm.state
         alarm.state = state
         if previous != state or always_record:
             LOG.info(_('alarm %(id)s transitioning to %(state)s because '
                        '%(reason)s') % {'id': alarm.alarm_id,
                                         'state': state,
                                         'reason': reason})
             try:
                 self._storage_conn.update_alarm(alarm)
             except storage.AlarmNotFound:
                 # Fix: the original literals concatenated "the"+"alarm"
                 # with no separating space ("thealarm").
                 LOG.warning(_LW("Skip updating this alarm's state, the "
                                 "alarm: %s has been deleted"),
                             alarm.alarm_id)
             else:
                 self._record_change(alarm)
             self.notifier.notify(alarm, previous, reason, reason_data)
         elif alarm.repeat_actions:
             self.notifier.notify(alarm, previous, reason, reason_data)
     except Exception:
         # retry will occur naturally on the next evaluation
         # cycle (unless alarm state reverts in the meantime)
         LOG.exception(_('alarm state update failed'))
示例#7
0
    def evaluate_events(self, events):
        """Evaluate the events by referring related alarms."""
        # Accept a single event as well as a list of them.
        if not isinstance(events, list):
            events = [events]

        LOG.debug('Starting event alarm evaluation: #events = %d',
                  len(events))
        for raw_event in events:
            LOG.debug('Evaluating event: event = %s', raw_event)
            try:
                event = Event(raw_event)
            except InvalidEvent:
                # Malformed event: skip it but keep processing the rest.
                LOG.warning(_LW('Event <%s> is invalid, aborting evaluation '
                                'for it.'), raw_event)
                continue

            project_alarms = self._get_project_alarms(event.project)
            for alarm_id, alarm in six.iteritems(project_alarms):
                try:
                    self._evaluate_alarm(alarm, event)
                except Exception:
                    # One broken alarm must not stop the others.
                    LOG.exception(_LE('Failed to evaluate alarm (id=%(a)s) '
                                      'triggered by event = %(e)s.'),
                                  {'a': alarm_id, 'e': raw_event})

        LOG.debug('Finished event alarm evaluation.')
示例#8
0
    def extract_my_subset(self, group_id, universal_set):
        """Filters an iterable, returning only objects assigned to this agent.

        We have a list of objects and get a list of active group members from
        `tooz`. We then hash all the objects into buckets and return only
        the ones that hashed into *our* bucket.
        """
        # No partitioning group configured: this agent owns everything.
        if not group_id:
            return universal_set
        if group_id not in self._groups:
            self.join_group(group_id)
        try:
            members = self._get_members(group_id)
            LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
            if self._my_id not in members:
                LOG.warning(
                    _LW('Cannot extract tasks because agent failed to '
                        'join group properly. Rejoining group.'))
                # Single retry: rejoin and re-read membership before
                # giving up with an explicit error.
                self.join_group(group_id)
                members = self._get_members(group_id)
                if self._my_id not in members:
                    raise MemberNotInGroupError(group_id, members, self._my_id)
                LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
            # Consistent hashing maps each object to exactly one member.
            hr = HashRing(members)
            LOG.debug('Universal set: %s', universal_set)
            my_subset = [
                v for v in universal_set if hr.get_node(str(v)) == self._my_id
            ]
            LOG.debug('My subset: %s', my_subset)
            return my_subset
        except tooz.coordination.ToozError:
            # Coordination backend unreachable: claim nothing rather than
            # risk several agents processing the same objects.
            LOG.exception(
                _LE('Error getting group membership info from '
                    'coordination backend.'))
            return []
示例#9
0
 def _refresh(self, alarm, state, reason, reason_data, always_record=False):
     """Refresh alarm state.

     Persists the new state, records the change and notifies listeners
     when the state actually changed (or always_record is set); when
     unchanged, notifies only if the alarm has repeat_actions enabled.
     """
     try:
         previous = alarm.state
         alarm.state = state
         if previous != state or always_record:
             LOG.info(_('alarm %(id)s transitioning to %(state)s because '
                        '%(reason)s') % {'id': alarm.alarm_id,
                                         'state': state,
                                         'reason': reason})
             try:
                 self._storage_conn.update_alarm(alarm)
             except storage.AlarmNotFound:
                 # Fix: the original literals concatenated "the"+"alarm"
                 # with no separating space ("thealarm").
                 LOG.warning(_LW("Skip updating this alarm's state, the "
                                 "alarm: %s has been deleted"),
                             alarm.alarm_id)
             else:
                 self._record_change(alarm)
             self.notifier.notify(alarm, previous, reason, reason_data)
         elif alarm.repeat_actions:
             self.notifier.notify(alarm, previous, reason, reason_data)
     except Exception:
         # retry will occur naturally on the next evaluation
         # cycle (unless alarm state reverts in the meantime)
         LOG.exception(_('alarm state update failed'))
示例#10
0
文件: app.py 项目: paperandsoap/aodh
def setup_app(pecan_config=PECAN_CONFIG, conf=None):
    """Assemble the pecan WSGI application for the API.

    :param pecan_config: pecan configuration mapping
    :param conf: service configuration; required despite the keyword
        default (pecan forces keyword arguments on us).
    :raises RuntimeError: when *conf* is not supplied
    """
    if conf is None:
        # NOTE(jd) That sucks but pecan forces us to use kwargs :(
        raise RuntimeError("Config is actually mandatory")

    # FIXME: Replace DBHook with a hooks.TransactionHook
    db_hook = hooks.DBHook(storage.get_connection_from_config(conf))
    app_hooks = [hooks.ConfigHook(conf), db_hook, hooks.TranslationHook()]

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = conf.api.pecan_debug
    if conf.api.workers != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled, if workers is > 1, '
                        'the value is overrided with False'))

    return pecan.make_app(
        pecan_config['app']['root'],
        debug=pecan_debug,
        hooks=app_hooks,
        wrap_app=middleware.ParsableErrorMiddleware,
        guess_content_type_from_ext=False,
    )
示例#11
0
    def _transition_alarm(self, alarm, state, trending_state, statistics,
                          outside_count):
        """Refresh *alarm* to the appropriate state with a reason.

        A supplied trending state only takes effect while the alarm is
        currently UNKNOWN or has repeat_actions enabled; otherwise the
        regular *state* handling below applies.
        """
        unknown = alarm.state == evaluator.UNKNOWN
        continuous = alarm.repeat_actions

        if trending_state:
            if unknown or continuous:
                # Leave a known state unchanged unless we were UNKNOWN.
                state = trending_state if unknown else alarm.state
                reason, reason_data = self._reason(alarm, statistics, state,
                                                   outside_count)
                self._refresh(alarm, state, reason, reason_data)
                return

        if state == evaluator.UNKNOWN and not unknown:
            # Transitioning into UNKNOWN: not enough datapoints arrived.
            LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                            '%(actual)d') % {
                'expected': alarm.rule['evaluation_periods'],
                'actual': len(statistics)})
            # Reason is not same as log message because we want to keep
            # consistent since thirdparty software may depend on old format.
            reason = _('%d datapoints are unknown') % alarm.rule[
                'evaluation_periods']
            last = None if not statistics else statistics[-1]
            reason_data = self._reason_data('unknown',
                                            alarm.rule['evaluation_periods'],
                                            last)
            self._refresh(alarm, state, reason, reason_data)

        elif state and (alarm.state != state or continuous):
            # Real state change, or unchanged state with repeat_actions.
            reason, reason_data = self._reason(alarm, statistics, state,
                                               outside_count)
            self._refresh(alarm, state, reason, reason_data)
示例#12
0
文件: app.py 项目: chungg/aodh
def setup_app(pecan_config=None, extra_hooks=None):
    """Create the pecan WSGI application for the API service.

    :param pecan_config: pecan configuration; when falsy,
        get_pecan_config() supplies it.
    :param extra_hooks: optional extra pecan hooks appended after the
        standard config/DB/translation hooks.
    """
    # FIXME: Replace DBHook with a hooks.TransactionHook
    app_hooks = [hooks.ConfigHook(),
                 hooks.DBHook(
                     storage.get_connection_from_config(cfg.CONF, 'alarm'),),
                 hooks.TranslationHook()]
    if extra_hooks:
        app_hooks.extend(extra_hooks)

    if not pecan_config:
        pecan_config = get_pecan_config()

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = CONF.api.pecan_debug
    if service.get_workers('api') != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled, if workers is > 1, '
                        'the value is overrided with False'))

    app = pecan.make_app(
        pecan_config.app.root,
        debug=pecan_debug,
        force_canonical=getattr(pecan_config.app, 'force_canonical', True),
        hooks=app_hooks,
        wrap_app=middleware.ParsableErrorMiddleware,
        guess_content_type_from_ext=False
    )

    return app
示例#13
0
    def evaluate_events(self, events):
        """Evaluate the events by referring related alarms.

        :param events: a single event dict or a list of them; a single
            event is normalised into a one-element list.
        """
        if not isinstance(events, list):
            events = [events]

        LOG.debug('Starting event alarm evaluation: #events = %d',
                  len(events))
        for e in events:
            LOG.debug('Evaluating event: event = %s', e)
            try:
                event = Event(e)
            except InvalidEvent:
                # Malformed event: skip it but keep processing the rest.
                LOG.warning(_LW('Event <%s> is invalid, aborting evaluation '
                                'for it.'), e)
                continue

            for id, alarm in six.iteritems(
                    self._get_project_alarms(event.project)):
                try:
                    self._evaluate_alarm(alarm, event)
                except Exception:
                    # One broken alarm must not stop the others.
                    LOG.exception(_LE('Failed to evaluate alarm (id=%(a)s) '
                                      'triggered by event = %(e)s.'),
                                  {'a': id, 'e': e})

        LOG.debug('Finished event alarm evaluation.')
示例#14
0
 def _statistics(self, rule, start, end):
     """Fetch aggregated measures for the rule's metrics from Gnocchi.

     Any failure is logged and an empty list returned, so evaluation
     degrades gracefully instead of erroring out.
     """
     try:
         gnocchi_metrics = self._gnocchi_client.metric
         return gnocchi_metrics.aggregation(
             metrics=rule['metrics'],
             aggregation=rule['aggregation_method'],
             start=start, stop=end)
     except Exception as exc:
         LOG.warning(_LW('alarm stats retrieval failed: %s'), exc)
         return []
示例#15
0
 def _statistics(self, rule, start, end):
     """Return aggregated datapoints for the rule's metrics from Gnocchi.

     Any failure is logged and an empty list returned, so evaluation
     degrades gracefully instead of erroring out.
     """
     try:
         return self._gnocchi_client.metric.aggregation(
             metrics=rule['metrics'],
             start=start,
             stop=end,
             aggregation=rule['aggregation_method'])
     except Exception as e:
         # Broad catch: stats retrieval failure must not break evaluation.
         LOG.warning(_LW('alarm stats retrieval failed: %s'), e)
         return []
示例#16
0
def create_tables(conn, tables, column_families):
    """Create each of *tables* on *conn*, ignoring pre-existing ones.

    :param conn: connection exposing create_table(); presumably an
        HBase/Thrift-style connection given ttypes.AlreadyExists.
    :param tables: iterable of table names to create
    :param column_families: column-family definition shared by all tables
    """
    for table_name in tables:
        try:
            conn.create_table(table_name, column_families)
        except ttypes.AlreadyExists:
            # Rebuild the fully-prefixed name purely for the log message.
            if conn.table_prefix:
                table_name = "%s%s%s" % (conn.table_prefix,
                                         conn.table_prefix_separator,
                                         table_name)

            LOG.warning(_LW("Cannot create table %s because "
                            "it already exists. Ignoring error"), table_name)
示例#17
0
def create_tables(conn, tables, column_families):
    """Create each of *tables* on *conn*, ignoring pre-existing ones.

    :param conn: connection exposing create_table(); presumably an
        HBase/Thrift-style connection given ttypes.AlreadyExists.
    :param tables: iterable of table names to create
    :param column_families: column-family definition shared by all tables
    """
    for table in tables:
        try:
            conn.create_table(table, column_families)
        except ttypes.AlreadyExists:
            # Rebuild the fully-prefixed name purely for the log message.
            if conn.table_prefix:
                table = ("%(table_prefix)s"
                         "%(separator)s"
                         "%(table_name)s" %
                         dict(table_prefix=conn.table_prefix,
                              separator=conn.table_prefix_separator,
                              table_name=table))

            LOG.warning(
                _LW("Cannot create table %s because "
                    "it already exists. Ignoring error"), table)
示例#18
0
 def _statistics(self, rule, start, end):
     """Return aggregated datapoints for the rule's metrics from Gnocchi.

     Any failure is logged and an empty list returned, so evaluation
     degrades gracefully instead of erroring out.
     """
     try:
         # FIXME(sileht): In case of a heat autoscaling stack decide to
         # delete an instance, the gnocchi metrics associated to this
         # instance will be no more updated and when the alarm will ask
         # for the aggregation, gnocchi will raise a 'No overlap'
         # exception.
         # So temporary set 'needed_overlap' to 0 to disable the
         # gnocchi checks about missing points. For more detail see:
         #   https://bugs.launchpad.net/gnocchi/+bug/1479429
         return self._gnocchi_client.metric.aggregation(
             metrics=rule['metrics'],
             start=start, stop=end,
             aggregation=rule['aggregation_method'],
             needed_overlap=0)
     except Exception as e:
         # Broad catch: stats retrieval failure must not break evaluation.
         LOG.warning(_LW('alarm stats retrieval failed: %s'), e)
         return []
示例#19
0
 def _statistics(self, rule, start, end):
     """Aggregate the rule's metrics over [start, end] via Gnocchi.

     Failures are logged and yield an empty list so the caller sees the
     window as having no data rather than an error.
     """
     # FIXME(sileht): In case of a heat autoscaling stack decide to
     # delete an instance, the gnocchi metrics associated to this
     # instance will be no more updated and when the alarm will ask
     # for the aggregation, gnocchi will raise a 'No overlap'
     # exception.
     # So temporary set 'needed_overlap' to 0 to disable the
     # gnocchi checks about missing points. For more detail see:
     #   https://bugs.launchpad.net/gnocchi/+bug/1479429
     try:
         gnocchi_metrics = self._gnocchi_client.metric
         return gnocchi_metrics.aggregation(
             metrics=rule['metrics'],
             aggregation=rule['aggregation_method'],
             start=start, stop=end,
             needed_overlap=0)
     except Exception as exc:
         LOG.warning(_LW('alarm stats retrieval failed: %s'), exc)
         return []
示例#20
0
文件: threshold.py 项目: sileht/aodh
    def _sufficient(self, alarm, statistics):
        """Check for the sufficiency of the data for evaluation.

        Ensure there is sufficient data for evaluation, transitioning to
        unknown otherwise.

        :returns: True when enough datapoints are available.
        """
        sufficient = len(statistics) >= alarm.rule['evaluation_periods']
        if not sufficient and alarm.state != evaluator.UNKNOWN:
            # Fix: LOG.warn() is a deprecated alias of LOG.warning();
            # the rest of this codebase already uses warning().
            LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                            '%(actual)d') % {
                'expected': alarm.rule['evaluation_periods'],
                'actual': len(statistics)})
            # Reason is not same as log message because we want to keep
            # consistent since thirdparty software may depend on old format.
            reason = _('%d datapoints are unknown') % alarm.rule[
                'evaluation_periods']
            last = None if not statistics else statistics[-1]
            reason_data = self._reason_data('unknown',
                                            alarm.rule['evaluation_periods'],
                                            last)
            self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
        return sufficient
示例#21
0
文件: threshold.py 项目: sileht/aodh
    def _sufficient(self, alarm, statistics):
        """Check for the sufficiency of the data for evaluation.

        Ensure there is sufficient data for evaluation, transitioning to
        unknown otherwise.

        :returns: True when enough datapoints are available.
        """
        sufficient = len(statistics) >= alarm.rule['evaluation_periods']
        if not sufficient and alarm.state != evaluator.UNKNOWN:
            # Fix: LOG.warn() is a deprecated alias of LOG.warning();
            # the rest of this codebase already uses warning().
            LOG.warning(
                _LW('Expecting %(expected)d datapoints but only get '
                    '%(actual)d') % {
                        'expected': alarm.rule['evaluation_periods'],
                        'actual': len(statistics)
                    })
            # Reason is not same as log message because we want to keep
            # consistent since thirdparty software may depend on old format.
            reason = _(
                '%d datapoints are unknown') % alarm.rule['evaluation_periods']
            last = None if not statistics else statistics[-1]
            reason_data = self._reason_data('unknown',
                                            alarm.rule['evaluation_periods'],
                                            last)
            self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
        return sufficient
def conversion():
    """Convert every 'combination' alarm into an equivalent 'composite' one.

    Non-interactive variant: reads options from the command line, converts
    all combination alarms (or only the one given by --alarm-id), skipping
    alarms already converted or not expressible as composite rules, and
    optionally deletes the original combination alarms afterwards.
    """
    args = get_parser().parse_args()
    conf = service.prepare_service([])
    conn = storage.get_connection_from_config(conf)
    combination_alarms = list(
        conn.get_alarms(alarm_type='combination',
                        alarm_id=args.alarm_id or None))
    # Number of alarms successfully converted.
    count = 0
    for alarm in combination_alarms:
        new_name = 'From-combination: %s' % alarm.alarm_id
        # The derived name marks alarms converted by a previous run.
        n_alarm = list(conn.get_alarms(name=new_name, alarm_type='composite'))
        if n_alarm:
            LOG.warning(
                _LW('Alarm %(alarm)s has been already converted as '
                    'composite alarm: %(n_alarm_id)s, skipped.'), {
                        'alarm': alarm.alarm_id,
                        'n_alarm_id': n_alarm[0].alarm_id
                    })
            continue
        try:
            composite_rule = _generate_composite_rule(conn, alarm)
        except DependentAlarmNotFound as e:
            LOG.warning(
                _LW('The dependent alarm %(dep_alarm)s of alarm %'
                    '(com_alarm)s not found, skipped.'), {
                        'com_alarm': e.com_alarm_id,
                        'dep_alarm': e.dependent_alarm_id
                    })
            continue
        except UnsupportedSubAlarmType as e:
            LOG.warning(
                _LW('Alarm conversion from combination to composite '
                    'only support combination alarms depending '
                    'threshold alarms, the type of alarm %(alarm)s '
                    'is: %(type)s, skipped.'), {
                        'alarm': e.sub_alarm_id,
                        'type': e.sub_alarm_type
                    })
            continue
        # Clone the source alarm, then overwrite the fields that differ
        # for the composite flavour.
        new_alarm = models.Alarm(**alarm.as_dict())
        new_alarm.alarm_id = uuidutils.generate_uuid()
        new_alarm.name = new_name
        new_alarm.type = 'composite'
        new_alarm.description = ('composite alarm converted from combination '
                                 'alarm: %s' % alarm.alarm_id)
        new_alarm.rule = composite_rule
        new_alarm.timestamp = datetime.datetime.now()
        conn.create_alarm(new_alarm)
        LOG.info(
            _LI('End Converting combination alarm %(s_alarm)s to '
                'composite alarm %(d_alarm)s'), {
                    's_alarm': alarm.alarm_id,
                    'd_alarm': new_alarm.alarm_id
                })
        count += 1
    if args.delete_combination_alarm:
        # Only reached when requested; removes the now-redundant originals.
        for alarm in combination_alarms:
            LOG.info(_LI('Deleting the combination alarm %s...'),
                     alarm.alarm_id)
            conn.delete_alarm(alarm.alarm_id)
    LOG.info(
        _LI('%s combination alarms have been converted to composite '
            'alarms.'), count)