Exemplo n.º 1
0
def main(argv=None):
    """Command-line entry point for the orphaned resource provider fixer.

    Parses the command line, connects to MySQL, runs
    ``resource_providers_fixer`` and, when a Slack webhook file was given
    and the action is ``update``, posts a one-line summary to Slack.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``sys.argv`` is used when this is ``None``.
    """
    cli = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Fixes issues with orphaned resource providers.')

    # The MySqlArgs helper adds its own options to the parser; the dict
    # passed here is forwarded to it unchanged.
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.')
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information to send a '
        'notification to')
    parser.add_argument(
        'action',
        choices=['info', 'update'],
        help='Just display info or actually update them?')

    args = parser.parse_args(cli[1:])
    mysqlargs.extract(args)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='orphan-resource-providers')

    fixer_options = dict(
        db=mysqlargs.connect(),
        describe=(args.action == 'info'),
        quiet=args.quiet,
    )

    if slack:
        # Run inside the Slackbot context manager.
        # NOTE(review): presumably this lets Slackbot report exceptions --
        # confirm against the Slackbot implementation.
        with slack:
            update_count = resource_providers_fixer(**fixer_options)
    else:
        update_count = resource_providers_fixer(**fixer_options)

    # Only real updates are announced on Slack.
    if not slack or args.action != 'update':
        return
    # In quiet mode stay silent when nothing was updated.
    if args.quiet and not update_count:
        return

    if update_count > 0:
        summary = 'Commanded update of *{} resource providers*'.format(
            update_count)
        shade = '#000000'
    else:
        summary = 'No resource providers to update'
        shade = '#cccccc'

    slack.post('orphan-resource-providers', summary, color=shade)
Exemplo n.º 2
0
def main(argv=None):
    """Report Ironic nodes that are in the "error" provision state.

    Nodes flagged as being in maintenance are ignored. The report is
    printed to stdout and, when ``--slack`` was given, posted to Slack.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``sys.argv`` is used when this is ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing from both the
        environment and the rc file, otherwise ``None``.
    """
    cli = sys.argv if argv is None else argv

    parser = base_parser(
        'Strange things, as per someone\'s definition of "strange".')
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args(cli[1:])

    slack = Slackbot(args.slack) if args.slack else None

    # Start from the prefixed environment variables, then let the rc file
    # (when supplied) override them.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(osapi.load_osrc(args.osrc))

    missing_os_vars = set(osapi.Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        complaint = 'Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars))
        print(complaint, file=sys.stderr)
        return -1

    auth = osapi.Auth(os_vars)
    nodes = osrest.ironic_nodes(auth, details=True)

    errored_nodes = []
    for node in nodes.values():
        if node['provision_state'] == 'error' and not node['maintenance']:
            errored_nodes.append(node)

    if not errored_nodes:
        if args.verbose:
            print('All good.')
        return

    report_lines = [
        'Ironic nodes in "error" provision state, not in maintenance'
    ]
    for node in errored_nodes:
        report_lines.append('• `{}`, last error: {}'.format(
            node['uuid'], node.get('last_error')))
    message = '\n'.join(report_lines)

    # The console copy uses an ASCII bullet; Slack gets the original text.
    print(message.replace('•', '*'))
    if slack:
        slack.post(SUBCOMMAND, message, color='xkcd:red')
Exemplo n.º 3
0
def main(argv=None):
    """Report on, or reset, Ironic nodes stuck in a known error state.

    In ``info`` mode the nodes returned by ``cureable_nodes`` are printed
    (and summarised on Slack when ``--slack`` is given) and nothing is
    changed. In ``reset`` mode a ``NodeResetter`` is run for each of those
    nodes; nodes whose reset raises ``TooManyResets`` are recorded as
    refused rather than treated as failures.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``sys.argv`` is used when this is ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing from both the
        environment and the ``--osrc`` file, otherwise ``None``.

    Raises:
        Exception: any error other than ``TooManyResets`` raised while
            resetting a node is re-raised, after being posted to Slack
            when Slack is configured.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Kick Ironic nodes that '
        'are in an common/known error state')

    parser.add_argument('mode', choices=['info', 'reset'],
        help='Just display data on the stuck nodes or reset their states')
    parser.add_argument('--slack', type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument('--osrc', type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--dry-run', action='store_true',
        help='Dry run, don\'t actually do anything')

    args = parser.parse_args(argv[1:])

    if args.slack:
        slack = Slackbot(args.slack)
    else:
        slack = None

    # Environment variables first; values from the rc file override them.
    os_vars = {k: os.environ[k] for k in os.environ if k.startswith(OS_ENV_PREFIX)}
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))
    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        print(
            'Missing required OS values in env/rcfile: {}'
            .format(', '.join(missing_os_vars)),
            file=sys.stderr
        )
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth, details=True)
    cureable = cureable_nodes(nodes)

    if args.mode == 'info':
        # Read-only report: dump selected fields of every treatable node.
        info_fields = (
            'uuid',
            'provision_updated_at',
            'provision_state',
            'last_error',
            'instance_uuid',
            'extra',
            'maintenance',
        )
        print('{} node(s) in a state that we can treat'.format(len(cureable)))
        for nid in cureable:
            print('-' * 40)
            print('\n'.join(
                '{:<25s} {}'.format(key, nodes[nid].get(key))
                for key in info_fields
            ))
        if slack:
            if cureable:
                message = ('{} nodes in correctable error states (no action '
                           'taken)'.format(len(cureable)))
                color = 'xkcd:orange red'
            else:
                # Nothing treatable; still mention nodes that are in the
                # error state (outside maintenance) but cannot be treated.
                error_nodes = sum(
                    1
                    for n in nodes.values()
                    if not n['maintenance'] and n['provision_state'] == 'error'
                )
                if error_nodes:
                    message = ('No nodes in correctable error states ({} other'
                               ' nodes in error state)').format(error_nodes)
                    color = 'xkcd:yellow'
                else:
                    message = 'No nodes in correctable error states'
                    color = 'xkcd:green'

            slack.post(SUBCOMMAND, message, color=color)
        return

    if not cureable:
        # Stay silent unless asked to be verbose.
        if args.verbose:
            message = 'Nothing to do.'
            print(message)
            if slack:
                slack.post(SUBCOMMAND, message, color='xkcd:light grey')
        return

    print('To correct: {}'.format(repr(cureable)))
    if slack:
        message = ['Ironic nodes in correctable error states']
        for nid in cureable:
            message.append(' • `{}`: "{}"'.format(nid, nodes[nid]['last_error']))
        message = '\n'.join(message)
        slack.post(SUBCOMMAND, message, color='xkcd:darkish red')

    reset_ok = []   # (node id, reset count) for each node whose reset ran
    too_many = []   # node ids whose reset raised TooManyResets
    for nid in cureable:
        resetter = NodeResetter(auth, nid, dry_run=args.dry_run)
        try:
            resetter.reset()
        except TooManyResets:
            too_many.append(nid)
        except Exception as e:
            # Tell Slack before propagating so the failure is not silent.
            if slack:
                error = '{}; check logs'.format(str(e))
                slack.post(SUBCOMMAND, error, color='xkcd:red')
            raise
        else:
            reset_ok.append((nid, resetter.tracker.count()))

    print('Attempted to fix: {}'.format(repr(reset_ok)))
    print('Refused to fix:   {}'.format(repr(too_many)))
    if slack:
        message = []
        if reset_ok:
            message.append('Attempted reset of nodes')
            message.extend(' • `{}`: {} resets'.format(*r) for r in reset_ok)
        if too_many:
            message.append('\nAbstained (already at limit)')
            # List the refused nodes themselves; too_many holds bare node
            # ids, so they are formatted directly rather than unpacked.
            message.extend(' • `{}`'.format(nid) for nid in too_many)

        color = 'xkcd:chartreuse'
        if args.dry_run:
            color = 'xkcd:yellow'
            message.append('dry run, no changes actually made.')
        if too_many:
            color = 'xkcd:orange'

        message = '\n'.join(message)
        slack.post(SUBCOMMAND, message, color=color)
Exemplo n.º 4
0
def main(argv=None):
    """Command-line entry point for the floating IP and port reclaimer.

    Parses the command line, builds OpenStack credentials and a MySQL
    connection, runs ``reaper`` and, when a Slack webhook file was given
    and the action is ``delete``, posts a summary of what was removed.

    Exits with status 1 when asked to delete ports against the ``ocata``
    database version, because that combination is refused.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``sys.argv`` is used when this is ``None``.
    """
    cli = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(
        description='Floating IP and port reclaimer.')

    # Helpers contribute their own options (database, then OpenStack).
    mysqlargs = MySqlArgs({
        'user': '******',
        'password': '',
        'host': 'localhost',
        'port': 3306,
    })
    mysqlargs.inject(parser)
    osapi.add_arguments(parser)

    parser.add_argument(
        '-w', '--whitelist',
        type=str,
        help='File of project/tenant IDs/names to ignore, one per line. '
        'Ignores case and dashes.')
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet mode. No output if there was nothing to do.')
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information to send a '
        'notification to')
    parser.add_argument(
        '-d', '--dbversion',
        type=str,
        choices=[query.LIBERTY, query.OCATA],
        default=query.LIBERTY,
        help='Version of the database. Schemas differ, pick the '
        'appropriate one.')
    parser.add_argument('--kvm', action='store_true', help='Run at KVM site')

    parser.add_argument(
        'action',
        choices=['info', 'delete'],
        help='Just display info or actually delete them?')
    parser.add_argument(
        'type',
        choices=list(RESOURCE_QUERY),
        help='Grab floating IPs or ports?')
    parser.add_argument(
        'idle_days',
        type=float,
        help='Number of days since last active instance in project was '
        'deleted to consider it idle.')

    args = parser.parse_args(cli[1:])
    mysqlargs.extract(args)
    auth = osapi.Auth.from_env_or_args(args=args)

    deleting = args.action == 'delete'
    if deleting and args.type == 'port' and args.dbversion == 'ocata':
        print(
            'Checking ports on Ocata isn\'t validated, refusing to '
            'automatically delete.',
            file=sys.stderr)
        sys.exit(1)

    slack = None
    if args.slack:
        slack = Slackbot(args.slack, script_name='neutron-reaper')

    # Projects listed in the whitelist file, one normalised entry per line.
    whitelist = set()
    if args.whitelist:
        with open(args.whitelist) as listing:
            for entry in listing:
                whitelist.add(normalize_project_name(entry, args.kvm))

    db = mysqlargs.connect()
    db.version = args.dbversion

    reaper_options = dict(
        db=db,
        auth=auth,
        type_=args.type,
        idle_days=args.idle_days,
        whitelist=whitelist,
        kvm=args.kvm,
        describe=(args.action == 'info'),
        quiet=args.quiet,
    )
    if slack:
        # Run inside the Slackbot context manager.
        # NOTE(review): presumably this lets Slackbot report exceptions --
        # confirm against the Slackbot implementation.
        with slack:
            remove_count = reaper(**reaper_options)
    else:
        remove_count = reaper(**reaper_options)

    # Only real deletions are announced on Slack.
    if not slack or args.action != 'delete':
        return
    # In quiet mode stay silent when nothing was removed.
    if args.quiet and not remove_count:
        return

    noun = {'ip': 'floating IP', 'port': 'port'}[args.type]
    plural_suffix = '' if remove_count == 1 else 's'
    thing = noun + plural_suffix

    if remove_count > 0:
        summary = (
            'Commanded deletion of *{} {}* ({:.0f} day grace-period)'.format(
                remove_count, thing, args.idle_days))
        shade = '#000000'
    else:
        summary = 'No {} to delete ({:.0f} day grace-period)'.format(
            thing, args.idle_days)
        shade = '#cccccc'

    slack.post('neutron-reaper', summary, color=shade)
Exemplo n.º 5
0
def main(argv=None):
    """Find, and optionally clean, Ironic nodes bound to missing instances.

    A node is a candidate when its ``instance_uuid`` is set but that id is
    not among the instances returned by ``osrest.nova_instances``. ``info``
    mode only reports the candidates; ``delete`` mode cleans each of them
    after some sanity checks.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``sys.argv`` is used when this is ``None``.

    Returns:
        ``-1`` when required OpenStack variables are missing from both the
        environment and the ``--osrc`` file, otherwise ``None``.

    Raises:
        Exception: any error raised while cleaning nodes is re-raised,
            after being posted to Slack when Slack is configured.
    """
    cli = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(description='Kick Ironic nodes that '
        'refer to a deleted/nonexistant Nova instance')
    parser.add_argument(
        'mode',
        choices=['info', 'delete'],
        help='Just display data on the bound nodes or delete them')
    parser.add_argument(
        '--slack',
        type=str,
        help='JSON file with Slack webhook information to send a notification to')
    parser.add_argument(
        '--osrc',
        type=str,
        help='Connection parameter file. Should include password. envars used '
        'if not provided by this file.')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument(
        '--force-sane',
        action='store_true',
        help='Disable sanity checking (i.e. things really are that bad)')
    # Hidden switch, for testing.
    parser.add_argument(
        '--force-insane',
        action='store_true',
        help=argparse.SUPPRESS)

    args = parser.parse_args(cli[1:])

    slack = Slackbot(args.slack) if args.slack else None

    # Environment variables first; values from the rc file override them.
    os_vars = {
        name: value
        for name, value in os.environ.items()
        if name.startswith(OS_ENV_PREFIX)
    }
    if args.osrc:
        os_vars.update(load_osrc(args.osrc))

    missing_os_vars = set(Auth.required_os_vars) - set(os_vars)
    if missing_os_vars:
        complaint = 'Missing required OS values in env/rcfile: {}'.format(
            ', '.join(missing_os_vars))
        print(complaint, file=sys.stderr)
        return -1

    auth = Auth(os_vars)

    nodes = osrest.ironic_nodes(auth)
    instances = osrest.nova_instances(auth)

    # Index every node that claims an instance by that instance's id.
    node_instance_map = {}
    for candidate in nodes.values():
        if candidate['instance_uuid'] is not None:
            node_instance_map[candidate['instance_uuid']] = candidate

    node_instance_ids = set(node_instance_map)
    instance_ids = set(instances)

    # Instance ids referenced by a node but absent from the instance list.
    unbound_instances = node_instance_ids - instance_ids

    if args.mode == 'info':
        # Report only; nothing is modified in this mode.
        print('ZOMBIE INSTANCES ON NODES' if unbound_instances
              else 'No zombies currently.')
        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]

            assert inst_id not in instances, 'contradiction, this should be impossible'

            print('-----')
            print('Ironic Node\n'
                  '  ID:       {}'.format(node['uuid']))
            print('  Instance: {}'.format(node['instance_uuid']))
            print('  State:    {}'.format(node['provision_state']))

        if slack:
            if unbound_instances:
                summary = ('{} nodes with dead instances (no action taken)'
                           .format(len(unbound_instances)))
                shade = 'xkcd:orange red'
            else:
                summary = 'No nodes with dead instances.'
                shade = 'xkcd:green'
            slack.post(SUBCOMMAND, summary, color=shade)
        return

    if args.mode != 'delete':
        return

    if not args.force_sane or args.force_insane:
        # Sanity checks, to avoid acting on implausible data.
        if not instance_ids and unbound_instances:
            _thats_crazy('(in)sanity check: 0 running instances(?!)', slack)

        # A limit of -1 makes the size check below trip for any count.
        ubi_limit = -1 if args.force_insane else 20
        if len(unbound_instances) > ubi_limit:
            _thats_crazy(
                '(in)sanity check: it thinks there are {} unbound instances'
                    .format(len(unbound_instances)),
                slack,
            )

    if slack:
        if unbound_instances:
            bullets = []
            for inst_id in unbound_instances:
                holder = node_instance_map[inst_id]
                bullets.append(' • node `{}` → instance `{}`'.format(
                    holder['uuid'], holder['instance_uuid']))
            announcement = (
                'Possible Ironic nodes with nonexistant instances:\n{}'
                .format('\n'.join(bullets)))
            slack.post(SUBCOMMAND, announcement, color='xkcd:darkish red')
        elif args.verbose:
            slack.post(
                SUBCOMMAND,
                'No Ironic nodes visibly clinging to dead instances',
                color='xkcd:light grey')

    try:
        for inst_id in unbound_instances:
            node = node_instance_map[inst_id]
            node_id = node['uuid']
            # Nodes in the 'available' state only get their instance data
            # cleared; every other state is moved to 'deleted'.
            if node['provision_state'] == 'available':
                clear_node_instance_data(auth, node_id)
            else:
                osrest.ironic_node_set_state(auth, node_id, 'deleted')
    except Exception as exc:
        # Tell Slack before propagating so the failure is not silent.
        if slack:
            error = '{} while trying to clean instances; check logs for traceback'.format(str(exc))
            slack.post(SUBCOMMAND, error, color='xkcd:red')
        raise
    else:
        if unbound_instances and slack:
            ok_message = 'Cleaned {} instance(s).'.format(
                len(unbound_instances))
            slack.post(SUBCOMMAND, ok_message, color='xkcd:chartreuse')