Пример #1
0
    def test_sshcheckjob(self):
        """Check the ssh command built by SshCheckjob and the info class it exposes.

        A dry-run ``SshCheckjob`` is created for target master ``master1`` and
        user ``testuser``. The test then verifies that:

        - ``_command`` prefixes the given executable path with
          ``sudo ssh testuser@master1``;
        - the ``info`` attribute is the ``CheckjobInfo`` class itself;
        - calling ``info()`` without arguments yields a value equal to ``{}``.
        """
        # Every cluster uses the same checkjob binary; only the master differs.
        checkjob_path = '/opt/moab/bin/checkjob'
        clusters = {
            'delcatty': {'path': checkjob_path, 'master': 'master15.delcatty.gent.vsc'},
            'phanpy': {'path': checkjob_path, 'master': 'master17.phanpy.gent.vsc'},
            'raichu': {'path': checkjob_path, 'master': 'master13.raichu.gent.vsc'},
            'golett': {'path': checkjob_path, 'master': 'master19.golett.gent.vsc'},
            'swalot': {'path': checkjob_path, 'master': 'master21.swalot.gent.vsc'},
        }

        checkjob = SshCheckjob(
            'master1',
            'testuser',
            clusters=clusters,
            cache_pickle=True,
            dry_run=True)

        # assertEqual is used throughout: assertEquals is a deprecated alias
        # (removed in Python 3.12) and the original mixed both spellings.
        self.assertEqual(
            checkjob._command('/opt/moab/bin/showq'),
            ['sudo', 'ssh', 'testuser@master1', '/opt/moab/bin/showq'])
        self.assertEqual(checkjob.info, CheckjobInfo)
        self.assertEqual(checkjob.info(), {})
Пример #2
0
    def test_sshcheckjob(self):
        """Check the ssh command built by SshCheckjob and the info class it exposes.

        The object under test is a dry-run ``SshCheckjob`` targeting master
        ``master1`` as user ``testuser``. Three things are asserted:

        - ``_command`` wraps the given executable path into
          ``['sudo', 'ssh', 'testuser@master1', <path>]``;
        - ``checkjob.info`` is the ``CheckjobInfo`` class;
        - ``checkjob.info()`` called without arguments compares equal to ``{}``.
        """

        clusters = {
            'delcatty': {
                'path': '/opt/moab/bin/checkjob',
                'master': 'master15.delcatty.gent.vsc'
            },
            'phanpy': {
                'path': '/opt/moab/bin/checkjob',
                'master': 'master17.phanpy.gent.vsc'
            },
            'raichu': {
                'path': '/opt/moab/bin/checkjob',
                'master': 'master13.raichu.gent.vsc'
            },
            'golett': {
                'path': '/opt/moab/bin/checkjob',
                'master': 'master19.golett.gent.vsc'
            },
            'swalot': {
                'path': '/opt/moab/bin/checkjob',
                'master': 'master21.swalot.gent.vsc'
            }
        }

        checkjob = SshCheckjob('master1',
                               'testuser',
                               clusters=clusters,
                               cache_pickle=True,
                               dry_run=True)

        expected_command = ['sudo', 'ssh', 'testuser@master1', '/opt/moab/bin/showq']
        self.assertEqual(checkjob._command('/opt/moab/bin/showq'), expected_command)

        # Use assertEqual consistently; assertEquals is a deprecated alias that
        # no longer exists in Python 3.12.
        self.assertEqual(checkjob.info, CheckjobInfo)
        self.assertEqual(checkjob.info(), {})
Пример #3
0
def main():
    """Collect checkjob information for every active user and store it per user.

    Job information is fetched through an ``SshCheckjob`` instance configured
    with the clusters named in the ``hosts`` option; the master and checkjob
    path of each cluster are read from the configuration file. For every user
    present in that information, a ``CheckjobInfo`` holding only that user's
    entry is handed to ``store_on_gpfs``.

    A failure while building or storing one user's data is logged and counted,
    and the loop moves on to the next user. Any other exception is logged,
    reported through ``opts.critical`` and terminates the process with exit
    code ``NAGIOS_EXIT_CRITICAL``.
    """
    # Collect all info

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g., for each cluster will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold': NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'hosts': ('the hosts/clusters that should be contacted for job information', None, 'extend', []),
        'location': ('the location for storing the pickle file: delcatty, muk', str, 'store', 'delcatty'),
        'access_token': ('the token that will allow authentication against the account page', None, 'store', None),
        'account_page_url': ('', None, 'store', None),
        'target_master': ('the master used to execute showq commands', None, 'store', None),
        'target_user': ('the user for ssh to the target master', None, 'store', None),
    }

    opts = ExtendedSimpleOption(options)

    try:
        rest_client = AccountpageClient(token=opts.options.access_token)

        gpfs = GpfsOperations()
        storage = VscStorage()
        storage_name = cluster_user_pickle_store_map[opts.options.location]
        login_mount_point = storage[storage_name].login_mount_point
        gpfs_mount_point = storage[storage_name].gpfs_mount_point

        # Per-cluster settings live in the config file section named after the host.
        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            checkjob_path = opts.configfile_parser.get(host, "checkjob_path")
            clusters[host] = {
                'master': master,
                'path': checkjob_path
            }

        checkjob = SshCheckjob(
            opts.options.target_master,
            opts.options.target_user,
            clusters,
            cache_pickle=True,
            dry_run=opts.options.dry_run)

        # Only the first element of the returned triple is used here.
        (job_information, _, _) = checkjob.get_moab_command_information()

        active_users = job_information.keys()

        logger.debug("Active users: %s" % (active_users))
        logger.debug("Checkjob information: %s" % (job_information))

        nagios_user_count = 0
        nagios_no_store = 0

        stats = {}

        for user in active_users:
            # Resolved outside the inner try: a failure here is handled by the
            # outer handler and aborts the whole run.
            path = get_pickle_path(opts.options.location, user, rest_client)
            try:
                user_queue_information = CheckjobInfo({user: job_information[user]})
                store_on_gpfs(user, path, "checkjob", user_queue_information, gpfs, login_mount_point,
                              gpfs_mount_point, ".checkjob.json.gz", opts.options.dry_run)
                nagios_user_count += 1
            except Exception:
                # Best effort per user: record the failure and keep going.
                logger.exception("Could not store cache file for user %s" % (user))
                nagios_no_store += 1

        # NOTE(review): stats is filled in but not consumed anywhere in this
        # function as shown -- presumably it is meant to be passed to a
        # reporting call; confirm against the full script.
        stats["store_users"] = nagios_user_count
        stats["store_fail"] = nagios_no_store
        stats["store_fail_critical"] = STORE_LIMIT_CRITICAL
    except Exception as err:
        # "except Exception, err" is Python-2-only syntax; the "as" form is
        # valid on Python 2.6+ and Python 3.
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)
Пример #4
0
def main():
    """Gather checkjob data over ssh and store each active user's share of it.

    The clusters named in the ``hosts`` option are looked up in the
    configuration file (``master`` and ``checkjob_path`` per host) and passed
    to an ``SshCheckjob`` instance. The job information it returns is split
    per user: each user's entry is wrapped in a ``CheckjobInfo`` and given to
    ``store_on_gpfs``.

    Errors raised while building or storing a single user's data are logged
    and counted without stopping the loop. Any other exception is logged,
    reported through ``opts.critical`` and ends the process with exit code
    ``NAGIOS_EXIT_CRITICAL``.
    """
    # Collect all info

    # Note: debug option is provided by generaloption
    # Note: other settings, e.g., for each cluster will be obtained from the configuration file
    options = {
        'nagios-check-interval-threshold':
        NAGIOS_CHECK_INTERVAL_THRESHOLD,
        'hosts':
        ('the hosts/clusters that should be contacted for job information',
         None, 'extend', []),
        'location': ('the location for storing the pickle file: delcatty, muk',
                     str, 'store', 'delcatty'),
        'access_token':
        ('the token that will allow authentication against the account page',
         None, 'store', None),
        'account_page_url': ('', None, 'store', None),
        'target_master':
        ('the master used to execute showq commands', None, 'store', None),
        'target_user':
        ('the user for ssh to the target master', None, 'store', None),
    }

    opts = ExtendedSimpleOption(options)

    try:
        rest_client = AccountpageClient(token=opts.options.access_token)

        gpfs = GpfsOperations()
        storage = VscStorage()
        storage_name = cluster_user_pickle_store_map[opts.options.location]
        login_mount_point = storage[storage_name].login_mount_point
        gpfs_mount_point = storage[storage_name].gpfs_mount_point

        # Build the cluster map from the config file section of each host.
        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            checkjob_path = opts.configfile_parser.get(host, "checkjob_path")
            clusters[host] = {'master': master, 'path': checkjob_path}

        checkjob = SshCheckjob(opts.options.target_master,
                               opts.options.target_user,
                               clusters,
                               cache_pickle=True,
                               dry_run=opts.options.dry_run)

        # The call returns a triple; only its first element is needed here.
        (job_information, _, _) = checkjob.get_moab_command_information()

        active_users = job_information.keys()

        logger.debug("Active users: %s" % (active_users))
        logger.debug("Checkjob information: %s" % (job_information))

        nagios_user_count = 0
        nagios_no_store = 0

        stats = {}

        for user in active_users:
            # Not covered by the inner try: an error here reaches the outer
            # handler and stops the script.
            path = get_pickle_path(opts.options.location, user, rest_client)
            try:
                user_queue_information = CheckjobInfo(
                    {user: job_information[user]})
                store_on_gpfs(user, path, "checkjob", user_queue_information,
                              gpfs, login_mount_point, gpfs_mount_point,
                              ".checkjob.json.gz", opts.options.dry_run)
                nagios_user_count += 1
            except Exception:
                # Per-user failures are counted, not fatal.
                logger.exception("Could not store cache file for user %s" %
                                 (user))
                nagios_no_store += 1

        # NOTE(review): stats is populated but never read in the code visible
        # here -- presumably a later reporting call consumes it; confirm
        # against the full script.
        stats["store_users"] = nagios_user_count
        stats["store_fail"] = nagios_no_store
        stats["store_fail_critical"] = STORE_LIMIT_CRITICAL
    except Exception as err:
        # The comma form ("except Exception, err") only parses on Python 2;
        # "as" is accepted by Python 2.6+ and Python 3 alike.
        logger.exception("critical exception caught: %s" % (err))
        opts.critical("Script failed in a horrible way")
        sys.exit(NAGIOS_EXIT_CRITICAL)