Exemplo n.º 1
0
def stats():
    """Fetch the slurmctld statistics dict via pyslurm and print it.

    Prints an "Error - ..." message instead of raising when pyslurm
    reports a ValueError.
    """
    try:
        diag = pyslurm.statistics()
        result = diag.get()
        print(result)
    except ValueError as err:
        print("Error - {0}".format(err.args[0]))
Exemplo n.º 2
0
    # Emit slurmctld statistics in Graphite plaintext protocol form:
    #   "<prefix>.slurmctld.stats.<key>[.<sub>.<metric>] <value> <timestamp>"
    # NOTE(review): this fragment's enclosing `def` is not visible here;
    # `prefix` and `stats_dict` are presumably supplied by the caller, and
    # `dict.iteritems()` means this is Python 2 code — confirm before reuse.
    if stats_dict:
        # Timestamp attached to every emitted sample.
        req_time=stats_dict['req_time']
        for key, value in stats_dict.iteritems():
            # The two RPC sub-dicts are nested one level deeper than the
            # flat counters, so they get a dedicated inner loop.
            if key in ['rpc_user_stats', 'rpc_type_stats']:
                label = 'rpc_user_id'
                if key == 'rpc_type_stats':
                    label = 'rpc_type_id'
                for rpc_key, rpc_val in value.iteritems():
                    if label == 'rpc_user_id':
                        # Resolve numeric UID to a username for readability.
                        rpc_key = pwd.getpwuid(rpc_key)[0]
                    for rpc_val_key, rpc_value in rpc_val.iteritems():
                        print("{0}.slurmctld.stats.{1}.{2}.{3} {4} {5}".format(prefix, key, rpc_key, rpc_val_key, rpc_value, req_time))
            else:
                # Flat scalar counter: one sample per key.
                print("{0}.slurmctld.stats.{1} {2} {3}".format(prefix, key, value, req_time))


if __name__ == "__main__":

    import pwd
    import time

    import pyslurm

    try:
        stats = pyslurm.statistics()
        s = stats.get()
        # BUG FIX: `display` is not defined anywhere in this file, so the
        # original call raised a NameError (which escaped the ValueError
        # handler below). Print the statistics dict directly instead, as
        # the sibling stats() snippet does.
        print(s)
    except ValueError as e:
        print("Error - {0}".format(e.args[0]))

    def collect(self):
        """Collect slurmctld diagnostics via pyslurm and publish each metric.

        Fetches the sdiag statistics dict once, then publishes slurmctld,
        job, main-scheduler and backfill metrics through ``self.publish``.
        On any fetch failure the collection cycle is skipped silently
        (best-effort collector behaviour).
        """
        try:
            sdiag = pyslurm.statistics().get()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed; the deliberate
            # skip-this-cycle behaviour on real errors is preserved.
            return
        else:

            # Slurmctld Stats
            self.publish('server_thread_count',sdiag.get("server_thread_count"))
            self.publish('agent_queue_size',sdiag.get("agent_queue_size"))

            # Jobs Stats
            self.publish('jobs_submitted',sdiag.get("jobs_submitted"))
            self.publish('jobs_started',sdiag.get("jobs_started"))
            self.publish('jobs_completed',sdiag.get("jobs_completed"))
            self.publish('jobs_canceled',sdiag.get("jobs_canceled"))
            self.publish('jobs_failed',sdiag.get("jobs_failed"))

            # Main Scheduler Stats
            self.publish('main_last_cycle',sdiag.get("schedule_cycle_last"))
            self.publish('main_max_cycle',sdiag.get("schedule_cycle_max"))
            self.publish('main_total_cycles',sdiag.get("schedule_cycle_counter"))

            # Means are only defined once at least one cycle has run.
            if sdiag.get("schedule_cycle_counter") > 0:
                self.publish('main_mean_cycle',
                    sdiag.get("schedule_cycle_sum") / sdiag.get("schedule_cycle_counter")
                )
                self.publish('main_mean_depth_cycle', (
                    sdiag.get("schedule_cycle_depth") / sdiag.get("schedule_cycle_counter")
                ))

            # Rate is only meaningful once the sampling window exceeds a minute.
            if (sdiag.get("req_time") - sdiag.get("req_time_start")) > 60:
                self.publish('main_cycles_per_minute', (
                    sdiag.get("schedule_cycle_counter") /
                    ((sdiag.get("req_time") - sdiag.get("req_time_start")) / 60)
                ))

            self.publish('main_last_queue_length',sdiag.get("schedule_queue_len"))

            # Backfilling stats
            self.publish('bf_total_jobs_since_slurm_start',sdiag.get("bf_backfilled_jobs"))
            self.publish('bf_total_jobs_since_cycle_start',sdiag.get("bf_last_backfilled_jobs"))
            self.publish('bf_total_cycles',sdiag.get("bf_cycle_counter"))
            self.publish('bf_last_cycle',sdiag.get("bf_cycle_last"))
            self.publish('bf_max_cycle',sdiag.get("bf_cycle_max"))
            self.publish('bf_queue_length',sdiag.get("bf_queue_len"))

            # Backfill means likewise need at least one backfill cycle.
            if sdiag.get("bf_cycle_counter") > 0:
                self.publish('bf_mean_cycle', (
                    sdiag.get("bf_cycle_sum") / sdiag.get("bf_cycle_counter")
                ))
                self.publish('bf_depth_mean', (
                    sdiag.get("bf_depth_sum") / sdiag.get("bf_cycle_counter")
                ))
                self.publish('bf_depth_mean_try', (
                    sdiag.get("bf_depth_try_sum") / sdiag.get("bf_cycle_counter")
                ))
                self.publish('bf_queue_length_mean', (
                    sdiag.get("bf_queue_len_sum") / sdiag.get("bf_cycle_counter")
                ))

            self.publish('bf_last_depth_cycle',sdiag.get("bf_last_depth"))
            self.publish('bf_last_depth_cycle_try',sdiag.get("bf_last_depth_try"))
Exemplo n.º 4
0
def test_get_statistics():
    """Statistics: Test get_statistics() return type"""
    assert_true(isinstance(pyslurm.statistics().get(), dict))
Exemplo n.º 5
0
 def get(self):
     """Return the slurmctld statistics dictionary from pyslurm."""
     return pyslurm.statistics().get()
Exemplo n.º 6
0
def test_get_statistics():
    """Statistics: Test get_statistics() return type"""
    result = pyslurm.statistics().get()
    assert_true(isinstance(result, dict))
Exemplo n.º 7
0
def get_stats(debug=False):
    """Build a flat dict of slurmctld metrics from pyslurm's sdiag output.

    Covers slurmctld, job, main-scheduler, backfill, per-user RPC,
    per-type RPC and pending-RPC-queue statistics, plus the time spent
    fetching the data itself (``stats_get_time``).

    :param debug: accepted for interface compatibility; currently unused.
    :returns: dict of metric name -> value, or ``None`` if the pyslurm
        query fails.
    """
    stats = {}
    time_before = time.time()
    try:
        sdiag = pyslurm.statistics().get()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate; any real pyslurm failure still yields None.
        return None
    time_after = time.time()

    # Plugin Stats
    stats["stats_get_time"] = time_after - time_before

    # Slurmctld Stats
    stats["server_thread_count"] = sdiag.get("server_thread_count")
    stats["agent_queue_size"] = sdiag.get("agent_queue_size")
    stats["dbd_agent_queue_size"] = sdiag.get("dbd_agent_queue_size")

    # Jobs Stats
    stats["jobs_submitted"] = sdiag.get("jobs_submitted")
    stats["jobs_started"] = sdiag.get("jobs_started")
    stats["jobs_completed"] = sdiag.get("jobs_completed")
    stats["jobs_canceled"] = sdiag.get("jobs_canceled")
    stats["jobs_failed"] = sdiag.get("jobs_failed")

    # Main Scheduler Stats
    stats["main_last_cycle"] = sdiag.get("schedule_cycle_last")
    stats["main_max_cycle"] = sdiag.get("schedule_cycle_max")
    stats["main_total_cycles"] = sdiag.get("schedule_cycle_counter")

    # Means only exist after at least one scheduling cycle.
    if sdiag.get("schedule_cycle_counter") > 0:
        stats["main_mean_cycle"] = \
            sdiag.get("schedule_cycle_sum") / \
            sdiag.get("schedule_cycle_counter")
        stats["main_mean_depth_cycle"] = \
            sdiag.get("schedule_cycle_depth") / \
            sdiag.get("schedule_cycle_counter")

    # Per-minute rate only once the sampling window exceeds one minute.
    if (sdiag.get("req_time") - sdiag.get("req_time_start")) > 60:
        stats["main_cycles_per_minute"] = \
            sdiag.get("schedule_cycle_counter") / \
            ((sdiag.get("req_time") - sdiag.get("req_time_start")) / 60)

    stats["main_last_queue_length"] = sdiag.get("schedule_queue_len")

    # Backfilling stats
    stats["bf_total_jobs_since_slurm_start"] = \
        sdiag.get("bf_backfilled_jobs")
    stats["bf_total_jobs_since_cycle_start"] = \
        sdiag.get("bf_last_backfilled_jobs")
    stats["bf_total_cycles"] = sdiag.get("bf_cycle_counter")
    stats["bf_last_cycle"] = sdiag.get("bf_cycle_last")
    stats["bf_max_cycle"] = sdiag.get("bf_cycle_max")
    stats["bf_queue_length"] = sdiag.get("bf_queue_len")

    # Backfill means likewise need at least one backfill cycle.
    if sdiag.get("bf_cycle_counter") > 0:
        stats["bf_mean_cycle"] = (sdiag.get("bf_cycle_sum") /
                                  sdiag.get("bf_cycle_counter"))
        stats["bf_depth_mean"] = (sdiag.get("bf_depth_sum") /
                                  sdiag.get("bf_cycle_counter"))
        stats["bf_depth_mean_try"] = (sdiag.get("bf_depth_try_sum") /
                                      sdiag.get("bf_cycle_counter"))
        stats["bf_queue_length_mean"] = (sdiag.get("bf_queue_len_sum") /
                                         sdiag.get("bf_cycle_counter"))

    stats["bf_last_depth_cycle"] = sdiag.get("bf_last_depth")
    stats["bf_last_depth_cycle_try"] = sdiag.get("bf_last_depth_try")

    # RPC users stats: per-user counters, plus an aggregate over all
    # non-system users under the 'rpc_user_users_' prefix.
    rpc_user_stats = sdiag.get('rpc_user_stats')
    if rpc_user_stats is None:
        rpc_user_stats = {}
    for user, u_metrics in rpc_user_stats.items():
        metric_prefixes = ['rpc_user_' + user + '_']
        if user not in ['root', 'slurm']:
            metric_prefixes += ['rpc_user_users_']
        for metric_prefix in metric_prefixes:
            if metric_prefix + 'count' not in stats:
                stats[metric_prefix + 'count'] = 0
                stats[metric_prefix + 'total_time'] = 0
            stats[metric_prefix + 'count'] += u_metrics[u'count']
            stats[metric_prefix + 'total_time'] += u_metrics[u'total_time']
            # Recomputed on every pass so the aggregate prefix stays correct.
            stats[metric_prefix + 'ave_time'] = \
                stats[metric_prefix + 'total_time'] / \
                stats[metric_prefix + 'count']

    # RPC types stats (the 'id' field is an internal identifier, not a metric)
    rpc_type_stats = sdiag.get('rpc_type_stats')
    if rpc_type_stats is None:
        rpc_type_stats = {}
    for rpc_type, rpc_metrics in rpc_type_stats.items():
        for m_name, m_value in rpc_metrics.items():
            if m_name != 'id':
                metric = 'rpc_type_' + str(rpc_type) + '-' + m_name
                stats[metric] = m_value

    # Pending RPC by type and sum global pending RPC counter
    metric_global = 'rpc_pending_global'
    rpc_queue_stats = sdiag.get('rpc_queue_stats')
    if rpc_queue_stats is None:
        rpc_queue_stats = {}
    stats[metric_global] = 0
    for rpc_type, rpc_metrics in rpc_queue_stats.items():
        stats[metric_global] += rpc_metrics[u'count']
        stats['rpc_pending_' + rpc_type] = rpc_metrics[u'count']

    return stats
Exemplo n.º 8
0
    def collect(self):
        """Collect slurmctld diagnostics via pyslurm and publish each metric.

        Fetches the sdiag statistics dict once, then publishes slurmctld,
        job, main-scheduler and backfill metrics through ``self.publish``.
        On any fetch failure the collection cycle is skipped silently
        (best-effort collector behaviour).
        """
        try:
            sdiag = pyslurm.statistics().get()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed; the deliberate
            # skip-this-cycle behaviour on real errors is preserved.
            return
        else:

            # Slurmctld Stats
            self.publish('server_thread_count',
                         sdiag.get("server_thread_count"))
            self.publish('agent_queue_size', sdiag.get("agent_queue_size"))

            # Jobs Stats
            self.publish('jobs_submitted', sdiag.get("jobs_submitted"))
            self.publish('jobs_started', sdiag.get("jobs_started"))
            self.publish('jobs_completed', sdiag.get("jobs_completed"))
            self.publish('jobs_canceled', sdiag.get("jobs_canceled"))
            self.publish('jobs_failed', sdiag.get("jobs_failed"))

            # Main Scheduler Stats
            self.publish('main_last_cycle', sdiag.get("schedule_cycle_last"))
            self.publish('main_max_cycle', sdiag.get("schedule_cycle_max"))
            self.publish('main_total_cycles',
                         sdiag.get("schedule_cycle_counter"))

            # Means are only defined once at least one cycle has run.
            if sdiag.get("schedule_cycle_counter") > 0:
                self.publish(
                    'main_mean_cycle',
                    sdiag.get("schedule_cycle_sum") /
                    sdiag.get("schedule_cycle_counter"))
                self.publish('main_mean_depth_cycle',
                             (sdiag.get("schedule_cycle_depth") /
                              sdiag.get("schedule_cycle_counter")))

            # Rate is only meaningful once the window exceeds a minute.
            if (sdiag.get("req_time") - sdiag.get("req_time_start")) > 60:
                self.publish(
                    'main_cycles_per_minute',
                    (sdiag.get("schedule_cycle_counter") /
                     ((sdiag.get("req_time") - sdiag.get("req_time_start")) /
                      60)))

            self.publish('main_last_queue_length',
                         sdiag.get("schedule_queue_len"))

            # Backfilling stats
            self.publish('bf_total_jobs_since_slurm_start',
                         sdiag.get("bf_backfilled_jobs"))
            self.publish('bf_total_jobs_since_cycle_start',
                         sdiag.get("bf_last_backfilled_jobs"))
            self.publish('bf_total_cycles', sdiag.get("bf_cycle_counter"))
            self.publish('bf_last_cycle', sdiag.get("bf_cycle_last"))
            self.publish('bf_max_cycle', sdiag.get("bf_cycle_max"))
            self.publish('bf_queue_length', sdiag.get("bf_queue_len"))

            # Backfill means likewise need at least one backfill cycle.
            if sdiag.get("bf_cycle_counter") > 0:
                self.publish('bf_mean_cycle', (sdiag.get("bf_cycle_sum") /
                                               sdiag.get("bf_cycle_counter")))
                self.publish('bf_depth_mean', (sdiag.get("bf_depth_sum") /
                                               sdiag.get("bf_cycle_counter")))
                self.publish('bf_depth_mean_try',
                             (sdiag.get("bf_depth_try_sum") /
                              sdiag.get("bf_cycle_counter")))
                self.publish('bf_queue_length_mean',
                             (sdiag.get("bf_queue_len_sum") /
                              sdiag.get("bf_cycle_counter")))

            self.publish('bf_last_depth_cycle', sdiag.get("bf_last_depth"))
            self.publish('bf_last_depth_cycle_try',
                         sdiag.get("bf_last_depth_try"))