Example #1
def run_interval(process_info):

    """
       Process has an array of 4 args:
         - configuration options
         - nb cpus in cluster
         - beginning interval
         - end interval
    """
    options, config, interval_beginning, interval_end, cluster = process_info
    # get parameters
    step = options.interval
    template = options.template

    # Instantiate connection to the db
    db_section = "hpcstatsdb"
    dbhostname = config.get(db_section,"hostname")
    dbport = config.get(db_section,"port")
    dbname = config.get(db_section,"dbname")
    dbuser = config.get(db_section,"user")
    dbpass = config.get(db_section,"password")
    db = HPCStatsdb(dbhostname, dbport, dbname, dbuser, dbpass)


    if options.debug:
        print "getting nb cpus on cluster %s" % (cluster.get_name())

    db.bind()

    nb_cpus_cluster = cluster.get_nb_cpus(db)
    
    if options.debug:
        print "nb cpus on cluster %s: %d" % (cluster.get_name(), nb_cpus_cluster)

    userstats = {} 
    groupstats = {}

    if options.debug:
        print "%s -> %s" % (interval_beginning, interval_end)
    
    # calculate the number of hours during the interval
    nb_hours_interval = ((interval_end - interval_beginning).days + 1) * 24
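    # e.g. a one-week interval (end - beginning = 6 days 23:59:59)
    # gives (6 + 1) * 24 = 168 hours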

    job_finder = JobFinder(db)
    user_finder = UserFinder(db)
    interval_jobs = job_finder.find_jobs_in_interval(cluster.get_name(), interval_beginning, interval_end)

    cpu_time_interval = 0
    nb_jobs = 0

    for job in interval_jobs:

        try:
            running_datetime = job.get_running_datetime()
            if job.get_state() == 'RUNNING':
                end_datetime = datetime.now()
            else:
                end_datetime = job.get_end_datetime()
            nb_cpus = job.get_nb_procs()
            uid = job.get_uid()
            user = user_finder.find(cluster.get_name(), uid)
            username = user.get_name()
            group = user.get_department()

            # TODO: move all this mechanism into the Job class
            if running_datetime < interval_beginning:
                running_datetime = interval_beginning
            if end_datetime > interval_end:
                end_datetime = interval_end
            time_job = (end_datetime - running_datetime)
            cpu_time_job_seconds = ((time_job.days * 24 * 3600) + time_job.seconds) * nb_cpus
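            # e.g. a job running 2 hours inside the interval on 8 cpus
            # adds 2 * 3600 * 8 = 57600 cpu seconds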
            cpu_time_interval += cpu_time_job_seconds
            nb_jobs += 1
            # initialize user and group dicts.
            if username not in userstats:
                userstats[username] = {}
            if group not in groupstats:
                groupstats[group] = {}
            # user's group
            if 'group' not in userstats[username]:
                userstats[username]['group'] = group
            # user's jobs
            if 'jobs' in userstats[username]:
                userstats[username]['jobs'] += 1
            else:
                userstats[username]['jobs'] = 1
            # group's jobs
            if 'jobs' in groupstats[group]:
                groupstats[group]['jobs'] += 1
            else:
                groupstats[group]['jobs'] = 1
            # user's time
            if 'time' in userstats[username]:
                userstats[username]['time'] += cpu_time_job_seconds
            else:
                userstats[username]['time'] = cpu_time_job_seconds
            # group's time
            if 'time' in groupstats[group]:
                groupstats[group]['time'] += cpu_time_job_seconds
            else:
                groupstats[group]['time'] = cpu_time_job_seconds

            if options.debug:
                print "debug: (%s - %s) * %d -> %s" % (end_datetime, running_datetime, nb_cpus, cpu_time_job_seconds)

        except UserWarning:
            # skip jobs whose data raised a UserWarning (e.g. unknown user)
            continue

    # nb accounts
    nb_accounts = cluster.get_nb_accounts(db, interval_beginning)

    # active users
    nb_active_users = cluster.get_nb_active_users(db, interval_beginning, interval_end)

    cpu_time_hours = cpu_time_interval / 3600
    if options.debug:
        print "debug: nb_cpus_cluster: %d nb_hours_interval: %d" % (nb_cpus_cluster, nb_hours_interval)

    cpu_time_available = nb_cpus_cluster * nb_hours_interval
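    # e.g. 512 cpus over a 168-hour week give 512 * 168 = 86016 cpu hours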
    utilisation_rate = (float(cpu_time_hours) / cpu_time_available) * 100
    if step == "day":
        str_date = interval_beginning.strftime("%d/%m/%Y")
        str_interval = "-"
    elif step == "week":
        str_date = interval_beginning.strftime("week %W %Y")
        str_interval = interval_beginning.strftime("%d/%m/%Y -> ") + interval_end.strftime("%d/%m/%Y")
    elif step == "month":
        str_date = interval_beginning.strftime("%Y/%m")
        str_interval = interval_beginning.strftime("%d/%m/%Y -> ") + interval_end.strftime("%d/%m/%Y")
    
    # gather the calculated results into a list
    result = [ str_date, str_interval, nb_jobs, cpu_time_hours, cpu_time_available, nb_accounts, nb_active_users, userstats, groupstats ]

    db.unbind()
    
    return result
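
run_interval packs its five arguments into a single list so that it can be fed to multiprocessing.Pool.map (see Example #3). A minimal sketch of a standalone call, assuming the options, config and cluster objects are already built by the surrounding application (StatsOptionParser, HPCStatsConfig, ClusterFinder):

from datetime import datetime, timedelta

# options, config and cluster are assumed to be built elsewhere
interval_end = datetime.now()
interval_beginning = interval_end - timedelta(days=7)
process_info = [options, config, interval_beginning, interval_end, cluster]
result = run_interval(process_info)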
Example #2
def main(args=sys.argv):

    # Command line argument parser
    usage = "%prog [options] command"
    parser = StatsOptionParser(usage)
    (options, args) = parser.parse_args(args[1:])

    # validate options
    parser.validate(options)

    # configure logging
    logging_level = logging.INFO
    if options.debug:
        logging_level = logging.DEBUG
    logging.basicConfig(format='%(levelname)s: %(filename)s: %(message)s',
                        level=logging_level,
                        stream=sys.stdout)
    
    # Config file argument parser
    config = HPCStatsConfig(options)

    # dump entire config file
    for section in config.sections():
        logging.debug(section)
        for option in config.options(section):
            logging.debug(" %s = %s", option, config.get(section, option))

    # Instantiate connection to the db
    db_section = "hpcstatsdb"
    dbhostname = config.get(db_section,"hostname")
    dbport = config.get(db_section,"port")
    dbname = config.get(db_section,"dbname")
    dbuser = config.get(db_section,"user")
    dbpass = config.get(db_section,"password")
    db = HPCStatsdb(dbhostname, dbport, dbname, dbuser, dbpass)
    db.bind()
    
    logging.debug("db information %s %s %s %s %s" % db.infos())
    
    cluster_finder = ClusterFinder(db)
    cluster = cluster_finder.find(options.clustername)

    if options.arch:
        logging.info("=> Updating architecture for cluster %s", options.clustername)
        try:
            arch_importer = ArchitectureImporterFactory().factory(db, config, cluster.get_name())
            arch_importer.update_architecture()
            db.commit()
        except RuntimeError:
            logging.error("error occurred during architecture update.")

    if options.events:
        logging.info("=> Updating events for cluster %s", options.clustername)
        try:
            event_importer = EventImporterFactory().factory(db, config, cluster.get_name())
            event_importer.update_events()
            db.commit()
        except RuntimeError:
            logging.error("error occurred during events update.")

    if options.users:
        logging.info("=> Updating users for cluster %s", options.clustername)
        try:
            user_importer = UserImporterFactory().factory(db, config, cluster.get_name())
            user_importer.update_users()
            db.commit()
        except RuntimeError:
            logging.error("error occurred during users update.")

    if options.jobs:
        logging.info("=> Updating jobs for cluster %s", options.clustername)
        try:
            job_importer = JobImporterFactory().factory(db, config, cluster.get_name())
            # The last updated job in hpcstatsdb for this cluster
            last_updated_id = job_importer.get_last_job_id()
            # The unfinished jobs in hpcstatsdb for this cluster
            ids = job_importer.get_unfinished_job_id()
    
            # sentinel so the while loop below runs at least once
            new_jobs = ['not_empty']
    
            offset = 0
            max_jobs = 100000
    
            logging.debug("Get jobs to update")
            jobs_to_update = job_importer.get_job_information_from_dbid_job_list(ids)
            for job in jobs_to_update:
                offset += 1
                if not offset % 10:
                    logging.debug("updated %d jobs", offset)
                job.update(db)
    
            offset = 0
    
            while new_jobs:
                logging.debug("Get %d new jobs starting at offset %d" % (max_jobs, offset))
                new_jobs = job_importer.get_job_for_id_above(last_updated_id, offset, max_jobs)
                for job in new_jobs:
                    offset += 1
                    if not offset % 10000:
                        logging.debug("created %d jobs", offset)
                    job.save(db)
    
            db.commit()
        except RuntimeError:
            logging.error("error occurred during jobs update.")
        
    db.unbind()
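
The five config lookups and the HPCStatsdb construction above are repeated verbatim in all three examples; a small helper could factor them out. A sketch, assuming only the HPCStatsdb constructor and HPCStatsConfig.get() signatures already used above:

def connect_db(config, db_section="hpcstatsdb"):
    # read the five connection parameters from the given config section
    params = [config.get(db_section, key)
              for key in ("hostname", "port", "dbname", "user", "password")]
    return HPCStatsdb(*params)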
Example #3
def main(args=sys.argv):

    # Command line argument parser
    usage = "usage: %prog [options] (-c|--cluster) <cluster> [-i,--interval (day|week|month)] [-t,--template <template>]"
    parser = ReportOptionParser(usage)
    (options, args) = parser.parse_args(args[1:])

    if not options.cluster:
        parser.error("<cluster> should be specified")
 
    # Config file argument parser
    config = HPCStatsConfig(options)

    # locale to format numbers
    locale.setlocale(locale.LC_ALL, 'fr_FR')
    # TODO: load the locale from the environment

    if options.debug:
        # dump entire config file
        for section in config.sections():
            print section
            for option in config.options(section):
                print " ", option, "=", config.get(section, option)

    # Instantiate connection to the db
    db_section = "hpcstatsdb"
    dbhostname = config.get(db_section,"hostname")
    dbport = config.get(db_section,"port")
    dbname = config.get(db_section,"dbname")
    dbuser = config.get(db_section,"user")
    dbpass = config.get(db_section,"password")
    db = HPCStatsdb(dbhostname, dbport, dbname, dbuser, dbpass)
    
    if options.debug:
        print "db information %s %s %s %s %s" % db.infos()
        print "→ running on cluster %s with interval %s" % (options.cluster, options.interval)

    # check if specified template is available
    #check_template(options.template)

    # get parameters
    step = options.interval
    template = options.template

    db.bind()
    
    cluster_finder = ClusterFinder(db)
    cluster = cluster_finder.find(options.cluster)  
      
    # check that the cluster really exists
    if not cluster.exists_in_db(db):
        sys.exit("error: cluster %s does not exist in database."
                 % options.cluster)

    if options.debug:
        print "main: getting nb cpus on cluster %s" % (cluster.get_name())

    # get the total number of cpus inside the cluster
    nb_cpus_cluster = cluster.get_nb_cpus(db)

    results = []

    # get datetime of the first job
    min_datetime = cluster.get_min_datetime(db)
    #min_datetime = datetime(2011,5,1,0,0,0)
    max_datetime = datetime.now()
    tmp_datetime = min_datetime
    db.unbind()
    
    userstats_global = {}
    groupstats_global = {}
    processes_args = []

    # construct intervals with process information mapping
    while tmp_datetime < max_datetime:

        # get the exact beginning and end of the step-sized interval
        # around tmp_datetime
        (begin,end) = get_interval_begin_end(step,tmp_datetime)

        # construct an array of args for each process/interval
        process_info = []

        process_info.append(options)
        process_info.append(config)
        process_info.append(begin)
        process_info.append(end)
        process_info.append(cluster)

        # finally append this list of args to the global list
        processes_args.append(process_info)
        
        # going to next interval
        interval = get_interval_timedelta(step)
        tmp_datetime += interval

    if options.debug:
        print processes_args

    # launch processes with their corresponding arguments
    parallel = True 
    processes_results = []
    if parallel:
        pool = Pool(4)
        processes_results = pool.map(run_interval, processes_args)
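        # note: Pool.map pickles each element of processes_args, so the
        # options, config and cluster objects must all be picklable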
    else:
        for process_info in processes_args:
            process_results = run_interval(process_info)    
            processes_results.append(process_results)
    
    # then collect the results; userstats and groupstats are popped
    # off the end of each result list
    for result in processes_results:
        str_date = result[0]
        groupstats = result.pop()
        userstats = result.pop()
        userstats_global[str_date] = userstats
        groupstats_global[str_date] = groupstats
        results.append(result)

    if options.debug:
        print "debug: usersstats", userstats_global
        print "debug: groupsstats", groupstats_global

    # print results using template
    mytemplate = Template( filename=get_template_filename(template),
                           input_encoding='utf-8',
                           output_encoding='utf-8',
                           default_filters=['decode.utf8'],
                           encoding_errors='ignore'
                         )
    print mytemplate.render(cluster=cluster,
                            step=step,
                            results=results,
                            userstats_global=userstats_global,
                            groupstats_global=groupstats_global)
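
get_interval_begin_end is referenced above but not shown. A plausible sketch, assuming each interval includes its last second, which matches the (days + 1) * 24 hours computation in Example #1; get_interval_timedelta would return a matching timedelta per step:

from datetime import datetime, timedelta
import calendar

def get_interval_begin_end(step, dt):
    # hypothetical helper: exact begin/end of the day/week/month around dt
    day = datetime(dt.year, dt.month, dt.day)
    second = timedelta(seconds=1)
    if step == "day":
        return (day, day + timedelta(days=1) - second)
    elif step == "week":
        begin = day - timedelta(days=dt.weekday())
        return (begin, begin + timedelta(weeks=1) - second)
    else:  # "month"
        last_day = calendar.monthrange(dt.year, dt.month)[1]
        return (datetime(dt.year, dt.month, 1),
                datetime(dt.year, dt.month, last_day, 23, 59, 59))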