def __update_statistics_from_job(
    self,
    job_id,
    job_vgrid_name,
    buildcache_dict,
    job_dict,
):
    """Fold a single job's info into the statistics and the buildcache.

    Arguments:
        job_id: identifier used as the key into buildcache_dict.
        job_vgrid_name: upper-cased vgrid name the counters are recorded
            under.
        buildcache_dict: maps job_id to the last pending status seen for
            it; updated in place so repeated scans of a modified mRSL
            file do not double-count the job.
        job_dict: unpickled mRSL job dictionary.  Mutated: its 'VGRID'
            field is normalized via validated_vgrid_list().

    Request stats are added only the first time a job is seen, a
    per-status counter is bumped on every call, and full usage stats are
    recorded once a job reaches FINISHED on this particular vgrid.  A
    pending-state counter recorded by a previous scan is decremented so
    only the latest status is counted.
    """

    # Fix legacy VGRIDs
    job_dict['VGRID'] = validated_vgrid_list(self.__configuration,
                                             job_dict)

    # If the mRSL file was modified and this is the first time
    # we have seen it, add the request info to the statistics.
    # NOTE: has_key() is Python-2-only; the 'in' operator is portable.
    if job_id not in buildcache_dict:
        self.__add(self.VGRID, job_vgrid_name, 'NODECOUNT_REQ',
                   int(job_dict['NODECOUNT']))
        self.__add(self.VGRID, job_vgrid_name, 'CPUTIME_REQ',
                   int(job_dict['CPUTIME']))
        self.__add(self.VGRID, job_vgrid_name, 'CPUCOUNT_REQ',
                   int(job_dict['CPUCOUNT']))
        self.__add(self.VGRID, job_vgrid_name, 'DISK_REQ',
                   int(job_dict['DISK']))
        self.__add(self.VGRID, job_vgrid_name, 'MEMORY_REQ',
                   int(job_dict['MEMORY']))
        self.__add(self.VGRID, job_vgrid_name, 'RUNTIMEENVIRONMENT_REQ',
                   len(job_dict['RUNTIMEENVIRONMENT']))

    # Resource identity, when the job was ever scheduled to a resource.
    # Old mRSL files may lack one or both of these fields.
    unique_resource_name = None
    resource_id = None
    if 'RESOURCE_CONFIG' in job_dict:
        if 'UNIQUE_RESOURCE_NAME' in job_dict:
            unique_resource_name = job_dict['UNIQUE_RESOURCE_NAME'].upper()
        if 'RESOURCE_ID' in job_dict['RESOURCE_CONFIG']:
            resource_id = job_dict['RESOURCE_CONFIG']['RESOURCE_ID'].upper()

    status = job_dict['STATUS']
    if status == 'PARSE':
        self.__add(self.VGRID, job_vgrid_name, 'PARSE', 1)
    elif status == 'QUEUED':
        self.__add(self.VGRID, job_vgrid_name, 'QUEUED', 1)
    elif status == 'EXECUTING':
        self.__add(self.VGRID, job_vgrid_name, 'EXECUTING', 1)
    elif status == 'FAILED':
        self.__add(self.VGRID, job_vgrid_name, 'FAILED', 1)
        self.__add_resource(unique_resource_name, resource_id, 'FAILED', 1)
    elif status == 'RETRY':
        self.__add(self.VGRID, job_vgrid_name, 'RETRY', 1)
        self.__add_resource(unique_resource_name, resource_id, 'RETRY', 1)
    elif status == 'EXPIRED':
        self.__add(self.VGRID, job_vgrid_name, 'EXPIRED', 1)
    elif status == 'FROZEN':
        self.__add(self.VGRID, job_vgrid_name, 'FROZEN', 1)
    elif status == 'CANCELED':
        self.__add(self.VGRID, job_vgrid_name, 'CANCELED', 1)
    elif status == 'FINISHED':

        # Recent jobs have the scheduled resource vgrid available in
        # the RESOURCE_VGRID field. However, that vgrid may be a parent of
        # the requested job vgrid due to inheritance.
        # We repeat the vgrid match up to find the actual job vgrid here.
        # Fall back to saved resource prioritized vgrid list for old jobs.

        active_res_vgrid = job_dict.get('RESOURCE_VGRID', None)
        if active_res_vgrid:
            search_vgrids = [active_res_vgrid]
        else:
            print("WARNING: no RESOURCE_VGRID for job %(JOB_ID)s" %
                  job_dict)
            resource_config = job_dict['RESOURCE_CONFIG']
            # Fix legacy VGRIDs
            resource_config['VGRID'] = validated_vgrid_list(
                self.__configuration, resource_config)
            search_vgrids = resource_config['VGRID']
        (match, active_job_vgrid, _) = job_fits_res_vgrid(
            job_dict['VGRID'], search_vgrids)
        if not match:
            # This should not happen - scheduled job to wrong vgrid!
            print("ERROR: %s no match for vgrids: %s vs %s" %
                  (job_dict['JOB_ID'], job_dict['VGRID'], search_vgrids))
            active_job_vgrid = '__NO_SUCH_JOB_VGRID__'
        active_vgrid = active_job_vgrid.upper()

        # Only record usage under the vgrid the job actually ran in.
        if active_vgrid == job_vgrid_name:

            # Compute used wall time.  The timestamps expose the
            # struct_time fields tm_year..tm_sec, so the standard
            # first-six-fields slice feeds datetime directly.
            finished_datetime = datetime.datetime(
                *job_dict['FINISHED_TIMESTAMP'][:6])
            starting_datetime = datetime.datetime(
                *job_dict['EXECUTING_TIMESTAMP'][:6])
            used_walltime = finished_datetime - starting_datetime

            # VGrid stats
            self.__add(self.VGRID, job_vgrid_name, 'FINISHED', 1)
            self.__add(self.VGRID, job_vgrid_name, 'NODECOUNT_DONE',
                       int(job_dict['NODECOUNT']))
            self.__add(self.VGRID, job_vgrid_name, 'CPUTIME_DONE',
                       int(job_dict['CPUTIME']))
            self.__add(self.VGRID, job_vgrid_name, 'USED_WALLTIME',
                       used_walltime)
            self.__add(self.VGRID, job_vgrid_name, 'CPUCOUNT_DONE',
                       int(job_dict['CPUCOUNT']))
            self.__add(self.VGRID, job_vgrid_name, 'DISK_DONE',
                       int(job_dict['DISK']))
            self.__add(self.VGRID, job_vgrid_name, 'MEMORY_DONE',
                       int(job_dict['MEMORY']))
            self.__add(self.VGRID, job_vgrid_name,
                       'RUNTIMEENVIRONMENT_DONE',
                       len(job_dict['RUNTIMEENVIRONMENT']))

            # Resource stats
            self.__add_resource(unique_resource_name, resource_id,
                                'FINISHED', 1)
            self.__add_resource(unique_resource_name, resource_id,
                                'USED_WALLTIME', used_walltime)

            # RE stats
            for runtime_env in job_dict['RUNTIMEENVIRONMENT']:
                self.__addre(self.VGRID, job_vgrid_name, runtime_env, 1)
                # Old mRSL files lack the UNIQUE_RESOURCE_NAME field
                if unique_resource_name:
                    self.__addre(self.RESOURCE_TOTAL,
                                 unique_resource_name, runtime_env, 1)
                # Old mRSL files lack the RESOURCE_ID field
                # Old mRSL files has resource_id == unique_resource_name
                if resource_id and resource_id != unique_resource_name:
                    self.__addre(self.RESOURCE_NODE, resource_id,
                                 runtime_env, 1)
    else:
        print('Unknown status: ' + job_dict['STATUS'])

    # Check and update cache for previous status'
    # Decrement the counter for the status recorded last scan so only
    # the current status is counted.
    if job_id in buildcache_dict and \
            buildcache_dict[job_id] in pending_states:
        self.__add(self.VGRID, job_vgrid_name, buildcache_dict[job_id],
                   -1)

    # Cache current status for use in next iteration.
    # Note that status: CANCELED, FAILED, EXPIRED or FINISHED are
    # final stages and therefore none of those should occur in
    # the cache, as the mRSL file should not be modified once it
    # reaches one of those stages.
    if status in pending_states:
        buildcache_dict[job_id] = status
    elif job_id in buildcache_dict:
        del buildcache_dict[job_id]
def __update_statistics_from_job(
    self,
    job_id,
    job_vgrid_name,
    buildcache_dict,
    job_dict,
):
    """The dirty details of what jobinfo is used in the statistics
    and buildcache.

    job_id keys into buildcache_dict, which maps each job to the last
    pending status seen for it and is updated in place; job_vgrid_name
    is the upper-cased vgrid the counters are recorded under; job_dict
    is the loaded mRSL job dictionary (its 'VGRID' field is normalized
    here as a side effect).  Request stats are added only the first
    time a job is seen, a per-status counter is bumped on every call,
    and full usage stats are recorded once a job is FINISHED on this
    vgrid.
    """

    # Fix legacy VGRIDs
    job_dict['VGRID'] = validated_vgrid_list(self.__configuration,
                                             job_dict)

    # If the mRSL file was modified and this is the first time
    # we have seen it, add the request info to the statistics.
    if not buildcache_dict.has_key(job_id):
        self.__add(self.VGRID, job_vgrid_name, 'NODECOUNT_REQ',
                   int(job_dict['NODECOUNT']))
        self.__add(self.VGRID, job_vgrid_name, 'CPUTIME_REQ',
                   int(job_dict['CPUTIME']))
        self.__add(self.VGRID, job_vgrid_name, 'CPUCOUNT_REQ',
                   int(job_dict['CPUCOUNT']))
        self.__add(self.VGRID, job_vgrid_name, 'DISK_REQ',
                   int(job_dict['DISK']))
        self.__add(self.VGRID, job_vgrid_name, 'MEMORY_REQ',
                   int(job_dict['MEMORY']))
        self.__add(self.VGRID, job_vgrid_name, 'RUNTIMEENVIRONMENT_REQ',
                   len(job_dict['RUNTIMEENVIRONMENT']))

    # Resource identity, when the job was ever scheduled to a resource.
    # Old mRSL files may lack one or both of these fields.
    unique_resource_name = None
    resource_id = None
    if job_dict.has_key('RESOURCE_CONFIG'):
        if job_dict.has_key('UNIQUE_RESOURCE_NAME'):
            unique_resource_name = job_dict['UNIQUE_RESOURCE_NAME'].upper()
        if job_dict['RESOURCE_CONFIG'].has_key('RESOURCE_ID'):
            resource_id = job_dict['RESOURCE_CONFIG']['RESOURCE_ID'].upper(
            )

    if job_dict['STATUS'] == 'PARSE':
        self.__add(self.VGRID, job_vgrid_name, 'PARSE', 1)
    elif job_dict['STATUS'] == 'QUEUED':
        self.__add(self.VGRID, job_vgrid_name, 'QUEUED', 1)
    elif job_dict['STATUS'] == 'EXECUTING':
        self.__add(self.VGRID, job_vgrid_name, 'EXECUTING', 1)
    elif job_dict['STATUS'] == 'FAILED':
        # Failures are attributed to the resource as well as the vgrid
        self.__add(self.VGRID, job_vgrid_name, 'FAILED', 1)
        self.__add_resource(unique_resource_name, resource_id,
                            'FAILED', 1)
    elif job_dict['STATUS'] == 'RETRY':
        self.__add(self.VGRID, job_vgrid_name, 'RETRY', 1)
        self.__add_resource(unique_resource_name, resource_id,
                            'RETRY', 1)
    elif job_dict['STATUS'] == 'EXPIRED':
        self.__add(self.VGRID, job_vgrid_name, 'EXPIRED', 1)
    elif job_dict['STATUS'] == 'FROZEN':
        self.__add(self.VGRID, job_vgrid_name, 'FROZEN', 1)
    elif job_dict['STATUS'] == 'CANCELED':
        self.__add(self.VGRID, job_vgrid_name, 'CANCELED', 1)
    elif job_dict['STATUS'] == 'FINISHED':

        # Recent jobs have the scheduled resource vgrid available in
        # the RESOURCE_VGRID field. However, that vgrid may be a parent of
        # the requested job vgrid due to inheritance.
        # We repeat the vgrid match up to find the actual job vgrid here.
        # Fall back to saved resource prioritized vgrid list for old jobs.

        active_res_vgrid = job_dict.get('RESOURCE_VGRID', None)
        if active_res_vgrid:
            search_vgrids = [active_res_vgrid]
        else:
            print "WARNING: no RESOURCE_VGRID for job %(JOB_ID)s" % \
                job_dict
            resource_config = job_dict['RESOURCE_CONFIG']
            # Fix legacy VGRIDs
            resource_config['VGRID'] = validated_vgrid_list(
                self.__configuration, resource_config)
            search_vgrids = resource_config['VGRID']
        (match, active_job_vgrid, _) = job_fits_res_vgrid(
            job_dict['VGRID'], search_vgrids)
        if not match:
            # This should not happen - scheduled job to wrong vgrid!
            print "ERROR: %s no match for vgrids: %s vs %s" % \
                (job_dict['JOB_ID'], job_dict['VGRID'], search_vgrids)
            active_job_vgrid = '__NO_SUCH_JOB_VGRID__'
        active_vgrid = active_job_vgrid.upper()

        # Only record usage under the vgrid the job actually ran in
        if active_vgrid == job_vgrid_name:

            # Compute used wall time from the execution and finish
            # timestamps (struct_time-like: tm_year..tm_sec fields)
            finished_timestamp = job_dict['FINISHED_TIMESTAMP']
            finished_datetime = datetime.datetime(
                finished_timestamp.tm_year,
                finished_timestamp.tm_mon,
                finished_timestamp.tm_mday,
                finished_timestamp.tm_hour,
                finished_timestamp.tm_min,
                finished_timestamp.tm_sec,
            )
            starting_timestamp = job_dict['EXECUTING_TIMESTAMP']
            starting_datetime = datetime.datetime(
                starting_timestamp.tm_year,
                starting_timestamp.tm_mon,
                starting_timestamp.tm_mday,
                starting_timestamp.tm_hour,
                starting_timestamp.tm_min,
                starting_timestamp.tm_sec,
            )
            used_walltime = finished_datetime - starting_datetime

            # VGrid stats
            self.__add(self.VGRID, job_vgrid_name, 'FINISHED', 1)
            self.__add(self.VGRID, job_vgrid_name, 'NODECOUNT_DONE',
                       int(job_dict['NODECOUNT']))
            self.__add(self.VGRID, job_vgrid_name, 'CPUTIME_DONE',
                       int(job_dict['CPUTIME']))
            self.__add(self.VGRID, job_vgrid_name, 'USED_WALLTIME',
                       used_walltime)
            self.__add(self.VGRID, job_vgrid_name, 'CPUCOUNT_DONE',
                       int(job_dict['CPUCOUNT']))
            self.__add(self.VGRID, job_vgrid_name, 'DISK_DONE',
                       int(job_dict['DISK']))
            self.__add(self.VGRID, job_vgrid_name, 'MEMORY_DONE',
                       int(job_dict['MEMORY']))
            self.__add(self.VGRID, job_vgrid_name,
                       'RUNTIMEENVIRONMENT_DONE',
                       len(job_dict['RUNTIMEENVIRONMENT']))

            # Resource stats
            self.__add_resource(unique_resource_name, resource_id,
                                'FINISHED', 1)
            self.__add_resource(unique_resource_name, resource_id,
                                'USED_WALLTIME', used_walltime)

            # RE stats
            for runtime_env in job_dict['RUNTIMEENVIRONMENT']:
                self.__addre(self.VGRID, job_vgrid_name, runtime_env,
                             1)
                # Old mRSL files lack the UNIQUE_RESOURCE_NAME field
                if unique_resource_name:
                    self.__addre(self.RESOURCE_TOTAL,
                                 unique_resource_name, runtime_env, 1)
                # Old mRSL files lack the RESOURCE_ID field
                # Old mRSL files has resource_id == unique_resource_name
                if resource_id and resource_id != unique_resource_name:
                    self.__addre(self.RESOURCE_NODE, resource_id,
                                 runtime_env, 1)
    else:
        print 'Unknown status: ' + job_dict['STATUS']

    # Check and update cache for previous status'
    # Decrement the counter recorded last scan so only the current
    # status of the job is counted.
    if buildcache_dict.has_key(job_id) and \
            buildcache_dict[job_id] in pending_states:
        self.__add(self.VGRID, job_vgrid_name, buildcache_dict[job_id],
                   -1)

    # Cache current status for use in next iteration.
    # Note that status: CANCELED, FAILED, EXPIRED or FINISHED are
    # final stages and therefore none of thoose should occur in
    # the cache, as the mRSL file should not be modified once it
    # reaches one of thoose stages.
    if job_dict['STATUS'] in pending_states:
        buildcache_dict[job_id] = job_dict['STATUS']
    elif buildcache_dict.has_key(job_id):
        del buildcache_dict[job_id]
if root.find(os.sep + '.') != -1: continue for name in files: filename = os.path.join(root, name) # Only files modified since last update is checked if os.path.getmtime(filename) > last_buildtime: job_dict = unpickle(filename, self.__logger) if not job_dict: msg = 'gridstat::update() could not load: %s '\ % filename self.__logger.error(msg) continue job_vgrids = validated_vgrid_list(self.__configuration, job_dict) for job_vgrid_name in job_vgrids: # Update the statistics and cache # from the job details job_vgrid_name = job_vgrid_name.upper() self.__update_statistics_from_job(name, job_vgrid_name, buildcache_dict, job_dict) # Flush cache and unlock files try: file_handle.seek(0, 0)
if root.find(os.sep + '.') != -1: continue for name in files: filename = os.path.join(root, name) # Only files modified since last update is checked if os.path.getmtime(filename) > last_buildtime: job_dict = unpickle(filename, self.__logger) if not job_dict: msg = 'gridstat::update() could not load: %s '\ % filename self.__logger.error(msg) continue job_vgrids = validated_vgrid_list(self.__configuration, job_dict) for job_vgrid_name in job_vgrids: # Update the statistics and cache # from the job details job_vgrid_name = job_vgrid_name.upper() self.__update_statistics_from_job( name, job_vgrid_name, buildcache_dict, job_dict) # Flush cache and unlock files try: file_handle.seek(0, 0) py_pickle.dump(buildcache_dict, file_handle, 0)