def set_project_domain(self, project_code, domain_key):
    """Attach the Project whose code is given in parameter to the Domain
       whose key is given in parameter, and save the change in DB.

       It raises HPCStatsRuntimeError if either the Domain key or the
       Project code are not found in DB. The Domain is checked first.
    """
    new_domain = Domain(domain_key, None)
    domain_found = new_domain.existing(self.db)
    if not domain_found:
        msg = "unable to find domain %s in database" % (domain_key)
        raise HPCStatsRuntimeError(msg)

    target = Project(None, project_code, None)
    project_found = target.find(self.db)
    if not project_found:
        msg = "unable to find project %s in database" % (project_code)
        raise HPCStatsRuntimeError(msg)

    # The Project is loaded from DB to get its description before its
    # domain is replaced.
    target.load(self.db)
    target.domain = new_domain

    logger.info("updating project %s with new domain %s",
                project_code,
                domain_key)
    target.update(self.db)
def load_cluster(self, cluster):
    """Connect to cluster Slurm database to extract project codes from
       jobs wckeys.

       The distinct wckeys of the cluster job table are read, optionally
       restricted to the partitions configured for this cluster. For each
       wckey:

       - NULL and empty wckeys are ignored,
       - wckeys which are not made of exactly 2 items separated by ':' are
         ignored, a warning being emitted the first time each of them is
         seen,
       - otherwise the first item is the project code: a Project attached
         to self.default_domain is appended to self.projects unless
         self.find_project() already finds it.

       Raises HPCStatsSourceError in case of error.
    """

    self.log.debug("loading project codes from %s slurm database", cluster)

    self.connect_db(cluster)

    cluster_db = self.clusters_db[cluster]
    partitions = cluster_db['partitions']

    # One SQL placeholder per partition, the values themselves are given
    # to execute() as parameters.
    if partitions:
        placeholders = ','.join(['%s'] * len(partitions))
        partitions_clause = "WHERE job.partition IN (%s)" % placeholders
    else:
        partitions_clause = ''

    req = """ SELECT DISTINCT(wckey) FROM %s_job_table job %s """ \
          % (cluster_db['prefix'], partitions_clause)
    params = tuple(partitions)
    self.cur.execute(req, params)

    while True:
        row = self.cur.fetchone()
        if row is None:
            break

        wckey = row[0]

        # A NULL wckey comes out of the cursor as None. Like the empty
        # wckey, it does not carry any project code and must not be split.
        if wckey is None or wckey == '':
            continue

        wckey_items = wckey.split(':')
        if len(wckey_items) != 2:
            # warn only once per invalid wckey
            if wckey not in self.invalid_wckeys:
                self.invalid_wckeys.append(wckey)
                self.log.warn(Errors.E_P0001,
                              "format of wckey %s is not valid",
                              wckey)
            continue

        project_code = wckey_items[0]
        project = Project(domain=self.default_domain,
                          code=project_code,
                          description=None)

        # check for duplicate project
        if not self.find_project(project):
            self.projects.append(project)
def test_update(self):
    """ProjectImporterCSV.update() runs with one project in one domain
    """

    domain = Domain('dom1', 'domain name 1')
    project = Project(domain, 'code1', 'project description 1')

    # Associate the result [[1]] with the save_project request of MockPg2
    # when it is called with the parameters of this project.
    save_params = (project.code, project.description, domain.key)
    save_req = MockPg2.PG_REQS['save_project']
    save_req.set_assoc(params=save_params, result=[[1]])

    self.importer.projects = [project]
    self.importer.domains = [domain]
    self.importer.update()
def set_project_description(self, project_code, description):
    """Replace the description of the Project whose code is given in
       parameter and save the change in DB.

       It raises HPCStatsRuntimeError if the Project code is not found in
       DB.
    """
    target = Project(None, project_code, None)
    project_found = target.find(self.db)
    if not project_found:
        msg = "unable to find project %s in database" % (project_code)
        raise HPCStatsRuntimeError(msg)

    # The Project is loaded from DB to get its domain key before its
    # description is replaced.
    target.load(self.db)
    target.description = description

    logger.info("updating project %s with new description",
                project_code)
    target.update(self.db)
def get_jobs_after_batchid(self, batchid, window_size=0):
    """Fill the jobs attribute with the list of Jobs found in Slurm DB
       whose job_db_inx is greater than or equal to the batchid in
       parameter, ordered by job_db_inx.

       If window_size is not null, it is used as the LIMIT of the SQL
       request, ie. the maximum number of rows fetched. If self.partitions
       is not empty, only the jobs of these partitions are selected.

       For every fetched row, nb_loaded_jobs is incremented. Jobs whose
       account is not found in loaded accounts are skipped and counted in
       nb_excluded_jobs (unless strict_job_account_binding is set, see
       below). For all other rows, a Job is appended to self.jobs and
       create_runs() is called when the job has a nodelist.

       Returns the job_db_inx of the last fetched row, or -1 if no row has
       been fetched.

       Raises HPCStatsSourceError when:
       - the cpu count cannot be extracted from the job tres (new schema),
       - the user of the job is not found in loaded users,
       - the account, the wckey format, the project or the business code
         is invalid/unknown and the corresponding strict_job_* attribute
         equals True. When the attribute is not True, a warning is emitted
         instead, once per distinct unknown value.
    """

    self.jobs = []

    if window_size:
        limit = "LIMIT %d" % (window_size)
    else:
        limit = ''

    # value returned when the request does not give any row
    last_batch_id = -1

    # The name of the selected column holding the allocated cpus depends on
    # the result of _is_old_schema().
    old_schema = self._is_old_schema()
    if old_schema is True:
        cpu_field = 'cpus_alloc'
    else:
        cpu_field = 'tres_alloc'

    # One SQL placeholder per partition, the values are given in params.
    if not len(self.partitions):
        partitions_clause = ''
    else:
        partitions_clause = "AND job.partition IN (%s)" % \
                            ','.join(['%s'] * len(self.partitions))

    # Index of the selected columns in the fetched rows:
    #   0 job_db_inx   1 id_job      2 id_user     3 id_group
    #   4 time_submit  5 time_start  6 time_end    7 timelimit
    #   8 nodes_alloc  9 cpus_alloc or tres_alloc  10 job.partition
    #  11 qos.name    12 job.account 13 state      14 nodelist
    #  15 assoc.user  16 job_name    17 wckey
    # The doubled %%s is left as a %s placeholder after the string
    # formatting, it is bound to batchid at execution time.
    req = """ SELECT job_db_inx, id_job, id_user, id_group, time_submit, time_start, time_end, timelimit, nodes_alloc, %s, job.partition, qos.name AS qos, job.account, state, nodelist, assoc.user, job_name, wckey FROM %s_job_table job, %s_assoc_table assoc, qos_table qos WHERE job_db_inx >= %%s %s AND assoc.id_assoc = job.id_assoc AND qos.id = job.id_qos ORDER BY job_db_inx %s """ % (cpu_field, self.prefix, self.prefix, partitions_clause, limit)
    params = (batchid, ) + tuple(self.partitions)
    self.cur.execute(req, params)
    while (1):
        row = self.cur.fetchone()
        if row == None: break

        self.nb_loaded_jobs += 1

        # job_db_inx is used as the batch id of the Job, id_job as its
        # scheduler id.
        batch_id = last_batch_id = row[0]
        sched_id = row[1]

        # The 3 timestamps are converted to datetime, the value 0 being
        # considered as not set.
        submission_t = row[4]
        if submission_t == 0:
            submission = None
        else:
            submission = datetime.fromtimestamp(submission_t)

        start_t = row[5]
        if start_t == 0:
            start = None
        else:
            start = datetime.fromtimestamp(start_t)

        end_t = row[6]
        if end_t == 0:
            end = None
        else:
            end = datetime.fromtimestamp(end_t)

        # Some jobs in Slurm DBD have an end but no start. Typically, this
        # concerns the jobs that have been cancelled before starting. For
        # these jobs, we set the start equal to the end.
        if start is None and end is not None:
            start = end

        # The timelimit is stored as a string in the Job, None when it is 0
        # and "-1" when it is over or equals to 2^31.
        # NOTE(review): 2^31 and over presumably means no limit in Slurm
        # DBD, to be confirmed.
        wall_t = row[7]
        if wall_t == 0:
            walltime = None
        elif wall_t >= 2147483648:
            walltime = "-1"
        else:
            walltime = str(wall_t)

        name = row[16]
        if old_schema is True:
            # cpus_alloc column, used as is
            nbcpu = row[9]
        else:
            # tres_alloc column, extract_tres_cpu() result is -1 on failure
            nbcpu = extract_tres_cpu(row[9])
            if nbcpu == -1:
                raise HPCStatsSourceError( \
                        "unable to extract cpus_alloc from job tres")

        state = JobImporterSlurm.get_job_state_from_slurm_state(row[13])

        # these 2 special values of nodelist are considered as no nodelist
        nodelist = row[14]
        if nodelist == "(null)" or nodelist == "None assigned":
            nodelist = None

        partition = self.job_partition(sched_id, row[10], nodelist)
        qos = row[11]
        queue = "%s-%s" % (partition, qos)
        job_acct = row[12]

        login = row[15]

        # Search the account of the job user on this cluster among the
        # accounts known by the users importer of the application.
        searched_user = User(login, None, None, None)
        searched_account = Account(searched_user, self.cluster,
                                   None, None, None, None)
        account = self.app.users.find_account(searched_account)
        if account is None:
            msg = "account %s not found in loaded accounts" \
                    % (login)
            if self.strict_job_account_binding == True:
                raise HPCStatsSourceError(msg)
            elif login not in self.unknown_accounts:
                # warn only once per unknown login
                self.unknown_accounts.append(login)
                self.log.warn(Errors.E_J0001, msg)
            # the job is skipped, no Job is created for it
            self.nb_excluded_jobs += 1
            continue
        user = self.app.users.find_user(searched_user)
        if user is None:
            msg = "user %s not found in loaded users" % (login)
            raise HPCStatsSourceError(msg)
        job_department = user.department

        wckey = row[17]

        # empty wckey must be considered as None
        if wckey == '':
            wckey = None

        if wckey is None:
            project = None
            business = None
        else:
            # A valid wckey is made of 2 items separated by ':', the
            # project code followed by the business code.
            wckey_items = wckey.split(':')
            if len(wckey_items) != 2:
                msg = "format of wckey %s is not valid" % (wckey)
                if self.strict_job_wckey_format == True:
                    raise HPCStatsSourceError(msg)
                elif wckey not in self.invalid_wckeys:
                    # warn only once per invalid wckey
                    self.invalid_wckeys.append(wckey)
                    self.log.warn(Errors.E_J0002, msg)
                # the job is kept, without project nor business
                project = None
                business = None
            else:
                project_code = wckey_items[0]
                searched_project = Project(None, project_code, None)
                project = self.app.projects.find_project(searched_project)
                if project is None:
                    # In non-strict mode, the job is kept with project
                    # equal to None.
                    msg = "project %s not found in loaded projects" \
                            % (project_code)
                    if self.strict_job_project_binding == True:
                        raise HPCStatsSourceError(msg)
                    elif project_code not in self.unknown_projects:
                        # warn only once per unknown project code
                        self.unknown_projects.append(project_code)
                        self.log.warn(Errors.E_J0003, msg)

                business_code = wckey_items[1]
                searched_business = Business(business_code, None)
                business = self.app.business.find(searched_business)

                if business is None:
                    # In non-strict mode, the job is kept with business
                    # equal to None.
                    msg = "business code %s not found in loaded " \
                          "business codes" % (business_code)
                    if self.strict_job_businesscode_binding == True:
                        raise HPCStatsSourceError(msg)
                    elif business_code not in self.unknown_businesses:
                        # warn only once per unknown business code
                        self.unknown_businesses.append(business_code)
                        self.log.warn(Errors.E_J0004, msg)

        job = Job(account, project, business, sched_id, str(batch_id),
                  name, nbcpu, state, queue, job_acct, job_department,
                  submission, start, end, walltime)
        self.jobs.append(job)

        if nodelist is not None:
            self.create_runs(nodelist, job)

    return last_batch_id
def load(self):
    """Open CSV file and load projects out of it.

       The fields of the CSV file are separated by ';' and quoted with
       '|'. Each row must hold at least 3 fields: the project code, the
       project description and the domain formatted as '[key] name'.

       self.domains and self.projects are reset then filled with the
       Domains and Projects found in the file. A Domain is appended only
       if self.find_domain() does not find it among the already loaded
       ones.

       Raises HPCStatsSourceError if a row has less than 3 fields, if the
       format of the domain is invalid, if the domain key or the domain
       name is empty or if a project code is duplicated in the file.

       Returns the list of Projects with their Domains.
    """

    self.check()

    self.domains = []
    self.projects = []

    # compiled once, out of the loop over the rows
    domain_re = re.compile(r"\[(.*)\](.*)")

    with open(self.csv_file, 'r') as csvfile:

        file_reader = csv.reader(csvfile, delimiter=';', quotechar='|')

        for row in file_reader:

            # The CSV reader gives an empty list for a blank line and a
            # short list for a truncated row. Report them as errors of
            # the source file instead of failing with IndexError on the
            # fields below.
            if len(row) < 3:
                raise HPCStatsSourceError(
                    "Project CSV line %d is invalid: at least 3 fields "
                    "expected but %d found"
                    % (file_reader.line_num, len(row)))

            project_code = row[0]
            project_name = row[1]

            # domains
            domain_str = row[2]
            domain_m = domain_re.match(domain_str)
            if not domain_m:
                raise HPCStatsSourceError(
                    "Project CSV %s domain format is invalid"
                    % (project_code))

            domain_key = domain_m.group(1).strip()
            domain_name = domain_m.group(2).strip()

            if len(domain_key) == 0:
                raise HPCStatsSourceError(
                    "Project CSV %s domain key is empty"
                    % (project_code))
            if len(domain_name) == 0:
                raise HPCStatsSourceError(
                    "Project CSV %s domain name is empty"
                    % (project_code))

            # Create the Domain and search for it among the already
            # existing ones. If not found, append to the list of Domains.
            new_domain = Domain(key=domain_key,
                                name=domain_name)
            domain = self.find_domain(new_domain)
            if domain is None:
                domain = new_domain
                self.domains.append(domain)

            # Create the Project and search for it among the already
            # existing ones. If found, raise HPCStatsSourceError
            project = Project(domain=domain,
                              code=project_code,
                              description=project_name)
            # check for duplicate project and raise error if found
            if self.find_project(project):
                raise HPCStatsSourceError(
                    "duplicated project code %s in CSV file"
                    % (project_code))

            self.projects.append(project)

    return self.projects