Exemplo n.º 1
0
    def set_project_domain(self, project_code, domain_key):
        """Attach the Project identified by project_code to another Domain.

           The Domain key is checked first, then the Project code. If either
           one is not found in DB, HPCStatsRuntimeError is raised and nothing
           is modified. Otherwise the Project is loaded from DB, its domain
           attribute is replaced and the Project is updated in DB.
        """

        new_domain = Domain(domain_key, None)
        if not new_domain.existing(self.db):
            msg = "unable to find domain %s in database" % (domain_key)
            raise HPCStatsRuntimeError(msg)

        target = Project(None, project_code, None)
        if not target.find(self.db):
            msg = "unable to find project %s in database" % (project_code)
            raise HPCStatsRuntimeError(msg)

        # Only the code is known at this point: read the rest of the Project
        # (its description) back from DB before changing anything.
        target.load(self.db)
        target.domain = new_domain

        logger.info("updating project %s with new domain %s",
                    project_code, domain_key)
        target.update(self.db)
Exemplo n.º 2
0
    def set_project_domain(self, project_code, domain_key):
        """Change the Domain of the Project whose code is project_code.

           Raises HPCStatsRuntimeError when the Domain key is unknown in DB
           or, failing that, when the Project code is unknown in DB. On
           success the Project stored in DB references the Domain designated
           by domain_key.
        """

        wanted_domain = Domain(domain_key, None)
        domain_found = wanted_domain.existing(self.db)
        if not domain_found:
            raise HPCStatsRuntimeError(
                "unable to find domain %s in database" % (domain_key))

        wanted_project = Project(None, project_code, None)
        project_found = wanted_project.find(self.db)
        if not project_found:
            raise HPCStatsRuntimeError(
                "unable to find project %s in database" % (project_code))

        # Fetch the stored Project (to get its description), then swap its
        # domain before writing it back.
        wanted_project.load(self.db)
        wanted_project.domain = wanted_domain

        logger.info("updating project %s with new domain %s", project_code,
                    domain_key)
        wanted_project.update(self.db)
Exemplo n.º 3
0
    def load_cluster(self, cluster):
        """Extract project codes from the wckeys of the jobs recorded in the
           Slurm database of the given cluster.

           Every distinct, non-empty wckey is expected to have the form
           "<project>:<other>". The first item is used as the code of a new
           Project bound to self.default_domain, which is appended to
           self.projects unless find_project() already knows it. Wckeys that
           do not split into exactly two items are remembered in
           self.invalid_wckeys and reported once with a warning.

           The method itself raises nothing explicitly; HPCStatsSourceError
           is presumably raised by connect_db() on connection error (to be
           confirmed against that method).
        """

        self.log.debug("loading project codes from %s slurm database", cluster)

        self.connect_db(cluster)

        # Read the cluster settings only after connect_db() has run.
        cluster_db = self.clusters_db[cluster]
        partitions = cluster_db['partitions']

        # One '%s' placeholder per partition, bound by the DB driver.
        nb_partitions = len(partitions)
        if nb_partitions:
            placeholders = ','.join(['%s'] * nb_partitions)
            partitions_clause = "WHERE job.partition IN (%s)" % placeholders
        else:
            partitions_clause = ''

        req = """
                SELECT DISTINCT(wckey)
                  FROM %s_job_table job
                  %s
              """ % (cluster_db['prefix'],
                     partitions_clause)

        self.cur.execute(req, tuple(partitions))

        # fetchone() returns None once the result set is exhausted.
        for row in iter(self.cur.fetchone, None):

            wckey = row[0]

            if wckey == '':
                continue

            fields = wckey.split(':')
            if len(fields) != 2:
                # Warn only the first time a given invalid wckey is seen.
                if wckey not in self.invalid_wckeys:
                    self.invalid_wckeys.append(wckey)
                    self.log.warn(Errors.E_P0001,
                                  "format of wckey %s is not valid",
                                  wckey)
                continue

            candidate = Project(domain=self.default_domain,
                                code=fields[0],
                                description=None)

            # check for duplicate project
            if not self.find_project(candidate):
                self.projects.append(candidate)
Exemplo n.º 4
0
    def test_update(self):
        """ProjectImporterCSV.update() works with simple data
        """

        domain = Domain('dom1', 'domain name 1')
        project = Project(domain, 'code1', 'project description 1')

        # Register the parameters expected by the mocked 'save_project'
        # request together with the result it must return.
        save_params = (project.code, project.description, domain.key)
        save_request = MockPg2.PG_REQS['save_project']
        save_request.set_assoc(params=save_params, result=[[1]])

        self.importer.projects = [project]
        self.importer.domains = [domain]

        self.importer.update()
Exemplo n.º 5
0
    def set_project_description(self, project_code, description):
        """Replace in DB the description of the Project whose code is
           project_code. HPCStatsRuntimeError is raised, and nothing is
           modified, if the Project is not found in DB.
        """

        target = Project(None, project_code, None)
        if not target.find(self.db):
            msg = "unable to find project %s in database" % (project_code)
            raise HPCStatsRuntimeError(msg)

        # Read the stored Project back first (to get its domain key), then
        # replace its description.
        target.load(self.db)
        target.description = description

        logger.info("updating project %s with new description", project_code)
        target.update(self.db)
Exemplo n.º 6
0
    def set_project_description(self, project_code, description):
        """Set a new description on the Project identified by project_code
           and save it in DB.

           Raises HPCStatsRuntimeError when no Project with this code is
           found in DB.
        """

        wanted_project = Project(None, project_code, None)
        project_found = wanted_project.find(self.db)
        if not project_found:
            raise HPCStatsRuntimeError(
                "unable to find project %s in database" % (project_code))

        # The Project is loaded from DB (to get its domain key) before its
        # description is changed.
        wanted_project.load(self.db)
        wanted_project.description = description

        logger.info("updating project %s with new description", project_code)
        wanted_project.update(self.db)
Exemplo n.º 7
0
    def get_jobs_after_batchid(self, batchid, window_size=0):
        """Fill the jobs attribute with the list of Jobs found in Slurm DB
           whose job_db_inx is greater than or equal to the batchid in
           parameter, in ascending job_db_inx order.

           If window_size is non-zero, at most window_size rows are read
           (SQL LIMIT). If self.partitions is not empty, only the jobs of
           these partitions are selected.

           Returns the job_db_inx of the last row read, or -1 if the query
           returned no row. Rows skipped because their account is unknown
           still count for the returned value.

           Side effects visible in this method: self.jobs is reset then
           filled, self.nb_loaded_jobs and self.nb_excluded_jobs are
           incremented, unknown logins/wckeys/projects/business codes are
           recorded in the corresponding self.unknown_* / self.invalid_wckeys
           lists, and create_runs() is called for every job having a
           nodelist.

           Raises HPCStatsSourceError when:
             - the CPU count cannot be extracted from the job TRES,
             - the account is unknown and strict_job_account_binding is True,
             - the account is known but the user is not,
             - the wckey is malformed and strict_job_wckey_format is True,
             - the project is unknown and strict_job_project_binding is True,
             - the business code is unknown and
               strict_job_businesscode_binding is True.
        """

        self.jobs = []

        if window_size:
            limit = "LIMIT %d" % (window_size)
        else:
            limit = ''

        last_batch_id = -1

        # The column holding the allocated CPUs depends on the DB schema:
        # a plain cpus_alloc value in the old one, a TRES string to parse
        # (see extract_tres_cpu() below) otherwise.
        old_schema = self._is_old_schema()
        if old_schema is True:
            cpu_field = 'cpus_alloc'
        else:
            cpu_field = 'tres_alloc'

        # One '%s' placeholder per partition; the values are bound by the DB
        # driver at execute() time.
        if not len(self.partitions):
            partitions_clause = ''
        else:
            partitions_clause = "AND job.partition IN (%s)" % \
                                ','.join(['%s'] * len(self.partitions))

        # Column positions used below through row[N]:
        #    0 job_db_inx    1 id_job        2 id_user       3 id_group
        #    4 time_submit   5 time_start    6 time_end      7 timelimit
        #    8 nodes_alloc   9 <cpu_field>  10 partition    11 qos
        #   12 account      13 state        14 nodelist     15 assoc.user
        #   16 job_name     17 wckey
        # The '%%s' survives the Python formatting as a '%s' placeholder that
        # is bound to batchid by the DB driver.
        req = """
                SELECT job_db_inx,
                       id_job,
                       id_user,
                       id_group,
                       time_submit,
                       time_start,
                       time_end,
                       timelimit,
                       nodes_alloc,
                       %s,
                       job.partition,
                       qos.name AS qos,
                       job.account,
                       state,
                       nodelist,
                       assoc.user,
                       job_name,
                       wckey
                  FROM %s_job_table job,
                       %s_assoc_table assoc,
                       qos_table qos
                 WHERE job_db_inx >= %%s
                   %s
                   AND assoc.id_assoc = job.id_assoc
                   AND qos.id = job.id_qos
              ORDER BY job_db_inx %s
              """ % (cpu_field, self.prefix, self.prefix, partitions_clause,
                     limit)
        params = (batchid, ) + tuple(self.partitions)
        self.cur.execute(req, params)
        while (1):
            row = self.cur.fetchone()
            # fetchone() returns None once all rows have been consumed
            if row == None:
                break

            # Counted before any filtering: excluded jobs are loaded jobs too.
            self.nb_loaded_jobs += 1

            # batch_id is the job_db_inx, sched_id is the id_job
            batch_id = last_batch_id = row[0]
            sched_id = row[1]

            # Timestamps equal to 0 are turned into None, the others into
            # datetime objects.
            submission_t = row[4]
            if submission_t == 0:
                submission = None
            else:
                submission = datetime.fromtimestamp(submission_t)

            start_t = row[5]
            if start_t == 0:
                start = None
            else:
                start = datetime.fromtimestamp(start_t)

            end_t = row[6]
            if end_t == 0:
                end = None
            else:
                end = datetime.fromtimestamp(end_t)

            # Some jobs in Slurm DBD have an end but no start. Typically, this
            # concerns the jobs that have been cancelled before starting. For
            # these jobs, we set the start equal to the end.
            if start is None and end is not None:
                start = end

            # The time limit is kept as a string: None when 0, "-1" when it
            # is 2^31 or more (presumably how Slurm stores an unlimited time
            # limit -- TODO confirm), the value itself otherwise.
            wall_t = row[7]
            if wall_t == 0:
                walltime = None
            elif wall_t >= 2147483648:
                walltime = "-1"
            else:
                walltime = str(wall_t)

            name = row[16]
            if old_schema is True:
                nbcpu = row[9]
            else:
                # extract_tres_cpu() signals a failure with -1
                nbcpu = extract_tres_cpu(row[9])
                if nbcpu == -1:
                    raise HPCStatsSourceError( \
                            "unable to extract cpus_alloc from job tres")

            state = JobImporterSlurm.get_job_state_from_slurm_state(row[13])

            # These two literal values are treated as "no node allocated"
            nodelist = row[14]
            if nodelist == "(null)" or nodelist == "None assigned":
                nodelist = None

            # The queue name is built as "<partition>-<qos>"
            partition = self.job_partition(sched_id, row[10], nodelist)
            qos = row[11]
            queue = "%s-%s" % (partition, qos)
            job_acct = row[12]

            login = row[15]

            # Look the account up among the accounts already loaded by the
            # application. Unless strict binding is enabled, a job whose
            # account is unknown is excluded (and its login reported once).
            searched_user = User(login, None, None, None)
            searched_account = Account(searched_user, self.cluster, None, None,
                                       None, None)
            account = self.app.users.find_account(searched_account)
            if account is None:
                msg = "account %s not found in loaded accounts" \
                        % (login)
                if self.strict_job_account_binding == True:
                    raise HPCStatsSourceError(msg)
                elif login not in self.unknown_accounts:
                    self.unknown_accounts.append(login)
                    self.log.warn(Errors.E_J0001, msg)
                self.nb_excluded_jobs += 1
                continue
            # An account without a matching user is always an error.
            user = self.app.users.find_user(searched_user)
            if user is None:
                msg = "user %s not found in loaded users" % (login)
                raise HPCStatsSourceError(msg)
            job_department = user.department

            wckey = row[17]

            # empty wckey must be considered as None
            if wckey == '':
                wckey = None

            if wckey is None:
                project = None
                business = None
            else:
                # A valid wckey is "<project code>:<business code>"
                wckey_items = wckey.split(':')
                if len(wckey_items) != 2:
                    msg = "format of wckey %s is not valid" % (wckey)
                    if self.strict_job_wckey_format == True:
                        raise HPCStatsSourceError(msg)
                    elif wckey not in self.invalid_wckeys:
                        self.invalid_wckeys.append(wckey)
                        self.log.warn(Errors.E_J0002, msg)
                    project = None
                    business = None
                else:
                    # Unless strict binding is enabled, an unknown project is
                    # reported once and the job is kept with project None.
                    project_code = wckey_items[0]
                    searched_project = Project(None, project_code, None)
                    project = self.app.projects.find_project(searched_project)
                    if project is None:
                        msg = "project %s not found in loaded projects" \
                                % (project_code)
                        if self.strict_job_project_binding == True:
                            raise HPCStatsSourceError(msg)
                        elif project_code not in self.unknown_projects:
                            self.unknown_projects.append(project_code)
                            self.log.warn(Errors.E_J0003, msg)

                    # Same policy for the business code.
                    business_code = wckey_items[1]
                    searched_business = Business(business_code, None)
                    business = self.app.business.find(searched_business)

                    if business is None:
                        msg = "business code %s not found in loaded " \
                              "business codes" % (business_code)
                        if self.strict_job_businesscode_binding == True:
                            raise HPCStatsSourceError(msg)
                        elif business_code not in self.unknown_businesses:
                            self.unknown_businesses.append(business_code)
                            self.log.warn(Errors.E_J0004, msg)

            # The job_db_inx is given to the Job as a string.
            job = Job(account, project, business, sched_id, str(batch_id),
                      name, nbcpu, state, queue, job_acct, job_department,
                      submission, start, end, walltime)
            self.jobs.append(job)

            # Runs are only created for jobs that have a nodelist.
            if nodelist is not None:
                self.create_runs(nodelist, job)

        return last_batch_id
Exemplo n.º 8
0
    def __init__(self, db, config, cluster_name):
        """Replace all the contexts stored in DB with those of the context
        CSV file of the cluster.

        The file path is read from the "file" option of the
        "<cluster_name>/context" section of config. All existing contexts are
        deleted and committed first. Then, for every CSV row (',' delimiter),
        one Context is saved per project listed in column 7 (row[6]) and one
        per business code listed in column 8 (row[7]); several values are
        separated by '|' inside a cell. Column 1 (row[0]) is the login, used
        lowercased.

        A savepoint is moved forward after every successful insertion so
        that a failing row is rolled back alone; everything is committed once
        at the end.

        Raises RuntimeError if the context file does not exist.
        """

        self._db = db
        self._cluster_name = cluster_name

        context_section = self._cluster_name + "/context"
        self._context_file = config.get(context_section, "file")

        if not os.path.isfile(self._context_file):
            logging.error("context file %s does not exist", self._context_file)
            raise RuntimeError(
                "context file %s does not exist" % (self._context_file))

        # delete all contexts entries in databases
        logging.debug("Delete all context entries in db")
        delete_contexts(self._db)
        self._db.commit()

        def set_savepoint():
            # Mark the point the transaction can be rolled back to.
            db.get_cur().execute("SAVEPOINT my_savepoint;")

        def rollback_to_savepoint():
            # Discard whatever was done since the last savepoint.
            db.get_cur().execute("ROLLBACK TO SAVEPOINT my_savepoint;")

        def save_context(context, row):
            # Save one context; on a DB data/integrity error, log it and
            # roll back to the last savepoint so the import can go on.
            try:
                context.save(self._db)
                logging.debug("add context : %s", context)
                set_savepoint()
            except psycopg2.DataError:
                logging.error("impossible to add CONTEXT entry in database : (%s), du to encoding error", row)
                rollback_to_savepoint()
            except psycopg2.IntegrityError:
                logging.error("impossible to add CONTEXT entry in database : (%s), du to relations error", row)
                rollback_to_savepoint()

        # The file is opened by the with statement itself so that it is
        # closed whatever happens, including a failure of the first SAVEPOINT.
        with open(self._context_file, 'r') as csvfile:
            # savepoint is used to recover from exceptions and commit in
            # database only at the end
            set_savepoint()
            file_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in file_reader:
                login = row[0].lower()
                logging.debug("update projects and business codes for user : %s", login)
                # A new context is set in database for every attached project
                # AND for every attached business code. Each context holds
                # either a project reference OR a business reference.
                if row[6]:
                    for pareo in re.split(r'\|', row[6]):
                        project = Project()
                        try:
                            project.project_from_pareo(self._db, pareo)
                            context = Context(login=login,
                                              job=None,
                                              project=project.get_id(),
                                              business=None)
                            save_context(context, row)
                        except Exception:
                            # Exception rather than a bare except, so that
                            # KeyboardInterrupt/SystemExit are not swallowed.
                            logging.error("context rejected. Project %s does not exist", pareo)
                            rollback_to_savepoint()
                if row[7]:
                    for code in re.split(r'\|', row[7]):
                        business = Business()
                        try:
                            business.business_from_key(self._db, code)
                            context = Context(login=login,
                                              job=None,
                                              project=None,
                                              business=business.get_id())
                            save_context(context, row)
                        except Exception:
                            logging.error("context rejected. Business %s does not exist", code)
                            rollback_to_savepoint()
                if not row[6] and not row[7]:
                    logging.error("line : %s rejected - not code or project associate", row)
        self._db.commit()
Exemplo n.º 9
0
    def __init__(self, db, config, cluster_name):
        """Replace the domains, sectors and projects stored in DB with those
        of the pareo CSV file of the cluster.

        The file path is read from the "file" option of the
        "<cluster_name>/pareo" section of config. The contexts bound to
        projects, then the projects, sectors and domains are deleted and this
        deletion is committed first. Every CSV row (';' delimiter) is then
        read as:

          - row[0]: project (pareo) code
          - row[1]: project description
          - row[2]: domain, formatted "[id]description"
          - row[3]: sector, formatted "[id]description"; empty or "[]" means
            the default sector 0 of the domain

        A savepoint is moved forward after every successful insertion so
        that a failing entry is rolled back alone; everything is committed
        once at the end.

        Raises RuntimeError if the pareo file does not exist.
        """

        self._db = db
        self._cluster_name = cluster_name

        pareo_section = self._cluster_name + "/pareo"
        self._pareo_file = config.get(pareo_section, "file")

        if not os.path.isfile(self._pareo_file):
            logging.error("pareo file %s does not exist", self._pareo_file)
            raise RuntimeError(
                "pareo file %s does not exist" % (self._pareo_file))

        # delete all entries in domains and sectors tables and
        # their dependencies in contexts and projects tables
        logging.debug("Delete all pareo entries in db")
        delete_contexts_with_pareo(self._db)
        delete_projects(self._db)
        delete_sectors(self._db)
        delete_domains(self._db)
        self._db.commit()

        # Delimiters of the "[id]description" cells. Raw string: same
        # pattern as before, without the invalid "\[" string escape.
        delimiters = r"\[|]"

        # The file is opened by the with statement itself so that it is
        # closed whatever happens, including a failure of the first SAVEPOINT.
        with open(self._pareo_file, "r") as csvfile:
            # savepoint is used to recover from exceptions and commit in
            # database only at the end.
            db.get_cur().execute("SAVEPOINT my_savepoint;")
            file_reader = csv.reader(csvfile, delimiter=";", quotechar="|")
            for row in file_reader:
                # update domains table with third column of the file
                if row[2]:
                    domain_fields = re.split(delimiters, row[2])
                    id_domain = domain_fields[1]
                    description_domain = domain_fields[2]
                    domain = Domain(id=id_domain, description=description_domain)
                    try:
                        if not domain.already_exist(self._db):
                            domain.save(self._db)
                            if not domain.get_description():
                                logging.debug("add domain : %s, without description", domain.get_id())
                            else:
                                logging.debug(
                                    "add domain : %s, with description : %s", domain.get_id(), domain.get_description()
                                )
                            db.get_cur().execute("SAVEPOINT my_savepoint;")
                    except psycopg2.DataError:
                        logging.error("impossible to add DOMAIN entry in database : (%s), du to encoding error", row[2])
                        db.get_cur().execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                else:
                    id_domain = None

                # update sector table with fourth column of the file; a
                # sector only exists within a domain
                if id_domain:
                    if row[3] and row[3] != "[]":
                        sector_fields = re.split(delimiters, row[3])
                        # keep only the digits of the sector id
                        id_sector = int(re.sub("[^0-9]", "", sector_fields[1]))
                        description_sector = sector_fields[2]
                    else:
                        # no sector given: fall back on sector 0 of the domain
                        id_sector = 0
                        description_sector = "default value for domain " + domain.get_id()
                    sector = Sector(id=id_sector, domain=id_domain, description=description_sector)
                    try:
                        if not sector.already_exist(self._db):
                            sector.save(self._db)
                            if not sector.get_description():
                                logging.debug(
                                    "add sector : %s, from domain : %s, without description",
                                    sector.get_id(),
                                    sector.get_domain(),
                                )
                            else:
                                logging.debug(
                                    "add sector : %s, from domain : %s, with description : %s",
                                    sector.get_id(),
                                    sector.get_domain(),
                                    sector.get_description(),
                                )
                            db.get_cur().execute("SAVEPOINT my_savepoint;")
                    except psycopg2.DataError:
                        logging.error("impossible to add SECTOR entry in database : (%s) du to encoding error", row[3])
                        db.get_cur().execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                    except psycopg2.IntegrityError:
                        logging.error(
                            "impossible to add SECTOR entry in database : (%s), du to relations error", row[3]
                        )
                        db.get_cur().execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                else:
                    id_sector = None

                # update projects table with first and second columns of the
                # file. Because of the database constraints, a project cannot
                # reference a domain without a sector: when the row has a
                # domain id, the sector is either the one given or the
                # default sector 0; when it has none, id_sector is None.
                if row[0]:
                    project = Project(sector=id_sector, domain=id_domain, description=row[1], pareo=row[0])
                    try:
                        if not project.already_exist(self._db):
                            project.save(self._db)
                            if not project.get_description():
                                logging.debug(
                                    "add project : %s, from domain : %s, without description",
                                    project.get_pareo(),
                                    project.get_domain(),
                                )
                            else:
                                logging.debug(
                                    "add project : %s, from domain : %s, and sector : %s with description : %s",
                                    project.get_pareo(),
                                    project.get_domain(),
                                    project.get_sector(),
                                    project.get_description(),
                                )
                            db.get_cur().execute("SAVEPOINT my_savepoint;")
                    except psycopg2.DataError:
                        logging.error(
                            "impossible to add PAREO entry in database : (%s - %s), du to encoding error",
                            row[0],
                            row[1],
                        )
                        db.get_cur().execute("ROLLBACK TO SAVEPOINT my_savepoint;")
                    except psycopg2.IntegrityError:
                        logging.error(
                            "impossible to add PAREO entry in database : (%s - %s), du to relations error",
                            row[0],
                            row[1],
                        )
                        db.get_cur().execute("ROLLBACK TO SAVEPOINT my_savepoint;")
        self._db.commit()
Exemplo n.º 10
0
    def load(self):
        """Read the projects and their domains out of the CSV file.

           Each row (';' delimiter) holds the project code, the project
           description and the domain formatted as "[key]name". Domains are
           stored once in self.domains and projects in self.projects; both
           lists are reset on every call, after self.check() has run.

           Raises HPCStatsSourceError if a domain is badly formatted, if its
           key or name is empty, or if a project code appears twice.
           Returns the list of Projects, each bound to its Domain.
        """

        self.check()

        self.domains = []
        self.projects = []

        # "[key]name": group 1 is the domain key, group 2 the domain name
        domain_re = re.compile(r"\[(.*)\](.*)")

        with open(self.csv_file, 'r') as csvfile:

            for row in csv.reader(csvfile, delimiter=';', quotechar='|'):

                project_code = row[0]
                project_name = row[1]
                domain_str = row[2]

                # domains
                domain_m = domain_re.match(domain_str)
                if not domain_m:
                    raise HPCStatsSourceError(
                        "Project CSV %s domain format is invalid"
                        % (project_code))

                domain_key = domain_m.group(1).strip()
                domain_name = domain_m.group(2).strip()
                if not domain_key:
                    raise HPCStatsSourceError(
                        "Project CSV %s domain key is empty"
                        % (project_code))
                if not domain_name:
                    raise HPCStatsSourceError(
                        "Project CSV %s domain name is empty"
                        % (project_code))

                # Reuse the Domain if an equivalent one has already been
                # met, register this new one otherwise.
                candidate = Domain(key=domain_key, name=domain_name)
                domain = self.find_domain(candidate)
                if domain is None:
                    domain = candidate
                    self.domains.append(candidate)

                project = Project(domain=domain,
                                  code=project_code,
                                  description=project_name)

                # A project code must appear only once in the file.
                if self.find_project(project):
                    raise HPCStatsSourceError(
                        "duplicated project code %s in CSV file"
                        % (project_code))

                self.projects.append(project)

        return self.projects