Example no. 1
    def create_runs(self, nodelist, job):
        """Create all Runs objects for the job in parameter and all the nodes
           in nodelist.
        """

        if is_bg_nodelist(nodelist):
            nodeset = compute_bg_nodelist(nodelist)
        else:
            try:
                nodeset = NodeSet(nodelist)
            except NodeSetParseRangeError:
                raise HPCStatsSourceError( \
                        "could not parse nodeset %s for job %s" \
                          % (nodelist, job.batch_id))

        for nodename in nodeset:
            searched_node = Node(nodename, self.cluster, None, None, None,
                                 None, None)
            node = self.app.arch.find_node(searched_node)
            if node is None:
                self.log.warn(Errors.E_J0006,
                              "unable to find node %s for job %s in loaded " \
                              "nodes", nodename, job.batch_id)
            else:
                run = Run(self.cluster, node, job)
                job.runs.append(run)
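For reference, the nodelist expansion that create_runs() relies on: the
NodeSetParseRangeError handling above suggests ClusterShell's NodeSet class.
A minimal sketch of that expansion, assuming the ClusterShell library:

    # Minimal sketch, assuming ClusterShell's NodeSet: expand a folded
    # nodelist string into the individual node names create_runs() iterates.
    from ClusterShell.NodeSet import NodeSet, NodeSetParseRangeError

    def expand_nodelist(nodelist):
        """Return the list of node names in a folded nodelist string."""
        try:
            return list(NodeSet(nodelist))
        except NodeSetParseRangeError:
            # create_runs() wraps this failure into an HPCStatsSourceError.
            raise ValueError("could not parse nodeset %s" % nodelist)

    print(expand_nodelist('cn[001-003]'))  # ['cn001', 'cn002', 'cn003']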
Example no. 2

    def test_update_2(self):
        """ProjectImporterCSV.update() detects an existing cluster and node.
        """

        cluster1 = Cluster('cluster1')
        node1 = Node('node1', cluster1, 'model1', 'test_partition', 12, 6 * 1024 ** 3, 1)

        MockPg2.PG_REQS['find_cluster'].set_assoc(
          params=( cluster1.name, ),
          result=[ [ 1 ] ]
        )
        MockPg2.PG_REQS['find_node'].set_assoc(
          params=( node1.name, cluster1.cluster_id, ),
          result=[ [ 1 ] ]
        )
        self.importer.cluster = cluster1
        self.importer.nodes = [ node1 ]

        self.importer.update()
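MockPg2 is the test suite's PostgreSQL double and its implementation is not
part of this listing; a hypothetical sketch of the set_assoc() contract the
test relies on, with illustrative names only:

    # Hypothetical sketch: a named request that maps a params tuple to the
    # canned rows a mocked cursor should return for that query.
    class MockReq(object):

        def __init__(self):
            self.assoc = {}

        def set_assoc(self, params, result):
            """Associate one parameter tuple with canned result rows."""
            self.assoc[params] = result

        def get(self, params):
            return self.assoc.get(params, [])

    req = MockReq()
    req.set_assoc(params=('cluster1',), result=[[1]])
    print(req.get(('cluster1',)))  # [[1]]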
Example no. 3
    def init_load_data(self):
        """Utility method to initialize data to make load() simply."""

        self.e1_start = datetime(2015, 3, 2, 15, 59, 59)
        self.e1_end = datetime(2015, 3, 2, 16, 0, 0)
        self.node_name = 'node1'
        e1_start_ts = time.mktime(self.e1_start.timetuple())
        e1_end_ts = time.mktime(self.e1_end.timetuple())

        MockMySQLdb.MY_REQS['get_events']['res'] = \
          [
            [ e1_start_ts, e1_end_ts, self.node_name, '1=16', 35, 'reason1' ],
          ]
        MockMySQLdb.MY_REQS['event_table_cols']['res'] = []

        self.app.arch.nodes = [
            Node(self.node_name, self.cluster, 'model1', 'partition1', 16, 8,
                 0),
        ]
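The fixture stores epoch seconds because get_new_events() (example no. 6
below) reads raw timestamps from the event table; time.mktime() interprets
the datetime in local time and datetime.fromtimestamp() reverses it, so the
round trip is exact for whole-second values:

    # Round-trip check of the timestamp conversion used in init_load_data().
    import time
    from datetime import datetime

    start = datetime(2015, 3, 2, 15, 59, 59)
    ts = time.mktime(start.timetuple())         # datetime -> epoch seconds
    assert datetime.fromtimestamp(ts) == start  # epoch seconds -> datetime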
Example no. 4

    def test_update(self):
        """ProjectImporterCSV.update() creates the cluster and node when they
           do not already exist.
        """

        cluster1 = Cluster('cluster1')
        node1 = Node('node1', cluster1, 'model1', 'test_partition', 12, 6 * 1024 ** 3, 1)

        MockPg2.PG_REQS['save_cluster'].set_assoc(
          params=( cluster1.name, ),
          result=[ [ 1 ] ]
        )
        MockPg2.PG_REQS['save_node'].set_assoc(
          params=( node1.name, cluster1.cluster_id, node1.partition,
                   node1.cpu, node1.memory, node1.flops ),
          result=[ [ 1 ] ]
        )
        self.importer.cluster = cluster1
        self.importer.nodes = [ node1 ]

        self.importer.update()
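One pitfall these fixtures illustrate: query parameters must be tuples, and a
one-element tuple needs its trailing comma, which is easy to drop:

    # (value) is only a parenthesized expression; (value,) is a tuple.
    params_wrong = ('cluster1')   # type str
    params_right = ('cluster1',)  # tuple of one str
    assert isinstance(params_wrong, str)
    assert isinstance(params_right, tuple)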
Example no. 5
    def load(self):
        """Load Cluster, Nodes and partitions from Architecture files. Raises
           HPCStatsRuntimeError or HPCStatsSourceError if error is encountered
           while loading data from sources. It sets attributes cluster, nodes
           and partitions with loaded data.
        """

        self.cluster = Cluster(self.cluster_name)
        self.nodes = []
        self.partitions = {}

        self.read_arch()
        config_get = self.config_get
        partitions = config_get(self.cluster.name, "partitions").split(',')

        for partition in partitions:

            part_sect = self.cluster.name + "/" + partition

            nodegroups = config_get(part_sect, "nodegroups").split(',')
            job_partitions = config_get(part_sect, "job_partitions") \
                               .split(',')

            nodeset_part = NodeSet() # nodeset for the partitions attribute

            for nodegroup in nodegroups:

                nodegroup_sect = self.cluster.name + "/" + partition \
                                 + "/" + nodegroup
                nodenames = config_get(nodegroup_sect, "names")
                nodeset_part.add(nodenames)

                sockets = config_get(nodegroup_sect, "sockets", isint=True)
                cores_per_socket = config_get(nodegroup_sect,
                                              "corespersocket",
                                              isint=True)
                cpu = sockets * cores_per_socket

                float_instructions = config_get(nodegroup_sect,
                                                "floatinstructions",
                                                isint=True)

                freq_str = config_get(nodegroup_sect, "frequency")
                freq = ArchitectureImporterArchfile.convert_freq(freq_str)
                if freq is None:
                    raise HPCStatsSourceError( \
                            "format of frequency for nodeset %s/%s/%s (%s) " \
                            "'%s' is not valid" \
                              % ( self.cluster.name,
                                  partition,
                                  nodegroup,
                                  nodenames,
                                  freq_str ))

                flops = sockets * cores_per_socket * float_instructions * freq

                mem_str = config_get(nodegroup_sect, "memory")
                mem = ArchitectureImporterArchfile.convert_mem(mem_str)
                if mem is None:
                    raise HPCStatsSourceError( \
                            "format of memory for nodeset %s/%s/%s (%s) " \
                            "'%s' is not valid" \
                              % ( self.cluster.name,
                                  partition,
                                  nodegroup,
                                  nodenames,
                                  mem_str ))

                model = config_get(nodegroup_sect, "model")

                nodeset_group = NodeSet(nodenames)
                for nodename in nodeset_group:
                    # create and append node
                    new_node = Node(name=nodename,
                                    cluster=self.cluster,
                                    model=model,
                                    partition=partition,
                                    cpu=cpu,
                                    memory=mem,
                                    flops=flops)
                    self.nodes.append(new_node)

            self.partitions[str(nodeset_part)] = job_partitions
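convert_freq() and convert_mem() are not shown in this listing; a hedged
sketch of what such a converter might look like, returning None on invalid
input as load() expects. This is an illustration, not the project's actual
implementation:

    # Illustrative only: parse '2.4GHz' / '800MHz' style strings into Hz.
    import re

    def convert_freq(freq_str):
        match = re.match(r'^\s*([\d.]+)\s*(GHz|MHz)\s*$', freq_str,
                         re.IGNORECASE)
        if match is None:
            return None  # load() raises HPCStatsSourceError on None
        value = float(match.group(1))
        scale = 10 ** 9 if match.group(2).lower() == 'ghz' else 10 ** 6
        return value * scale

    print(convert_freq('2.4GHz'))  # 2400000000.0
    print(convert_freq('bogus'))   # None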
Example no. 6
    def get_new_events(self, start):
        """Get all new Events from Slurm DB since start datetime. Parameter
           start must be a valid datetime. Returns a list of Events. The list
           is empty if none found.
        """

        self.log.info("searching new events since %s", str(start))
        timestamp = int(round(time.mktime(start.timetuple())))

        old_schema = self._is_old_schema()

        events = []

        if old_schema:
            cpu_field = 'cpu_count'
        else:
            cpu_field = 'tres'

        req = """
               SELECT time_start,
                      time_end,
                      node_name,
                      %s,
                      state,
                      reason
                 FROM %s_event_table
                WHERE node_name <> ''
                  AND time_start >= %%s
                ORDER BY time_start
              """ % (cpu_field, self.prefix)
        params = (timestamp, )

        self.cur.execute(req, params)

        while True:
            row = self.cur.fetchone()
            if row is None:
                break

            datetime_start = datetime.fromtimestamp(row[0])

            timestamp_end = row[1]
            if timestamp_end == 0:
                datetime_end = None
            else:
                datetime_end = datetime.fromtimestamp(timestamp_end)

            node_name = row[2]
            searched_node = Node(node_name, self.cluster, None, None, None,
                                 None, None)
            node = self.app.arch.find_node(searched_node)
            if node is None:
                self.log.warn(
                    Errors.E_E0001, "event node %s is unknown in cluster %s "
                    "architecture, ignoring this event", node_name,
                    self.cluster.name)
                continue

            if old_schema:
                nb_cpu = row[3]
            else:
                nb_cpu = extract_tres_cpu(row[3])
                if nb_cpu == -1:
                    raise HPCStatsSourceError( \
                            "unable to extract cpu_count from event tres")

            event_type = EventImporterSlurm.txt_slurm_event_type(row[4])
            reason = row[5]

            event = Event(node=node,
                          cluster=self.cluster,
                          nb_cpu=nb_cpu,
                          start_datetime=datetime_start,
                          end_datetime=datetime_end,
                          event_type=event_type,
                          reason=reason)
            events.append(event)

        return self.merge_successive_events(events)
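extract_tres_cpu() is not part of this listing. Slurm encodes TRES as
comma-separated 'id=count' pairs where id 1 is the CPU count, which matches
the '1=16' fixture value in init_load_data() above; a hedged sketch that
follows the -1 failure contract used by get_new_events():

    # Illustrative only: pull the CPU count out of a Slurm TRES string.
    def extract_tres_cpu(tres):
        for item in tres.split(','):
            key, _, value = item.partition('=')
            if key == '1':  # TRES id 1 is 'cpu' in Slurm
                return int(value)
        return -1  # get_new_events() raises HPCStatsSourceError on -1

    print(extract_tres_cpu('1=16,2=64'))  # 16
    print(extract_tres_cpu('4=1'))        # -1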
Example no. 7
    def test_merge_successive_events(self):
        """EventImporterSlurm.merge_successive_events() should merge successive
           events in the list if they are on the same node w/ same type.
        """

        e1_start = datetime(2015, 3, 2, 16, 0, 0)
        e1_end = datetime(2015, 3, 2, 16, 10, 0)
        e2_start = datetime(2015, 3, 2, 16, 10, 0)
        e2_end = datetime(2015, 3, 2, 16, 20, 0)
        e3_start = datetime(2015, 3, 2, 16, 20, 0)
        e3_end = datetime(2015, 3, 2, 16, 30, 0)

        node1 = Node('node1', self.cluster, 'model1', 'partition1', 16, 8, 0)
        node2 = Node('node2', self.cluster, 'model1', 'partition1', 16, 8, 0)

        # 3 successive events on one node with same type, they must be merged
        # into one event.
        events = [
            Event(self.cluster, node1, 4, e1_start, e1_end, 'type1',
                  'reason1'),
            Event(self.cluster, node1, 4, e2_start, e2_end, 'type1',
                  'reason1'),
            Event(self.cluster, node1, 4, e3_start, e3_end, 'type1',
                  'reason1'),
        ]
        merged = self.importer.merge_successive_events(events)
        self.assertEqual(1, len(merged))
        self.assertEqual(merged[0].start_datetime, e1_start)
        self.assertEqual(merged[0].end_datetime, e3_end)
        self.assertEqual(merged[0].event_type, 'type1')
        self.assertEqual(merged[0].reason, 'reason1')

        # 3 successive events on one node node1 with same type, with one event
        # on another node node2 in the middle: all events on node1 must be
        # merged while the other event on node2 must stay as is.
        events = [
            Event(self.cluster, node1, 4, e1_start, e1_end, 'type1',
                  'reason1'),
            Event(self.cluster, node2, 4, e2_start, e2_end, 'type1',
                  'reason1'),
            Event(self.cluster, node1, 4, e2_start, e2_end, 'type1',
                  'reason1'),
            Event(self.cluster, node1, 4, e3_start, e3_end, 'type1',
                  'reason1'),
        ]
        merged = self.importer.merge_successive_events(events)
        self.assertEqual(2, len(merged))
        self.assertEqual(merged[0].start_datetime, e1_start)
        self.assertEqual(merged[0].end_datetime, e3_end)
        self.assertEqual(merged[1].end_datetime, e2_end)
        self.assertEqual(merged[0].node, node1)
        self.assertEqual(merged[1].node, node2)

        # 3 successive events on node1 but with different types, they must not
        # be merged.
        events = [
            Event(self.cluster, node1, 4, e1_start, e1_end, 'type1',
                  'reason1'),
            Event(self.cluster, node1, 4, e2_start, e2_end, 'type2',
                  'reason1'),
            Event(self.cluster, node1, 4, e3_start, e3_end, 'type1',
                  'reason1'),
        ]
        merged = self.importer.merge_successive_events(events)
        self.assertEqual(3, len(merged))
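merge_successive_events() itself is not included in this listing; an
illustrative merge that satisfies the assertions above (fold an event into
the previous event on the same node when type, reason and timestamps line
up), not necessarily the project's actual implementation:

    # Illustrative only: merge contiguous same-type, same-reason events
    # per node, as the test cases above expect.
    def merge_successive_events(events):
        merged = []
        last_by_node = {}  # most recently kept event per node
        for event in events:
            prev = last_by_node.get(event.node)
            if (prev is not None
                    and prev.event_type == event.event_type
                    and prev.reason == event.reason
                    and prev.end_datetime == event.start_datetime):
                prev.end_datetime = event.end_datetime  # extend in place
            else:
                merged.append(event)
                last_by_node[event.node] = event
        return merged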