Example #1
 def network_statistics(self, **kwargs):
     """
     :param path:
     :param stateful:
     :return:
     """
     path = kwargs.get("path", self.default_path)
     for base_path, workernode, run, _ in relevant_directories(path=path):
         current_path = os.path.join(base_path, workernode, run)
         converter = CSVReader()
         parser = NetworkStatisticsParser(workernode=workernode,
                                          run=run,
                                          data_source=self,
                                          path=current_path,
                                          data_reader=converter)
         converter.parser = parser
         for statistics in self._read_stream(
                 path=current_path,
                 workernode=workernode,
                 run=run,
                 stateful=kwargs.get("stateful", False),
                 pattern="^[0-9]{10}-(process|traffic).log-[0-9]{8}",
                 converter=converter):
             yield statistics
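
A minimal usage sketch for the generator above, assuming it is a method of FileDataSource (as the other examples suggest); the path is made up and imports are omitted, matching the examples:

# Hypothetical usage of network_statistics(); owner class and path are
# assumptions for illustration only.
data_source = FileDataSource()
for statistics in data_source.network_statistics(path="/tmp/gnm-data",
                                                 stateful=False):
    print(statistics)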
Example #2
    def test_correct_order(self):
        # TODO: datasource should default to FileDatasource
        # TODO: I guess I still need to set the correct data source
        parser = JobParser()
        reader = CSVReader()
        reader.parser = parser
        for index, data in enumerate(parser.parse(path=self._file_path())):
            self.assertIsNotNone(data)
        self.assertEqual(index, 0)

        # TODO: enumerate job to check correct order of processes
        tree = data.tree
        self.assertIsNotNone(tree)
        self.assertEqual(tree.getVertexCount(), 9109)

        for node, depth in tree.walkDFS():
            # check that children are ordered by non-decreasing TME
            last_child = None
            if len(node.children) > 0:
                for child in node.children:
                    if last_child:
                        self.assertTrue(
                            last_child.value.tme <= child.value.tme,
                            "TME (%d) is smaller than previous TME (%d)" %
                            (child.value.tme, last_child.value.tme))
                    else:
                        self.assertTrue(
                            node.value.tme <= child.value.tme,
                            "TME of parent (%d) is greater than TME of child (%d)"
                            % (node.value.tme, child.value.tme))
                    last_child = child
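
The loop above asserts that every node's children start no earlier than their parent and are ordered by non-decreasing TME. The same property, factored into a helper that uses only the tree interface already exercised by the test (walkDFS, children, value.tme); the helper name is ours:

# Sketch of the ordering property checked in test_correct_order().
def tme_order_holds(tree):
    for node, _ in tree.walkDFS():
        tmes = [child.value.tme for child in node.children]
        # children must not start before their parent and must be sorted
        if tmes and (tmes[0] < node.value.tme or tmes != sorted(tmes)):
            return False
    return True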
Example #3
    def test_prepare_traffic(self):
        job = Job()
        self.assertRaises(NoDataSourceException, job.prepare_traffic)

        job = Job(data_source=FileDataSource())
        self.assertRaises(FilePathException, job.prepare_traffic)

        data_source = FileDataSource()
        parser = JobParser(data_source=data_source)
        reader = CSVReader()
        reader.parser = parser
        for job in parser.parse(
                path=os.path.join(os.path.dirname(gnmutils_tests.__file__),
                                  "data/c00-001-001/1/1-process.csv")):
            job.prepare_traffic()
        count = 0
        for process in job.processes():
            count += len(process.traffic)
        self.assertEqual(count, 3155)
        self.assertEqual(job.db_id, "1")
        self.assertEqual(job.job_id, 4165419)
        self.assertEqual(job.gpid, 30726)
        self.assertEqual(job.uid, 14808)
        self.assertEqual(job.tme, 1405011331)
        self.assertEqual(job.exit_tme, 1405065581)
        self.assertEqual(job.exit_code, 0)
        self.assertEqual(len(job.faulty_nodes), 1)
        job.regenerate_tree()
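
The first two assertions document the preconditions of prepare_traffic(): the job needs a data source and a resolvable file path. A defensive call site might therefore look like the sketch below; the handling strategy is an assumption:

# Hypothetical call site guarding against the exceptions tested above.
job = Job(data_source=FileDataSource())
try:
    job.prepare_traffic()
except NoDataSourceException:
    pass  # no data source attached to the job
except FilePathException:
    pass  # the data source cannot resolve a file for this job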
Example #4
 def jobs(self, **kwargs):
     """
     :param path:
     :param source:
     :param pattern:
     :param stateful:
     :return:
     """
     path = kwargs.get("path", self.default_path)
     if "processed" in kwargs.get("source", "processed"):
         converter = CSVReader()
         for base_path, workernode, run, filename in relevant_directories(
                 path=path):
             current_path = os.path.join(base_path, workernode, run)
             if filename:
                 for job in self.read_job(path=current_path,
                                          name=filename,
                                          converter=converter):
                     yield job
             else:
                 for dir_entry in sorted(os.listdir(current_path)):
                     matches = re.match(
                         kwargs.get("pattern", "(\d*)-process.csv"),
                         dir_entry)
                     if matches:
                         for job in self.read_job(path=current_path,
                                                  name=matches.group(1),
                                                  converter=converter):
                             yield job
     else:
         # convert raw data
         for base_path, workernode, run, _ in relevant_directories(
                 path=path):
             current_path = os.path.join(base_path, workernode, run)
             converter = CSVReader()
             parser = ProcessStreamParser(workernode=workernode,
                                          run=run,
                                          data_source=self,
                                          path=current_path,
                                          data_reader=converter)
             converter.parser = parser
             for job in self._read_stream(
                     path=current_path,
                     data_path=os.path.join(
                         kwargs.get("data_path", self.default_path),
                         workernode, run),
                     workernode=workernode,
                     run=run,
                     stateful=kwargs.get("stateful", False),
                     pattern="^[0-9]{10}-process.log-[0-9]{8}",
                     converter=converter):
                 yield job
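
A usage sketch covering both branches of jobs(), again assuming a FileDataSource owner; the directories and the "raw" source label are illustrative only:

data_source = FileDataSource()

# Read already converted jobs from processed CSV files.
for job in data_source.jobs(path="/tmp/gnm-processed", source="processed"):
    print(job.job_id)

# Convert raw process streams instead, archiving parser state afterwards.
for job in data_source.jobs(path="/tmp/gnm-raw",
                            data_path="/tmp/gnm-processed",
                            source="raw",
                            stateful=True):
    print(job.job_id)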
Example #5
 def _read_stream(self,
                  path=None,
                  data_path=None,
                  workernode=None,
                  run=None,
                  converter=CSVReader(),
                  stateful=False,
                  pattern=None):
     """
     :param path:
     :param data_path:
     :param workernode:
     :param run:
     :param converter:
     :param stateful:
     :param pattern:
     :return:
     """
     for dir_entry in sorted(os.listdir(path)):
         if re.match(pattern, dir_entry):
             for data_object in converter.parser.parse(
                     path=os.path.join(path, dir_entry)):
                 yield data_object
     converter.parser.check_caches(path=data_path)
     for data in converter.parser.pop_data():
         yield data
     if stateful:
         converter.parser.archive_state(path=path)
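
_read_stream drives the converter's parser over every file below path that matches pattern, then flushes whatever the parser still caches and optionally archives its state. Stripped of the caching and state handling, the core pattern is roughly the sketch below; "parser" stands for any object with the parse()/pop_data() interface used above:

import os
import re

def read_matching_files(path, parser, pattern):
    # parse all matching files in a stable order
    for dir_entry in sorted(os.listdir(path)):
        if re.match(pattern, dir_entry):
            for data_object in parser.parse(path=os.path.join(path, dir_entry)):
                yield data_object
    # flush data still buffered by the parser
    for data in parser.pop_data():
        yield data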
Example #6
 def test_parsing(self):
     data_source = FileDataSource()
     data_reader = CSVReader()
     parser = NetworkStatisticsParser(data_source=data_source)
     data_reader.parser = parser
     # nothing is returned by the NetworkStatisticsParser
     for _ in parser.parse(path=self.traffic_file_path()):
         pass
     for _ in parser.parse(path=self.process_file_path()):
         pass
     count = 0
     for data in parser.pop_data():
         for networkstats in data.values():
             count += networkstats.event_count
     self.assertEqual(count, 19998)
     parser.check_caches()
     parser.clear_caches()
     self.assertEqual(parser.data, {})
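
The aggregation step in the middle of the test (summing event_count over everything the parser still holds) can be reused on its own; a sketch using the same pop_data() interface:

# Sketch of the event-count aggregation used in test_parsing().
def total_event_count(parser):
    count = 0
    for data in parser.pop_data():
        for networkstats in data.values():
            count += networkstats.event_count
    return count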
Example #7
 def read_traffic(self, path, name, converter=CSVReader()):
     """
     :param path:
     :param name:
     :param converter:
     :return:
     """
     parser = TrafficParser(data_reader=converter)
     converter.parser = parser
     try:
         file_path = os.path.join(path, "%s-traffic.csv" % name)
     except AttributeError:
         raise FilePathException(value="path=%s, name=%s" % (path, name))
     return parser.parse(path=file_path)
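
A usage sketch, assuming read_traffic() is a FileDataSource method and that name is the numeric prefix of a "<name>-traffic.csv" file; the directory is made up:

# Hypothetical usage; the layout mirrors the test data used elsewhere.
data_source = FileDataSource()
for traffic in data_source.read_traffic(path="/tmp/c00-001-001/1", name="1"):
    print(traffic)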
Example #8
 def traffics(self, **kwargs):
     """
     :param path:
     :param data_path:
     :param source:
     :param stateful:
     :return:
     """
     path = kwargs.get("path", self.default_path)
     if "processed" in kwargs.get("source", "processed"):
         pass
     else:
         # convert raw data
         for base_path, workernode, run, _ in relevant_directories(
                 path=path):
             current_path = os.path.join(base_path, workernode, run)
             converter = CSVReader()
             parser = TrafficStreamParser(workernode=workernode,
                                          run=run,
                                          data_source=self,
                                          path=current_path,
                                          data_reader=converter)
             converter.parser = parser
             for traffic in self._read_stream(
                     path=current_path,
                     data_path=os.path.join(
                         kwargs.get("data_path", self.default_path),
                         workernode, run),
                     workernode=workernode,
                     run=run,
                     stateful=kwargs.get("stateful", False),
                     pattern="^[0-9]{10}-traffic.log-[0-9]{8}",
                     converter=converter):
                 yield traffic
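
Since the "processed" branch is still a bare pass, traffics() currently only converts raw data. A usage sketch for that branch; the paths are illustrative:

# Hypothetical usage of the raw-conversion branch.
data_source = FileDataSource()
for traffic in data_source.traffics(path="/tmp/gnm-raw",
                                    data_path="/tmp/gnm-processed",
                                    source="raw",
                                    stateful=True):
    print(traffic)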
Example #9
 def read_job(self, **kwargs):
     """
     :param path:
     :param name:
     :param converter:
     :return:
     """
     path = kwargs.get("path", None)
     name = kwargs.get("name", None)
     converter = kwargs.get("converter", CSVReader())
     if name is None:
         return None
     parser = JobParser(data_source=self,
                        data_reader=converter,
                        path=path,
                        name=name)
     converter.parser = parser
     if ".csv" in str(name):
         return parser.parse(path=os.path.join(path, name))
     return parser.parse(path=os.path.join(path, "%s-process.csv" % name))