def network_statistics(self, **kwargs):
    """
    :param path: base path to scan for raw log files (defaults to ``default_path``)
    :param stateful: if True, the parser state is archived after reading
    :return: generator over parsed network statistics objects
    """
    path = kwargs.get("path", self.default_path)
    for base_path, workernode, run, _ in relevant_directories(path=path):
        current_path = os.path.join(base_path, workernode, run)
        converter = CSVReader()
        parser = NetworkStatisticsParser(workernode=workernode,
                                         run=run,
                                         data_source=self,
                                         path=current_path,
                                         data_reader=converter)
        converter.parser = parser
        for statistics in self._read_stream(
                path=current_path,
                workernode=workernode,
                run=run,
                stateful=kwargs.get("stateful", False),
                pattern=r"^[0-9]{10}-(process|traffic)\.log-[0-9]{8}",
                converter=converter):
            yield statistics
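# Usage sketch (assumption, not part of the original module): iterating parsed
# network statistics for every workernode/run below a data directory.
# ``FileDataSource`` is the class these methods belong to; the path is
# illustrative.
#
#   data_source = FileDataSource()
#   for statistics in data_source.network_statistics(path="/data/gnm", stateful=False):
#       print(statistics)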
def test_correct_order(self):
    # TODO: datasource should default to FileDataSource
    parser = JobParser()  # TODO: I guess I still need to set the correct data source
    reader = CSVReader()
    reader.parser = parser
    for index, data in enumerate(parser.parse(path=self._file_path())):
        self.assertIsNotNone(data)
        self.assertEqual(index, 0)
        # TODO: enumerate job to check correct order of processes
        tree = data.tree
        self.assertIsNotNone(tree)
        self.assertEqual(tree.getVertexCount(), 9109)
        for node, depth in tree.walkDFS():
            # check that children are ordered by ascending TME
            last_child = None
            if len(node.children) > 0:
                for child in node.children:
                    if last_child:
                        self.assertTrue(
                            last_child.value.tme <= child.value.tme,
                            "TME (%d) is smaller than previous TME (%d)" %
                            (child.value.tme, last_child.value.tme))
                    else:
                        self.assertTrue(
                            node.value.tme <= child.value.tme,
                            "TME of parent (%d) is greater than that of child (%d)" %
                            (node.value.tme, child.value.tme))
                    last_child = child
def test_prepare_traffic(self):
    job = Job()
    self.assertRaises(NoDataSourceException, job.prepare_traffic)
    job = Job(data_source=FileDataSource())
    self.assertRaises(FilePathException, job.prepare_traffic)

    data_source = FileDataSource()
    parser = JobParser(data_source=data_source)
    reader = CSVReader()
    reader.parser = parser
    for job in parser.parse(path=os.path.join(
            os.path.dirname(gnmutils_tests.__file__),
            "data/c00-001-001/1/1-process.csv")):
        job.prepare_traffic()
        count = 0
        for process in job.processes():
            count += len(process.traffic)
        self.assertEqual(count, 3155)
        self.assertEqual(job.db_id, "1")
        self.assertEqual(job.job_id, 4165419)
        self.assertEqual(job.gpid, 30726)
        self.assertEqual(job.uid, 14808)
        self.assertEqual(job.tme, 1405011331)
        self.assertEqual(job.exit_tme, 1405065581)
        self.assertEqual(job.exit_code, 0)
        self.assertEqual(len(job.faulty_nodes), 1)
        job.regenerate_tree()
def jobs(self, **kwargs):
    """
    :param path: base path to scan for job data (defaults to ``default_path``)
    :param data_path: directory where cached/partial data is checked
    :param source: "processed" to read converted CSV files, anything else to
        convert raw log files on the fly
    :param pattern: regular expression matching the process CSV files to read
    :param stateful: if True, the parser state is archived after reading
    :return: generator over parsed job objects
    """
    path = kwargs.get("path", self.default_path)
    if "processed" in kwargs.get("source", "processed"):
        converter = CSVReader()
        for base_path, workernode, run, filename in relevant_directories(path=path):
            current_path = os.path.join(base_path, workernode, run)
            if filename:
                for job in self.read_job(path=current_path,
                                         name=filename,
                                         converter=converter):
                    yield job
            else:
                for dir_entry in sorted(os.listdir(current_path)):
                    matches = re.match(
                        kwargs.get("pattern", r"(\d*)-process\.csv"), dir_entry)
                    if matches:
                        for job in self.read_job(path=current_path,
                                                 name=matches.group(1),
                                                 converter=converter):
                            yield job
    else:  # convert raw data
        for base_path, workernode, run, _ in relevant_directories(path=path):
            current_path = os.path.join(base_path, workernode, run)
            converter = CSVReader()
            parser = ProcessStreamParser(workernode=workernode,
                                         run=run,
                                         data_source=self,
                                         path=current_path,
                                         data_reader=converter)
            converter.parser = parser
            for job in self._read_stream(
                    path=current_path,
                    data_path=os.path.join(
                        kwargs.get("data_path", self.default_path),
                        workernode, run),
                    workernode=workernode,
                    run=run,
                    stateful=kwargs.get("stateful", False),
                    pattern=r"^[0-9]{10}-process\.log-[0-9]{8}",
                    converter=converter):
                yield job
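# Usage sketch (assumption, not part of the original module): the two branches
# of jobs() in action. The paths are illustrative.
#
#   data_source = FileDataSource()
#   # read already processed CSV files
#   for job in data_source.jobs(path="/data/gnm/processed", source="processed"):
#       print(job.job_id)
#   # convert raw log files, archiving parser state afterwards
#   for job in data_source.jobs(path="/data/gnm/raw", source="raw", stateful=True):
#       print(job.job_id)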
def _read_stream(self, path=None, data_path=None, workernode=None, run=None,
                 converter=None, stateful=False, pattern=None):
    """
    :param path: directory containing the raw log files to read
    :param data_path: directory where cached/partial data is checked
    :param workernode: name of the workernode the data belongs to
    :param run: identifier of the run
    :param converter: data reader whose parser converts the raw lines
        (defaults to a fresh :class:`CSVReader`)
    :param stateful: if True, the parser state is archived to ``path``
        after all files have been read
    :param pattern: regular expression selecting the files to read
    :return: generator over the parsed data objects
    """
    if converter is None:
        # avoid a shared mutable default argument
        converter = CSVReader()
    for dir_entry in sorted(os.listdir(path)):
        if re.match(pattern, dir_entry):
            for data_object in converter.parser.parse(
                    path=os.path.join(path, dir_entry)):
                yield data_object
    converter.parser.check_caches(path=data_path)
    for data in converter.parser.pop_data():
        yield data
    if stateful:
        converter.parser.archive_state(path=path)
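# Call sketch (assumption, for illustration only): _read_stream is the shared
# generator behind jobs(), traffics() and network_statistics() and expects a
# reader/parser pair wired together beforehand. Workernode and run values
# mirror the test data layout used elsewhere in this section.
#
#   data_source = FileDataSource()
#   current_path = "data/c00-001-001/1"
#   converter = CSVReader()
#   parser = ProcessStreamParser(workernode="c00-001-001", run="1",
#                                data_source=data_source, path=current_path,
#                                data_reader=converter)
#   converter.parser = parser
#   for obj in data_source._read_stream(
#           path=current_path, data_path=current_path,
#           workernode="c00-001-001", run="1",
#           pattern=r"^[0-9]{10}-process\.log-[0-9]{8}", converter=converter):
#       print(obj)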
def test_parsing(self):
    data_source = FileDataSource()
    data_reader = CSVReader()
    parser = NetworkStatisticsParser(data_source=data_source)
    data_reader.parser = parser
    # nothing is returned by the NetworkStatisticsParser while parsing;
    # results are collected and retrieved via pop_data afterwards
    for _ in parser.parse(path=self.traffic_file_path()):
        pass
    for _ in parser.parse(path=self.process_file_path()):
        pass
    count = 0
    for data in parser.pop_data():
        for networkstats in data.values():
            count += networkstats.event_count
    self.assertEqual(count, 19998)
    parser.check_caches()
    parser.clear_caches()
    self.assertEqual(parser.data, {})
def read_traffic(self, path, name, converter=None):
    """
    :param path: directory containing the traffic CSV file
    :param name: base name of the file, i.e. ``<name>-traffic.csv``
    :param converter: data reader used by the parser (defaults to a fresh
        :class:`CSVReader`)
    :return: generator over the parsed traffic objects
    """
    if converter is None:
        # avoid a shared mutable default argument
        converter = CSVReader()
    parser = TrafficParser(data_reader=converter)
    converter.parser = parser
    try:
        file_path = os.path.join(path, "%s-traffic.csv" % name)
    except AttributeError:
        raise FilePathException(value="path=%s, name=%s" % (path, name))
    return parser.parse(path=file_path)
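# Usage sketch (assumption, not part of the original module): reading the
# traffic CSV for job "1"; the path mirrors the test data layout used above.
#
#   data_source = FileDataSource()
#   for traffic in data_source.read_traffic(path="data/c00-001-001/1", name="1"):
#       print(traffic)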
def traffics(self, **kwargs):
    """
    :param path: base path to scan for traffic data (defaults to ``default_path``)
    :param data_path: directory where cached/partial data is checked
    :param source: "processed" (currently a no-op) or anything else to
        convert raw log files
    :param stateful: if True, the parser state is archived after reading
    :return: generator over parsed traffic objects
    """
    path = kwargs.get("path", self.default_path)
    if "processed" in kwargs.get("source", "processed"):
        pass
    else:  # convert raw data
        for base_path, workernode, run, _ in relevant_directories(path=path):
            current_path = os.path.join(base_path, workernode, run)
            converter = CSVReader()
            parser = TrafficStreamParser(workernode=workernode,
                                         run=run,
                                         data_source=self,
                                         path=current_path,
                                         data_reader=converter)
            converter.parser = parser
            for traffic in self._read_stream(
                    path=current_path,
                    data_path=os.path.join(
                        kwargs.get("data_path", self.default_path),
                        workernode, run),
                    workernode=workernode,
                    run=run,
                    stateful=kwargs.get("stateful", False),
                    pattern=r"^[0-9]{10}-traffic\.log-[0-9]{8}",
                    converter=converter):
                yield traffic
def read_job(self, **kwargs):
    """
    :param path: directory containing the process CSV file
    :param name: base name of the file, i.e. ``<name>-process.csv``, or a
        full file name ending in ``.csv``
    :param converter: data reader used by the parser (defaults to a fresh
        :class:`CSVReader`)
    :return: generator over the parsed job objects, or None if no name is given
    """
    path = kwargs.get("path", None)
    name = kwargs.get("name", None)
    converter = kwargs.get("converter", CSVReader())
    if name is None:
        return None
    parser = JobParser(data_source=self,
                       data_reader=converter,
                       path=path,
                       name=name)
    converter.parser = parser
    if ".csv" in str(name):
        return parser.parse(path=os.path.join(path, name))
    return parser.parse(path=os.path.join(path, "%s-process.csv" % name))
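# Usage sketch (assumption, not part of the original module): ``name`` may be
# a bare identifier or a full CSV file name; both resolve to the same file.
# The path mirrors the test data layout used above.
#
#   data_source = FileDataSource()
#   for job in data_source.read_job(path="data/c00-001-001/1", name="1"):
#       print(job.job_id)
#   for job in data_source.read_job(path="data/c00-001-001/1",
#                                   name="1-process.csv"):
#       print(job.job_id)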