async def _launch_bench(self) -> asyncio.subprocess.Process:
    """Launch the Spark streaming receiver plus its signal-invoker helper.

    Starts a detached ``signal_invoker.py`` process (fire-and-forget),
    records the launch timestamp, then runs ``spark-submit`` inside this
    driver's cgroup.

    Returns:
        The asyncio subprocess handle for the spark-submit command.
    """
    data_stream_python_home = BenchDriver.get_bench_home('data-stream-python')
    signal_invoker_home = data_stream_python_home + '/signal-invoker'

    cmd = '{0}/bin/spark-submit {1}/receiver/SPReceive.py {2}'.format(
        self._bench_home, data_stream_python_home,
        BenchDriver.get_bench_home('stream-ip-address'))

    # Maven must be on PATH for the Spark job's dependency resolution.
    env = os.environ.copy()
    env['M2_HOME'] = '/home/nvidia/.m2'
    env['MAVEN_HOME'] = '/home/nvidia/dcslab/packages/apache-maven-3.6.0'
    env['PATH'] = env['MAVEN_HOME'] + '/bin/:' + env['PATH']

    # Fire-and-forget helper that feeds/signals the streaming source;
    # its handle is intentionally not kept.
    Popen([
        'python3.7', signal_invoker_home + '/signal_invoker.py',
        BenchDriver.get_bench_home('stream-ip-address')
    ])

    self.sparkGPU_launched_time = time.time()
    proc = await self._cgroup.exec_command(
        cmd, env=env,
        stdout=asyncio.subprocess.DEVNULL,
        stderr=asyncio.subprocess.DEVNULL)

    # BUG FIX: the original called the blocking time.sleep(3) inside a
    # coroutine, stalling the whole event loop during the warm-up wait;
    # asyncio.sleep keeps other tasks running.
    await asyncio.sleep(3)
    return proc
class ParsecDriver(BenchDriver):
    """Driver for the PARSEC benchmark suite (native input set)."""

    _benches: Set[str] = {
        'streamcluster', 'canneal', 'swaptions', 'x264', 'ferret',
        'bodytrack', 'blackscholes', 'dedup', 'facesim', 'fluidanimate',
        'freqmine', 'raytrace', 'vips'
    }
    bench_name: str = 'parsec'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in ParsecDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # 'raytrace' launches a binary called 'rtview'; every other bench's
        # executable shares the bench name.
        target = 'rtview' if self._name == 'raytrace' else self._name
        for child in self._async_proc_info.children(recursive=True):  # type: psutil.Process
            if child.name() == target and child.is_running():
                return child
        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = (f'{self._bench_home}/parsecmgmt -a run -p {self._name} '
               f'-i native -n {self._num_threads}')
        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
class SpecDriver(BenchDriver):
    """Driver for SPEC CPU benchmarks run through 'runspec'."""

    _benches: Set[str] = {
        'lbm', 'libquantum', 'GemsFDTD', 'sphinx', 'gcc', 'zeusmp', 'sjeng'
    }
    bench_name: str = 'spec'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SpecDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # SPEC builds workload binaries as '<name>_base.proc'; sphinx ships
        # under a special executable name.
        if self._name == 'sphinx':
            target = 'sphinx_livepretend_base.proc'
        else:
            target = f'{self._name}_base.proc'

        for child in self._async_proc_info.children(recursive=True):  # type: psutil.Process
            if child.is_running() and child.name() == target:
                return child
        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = ('runspec --config=vm.cfg --size=ref --noreportable '
               f'--delay=0 --nobuild --iteration=1 {self._name}')
        bench_bin = os.path.join(SpecDriver._bench_home, 'bin')
        bench_lib = os.path.join(bench_bin, 'lib')

        # runspec needs SPEC/SPECPERLLIB plus a C locale to run reliably.
        env = os.environ.copy()
        env['PATH'] = ':'.join((bench_bin, env['PATH']))
        env['SPEC'] = SpecDriver._bench_home
        env['SPECPERLLIB'] = ':'.join((bench_bin, bench_lib))
        env['LC_LANG'] = 'C'
        env['LC_ALL'] = 'C'

        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL, env=env)

    def stop(self) -> None:
        # runspec spawns the real workload process; kill it before tearing
        # down the wrapper. It may already be gone — that is fine.
        try:
            bench = self._find_bench_proc()
            if bench is not None:
                bench.kill()
        except psutil.NoSuchProcess:
            pass
        super().stop()

    @BenchDriver._Decorators.ensure_running
    def pause(self) -> None:
        # Suspend both the runspec wrapper and the actual workload.
        self._async_proc.send_signal(SIGSTOP)
        self._find_bench_proc().suspend()

    @BenchDriver._Decorators.ensure_running
    def resume(self) -> None:
        # Resume both the runspec wrapper and the actual workload.
        self._async_proc.send_signal(SIGCONT)
        self._find_bench_proc().resume()
class SparkGPUDriver(BenchDriver):
    """Driver for SparkGPU example workloads launched via 'run-example'."""

    _benches: Set[str] = {
        'GpuDSArrayMult', 'GpuEnablerExample', 'GpuEnablerCodegen', 'GpuKMeans',
        'GpuKMeansBatch', 'GpuKMeansBatchSmall', 'SparkDSLR', 'SparkDSLRmod',
        'SparkGPULR', 'perfDebug', 'GpuKMeansCpu', 'GpuKMeansGpu',
        'GpuKMeansBatchCpu', 'GpuKMeansBatchGpu', 'SparkDSLRCpu', 'SparkDSLRGpu',
        'SparkGPULRCpu', 'SparkGPULRGpu'
    }
    bench_name: str = 'sparkgpu'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SparkGPUDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # Spark runs benchmarks inside a JVM ('java'/'javac'), so matching by
        # executable name is unreliable; take the launcher's first child.
        children = self._async_proc_info.children(True)
        # BUG FIX: the original used 'len(children) is 0' — identity
        # comparison against an int literal is implementation-dependent and
        # emits a SyntaxWarning on CPython >= 3.8. Use truthiness instead.
        if not children:
            return None
        return children[0]

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = '{0}/bin/run-example {1}'.format(self._bench_home, self._name)

        # Maven must be on PATH for the Spark job's dependency resolution.
        env = os.environ.copy()
        env['M2_HOME'] = '/home/nvidia/.m2'
        env['MAVEN_HOME'] = '/home/nvidia/dcslab/packages/apache-maven-3.6.0'
        env['PATH'] = env['MAVEN_HOME'] + '/bin/:' + env['PATH']

        return await self._cgroup.exec_command(
            cmd, env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
class SparkGPUDataReceiverPythonDriver(BenchDriver):
    """Driver for the Python Spark streaming receiver (SPReceive.py)."""

    _benches: Set[str] = {'SPReceiver'}
    bench_name: str = 'spark-submit'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SparkGPUDataReceiverPythonDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # spark-submit wraps the actual workload in a child JVM; take the
        # launcher's first child.
        children = self._async_proc_info.children(True)
        # BUG FIX: the original used 'len(children) is 0' — identity
        # comparison against an int literal is implementation-dependent and
        # emits a SyntaxWarning on CPython >= 3.8. Use truthiness instead.
        if not children:
            return None
        return children[0]

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        """Launch the Spark streaming receiver plus its signal-invoker helper.

        Starts a detached ``signal_invoker.py`` process (fire-and-forget),
        records the launch timestamp, then runs ``spark-submit`` inside this
        driver's cgroup.
        """
        data_stream_python_home = BenchDriver.get_bench_home('data-stream-python')
        signal_invoker_home = data_stream_python_home + '/signal-invoker'

        cmd = '{0}/bin/spark-submit {1}/receiver/SPReceive.py {2}'.format(
            self._bench_home, data_stream_python_home,
            BenchDriver.get_bench_home('stream-ip-address'))

        # Maven must be on PATH for the Spark job's dependency resolution.
        env = os.environ.copy()
        env['M2_HOME'] = '/home/nvidia/.m2'
        env['MAVEN_HOME'] = '/home/nvidia/dcslab/packages/apache-maven-3.6.0'
        env['PATH'] = env['MAVEN_HOME'] + '/bin/:' + env['PATH']

        # Fire-and-forget helper that feeds/signals the streaming source.
        Popen([
            'python3.7', signal_invoker_home + '/signal_invoker.py',
            BenchDriver.get_bench_home('stream-ip-address')
        ])

        self.sparkGPU_launched_time = time.time()
        proc = await self._cgroup.exec_command(
            cmd, env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)

        # BUG FIX: the original called the blocking time.sleep(3) inside a
        # coroutine, stalling the whole event loop during the warm-up wait;
        # asyncio.sleep keeps other tasks running.
        await asyncio.sleep(3)
        return proc
class NPBDriver(BenchDriver):
    """Driver for the NAS Parallel Benchmarks (OpenMP build)."""

    _benches: Set[str] = {'CG', 'IS', 'DC', 'EP', 'MG', 'FT', 'SP', 'BT', 'LU', 'UA'}
    bench_name: str = 'npb'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    # TODO: variable data size
    # Problem class (input size) used for each kernel.
    DATA_SET_MAP = {
        'CG': 'C', 'IS': 'D', 'DC': 'B', 'EP': 'C', 'MG': 'C',
        'FT': 'C', 'SP': 'C', 'BT': 'C', 'LU': 'C', 'UA': 'C',
    }

    @property
    def _exec_name(self) -> str:
        # NPB binaries are named '<kernel>.<class>.x', e.g. 'cg.C.x'.
        data_set = NPBDriver.DATA_SET_MAP[self._name]
        return f'{self._name.lower()}.{data_set}.x'

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in NPBDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        proc = self._async_proc_info
        if proc.name() == self._exec_name and proc.is_running():
            return proc
        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = f'{self._bench_home}/bin/{self._exec_name}'
        env = {'OMP_NUM_THREADS': str(self._num_threads)}
        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL, env=env)
class RodiniaDriver(BenchDriver):
    """Driver for Rodinia benchmarks (OpenMP variants)."""

    _benches: Set[str] = {'nn', 'kmeans', 'cfd', 'particlefilter', 'bfs'}
    bench_name: str = 'rodinia'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in RodiniaDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # cfd and particlefilter ship executables whose names differ from the
        # bench name; everything else matches directly.
        target = {
            'cfd': 'euler3d_cpu',
            'particlefilter': 'particle_filter',
        }.get(self._name, self._name)

        for child in self._async_proc_info.children(recursive=True):  # type: psutil.Process
            if child.name() == target and child.is_running():
                return child
        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = f'{self._bench_home}/openmp/{self._name}/run'
        # Pin threads per the driver's core binding.
        env = {
            'OMP_NUM_THREADS': str(self._num_threads),
            'GOMP_CPU_AFFINITY': str(self._binding_cores),
        }
        return await self._cgroup.exec_command(
            cmd, env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
class Cifar10Driver(BenchDriver):
    """Driver for the TensorFlow CIFAR-10 train/eval scripts."""

    _benches: Set[str] = {'cifar10_train', 'cifar10_eval'}
    bench_name: str = 'cifar10'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in Cifar10Driver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # The launcher is 'python <home>/<name>.py ...'; derive the script
        # name from argv[1] and match it against this driver's bench name.
        cmdline = self._async_proc_info.cmdline()
        exec_cmdline = cmdline[1]
        exec_name = exec_cmdline.split('/')[-1]
        # BUG FIX: the original used rstrip('.py'), which strips any trailing
        # run of the characters '.', 'p', 'y' (e.g. 'copy.py' -> 'co'), not
        # the literal extension. Remove the suffix explicitly.
        if exec_name.endswith('.py'):
            exec_name = exec_name[:-3]

        if self._name in exec_name and self._async_proc_info.is_running():
            return self._async_proc_info
        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        # _benches guarantees the name contains either 'train' or 'eval'.
        if 'train' in self._name:
            args = '--max_steps 1000'
        else:  # eval
            args = '--run_multiple 10 --eval_interval_secs 1'
        cmd = 'python {0}/{1}.py {2}'.format(self._bench_home, self._name, args)
        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
class InceptionDriver(BenchDriver):
    """Driver for the TensorFlow Inception/ImageNet train/eval workloads."""

    _benches: Set[str] = {'imagenet_train', 'imagenet_eval'}
    bench_name: str = 'inception'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in InceptionDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # Derive the script name from argv[1] and match it against this
        # driver's bench name.
        cmdline = self._async_proc_info.cmdline()
        exec_cmdline = cmdline[1]
        exec_name = exec_cmdline.split('/')[-1]
        # BUG FIX: the original used rstrip('.py'), which strips any trailing
        # run of the characters '.', 'p', 'y' (e.g. 'copy.py' -> 'co'), not
        # the literal extension. Remove the suffix explicitly.
        if exec_name.endswith('.py'):
            exec_name = exec_name[:-3]

        if self._name in exec_name and self._async_proc_info.is_running():
            return self._async_proc_info
        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        # _benches guarantees the name contains either 'train' or 'eval'.
        if 'train' in self._name:
            args = '--max_steps 5 --data_dir /ssd2/converted_data'
        else:  # eval
            args = '--num_examples 800 --run_once --data_dir /ssd2/converted_data --checkpoint_dir /tmp/imagenet_train_for_eval'
        cmd = '{0}/{1} {2}'.format(self._bench_home, self._name, args)
        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
class PyTorchDriver(BenchDriver):
    """Driver for PyTorch workloads named '<model>-<op>-<pu>'.

    e.g. 'alexnet-train-gpu': model=alexnet, op=train, processing unit=gpu.
    """

    _benches: Set[str] = {
        'alexnet-train-gpu', 'alexnet-eval-cpu', 'alexnet-eval-gpu',
        'alexnet-train-cpu', 'vgg11-train-gpu', 'vgg11-eval-cpu',
        'inceptionv3-train-gpu', 'inceptionv3-eval-cpu', 'inceptionv3-eval-gpu',
        'inceptionv3-train-cpu', 'resnet152-train-gpu', 'resnet152-eval-cpu',
        'squeezenet1_1-eval-cpu', 'squeezenet1_1-eval-gpu',
        'squeezenet1_1-train-cpu', 'squeezenet1_1-train-gpu'
    }
    bench_name: str = 'pytorch'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    # Kept for backward compatibility; not read by the methods below.
    model = None
    op_type = None
    pu_type = None

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in PyTorchDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        """Match the running process against '<model>-<script>' and return it.

        When argv is too short or the name cannot be reconstructed (the
        workload is finishing), fall back to returning the launcher process.
        """
        cmdline = self._async_proc_info.cmdline()
        try:
            if self._async_proc_info.is_running():
                exec_cmdline = cmdline[1]
                exec_name = exec_cmdline.split('/')[-1]
                # BUG FIX: the original used rstrip('.py'), which strips any
                # trailing run of '.', 'p', 'y' characters, not the literal
                # extension. Remove the suffix explicitly.
                if exec_name.endswith('.py'):
                    exec_name = exec_name[:-3]

                # Recover the model name from argv; torchvision spells
                # inception as 'inception_v3' while bench names use
                # 'inceptionv3'.
                model = ''
                for word in cmdline:
                    if word in ('alexnet', 'vgg11', 'resnet152', 'squeezenet1_1'):
                        model = word
                    elif word in ('inception_v3', 'inceptionv3'):
                        model = 'inceptionv3'

                full_exec_name = model + '-' + exec_name
                print(f'self._name: {self._name}')
                print(f'full_exec_name: {full_exec_name}')
                if self._name == full_exec_name and self._async_proc_info.is_running():
                    return self._async_proc_info
        except (IndexError, UnboundLocalError):
            print(f'Inception v3 finished!')
            print(f'self._async_proc_info: {self._async_proc_info}')
            print(
                f'self._async_proc_info.is_running(): {self._async_proc_info.is_running()}'
            )
            return self._async_proc_info

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        # Bench name is '<model>-<op_type>-<pu_type>'.
        model, op_type, pu_type = self._name.split('-')

        print(f'model: {model}')
        print(f'op_type: {op_type}')
        print(f'pu_type: {pu_type}')
        print(f'batch_size: {self._batch_size}')

        # torchvision's model identifier uses an underscore.
        # (The original had two identical mappings split over eval/train.)
        if model == 'inceptionv3':
            model = 'inception_v3'

        # The eval/train scripts differ only in their '-cpu'/'-gpu' suffix,
        # so interpolate pu_type instead of four near-identical branches.
        if op_type == 'eval':
            cmd = (f'python {self._bench_home}/eval-{pu_type}.py /ssd/raw_data '
                   f'-a {model} -b {self._batch_size} -e --pretrained')
        else:  # train
            cmd = (f'python {self._bench_home}/train-{pu_type}.py -a {model} '
                   f'--lr 0.01 -b {self._batch_size} --epochs 1 /ssd/raw_data')

        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
class SSDDriver(BenchDriver):
    """
    SSD: Single Shot MultiBox Detection (Object Detection)

    Bench names follow 'ssd-<op>-<model>-<pu>[-<data>]'.
    """
    _benches: Set[str] = {
        'ssd-eval-vgg-cpu-small', 'ssd-eval-vgg-cpu-medium',
        'ssd-eval-vgg-cpu-large', 'ssd-eval-vgg-gpu-small',
        'ssd-eval-vgg-gpu-medium', 'ssd-eval-vgg-gpu-large',
        'ssd-eval-vgg-gpu-original', 'ssd-eval-vgg-cpu-original',
        'ssd-train-vgg-cpu', 'ssd-train-vgg-gpu', 'ssd-eval-vgg-gpu-dynamic'
    }
    bench_name: str = 'ssd'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SSDDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        """Reconstruct '<script>-<model>-<pu>-<data>' from argv and match it.

        For train runs (and unknown set types) data_type stays unbound, so
        building full_exec_name raises UnboundLocalError; the except clause
        deliberately treats the launcher process itself as the benchmark.
        """
        cmdline = self._async_proc_info.cmdline()
        print(f'cmdline : {cmdline}')
        try:
            if self._async_proc_info.is_running():
                exec_cmdline = cmdline[1]
                exec_name = exec_cmdline.split('/')[-1]
                # BUG FIX: the original used rstrip('.py'), which strips any
                # trailing run of '.', 'p', 'y' characters, not the literal
                # extension. Remove the suffix explicitly.
                if exec_name.endswith('.py'):
                    exec_name = exec_name[:-3]

                # FIXME: hard-coded and need to add more data types
                model = 'vgg'
                pu_type = ''
                # BUG FIX: the original scanned the path components of
                # argv[1]; the '--cuda'/'--set_type' values live in the full
                # argv (cmdline), as in PyTorchDriver._find_bench_proc.
                for word in cmdline:
                    if word == 'False':
                        pu_type = 'cpu'
                    elif word == 'True':
                        pu_type = 'gpu'
                    elif word == 'test_small':
                        data_type = 'small'
                    elif word == 'test_medium':
                        data_type = 'medium'
                    elif word == 'test_large':
                        data_type = 'large'
                    elif word == 'test_original':
                        data_type = 'backup'

                full_exec_name = exec_name + '-' + model + '-' + pu_type + '-' + data_type
                print(f'self._name: {self._name}')
                print(f'full_exec_name: {full_exec_name}')
                if self._name == full_exec_name and self._async_proc_info.is_running():
                    return self._async_proc_info
        except (IndexError, UnboundLocalError):
            print(f'self._async_proc_info: {self._async_proc_info}')
            print(
                f'self._async_proc_info.is_running(): {self._async_proc_info.is_running()}'
            )
            return self._async_proc_info

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        # Bench name is 'ssd-<op_type>-<model>-<pu_type>[-<data_type>]'.
        splitted_name = self._name.split('-')
        op_type = splitted_name[1]   # eval or train
        model = splitted_name[2]     # default: vgg
        pu_type = splitted_name[3]   # cpu or gpu

        # FIXME: hard-coded and need to add more data types
        if op_type == "eval":
            data_type = splitted_name[4]
            if "small" in data_type:
                data_type = "test_small"
            elif "medium" in data_type:
                data_type = "test_medium"
            elif "large" in data_type:
                data_type = "test_large"
            elif "original" in data_type:
                data_type = "test_backup"
            elif "dynamic" in data_type:
                data_type = "dynamic_load"
        else:
            # train runs take no data-set suffix
            data_type = None

        print(f'model: {model}')
        print(f'op_type: {op_type}')
        print(f'pu_type: {pu_type}')
        print(f'data_type: {data_type}')
        print(f'batch_size: {self._batch_size}')

        if op_type == 'eval' and pu_type == 'cpu':
            cmd = f'python {self._bench_home}/ssd-eval.py --cuda False --set_type {data_type}_15'
        elif op_type == 'eval' and pu_type == 'gpu':
            cmd = f'python {self._bench_home}/ssd-eval.py --cuda True --set_type {data_type}_200'
        elif op_type == 'train' and pu_type == 'cpu':
            cmd = f'python {self._bench_home}/ssd-train.py --cuda False --batch_size 2'
        elif op_type == 'train' and pu_type == 'gpu':
            cmd = f'python {self._bench_home}/ssd-train.py --cuda True --batch_size 2'

        # NOTE(review): stdout is PIPE here (other drivers use DEVNULL) —
        # presumably the eval output is consumed elsewhere; confirm before
        # changing.
        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.PIPE)