Example #1
    async def _launch_bench(self) -> asyncio.subprocess.Process:
        data_stream_python_home = BenchDriver.get_bench_home(
            'data-stream-python')
        signal_invoker_home = data_stream_python_home + '/signal-invoker'
        #cmd = '{0}/bin/spark-submit {1}/receiver/sparkgpu_receiver_code.py'.format(self._bench_home, data_stream_python_home)
        cmd = '{0}/bin/spark-submit {1}/receiver/SPReceive.py {2}'.format(
            self._bench_home, data_stream_python_home,
            BenchDriver.get_bench_home('stream-ip-address'))

        env = os.environ.copy()
        env['M2_HOME'] = '/home/nvidia/.m2'
        env['MAVEN_HOME'] = '/home/nvidia/dcslab/packages/apache-maven-3.6.0'
        env['PATH'] = env['MAVEN_HOME'] + '/bin/:' + env['PATH']

        proc = Popen([
            'python3.7', signal_invoker_home + '/signal_invoker.py',
            BenchDriver.get_bench_home('stream-ip-address')
        ])

        self.sparkGPU_launched_time = time.time()
        bench_proc = await self._cgroup.exec_command(
            cmd,
            env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
        # Give spark-submit a moment to start without blocking the event loop.
        await asyncio.sleep(3)
        return bench_proc

class ParsecDriver(BenchDriver):
    _benches: Set[str] = {'streamcluster', 'canneal', 'swaptions', 'x264', 'ferret', 'bodytrack', 'blackscholes',
                          'dedup', 'facesim', 'fluidanimate', 'freqmine', 'raytrace', 'vips'}
    bench_name: str = 'parsec'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in ParsecDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        if self._name == 'raytrace':
            exec_name = 'rtview'
        else:
            exec_name = self._name

        for process in self._async_proc_info.children(recursive=True):  # type: psutil.Process
            if process.name() == exec_name and process.is_running():
                return process

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = '{0}/parsecmgmt -a run -p {1} -i native -n {2}' \
            .format(self._bench_home, self._name, self._num_threads)

        return await self._cgroup.exec_command(cmd, stdout=asyncio.subprocess.DEVNULL)
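
# Illustrative only (not part of the original module): each driver exposes a static
# has() check, so a benchmark name can be routed to the matching BenchDriver subclass.
# The helper below and the driver tuple it iterates are a hypothetical sketch of that
# dispatch pattern, not code from the original project.
def find_driver(bench_name: str):
    for driver_cls in (ParsecDriver, SpecDriver, NPBDriver, RodiniaDriver):
        if driver_cls.has(bench_name):
            return driver_cls
    raise ValueError(f'no driver registered for benchmark {bench_name!r}')
# e.g. find_driver('canneal') -> ParsecDriver, find_driver('lbm') -> SpecDriver
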
Example #3
class SpecDriver(BenchDriver):
    _benches: Set[str] = {
        'lbm', 'libquantum', 'GemsFDTD', 'sphinx', 'gcc', 'zeusmp', 'sjeng'
    }
    bench_name: str = 'spec'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SpecDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        if self._name == 'sphinx':
            exec_name = 'sphinx_livepretend_base.proc'
        else:
            exec_name = f'{self._name}_base.proc'

        for process in self._async_proc_info.children(
                recursive=True):  # type: psutil.Process
            if process.name() == exec_name and process.is_running():
                return process

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = 'runspec --config=vm.cfg --size=ref --noreportable --delay=0 --nobuild --iteration=1 {0}' \
            .format(self._name)

        bench_bin = os.path.join(SpecDriver._bench_home, 'bin')
        bench_lib = os.path.join(bench_bin, 'lib')

        env = os.environ.copy()
        env['PATH'] = bench_bin + ':' + env['PATH']
        env['SPEC'] = SpecDriver._bench_home
        env['SPECPERLLIB'] = f'{bench_bin}:{bench_lib}'
        env['LC_LANG'] = 'C'
        env['LC_ALL'] = 'C'

        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL, env=env)

    def stop(self) -> None:
        try:
            proc = self._find_bench_proc()
            if proc is not None:
                proc.kill()
        except psutil.NoSuchProcess:
            pass

        super().stop()

    @BenchDriver._Decorators.ensure_running
    def pause(self) -> None:
        self._async_proc.send_signal(SIGSTOP)
        self._find_bench_proc().suspend()

    @BenchDriver._Decorators.ensure_running
    def resume(self) -> None:
        self._async_proc.send_signal(SIGCONT)
        self._find_bench_proc().resume()
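
# Illustrative only (not part of the original module): SpecDriver.pause()/resume()
# combine a raw SIGSTOP/SIGCONT to the runspec launcher with psutil's
# suspend()/resume() on the benchmark child, because the real work happens in a
# child of the wrapper. A minimal standalone sketch of the same idea:
def pause_tree(root: psutil.Process) -> None:
    root.suspend()  # psutil sends SIGSTOP on POSIX
    for child in root.children(recursive=True):
        child.suspend()

def resume_tree(root: psutil.Process) -> None:
    for child in root.children(recursive=True):
        child.resume()  # psutil sends SIGCONT on POSIX
    root.resume()
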
Example #4
class SparkGPUDriver(BenchDriver):
    _benches: Set[str] = {
        'GpuDSArrayMult', 'GpuEnablerExample', 'GpuEnablerCodegen',
        'GpuKMeans', 'GpuKMeansBatch', 'GpuKMeansBatchSmall', 'SparkDSLR',
        'SparkDSLRmod', 'SparkGPULR', 'perfDebug', 'GpuKMeansCpu',
        'GpuKMeansGpu', 'GpuKMeansBatchCpu', 'GpuKMeansBatchGpu',
        'SparkDSLRCpu', 'SparkDSLRGpu', 'SparkGPULRCpu', 'SparkGPULRGpu'
    }
    bench_name: str = 'sparkgpu'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SparkGPUDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # Spark runs the actual benchmark inside a 'java' child process, so the
        # first child of the launcher is treated as the benchmark process.
        children = self._async_proc_info.children(recursive=True)

        if not children:
            return None
        else:
            return children[0]

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = '{0}/bin/run-example {1}'.format(self._bench_home, self._name)

        env = os.environ.copy()
        env['M2_HOME'] = '/home/nvidia/.m2'
        env['MAVEN_HOME'] = '/home/nvidia/dcslab/packages/apache-maven-3.6.0'
        env['PATH'] = env['MAVEN_HOME'] + '/bin/:' + env['PATH']

        return await self._cgroup.exec_command(
            cmd,
            env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
Example #5
class SparkGPUDataReceiverPythonDriver(BenchDriver):
    _benches: Set[str] = {'SPReceiver'}
    bench_name: str = 'spark-submit'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SparkGPUDataReceiverPythonDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        # spark-submit forks the real work into child processes, so return the
        # first child of the launcher if one exists.
        children = self._async_proc_info.children(recursive=True)

        if not children:
            return None
        else:
            return children[0]

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        data_stream_python_home = BenchDriver.get_bench_home(
            'data-stream-python')
        signal_invoker_home = data_stream_python_home + '/signal-invoker'
        #cmd = '{0}/bin/spark-submit {1}/receiver/sparkgpu_receiver_code.py'.format(self._bench_home, data_stream_python_home)
        cmd = '{0}/bin/spark-submit {1}/receiver/SPReceive.py {2}'.format(
            self._bench_home, data_stream_python_home,
            BenchDriver.get_bench_home('stream-ip-address'))

        env = os.environ.copy()
        env['M2_HOME'] = '/home/nvidia/.m2'
        env['MAVEN_HOME'] = '/home/nvidia/dcslab/packages/apache-maven-3.6.0'
        env['PATH'] = env['MAVEN_HOME'] + '/bin/:' + env['PATH']

        proc = Popen([
            'python3.7', signal_invoker_home + '/signal_invoker.py',
            BenchDriver.get_bench_home('stream-ip-address')
        ])

        self.sparkGPU_launched_time = time.time()
        bench_proc = await self._cgroup.exec_command(
            cmd,
            env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
        # Give spark-submit a moment to start without blocking the event loop.
        await asyncio.sleep(3)
        return bench_proc
Example #6
class NPBDriver(BenchDriver):
    _benches: Set[str] = {'CG', 'IS', 'DC', 'EP', 'MG', 'FT', 'SP', 'BT', 'LU', 'UA'}
    bench_name: str = 'npb'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    # TODO: variable data size
    DATA_SET_MAP = {
        'CG': 'C',
        'IS': 'D',
        'DC': 'B',
        'EP': 'C',
        'MG': 'C',
        'FT': 'C',
        'SP': 'C',
        'BT': 'C',
        'LU': 'C',
        'UA': 'C',
    }

    @property
    def _exec_name(self) -> str:
        return f'{self._name.lower()}.{NPBDriver.DATA_SET_MAP[self._name]}.x'

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in NPBDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        exec_name = self._exec_name

        if self._async_proc_info.name() == exec_name and self._async_proc_info.is_running():
            return self._async_proc_info

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        exec_name = self._exec_name

        cmd = '{0}/bin/{1}'.format(self._bench_home, exec_name)
        env = {'OMP_NUM_THREADS': str(self._num_threads)}

        return await self._cgroup.exec_command(cmd, stdout=asyncio.subprocess.DEVNULL, env=env)
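
# Illustrative only (not part of the original module): _exec_name maps a benchmark
# name through DATA_SET_MAP to the NPB binary, e.g. 'CG' with class 'C' becomes
# 'cg.C.x', so _launch_bench() runs '<bench_home>/bin/cg.C.x' with OMP_NUM_THREADS
# set to the requested thread count.
assert f"{'CG'.lower()}.{NPBDriver.DATA_SET_MAP['CG']}.x" == 'cg.C.x'
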
Example #7
class RodiniaDriver(BenchDriver):
    _benches: Set[str] = {'nn', 'kmeans', 'cfd', 'particlefilter', 'bfs'}
    bench_name: str = 'rodinia'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in RodiniaDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        if self._name == 'cfd':
            exec_name = 'euler3d_cpu'
        elif self._name == 'particlefilter':
            exec_name = 'particle_filter'
        else:
            exec_name = self._name

        for process in self._async_proc_info.children(
                recursive=True):  # type: psutil.Process
            if process.name() == exec_name and process.is_running():
                return process

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        cmd = '{0}/openmp/{1}/run' \
            .format(self._bench_home, self._name)

        env = {
            'OMP_NUM_THREADS': str(self._num_threads),
            'GOMP_CPU_AFFINITY': str(self._binding_cores)
        }

        return await self._cgroup.exec_command(
            cmd,
            env=env,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
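
# Illustrative only (not part of the original module): the Rodinia run scripts are
# OpenMP programs, so the env dict above controls both the thread count and the CPU
# binding. GOMP_CPU_AFFINITY expects a space-separated CPU list (ranges allowed),
# which str(self._binding_cores) is assumed to produce; a hypothetical formatter:
def format_cpu_affinity(cores) -> str:
    # e.g. [0, 1, 2, 3] -> '0 1 2 3'
    return ' '.join(str(core) for core in cores)
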
class Cifar10Driver(BenchDriver):
    _benches: Set[str] = {'cifar10_train', 'cifar10_eval'}
    bench_name: str = 'cifar10'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in Cifar10Driver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        cmdline = self._async_proc_info.cmdline()
        exec_cmdline = cmdline[1]
        # Take the script name without its '.py' suffix (os.path.splitext avoids
        # rstrip('.py') stripping trailing 'p'/'y' characters of the name itself).
        exec_name = os.path.splitext(os.path.basename(exec_cmdline))[0]

        if self._name in exec_name and self._async_proc_info.is_running():
            return self._async_proc_info

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        if 'train' in self._name:
            args = '--max_steps 1000'
            cmd = 'python {0}/{1}.py {2}' \
                .format(self._bench_home, self._name, args)
        elif 'eval' in self._name:
            args = '--run_multiple 10 --eval_interval_secs 1'
            cmd = 'python {0}/{1}.py {2}' \
                .format(self._bench_home, self._name, args)

        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
class InceptionDriver(BenchDriver):
    _benches: Set[str] = {'imagenet_train', 'imagenet_eval'}
    bench_name: str = 'inception'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in InceptionDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        cmdline = self._async_proc_info.cmdline()
        exec_cmdline = cmdline[1]
        # Take the script name without its '.py' suffix (os.path.splitext avoids
        # rstrip('.py') stripping trailing 'p'/'y' characters of the name itself).
        exec_name = os.path.splitext(os.path.basename(exec_cmdline))[0]

        if self._name in exec_name and self._async_proc_info.is_running():
            return self._async_proc_info

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        if 'train' in self._name:
            args = '--max_steps 5 --data_dir /ssd2/converted_data'
            cmd = '{0}/{1} {2}' \
                .format(self._bench_home, self._name, args)
        elif 'eval' in self._name:
            args = '--num_examples 800 --run_once --data_dir /ssd2/converted_data --checkpoint_dir /tmp/imagenet_train_for_eval'
            cmd = '{0}/{1} {2}' \
                .format(self._bench_home, self._name, args)

        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
Example #10
class PyTorchDriver(BenchDriver):
    _benches: Set[str] = {
        'alexnet-train-gpu', 'alexnet-eval-cpu', 'alexnet-eval-gpu',
        'alexnet-train-cpu', 'vgg11-train-gpu', 'vgg11-eval-cpu',
        'inceptionv3-train-gpu', 'inceptionv3-eval-cpu',
        'inceptionv3-eval-gpu', 'inceptionv3-train-cpu', 'resnet152-train-gpu',
        'resnet152-eval-cpu', 'squeezenet1_1-eval-cpu',
        'squeezenet1_1-eval-gpu', 'squeezenet1_1-train-cpu',
        'squeezenet1_1-train-gpu'
    }
    bench_name: str = 'pytorch'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)
    model = None
    op_type = None
    pu_type = None

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in PyTorchDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        cmdline = self._async_proc_info.cmdline()
        try:
            if self._async_proc_info.is_running():
                exec_cmdline = cmdline[1]
                # Script name without its '.py' suffix, e.g. 'train-gpu'.
                exec_name = os.path.splitext(os.path.basename(exec_cmdline))[0]

                # Recover the model name from the command-line arguments.
                model = ''
                for word in cmdline:
                    if word in ('alexnet', 'vgg11', 'resnet152', 'squeezenet1_1'):
                        model = word
                    elif word in ('inception_v3', 'inceptionv3'):
                        model = 'inceptionv3'

                full_exec_name = model + '-' + exec_name
                if self._name == full_exec_name and self._async_proc_info.is_running():
                    return self._async_proc_info
        except (IndexError, UnboundLocalError):
            # The command line could not be parsed (e.g. the benchmark already
            # finished or is tearing down); fall back to the launcher process.
            return self._async_proc_info

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        # Bench names look like '<model>-<op>-<pu>', e.g. 'alexnet-train-gpu'.
        model, op_type, pu_type = self._name.split('-')
        print(f'model: {model}')
        print(f'op_type: {op_type}')
        print(f'pu_type: {pu_type}')
        print(f'batch_size: {self._batch_size}')

        # The training/evaluation scripts expect 'inception_v3' rather than 'inceptionv3'.
        if model == 'inceptionv3':
            model = 'inception_v3'

        if op_type == 'eval' and pu_type == 'cpu':
            #cmd = f'python {self._bench_home}/eval-cpu.py --data /ssd/raw_data -a {model} -b {self._batch_size} -e'
            cmd = f'python {self._bench_home}/eval-cpu.py /ssd/raw_data -a {model} -b {self._batch_size} -e --pretrained'
        elif op_type == 'eval' and pu_type == 'gpu':
            #cmd = f'python {self._bench_home}/eval-gpu.py --data /ssd/raw_data -a {model} -b {self._batch_size} -e'
            cmd = f'python {self._bench_home}/eval-gpu.py /ssd/raw_data -a {model} -b {self._batch_size} -e --pretrained'
        elif op_type == 'train' and pu_type == 'cpu':
            cmd = f'python {self._bench_home}/train-cpu.py -a {model} --lr 0.01 -b {self._batch_size} --epochs 1 /ssd/raw_data'
        elif op_type == 'train' and pu_type == 'gpu':
            cmd = f'python {self._bench_home}/train-gpu.py -a {model} --lr 0.01 -b {self._batch_size} --epochs 1 /ssd/raw_data'

        return await self._cgroup.exec_command(
            cmd, stdout=asyncio.subprocess.DEVNULL)
Example #11
class SSDDriver(BenchDriver):
    """
    SSD: Single Shot MultiBox Detection (Object Detection)
    """
    _benches: Set[str] = {
        'ssd-eval-vgg-cpu-small', 'ssd-eval-vgg-cpu-medium',
        'ssd-eval-vgg-cpu-large', 'ssd-eval-vgg-gpu-small',
        'ssd-eval-vgg-gpu-medium', 'ssd-eval-vgg-gpu-large',
        'ssd-eval-vgg-gpu-original', 'ssd-eval-vgg-cpu-original',
        'ssd-train-vgg-cpu', 'ssd-train-vgg-gpu', 'ssd-eval-vgg-gpu-dynamic'
    }
    bench_name: str = 'ssd'
    _bench_home: str = BenchDriver.get_bench_home(bench_name)

    @staticmethod
    def has(bench_name: str) -> bool:
        return bench_name in SSDDriver._benches

    def _find_bench_proc(self) -> Optional[psutil.Process]:
        cmdline = self._async_proc_info.cmdline()
        try:
            if self._async_proc_info.is_running():
                exec_cmdline = cmdline[1]
                cmdline_list = exec_cmdline.split('/')
                # Script name without its '.py' suffix, e.g. 'ssd-eval'.
                exec_name = os.path.splitext(cmdline_list[-1])[0]

                # FIXME: hard-coded; more models and data set types are needed.
                model = 'vgg'
                pu_type = ''
                for word in cmdline_list:
                    if word == 'False':
                        pu_type = 'cpu'
                    elif word == 'True':
                        pu_type = 'gpu'
                    elif word == 'test_small':
                        data_type = 'small'
                    elif word == 'test_medium':
                        data_type = 'medium'
                    elif word == 'test_large':
                        data_type = 'large'
                    elif word == 'test_original':
                        data_type = 'backup'

                full_exec_name = exec_name + '-' + model + '-' + pu_type + '-' + data_type
                if self._name == full_exec_name and self._async_proc_info.is_running():
                    return self._async_proc_info
        except (IndexError, UnboundLocalError):
            # The command line could not be parsed or data_type was never set;
            # fall back to the launcher process itself.
            return self._async_proc_info

        return None

    async def _launch_bench(self) -> asyncio.subprocess.Process:
        # Bench names look like 'ssd-<op>-<model>-<pu>[-<data>]',
        # e.g. 'ssd-eval-vgg-gpu-small'.
        splitted_name = self._name.split('-')
        op_type = splitted_name[1]  # op_type : eval or train
        model = splitted_name[2]  # model : network backbone (currently always vgg)
        pu_type = splitted_name[3]  # pu_type : processing unit (cpu or gpu)

        # FIXME: hard-coded and need to add more data types
        if op_type == "eval":
            data_type = splitted_name[4]  # data_type : input data set size/variant
            if "small" in data_type:
                data_type = "test_small"
            elif "medium" in data_type:
                data_type = "test_medium"
            elif "large" in data_type:
                data_type = "test_large"
            elif "original" in data_type:
                data_type = "test_backup"
            elif "dynamic" in data_type:
                data_type = "dynamic_load"
        else:
            data_type = None
        print(f'model: {model}')
        print(f'op_type: {op_type}')
        print(f'pu_type: {pu_type}')
        print(f'data_type: {data_type}')
        print(f'batch_size: {self._batch_size}')

        if op_type == 'eval' and pu_type == 'cpu':
            cmd = f'python {self._bench_home}/ssd-eval.py --cuda False --set_type {data_type}_15'
        elif op_type == 'eval' and pu_type == 'gpu':
            cmd = f'python {self._bench_home}/ssd-eval.py --cuda True --set_type {data_type}_200'
        elif op_type == 'train' and pu_type == 'cpu':
            cmd = f'python {self._bench_home}/ssd-train.py --cuda False --batch_size 2'
        elif op_type == 'train' and pu_type == 'gpu':
            cmd = f'python {self._bench_home}/ssd-train.py --cuda True --batch_size 2'

        return await self._cgroup.exec_command(cmd,
                                               stdout=asyncio.subprocess.PIPE)
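
# Illustrative only (not part of the original module): unlike the other drivers, which
# discard benchmark output with DEVNULL, SSDDriver launches with
# stdout=asyncio.subprocess.PIPE, so some coroutine must keep reading that pipe or the
# benchmark can stall once the OS pipe buffer fills. A minimal drain loop, assuming
# asyncio is already imported at module level and `proc` is the object returned by
# _launch_bench():
async def drain_stdout(proc: asyncio.subprocess.Process) -> None:
    # Read line by line until the process closes its stdout.
    while True:
        line = await proc.stdout.readline()
        if not line:
            break
        # ... parse or log the benchmark output here ...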