Example #1
    def setup(self):
        # Requires: import copy; from distributed import Client, LocalCluster, Worker
        # Start with a single worker that advertises one unit of the abstract
        # resource "resource".
        cluster = LocalCluster(n_workers=1, threads_per_worker=1,
                               resources={"resource": 1}, worker_class=Worker)
        # Build the spec for the next worker and strip its resource entry, so the
        # second worker comes up without any resources.
        spec = copy.deepcopy(cluster.new_worker_spec())
        del spec[1]['options']['resources']
        cluster.worker_spec.update(spec)
        # Scale to two workers: one constrained by the resource, one unconstrained.
        cluster.scale(2)
        client = Client(cluster)

        self.client = client
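Example #1 leaves self.client attached to a two-worker cluster in which only the first worker advertises the abstract "resource". A minimal sketch of a benchmark body that exercises this setup, assuming the standard dask.distributed API; the method name and workload are illustrative, not taken from the original:

    def time_resource_constrained_map(self):
        # These tasks request the resource, so they can only run on the first
        # worker and cannot be stolen by the unconstrained second worker.
        futures = self.client.map(lambda x: x + 1, range(100),
                                  resources={"resource": 1})
        self.client.gather(futures)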
Example #2
    def setup(self, resource, steal_interval):
        # Requires: import copy; from dask import config;
        # from distributed import Client, LocalCluster, Worker
        # Parametrized variant: set the scheduler's work-stealing interval before
        # the cluster (and its scheduler) is created.
        config.set(
            {"distributed.scheduler.work-stealing-interval": steal_interval})
        rdict = {"resource": resource} if resource else None
        cluster = LocalCluster(n_workers=1,
                               threads_per_worker=1,
                               resources=rdict,
                               worker_class=Worker)

        spec = copy.deepcopy(cluster.new_worker_spec())

        # As in Example #1, the second worker is added without the resource.
        if resource:
            del spec[1]['options']['resources']
        cluster.worker_spec.update(spec)
        cluster.scale(2)
        client = Client(cluster)

        self.client = client
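Example #2 applies config.set before the LocalCluster (and its scheduler) is created, so the chosen work-stealing interval is in effect when the scheduler starts. As a side note, not part of the original benchmark, dask.config.set can also be used as a context manager, which keeps the override scoped to a block instead of mutating global configuration:

import dask

# The override only applies inside the with-block.
with dask.config.set({"distributed.scheduler.work-stealing-interval": "100ms"}):
    assert dask.config.get("distributed.scheduler.work-stealing-interval") == "100ms"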
def run_dask(options: dict,
             docker_username: str = None,
             docker_password: str = None,
             docker: bool = False,
             slurm_job_array: bool = False):
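    """Run a containerized workflow on a Dask cluster.

    Builds a LocalCluster, or a dask-jobqueue cluster described by
    options['jobqueue'], then runs the configured container image once per
    unit of input (no input, a whole directory, a single file, or one task
    per file in a directory) and waits for the results.
    """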
    try:
        if 'jobqueue' not in options:
            cluster = LocalCluster()
        else:
            jobqueue = options['jobqueue']
            gpus = options['gpus'] if 'gpus' in options else 0
            if 'slurm' in jobqueue:
                print("Requesting SLURM cluster:")
                pprint(jobqueue['slurm'])
                cluster = (SLURMCluster(job_extra=[f"--gres=gpu:{gpus}"], **jobqueue['slurm'])
                           if gpus else SLURMCluster(**jobqueue['slurm']))
            elif 'pbs' in jobqueue:
                print("Requesting PBS cluster:")
                pprint(jobqueue['pbs'])
                cluster = (PBSCluster(job_extra=[f"--gres=gpu:{gpus}"], **jobqueue['pbs'])
                           if gpus else PBSCluster(**jobqueue['pbs']))
            elif 'moab' in jobqueue:
                print("Requesting MOAB cluster:")
                pprint(jobqueue['moab'])
                cluster = (MoabCluster(job_extra=[f"--gres=gpu:{gpus}"], **jobqueue['moab'])
                           if gpus else MoabCluster(**jobqueue['moab']))
            elif 'sge' in jobqueue:
                print("Requesting SGE cluster:")
                pprint(jobqueue['sge'])
                cluster = (SGECluster(job_extra=[f"--gres=gpu:{gpus}"], **jobqueue['sge'])
                           if gpus else SGECluster(**jobqueue['sge']))
            elif 'lsf' in jobqueue:
                print("Requesting LSF cluster:")
                pprint(jobqueue['lsf'])
                cluster = (LSFCluster(job_extra=[f"--gres=gpu:{gpus}"], **jobqueue['lsf'])
                           if gpus else LSFCluster(**jobqueue['lsf']))
            elif 'oar' in jobqueue:
                print("Requesting OAR cluster:")
                pprint(jobqueue['oar'])
                cluster = (OARCluster(job_extra=[f"--gres=gpu:{gpus}"], **jobqueue['oar'])
                           if gpus else OARCluster(**jobqueue['oar']))
            else:
                raise ValueError(f"Unsupported jobqueue configuration: {jobqueue}")

            print(f"Cluster job script: {cluster.job_script()}")

        if 'output' in options and 'from' in options['output']:
            output_path = options['output']['from']
        else:
            output_path = '.'

        if 'input' not in options:
            env = options['env'] if 'env' in options else []
            params = options['parameters'] if 'parameters' in options else []
            bind_mounts = options['bind_mounts'] if 'bind_mounts' in options else []
            no_cache = options['no_cache'] if 'no_cache' in options else False
            gpus = options['gpus'] if 'gpus' in options else 0

            if 'jobqueue' in options: cluster.scale(1)
            with Client(cluster) as client:
                command = prep_command(
                    work_dir=options['workdir'],
                    image=options['image'],
                    command=options['command'],
                    env=env + [{'key': 'INDEX', 'value': 1}],
                    parameters=params + [{'key': 'OUTPUT', 'value': output_path}],
                    bind_mounts=bind_mounts,
                    no_cache=no_cache,
                    gpus=gpus,
                    docker_username=docker_username,
                    docker_password=docker_password,
                    docker=docker)

                logger.info(f"Submitting container")
                future = submit_command(client, command, options['log_file'] if 'log_file' in options else None, 3)
                future.result()
                if future.status != 'finished':
                    logger.error(f"Container failed: {future.exception}")
                else:
                    logger.info(f"Container completed")
        elif options['input']['kind'] == InputKind.DIRECTORY:
            input_path = options['input']['path']
            env = options['env'] if 'env' in options else []
            params = options['parameters'] if 'parameters' in options else []
            bind_mounts = options['bind_mounts'] if 'bind_mounts' in options else []
            no_cache = options['no_cache'] if 'no_cache' in options else False
            gpus = options['gpus'] if 'gpus' in options else 0

            if 'jobqueue' in options: cluster.scale(1)
            with Client(cluster) as client:
                command = prep_command(
                    work_dir=options['workdir'],
                    image=options['image'],
                    command=options['command'],
                    env=env + [{'key': 'INDEX', 'value': 1}],
                    parameters=params + [{'key': 'INPUT', 'value': input_path}, {'key': 'OUTPUT', 'value': output_path}],
                    bind_mounts=bind_mounts,
                    no_cache=no_cache,
                    gpus=gpus,
                    docker_username=docker_username,
                    docker_password=docker_password,
                    docker=docker)

                logger.info(f"Submitting container for directory '{input_path}'")
                future = submit_command(client, command, options['log_file'] if 'log_file' in options else None, 3)
                future.result()
                if future.status != 'finished':
                    logger.error(f"Container failed for directory '{input_path}': {future.exception}")
                else:
                    logger.info(f"Container completed for directory '{input_path}'")
        elif options['input']['kind'] == InputKind.FILES:
            input_path = options['input']['path']
            if slurm_job_array:
                files = os.listdir(input_path)
                file_id = int(os.environ.get('SLURM_ARRAY_TASK_ID'))
                current_file = files[file_id]

                env = options['env'] if 'env' in options else []
                params = options['parameters'] if 'parameters' in options else []
                patterns = options['input']['patterns'] if 'patterns' in options['input'] else []
                bind_mounts = options['bind_mounts'] if 'bind_mounts' in options else []
                no_cache = options['no_cache'] if 'no_cache' in options else False
                gpus = options['gpus'] if 'gpus' in options else 0

                if 'jobqueue' in options: cluster.scale(1)
                with Client(cluster) as client:
                    command = prep_command(
                        work_dir=options['workdir'],
                        image=options['image'],
                        command=options['command'],
                        env=env + [{'key': 'INDEX', 'value': file_id}] + [{'key': 'PATTERNS', 'value': ','.join(patterns)}],
                        parameters=params + [{'key': 'INPUT', 'value': join(input_path, current_file)}, {'key': 'OUTPUT', 'value': output_path}],
                        bind_mounts=bind_mounts,
                        no_cache=no_cache,
                        gpus=gpus,
                        docker_username=docker_username,
                        docker_password=docker_password,
                        docker=docker)

                    logger.info(f"Submitting container for file '{input_path}'")
                    future = submit_command(client, command, options['log_file'] if 'log_file' in options else None, 3)
                    future.result()
                    if future.status != 'finished':
                        logger.error(f"Container failed for file '{input_path}': {future.exception}")
                    else:
                        logger.info(f"Container completed for file '{input_path}'")

                logger.info(f"Run succeeded")
            else:
                files = os.listdir(input_path)
                count = len(files)
                futures = []

                if 'jobqueue' not in options:
                    logger.info(f"Processing {count} files in '{input_path}'")
                else:
                    logger.info(f"Requesting {count} nodes to process {count} files in '{input_path}' with job script:\n{cluster.job_script()}")
                    cluster.scale(count)

                env = options['env'] if 'env' in options else []
                params = deepcopy(options['parameters']) if 'parameters' in options else []
                patterns = options['input']['patterns'] if 'patterns' in options['input'] else []
                bind_mounts = options['bind_mounts'] if 'bind_mounts' in options else []
                no_cache = options['no_cache'] if 'no_cache' in options else False
                gpus = options['gpus'] if 'gpus' in options else 0

                with Client(cluster) as client:
                    num_files = len(files)
                    for i, current_file in tqdm.tqdm(enumerate(files), total=num_files):
                        command = prep_command(
                            work_dir=options['workdir'],
                            image=options['image'],
                            command=options['command'],
                            env=env + [{'key': 'INDEX', 'value': i}] + [{'key': 'PATTERNS', 'value': ','.join(patterns)}],
                            parameters=params + [{'key': 'INPUT', 'value': join(input_path, current_file)}, {'key': 'OUTPUT', 'value': output_path}],
                            bind_mounts=bind_mounts,
                            no_cache=no_cache,
                            gpus=gpus,
                            docker_username=docker_username,
                            docker_password=docker_password,
                            docker=docker)

                        logger.info(f"Submitting container for file {i}")
                        futures.append(submit_command(client, command, options['log_file'] if 'log_file' in options else None, 3))

                    finished = 0
                    for future in tqdm.tqdm(as_completed(futures), total=num_files):
                        finished += 1
                        if future.status != 'finished':
                            logger.error(f"Container {finished} of {num_files} failed: {future.exception}")
                        else:
                            logger.info(f"Container {finished} of {num_files} completed")
        elif options['input']['kind'] == InputKind.FILE:
            input_path = options['input']['path']
            env = options['env'] if 'env' in options else []
            params = options['parameters'] if 'parameters' in options else []
            patterns = options['input']['patterns'] if 'patterns' in options['input'] else []
            bind_mounts = options['bind_mounts'] if 'bind_mounts' in options else []
            no_cache = options['no_cache'] if 'no_cache' in options else False
            gpus = options['gpus'] if 'gpus' in options else 0

            if 'jobqueue' in options: cluster.scale(1)
            with Client(cluster) as client:
                command = prep_command(
                    work_dir=options['workdir'],
                    image=options['image'],
                    command=options['command'],
                    env=env + [{'key': 'INDEX', 'value': 1}] + [{'key': 'PATTERNS', 'value': ','.join(patterns)}],
                    parameters=params + [{'key': 'INPUT', 'value': input_path}, {'key': 'OUTPUT', 'value': output_path}],
                    bind_mounts=bind_mounts,
                    no_cache=no_cache,
                    gpus=gpus,
                    docker_username=docker_username,
                    docker_password=docker_password,
                    docker=docker)

                logger.info(f"Submitting container for file 1")
                future = submit_command(client, command, options['log_file'] if 'log_file' in options else None, 3)
                future.result()
                if future.status != 'finished':
                    logger.error(f"Container failed for file 1")
                    logger.error(future.exception)
                else:
                    logger.info(f"Container completed for file 1")

        logger.info(f"Run succeeded")
    except Exception:
        logger.error(f"Run failed: {traceback.format_exc()}")
        raise
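A hypothetical invocation of run_dask, reconstructed from the keys the function reads; InputKind, prep_command and submit_command come from the surrounding project, and every concrete value below is a placeholder:

options = {
    'workdir': '/scratch/run-001',
    'image': 'alpine:3.18',
    'command': 'echo $INPUT',
    'input': {'kind': InputKind.FILES, 'path': '/scratch/inputs', 'patterns': ['csv']},
    'output': {'from': '/scratch/outputs'},
    'jobqueue': {'slurm': {'cores': 1, 'memory': '2GB', 'walltime': '01:00:00'}},
    'gpus': 0,
    'log_file': '/scratch/run-001/run.log',
}
run_dask(options, docker=False, slurm_job_array=False)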