示例#1
0
    def to_disk(
        self,
        job: Job,
        worker: Optional[str] = None,
        verbose: bool = False,
    ) -> None:
        """
        Pickle the job's parameter configuration to project_path so that
        Ludwig workers can discover jobs assigned to them.

        NOTE(review): when ``worker`` is None the file is named
        'None_<id>.pkl' — presumably callers always pass a worker name;
        confirm against call sites.
        """
        # refuse to save an incomplete job description
        if not job.is_ready():
            raise SystemExit(
                'Cannot save job. Job is not ready. Update job.param2val')

        # save parameter configuration to shared drive;
        # the file name encodes worker, param_name and job_name
        pkl_name = '{}_{}_{}.pkl'.format(worker,
                                         job.param2val["param_name"],
                                         job.param2val["job_name"])
        pkl_path = self.project_path / pkl_name
        pkl_path.write_bytes(pickle.dumps(job.param2val))

        # console feedback
        print_ludwig(f'Parameter configuration for {worker} saved to disk')
        if verbose:
            print(job)
            print()
示例#2
0
File: job.py  Project: h0m3brew/Ludwig
    def update_param_name(self,
                          runs_path: Path,
                          num_new: int,  # number of new param_names assigned
                          ) -> None:
        """
        Assign ``self.param2val['param_name']``.

        Check whether an identical parameter configuration already exists
        under ``runs_path``; if it does, reuse that folder's name (creating a
        new one could break already-queued runs). Otherwise mint a fresh,
        unused name and mark the job as new.
        """
        # numeric suffixes of existing param_* folders ([0] when none exist);
        # folders created by local (not-Ludwig) runs are excluded
        param_nums = [int(p.name.split('_')[-1])
                      for p in runs_path.glob('param*')
                      if config.Constants.not_ludwig not in p.name] or [0]

        for param_p in runs_path.glob('param_*'):
            with (param_p / 'param2val.yaml').open('r') as f:
                loaded_param2val = yaml.load(f, Loader=yaml.FullLoader)
            if self.is_same(self.param2val, loaded_param2val):
                print_ludwig('Configuration matches existing configuration')
                self.is_new = False
                param_name = param_p.name
                break
        else:  # no saved configuration matched -> create a new name
            # offset by num_new so names assigned earlier in the same
            # submission (not yet on disk) are not reused
            new_param_num = max(param_nums) + 1 + num_new
            param_nums.append(new_param_num)
            param_name = 'param_{:0>3}'.format(new_param_num)
            self.is_new = True

        self.param2val['param_name'] = param_name
        print_ludwig(f'Assigned job param_name={param_name}')
示例#3
0
def add_ssh_config():
    """
    Copy /media/research_data/.ludwig/config to ~/.ssh/ludwig_config.

    Does nothing when the destination already exists, so a user-modified
    config is never clobbered (the original code overwrote it despite the
    'do not overwrite existing config' comment — that was the bug).
    """
    src = config.WorkerDirs.research_data / '.ludwig' / 'config'
    dst = Path.home() / '.ssh' / 'ludwig_config'
    if dst.exists():  # do not overwrite existing config
        print_ludwig('{} already exists. Not copying.'.format(dst))
        return
    print_ludwig('Copying {} to {}'.format(src, dst))
    shutil.copy(src, dst)
示例#4
0
File: job.py  Project: h0m3brew/Ludwig
 def calc_num_needed(self,
                     runs_path: Path,
                     reps: int,
                     disable: bool = False,  # for unit-testing or debugging
                     ):
     """Return how many more executions are needed to reach ``reps``."""
     param_name = self.param2val['param_name']
     if disable:
         num_times_logged = 0
     else:
         # count completed repetitions already saved under the param folder
         num_times_logged = len(list((runs_path / param_name).glob('*num*')))
     res = max(0, reps - num_times_logged)
     print_ludwig('{:<10} logged {:>3} times. Will execute job {:>3} times'.format(
         param_name, num_times_logged, res))
     return res
示例#5
0
    def start_jobs(
        self,
        worker: str,
    ) -> None:
        """
        Upload the project's source code and run.py to ``worker`` via sftp.

        Uploading run.py triggers the worker's file watcher, which kills any
        existing jobs for this project and then executes run.py.
        if no param2val for worker is saved to server, then run.py will exit.
        """

        # -------------------------------------- checks

        assert self.project_name.lower(
        ) == self.src_name  # TODO what about when src name must be different?
        # this must be true because in run.py project_name is converted to src_name

        self.check_disk_space()

        # -------------------------------------- prepare paths

        if not self.project_path.exists():
            self.project_path.mkdir()
        if not self.runs_path.exists():
            self.runs_path.mkdir(parents=True)

        remote_path = f'{config.WorkerDirs.watched.name}/{self.src_name}'

        # ------------------------------------- sftp

        # connect via sftp; the private key lives on the shared drive
        research_data_path = self.project_path.parent
        private_key_path = research_data_path / '.ludwig' / 'id_rsa'
        sftp = pysftp.Connection(username='******',
                                 host=self.worker2ip[worker],
                                 private_key=str(private_key_path))

        # upload code files (recursively) into the watched directory
        print_ludwig(
            f'Will upload {self.src_name} to {remote_path} on {worker}')
        sftp.makedirs(remote_path)
        sftp.put_r(localpath=self.src_name, remotepath=remote_path)

        # upload run.py last - its arrival is what triggers execution;
        # it is named per-project so multiple projects can coexist
        run_file_name = f'run_{self.project_name}.py'
        sftp.put(
            localpath=run.__file__,
            remotepath=f'{config.WorkerDirs.watched.name}/{run_file_name}')

        print_ludwig(f'Upload to {worker} complete')
示例#6
0
 def check_disk_space(self, verbose=False):
     """Raise RuntimeError when shared-drive usage exceeds the configured maximum."""
     if platform.system() not in {'Linux'}:
         # usage check is only implemented for Linux mounts
         print_ludwig(
             'WARNING: Cannot determine disk space on non-Linux platform.')
         return
     parent = self.project_path.parent
     percent_used = psutil.disk_usage(str(parent))[3]  # index 3 == percent used
     if verbose:
         print_ludwig('Percent Disk Space used at {}: {}'.format(
             parent, percent_used))
     if percent_used > config.Remote.disk_max_percent:
         raise RuntimeError('Disk space usage > {}.'.format(
             config.Remote.disk_max_percent))
示例#7
0
    def calc_num_needed(
        self,
        runs_path: Path,
        reps: int,
    ):
        """Return the number of additional executions needed to reach ``reps``."""
        param_name = self.param2val['param_name']
        # count completed repetitions already saved under the param folder
        logged_so_far = sum(1 for _ in (runs_path / param_name).glob('*num*'))

        remaining = reps - logged_so_far
        if remaining < 0:
            remaining = 0
        print_ludwig(
            '{:<10} logged {:>3} times. Will execute job {:>3} times'.format(
                param_name, logged_so_far, remaining))
        return remaining
示例#8
0
    def kill_jobs(
        self,
        worker: str,
    ) -> None:
        """
        Kill all active jobs for this project on ``worker``.

        First, all job descriptions for worker (pickle files saved on server)
        must already have been removed by the caller.
        Then, run.py is uploaded to worker, which triggers killing of existing jobs,
         and executes run.py.
        Because no job descriptions for worker exist on server, run.py will exit.
        """

        # -------------------------------------- checks

        assert self.project_name.lower(
        ) == self.src_name  # TODO what about when src name must be different?
        # this must be true because in run.py project_name is converted to src_name

        self.check_disk_space()

        # -------------------------------------- prepare paths

        if not self.project_path.exists():
            self.project_path.mkdir()
        if not self.runs_path.exists():
            self.runs_path.mkdir(parents=True)

        # ------------------------------------- sftp

        # connect via sftp; the private key lives on the shared drive
        research_data_path = self.project_path.parent
        private_key_path = research_data_path / '.ludwig' / 'id_rsa'
        sftp = pysftp.Connection(username='******',
                                 host=self.worker2ip[worker],
                                 private_key=str(private_key_path))

        # upload run.py - this triggers watcher which kills active jobs associated with project
        run_file_name = f'run_{self.project_name}.py'
        sftp.put(
            localpath=run.__file__,
            remotepath=f'{config.WorkerDirs.watched.name}/{run_file_name}')

        print_ludwig(
            f'Killed any active jobs with src_name={self.src_name} on {worker}'
        )
示例#9
0
File: results.py  Project: phueb/Ludwig
def gen_param_paths(
    project_name: str,
    param2requests: Dict[str, list],
    param2default: Dict[str, Any],
    runs_path: Optional[Path] = None,
    ludwig_data_path: Optional[Path] = None,
    label_params: Optional[List[str]] = None,
    isolated: bool = False,
    label_n: bool = True,
    verbose: bool = True,
    verbose_plus: bool = False,
    require_all_found: bool = True,
):
    """
    Yield (param_path, label) for each runs folder whose saved param2val
    matches one of the requested parameter configurations.

    Folders located in those paths are each generated with the same parameter
    configuration. Use this for retrieving data after a job has been completed.

    Raises:
        OSError: if ludwig_data is not mounted (and not ``isolated``).
        FileNotFoundError: if the runs folder does not exist.
        SystemExit: if ``require_all_found`` and not every requested
            configuration was found (raised only once the generator is
            exhausted).
    """

    # --------------------------------------------------------  paths

    if ludwig_data_path is None:
        ludwig_data_path = Path(
            default_mnt_point) / configs.WorkerDirs.ludwig_data.name

    if isolated:
        project_path = Path.cwd()
    else:
        project_path = ludwig_data_path / project_name

        # check that ludwig_data is mounted
        if not os.path.ismount(ludwig_data_path):
            raise OSError(f'{ludwig_data_path} is not mounted')

    # default + check path to runs.
    # fix: runs_path was previously defaulted twice and the second branch
    # ("if runs_path is None") was unreachable dead code - default it once
    if not runs_path:
        runs_path = project_path / 'runs'
    if not runs_path.exists():
        raise FileNotFoundError(f'{runs_path} does not exist.')

    # ------------------------------------------------------- prepare params

    # params whose requested values differ from the default, plus any
    # explicitly requested label params
    label_params = sorted(
        {param for param, val in param2requests.items()
         if val != param2default[param]} | set(label_params or []))

    requested_param2vals = list(
        gen_all_param2vals(param2requests, param2default))

    print_ludwig('Looking for the following parameter configurations:')
    for requested_param2val in requested_param2vals:
        print(sorted(requested_param2val.items()))
    num_requested = len(requested_param2vals)

    # look for param_paths
    num_found = 0
    for param_path in sorted(runs_path.glob('param_*')):
        if verbose:
            print_ludwig(f'Checking {param_path}...')

        # load param2val
        with (param_path / 'param2val.yaml').open('r') as f:
            param2val = yaml.load(f, Loader=yaml.FullLoader)
        loaded_param2val = param2val.copy()

        # strip params that Ludwig adds automatically before comparing
        for param_name in configs.Constants.added_param_names:
            try:
                del loaded_param2val[param_name]
            except KeyError:  # Ludwig < v2.0
                pass

        # is match?
        if loaded_param2val in requested_param2vals:
            num_found += 1
            label_ = '\n'.join(
                [f'{param}={param2val[param]}' for param in label_params])
            if label_n:
                n = len(list(param_path.glob('*num*')))
                label_ += f'\nn={n}'
            if verbose:
                print_ludwig('Param2val matches')
                print_ludwig(label_)
            yield param_path, label_
        else:
            if verbose:
                print_ludwig('Params do not match')
                if verbose_plus:
                    # NOTE(review): assumes every requested key exists in the
                    # loaded param2val; a missing key would raise KeyError
                    for k in param2requests:
                        for v in param2requests[k]:
                            if loaded_param2val[k] != v:
                                print_ludwig(
                                    f'For key "{k}", {v} does not match {loaded_param2val[k]}'
                                )

    if num_requested != num_found and require_all_found:
        raise SystemExit(
            f'Ludwig: Found {num_found} but requested {num_requested}')
示例#10
0
def submit():
    """
    run jobs locally or on Ludwig workers.

    This script should be called in root directory of the Python project.
    If not specified via CL arguments, it will try to import src.params.
    src.params is where this script will try to find the parameters with which to execute your jobs.
    """

    cwd = Path.cwd()
    project_name = cwd.name

    # parse cmd-line args
    parser = argparse.ArgumentParser()
    parser.add_argument('-src',
                        '--src',
                        default=cwd.name.lower(),
                        action='store',
                        dest='src',
                        required=False,
                        help='Specify path to your source code.')
    parser.add_argument(
        '-r',
        '--reps',
        default=1,
        action='store',
        dest='reps',
        type=int,
        choices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50],
        required=False,
        help=
        'Number of times each job will be executed. To kill all jobs, use 0.'
    )  # TODO test r 0 - it should kill all running jobs
    parser.add_argument(
        '-m',
        '--minimal',
        action='store_true',
        default=False,
        dest='minimal',
        required=False,
        help='Run minimal parameter configuration for debugging.')
    parser.add_argument('-l',
                        '--local',
                        action='store_true',
                        default=False,
                        dest='local',
                        required=False,
                        help='Run on host')
    parser.add_argument(
        '-i',
        '--isolated',
        action='store_true',
        default=False,
        dest='isolated',
        required=False,
        help=
        'Do not connect to server. Only works when all data is available on client.'
    )
    parser.add_argument(
        '-w',
        '--worker',
        default=None,
        action='store',
        dest='worker',
        choices=configs.Remote.online_worker_names,
        required=False,
        help='Specify a single worker name if submitting to single worker only'
    )
    parser.add_argument('-g',
                        '--group',
                        default=None,
                        action='store',
                        dest='group',
                        choices=configs.Remote.group2workers.keys(),
                        required=False,
                        help='Specify a worker group')
    parser.add_argument(
        '-x',
        '--clear_runs',
        action='store_true',
        default=False,
        dest='clear_runs',
        required=False,
        help=
        'Delete all saved runs associated with current project on shared drive'
    )
    parser.add_argument('-f',
                        '--first_only',
                        action='store_true',
                        default=False,
                        dest='first_only',
                        required=False,
                        help='Run first job and exit.')
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument(
        '-mnt',
        '--ludwig_data',
        default=None,
        action='store',
        dest='ludwig_data_path',
        required=False,
        help=
        'Specify where the shared drive is mounted on your system (if not /media/ludwig_data).'
    )
    parser.add_argument('-e',
                        '--extra_paths',
                        nargs='*',
                        default=[],
                        action='store',
                        dest='extra_paths',
                        required=False,
                        help='Paths to additional Python packages or data. ')
    parser.add_argument(
        '-n',
        '--no-upload',
        action='store_true',
        dest='no_upload',
        required=False,
        help='Whether to upload jobs to Ludwig. Set false for testing')
    parser.add_argument(
        '-s',
        '--skip-hostkey',
        action='store_true',
        dest='skip_hostkey',
        required=False,
        default=False,
        help=
        'Whether to skip hostkey checking. Unsafe, but may prevent SSH connection error.'
    )
    namespace = parser.parse_args()

    # ---------------------------------------------- checks

    assert not (namespace.local and namespace.isolated)
    assert not (namespace.extra_paths and namespace.isolated)

    # ---------------------------------------------- paths

    if namespace.ludwig_data_path:
        ludwig_data_path = Path(namespace.ludwig_data_path)
    else:
        ludwig_data_path = Path(
            default_mnt_point) / configs.WorkerDirs.ludwig_data.name

    # project path points to wherever user decides a job should be executed:
    # locally: project_path is local project_path
    # remotely: project_path is on shared_drive
    if namespace.isolated:
        project_path = cwd
    else:
        project_path = ludwig_data_path / project_name

    runs_path = project_path / 'runs'

    src_path = cwd / namespace.src

    # ------------------------------------------------ user code

    # import user params + job
    print_ludwig(f'Importing source code from {src_path}')
    sys.path.append(str(cwd))
    user_params = importlib.import_module(src_path.name + '.params')
    if namespace.local or namespace.isolated:  # no need to import job when not executed locally
        user_job = importlib.import_module(src_path.name + '.job')

    # ------------------------------------------------ checks

    if not namespace.isolated:
        if not os.path.ismount(str(ludwig_data_path)):
            raise OSError(f'{ludwig_data_path} is not mounted')

    if not src_path.exists():
        raise NotADirectoryError(f'Cannot find source code in {src_path}.')

    # check for mis-spelled param names.
    # fix: this previously tested `k not in user_params.param2requests` while
    # iterating param2requests, so it could never fire; the error message
    # itself states the intended comparison target is param2default
    for k in user_params.param2requests:
        if k not in user_params.param2default:
            raise KeyError(
                f'Param "{k}" in param2requests is not in param2default. Check spelling'
            )

    # check that requests are lists and that each list does not contain repeated values
    for k, v in user_params.param2requests.items():
        if not isinstance(v, list):
            raise TypeError(
                'Value of param2requests["{}"] must be a list.'.format(k))
        for vi in v:
            if isinstance(
                    vi, list
            ):  # tuples can be members of a set (they are hashable) but not lists
                raise TypeError(
                    'Inner collections in param2requests must be of type tuple, not list.'
                )
            if isinstance(vi, dict):
                raise TypeError(
                    'Inner collections in param2requests must not be of type dict'
                )
        if len(v) != len(
                set(v)
        ):  # otherwise each identical value will be assigned a unique param_name
            raise ValueError('Each requested parameter value must be unique')
        if k not in user_params.param2default:
            raise KeyError(
                '{} is not a key in param2default. Check spelling.'.format(k))

    # check that there are no lists (only tuples) in param2default
    for k, v in user_params.param2default.items():
        if isinstance(
                v, list
        ):  # tuples can be members of a set (they are hashable) but not lists
            raise TypeError(
                'Type list is not allowed in param2default. Convert any lists to tuples.'
            )

    # ---------------------------------------------

    if not namespace.isolated and not namespace.local:

        # are additional source code files required? (do this before killing active jobs)
        # these can be Python packages, which will be importable, or contain data.
        # extra_paths is only allowed to be non-empty if not --local
        for extra_path in namespace.extra_paths:
            p = Path(extra_path)
            if not p.is_dir():
                raise NotADirectoryError('{} is not a directory'.format(p))
            src = str(extra_path)
            dst = str(project_path / p.name)
            print_ludwig(f'Copying {src} to {dst}')
            copy_tree(src, dst)

        # delete job instructions for worker saved on server (do this before uploader.to_disk() )
        for pkl_path in project_path.glob('*.pkl'):
            pkl_path.unlink()

        if namespace.group is None:
            random.shuffle(configs.Remote.online_worker_names)
            workers_cycle = cycle(configs.Remote.online_worker_names)
        else:
            workers_cycle = cycle(
                configs.Remote.group2workers[namespace.group])
            print(f'Using workers in group={namespace.group}')

    # ---------------------------------------------------

    if namespace.minimal:
        print_ludwig('Using minimal (debug) parameter configuration')
        param2val = user_params.param2default.copy()
        param2val.update(user_params.param2debug)
        param2val_list = [param2val]
    else:
        param2val_list = gen_all_param2vals(user_params.param2requests,
                                            user_params.param2default)
    # iterate over unique jobs
    num_new = 0
    workers_with_jobs = set()
    for param2val in param2val_list:

        # make job
        job = Job(param2val)

        # get param_name - always try to use an existing param_name even if clear_runs == True,
        # because this provides constancy in that parameter configurations always receive the same param_name,
        # which allows for hard-coding param_names in user code.
        # but hard-coding is not recommended, because constancy is broken when runs are manually cleared.
        job.update_param_name(runs_path, num_new)

        # add project_path
        if namespace.local:
            job.param2val['project_path'] = str(project_path)
        elif namespace.isolated:
            job.param2val['project_path'] = str(cwd)
        else:
            job.param2val['project_path'] = str(
                configs.WorkerDirs.ludwig_data / project_name)

        # allow exit if requested parameter configuration already exists requested number of times?
        # do counting with --local, because behavior when --local should be identical to behavior of Ludwig worker
        if namespace.minimal or namespace.isolated or namespace.clear_runs:
            num_needed = namespace.reps
        else:
            num_needed = job.calc_num_needed(runs_path, namespace.reps)

        # replicate each job
        for rep_id in range(num_needed):

            # add job_name and save_path
            job.update_job_name_and_save_path(rep_id, namespace.src)

            # if running locally, execute job now + cleanup
            if namespace.local or namespace.isolated:
                series_list = user_job.main(job.param2val)
                save_job_files(job.param2val, series_list, runs_path)

                # temporary runs folder auto-created with name = {project_name}_runs must be removed
                path_tmp = cwd / f'{src_path.name}_runs'
                if path_tmp.exists():
                    shutil.rmtree(path_tmp)
                    print(f'Removed temporary directory {path_tmp}')

            # if running on Ludwig, save worker instructions to shared drive
            else:

                worker = namespace.worker or next(workers_cycle)
                workers_with_jobs.add(worker)

                uploader = Uploader(project_path, src_path.name,
                                    namespace.skip_hostkey)
                uploader.to_disk(job, worker)

        num_new += int(job.is_new)

        if namespace.first_only:
            raise SystemExit(
                'Exiting loop after first job because --first_only=True.')

    # exit ?
    if namespace.no_upload:
        # fix: message previously referred to a non-existent --upload flag
        print_ludwig('Flag --no-upload set. Not uploading run.py.')
        return
    elif namespace.local or namespace.isolated:
        return
    elif not workers_with_jobs:  # all requested jobs have previously been completed
        return

    # kill running jobs on workers? (do this before removing runs folders).
    # triggering worker without job instructions kills existing job with matching project_name
    if namespace.worker is None:
        workers_for_killing = set(
            configs.Remote.online_worker_names).difference(workers_with_jobs)
    else:  # if connecting to single worker, only kill jobs on single worker to prevent needing tp access others
        workers_for_killing = [namespace.worker]
    for worker in workers_for_killing:
        uploader.kill_jobs(worker)

    # delete existing runs on shared drive?
    if namespace.clear_runs:
        for param_path in runs_path.glob('*param*'):
            print_ludwig('Removing\n{}'.format(param_path))
            sys.stdout.flush()
            shutil.rmtree(str(param_path))

    # upload = start jobs
    for worker in workers_with_jobs:
        uploader.start_jobs(worker)

    print('Submitted jobs to:')
    for w in workers_with_jobs:
        print(w)
示例#11
0
def submit():
    """
    run jobs locally or on Ludwig workers.

    This script should be called in root directory of the Python project.
    If not specified via CL arguments, it will try to import src.params.
    src.params is where this script will try to find the parameters with which to execute your jobs.

    NOTE(review): this appears to be an older variant of submit() (it uses
    config/research_data naming rather than configs/ludwig_data) — confirm
    which version is the live entry point.
    """

    cwd = Path.cwd()
    project_name = cwd.name

    # parse cmd-line args
    parser = argparse.ArgumentParser()
    parser.add_argument('-src',
                        '--src',
                        default=cwd.name.lower(),
                        action='store',
                        dest='src',
                        required=False,
                        help='Specify path to your source code.')
    parser.add_argument(
        '-r',
        '--reps',
        default=1,
        action='store',
        dest='reps',
        type=int,
        choices=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50],
        required=False,
        help='Number of times each job will be executed')
    parser.add_argument(
        '-m',
        '--minimal',
        action='store_true',
        default=False,
        dest='minimal',
        required=False,
        help='Run minimal parameter configuration for debugging.')
    parser.add_argument('-l',
                        '--local',
                        action='store_true',
                        default=False,
                        dest='local',
                        required=False,
                        help='Run on host')
    parser.add_argument(
        '-i',
        '--isolated',
        action='store_true',
        default=False,
        dest='isolated',
        required=False,
        help='Do not connect to server. Use this only when all daa is available'
    )
    parser.add_argument(
        '-w',
        '--worker',
        default=None,
        action='store',
        dest='worker',
        choices=config.Remote.online_worker_names,
        required=False,
        help='Specify a single worker name if submitting to single worker only'
    )
    parser.add_argument('-g',
                        '--group',
                        default=None,
                        action='store',
                        dest='group',
                        choices=config.Remote.group2workers.keys(),
                        required=False,
                        help='Specify a worker group')
    parser.add_argument(
        '-x',
        '--clear_runs',
        action='store_true',
        default=False,
        dest='clear_runs',
        required=False,
        help=
        'Delete all saved runs associated with current project on shared drive'
    )
    parser.add_argument('-f',
                        '--first_only',
                        action='store_true',
                        default=False,
                        dest='first_only',
                        required=False,
                        help='Run first job and exit.')
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument(
        '-mnt',
        '--research_data',
        default=None,
        action='store',
        dest='research_data_path',
        required=False,
        help=
        'Specify where the shared drive is mounted on your system (if not /media/research_data).'
    )
    parser.add_argument('-e',
                        '--extra_paths',
                        nargs='*',
                        default=[],
                        action='store',
                        dest='extra_paths',
                        required=False,
                        help='Paths to additional Python packages or data. ')
    parser.add_argument(
        '-n',
        '--no-upload',
        action='store_true',
        dest='no_upload',
        required=False,
        help='Whether to upload jobs to Ludwig. Set false for testing')
    namespace = parser.parse_args()

    # ---------------------------------------------- paths

    if namespace.research_data_path:
        research_data_path = Path(namespace.research_data_path)
    else:
        research_data_path = Path(
            default_mnt_point) / config.WorkerDirs.research_data.name

    # project_path is local when --isolated, otherwise it is on the shared drive
    if namespace.isolated:
        project_path = cwd
    else:
        project_path = research_data_path / project_name

    runs_path = project_path / 'runs'

    src_path = cwd / namespace.src

    if namespace.local or namespace.isolated:
        pass
        # TODO remove old parents of save_paths

    # ------------------------------------------------ user code

    # import user params + job
    print_ludwig('Trying to import source code from:\n{}'.format(src_path))
    sys.path.append(str(cwd))
    user_params = importlib.import_module(src_path.name + '.params')
    user_job = importlib.import_module(src_path.name + '.job')

    # ------------------------------------------------ checks

    if not namespace.isolated:
        if not os.path.ismount(str(research_data_path)):
            raise OSError(f'{research_data_path} is not mounted')

    if not src_path.exists():
        raise NotADirectoryError(f'Cannot find source code in {src_path}.')

    # check that requests are lists and that each list does not contain repeated values
    for k, v in user_params.param2requests.items():
        if not isinstance(v, list):
            raise TypeError('Values of param2requests must be lists')
        for vi in v:
            if isinstance(
                    vi, list
            ):  # tuples can be members of a set (they are hashable) but not lists
                raise TypeError(
                    'Inner collections in param2requests must be of type tuple, not list'
                )
        if len(v) != len(
                set(v)
        ):  # otherwise each identical value will be assigned a unique param_name
            raise ValueError('Each requested parameter value must be unique')

    # check that there are no lists (only tuples) in param2default
    for k, v in user_params.param2default.items():
        if isinstance(
                v, list
        ):  # tuples can be members of a set (they are hashable) but not lists
            raise TypeError(
                'Type list is not allowed in param2default. Convert any lists to tuples.'
            )

    # ---------------------------------------------

    # are additional source code files required? (do this before killing active jobs)
    # these can be Python packages, which will be importable, or contain data.
    # extra_paths is only allowed to be non-empty if not --local
    for extra_path in namespace.extra_paths:
        p = Path(extra_path)
        if not p.is_dir():
            raise NotADirectoryError('{} is not a directory'.format(p))
        src = str(extra_path)
        dst = str(project_path / p.name)
        print_ludwig(f'Copying {src} to {dst}')
        copy_tree(src, dst)

    uploader = Uploader(project_path, src_path.name)

    # delete job instructions for worker saved on server (do this before uploader.to_disk() )
    for pkl_path in project_path.glob(f'*.pkl'):
        pkl_path.unlink()

    # decide which workers receive jobs: a shuffled cycle over all online
    # workers, or over the requested group only
    if namespace.group is None:
        random.shuffle(config.Remote.online_worker_names)
        workers_cycle = cycle(config.Remote.online_worker_names)
    else:
        workers_cycle = cycle(config.Remote.group2workers[namespace.group])
        print(f'Using workers in group={namespace.group}')

    # ---------------------------------------------------

    if namespace.minimal:
        print_ludwig('Using minimal (debug) parameter configuration')
        param2val = user_params.param2default.copy()
        param2val.update(user_params.param2debug)
        param2val_list = [param2val]
    else:
        param2val_list = gen_all_param2vals(user_params.param2requests,
                                            user_params.param2default)
    # iterate over unique jobs
    num_new = 0
    workers_with_jobs = set()
    for param2val in param2val_list:

        # make job
        job = Job(param2val)
        job.update_param_name(runs_path, num_new)

        # multiply job
        # counting of completed reps is disabled when debugging, running
        # locally, or clearing runs, so all reps are (re-)executed
        for rep_id in range(
                job.calc_num_needed(runs_path,
                                    namespace.reps,
                                    disable=True if
                                    (namespace.minimal or namespace.local
                                     or namespace.clear_runs) else False)):
            job.update_job_name(rep_id)

            # run locally
            if namespace.local or namespace.isolated:
                job.param2val['project_path'] = str(project_path)
                job.param2val['param_name'] += config.Constants.not_ludwig
                job.param2val['job_name'] += config.Constants.not_ludwig
                series_list = user_job.main(job.param2val)
                save_job_files(job.param2val, series_list, runs_path)
            # upload to Ludwig worker
            else:
                job.param2val['project_path'] = str(
                    config.WorkerDirs.research_data / project_name)
                worker = namespace.worker or next(workers_cycle)
                workers_with_jobs.add(worker)
                uploader.to_disk(job, worker)

        num_new += int(job.is_new)

        if namespace.first_only:
            raise SystemExit(
                'Exiting loop after first job because --first_only=True.')

    # upload?
    if namespace.no_upload:
        print_ludwig('Flag --upload set to False. Not uploading run.py.')
        return
    # NOTE(review): --isolated does not return here, and --local with
    # --minimal falls through to the remote steps — confirm this is intended
    elif namespace.local and not namespace.minimal:
        return

    # kill running jobs on workers? (do this before removing runs folders)
    # trigger worker without job instructions: kills existing job with matching project_name
    for worker in set(
            config.Remote.online_worker_names).difference(workers_with_jobs):
        uploader.kill_jobs(worker)

    # delete existing runs?
    if namespace.clear_runs:
        for param_path in runs_path.glob('*param*'):
            print_ludwig('Removing\n{}'.format(param_path))
            sys.stdout.flush()
            shutil.rmtree(str(param_path))

    # upload = start jobs
    for worker in workers_with_jobs:
        uploader.start_jobs(worker)

    print('Submitted jobs to:')
    for w in workers_with_jobs:
        print(w)