예제 #1
0
def calculate_keyspace(branches: List[Dict]) -> Dict:
    """Collect every value each flattened configuration key takes across branches.

    Each branch is flattened with ``collapse_nested_dict`` into
    ``(key, value)`` pairs, and all branches must expose exactly the same
    set of flattened keys.

    Parameters
    ----------
    branches
        Non-empty list of (possibly nested) branch configurations.

    Returns
    -------
    Dict
        Mapping of flattened key to a list of the distinct values seen for
        that key. Values are de-duplicated through a set, so the order of
        each list is unspecified.

    Raises
    ------
    IndexError
        If ``branches`` is empty.
    ValueError
        If any branch's flattened keys differ from those of the first branch.

    Anything raised by ``validate_artifact_path`` propagates unchanged.
    """
    keyspace = {k: set() for k, _ in collapse_nested_dict(branches[0])}

    # Walk every branch, including the first one, so that the first branch's
    # artifact path goes through the same validation as all the others.
    for branch in branches:
        flat_branch = dict(collapse_nested_dict(branch))
        if flat_branch.keys() != keyspace.keys():
            raise ValueError("All branches must have the same keys")
        for k, v in flat_branch.items():
            if k == FULL_ARTIFACT_PATH_KEY:
                validate_artifact_path(v)
            keyspace[k].add(v)

    return {k: list(v) for k, v in keyspace.items()}
예제 #2
0
def calculate_keyspace(branches):
    """Gather the distinct values seen for each flattened key across branches.

    Returns an empty dict when the first branch is ``None``. Otherwise every
    branch is flattened with ``collapse_nested_dict`` and must carry exactly
    the same keys as the first one; a ``ValueError`` is raised if not. The
    result maps each flattened key to a list of its distinct values, in
    unspecified order.
    """
    head = branches[0]
    if head is None:
        return {}

    seen = {}
    for key, value in collapse_nested_dict(head):
        seen[key] = {value}

    # The key set is fixed by the first branch and never grows afterwards.
    expected_keys = set(seen)

    for raw_branch in branches[1:]:
        flat = dict(collapse_nested_dict(raw_branch))
        if set(flat) != expected_keys:
            raise ValueError("All branches must have the same keys")
        for key, value in flat.items():
            seen[key].add(value)

    return {key: list(values) for key, values in seen.items()}
예제 #3
0
def test_collapse_nested_dict():
    """Nested keys are flattened to dotted paths paired with their leaf values."""
    nested = {'a': {'b': {'c': 1, 'd': 2}}, 'e': 3}
    expected_pairs = {('a.b.c', 1), ('a.b.d', 2), ('e', 3)}

    flattened = collapse_nested_dict(nested)

    # Compare as sets: the order of the flattened pairs is not part of the contract.
    assert set(flattened) == expected_pairs
예제 #4
0
def build_job_list(ctx: RunContext) -> List[dict]:
    """Build the shuffled list of job parameter dicts that still need to run.

    One candidate job is created per ``(input_draw, random_seed,
    branch_config)`` entry of ``ctx.keyspace``. When ``ctx.existing_outputs``
    is not ``None``, a candidate is dropped if any existing output row matches
    its draw, seed and every flattened branch configuration value (floats are
    compared with ``np.isclose``, everything else with ``==``).

    Side effects: stores the number of dropped candidates on
    ``ctx.number_already_completed``, logs a summary when that number is
    non-zero, and shuffles the returned list in place with ``np.random``.
    """
    pending = []
    skipped = 0

    for input_draw, random_seed, branch_config in ctx.keyspace:
        job = {
            'model_specification_file': str(ctx.model_specification),
            'branch_configuration': branch_config,
            'input_draw': int(input_draw),
            'random_seed': int(random_seed),
            'results_path': ctx.output_directory,
        }

        outputs = ctx.existing_outputs
        if outputs is not None:
            # Row-wise mask of previous outputs that match this job exactly.
            found = outputs.input_draw == job['input_draw']
            found &= outputs.random_seed == job['random_seed']
            if branch_config:
                for column, value in collapse_nested_dict(branch_config):
                    column_data = outputs[column]
                    if isinstance(value, float):
                        found &= np.isclose(column_data, value)
                    else:
                        found &= column_data == value
            if np.any(found):
                skipped += 1
                continue

        pending.append(job)

    if skipped:
        logger.info(
            f"{skipped} of {len(ctx.keyspace)} jobs completed in previous run."
        )
        # Every previous output row should correspond to one skipped job.
        if skipped != len(ctx.existing_outputs):
            logger.warning(
                "There are jobs from the previous run which would not have been created "
                "with the configuration saved with the run. That either means that code "
                "has changed between then and now or that the outputs or configuration data "
                "have been modified. This may represent a serious error so give it some thought."
            )

    ctx.number_already_completed = skipped
    np.random.shuffle(pending)
    return pending
예제 #5
0
def already_complete(job_parameters: JobParameters,
                     existing_outputs: pd.DataFrame) -> bool:
    """Report whether ``existing_outputs`` already holds a row for this job.

    A row matches when every flattened branch configuration value, plus the
    run-key input draw and random seed columns, agree with the job. Float
    values are compared with ``np.isclose``; all other values with ``==``.
    An empty frame never matches.
    """
    if existing_outputs.empty:
        return False

    criteria = collapse_nested_dict(job_parameters.branch_configuration)
    criteria.extend([
        ("run_configuration.run_key.input_draw", job_parameters.input_draw),
        ("run_configuration.run_key.random_seed", job_parameters.random_seed),
    ])

    # Start with every row as a candidate and narrow it one column at a time.
    matches = pd.Series(True, index=existing_outputs.index)
    for column, target in criteria:
        observed = existing_outputs[column]
        if isinstance(target, float):
            hit = np.isclose(observed, target)
        else:
            hit = observed == target
        matches &= hit

    return np.any(matches)
def test_expand_branch_template():
    """List-valued entries expand to their full product; scalar templates pass through."""
    templates = [
        {"a": {"b": [1, 2], "c": 3, "d": [4, 5, 6], "e": [True, False]}},
        {"a": {"b": 10, "c": 30, "d": 40, "e": True}},
    ]

    actual = [
        collapse_nested_dict(branch)
        for branch in expand_branch_templates(templates)
    ]

    # The first template yields every (b, d, e) combination with c fixed at 3.
    # The second contains no lists and is expected back unchanged.
    expected_branches = [
        {"a": {"b": b, "c": 3, "d": d, "e": e}}
        for e in (True, False)
        for d in (4, 5, 6)
        for b in (1, 2)
    ]
    expected_branches.append({"a": {"b": 10, "c": 30, "d": 40, "e": True}})
    expected = [collapse_nested_dict(branch) for branch in expected_branches]

    # Ordering of the expanded branches is not asserted, only their contents.
    assert sorted(actual) == sorted(expected)
예제 #7
0
def expand_branch_templates(templates: List[Dict]) -> List[Dict]:
    """
    Take a list of dictionaries of configuration values (like the ones used in
    experiment branch configurations) and expand it by taking any values which
    are lists and creating a new set of branches which is made up of the
    product of all those lists plus all non-list values.

    For example this:

    .. code::

       [{'a': {'b': [1,2], 'c': 3, 'd': [4,5,6]}}]

    becomes this:

    .. code::

       [
           {'a': {'b': 1, 'c': 3, 'd': 4}},
           {'a': {'b': 2, 'c': 3, 'd': 4}},
           {'a': {'b': 1, 'c': 3, 'd': 5}},
           {'a': {'b': 2, 'c': 3, 'd': 5}},
           {'a': {'b': 1, 'c': 3, 'd': 6}},
           {'a': {'b': 2, 'c': 3, 'd': 6}}
       ]

    Parameters
    ----------
    templates
        Iterable of (possibly nested) branch templates. Each one is flattened
        with ``collapse_nested_dict`` into dotted keys before expansion.

    Returns
    -------
    List[Dict]
        One nested dictionary per expanded branch, in template order. Within
        a template the first flattened key (in sorted order) varies fastest.
        A template with no keys expands to a single empty dictionary, and a
        template containing an empty list expands to nothing.
    """
    # Local import so the block does not rely on a module-level import.
    import itertools

    expanded_branches = []
    for template in templates:
        flat = sorted(collapse_nested_dict(template))
        keys = [k for k, _ in flat]
        # Treat scalars as single-choice lists so every key takes part in the product.
        choices = [v if isinstance(v, list) else [v] for _, v in flat]

        # itertools.product varies its *last* argument fastest. Feeding it the
        # reversed choices and reversing each result keeps the first key as the
        # fastest-varying one.
        for combination in itertools.product(*reversed(choices)):
            expanded_branches.append(list(zip(keys, reversed(combination))))

    # Rebuild the nested structure from the dotted keys.
    final_branches = []
    for branch in expanded_branches:
        root = {}
        for dotted_key, value in branch:
            *parents, leaf = dotted_key.split(".")
            current = root
            for parent in parents:
                current = current.setdefault(parent, {})
            current[leaf] = value
        final_branches.append(root)

    return final_branches
예제 #8
0
def test_expand_branch_template():
    """Expanding templates yields the full product of list values plus scalar-only branches."""
    source = [
        {'a': {'b': [1, 2], 'c': 3, 'd': [4, 5, 6], 'e': [True, False]}},
        {'a': {'b': 10, 'c': 30, 'd': 40, 'e': True}},
    ]
    flattened_result = [
        collapse_nested_dict(branch)
        for branch in expand_branch_templates(source)
    ]

    # Twelve branches from the first template: every (b, d, e) choice, c fixed at 3.
    expected_branches = []
    for e_value in (True, False):
        for d_value in (4, 5, 6):
            for b_value in (1, 2):
                expected_branches.append(
                    {'a': {'b': b_value, 'c': 3, 'd': d_value, 'e': e_value}}
                )
    # The list-free second template comes back as-is.
    expected_branches.append({'a': {'b': 10, 'c': 30, 'd': 40, 'e': True}})

    flattened_expected = [
        collapse_nested_dict(branch) for branch in expected_branches
    ]

    # Sort both sides: only the contents matter, not the expansion order.
    assert sorted(flattened_result) == sorted(flattened_expected)
def worker(parameters: Mapping):
    """Run one simulation described by ``parameters`` and return its metrics.

    ``parameters`` must provide the keys ``'input_draw'``, ``'random_seed'``,
    ``'model_specification_file'``, ``'branch_configuration'`` and
    ``'results_path'``.

    Returns a one-element list holding the metrics DataFrame serialized with
    ``DataFrame.to_msgpack()``. The frame is indexed by
    ``(input_draw_number, random_seed)`` and has one extra column per
    flattened entry of the run key.

    Any exception raised inside the ``try`` block is logged, its formatted
    traceback is stored in the current job's ``meta['root_exception']``, and
    it is re-raised. The environment lookups and ``parameters`` reads at the
    top happen before the ``try``, so a missing variable or key there raises
    ``KeyError`` without being recorded on the job.
    """
    # Identify where and as which scheduler task this job is running.
    node = f"{os.environ['SGE_CLUSTER_NAME']}:{os.environ['HOSTNAME']}"
    job = f"{os.environ['JOB_NAME']}: {os.environ['JOB_ID']}:{os.environ['SGE_TASK_ID']}"

    input_draw = parameters['input_draw']
    random_seed = parameters['random_seed']
    model_specification_file = parameters['model_specification_file']
    branch_config = parameters['branch_configuration']
    logger.info(f'Launching new job {job} on {node}')
    logger.info('Starting job: {}'.format(
        (input_draw, random_seed, model_specification_file, branch_config)))

    try:
        # Seed numpy's global RNG from the draw/seed pair.
        np.random.seed([input_draw, random_seed])
        # Id of the current queue job; used below to build the run id.
        worker_ = get_current_job().id

        # Imported inside the function rather than at module level.
        from vivarium.framework.engine import SimulationContext
        from vivarium.framework.utilities import collapse_nested_dict

        configuration = {}
        run_key = {'input_draw': input_draw, 'random_seed': random_seed}

        # The branch configuration feeds both the simulation configuration and
        # the run key that is later written into the output columns.
        if branch_config is not None:
            configuration.update(dict(branch_config))
            run_key.update(dict(branch_config))

        # Note: dict.update replaces top-level keys wholesale, so a branch
        # config that defines 'run_configuration', 'randomness' or
        # 'input_data' has that entry overwritten here.
        configuration.update({
            'run_configuration': {
                'input_draw_number': input_draw,
                'run_id': str(worker_) + '_' + str(time()),
                'results_directory': parameters['results_path'],
                'run_key': run_key,
            },
            'randomness': {
                'random_seed': random_seed,
                'additional_seed': input_draw,
            },
            'input_data': {
                'input_draw_number': input_draw,
            }
        })

        sim = SimulationContext(model_specification_file,
                                configuration=configuration)
        logger.info('Simulation configuration:')
        logger.info(str(sim.configuration))

        # `start` covers setup + initialization + main loop;
        # `sim_start` covers only run/finalize/report.
        start = time()
        logger.info('Beginning simulation setup.')
        sim.setup()
        sim.initialize_simulants()
        logger.info(
            f'Simulation setup complete in {(time() - start)/60} minutes.')
        sim_start = time()
        logger.info('Starting main simulation loop.')
        sim.run()
        sim.finalize()
        metrics = sim.report()
        end = time()

        # Derive the step count from the configured time span so an average
        # step duration can be logged.
        # NOTE(review): assumes time.step_size is expressed in days and that
        # the span is non-empty (num_steps > 0) — confirm.
        start_time = pd.Timestamp(**sim.configuration.time.start.to_dict())
        end_time = pd.Timestamp(**sim.configuration.time.end.to_dict())
        step_size = pd.Timedelta(days=sim.configuration.time.step_size)
        num_steps = int(math.ceil((end_time - start_time) / step_size))

        logger.info(
            f'Simulation main loop completed in {(end - sim_start)/60} minutes.'
        )
        logger.info(
            f'Average step length was {(end - sim_start)/num_steps} seconds.')
        logger.info(f'Total simulation run time {(end - start) / 60} minutes.')

        # Single-row frame keyed by (draw, seed), with the flattened run key
        # attached as extra columns.
        idx = pd.MultiIndex.from_tuples(
            [(input_draw, random_seed)],
            names=['input_draw_number', 'random_seed'])
        output_metrics = pd.DataFrame(metrics, index=idx)
        for k, v in collapse_nested_dict(run_key):
            output_metrics[k] = v
        # NOTE(review): DataFrame.to_msgpack was removed in pandas 1.0 —
        # confirm the pinned pandas version still provides it.
        output = [output_metrics.to_msgpack()]
        return output

    except Exception:
        logger.exception('Unhandled exception in worker')
        # Rebinds `job` (previously the descriptive string) to the job object
        # so the traceback can be attached to its metadata.
        job = get_current_job()
        job.meta['root_exception'] = format_exc()
        job.save_meta()
        raise
    finally:
        logger.info('Exiting job: {}'.format(
            (input_draw, random_seed, model_specification_file,
             branch_config)))
예제 #10
0
def work_horse(job_parameters: dict) -> pd.DataFrame:
    """Run one simulation described by ``job_parameters`` and return its metrics.

    ``job_parameters`` is unpacked into a ``JobParameters`` instance. Its
    ``branch_configuration`` is extended with run-specific
    ``run_configuration``, ``randomness`` and ``input_data`` sections and
    passed to a ``SimulationContext``.

    Returns a DataFrame of the reported metrics indexed by
    ``(input_draw_number, random_seed)``, with one extra column per flattened
    entry of ``job_parameters.branch_configuration``.

    Any exception raised inside the ``try`` block is logged, its formatted
    traceback is stored in the current job's ``meta['root_exception']``, and
    it is re-raised.
    """
    node = f"{ENV_VARIABLES.HOSTNAME.value}"
    job = f"{ENV_VARIABLES.JOB_ID.value}:{ENV_VARIABLES.TASK_ID.value}"

    # Rebind the plain dict to the structured parameter object used below.
    job_parameters = JobParameters(**job_parameters)

    logger.info(f"Launching new job {job} on {node}")
    logger.info(f"Starting job: {job_parameters}")

    try:
        # No copy is made here, so the update below appears to mutate
        # job_parameters.branch_configuration itself. The flattened columns
        # written at the end would then also include the run_configuration /
        # randomness / input_data entries — presumably intentional; verify
        # against how JobParameters exposes this attribute.
        configuration = job_parameters.branch_configuration
        # TODO: Need to test serialization of an empty dict, then this
        #   can go away.  If you're successfully running code and this
        #   assert is still here, delete it.
        assert configuration is not None

        configuration.update({
            "run_configuration": {
                "run_id": str(get_current_job().id) + "_" + str(time()),
                "results_directory": job_parameters.results_path,
                "run_key": job_parameters.job_specific,
            },
            "randomness": {
                "random_seed": job_parameters.random_seed,
                "additional_seed": job_parameters.input_draw,
            },
            "input_data": {
                "input_draw_number": job_parameters.input_draw,
            },
        })

        sim = SimulationContext(job_parameters.model_specification,
                                configuration=configuration)
        logger.info("Simulation configuration:")
        logger.info(str(sim.configuration))

        # Derive the step count from the configured time span; it is used for
        # logging and for the mean step duration below.
        # NOTE(review): assumes time.step_size is expressed in days and that
        # the span is non-empty (num_steps > 0) — confirm.
        start_time = pd.Timestamp(**sim.configuration.time.start.to_dict())
        end_time = pd.Timestamp(**sim.configuration.time.end.to_dict())
        step_size = pd.Timedelta(days=sim.configuration.time.step_size)
        num_steps = int(math.ceil((end_time - start_time) / step_size))

        # Snapshots taken before and after the run are handed to
        # do_sim_epilogue together with the timing data.
        start_snapshot = CounterSnapshot()
        event = {"start": time()}  # timestamps of application events
        logger.info("Beginning simulation setup.")
        sim.setup()
        event["simulant_initialization_start"] = time()
        exec_time = {
            "setup_minutes":
            (event["simulant_initialization_start"] - event["start"]) / 60
        }  # execution event
        logger.info(
            f'Simulation setup completed in {exec_time["setup_minutes"]:.3f} minutes.'
        )

        sim.initialize_simulants()
        event["simulation_start"] = time()
        exec_time["simulant_initialization_minutes"] = (
            event["simulation_start"] -
            event["simulant_initialization_start"]) / 60
        logger.info(
            f'Simulant initialization completed in {exec_time["simulant_initialization_minutes"]:.3f} minutes.'
        )

        logger.info(
            f"Starting main simulation loop with {num_steps} time steps")
        sim.run()
        event["results_start"] = time()
        # Main-loop duration covers sim.run() only; finalize/report are
        # timed separately via event["end"].
        exec_time["main_loop_minutes"] = (event["results_start"] -
                                          event["simulation_start"]) / 60
        exec_time["step_mean_seconds"] = (
            event["results_start"] - event["simulation_start"]) / num_steps
        logger.info(
            f'Simulation main loop completed in {exec_time["main_loop_minutes"]:.3f} minutes.'
        )
        logger.info(
            f'Average step length was {exec_time["step_mean_seconds"]:.3f} seconds.'
        )

        sim.finalize()
        metrics = sim.report(print_results=False)
        event["end"] = time()
        end_snapshot = CounterSnapshot()

        do_sim_epilogue(start_snapshot, end_snapshot, event, exec_time,
                        job_parameters)

        # Single-row frame keyed by (draw, seed), with the flattened branch
        # configuration attached as extra columns.
        idx = pd.MultiIndex.from_tuples(
            [(job_parameters.input_draw, job_parameters.random_seed)],
            names=["input_draw_number", "random_seed"],
        )
        output_metrics = pd.DataFrame(metrics, index=idx)
        for k, v in collapse_nested_dict(job_parameters.branch_configuration):
            output_metrics[k] = v
        return output_metrics

    except Exception:
        logger.exception("Unhandled exception in worker")
        # Rebinds `job` (previously the descriptive string) to the job object
        # so the traceback can be attached to its metadata.
        job = get_current_job()
        job.meta["root_exception"] = format_exc()
        job.save_meta()
        raise
    finally:
        logger.info(f"Exiting job: {job_parameters}")