# Example #1
def split_raw_data_by_survival(raw_data: RawData) -> Tuple[RawData, RawData]:
    '''Segment raw data into data for agents that die and that survive

    Arguments:
        raw_data: Raw simulation data

    Returns:
        Tuple of 2 raw data dictionaries. The first contains all agents
        that survive until division. The second contains all agents that
        die before dividing.
    '''
    # Establish which agents die. An agent counts as dying if it is
    # flagged dead at any timepoint.
    agents_die = set()
    for time_data in raw_data.values():
        agents_data = get_in(time_data, PATH_TO_AGENTS)
        for agent, agent_data in agents_data.items():
            dead = get_in(agent_data, PATH_TO_DEAD, False)
            if dead:
                agents_die.add(agent)

    # Split the data. Both outputs start with an empty agents dict at
    # every timepoint so they keep the same set of times as the input.
    survive_data = RawData(dict())
    for time in raw_data:
        agents_path = (time, ) + PATH_TO_AGENTS
        assoc_path(survive_data, agents_path, dict())
    die_data = copy.deepcopy(survive_data)

    for time, time_data in raw_data.items():
        agents_data = get_in(time_data, PATH_TO_AGENTS)
        for agent, agent_data in agents_data.items():
            dest = die_data if agent in agents_die else survive_data
            agent_path = (time, ) + PATH_TO_AGENTS + (agent, )
            assoc_path(dest, agent_path, agent_data)

    return survive_data, die_data
# Example #2
def _calc_live_and_dead_finals(
    data: RawData,
    path_to_variable: Path,
    time_range: Tuple[float, float],
    agents: Iterable[str] = tuple(),
) -> Tuple[Dict[str, float], Dict[str, float]]:
    '''Get each agent's last in-range value, split by survival.

    Arguments:
        data: Raw simulation data keyed by time.
        path_to_variable: Path, within an agent's data, to the variable
            whose final value is reported.
        time_range: Tuple of range endpoints, each expressed as a
            fraction of the largest time in ``data``.
        agents: Agents to include. If empty, all agents are included.

    Returns:
        Tuple of two maps from agent to the variable's value at that
        agent's last timepoint within the time range. The first map
        holds agents never flagged dead within the range; the second
        holds agents flagged dead at some timepoint within the range.
    '''
    values: Dict[str, Dict[float, float]] = {}
    die = set()
    # Materialize once so that a one-shot iterable (e.g. a generator)
    # can be tested for membership repeatedly.
    selected_agents = set(agents)

    end_time = max(data.keys())
    start_bound = time_range[0] * end_time
    end_bound = time_range[1] * end_time
    for time, time_data in data.items():
        if time < start_bound or time > end_bound:
            continue
        agents_data = get_in(time_data, PATH_TO_AGENTS)
        for agent, agent_data in agents_data.items():
            if selected_agents and agent not in selected_agents:
                continue
            agent_values = values.setdefault(agent, {})
            value = get_in(agent_data, path_to_variable)
            if get_in(agent_data, PATH_TO_DEAD, False):
                die.add(agent)
            agent_values[time] = value

    live_finals = {}
    dead_finals = {}
    for agent, agent_values in values.items():
        if not agent_values:
            continue
        final_value = agent_values[max(agent_values.keys())]
        if agent in die:
            dead_finals[agent] = final_value
        else:
            live_finals[agent] = final_value
    return live_finals, dead_finals
def make_snapshots_figure(
        data: RawData,
        environment_config: EnvironmentConfig,
        name: str,
        fields: Sequence[str],
        agent_fill_color: Optional[str] = None,
        agent_alpha: float = 1,
        num_snapshots: int = NUM_SNAPSHOTS,
        snapshot_times: Optional[Tuple[float, ...]] = None,
        xlim: Tuple[float, float] = (10, 40),
        ylim: Tuple[float, float] = (10, 40)
        ) -> dict:
    '''Make a figure of snapshots.

    Arguments:
        data: The experiment data.
        environment_config: Environment parameters.
        name: Name of the output file (excluding file extension).
        fields: List of the names of fields to include.
        agent_fill_color: Fill color for agents.
        agent_alpha: Transparency for agents.
        num_snapshots: Number of snapshots.
        snapshot_times: Times to take snapshots at. If
            None, they are evenly spaced.
        xlim: Limits of x-axis.
        ylim: Limits of y-axis.

    Returns:
        Whatever ``plot_snapshots`` returns for the generated figure.
    '''
    snapshots_data = Analyzer.format_data_for_snapshots(
        data, environment_config)
    if not fields:
        # NOTE(review): ``data`` is not read again after this point, so
        # this filtering does not affect ``snapshots_data``. Confirm
        # whether it was meant to run before the formatting call above.
        data = RawData({
            key: val
            for key, val in data.items() if key != 'fields'
        })
    plot_config = {
        'out_dir': FIG_OUT_DIR,
        'filename': '{}.{}'.format(name, FILE_EXTENSION),
        'include_fields': fields,
        'field_label_size': 54,
        'default_font_size': 54,
        'agent_fill_color': agent_fill_color,
        'dead_color': (0, 0, 0.79),  # gray in HSV
        'agent_alpha': agent_alpha,
        'n_snapshots': num_snapshots,
        'snapshot_times': snapshot_times,
        'scale_bar_length': 10,
        'scale_bar_color': 'white' if fields else 'black',
        'xlim': xlim,
        'ylim': ylim,
        'min_color': '#FFFFFF',
        'max_color': '#000000',
        'grid_color': 'white' if fields else '',
    }
    stats = plot_snapshots(snapshots_data, plot_config)
    return stats
# Example #4
 def test_multiple_proteins(self) -> None:
     '''Check that only the requested proteins appear in the table.'''
     data = RawData({
         1: {
             'agents': {
                 'agent1': self._make_agent_data(2, {
                     'protein1': 1,
                     'protein2': 2,
                     'protein3': 0
                 }),
                 'agent2': self._make_agent_data(4, {
                     'protein2': 3,
                     'protein1': 8,
                     'protein3': 0
                 }),
             },
         },
     })
     name_to_path_map: Dict[str, Path] = {
         'protein1': ('counts', 'protein1'),
         'protein2': ('counts', 'protein2'),
     }
     table = raw_data_to_end_expression_table(data, name_to_path_map)
     # The test accepts either row order, but the columns must agree
     # with each other on which agent is in which row.
     if table['protein1'][0] == 1 / 2:
         assert table['protein1'].tolist() == [1 / 2, 8 / 4]
         assert table['protein2'].tolist() == [2 / 2, 3 / 4]
         assert table['volume'].tolist() == [2, 4]
     else:
         assert table['protein1'].tolist() == [8 / 4, 1 / 2]
         assert table['protein2'].tolist() == [3 / 4, 2 / 2]
         assert table['volume'].tolist() == [4, 2]
     # protein3 has no entry in the name-to-path map, so no column.
     assert 'protein3' not in table.columns
# Example #5
 def test_get_end_time(self) -> None:
     '''Check that the table is built from the largest timepoint.'''
     # Timepoints are deliberately listed out of order so the test
     # fails if the last-inserted key is used instead of the maximum.
     data = RawData({
         2: {
             'agents': {
                 'agent1': self._make_agent_data(2, {'protein': 0}),
                 'agent2': self._make_agent_data(4, {'protein': 0}),
             },
         },
         3: {
             'agents': {
                 'agent1': self._make_agent_data(2, {'protein': 1}),
                 'agent2': self._make_agent_data(4, {'protein': 4}),
             },
         },
         1: {
             'agents': {
                 'agent1': self._make_agent_data(2, {'protein': 0}),
                 'agent2': self._make_agent_data(4, {'protein': 0}),
             },
         },
     })
     name_to_path_map: Dict[str, Path] = {
         'protein': ('counts', 'protein'),
     }
     table = raw_data_to_end_expression_table(data, name_to_path_map)
     assert set(table['protein']) == set([1 / 2, 4 / 4])
     assert set(table['volume']) == set([2, 4])
# Example #6
def raw_data_to_end_expression_table(
        raw_data: RawData, paths_dict: Dict[str, Path]) -> pd.DataFrame:
    '''Create a table of end expression levels from raw simulation data.

    Arguments:
        raw_data: Raw simulation data
        paths_dict: Map from names to paths to protein counts. The
            names will be used as column headers in the returned table.

    Returns:
        Table with one column for each protein, one column for volume,
        and one row for each agent. Each protein cell contains the
        protein concentration (count divided by volume) in that agent
        in the final simulation timepoint. Concentration is reported as
        0 when the agent's volume is 0 or missing.
    '''
    end_data = raw_data[max(raw_data.keys())]
    expression_data: Dict = {name: [] for name in paths_dict}
    expression_data[VOLUME_KEY] = []
    agents_data = get_in(end_data, AGENTS_PATH)
    for agent_data in agents_data.values():
        volume = get_in(agent_data, VOLUME_PATH, 0)
        expression_data[VOLUME_KEY].append(volume)
        for name, path in paths_dict.items():
            count = get_in(agent_data, path, 0)
            # Guard against division by zero for zero-volume agents.
            concentration = count / volume if volume else 0
            expression_data[name].append(concentration)
    return pd.DataFrame(expression_data)
def get_experiment_data(
        args: argparse.Namespace,
        experiment_id: str,
        ) -> DataTuple:
    '''Get simulation data for an experiment.

    If ``args.data_path`` is set, retrieve the experiment data from a
    JSON file named ``<experiment_id>.json`` under ``args.data_path``.
    Otherwise, retrieve the data from MongoDB.

    Arguments:
        args: Parsed CLI args.
        experiment_id: ID of experiment.

    Returns: Tuple of simulation data and environment config.
    '''
    if args.data_path:
        path = os.path.join(
            args.data_path, '{}.json'.format(experiment_id))
        with open(path, 'r') as f:
            loaded_file = json.load(f)
        # JSON object keys are always strings, so convert the
        # timepoints back to floats.
        data = RawData({
            float(time): value
            for time, value in loaded_file['data'].items()
        })
        # NOTE(review): the argument to EnvironmentConfig was missing
        # from this view. The 'environment_config' key is an assumption;
        # confirm against the code that writes these JSON files.
        config = EnvironmentConfig(
            loaded_file['environment_config'])
        return data, config
    return Analyzer.get_data(args, experiment_id)
def get_total_mass_plot(
    datasets: Dict[str, List[RawData]],
    colors: List[str],
    fontsize: float = 36,
    vlines: Iterable[Tuple[float, float, str, str]] = tuple(),
) -> Tuple[plt.Figure, dict]:
    '''Plot the total masses of colonies from groups of simulations.

    Each group's total mass over time is plotted as a curve on the
    resulting figure.

    Arguments:
        datasets: Map from the label to associate with a group of
            simulations to a list of the datasets in that group.
        colors: List of colors, one per group, in the same order as
            the groups appear in ``datasets``.
        fontsize: Size of all text on figure.
        vlines: Tuple of vertical line specifiers. Each specifier is a
            tuple of the line position, label position as fraction of x
            range, color, and label.

    Returns:
        A tuple of the figure and a dictionary that maps from group
        label to tuples of the first, second, and third quartiles of
        that group's data.
    '''
    fig, ax = plt.subplots()
    quartiles = {}
    for i, (label, replicates) in enumerate(datasets.items()):
        filtered_replicates = []
        for replicate in replicates:
            # Exclude first timepoint, which is often wrong. Find it
            # once per replicate instead of once per key.
            first_time = min(replicate.keys()) if replicate else None
            filtered = RawData({
                key: val
                for key, val in replicate.items()
                if key != first_time
            })
            filtered_replicates.append(filtered)
        label_quartiles = plot_total_mass(filtered_replicates, ax, label,
                                          colors[i], fontsize)
        quartiles[label] = label_quartiles
    for x, label_x, vline_color, vline_label in vlines:
        # The line position is divided by 60 twice to match the
        # x-axis, which is labeled in hours.
        ax.axvline(  # type: ignore
            x / 60 / 60, color=vline_color, linestyle='--')
        # NOTE(review): the positional arguments of this call were
        # missing from this view. The vertical position (0.95, in axes
        # coordinates) and the fontsize keyword are assumptions.
        ax.text(  # type: ignore
            label_x, 0.95, vline_label, fontsize=fontsize,
            transform=ax.transAxes)  # type: ignore
    ax.set_ylabel(  # type: ignore
        'Total Cell Mass (fg)', fontsize=fontsize)
    ax.set_xlabel('Time (hr)', fontsize=fontsize)  # type: ignore
    for spine_name in ('top', 'right'):
        ax.spines[spine_name].set_visible(False)  # type: ignore
    return fig, quartiles
# Example #9
def filter_raw_data_by_time(raw_data: RawData,
                            time_range: Tuple[float, float]) -> RawData:
    '''Filter raw simulation data to the timepoints within a range

    Arguments:
        raw_data: Raw simulation data.
        time_range: Tuple of range endpoints. Each endpoint is a float
            between 0 and 1 (inclusive) that denotes a fraction of the
            total simulation time.

    Returns:
        A subset of the key-value pairs in ``raw_data``. Includes only
        those timepoints between the ``time_range`` endpoints
        (inclusive).
    '''
    floor, ceil = time_range
    end = max(raw_data.keys())
    filtered = RawData({
        time: time_data
        for time, time_data in raw_data.items()
        if floor * end <= time <= ceil * end
    })
    return filtered
def get_total_mass_timeseries(data: RawData) -> List[float]:
    '''Get a timeseries of the total mass of a simulation.

    Arguments:
        data: Data from the simulation.

    Returns:
        A list of the total cell mass in the simulation over time,
        ordered by increasing timepoint.
    '''
    times = sorted(data.keys())
    mass_timeseries = []
    for time in times:
        agents_data = get_in(data[time], AGENTS_PATH)
        # NOTE(review): the accumulation step was missing from this
        # view, which left the returned list always empty. The helper
        # ``get_total_mass`` is assumed to be defined in this module
        # and to sum the masses of all agents; confirm its name.
        mass_timeseries.append(get_total_mass(agents_data))
    return mass_timeseries
# Example #11
 def test_zeros(self) -> None:
     '''Check that zero volumes and zero counts give zero expression.'''
     data = RawData({
         1: {
             'agents': {
                 'agent1': self._make_agent_data(0, {'protein': 1}),
                 'agent2': self._make_agent_data(4, {'protein': 0}),
                 'agent3': self._make_agent_data(0, {'protein': 0}),
             },
         },
     })
     name_to_path_map: Dict[str, Path] = {
         'protein': ('counts', 'protein'),
     }
     table = raw_data_to_end_expression_table(data, name_to_path_map)
     assert set(table['protein']) == set([0, 0, 0])
     assert set(table['volume']) == set([0, 4, 0])
# Example #12
def _get_final_live_agents(
        data: RawData,
        time_range: Tuple[float, float] = (0, 1),
) -> List[str]:
    '''Get the agents not flagged dead at the last in-range timepoint.

    Arguments:
        data: Raw simulation data.
        time_range: Tuple of range endpoints passed through to
            ``filter_raw_data_by_time``.

    Returns:
        List of the agents present at the final timepoint of the
        filtered data whose dead flag is not truthy.
    '''
    data = filter_raw_data_by_time(data, time_range)
    max_time = max(data.keys())
    agents = []
    # NOTE(review): the path argument of this call was missing from
    # this view. AGENTS_PATH is an assumption; confirm which agents
    # path constant this module defines.
    agents_data = get_in(
        # Pylint doesn't recognize that the RawData NewType is a dict
        data[max_time],  # pylint: disable=unsubscriptable-object
        AGENTS_PATH,
    )
    for agent, agent_data in agents_data.items():
        dead = get_in(agent_data, PATH_TO_DEAD)
        if not dead:
            agents.append(agent)
    return agents
# Example #13
def plot_phylogeny(
    data: RawData,
    out: str = 'phylogeny.pdf',
    live_color: str = 'green',
    dead_color: str = 'black',
    ignore_color: str = 'lightgray',
    time_range: Tuple[float, float] = (0, 1)
) -> Tuple[TreeNode, pd.DataFrame]:
    '''Plot phylogenetic tree from an experiment.

    Arguments:
        data: The simulation data.
        out: Path to the output file. File type will be inferred from
            the file name.
        live_color: Color for nodes representing cells that survive
            until division.
        dead_color: Color for nodes representing cells that die.
        ignore_color: Color for nodes outside the time range considered.
        time_range: Tuple specifying the range of times to consider.
            Range values specified as fractions of the final
            timepoint.

    Returns:
        Tuple of the rendered tree and a table with one row per agent
        seen within the time range. The ``survival`` column is 0 for
        agents flagged dead within the range and 1 otherwise.
    '''
    agent_ids: Set[str] = set()
    dead_ids: Set[str] = set()
    in_time_range_ids: Set[str] = set()
    end_time = max(data.keys())
    for time, time_data in data.items():
        agents_data = get_in(time_data, AGENTS_PATH)
        assert agents_data is not None
        # Every agent goes into the tree, even if outside the range.
        agent_ids |= set(agents_data.keys())

        if time_range[0] * end_time <= time <= time_range[1] * end_time:
            in_time_range_ids |= set(agents_data.keys())
            for agent_id, agent_data in agents_data.items():
                if get_in(agent_data, PATH_TO_DEAD, False):
                    dead_ids.add(agent_id)

    trees = make_ete_trees(agent_ids)
    # All agents are expected to belong to a single tree.
    assert len(trees) == 1
    tree = trees[0]

    # Set style for overall figure
    tstyle = TreeStyle()
    tstyle.show_scale = False
    tstyle.show_leaf_name = False
    tstyle.scale = None
    tstyle.optimal_scale_level = 'full'  # Avoid artificial branches
    tstyle.mode = 'c'
    legend = {
        'Die': dead_color,
        'Survive': live_color,
        'Divided Before Antibiotics Appeared': ignore_color,
    }
    for label, color in legend.items():
        tstyle.legend.add_face(CircleFace(5, color), column=0)
        tstyle.legend.add_face(TextFace(' ' + label, ftype=FONT), column=1)

    # Set styles for each node
    for node in tree.traverse():
        nstyle = NodeStyle()
        nstyle['size'] = 5
        nstyle['vt_line_width'] = 1
        nstyle['hz_line_width'] = 1
        if node.name in in_time_range_ids:
            if node.name in dead_ids:
                nstyle['fgcolor'] = dead_color
            else:
                nstyle['fgcolor'] = live_color
        else:
            nstyle['fgcolor'] = ignore_color
        # Without this the configured style never reaches the node.
        node.set_style(nstyle)
    tree.render(out, tree_style=tstyle, w=400)
    survive_col = []
    agents_col = []
    for agent in in_time_range_ids:
        agents_col.append(agent)
        survive_col.append(0 if agent in dead_ids else 1)
    df = pd.DataFrame({'agents': agents_col, 'survival': survive_col})
    return tree, df