Exemplo n.º 1
0
def _try_complete(args):
    """Try to complete the medium for a single model.

    Parameters
    ----------
    args : tuple
        ``(file, med, growth, max_import, mip, w)``. ``file`` is passed to
        `load_model`; the remaining items are forwarded to
        `mm.complete_medium` as the medium, the growth rate, `max_import`,
        `minimize_components` and `weights`, respectively. ``med`` must
        expose an ``index`` of reaction IDs.

    Returns
    -------
    tuple
        ``(can_grow, added, fixed)``. ``can_grow`` is False if the completion
        raised an `OptimizationError`; in that case ``added`` is NaN and
        ``fixed`` is an all-NaN series indexed like ``med``. Otherwise
        ``added`` counts the entries of ``fixed`` that are not in ``med``.
        The index of ``fixed`` has its external compartment suffix replaced
        by ``_m``.

    """
    file, med, growth, max_import, mip, w = args
    mod = load_model(file)
    exc = find_external_compartment(mod)
    try:
        fixed = mm.complete_medium(mod,
                                   med,
                                   growth,
                                   max_import=max_import,
                                   minimize_components=mip,
                                   weights=w)
    except OptimizationError:
        fixed = pd.Series(float("nan"), index=med.index)
        added = float("nan")
        can_grow = False
        logger.info("Could not grow `%s`." % file)
    else:
        added = sum(i not in med.index for i in fixed.index)
        can_grow = True
        logger.info("Could grow `%s` by adding %d import." % (file, added))

    # The compartment ID is data, not a pattern: escape it so that regex
    # metacharacters in the ID can not change what gets matched.
    escaped = re.escape(exc)
    compartment_suffix = re.compile(
        "(_{}$)|([^a-zA-Z0-9 :]{}[^a-zA-Z0-9 :]$)".format(escaped, escaped))
    fixed.index = [compartment_suffix.sub("_m", rid) for rid in fixed.index]

    return (can_grow, added, fixed)
Exemplo n.º 2
0
def test_download(tmpdir):
    """Check that the model at `URL` can be downloaded and loaded."""
    print(tmpdir.dirpath())
    target_dir = str(tmpdir)
    util.download_model(URL, target_dir)
    downloaded = tmpdir.join("e_coli_core.xml.gz")
    assert downloaded.check()

    # Loading straight from the URL must yield the expected model size.
    loaded = util.load_model(URL)
    assert len(loaded.reactions) == 95
    assert len(loaded.metabolites) == 72
Exemplo n.º 3
0
def reaction_matrix(files):
    """Build a model-by-reaction presence matrix.

    Every file in `files` is loaded with `load_model`. The result has one
    row per model name and one column per reaction ID, holding integers
    with 0 wherever a reaction does not occur in a model.
    """
    pairs = [
        (rxn.id, mod.name)
        for mod in map(load_model, files)
        for rxn in mod.reactions
    ]
    long_table = pd.DataFrame(pairs, columns=["reaction", "id"])
    long_table["value"] = 1
    wide_table = long_table.pivot_table(values="value",
                                        index="id",
                                        columns="reaction")
    # Combinations that never occurred are NaN after pivoting.
    return wide_table.fillna(0).astype(int)
Exemplo n.º 4
0
def _grow(args):
    """Get the maximum growth rate under a given medium.

    Parameters
    ----------
    args : tuple
        ``(file, med)`` where ``file`` is passed to `load_model` and ``med``
        is the medium, indexed by exchange reaction IDs.

    Returns
    -------
    The value returned by ``slim_optimize`` after applying the part of the
    medium whose IDs match exchange reactions of the model.

    """
    file, med = args
    mod = load_model(file)
    # Only medium entries that match an exchange reaction can be applied.
    good = med[med.index.isin([r.id for r in mod.exchanges])]
    if len(good) == 0:
        logger.warning("Could not find any reactions from the medium in `%s`. "
                       "Maybe a mismatch in IDs?" % file)
    mod.medium = good
    return mod.slim_optimize()
Exemplo n.º 5
0
def test_join_models():
    """Check that joining models keeps size and growth rate of a single one."""
    single = util.load_model(tax.file[0])
    single_linear = single.objective.get_linear_coefficients(single.variables)
    # Not asserted on, but building it exercises the objective API.
    single_coefs = {var.name: weight for var, weight in single_linear.items()}

    mod = util.join_models(tax.file, id="test_model")
    joined_linear = mod.objective.get_linear_coefficients(mod.variables)
    coefs = {var.name: weight for var, weight in joined_linear.items()}

    n_reactions = len(single.reactions)
    n_metabolites = len(single.metabolites)
    assert len(mod.reactions) == n_reactions + 1  # added biomass
    assert len(mod.metabolites) == n_metabolites
    assert np.allclose(single.slim_optimize(), mod.slim_optimize())
Exemplo n.º 6
0
def _fix_medium(args):
    """Get the fixed medium for a model.

    Parameters
    ----------
    args : tuple
        ``(mid, file, medium, min_growth, max_import, min_c)``. ``file`` is
        passed to `load_model`; ``medium``, ``min_growth``, ``max_import``
        and ``min_c`` (as `minimize_components`) are forwarded to
        `complete_medium`. ``mid`` is used as the name of the result.

    Returns
    -------
    The completed medium named ``mid``. This is best-effort: if the
    completion raises or the solver does not end in an optimal state, a copy
    of the original ``medium`` is returned instead.

    """
    mid, file, medium, min_growth, max_import, min_c = args
    model = load_model(file)
    for r in model.reactions:
        r.id = clean_ids(r.id)
    try:
        fixed = complete_medium(
            model,
            medium,
            min_growth=min_growth,
            max_import=max_import,
            minimize_components=min_c,
        )
    except Exception as err:
        # Deliberately broad (best effort), but never swallow the reason.
        logger.warning("Completing the medium for model %s failed: %s" %
                       (mid, err))
        fixed = medium.copy()
    if model.solver.status != OPTIMAL:
        logger.warning("Can't reach the specified growth rate for model %s." %
                       mid)
        fixed = medium.copy()
    fixed.name = mid
    return fixed
Exemplo n.º 7
0
def test_load_model():
    """Check that the first model of the test taxonomy loads completely."""
    model_path = tax.loc[0].file
    loaded = util.load_model(model_path)
    assert len(loaded.reactions) == 95
    assert len(loaded.metabolites) == 72
Exemplo n.º 8
0
    def __init__(self,
                 taxonomy,
                 id=None,
                 name=None,
                 rel_threshold=1e-6,
                 solver=None,
                 progress=True,
                 max_exchange=100,
                 mass=1):
        """Create a new community object.

        `micom` builds a community from a taxonomy which may simply be a list
        of model files in its simplest form. Usually, the taxonomy will contain
        additional information such as annotations for the individuals (for
        instance phylum, organisms or species) and abundances.

        Notes
        -----
        `micom` will automatically add exchange fluxes and a community
        objective maximizing the overall growth rate of the community.

        Parameters
        ----------
        taxonomy : pandas.DataFrame
            The taxonomy used for building the model. Must have at least the
            two columns "id" and "file" which specify an ID and the filepath
            for each model. Valid file extensions are ".pickle", ".xml",
            ".xml.gz" and ".json". If the taxonomy includes a column named
            "abundance" it will be used to quantify each individual in the
            community. If absent `micom` will assume all individuals are
            present in the same amount.
        id : str, optional
            The ID for the community. Should only contain letters and numbers,
            otherwise it will be formatted as such.
        name : str, optional
            The name for the community.
        rel_threshold : float < 1, optional
            The relative abundance threshold that will be used. Describes the
            smallest relative amount of an individual that will be considered
            non-zero. All individuals with a smaller relative amount will be
            omitted.
        solver : str, optional
            Which solver to use. Will default to cplex if available which is
            better suited for large problems, and to glpk otherwise.
        progress : bool, optional
            Show a progress bar.
        max_exchange : positive float, optional
            During model constructions exchange reactions are duplicated into
            internal and external exchange reactions. This specifies the new
            import flux bound for the *internal* exchange reaction. Import
            rates for the exchanges between the medium and outside are still
            maintained.
        mass : positive float, optional
            The total mass of the community in gDW. Used to adjust import
            fluxes which are assumed to be given as mmol/gDW*h for the
            entire community. As a consequence all import fluxes will be
            divided by that number.

        Attributes
        ----------
        species : list
            A list of species IDs in the community.

        Raises
        ------
        ValueError
            If `taxonomy` is not a pandas DataFrame or lacks one of the
            columns listed in `_taxonomy_cols`.

        """
        super(Community, self).__init__(id, name)

        logger.info("building new micom model {}.".format(id))
        if not solver:
            self.solver = ("cplex"
                           if "cplex" in cobra.util.solver.solvers else "glpk")
        else:
            self.solver = solver
        adjust_solver_config(self.solver)

        if not (isinstance(taxonomy, pd.DataFrame)
                and all(col in taxonomy.columns for col in _taxonomy_cols)):
            raise ValueError("`taxonomy` must be a pandas DataFrame with at "
                             "least columns id and file :(")

        self._rtol = rel_threshold
        self._modification = None
        self.mass = mass

        # Work on a copy so the caller's table is never modified.
        taxonomy = taxonomy.copy()
        if "abundance" not in taxonomy.columns:
            taxonomy["abundance"] = 1
        # Convert to relative abundances before applying the threshold.
        taxonomy.abundance /= taxonomy.abundance.sum()
        logger.info("{} individuals with abundances below threshold".format(
            (taxonomy.abundance <= self._rtol).sum()))
        taxonomy = taxonomy[taxonomy.abundance > self._rtol]
        if taxonomy.id.str.contains(r"[^A-Za-z0-9_]", regex=True).any():
            logger.warning("taxonomy IDs contain prohibited characters and"
                           " will be reformatted")
            taxonomy.id = taxonomy.id.replace([r"[^A-Za-z0-9_\s]", r"\s+"],
                                              ["", "_"],
                                              regex=True)

        self.__taxonomy = taxonomy
        self.__taxonomy.index = self.__taxonomy.id

        obj = Zero
        self.species = []
        index = self.__taxonomy.index
        index = tqdm(index, unit="models") if progress else index
        for idx in index:
            row = self.__taxonomy.loc[idx]
            # A list of files denotes several models for one individual.
            if isinstance(row.file, list):
                if len(row.file) > 1:
                    model = join_models(row.file)
                    logger.info("joined {} models".format(len(row.file)))
                else:
                    model = load_model(row.file[0])
            else:
                model = load_model(row.file)
            suffix = "__" + idx.replace(" ", "_").strip()
            logger.info("converting IDs for {}".format(idx))
            # Reactions and metabolites get the same clean-up. The reaction
            # pattern previously only matched single-digit escapes
            # (`__\d__`), which left e.g. `__45__` in reaction IDs but not
            # in metabolite IDs.
            for r in model.reactions:
                r.global_id = re.sub(r"__\d+__", "_", r.id).strip(" _-")
                r.id = r.global_id + suffix
                r.community_id = idx
            for m in model.metabolites:
                m.global_id = re.sub(r"__\d+__", "_", m.id).strip(" _-")
                m.id = m.global_id + suffix
                m.compartment += suffix
                m.community_id = idx
            logger.info("adding reactions for {} to community".format(idx))
            self.add_reactions(model.reactions)
            o = self.solver.interface.Objective.clone(model.objective,
                                                      model=self.solver)
            # The community objective is the abundance-weighted sum of the
            # individual objectives.
            obj += o.expression * row.abundance
            self.species.append(idx)
            species_obj = self.problem.Constraint(o.expression,
                                                  name="objective_" + idx,
                                                  lb=0.0)
            self.add_cons_vars([species_obj])
            self.__add_exchanges(model.reactions,
                                 row,
                                 internal_exchange=max_exchange)
            self.solver.update()  # to avoid dangling refs due to lazy add

        com_obj = add_var_from_expression(self,
                                          "community_objective",
                                          obj,
                                          lb=0)
        self.objective = self.problem.Objective(com_obj, direction="max")
Exemplo n.º 9
0
    def __init__(
        self,
        taxonomy,
        model_db=None,
        id=None,
        name=None,
        rel_threshold=1e-6,
        solver=None,
        progress=True,
        max_exchange=100,
        mass=1,
    ):
        """Create a new community object.

        `micom` builds a community from a taxonomy which may simply be a list
        of model files in its simplest form. Usually, the taxonomy will contain
        additional information such as annotations for the individuals (for
        instance phylum, organisms or species) and abundances.

        The recommended way to build a micom model is to supply a
        quantification of taxa (called "taxonomy" here) which specifies the
        taxonomic ranks for a taxon and its abundance, and a model database
        for a specific rank (for instance "genus"). MICOM will match the
        ranks from your taxonomy to the model database and assemble the
        community models from that. You will also get information about the
        construction process by calling `Community.build_metrics`.

        The most customizable way only takes a single table where summarization
        and matching to the reference database has already occurred. In this
        case you will also provide paths to model files for each taxon. This is
        the "old" way but may still be applicable if you want to use a custom
        database or want full control of matching your data to reference
        models.

        Notes
        -----
        `micom` will automatically add exchange fluxes and a community
        objective maximizing the overall growth rate of the community.

        Parameters
        ----------
        taxonomy : pandas.DataFrame
            The taxonomy used for building the model. Must have at least the
            column "id". If no model database is specified in the next argument
            it furthermore requires a column "file" which specifies a filepath
            for each model. Valid file extensions are ".pickle", ".xml",
            ".xml.gz" and ".json". If a model database is specified this must
            contain at least a column with the same name as the rank used in
            the model database. Thus, for a genus-level database you will need
            a column `genus`. Additional taxa ranks can also be specified and
            will be used to be more stringent in taxa matching.
            Finally, the taxonomy should contain a column `abundance`. It will
            be used to quantify each individual in the community. If absent,
            MICOM will assume all individuals are present in the same amount.
        model_db : str
            A pre-built model database. If ending in `.qza` must be a Qiime 2
            artifact of type `MetabolicModels[JSON]`. Can also be a folder,
            zip (must end in `.zip`) file or None if the taxonomy contains a
            column `file`.
        id : str, optional
            The ID for the community. Should only contain letters and numbers,
            otherwise it will be formatted as such.
        name : str, optional
            The name for the community.
        rel_threshold : float < 1, optional
            The relative abundance threshold that will be used. Describes the
            smallest relative amount of an individual that will be considered
            non-zero. All individuals with a smaller relative amount will be
            omitted.
        solver : str, optional
            Which solver to use. Defaults to the first available one of
            cplex, osqp, gurobi and glpk.
        progress : bool, optional
            Show a progress bar.
        max_exchange : positive float, optional
            During model constructions exchange reactions are duplicated into
            internal and external exchange reactions. This specifies the new
            import flux bound for the *internal* exchange reaction. Import
            rates for the exchanges between the medium and outside are still
            maintained.
        mass : positive float, optional
            The total mass of the community in gDW. Used to adjust import
            fluxes which are assumed to be given as mmol/gDW*h for the
            entire community. As a consequence all import fluxes will be
            divided by that number.

        Attributes
        ----------
        taxa : list
            A list of taxa IDs in the community.

        Raises
        ------
        ValueError
            If `taxonomy` is not a pandas DataFrame with an `id` column, if
            neither a model database nor a `file` column is given, or if the
            summary rank of the model database is missing from the taxonomy.

        """
        super(Community, self).__init__(id, name)

        logger.info("building new micom model {}.".format(id))
        if not solver:
            # Pick the first available solver in order of preference.
            solver = [
                s for s in ["cplex", "osqp", "gurobi", "glpk"]
                if s in cobra.util.solver.solvers
            ][0]
        logger.info("using the %s solver." % solver)
        if solver == "glpk":
            logger.warning(
                "No QP solver found, will use GLPK. A lot of functionality "
                "in MICOM will require a QP solver :/")
        self.solver.configuration.lp_method = "auto"
        self.solver.configuration.qp_method = "auto"
        self.solver.configuration.presolve = False
        self.solver = solver
        self._rtol = rel_threshold
        self._modification = None
        self.mass = mass
        self.__db_metrics = None
        adjust_solver_config(self.solver)

        # Validate before touching the table so that invalid input raises the
        # documented ValueError instead of an AttributeError further down.
        if not (isinstance(taxonomy, pd.DataFrame)
                and "id" in taxonomy.columns):
            raise ValueError("`taxonomy` must be a pandas DataFrame with at "
                             "least a column `id` :(")
        if model_db is None and "file" not in taxonomy.columns:
            raise ValueError(
                "If no model database is specified you need to pass "
                "file names for models in a `file` column as well.")

        # Work on a copy so the caller's table is never modified.
        taxonomy = taxonomy.copy()
        if "abundance" not in taxonomy.columns:
            taxonomy["abundance"] = 1
        # Convert to relative abundances before applying the threshold.
        taxonomy.abundance /= taxonomy.abundance.sum()
        logger.info("{} individuals with abundances below threshold".format(
            (taxonomy.abundance <= self._rtol).sum()))
        taxonomy = taxonomy[taxonomy.abundance > self._rtol]

        compressed = False
        if model_db is not None:
            compressed = model_db.endswith((".qza", ".zip"))
            if compressed:
                tdir = TemporaryDirectory(prefix="micom_")
            # File paths always come from the database manifest in this mode.
            if "file" in taxonomy.columns:
                del taxonomy["file"]
            if model_db.endswith(".qza"):
                manifest = load_qiime_model_db(model_db, tdir.name)
            elif model_db.endswith(".zip"):
                manifest = load_zip_model_db(model_db, tdir.name)
            else:
                manifest = load_manifest(model_db)
            rank = manifest["summary_rank"][0]
            if rank not in taxonomy.columns:
                raise ValueError("Missing the column `%s` from the taxonomy." %
                                 rank)
            # Match on every rank up to the summary rank that both tables
            # have in common.
            keep_cols = [
                r for r in _ranks[0:(_ranks.index(rank) + 1)]
                if r in taxonomy.columns and r in manifest.columns
            ]
            manifest = manifest[keep_cols + ["file"]]
            merged = pd.merge(taxonomy, manifest, on=keep_cols)

            self.__db_metrics = pd.Series({
                "found_taxa":
                merged.shape[0],
                "total_taxa":
                taxonomy.shape[0],
                "found_fraction":
                merged.shape[0] / taxonomy.shape[0],
                "found_abundance_fraction":
                merged.abundance.sum(),
            })
            # Access by label: integer keys on a label-indexed Series are
            # deprecated positional lookups in pandas.
            logger.info("Matched %g%% of total abundance in model DB." %
                        (100.0 * self.__db_metrics["found_abundance_fraction"]))
            if self.__db_metrics["found_abundance_fraction"] < 0.5:
                logger.warning(
                    "Less than 50%% of the abundance could be matched to the "
                    "model database. Model `%s` may not be representative "
                    "of the sample" % self.id)
            taxonomy = merged
            # Renormalize so the matched taxa sum up to one again.
            taxonomy["abundance"] /= taxonomy["abundance"].sum()

        if taxonomy.id.str.contains(r"[^A-Za-z0-9_]", regex=True).any():
            logger.warning("Taxa IDs contain prohibited characters and"
                           " will be reformatted.")
            taxonomy.id = taxonomy.id.replace(r"[^A-Za-z0-9_\s]+",
                                              "_",
                                              regex=True)

        self.__taxonomy = taxonomy
        self.__taxonomy.index = self.__taxonomy.id

        obj = Zero
        self.taxa = []
        index = self.__taxonomy.index
        index = tqdm(index, unit="models") if progress else index
        for idx in index:
            row = self.__taxonomy.loc[idx]
            # A list of files denotes several models for one taxon.
            if isinstance(row.file, list):
                if len(row.file) > 1:
                    model = join_models(row.file)
                    logger.info("joined {} models".format(len(row.file)))
                else:
                    model = load_model(row.file[0])
            else:
                model = load_model(row.file)
            suffix = "__" + idx.replace(" ", "_").strip()
            logger.info("converting IDs for {}".format(idx))
            external = cobra.medium.find_external_compartment(model)
            logger.info("Identified %s as the external compartment for %s. "
                        "If that is wrong you may be in trouble..." %
                        (external, idx))
            for r in model.reactions:
                r.global_id = clean_ids(r.id)
                r.id = r.global_id + suffix
                r.community_id = idx
                # avoids https://github.com/opencobra/cobrapy/issues/926
                r._compartments = None
                # SBO terms may not be maintained
                if "sbo" in r.annotation:
                    del r.annotation["sbo"]
            for m in model.metabolites:
                m.global_id = clean_ids(m.id)
                m.id = m.global_id + suffix
                m.compartment += suffix
                m.community_id = idx
            logger.info("adding reactions for {} to community".format(idx))
            self.add_reactions(model.reactions)
            o = self.solver.interface.Objective.clone(model.objective,
                                                      model=self.solver)
            # The community objective is the abundance-weighted sum of the
            # individual objectives.
            obj += o.expression * row.abundance
            self.taxa.append(idx)
            taxa_obj = self.problem.Constraint(o.expression,
                                               name="objective_" + idx,
                                               lb=0.0)
            self.add_cons_vars([taxa_obj])
            self.__add_exchanges(
                model.reactions,
                row,
                external_compartment=external,
                internal_exchange=max_exchange,
            )
            self.solver.update()  # to avoid dangling refs due to lazy add

        # The temporary directory is only removed once all models were loaded.
        if compressed:
            tdir.cleanup()
        com_obj = add_var_from_expression(self,
                                          "community_objective",
                                          obj,
                                          lb=0)
        self.objective = self.problem.Objective(com_obj, direction="max")
Exemplo n.º 10
0
def _annotate(f):
    """Load the model from `f` and annotate its metabolites from exchanges."""
    return annotate_metabolites_from_exchanges(load_model(f))