Пример #1
0
def _new_reaction(session, reaction, bigg_id, reaction_hash, model_db_id, model,
                  is_pseudoreaction):
    """Add a new universal reaction with reaction matrix rows.

    Arguments
    ---------

    session: An SQLAlchemy session.

    reaction: The reaction to load. Its optional ``name`` attribute and its
    ``metabolites`` mapping (metabolite -> stoichiometry) are read.

    bigg_id: The BiGG ID stored on the new Reaction row.

    reaction_hash: The hash stored on the new Reaction row.

    model_db_id: The database ID for the model; used to restrict the
    compartmentalized component lookup to components of this model.

    model: The model object. Only ``model.id`` is used, in log messages.

    is_pseudoreaction: Stored as the ``pseudoreaction`` flag of the new row.

    Returns
    -------

    The new Reaction database object. Metabolites that cannot be split into
    component and compartment, or that are not found in the database for this
    model, are logged and skipped; they get no ReactionMatrix row.

    """

    # name is optional in cobra 0.4b2. This will probably change back.
    name = check_none(getattr(reaction, 'name', None))
    reaction_db = Reaction(bigg_id=bigg_id, name=scrub_name(name),
                           reaction_hash=reaction_hash,
                           pseudoreaction=is_pseudoreaction)
    session.add(reaction_db)
    # Actually call commit(): the previous code only referenced the bound
    # method (``session.commit``), so nothing was committed at this point and
    # reaction_db.id was only populated if a later query happened to flush.
    session.commit()

    # for each reactant, add to the reaction matrix
    for metabolite, stoich in six.iteritems(reaction.metabolites):
        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite.id)
        except NotFoundError:
            logging.error('Could not split metabolite %s in model %s' % (metabolite.id, model.id))
            continue

        # get the component in the model
        comp_comp_db = (session
                        .query(CompartmentalizedComponent)
                        .join(Component,
                              Component.id == CompartmentalizedComponent.component_id)
                        .join(Compartment,
                              Compartment.id == CompartmentalizedComponent.compartment_id)
                        .join(ModelCompartmentalizedComponent,
                              ModelCompartmentalizedComponent.compartmentalized_component_id == CompartmentalizedComponent.id)
                        .filter(Component.bigg_id == component_bigg_id)
                        .filter(Compartment.bigg_id == compartment_bigg_id)
                        .filter(ModelCompartmentalizedComponent.model_id == model_db_id)
                        .first())
        if comp_comp_db is None:
            logging.error('Could not find metabolite {!s} for model {!s} in the database'
                        .format(metabolite.id, model.id))
            continue

        # check if the reaction matrix row already exists
        found_reaction_matrix = (session
                                .query(ReactionMatrix)
                                .filter(ReactionMatrix.reaction_id == reaction_db.id)
                                .filter(ReactionMatrix.compartmentalized_component_id == comp_comp_db.id)
                                .count() > 0)
        if not found_reaction_matrix:
            # The new row is only added to the session here; it is not
            # committed inside this function.
            new_object = ReactionMatrix(reaction_id=reaction_db.id,
                                        compartmentalized_component_id=comp_comp_db.id,
                                        stoichiometry=stoich)
            session.add(new_object)
        else:
            logging.debug('ReactionMatrix row already present for model {!s} metabolite {!s} reaction {!s}'
                        .format(model.id, metabolite.id, reaction.id))

    return reaction_db
Пример #2
0
def _new_reaction(session, reaction, cobra_id, reaction_hash, model_db_id, model,
                  is_pseudoreaction):
    """Add a new universal reaction with reaction matrix rows.

    Arguments
    ---------

    session: An SQLAlchemy session.

    reaction: The reaction to load. Its optional ``name`` attribute and its
    ``metabolites`` mapping (metabolite -> stoichiometry) are read.

    cobra_id: The ID stored in the ``cobra_id`` column of the new Reaction row.

    reaction_hash: The hash stored on the new Reaction row.

    model_db_id: The database ID for the model; used to restrict the
    compartmentalized component lookup to components of this model.

    model: The model object. Only ``model.id`` is used, in log messages.

    is_pseudoreaction: Stored as the ``pseudoreaction`` flag of the new row.

    Returns
    -------

    The new Reaction database object. Metabolites that cannot be split into
    component and compartment, or that are not found in the database for this
    model, are logged and skipped; they get no ReactionMatrix row.

    """

    # name is optional in cobra 0.4b2. This will probably change back.
    name = check_none(getattr(reaction, 'name', None))
    reaction_db = Reaction(cobra_id=cobra_id, name=scrub_name(name),
                           reaction_hash=reaction_hash,
                           pseudoreaction=is_pseudoreaction)
    session.add(reaction_db)
    # Actually call commit(): the previous code only referenced the bound
    # method (``session.commit``), so nothing was committed at this point and
    # reaction_db.id was only populated if a later query happened to flush.
    session.commit()

    # for each reactant, add to the reaction matrix
    for metabolite, stoich in six.iteritems(reaction.metabolites):
        try:
            component_cobra_id, compartment_cobra_id = parse.split_compartment(metabolite.id)
        except NotFoundError:
            logging.error('Could not split metabolite %s in model %s' % (metabolite.id, model.id))
            continue

        # get the component in the model
        comp_comp_db = (session
                        .query(CompartmentalizedComponent)
                        .join(Component,
                              Component.id == CompartmentalizedComponent.component_id)
                        .join(Compartment,
                              Compartment.id == CompartmentalizedComponent.compartment_id)
                        .join(ModelCompartmentalizedComponent,
                              ModelCompartmentalizedComponent.compartmentalized_component_id == CompartmentalizedComponent.id)
                        .filter(Component.cobra_id == component_cobra_id)
                        .filter(Compartment.cobra_id == compartment_cobra_id)
                        .filter(ModelCompartmentalizedComponent.model_id == model_db_id)
                        .first())
        if comp_comp_db is None:
            logging.error('Could not find metabolite {!s} for model {!s} in the database'
                        .format(metabolite.id, model.id))
            continue

        # check if the reaction matrix row already exists
        found_reaction_matrix = (session
                                .query(ReactionMatrix)
                                .filter(ReactionMatrix.reaction_id == reaction_db.id)
                                .filter(ReactionMatrix.compartmentalized_component_id == comp_comp_db.id)
                                .count() > 0)
        if not found_reaction_matrix:
            # The new row is only added to the session here; it is not
            # committed inside this function.
            new_object = ReactionMatrix(reaction_id=reaction_db.id,
                                        compartmentalized_component_id=comp_comp_db.id,
                                        stoichiometry=stoich)
            session.add(new_object)
        else:
            logging.debug('ReactionMatrix row already present for model {!s} metabolite {!s} reaction {!s}'
                        .format(model.id, metabolite.id, reaction.id))

    return reaction_db
Пример #3
0
def load_genes(session, model_db_id, model, model_db_rxn_ids, old_gene_ids):
    """Load the genes for this model.

    For every gene in the model this finds a matching Gene row (or creates
    one), makes sure a ModelGene row links it to the model, records the old
    gene IDs as Synonym / OldIDSynonym rows, and adds GeneReactionMatrix rows
    for every ModelReaction the gene participates in.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    model_db_rxn_ids: A dictionary with keys for reactions in the model and
    values for the associated ModelReaction.id in the database.

    old_gene_ids: A dictionary where keys are new IDs and values are old IDs for
    genes. The value for each gene is iterated, so it is expected to be a
    collection of old IDs.

    Returns
    -------

    Nothing; the function has no return statement and works through the
    session.

    """
    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # find the model in the db
    model_db = session.query(Model).get(model_db_id)

    # find the chromosomes in the db
    chromosome_ids = (session
                      .query(Chromosome.id)
                      .filter(Chromosome.genome_id == model_db.genome_id)
                      .all())
    if len(chromosome_ids) == 0:
        logging.warning('No chromosomes for model %s' % model_db.bigg_id)

    # keep track of the gene-reaction associations: gene ID in the model ->
    # set of ModelReaction database IDs
    gene_bigg_id_to_model_reaction_db_ids = defaultdict(set)
    for reaction in model.reactions:
        # find the ModelReaction that corresponds to this particular reaction in
        # the model
        # NOTE(review): model_db_rxn_ids[reaction.id] raises KeyError if the
        # reaction is missing from the dictionary -- confirm the caller always
        # provides every reaction.
        model_reaction_db = (session
                             .query(ModelReaction)
                             .get(model_db_rxn_ids[reaction.id]))
        if model_reaction_db is None:
            logging.error('Could not find ModelReaction {} for {} in model {}. Cannot load GeneReactionMatrix entries'
                          .format(model_db_rxn_ids[reaction.id], reaction.id, model.id))
            continue
        for gene in reaction.genes:
            gene_bigg_id_to_model_reaction_db_ids[gene.id].add(model_reaction_db.id)

    # load the genes
    for gene in model.genes:
        if len(chromosome_ids) == 0:
            # without chromosomes there is nothing to match against, so every
            # gene is treated as unmatched and created below
            gene_db = None; is_alternative_transcript = False
        else:
            # find a matching gene; the matchers are handed to
            # _match_gene_by_fns (presumably tried in list order -- confirm)
            fns = [_by_bigg_id, _by_name, _by_synonym, _by_alternative_transcript,
                   _by_alternative_transcript_name, _by_alternative_transcript_synonym,
                   _by_bigg_id_no_underscore]
            gene_db, is_alternative_transcript = _match_gene_by_fns(fns, session,
                                                                    gene.id,
                                                                    chromosome_ids)

        if not gene_db:
            # no match: add a new gene that is not mapped to genbank
            if len(chromosome_ids) > 0:
                logging.warning('Gene not in genbank file: {} from model {}'
                            .format(gene.id, model.id))
            gene_db = Gene(bigg_id=gene.id,
                           # name is optional in cobra 0.4b2. This will probably change back.
                           name=scrub_name(getattr(gene, 'name', None)),
                           mapped_to_genbank=False)
            session.add(gene_db)
            session.commit()

        elif is_alternative_transcript:
            # duplicate gene for the alternative transcript: copy the position
            # data from the matched gene, but use the model's gene ID and
            # point back to the matched gene
            old_gene_db = gene_db
            ome_gene = {}
            ome_gene['bigg_id'] = gene.id
            ome_gene['name'] = old_gene_db.name
            ome_gene['leftpos'] = old_gene_db.leftpos
            ome_gene['rightpos'] = old_gene_db.rightpos
            ome_gene['chromosome_id'] = old_gene_db.chromosome_id
            ome_gene['strand'] = old_gene_db.strand
            ome_gene['mapped_to_genbank'] = True
            ome_gene['alternative_transcript_of'] = old_gene_db.id
            gene_db = Gene(**ome_gene)
            session.add(gene_db)
            session.commit()

            # duplicate all the synonyms
            # NOTE(review): this query filters on ome_id only, not on
            # Synonym.type -- confirm that is intended.
            synonyms_db = (session
                           .query(Synonym)
                           .filter(Synonym.ome_id == old_gene_db.id)
                           .all())
            for syn_db in synonyms_db:
                # add a new synonym (added to the session; no commit here)
                ome_synonym = {}
                ome_synonym['type'] = syn_db.type
                ome_synonym['ome_id'] = gene_db.id
                ome_synonym['synonym'] = syn_db.synonym
                ome_synonym['data_source_id'] = syn_db.data_source_id
                synonym_object = Synonym(**ome_synonym)
                session.add(synonym_object)

        # add model gene, unless the gene is already linked to this model
        model_gene_db = (session
                         .query(ModelGene)
                         .filter(ModelGene.gene_id == gene_db.id)
                         .filter(ModelGene.model_id == model_db_id)
                         .first())
        if model_gene_db is None:
            model_gene_db = ModelGene(gene_id=gene_db.id,
                                      model_id=model_db_id)
            session.add(model_gene_db)
            session.commit()

        # add old gene synonym
        # NOTE(review): if old_gene_ids is a plain dict, a gene without an
        # entry raises KeyError here -- confirm against the caller.
        for old_bigg_id in old_gene_ids[gene.id]:
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'gene')
                          .filter(Synonym.ome_id == gene_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='gene',
                                     ome_id=gene_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()
            # add OldIDSynonym, linking the synonym to the model gene
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_gene')
                         .filter(OldIDSynonym.ome_id == model_gene_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_gene',
                                        ome_id=model_gene_db.id,
                                        synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

        # find model reaction
        # NOTE(review): the mapping is built with defaultdict(set) above, so
        # (assuming collections.defaultdict) this lookup yields an empty set
        # for unknown genes instead of raising KeyError; the except branch
        # looks unreachable.
        try:
            model_reaction_db_ids = gene_bigg_id_to_model_reaction_db_ids[gene.id]
        except KeyError:
            # error message above
            continue

        for mr_db_id in model_reaction_db_ids:
            # add to the GeneReactionMatrix, if not already present
            found_gene_reaction_row = (session
                                       .query(GeneReactionMatrix)
                                       .filter(GeneReactionMatrix.model_gene_id == model_gene_db.id)
                                       .filter(GeneReactionMatrix.model_reaction_id == mr_db_id)
                                       .count() > 0)
            if not found_gene_reaction_row:
                new_object = GeneReactionMatrix(model_gene_id=model_gene_db.id,
                                                model_reaction_id=mr_db_id)
                session.add(new_object)

            # update the gene_reaction_rule if the gene id has changed, i.e.
            # the matched database gene has a different bigg_id than the model
            # gene
            if gene.id != gene_db.bigg_id:
                mr = session.query(ModelReaction).get(mr_db_id)
                new_rule = _replace_gene_str(mr.gene_reaction_rule, gene.id,
                                             gene_db.bigg_id)
                # NOTE(review): neither this update nor the GeneReactionMatrix
                # rows added above are followed by a commit in this function;
                # presumably the caller commits -- confirm.
                (session
                .query(ModelReaction)
                .filter(ModelReaction.id == mr_db_id)
                .update({ModelReaction.gene_reaction_rule: new_rule}))
Пример #4
0
def load_reactions(session, model_db_id, model, old_reaction_ids,
                   comp_comp_db_ids, final_metabolite_ids):
    """Load the reactions and stoichiometries into the model.

    For every reaction in the model this decides which universal Reaction row
    to use (an existing one matched by BiGG ID, forward hash or reverse hash,
    or a newly created one), makes sure a ModelReaction row exists, and
    records the old reaction IDs as Synonym / OldIDSynonym rows.

    TODO if the reaction is already loaded, we need to check the stoichiometry
    hash. If that doesn't match, then add a new reaction with an incremented ID
    (e.g. ACALD_1)

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    old_reaction_ids: A dictionary where keys are new IDs and values are old IDs
    for reactions. The value for each reaction is iterated, so it is expected
    to be a collection of old IDs.

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    Returns
    -------

    A dictionary with keys for reaction BiGG IDs in the model and values for the
    associated ModelReaction.id in the database.

    """

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get reaction hash_prefs
    hash_prefs = load_tsv(settings.reaction_hash_prefs)
    def _check_hash_prefs(a_hash, is_pseudoreaction):
        """Return the preferred BiGG ID for a_hash, or None."""
        # Each row is read as (hash, preferred ID[, 'pseudoreaction']); a row
        # only applies when its pseudoreaction marker agrees with the reaction.
        for row in hash_prefs:
            marked_pseudo = len(row) > 2 and row[2] == 'pseudoreaction'
            if row[0] == a_hash and marked_pseudo == is_pseudoreaction:
                return row[1]
        return None

    # Generate reaction hashes, and find reactions in the same model in opposite
    # directions.
    reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids)
                       for r in model.reactions}
    reverse_reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids, reverse=True)
                               for r in model.reactions}
    # reverse hash -> reaction ID (if several reactions share a reverse hash,
    # the dict comprehension keeps only the last one)
    reverse_reaction_hashes_rev = {v: k for k, v in six.iteritems(reverse_reaction_hashes)}
    reactions_not_to_reverse = set()
    for r_id, h in six.iteritems(reaction_hashes):
        # a forward hash that equals another reaction's reverse hash means
        # both directions are present in this model
        if h in reverse_reaction_hashes_rev:
            reactions_not_to_reverse.add(r_id)
            reactions_not_to_reverse.add(reverse_reaction_hashes_rev[h])

    model_db_rxn_ids = {}
    for reaction in model.reactions:
        # Drop duplicates label
        reaction_id = parse.remove_duplicate_tag(reaction.id)

        # Get the reaction with a matching BiGG ID, if any
        reaction_db = (session
                       .query(Reaction)
                       .filter(Reaction.bigg_id == reaction_id)
                       .first())

        # check for pseudoreaction
        is_pseudoreaction = check_pseudoreaction(reaction_id)

        # look up the precomputed forward hash and find a reaction with the
        # same hash and pseudoreaction flag
        reaction_hash = reaction_hashes[reaction.id]
        hash_db = (session
                   .query(Reaction)
                   .filter(Reaction.reaction_hash == reaction_hash)
                   .filter(Reaction.pseudoreaction == is_pseudoreaction)
                   .first())
        # If there wasn't a match for the forward hash, also check the reverse
        # hash. Do not check reverse hash for reactions with both directions
        # defined in the same model (e.g. SUCDi and FRD7).
        if not hash_db and reaction.id not in reactions_not_to_reverse:
            reverse_hash_db = (session
                               .query(Reaction)
                               .filter(Reaction.reaction_hash == reverse_reaction_hashes[reaction.id])
                               .filter(Reaction.pseudoreaction == is_pseudoreaction)
                               .first())
        else:
            reverse_hash_db = None

        # bigg_id match  hash match b==h  pseudoreaction  example                   function
        #  n               n               n            first GAPD                _new_reaction (1)
        #  n               n               y            first EX_glc_e            _new_reaction (1)
        #  y               n               n            incorrect GAPD            _new_reaction & increment (2)
        #  y               n               y            incorrect EX_glc_e        _new_reaction & increment (2)
        #  n               y               n            GAPDH after GAPD          reaction = hash_reaction (3a)
        #  n               y               y            EX_glc__e after EX_glc_e  reaction = hash_reaction (3a)
        #  y               y         n     n            ?                         reaction = hash_reaction (3a)
        #  y               y         n     y            ?                         reaction = hash_reaction (3a)
        #  y               y         y     n            second GAPD               reaction = bigg_reaction (3b)
        #  y               y         y     y            second EX_glc_e           reaction = bigg_reaction (3b)
        # NOTE: only check pseudoreaction hash against other pseudoreactions
        # 4a and 4b are 3a and 3b with a reversed reaction

        # NOTE(review): this helper is re-defined on every loop iteration; it
        # does not close over any loop variable.
        def _find_new_incremented_id(session, original_id):
            """Look for a reaction bigg_id that is not already taken."""
            new_id = increment_id(original_id)
            while True:
                # Check for existing and deprecated reaction ids
                if (session.query(Reaction).filter(Reaction.bigg_id == new_id).first() is None and
                    not _is_deprecated_reaction_id(session, new_id)):
                    return new_id
                new_id = increment_id(new_id)

        # Check for a preferred ID in the preferences, based on the forward
        # hash. Don't check the reverse hash in preferences.
        preferred_id = _check_hash_prefs(reaction_hash, is_pseudoreaction)

        # no reversed by default
        is_reversed = False
        is_new = False

        # (0) If there is a preferred ID, make that the new ID, and increment any old IDs
        if preferred_id is not None:
            # if the reaction already matches, just continue
            if hash_db is not None and hash_db.bigg_id == preferred_id:
                reaction_db = hash_db
            # otherwise, make the new reaction
            else:
                # if existing reactions match the preferred reaction find a new,
                # incremented id for the existing match
                preferred_id_db = session.query(Reaction).filter(Reaction.bigg_id == preferred_id).first()
                if preferred_id_db is not None:
                    new_id = _find_new_incremented_id(session, preferred_id)
                    logging.warning('Incrementing database reaction {} to {} and prefering {} (from model {}) based on hash preferences'
                                .format(preferred_id, new_id, preferred_id, model.id))
                    preferred_id_db.bigg_id = new_id
                    session.commit()

                # make a new reaction for the preferred_id
                # NOTE(review): _new_reaction is called with comp_comp_db_ids
                # as an eighth positional argument here and in (1) and (2);
                # confirm the definition in scope accepts it.
                reaction_db = _new_reaction(session, reaction, preferred_id,
                                            reaction_hash, model_db_id, model,
                                            is_pseudoreaction, comp_comp_db_ids)
                is_new = True

        # (1) no bigg_id matches, no stoichiometry match or pseudoreaction, then
        # make a new reaction
        elif reaction_db is None and hash_db is None and reverse_hash_db is None:
            # check that the id is not deprecated
            # NOTE(review): this checks reaction.id (with any duplicate tag)
            # while the message and the new row use reaction_id -- confirm
            # which one is intended.
            if _is_deprecated_reaction_id(session, reaction.id):
                logging.error(('Keeping bigg_id {} (hash {} - from model {}) '
                               'even though it is on the deprecated ID list. '
                               'You should add it to reaction-hash-prefs.txt')
                              .format(reaction_id, reaction_hash, model.id))
            reaction_db = _new_reaction(session, reaction, reaction_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (2) bigg_id matches, but not the hash, then increment the BIGG_ID
        elif reaction_db is not None and hash_db is None and reverse_hash_db is None:
            # loop until we find a non-matching ID
            # NOTE(review): the increment starts from reaction.id, not the
            # tag-stripped reaction_id used in the log message -- confirm.
            new_id = _find_new_incremented_id(session, reaction.id)
            logging.warning('Incrementing bigg_id {} to {} (from model {}) based on conflicting reaction hash'
                        .format(reaction_id, new_id, model.id))
            reaction_db = _new_reaction(session, reaction, new_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (3) found a stoichiometry match on the forward hash, then use the
        # hash reaction match.
        elif hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # (3a) no BiGG ID match, or it is a different row than the hash match
            if reaction_db is None or reaction_db.id != hash_db.id:
                reaction_db = hash_db
            # (3b) BIGG ID matches a reaction with the same hash, then just continue
            else:
                pass

        # (4) found a stoichiometry match on the reverse hash, then use the
        # reverse hash reaction match.
        elif reverse_hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # Remember to switch upper and lower bounds
            is_reversed = True
            logging.info('Matched {} to {} based on reverse hash'
                         .format(reaction_id, reverse_hash_db.bigg_id))

            # (4a) no BiGG ID match, or it is a different row than the reverse
            # hash match
            if reaction_db is None or reaction_db.id != reverse_hash_db.id:
                reaction_db = reverse_hash_db
            # (4b) BIGG ID matches a reaction with the same hash, then just continue
            else:
                pass

        else:
            raise Exception('Should not get here')

        # If the reaction is not new, consider improving the descriptive name
        if not is_new:
            new_name = scrub_name(check_none(getattr(reaction, 'name', None)))
            improve_name(session, reaction_db, new_name)

        # Add reaction to deprecated ID list if necessary, i.e. when the model's
        # ID differs from the ID of the database reaction that was chosen
        if reaction_db.bigg_id != reaction_id:
            get_or_create(session, DeprecatedID, deprecated_id=reaction_id,
                          type='reaction', ome_id=reaction_db.id)

        # If the reaction is reversed, then switch upper and lower bound (and
        # negate them)
        lower_bound = -reaction.upper_bound if is_reversed else reaction.lower_bound
        upper_bound = -reaction.lower_bound if is_reversed else reaction.upper_bound

        # subsystem
        # NOTE(review): .strip() assumes reaction.subsystem is a string; a None
        # subsystem would raise AttributeError here -- confirm.
        subsystem = check_none(reaction.subsystem.strip())

        # get the model reaction; every listed attribute must match, otherwise
        # a new copy is created below
        model_reaction_db = (session
                             .query(ModelReaction)
                             .filter(ModelReaction.reaction_id == reaction_db.id)
                             .filter(ModelReaction.model_id == model_db_id)
                             .filter(ModelReaction.lower_bound == lower_bound)
                             .filter(ModelReaction.upper_bound == upper_bound)
                             .filter(ModelReaction.gene_reaction_rule == reaction.gene_reaction_rule)
                             .filter(ModelReaction.objective_coefficient == reaction.objective_coefficient)
                             .filter(ModelReaction.subsystem == subsystem)
                             .first())
        if model_reaction_db is None:
            # get the number of existing copies of this reaction in the model
            copy_number = (session
                           .query(ModelReaction)
                           .filter(ModelReaction.reaction_id == reaction_db.id)
                           .filter(ModelReaction.model_id == model_db_id)
                           .count()) + 1
            # make a new reaction
            model_reaction_db = ModelReaction(model_id=model_db_id,
                                              reaction_id=reaction_db.id,
                                              gene_reaction_rule=reaction.gene_reaction_rule,
                                              original_gene_reaction_rule=reaction.gene_reaction_rule,
                                              upper_bound=upper_bound,
                                              lower_bound=lower_bound,
                                              objective_coefficient=reaction.objective_coefficient,
                                              copy_number=copy_number,
                                              subsystem=subsystem)
            session.add(model_reaction_db)
            session.commit()

        # remember the changed ids (keyed by the model's original reaction ID)
        model_db_rxn_ids[reaction.id] = model_reaction_db.id

        # add synonyms
        #
        # get the id from the published model
        # NOTE(review): if old_reaction_ids is a plain dict, a reaction without
        # an entry raises KeyError here -- confirm against the caller.
        for old_bigg_id in old_reaction_ids[reaction.id]:
            # add a synonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'reaction')
                          .filter(Synonym.ome_id == reaction_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='reaction',
                                     ome_id=reaction_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym, linking the synonym to the model reaction
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_reaction')
                         .filter(OldIDSynonym.ome_id == model_reaction_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_reaction',
                                         ome_id=model_reaction_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

    return model_db_rxn_ids
Пример #5
0
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    For every metabolite in the model this gets or creates the Component,
    Compartment, CompartmentalizedComponent and
    ModelCompartmentalizedComponent rows, and records the old IDs as Synonym /
    OldIDSynonym rows. Metabolites whose ID cannot be split into a component
    and a compartment are logged and skipped.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment BiGG IDs and
    values are compartment names. Used only when a new Compartment row has to
    be created; a missing key is logged and an empty name is stored.

    old_metabolite_ids: A dictionary where keys are the compartmentalized
    metabolite IDs in the model and values are iterables of old IDs for those
    metabolites.

    Returns
    -------

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    """
    comp_comp_db_ids = {}
    final_metabolite_ids = {}

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get metabolite id duplicates
    met_dups = load_tsv(settings.metabolite_duplicates)
    def _check_metabolite_duplicates(bigg_id):
        """Return a new ID if there is a preferred ID, otherwise None."""
        for row in met_dups:
            if bigg_id in row[1:]:
                return row[0]
        return None

    # These helpers do not depend on the metabolite, so build them once rather
    # than on every loop iteration.
    # look for the formula in these places
    formula_fns = [lambda m: getattr(m, 'formula', None), # support cobra v0.3 and 0.4
                   lambda m: m.notes.get('FORMULA', None),
                   lambda m: m.notes.get('FORMULA1', None)]
    # Cast to string, but not for None
    strip_str_or_none = lambda v: str(v).strip() if v is not None else None
    # Ignore the empty string
    ignore_empty_str = lambda s: s if s != '' else None

    # for each metabolite in the model
    for metabolite in model.metabolites:
        metabolite_id = parse.remove_duplicate_tag(metabolite.id)

        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite_id)
        except Exception:
            # NOTE: the two literals are concatenated, so the first one needs
            # its trailing space.
            logging.error(('Could not find compartment for metabolite %s in '
                           'model %s' % (metabolite_id, model.id)))
            continue

        preferred = _check_metabolite_duplicates(component_bigg_id)
        new_bigg_id = preferred if preferred else component_bigg_id

        # Use a generator for lazy evaluation
        values = (ignore_empty_str(strip_str_or_none(formula_fn(metabolite)))
                  for formula_fn in formula_fns)
        # Get the first non-null result. Otherwise _formula = None.
        _formula = format_formula(next(filter(None, values), None))
        # Check for non-valid formulas
        if parse.invalid_formula(_formula):
            logging.warning('Invalid formula %s for metabolite %s in model %s' % (_formula, metabolite_id, model.id))
            _formula = None

        # get charge
        try:
            charge = int(metabolite.charge)
            # check for float charge
            if charge != metabolite.charge:
                logging.warning('Could not load charge {} for {} in model {}'
                             .format(metabolite.charge, metabolite_id, model.id))
                charge = None
        except Exception:
            # A missing or None charge is expected; only report real values
            # that could not be converted.
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite_id, model.id, metabolite.charge))
            charge = None

        # If there is no metabolite, add a new one.
        metabolite_db = (session
                         .query(Component)
                         .filter(Component.bigg_id == new_bigg_id)
                         .first())

        # if necessary, add the new metabolite, and keep track of the ID
        new_name = scrub_name(getattr(metabolite, 'name', None))
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Component(bigg_id=new_bigg_id, name=new_name)
            session.add(metabolite_db)
            session.commit()
        else:
            # If the metabolite is not new, consider improving the descriptive name
            improve_name(session, metabolite_db, new_name)

        # add the deprecated id if necessary
        if metabolite_db.bigg_id != component_bigg_id:
            get_or_create(session, DeprecatedID, deprecated_id=component_bigg_id,
                          type='component', ome_id=metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.bigg_id == compartment_bigg_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_bigg_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_bigg_id)
                name = ''
            compartment_db = Compartment(bigg_id=compartment_bigg_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # remember for adding the reaction
        comp_comp_db_ids[metabolite.id] = comp_component_db.id
        final_metabolite_ids[metabolite.id] = '%s_%s' % (new_bigg_id, compartment_bigg_id)

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            # Only fill in values that are still missing; never overwrite an
            # existing formula or charge.
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_bigg_id_c in old_metabolite_ids[metabolite.id]:
            # Add Synonym and  OldIDSynonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'compartmentalized_component')
                          .filter(Synonym.ome_id == comp_component_db.id)
                          .filter(Synonym.synonym == old_bigg_id_c)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='compartmentalized_component',
                                     ome_id=comp_component_db.id,
                                     synonym=old_bigg_id_c,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                         .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                         ome_id=model_comp_comp_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

            # Also add Synonym and OldIDSynonym for the universal metabolite
            try:
                new_style_id = parse.id_for_new_id_style(
                    parse.fix_legacy_id(old_bigg_id_c, use_hyphens=False),
                    is_metabolite=True
                )
                old_bigg_id_c_without_compartment = parse.split_compartment(new_style_id)[0]
            except Exception as e:
                # Exceptions have no `.message` attribute on Python 3; str(e)
                # works everywhere and keeps this handler from raising itself.
                logging.warning(str(e))
            else:
                synonym_db_2 = (session
                                .query(Synonym)
                                .filter(Synonym.type == 'component')
                                .filter(Synonym.ome_id == metabolite_db.id)
                                .filter(Synonym.synonym == old_bigg_id_c_without_compartment)
                                .filter(Synonym.data_source_id == data_source_id)
                                .first())
                if synonym_db_2 is None:
                    synonym_db_2 = Synonym(type='component',
                                           ome_id=metabolite_db.id,
                                           synonym=old_bigg_id_c_without_compartment,
                                           data_source_id=data_source_id)
                    session.add(synonym_db_2)
                    session.commit()
                old_id_db = (session
                             .query(OldIDSynonym)
                             .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                             .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                             .filter(OldIDSynonym.synonym_id == synonym_db_2.id)
                             .first())
                if old_id_db is None:
                    old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                             ome_id=model_comp_comp_db.id,
                                             synonym_id=synonym_db_2.id)
                    session.add(old_id_db)
                    session.commit()

    return comp_comp_db_ids, final_metabolite_ids
Пример #6
0
def load_genes(session, model_db_id, model, model_db_rxn_ids, old_gene_ids):
    """Load the genes for this model.

    For every gene in the model this finds (or creates) the Gene row, links it
    to the model through ModelGene, records old IDs as Synonym / OldIDSynonym
    rows, and fills the GeneReactionMatrix for each reaction the gene takes
    part in. When the matched database gene has a different BiGG ID, the
    gene_reaction_rule of the affected ModelReactions is rewritten.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    model_db_rxn_ids: A dictionary with keys for reactions in the model and
    values for the associated ModelReaction.id in the database.

    old_gene_ids: A dictionary where keys are the gene IDs in the model and
    values are iterables of old IDs for those genes.

    """
    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # find the model in the db
    model_db = session.query(Model).get(model_db_id)

    # find the chromosomes in the db
    chromosome_ids = (session
                      .query(Chromosome.id)
                      .filter(Chromosome.genome_id == model_db.genome_id)
                      .all())
    has_chromosomes = len(chromosome_ids) > 0
    if not has_chromosomes:
        logging.warning('No chromosomes for model %s' % model_db.bigg_id)

    # keep track of the gene-reaction associations
    gene_bigg_id_to_model_reaction_db_ids = defaultdict(set)
    for reaction in model.reactions:
        # find the ModelReaction that corresponds to this particular reaction in
        # the model
        model_reaction_db = (session
                             .query(ModelReaction)
                             .get(model_db_rxn_ids[reaction.id]))
        if model_reaction_db is None:
            logging.error('Could not find ModelReaction {} for {} in model {}. Cannot load GeneReactionMatrix entries'
                          .format(model_db_rxn_ids[reaction.id], reaction.id, model.id))
            continue
        for gene in reaction.genes:
            gene_bigg_id_to_model_reaction_db_ids[gene.id].add(model_reaction_db.id)

    # Matching strategies, tried in this order. The list does not depend on the
    # gene, so build it once.
    fns = [_by_bigg_id, _by_name, _by_synonym, _by_alternative_transcript,
           _by_alternative_transcript_name, _by_alternative_transcript_synonym,
           _by_bigg_id_no_underscore]

    # load the genes
    for gene in model.genes:
        if not has_chromosomes:
            # nothing to match against
            gene_db = None
            is_alternative_transcript = False
        else:
            # find a matching gene
            gene_db, is_alternative_transcript = _match_gene_by_fns(fns, session,
                                                                    gene.id,
                                                                    chromosome_ids)

        if not gene_db:
            # add
            if has_chromosomes:
                logging.warning('Gene not in genbank file: {} from model {}'
                                .format(gene.id, model.id))
            gene_db = Gene(bigg_id=gene.id,
                           # name is optional in cobra 0.4b2. This will probably change back.
                           name=scrub_name(getattr(gene, 'name', None)),
                           mapped_to_genbank=False)
            session.add(gene_db)
            session.commit()

        elif is_alternative_transcript:
            # duplicate gene for the alternative transcript
            old_gene_db = gene_db
            gene_db = Gene(bigg_id=gene.id,
                           name=old_gene_db.name,
                           leftpos=old_gene_db.leftpos,
                           rightpos=old_gene_db.rightpos,
                           chromosome_id=old_gene_db.chromosome_id,
                           strand=old_gene_db.strand,
                           mapped_to_genbank=True,
                           alternative_transcript_of=old_gene_db.id)
            session.add(gene_db)
            session.commit()

            # Duplicate all the gene synonyms. Synonym.ome_id is only unique
            # together with Synonym.type, so restrict to 'gene' to avoid
            # copying synonyms of other entities that share the numeric id.
            synonyms_db = (session
                           .query(Synonym)
                           .filter(Synonym.type == 'gene')
                           .filter(Synonym.ome_id == old_gene_db.id)
                           .all())
            for syn_db in synonyms_db:
                # add a new synonym
                session.add(Synonym(type=syn_db.type,
                                    ome_id=gene_db.id,
                                    synonym=syn_db.synonym,
                                    data_source_id=syn_db.data_source_id))

        # add model gene
        model_gene_db = (session
                         .query(ModelGene)
                         .filter(ModelGene.gene_id == gene_db.id)
                         .filter(ModelGene.model_id == model_db_id)
                         .first())
        if model_gene_db is None:
            model_gene_db = ModelGene(gene_id=gene_db.id,
                                      model_id=model_db_id)
            session.add(model_gene_db)
            session.commit()

        # add old gene synonym
        for old_bigg_id in old_gene_ids[gene.id]:
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'gene')
                          .filter(Synonym.ome_id == gene_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='gene',
                                     ome_id=gene_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()
            # add OldIDSynonym
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_gene')
                         .filter(OldIDSynonym.ome_id == model_gene_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_gene',
                                         ome_id=model_gene_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

        # Find the model reactions for this gene. Genes that take part in no
        # loaded reaction simply have nothing to link (.get avoids inserting
        # empty sets into the defaultdict).
        model_reaction_db_ids = gene_bigg_id_to_model_reaction_db_ids.get(gene.id, ())

        for mr_db_id in model_reaction_db_ids:
            # add to the GeneReactionMatrix, if not already present
            found_gene_reaction_row = (session
                                       .query(GeneReactionMatrix)
                                       .filter(GeneReactionMatrix.model_gene_id == model_gene_db.id)
                                       .filter(GeneReactionMatrix.model_reaction_id == mr_db_id)
                                       .count() > 0)
            if not found_gene_reaction_row:
                new_object = GeneReactionMatrix(model_gene_id=model_gene_db.id,
                                                model_reaction_id=mr_db_id)
                session.add(new_object)

            # update the gene_reaction_rule if the gene id has changed
            if gene.id != gene_db.bigg_id:
                mr = session.query(ModelReaction).get(mr_db_id)
                new_rule = _replace_gene_str(mr.gene_reaction_rule, gene.id,
                                             gene_db.bigg_id)
                (session
                 .query(ModelReaction)
                 .filter(ModelReaction.id == mr_db_id)
                 .update({ModelReaction.gene_reaction_rule: new_rule}))

    # Persist the GeneReactionMatrix rows, duplicated synonyms and rule updates
    # that are still pending after the last gene.
    session.commit()
Пример #7
0
def load_reactions(session, model_db_id, model, old_reaction_ids,
                   comp_comp_db_ids, final_metabolite_ids):
    """Load the reactions and stoichiometries into the model.

    Each model reaction is matched to a universal Reaction by BiGG ID and by
    reaction hash (forward, then reverse), creating a new universal Reaction
    when no match exists. A ModelReaction row is then found or created, and
    the old IDs are recorded as Synonym / OldIDSynonym rows.

    TODO if the reaction is already loaded, we need to check the stoichiometry
    hash. If that doesn't match, then add a new reaction with an incremented ID
    (e.g. ACALD_1)

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    old_reaction_ids: A dictionary where keys are the reaction IDs in the model
    and values are iterables of old IDs for those reactions.

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    Returns
    -------

    A dictionary with keys for reaction BiGG IDs in the model and values for the
    associated ModelReaction.id in the database.

    """

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get reaction hash_prefs
    hash_prefs = load_tsv(settings.reaction_hash_prefs)
    def _check_hash_prefs(a_hash, is_pseudoreaction):
        """Return the preferred BiGG ID for a_hash, or None."""
        for row in hash_prefs:
            marked_pseudo = len(row) > 2 and row[2] == 'pseudoreaction'
            if row[0] == a_hash and marked_pseudo == is_pseudoreaction:
                return row[1]
        return None

    # Defined once here rather than inside the reaction loop.
    def _find_new_incremented_id(session, original_id):
        """Look for a reaction bigg_id that is not already taken."""
        new_id = increment_id(original_id)
        while True:
            # Check for existing and deprecated reaction ids
            if (session.query(Reaction).filter(Reaction.bigg_id == new_id).first() is None and
                not _is_deprecated_reaction_id(session, new_id)):
                return new_id
            new_id = increment_id(new_id)

    # Generate reaction hashes, and find reactions in the same model in opposite
    # directions.
    reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids)
                       for r in model.reactions}
    reverse_reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids, reverse=True)
                               for r in model.reactions}
    reverse_reaction_hashes_rev = {v: k for k, v in six.iteritems(reverse_reaction_hashes)}
    reactions_not_to_reverse = set()
    for r_id, h in six.iteritems(reaction_hashes):
        if h in reverse_reaction_hashes_rev:
            reactions_not_to_reverse.add(r_id)
            reactions_not_to_reverse.add(reverse_reaction_hashes_rev[h])

    model_db_rxn_ids = {}
    for reaction in model.reactions:
        # Drop duplicates label
        reaction_id = parse.remove_duplicate_tag(reaction.id)

        # Get the reaction
        reaction_db = (session
                       .query(Reaction)
                       .filter(Reaction.bigg_id == reaction_id)
                       .first())

        # check for pseudoreaction
        is_pseudoreaction = check_pseudoreaction(reaction_id)

        # calculate the hash
        reaction_hash = reaction_hashes[reaction.id]
        hash_db = (session
                   .query(Reaction)
                   .filter(Reaction.reaction_hash == reaction_hash)
                   .filter(Reaction.pseudoreaction == is_pseudoreaction)
                   .first())
        # If there wasn't a match for the forward hash, also check the reverse
        # hash. Do not check reverse hash for reactions with both directions
        # defined in the same model (e.g. SUCDi and FRD7).
        if not hash_db and reaction.id not in reactions_not_to_reverse:
            reverse_hash_db = (session
                               .query(Reaction)
                               .filter(Reaction.reaction_hash == reverse_reaction_hashes[reaction.id])
                               .filter(Reaction.pseudoreaction == is_pseudoreaction)
                               .first())
        else:
            reverse_hash_db = None

        # bigg_id match  hash match b==h  pseudoreaction  example                   function
        #  n               n               n            first GAPD                _new_reaction (1)
        #  n               n               y            first EX_glc_e            _new_reaction (1)
        #  y               n               n            incorrect GAPD            _new_reaction & increment (2)
        #  y               n               y            incorrect EX_glc_e        _new_reaction & increment (2)
        #  n               y               n            GAPDH after GAPD          reaction = hash_reaction (3a)
        #  n               y               y            EX_glc__e after EX_glc_e  reaction = hash_reaction (3a)
        #  y               y         n     n            ?                         reaction = hash_reaction (3a)
        #  y               y         n     y            ?                         reaction = hash_reaction (3a)
        #  y               y         y     n            second GAPD               reaction = bigg_reaction (3b)
        #  y               y         y     y            second EX_glc_e           reaction = bigg_reaction (3b)
        # NOTE: only check pseudoreaction hash against other pseudoreactions
        # 4a and 4b are 3a and 3b with a reversed reaction

        # Check for a preferred ID in the preferences, based on the forward
        # hash. Don't check the reverse hash in preferences.
        preferred_id = _check_hash_prefs(reaction_hash, is_pseudoreaction)

        # no reversed by default
        is_reversed = False
        is_new = False

        # (0) If there is a preferred ID, make that the new ID, and increment any old IDs
        if preferred_id is not None:
            # if the reaction already matches, just continue
            if hash_db is not None and hash_db.bigg_id == preferred_id:
                reaction_db = hash_db
            # otherwise, make the new reaction
            else:
                # if existing reactions match the preferred reaction find a new,
                # incremented id for the existing match
                preferred_id_db = session.query(Reaction).filter(Reaction.bigg_id == preferred_id).first()
                if preferred_id_db is not None:
                    new_id = _find_new_incremented_id(session, preferred_id)
                    logging.warning('Incrementing database reaction {} to {} and prefering {} (from model {}) based on hash preferences'
                                    .format(preferred_id, new_id, preferred_id, model.id))
                    preferred_id_db.bigg_id = new_id
                    session.commit()

                # make a new reaction for the preferred_id
                reaction_db = _new_reaction(session, reaction, preferred_id,
                                            reaction_hash, model_db_id, model,
                                            is_pseudoreaction, comp_comp_db_ids)
                is_new = True

        # (1) no bigg_id matches, no stoichiometry match or pseudoreaction, then
        # make a new reaction
        elif reaction_db is None and hash_db is None and reverse_hash_db is None:
            # Check that the id is not deprecated. Use reaction_id (duplicate
            # tag removed) because that is the id the new reaction gets.
            if _is_deprecated_reaction_id(session, reaction_id):
                logging.error(('Keeping bigg_id {} (hash {} - from model {}) '
                               'even though it is on the deprecated ID list. '
                               'You should add it to reaction-hash-prefs.txt')
                              .format(reaction_id, reaction_hash, model.id))
            reaction_db = _new_reaction(session, reaction, reaction_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (2) bigg_id matches, but not the hash, then increment the BIGG_ID
        elif reaction_db is not None and hash_db is None and reverse_hash_db is None:
            # Loop until we find an ID that is not taken. Increment from
            # reaction_id so the duplicate tag does not leak into the new
            # universal ID (the conflict was found by looking up reaction_id).
            new_id = _find_new_incremented_id(session, reaction_id)
            logging.warning('Incrementing bigg_id {} to {} (from model {}) based on conflicting reaction hash'
                            .format(reaction_id, new_id, model.id))
            reaction_db = _new_reaction(session, reaction, new_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (3) but found a stoichiometry match, then use the hash reaction match.
        elif hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # (3a) the hash match wins over a missing or different bigg_id match
            if reaction_db is None or reaction_db.id != hash_db.id:
                reaction_db = hash_db
            # (3b) BIGG ID matches a reaction with the same hash: keep reaction_db

        # (4) found a reverse stoichiometry match, then use that reaction.
        elif reverse_hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # Remember to switch upper and lower bounds
            is_reversed = True
            logging.info('Matched {} to {} based on reverse hash'
                         .format(reaction_id, reverse_hash_db.bigg_id))

            # (4a) the reverse hash match wins over a missing or different
            # bigg_id match
            if reaction_db is None or reaction_db.id != reverse_hash_db.id:
                reaction_db = reverse_hash_db
            # (4b) BIGG ID matches a reaction with the same hash: keep reaction_db

        else:
            raise Exception('Should not get here')

        # If the reaction is not new, consider improving the descriptive name
        if not is_new:
            new_name = scrub_name(check_none(getattr(reaction, 'name', None)))
            improve_name(session, reaction_db, new_name)

        # Add reaction to deprecated ID list if necessary
        if reaction_db.bigg_id != reaction_id:
            get_or_create(session, DeprecatedID, deprecated_id=reaction_id,
                          type='reaction', ome_id=reaction_db.id)

        # If the reaction is reversed, then switch upper and lower bound
        lower_bound = -reaction.upper_bound if is_reversed else reaction.lower_bound
        upper_bound = -reaction.lower_bound if is_reversed else reaction.upper_bound

        # Subsystem. A reaction without a subsystem (None) must not crash on
        # .strip(); store it as None.
        raw_subsystem = getattr(reaction, 'subsystem', None)
        subsystem = None if raw_subsystem is None else check_none(raw_subsystem.strip())

        # get the model reaction
        model_reaction_db = (session
                             .query(ModelReaction)
                             .filter(ModelReaction.reaction_id == reaction_db.id)
                             .filter(ModelReaction.model_id == model_db_id)
                             .filter(ModelReaction.lower_bound == lower_bound)
                             .filter(ModelReaction.upper_bound == upper_bound)
                             .filter(ModelReaction.gene_reaction_rule == reaction.gene_reaction_rule)
                             .filter(ModelReaction.objective_coefficient == reaction.objective_coefficient)
                             .filter(ModelReaction.subsystem == subsystem)
                             .first())
        if model_reaction_db is None:
            # get the number of existing copies of this reaction in the model
            copy_number = (session
                           .query(ModelReaction)
                           .filter(ModelReaction.reaction_id == reaction_db.id)
                           .filter(ModelReaction.model_id == model_db_id)
                           .count()) + 1
            # make a new reaction
            model_reaction_db = ModelReaction(model_id=model_db_id,
                                              reaction_id=reaction_db.id,
                                              gene_reaction_rule=reaction.gene_reaction_rule,
                                              original_gene_reaction_rule=reaction.gene_reaction_rule,
                                              upper_bound=upper_bound,
                                              lower_bound=lower_bound,
                                              objective_coefficient=reaction.objective_coefficient,
                                              copy_number=copy_number,
                                              subsystem=subsystem)
            session.add(model_reaction_db)
            session.commit()

        # remember the changed ids
        model_db_rxn_ids[reaction.id] = model_reaction_db.id

        # add synonyms
        #
        # get the id from the published model
        for old_bigg_id in old_reaction_ids[reaction.id]:
            # add a synonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'reaction')
                          .filter(Synonym.ome_id == reaction_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='reaction',
                                     ome_id=reaction_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_reaction')
                         .filter(OldIDSynonym.ome_id == model_reaction_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_reaction',
                                         ome_id=model_reaction_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

    return model_db_rxn_ids
Пример #8
0
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    For each metabolite in the model, the Component, Compartment,
    CompartmentalizedComponent and ModelCompartmentalizedComponent rows are
    looked up and created when missing, and the old IDs are stored as Synonym
    and OldIDSynonym rows. A metabolite whose ID cannot be split into a
    component and a compartment is logged and skipped.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment BiGG IDs and
    values are compartment names. It is only consulted when a new compartment
    has to be created; a missing key results in an empty name.

    old_metabolite_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for compartmentalized metabolites.

    Returns
    -------

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    """
    comp_comp_db_ids = {}
    final_metabolite_ids = {}

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get metabolite id duplicates
    met_dups = load_tsv(settings.metabolite_duplicates)

    def _check_metabolite_duplicates(bigg_id):
        """Return a new ID if there is a preferred ID, otherwise None."""
        for row in met_dups:
            if bigg_id in row[1:]:
                return row[0]
        return None

    def _find_formula(met):
        """Return the first non-empty formula string for met, otherwise None."""
        # Look for the formula in these places, in this order. The lookups are
        # only evaluated until one of them yields a usable value.
        lookups = (lambda m: getattr(m, 'formula', None),  # support cobra v0.3 and 0.4
                   lambda m: m.notes.get('FORMULA', None),
                   lambda m: m.notes.get('FORMULA1', None))
        for lookup in lookups:
            value = lookup(met)
            if value is None:
                continue
            # cast to string and ignore the empty string
            text = str(value).strip()
            if text:
                return text
        return None

    # for each metabolite in the model
    for metabolite in model.metabolites:
        metabolite_id = parse.remove_duplicate_tag(metabolite.id)

        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite_id)
        except Exception:
            logging.error('Could not find compartment for metabolite %s in model %s',
                          metabolite_id, model.id)
            continue

        preferred = _check_metabolite_duplicates(component_bigg_id)
        new_bigg_id = preferred if preferred else component_bigg_id

        # Get the first non-null formula. Otherwise format_formula receives None.
        _formula = format_formula(_find_formula(metabolite))
        # Check for non-valid formulas
        if parse.invalid_formula(_formula):
            logging.warning('Invalid formula %s for metabolite %s in model %s' % (_formula, metabolite_id, model.id))
            _formula = None

        # get charge
        try:
            charge = int(metabolite.charge)
            # check for float charge
            if charge != metabolite.charge:
                logging.warning('Could not load charge {} for {} in model {}'
                                .format(metabolite.charge, metabolite_id, model.id))
                charge = None
        except Exception:
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite_id, model.id, metabolite.charge))
            charge = None

        # If there is no metabolite, add a new one.
        metabolite_db = (session
                         .query(Component)
                         .filter(Component.bigg_id == new_bigg_id)
                         .first())

        # if necessary, add the new metabolite, and keep track of the ID
        new_name = scrub_name(getattr(metabolite, 'name', None))
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Component(bigg_id=new_bigg_id, name=new_name)
            session.add(metabolite_db)
            session.commit()
        else:
            # If the metabolite is not new, consider improving the descriptive name
            improve_name(session, metabolite_db, new_name)

        # add the deprecated id if necessary
        if metabolite_db.bigg_id != component_bigg_id:
            get_or_create(session, DeprecatedID, deprecated_id=component_bigg_id,
                          type='component', ome_id=metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.bigg_id == compartment_bigg_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_bigg_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_bigg_id)
                name = ''
            compartment_db = Compartment(bigg_id=compartment_bigg_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # remember for adding the reaction; keyed by the untouched model ID
        comp_comp_db_ids[metabolite.id] = comp_component_db.id
        final_metabolite_ids[metabolite.id] = '%s_%s' % (new_bigg_id, compartment_bigg_id)

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            # only fill in values that are still missing on the existing row
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_bigg_id_c in old_metabolite_ids[metabolite.id]:
            # Add Synonym and OldIDSynonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'compartmentalized_component')
                          .filter(Synonym.ome_id == comp_component_db.id)
                          .filter(Synonym.synonym == old_bigg_id_c)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='compartmentalized_component',
                                     ome_id=comp_component_db.id,
                                     synonym=old_bigg_id_c,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                         .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                         ome_id=model_comp_comp_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

            # Also add Synonym and OldIDSynonym for the universal metabolite
            try:
                new_style_id = parse.id_for_new_id_style(
                    parse.fix_legacy_id(old_bigg_id_c, use_hyphens=False),
                    is_metabolite=True
                )
                old_bigg_id_c_without_compartment = parse.split_compartment(new_style_id)[0]
            except Exception as e:
                # Exceptions have no `.message` attribute on Python 3, so log
                # the exception itself instead of failing inside the handler.
                logging.warning('%s', e)
            else:
                synonym_db_2 = (session
                                .query(Synonym)
                                .filter(Synonym.type == 'component')
                                .filter(Synonym.ome_id == metabolite_db.id)
                                .filter(Synonym.synonym == old_bigg_id_c_without_compartment)
                                .filter(Synonym.data_source_id == data_source_id)
                                .first())
                if synonym_db_2 is None:
                    synonym_db_2 = Synonym(type='component',
                                           ome_id=metabolite_db.id,
                                           synonym=old_bigg_id_c_without_compartment,
                                           data_source_id=data_source_id)
                    session.add(synonym_db_2)
                    session.commit()
                old_id_db = (session
                             .query(OldIDSynonym)
                             .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                             .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                             .filter(OldIDSynonym.synonym_id == synonym_db_2.id)
                             .first())
                if old_id_db is None:
                    old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                             ome_id=model_comp_comp_db.id,
                                             synonym_id=synonym_db_2.id)
                    session.add(old_id_db)
                    session.commit()

    return comp_comp_db_ids, final_metabolite_ids
Пример #9
0
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    For each metabolite in the model, the Metabolite, Compartment,
    CompartmentalizedComponent and ModelCompartmentalizedComponent rows are
    looked up and created when missing, and the old IDs are stored as Synonym
    and OldIDSynonym rows. A metabolite whose ID cannot be split into a
    component and a compartment is logged and skipped.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment BiGG IDs and
    values are compartment names. It is only consulted when a new compartment
    has to be created; a missing key results in an empty name.

    old_metabolite_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for compartmentalized metabolites.

    Returns
    -------

    None. All results are written to the database through the session.

    """

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    def _find_formula(met):
        """Return the first non-empty formula string for met, otherwise None."""
        # Look for the formula in these places, in this order. The lookups are
        # only evaluated until one of them yields a usable value.
        lookups = (lambda m: getattr(m, 'formula', None),  # support cobra v0.3 and 0.4
                   lambda m: m.notes.get('FORMULA', None),
                   lambda m: m.notes.get('FORMULA1', None))
        for lookup in lookups:
            value = lookup(met)
            if value is None:
                continue
            # cast to string and ignore the empty string
            text = str(value).strip()
            if text:
                return text
        return None

    # for each metabolite in the model
    for metabolite in model.metabolites:
        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite.id)
        except Exception:
            logging.error('Could not find compartment for metabolite %s in model %s',
                          metabolite.id, model.id)
            continue

        # If there is no metabolite, add a new one.
        # TODO we could also double check these ID matches with linkouts and formula
        metabolite_db = (session
                         .query(Metabolite)
                         .filter(Metabolite.bigg_id == component_bigg_id)
                         .first())

        # Get the first non-null formula. Otherwise format_formula receives None.
        _formula = format_formula(_find_formula(metabolite))

        # get charge
        try:
            charge = int(metabolite.charge)
        except Exception:
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite.id, model.id, metabolite.charge))
            charge = None

        # if necessary, add the new metabolite, and keep track of the ID
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Metabolite(bigg_id=component_bigg_id,
                                       name=scrub_name(getattr(metabolite, 'name', None)))
            session.add(metabolite_db)
            session.commit()

        # load the linkouts for the universal metabolite
        # _load_metabolite_linkouts(session, metabolite, metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.bigg_id == compartment_bigg_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_bigg_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_bigg_id)
                name = ''
            compartment_db = Compartment(bigg_id=compartment_bigg_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            # only fill in values that are still missing on the existing row
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_bigg_id_c in old_metabolite_ids[metabolite.id]:
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'compartmentalized_component')
                          .filter(Synonym.ome_id == comp_component_db.id)
                          .filter(Synonym.synonym == old_bigg_id_c)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='compartmentalized_component',
                                     ome_id=comp_component_db.id,
                                     synonym=old_bigg_id_c,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                         .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                         ome_id=model_comp_comp_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()
Пример #10
0
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    For each metabolite in the model, the Metabolite, Compartment,
    CompartmentalizedComponent and ModelCompartmentalizedComponent rows are
    looked up and created when missing, and the old IDs are stored as Synonym
    and OldIDSynonym rows. A metabolite whose ID cannot be split into a
    component and a compartment is logged and skipped.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment COBRA IDs and
    values are compartment names. It is only consulted when a new compartment
    has to be created; a missing key results in an empty name.

    old_metabolite_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for compartmentalized metabolites.

    Returns
    -------

    None. All results are written to the database through the session.

    """

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_cobra_id')

    def _find_formula(met):
        """Return the first non-empty formula string for met, otherwise None."""
        # Look for the formula in these places, in this order. The lookups are
        # only evaluated until one of them yields a usable value.
        lookups = (lambda m: getattr(m, 'formula', None),  # support cobra v0.3 and 0.4
                   lambda m: m.notes.get('FORMULA', None),
                   lambda m: m.notes.get('FORMULA1', None))
        for lookup in lookups:
            value = lookup(met)
            if value is None:
                continue
            # cast to string and ignore the empty string
            text = str(value).strip()
            if text:
                return text
        return None

    # for each metabolite in the model
    for metabolite in model.metabolites:
        try:
            component_cobra_id, compartment_cobra_id = parse.split_compartment(metabolite.id)
        except Exception:
            logging.error('Could not find compartment for metabolite %s in model %s',
                          metabolite.id, model.id)
            continue

        # If there is no metabolite, add a new one.
        # TODO we could also double check these ID matches with linkouts and formula
        metabolite_db = (session
                         .query(Metabolite)
                         .filter(Metabolite.cobra_id == component_cobra_id)
                         .first())

        # Get the first non-null formula. Otherwise format_formula receives None.
        _formula = format_formula(_find_formula(metabolite))

        # get charge
        try:
            charge = int(metabolite.charge)
        except Exception:
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite.id, model.id, metabolite.charge))
            charge = None

        # if necessary, add the new metabolite, and keep track of the ID
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Metabolite(cobra_id=component_cobra_id,
                                       name=scrub_name(getattr(metabolite, 'name', None)))
            session.add(metabolite_db)
            session.commit()

        # load the linkouts for the universal metabolite
        # _load_metabolite_linkouts(session, metabolite, metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.cobra_id == compartment_cobra_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_cobra_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_cobra_id)
                name = ''
            compartment_db = Compartment(cobra_id=compartment_cobra_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            # only fill in values that are still missing on the existing row
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_cobra_id_c in old_metabolite_ids[metabolite.id]:
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'compartmentalized_component')
                          .filter(Synonym.ome_id == comp_component_db.id)
                          .filter(Synonym.synonym == old_cobra_id_c)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='compartmentalized_component',
                                     ome_id=comp_component_db.id,
                                     synonym=old_cobra_id_c,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                         .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                         ome_id=model_comp_comp_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()