Пример #1
0
def load_gene_synonym(session, gene_db, synonym, data_source_id):
    """Get or create the 'gene' Synonym row for a gene and return its ID.

    The data_source_id argument is first passed through
    get_or_create_data_source; the value returned by that call is what is
    stored on the Synonym row, together with gene_db.id as the ome_id.

    """
    resolved_source_id = get_or_create_data_source(session, data_source_id)
    synonym_fields = {
        'type': 'gene',
        'ome_id': gene_db.id,
        'synonym': synonym,
        'data_source_id': resolved_source_id,
    }
    synonym_row, _created = get_or_create(session, Synonym, **synonym_fields)
    return synonym_row.id
Пример #2
0
def load_gene_synonym(session, gene_db, synonym, data_source_id):
    """Return the ID of the 'gene' Synonym row for this gene and synonym.

    The row is fetched or created with get_or_create. Its data source is
    whatever get_or_create_data_source returns for the data_source_id
    argument, and its ome_id is gene_db.id.

    """
    source_db_id = get_or_create_data_source(session, data_source_id)
    lookup = dict(type='gene',
                  ome_id=gene_db.id,
                  synonym=synonym,
                  data_source_id=source_db_id)
    gene_synonym, _was_found = get_or_create(session, Synonym, **lookup)
    return gene_synonym.id
Пример #3
0
    def __init__(self, name, data_source_id=None, group_name=None, attributes=None):
        """Store the given fields, falling back to the 'generic' data source.

        Arguments
        ---------

        name: Stored as self.name.

        data_source_id: Stored as self.data_source_id. When None, the
        DataSource with cobra_id '-1', name 'generic' and an empty url_prefix
        is fetched or created, and its ID is used instead.

        group_name: Stored as self.group_name.

        attributes: Stored as self.attributes.

        """
        # A session is only needed to resolve the default data source.
        if data_source_id is None:
            session = Session()
            try:
                data_source, _ = get_or_create(session, DataSource, cobra_id='-1',
                                               name='generic', url_prefix='')
                # read the ID while the session is still open
                data_source_id = data_source.id
            finally:
                # always release the session, even if the lookup fails
                session.close()

        self.name = name
        self.data_source_id = data_source_id
        self.group_name = group_name
        self.attributes = attributes
Пример #4
0
def load_reactions(session, model_db_id, model, old_reaction_ids,
                   comp_comp_db_ids, final_metabolite_ids):
    """Load the reactions and stoichiometries into the model.

    For every reaction in the model, find or create the matching universal
    Reaction row (matching on BiGG ID, forward reaction hash, reverse reaction
    hash, and the hash preferences file), then find or create the ModelReaction
    row, and finally record the old IDs as Synonym and OldIDSynonym rows.

    TODO if the reaction is already loaded, we need to check the stoichiometry
    hash. If that doesn't match, then add a new reaction with an incremented ID
    (e.g. ACALD_1)

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    old_reaction_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for reactions.

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    Returns
    -------

    A dictionary with keys for reaction BiGG IDs in the model and values for the
    associated ModelReaction.id in the database.

    """

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # Get the reaction hash preferences. Each row is read as: hash, preferred
    # BiGG ID, and an optional third column equal to 'pseudoreaction'.
    hash_prefs = load_tsv(settings.reaction_hash_prefs)
    def _check_hash_prefs(a_hash, is_pseudoreaction):
        """Return the preferred BiGG ID for a_hash, or None."""
        for row in hash_prefs:
            marked_pseudo = len(row) > 2 and row[2] == 'pseudoreaction'
            if row[0] == a_hash and marked_pseudo == is_pseudoreaction:
                return row[1]
        return None

    # Generate reaction hashes, and find reactions in the same model in opposite
    # directions.
    reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids)
                       for r in model.reactions}
    reverse_reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids, reverse=True)
                               for r in model.reactions}
    # Invert the mapping: reverse hash -> reaction ID
    reverse_reaction_hashes_rev = {v: k for k, v in six.iteritems(reverse_reaction_hashes)}
    # A reaction whose forward hash equals some reaction's reverse hash is
    # recorded here together with that other reaction.
    reactions_not_to_reverse = set()
    for r_id, h in six.iteritems(reaction_hashes):
        if h in reverse_reaction_hashes_rev:
            reactions_not_to_reverse.add(r_id)
            reactions_not_to_reverse.add(reverse_reaction_hashes_rev[h])

    model_db_rxn_ids = {}
    for reaction in model.reactions:
        # Drop the duplicates label
        reaction_id = parse.remove_duplicate_tag(reaction.id)

        # Get the reaction with a matching BiGG ID, if any
        reaction_db = (session
                       .query(Reaction)
                       .filter(Reaction.bigg_id == reaction_id)
                       .first())

        # check for pseudoreaction
        is_pseudoreaction = check_pseudoreaction(reaction_id)

        # Look up the precomputed forward hash, and find a database reaction
        # with the same hash and the same pseudoreaction flag.
        reaction_hash = reaction_hashes[reaction.id]
        hash_db = (session
                   .query(Reaction)
                   .filter(Reaction.reaction_hash == reaction_hash)
                   .filter(Reaction.pseudoreaction == is_pseudoreaction)
                   .first())
        # If there wasn't a match for the forward hash, also check the reverse
        # hash. Do not check reverse hash for reactions with both directions
        # defined in the same model (e.g. SUCDi and FRD7).
        if not hash_db and reaction.id not in reactions_not_to_reverse:
            reverse_hash_db = (session
                               .query(Reaction)
                               .filter(Reaction.reaction_hash == reverse_reaction_hashes[reaction.id])
                               .filter(Reaction.pseudoreaction == is_pseudoreaction)
                               .first())
        else:
            reverse_hash_db = None

        # bigg_id match  hash match b==h  pseudoreaction  example                   function
        #  n               n               n            first GAPD                _new_reaction (1)
        #  n               n               y            first EX_glc_e            _new_reaction (1)
        #  y               n               n            incorrect GAPD            _new_reaction & increment (2)
        #  y               n               y            incorrect EX_glc_e        _new_reaction & increment (2)
        #  n               y               n            GAPDH after GAPD          reaction = hash_reaction (3a)
        #  n               y               y            EX_glc__e after EX_glc_e  reaction = hash_reaction (3a)
        #  y               y         n     n            ?                         reaction = hash_reaction (3a)
        #  y               y         n     y            ?                         reaction = hash_reaction (3a)
        #  y               y         y     n            second GAPD               reaction = bigg_reaction (3b)
        #  y               y         y     y            second EX_glc_e           reaction = bigg_reaction (3b)
        # NOTE: only check pseudoreaction hash against other pseudoreactions
        # 4a and 4b are 3a and 3b with a reversed reaction

        def _find_new_incremented_id(session, original_id):
            """Look for a reaction bigg_id that is not already taken."""
            new_id = increment_id(original_id)
            while True:
                # Check for existing and deprecated reaction ids
                if (session.query(Reaction).filter(Reaction.bigg_id == new_id).first() is None and
                    not _is_deprecated_reaction_id(session, new_id)):
                    return new_id
                new_id = increment_id(new_id)

        # Check for a preferred ID in the preferences, based on the forward
        # hash. Don't check the reverse hash in preferences.
        preferred_id = _check_hash_prefs(reaction_hash, is_pseudoreaction)

        # not reversed and not new by default
        is_reversed = False
        is_new = False

        # (0) If there is a preferred ID, make that the new ID, and increment any old IDs
        if preferred_id is not None:
            # if the reaction already matches, just continue
            if hash_db is not None and hash_db.bigg_id == preferred_id:
                reaction_db = hash_db
            # otherwise, make the new reaction
            else:
                # If an existing reaction already uses the preferred ID, move
                # that existing reaction to a new, incremented ID.
                preferred_id_db = session.query(Reaction).filter(Reaction.bigg_id == preferred_id).first()
                if preferred_id_db is not None:
                    new_id = _find_new_incremented_id(session, preferred_id)
                    logging.warning('Incrementing database reaction {} to {} and prefering {} (from model {}) based on hash preferences'
                                .format(preferred_id, new_id, preferred_id, model.id))
                    preferred_id_db.bigg_id = new_id
                    session.commit()

                # make a new reaction for the preferred_id
                reaction_db = _new_reaction(session, reaction, preferred_id,
                                            reaction_hash, model_db_id, model,
                                            is_pseudoreaction, comp_comp_db_ids)
                is_new = True

        # (1) no bigg_id match and no forward or reverse hash match, then make
        # a new reaction
        elif reaction_db is None and hash_db is None and reverse_hash_db is None:
            # Check that the id is not deprecated.
            # NOTE(review): this checks reaction.id, while the log message and
            # the new reaction use reaction_id (duplicate tag removed) --
            # confirm which one is intended.
            if _is_deprecated_reaction_id(session, reaction.id):
                logging.error(('Keeping bigg_id {} (hash {} - from model {}) '
                               'even though it is on the deprecated ID list. '
                               'You should add it to reaction-hash-prefs.txt')
                              .format(reaction_id, reaction_hash, model.id))
            reaction_db = _new_reaction(session, reaction, reaction_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (2) bigg_id matches, but not the hash, then increment the BIGG_ID
        elif reaction_db is not None and hash_db is None and reverse_hash_db is None:
            # Loop until we find an ID that is not taken.
            # NOTE(review): the search starts from reaction.id rather than
            # reaction_id -- confirm which one is intended.
            new_id = _find_new_incremented_id(session, reaction.id)
            logging.warning('Incrementing bigg_id {} to {} (from model {}) based on conflicting reaction hash'
                        .format(reaction_id, new_id, model.id))
            reaction_db = _new_reaction(session, reaction, new_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (3) found a forward hash match, then use the hash reaction match.
        elif hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # (3a) no BiGG ID match, or it is a different reaction than the
            # hash match
            if reaction_db is None or reaction_db.id != hash_db.id:
                reaction_db = hash_db
            # (3b) BIGG ID matches a reaction with the same hash, then just continue
            else:
                pass

        # (4) found a reverse hash match, then use the reverse hash reaction
        # match.
        elif reverse_hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # Remember to switch upper and lower bounds
            is_reversed = True
            logging.info('Matched {} to {} based on reverse hash'
                         .format(reaction_id, reverse_hash_db.bigg_id))

            # (4a) no BiGG ID match, or it is a different reaction than the
            # reverse hash match
            if reaction_db is None or reaction_db.id != reverse_hash_db.id:
                reaction_db = reverse_hash_db
            # (4b) BIGG ID matches a reaction with the same hash, then just continue
            else:
                pass

        else:
            raise Exception('Should not get here')

        # If the reaction is not new, consider improving the descriptive name
        if not is_new:
            new_name = scrub_name(check_none(getattr(reaction, 'name', None)))
            improve_name(session, reaction_db, new_name)

        # Add reaction to deprecated ID list if necessary
        if reaction_db.bigg_id != reaction_id:
            get_or_create(session, DeprecatedID, deprecated_id=reaction_id,
                          type='reaction', ome_id=reaction_db.id)

        # If the reaction is reversed, then switch and negate the upper and
        # lower bounds
        lower_bound = -reaction.upper_bound if is_reversed else reaction.lower_bound
        upper_bound = -reaction.lower_bound if is_reversed else reaction.upper_bound

        # subsystem
        # NOTE(review): .strip() raises AttributeError if reaction.subsystem is
        # None -- confirm that it is always a string here.
        subsystem = check_none(reaction.subsystem.strip())

        # Get the model reaction that matches on every stored attribute
        model_reaction_db = (session
                             .query(ModelReaction)
                             .filter(ModelReaction.reaction_id == reaction_db.id)
                             .filter(ModelReaction.model_id == model_db_id)
                             .filter(ModelReaction.lower_bound == lower_bound)
                             .filter(ModelReaction.upper_bound == upper_bound)
                             .filter(ModelReaction.gene_reaction_rule == reaction.gene_reaction_rule)
                             .filter(ModelReaction.objective_coefficient == reaction.objective_coefficient)
                             .filter(ModelReaction.subsystem == subsystem)
                             .first())
        if model_reaction_db is None:
            # The copy number is one more than the number of existing copies of
            # this reaction in the model
            copy_number = (session
                           .query(ModelReaction)
                           .filter(ModelReaction.reaction_id == reaction_db.id)
                           .filter(ModelReaction.model_id == model_db_id)
                           .count()) + 1
            # make a new model reaction
            model_reaction_db = ModelReaction(model_id=model_db_id,
                                              reaction_id=reaction_db.id,
                                              gene_reaction_rule=reaction.gene_reaction_rule,
                                              original_gene_reaction_rule=reaction.gene_reaction_rule,
                                              upper_bound=upper_bound,
                                              lower_bound=lower_bound,
                                              objective_coefficient=reaction.objective_coefficient,
                                              copy_number=copy_number,
                                              subsystem=subsystem)
            session.add(model_reaction_db)
            session.commit()

        # remember the changed ids
        model_db_rxn_ids[reaction.id] = model_reaction_db.id

        # add synonyms
        #
        # get the ids from the published model
        for old_bigg_id in old_reaction_ids[reaction.id]:
            # add a synonym on the universal reaction, unless it exists
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'reaction')
                          .filter(Synonym.ome_id == reaction_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='reaction',
                                     ome_id=reaction_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym linking the model reaction to the synonym,
            # unless it exists
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_reaction')
                         .filter(OldIDSynonym.ome_id == model_reaction_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_reaction',
                                         ome_id=model_reaction_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

    return model_db_rxn_ids
Пример #5
0
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    For every metabolite in the model, find or create the Component,
    Compartment, CompartmentalizedComponent and
    ModelCompartmentalizedComponent rows, then record the old IDs as Synonym
    and OldIDSynonym rows. Metabolites whose ID cannot be split into a
    component and a compartment are logged and skipped.

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment BiGG IDs and
    values are compartment names. It is consulted when a new Compartment is
    created; a missing key is logged and the name is set to ''.

    old_metabolite_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for compartmentalized metabolites.

    Returns
    -------

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    """
    comp_comp_db_ids = {}
    final_metabolite_ids = {}

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # Get metabolite id duplicates. In each row, the first column is the
    # preferred ID and the remaining columns are the IDs it replaces.
    met_dups = load_tsv(settings.metabolite_duplicates)
    def _check_metabolite_duplicates(bigg_id):
        """Return a new ID if there is a preferred ID, otherwise None."""
        for row in met_dups:
            if bigg_id in row[1:]:
                return row[0]
        return None

    # Look for the formula in these places, in order. They do not depend on the
    # metabolite, so define them once, outside of the loop.
    formula_fns = [lambda m: getattr(m, 'formula', None), # support cobra v0.3 and 0.4
                   lambda m: m.notes.get('FORMULA', None),
                   lambda m: m.notes.get('FORMULA1', None)]
    def _first_formula(met):
        """Return the first non-empty, stripped formula string, or None."""
        for formula_fn in formula_fns:
            raw = formula_fn(met)
            # Cast to string, but not for None
            if raw is None:
                continue
            text = str(raw).strip()
            # Ignore the empty string. Returning here keeps the lookup lazy:
            # later sources are only consulted when earlier ones are empty.
            if text:
                return text
        return None

    # for each metabolite in the model
    for metabolite in model.metabolites:
        metabolite_id = parse.remove_duplicate_tag(metabolite.id)

        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite_id)
        except Exception:
            logging.error(('Could not find compartment for metabolite %s in '
                           'model %s' % (metabolite_id, model.id)))
            continue

        preferred = _check_metabolite_duplicates(component_bigg_id)
        new_bigg_id = preferred if preferred else component_bigg_id

        # Get the first non-null formula. Otherwise format_formula gets None.
        _formula = format_formula(_first_formula(metabolite))
        # Check for non-valid formulas
        if parse.invalid_formula(_formula):
            logging.warning('Invalid formula %s for metabolite %s in model %s' % (_formula, metabolite_id, model.id))
            _formula = None

        # get charge
        try:
            charge = int(metabolite.charge)
            # Reject any charge that is not equal to its integer conversion
            # (e.g. a non-integral float)
            if charge != metabolite.charge:
                logging.warning('Could not load charge {} for {} in model {}'
                             .format(metabolite.charge, metabolite_id, model.id))
                charge = None
        except Exception:
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite_id, model.id, metabolite.charge))
            charge = None

        # Look for an existing metabolite with the final BiGG ID
        metabolite_db = (session
                         .query(Component)
                         .filter(Component.bigg_id == new_bigg_id)
                         .first())

        # if necessary, add the new metabolite, and keep track of the ID
        new_name = scrub_name(getattr(metabolite, 'name', None))
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Component(bigg_id=new_bigg_id, name=new_name)
            session.add(metabolite_db)
            session.commit()
        else:
            # If the metabolite is not new, consider improving the descriptive name
            improve_name(session, metabolite_db, new_name)

        # add the deprecated id if necessary
        if metabolite_db.bigg_id != component_bigg_id:
            get_or_create(session, DeprecatedID, deprecated_id=component_bigg_id,
                          type='component', ome_id=metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.bigg_id == compartment_bigg_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_bigg_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_bigg_id)
                name = ''
            compartment_db = Compartment(bigg_id=compartment_bigg_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # remember for adding the reaction
        comp_comp_db_ids[metabolite.id] = comp_component_db.id
        final_metabolite_ids[metabolite.id] = '%s_%s' % (new_bigg_id, compartment_bigg_id)

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            # only fill in values that are missing; never overwrite
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_bigg_id_c in old_metabolite_ids[metabolite.id]:
            # Add Synonym and OldIDSynonym for the compartmentalized metabolite
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'compartmentalized_component')
                          .filter(Synonym.ome_id == comp_component_db.id)
                          .filter(Synonym.synonym == old_bigg_id_c)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='compartmentalized_component',
                                     ome_id=comp_component_db.id,
                                     synonym=old_bigg_id_c,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                         .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                         ome_id=model_comp_comp_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

            # Also add Synonym and OldIDSynonym for the universal metabolite
            try:
                new_style_id = parse.id_for_new_id_style(
                    parse.fix_legacy_id(old_bigg_id_c, use_hyphens=False),
                    is_metabolite=True
                )
                old_bigg_id_c_without_compartment = parse.split_compartment(new_style_id)[0]
            except Exception as e:
                # Exceptions have no .message attribute on Python 3, so log the
                # string form instead of letting the handler itself fail.
                logging.warning(str(e))
            else:
                synonym_db_2 = (session
                                .query(Synonym)
                                .filter(Synonym.type == 'component')
                                .filter(Synonym.ome_id == metabolite_db.id)
                                .filter(Synonym.synonym == old_bigg_id_c_without_compartment)
                                .filter(Synonym.data_source_id == data_source_id)
                                .first())
                if synonym_db_2 is None:
                    synonym_db_2 = Synonym(type='component',
                                           ome_id=metabolite_db.id,
                                           synonym=old_bigg_id_c_without_compartment,
                                           data_source_id=data_source_id)
                    session.add(synonym_db_2)
                    session.commit()
                old_id_db = (session
                             .query(OldIDSynonym)
                             .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                             .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                             .filter(OldIDSynonym.synonym_id == synonym_db_2.id)
                             .first())
                if old_id_db is None:
                    old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                             ome_id=model_comp_comp_db.id,
                                             synonym_id=synonym_db_2.id)
                    session.add(old_id_db)
                    session.commit()

    return comp_comp_db_ids, final_metabolite_ids
Пример #6
0
def load_reactions(session, model_db_id, model, old_reaction_ids,
                   comp_comp_db_ids, final_metabolite_ids):
    """Load the reactions and stoichiometries into the model.

    For each reaction in the COBRApy model, pick (or create) the universal
    Reaction row using the BiGG ID, the forward/reverse reaction hash and the
    hash preferences file, then find or create the matching ModelReaction row
    and the Synonym / OldIDSynonym rows for the reaction's old IDs.

    TODO if the reaction is already loaded, we need to check the stoichometry
    has. If that doesn't match, then add a new reaction with an incremented ID
    (e.g. ACALD_1)

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    old_reaction_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for reactions.

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    Returns
    -------

    A dictionary with keys for reaction BiGG IDs in the model and values for the
    associated ModelReaction.id in the database.

    """

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get reaction hash_prefs
    hash_prefs = load_tsv(settings.reaction_hash_prefs)

    def _check_hash_prefs(a_hash, is_pseudoreaction):
        """Return the preferred BiGG ID for a_hash, or None."""
        for row in hash_prefs:
            marked_pseudo = len(row) > 2 and row[2] == 'pseudoreaction'
            if row[0] == a_hash and marked_pseudo == is_pseudoreaction:
                return row[1]
        return None

    def _find_new_incremented_id(session, original_id):
        """Look for a reaction bigg_id that is not already taken."""
        new_id = increment_id(original_id)
        while True:
            # Check for existing and deprecated reaction ids
            id_is_free = (session
                          .query(Reaction)
                          .filter(Reaction.bigg_id == new_id)
                          .first()) is None
            if id_is_free and not _is_deprecated_reaction_id(session, new_id):
                return new_id
            new_id = increment_id(new_id)

    # Generate reaction hashes, and find reactions in the same model in opposite
    # directions.
    reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids)
                       for r in model.reactions}
    reverse_reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids, reverse=True)
                               for r in model.reactions}
    reverse_reaction_hashes_rev = {v: k for k, v in six.iteritems(reverse_reaction_hashes)}
    reactions_not_to_reverse = set()
    for r_id, h in six.iteritems(reaction_hashes):
        if h in reverse_reaction_hashes_rev:
            reactions_not_to_reverse.add(r_id)
            reactions_not_to_reverse.add(reverse_reaction_hashes_rev[h])

    model_db_rxn_ids = {}
    for reaction in model.reactions:
        # Drop duplicates label
        reaction_id = parse.remove_duplicate_tag(reaction.id)

        # Get the reaction
        reaction_db = (session
                       .query(Reaction)
                       .filter(Reaction.bigg_id == reaction_id)
                       .first())

        # check for pseudoreaction
        is_pseudoreaction = check_pseudoreaction(reaction_id)

        # look up the precomputed forward hash
        reaction_hash = reaction_hashes[reaction.id]
        hash_db = (session
                   .query(Reaction)
                   .filter(Reaction.reaction_hash == reaction_hash)
                   .filter(Reaction.pseudoreaction == is_pseudoreaction)
                   .first())
        # If there wasn't a match for the forward hash, also check the reverse
        # hash. Do not check reverse hash for reactions with both directions
        # defined in the same model (e.g. SUCDi and FRD7).
        if not hash_db and reaction.id not in reactions_not_to_reverse:
            reverse_hash_db = (session
                               .query(Reaction)
                               .filter(Reaction.reaction_hash == reverse_reaction_hashes[reaction.id])
                               .filter(Reaction.pseudoreaction == is_pseudoreaction)
                               .first())
        else:
            reverse_hash_db = None

        # bigg_id match  hash match b==h  pseudoreaction  example                   function
        #  n               n               n            first GAPD                _new_reaction (1)
        #  n               n               y            first EX_glc_e            _new_reaction (1)
        #  y               n               n            incorrect GAPD            _new_reaction & increment (2)
        #  y               n               y            incorrect EX_glc_e        _new_reaction & increment (2)
        #  n               y               n            GAPDH after GAPD          reaction = hash_reaction (3a)
        #  n               y               y            EX_glc__e after EX_glc_e  reaction = hash_reaction (3a)
        #  y               y         n     n            ?                         reaction = hash_reaction (3a)
        #  y               y         n     y            ?                         reaction = hash_reaction (3a)
        #  y               y         y     n            second GAPD               reaction = bigg_reaction (3b)
        #  y               y         y     y            second EX_glc_e           reaction = bigg_reaction (3b)
        # NOTE: only check pseudoreaction hash against other pseudoreactions
        # 4a and 4b are 3a and 3b with a reversed reaction

        # Check for a preferred ID in the preferences, based on the forward
        # hash. Don't check the reverse hash in preferences.
        preferred_id = _check_hash_prefs(reaction_hash, is_pseudoreaction)

        # no reversed by default
        is_reversed = False
        is_new = False

        # (0) If there is a preferred ID, make that the new ID, and increment any old IDs
        if preferred_id is not None:
            # if the reaction already matches, just continue
            if hash_db is not None and hash_db.bigg_id == preferred_id:
                reaction_db = hash_db
            # otherwise, make the new reaction
            else:
                # if existing reactions match the preferred reaction find a new,
                # incremented id for the existing match
                preferred_id_db = session.query(Reaction).filter(Reaction.bigg_id == preferred_id).first()
                if preferred_id_db is not None:
                    new_id = _find_new_incremented_id(session, preferred_id)
                    logging.warning('Incrementing database reaction {} to {} and prefering {} (from model {}) based on hash preferences'
                                    .format(preferred_id, new_id, preferred_id, model.id))
                    preferred_id_db.bigg_id = new_id
                    session.commit()

                # make a new reaction for the preferred_id
                reaction_db = _new_reaction(session, reaction, preferred_id,
                                            reaction_hash, model_db_id, model,
                                            is_pseudoreaction, comp_comp_db_ids)
                is_new = True

        # (1) no bigg_id matches, no stoichiometry match or pseudoreaction, then
        # make a new reaction
        elif reaction_db is None and hash_db is None and reverse_hash_db is None:
            # check that the id is not deprecated
            # NOTE(review): this checks reaction.id (duplicate tag included)
            # while the new row is created with reaction_id -- confirm intended.
            if _is_deprecated_reaction_id(session, reaction.id):
                logging.error(('Keeping bigg_id {} (hash {} - from model {}) '
                               'even though it is on the deprecated ID list. '
                               'You should add it to reaction-hash-prefs.txt')
                              .format(reaction_id, reaction_hash, model.id))
            reaction_db = _new_reaction(session, reaction, reaction_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (2) bigg_id matches, but not the hash, then increment the BIGG_ID
        elif reaction_db is not None and hash_db is None and reverse_hash_db is None:
            # loop until we find an ID that is not taken
            # NOTE(review): increments from reaction.id rather than the
            # tag-stripped reaction_id -- confirm intended.
            new_id = _find_new_incremented_id(session, reaction.id)
            logging.warning('Incrementing bigg_id {} to {} (from model {}) based on conflicting reaction hash'
                            .format(reaction_id, new_id, model.id))
            reaction_db = _new_reaction(session, reaction, new_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (3) found a forward stoichiometry match, then use the hash reaction
        # match.
        elif hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # (3a) no BiGG ID match, or it points at a different reaction
            if reaction_db is None or reaction_db.id != hash_db.id:
                reaction_db = hash_db
            # (3b) BIGG ID matches a reaction with the same hash: keep
            # reaction_db as it is

        # (4) found a reverse stoichiometry match, then use the reverse hash
        # reaction match.
        elif reverse_hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match on
            # bigg_id, which should be the case.

            # Remember to switch upper and lower bounds
            is_reversed = True
            logging.info('Matched {} to {} based on reverse hash'
                         .format(reaction_id, reverse_hash_db.bigg_id))

            # (4a) no BiGG ID match, or it points at a different reaction
            if reaction_db is None or reaction_db.id != reverse_hash_db.id:
                reaction_db = reverse_hash_db
            # (4b) BIGG ID matches a reaction with the same reverse hash: keep
            # reaction_db as it is

        else:
            raise Exception('Should not get here')

        # If the reaction is not new, consider improving the descriptive name
        if not is_new:
            new_name = scrub_name(check_none(getattr(reaction, 'name', None)))
            improve_name(session, reaction_db, new_name)

        # Add reaction to deprecated ID list if necessary
        if reaction_db.bigg_id != reaction_id:
            get_or_create(session, DeprecatedID, deprecated_id=reaction_id,
                          type='reaction', ome_id=reaction_db.id)

        # If the reaction is reversed, then switch upper and lower bound
        if is_reversed:
            lower_bound = -reaction.upper_bound
            upper_bound = -reaction.lower_bound
        else:
            lower_bound = reaction.lower_bound
            upper_bound = reaction.upper_bound

        # subsystem; a missing (None) subsystem must not crash on .strip()
        raw_subsystem = reaction.subsystem
        subsystem = check_none(raw_subsystem.strip()
                               if raw_subsystem is not None else None)

        # get the model reaction
        model_reaction_db = (session
                             .query(ModelReaction)
                             .filter(ModelReaction.reaction_id == reaction_db.id)
                             .filter(ModelReaction.model_id == model_db_id)
                             .filter(ModelReaction.lower_bound == lower_bound)
                             .filter(ModelReaction.upper_bound == upper_bound)
                             .filter(ModelReaction.gene_reaction_rule == reaction.gene_reaction_rule)
                             .filter(ModelReaction.objective_coefficient == reaction.objective_coefficient)
                             .filter(ModelReaction.subsystem == subsystem)
                             .first())
        if model_reaction_db is None:
            # get the number of existing copies of this reaction in the model
            copy_number = (session
                           .query(ModelReaction)
                           .filter(ModelReaction.reaction_id == reaction_db.id)
                           .filter(ModelReaction.model_id == model_db_id)
                           .count()) + 1
            # make a new reaction
            model_reaction_db = ModelReaction(model_id=model_db_id,
                                              reaction_id=reaction_db.id,
                                              gene_reaction_rule=reaction.gene_reaction_rule,
                                              original_gene_reaction_rule=reaction.gene_reaction_rule,
                                              upper_bound=upper_bound,
                                              lower_bound=lower_bound,
                                              objective_coefficient=reaction.objective_coefficient,
                                              copy_number=copy_number,
                                              subsystem=subsystem)
            session.add(model_reaction_db)
            session.commit()

        # remember the changed ids
        model_db_rxn_ids[reaction.id] = model_reaction_db.id

        # add synonyms
        #
        # get the id from the published model
        for old_bigg_id in old_reaction_ids[reaction.id]:
            # add a synonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'reaction')
                          .filter(Synonym.ome_id == reaction_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='reaction',
                                     ome_id=reaction_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_reaction')
                         .filter(OldIDSynonym.ome_id == model_reaction_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_reaction',
                                         ome_id=model_reaction_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

    return model_db_rxn_ids
Пример #7
0
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    For each metabolite in the COBRApy model, find or create the Component,
    Compartment, CompartmentalizedComponent and
    ModelCompartmentalizedComponent rows, then record the metabolite's old IDs
    as Synonym / OldIDSynonym rows. Metabolites whose ID cannot be split into
    component and compartment are logged and skipped.

    Arguments:
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment BiGG IDs and
    values are compartment names. A missing key is logged and the empty string
    is used as the name of a newly created compartment.

    old_metabolite_ids: A dictionary where keys are new IDs and values are
    iterables of old IDs for compartmentalized metabolites.

    Returns
    -------

    comp_comp_db_ids: A dictionary where keys are the original compartmentalized
    metabolite ids and the values are the database IDs for the compartmentalized
    components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    """
    comp_comp_db_ids = {}
    final_metabolite_ids = {}

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get metabolite id duplicates
    met_dups = load_tsv(settings.metabolite_duplicates)

    def _check_metabolite_duplicates(bigg_id):
        """Return a new ID if there is a preferred ID, otherwise None."""
        for row in met_dups:
            if bigg_id in row[1:]:
                return row[0]
        return None

    # look for the formula in these places, in this order
    formula_fns = [lambda m: getattr(m, 'formula', None),  # support cobra v0.3 and 0.4
                   lambda m: m.notes.get('FORMULA', None),
                   lambda m: m.notes.get('FORMULA1', None)]

    def _first_formula(met):
        """Return the first non-empty, stripped formula string, or None."""
        # Later sources are only consulted when earlier ones give nothing.
        for formula_fn in formula_fns:
            value = formula_fn(met)
            if value is None:
                continue
            # Cast to string, and ignore the empty string
            value = str(value).strip()
            if value != '':
                return value
        return None

    def _ensure_synonym(synonym_type, ome_id, synonym):
        """Find or create (and commit) the old_bigg_id Synonym row."""
        synonym_db = (session
                      .query(Synonym)
                      .filter(Synonym.type == synonym_type)
                      .filter(Synonym.ome_id == ome_id)
                      .filter(Synonym.synonym == synonym)
                      .filter(Synonym.data_source_id == data_source_id)
                      .first())
        if synonym_db is None:
            synonym_db = Synonym(type=synonym_type,
                                 ome_id=ome_id,
                                 synonym=synonym,
                                 data_source_id=data_source_id)
            session.add(synonym_db)
            session.commit()
        return synonym_db

    def _ensure_old_id_synonym(ome_id, synonym_id):
        """Find or create (and commit) the OldIDSynonym row for a model metabolite."""
        old_id_db = (session
                     .query(OldIDSynonym)
                     .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                     .filter(OldIDSynonym.ome_id == ome_id)
                     .filter(OldIDSynonym.synonym_id == synonym_id)
                     .first())
        if old_id_db is None:
            old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                     ome_id=ome_id,
                                     synonym_id=synonym_id)
            session.add(old_id_db)
            session.commit()
        return old_id_db

    # for each metabolite in the model
    for metabolite in model.metabolites:
        metabolite_id = parse.remove_duplicate_tag(metabolite.id)

        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite_id)
        except Exception:
            logging.error('Could not find compartment for metabolite %s in '
                          'model %s' % (metabolite_id, model.id))
            continue

        preferred = _check_metabolite_duplicates(component_bigg_id)
        new_bigg_id = preferred if preferred else component_bigg_id

        # Get the first non-null formula. Otherwise format_formula gets None.
        _formula = format_formula(_first_formula(metabolite))
        # Check for non-valid formulas
        if parse.invalid_formula(_formula):
            logging.warning('Invalid formula %s for metabolite %s in model %s' % (_formula, metabolite_id, model.id))
            _formula = None

        # get charge
        try:
            charge = int(metabolite.charge)
            # check for float charge
            if charge != metabolite.charge:
                logging.warning('Could not load charge {} for {} in model {}'
                                .format(metabolite.charge, metabolite_id, model.id))
                charge = None
        except Exception:
            # A missing or None charge is expected and not worth logging
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite_id, model.id, metabolite.charge))
            charge = None

        # If there is no metabolite, add a new one.
        metabolite_db = (session
                         .query(Component)
                         .filter(Component.bigg_id == new_bigg_id)
                         .first())

        # if necessary, add the new metabolite, and keep track of the ID
        new_name = scrub_name(getattr(metabolite, 'name', None))
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Component(bigg_id=new_bigg_id, name=new_name)
            session.add(metabolite_db)
            session.commit()
        else:
            # If the metabolite is not new, consider improving the descriptive name
            improve_name(session, metabolite_db, new_name)

        # add the deprecated id if necessary
        if metabolite_db.bigg_id != component_bigg_id:
            get_or_create(session, DeprecatedID, deprecated_id=component_bigg_id,
                          type='component', ome_id=metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.bigg_id == compartment_bigg_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_bigg_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_bigg_id)
                name = ''
            compartment_db = Compartment(bigg_id=compartment_bigg_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # remember for adding the reaction
        comp_comp_db_ids[metabolite.id] = comp_component_db.id
        final_metabolite_ids[metabolite.id] = '%s_%s' % (new_bigg_id, compartment_bigg_id)

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            # Only fill in values that are still missing; never overwrite
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_bigg_id_c in old_metabolite_ids[metabolite.id]:
            # Add Synonym and OldIDSynonym
            synonym_db = _ensure_synonym('compartmentalized_component',
                                         comp_component_db.id,
                                         old_bigg_id_c)
            _ensure_old_id_synonym(model_comp_comp_db.id, synonym_db.id)

            # Also add Synonym and OldIDSynonym for the universal metabolite
            try:
                new_style_id = parse.id_for_new_id_style(
                    parse.fix_legacy_id(old_bigg_id_c, use_hyphens=False),
                    is_metabolite=True
                )
                old_bigg_id_c_without_compartment = parse.split_compartment(new_style_id)[0]
            except Exception as e:
                # Best effort: log and move on. Exceptions have no `.message`
                # attribute on Python 3, so log the exception object itself.
                logging.warning(e)
            else:
                synonym_db_2 = _ensure_synonym('component',
                                               metabolite_db.id,
                                               old_bigg_id_c_without_compartment)
                _ensure_old_id_synonym(model_comp_comp_db.id, synonym_db_2.id)

    return comp_comp_db_ids, final_metabolite_ids