예제 #1
0
파일: muller2_new.py 프로젝트: emdadi/CAMND
def go(species):
    """Run the muller2_new MATLAB method on one species and save its modules.

    Writes ``muller2_new.m`` into the current working directory, executes it
    through ``my_util.prepare_matlab_file_and_exec_and_wait_finish`` and turns
    the ``modules`` variable stored in ``muller2_newout.mat`` into
    ``<resultPath>/<species>/muller2_new/final_modules.txt`` (one module of
    reaction ids per line). The ``.mat`` file is also copied to that directory.

    Args:
        species: key into ``my_constants.species_sbml`` selecting the network.

    Raises:
        An exception from the ``res_vars['modules']`` lookup when the MATLAB
        run did not produce a usable result (this lookup is what signals
        failure).
    """
    method_dir = r'%s/method/muller2_new' % my_constants.basePath
    out_dir = r'%s/%s/muller2_new' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file,
        True,
        read_species_compart=True,
        remove_biomass=False,
        normalize_stoich=False)

    # "with" guarantees the script is flushed and closed even if a write fails.
    with open('muller2_new.m', 'w') as f:
        write_line(f, 'addpath %s/code' % method_dir)
        write_line(f, "model = readCbModel('%s')" % source_file)
        # +1 because MATLAB indices are 1-based; the first biomass reaction
        # gets objective coefficient 1.
        write_line(f, "model.c(%d) = 1;" % (rxns.index(biomass[0]) + 1))
        write_line(f, "changeCobraSolver('glpk');")
        write_line(f, '[modules, var, flux] = computeModulesOpt( model );')
        write_line(f, 'save muller2_newout.mat modules var flux')

    my_util.prepare_matlab_file_and_exec_and_wait_finish(
        'muller2_new', 'muller2_newout.mat', False)

    res_vars = my_util.try_load_matlab_result('muller2_newout.mat')
    # If MATLAB has failed, this lookup throws.
    raw_modules = res_vars['modules']
    shutil.copy('muller2_newout.mat', out_dir)

    # After transposing, each row is one module in which member reactions
    # are marked with 1 (column order follows rxns).
    raw_modules = raw_modules.T.tolist()
    modules = [
        [rxns[r_idx] for r_idx, in_module in enumerate(raw_module)
         if in_module == 1]
        for raw_module in raw_modules
    ]

    with open('%s/final_modules.txt' % out_dir, 'w') as out:
        out.write(
            "#each row is a module of reactions. not all reactions are specified (nature of this method only select some reaction to be modules)\n"
        )
        for module in modules:
            out.write(' '.join(module))
            out.write('\n')
예제 #2
0
def go(species, seed=1):
    """Run netcarto (Guimera) on the metabolite graph of one species.

    The metabolite graph is written as an edge list of metabolite indices to
    ``guimera.in``, ``netcarto_cl`` is run on it, and its ``modules.dat`` /
    ``roles.dat`` outputs are copied to ``<resultPath>/<species>/guimera``.
    ``final_modules.txt`` in that directory gets one module of metabolite ids
    per line.

    Args:
        species: key into ``my_constants.species_sbml`` selecting the network.
        seed: random seed handed to ``netcarto_cl``. The original command left
            its ``%d`` placeholder unformatted, so the literal text ``%d`` was
            passed as the seed. The default of 1 is an arbitrary fixed value
            chosen for reproducibility.
            NOTE(review): confirm the intended seed.
    """
    out_dir = r'%s/%s/guimera' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        r'%s/dataset/networks/%s' %
        (my_constants.basePath, my_constants.species_sbml[species]),
        True,
        read_species_compart=True,
        remove_biomass=True,
        normalize_stoich=True)

    # graph is represented as list of edges between metabs
    grph = my_util.graph_by_explode_reactions_to_complete_bipartite(S, mets)

    grphIdx = sorted((mets.index(m1), mets.index(m2)) for (m1, m2) in grph)
    with open('guimera.in', 'w') as inf:
        for src_idx, dst_idx in grphIdx:
            inf.write('%d %d\n' % (src_idx, dst_idx))

    # netcarto_cl net_file_name seed T_ini iteration_factor cooling_factor
    # T_ini, iteration_factor, and cooling_factor can be set to -1 to use the
    # defaults (2/size_of_network, 1.0, and 0.995, respectively).
    os.system('netcarto_cl guimera.in %d -1 -1 -1 0' % seed)
    shutil.copy('modules.dat', out_dir)
    shutil.copy('roles.dat', out_dir)

    marker = '---'
    with open('%s/final_modules.txt' % out_dir, 'w') as out:
        out.write('# each line one module!\n')
        with open('modules.dat', 'r') as netcarto_outf:
            for line in netcarto_outf:
                didx = line.find(marker)
                if didx == -1:
                    continue
                # Everything after the marker is the list of metabolite
                # indices; split() tolerates repeated separators.
                mIdxs = line[didx + len(marker):].split()
                out.write(' '.join([mets[int(i)] for i in mIdxs]))
                out.write('\n')
예제 #3
0
def go(species):
    """Run the Newman community detection on the metabolite graph of a species.

    The metabolite graph is written to ``newman.in`` as ``src,dst,1`` lines of
    metabolite indices and handed to ``correctedcmty.main``, which is expected
    to produce ``newman.out``. The text between the last ``START_COMP`` and the
    last ``END_COMP`` marker of that file is parsed line by line, each line
    being a Python literal of metabolite indices, and written to
    ``<resultPath>/<species>/newman/final_modules.txt`` as one module of
    metabolite ids per line.

    Args:
        species: key into ``my_constants.species_sbml`` selecting the network.

    Raises:
        ValueError: if ``newman.out`` lacks the ``START_COMP`` / ``END_COMP``
            markers (raised by ``str.rindex``).
    """
    out_dir = r'%s/%s/newman' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        r'%s/dataset/networks/%s' %
        (my_constants.basePath, my_constants.species_sbml[species]),
        True,
        read_species_compart=True,
        remove_biomass=True)

    # graph is represented as list of edges between metabs
    grph = my_util.graph_by_explode_reactions_to_complete_bipartite(S, mets)

    grphIdx = sorted((mets.index(m1), mets.index(m2)) for (m1, m2) in grph)
    with open('newman.in', 'w') as inf:
        for src_idx, dst_idx in grphIdx:
            inf.write('%d,%d,1\n' % (src_idx, dst_idx))

    correctedcmty.main(['DUMMY', 'newman.in', 'newman.out'])
    shutil.copy('newman.out', out_dir)

    # Read the whole result, closing the handle deterministically.
    with open('newman.out', 'r') as resf:
        res_text = resf.read()
    start_marker = 'START_COMP'
    stidx = res_text.rindex(start_marker)
    edidx = res_text.rindex('END_COMP')
    res_lines = res_text[stidx + len(start_marker):edidx].split('\n')

    with open('%s/final_modules.txt' % out_dir, 'w') as out:
        out.write('# each line one module!\n')
        for line in res_lines:
            line = line.strip()
            if not line:
                continue
            # SECURITY NOTE: eval() executes whatever the tool wrote into
            # newman.out; acceptable only because that file is produced
            # locally by correctedcmty. Do not feed untrusted files here.
            mIdxs = eval(line)
            out.write(' '.join([mets[int(i)] for i in mIdxs]))
            out.write('\n')
예제 #4
0
        ds = 0
        for m in mod_metabs:
            # metab_row = S[mets.index(m)]
            # ds += sum([1 for ri in metab_row if ri != 0])
            ds += len(metab_edges[m])  # TODO: for hyperarcs this causes a reaction to be counted more than once for a module

        sum_term = ls * 1.0 / total_links - pow(ds / (2.0 * total_links), 2)
        if mod_name == eval_constants.EXTERNAL_MODULE_ID:
            modularity -= sum_term
        else:
            modularity += sum_term

    return modularity


if __name__ == '__main__':
    # species = 'toy_model'
    import cPickle as pickle
    for species in my_constants.species_sbml.keys():
        # species = 'ecoli_iaf1260'

        out_dir = r'%s/%s/newman' % (my_constants.resultPath, species)

        print species
        S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species]), True, read_species_compart=True)

        # pickle.dump([S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts], open(r'D:\University\DiseaseSim\zsh\MSB\%s.pkl' % species, 'wb'))
        # aS, amets, arxns, arevs, amet_names, arxn_names, abiomass, amet_comparts = pickle.load(open(r'D:\University\DiseaseSim\zsh\MSB\ecoli_iaf1260.pkl', 'rb'))

        # print S == aS, mets == amets, revs == arevs, amet_names == met_names, abiomass == biomass, amet_comparts == met_comparts, arxns == rxns
    # print compute_modularity('%s/final_modules.txt' % out_dir, False, True, False, S, mets, rxns)
예제 #5
0
파일: holme.py 프로젝트: emdadi/CAMND
def go(species, only_cut_dendogram=False):
    """Run the Holme hierarchical decomposition and cut it into modules.

    Unless ``only_cut_dendogram`` is set, the network is written as a directed
    bipartite edge list (``cell/ho.dat``) plus metabolite names
    (``cell/ho.nam``), the external ``hi`` tool is run, and its ``data/cellho``
    output is copied to ``<resultPath>/<species>/holme``. The ``cellho`` file
    in that directory is then read as consecutive levels of ``len(mets)``
    integers (cluster index per metabolite). For every entry of
    ``cut_iterations.txt[species]`` one ``final_modules_<cut>.txt`` is written
    with one module of metabolite ids per line.

    Args:
        species: key into ``my_constants.species_sbml`` selecting the network.
        only_cut_dendogram: when true, skip running the tool and rewriting
            ``dendogram.py``; only re-cut the results already in the output
            directory.
    """
    method_dir = r'%s/method/holme' % my_constants.basePath
    out_dir = r'%s/%s/holme' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        r'%s/dataset/networks/%s' %
        (my_constants.basePath, my_constants.species_sbml[species]),
        True,
        read_species_compart=True,
        remove_biomass=True)

    if not only_cut_dendogram:
        # graph is a bipartite representation of the network. should be written to file with format:
        # (1.) each line represents a directed link: 'from' 'to'
        # (2.) substances are enumerated 1, 2, . . .
        # (3.) reaction nodes are enumerated 1000000, 1000001, . . .
        METAB_OFFS = 1
        REACT_OFFS = 1000000

        lines = []
        for i, row in enumerate(S):
            for j, col in enumerate(row):
                if col < 0:  # metab i is consumed by react j
                    lines.append((i + METAB_OFFS, j + REACT_OFFS))
                elif col > 0:  # metab i is produced by react j
                    lines.append((j + REACT_OFFS, i + METAB_OFFS))

        my_util.mkdir_p('inp')
        shutil.copy('%s/inp/cmds' % method_dir, 'inp/')
        shutil.copy('%s/orgnames' % method_dir, './')

        my_util.mkdir_p('cell')

        lines.sort()
        with open('cell/ho.dat', 'w') as inf:
            for src, dst in lines:
                inf.write('%d %d\n' % (src, dst))

        with open('cell/ho.nam', 'w') as inf:
            for met in mets:
                inf.write('%s\n' % met)

        # hi cmd_file_name
        # cmd_file_name will be a filename located in inp/
        my_util.mkdir_p('data')
        os.system('hi cmds')
        shutil.copy('data/cellho', out_dir)

    # Read levels of len(mets) integers until read_int reports end of data
    # (None). The level being filled at that point is incomplete or empty and
    # is dropped.
    levels = []
    with open('%s/cellho' % out_dir, 'rb') as resf:
        finish = False
        while not finish:
            levels.append([])
            for _ in range(len(mets)):
                x = read_int(resf)
                if x is None:
                    finish = True
                    break
                levels[-1].append(x)
    levels.pop()

    if not only_cut_dendogram:
        # outputs: wpgma tree, cut at proper height?
        with open('%s/dendogram.py' % out_dir, 'w') as out:
            out.write(
                "#print tree is 2d list. each entry is the result of algorithm in one iteration. for each iteration there is a list of cluster-index for each metabolite\n"
            )
            out.write(
                "#print e.g. for the first level all values are 1 (meaning that no split is still done) and in the last level metabolites are numbered from 1 to len(mets)\n"
            )
            out.write("\n")
            out.write("tree = " + str(levels))

    # TODO: very bad job!!!!
    dummy_tree, tree_height, dummy_thresholds = read_hierarchical_decomposition_holme(
        species, '%s/dendogram.py' % out_dir)

    # SECURITY NOTE: eval() executes the content of cut_iterations.txt; only
    # acceptable because the file ships with the method directory.
    with open('%s/cut_iterations.txt' % method_dir, 'r') as cts_file:
        cts = eval(cts_file.read())
    for cut in cts[species]:

        # TODO: very bad job
        # level_idx = int(cut * len(levels))
        level_idx = int(tree_height - int(cut * tree_height) + 1)

        # Group metabolite indices by the cluster index they have at this level.
        modules = {}
        for met_idx, mmod in enumerate(levels[level_idx]):
            modules.setdefault(mmod, []).append(met_idx)

        with open('%s/final_modules_%s.txt' % (out_dir, cut), 'w') as out:
            # Relies on cluster indices being exactly 1..len(modules).
            for midx in range(1, len(modules) + 1):
                out.write(' '.join([mets[s] for s in modules[midx]]))
                out.write('\n')
예제 #6
0
def go(species):
    """Run the Schuster subnet decomposition for one species.

    Builds ``schuster.in`` (reversible / irreversible reactions, internal /
    external metabolites and the reaction equations) according to the external
    strategy configured for ``species`` in ``external_strategy.txt``, then runs
    the ``subnet`` tool once per threshold listed for the species in
    ``thresholds.txt``. For every threshold the produced ``subsystem%d.out``
    files are copied to the output directory and summarised into
    ``react_modules_<thr>.txt``, ``metab_react_modules_<thr>.txt``,
    ``metab_modules_<thr>.txt`` and ``final_modules_<thr>.txt`` (a copy of the
    metabolite modules).

    Raises:
        Exception: when a reaction has no substrate, when (KEGG strategy) a
            reaction has no substrate or no product, or when (BOUNDARY
            strategy) an exchange reaction has more than one substrate.
    """
    method_dir = r'%s/method/schuster' % my_constants.basePath
    out_dir = r'%s/%s/schuster' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file,
        True,
        read_species_compart=True,
        remove_biomass=True,
        normalize_stoich=True)

    # NOTE(review): eval() executes the content of external_strategy.txt; fine
    # for a trusted file shipped with the method, unsafe for anything else.
    external_strategy = eval(
        open('%s/external_strategy.txt' % method_dir, 'r').read())

    # both may either be with/without boundary reactions which is specified by the strategy
    # these will be used to create schuster.in file which is used by the method as the input network
    with_boundary_reacts = {}
    with_boundary_mets = list(mets)

    if external_strategy[
            species] == 'KEGG_PSEUDO_NETWORK':  # this implies networks are built from kegg by merging pathways
        # metabolite -> set of reactions that can consume / produce it
        # (reversible reactions count in both directions)
        subs_to_reacts = {}
        prods_to_reacts = {}
        for j, r in enumerate(rxns):
            subs = []
            prods = []
            for i in range(len(S)):
                if S[i][j] > 0:
                    prods.append([S[i][j],
                                  mets[i]])  # [stoichiometry, met_name]
                elif S[i][j] < 0:
                    subs.append([-S[i][j],
                                 mets[i]])  # [stoichiometry, met_name]
            with_boundary_reacts[r] = [subs, prods]

            has_subs, has_prods = False, False
            for i in range(len(S)):
                if S[i][j] > 0 or (S[i][j] < 0 and revs[j] != 0):
                    if mets[i] not in prods_to_reacts:
                        prods_to_reacts[mets[i]] = set()
                    prods_to_reacts[mets[i]].add(r)
                    has_prods = True
                if S[i][j] < 0 or (S[i][j] > 0 and revs[j] != 0):
                    if mets[i] not in subs_to_reacts:
                        subs_to_reacts[mets[i]] = set()
                    subs_to_reacts[mets[i]].add(r)
                    has_subs = True

            if not has_subs or not has_prods:
                raise Exception(
                    'reaction without substrate/products in KEGG pseudo networks! should never happen!'
                )

        # checks whether the metabolite is internal in presence of reversibility:
        # it must be both producible and consumable, and if a single reaction is
        # its only producer and only consumer, that reaction must list it on both
        # sides (the "polymeric" case)
        def is_internal(met):
            if met not in prods_to_reacts or met not in subs_to_reacts:
                return False
            if prods_to_reacts[met] == subs_to_reacts[met] and len(
                    prods_to_reacts[met]) == 1:
                the_react = list(prods_to_reacts[met])[0]
                the_react_subs = [
                    ss[1] for ss in with_boundary_reacts[the_react][0]
                ]
                the_react_prods = [
                    ss[1] for ss in with_boundary_reacts[the_react][1]
                ]
                if not (met in the_react_subs
                        and met in the_react_prods):  # not polymeric
                    return False
            return True

        internal_metabolites = [
            m for m in with_boundary_mets if is_internal(m)
        ]
        initial_external_metabolites = [
            m for m in with_boundary_mets if m not in internal_metabolites
        ]
    else:
        # may add to with_boundary_mets based on strategy.
        # may update with_boundary_reacts according to newly added with_boundary_mets
        for j, r in enumerate(rxns):
            subs = []
            prods = []
            for i in range(len(S)):
                if S[i][j] > 0:
                    prods.append([S[i][j],
                                  mets[i]])  # [stoichiometry, met_name]
                elif S[i][j] < 0:
                    subs.append([-S[i][j],
                                 mets[i]])  # [stoichiometry, met_name]

            # this strategy means reactions without product are boundary reactions
            # note that: reversibility could potentially be problematic BUT I HAVE CHECKED BIGG NETWORKS, THESE KIND OF REACTIONS ARE REAL OUTSIDE BOUNDARY
            if not subs:
                raise Exception(
                    'reaction without substrate! should never happen!')
            elif not prods:
                if external_strategy[species] == 'BOUNDARY':
                    if len(subs) != 1:
                        raise Exception(
                            'EXCHANGE reaction with more than one substrate metabolite! unacceptable.'
                        )
                    # give the exchange reaction an artificial "<met>_BND" product
                    # with the same stoichiometry as its single substrate
                    new_boundary_met = subs[0][1] + '_BND'
                    if new_boundary_met not in with_boundary_mets:
                        # print 'the only exceptions are once for h**o recon 1 and once for ecoli ijo 1366!'
                        with_boundary_mets.append(new_boundary_met)
                    prods = [[subs[0][0], new_boundary_met]]
                else:
                    # the reaction is left out of the -CAT section, but it is still
                    # listed under -ENZREV / -ENZIRREV below (those iterate over rxns)
                    print 'skipping reaction without product: EXCHANGE!'
                    continue

            with_boundary_reacts[r] = [subs, prods]

        if external_strategy[species] == 'BOUNDARY':
            # external metabolites are exactly the artificial "_BND" ones
            initial_external_metabolites = [
                m for i, m in enumerate(with_boundary_mets)
                if with_boundary_mets[i].endswith('_BND')
            ]
            internal_metabolites = [
                m for m in with_boundary_mets
                if m not in initial_external_metabolites
            ]
        else:
            # external metabolites are those whose id ends with "_e"
            initial_external_metabolites = [
                m for i, m in enumerate(with_boundary_mets)
                if with_boundary_mets[i].endswith('_e')
            ]
            internal_metabolites = [
                m for m in with_boundary_mets
                if m not in initial_external_metabolites
            ]

    # write the network in the sectioned text format consumed by the subnet tool
    inf = open('schuster.in', 'w')
    write_line(inf, '-ENZREV')
    write_line(inf, ' '.join([r for i, r in enumerate(rxns) if revs[i] == 1]))
    write_line(inf, '')
    write_line(inf, '-ENZIRREV')
    write_line(inf, ' '.join([r for i, r in enumerate(rxns) if revs[i] != 1]))
    write_line(inf, '')
    write_line(inf, '-METINT')
    write_line(inf, ' '.join(internal_metabolites))
    write_line(inf, '')
    write_line(inf, '-METEXT')
    write_line(inf, ' '.join(initial_external_metabolites))
    write_line(inf, '')
    write_line(inf, '-CAT')
    # one equation per reaction: "<name> : <substrates> = <products> ."
    # (stoichiometries are formatted with %d, i.e. as integers)
    for r_name, r in with_boundary_reacts.iteritems():
        react_str = '%s : %s = %s .' % (r_name, ' + '.join([
            '%d %s' % (p[0], p[1]) for p in r[0]
        ]), ' + '.join(['%d %s' % (p[0], p[1]) for p in r[1]]))
        write_line(inf, react_str)
    inf.close()

    # for each species, this may be defined or simply be '' which is ignored
    # NOTE(review): eval() of a file again; trusted input only.
    thresholds = eval(open('%s/thresholds.txt' % method_dir, 'r').read())

    for thr in thresholds[species]:
        # remove subsystem files left over from a previous run/threshold
        run_for_all_template_files('subsystem%d.out', os.remove)
        run_for_all_template_files(out_dir + '/subsystem%d.out', os.remove)

        # subnet file_name
        if my_constants.win:
            res = os.system('%s/src/subnet.exe schuster.in %s' %
                            (method_dir, thr))
        else:
            res = os.system('subnet schuster.in %s' % thr)

        # reaction / metabolite modules keyed by the subsystem index (as string)
        rmods = {}
        mmods = {}

        # callback: parse one subsystem file and archive it in the output directory
        def read_module_and_move(resf_path, i):
            rmod, mmod = read_result_file(resf_path)
            rmods['%d' % i] = rmod
            mmods['%d' % i] = mmod

            int_out_dir_path = '%s/subsystem_%s_%d.out' % (out_dir, thr, i)
            shutil.copy(resf_path, int_out_dir_path)

        run_for_all_template_files('subsystem%d.out', read_module_and_move)

        outr = open('%s/react_modules_%s.txt' % (out_dir, thr), 'w')
        outrm = open('%s/metab_react_modules_%s.txt' % (out_dir, thr), 'w')
        outm = open('%s/metab_modules_%s.txt' % (out_dir, thr), 'w')
        for mname, rmod in rmods.iteritems():
            write_line(outr, ' '.join(rmod))
            # metab_react_modules keeps one line per reaction module (possibly
            # empty); metab_modules only gets the non-empty metabolite modules
            write_line(outrm, ' '.join(mmods[mname]))
            if mmods[mname]:
                write_line(outm, ' '.join(mmods[mname]))
        outr.close()
        outrm.close()
        outm.close()

        shutil.copy(
            '%s/metab_modules_%s.txt' % (out_dir, thr),
            '%s/final_modules_%s.txt' %
            (out_dir,
             thr))  # TODO: which file r/m should be selected as final modules?
예제 #7
0
파일: sridharan.py 프로젝트: emdadi/CAMND
def go(species, only_cut_dendogram=False):
    """Run the Sridharan MATLAB decomposition and cut it into reaction modules.

    Unless ``only_cut_dendogram`` is set, ``sridharan.m`` is generated and
    executed and the resulting ``sridharan.mat`` is copied to
    ``<resultPath>/<species>/sridharan``. The module hierarchy stored in that
    file is then cut once per entry of ``cut_heights.txt[species]``: every
    module at or below the cut height whose parent lies above it becomes a
    module, and every reaction not covered this way becomes a singleton
    module. Results go to ``react_modules_<cut>.txt`` and are copied to
    ``final_modules_<cut>.txt``.

    Args:
        species: key into ``my_constants.species_sbml`` selecting the network.
        only_cut_dendogram: when true, skip the MATLAB run and only re-cut the
            ``sridharan.mat`` already present in the output directory.
    """
    # Biomass removal is deliberately switched off: "False and ..." always
    # evaluates to False without looking the species up.
    need_biomass_removal = False and my_constants.species_artificial_biomass[
        species]

    method_dir = r'%s/method/sridharan' % my_constants.basePath
    out_dir = r'%s/%s/sridharan' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file,
        True,
        read_species_compart=True,
        remove_biomass=need_biomass_removal,
        normalize_stoich=False)

    if not only_cut_dendogram:
        # "with" guarantees the script is closed even if a write fails.
        with open('sridharan.m', 'w') as f:
            write_line(f, 'addpath %s/code' % method_dir)
            write_line(f, "model = readCbModel('%s')" % source_file)
            if need_biomass_removal:
                for l in my_util.get_remove_reaction_matlab_script(
                        'model', biomass):
                    write_line(f, l)
            write_line(
                f, '[mods, mods_hier] = Shred_Network_plos2011(model, false);')
            write_line(f, 'save sridharan.mat mods mods_hier')

        my_util.prepare_matlab_file_and_exec_and_wait_finish(
            'sridharan', 'sridharan.mat', False)

        shutil.copy('sridharan.mat', out_dir)

    modules_hierarchy, mods_mets, mods_rxns, raw_tree_height = read_module_hierarchy(
        '%s/sridharan.mat' % out_dir, mets, rxns)

    # TODO: very bad job!!!!
    # NOTE: mets are removed because their modules share metabolites!!!
    dummy_tree, tree_height, dummy_thresholds = read_hierarchical_decomposition_sridharan(
        species, '%s/sridharan.mat' % out_dir, mets, rxns, is_mets=False)

    def dendogram_height(hierarchy_height):
        # Flip the hierarchy height so that it counts from the other end of the tree.
        return tree_height - hierarchy_height + 1

    # SECURITY NOTE: eval() executes the content of cut_heights.txt; only
    # acceptable because the file ships with the method directory.
    with open('%s/cut_heights.txt' % method_dir, 'r') as cut_heights_file:
        cut_heights = eval(cut_heights_file.read())
    for thrr in cut_heights[species]:

        # TODO: very bad job
        thr_height = thrr * tree_height

        all_modularized_observations = set()
        cut_mods = []
        # to cut, find all modules at or below the cut height whose super module is above the cut height
        # also all other observations are added as remaining modules (observations are not in module_hierarchy as one-sized-module and so should be specially treated)
        for m_idx, m_sup_sub in enumerate(modules_hierarchy):
            super_idx = m_sup_sub[0]
            module_height = m_sup_sub[2]
            # A module without a super module is treated as if its parent sat
            # just above the tree.
            if super_idx is not None:
                super_module_height = modules_hierarchy[super_idx][2]
            else:
                super_module_height = tree_height + 1
            if dendogram_height(
                    module_height) <= thr_height < dendogram_height(
                        super_module_height):
                cut_mods.append(mods_rxns[m_idx])
                all_modularized_observations.update(mods_rxns[m_idx])

        for r in rxns:
            if r not in all_modularized_observations:
                cut_mods.append([r])

        with open('%s/react_modules_%s.txt' % (out_dir, thrr), 'w') as outr:
            for rmod in cut_mods:
                write_line(outr, ' '.join(rmod))

        shutil.copy(
            '%s/react_modules_%s.txt' % (out_dir, thrr),
            '%s/final_modules_%s.txt' % (out_dir, thrr)
        )  # TODO: which file r/m should be selected as final modules?
예제 #8
0
        cached_metabolite_similarities[(m1, m2, species, factor1)] = sim

        metabolite_pair_similarity_done += 1
        return sim


def compute_chebi_distance(inf, species, type, is_rmod, is_partial_module, S, mets, reacts, revs, chebi_loaded_model):  # type either of cc/bp/bf
    """Compute the ChEBI-based distance for the modules file ``inf``.

    Fetches the cached compound similarity table (and its compound order) for
    ``species`` / ``type`` and delegates to ``do_compute_distance`` together
    with the module-level ``cached_metabolite_similarities`` cache; all other
    arguments are forwarded unchanged. Returns whatever
    ``do_compute_distance`` returns.
    """
    table, compound_order = get_cached_compound_similarities(species, type)
    distance = do_compute_distance(
        cached_metabolite_similarities, table, compound_order,
        inf, species, type, is_rmod, is_partial_module,
        S, mets, reacts, revs, chebi_loaded_model)
    return distance


if __name__ == '__main__':
    # compute_stats_for_all_species('%s/evaluation/gossto/stats_final.txt' % my_constants.basePath, '%s/evaluation/gossto/stats_reaction_types.txt' % my_constants.basePath)
    #
    # if True:
    #     exit()

    species = 'ecoli_core'

    out_dir = r'%s/%s/newman' % (my_constants.resultPath, species)

    src_file = r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species])

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(src_file, True, read_species_compart=True)

    reader = libsbml.SBMLReader()
    doc = reader.readSBML(src_file)
    chebi_loaded_model = ChebiLoadedModel([doc.getModel()])

    print compute_chebi_distance('%s/final_modules.txt' % out_dir, species, 'mf', False, False, S, mets, rxns, revs, chebi_loaded_model)
예제 #9
0
def compute_stats_for_all_species(res_path, reaction_types_res_path, species_filter=None):
    """Write enzyme-annotation coverage statistics for the configured species.

    For every species (optionally restricted by ``species_filter``), every
    ontology in ``mf`` / ``bp`` / ``cc`` and both settings of the
    reliable-genes restriction, each reaction is classified as not enzymatic
    (-1), undefined enzymes (-2), missed (-3), disrupted (-4), rescued by EC
    number (-5) or ok (0), and one tab-separated row with the counts is written
    to ``res_path``. The first row written for a species additionally carries
    the reaction-type names and the second row the per-type count of
    not-enzymatic reactions; later rows leave that column empty.

    ``reaction_types_res_path`` receives, per species, the reaction types found
    and for each type a summary line plus a pretty-printed dump of its
    reactions.

    Args:
        res_path: path of the tab-separated statistics file to write.
        reaction_types_res_path: path of the per-reaction-type report to write.
        species_filter: optional container of species names; when given and
            non-empty, only these species are processed.
    """
    # Classifies a reaction id by prefix/suffix; compiled once for all reactions.
    reaction_type_re = re.compile('[a-z]+$|^R_EX_|^R_DM_|t2$')
    # Column of each decided_reaction_type code in the per-type summary line;
    # every other code is counted in the last ("ok") column.
    stat_column = {-1: 0, -2: 1, -3: 2, -4: 3, -5: 4}
    # Reaction types that are listed before the "SUM OTHERS" column.
    pinned_types = ('R_EX_', 't')

    # "with" closes both reports even when a species fails half-way through.
    with open(res_path, 'w') as res, open(reaction_types_res_path, 'w') as reaction_types_res:
        res.write('species\ttype\tall\tnot enzymatic\tundefined enzymes\tmissed: from sbml to simtbl\tdisrupted: from sbml to simtbl\trescued by EC Number\n')

        for species, species_file in my_constants.species_sbml.iteritems():
            if species_filter and species not in species_filter:
                continue

            reaction_types = {}
            species_row_counter = 0

            for go_type in ['mf', 'bp', 'cc']:
                for restric_to_reliable_genes in [False, True]:
                    by_reaction_type_classifier = []

                    try:
                        similarity_table, table_gene_order = get_cached_similarity_tables(species, go_type, restric_to_reliable_genes)
                    except Exception:
                        # Best effort: report the failure and skip this
                        # combination, but let KeyboardInterrupt / SystemExit
                        # propagate.
                        traceback.print_exc()
                        continue

                    src_file = r'%s/dataset/networks/%s' % (my_constants.basePath, species_file)
                    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(src_file, True, read_species_compart=True)

                    reader = libsbml.SBMLReader()
                    doc = reader.readSBML(src_file)
                    go_loaded_model = eval_util.GoLoadedModel(doc.getModel(), species)
                    moredata_loaded_model = eval_util.MoreDataLoadedModel(species)

                    all_rxns, not_enzymatic, undefined_gpd, disrupted, missed, ok, rescued = 0, 0, 0, 0, 0, 0, 0
                    for ri, r in enumerate(rxns):
                        all_rxns += 1
                        enz_mapping_state, enz_grp = get_even_ecnumber_annotation_term_ids_for_reaction(r, species, 'go_distance', go_loaded_model, moredata_loaded_model, stats_mode=True)

                        if enz_mapping_state == -1:
                            decided_reaction_type = -1
                            not_enzymatic += 1
                        elif enz_mapping_state == -2:
                            decided_reaction_type = -2
                            undefined_gpd += 1
                        elif enz_mapping_state == -3:
                            decided_reaction_type = -3
                            missed += 1
                        elif enz_mapping_state == -5:
                            decided_reaction_type = -5
                            rescued += 1
                        else:
                            enz_all = accumulate_all_enzymes_altogether(enz_grp)
                            in_table = [e in similarity_table for e in enz_all]
                            if not any(in_table):
                                # no enzyme of the reaction is in the table
                                decided_reaction_type = -3
                                missed += 1
                            elif not all(in_table):
                                # only some of the enzymes are in the table
                                decided_reaction_type = -4
                                disrupted += 1
                            else:
                                decided_reaction_type = 0
                                ok += 1

                        reaction_type = reaction_type_re.findall(r)
                        rt = reaction_type[0] if reaction_type else 'unknown'
                        substrates = tuple([m for mi, m in enumerate(mets) if S[mi][ri] < 0])
                        products = tuple([m for mi, m in enumerate(mets) if S[mi][ri] > 0])
                        reaction_types.setdefault(rt, set()).add((r, decided_reaction_type, substrates, products))
                        by_reaction_type_classifier.append((decided_reaction_type, rt))

                    # Number of not-enzymatic reactions per reaction type.
                    ne_distribution = dict.fromkeys(reaction_types, 0)
                    for decided, rt in by_reaction_type_classifier:
                        if decided == -1:
                            ne_distribution[rt] += 1

                    found_types0 = sorted(reaction_types, key=lambda x: ne_distribution[x], reverse=True)
                    # Column order: pinned types (when present), the sum of all
                    # other types, then the other types by descending count.
                    other_types = [ft for ft in found_types0 if ft not in pinned_types]
                    found_types = [ft for ft in pinned_types if ft in found_types0]
                    found_types.append('SUM OTHERS')
                    found_types.extend(other_types)

                    ne_distribution['SUM OTHERS'] = sum(ne_distribution[ft] for ft in other_types)

                    ne_distribution_line = ''
                    if species_row_counter == 0:
                        ne_distribution_line = '\t'.join(found_types)
                    elif species_row_counter == 1:
                        ne_distribution_line = '\t'.join([str(ne_distribution[k]) for k in found_types])

                    species_row_counter += 1

                    res.write('%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t\t\t\t\t%s\n' % (species, '%s_%s' % (go_type.lower(), 'g' if restric_to_reliable_genes else 'f'), all_rxns, not_enzymatic, undefined_gpd, missed, disrupted, rescued, ne_distribution_line))

            reaction_types_res.write('\n--------------------------------------------FOR %s:\n' % species)
            reaction_types_res.write(' '.join(sorted(reaction_types)) + '\n')
            for k, v in reaction_types.iteritems():
                # Count the entries of this type per classification code.
                stat = [0] * 6
                for z in v:
                    stat[stat_column.get(z[1], 5)] += 1
                reaction_types_res.write(k + ': ne: %d, undef: %d, mis: %d, dis: %d, rescued: %d, ok: %d\n' % (stat[0], stat[1], stat[2], stat[3], stat[4], stat[5]))
                reaction_types_res.write('     ')
                pp.pprint(v, reaction_types_res)
예제 #10
0
def _verwoerd_split_reaction(S, j):
    """Split column ``j`` of ``S`` into substrate and product entries.

    ``S`` is indexed as ``S[metabolite_row][reaction_column]``.  Returns
    ``(subs, prods)`` where every entry is ``[abs(coefficient), row_index]``;
    negative coefficients go to ``subs`` and positive ones to ``prods``.
    """
    subs = []
    prods = []
    for i, row in enumerate(S):
        coef = row[j]
        if coef > 0:
            prods.append([coef, i])
        elif coef < 0:
            subs.append([-coef, i])
    return subs, prods


def go(species, pseudo_species=None):
    """Write the Verwoerd input files (network + external metabolites).

    The network of ``species`` is loaded through
    ``importer.sbmlStoichiometricMatrix`` and the per-species entry of
    ``<method_dir>/external_strategy.txt`` selects how external metabolites
    are determined:

    * ``'KEGG_PSEUDO_NETWORK'``: a metabolite is external unless
      ``is_internal`` (below) accepts it.
    * ``'BOUNDARY'``: every reaction without products gets an artificial
      ``<met>_BND`` product (compartment ``'B'``); those are the externals.
    * anything else: reactions without products are skipped and metabolites
      whose id ends with ``'_e'`` are the externals.

    Args:
        species: key into ``my_constants.species_sbml`` and into the
            external-strategy mapping.
        pseudo_species: optional directory.  When given, output goes to
            ``<pseudo_species>/verwoerd.tsv`` and
            ``<pseudo_species>/verwoerd-ext.txt``; otherwise to
            ``verwoerd_<species>.tsv`` and ``verwoerd-ext_<species>.txt`` in
            the current directory.

    Raises:
        Exception: on a reaction without substrates, on a KEGG pseudo-network
            reaction lacking a consumed or produced side, or on a
            product-less reaction with more than one substrate under the
            ``'BOUNDARY'`` strategy.
    """
    method_dir = r'%s/method/verwoerd' % my_constants.basePath
    out_dir = r'%s/%s/verwoerd' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file,
        True,
        read_species_compart=True,
        remove_biomass=True,
        normalize_stoich=True)

    # SECURITY NOTE(review): eval() executes whatever is in this file.  It is
    # kept because the file format is not visible here; if the file only ever
    # holds a literal dict, ast.literal_eval would be the safer choice.
    with open('%s/external_strategy.txt' % method_dir, 'r') as strategy_file:
        external_strategy = eval(strategy_file.read())
    strategy = external_strategy[species]

    # reaction id -> [subs, prods]; reactions skipped below are absent
    with_boundary_reacts = {}
    # metabolite ids / compartments, possibly extended by '_BND' metabolites
    with_boundary_mets = list(mets)
    with_boundary_comparts = list(met_comparts)

    if strategy == 'KEGG_PSEUDO_NETWORK':  # networks built from kegg by merging pathways
        subs_to_reacts = {}  # metabolite id -> reactions that can consume it
        prods_to_reacts = {}  # metabolite id -> reactions that can produce it
        for j, r in enumerate(rxns):
            subs, prods = _verwoerd_split_reaction(S, j)
            with_boundary_reacts[r] = [subs, prods]

            # a reversible reaction can both consume and produce every
            # metabolite it touches
            reversible = revs[j] != 0
            produced = (prods + subs) if reversible else prods
            consumed = (subs + prods) if reversible else subs
            for _, i in produced:
                prods_to_reacts.setdefault(mets[i], set()).add(r)
            for _, i in consumed:
                subs_to_reacts.setdefault(mets[i], set()).add(r)

            if not consumed or not produced:
                raise Exception(
                    'reaction without substrate/products in KEGG pseudo networks! should never happen!'
                )

        def is_internal(met):
            """Tell whether ``met`` is internal, taking reversibility into account.

            A metabolite is not internal when it is never produced or never
            consumed, or when one single reaction is its only producer and
            consumer without listing it on both sides (i.e. not polymeric).
            """
            if met not in prods_to_reacts or met not in subs_to_reacts:
                return False
            if prods_to_reacts[met] == subs_to_reacts[met] and len(
                    prods_to_reacts[met]) == 1:
                the_react = list(prods_to_reacts[met])[0]
                the_react_subs = [
                    mets[ss[1]] for ss in with_boundary_reacts[the_react][0]
                ]
                the_react_prods = [
                    mets[ss[1]] for ss in with_boundary_reacts[the_react][1]
                ]
                if not (met in the_react_subs
                        and met in the_react_prods):  # not polymeric
                    return False
            return True

        # same result as mets.index(m) (first occurrence), without the
        # linear scan per metabolite
        first_index = {}
        for i, m in enumerate(mets):
            first_index.setdefault(m, i)
        initial_external_metabolites_idxs = [
            first_index[m] for m in with_boundary_mets if not is_internal(m)
        ]
    else:
        for j, r in enumerate(rxns):
            subs, prods = _verwoerd_split_reaction(S, j)

            if not subs:
                raise Exception(
                    'reaction without substrate! should never happen!')

            if not prods:
                if strategy != 'BOUNDARY':
                    print('skipping reaction without product: EXCHANGE!')
                    continue

                if len(subs) != 1:
                    raise Exception(
                        'EXCHANGE reaction with more than one substrate metabolite! unacceptable.'
                    )
                new_boundary_met = mets[subs[0][1]] + '_BND'
                if new_boundary_met in with_boundary_mets:
                    # original author's note: expected to happen only once for
                    # Homo sapiens Recon 1 and once for E. coli iJO1366
                    bnd_idx = with_boundary_mets.index(new_boundary_met)
                else:
                    # the new metabolite takes the next free row index
                    bnd_idx = len(with_boundary_mets)
                    with_boundary_mets.append(new_boundary_met)
                    with_boundary_comparts.append('B')
                prods = [[subs[0][0], bnd_idx]]

            with_boundary_reacts[r] = [subs, prods]

        external_suffix = '_BND' if strategy == 'BOUNDARY' else '_e'
        initial_external_metabolites_idxs = [
            i for i, m in enumerate(with_boundary_mets)
            if m.endswith(external_suffix)
        ]

    if pseudo_species:
        my_util.mkdir_p(pseudo_species)
        network_path = '%s/verwoerd.tsv' % pseudo_species
        externals_path = '%s/verwoerd-ext.txt' % pseudo_species
    else:
        network_path = 'verwoerd_%s.tsv' % species
        externals_path = 'verwoerd-ext_%s.txt' % species

    # coordinate entries "row<TAB>col<TAB>value", 1-based; the column is the
    # reaction's position in rxns, so skipped reactions leave empty columns
    s_lines = []
    for ri, r in enumerate(rxns):
        if r not in with_boundary_reacts:  # skipped exchange reactions
            continue
        r_subs, r_prods = with_boundary_reacts[r]
        for coef, mi in r_subs:
            s_lines.append('%d\t%d\t%s' % (mi + 1, ri + 1, -coef))
        for coef, mi in r_prods:
            s_lines.append('%d\t%d\t%s' % (mi + 1, ri + 1, coef))

    with open(network_path, 'w') as inf:
        write_line(inf, '<Title>')
        write_line(inf, species.replace('/', ''))
        write_line(inf, '<Reactions>')
        write_line(inf, '\t'.join(rxns))
        write_line(inf, '<ReversibleReactions>')
        write_line(
            inf, '\t'.join([r for i, r in enumerate(rxns) if revs[i] == 1]))
        write_line(inf, '<InternalCompounds>')
        # every metabolite is listed here as "<id> <compartment>"
        write_line(
            inf, '\t'.join([
                '%s %s' % (m, c)
                for m, c in zip(with_boundary_mets, with_boundary_comparts)
            ]))
        # the section is left empty; externals go to the separate file below
        write_line(inf, '<ExternalCompounds>')
        write_line(inf, '<Stoichiometry>')
        write_line(inf, '%%MatrixMarket matrix coordinate real general')
        # Size line: rows, columns, non-zeros.  The column count must be
        # len(rxns) because the entries above use positions in rxns; the
        # number of kept reactions would be too small whenever a reaction
        # was skipped.
        write_line(
            inf, '\t\t\t%d\t%d\t%d' %
            (len(with_boundary_mets), len(rxns), len(s_lines)))
        for sl in s_lines:
            write_line(inf, sl)

    with open(externals_path, 'w') as ext_file:
        for init_extern_idx in initial_external_metabolites_idxs:
            write_line(ext_file, with_boundary_mets[init_extern_idx])
예제 #11
0
def go(species, pseudo_species=None):
    """Collect Verwoerd block result files into per-threshold module files.

    For every threshold listed for ``species`` in
    ``<method_dir>/thresholds.txt``, each ``*_Block_<i>.tsv`` result file is
    parsed with ``read_result_file`` and copied to
    ``<out_dir>/subsystem_<thr>_<i>.out``.  Three files are then written to
    ``out_dir``: ``react_modules_<thr>.txt``,
    ``metab_react_modules_<thr>.txt`` and ``metab_modules_<thr>.txt``; the
    last one is finally copied to ``final_modules_<thr>.txt``.

    Args:
        species: key into ``my_constants.species_sbml`` and into the
            thresholds mapping.
        pseudo_species: optional directory.  When given, results are read
            from ``<pseudo_species>/Subnetworks_<thr>/``; otherwise from
            ``<species>_<thr>/``.
    """
    method_dir = r'%s/method/verwoerd' % my_constants.basePath
    out_dir = r'%s/%s/verwoerd' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])

    # the returned values are not used below; the call is kept as in the
    # original (NOTE(review): possibly only serves as a sanity check - confirm)
    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file,
        True,
        read_species_compart=True,
        remove_biomass=True,
        normalize_stoich=True)

    # SECURITY NOTE(review): eval() executes whatever is in this file.  It is
    # kept because the file format is not visible here; if the file only ever
    # holds a literal dict, ast.literal_eval would be the safer choice.
    with open('%s/thresholds.txt' % method_dir, 'r') as thresholds_file:
        thresholds = eval(thresholds_file.read())

    # for each species, this may be defined or simply be '' (an empty string
    # yields no iterations, so the species is effectively ignored)
    for thr in thresholds[species]:
        if pseudo_species:
            manual_results_dir = '%s/Subnetworks_%s' % (pseudo_species, thr)
            block_prefix = species.replace('/', '')
        else:
            manual_results_dir = '%s_%s' % (species, thr)
            block_prefix = species

        rmods = {}  # block index (as string) -> reaction module
        mmods = {}  # block index (as string) -> metabolite module

        def read_module_and_move(resf_path, i):
            """Parse result file ``i`` and archive a copy in ``out_dir``."""
            rmod, mmod = read_result_file(resf_path)
            rmods['%d' % i] = rmod
            mmods['%d' % i] = mmod

            int_out_dir_path = '%s/subsystem_%s_%d.out' % (out_dir, thr, i)
            shutil.copy(resf_path, int_out_dir_path)

        run_for_all_template_files(
            manual_results_dir + '/' + block_prefix + '_Block_%d.tsv',
            read_module_and_move)

        react_modules_path = '%s/react_modules_%s.txt' % (out_dir, thr)
        metab_react_modules_path = '%s/metab_react_modules_%s.txt' % (
            out_dir, thr)
        metab_modules_path = '%s/metab_modules_%s.txt' % (out_dir, thr)

        # context managers close all three files even if a write fails
        with open(react_modules_path, 'w') as outr:
            with open(metab_react_modules_path, 'w') as outrm:
                with open(metab_modules_path, 'w') as outm:
                    for mname, rmod in rmods.items():
                        write_line(outr, ' '.join(rmod))
                        # NOTE(review): this file receives the metabolite
                        # module for every block (empty ones included) -
                        # confirm that is intended
                        write_line(outrm, ' '.join(mmods[mname]))
                        if mmods[mname]:
                            write_line(outm, ' '.join(mmods[mname]))

        # TODO: which file r/m should be selected as final modules?
        shutil.copy(metab_modules_path,
                    '%s/final_modules_%s.txt' % (out_dir, thr))