def go(species):
    """Run the ``muller2_new`` MATLAB method for ``species`` and export its modules.

    A MATLAB driver script (``muller2_new.m``) is written to the current
    working directory and executed through ``my_util``.  The ``modules``
    matrix saved by the script is then converted to
    ``<resultPath>/<species>/muller2_new/final_modules.txt`` with one module
    (space separated reaction ids) per line.

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :raises Exception: propagated from looking up ``'modules'`` in the loaded
        MATLAB result when the MATLAB run failed.
    """
    method_dir = r'%s/method/muller2_new' % my_constants.basePath
    out_dir = r'%s/%s/muller2_new' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])
    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file, True, read_species_compart=True, remove_biomass=False,
        normalize_stoich=False)

    # The script must be fully written and closed before MATLAB is started;
    # the context manager also closes it when one of the writes fails.
    with open('muller2_new.m', 'w') as script:
        write_line(script, 'addpath %s/code' % method_dir)
        write_line(script, "model = readCbModel('%s')" % source_file)
        # +1 converts the 0-based Python position to a 1-based MATLAB index.
        write_line(script, "model.c(%d) = 1;" % (rxns.index(biomass[0]) + 1))
        write_line(script, "changeCobraSolver('glpk');")
        write_line(script, '[modules, var, flux] = computeModulesOpt( model );')
        write_line(script, 'save muller2_newout.mat modules var flux')

    my_util.prepare_matlab_file_and_exec_and_wait_finish(
        'muller2_new', 'muller2_newout.mat', False)
    res_vars = my_util.try_load_matlab_result('muller2_newout.mat')
    raw_modules = res_vars['modules']  # if matlab has failed, this will throw exception!
    shutil.copy('muller2_newout.mat', out_dir)

    # After transposing, each row is one module in which member reactions
    # are marked with 1.
    modules = [
        [rxns[r_idx] for r_idx, in_module in enumerate(membership) if in_module == 1]
        for membership in raw_modules.T.tolist()
    ]

    with open('%s/final_modules.txt' % out_dir, 'w') as out:
        out.write(
            "#each row is a module of reactions. not all reactions are specified (nature of this method only select some reaction to be modules)\n"
        )
        for module in modules:
            out.write(' '.join(module))
            out.write('\n')
def go(species, seed=1):
    """Run Guimera's ``netcarto_cl`` on the metabolite graph of ``species``.

    The network is exploded into metabolite-metabolite edges, written to
    ``guimera.in`` (one ``"<idx> <idx>"`` edge per line), and handed to the
    external ``netcarto_cl`` executable.  Its ``modules.dat`` and
    ``roles.dat`` outputs are copied to ``<resultPath>/<species>/guimera``
    and ``modules.dat`` is converted to ``final_modules.txt`` (one module of
    metabolite ids per line).

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :param seed: integer passed to ``netcarto_cl`` as its random seed.  The
        command used to contain an un-interpolated ``%d`` placeholder, so the
        literal text ``%d`` was handed to the tool instead of a number.
        NOTE(review): the default of 1 is an arbitrary choice - confirm the
        seed wanted for reproducing earlier results.
    """
    out_dir = r'%s/%s/guimera' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species]),
        True, read_species_compart=True, remove_biomass=True, normalize_stoich=True)

    # graph is represented as list of edges between metabs
    grph = my_util.graph_by_explode_reactions_to_complete_bipartite(S, mets)
    edge_idxs = sorted((mets.index(m1), mets.index(m2)) for (m1, m2) in grph)

    with open('guimera.in', 'w') as inf:
        for src_idx, dst_idx in edge_idxs:
            inf.write('%d %d\n' % (src_idx, dst_idx))

    # netcarto_cl net_file_name seed T_ini iteration_factor cooling_factor
    # T_ini, iteration_factor, and cooling_factor can be set to -1 to use the
    # defaults (2/size_of_network, 1.0, and 0.995, respectively).
    os.system('netcarto_cl guimera.in %d -1 -1 -1 0' % seed)

    shutil.copy('modules.dat', out_dir)
    shutil.copy('roles.dat', out_dir)

    with open('%s/final_modules.txt' % out_dir, 'w') as out:
        out.write('# each line one module!\n')
        with open('modules.dat', 'r') as netcarto_outf:
            for line in netcarto_outf:
                marker_pos = line.find('---')
                if marker_pos == -1:
                    continue
                # Everything after '---' is the list of member node indices;
                # split() tolerates repeated blanks between them.
                member_idxs = line[marker_pos + len('---'):].split()
                out.write(' '.join([mets[int(i)] for i in member_idxs]))
                out.write('\n')
def go(species):
    """Run Newman's community detection on the metabolite graph of ``species``.

    The network is exploded into metabolite-metabolite edges, written to
    ``newman.in`` (``"<idx>,<idx>,1"`` per line) and processed by
    ``correctedcmty.main``.  The last ``START_COMP`` ... ``END_COMP`` section
    of ``newman.out`` is converted to
    ``<resultPath>/<species>/newman/final_modules.txt`` (one module of
    metabolite ids per line).

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :raises ValueError: when ``newman.out`` has no ``START_COMP`` or
        ``END_COMP`` marker.
    """
    out_dir = r'%s/%s/newman' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species]),
        True, read_species_compart=True, remove_biomass=True)

    # graph is represented as list of edges between metabs
    grph = my_util.graph_by_explode_reactions_to_complete_bipartite(S, mets)
    edge_idxs = sorted((mets.index(m1), mets.index(m2)) for (m1, m2) in grph)

    with open('newman.in', 'w') as inf:
        for src_idx, dst_idx in edge_idxs:
            inf.write('%d,%d,1\n' % (src_idx, dst_idx))

    correctedcmty.main(['DUMMY', 'newman.in', 'newman.out'])
    shutil.copy('newman.out', out_dir)

    with open('newman.out', 'r') as resf:
        raw_result = resf.read()

    # Only the last component section of the output is used.
    start = raw_result.rindex('START_COMP') + len('START_COMP')
    end = raw_result.rindex('END_COMP')
    component_lines = raw_result[start:end].split('\n')

    with open('%s/final_modules.txt' % out_dir, 'w') as out:
        out.write('# each line one module!\n')
        for line in component_lines:
            line = line.strip()
            if not line:
                continue
            # NOTE(review): eval() executes the tool output as Python code.
            # It is kept because the exact line format is not visible here;
            # consider ast.literal_eval if the lines are plain literals.
            member_idxs = eval(line)
            out.write(' '.join([mets[int(i)] for i in member_idxs]))
            out.write('\n')
ds = 0 for m in mod_metabs: # metab_row = S[mets.index(m)] # ds += sum([1 for ri in metab_row if ri != 0]) ds += len(metab_edges[m]) # TODO: for hyperarcs this causes a reaction to be counted more than once for a module sum_term = ls * 1.0 / total_links - pow(ds / (2.0 * total_links), 2) if mod_name == eval_constants.EXTERNAL_MODULE_ID: modularity -= sum_term else: modularity += sum_term return modularity if __name__ == '__main__': # species = 'toy_model' import cPickle as pickle for species in my_constants.species_sbml.keys(): # species = 'ecoli_iaf1260' out_dir = r'%s/%s/newman' % (my_constants.resultPath, species) print species S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species]), True, read_species_compart=True) # pickle.dump([S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts], open(r'D:\University\DiseaseSim\zsh\MSB\%s.pkl' % species, 'wb')) # aS, amets, arxns, arevs, amet_names, arxn_names, abiomass, amet_comparts = pickle.load(open(r'D:\University\DiseaseSim\zsh\MSB\ecoli_iaf1260.pkl', 'rb')) # print S == aS, mets == amets, revs == arevs, amet_names == met_names, abiomass == biomass, amet_comparts == met_comparts, arxns == rxns # print compute_modularity('%s/final_modules.txt' % out_dir, False, True, False, S, mets, rxns)
def go(species, only_cut_dendogram=False):
    """Run Holme's hierarchical decomposition (``hi``) and cut its dendogram.

    Unless ``only_cut_dendogram`` is set, the bipartite metabolite/reaction
    graph is written to ``cell/ho.dat`` / ``cell/ho.nam``, the external
    ``hi`` program is run, and its ``data/cellho`` output is copied to
    ``<resultPath>/<species>/holme``.  The copied ``cellho`` file is then read
    as consecutive levels of ``len(mets)`` integers, dumped to
    ``dendogram.py``, and for every cut value configured in
    ``cut_iterations.txt`` a ``final_modules_<cut>.txt`` file is written.

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :param only_cut_dendogram: when true, skip running ``hi`` and rewriting
        ``dendogram.py``; only the cut files are regenerated from the
        ``cellho`` file already present in the output directory.
    """
    method_dir = r'%s/method/holme' % my_constants.basePath
    out_dir = r'%s/%s/holme' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species]),
        True, read_species_compart=True, remove_biomass=True)

    if not only_cut_dendogram:
        # graph is a bipartite representation of the network. should be
        # written to file with format:
        #   (1.) each line represents a directed link: 'from' 'to'
        #   (2.) substances are enumerated 1, 2, . . .
        #   (3.) reaction nodes are enumerated 1000000, 1000001, . . .
        METAB_OFFS = 1
        REACT_OFFS = 1000000
        links = []
        for i, row in enumerate(S):
            for j, coeff in enumerate(row):
                if coeff < 0:  # metab i is consumed by react j
                    links.append((i + METAB_OFFS, j + REACT_OFFS))
                elif coeff > 0:  # metab i is produced by react j
                    links.append((j + REACT_OFFS, i + METAB_OFFS))
        links.sort()

        my_util.mkdir_p('inp')
        shutil.copy('%s/inp/cmds' % method_dir, 'inp/')
        shutil.copy('%s/orgnames' % method_dir, './')
        my_util.mkdir_p('cell')

        with open('cell/ho.dat', 'w') as inf:
            for src, dst in links:
                inf.write('%d %d\n' % (src, dst))
        with open('cell/ho.nam', 'w') as inf:
            for met in mets:
                inf.write('%s\n' % met)

        # hi cmd_file_name
        # cmd_file_name will be a filename located in inp/
        my_util.mkdir_p('data')
        os.system('hi cmds')
        shutil.copy('data/cellho', out_dir)

    # Read the result as consecutive levels of len(mets) integers each.  A
    # trailing level cut short by end-of-file is discarded.
    levels = []
    with open('%s/cellho' % out_dir, 'rb') as resf:
        # With no metabolites a level is empty and end-of-file would never be
        # detected, so skip reading altogether.
        finished = not mets
        while not finished:
            level = []
            for _ in range(len(mets)):
                value = read_int(resf)
                if value is None:
                    finished = True
                    break
                level.append(value)
            else:
                levels.append(level)

    if not only_cut_dendogram:
        # outputs: wpgma tree, cut at proper height?
        with open('%s/dendogram.py' % out_dir, 'w') as out:
            out.write(
                "#print tree is 2d list. each entry is the result of algorithm in one iteration. for each iteration there is a list of cluster-index for each metabolite\n"
            )
            out.write(
                "#print e.g. for the first level all values are 1 (meaning that no split is still done) and in the last level metabolites are numbered from 1 to len(mets)\n"
            )
            out.write("\n")
            out.write("tree = " + str(levels))

    # TODO: very bad job!!!!
    dummy_tree, tree_height, dummy_thresholds = read_hierarchical_decomposition_holme(
        species, '%s/dendogram.py' % out_dir)

    # NOTE(review): eval() executes the content of the configuration file;
    # keep that file under the same trust as the code itself.
    with open('%s/cut_iterations.txt' % method_dir, 'r') as cuts_file:
        cts = eval(cuts_file.read())

    for cut in cts[species]:
        # TODO: very bad job
        level_idx = int(tree_height - int(cut * tree_height) + 1)

        # Group metabolite positions by the cluster index they carry at the
        # selected level.
        modules = {}
        for met_idx, cluster_id in enumerate(levels[level_idx]):
            modules.setdefault(cluster_id, []).append(met_idx)

        with open('%s/final_modules_%s.txt' % (out_dir, cut), 'w') as out:
            # Cluster ids are looked up as 1..len(modules).
            for cluster_id in range(1, len(modules) + 1):
                out.write(' '.join([mets[s] for s in modules[cluster_id]]))
                out.write('\n')
def go(species):
    """Build ``schuster.in`` for ``species``, run ``subnet`` and collect modules.

    Steps:

    1. Load the network and the per-species entry of
       ``external_strategy.txt``.
    2. Split metabolites into internal and external ones according to that
       strategy (``'KEGG_PSEUDO_NETWORK'``, ``'BOUNDARY'`` or anything else,
       which falls back to the ``_e`` name suffix).
    3. Write ``schuster.in`` (sections ``-ENZREV``, ``-ENZIRREV``,
       ``-METINT``, ``-METEXT``, ``-CAT``).
    4. For every threshold of ``thresholds.txt`` run the external ``subnet``
       program, copy each ``subsystem<i>.out`` to the output directory and
       write ``react_modules_<thr>.txt``, ``metab_react_modules_<thr>.txt``,
       ``metab_modules_<thr>.txt`` and ``final_modules_<thr>.txt``.

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :raises Exception: for a reaction without substrates, an exchange
        reaction with more than one substrate (``'BOUNDARY'`` strategy), or a
        reaction lacking substrates/products in a KEGG pseudo network.
    """
    method_dir = r'%s/method/schuster' % my_constants.basePath
    out_dir = r'%s/%s/schuster' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])
    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file, True, read_species_compart=True, remove_biomass=True,
        normalize_stoich=True)

    # NOTE(review): eval() executes the content of the configuration file;
    # keep that file under the same trust as the code itself.
    with open('%s/external_strategy.txt' % method_dir, 'r') as strategy_file:
        external_strategy = eval(strategy_file.read())
    strategy = external_strategy[species]

    def reaction_sides(j):
        """Return (substrates, products) of column ``j`` as [stoichiometry, met_name] pairs."""
        subs = []
        prods = []
        for i in range(len(S)):
            coeff = S[i][j]
            if coeff > 0:
                prods.append([coeff, mets[i]])
            elif coeff < 0:
                subs.append([-coeff, mets[i]])
        return subs, prods

    # both may either be with/without boundary reactions which is specified by the strategy
    # these will be used to create schuster.in file which is used by the method as the input network
    with_boundary_reacts = {}
    with_boundary_mets = list(mets)

    if strategy == 'KEGG_PSEUDO_NETWORK':
        # this implies networks are built from kegg by merging pathways
        subs_to_reacts = {}
        prods_to_reacts = {}
        for j, r in enumerate(rxns):
            subs, prods = reaction_sides(j)
            with_boundary_reacts[r] = [subs, prods]

            # A reversible reaction uses each participant both as substrate
            # and as product.
            reversible = revs[j] != 0
            has_subs, has_prods = False, False
            for i in range(len(S)):
                coeff = S[i][j]
                if coeff > 0 or (coeff < 0 and reversible):
                    prods_to_reacts.setdefault(mets[i], set()).add(r)
                    has_prods = True
                if coeff < 0 or (coeff > 0 and reversible):
                    subs_to_reacts.setdefault(mets[i], set()).add(r)
                    has_subs = True
            if not has_subs or not has_prods:
                raise Exception(
                    'reaction without substrate/products in KEGG pseudo networks! should never happen!'
                )

        # checks whether the metabolite is internal in presence of reversiblity
        def is_internal(met):
            if met not in prods_to_reacts or met not in subs_to_reacts:
                return False
            if prods_to_reacts[met] == subs_to_reacts[met] and len(prods_to_reacts[met]) == 1:
                # Produced and consumed by one and the same reaction only:
                # internal just when it appears on both sides of it.
                the_react = list(prods_to_reacts[met])[0]
                the_react_subs = [ss[1] for ss in with_boundary_reacts[the_react][0]]
                the_react_prods = [ss[1] for ss in with_boundary_reacts[the_react][1]]
                if not (met in the_react_subs and met in the_react_prods):  # not polymeric
                    return False
            return True

        internal_metabolites = [m for m in with_boundary_mets if is_internal(m)]
        internal_lookup = set(internal_metabolites)
        initial_external_metabolites = [
            m for m in with_boundary_mets if m not in internal_lookup
        ]
    else:
        # may add to with_boundary_mets based on strategy.
        # may update with_boundary_reacts according to newly added with_boundary_mets
        for j, r in enumerate(rxns):
            subs, prods = reaction_sides(j)
            # this strategy means reactions without product are boundary reactions
            # (original author's note: reversibility could be problematic here,
            # but the BiGG networks were checked and such reactions are real
            # outside-boundary reactions)
            if not subs:
                raise Exception(
                    'reaction without substrate! should never happen!')
            if not prods:
                if strategy == 'BOUNDARY':
                    if len(subs) != 1:
                        raise Exception(
                            'EXCHANGE reaction with more than one substrate metabolite! unacceptable.'
                        )
                    new_boundary_met = subs[0][1] + '_BND'
                    if new_boundary_met not in with_boundary_mets:
                        with_boundary_mets.append(new_boundary_met)
                    prods = [[subs[0][0], new_boundary_met]]
                else:
                    print('skipping reaction without product: EXCHANGE!')
                    continue
            with_boundary_reacts[r] = [subs, prods]

        external_suffix = '_BND' if strategy == 'BOUNDARY' else '_e'
        initial_external_metabolites = [
            m for m in with_boundary_mets if m.endswith(external_suffix)
        ]
        external_lookup = set(initial_external_metabolites)
        internal_metabolites = [
            m for m in with_boundary_mets if m not in external_lookup
        ]

    with open('schuster.in', 'w') as inf:
        write_line(inf, '-ENZREV')
        write_line(inf, ' '.join([r for i, r in enumerate(rxns) if revs[i] == 1]))
        write_line(inf, '')
        write_line(inf, '-ENZIRREV')
        write_line(inf, ' '.join([r for i, r in enumerate(rxns) if revs[i] != 1]))
        write_line(inf, '')
        write_line(inf, '-METINT')
        write_line(inf, ' '.join(internal_metabolites))
        write_line(inf, '')
        write_line(inf, '-METEXT')
        write_line(inf, ' '.join(initial_external_metabolites))
        write_line(inf, '')
        write_line(inf, '-CAT')
        for r_name, (subs, prods) in with_boundary_reacts.iteritems():
            lhs = ' + '.join(['%d %s' % (p[0], p[1]) for p in subs])
            rhs = ' + '.join(['%d %s' % (p[0], p[1]) for p in prods])
            write_line(inf, '%s : %s = %s .' % (r_name, lhs, rhs))

    # for each species, this may be defined or simply be '' which is ignored
    # NOTE(review): eval() on a configuration file, see above.
    with open('%s/thresholds.txt' % method_dir, 'r') as thresholds_file:
        thresholds = eval(thresholds_file.read())

    for thr in thresholds[species]:
        # Drop leftovers of earlier runs before invoking the tool again.
        run_for_all_template_files('subsystem%d.out', os.remove)
        run_for_all_template_files(out_dir + '/subsystem%d.out', os.remove)

        # subnet file_name
        if my_constants.win:
            os.system('%s/src/subnet.exe schuster.in %s' % (method_dir, thr))
        else:
            os.system('subnet schuster.in %s' % thr)

        rmods = {}
        mmods = {}

        def read_module_and_move(resf_path, i):
            rmod, mmod = read_result_file(resf_path)
            rmods['%d' % i] = rmod
            mmods['%d' % i] = mmod
            int_out_dir_path = '%s/subsystem_%s_%d.out' % (out_dir, thr, i)
            shutil.copy(resf_path, int_out_dir_path)

        run_for_all_template_files('subsystem%d.out', read_module_and_move)

        # One fixed ordering so that line k of the files below refers to the
        # same module (metab_modules additionally omits empty modules).
        module_names = list(rmods)
        with open('%s/react_modules_%s.txt' % (out_dir, thr), 'w') as outr:
            for mname in module_names:
                write_line(outr, ' '.join(rmods[mname]))
        with open('%s/metab_react_modules_%s.txt' % (out_dir, thr), 'w') as outrm:
            for mname in module_names:
                write_line(outrm, ' '.join(mmods[mname]))
        with open('%s/metab_modules_%s.txt' % (out_dir, thr), 'w') as outm:
            for mname in module_names:
                if mmods[mname]:
                    write_line(outm, ' '.join(mmods[mname]))

        # TODO: which file r/m should be selected as final modules?
        shutil.copy(
            '%s/metab_modules_%s.txt' % (out_dir, thr),
            '%s/final_modules_%s.txt' % (out_dir, thr))
def go(species, only_cut_dendogram=False):
    """Run Sridharan's ``Shred_Network_plos2011`` and cut the module hierarchy.

    Unless ``only_cut_dendogram`` is set, a MATLAB driver script
    (``sridharan.m``) is written and executed and its ``sridharan.mat``
    result is copied to ``<resultPath>/<species>/sridharan``.  The hierarchy
    stored in that file is then cut at every relative height listed in
    ``cut_heights.txt``; each cut produces ``react_modules_<cut>.txt`` and an
    identical ``final_modules_<cut>.txt`` (one module of reaction ids per
    line, reactions outside every selected module as singletons).

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :param only_cut_dendogram: when true, skip the MATLAB run and reuse the
        ``sridharan.mat`` already present in the output directory.
    """
    # The leading ``False and`` short-circuits, so biomass removal is
    # currently always disabled and the per-species flag is never read.
    need_biomass_removal = False and my_constants.species_artificial_biomass[species]

    method_dir = r'%s/method/sridharan' % my_constants.basePath
    out_dir = r'%s/%s/sridharan' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])
    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file, True, read_species_compart=True,
        remove_biomass=need_biomass_removal, normalize_stoich=False)

    if not only_cut_dendogram:
        # The script must be closed before MATLAB is started; the context
        # manager also closes it when one of the writes fails.
        with open('sridharan.m', 'w') as script:
            write_line(script, 'addpath %s/code' % method_dir)
            write_line(script, "model = readCbModel('%s')" % source_file)
            if need_biomass_removal:
                for stmt in my_util.get_remove_reaction_matlab_script('model', biomass):
                    write_line(script, stmt)
            write_line(
                script, '[mods, mods_hier] = Shred_Network_plos2011(model, false);')
            write_line(script, 'save sridharan.mat mods mods_hier')

        my_util.prepare_matlab_file_and_exec_and_wait_finish(
            'sridharan', 'sridharan.mat', False)
        shutil.copy('sridharan.mat', out_dir)

    result_path = '%s/sridharan.mat' % out_dir
    modules_hierarchy, mods_mets, mods_rxns, raw_tree_height = read_module_hierarchy(
        result_path, mets, rxns)

    # TODO: very bad job!!!!
    # NOTE: mets are removed because their modules share metabolites!!!
    dummy_tree, tree_height, dummy_thresholds = read_hierarchical_decomposition_sridharan(
        species, result_path, mets, rxns, is_mets=False)

    def dendogram_height(hierarchy_height):
        return tree_height - hierarchy_height + 1

    # NOTE(review): eval() executes the content of the configuration file;
    # keep that file under the same trust as the code itself.
    with open('%s/cut_heights.txt' % method_dir, 'r') as heights_file:
        cut_heights = eval(heights_file.read())

    for thrr in cut_heights[species]:
        # TODO: very bad job
        thr_height = thrr * tree_height

        all_modularized_observations = set()
        cut_mods = []
        # to cut, find all modules at or below the cut height whose super
        # module is above the cut height.  all other observations are added
        # as remaining one-sized modules (they are not part of
        # modules_hierarchy and so need special treatment)
        for m_idx, m_sup_sub in enumerate(modules_hierarchy):
            module_height = m_sup_sub[2]
            super_idx = m_sup_sub[0]
            if super_idx is not None:
                super_module_height = modules_hierarchy[super_idx][2]
            else:
                # NOTE(review): dendogram_height(tree_height + 1) evaluates
                # to 0, so for a module without a super module the test
                # below can only hold for a negative thr_height - confirm
                # this is the intended treatment of root modules.
                super_module_height = tree_height + 1
            if dendogram_height(module_height) <= thr_height < dendogram_height(super_module_height):
                cut_mods.append(mods_rxns[m_idx])
                all_modularized_observations.update(mods_rxns[m_idx])

        for r in rxns:
            if r not in all_modularized_observations:
                cut_mods.append([r])

        react_modules_path = '%s/react_modules_%s.txt' % (out_dir, thrr)
        with open(react_modules_path, 'w') as outr:
            for rmod in cut_mods:
                write_line(outr, ' '.join(rmod))

        # TODO: which file r/m should be selected as final modules?
        shutil.copy(react_modules_path,
                    '%s/final_modules_%s.txt' % (out_dir, thrr))
cached_metabolite_similarities[(m1, m2, species, factor1)] = sim metabolite_pair_similarity_done += 1 return sim def compute_chebi_distance(inf, species, type, is_rmod, is_partial_module, S, mets, reacts, revs, chebi_loaded_model): # type either of cc/bp/bf similarity_table, table_compound_order = get_cached_compound_similarities(species, type) return do_compute_distance(cached_metabolite_similarities, similarity_table, table_compound_order, inf, species, type, is_rmod, is_partial_module, S, mets, reacts, revs, chebi_loaded_model) if __name__ == '__main__': # compute_stats_for_all_species('%s/evaluation/gossto/stats_final.txt' % my_constants.basePath, '%s/evaluation/gossto/stats_reaction_types.txt' % my_constants.basePath) # # if True: # exit() species = 'ecoli_core' out_dir = r'%s/%s/newman' % (my_constants.resultPath, species) src_file = r'%s/dataset/networks/%s' % (my_constants.basePath, my_constants.species_sbml[species]) S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(src_file, True, read_species_compart=True) reader = libsbml.SBMLReader() doc = reader.readSBML(src_file) chebi_loaded_model = ChebiLoadedModel([doc.getModel()]) print compute_chebi_distance('%s/final_modules.txt' % out_dir, species, 'mf', False, False, S, mets, rxns, revs, chebi_loaded_model)
def compute_stats_for_all_species(res_path, reaction_types_res_path, species_filter=None):
    """Write enzyme-annotation coverage statistics for every configured species.

    Two files are produced:

    * ``res_path``: a tab separated table with one row per species, GO type
      (``mf``/``bp``/``cc``) and gene-reliability setting, giving the number
      of reactions in each annotation category.
    * ``reaction_types_res_path``: per species, the reaction-type labels
      found and, for each label, category counts followed by a pretty
      printed dump of the classified reactions.

    :param res_path: path of the summary table to create.
    :param reaction_types_res_path: path of the per-reaction-type report.
    :param species_filter: optional container of species names; when given
        (and non-empty) only those species are processed.
    """
    with open(res_path, 'w') as res:
        res.write('species\ttype\tall\tnot enzymatic\tundefined enzymes\tmissed: from sbml to simtbl\tdisrupted: from sbml to simtbl\trescued by EC Number\n')
        with open(reaction_types_res_path, 'w') as reaction_types_res:
            for species, species_file in my_constants.species_sbml.iteritems():
                if species_filter and species not in species_filter:
                    continue
                _write_species_stats(res, reaction_types_res, species, species_file)


def _write_species_stats(res, reaction_types_res, species, species_file):
    """Append the rows of one species to both open report files."""
    # reaction type label -> set of (reaction, category, substrates, products);
    # shared by all six (GO type, reliability) passes of this species.
    reaction_types = {}
    species_row_counter = 0

    for go_type in ['mf', 'bp', 'cc']:
        for restric_to_reliable_genes in [False, True]:
            by_reaction_type_classifier = []
            try:
                similarity_table, table_gene_order = get_cached_similarity_tables(
                    species, go_type, restric_to_reliable_genes)
            except Exception:
                # Best effort: report the failure and go on with the next
                # combination.  KeyboardInterrupt/SystemExit now propagate.
                traceback.print_exc()
                continue

            src_file = r'%s/dataset/networks/%s' % (my_constants.basePath, species_file)
            S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
                src_file, True, read_species_compart=True)
            reader = libsbml.SBMLReader()
            doc = reader.readSBML(src_file)
            go_loaded_model = eval_util.GoLoadedModel(doc.getModel(), species)
            moredata_loaded_model = eval_util.MoreDataLoadedModel(species)

            # Category codes: -1 not enzymatic, -2 undefined enzymes,
            # -3 missed, -4 disrupted, -5 rescued, 0 fully mapped.
            counts = {-1: 0, -2: 0, -3: 0, -4: 0, -5: 0, 0: 0}
            for ri, r in enumerate(rxns):
                enz_mapping_state, enz_grp = get_even_ecnumber_annotation_term_ids_for_reaction(
                    r, species, 'go_distance', go_loaded_model,
                    moredata_loaded_model, stats_mode=True)

                if enz_mapping_state in (-1, -2, -3, -5):
                    decided_reaction_type = enz_mapping_state
                else:
                    enz_all = accumulate_all_enzymes_altogether(enz_grp)
                    absent = [e not in similarity_table for e in enz_all]
                    if all(absent):
                        decided_reaction_type = -3
                    elif any(absent):
                        decided_reaction_type = -4
                    else:
                        decided_reaction_type = 0
                counts[decided_reaction_type] += 1

                # The label is the first match of: trailing lower-case
                # letters, an R_EX_ / R_DM_ prefix, or a trailing 't2'.
                reaction_type = re.findall('[a-z]+$|^R_EX_|^R_DM_|t2$', r)
                rt = reaction_type[0] if reaction_type else 'unknown'

                substrates = tuple([m for mi, m in enumerate(mets) if S[mi][ri] < 0])
                products = tuple([m for mi, m in enumerate(mets) if S[mi][ri] > 0])
                reaction_types.setdefault(rt, set()).add(
                    (r, decided_reaction_type, substrates, products))
                by_reaction_type_classifier.append((decided_reaction_type, rt))

            # Number of non-enzymatic reactions of this pass per type label.
            ne_distribution = dict.fromkeys(reaction_types, 0)
            for decided, rt in by_reaction_type_classifier:
                if decided == -1:
                    ne_distribution[rt] += 1

            found_types0 = sorted(reaction_types, key=lambda x: ne_distribution[x], reverse=True)
            # Column order: R_EX_, t, the sum of all remaining labels, then
            # the remaining labels by decreasing count.
            found_types = [ft for ft in ('R_EX_', 't') if ft in found_types0]
            found_types.append('SUM OTHERS')
            sum_others = 0
            for ft in found_types0:
                if ft not in ('R_EX_', 't'):
                    found_types.append(ft)
                    sum_others += ne_distribution[ft]
            ne_distribution['SUM OTHERS'] = sum_others

            # The first row written for a species carries the label header,
            # the second row the matching counts; later rows carry nothing.
            ne_distribution_line = ''
            if species_row_counter == 0:
                ne_distribution_line = '\t'.join(found_types)
            elif species_row_counter == 1:
                ne_distribution_line = '\t'.join([str(ne_distribution[k]) for k in found_types])
            species_row_counter += 1

            res.write('%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t\t\t\t\t%s\n' % (
                species,
                '%s_%s' % (go_type.lower(), 'g' if restric_to_reliable_genes else 'f'),
                len(rxns), counts[-1], counts[-2], counts[-3], counts[-4],
                counts[-5], ne_distribution_line))

    reaction_types_res.write('\n--------------------------------------------FOR %s:\n' % species)
    reaction_types_res.write(' '.join(sorted(reaction_types)) + '\n')
    for k, v in reaction_types.iteritems():
        tally = {-1: 0, -2: 0, -3: 0, -4: 0, -5: 0}
        ok_count = 0
        for entry in v:
            if entry[1] in tally:
                tally[entry[1]] += 1
            else:
                ok_count += 1
        reaction_types_res.write(k + ': ne: %d, undef: %d, mis: %d, dis: %d, rescued: %d, ok: %d\n' % (
            tally[-1], tally[-2], tally[-3], tally[-4], tally[-5], ok_count))
        reaction_types_res.write(' ')
        pp.pprint(v, reaction_types_res)
def go(species, pseudo_species=None):
    """Write the input files of Verwoerd's method for ``species``.

    Two files are created, either inside the ``pseudo_species`` directory
    (``verwoerd.tsv`` / ``verwoerd-ext.txt``) or in the current directory
    (``verwoerd_<species>.tsv`` / ``verwoerd-ext_<species>.txt``):

    * the ``.tsv`` file with the sections ``<Title>``, ``<Reactions>``,
      ``<ReversibleReactions>``, ``<InternalCompounds>``,
      ``<ExternalCompounds>`` (left empty) and ``<Stoichiometry>`` in
      MatrixMarket coordinate form;
    * the ``-ext`` file listing the initially external metabolites, one per
      line.

    Which metabolites are external is decided by the per-species entry of
    ``external_strategy.txt`` (``'KEGG_PSEUDO_NETWORK'``, ``'BOUNDARY'`` or
    anything else, which falls back to the ``_e`` name suffix).

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :param pseudo_species: optional directory name; created when given and
        used as the location of the output files.
    :raises Exception: for a reaction without substrates, an exchange
        reaction with more than one substrate (``'BOUNDARY'`` strategy), or a
        reaction lacking substrates/products in a KEGG pseudo network.
    """
    method_dir = r'%s/method/verwoerd' % my_constants.basePath
    out_dir = r'%s/%s/verwoerd' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])
    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file, True, read_species_compart=True, remove_biomass=True,
        normalize_stoich=True)

    # NOTE(review): eval() executes the content of the configuration file;
    # keep that file under the same trust as the code itself.
    with open('%s/external_strategy.txt' % method_dir, 'r') as strategy_file:
        external_strategy = eval(strategy_file.read())
    strategy = external_strategy[species]

    def reaction_sides(j):
        """Return (substrates, products) of column ``j`` as [stoichiometry, met_index] pairs."""
        subs = []
        prods = []
        for i in range(len(S)):
            coeff = S[i][j]
            if coeff > 0:
                prods.append([coeff, i])
            elif coeff < 0:
                subs.append([-coeff, i])
        return subs, prods

    # both may either be with/without boundary reactions which is specified by the strategy
    with_boundary_reacts = {}
    with_boundary_mets = list(mets)
    with_boundary_comparts = list(met_comparts)
    next_boundary_met_idx = len(mets)

    if strategy == 'KEGG_PSEUDO_NETWORK':
        # this implies networks are built from kegg by merging pathways
        subs_to_reacts = {}
        prods_to_reacts = {}
        for j, r in enumerate(rxns):
            subs, prods = reaction_sides(j)
            with_boundary_reacts[r] = [subs, prods]

            # A reversible reaction uses each participant both as substrate
            # and as product.
            reversible = revs[j] != 0
            has_subs, has_prods = False, False
            for i in range(len(S)):
                coeff = S[i][j]
                if coeff > 0 or (coeff < 0 and reversible):
                    prods_to_reacts.setdefault(mets[i], set()).add(r)
                    has_prods = True
                if coeff < 0 or (coeff > 0 and reversible):
                    subs_to_reacts.setdefault(mets[i], set()).add(r)
                    has_subs = True
            if not has_subs or not has_prods:
                raise Exception(
                    'reaction without substrate/products in KEGG pseudo networks! should never happen!'
                )

        # checks whether the metabolite is internal in presence of reversiblity
        def is_internal(met):
            if met not in prods_to_reacts or met not in subs_to_reacts:
                return False
            if prods_to_reacts[met] == subs_to_reacts[met] and len(prods_to_reacts[met]) == 1:
                # Produced and consumed by one and the same reaction only:
                # internal just when it appears on both sides of it.
                the_react = list(prods_to_reacts[met])[0]
                the_react_subs = [mets[ss[1]] for ss in with_boundary_reacts[the_react][0]]
                the_react_prods = [mets[ss[1]] for ss in with_boundary_reacts[the_react][1]]
                if not (met in the_react_subs and met in the_react_prods):  # not polymeric
                    return False
            return True

        initial_external_metabolites_idxs = [
            mets.index(m) for m in with_boundary_mets if not is_internal(m)
        ]
    else:
        # may add to with_boundary_mets based on strategy.
        # may update with_boundary_reacts according to newly added with_boundary_mets
        for j, r in enumerate(rxns):
            subs, prods = reaction_sides(j)
            if not subs:
                raise Exception(
                    'reaction without substrate! should never happen!')
            if not prods:
                if strategy == 'BOUNDARY':
                    if len(subs) != 1:
                        raise Exception(
                            'EXCHANGE reaction with more than one substrate metabolite! unacceptable.'
                        )
                    new_boundary_met = mets[subs[0][1]] + '_BND'
                    if new_boundary_met in with_boundary_mets:
                        # Reuse the boundary metabolite created earlier.
                        boundary_idx = with_boundary_mets.index(new_boundary_met)
                    else:
                        with_boundary_mets.append(new_boundary_met)
                        with_boundary_comparts.append('B')
                        boundary_idx = next_boundary_met_idx
                        next_boundary_met_idx += 1
                    prods = [[subs[0][0], boundary_idx]]
                else:
                    print('skipping reaction without product: EXCHANGE!')
                    continue
            with_boundary_reacts[r] = [subs, prods]

        external_suffix = '_BND' if strategy == 'BOUNDARY' else '_e'
        initial_external_metabolites_idxs = [
            i for i, m in enumerate(with_boundary_mets)
            if m.endswith(external_suffix)
        ]

    # Stoichiometry entries as "row<TAB>column<TAB>value" with 1-based
    # indices; columns follow the order of rxns.
    s_lines = []
    for ri, r in enumerate(rxns):
        if r not in with_boundary_reacts:  # exchange reactions
            continue
        subs, prods = with_boundary_reacts[r]
        for stoich, met_idx in subs:
            s_lines.append('%d\t%d\t%s' % (met_idx + 1, ri + 1, -stoich))
        for stoich, met_idx in prods:
            s_lines.append('%d\t%d\t%s' % (met_idx + 1, ri + 1, stoich))

    if pseudo_species:
        my_util.mkdir_p(pseudo_species)
        network_path = '%s/verwoerd.tsv' % pseudo_species
        externals_path = '%s/verwoerd-ext.txt' % pseudo_species
    else:
        network_path = 'verwoerd_%s.tsv' % species
        externals_path = 'verwoerd-ext_%s.txt' % species

    with open(network_path, 'w') as inf:
        write_line(inf, '<Title>')
        write_line(inf, species.replace('/', ''))
        write_line(inf, '<Reactions>')
        write_line(inf, '\t'.join(rxns))
        write_line(inf, '<ReversibleReactions>')
        write_line(inf, '\t'.join([r for i, r in enumerate(rxns) if revs[i] == 1]))
        write_line(inf, '<InternalCompounds>')
        write_line(
            inf, '\t'.join([
                '%s %s' % (met, compart)
                for met, compart in zip(with_boundary_mets, with_boundary_comparts)
            ]))
        # The external compounds section is intentionally left without content.
        write_line(inf, '<ExternalCompounds>')
        write_line(inf, '<Stoichiometry>')
        write_line(inf, '%%MatrixMarket matrix coordinate real general')
        # Size line: rows, columns, number of entries.  The column count is
        # len(rxns) because entries are indexed by position in rxns; counting
        # only the kept reactions under-declares it when exchange reactions
        # were skipped above.
        write_line(
            inf, '\t\t\t%d\t%d\t%d' %
            (len(with_boundary_mets), len(rxns), len(s_lines)))
        for sl in s_lines:
            write_line(inf, sl)

    with open(externals_path, 'w') as inf:
        for init_extern_idx in initial_external_metabolites_idxs:
            write_line(inf, with_boundary_mets[init_extern_idx])
def go(species, pseudo_species=None):
    """Collect the manually produced results of Verwoerd's method.

    For every threshold listed for ``species`` in ``thresholds.txt`` the
    block files ``<name>_Block_<i>.tsv`` are read from the result directory
    (``<pseudo_species>/Subnetworks_<thr>`` or ``<species>_<thr>``), copied
    to ``<resultPath>/<species>/verwoerd/subsystem_<thr>_<i>.out`` and
    summarised in ``react_modules_<thr>.txt``,
    ``metab_react_modules_<thr>.txt``, ``metab_modules_<thr>.txt`` and
    ``final_modules_<thr>.txt``.

    :param species: key of ``my_constants.species_sbml`` selecting the network.
    :param pseudo_species: optional directory holding the results; when given
        the block file names use ``species`` with every ``/`` removed.
    """
    method_dir = r'%s/method/verwoerd' % my_constants.basePath
    out_dir = r'%s/%s/verwoerd' % (my_constants.resultPath, species)
    my_util.mkdir_p(out_dir)

    source_file = '%s/dataset/networks/%s' % (
        my_constants.basePath, my_constants.species_sbml[species])
    # The network itself is not used below; the call is kept as it was.
    S, mets, rxns, revs, met_names, rxn_names, biomass, met_comparts = importer.sbmlStoichiometricMatrix(
        source_file, True, read_species_compart=True, remove_biomass=True,
        normalize_stoich=True)

    # for each species, this may be defined or simply be '' which is ignored
    # NOTE(review): eval() executes the content of the configuration file;
    # keep that file under the same trust as the code itself.
    with open('%s/thresholds.txt' % method_dir, 'r') as thresholds_file:
        thresholds = eval(thresholds_file.read())

    for thr in thresholds[species]:
        if pseudo_species:
            manual_results_dir = '%s/Subnetworks_%s' % (pseudo_species, thr)
            block_prefix = species.replace('/', '')
        else:
            manual_results_dir = '%s_%s' % (species, thr)
            block_prefix = species

        rmods = {}
        mmods = {}

        def read_module_and_move(resf_path, i):
            rmod, mmod = read_result_file(resf_path)
            rmods['%d' % i] = rmod
            mmods['%d' % i] = mmod
            int_out_dir_path = '%s/subsystem_%s_%d.out' % (out_dir, thr, i)
            shutil.copy(resf_path, int_out_dir_path)

        run_for_all_template_files(
            manual_results_dir + '/' + block_prefix + '_Block_%d.tsv',
            read_module_and_move)

        # One fixed ordering so that line k of the files below refers to the
        # same module (metab_modules additionally omits empty modules).
        module_names = list(rmods)
        with open('%s/react_modules_%s.txt' % (out_dir, thr), 'w') as outr:
            for mname in module_names:
                write_line(outr, ' '.join(rmods[mname]))
        with open('%s/metab_react_modules_%s.txt' % (out_dir, thr), 'w') as outrm:
            for mname in module_names:
                write_line(outrm, ' '.join(mmods[mname]))
        with open('%s/metab_modules_%s.txt' % (out_dir, thr), 'w') as outm:
            for mname in module_names:
                if mmods[mname]:
                    write_line(outm, ' '.join(mmods[mname]))

        # TODO: which file r/m should be selected as final modules?
        shutil.copy(
            '%s/metab_modules_%s.txt' % (out_dir, thr),
            '%s/final_modules_%s.txt' % (out_dir, thr))