def load_seq_pairs(fname, metab_exclude=None):
    """
    Load metabolic data from `fname` and return the output of `seq_pairs`.

    fname: passed straight to load_metabolic_data.
    metab_exclude: should be in sequential_metab/metabolites_exclude.txt.
        When truthy, the loaded data is first reduced with
        filter_rxns_metabs; when None or empty, nothing is filtered.

    Returns whatever seq_pairs produces for the (possibly filtered) data,
    using the inverted 'Hs' -> 'Hs_entrez' conversion dict as conv_dict.
    """
    S, entrez_enzymes, rnames, mnames = load_metabolic_data(fname)
    # Inverted so lookups go from the Hs_entrez side back to Hs ids
    # (presumably entrez id -> set of gene ids; confirm in
    # ut.dict_inverse_sets).
    entrez_to_hs = ut.dict_inverse_sets(orth.convert_dict('Hs','Hs_entrez'))
    if not metab_exclude:
        print("No filtering of metabolites and rxns.")
    else:
        print("Excluding %s metabolites, filtering rxns" % len(metab_exclude))
        S, entrez_enzymes = filter_rxns_metabs(
            S, entrez_enzymes, rnames, mnames, metab_exclude)
    return seq_pairs(S, entrez_enzymes, conv_dict=entrez_to_hs)
def convdict_from_fname(species, ext_file):
    """
    Look up the id-conversion dict implied by the name of `ext_file`.

    The target type is taken from the first two underscore-separated
    fields of the file's base name (e.g. Hs_entrez, Dm_fbgn) and handed to
    orth.convert_dict together with `species`.

    Returns the dict from orth.convert_dict, or None when that call raises
    IOError or itself returns None. In every case a one-line status
    message is printed.

    Doesn't yet work for the general case of possibly needing to go two
    steps--to the new species, then to a new seqdb.
    """
    basename = ext_file.split('/')[-1]
    totype = '_'.join(basename.split('_')[:2])
    # If there's no matching conversion file, assume it's not needed.
    try:
        mapping = orth.convert_dict(species, totype)
    except IOError as err:
        print(' '.join(map(str, ('No external conversion file:', species,
                                 totype, err.strerror))))
        return None
    if mapping is None:
        print(' '.join(map(str, ('No external conversion file:', species,
                                 totype))))
    else:
        print(' '.join(map(str, ('Conversion file:', species, totype,
                                 len(mapping), 'keys'))))
    return mapping
def load_kegg_sequentials(fname, do_convert=True):
    """
    Build pairs of consecutive entries along the paths loaded from `fname`.

    fname: passed to load_kegg_brite.
    do_convert: when true, the unique pairs are run through
        convert_pairs_singles with the inverted 'Hs' -> 'Hs_entrez' dict
        and that result is returned; otherwise the unique pairs are
        returned unconverted.

    Every adjacent pair of groups in a path is expanded to all
    member-by-member combinations, then deduplicated with pu.dedupe.
    Counts for each stage are printed.
    """
    def adjacent_pairs(steps):
        # Each element paired with its successor, in path order.
        linked = []
        for pos in range(1, len(steps)):
            linked.append((steps[pos - 1], steps[pos]))
        return linked

    brite = load_kegg_brite(fname)
    paths = [ut.i1(entry) for entry in brite.values() if entry]
    group_pairs = ut.flatten([adjacent_pairs(path) for path in paths])
    # NOTE: an optional grouped return (return_groups, via
    # convert_groups_singles) was sketched here but never enabled.
    single_pairs = []
    for left_group, right_group in group_pairs:
        for left in left_group:
            for right in right_group:
                single_pairs.append((left, right))
    unique_pairs = pu.dedupe(single_pairs)
    print("%s total, %s single, %s unique pairs returned" % (
        len(group_pairs), len(single_pairs), len(unique_pairs)))
    if not do_convert:
        return unique_pairs
    conv_dict = ut.dict_inverse_sets(orth.convert_dict('Hs','Hs_entrez'))
    conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
    print("%s converted pairs with 1-1 matches" % len(conv_pairs))
    return conv_pairs