def seq_pairs(S, enzymes, mat_adj=None, conv_dict=None, return_groups=False):
    """
    Build enzyme pairs from the positive entries of an adjacency matrix.

    - S: stoichiometric matrix of reactants (-) and products (+)
    - enzymes: list of enzymes corresponding to columns in S; indexed here
      by both the row and the column indices of the adjacency matrix.
    - mat_adj: optional precomputed adjacency matrix; when None it is
      built with adj_matrix(S).
    - conv_dict: optional conversion mapping; when truthy the result is
      passed through convert_groups_singles (return_groups=True) or
      convert_pairs_singles (return_groups=False).
    - return_groups: when True, return the labeled pairs as they are
      (no whitespace splitting, no dedupe, no summary printout).

    Returns the list of labeled group pairs when return_groups is True;
    otherwise the deduplicated single pairs (or their converted form when
    conv_dict is given).  Prints pair counts on the non-group path.
    """
    if mat_adj is None:
        mat_adj = adj_matrix(S)
    # np.where can hand back 2-D (1, n) index arrays (presumably for
    # np.matrix input, which the old `[0]` indexing relied on) or flat 1-D
    # arrays (plain ndarray input).  Flattening handles both; the old
    # indexing produced scalars in the 1-D case.
    rows, cols = [np.asarray(idx).ravel() for idx in np.where(mat_adj > 0)]
    index_pairs = ut.zip_exact(rows, cols)
    # dedup, keeping only upper off-diagonal
    upper_pairs = [(row, col) for row, col in index_pairs if row < col]
    pairs = [(enzymes[row], enzymes[col]) for row, col in upper_pairs]
    # filter out blanks
    labeled_pairs = [(x, y) for x, y in pairs if x and y]
    if return_groups:
        if conv_dict:
            return convert_groups_singles(labeled_pairs, conv_dict)
        return labeled_pairs
    # A label may hold several whitespace-separated ids; expand each
    # labeled pair into every combination of its single ids.
    single_pairs = [(xi, yi)
                    for x, y in labeled_pairs
                    for xi in x.split()
                    for yi in y.split()]
    unique_pairs = pu.dedupe(single_pairs)
    print("%s total, %s labeled, %s single, %s unique pairs returned" %
          (len(pairs), len(labeled_pairs), len(single_pairs),
           len(unique_pairs)))
    if conv_dict:
        conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
        print("%s converted pairs with 1-1 matches" % len(conv_pairs))
        return conv_pairs
    return unique_pairs
def pairs_exceeding(elut, skey, thresh):
    """
    Return protein pairs from elut selected according to score key skey.

    - skey == 'apex': pairs come from matching_pairs applied to the
      apex_array of ApexScores(elut); thresh is not consulted here.
    - any other skey: the score matrix from scorekey_elution(skey, elut,
      None) is compared with `> thresh`, and each qualifying (row, col)
      position is mapped to a pair of protein ids.  If scorekey_elution
      returns a replacement protein list, it is used for that mapping in
      place of elut.prots.

    Original note: doesn't return self-self interactions.
    NOTE(review): no diagonal filter is applied in this function, so that
    presumably depends on matching_pairs / the score matrix -- confirm.
    """
    prot_ids = np.array(elut.prots)
    if skey == 'apex':
        return matching_pairs(ApexScores(elut).apex_array, prot_ids)
    # loading precomputed indices is so far massively slower than this
    score_mat, _, replacement_prots = scorekey_elution(skey, elut, None)
    if replacement_prots is not None:
        prot_ids = np.array(replacement_prots)
    hit_rows, hit_cols = np.where(score_mat > thresh)
    first_prots = prot_ids[hit_rows]
    second_prots = prot_ids[hit_cols]
    return ut.zip_exact(first_prots, second_prots)
def transpose(d, fin, fout):
    """
    Transpose the tab-delimited file fin and write the result to fout.

    - d: directory whose parent (d + '/..') is put on sys.path so that the
      `utils` module can be imported.
    - fin: path of the input tab file, read with utils.load_tab_file.
    - fout: path of the output tab file, written with utils.write_tab_file.

    Steps: drop the last input line if its first field starts with '#',
    transpose rows into columns, strip the header entry from the first
    column, and drop the second column when its header contains 'total'
    or 'descr' (case-insensitive).

    Raises AssertionError if the header entry being stripped neither
    starts with '#' nor equals 'Locus'.
    """
    utils_dir = d + '/..'
    # Avoid growing sys.path with a duplicate entry on every call.
    if utils_dir not in sys.path:
        sys.path.append(utils_dir)
    import utils as ut
    lines = list(ut.load_tab_file(fin))
    if lines[-1][0].startswith('#'):
        # ignore comments, such as last line in spcount output
        lines = lines[:-1]
        print("skipping last line")
    # zip messes up if these files aren't neat, hence zip_exact.  list()
    # guarantees a mutable sequence for the in-place edits below.
    cols = list(ut.zip_exact(*lines))
    # _After_ zipping, get rid of the column 1 header--R doesn't like it.
    first_col = list(cols[0])
    print(first_col[0][0])
    # make sure we're removing what we should be
    assert (first_col[0][0] == '#' or first_col[0] == 'Locus')
    del first_col[0]
    cols[0] = tuple(first_col)
    second_title = cols[1][0].lower()
    # get rid of the total/descr column
    if 'total' in second_title or 'descr' in second_title:
        # Positional delete: always drops the second column itself rather
        # than the first column that happens to compare equal to it.
        del cols[1]
        print("removing second column--extraneous")
    ut.write_tab_file(cols, fout)
def combine_ppis_matched(ppisa, ppisb):
    """
    Merge two parallel PPI lists item by item.

    Items are paired up with ut.zip_exact.  Each output tuple takes fields
    0, 1 and 3 from the ppisa item; field 2 is combine_or applied to field
    2 of the ppisa item and field 2 of the ppisb item.
    """
    merged = []
    for item_a, item_b in ut.zip_exact(ppisa, ppisb):
        merged.append((item_a[0],
                       item_a[1],
                       combine_or(item_a[2], item_b[2]),
                       item_a[3]))
    return merged