def print_stuff(line):
    """Print a one-row summary of the cluster annotation `line`.

    Columns, in order: position of the cluster in `sorted_clusters`, colored
    V/D/J gene names, number of unique ids, mean and median of `n_mutations`,
    mean of `mut_freqs`, cluster size as a fraction of `n_total`,
    `cdr3_length` divided by 3, the translated naive CDR3 (left-justified to
    30 characters), and the value returned by `utils.fay_wu_h`.

    Reads `cluster`, `sorted_clusters` and `n_total` from the enclosing scope.
    """
    position = sorted_clusters.index(cluster)

    # Per the original author's note, the CDR3 spans the first base of the
    # cysteine codon through the last base of the tryptophan codon. Only the
    # naive copy is used here; the CDR3 of sequence 0 is discarded.
    # TODO: translating the mature CDR3s (to search them against seed
    # sequences) was started by the original author but never finished.
    naive_nt, _ = utils.subset_sequences(line, iseq=0, restrict_to_region='cdr3')
    aa_column = '%-30s' % Seq(naive_nt).translate()

    # Color the CDR3 red as soon as one id containing '-ig' is found
    # (presumably these mark seed sequences -- confirm with the caller).
    for uid in line['unique_ids']:
        if '-ig' in uid:
            aa_column = utils.color('red', aa_column, width=30)
            break

    row = (
        position,
        utils.color_gene(line['v_gene'], width=15),
        utils.color_gene(line['d_gene'], width=15),
        utils.color_gene(line['j_gene'], width=10),
        len(line['unique_ids']),
        numpy.mean(line['n_mutations']),
        numpy.median(line['n_mutations']),
        numpy.mean(line['mut_freqs']),
        float(len(cluster)) / n_total,
        (line['cdr3_length'] / 3),
        aa_column,
        utils.fay_wu_h(line, debug=False),
    )
    # A single parenthesized argument prints identically under Python 2.
    print('%4s %s %s %s %5d %5d %5d %7.3f %8.4f %2d %s %4.2f' % row)
def get_cdr3_title(self, annotation):
    """Return the translated naive CDR3 of `annotation`.

    The naive CDR3 nucleotide sequence comes from `utils.subset_sequences`
    (with iseq=0). When its length is not a multiple of three it is padded
    with 'N' up to the next full codon before translation, and
    ' (out of frame)' is appended to the translated result.
    """
    cdr3_nt, _ = utils.subset_sequences(annotation, iseq=0, restrict_to_region='cdr3')

    suffix = ''
    leftover = len(cdr3_nt) % 3
    if leftover != 0:
        # Complete the trailing partial codon so translate() gets whole codons.
        cdr3_nt += (3 - leftover) * 'N'
        suffix += ' (out of frame)'

    return self.Seq.Seq(cdr3_nt).translate() + suffix
def print_stuff(line):
    """Print a detailed report for `line` if its naive CDR3 matches `args.cdr3`.

    Nothing is printed unless `args.cdr3` occurs in the padded (and possibly
    colored) amino-acid CDR3 string. For a match the output is: two header
    lines, the summary row, the length of `naive_seq`, the unique ids (passed
    through `getkey`), a blank line, and whatever `utils.print_reco_event`
    returns.

    Reads `cluster`, `sorted_clusters`, `n_total`, `args` and `getkey` from
    the enclosing scope.

    NOTE(review): the source was whitespace-collapsed; every print below is
    assumed to sit inside the `args.cdr3` check -- confirm against the
    original file.
    """
    position = sorted_clusters.index(cluster)

    # Per the original author's note: subset_sequences returns the CDR3
    # nucleotides of the naive sequence and of the first mutated sequence
    # (iseq 0); CDR3 = first base of cysteine through last base of tryptophan.
    # Only the naive one is used.
    naive_nt, _ = utils.subset_sequences(line, iseq=0, restrict_to_region='cdr3')
    aa_column = '%-30s' % Seq(naive_nt).translate()

    # If a cluster contains one of our seed seqs, color this CDR3 red.
    has_seed = any('-ig' in uid for uid in line['unique_ids'])
    if has_seed:
        aa_column = utils.color('red', aa_column, width=30)

    # Only report clusters whose naive CDR3 matches the --cdr3 argument.
    if args.cdr3 not in aa_column:
        return

    print('index genes size n muts SHM rep frac CDR3 FayWuH')
    print(' mean med len seq')
    row = (
        position,
        utils.color_gene(line['v_gene'], width=15),
        utils.color_gene(line['d_gene'], width=15),
        utils.color_gene(line['j_gene'], width=10),
        len(line['unique_ids']),
        numpy.mean(line['n_mutations']),
        numpy.median(line['n_mutations']),
        numpy.mean(line['mut_freqs']),
        float(len(cluster)) / n_total,
        (line['cdr3_length']/3),
        aa_column,
        utils.fay_wu_h(line, debug=False),
    )
    print('%4s %s %s %s %5d %5d %5d %7.3f %8.4f %2d %s %4.2f' % row)

    # '%s %s' reproduces the single space that a two-item Python 2 print
    # statement puts between its items.
    print('%s %s' % (len(line['naive_seq']), 'length of naive seq'))
    print('%s %s' % ('unique_ids: ', getkey(line['unique_ids'])))
    print('')
    # The return value of print_reco_event is itself printed, as before.
    print(utils.print_reco_event(line))
def naive_cdr3(info):
    """Return the naive CDR3 sequence of the annotation `info`.

    This is the first of the two values that `utils.subset_sequences` returns
    for iseq=0 restricted to the 'cdr3' region; the second is discarded.
    """
    naive_part, _unused = utils.subset_sequences(
        info,
        iseq=0,
        restrict_to_region='cdr3',
    )
    return naive_part
def print_stuff(line):
    """Score the clonal family described by `line` and print a summary row.

    The score is the sum of four components:

    * cluster-size rank (index of `cluster` in `sorted_clusters`):
      4 points for rank < 25, 3 up to 50, 2 up to 75, 1 up to 100, else 0;
    * SHM rank (index of `cluster` in `shm_clusters`): same tiers;
    * `utils.fay_wu_h(line)`: 4 points if <= -20, 3 if <= -10, 2 if <= 0,
      1 if <= 10, else 0;
    * bnAb gene usage: 4 points if the V gene (allele suffix after '*'
      stripped) appears in any of `cd4bs_genes`, `glycan_genes`,
      `bridging_genes` or `mper_genes`.

    The printed row holds the score, the cluster-size rank, colored V/D/J
    gene names, number of unique ids, mean and median of `n_mutations`, mean
    of `mut_freqs`, cluster size as a fraction of `n_total`, `cdr3_length`
    divided by 3, the translated naive CDR3, and the Fay-Wu H value.

    Reads `cluster`, `sorted_clusters`, `shm_clusters`, `n_total` and the
    four gene collections from the enclosing scope.
    """

    def _tier_points(value, tiers):
        """Return the points of the first (upper_bound, points) tier containing value, else 0."""
        for upper_bound, points in tiers:
            if value <= upper_bound:
                return points
        return 0

    # Ranks are integer list indices, so "< 25" is the same as "<= 24".
    rank_tiers = ((24, 4), (50, 3), (75, 2), (100, 1))
    fay_wu_tiers = ((-20, 4), (-10, 3), (0, 2), (10, 1))

    intscore = 0  # clonal family score, accumulated below
    cluster_index = sorted_clusters.index(cluster)
    shm_index = shm_clusters.index(cluster)

    # Per the original author's note, the CDR3 spans the first base of the
    # cysteine codon through the last base of the tryptophan codon. Only the
    # naive copy is used; the CDR3 of sequence 0 is discarded.
    # TODO: translating the mature CDR3s (to search them against seed
    # sequences) was started by the original author but never finished.
    naive_cdr3, _ = utils.subset_sequences(line, iseq=0, restrict_to_region='cdr3')
    cdr3_aa = '%-30s' % Seq(naive_cdr3).translate()
    # Ids containing '-ig' get the CDR3 colored red (presumably they mark
    # seed sequences -- confirm with the caller).
    if any('-ig' in uid for uid in line['unique_ids']):
        cdr3_aa = utils.color('red', cdr3_aa, width=30)

    # score clusters based on cluster size
    intscore += _tier_points(cluster_index, rank_tiers)
    # score clusters based on SHM
    intscore += _tier_points(shm_index, rank_tiers)

    # score clusters based on SFS; compute H once and reuse it for the row
    fay_wu_h = utils.fay_wu_h(line, debug=False)
    intscore += _tier_points(fay_wu_h, fay_wu_tiers)

    # Score by bnAb gene usage. Each collection is tested separately: the
    # previous `a or b or c or d` expression collapsed to the first non-empty
    # collection, so the remaining ones were never searched.
    # Beware this does not include CDR3 length of bnAb VH genes.
    v_gene_name = line['v_gene'].split('*')[0]
    bnab_gene_sets = (cd4bs_genes, glycan_genes, bridging_genes, mper_genes)
    if any(v_gene_name in genes for genes in bnab_gene_sets if genes):
        intscore += 4

    row = (
        intscore,
        cluster_index,
        utils.color_gene(line['v_gene'], width=15),
        utils.color_gene(line['d_gene'], width=15),
        utils.color_gene(line['j_gene'], width=10),
        len(line['unique_ids']),
        numpy.mean(line['n_mutations']),
        numpy.median(line['n_mutations']),
        numpy.mean(line['mut_freqs']),
        float(len(cluster)) / n_total,
        (line['cdr3_length'] / 3),
        cdr3_aa,
        fay_wu_h,
    )
    # A single parenthesized argument prints identically under Python 2.
    print('%4s %4s %s %s %s %5d %5d %5d %7.3f %8.4f %2d %s %4.2f' % row)