sys.path.insert(1, current_script_dir)
import utils
import plotting

# ----------------------------------------------------------------------------------------
# Read the germline set from <datadir> and group the aligned V genes by primary version.
datadir = 'data/imgt'
xtitles = {
    'indels' : 'fraction of positions indel\'d',
    'subs' : 'substitution fraction'
}
glfo = utils.read_germline_set(datadir)
vgenes = glfo['aligned-genes']['v'].keys()

# map each primary version to the list of V genes that share it (insertion order is kept)
pversions = OrderedDict()
for vg in vgenes:
    pv = utils.primary_version(vg)
    pversions.setdefault(pv, []).append(vg)

# remove primary versions that only have one gene
# NOTE iterate over a snapshot of the keys: deleting from a dict while iterating over the dict itself
# is unsupported (python 3 raises RuntimeError: OrderedDict mutated during iteration)
for pv in list(pversions):
    if len(pversions[pv]) == 1:
        print('removing single-gene pv %s' % pv)  # single-argument form prints the same text under python 2 and 3
        del pversions[pv]

# ----------------------------------------------------------------------------------------
def indel_difference_fraction(seq1, seq2):
    """ fraction of positions in the aligned sequences <seq1> <seq2> which are dots in exactly one of them """
    if len(seq1) != len(seq2):
        raise Exception('sequences different length:\n %s\n %s' % (seq1, seq2))
import utils
import glutils
import plotting

# ----------------------------------------------------------------------------------------
# Read the germline info from <datadir> and group the aligned V genes by primary version.
datadir = 'data/germlines/human'
xtitles = {
    'indels' : 'fraction of positions indel\'d',
    'subs' : 'substitution fraction'
}
glfo = glutils.read_glfo(datadir)
vgenes = glfo['aligned-genes']['v'].keys()

# map each primary version to the list of V genes that share it (insertion order is kept)
pversions = OrderedDict()
for vg in vgenes:
    pv = utils.primary_version(vg)
    pversions.setdefault(pv, []).append(vg)

# remove primary versions that only have one gene
# NOTE iterate over a snapshot of the keys: deleting from a dict while iterating over the dict itself
# is unsupported (python 3 raises RuntimeError: OrderedDict mutated during iteration)
for pv in list(pversions):
    if len(pversions[pv]) == 1:
        print('removing single-gene pv %s' % pv)  # single-argument form prints the same text under python 2 and 3
        del pversions[pv]

# ----------------------------------------------------------------------------------------
def indel_difference_fraction(seq1, seq2):
    """ fraction of positions in the aligned sequences <seq1> <seq2> which are dots in exactly one of them """
    if len(seq1) != len(seq2):
        raise Exception('sequences different length:\n %s\n %s' % (seq1, seq2))
def get_base(gene):
    """ return the primary version of <gene>, with '-' plus its sub version appended when utils.sub_version() returns one """
    primary = utils.primary_version(gene)
    sub = utils.sub_version(gene)
    if sub is None:  # nothing to append, so the primary version alone is the base
        return primary
    return primary + '-' + sub
sorted(set([get_base(g) for g in glfo['seqs'][args.region]]))))) args.other_genes = utils.get_arg_list(args.other_genes) if args.other_genes is not None: genes += args.other_genes seqstrs = ['' for _ in range(len(genes))] snpstrs = ['' for _ in range(len(genes))] gene_str_width = max( [utils.len_excluding_colors(utils.color_gene(g)) for g in genes]) codon_positions = glfo[utils.conserved_codons[args.locus][args.region] + '-positions'] if args.region != 'd' else None max_seq_len = max([len(glfo['seqs'][args.region][g]) for g in genes]) ref_gene = genes[0] if args.ref_allele is None else utils.rejoin_gene( args.locus, args.region, utils.primary_version(genes[0]), utils.sub_version(genes[0]), args.ref_allele) if ref_gene != genes[0]: genes.remove(ref_gene) genes.insert(0, ref_gene) ref_seq = glfo['seqs'][args.region][ref_gene] ref_pos = codon_positions[ref_gene] for igene in range(0, len(genes)): gene = genes[igene] seq = glfo['seqs'][args.region][gene] pos = codon_positions[gene] if pos < ref_pos: # align the codon position in the case that this seq is shorter up to the codon seq = (ref_pos - pos) * '-' + seq pos += (ref_pos - pos)
sys.path.insert(1, partis_dir + '/python')
import utils
import glutils

# Command line: --base is required; --alleles and --other-genes are optional and are run through utils.get_arg_list().
parser = argparse.ArgumentParser()
parser.add_argument('--base', required=True)
parser.add_argument('--alleles')
parser.add_argument('--other-genes')
parser.add_argument('--region', default='v')
parser.add_argument('--chain', default='h')
parser.add_argument('--glfo-dir', default='data/germlines/human')
args = parser.parse_args()

glfo = glutils.read_glfo(args.glfo_dir, args.chain)

if args.alleles is None:
    # no alleles requested: take the allele of every gene in this region whose '<primary>[-<sub>]' name equals --base
    # NOTE the '-<sub>' suffix is only appended when utils.sub_version() is not None, since str + None raises TypeError
    # (presumably None means the gene name has no sub version -- confirm against utils)
    args.alleles = [
        utils.allele(g) for g in glfo['seqs'][args.region]
        if args.base == utils.primary_version(g) + ('-' + utils.sub_version(g) if utils.sub_version(g) is not None else '')
    ]
else:
    args.alleles = utils.get_arg_list(args.alleles)
args.other_genes = utils.get_arg_list(args.other_genes)

# for g, s in glfo['seqs']['v'].items():
#     print '%s %3d' % (utils.color_gene(g, width=20), len(s) - glfo['cyst-positions'][g])
# sys.exit()

# base = '4-59'
# a1, a2 = '12', '01'
# gene1, gene2 = 'IGHV' + base + '*' + a1, 'IGHV' + base + '*' + a2

# full gene names are 'IG' + <chain> + <region> + <base> + '*' + <allele>, one per requested allele
genes = ['IG' + args.chain.upper() + args.region.upper() + args.base + '*' + al for al in args.alleles]
if args.other_genes is not None:
    genes += args.other_genes
import utils
import glutils

# Command line: --base is required; --locus must be one of the keys of utils.loci.
parser = argparse.ArgumentParser()
parser.add_argument('--base', required=True)
parser.add_argument('--alleles')
parser.add_argument('--other-genes')
parser.add_argument('--region', default='v')
parser.add_argument('--locus', default='igh', choices=utils.loci.keys())
parser.add_argument('--glfo-dir', default='data/germlines/human')
args = parser.parse_args()

glfo = glutils.read_glfo(args.glfo_dir, args.locus)

if args.alleles is not None:
    # alleles were given explicitly on the command line
    args.alleles = utils.get_arg_list(args.alleles)
else:
    # otherwise use the allele of every gene in this region whose '<primary>[-<sub>]' name equals --base
    args.alleles = [
        utils.allele(g)
        for g in glfo['seqs'][args.region]
        if args.base == utils.primary_version(g) + ('' if utils.sub_version(g) is None else '-' + utils.sub_version(g))
    ]

if not args.alleles:
    # nothing to compare: list every gene name in this region so the user can pick a valid --base
    other_choices = ' '.join(glfo['seqs'][args.region])
    raise Exception("couldn't find any alleles for --base %s. Other choices:\n %s" % (args.base, other_choices))

args.other_genes = utils.get_arg_list(args.other_genes)

# for g, s in glfo['seqs']['v'].items():
#     print '%s %3d' % (utils.color_gene(g, width=20), len(s) - glfo['cyst-positions'][g])
# sys.exit()