示例#1
0
sys.path.insert(1, current_script_dir)

import utils
import plotting

# ----------------------------------------------------------------------------------------
# location of the germline set, and the x axis title to use for each kind of difference
datadir = 'data/imgt'
xtitles = {'indels' : 'fraction of positions indel\'d',
           'subs' : 'substitution fraction'}

glfo = utils.read_germline_set(datadir)
vgenes = glfo['aligned-genes']['v'].keys()

# map each primary version to the list of aligned v genes that share it
pversions = OrderedDict()
for vg in vgenes:
    pv = utils.primary_version(vg)
    pversions.setdefault(pv, []).append(vg)  # start a new list the first time we see <pv>

# remove primary versions that only have one gene
# NOTE loop over a snapshot of the keys: deleting entries from a dict while iterating over the dict itself is
# unsafe (it only happens to work with python 2.7's OrderedDict, and raises RuntimeError in python 3)
for pv in list(pversions):
    if len(pversions[pv]) == 1:
        print('removing single-gene pv %s' % pv)  # single parenthesized argument, so the output is the same under python 2 and 3
        del pversions[pv]

# ----------------------------------------------------------------------------------------
def indel_difference_fraction(seq1, seq2):
    """ fraction of positions in the aligned sequences <seq1> <seq2> which are dots in exactly one of them """
    if len(seq1) != len(seq2):
        raise Exception('sequences different length:\n  %s\n  %s' % (seq1, seq2))
示例#2
0
import utils
import glutils
import plotting

# ----------------------------------------------------------------------------------------
# location of the germline info, and the x axis title to use for each kind of difference
datadir = 'data/germlines/human'
xtitles = {'indels' : 'fraction of positions indel\'d',
           'subs' : 'substitution fraction'}

glfo = glutils.read_glfo(datadir)
vgenes = glfo['aligned-genes']['v'].keys()

# map each primary version to the list of aligned v genes that share it
pversions = OrderedDict()
for vg in vgenes:
    pv = utils.primary_version(vg)
    pversions.setdefault(pv, []).append(vg)  # start a new list the first time we see <pv>

# remove primary versions that only have one gene
# NOTE loop over a snapshot of the keys: deleting entries from a dict while iterating over the dict itself is
# unsafe (it only happens to work with python 2.7's OrderedDict, and raises RuntimeError in python 3)
for pv in list(pversions):
    if len(pversions[pv]) == 1:
        print('removing single-gene pv %s' % pv)  # single parenthesized argument, so the output is the same under python 2 and 3
        del pversions[pv]

# ----------------------------------------------------------------------------------------
def indel_difference_fraction(seq1, seq2):
    """ fraction of positions in the aligned sequences <seq1> <seq2> which are dots in exactly one of them """
    if len(seq1) != len(seq2):
        raise Exception('sequences different length:\n  %s\n  %s' % (seq1, seq2))
示例#3
0
def get_base(gene):
    """ return the primary version of <gene>, followed by '-' and the sub version if <gene> has one """
    primary = utils.primary_version(gene)
    sub = utils.sub_version(gene)
    if sub is None:  # no sub version, so the base is just the primary version
        return primary
    return primary + '-' + sub
示例#4
0
            sorted(set([get_base(g) for g in glfo['seqs'][args.region]])))))
# add any extra genes that were requested to the end of the gene list
args.other_genes = utils.get_arg_list(args.other_genes)
if args.other_genes is not None:
    genes.extend(args.other_genes)

# one (initially empty) string per gene
seqstrs = [''] * len(genes)
snpstrs = [''] * len(genes)

# widest gene name, not counting color escape characters
gene_str_width = max(utils.len_excluding_colors(utils.color_gene(g)) for g in genes)
# there are no codon positions to look up for the d region
if args.region == 'd':
    codon_positions = None
else:
    codon_positions = glfo[utils.conserved_codons[args.locus][args.region] + '-positions']
max_seq_len = max(len(glfo['seqs'][args.region][g]) for g in genes)

# use the first gene as the reference, unless a reference allele was specified (in which case the reference
# gene name is built from the first gene's primary and sub versions plus that allele)
if args.ref_allele is None:
    ref_gene = genes[0]
else:
    ref_gene = utils.rejoin_gene(args.locus, args.region, utils.primary_version(genes[0]),
                                 utils.sub_version(genes[0]), args.ref_allele)
if ref_gene != genes[0]:  # make sure the reference gene comes first
    genes.remove(ref_gene)
    genes.insert(0, ref_gene)
ref_seq = glfo['seqs'][args.region][ref_gene]
# NOTE(review): <codon_positions> is None when args.region is 'd', so this lookup raises TypeError for the d region
ref_pos = codon_positions[ref_gene]

# go through the genes (reference gene first), left-padding each sequence whose codon position is smaller than the reference's
# NOTE(review): <codon_positions> is None when args.region is 'd', in which case the lookup below raises TypeError -- confirm whether the d region is meant to be supported here
for igene in range(0, len(genes)):
    gene = genes[igene]
    seq = glfo['seqs'][args.region][gene]
    pos = codon_positions[gene]
    if pos < ref_pos:  # align the codon position in the case that this seq is shorter up to the codon
        seq = (ref_pos - pos) * '-' + seq  # prepend one '-' for each position that's missing relative to the reference
        pos += (ref_pos - pos)  # i.e. the codon position in the padded sequence is now the same as the reference's
示例#5
0
sys.path.insert(1, partis_dir + '/python')

import utils
import glutils

# command line arguments: --base is compared below to each gene's <primary version>[-<sub version>]
parser = argparse.ArgumentParser()
parser.add_argument('--base', required=True)
parser.add_argument('--alleles')
parser.add_argument('--other-genes')
parser.add_argument('--region', default='v')
parser.add_argument('--chain', default='h')
parser.add_argument('--glfo-dir', default='data/germlines/human')
args = parser.parse_args()
glfo = glutils.read_glfo(args.glfo_dir, args.chain)
if args.alleles is None:
    # no alleles were specified, so use every allele in glfo whose base matches --base
    args.alleles = []
    for g in glfo['seqs'][args.region]:
        gene_base = utils.primary_version(g)
        sub = utils.sub_version(g)
        if sub is not None:  # sub_version() can return None, and concatenating None onto a string raises TypeError
            gene_base += '-' + sub
        if args.base == gene_base:
            args.alleles.append(utils.allele(g))
else:
    args.alleles = utils.get_arg_list(args.alleles)
args.other_genes = utils.get_arg_list(args.other_genes)

# for g, s in glfo['seqs']['v'].items():
#     print '%s  %3d' % (utils.color_gene(g, width=20), len(s) - glfo['cyst-positions'][g])
# sys.exit()

# base = '4-59'
# a1, a2 = '12', '01'
# gene1, gene2 = 'IGHV' + base + '*' + a1, 'IGHV' + base + '*' + a2

# full gene name is IG<chain><region><base>*<allele>, so everything up to the allele is the same for all of them
gene_prefix = 'IG' + args.chain.upper() + args.region.upper() + args.base + '*'
genes = [gene_prefix + al for al in args.alleles]
if args.other_genes is not None:  # then add any additional genes that were requested
    genes.extend(args.other_genes)
示例#6
0
import utils
import glutils

# command line arguments: --base is compared below to each gene's <primary version>[-<sub version>]
parser = argparse.ArgumentParser()
parser.add_argument('--base', required=True)
parser.add_argument('--alleles')
parser.add_argument('--other-genes')
parser.add_argument('--region', default='v')
parser.add_argument('--locus', default='igh', choices=utils.loci.keys())
parser.add_argument('--glfo-dir', default='data/germlines/human')
args = parser.parse_args()
glfo = glutils.read_glfo(args.glfo_dir, args.locus)

if args.alleles is not None:
    args.alleles = utils.get_arg_list(args.alleles)
else:
    # no alleles were specified, so use every allele in glfo whose base matches --base
    matching_alleles = []
    for g in glfo['seqs'][args.region]:
        gene_base = utils.primary_version(g)
        sub = utils.sub_version(g)
        if sub is not None:  # only genes that have a sub version get the '-<sub version>' suffix
            gene_base += '-' + sub
        if args.base == gene_base:
            matching_alleles.append(utils.allele(g))
    args.alleles = matching_alleles

# nothing to do without any alleles, so print the available genes and give up
if len(args.alleles) == 0:
    available_genes = ' '.join(glfo['seqs'][args.region])
    raise Exception('couldn\'t find any alleles for --base %s. Other choices:\n    %s' % (args.base, available_genes))
args.other_genes = utils.get_arg_list(args.other_genes)

# for g, s in glfo['seqs']['v'].items():
#     print '%s  %3d' % (utils.color_gene(g, width=20), len(s) - glfo['cyst-positions'][g])
# sys.exit()