示例#1
0
 def run(self, problem, params=None):
     '''Run the phasing processing chain on a problem.

     Adapts the generic single-request Filter interface to two inputs: problem and params are
     packed into one util.Struct request that is passed to self.handle(), whose result is
     returned. If params is falsy (e.g. None), a default-constructed PhaseParam is used.'''
     if params:
         effective_params = params
     else:
         effective_params = PhaseParam()
     request = util.Struct(problem=problem, params=effective_params)
     return self.handle(request)
示例#2
0
 def test_phase_family(self):
     '''Phase the trivial cases in all genotyped trios and verify the resulting statistics.'''
     # Load the PLINK sample data set and sanity-check it before phasing
     problem = io.read_plink(prefix=itu.GENOTYPE_SAMPLE,
                             pedigree=itu.HUTT_PED,
                             haplotype=None)
     itu.assert_size_equals(problem.genotype, 8, 1415)
     num_trios = len(problem.trios())
     assert_equal(num_trios, 869, 'Unexpected # of genotyped trios')

     # Phase with debugging output turned off, then compare against the expected stats
     params = PhaseParam(debug=False)
     self.phaser.run(problem, params)
     itu.assert_problem_stats(problem, 22640, 20225, 144)
示例#3
0
 def __init__(self, params=None):
     '''Initialize from a phasing parameter object.

     params - parameter object whose debug attribute is read here. If None (the default),
     a fresh PhaseParam() is created for this instance.'''
     # Build the default per call. A PhaseParam() placed in the signature is created once at
     # definition time and shared by every instance, so a mutation through one instance
     # (PhaseParam objects are updated in place elsewhere) would leak into all the others.
     if params is None:
         params = PhaseParam()
     self.params = params
     self.debug = params.debug
     # TODO: move into params
     self.min_segment_length = 0.0
     # TODO: calculate min_segment_length from s_ibd, s, slice_size
     self.max_difference = 2
示例#4
0
def ibd_germline(problem, samples):
    '''Compute IBD segments among the haplotypes of the genotyped samples using GERMLINE.

    A GermlineIbdComputer with default PhaseParam settings is run on a haplotype matrix built
    from the genotyped subset of samples. group_to_disjoint() is then called on the resulting
    segment set, which is returned. Segments are rounded to the nearest slice.'''
    computer = GermlineIbdComputer(PhaseParam())
    genotyped_samples = gt.genotyped(problem, samples)
    hap_matrix = _HapMatrix(problem, genotyped_samples)
    segments = computer.ibd_segments(hap_matrix)
    segments.group_to_disjoint()
    return segments
示例#5
0
 def setUp(self):
     '''Prepare a single nuclear family fixture with node 2 removed.'''
     loaded = im.io.read_npz(itu.FAMILY945_ONE_PARENT_STAGE2)
     # Dropping a key child makes the problem more interesting for the IBD algorithm
     self.problem = loaded.remove_nodes([2])
     families = self.problem.families(genotyped=False)
     self.family = families[0]
     self.sibs = ig._filled_members(self.problem, self.family)
     default_params = PhaseParam()
     self.ibd_computer = ig.GermlineIbdComputer(default_params)
示例#6
0
    def test_family_12(self):
        '''Compare sibs with non-genotyped parents (stage 4) and check the problem statistics
        before and after running the sib comparison phaser.'''
        expected_stats = (45052, 42162, 237)

        # Load the stage-2 result and verify the starting state
        problem = io.read_npz(itu.FAMILY12_STAGE2)
        itu.assert_size_equals(problem.genotype, 3218, 7)
        itu.assert_problem_stats(problem, *expected_stats)
        families = list(problem.families(genotyped=False))
        assert_equal(len(families), 1, 'Incorrect number of families')

        # Run the phaser restricted via single_member=1
        params = PhaseParam(single_member=1)
        family_sib_comparison_phaser().run(problem, params)

        # The statistics are expected to be unchanged by this run
        itu.assert_problem_stats(problem, *expected_stats)
示例#7
0
文件: phase.py 项目: orenlivne/ober
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.
    --------------------------------------------------
    '''
    if options.debug: print 'Input options', options
    print 'Building phaser (stage = %d) ...' % (options.stage, )
    phaser = build_phasing_pipeline(options)

    if options.debug: print 'Reading data ...'
    problem = __load_problem(options)

    if options.debug: print 'Phasing ...'
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)

    print ''
    request.stats.pprint()
    print ''

    if options.output is not None:
        if options.min_output:
            print 'Minimizing output size...'
            io.slim(problem)
        out_prefix, ext = os.path.splitext(options.output)
        if ext == '.npz':
            print 'Writing haplotype result to %s in NPZ format ...' % (
                options.output, )
            io.write_npz(problem, options.output)
            output_info = out_prefix + '.info.npz'
            print 'Writing problem info result to %s in NPZ format ...' % (
                output_info, )
            io.write_info_npz(problem.info, output_info)
        else:
            print 'Writing haplotype result to %s in PLINK format ...' % (
                options.output, )
            io.write_plink(problem, options.output, verbose=options.debug)
    return problem
示例#8
0
    def test_ibd_segments_sib_pair2(self):
        '''Calculate distant IBD segments against a single surrogate parent and compare them
        with the IBD segments based on nuclear family info.'''
        params = PhaseParam(id_coef_file=im.itu.ID_COEF_FILE, max_path_length=2)
        segment_set = ibd_segments_with_relatives(self.problem, 3, 5, params,
                                                  im.ibd_hmm.prob_ibd_hmm)

        expected = [
            ((1278, 1337), (31217345, 32331594, 1.114, 1), ((3, 0), (5, 0))),
            ((2276, 2363), (42344297, 43527681, 1.183, 1), ((3, 1), (5, 0))),
            ((3138, 3206), (49803008, 50837224, 1.034, 1), ((3, 1), (5, 0))),
            ((603, 3218), (25453554, 51156934, 25.703, 1), ((5, 1), (3, 1))),
        ]
        im.itu.assert_segments_almost_equal(segment_set, expected,
                                            full_data=True, decimal=3,
                                            err_msg='Wrong IBD segments')
示例#9
0
    def test_ibd_segments_sib_pair(self):
        '''Calculate distant IBD segments between a pair of sibs and compare them with the
        IBD segments based on nuclear family info.'''
        params = PhaseParam(id_coef_file=im.itu.ID_COEF_FILE)
        segment_set = ibd_segments_with_relatives(self.problem, 3, 2, params,
                                                  im.ibd_hmm.prob_ibd_hmm)

        expected = [
            ((1412, 3218), (32992389, 51156934, 18.165, 1), ((3, 0), (2, 0))),
            ((241, 451), (19643555, 23817486, 4.174, 1), ((2, 0), (3, 1))),
            ((0, 600), (16484792, 25444874, 8.960, 1), ((3, 1), (2, 1))),
            ((2650, 3218), (45892433, 51156934, 5.265, 1), ((3, 1), (2, 1))),
        ]
        im.itu.assert_segments_almost_equal(segment_set, expected,
                                            full_data=True, decimal=3,
                                            err_msg='Wrong IBD segments')
示例#10
0
 def test_ibd_segments_ibdld(self):
     '''Compute IBD segments in a nuclear family with IBDLD and check the grouped result.'''
     ibdld = im.ibdld.ibd_ld
     cache = ibdld.IbdSegmentGlobalCacheIbdld(itu.FAMILY7 + '.ibd')
     computer = ibdld.IbdSegmentComputerIbdld(
         cache, self.haplotype,
         chrom=22,
         sample_id=self.problem.pedigree.sample_id,
         samples=[2, 8],
         threshold=0.9,
         params=PhaseParam())
     segment_set = segment.break_and_group_segments(computer.segments)

     # A single grouped segment is expected (compared with full_data=False)
     expected = [((38, 2849), [], ((8, 1), (2, 1)))]
     assert_segments_almost_equal(segment_set, expected,
                                  full_data=False, decimal=3,
                                  err_msg='Wrong grouped IBDLD IBD segments')
     # The error array of the result must equal the empty errors array
     assert_equal(segment_set.errors, empty_errors_array(),
                  'IBDLD does not support errors but they are output?!')
示例#11
0
文件: phase.py 项目: orenlivne/ober
def run_phasing_chain(phaser, problem, params=None):
    '''Run the phaser on a problem as one pipeline and return the populated request.

    The request is a util.Struct holding the problem, the parameters (a default PhaseParam
    when params is falsy), a copy of the original genotype data (g_orig) and an initially
    empty stats struct. The wall-clock duration of phaser.handle() is stored in
    request.stats.time.'''
    if params:
        effective_params = params
    else:
        effective_params = PhaseParam()
    # Snapshot the genotype data before the phaser gets a chance to modify it
    original_genotype = problem.genotype.data.copy()
    request = util.Struct(problem=problem,
                          params=effective_params,
                          g_orig=original_genotype,
                          stats=util.Struct())

    # Run phasing processing chain and time it
    started = time.time()
    phaser.handle(request)
    request.stats.time = time.time() - started
    return request
示例#12
0
文件: phase.py 项目: orenlivne/ober
def __main(options):
    '''
    --------------------------------------------------
    Main program - accepts an options struct.
    --------------------------------------------------
    '''
    if options.debug: print 'Input options', options
    print 'Building phaser (stage = %d) ...' % (options.stage,)
    phaser = build_phasing_pipeline(options)
    
    if options.debug: print 'Reading data ...'
    problem = __load_problem(options)

    if options.debug: print 'Phasing ...'
    params = PhaseParam()
    params.update_from_struct(options)
    request = run_phasing_chain(phaser, problem, params)
    
    print ''
    request.stats.pprint()
    print ''

    if options.output is not None:
        if options.min_output:
            print 'Minimizing output size...'
            io.slim(problem)
        out_prefix, ext = os.path.splitext(options.output)
        if ext == '.npz':
            print 'Writing haplotype result to %s in NPZ format ...' % (options.output,)
            io.write_npz(problem, options.output)
            output_info = out_prefix + '.info.npz'
            print 'Writing problem info result to %s in NPZ format ...' % (output_info,)
            io.write_info_npz(problem.info, output_info)
        else:
            print 'Writing haplotype result to %s in PLINK format ...' % (options.output,)
            io.write_plink(problem, options.output, verbose=options.debug)
    return problem
    def setUp(self):
        '''Load the test problem and build the phaser and child comparator under test.'''
        unittest.TestCase.setUp(self)

        # Test data is the result of the previous phasing stages
        self.problem = io.read_npz(itu.FAMILY13_STAGE2)
        self.family = self.problem.families()[0]

        # Phasing chain: trivial phaser, family phaser, then child comparison phaser
        stages = [
            trivial_phaser(),
            family_phaser(),
            family_child_comparison_phaser(),
        ]
        self.phaser = phase_core.PhaseDecorator(FilterChain(stages))

        request = Struct(problem=self.problem, params=PhaseParam())
        self.comparator = ic.ChildComparator(request, self.family)
 def test_child_comparison_phaser(self):
     '''Phase a founder parent by comparing its partially-phased children; exercises the main
     phasing method and checks the parents' haplotype fill fraction before and after.'''
     haplotype = self.problem.haplotype
     father = self.family.father
     mother = self.family.mother

     # Fill fractions before phasing, compared to 2 decimal places
     for parent, expected_fill in ((father, 0.60), (mother, 0.63)):
         assert_almost_equal(haplotype.fill_fraction(sample=parent), expected_fill, 2,
                             'Unexpected pre-phasing parent fill %')

     #print self.problem.fill_fraction(sample=self.family.member_set)
     family_child_comparison_phaser().run(self.problem, PhaseParam())
     #print self.problem.fill_fraction(sample=self.family.member_set)

     # Fill fractions after phasing, compared to 3 decimal places
     for parent in (father, mother):
         assert_almost_equal(haplotype.fill_fraction(sample=parent), 0.998, 3,
                             'Unexpected post-phasing parent fill %')
示例#15
0
    def test_ibd_parent_vs_all_children(self):
        '''Test calculating distant IBD segments against all surrogate parents;
        compare with IBD segments based on nucelar family info.'''
#        segment_set = ibd_segments_with_surrogate_parents(self.problem, 0,
#                                                          PhaseParam(margin=0., surrogate_parent_fill_threshold=0.9,
#                                                                     max_path_length=2, debug=True),
#                                                          prob_ibd_calculator=im.ibd_hmm.prob_ibd_hmm,
#                                                          is_i_phased=True)
        # Turn off kinship-based POO determination IBD segment computation since we don't have
        # a complete pedigree here 
        segment_set = ibd_segments_with_relatives(self.problem, 0,
                                                  genotyped_children(self.problem, self.problem.first_family),
                                                  PhaseParam(id_coef_file=im.itu.ID_COEF_FILE, max_path_length=2),
                                                  im.ibd_hmm.prob_ibd_hmm, use_kinship=False)
        im.itu.assert_segments_almost_equal(segment_set,
                                            [((0   , 1420), (16484792, 33032458, 16.548, 1), ((2, 0), (0, 0))),
                                             ((241 , 446), (19643555, 23761236, 4.118, 1), ((0, 0), (2, 1))),
                                             ((2215, 2270), (40876234, 42241372, 1.365, 1), ((0, 0), (2, 1))),
                                             ((3138, 3206), (49803008, 50837224, 1.034, 1), ((0, 0), (2, 1))),
                                             ((1278, 1337), (31217345, 32331594, 1.114, 1), ((0, 1), (2, 0))),
                                             ((1411, 3218), (32978753, 51156934, 18.178, 1), ((0, 1), (2, 0))),
                                             ((1278, 1337), (31217345, 32331594, 1.114, 1), ((3, 0), (0, 0))),
                                             ((241 , 451), (19643555, 23817486, 4.174, 1), ((0, 0), (3, 1))),
                                             ((2276, 2363), (42344297, 43527681, 1.183, 1), ((0, 0), (3, 1))),
                                             ((3138, 3206), (49803008, 50837224, 1.034, 1), ((0, 0), (3, 1))),
                                             ((0   , 3218), (16484792, 51156934, 34.672, 1), ((0, 1), (3, 0))),
                                             ((1278, 1337), (31217345, 32331594, 1.114, 1), ((0, 0), (4, 0))),
                                             ((2552, 3218), (45011952, 51156934, 6.145, 1), ((0, 0), (4, 0))),
                                             ((385 , 451), (22583252, 23817486, 1.234, 1), ((0, 0), (4, 1))),
                                             ((2215, 2270), (40876234, 42241372, 1.365, 1), ((0, 0), (4, 1))),
                                             ((0   , 2571), (16484792, 45231758, 28.747, 1), ((0, 1), (4, 0))),
                                             ((0   , 3218), (16484792, 51156934, 34.672, 1), ((0, 0), (5, 0))),
                                             ((385 , 451), (22583252, 23817486, 1.234, 1), ((5, 1), (0, 0))),
                                             ((2276, 2363), (42344297, 43527681, 1.183, 1), ((5, 1), (0, 0))),
                                             ((3138, 3206), (49803008, 50837224, 1.034, 1), ((5, 1), (0, 0))),
                                             ((1278, 1337), (31217345, 32331594, 1.114, 1), ((0, 1), (5, 0))),
                                             ((0   , 805), (16484792, 27119061, 10.634, 1), ((0, 0), (6, 0))),
                                             ((1278, 1337), (31217345, 32331594, 1.114, 1), ((0, 0), (6, 0))),
                                             ((241 , 347), (19643555, 22015144, 2.372, 1), ((6, 1), (0, 0))),
                                             ((385 , 451), (22583252, 23817486, 1.234, 1), ((6, 1), (0, 0))),
                                             ((2276, 2363), (42344297, 43527681, 1.183, 1), ((6, 1), (0, 0))),
                                             ((3138, 3206), (49803008, 50837224, 1.034, 1), ((6, 1), (0, 0))),
                                             ((762 , 3218), (26836780, 51156934, 24.320, 1), ((0, 1), (6, 0)))],
                                             full_data=True, decimal=3, err_msg='Wrong IBD segments')
示例#16
0
文件: ibd_ld.py 项目: orenlivne/ober
 def __init__(self,
              cache,
              haplotype,
              chrom,
              samples,
              sample_id,
              threshold,
              params=None):
     '''Store the segment cache, haplotype data and query settings on the instance.

     cache     - IBD segment cache object (stored as is).
     haplotype - haplotype object; its num_snps, data and snp['base_pair'] members are read.
     chrom     - chromosome identifier (stored as is).
     samples   - samples to process (stored as is).
     sample_id - sample ID data (stored as is).
     threshold - threshold value (stored as is).
     params    - phasing parameters. If None (the default), a fresh PhaseParam() is created
                 for this instance.'''
     # Build the default per call: a PhaseParam() in the signature would be created once at
     # definition time and the same mutable object shared by all instances
     if params is None:
         params = PhaseParam()
     self.__cache = cache
     self.__sample_id = sample_id
     self.__haplotype = haplotype
     self.__chrom = chrom
     self.__samples = samples
     self.__num_snps = haplotype.num_snps
     self.__threshold = threshold
     self.__hap_comparator = _hap_comparator_ibdld
     self.__data = haplotype.data
     self.__params = params
     # Base-pair data of the SNPs, taken from the haplotype's snp record
     self._bp = haplotype.snp['base_pair']
示例#17
0
def ibd_segments_with_surrogate_parents(problem,
                                        i,
                                        min_path_length,
                                        max_path_length,
                                        surrogate_parent_fill_threshold=0.9,
                                        params=None,
                                        prob_ibd_calculator=prob_ibd_ibs):
    '''A utility method that calculates likely IBD segments between i and surrogate parents.

    The surrogate parents are the relatives returned by RelativeCollection.in_neighborhood()
    for i with path lengths between min_path_length and max_path_length and the fill threshold
    params.surrogate_parent_fill_threshold. The segments themselves are computed by
    ibd_segments_with_relatives(), whose result is returned.

    problem - problem object to search for relatives in.
    i - sample to find IBD segments for.
    min_path_length, max_path_length - bounds passed to the relative neighborhood search.
    surrogate_parent_fill_threshold - NOTE(review): currently unused; the value is taken from
        params.surrogate_parent_fill_threshold instead. Confirm which one should win.
    params - phasing parameters. If None (the default), a fresh PhaseParam() is created.
    prob_ibd_calculator - IBD probability calculator forwarded to ibd_segments_with_relatives().

    Original author's notes (the corresponding logic is not visible in this function):
    Supports minimum required IBD segment length vs. d meioses separating two samples. Assumes
    an exponential distribution and computes the value x for which
    P(x >= X) = ibd_length_upper_percentile.

    Important: use a short median filter window size (~3) to catch more spikes since we are
    less certain that spikes are genotype errors than in parent-child IBD calculations
    '''
    # Build the default per call: a PhaseParam() in the signature would be created once at
    # definition time and the same mutable object shared by all calls
    if params is None:
        params = PhaseParam()
    # Find the set of relatives of i within [min_path_length, max_path_length] proximity that
    # pass the surrogate parent fill threshold
    relatives = RelativeCollection.in_neighborhood(
        i, problem, min_path_length, max_path_length,
        params.surrogate_parent_fill_threshold)
    return ibd_segments_with_relatives(problem, i, relatives.info['index'],
                                       params, prob_ibd_calculator)
示例#18
0
def _phase_in_ibd_segment(problem,
                          snp,
                          haps,
                          consensus,
                          debug=False,
                          params=PhaseParam()):
    '''Impute haplotypes in an IBD-sharing sample set haps over a SNP array snp, or a SNP array
    segment [snp[0],snp[1]], if it is a tuple.
    
    Use the concensus functor to calculator a consensus haplotype and copy it to all other
    IBD-sharing samples. haps is a list of (sample,hap) tuples, where sample=sample ID and
    hap=allele (paternal/maternal) that are assumed to be IBD in this segment. 
    The other allele in each haplotype is inferred from the genotype.
    
    concensus = 'max': if a non-zero value is found, it is the concensus. This should be applied
    when all haps are certain, so all non-missing values should agree.
    This is cleanly implemented using max(h over samples). 
    
    concensus = 'majority': majority vote of non-missing values.'''

    # print 'Phasing in segment (%d,%d)' % (start, stop, )
    # problem, params = request.problem, request.params
    # snp_test_index = params.snp_test_index
    snps = np.arange(snp[0], snp[1]) if isinstance(snp, tuple) else snp
    common, hh = _compute_conensus(problem.h, snps, haps, consensus)

    # Flag samples that are inconsistent with the concensus as errors if we have enough haps
    # to support the evidence for errors. If there are too many errors, this is a dubious segment
    errors = _find_errors(problem, snps, haps, consensus, common, hh,
                          params.min_consensus_samples)
    if debug: print 'Consensus errors (%d)' % len(errors[0]), errors
    # print 'Consensus errors %d' % len(errors[0])
    problem.genotype_error(errors[0], errors[1],
                           'IBD majority vote inconsistency')

    # Phase all haps: copy consensus haplotype to missing haplotype entries
    _phase_by_consensus(problem, snps, haps, common)
示例#19
0
 def setUp(self):
     '''Prepare the sib founders (stage 3) single nuclear family fixture.'''
     problem = im.io.read_npz(itu.SIB_FOUNDERS_STAGE3)
     # Use the first family returned by the genotyped=False family query
     family = problem.families(genotyped=False)[0]
     self.problem = problem
     self.family = family
     self.sibs = ig._filled_members(problem, family)
     self.ibd_computer = ig.GermlineIbdComputer(PhaseParam())
示例#20
0
 def setUp(self):
     '''Prepare the family 4 (stage 3) single nuclear family fixture.'''
     problem = im.io.read_npz(itu.FAMILY4_STAGE3)
     family = problem.first_family
     self.problem = problem
     self.family = family
     self.sibs = ig._filled_members(problem, family)
     self.ibd_computer = ig.GermlineIbdComputer(PhaseParam())
示例#21
0
============================================================
'''
import impute as im, itertools, matplotlib.pyplot as P, sys
from impute.tools.param import PhaseParam

# print sys.argv
generate_plots = True if (len(sys.argv) < 2) else bool(int(sys.argv[1]))
p = im.io.read_npz(im.itu.FAMILY_TOO_ZEROED_STAGE2)
haps = list(
    itertools.product(im.gt.genotyped_members(p, p.first_family), xrange(2)))
children = im.gt.genotyped_children(p, p.first_family)

# IBD between sib pairs
child = 3
sib = 2
sib_ibd = im.ibd_distant.ibd_segments_with_relatives(
    p, child, [sib],
    PhaseParam(margin=0., surrogate_parent_fill_threshold=0.9, debug=True),
    im.ibd_hmm.prob_ibd_hmm)
print sib_ibd
if generate_plots:
    P.figure(1)
    child_haps = list(itertools.product([child, sib], xrange(2)))
    g = im.plots.plot_hap_coloring(sib_ibd,
                                   child_haps,
                                   pair_gap=10,
                                   linewidth=6,
                                   title='Sib IBD Segments',
                                   snp_range=(0, p.num_snps))
#    P.savefig(os.environ['OBER'] + '/doc/ibd/hmm/family_ibd_hmm_sib.png')
示例#22
0
 def test_ibd_segments_hmm(self):
     '''Locate IBD segments between the unphased proband and its sib surrogate parents using
     the HMM IBD posterior, group them into disjoint segments and check the result.'''
     relatives = self.__phased_sibs(self.s)
     params = PhaseParam(debug=False)
     segment_set = im.idist.ibd_segments_with_relatives(
         self.problem, self.s, relatives, params, im.ibd_hmm.prob_ibd_hmm)
     segment_set.group_to_disjoint()

     expected = [
         ((0, 96), (16484792, 17948473, 1.464, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((96, 344), (17948473, 21460008, 3.512, 0),
          ((3, 1), (4, 1), (2, 1))),
         ((344, 380), (21460008, 22554306, 1.094, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((380, 383), (22554306, 22555078, 0.001, 0),
          ((3, 1), (4, 1), (2, 1))),
         ((383, 438), (22555078, 23636541, 1.081, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((438, 442), (23636541, 23695404, 0.059, 0),
          ((3, 1), (4, 1), (2, 1))),
         ((442, 519), (23695404, 24406778, 0.711, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((519, 557), (24406778, 25088629, 0.682, 0),
          ((0, 1), (2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((557, 951), (25088629, 27698217, 2.610, 0),
          ((0, 1), (3, 1), (4, 1), (2, 1))),
         ((951, 969), (27698217, 27832985, 0.135, 0),
          ((0, 1), (2, 0), (3, 1), (4, 1), (2, 1))),
         ((969, 1019), (27832985, 28093392, 0.260, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((1019, 1147), (28093392, 29670939, 1.578, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((1147, 1188), (29670939, 30113960, 0.443, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((1188, 1403), (30113960, 32950053, 2.836, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((1403, 1943), (32950053, 36891858, 3.942, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((1943, 2053), (36891858, 37982012, 1.090, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((2053, 2055), (37982012, 38086574, 0.105, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((2055, 2133), (38086574, 39454432, 1.368, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((2133, 2174), (39454432, 40018212, 0.564, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((2174, 2221), (40018212, 41107688, 1.089, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((2221, 2612), (41107688, 45515269, 4.408, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1))),
         ((2612, 2661), (45515269, 45972017, 0.457, 0),
          ((2, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((2661, 2735), (45972017, 47094390, 1.122, 0),
          ((0, 1), (0, 0), (3, 1), (2, 1), (2, 0), (1, 0), (4, 1), (1, 1), (4, 0))),
         ((2735, 2945), (47094390, 48569604, 1.475, 0),
          ((2, 0), (0, 0), (1, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((2945, 2991), (48569604, 48741583, 0.172, 0),
          ((2, 0), (0, 0), (1, 0), (3, 1), (4, 1), (4, 0))),
         ((2991, 3127), (48741583, 49752332, 1.011, 0),
          ((2, 0), (0, 0), (1, 0), (3, 1), (4, 1), (2, 1), (4, 0))),
         ((3127, 3177), (49752332, 50120255, 0.368, 0),
          ((2, 0), (1, 0), (3, 1), (4, 1), (0, 0))),
         ((3177, 3218), (50120255, 51156934, 1.037, 0),
          ((0, 1), (0, 0), (3, 1), (2, 1), (2, 0), (1, 0), (4, 1), (1, 1), (4, 0))),
     ]
     assert_segments_almost_equal(segment_set, expected,
                                  decimal=3,
                                  err_msg='Wrong IBD segments')