示例#1
0
def get_families(db, selected_families=None):
    """
    Query the samples table to return a list of Family
    objects that each contain all of the Subjects in a Family.

    db is passed straight to sqlite3.connect(). selected_families is
    either None (return every family) or a comma-separated string of
    family ids to which the result is restricted. If any requested id
    is not a key of the mapping returned by Family.from_cursor(), the
    process is terminated through sys.exit() with an error message.
    """
    conn = sqlite3.connect(db)
    conn.isolation_level = None
    conn.row_factory = sqlite3.Row
    c = conn.cursor()

    families_dict = Family.from_cursor(c)

    # if the user has specified a set of selected families
    # to which the analysis should be restricted, then
    # first sanity check that the family ids they specified are valid.
    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # validate in the order given so the first bad id is the one reported
        for family in requested_ids:
            if family not in families_dict:
                sys.exit("ERROR: family \"%s\" is not a valid family_id\n" % family)
        requested = set(requested_ids)

    # Membership is tested against the parsed ids, never against the raw
    # string: `"1" in "11,12"` is a substring match and would wrongly
    # select family "1".
    return [families_dict[fam] for fam in families_dict
            if requested is None or fam in requested]
示例#2
0
    def set_family_info(self):
        """
        Build the genotype filter of every family for the current
        inheritance model and store it alongside the family id.

        Fills self.families, self.family_ids and self.family_masks.
        A family that is not listed in args.families (when that option
        is set) receives the literal mask 'False'.
        """
        self.families = families = Family.from_cursor(self.gq.c).values()
        args = self.args

        self.family_ids = []
        self.family_masks = []
        model = self.model

        # keyword arguments forwarded to the model method; the exact set
        # depends on which model is being run.
        filter_opts = {
            'only_affected': not getattr(args, "allow_unaffected", False),
            'min_gq': args.min_gq,
        }
        if model == "mendel_violations":
            filter_opts = {'only_affected': args.only_affected}
        elif model == "comp_het":
            filter_opts['pattern_only'] = args.pattern_only
        else:
            filter_opts['strict'] = not args.lenient

        if args.families:
            requested_fams = set(args.families.split(","))
        else:
            requested_fams = None

        for family in families:
            if requested_fams is not None and family.family_id not in requested_fams:
                mask = 'False'
            else:
                # e.g. family.auto_rec(gt_ll, min_depth)
                build_filter = getattr(family, model)
                mask = build_filter(gt_ll=args.gt_phred_ll,
                                    min_depth=args.min_sample_depth,
                                    **filter_opts)

            self.family_masks.append(mask)
            self.family_ids.append(family.family_id)
示例#3
0
文件: gim.py 项目: mmoisse/gemini
    def set_family_info(self):
        """
        Build the genotype filter of every family for the current
        inheritance model and store it alongside the family id.

        Fills self.families, self.family_ids and self.family_masks.
        A family that is not listed in args.families (when that option
        is set) receives the literal mask 'False'.
        """
        self.families = families = Family.from_cursor(self.gq.c).values()
        args = self.args

        self.family_ids = []
        self.family_masks = []
        model = self.model

        # keyword arguments forwarded to the model method; the exact set
        # depends on which model is being run.
        filter_opts = {
            'only_affected': not getattr(args, "allow_unaffected", False),
        }
        if model == "mendel_violations":
            filter_opts = {'only_affected': args.only_affected}
        elif model == "comp_het":
            filter_opts['pattern_only'] = args.pattern_only
        else:
            filter_opts['strict'] = not args.lenient

        if args.families:
            requested_fams = set(args.families.split(","))
        else:
            requested_fams = None

        for family in families:
            if requested_fams is not None and family.family_id not in requested_fams:
                mask = 'False'
            else:
                # e.g. family.auto_rec(gt_ll, min_depth)
                build_filter = getattr(family, model)
                mask = build_filter(gt_ll=args.gt_phred_ll,
                                    min_depth=args.min_sample_depth,
                                    **filter_opts)

            self.family_masks.append(mask)
            self.family_ids.append(family.family_id)
示例#4
0
def test_comp_het_one_parent_2kids():
    """
    A site pair must not be a candidate when the parent is HOM_REF
    at both sites.
    """
    # positions of the shared fixtures inside the genotype lists
    mom._i = 0
    kid._i = 1
    kid2._i = 2
    # reset the pedigree links on kid, then attach mom only
    kid.dad = None
    kid.mom = None
    kid.mom = mom

    members = [mom, kid, kid2]
    efam = EvalFamily(Family(members, '2kids'))
    efam.gt_types = [Family.HOM_REF, Family.HET, Family.HET]

    # genotype order is mom, kid, kid2
    site1_types = [Family.HOM_REF, Family.HET, Family.HET]
    site1_bases = ["T/T", "T/C", "T/C"]
    site2_types = [Family.HOM_REF, Family.HET, Family.HET]
    site2_bases = ["A/A", "A/C", "A/C"]
    site1_phases = [False] * 3
    site2_phases = [False] * 3

    outcome = efam.comp_het_pair(site1_types, site1_bases,
                                 site2_types, site2_bases,
                                 site1_phases, site2_phases,
                                 "T", "C", "A", "C",
                                 fast_mode=False,
                                 allow_unaffected=True)

    assert not outcome['candidate'], outcome
示例#5
0
def get_families(db, selected_families=None):
    """
    Query the samples table to return a list of Family
    objects that each contain all of the Subjects in a Family.

    db is passed straight to sqlite3.connect(). selected_families is
    either None (return every family) or a comma-separated string of
    family ids to which the result is restricted. If any requested id
    is not a key of the mapping returned by Family.from_cursor(), the
    process is terminated through sys.exit() with an error message.
    """
    conn = sqlite3.connect(db)
    conn.isolation_level = None
    conn.row_factory = sqlite3.Row
    c = conn.cursor()

    families_dict = Family.from_cursor(c)

    # if the user has specified a set of selected families
    # to which the analysis should be restricted, then
    # first sanity check that the family ids they specified are valid.
    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # validate in the order given so the first bad id is the one reported
        for family in requested_ids:
            if family not in families_dict:
                sys.exit("ERROR: family \"%s\" is not a valid family_id\n" %
                         family)
        requested = set(requested_ids)

    # Membership is tested against the parsed ids, never against the raw
    # string: `"1" in "11,12"` is a substring match and would wrongly
    # select family "1".
    return [families_dict[fam] for fam in families_dict
            if requested is None or fam in requested]
示例#6
0
def test_comp_het_singleton():
    # a family made of one affected sample that is HET at both sites
    proband = Sample('kid', affected=True)

    efam = EvalFamily(Family([proband], 'singleton'))
    efam.gt_types = [Family.HET]

    site1_types, site1_bases = [Family.HET], ["A/C"]
    site2_types, site2_bases = [Family.HET], ["A/C"]
    site1_phases, site2_phases = [False], [False]

    outcome = efam.comp_het_pair(site1_types, site1_bases,
                                 site2_types, site2_bases,
                                 site1_phases, site2_phases,
                                 "A", "C", "A", "C")

    assert outcome['candidate']
    assert outcome['priority'] == 2, outcome
示例#7
0
def make_fam2():
    # Three-generation pedigree in which kid, mom and grandma carry
    # phenotype code 2 and every other member carries code 1.
    ped_text = """\
#family_id  sample_id   paternal_id maternal_id sex phenotype
1   dad   0   0   1  1
1   mom   grandpa   grandma   2  2
1   kid   dad   mom   1  2
1   kid2   dad   mom   1  1
1   grandma 0   0     2  2
1   grandpa 0   0     1  1"""
    return Family.from_ped(ped_text)
示例#8
0
def make_fam1():
    # Three-generation pedigree in which kid is the only member with
    # phenotype code 2; every other member carries code 1.
    ped_text = """\
#family_id  sample_id   paternal_id maternal_id sex phenotype
1   dad   0   0   1  1
1   mom   grandpa   grandma   2  1
1   kid   dad   mom   1  2
1   kid2   dad   mom   1  1
1   grandma 0   0     2  1
1   grandpa 0   0     1  1"""
    return Family.from_ped(ped_text)
示例#9
0
文件: gim.py 项目: arq5x/gemini
    def candidates(self):
        """
        Generate the candidate site pairs of each group.

        For every (grp, rows) pair produced by self.gen_candidates('gene'),
        each pair of sites in the group is evaluated once per family with
        fam.comp_het_pair(). Results whose 'candidate' entry is truthy are
        collected under the (site1, site2) key, and the generator yields
        (grp, self.filter_candidates(collected)).

        When args.families is set (comma-separated family ids) only those
        families are evaluated; self.fams always keeps the full mapping
        returned by Family.from_cursor().
        """
        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.conn)

        if args.families:
            # parse the requested ids once instead of once per family
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                # the string form is the key used for the impossible-site
                # bookkeeping below
                sites.append((str(site), site))

            for fam in fams.values():
                # if a site has been deemed "impossible", we store and then
                # skip it to avoid computational overhead on it multiple times.
                impossible_sites = set()
                for i, (strsite1, site1) in enumerate(sites[:-1], start=1):
                    if strsite1 in impossible_sites:
                        continue

                    # sites[i:] pairs site1 only with the sites after it
                    for (strsite2, site2) in sites[i:]:
                        if strsite2 in impossible_sites:
                            continue

                        ch = fam.comp_het_pair(site1.gt_types, site1.gt_bases,
                                               site2.gt_types, site2.gt_bases,
                                               site1.gt_phases, site2.gt_phases,
                                               ref1=site1.row['ref'],
                                               alt1=site1.row['alt'],
                                               ref2=site2.row['ref'],
                                               alt2=site2.row['alt'],
                                               allow_unaffected=args.allow_unaffected,
                                               fast_mode=True,
                                               pattern_only=args.pattern_only)

                        if ch.get('impossible') == 'site1':
                            # site1 can never pair up for this family: stop
                            # trying partners for it
                            impossible_sites.add(strsite1)
                            break
                        if ch.get('impossible') == 'site2':
                            impossible_sites.add(strsite2)

                        if ch['candidate']:
                            samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#10
0
def make_fam2():
    # Three-generation pedigree in which kid, mom and grandma carry
    # phenotype code 2 and every other member carries code 1.
    pedigree = """\
#family_id  sample_id   paternal_id maternal_id sex phenotype
1   dad   0   0   1  1
1   mom   grandpa   grandma   2  2
1   kid   dad   mom   1  2
1   kid2   dad   mom   1  1
1   grandma 0   0     2  2
1   grandpa 0   0     1  1"""
    return Family.from_ped(pedigree)
示例#11
0
def make_fam(n_affecteds, n_unaffecteds, n_unknowns, id="xxx"):
    """
    Build a randomised EvalFamily for testing.

    Creates n_affecteds samples with affected=True, n_unaffecteds with
    affected=False and n_unknowns with affected=None, each with a random
    sex code of 1 or 2 and a name equal to its sample id. Then, half as
    many times as there are samples, a random sample is picked and (with
    probability 0.9 each) given a random dad and a random mom chosen
    from the other samples. The family is named 'fam_<id>' and receives
    random genotype types, depths, phred likelihoods and qualities, one
    value per sample.
    """
    samples = []
    groups = (
        ('affected', True, n_affecteds),
        ('unaffected', False, n_unaffecteds),
        ('unknown', None, n_unknowns),
    )
    for label, status, count in groups:
        for i in range(count):
            sample_id = '%s_%d' % (label, i)
            # the name matches the sample's own id, so samples from
            # different groups never share a name
            samples.append(
                Sample(sample_id,
                       affected=status,
                       sex=random.randint(1, 2),
                       name=sample_id))

    # one pedigree-linking round for every two samples, counting all
    # three groups
    n_rounds = (n_affecteds + n_unaffecteds + n_unknowns) // 2
    for _ in range(n_rounds):

        sample = random.choice(samples)
        # NOTE(review): sex is created as 1/2 above but compared to
        # 'male'/'female' here; this assumes Sample normalises the code.
        # Confirm against the Sample class.
        if random.random() < 0.9:
            try:
                sample.dad = random.choice([
                    s for s in samples if not s == sample and s.sex == 'male'
                ])
            except IndexError:
                # no eligible father: leave dad unset
                pass
        if random.random() < 0.9:
            try:
                sample.mom = random.choice([
                    s for s in samples if not s == sample and s.sex == 'female'
                ])
            except IndexError:
                # no eligible mother: leave mom unset
                pass

    n_samples = len(samples)
    fam = EvalFamily(Family(samples, 'fam_%s' % id))
    fam.gt_types = [random.randrange(0, 4) for _ in range(n_samples)]
    fam.gt_depths = [random.randrange(0, 100) for _ in range(n_samples)]
    fam.gt_phred_ll_homref = [
        random.randrange(0, 100) for _ in range(n_samples)
    ]
    fam.gt_phred_ll_het = [
        random.randrange(0, 100) for _ in range(n_samples)
    ]
    fam.gt_phred_ll_homalt = [
        random.randrange(0, 100) for _ in range(n_samples)
    ]
    fam.gt_quals = [random.randrange(5, 100) for _ in range(n_samples)]
    return fam
示例#12
0
def make_fam1():
    # Three-generation pedigree in which kid is the only member with
    # phenotype code 2; every other member carries code 1.
    pedigree = """\
#family_id  sample_id   paternal_id maternal_id sex phenotype
1   dad   0   0   1  1
1   mom   grandpa   grandma   2  1
1   kid   dad   mom   1  2
1   kid2   dad   mom   1  1
1   grandma 0   0     2  1
1   grandpa 0   0     1  1"""
    return Family.from_ped(pedigree)
示例#13
0
def test_comp_het_all_hets():
    # trio built from the module-level dad, mom and kid samples, with
    # every member HET at both sites
    trio = [dad, mom, kid]
    efam = EvalFamily(Family(trio, 'triox'))

    efam.gt_types = [Family.HET] * 3

    site1_types, site1_bases = [Family.HET] * 3, ["A/C"] * 3
    site2_types, site2_bases = [Family.HET] * 3, ["A/C"] * 3
    site1_phases, site2_phases = [False] * 3, [False] * 3

    outcome = efam.comp_het_pair(site1_types, site1_bases,
                                 site2_types, site2_bases,
                                 site1_phases, site2_phases,
                                 "A", "C", "A", "C")

    assert outcome['candidate']
    assert outcome['priority'] == 3
示例#14
0
def test_x_rec():
    # unaffected parents with an affected daughter
    mother = Sample('mom_1239NIH', affected=False, sex='female')
    father = Sample('dad_1240NIH', affected=False, sex='male')
    daughter = Sample('kidaff_1238NIH', affected=True, sex='female')

    daughter.mom = mother
    daughter.dad = father

    efam = EvalFamily(Family([father, mother, daughter], 'oler-trio'))
    # genotype order follows the family list: dad, mom, kid.
    # NOTE(review): the original comment here read "mom should be a
    # carrier", yet both parents are set to HOM_REF; confirm the intent.
    efam.gt_types = [Family.HOM_REF, Family.HOM_REF, Family.HOM_ALT]
    assert efam.x_rec()
示例#15
0
文件: gim.py 项目: mmoisse/gemini
    def candidates(self):
        """
        Generate the candidate site pairs of each group.

        For every (grp, rows) pair produced by self.gen_candidates('gene'),
        each pair of sites in the group is evaluated once per family with
        fam.comp_het_pair(). Results whose 'candidate' entry is truthy are
        collected under the (site1, site2) key, and the generator yields
        (grp, self.filter_candidates(collected)).

        When args.families is set (comma-separated family ids) only those
        families are evaluated; self.fams always keeps the full mapping
        returned by Family.from_cursor().
        """
        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.c)

        if args.families:
            # parse the requested ids once instead of once per family
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                gt_types, gt_bases, gt_phases = row['gt_types'], row[
                    'gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                sites.append(site)

            # sites[i:] pairs site1 only with the sites that come after it
            for i, site1 in enumerate(sites[:-1], start=1):
                for site2 in sites[i:]:

                    for fam in fams.values():

                        ch = fam.comp_het_pair(
                            site1.gt_types,
                            site1.gt_bases,
                            site2.gt_types,
                            site2.gt_bases,
                            site1.gt_phases,
                            site2.gt_phases,
                            ref1=site1.row['ref'],
                            alt1=site1.row['alt'],
                            ref2=site2.row['ref'],
                            alt2=site2.row['alt'],
                            allow_unaffected=args.allow_unaffected,
                            fast_mode=True,
                            pattern_only=args.pattern_only)

                        if ch['candidate']:
                            samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#16
0
    def candidates(self):
        """
        Generate the candidate site pairs of each group.

        For every (grp, rows) pair produced by self.gen_candidates('gene'),
        each pair of sites in the group is evaluated once per family with
        fam.comp_het_pair(). Results whose 'candidate' entry is truthy are
        collected under the (site1, site2) key, and the generator yields
        (grp, self.filter_candidates(collected)).

        When args.families is set (comma-separated family ids) only those
        families are evaluated; self.fams always keeps the full mapping
        returned by Family.from_cursor().
        """
        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.c)

        if args.families:
            # parse the requested ids once instead of once per family
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                gt_types, gt_bases, gt_phases = row['gt_types'], row[
                    'gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                sites.append(site)

            # sites[i:] pairs site1 only with the sites that come after it
            for i, site1 in enumerate(sites[:-1], start=1):
                for site2 in sites[i:]:

                    for fam in fams.values():

                        ch = fam.comp_het_pair(
                            site1.gt_types,
                            site1.gt_bases,
                            site2.gt_types,
                            site2.gt_bases,
                            site1.gt_phases,
                            site2.gt_phases,
                            ref1=site1.row['ref'],
                            alt1=site1.row['alt'],
                            ref2=site2.row['ref'],
                            alt2=site2.row['alt'],
                            allow_unaffected=args.allow_unaffected,
                            fast_mode=True,
                            pattern_only=args.pattern_only)

                        if ch['candidate']:
                            samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#17
0
def get_families(db, selected_families=None):
    """
    Query the samples table to return a list of Family
    objects that each contain all of the Subjects in a Family.

    db is handed to database.get_session_metadata(). selected_families
    is either None (return every family) or a comma-separated string of
    family ids to which the result is restricted.

    Raises ValueError if a requested id is not a key of the mapping
    returned by Family.from_cursor().
    """
    conn, metadata = database.get_session_metadata(db)

    families_dict = Family.from_cursor(conn)

    # if the user has specified a set of selected families
    # to which the analysis should be restricted, then
    # first sanity check that the family ids they specified are valid.
    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # validate in the order given so the first bad id is the one reported
        for family in requested_ids:
            if family not in families_dict:
                raise ValueError("Family \"%s\" is not a valid family_id\n" % family)
        requested = set(requested_ids)

    # Membership is tested against the parsed ids, never against the raw
    # string: `"1" in "11,12"` is a substring match and would wrongly
    # select family "1".
    return [families_dict[fam] for fam in families_dict
            if requested is None or fam in requested]
示例#18
0
def get_families(db, selected_families=None):
    """
    Query the samples table to return a list of Family
    objects that each contain all of the Subjects in a Family.

    db is handed to database.get_session_metadata(). selected_families
    is either None (return every family) or a comma-separated string of
    family ids to which the result is restricted.

    Raises ValueError if a requested id is not a key of the mapping
    returned by Family.from_cursor().
    """
    conn, metadata = database.get_session_metadata(db)

    families_dict = Family.from_cursor(conn)

    # if the user has specified a set of selected families
    # to which the analysis should be restricted, then
    # first sanity check that the family ids they specified are valid.
    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # validate in the order given so the first bad id is the one reported
        for family in requested_ids:
            if family not in families_dict:
                raise ValueError("Family \"%s\" is not a valid family_id\n" % family)
        requested = set(requested_ids)

    # Membership is tested against the parsed ids, never against the raw
    # string: `"1" in "11,12"` is a substring match and would wrongly
    # select family "1".
    return [families_dict[fam] for fam in families_dict
            if requested is None or fam in requested]
示例#19
0
def test_comp_het_one_parent():
    # two-member family (mom, kid) with kid's parent links cleared
    mom._i = 0
    kid._i = 1
    kid.dad = None
    kid.mom = None
    efam = EvalFamily(Family([mom, kid], 'pair_mom'))
    efam.gt_types = [Family.HET] * 2

    # ref/alt of the first site followed by ref/alt of the second
    alleles = ("A", "C", "A", "C")

    # both members HET at both sites
    outcome = efam.comp_het_pair([Family.HET] * 2, ["A/C"] * 2,
                                 [Family.HET] * 2, ["A/C"] * 2,
                                 [False] * 2, [False] * 2, *alleles)
    assert outcome['candidate']
    assert outcome['priority'] == 3, outcome['priority']

    # mom HOM_REF at the first site.
    # NOTE(review): `* 2` makes this genotype list four entries long for
    # a two-member family; presumably only the first two are read.
    # Confirm whether the repetition is intended.
    site1_types = [Family.HOM_REF, Family.HET] * 2
    outcome = efam.comp_het_pair(site1_types, ["A/A", "A/C"],
                                 [Family.HET, Family.HET], ["A/C"] * 2,
                                 [False] * 2, [False] * 2, *alleles)
    assert outcome['candidate']
    assert outcome['priority'] == 2, outcome['priority']

    # both members HOM_REF at the first site (same four-entry caveat)
    site1_types = [Family.HOM_REF, Family.HOM_REF] * 2
    outcome = efam.comp_het_pair(site1_types, ["A/A", "A/A"],
                                 [Family.HET, Family.HET], ["A/C"] * 2,
                                 [False] * 2, [False] * 2, *alleles)
    assert not outcome['candidate']
示例#20
0
def test_x_dom_parents():
    # trio with unaffected parents and an affected daughter
    mother = Sample('mom', affected=False, sex='female')
    father = Sample('dad', affected=False, sex='male')
    child = Sample('kid', affected=True, sex='female')

    child.mom = mother
    child.dad = father

    # genotype order follows the family list: dad, mom, kid
    efam = EvalFamily(Family([father, mother, child], 'trio'))

    # neither parent is het
    efam.gt_types = [Family.HOM_REF, Family.HOM_REF, Family.HET]
    assert not efam.x_dom()

    # dad is het, but neither parent is affected
    efam.gt_types = [Family.HET, Family.HOM_REF, Family.HET]
    assert not efam.x_dom()

    # the same genotypes pass once dad is marked affected
    father.affected = True
    assert efam.x_dom()

    # for male, only mom must be affected
    child.sex = 'male'
    assert not efam.x_dom()
示例#21
0
文件: gim.py 项目: tyl868/gemini
    def candidates(self):
        """
        Generate the candidate site pairs of each group.

        For every (grp, rows) pair produced by self.gen_candidates('gene'),
        each pair of sites in the group is evaluated once per family with
        fam.comp_het_pair(). Results whose 'candidate' entry is truthy are
        collected under the (site1, site2) key, and the generator yields
        (grp, self.filter_candidates(collected)).

        When args.families is set (comma-separated family ids) only those
        families are evaluated; self.fams always keeps the full mapping
        returned by Family.from_cursor().
        """
        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.conn)

        if args.families:
            # parse the requested ids once instead of once per family
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                gt_types, gt_bases, gt_phases = row['gt_types'], row[
                    'gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                # the string form is the key used for the impossible-site
                # bookkeeping below
                sites.append((str(site), site))

            for fam in fams.values():
                # if a site has been deemed "impossible", we store and then
                # skip it to avoid computational overhead on it multiple times.
                impossible_sites = set()
                for i, (strsite1, site1) in enumerate(sites[:-1], start=1):
                    if strsite1 in impossible_sites:
                        continue

                    # sites[i:] pairs site1 only with the sites after it
                    for (strsite2, site2) in sites[i:]:
                        if strsite2 in impossible_sites:
                            continue

                        ch = fam.comp_het_pair(
                            site1.gt_types,
                            site1.gt_bases,
                            site2.gt_types,
                            site2.gt_bases,
                            site1.gt_phases,
                            site2.gt_phases,
                            ref1=site1.row['ref'],
                            alt1=site1.row['alt'],
                            ref2=site2.row['ref'],
                            alt2=site2.row['alt'],
                            allow_unaffected=args.allow_unaffected,
                            fast_mode=True,
                            pattern_only=args.pattern_only)

                        if ch.get('impossible') == 'site1':
                            # site1 can never pair up for this family: stop
                            # trying partners for it
                            impossible_sites.add(strsite1)
                            break
                        if ch.get('impossible') == 'site2':
                            impossible_sites.add(strsite2)

                        if ch['candidate']:
                            samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#22
0
from __future__ import print_function
import sys
from inheritance import Sample, Family, EvalFamily

# Module-level trio: two unaffected parents and one affected child.
mom = Sample('mom', affected=False)
dad = Sample('dad', affected=False)
kid = Sample('kid', affected=True)

# link the child to both parents
kid.mom = mom
kid.dad = dad

trio_members = [mom, dad, kid]
fam = Family(trio_members, 'a')


def make_fam1():
    # Three-generation pedigree in which kid is the only member with
    # phenotype code 2; every other member carries code 1.
    ped_lines = """\
#family_id  sample_id   paternal_id maternal_id sex phenotype
1   dad   0   0   1  1
1   mom   grandpa   grandma   2  1
1   kid   dad   mom   1  2
1   kid2   dad   mom   1  1
1   grandma 0   0     2  1
1   grandpa 0   0     1  1"""
    return Family.from_ped(ped_lines)


def make_fam2():
    # 1 affected kid, parent, grandparent
    fam = Family.from_ped("""\
#family_id  sample_id   paternal_id maternal_id sex phenotype
1   dad   0   0   1  1