示例#1
0
    def set_family_info(self):
        """
        Build one genotype filter per family and record the family ids.

        Families are loaded with ``Family.from_cursor`` and kept on
        ``self.families``.  For every family, the method named by
        ``self.model`` is called to produce its filter; a family that is
        excluded by ``args.families`` gets the literal string ``'False'``
        instead.  Filters and ids are stored, in family order, in
        ``self.family_masks`` and ``self.family_ids``.
        """
        self.families = loaded = Family.from_cursor(self.gq.c).values()
        opts = self.args

        self.family_ids = []
        self.family_masks = []

        model_kwargs = {
            'only_affected': not getattr(opts, "allow_unaffected", False),
            'min_gq': opts.min_gq,
        }
        if self.model == "mendel_violations":
            # The defaults built above are discarded for this model.
            model_kwargs = {'only_affected': opts.only_affected}
        elif self.model == "comp_het":
            model_kwargs['pattern_only'] = opts.pattern_only
        else:
            model_kwargs['strict'] = not opts.lenient

        # None means "no restriction"; otherwise only these ids get a real filter.
        if opts.families:
            wanted = set(opts.families.split(","))
        else:
            wanted = None

        for fam in loaded:
            if wanted is not None and fam.family_id not in wanted:
                mask = 'False'
            else:
                # e.g. fam.auto_rec(gt_ll, min_depth)
                build_filter = getattr(fam, self.model)
                mask = build_filter(gt_ll=opts.gt_phred_ll,
                                    min_depth=opts.min_sample_depth,
                                    **model_kwargs)

            self.family_masks.append(mask)
            self.family_ids.append(fam.family_id)
示例#2
0
文件: gim.py 项目: mmoisse/gemini
    def set_family_info(self):
        """
        Compute the genotype filter for each family and remember its id.

        The families come from ``Family.from_cursor`` and are stored on
        ``self.families``.  Each requested family is asked for its filter by
        calling the method whose name is ``self.model``; families left out of
        ``args.families`` receive the literal string ``'False'``.  The
        resulting filters and ids are appended, in the same order, to
        ``self.family_masks`` and ``self.family_ids``.
        """
        self.families = all_families = Family.from_cursor(self.gq.c).values()
        cli = self.args

        self.family_ids = []
        self.family_masks = []

        extra = {'only_affected': not getattr(cli, "allow_unaffected", False)}
        if self.model == "mendel_violations":
            # This model takes its affected-only flag from a different argument.
            extra = {'only_affected': cli.only_affected}
        elif self.model == "comp_het":
            extra['pattern_only'] = cli.pattern_only
        else:
            extra['strict'] = not cli.lenient

        # A falsy ``families`` argument means every family is requested.
        selected = set(cli.families.split(",")) if cli.families else None

        for member in all_families:
            is_requested = selected is None or member.family_id in selected
            if is_requested:
                # e.g. member.auto_rec(gt_ll, min_depth)
                model_method = getattr(member, self.model)
                family_mask = model_method(gt_ll=cli.gt_phred_ll,
                                           min_depth=cli.min_sample_depth,
                                           **extra)
            else:
                family_mask = 'False'

            self.family_masks.append(family_mask)
            self.family_ids.append(member.family_id)
示例#3
0
def get_families(db, selected_families=None):
    """
    Return a list of the Family objects loaded from the database.

    Args:
        db: database path handed to ``sqlite3.connect``.
        selected_families: optional comma-separated string of family ids.
            When given, only those families are returned; when ``None``,
            every family is returned.

    Exits the program (``sys.exit``) if any requested id is not a key of
    the mapping returned by ``Family.from_cursor``.
    """
    # NOTE(review): the connection is never closed here; confirm whether the
    # returned Family objects still depend on it before adding a close().
    conn = sqlite3.connect(db)
    conn.isolation_level = None
    conn.row_factory = sqlite3.Row
    c = conn.cursor()

    families_dict = Family.from_cursor(c)

    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # Sanity check, in the order the user gave them, that the family
        # ids they specified are valid.
        for family_id in requested_ids:
            if family_id not in families_dict:
                sys.exit("ERROR: family \"%s\" is not a valid family_id\n" % family_id)
        # Compare against the parsed ids, not the raw string: ``x in "a,b"``
        # is a substring test and would let "fam1" match a request for "fam10".
        requested = set(requested_ids)

    return [families_dict[family_id]
            for family_id in families_dict
            if requested is None or family_id in requested]
示例#4
0
def get_families(db, selected_families=None):
    """
    Return a list of the Family objects loaded from the database.

    Args:
        db: database path handed to ``sqlite3.connect``.
        selected_families: optional comma-separated string of family ids.
            When given, only those families are returned; when ``None``,
            every family is returned.

    Exits the program (``sys.exit``) if any requested id is not a key of
    the mapping returned by ``Family.from_cursor``.
    """
    # NOTE(review): the connection is never closed here; confirm whether the
    # returned Family objects still depend on it before adding a close().
    conn = sqlite3.connect(db)
    conn.isolation_level = None
    conn.row_factory = sqlite3.Row
    c = conn.cursor()

    families_dict = Family.from_cursor(c)

    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # Sanity check, in the order the user gave them, that the family
        # ids they specified are valid.
        for family_id in requested_ids:
            if family_id not in families_dict:
                sys.exit("ERROR: family \"%s\" is not a valid family_id\n" %
                         family_id)
        # Compare against the parsed ids, not the raw string: ``x in "a,b"``
        # is a substring test and would let "fam1" match a request for "fam10".
        requested = set(requested_ids)

    return [families_dict[family_id]
            for family_id in families_dict
            if requested is None or family_id in requested]
示例#5
0
文件: gim.py 项目: arq5x/gemini
    def candidates(self):
        """
        Yield ``(group, filtered_candidates)`` for each gene-level group.

        For every group produced by ``self.gen_candidates('gene')``, a Site
        is built per row and each (earlier, later) pair of sites is tested
        against every selected family with ``fam.comp_het_pair``.  Results
        whose ``'candidate'`` entry is truthy are collected per site pair and
        passed through ``self.filter_candidates`` before being yielded.

        If ``args.families`` is set, only the families whose ids appear in
        that comma-separated string are tested; ``self.fams`` always keeps
        the full mapping.
        """
        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.conn)

        if args.families:
            # Build the lookup set once instead of once per family.
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                # Read the genotype columns before constructing the Site,
                # as the original code did.
                gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                sites.append((str(site), site))

            for fam in fams.values():
                # if a site has been deemed "impossible", we store and then
                # skip it to avoid computational overhead on it multiple times.
                impossible_sites = set()
                for i, (strsite1, site1) in enumerate(sites[:-1], start=1):
                    if strsite1 in impossible_sites:
                        continue

                    for strsite2, site2 in sites[i:]:
                        if strsite2 in impossible_sites:
                            continue

                        ch = fam.comp_het_pair(site1.gt_types, site1.gt_bases,
                                               site2.gt_types, site2.gt_bases,
                                               site1.gt_phases, site2.gt_phases,
                                               ref1=site1.row['ref'],
                                               alt1=site1.row['alt'],
                                               ref2=site2.row['ref'],
                                               alt2=site2.row['alt'],
                                               allow_unaffected=args.allow_unaffected,
                                               fast_mode=True,
                                               pattern_only=args.pattern_only)

                        impossible = ch.get('impossible')
                        if impossible == 'site1':
                            # site1 can never pair up for this family: stop
                            # trying the remaining partners.
                            impossible_sites.add(strsite1)
                            break
                        if impossible == 'site2':
                            impossible_sites.add(strsite2)

                        if not ch['candidate']:
                            continue

                        samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#6
0
    def candidates(self):
        """
        Yield ``(group, filtered_candidates)`` for each gene-level group.

        For every group produced by ``self.gen_candidates('gene')``, a Site
        is built per row and each (earlier, later) pair of sites is tested
        against every selected family with ``fam.comp_het_pair``.  Results
        whose ``'candidate'`` entry is truthy are collected per site pair and
        passed through ``self.filter_candidates`` before being yielded.

        If ``args.families`` is set, only the families whose ids appear in
        that comma-separated string are tested; ``self.fams`` always keeps
        the full mapping.
        """
        from itertools import combinations

        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.c)

        if args.families:
            # Build the lookup set once instead of once per family.
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                # Read the genotype columns before constructing the Site,
                # as the original code did.
                gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                sites.append(site)

            # combinations() yields each (earlier, later) pair in the same
            # order as the nested index loops, without copying list slices.
            for site1, site2 in combinations(sites, 2):
                for fam in fams.values():
                    ch = fam.comp_het_pair(
                        site1.gt_types,
                        site1.gt_bases,
                        site2.gt_types,
                        site2.gt_bases,
                        site1.gt_phases,
                        site2.gt_phases,
                        ref1=site1.row['ref'],
                        alt1=site1.row['alt'],
                        ref2=site2.row['ref'],
                        alt2=site2.row['alt'],
                        allow_unaffected=args.allow_unaffected,
                        fast_mode=True,
                        pattern_only=args.pattern_only)

                    if not ch['candidate']:
                        continue

                    samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#7
0
文件: gim.py 项目: mmoisse/gemini
    def candidates(self):
        """
        Yield ``(group, filtered_candidates)`` for each gene-level group.

        For every group produced by ``self.gen_candidates('gene')``, a Site
        is built per row and each (earlier, later) pair of sites is tested
        against every selected family with ``fam.comp_het_pair``.  Results
        whose ``'candidate'`` entry is truthy are collected per site pair and
        passed through ``self.filter_candidates`` before being yielded.

        If ``args.families`` is set, only the families whose ids appear in
        that comma-separated string are tested; ``self.fams`` always keeps
        the full mapping.
        """
        from itertools import combinations

        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.c)

        if args.families:
            # Build the lookup set once instead of once per family.
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                # Read the genotype columns before constructing the Site,
                # as the original code did.
                gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                sites.append(site)

            # combinations() yields each (earlier, later) pair in the same
            # order as the nested index loops, without copying list slices.
            for site1, site2 in combinations(sites, 2):
                for fam in fams.values():
                    ch = fam.comp_het_pair(
                        site1.gt_types,
                        site1.gt_bases,
                        site2.gt_types,
                        site2.gt_bases,
                        site1.gt_phases,
                        site2.gt_phases,
                        ref1=site1.row['ref'],
                        alt1=site1.row['alt'],
                        ref2=site2.row['ref'],
                        alt2=site2.row['alt'],
                        allow_unaffected=args.allow_unaffected,
                        fast_mode=True,
                        pattern_only=args.pattern_only)

                    if not ch['candidate']:
                        continue

                    samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)
示例#8
0
def get_families(db, selected_families=None):
    """
    Return a list of the Family objects loaded from the database.

    Args:
        db: database identifier handed to ``database.get_session_metadata``.
        selected_families: optional comma-separated string of family ids.
            When given, only those families are returned; when ``None``,
            every family is returned.

    Raises:
        ValueError: if a requested id is not a key of the mapping returned
            by ``Family.from_cursor``.
    """
    # Only the connection is needed here; the metadata is ignored.
    conn, _metadata = database.get_session_metadata(db)

    families_dict = Family.from_cursor(conn)

    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # Sanity check, in the order the user gave them, that the family
        # ids they specified are valid.
        for family_id in requested_ids:
            if family_id not in families_dict:
                raise ValueError("Family \"%s\" is not a valid family_id\n" % family_id)
        # Compare against the parsed ids, not the raw string: ``x in "a,b"``
        # is a substring test and would let "fam1" match a request for "fam10".
        requested = set(requested_ids)

    return [families_dict[family_id]
            for family_id in families_dict
            if requested is None or family_id in requested]
示例#9
0
def get_families(db, selected_families=None):
    """
    Return a list of the Family objects loaded from the database.

    Args:
        db: database identifier handed to ``database.get_session_metadata``.
        selected_families: optional comma-separated string of family ids.
            When given, only those families are returned; when ``None``,
            every family is returned.

    Raises:
        ValueError: if a requested id is not a key of the mapping returned
            by ``Family.from_cursor``.
    """
    # Only the connection is needed here; the metadata is ignored.
    conn, _metadata = database.get_session_metadata(db)

    families_dict = Family.from_cursor(conn)

    requested = None
    if selected_families is not None:
        requested_ids = selected_families.split(',')
        # Sanity check, in the order the user gave them, that the family
        # ids they specified are valid.
        for family_id in requested_ids:
            if family_id not in families_dict:
                raise ValueError("Family \"%s\" is not a valid family_id\n" % family_id)
        # Compare against the parsed ids, not the raw string: ``x in "a,b"``
        # is a substring test and would let "fam1" match a request for "fam10".
        requested = set(requested_ids)

    return [families_dict[family_id]
            for family_id in families_dict
            if requested is None or family_id in requested]
示例#10
0
文件: gim.py 项目: tyl868/gemini
    def candidates(self):
        """
        Yield ``(group, filtered_candidates)`` for each gene-level group.

        For every group produced by ``self.gen_candidates('gene')``, a Site
        is built per row and each (earlier, later) pair of sites is tested
        against every selected family with ``fam.comp_het_pair``.  Results
        whose ``'candidate'`` entry is truthy are collected per site pair and
        passed through ``self.filter_candidates`` before being yielded.

        If ``args.families`` is set, only the families whose ids appear in
        that comma-separated string are tested; ``self.fams`` always keeps
        the full mapping.
        """
        args = self.args

        self.gq._connect_to_database()
        fams = self.fams = Family.from_cursor(self.gq.conn)

        if args.families:
            # Build the lookup set once instead of once per family.
            requested = set(args.families.split(","))
            fams = {f: fam for f, fam in fams.items() if f in requested}

        for grp, li in self.gen_candidates('gene'):
            samples_w_hetpair = defaultdict(list)
            sites = []
            for row in li:
                # Read the genotype columns before constructing the Site,
                # as the original code did.
                gt_types, gt_bases, gt_phases = row['gt_types'], row['gts'], row['gt_phases']
                site = Site(row)
                site.gt_phases, site.gt_bases, site.gt_types = gt_phases, gt_bases, gt_types
                sites.append((str(site), site))

            for fam in fams.values():
                # if a site has been deemed "impossible", we store and then
                # skip it to avoid computational overhead on it multiple times.
                impossible_sites = set()
                for i, (strsite1, site1) in enumerate(sites[:-1], start=1):
                    if strsite1 in impossible_sites:
                        continue

                    for strsite2, site2 in sites[i:]:
                        if strsite2 in impossible_sites:
                            continue

                        ch = fam.comp_het_pair(
                            site1.gt_types,
                            site1.gt_bases,
                            site2.gt_types,
                            site2.gt_bases,
                            site1.gt_phases,
                            site2.gt_phases,
                            ref1=site1.row['ref'],
                            alt1=site1.row['alt'],
                            ref2=site2.row['ref'],
                            alt2=site2.row['alt'],
                            allow_unaffected=args.allow_unaffected,
                            fast_mode=True,
                            pattern_only=args.pattern_only)

                        impossible = ch.get('impossible')
                        if impossible == 'site1':
                            # site1 can never pair up for this family: stop
                            # trying the remaining partners.
                            impossible_sites.add(strsite1)
                            break
                        if impossible == 'site2':
                            impossible_sites.add(strsite2)

                        if not ch['candidate']:
                            continue

                        samples_w_hetpair[(site1, site2)].append(ch)
            yield grp, self.filter_candidates(samples_w_hetpair)