Пример #1
0
    def toJSON(self):
        """Build a plain dict holding only the populated fields of this object.

        Every listed attribute is copied into the result when its value is
        truthy. Each entry of ``self.locations`` is a pair of positions that
        is rendered as a ``"chrom:pos1-pos2"`` string.

        Raises:
            ValueError: if the two positions of a location pair resolve to
                different chromosomes.
        """
        simple_fields = (
            'variant_types',
            'so_annotations',
            'ref_freqs',
            'ref_acs',
            'ref_hom_hemi',
            'annotations',
            'genes',
            'exclude_genes',
        )

        result = {}
        for name in simple_fields:
            value = getattr(self, name)
            if value:
                result[name] = value

        # Nothing more to add when no location ranges are set.
        if not self.locations:
            return result

        ranges = []
        for first, second in self.locations:
            first_chrom, first_pos = genomeloc.get_chr_pos(first)
            second_chrom, second_pos = genomeloc.get_chr_pos(second)
            if first_chrom != second_chrom:
                raise ValueError("locations have different chromosomes: %s:%s %s:%s" % (first_chrom, first_pos, second_chrom, second_pos))
            ranges.append("%s:%s-%s" % (first_chrom, first_pos, second_pos))

        result['locations'] = ranges
        return result
Пример #2
0
    def toJSON(self):
        """Return a dict of this object's non-empty fields.

        Attributes whose value is falsy are left out. When ``self.locations``
        is set, each of its position pairs becomes a ``"chrom:pos1-pos2"``
        string under the ``'locations'`` key.

        Raises:
            ValueError: if the two positions of a pair are on different
                chromosomes.
        """
        payload = {}
        for attr in ('variant_types', 'so_annotations', 'ref_freqs',
                     'annotations', 'genes', 'exclude_genes'):
            current = getattr(self, attr)
            if current:
                payload[attr] = current

        spans = self.locations
        if spans:
            formatted = []
            for lower, upper in spans:
                lower_chrom, lower_pos = genomeloc.get_chr_pos(lower)
                upper_chrom, upper_pos = genomeloc.get_chr_pos(upper)
                if lower_chrom != upper_chrom:
                    message = "locations have different chromosomes: %s:%s %s:%s" % (
                        lower_chrom, lower_pos, upper_chrom, upper_pos)
                    raise ValueError(message)
                formatted.append("%s:%s-%s" % (lower_chrom, lower_pos, upper_pos))
            payload['locations'] = formatted

        return payload
Пример #3
0
def family_coverage_gene(request, family, gene_id):
    """Render the coverage summary page for one gene across a family.

    Collects the coding regions of the gene, fetches the per-individual
    coverage for the gene, sums the per-category base counts over all
    individuals and derives the ratio of each category against the total
    number of coding bases (coding size times number of individuals).

    Args:
        request: the HTTP request, passed through to ``render``.
        family: object exposing ``project`` and ``get_individuals()``.
        gene_id: gene identifier passed to the reference lookups.

    Returns:
        The response produced by ``render`` for the
        'coverage/family_coverage_gene.html' template.
    """
    categories = ('callable', 'low_coverage', 'poor_mapping')

    gene = get_reference().get_gene(gene_id)
    gene_structure = get_reference().get_gene_structure(gene_id)
    individuals = family.get_individuals()
    indiv_ids = [i.indiv_id for i in individuals]

    coding_regions = [
        {
            'start': genomeloc.get_chr_pos(c.xstart)[1],
            'stop': genomeloc.get_chr_pos(c.xstop)[1],
            'gene_id': c.gene_id,
            'size': c.xstop - c.xstart + 1,
        }
        for c in get_coding_regions_from_gene_structure(gene_id, gene_structure)
    ]

    coverages = {}
    for individual in individuals:
        coverages[individual.indiv_id] = get_coverage_store().get_coverage_for_gene(
            str(individual.pk), gene['gene_id'])

    whole_gene = Counter({'callable': 0, 'low_coverage': 0, 'poor_mapping': 0})
    for coverage_spec in coverages.values():
        gene_totals = coverage_spec['gene_totals']
        for category in categories:
            whole_gene[category] += gene_totals[category]

    gene_coding_size = sum(c['stop'] - c['start'] + 1 for c in coding_regions)
    totalsize = gene_coding_size * len(indiv_ids)

    # A family without individuals or a gene without coding regions gives a
    # zero denominator; report 0.0 ratios instead of raising ZeroDivisionError.
    for category in categories:
        if totalsize:
            ratio = whole_gene[category] / float(totalsize)
        else:
            ratio = 0.0
        whole_gene['ratio_' + category] = ratio
    whole_gene['gene_coding_size'] = gene_coding_size

    return render(
        request, 'coverage/family_coverage_gene.html', {
            'project': family.project,
            'family': family,
            'gene': gene,
            'coverages_json': json.dumps(coverages),
            'whole_gene_json': json.dumps(whole_gene),
            'coding_regions_json': json.dumps(coding_regions),
            'indiv_ids_json': json.dumps(indiv_ids),
            'individuals': individuals,
            'whole_gene': whole_gene,
        })
Пример #4
0
def family_coverage_gene(request, family, gene_id):
    """Render the per-gene coverage view for all individuals of a family.

    The view gathers the gene's coding regions, looks up each individual's
    coverage for the gene, totals the 'callable', 'low_coverage' and
    'poor_mapping' counts over the family and expresses each total as a
    fraction of (coding size * number of individuals).

    Args:
        request: the HTTP request, forwarded to ``render``.
        family: object providing ``project`` and ``get_individuals()``.
        gene_id: gene identifier used for the reference lookups.

    Returns:
        The ``render`` response for 'coverage/family_coverage_gene.html'.
    """
    gene = get_reference().get_gene(gene_id)
    gene_structure = get_reference().get_gene_structure(gene_id)
    individuals = family.get_individuals()
    indiv_ids = [i.indiv_id for i in individuals]
    num_individuals = len(indiv_ids)

    coding_regions = []
    for c in get_coding_regions_from_gene_structure(gene_id, gene_structure):
        coding_regions.append({
            'start': genomeloc.get_chr_pos(c.xstart)[1],
            'stop': genomeloc.get_chr_pos(c.xstop)[1],
            'gene_id': c.gene_id,
            'size': c.xstop - c.xstart + 1,
        })

    coverages = {}
    for individual in individuals:
        coverages[individual.indiv_id] = get_coverage_store().get_coverage_for_gene(
            str(individual.pk),
            gene['gene_id']
        )

    whole_gene = Counter({'callable': 0, 'low_coverage': 0, 'poor_mapping': 0})
    for coverage_spec in coverages.values():
        whole_gene['callable'] += coverage_spec['gene_totals']['callable']
        whole_gene['low_coverage'] += coverage_spec['gene_totals']['low_coverage']
        whole_gene['poor_mapping'] += coverage_spec['gene_totals']['poor_mapping']

    gene_coding_size = 0
    for c in coding_regions:
        gene_coding_size += c['stop'] - c['start'] + 1
    totalsize = gene_coding_size * num_individuals

    def _ratio(count):
        # totalsize is 0 when there are no individuals or no coding regions;
        # fall back to 0.0 rather than dividing by zero.
        return count / float(totalsize) if totalsize else 0.0

    whole_gene['ratio_callable'] = _ratio(whole_gene['callable'])
    whole_gene['ratio_low_coverage'] = _ratio(whole_gene['low_coverage'])
    whole_gene['ratio_poor_mapping'] = _ratio(whole_gene['poor_mapping'])
    whole_gene['gene_coding_size'] = gene_coding_size

    return render(request, 'coverage/family_coverage_gene.html', {
        'project': family.project,
        'family': family,
        'gene': gene,
        'coverages_json': json.dumps(coverages),
        'whole_gene_json': json.dumps(whole_gene),
        'coding_regions_json': json.dumps(coding_regions),
        'indiv_ids_json': json.dumps(indiv_ids),
        'individuals': individuals,
        'whole_gene': whole_gene,
    })
Пример #5
0
    def get_multiple_variants(self, project_id, family_id, xpos_ref_alt_tuples, user=None):
        """
        Look up several variants of one family with a single query.

        Each requested variant is given as an (xpos, ref, alt) tuple; ref and alt are the plain
        strings from the VCF. The returned list has the same length and order as
        xpos_ref_alt_tuples, with None in the slot of every variant the query did not return.
        """
        requested_ids = []
        for xpos, ref, alt in xpos_ref_alt_tuples:
            chrom, pos = get_chr_pos(xpos)
            requested_ids.append("%s-%s-%s-%s" % (chrom, pos, ref, alt))

        hits = self.get_elasticsearch_variants(
            project_id, family_id=family_id, variant_id_filter=requested_ids, user=user)

        # Index the hits by (xpos, ref, alt) so they can be re-emitted in request order.
        hit_by_key = {(hit.xpos, hit.ref, hit.alt): hit for hit in hits}

        # Missing keys map to None, keeping the output aligned with the input.
        return [hit_by_key.get(key) for key in xpos_ref_alt_tuples]
Пример #6
0
    def get_single_variant(self, project_id, family_id, xpos, ref, alt, user=None):
        """Fetch the single variant record matching xpos/ref/alt in a family.

        Returns:
            The matching variant, or None when the query returns nothing.

        Raises:
            ValueError: if the query returns more than one record.
        """
        chrom, pos = get_chr_pos(xpos)

        matches = list(self.get_elasticsearch_variants(
            project_id,
            family_id=family_id,
            variant_id_filter=["%s-%s-%s-%s" % (chrom, pos, ref, alt)],
            user=user,
            include_all_consequences=True,
        ))

        if len(matches) > 1:
            # Include every duplicate record in the message to ease debugging.
            dumped = "\n".join([pformat(v.toJSON()) for v in matches])
            raise ValueError(
                "Multiple variant records found for project: %s family: %s  %s-%s-%s-%s: \n %s" % (
                    project_id, family_id, chrom, pos, ref, alt, dumped))

        return matches[0] if matches else None
Пример #7
0
    def get_multiple_variants(self, project_id, family_id, xpos_ref_alt_tuples, user=None):
        """
        Retrieve several variants of a family in one query.

        Variants are identified by (xpos, ref, alt), where ref and alt are the plain strings from
        the VCF. A chromosome reported as 'M' is written as 'MT' in the variant id used for the
        query. The result has one entry per input tuple, in input order, with None for every
        tuple that was not found.
        """
        variant_ids = []
        for xpos, ref, alt in xpos_ref_alt_tuples:
            chrom, pos = get_chr_pos(xpos)
            chrom = 'MT' if chrom == 'M' else chrom
            variant_ids.append("%s-%s-%s-%s" % (chrom, pos, ref, alt))

        found = self.get_elasticsearch_variants(
            project_id,
            family_id=family_id,
            variant_id_filter=variant_ids,
            user=user,
        )

        # Key the retrieved records by (xpos, ref, alt) so they can be matched to the request.
        found_by_key = dict(((v.xpos, v.ref, v.alt), v) for v in found)

        # Same length and order as the input; None marks a variant missing from the results.
        ordered = []
        for key in xpos_ref_alt_tuples:
            ordered.append(found_by_key.get(key))
        return ordered
Пример #8
0
    def __init__(self, xpos, ref, alt):
        """Create a variant identified by its position and ref/alt alleles.

        Args:
            xpos: position value understood by ``genomeloc.get_chr_pos``.
            ref: reference allele string.
            alt: alternate allele string.
        """
        self.xpos = xpos
        self.ref = ref
        self.alt = alt

        # TODO: should be implemented in genomeloc.py
        self.xposx = xpos
        if len(ref) == 1 and len(alt) > 1:  # insertion
            self.xposx += len(alt) - 1
        elif len(ref) > 1 and len(alt) == 1:  # deletion
            # NOTE(review): this puts the end one base before xpos - confirm this is intended.
            self.xposx -= 1
        elif len(ref) > 1 and len(alt) > 1:  # multi base sub
            self.xposx += len(alt) - 1
        chrom, pos = genomeloc.get_chr_pos(self.xpos)
        self.chr = chrom
        self.pos = pos
        # Integer modulus: `% 1e9` is a float operation and would make pos_end a
        # float (e.g. 12345.0) even though xposx is an integer.
        self.pos_end = self.xposx % (10 ** 9)

        # TODO: feels like this should be an ordered dict
        self.genotypes = {}
        self.extras = {}
        self.annotation = None
        self.gene_ids = None
        self.coding_gene_ids = None

        self.vcf_id = None
        # Single-base ref and alt is a SNP; every other length combination is an indel.
        self.vartype = 'snp' if len(ref) == 1 and len(alt) == 1 else 'indel'
Пример #9
0
 def toJSON(self):
     """Return a dict of this object's non-empty fields.

     Attributes with a falsy value are omitted. Each pair in
     ``self.locations`` is rendered as ``"chrom:pos1-pos2"``, taking the
     chromosome and first position from the first element of the pair and
     the second position from the second element.
     """
     d = {}
     for key in [
             'variant_types', 'so_annotations', 'ref_freqs', 'annotations',
             'genes', "exclude_genes"
     ]:
         value = getattr(self, key)
         if value:
             d[key] = value
     if self.locations:
         locations = []
         for locA, locB in self.locations:
             # Resolve the first position once instead of once per field.
             chrom_pos_a = genomeloc.get_chr_pos(locA)
             pos_b = genomeloc.get_chr_pos(locB)[1]
             locations.append(
                 "%s:%s-%s" % (chrom_pos_a[0], chrom_pos_a[1], pos_b))
         d['locations'] = locations
     return d
Пример #10
0
    def get_multiple_variants(self, project_id, family_id,
                              xpos_ref_alt_tuples):
        """
        Get one or more specific variants in a family
        Variant should be identifiable by xpos, ref, and alt
        Note that ref and alt are just strings from the VCF (for now)

        Results are cached in self._results_cache per (project_id, family_id, requested tuples).
        The returned list has one entry per requested tuple, in the same order, with None for
        tuples that were not found.
        """
        # Materialize the input once: it is used for the cache key, the query and the final
        # ordering, and a one-shot iterable would be exhausted after the first use.
        requested = tuple(xpos_ref_alt_tuples)

        cache_key = (project_id, family_id, requested)
        if cache_key in self._results_cache:
            return self._results_cache[cache_key]

        # Only build the variant ids when the query actually has to run.
        variant_ids = []
        for xpos, ref, alt in requested:
            chrom, pos = get_chr_pos(xpos)
            variant_ids.append("%s-%s-%s-%s" % (chrom, pos, ref, alt))

        # make sure all variants in xpos_ref_alt_tuples were retrieved and are in the same order.
        # Return None for tuples that weren't found in ES.
        results_by_xpos_ref_alt = {}
        for r in self.get_elasticsearch_variants(project_id,
                                                 family_id=family_id,
                                                 variant_id_filter=variant_ids):
            results_by_xpos_ref_alt[(r.xpos, r.ref, r.alt)] = r
        results = [results_by_xpos_ref_alt.get(t) for t in requested]

        self._results_cache[cache_key] = results
        return results
Пример #11
0
    def get_single_variant(self, project_id, family_id, xpos, ref, alt):
        """Return the one variant matching xpos/ref/alt in a family, using the results cache.

        The list of matching records is stored in self._results_cache under
        (project_id, family_id, xpos, ref, alt) and reused on later calls.

        Returns:
            The matching variant, or None when there is no match.

        Raises:
            ValueError: if more than one record matches.
        """
        chrom, pos = get_chr_pos(xpos)
        es_variant_id = "%s-%s-%s-%s" % (chrom, pos, ref, alt)
        lookup_key = (project_id, family_id, xpos, ref, alt)

        if cache_key_present(self._results_cache, lookup_key) if False else lookup_key in self._results_cache:
            matches = self._results_cache[lookup_key]
        else:
            matches = list(self.get_elasticsearch_variants(
                project_id, family_id=family_id, variant_id_filter=[es_variant_id]))
            self._results_cache[lookup_key] = matches

        if len(matches) > 1:
            dumped = "\n".join([pformat(v.toJSON()) for v in matches])
            raise ValueError(
                "Multiple variant records found for project: %s family: %s  %s-%s-%s-%s: \n %s" % (
                    project_id, family_id, chrom, pos, ref, alt, dumped))

        return matches[0] if matches else None
Пример #12
0
    def toDict(self):
        """Return this record's fields as a dict.

        The 'genes' entry is a list with one {'gene', 'cds_dist'} dict per
        object in ``self.breakpointgene_set``; 'chr' and 'pos' come from
        ``genomeloc.get_chr_pos(self.xpos)``.
        """
        gene_entries = []
        for linked_gene in self.breakpointgene_set.all():
            gene_entries.append({'gene': linked_gene.gene_symbol, 'cds_dist': linked_gene.cds_dist})

        chrom, position = genomeloc.get_chr_pos(self.xpos)

        as_dict = {}
        as_dict['xpos'] = self.xpos
        as_dict['chr'] = chrom
        as_dict['pos'] = position
        as_dict['obs'] = self.obs
        as_dict['sample_count'] = self.sample_count
        as_dict['consensus'] = self.consensus
        as_dict['indiv_id'] = self.individual.indiv_id
        as_dict['genes'] = gene_entries
        return as_dict
Пример #13
0
    def toList(self):
        """Return this record's fields as a positional list.

        Order: xpos, chromosome, position, obs, sample_count, consensus,
        partner, indiv_id, genes. The genes element is a list with one
        {'gene', 'cds_dist'} dict per object in ``self.breakpointgene_set``.
        """
        gene_entries = []
        for linked_gene in self.breakpointgene_set.all():
            gene_entries.append({'gene': linked_gene.gene_symbol, 'cds_dist': linked_gene.cds_dist})

        chrom, position = genomeloc.get_chr_pos(self.xpos)

        row = [self.xpos, chrom, position]
        row.extend([self.obs, self.sample_count, self.consensus, self.partner])
        row.append(self.individual.indiv_id)
        row.append(gene_entries)
        return row
Пример #14
0
    def toDict(self):
        """Serialize this record to a dict.

        'genes' holds one {'gene', 'cds_dist'} dict per object returned by
        ``self.breakpointgene_set.all()``; 'chr' and 'pos' are the two values
        returned by ``genomeloc.get_chr_pos(self.xpos)``.
        """
        related = self.breakpointgene_set.all()
        gene_summaries = [
            dict(gene=item.gene_symbol, cds_dist=item.cds_dist)
            for item in related
        ]

        chrom, position = genomeloc.get_chr_pos(self.xpos)

        return dict([
            ('xpos', self.xpos),
            ('chr', chrom),
            ('pos', position),
            ('obs', self.obs),
            ('sample_count', self.sample_count),
            ('consensus', self.consensus),
            ('indiv_id', self.individual.indiv_id),
            ('genes', gene_summaries),
        ])
Пример #15
0
    def get_single_variant(self, project_id, family_id, xpos, ref, alt, user=None):
        """Fetch the single variant record matching xpos/ref/alt in a family.

        A chromosome reported as 'M' is written as 'MT' in the variant id used for the query.

        Returns:
            The matching variant, or None when the query returns nothing.

        Raises:
            ValueError: if the query returns more than one record.
        """
        chrom, pos = get_chr_pos(xpos)
        chrom = 'MT' if chrom == 'M' else chrom

        matches = list(self.get_elasticsearch_variants(
            project_id,
            family_id=family_id,
            variant_id_filter=["%s-%s-%s-%s" % (chrom, pos, ref, alt)],
            user=user,
            include_all_consequences=True,
        ))

        if len(matches) > 1:
            # Dump every duplicate record into the message to ease debugging.
            dumped = "\n".join([pformat(v.toJSON()) for v in matches])
            raise ValueError("Multiple variant records found for project: %s family: %s  %s-%s-%s-%s: \n %s" % (
                project_id, family_id, chrom, pos, ref, alt, dumped))

        return matches[0] if matches else None
Пример #16
0
    def toList(self):
        """Serialize this record to a positional list.

        Elements, in order: xpos, chromosome, position, obs, sample_count,
        consensus, partner, indiv_id and a list with one {'gene', 'cds_dist'}
        dict per object returned by ``self.breakpointgene_set.all()``.
        """
        related = self.breakpointgene_set.all()
        gene_summaries = [
            dict(gene=item.gene_symbol, cds_dist=item.cds_dist)
            for item in related
        ]

        chrom, position = genomeloc.get_chr_pos(self.xpos)

        leading = [self.xpos, chrom, position]
        trailing = [
            self.obs,
            self.sample_count,
            self.consensus,
            self.partner,
            self.individual.indiv_id,
            gene_summaries,
        ]
        return leading + trailing