Exemplo n.º 1
0
    def test_get_metrics(self):
        # Persist a variant so there is an id to request metrics for
        variant = Variant(
            chr=self.chromo,
            pos=1,
            ref="jon",
            alt="ba",
            md5="kid",
            type=self.varianttype,
        )
        variant.save()

        # Request the assessment metrics of that variant, accepting JSON
        url = '/api/variants/{0}/assessment-metrics/'.format(variant.id)
        response = self.client.get(url, HTTP_ACCEPT='application/json')

        # The endpoint is expected to answer with an OK status
        self.assertEqual(response.status_code, codes.ok)
Exemplo n.º 2
0
    def test_get_all(self):
        # Take the row count on either side of inserting one variant
        total_before = Variant.objects.count()
        variant = Variant(
            chr=self.chromo,
            pos=1,
            ref="jon",
            alt="ba",
            md5="kid",
            type=self.varianttype,
        )
        variant.save()
        total_after = Variant.objects.count()

        # Fetch the freshly saved variant through the API by its id
        url = '/api/variants/{0}/'.format(variant.id)
        response = self.client.get(url, HTTP_ACCEPT='application/json')

        # The request must succeed and exactly one row must have been added
        self.assertEqual(response.status_code, codes.ok)
        self.assertEqual(total_after, total_before + 1)
Exemplo n.º 3
0
    def get_variant(self, record):
        """Get or create the variant described by ``record``.

        The variant is keyed by the MD5 computed from its chromosome,
        position, reference allele and '/'-joined alternate alleles. When
        that MD5 is already in ``self.variant_cache`` a stub instance that
        carries only the cached primary key is returned and nothing is
        written. Otherwise a new variant is saved, counted in
        ``self.file_variants``, added to the cache, and its 'EFF'
        annotations (if any) are handed to ``self.load_effects``.

        ``record`` must expose ``CHROM``, ``POS``, ``REF``, ``ALT``, ``ID``,
        ``INFO`` and ``var_type`` (presumably a parsed VCF record -- confirm
        against the caller).

        Returns the stub or the newly saved variant.
        """
        chrom, pos, ref = record.CHROM, record.POS, record.REF
        alt = '/'.join(str(allele) for allele in record.ALT)

        # Calculate MD5 and attempt to fetch the primary key from
        # the local cache, otherwise use it when inserting.
        md5 = calculate_md5(chrom, pos, ref, alt)

        # Ensure the cache is valid for the chromosome
        self.check_cache(chrom)

        variant_id = self.variant_cache.get(md5, None)

        # Already known: a faux instance holding just the key is enough
        if variant_id:
            return Variant(pk=variant_id)

        # Not seen before: create it
        variant = Variant(pos=pos, ref=ref, alt=alt, md5=md5)

        # Update foreign key references
        variant.chr = self.get_chromosome(chrom)
        variant.type = self.get_variant_type(record.var_type.upper())

        # A lone period is a placeholder id; store None instead. The
        # former ``cond and None or value`` form always produced ``value``
        # because None is falsy, so '.' was being saved as the rsid.
        variant.rsid = None if record.ID == '.' else record.ID
        variant.save()
        self.file_variants += 1

        # Update cache
        self.variant_cache[md5] = variant.pk

        # Process SNPEff data only now, i.e. the first time this variant
        # has been seen.
        if 'EFF' in record.INFO:
            effs = record.INFO['EFF'].split(',')
            self.load_effects(effs, variant)
        return variant
Exemplo n.º 4
0
    def get_variant(self, record):
        """Get or create the variant described by ``record``.

        The variant is keyed by the MD5 computed from its chromosome,
        position, reference allele and '/'-joined alternate alleles. When
        that MD5 is already in ``self.variant_cache`` a stub instance that
        carries only the cached primary key is returned and nothing is
        written. Otherwise a new variant is saved, counted in
        ``self.file_variants``, added to the cache, and its 'EFF'
        annotations (if any) are handed to ``self.load_effects``.

        ``record`` must expose ``CHROM``, ``POS``, ``REF``, ``ALT``, ``ID``,
        ``INFO`` and ``var_type`` (presumably a parsed VCF record -- confirm
        against the caller).

        Returns the stub or the newly saved variant.
        """
        chrom, pos, ref = record.CHROM, record.POS, record.REF
        alt = '/'.join(str(allele) for allele in record.ALT)

        # Calculate MD5 and attempt to fetch the primary key from
        # the local cache, otherwise use it when inserting.
        md5 = calculate_md5(chrom, pos, ref, alt)

        # Ensure the cache is valid for the chromosome
        self.check_cache(chrom)

        variant_id = self.variant_cache.get(md5, None)

        # Already known: a faux instance holding just the key is enough
        if variant_id:
            return Variant(pk=variant_id)

        # Not seen before: create it
        variant = Variant(pos=pos, ref=ref, alt=alt, md5=md5)

        # Update foreign key references
        variant.chr = self.get_chromosome(chrom)
        variant.type = self.get_variant_type(record.var_type.upper())

        # A lone period is a placeholder id; store None instead. The
        # former ``cond and None or value`` form always produced ``value``
        # because None is falsy, so '.' was being saved as the rsid.
        variant.rsid = None if record.ID == '.' else record.ID
        variant.save()
        self.file_variants += 1

        # Update cache
        self.variant_cache[md5] = variant.pk

        # Process SNPEff data only now, i.e. the first time this variant
        # has been seen.
        if 'EFF' in record.INFO:
            effs = record.INFO['EFF'].split(',')
            self.load_effects(effs, variant)
        return variant
Exemplo n.º 5
0
    def load_hgmd_snp(self, cursor, using=None):
        """Link HGMD SNP rows to variants, genes, phenotypes and articles.

        Executes a query joining the raw HGMD mutation/coordinate tables
        with the chromosome, variant, phenotype, pubmed and gene tables,
        then walks the result in batches of 100 rows. For every row it:

        * resolves a PubMed entry (existing id, or a new one created from a
          numeric PMID; non-numeric or missing PMIDs are skipped),
        * resolves a phenotype (existing id, or a new one created from the
          trimmed disease term),
        * creates or updates the gene/phenotype and variant/phenotype links
          and stores the HGMD accession on them,
        * attaches the PubMed entry to the phenotype, variant and gene.

        Args:
            cursor: database cursor providing ``execute`` and ``fetchmany``.
            using: database alias written to ``_state.db`` of the stub
                model instances that are built from ids found in a row.

        Returns:
            The number of rows processed.
        """
        # Raw string so the regex backslashes reach the database verbatim
        # instead of being parsed as (invalid) Python escape sequences.
        cursor.execute(r'''
            select distinct
                cl.acc_num as hgmd_id,
                c.value as chr,
                c.id as chr_id,
                v.id as variant_id,
                trim(both from cl.disease) as phenotype,
                ph.id as phenotype_id,
                cl.gene as gene,
                g.id as gene_id,
                cl.pmid as pubmed,
                pm.pmid as pubmed_id
            from (
                select m.acc_num, m.disease, m.gene, m.pmid, c.chromosome, c."coordSTART" as pos
                    from raw.hgmd_mutation m inner join raw.hgmd_hg19_coords c on (m.acc_num = c.acc_num)
            ) cl
                left outer join chromosome c on (cl.chromosome = c.value)
                left outer join variant v on (c.id = v.chr_id and cl.pos = v.pos)
                left outer join variant_type vt on (v.type_id = vt.id)
                left outer join phenotype ph on (lower(trim(both from regexp_replace(cl.disease, '\s*\?$', ''))) = lower(ph.term))
                left outer join pubmed pm on (cl.pmid::varchar = pm.pmid::varchar)
                left outer join gene g on (cl.gene::text = g.symbol::text)
                left outer join variant_phenotype vp on (vp.variant_id = v.id)
            where vt.value = 'SNP'
                and cl.disease not like '%%?'
                and v.id is not null and g.id is not null
            order by c.id
        ''')

        # Column names, in the order they are selected above
        keys = ['hgmd_id', 'chr', 'chr_id', 'variant_id',
            'phenotype', 'phenotype_id', 'gene', 'gene_id',
            'pubmed', 'pubmed_id']

        count = 0
        # Objects created during this run, so each is only saved once
        new_pubmed_map = {}
        new_phenotype_map = {}

        while True:
            rows = cursor.fetchmany(100)
            if not rows:
                break

            for row in rows:
                record = dict(zip(keys, row))
                raw_pmid = record['pubmed']

                # Get or create a pubmed record
                if record['pubmed_id']:
                    pubmed = PubMed(pmid=record['pubmed_id'])
                    pubmed._state.db = using
                # Some records have a bogus (or no) PMID. Only process the
                # valid ones; a missing PMID must not reach .isdigit().
                elif raw_pmid is not None and (
                        isinstance(raw_pmid, int) or raw_pmid.isdigit()):
                    pmid = int(raw_pmid)
                    if pmid in new_pubmed_map:
                        pubmed = new_pubmed_map[pmid]
                    else:
                        pubmed = PubMed(pmid=pmid)
                        pubmed.save()
                        new_pubmed_map[pmid] = pubmed
                else:
                    pubmed = None

                # Get or create the phenotype the HGMD id is associated with
                if record['phenotype_id']:
                    phenotype = Phenotype(pk=record['phenotype_id'])
                    phenotype._state.db = using
                else:
                    term = record['phenotype']
                    # Check newly added objects
                    if term in new_phenotype_map:
                        phenotype = new_phenotype_map[term]
                    else:
                        phenotype = Phenotype(term=record['phenotype'])
                        phenotype.save()
                        new_phenotype_map[term] = phenotype

                if record['gene_id']:
                    gene = Gene(pk=record['gene_id'])
                    gene._state.db = using

                    # Create the gene/phenotype link if it is missing, then
                    # record the HGMD accession on it
                    try:
                        gp = GenePhenotype.objects.get(gene=gene, phenotype=phenotype)
                    except GenePhenotype.DoesNotExist:
                        gp = GenePhenotype(gene=gene, phenotype=phenotype)
                    gp.hgmd_id = record['hgmd_id']
                    gp.save()
                else:
                    gene = None

                if record['variant_id']:
                    variant = Variant(pk=record['variant_id'])
                    variant._state.db = using

                    # Same create-or-update for the variant/phenotype link
                    try:
                        vp = VariantPhenotype.objects.get(variant=variant, phenotype=phenotype)
                    except VariantPhenotype.DoesNotExist:
                        vp = VariantPhenotype(variant=variant, phenotype=phenotype)
                    vp.hgmd_id = record['hgmd_id']
                    vp.save()
                else:
                    variant = None

                if pubmed:
                    phenotype.articles.add(pubmed)
                    if variant:
                        variant.articles.add(pubmed)
                    if gene:
                        gene.articles.add(pubmed)

                count += 1

            # Progress indicator, rewritten in place once per batch
            sys.stdout.write('{0}\r'.format(count))
            sys.stdout.flush()

        return count