Exemplo n.º 1
0
 def _get_or_create_article(self, pmid):
     """Fetch the PubMed article with the given PMID, creating it if absent.

     Delegates to the ORM's ``get_or_create`` rather than a manual
     get/except/save sequence, so a concurrent insert of the same PMID
     between the lookup and the save is handled by the ORM.

     Args:
         pmid: PubMed identifier to look up.

     Returns:
         A ``(article, created)`` tuple; ``created`` is ``True`` only when
         a new row was inserted by this call.
     """
     return PubMed.objects.get_or_create(pmid=pmid)
Exemplo n.º 2
0
    def load_hgmd_snp(self, cursor, using=None):
        """Link HGMD SNP records to phenotypes, genes, variants and articles.

        Runs a query joining the raw HGMD mutation/coordinate tables against
        the chromosome, variant, phenotype, pubmed and gene tables (keeping
        only SNP variants with a known variant and gene, and skipping
        diseases whose name ends in ``?``). For each returned row it:

        * resolves the PubMed article: an existing row, or a new one created
          from a numeric PMID; rows whose PMID is missing or non-numeric get
          no article,
        * resolves the phenotype: an existing row, or a new one created from
          the trimmed disease term,
        * creates or updates the gene/phenotype and variant/phenotype links
          and stamps them with the HGMD accession number,
        * attaches the article (if any) to the phenotype, variant and gene.

        Args:
            cursor: database cursor used to run the HGMD query; only
                ``execute`` and ``fetchmany`` are called on it.
            using: database alias applied to ORM lookups and saves. ``None``
                leaves the choice to Django's database router.

        Returns:
            The number of result rows processed.
        """
        # Raw string: the regular expression below contains backslashes that
        # must reach the database untouched (and are not valid Python escapes).
        cursor.execute(r'''
            select distinct
                cl.acc_num as hgmd_id,
                c.value as chr,
                c.id as chr_id,
                v.id as variant_id,
                trim(both from cl.disease) as phenotype,
                ph.id as phenotype_id,
                cl.gene as gene,
                g.id as gene_id,
                cl.pmid as pubmed,
                pm.pmid as pubmed_id
            from (
                select m.acc_num, m.disease, m.gene, m.pmid, c.chromosome, c."coordSTART" as pos
                    from raw.hgmd_mutation m inner join raw.hgmd_hg19_coords c on (m.acc_num = c.acc_num)
            ) cl
                left outer join chromosome c on (cl.chromosome = c.value)
                left outer join variant v on (c.id = v.chr_id and cl.pos = v.pos)
                left outer join variant_type vt on (v.type_id = vt.id)
                left outer join phenotype ph on (lower(trim(both from regexp_replace(cl.disease, '\s*\?$', ''))) = lower(ph.term))
                left outer join pubmed pm on (cl.pmid::varchar = pm.pmid::varchar)
                left outer join gene g on (cl.gene::text = g.symbol::text)
                left outer join variant_phenotype vp on (vp.variant_id = v.id)
            where vt.value = 'SNP'
                and cl.disease not like '%%?'
                and v.id is not null and g.id is not null
            order by c.id
        ''')

        # Column order of the select list above.
        keys = ['hgmd_id', 'chr', 'chr_id', 'variant_id',
            'phenotype', 'phenotype_id', 'gene', 'gene_id',
            'pubmed', 'pubmed_id']

        count = 0
        # Objects created during this run, keyed by PMID / term. The query
        # was executed before any of these inserts, so later rows still
        # report them as missing and must be served from these caches.
        new_pubmed_map = {}
        new_phenotype_map = {}

        while True:
            rows = cursor.fetchmany(100)
            if not rows:
                break

            for row in rows:
                record = dict(zip(keys, row))

                # Get or create the PubMed record.
                if record['pubmed_id']:
                    # Already in the database: a pk-only stub is enough.
                    pubmed = PubMed(pmid=record['pubmed_id'])
                    pubmed._state.db = using
                else:
                    # Some records have a bogus (or NULL) PMID. Only process
                    # the ones that are integers or all-digit strings.
                    raw_pmid = record['pubmed']
                    if isinstance(raw_pmid, int):
                        pmid = raw_pmid
                    elif hasattr(raw_pmid, 'isdigit') and raw_pmid.isdigit():
                        pmid = int(raw_pmid)
                    else:
                        pmid = None

                    if pmid is None:
                        pubmed = None
                    elif pmid in new_pubmed_map:
                        pubmed = new_pubmed_map[pmid]
                    else:
                        pubmed = PubMed(pmid=pmid)
                        # Save to the same database the stubs are bound to.
                        pubmed.save(using=using)
                        new_pubmed_map[pmid] = pubmed

                # Get or create the phenotype the HGMD id is associated with.
                if record['phenotype_id']:
                    phenotype = Phenotype(pk=record['phenotype_id'])
                    phenotype._state.db = using
                else:
                    term = record['phenotype']
                    # Check newly added objects first.
                    if term in new_phenotype_map:
                        phenotype = new_phenotype_map[term]
                    else:
                        phenotype = Phenotype(term=term)
                        phenotype.save(using=using)
                        new_phenotype_map[term] = phenotype

                if record['gene_id']:
                    gene = Gene(pk=record['gene_id'])
                    gene._state.db = using

                    # Create or update the gene/phenotype link.
                    try:
                        gp = GenePhenotype.objects.using(using).get(
                            gene=gene, phenotype=phenotype)
                    except GenePhenotype.DoesNotExist:
                        gp = GenePhenotype(gene=gene, phenotype=phenotype)
                    gp.hgmd_id = record['hgmd_id']
                    gp.save(using=using)
                else:
                    gene = None

                if record['variant_id']:
                    variant = Variant(pk=record['variant_id'])
                    variant._state.db = using

                    # Create or update the variant/phenotype link.
                    try:
                        vp = VariantPhenotype.objects.using(using).get(
                            variant=variant, phenotype=phenotype)
                    except VariantPhenotype.DoesNotExist:
                        vp = VariantPhenotype(variant=variant, phenotype=phenotype)
                    vp.hgmd_id = record['hgmd_id']
                    vp.save(using=using)
                else:
                    variant = None

                if pubmed is not None:
                    phenotype.articles.add(pubmed)
                    if variant is not None:
                        variant.articles.add(pubmed)
                    if gene is not None:
                        gene.articles.add(pubmed)

                count += 1

            # Progress indicator, rewritten in place after every batch.
            sys.stdout.write('{0}\r'.format(count))
            sys.stdout.flush()

        return count