def _get_or_create_article(self, pmid):
    """Return the ``PubMed`` record for ``pmid``, creating it when absent.

    Returns an ``(article, created)`` tuple; ``created`` is ``True`` only
    when this call inserted a new row.
    """
    # ``get_or_create`` does the same lookup-then-insert as the manual
    # try/except it replaces, and additionally recovers when another
    # writer inserts the row between the lookup and the insert.
    return PubMed.objects.get_or_create(pmid=pmid)
def load_hgmd_snp(self, cursor, using=None):
    """Link HGMD SNP records to phenotypes, genes, variants and articles.

    Runs a query over ``raw.hgmd_mutation`` joined to
    ``raw.hgmd_hg19_coords`` and, for every returned row:

    * resolves (or creates) the ``PubMed`` article for the row's PMID,
    * resolves (or creates) the ``Phenotype`` for the row's disease term,
    * creates or updates the ``GenePhenotype`` / ``VariantPhenotype`` link
      and stamps it with the row's HGMD accession,
    * attaches the article to the phenotype, variant and gene.

    The query keeps only rows whose variant type is ``'SNP'``, whose
    disease does not end in ``?``, and that matched both a variant and a
    gene.

    ``cursor`` must provide ``execute`` and ``fetchmany``. ``using`` is
    assigned to ``_state.db`` of every instance that is built directly
    from a primary key returned by the query.

    NOTE(review): newly created ``PubMed`` / ``Phenotype`` rows and the
    link-table lookups do not pass ``using``; confirm that is intended
    when a non-default database alias is supplied.

    Returns the number of rows processed.
    """
    # Raw string: the regex below contains ``\s`` and ``\?``, which are
    # invalid escape sequences in a normal string literal.
    cursor.execute(r'''
        select distinct
            cl.acc_num as hgmd_id,
            c.value as chr,
            c.id as chr_id,
            v.id as variant_id,
            trim(both from cl.disease) as phenotype,
            ph.id as phenotype_id,
            cl.gene as gene,
            g.id as gene_id,
            cl.pmid as pubmed,
            pm.pmid as pubmed_id
        from (
            select m.acc_num, m.disease, m.gene, m.pmid, c.chromosome,
                c."coordSTART" as pos
            from raw.hgmd_mutation m
                inner join raw.hgmd_hg19_coords c
                    on (m.acc_num = c.acc_num)
        ) cl
            left outer join chromosome c on (cl.chromosome = c.value)
            left outer join variant v
                on (c.id = v.chr_id and cl.pos = v.pos)
            left outer join variant_type vt on (v.type_id = vt.id)
            left outer join phenotype ph
                on (lower(trim(both from regexp_replace(cl.disease, '\s*\?$', ''))) = lower(ph.term))
            left outer join pubmed pm
                on (cl.pmid::varchar = pm.pmid::varchar)
            left outer join gene g on (cl.gene::text = g.symbol::text)
            left outer join variant_phenotype vp
                on (vp.variant_id = v.id)
        where vt.value = 'SNP'
            and cl.disease not like '%%?'
            and v.id is not null
            and g.id is not null
        order by c.id
    ''')

    # Column order of the select list above.
    keys = ['hgmd_id', 'chr', 'chr_id', 'variant_id', 'phenotype',
            'phenotype_id', 'gene', 'gene_id', 'pubmed', 'pubmed_id']

    count = 0
    # Records created during this run, keyed by PMID / term, so repeated
    # rows reuse the same instance instead of inserting duplicates.
    new_pubmed_map = {}
    new_phenotype_map = {}

    while True:
        rows = cursor.fetchmany(100)
        if not rows:
            break

        for row in rows:
            record = dict(zip(keys, row))

            # Get or create a pubmed record
            raw_pmid = record['pubmed']
            if record['pubmed_id']:
                pubmed = PubMed(pmid=record['pubmed_id'])
                pubmed._state.db = using
            # Some records have a bogus (or missing) PMID. Only process
            # the valid ones; a NULL PMID is treated as "no article".
            elif raw_pmid is not None and (
                    isinstance(raw_pmid, int) or raw_pmid.isdigit()):
                pmid = int(raw_pmid)
                if pmid in new_pubmed_map:
                    pubmed = new_pubmed_map[pmid]
                else:
                    pubmed = PubMed(pmid=pmid)
                    pubmed.save()
                    new_pubmed_map[pmid] = pubmed
            else:
                pubmed = None

            # Get or create the phenotype that the HGMD id is
            # associated with
            if record['phenotype_id']:
                phenotype = Phenotype(pk=record['phenotype_id'])
                phenotype._state.db = using
            else:
                term = record['phenotype']
                # Check newly added objects
                if term in new_phenotype_map:
                    phenotype = new_phenotype_map[term]
                else:
                    phenotype = Phenotype(term=term)
                    phenotype.save()
                    new_phenotype_map[term] = phenotype

            if record['gene_id']:
                gene = Gene(pk=record['gene_id'])
                gene._state.db = using

                try:
                    gp = GenePhenotype.objects.get(
                        gene=gene, phenotype=phenotype)
                except GenePhenotype.DoesNotExist:
                    gp = GenePhenotype(gene=gene, phenotype=phenotype)
                # Stamp the accession on both existing and new links.
                gp.hgmd_id = record['hgmd_id']
                gp.save()
            else:
                gene = None

            if record['variant_id']:
                variant = Variant(pk=record['variant_id'])
                variant._state.db = using

                try:
                    vp = VariantPhenotype.objects.get(
                        variant=variant, phenotype=phenotype)
                except VariantPhenotype.DoesNotExist:
                    vp = VariantPhenotype(
                        variant=variant, phenotype=phenotype)
                # Stamp the accession on both existing and new links.
                vp.hgmd_id = record['hgmd_id']
                vp.save()
            else:
                variant = None

            # Article links only make sense when a usable PMID was found.
            if pubmed:
                phenotype.articles.add(pubmed)
                if variant:
                    variant.articles.add(pubmed)
                if gene:
                    gene.articles.add(pubmed)

            count += 1
            # Carriage return keeps the running count on a single line.
            sys.stdout.write('{0}\r'.format(count))
            sys.stdout.flush()

    return count