def main(args):
    """Dump mouse gene records as one JSON document on stdout.

    Runs the gene query plus a series of per-gene detail queries against
    MOUSEMINE, attaches each detail row to its gene under the query's label,
    flags every gene with ``hasPheno`` / ``hasImpc``, and prints the result as
    ``{"metaData": ..., "data": [...]}``.

    Args:
        args: parsed command-line options; only ``args.identifiers`` is read
            here, and it is handed to ``makeOneOfConstraint``.
    """
    # Substitution applied to every query template below.
    qmods = {
        'extraConstraint': makeOneOfConstraint('Gene.primaryIdentifier', args.identifiers),
    }
    # (label, query template) pairs. 'gene' comes first so the index exists
    # before the detail rows try to attach themselves to it.
    queries = [
        ('gene', mouseGenes),
        ('synonyms', mouseSynonyms),
        ('expressed', mouseExpressedGenes),
        ('expressedImages', mouseExpressedGenesWithImages),
        ('location', mouseLocations),
        ('proteinIds', mouseProteinIds),
        ('xrefs', mouseXrefs),
        ('pantherId', mousePantherIds),
        ('myGeneLink', mouseMyGeneLinks),
    ]

    # Identifier sets used to compute the two boolean flags.
    phenoIds = {row['primaryIdentifier'] for row in doQuery(mouseHasPheno, MOUSEMINE)}
    impcIds = {row['primaryIdentifier'] for row in doQuery(mouseHasImpc, MOUSEMINE)}

    genesById = {}
    for label, template in queries:
        rows = doQuery(template % qmods, MOUSEMINE)
        if label == 'gene':
            for row in rows:
                row['mgiid'] = row['primaryIdentifier']
                genesById[row['primaryIdentifier']] = row
            continue
        for row in rows:
            gene = genesById.get(row['primaryIdentifier'])
            # Detail rows for genes not returned by the gene query are dropped.
            if gene:
                gene.setdefault(label, []).append(row)

    print('{\n "metaData": %s,\n "data": [' % json.dumps(buildMetaObject(MOUSEMINE), indent=2))
    for position, gene in enumerate(genesById.values()):
        # Separator goes before every record except the first.
        if position:
            print(',', end='')
        gene["hasPheno"] = gene["primaryIdentifier"] in phenoIds
        gene["hasImpc"] = gene["primaryIdentifier"] in impcIds
        print(json.dumps(getJsonObj(gene), indent=2))
    print(']\n}')
def getSamples(url):
    """Index the rows of the ``htSamples`` query by experiment, then by sample key.

    Args:
        url: service location passed through to ``doQuery``.

    Returns:
        dict mapping ``experimentId`` to a dict that maps the tuple
        ``(name, age, sex, structure identifier)`` to the row carrying it.
        A later row with the same tuple replaces the earlier one.
    """
    keyFields = (
        "samples.name",
        "samples.age",
        "samples.sex",
        "samples.structure.identifier",
    )
    samplesByExperiment = {}
    for row in doQuery(htSamples, url):
        sampleKey = tuple(row[field] for field in keyFields)
        experimentId = row['experimentId']
        if experimentId not in samplesByExperiment:
            samplesByExperiment[experimentId] = {}
        samplesByExperiment[experimentId][sampleKey] = row
    return samplesByExperiment
def main():
    """Dump genotype records as one JSON document on stdout.

    Parses the command line, queries MOUSEMINE for genotype/allele components,
    for the alleles being submitted, and for the genotypes themselves, then
    prints ``{"metaData": ..., "data": [...]}``. Genotypes listed in ``SKIP``
    (or whose components could not be indexed) are omitted, as are genotypes
    for which ``getJsonObj`` returns a falsy value.
    """
    args = parseCmdLine()
    ids = args.identifiers
    xtra = makeOneOfConstraint('Genotype.alleles.feature.primaryIdentifier', ids)
    xtra2 = makeOneOfConstraint('Allele.feature.primaryIdentifier', ids)

    # Process Genotype-AllelePairs. Build index from genotype id to list of
    # components (allele+state).
    id2components = {}
    toDelete = set(SKIP)
    for r in doQuery(q_genotypeAlleles % xtra, MOUSEMINE):
        gid = r["primaryIdentifier"]
        try:
            id2components.setdefault(gid, []).append(r)
        except Exception:
            # Best effort: a genotype whose component cannot be indexed is
            # skipped on output. Only ordinary errors are trapped so that
            # Ctrl-C and interpreter shutdown still stop the dump.
            toDelete.add(gid)

    # Build set of MGI ids of alleles being sent to the alliance.
    includedAlleles = {r['primaryIdentifier'] for r in doQuery(q_alleles % xtra2, MOUSEMINE)}

    # Process genotypes. For each one, find / attach its components if any and
    # output. Screen for genotypes to be deleted.
    print('{\n "metaData": %s,\n "data": [' % json.dumps(buildMetaObject(MOUSEMINE), indent=2))
    first = True
    for g in doQuery(q_genotypes % xtra, MOUSEMINE):
        gid = g["primaryIdentifier"]
        if gid in toDelete:
            continue
        g["components"] = id2components.get(gid, [])
        gobj = getJsonObj(g, includedAlleles)
        if not gobj:
            continue
        # Separator goes before every emitted record except the first.
        if not first:
            print(",", end=' ')
        print(json.dumps(gobj, indent=2))
        first = False
    print("]}")
def loadEMAPAParents(url):
    """Load the direct parent identifiers of every EMAPA term.

    Args:
        url: service location passed through to ``doQuery``.

    Returns:
        dict mapping a child term identifier to the list of its direct parent
        term identifiers, in the order the query returns them.
    """
    log('Loading EMAPA parents...')
    q = (
        '<query name="" model="genomic" '
        'view="OntologyRelation.childTerm.identifier OntologyRelation.parentTerm.identifier" '
        'longDescription="" sortOrder="OntologyRelation.childTerm.identifier asc" > '
        '<constraint path="OntologyRelation.childTerm" type="EMAPATerm"/> '
        '<constraint path="OntologyRelation.parentTerm" type="EMAPATerm"/> '
        '<constraint path="OntologyRelation.direct" op="=" value="true"/> '
        '</query>'
    )
    id2pids = {}
    # Counted explicitly so the log line is correct for any number of rows,
    # including zero.
    count = 0
    for r in doQuery(q, url):
        id2pids.setdefault(r["childTerm.identifier"], []).append(r["parentTerm.identifier"])
        count += 1
    log('Loaded %d parent/child relations.' % count)
    return id2pids
def loadEMAPA(url):
    """Load all EMAPA terms, keyed by term identifier.

    Args:
        url: service location passed through to ``doQuery``.

    Returns:
        dict mapping each term's ``identifier`` to its row; the row's
        ``startsAt`` and ``endsAt`` values are converted to ``int`` in place.
    """
    log('Loading EMAPA...')
    termsById = {}
    query = (
        ' <query model="genomic" '
        'view=" EMAPATerm.identifier EMAPATerm.name EMAPATerm.startsAt EMAPATerm.endsAt " '
        '> </query> '
    )
    for term in doQuery(query, url):
        for bound in ("startsAt", "endsAt"):
            term[bound] = int(term[bound])
        termsById[term["identifier"]] = term
    log('Loaded %d EMAPA terms.' % len(termsById))
    return termsById
def getExpressionData(url, ids):
    """Yield expression result rows, collapsing consecutive duplicates.

    The query is sorted by assay id, structure identifier and stage, so rows
    that agree on all three arrive next to each other; only the first row of
    each such run is yielded. A summary line is logged once the query is
    exhausted.

    Args:
        url: service location the query is sent to.
        ids: identifiers handed to ``makeOneOfConstraint`` to build the extra
            feature constraint spliced into the query.

    Yields:
        Rows as produced by ``doQuery``.
    """
    log('Getting expression data...')
    q = (
        '<query model="genomic" '
        'view=" GXDExpression.assayId GXDExpression.assayType '
        'GXDExpression.feature.primaryIdentifier GXDExpression.stage '
        'GXDExpression.structure.identifier GXDExpression.publication.mgiId '
        'GXDExpression.publication.pubMedId" '
        'sortOrder="GXDExpression.assayId asc GXDExpression.structure.identifier asc '
        'GXDExpression.stage asc" '
        'constraintLogic="A and (B or (C and D)) and E" > '
        '<constraint path="GXDExpression.detected" code="A" op="=" value="true"/> '
        '<constraint path="GXDExpression.genotype.hasMutantAllele" code="B" op="=" value="false"/> '
        '<constraint path="GXDExpression.assayType" code="C" op="=" value="In situ reporter (knock in)"/> '
        '<constraint path="GXDExpression.genotype.zygosity" code="D" op="=" value="ht"/> '
        '%s </query> '
    ) % makeOneOfConstraint('GXDExpression.feature.primaryIdentifier', ids)

    prevKey = None
    qcount = 0  # rows returned by the query
    ycount = 0  # rows actually yielded
    # Query the service the caller asked for rather than a fixed global.
    for r in doQuery(q, url):
        qcount += 1
        key = (r["assayId"], r["stage"], r["structure.identifier"])
        if key != prevKey:
            ycount += 1
            yield r
        prevKey = key
    log('getExpressionData: %d results => %d unique results' % (qcount, ycount))
def loadSubmittedAlleles():
    """Return the set of ``primaryIdentifier`` values from the ``qAlleles`` query on MOUSEMINE."""
    return {allele["primaryIdentifier"] for allele in doQuery(qAlleles, MOUSEMINE)}
def getExperiments(url):
    """Yield every row of the ``htExperiments`` query run against ``url``."""
    yield from doQuery(htExperiments, url)
def getVariables(url):
    """Group the rows of the ``htVariables`` query by experiment.

    Rows whose ``variables.name`` is falsy are ignored.

    Args:
        url: service location passed through to ``doQuery``.

    Returns:
        dict mapping ``experimentId`` to the list of its variable rows.
    """
    varsByExperiment = {}
    for row in doQuery(htVariables, url):
        if not row["variables.name"]:
            continue
        experimentId = row['experimentId']
        if experimentId in varsByExperiment:
            varsByExperiment[experimentId].append(row)
        else:
            varsByExperiment[experimentId] = [row]
    return varsByExperiment
def getReferences(url):
    """Group the rows of the ``htReferences`` query by experiment.

    Args:
        url: service location passed through to ``doQuery``.

    Returns:
        dict mapping ``experimentId`` to the list of its reference rows.
    """
    refsByExperiment = {}
    for row in doQuery(htReferences, url):
        experimentId = row['experimentId']
        if experimentId in refsByExperiment:
            refsByExperiment[experimentId].append(row)
        else:
            refsByExperiment[experimentId] = [row]
    return refsByExperiment
def annotations(url, okind, skind, ids=None):
    """Yield formatted annotation records for one ontology/subject combination.

    Three queries are run (annotations, their evidence, and base annotations),
    each sorted by annotation id. Their rows are grouped per annotation id,
    merged into a single stream, and combined into one dict per annotation.
    That dict goes through ``applyConversions``; for every entry of the
    resulting ``invevidence`` list a record is built with
    ``formatDafJsonRecord`` and yielded if truthy.

    Args:
        url: service location passed through to ``doQuery``.
        okind: ontology term class name used as a type constraint; the value
            ``"DOTerm"`` selects "disease" records, anything else "phenotype".
        skind: subject class name used as a type constraint; ``"Allele"``
            additionally adds the allele's feature identifier to the views.
        ids: identifiers handed to ``makeOneOfConstraint`` for the optional
            subject restriction.

    Yields:
        Whatever ``formatDafJsonRecord`` returns, when truthy.
    """
    qopts = {
        'alleleFeatView': "OntologyAnnotation.subject.feature.primaryIdentifier" if skind == "Allele" else "",
        'xtraConstraint': makeOneOfConstraint("OntologyAnnotation.subject.primaryIdentifier", ids),
        'xtraConstraint2': makeOneOfConstraint(
            "OntologyAnnotationEvidence.annotation.subject.primaryIdentifier", ids),
        'okind': okind,
        'skind': skind,
    }
    qAnnots = (
        '<query model="genomic" '
        'view=" OntologyAnnotation.id OntologyAnnotation.subject.primaryIdentifier '
        'OntologyAnnotation.subject.symbol OntologyAnnotation.subject.name '
        'OntologyAnnotation.ontologyTerm.identifier OntologyAnnotation.ontologyTerm.name '
        'OntologyAnnotation.qualifier %(alleleFeatView)s " '
        'sortOrder="OntologyAnnotation.id asc" > '
        '<constraint path="OntologyAnnotation.ontologyTerm" type="%(okind)s"/> '
        '<constraint path="OntologyAnnotation.subject" type="%(skind)s"/> '
        '<constraint path="OntologyAnnotation.subject.organism.taxonId" op="=" value="10090"/> '
        '%(xtraConstraint)s </query> '
    ) % qopts
    qEvidence = (
        '<query model="genomic" '
        'view=" OntologyAnnotation.id OntologyAnnotation.evidence.id '
        'OntologyAnnotation.evidence.annotationDate OntologyAnnotation.evidence.code.code '
        'OntologyAnnotation.evidence.publications.id OntologyAnnotation.evidence.publications.pubMedId '
        'OntologyAnnotation.evidence.publications.mgiJnum OntologyAnnotation.evidence.publications.mgiId '
        '%(alleleFeatView)s " '
        'sortOrder="OntologyAnnotation.id asc OntologyAnnotation.evidence.id asc" > '
        '<constraint path="OntologyAnnotation.ontologyTerm" type="%(okind)s"/> '
        '<constraint path="OntologyAnnotation.subject" type="%(skind)s"/> '
        '<constraint path="OntologyAnnotation.subject.organism.taxonId" op="=" value="10090"/> '
        '%(xtraConstraint)s </query> '
    ) % qopts
    qBaseAnnots = (
        '<query model="genomic" '
        'view=" OntologyAnnotationEvidence.id OntologyAnnotationEvidence.publications.id '
        'OntologyAnnotationEvidence.annotation.id '
        'OntologyAnnotationEvidence.baseAnnotations.subject.primaryIdentifier \n'
        'OntologyAnnotationEvidence.baseAnnotations.evidence.annotationDate '
        'OntologyAnnotationEvidence.baseAnnotations.evidence.publications.pubMedId " '
        'sortOrder="OntologyAnnotationEvidence.annotation.id asc OntologyAnnotationEvidence.id asc" > '
        '<constraint path="OntologyAnnotationEvidence.annotation.ontologyTerm" type="%(okind)s"/> '
        '<constraint path="OntologyAnnotationEvidence.annotation.subject" type="%(skind)s"/> '
        '<constraint path="OntologyAnnotationEvidence.baseAnnotations.evidence.publications" op="=" loopPath="OntologyAnnotationEvidence.publications"/> '
        '%(xtraConstraint2)s </query> '
    ) % qopts

    # One lazy stream per query, each producing (annotation id, tag, rows).
    # Every query is sorted by annotation id, which both groupby and
    # heapq.merge rely on.
    streams = [
        ((annotId, 'annotation', list(rows))
         for annotId, rows in groupby(doQuery(qAnnots, url), lambda e: e['id'])),
        ((annotId, 'evidence', list(rows))
         for annotId, rows in groupby(doQuery(qEvidence, url), lambda e: e['id'])),
        ((annotId, 'baseAnnots', list(rows))
         for annotId, rows in groupby(doQuery(qBaseAnnots, url), lambda e: e['annotation.id'])),
    ]
    recordKind = "disease" if okind == "DOTerm" else "phenotype"

    for _annotId, pieces in groupby(heapq.merge(*streams), lambda item: item[0]):
        combined = {}
        for _, tag, rows in pieces:
            if tag == 'annotation':
                # A single annotation row supplies the top-level fields.
                combined.update(rows[0])
            elif tag in ('evidence', 'baseAnnots'):
                # Stored under the key named after the tag. For 'baseAnnots'
                # these are *all* the base annotations of the annotation; each
                # one belongs to a specific evidence object, and its 'id'
                # field should equal that evidence object's 'evidence.id'.
                combined[tag] = rows
        converted = applyConversions(combined, okind, skind)
        if not converted:
            continue
        for n, evidence in enumerate(converted["invevidence"]):
            converted["agrevidence"] = evidence
            converted["agrbaseannots"] = converted["invbaseannots"][n]
            formatted = formatDafJsonRecord(converted, recordKind, skind)
            if formatted:
                yield formatted
def getAlleles(url, ids):
    """Yield allele rows with their synonyms and, where applicable, a construct id.

    Three queries are run against ``url``: alleles that have an expressed
    component, allele synonyms, and the main allele query. Each row of the
    main query gets a ``synonyms`` list and, if it has a driver or an
    expressed component, a ``construct`` id.

    Args:
        url: service location passed through to ``doQuery``.
        ids: identifiers handed to ``makeOneOfConstraint`` to restrict the
            synonym and main queries by feature.

    Yields:
        Rows of the main allele query, augmented in place.
    """
    qopts = {
        'xtraConstraint': makeOneOfConstraint('Allele.feature.primaryIdentifier', ids)
    }

    # Query for alleles that have expressed component
    qexpressors = (
        '<query model="genomic" '
        'view=" MGIExpressesComponent.allele.primaryIdentifier MGIExpressesComponent.allele.symbol " '
        '></query> '
    )
    expressors = {r['allele.primaryIdentifier'] for r in doQuery(qexpressors, url)}

    # Allele filter shared by the synonym query and the main query, so the two
    # always select the same alleles.
    # 2020-12-18: change constraints:
    #   - drop ontology annotation requirement
    #   - drop null allele type restriction
    #   - add exclusion when germline transmission = 'cell line'
    # Constraint E keeps alleles whose glTransmission is null; presumably the
    # "!=" test alone drops them (verify against InterMine semantics).
    alleleFilter = (
        'constraintLogic="A and B and C and (D or E)" '
        'sortOrder="Allele.primaryIdentifier asc" > '
        '<constraint code="A" path="Allele.organism.taxonId" op="=" value="10090" /> '
        '<constraint code="B" path="Allele.alleleType" op="NONE OF"> <value>QTL</value> </constraint> '
        '<constraint code="C" path="Allele.isWildType" op="=" value="false" /> '
        '<constraint code="D" path="Allele.glTransmission" op="!=" value="Cell Line"/> '
        '<constraint code="E" path="Allele.glTransmission" op="IS NULL" /> '
        '%(xtraConstraint)s </query> '
    ) % qopts

    # Query allele synonyms, build index of id -> synonyms
    qsynonyms = (
        '<query model="genomic" '
        'view=" Allele.primaryIdentifier Allele.synonyms.value " '
    ) + alleleFilter
    aid2syns = {}
    for r in doQuery(qsynonyms, url):
        aid2syns.setdefault(r['primaryIdentifier'], set()).add(r['synonyms.value'])

    # Main allele query.
    qalleles = (
        '<query model="genomic" '
        'view=" Allele.primaryIdentifier Allele.symbol Allele.name Allele.alleleType '
        'Allele.molecularNote Allele.feature.primaryIdentifier Allele.feature.mgiType '
        'Allele.drivenBy " '
    ) + alleleFilter
    for r in doQuery(qalleles, url):
        aid = r['primaryIdentifier']
        r['synonyms'] = list(aid2syns.get(aid, []))
        # If the allele has a driver or has expressed components, then the
        # allele has a "construct". At the Alliance, constructs must have an
        # ID, but at MGI they don't (they're not objects). So we create a fake
        # ID for it. These are not displayed and are not used to create links.
        # Constructs are dumped separately. Here we just need the ID.
        if r['drivenBy'] or aid in expressors:
            r['construct'] = aid + "_con"
        yield r