def get_gene_id(gene_name):
    '''Retrieve systematic yeast gene name from the common name.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C), or None when
        the lookup returns no rows.
    :rtype: str or None

    '''
    from intermine.webservice import Service

    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')
    query = service.new_query('Gene')
    # Output columns; only 'secondaryIdentifier' is read below.
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'sgdAlias', 'crossReferences.identifier',
                   'crossReferences.source.name')
    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Start from None so an empty result set no longer fails with
    # UnboundLocalError at the return statement.
    gid = None
    for row in query.rows():
        # Each row overwrites the previous value: the last row wins.
        gid = row['secondaryIdentifier']
    return gid
def get_gene_id(gene_name):
    """Retrieve systematic yeast gene name from the common name.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C), or None when
        the lookup returns no rows.
    :rtype: str or None

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    query = service.new_query("Gene")
    # Output columns; only "secondaryIdentifier" is read below.
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "sgdAlias", "crossReferences.identifier",
                   "crossReferences.source.name")
    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Initialised up front: without it an empty result set reached the
    # return statement with the name unbound (UnboundLocalError).
    gid = None
    for row in query.rows():
        # The value from the final row is the one returned.
        gid = row["secondaryIdentifier"]
    return gid
def search_SGD(self, gene_code=None):
    """Look up a yeast gene's genomic location in YeastMine by its symbol.

    Every matching row is printed as it arrives; the returned value is built
    from the last row.

    :param gene_code: Gene symbol, matched exactly against ``symbol``.
    :returns: ``[systematic name, chromosome identifier without its first
        three characters, start, end, strand sign]``, or ``None`` when the
        query returns no rows.
    :rtype: list or None
    """
    def strand_sign(strand):
        # A negative strand such as -1 or "-1" is truthy, so the plain
        # truthiness test used previously reported "+" for it.
        # NOTE(review): assumes the reverse strand is encoded with a leading
        # minus sign - confirm against the mine's data.
        if str(strand).strip().startswith("-"):
            return "-"
        return "+" if strand else "-"

    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    query = service.new_query("Gene")
    query.add_view(
        "chromosome.primaryIdentifier",
        "chromosomeLocation.end",
        "chromosomeLocation.start",
        "chromosomeLocation.strand",
        "secondaryIdentifier",
    )
    query.add_constraint("symbol", "=", gene_code, code="A")

    last_hit = None
    for row in query.rows():
        last_hit = [
            row["secondaryIdentifier"],
            row["chromosome.primaryIdentifier"],
            row["chromosomeLocation.start"],
            row["chromosomeLocation.end"],
            strand_sign(row["chromosomeLocation.strand"]),
        ]
        print(last_hit)

    if last_hit is None:
        # No match: previously this fell through to a NameError on `row`.
        return None

    systematic_name, chromosome, start, end, sign = last_hit
    # The printed row keeps the full chromosome identifier; the returned one
    # drops its first three characters.
    return [systematic_name, chromosome[3:], start, end, sign]
def index_genes(organism, mod):
    """Fetch (or restore from today's backup) gene records and bulk-index them.

    Rows sharing the same gene id are merged into one record whose ``go_ids``
    and ``go_names`` lists grow with every row. Freshly fetched records are
    pickled to a per-day backup file, which is reused on later calls made the
    same day.

    :param organism: Organism label used in messages, the backup file name
        and the document ids.
    :param mod: Mapping providing ``mine_service_url``, ``gene_fields``,
        ``mine_organism_name``, ``url_prefix`` and ``url_suffix``.
    :type mod: dict
    """
    backup_filename = organism + "mine_genes_" + time.strftime("%m_%d_%Y") + ".bkp"

    if os.path.isfile(backup_filename):
        print("Restoring fetched data from today from " + organism + "mine")
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # this is only safe while the backup is written by this function.
        with open(backup_filename, 'rb') as backup:
            genes = pickle.load(backup)
    else:
        print("Fetching data from " + organism + "mine")
        fields = mod["gene_fields"]

        service = Service(mod["mine_service_url"])
        query = service.new_query("Gene")
        # Pass a real list: on Python 3 dict.values() is a view object, not
        # the list that the Python 2 code handed to add_view.
        query.add_view(list(fields.values()))
        query.add_constraint("organism.name", "=", mod["mine_organism_name"], code="B")

        genes = {}
        for row in query.rows():
            gene_id = row[fields["id"]]
            if gene_id in genes:
                # Another GO annotation for a gene already seen.
                genes[gene_id]["go_ids"].append(row[fields["go_id"]])
                genes[gene_id]["go_names"].append(row[fields["go_name"]])
            else:
                genes[gene_id] = {
                    "name": row[fields["gene_name"]],
                    "symbol": row[fields["gene_symbol"]],
                    "synonym": row[fields["gene_synonym"]],
                    "go_ids": [row[fields["go_id"]]],
                    "go_names": [row[fields["go_name"]]],
                    "href": mod["url_prefix"] + row["primaryIdentifier"] + mod["url_suffix"],
                    "organism": organism,
                    "category": "gene"
                }

        with open(backup_filename, 'wb') as backup:
            pickle.dump(genes, backup)

    print("Indexing " + str(len(genes)) + " " + organism + " genes")

    bulk_data = []
    for gene_id, gene in genes.items():
        # Each document contributes two entries: the action line and the body.
        bulk_data.append({
            'index': {
                '_index': INDEX_NAME,
                '_type': DOC_TYPE,
                '_id': organism + "_" + gene_id
            }
        })
        bulk_data.append(gene)

        # Flush once 500 entries (250 documents) have accumulated.
        if len(bulk_data) % 500 == 0:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if bulk_data:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def fetchGene(GeneName):
    """Fetch the genomic DNA of a yeast gene from YeastMine as a SeqRecord.

    Runs the ``Gene_GenomicDNA`` template with a LOOKUP on ``GeneName``
    restricted to "S. cerevisiae" and builds the record from the first row.

    :param GeneName: Gene name handed to the template's LOOKUP constraint.
    :returns: Record whose sequence comes from ``sequence.residues``, whose
        id is the row's ``secondaryIdentifier`` and whose name is
        ``GeneName``; ``None`` when the template returns no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')
    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service can return several similar genes but only the first one is
    # wanted, so stop after it instead of counting through every row.
    first_row = next(iter(rows), None)
    if first_row is None:
        # Previously an empty result ended in a NameError further down.
        return None

    descr = first_row["description"]
    GeneSeq = Seq(first_row["sequence.residues"])
    GeneSysName = first_row["secondaryIdentifier"]

    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)
    GeneRecord.name = GeneName
    # NOTE(review): `features` receives the systematic name string here; kept
    # as-is because callers may read it back - confirm this is intended.
    GeneRecord.features = GeneSysName
    GeneRecord.description = descr
    return GeneRecord
def intermine_query(ids, organism, *args):
    """Build (without running) a case-sensitive Gene query for one organism.

    :param ids: Value handed to the ``LOOKUP`` constraint on ``Gene``.
    :param organism: Key into ``service_urls`` and the value that
        ``organism.name`` must equal.
    :param args: Output paths forwarded to ``select``.
    :returns: The configured query object.
    """
    mine = Service(service_urls[organism])
    gene_query = mine.new_query("Gene", case_sensitive=True)

    # Constraint A picks the genes, constraint B pins the organism.
    gene_query.add_constraint("Gene", "LOOKUP", ids, code="A")
    gene_query.add_constraint("organism.name", "=", organism, code="B")

    gene_query.select(*args)
    return gene_query
def wmquery():
    """Print gene, intergenic-region and transcript columns from WormMine.

    One line is printed per result row, with the column values separated by
    single spaces in the order listed in ``columns``.
    """
    # Single source of truth for the output columns; previously the same 19
    # paths were spelled out twice (view and print) and could drift apart.
    columns = (
        "biotype",
        "length",
        "symbol",
        "primaryIdentifier",
        "downstreamIntergenicRegion.primaryIdentifier",
        "downstreamIntergenicRegion.organism.name",
        "downstreamIntergenicRegion.locations.feature.primaryIdentifier",
        "downstreamIntergenicRegion.locations.start",
        "downstreamIntergenicRegion.locations.end",
        "downstreamIntergenicRegion.locations.strand",
        "homologues.dataSets.name",
        "upstreamIntergenicRegion.primaryIdentifier",
        "upstreamIntergenicRegion.organism.name",
        "upstreamIntergenicRegion.locations.feature.primaryIdentifier",
        "upstreamIntergenicRegion.locations.start",
        "upstreamIntergenicRegion.locations.end",
        "upstreamIntergenicRegion.locations.strand",
        "transcripts.primaryIdentifier",
        "transcripts.symbol",
    )

    service = Service("http://intermine.wormbase.org/tools/wormmine/service")
    query = service.new_query("Gene")
    query.add_view(*columns)

    for row in query.rows():
        # A single pre-joined string prints the same on Python 2 and 3;
        # `print (a, b, ...)` printed a tuple repr on Python 2.
        print(" ".join(str(row[column]) for column in columns))
def get_all_gene_annotations():
    """Download gene annotations from YeastMine into a pandas DataFrame.

    Only the first row seen for each ``secondaryIdentifier`` is kept. Several
    columns are renamed to short labels and a ``Gene_ORF`` column is added by
    applying ``gene_orfer`` to every row.

    :returns: One row per distinct ``secondaryIdentifier``.
    """
    mine = Service(
        "https://yeastmine.yeastgenome.org:443/yeastmine/service")
    gene_query = mine.new_query("Gene")
    col_names = [
        "briefDescription", "description", "functionSummary",
        "chromosome.primaryIdentifier", "secondaryIdentifier", "symbol",
        "phenotypeSummary", "locations.strand", "locations.end",
        "locations.start"
    ]
    gene_query.add_view(col_names)

    collected = dict((name, []) for name in col_names)
    seen_orfs = set()
    for record in gene_query.rows():
        orf = record['secondaryIdentifier']
        # Rows are repeated in the yeastmine output; skip the repeats.
        if orf in seen_orfs:
            continue
        for name in col_names:
            collected[name].append(record[name])
        seen_orfs.add(orf)

    short_names = {
        'chromosome.primaryIdentifier': 'chromosome',
        'secondaryIdentifier': 'ORF',
        'symbol': 'Gene',
        'locations.start': 'start',
        'locations.end': 'end',
        'locations.strand': 'orf_strand'
    }
    td = pd.DataFrame(collected)
    td = td.rename(columns=short_names)
    td['Gene_ORF'] = td.apply(gene_orfer, axis=1)
    return td
def main():
    """Connect to yeastmine and create a dictionary of annotation data.

    Each result row becomes a dict keyed by the last path component of every
    view column. The dict is stored in the ``yeastmine`` shelve and in the
    returned mapping under its ``secondaryIdentifier``.

    :returns: Mapping of ``secondaryIdentifier`` to the row's attribute dict.
    :rtype: dict
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine")
    query = service.new_query()
    query.add_view(
        "SequenceFeature.primaryIdentifier", "SequenceFeature.featureType",
        "SequenceFeature.secondaryIdentifier", "SequenceFeature.description",
        "SequenceFeature.sgdAlias", "SequenceFeature.name",
        "SequenceFeature.symbol", "SequenceFeature.chromosome.name",
        "SequenceFeature.chromosome.featAttribute",
        "SequenceFeature.locations.start", "SequenceFeature.locations.end",
        "SequenceFeature.locations.strand"
    )
    query.add_constraint("SequenceFeature.organism.name", "=",
                         "Saccharomyces cerevisiae", "A")
    query.add_constraint("SequenceFeature.featureType", "=", "ORF", "B")
    query.set_logic("(A and B)")

    annotation = {}
    db = shelve.open(
        os.path.join(settings.PROJECT_ROOT, 'apps', 'annotations', 'SGD',
                     'yeastmine'), 'c')
    try:
        for row in query.rows():
            data = {}
            # Walk the columns and cells in lockstep (replaces the Python 2
            # only `xrange` index loop).
            for view, cell in zip(row.views, row.data):
                attribute = view.split('.')[-1]
                value = cell['value']
                # NOTE(review): "SequenceFeature.name" and
                # "SequenceFeature.chromosome.name" both reduce to 'name', so
                # a later non-empty value replaces an earlier one - confirm.
                if attribute == 'name' and not value:
                    continue
                data[attribute] = value
            if 'name' not in data:
                data['name'] = None
            annotation[data['secondaryIdentifier']] = data
            db[str(data['secondaryIdentifier'])] = data
    finally:
        # Close the shelf even when the query or a row fails part-way.
        db.close()
    return annotation
def getInteractions():
    """Collect manually curated interactions of ribosomal protein genes.

    Queries YeastMine for genes annotated with the GO term "cytoplasmic
    translation" whose name is one of the two ribosomal-protein names below.

    :returns: Mapping of gene symbol to a list of dicts with the keys
        ``expt``, ``gene2`` and ``desc``, one dict per result row.
    :rtype: dict
    """
    mine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    gene_query = mine.new_query("Gene")

    # Type constraints come before any mention of the paths they constrain.
    gene_query.add_constraint("goAnnotation.ontologyTerm", "GOTerm")

    gene_query.add_view(
        "symbol", "interactions.details.experimentType",
        "interactions.gene2.symbol", "interactions.gene2.briefDescription"
    )

    gene_query.add_constraint("goAnnotation.qualifier", "IS NULL", code="C")
    gene_query.add_constraint("goAnnotation.qualifier", "!=", "NOT", code="B")
    gene_query.add_constraint("goAnnotation.ontologyTerm.name", "=",
                              "cytoplasmic translation", code="A")
    gene_query.add_constraint(
        "name", "ONE OF",
        ["Ribosomal Protein of the Large subunit",
         "Ribosomal Protein of the Small subunit"],
        code="D")
    gene_query.add_constraint("interactions.details.annotationType", "=",
                              "manually curated", code="E")
    gene_query.set_logic("A and (B or C) and E and D")

    by_symbol = {}
    for hit in gene_query.rows():
        partner = {
            "expt": hit["interactions.details.experimentType"],
            "gene2": hit["interactions.gene2.symbol"],
            "desc": hit["interactions.gene2.briefDescription"],
        }
        # Creates the list on a symbol's first appearance, appends otherwise.
        by_symbol.setdefault(hit["symbol"], []).append(partner)
    return by_symbol
def plot_go_vs_p(list_name):
    """Plot GO term against p-value as bars labelled with the gene count.

    The mine URL is resolved through the InterMine registry using the
    module-level ``mine`` name; only the first five results of the
    ``go_enrichment_for_gene`` widget are plotted.

    :param list_name: Name of the saved list to calculate enrichment for
        (e.g. "PL_obesityMonogen_ORahilly09").
    """
    registry_url = "http://registry.intermine.org/service/instances/" + mine
    response = requests.get(registry_url)
    instance_info = json.loads(response.text)

    service = Service(instance_info["instance"]["url"])
    saved_list = service.list_manager().get_list(name=list_name)
    enrichment = saved_list.calculate_enrichment(
        widget="go_enrichment_for_gene")

    gene_count = []
    identifier = []
    p_value = []
    for position, term in enumerate(enrichment):
        if position >= 5:
            break
        gene_count.append(term.matches)
        identifier.append(term.identifier)
        p_value.append(term.p_value)

    # Plot the figure.
    ax = pd.Series(p_value).plot(kind='bar')
    ax.set_title('GO Term vs p-value (Label: Gene count)')
    ax.set_xlabel('GO Term')
    ax.set_ylabel('p_value')
    ax.set_xticklabels(identifier, rotation='horizontal')

    # Write the gene count just above the top centre of each bar.
    for index, bar in enumerate(ax.patches):
        centre = bar.get_x() + bar.get_width()/2.
        top = bar.get_height()
        ax.annotate(gene_count[index], (centre, top), xytext=(0, 5),
                    textcoords="offset points", ha='center', va='bottom')

    ax.margins(y=0.1)
    plt.show()
def humanmine():
    """Look up a gene in HumanMine and return selected attributes as a dict.

    Reads ``gene`` from the enclosing scope (it is not a parameter). The
    attributes are scraped from the string form of each result row, and the
    function returns after processing the first matching identifier.

    :returns: Dict holding whichever of the mapped attributes were found;
        ``None`` when ``gene`` carries no HumanMine identifier.
    """
    # Row attribute name -> key used in the returned dict.
    key_for_field = {
        "description": "description",
        "cytoLocation": "cytogenetic_location",
        "id": "id",
        "length": "length",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    wanted_types = ["humanmine primary id", "humanmine primary identifier",
                    "humanmine primary gene id",
                    "humanmine primary gene identifier"]

    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(
            gene.identifiers, wanted_types):
        s = Service("www.humanmine.org/humanmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in q.rows():
            row_text = row.__str__()
            pairs = re.findall(
                r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})",
                row_text)
            for field, raw_value in pairs:
                # Drop the surrounding quotes of quoted values.
                if raw_value[0] == "'" and raw_value[-1] == "'":
                    raw_value = raw_value[1:-1]
                if field in key_for_field:
                    gene_object[key_for_field[field]] = raw_value.strip()
        return gene_object
def fetch_from_sgd() -> dict:
    """Query YeastMine and return the current S. cerevisiae ORF features.

    The result maps each SGD id to a dict with the keys ``sgd_id``,
    ``feature_qualifier``, ``feature_type``, ``orf``, ``name``, ``aliases``,
    ``chromosome``, ``chromosomal_location``, ``start_coordinate``,
    ``stop_coordinate`` and ``description``.

    ``chromosome`` is 0 for ORF names starting with "Q"; otherwise it is the
    second character of the ORF name counted from "A" = 1.
    ``chromosomal_location`` is the first run of digits in the ORF name,
    negated when the third character is "L".

    :rtype: dict
    """
    digits = re.compile(r'(\d+)')

    mine = Service("https://yeastmine.yeastgenome.org/yeastmine/service")
    gene_query = mine.new_query("Gene")
    gene_query.add_view(
        "primaryIdentifier", "featureType", "qualifier",
        "secondaryIdentifier", "symbol", "chromosomeLocation.start",
        "chromosomeLocation.end", "description", "synonyms.value")
    gene_query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="A")
    gene_query.add_constraint("featureType", "=", "ORF", code="C")

    features = {}
    logger.debug("Executing query on yeastmine")
    for record in gene_query.rows():
        sgd_id = record["primaryIdentifier"]
        orf = record["secondaryIdentifier"]

        numbers = digits.findall(orf)
        position = int(numbers[0]) if numbers else 0

        if orf.startswith('Q'):
            chromosome = 0
        else:
            chromosome = ord(orf[1]) - 64
            if orf[2] == 'L':
                position = -position

        if sgd_id not in features:
            logger.debug(f"Parsing new ORF: {orf}")
            features[sgd_id] = {
                'sgd_id': record["primaryIdentifier"],
                'feature_qualifier': record["qualifier"],
                'feature_type': record['featureType'],
                'orf': orf,
                'name': record["symbol"],
                'aliases': [],
                'chromosome': chromosome,
                'chromosomal_location': position,
                'start_coordinate': str(record["chromosomeLocation.start"]),
                'stop_coordinate': str(record["chromosomeLocation.end"]),
                'description': record["description"],
            }

        # Each row carries one synonym; keep it unless it merely repeats the
        # ORF name or the symbol.
        synonym = record["synonyms.value"]
        if synonym not in (orf, record["symbol"]):
            features[sgd_id]['aliases'].append(synonym)

    return features
def query_fishmine(intermine_url: str, protein_id: str, query: str="Gene") -> IntermineResult:
    """Find ZFIN primary identifiers cross-referenced to a protein id.

    :param intermine_url: InterMine service URL to query.
    :param protein_id: Identifier matched exactly against
        ``crossReferences.identifier``.
    :param query: Name of the class to query.
    :return: Result of ``intermine_response_factory`` for the "ZFIN:"-prefixed
        identifiers and ``protein_id``.
    """
    mine = Service(intermine_url)
    zfin_query = mine.new_query(query)
    zfin_query.add_view("primaryIdentifier")
    zfin_query.add_constraint("primaryIdentifier", "CONTAINS", "ZDB*", code="A")
    zfin_query.add_constraint("crossReferences.identifier", "=",
                              "{}".format(protein_id), code="B")

    zfin_ids = []
    for hit in zfin_query.rows():
        zfin_ids.append("ZFIN:{}".format(hit['primaryIdentifier']))
    return intermine_response_factory(zfin_ids, protein_id)
def parse(self, limit=None):
    """Add MouseMine gene-to-phenotype (MP term) associations to the graph.

    Queries are issued in 90 slices, one per two-digit MGI id prefix
    ("MGI:10*" to "MGI:99*"). Every result row becomes a ``G2PAssoc``; rows
    with a PubMed id also add a journal-article reference as its source.

    :param limit: Stop after this many prefixes have been processed; a falsy
        value processes all of them.
    """
    # Loop-invariant setup: one connection and one logger configuration serve
    # every prefix instead of being redone on each of the 90 iterations.
    service = Service("http://www.mousemine.org/mousemine/service")
    logging.getLogger('Model').setLevel(logging.CRITICAL)
    logging.getLogger('JSONIterator').setLevel(logging.CRITICAL)

    count = 0
    for num in range(10, 100):
        fuzzy_gene = "MGI:{0}*".format(num)
        gene = "MGI:{0}".format(num)

        query = service.new_query("OntologyAnnotation")
        query.add_constraint("subject", "SequenceFeature")
        query.add_constraint("ontologyTerm", "MPTerm")
        query.add_view(
            "subject.primaryIdentifier", "subject.symbol",
            "subject.sequenceOntologyTerm.name", "ontologyTerm.identifier",
            "ontologyTerm.name", "evidence.publications.pubMedId",
            "evidence.comments.type", "evidence.comments.description")
        query.add_sort_order("OntologyAnnotation.ontologyTerm.name", "ASC")
        query.add_constraint("subject.organism.taxonId", "=", "10090", code="A")
        query.add_constraint("subject", "LOOKUP", fuzzy_gene, code="B")
        query.add_constraint(
            "subject.primaryIdentifier", "CONTAINS", gene, code="C")
        query.outerjoin("evidence.comments")

        for row in query.rows():
            mgi_curie = row["subject.primaryIdentifier"]
            mp_curie = row["ontologyTerm.identifier"]
            pubmed_id = row["evidence.publications.pubMedId"]
            pub_curie = "PMID:{0}".format(pubmed_id)

            assoc = G2PAssoc(self.graph, self.name, mgi_curie, mp_curie)
            if pubmed_id:
                reference = Reference(
                    self.graph, pub_curie,
                    Reference.ref_types['journal_article'])
                reference.addRefToGraph()
                assoc.add_source(pub_curie)

            assoc.add_evidence('ECO:0000059')
            assoc.add_association_to_graph()

        if not count % 10 and count != 0:
            count_from = count - 10
            # NOTE(review): the logged numbers are the 0-based counter, not
            # the MGI prefix (which starts at 10) - confirm the intent.
            logger.info(
                "%s processed ids from MGI:%s* to MGI:%s*",
                datetime.datetime.now(), count_from, count)

        count += 1
        if limit and count >= limit:
            break
def sgd_connection(gene, p_dir, l_dir):
    """Download phenotype and literature data for one yeast gene from SGD.

    Writes a tab-separated phenotype table to ``<p_dir>/<gene>.txt`` and the
    text returned by ``pubmed_connection`` for each of the gene's PubMed ids
    to ``<l_dir>/<gene>.txt``.

    :param gene: Gene identifier used for the LOOKUP constraint and for the
        output file names.
    :param p_dir: Directory for the phenotype table.
    :param l_dir: Directory for the literature file.
    :returns: ``[gene, summary]`` where ``summary`` concatenates every
        non-empty phenotype summary returned.
    :rtype: list
    """
    service = Service(
        'https://yeastmine.yeastgenome.org:443/yeastmine/service')

    def gene_query(views):
        # Build a Gene query restricted to S. cerevisiae and the given gene.
        query = service.new_query('Gene')
        for path in views:
            query.add_view(path)
        query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
        query.add_constraint('Gene', 'LOOKUP', gene, code='A')
        return query

    # Phenotype table.
    view_list = [
        'primaryIdentifier', 'symbol', 'secondaryIdentifier', 'sgdAlias',
        'qualifier', 'phenotypes.experimentType', 'phenotypes.mutantType',
        'phenotypes.observable', 'phenotypes.qualifier', 'phenotypes.allele',
        'phenotypes.alleleComment', 'phenotypes.strainBackground',
        'phenotypes.chemical', 'phenotypes.condition', 'phenotypes.details',
        'phenotypes.reporter', 'phenotypes.publications.pubMedId',
        'phenotypes.publications.citation'
    ]
    a = gene_query(view_list)
    header = 'Gene Primary DBID\tGene Standard Name\tGene Systematic Name\t' \
             'Gene Sgd Alias\tGene Qualifier\tPhenotypes Experiment Type\t' \
             'Phenotypes Mutant Type\tPhenotypes Observable\tPhenotypes Qualifier\t' \
             'Phenotypes Allele\tPhenotypes Allele Comment\tPhenotypes Strain Background\t' \
             'Phenotypes Chemical\tPhenotypes Condition\tPhenotypes Details\t' \
             'Phenotypes Reporter\tPublications PubMed ID\tPublications Citation\n'
    p_result_file = os.path.join(p_dir, '{0}.txt'.format(gene))
    with open(p_result_file, 'w', encoding='utf-8') as f1:
        # Collect the lines and join once rather than growing one string.
        lines = [header]
        for row in a.rows():
            # strip() keeps the earlier behaviour of trimming surrounding
            # whitespace from the assembled line.
            cells = '\t'.join(str(row[k]) for k in view_list)
            lines.append(cells.strip() + '\n')
        f1.write(''.join(lines))

    # Phenotype summary.
    b = gene_query(['phenotypes.genes.phenotypeSummary'])
    summary_parts = []
    for row in b.rows():
        p_result = row['phenotypes.genes.phenotypeSummary']
        if p_result:
            summary_parts.append(p_result)
    result_list = [gene, ''.join(summary_parts)]

    # Literature: one pubmed_connection call per PubMed id.
    c = gene_query(['publicationAnnotations.publication.pubMedId'])
    l_result_file = os.path.join(l_dir, '{0}.txt'.format(gene))
    with open(l_result_file, 'w', encoding='utf-8') as f2:
        for row in c.rows():
            pubmed_id = row['publicationAnnotations.publication.pubMedId']
            if pubmed_id:
                # NOTE(review): the handle is read but never closed here;
                # confirm whether pubmed_connection's result needs closing.
                handle = pubmed_connection(pubmed_id, gene)
                if handle:
                    f2.write(handle.read())
    return result_list
def get_yeast_gene_location(gene_name):
    '''Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by the lookup is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, biostart, bioend, int: strand];
        start and end are passed through as returned by the service.
    :rtype location: list
    :raises StopIteration: when the lookup returns no rows.
    :raises KeyError: when the chromosome identifier is not one of
        chrI..chrXVI.

    '''
    from intermine.webservice import Service

    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')
    query = service.new_query('Gene')
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'organism.shortName',
                   'chromosome.primaryIdentifier',
                   'chromosomeLocation.start', 'chromosomeLocation.end',
                   'chromosomeLocation.strand')
    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Chromosome identifier (roman numeral) -> chromosome number.
    chromosomes = {
        'chrI': 1, 'chrII': 2, 'chrIII': 3, 'chrIV': 4,
        'chrV': 5, 'chrVI': 6, 'chrVII': 7, 'chrVIII': 8,
        'chrIX': 9, 'chrX': 10, 'chrXI': 11, 'chrXII': 12,
        'chrXIII': 13, 'chrXIV': 14, 'chrXV': 15, 'chrXVI': 16,
    }

    # Built-in next() instead of the Python 2-only `.next()` method, so the
    # call works on Python 2.6+ and Python 3 alike.
    first_result = next(iter(query.rows()))

    return [
        chromosomes[first_result['chromosome.primaryIdentifier']],
        first_result['chromosomeLocation.start'],
        first_result['chromosomeLocation.end'],
        int(first_result['chromosomeLocation.strand']),
    ]
def download(self, genes, fields, scope=None, species=None):
    '''Retrieve the data selected by ``self.constraints`` and ``self.views``.

    Genes are looked up in batches; every result row is stored as a tuple of
    the row's ``symbol`` followed by one value per view, and the collected
    list is assigned to ``self.dataset``.

    :param genes: Sequence of gene identifiers to look up.
    :param fields: Unused here; kept for interface compatibility.
    :param scope: Unused here; kept for interface compatibility.
    :param species: Unused here; kept for interface compatibility.
    '''
    constraints = self.constraints
    views = self.views
    glist = np.array(genes)

    if len(glist) > 1000:
        # Ceiling division gives an integer section count (true division
        # yields a float on Python 3) and keeps each batch at <= 1000 genes.
        n_segs = -(-len(glist) // 1000)
        segs = np.array_split(glist, n_segs)
    else:
        segs = [glist]

    # store the data in here
    z = []
    # API uses letters to distinguish between constraints; "A" is taken by
    # the gene lookup, so the extra constraints start at "B".
    alpha = list(string.ascii_uppercase)

    # One connection serves every batch.
    service = SS(self.datasource)

    for seg in segs:
        query = service.new_query("Gene")
        query.add_view(",".join(views))

        lookup = ",".join(seg)
        # Some databases require a host name
        if self.hostid != "":
            query.add_constraint("Gene", "LOOKUP", lookup, self.hostid, code="A")
        else:
            query.add_constraint("Gene", "LOOKUP", lookup, code="A")

        # Supported constraint forms: "path=value" and "path IS NOT NULL";
        # anything else is skipped but still consumes its letter.
        for i, constraint in enumerate(constraints, start=1):
            letter = alpha[i]
            parts = constraint.split("=")
            if len(parts) == 2:
                query.add_constraint(parts[0], "=", parts[1], code=letter)
            elif "IS NOT NULL" in constraint:
                path = constraint.replace(" IS NOT NULL", "")
                query.add_constraint(path, "IS NOT NULL", code=letter)

        # Parse the output into a list of tuples
        for row in query.rows():
            z.append(tuple([row['symbol']] + [row[v] for v in views]))

    self.dataset = z
def find_max_data_items(new_list, intermine, intermine_url):
    """Return the largest row count among the given classes of a mine.

    :param new_list: Class names; each is queried with the view
        ``<name>.*``.
    :param intermine: Unused here; kept for interface compatibility.
    :param intermine_url: Base URL of the mine; "/service" is appended.
    :returns: The highest ``count()`` seen, or 0 when ``new_list`` is empty.
    :rtype: int
    """
    service = Service(intermine_url + "/service")
    largest = 0  # no longer shadows the built-in max()
    for class_name in new_list:
        query = service.new_query(class_name)
        query.add_view(class_name + ".*")
        # Ask for the count once and reuse it; it was previously requested
        # twice per class (presumably one server round trip each).
        row_count = query.count()
        if row_count >= largest:
            largest = row_count
    return largest
def query(ids):
    """Query TargetMine for proteins and their associated compounds.

    :param ids: Non-empty sequence of strings; they are comma-joined into the
        value of an ``IN`` constraint on ``Protein``.
    :returns: The row iterator of the configured query.
    :raises IndexError: when ``ids`` is empty.
    """
    # Unused, but indexing fails fast on an empty `ids`, as before.
    first_id = ids[0]

    mine = Service("http://targetmine.nibio.go.jp/targetmine")
    protein_query = mine.new_query("Protein")
    columns = (
        "primaryIdentifier", "primaryAccession", "name", "length",
        "compounds.compound.casRegistryNumber", "compounds.compound.name",
        "compounds.compound.compoundGroup.name",
    )
    protein_query.add_view(*columns)

    joined_ids = ",".join(ids)
    protein_query.add_constraint("Protein", "IN", joined_ids)
    return protein_query.rows()
def get_yeast_gene_location(gene_name):
    """Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by the lookup is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, biostart, bioend, int: strand];
        start and end are passed through as returned by the service.
    :rtype location: list
    :raises StopIteration: when the lookup returns no rows.
    :raises KeyError: when the chromosome identifier is not one of
        chrI..chrXVI.

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    query = service.new_query("Gene")
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "organism.shortName",
                   "chromosome.primaryIdentifier",
                   "chromosomeLocation.start", "chromosomeLocation.end",
                   "chromosomeLocation.strand")
    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Chromosome identifier (roman numeral) -> chromosome number.
    chromosomes = {
        "chrI": 1, "chrII": 2, "chrIII": 3, "chrIV": 4,
        "chrV": 5, "chrVI": 6, "chrVII": 7, "chrVIII": 8,
        "chrIX": 9, "chrX": 10, "chrXI": 11, "chrXII": 12,
        "chrXIII": 13, "chrXIV": 14, "chrXV": 15, "chrXVI": 16,
    }

    # `.next()` exists only in the Python 2 iterator protocol; the built-in
    # next() on an explicit iterator works on Python 2.6+ and Python 3.
    first_result = next(iter(query.rows()))

    return [
        chromosomes[first_result["chromosome.primaryIdentifier"]],
        first_result["chromosomeLocation.start"],
        first_result["chromosomeLocation.end"],
        int(first_result["chromosomeLocation.strand"]),
    ]
def main():
    """Write an RNA-seq expression matrix for the genes listed in a file.

    Reads one gene identifier per line from the file named by
    ``sys.argv[1]`` (relative to the current directory), queries ThaleMine
    for each gene and writes ``results/all_genes.csv``: a tab-separated
    table with one "<SRA accession>-<tissue>" column per experiment seen and
    one row per input gene. A gene without a value for a column gets "0".
    """
    if not os.path.exists("results"):
        os.makedirs("results")
    service = Service("https://apps.araport.org/thalemine/service")

    list_written = []     # column labels, in first-seen order
    seen_labels = set()   # same labels, for O(1) membership tests
    first_value = {}      # (gene, label) -> first expression value seen
    list_gene_names = []  # one entry per input line, in file order

    # Context managers close both files even when a query fails.
    with open("results/all_genes.csv", "w") as out:
        with open(os.getcwd() + "/" + sys.argv[1]) as gene_file:
            for line in gene_file:
                gene = line.strip()
                query = service.new_query("Gene")
                query.add_view("primaryIdentifier",
                               "RNASeqExpressions.expressionLevel",
                               "RNASeqExpressions.experiment.SRAaccession",
                               "RNASeqExpressions.experiment.tissue",
                               "RNASeqExpressions.unit")
                query.add_sort_order(
                    "Gene.RNASeqExpressions.experiment.SRAaccession", "DESC")
                query.add_constraint("primaryIdentifier", "=", gene, code="A")

                for row in query.rows():
                    label = str(
                        row["RNASeqExpressions.experiment.SRAaccession"]
                    ) + "-" + str(row["RNASeqExpressions.experiment.tissue"])
                    value = str(row["RNASeqExpressions.expressionLevel"])
                    if label not in seen_labels:
                        seen_labels.add(label)
                        list_written.append(label)
                    # Keep only the first value per (gene, label): this is
                    # what the earlier linear scan with `break` selected.
                    first_value.setdefault((gene, label), value)
                list_gene_names.append(gene)

        # Header row: an empty first cell followed by the column labels.
        out.write("".join("\t" + label for label in list_written))
        out.write("\n")

        for gene_name in list_gene_names:
            out.write(gene_name)
            for label in list_written:
                out.write("\t" + first_value.get((gene_name, label), "0"))
            out.write("\n")
def query(ids):
    """Return TargetMine protein rows, with compound data, for ``ids``.

    :param ids: Non-empty sequence of strings, comma-joined into the value of
        an ``IN`` constraint on ``Protein``.
    :returns: The row iterator of the configured query.
    :raises IndexError: when ``ids`` is empty.
    """
    targetmine = Service("http://targetmine.nibio.go.jp/targetmine")
    proteins = targetmine.new_query("Protein")
    proteins.add_view(
        "primaryIdentifier",
        "primaryAccession",
        "name",
        "length",
        "compounds.compound.casRegistryNumber",
        "compounds.compound.name",
        "compounds.compound.compoundGroup.name",
    )

    # The value is not used, but the access still rejects an empty `ids`
    # before the constraint is added, exactly as before.
    probe_id = ids[0]

    proteins.add_constraint("Protein", "IN", ",".join(ids))
    return proteins.rows()
def get_yeast_gene_location(gene_name):
    '''Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by the lookup is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, biostart, bioend, int: strand];
        start and end are passed through as returned by the service.
    :rtype location: list
    :raises StopIteration: when the lookup returns no rows.
    :raises KeyError: when the chromosome identifier is not one of
        chrI..chrXVI.

    '''
    from intermine.webservice import Service

    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')
    query = service.new_query('Gene')
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'organism.shortName',
                   'chromosome.primaryIdentifier',
                   'chromosomeLocation.start', 'chromosomeLocation.end',
                   'chromosomeLocation.strand')
    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Chromosome identifier (roman numeral) -> chromosome number.
    chromosomes = {'chrI': 1, 'chrII': 2, 'chrIII': 3, 'chrIV': 4,
                   'chrV': 5, 'chrVI': 6, 'chrVII': 7, 'chrVIII': 8,
                   'chrIX': 9, 'chrX': 10, 'chrXI': 11, 'chrXII': 12,
                   'chrXIII': 13, 'chrXIV': 14, 'chrXV': 15, 'chrXVI': 16}

    # Use the built-in next() rather than the Python 2-only `.next()` method
    # so the function runs unchanged on Python 2.6+ and Python 3.
    first_result = next(iter(query.rows()))

    chromosome = chromosomes[first_result['chromosome.primaryIdentifier']]
    strand = int(first_result['chromosomeLocation.strand'])
    return [chromosome,
            first_result['chromosomeLocation.start'],
            first_result['chromosomeLocation.end'],
            strand]
def get_yeast_gene_location(gene_name):
    """Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by the lookup is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, biostart, bioend, int: strand];
        start and end are passed through as returned by the service.
    :rtype location: list
    :raises StopIteration: when the lookup returns no rows.
    :raises KeyError: when the chromosome identifier is not one of
        chrI..chrXVI.

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    query = service.new_query("Gene")
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "organism.shortName",
                   "chromosome.primaryIdentifier",
                   "chromosomeLocation.start", "chromosomeLocation.end",
                   "chromosomeLocation.strand")
    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Chromosome identifier (roman numeral) -> chromosome number.
    chromosomes = {"chrI": 1, "chrII": 2, "chrIII": 3, "chrIV": 4,
                   "chrV": 5, "chrVI": 6, "chrVII": 7, "chrVIII": 8,
                   "chrIX": 9, "chrX": 10, "chrXI": 11, "chrXII": 12,
                   "chrXIII": 13, "chrXIV": 14, "chrXV": 15, "chrXVI": 16}

    # The `.next()` method belongs to the Python 2 iterator protocol only;
    # next(iter(...)) is valid on Python 2.6+ and Python 3.
    first_result = next(iter(query.rows()))

    chromosome = chromosomes[first_result["chromosome.primaryIdentifier"]]
    strand = int(first_result["chromosomeLocation.strand"])
    return [chromosome,
            first_result["chromosomeLocation.start"],
            first_result["chromosomeLocation.end"],
            strand]
def ratmine(gene):
    """Look up a gene in RatMine and return selected attributes as a dict.

    The attributes are scraped from the string form of each result row. The
    function returns after processing the first matching identifier.

    :param gene: Object whose ``identifiers`` are filtered for RatMine
        primary identifiers.
    :returns: Dict holding whichever of the mapped attributes were found;
        ``None`` when ``gene`` carries no RatMine identifier.
    """
    # Row attribute name -> key used in the returned dict.
    # NOTE(review): the "ncbi_gene_number" -> "ncbiGeneNumber" pair runs
    # opposite to every other entry (camelCase -> snake_case); kept exactly
    # as it was - confirm whether the two names are swapped.
    key_for_field = {
        "description": "description",
        "geneType": "gene_type",
        "id": "id",
        "length": "length",
        "name": "name",
        "ncbi_gene_number": "ncbiGeneNumber",
        "pharmGKBidentifier": "pharmGKB_id",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    wanted_types = [
        "ratmine primary id", "ratmine primary identifier",
        "ratmine primary gene id", "ratmine primary gene identifier"
    ]

    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(
            gene.identifiers, wanted_types):
        s = Service("http://ratmine.mcw.edu/ratmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in q.rows():
            row_text = row.__str__()
            pairs = re.findall(
                r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})",
                row_text)
            for field, raw_value in pairs:
                # Drop the surrounding quotes of quoted values.
                if raw_value[0] == "'" and raw_value[-1] == "'":
                    raw_value = raw_value[1:-1]
                cleaned = raw_value.strip()

                if field == "briefDescription":
                    # The only field whose literal "None" becomes a real None.
                    gene_object["brief_description"] = (
                        None if cleaned == "None" else cleaned)
                elif field in key_for_field:
                    gene_object[key_for_field[field]] = cleaned
        return gene_object
def query_mousemine(intermine_url: str, gene_id: str) -> IntermineResult:
    """Look up the primary identifiers of mouse sequence features for a gene.

    :param intermine_url: intermine server, eg
                          http://www.mousemine.org/mousemine/service
    :param gene_id: gene ID, eg ENSMUSG00000063180
    :return: Intermine_Result object built by ``intermine_response_factory``
             from the matching primary identifiers and ``gene_id``
    """
    mine = Service(intermine_url)

    lookup = mine.new_query("SequenceFeature")
    lookup.add_view("primaryIdentifier")
    lookup.add_constraint("SequenceFeature", "LOOKUP",
                          "{}".format(gene_id), code="A")
    lookup.add_constraint("organism.shortName", "=", "M. musculus", code="B")

    primary_ids = []
    for record in lookup.rows():
        primary_ids.append("{}".format(record['primaryIdentifier']))

    return intermine_response_factory(primary_ids, gene_id)
def parse(self, limit=None):
    """Add MouseMine gene-to-phenotype associations to the graph.

    MouseMine is queried once per two-digit MGI id prefix (``MGI:10*`` ..
    ``MGI:99*``). For every annotation row a ``G2PAssoc`` between the
    subject and the MP term is added to ``self.graph``; when the row has a
    PubMed id, the publication is added as a reference and as the source
    of the association.

    :param limit: if truthy, stop once this many id prefixes have been
                  processed.
    :returns: None
    """
    # The service handle and the logger levels do not depend on the prefix,
    # so they are set up once instead of on every iteration.
    service = Service("http://www.mousemine.org/mousemine/service")
    logging.getLogger('Model').setLevel(logging.ERROR)
    logging.getLogger('JSONIterator').setLevel(logging.ERROR)

    count = 0
    for num in range(10, 100):
        fuzzy_gene = "MGI:{0}*".format(num)
        gene = "MGI:{0}".format(num)

        query = service.new_query("OntologyAnnotation")
        query.add_constraint("subject", "SequenceFeature")
        query.add_constraint("ontologyTerm", "MPTerm")
        query.add_view(
            "subject.primaryIdentifier", "subject.symbol",
            "subject.sequenceOntologyTerm.name", "ontologyTerm.identifier",
            "ontologyTerm.name", "evidence.publications.pubMedId",
            "evidence.comments.type", "evidence.comments.description"
        )
        query.add_sort_order("OntologyAnnotation.ontologyTerm.name", "ASC")
        query.add_constraint(
            "subject.organism.taxonId", "=", self.txid, code="A")
        query.add_constraint("subject", "LOOKUP", fuzzy_gene, code="B")
        query.add_constraint(
            "subject.primaryIdentifier", "CONTAINS", gene, code="C")
        query.outerjoin("evidence.comments")

        for row in query.rows():
            mgi_curie = row["subject.primaryIdentifier"]
            mp_curie = row["ontologyTerm.identifier"]
            pubmed_id = row["evidence.publications.pubMedId"]

            assoc = G2PAssoc(self.graph, self.name, mgi_curie, mp_curie)
            if pubmed_id:
                # Only build the CURIE when there is a publication to cite.
                pub_curie = "PMID:{0}".format(pubmed_id)
                reference = Reference(
                    self.graph, pub_curie, self.globaltt['journal article'])
                reference.addRefToGraph()
                assoc.add_source(pub_curie)

            assoc.add_evidence(
                self.globaltt['experimental phenotypic evidence'])
            assoc.add_association_to_graph()

        # Progress message after every tenth prefix.
        if not count % 10 and count != 0:
            count_from = count - 10
            LOG.info(
                "%s processed ids from MGI:%i* to MGI:%i*",
                datetime.datetime.now(), count_from, count)

        count += 1
        if limit and count >= limit:
            break

    return
def intermine_query(type):
    """Build (without running) a YeastMine sequence query for one class.

    :param type: name of the class (table) to query; passed unchanged to
                 ``service.new_query``.
    :returns: the query, with ``primaryIdentifier`` and
              ``sequence.residues`` as output columns.
    """
    from intermine.webservice import Service

    yeastmine = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # One query over the requested class, viewing identifier and sequence.
    sequence_query = yeastmine.new_query(type)
    sequence_query.add_view("primaryIdentifier", "sequence.residues")

    return sequence_query
def test(self):
    '''
    Tests the HumanMine API

    Look up symbol for APOBEC3G, should return APOBEC3G.

    Returns 1 when the first row's symbol is APOBEC3G, otherwise 0
    (including when the query returns no rows at all).
    '''
    service = SS('http://www.humanmine.org/humanmine/service')
    query = service.new_query("Gene")
    query.add_view("symbol")
    query.add_constraint("Gene", "LOOKUP", "APOBEC3G", code="A")

    for row in query.rows():
        # Only the first row decides the outcome.
        if row['symbol'] == "APOBEC3G":
            return 1
        return 0

    # An empty result used to fall through and return None; report it as
    # an explicit failure instead.
    return 0
def query_fishmine(intermine_url: str,
                   protein_id: str,
                   query: str = "Gene") -> IntermineResult:
    """Find ZFIN identifiers cross-referenced to a protein ID.

    :param intermine_url: intermine server to query
    :param protein_id: identifier matched against
                       ``crossReferences.identifier``
    :param query: name of the class to query (defaults to "Gene")
    :return: result of ``intermine_response_factory`` for the matching
             primary identifiers, each prefixed with "ZFIN:"
    """
    mine = Service(intermine_url)

    xref_query = mine.new_query(query)
    xref_query.add_view("primaryIdentifier")
    xref_query.add_constraint("primaryIdentifier", "CONTAINS", "ZDB*",
                              code="A")
    xref_query.add_constraint("crossReferences.identifier", "=",
                              "{}".format(protein_id), code="B")

    zfin_ids = []
    for record in xref_query.rows():
        zfin_ids.append("ZFIN:{}".format(record['primaryIdentifier']))

    return intermine_response_factory(zfin_ids, protein_id)
class Yeast(Genome):
    """Yeast genome services"""

    # Organism name, also used as the last component of the default path.
    genome_name = 'Saccharomyces_cerevisiae'
    default_genome_path = os.path.join(genome_dir, genome_name)

    # YeastMine endpoint and a client bound to it. The client is created
    # once, when this class body is executed.
    service_url = "https://yeastmine.yeastgenome.org:443/yeastmine/service"
    service = Service(service_url)
class LiveSummaryTest(unittest.TestCase):
    """Live tests of query summaries against a running test-model service.

    The service root comes from the ``TESTMODEL_URL`` environment variable
    and defaults to a local intermine-demo instance. The service and the
    shared query are created when the class body is executed.
    """

    TEST_ROOT = os.getenv("TESTMODEL_URL",
                          "http://localhost:8080/intermine-demo/service")
    SERVICE = Service(TEST_ROOT)
    QUERY = SERVICE.select("Employee.*", "department.name")

    def testNumericSummary(self):
        """Summarising a numeric column yields min/max/average/stdev."""
        summary = self.QUERY.summarise("age")
        self.assertEqual(10, summary["min"])
        self.assertEqual(74, summary["max"])
        # Floating-point statistics: compare approximately rather than
        # bit-for-bit.
        self.assertAlmostEqual(44.878787878787875, summary["average"])
        self.assertAlmostEqual(12.075481627447155, summary["stdev"])

    def testNonNumericSummary(self):
        """Summarising a non-numeric column yields per-value counts."""
        summary = self.QUERY.summarise("fullTime")
        self.assertEqual(56, summary[True])
        self.assertEqual(76, summary[False])
        summary = self.QUERY.summarise("department.name")
        self.assertEqual(18, summary["Sales"])

    def testSummaryAsIterator(self):
        """Summary results can be iterated and agree with ``first``."""
        path = "department.name"
        q = self.QUERY
        results = q.results(summary_path=path)
        # Builtin next() instead of the Python-2-only ``.next()`` method.
        top = next(results)
        self.assertEqual("Accounting", top["item"])
        self.assertEqual(18, top["count"])
        self.assertEqual(top, q.first(summary_path=path))

    def testAliasing(self):
        """``summarize`` is an alias of ``summarise``."""
        q = self.QUERY
        self.assertEqual(q.summarise("age"), q.summarize("age"))
def attack(self):
    """Register a throw-away user, build a random list, then deregister.

    Steps, each followed by a ``self.counter.add`` call:
    remove any leftover account with the same name, register the user,
    pick a random class that has an id and class keys and at least one
    row, create a list from a random sample of its first key column, and
    finally deregister the user again.
    """
    username = "******".format(self.ident)
    password = "******"

    # Best-effort cleanup of an account left over from an earlier run;
    # presumably this fails whenever no such account exists.
    try:
        s = Service(self.service.root, username, password)
        s.deregister(s.get_deregistration_token())
        self.counter.add(3)
    except Exception:
        pass

    s = self.service.register(username, password)
    self.LOG.debug("Registered user " + username)
    self.counter.add(1)

    c = 0
    # random.choice needs a sequence; dict views are not indexable on
    # Python 3, so materialise the values.
    classes = list(s.model.classes.values())
    self.counter.add(1)

    classkeys = s._get_json('/classkeys')['classes']
    self.counter.add(1)

    # Keep drawing classes until one with an id, class keys and rows turns up.
    while c == 0:
        table = random.choice(classes)
        if not (table.has_id and table.name in classkeys):
            continue
        query = s.query(table.name).select(classkeys[table.name][0])
        c = query.count()
        self.counter.add(1)

    n = random.randint(1, min(100, c))
    # random.sample needs a sequence as well (map() is lazy on Python 3).
    members = random.sample([r[0] for r in query.rows()], n)
    self.counter.add(1)

    self.LOG.debug("Will construct list of %s with: %r", table.name, members)

    with s.list_manager() as lm:
        l = lm.create_list(members, table.name)
        self.LOG.debug('Created list %s, size: %d', l.name, l.size)
        self.counter.add(1)

    # Best-effort removal of the account created above.
    try:
        s.deregister(s.get_deregistration_token())
        self.counter.add(2)
    except Exception:
        pass
def flymine(gene):
    """Look up a gene's Ensembl identifiers in FlyMine.

    :param gene: object exposing ``identifiers``, an iterable of dicts with
                 "identifier_type" and "identifier" keys.
    :returns: list with one dict per result row (keys: id, primary_id,
              secondary_id, symbol, name, cyto_location,
              brief_description, description, length, score, score_type).
              Empty when there is no Ensembl identifier or no hit.
    """
    ensembl_types = [
        "ensembl", "ensembl id", "ensembl identifier", "ensembl gene id"
    ]

    obj_array = []
    for ident in gene.identifiers:
        if ident["identifier_type"].lower() not in ensembl_types:
            continue

        s = Service("www.flymine.org/query")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            ident["identifier"])
        try:
            for row in q.rows():
                obj_array.append({
                    'id': row["id"],
                    'primary_id': row["primaryIdentifier"],
                    'secondary_id': row["secondaryIdentifier"],
                    'symbol': row["symbol"],
                    'name': row["name"],
                    'cyto_location': row["cytoLocation"],
                    'brief_description': row["briefDescription"],
                    'description': row["description"],
                    'length': row["length"],
                    'score': row["score"],
                    'score_type': row["scoreType"]
                })
        except intermine.errors.WebserviceError:
            print(
                "A webservice error occurred. Please contact Intermine support."
            )
        # The former ``else:`` clause on this try ran only when NO exception
        # was raised, so "Something else went wrong." was printed after
        # every successful query. It has been removed; other exceptions
        # still propagate to the caller as before.

    return obj_array
def query_intermine(genes):
    """Build (without running) a MouseMine phenotype-annotation query.

    :param genes: iterable of gene identifier strings; they are joined with
                  ", " and used as a single ``LOOKUP`` value.
    :returns: query over ``OntologyAnnotation`` restricted to MP terms on
              sequence features of taxon 10090, sorted by term name.
    """
    from intermine.webservice import Service

    lookup_value = ', '.join(genes)

    mousemine = Service("http://www.mousemine.org/mousemine/service")
    annotations = mousemine.new_query("OntologyAnnotation")

    # Type constraints on the two ends of the annotation.
    annotations.add_constraint("ontologyTerm", "MPTerm")
    annotations.add_constraint("subject", "SequenceFeature")

    annotations.add_view(
        "subject.primaryIdentifier",
        "subject.symbol",
        "subject.sequenceOntologyTerm.name",
        "ontologyTerm.identifier",
        "ontologyTerm.name",
        "evidence.publications.pubMedId",
        "evidence.comments.type",
        "evidence.comments.description",
    )
    annotations.add_sort_order("OntologyAnnotation.ontologyTerm.name", "ASC")

    annotations.add_constraint("subject.organism.taxonId", "=", "10090",
                               code="A")
    annotations.add_constraint("subject", "LOOKUP", lookup_value, code="B")
    annotations.outerjoin("evidence.comments")

    return annotations
def ratmine(gene):
    """Look up a gene in RatMine and return its attributes as a dict.

    For the first RatMine primary identifier found on ``gene``, a ``LOOKUP``
    query is run and ``name=value`` pairs are scraped from the string form
    of every result row.

    :param gene: object exposing an ``identifiers`` collection.
    :returns: dict of the recognised attributes, values kept as stripped
              strings (``brief_description`` becomes ``None`` when reported
              as "None"). Implicitly ``None`` when ``gene`` carries no
              RatMine identifier.
    """
    # Field name as it appears in the row text -> key in the returned dict.
    # NOTE(review): "ncbi_gene_number" -> "ncbiGeneNumber" looks inverted
    # compared with the other entries - confirm against real row output.
    key_for_field = {
        "briefDescription": "brief_description",
        "description": "description",
        "geneType": "gene_type",
        "id": "id",
        "length": "length",
        "name": "name",
        "ncbi_gene_number": "ncbiGeneNumber",
        "pharmGKBidentifier": "pharmGKB_id",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"
    wanted_types = [
        "ratmine primary id",
        "ratmine primary identifier",
        "ratmine primary gene id",
        "ratmine primary gene identifier",
    ]

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:
        mine = Service("http://ratmine.mcw.edu/ratmine")
        gene_class = mine.model.Gene
        lookup = mine.query(gene_class).select("*").where(
            "Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in lookup.rows():
            for field, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding quotes of quoted values.
                if raw[0] == "'" and raw[-1] == "'":
                    raw = raw[1:-1]
                if field not in key_for_field:
                    continue
                value = raw.strip()
                if field == "briefDescription" and value == "None":
                    value = None
                gene_object[key_for_field[field]] = value

        # Only the first matching identifier is ever queried.
        return gene_object
def run_queries():
    """Call every query callable found on ``queries``, one per second.

    Yields ``(attribute_name, result)`` pairs. Callables whose ``__name__``
    is one of the helper names listed below are skipped.
    """
    # Created for parity with the original; the handle itself is not used
    # further down.
    service = Service('http://intermine.wormbase.org/tools/wormmine/service')

    helper_names = ['assert_result', 'Service', 'assert_greater',
                    'save_txt_file']

    for attr_name in dir(queries):
        candidate = getattr(queries, attr_name)
        if not callable(candidate):
            continue
        if candidate.__name__ in helper_names:
            continue
        # Pause before each call.
        time.sleep(1)
        yield attr_name, candidate()
def wormmine(gene):
    """Look up a gene in WormMine and return its attributes as a dict.

    For the first WormMine primary identifier found on ``gene``, a
    ``LOOKUP`` query is run and ``name=value`` pairs are scraped from the
    string form of every result row.

    :param gene: object exposing an ``identifiers`` collection.
    :returns: dict of the recognised attributes, values kept as stripped
              strings. Implicitly ``None`` when ``gene`` carries no
              WormMine identifier.
    """
    # Field name as it appears in the row text -> key in the returned dict.
    # NOTE(review): "primary_id", "score_type" and "secondary_id" are
    # snake_case while "lastUpdated" is camelCase - confirm they match the
    # field names that actually occur in the row text.
    key_for_field = {
        "id": "id",
        "lastUpdated": "last_updated",
        "length": "length",
        "name": "name",
        "operon": "operon",
        "primary_id": "primary_id",
        "score": "score",
        "score_type": "score_type",
        "secondary_id": "secondary_id",
        "symbol": "symbol",
    }
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"
    wanted_types = [
        "wormmine primary id",
        "wormmine primary identifier",
        "wormmine primary gene id",
        "wormmine primary gene identifier",
    ]

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:
        mine = Service("http://intermine.wormbase.org/tools/wormmine")
        gene_class = mine.model.Gene
        lookup = mine.query(gene_class).select("*").where(
            "Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in lookup.rows():
            for field, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding quotes of quoted values.
                if raw[0] == "'" and raw[-1] == "'":
                    raw = raw[1:-1]
                if field in key_for_field:
                    gene_object[key_for_field[field]] = raw.strip()

        # Only the first matching identifier is ever queried.
        return gene_object
def attack(self):
    """Exercise list, lookup and enrichment endpoints on a random list.

    Picks a random non-empty list with status 'CURRENT', fetches one random
    member, runs a LOOKUP for that member when the list type has class
    keys, and finally calculates one random suitable enrichment widget.
    ``self.counter.add`` is called after each step.
    """
    service = Service(self.service.root)
    self.counter.add(2)

    lists = [candidate for candidate in service.get_all_lists()
             if candidate.size and candidate.status == 'CURRENT']
    Lists.LOG.debug("%d lists", len(lists))
    self.counter.add(1)

    target = random.choice(lists)
    list_type = target.list_type

    with closing(service.opener.open(service.root + "/classkeys")) as sock:
        classkeys = json.loads(sock.read())['classes']
    Lists.LOG.debug("Classkeys for %s are %r", target.name,
                    classkeys[list_type])
    self.counter.add(1)

    # Query restricted to the members of the chosen list.
    q = service.query(list_type).where(list_type, 'IN', target.name)
    index = random.randint(0, target.size - 1)
    rand_member = next(q.rows(size=1, start=index))
    self.counter.add(1)
    Lists.LOG.debug(rand_member)

    target_keys = classkeys[list_type]
    if target_keys:
        lu_q = q.where(list_type, 'LOOKUP', rand_member[target_keys[0]])
        Lists.LOG.debug("lookup q: %s", lu_q)
        Lists.LOG.debug("%s should be one", lu_q.count())
        self.counter.add(1)

    suitable_widgets = []
    for w in service.widgets.values():
        if w['widgetType'] == 'enrichment' and list_type in w['targets']:
            suitable_widgets.append(w)
    self.counter.add(1)

    if not suitable_widgets:
        return

    widget = random.choice(suitable_widgets)
    Lists.LOG.debug("Calculating %s of %s", widget['name'], target.name)
    enriched = list(target.calculate_enrichment(widget['name']))
    self.counter.add(1)

    if enriched:
        Lists.LOG.debug(enriched[0])
def query_mousemine(intermine_url: str, gene_id: str) -> IntermineResult:
    """Look up the primary identifiers of mouse sequence features for a gene.

    :param intermine_url: intermine server, eg
                          http://www.mousemine.org/mousemine/service
    :param gene_id: gene ID, eg ENSMUSG00000063180
    :return: Intermine_Result object built by ``intermine_response_factory``
             from the matching primary identifiers and ``gene_id``
    """
    mine = Service(intermine_url)

    lookup = mine.new_query("SequenceFeature")
    lookup.add_view("primaryIdentifier")
    lookup.add_constraint("SequenceFeature", "LOOKUP",
                          "{}".format(gene_id), code="A")
    lookup.add_constraint("organism.shortName", "=", "M. musculus", code="B")

    primary_ids = []
    for record in lookup.rows():
        primary_ids.append("{}".format(record['primaryIdentifier']))

    return intermine_response_factory(primary_ids, gene_id)
def templates(request):
    """Render the templates offered by the registered InterMines.

    The optional ``mines`` GET parameter restricts the listing to the named
    mines; without it every registered mine is queried. Mines whose service
    cannot be reached are skipped.

    :param request: the incoming HTTP request.
    :returns: rendered ``intermine_mgr/templates.html`` with the context
              keys ``existing_mines``, ``existing_templates`` (sorted by
              title, case-insensitively) and ``user_mines``.
    """
    # Determine available InterMines and associated templates
    selected_mines = request.GET.get('mines')
    if selected_mines is not None:
        # NOTE(review): a literal "+" in a query string is usually decoded
        # to a space before it gets here - confirm callers send it encoded.
        selected_mines = selected_mines.split('+')

    existing_mines = []
    existing_templates = {}
    for im in InterMine.objects.all():
        existing_mines.append(im.name)
        if selected_mines is not None and im.name not in selected_mines:
            continue

        try:
            service = Service(im.url.rstrip('/'))
        except Exception:
            # Service is inaccessible, or some other error. ``Exception``
            # rather than a bare except, so KeyboardInterrupt/SystemExit
            # are not swallowed.
            continue

        for t_name in service.templates:
            entry = existing_templates.get(t_name)
            if entry is not None:
                # Already described by an earlier mine: just record this
                # mine, without fetching the template again.
                entry['mines'].append(im.name)
                continue
            t = service.get_template(t_name)
            existing_templates[t_name] = {
                'name': t.name,
                'title': t.title,
                'description': t.description,
                'mines': [im.name]
            }

    # Sort each template's mines, then the templates themselves by title.
    for entry in existing_templates.values():
        entry['mines'] = sorted(entry['mines'], key=lambda m: m.lower())
    existing_templates = sorted(existing_templates.values(),
                                key=lambda t: t['title'].lower())

    context = {
        'existing_mines': existing_mines,
        'existing_templates': existing_templates,
        'user_mines': selected_mines,
    }
    return render(request, 'intermine_mgr/templates.html', context)
def humanmine(gene=None):
    """Look up a gene in HumanMine and return its attributes as a dict.

    For the first HumanMine primary identifier found on ``gene``, a
    ``LOOKUP`` query is run and ``name=value`` pairs are scraped from the
    string form of every result row.

    :param gene: object exposing an ``identifiers`` collection. Optional
                 only for backward compatibility: the function used to take
                 no argument and read a module-level ``gene``; when the
                 argument is omitted that global is still used.
    :returns: dict of the recognised attributes, values kept as stripped
              strings. Implicitly ``None`` when ``gene`` carries no
              HumanMine identifier.
    :raises NameError: if ``gene`` is omitted and no module-level ``gene``
                       exists (the behaviour of the old signature).
    """
    if gene is None:
        try:
            gene = globals()["gene"]
        except KeyError:
            raise NameError("name 'gene' is not defined")

    # Field name as it appears in the row text -> key in the returned dict.
    key_for_field = {
        "description": "description",
        "cytoLocation": "cytogenetic_location",
        "id": "id",
        "length": "length",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"
    wanted_types = [
        "humanmine primary id",
        "humanmine primary identifier",
        "humanmine primary gene id",
        "humanmine primary gene identifier",
    ]

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:
        s = Service("www.humanmine.org/humanmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            iden["identifier"])

        gene_object = {}
        for row in q.rows():
            for field, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding quotes of quoted values.
                if raw[0] == "'" and raw[-1] == "'":
                    raw = raw[1:-1]
                if field in key_for_field:
                    gene_object[key_for_field[field]] = raw.strip()

        # Only the first matching identifier is ever queried.
        return gene_object
def flymine(gene):
    """Look up a gene's Ensembl identifiers in FlyMine.

    :param gene: object exposing ``identifiers``, an iterable of dicts with
                 "identifier_type" and "identifier" keys.
    :returns: list with one dict per result row (keys: id, primary_id,
              secondary_id, symbol, name, cyto_location,
              brief_description, description, length, score, score_type).
              Empty when there is no Ensembl identifier or no hit.
    """
    ensembl_types = [
        "ensembl", "ensembl id", "ensembl identifier", "ensembl gene id"
    ]

    obj_array = []
    for ident in gene.identifiers:
        if ident["identifier_type"].lower() not in ensembl_types:
            continue

        s = Service("www.flymine.org/query")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            ident["identifier"])
        try:
            for row in q.rows():
                obj_array.append({
                    'id': row["id"],
                    'primary_id': row["primaryIdentifier"],
                    'secondary_id': row["secondaryIdentifier"],
                    'symbol': row["symbol"],
                    'name': row["name"],
                    'cyto_location': row["cytoLocation"],
                    'brief_description': row["briefDescription"],
                    'description': row["description"],
                    'length': row["length"],
                    'score': row["score"],
                    'score_type': row["scoreType"]
                })
        except intermine.errors.WebserviceError:
            print("A webservice error occurred. Please contact Intermine support.")
        # The former ``else:`` clause on this try ran only when NO exception
        # was raised, so "Something else went wrong." was printed after
        # every successful query. It has been removed; other exceptions
        # still propagate to the caller as before.

    return obj_array
def fetchGene(GeneName):
    """Fetch a yeast gene's genomic DNA from YeastMine as a SeqRecord.

    Runs the ``Gene_GenomicDNA`` template for ``GeneName`` in
    S. cerevisiae, prints the chosen hit, and wraps its sequence in a
    ``SeqRecord``.

    :param GeneName: gene name handed to the template's LOOKUP constraint.
    :returns: ``SeqRecord`` whose ``id`` is the hit's secondary identifier
              and whose ``name`` is ``GeneName``.
    :raises ValueError: if the template returns no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service seems to return several similar genes; only the first one
    # is wanted, so take it directly instead of walking every row.
    row = next(iter(rows), None)
    if row is None:
        # Previously this case failed later with an unbound-local error.
        raise ValueError("No gene found for %r" % (GeneName,))

    GeneSeq = Seq(row["sequence.residues"])
    GeneSysName = row["secondaryIdentifier"]

    print(" ")
    print("I think you want...... "+row["secondaryIdentifier"])
    print(row["description"])
    print(" ")
    print(row["sequence.residues"])
    print(" ")
    print("Good choice! I have a feeling you're going to get lucky with this one.")
    print(" ")
    print("Give me a second to put some of my ducks in a circle...")

    # Create a record for the gene and attach the identifying information.
    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)
    GeneRecord.name = GeneName
    # NOTE(review): ``features`` is set to the systematic-name string, as
    # before - confirm that a string (not a list of features) is intended.
    GeneRecord.features = GeneSysName

    return GeneRecord
def getData(mine):
    """
    Print the names of the datasets held by a mine, sorted alphabetically.

    The mine's service URL is looked up in the InterMine registry and its
    ``DataSet`` class is queried.
    ================================================
    example:

        >>> from intermine import registry
        >>> registry.getData('flymine')
        Name: Affymetrix array: Drosophila1
        Name: Affymetrix array: Drosophila2
        Name: Affymetrix array: GeneChip Drosophila Genome 2.0 Array
        Name: Affymetrix array: GeneChip Drosophila Genome Array
        Name: Anoph-Expr data set
        Name: BDGP cDNA clone data set.....

    Returns None after printing, or the string "No such mine available"
    when a KeyError is raised along the way (for instance when the registry
    answer has no instance/url entry).
    """
    registry_url = "http://registry.intermine.org/service/instances/" + mine
    try:
        response = requests.get(registry_url)
        instance_info = json.loads(response.text)
        service = Service(instance_info["instance"]["url"])

        dataset_query = service.new_query("DataSet")
        dataset_query.add_view("name", "url")

        names = []
        for row in dataset_query.rows():
            try:
                names.append(row["name"])
            except KeyError:
                print("No info available")

        names.sort()
        for dataset_name in names:
            print("Name: " + dataset_name)
        return None
    except KeyError:
        return "No such mine available"
def wormmine(gene):
    """Look up a gene in WormMine and return its attributes as a dict.

    For the first WormMine primary identifier found on ``gene``, a
    ``LOOKUP`` query is run and ``name=value`` pairs are scraped from the
    string form of every result row.

    :param gene: object exposing an ``identifiers`` collection.
    :returns: dict of the recognised attributes, values kept as stripped
              strings. Implicitly ``None`` when ``gene`` carries no
              WormMine identifier.
    """
    # Field name as it appears in the row text -> key in the returned dict.
    # NOTE(review): "primary_id", "score_type" and "secondary_id" are
    # snake_case while "lastUpdated" is camelCase - confirm they match the
    # field names that actually occur in the row text.
    key_for_field = {
        "id": "id",
        "lastUpdated": "last_updated",
        "length": "length",
        "name": "name",
        "operon": "operon",
        "primary_id": "primary_id",
        "score": "score",
        "score_type": "score_type",
        "secondary_id": "secondary_id",
        "symbol": "symbol",
    }
    pair_pattern = r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})"
    wanted_types = [
        "wormmine primary id",
        "wormmine primary identifier",
        "wormmine primary gene id",
        "wormmine primary gene identifier",
    ]

    matching = gnomics.objects.auxiliary_files.identifier.filter_identifiers(
        gene.identifiers, wanted_types)
    for iden in matching:
        mine = Service("http://intermine.wormbase.org/tools/wormmine")
        gene_class = mine.model.Gene
        lookup = mine.query(gene_class).select("*").where(
            "Gene", "LOOKUP", iden["identifier"])

        gene_object = {}
        for row in lookup.rows():
            for field, raw in re.findall(pair_pattern, row.__str__()):
                # Drop the surrounding quotes of quoted values.
                if raw[0] == "'" and raw[-1] == "'":
                    raw = raw[1:-1]
                if field in key_for_field:
                    gene_object[key_for_field[field]] = raw.strip()

        # Only the first matching identifier is ever queried.
        return gene_object
def test_user_registration(self):
    """A registered user gets a working service and can deregister.

    After registration the new service shares the root of ``self.SERVICE``
    and sees two lists; after deregistration, listing must raise
    ``WebserviceError``.
    """
    username = '******'.format(uuid.uuid4())
    password = '******'

    # Best-effort removal of a leftover account with the same credentials.
    # ``Exception`` rather than a bare except, so KeyboardInterrupt and
    # SystemExit still abort the test run.
    try:
        s = Service(self.SERVICE.root, username, password)
        s.deregister(s.get_deregistration_token())
    except Exception:
        pass

    s = self.SERVICE.register(username, password)

    self.assertEqual(s.root, self.SERVICE.root)
    self.assertEqual(2, len(s.get_all_lists()))

    drt = s.get_deregistration_token()
    s.deregister(drt)

    # The account is gone, so authenticated calls must now fail.
    self.assertRaises(WebserviceError, s.get_all_lists)
from intermine.webservice import Service

# Authenticated YeastMine client; replace the placeholder with a real key.
service = Service(
    "http://yeastmine.yeastgenome.org/yeastmine/service",
    token="YOUR-API-KEY",
)

# Output columns for every gene in the saved list.
query = service.new_query("Gene")
query.add_view(
    "primaryIdentifier",
    "secondaryIdentifier",
    "organism.shortName",
    "symbol",
    "name",
)

# Keep only genes that belong to the list "systematic gene names".
query.add_constraint("Gene", "IN", "systematic gene names", code="A")

for row in query.rows():
    print(row["primaryIdentifier"],
          row["secondaryIdentifier"],
          row["organism.shortName"],
          row["symbol"],
          row["name"])
def fetch_yeast_locus_sequence(locus_name, flanking_size=0):
    """Acquire a sequence from SGD http://www.yeastgenome.org.

    With ``flanking_size == 0`` the sequence of the locus itself is
    returned. With a positive ``flanking_size`` the residues of the
    flanking-region record (direction "both", distance
    ``flanking_size / 1000`` kb) of the first hit are returned.

    :param locus_name: Common name or systematic name for the locus (e.g.
                       ACT1 or YFL039C).
    :type locus_name: str
    :param flanking_size: The length of flanking DNA (on each side) to return
    :type flanking_size: int
    :returns: the sequence as ``coral.DNA``; an empty ``coral.DNA`` (after
              printing a message) when ``flanking_size`` is negative.
    :raises StopIteration: if the query yields no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Get a new query on the class (table) you will be querying:
    query = service.new_query("Gene")

    if flanking_size > 0:
        # The view specifies the output columns
        # secondaryIdentifier: the systematic name (e.g. YFL039C)
        # symbol: short name (e.g. ACT1)
        # length: sequence length
        # flankingRegions.direction: Upstream or downstream (or both) of locus
        # flankingRegions.sequence.length: length of the flanking regions
        # flankingRegions.sequence.residues: sequence of the flanking regions
        query.add_view("secondaryIdentifier", "symbol", "length",
                       "flankingRegions.direction",
                       "flankingRegions.sequence.length",
                       "flankingRegions.sequence.residues")

        query.add_constraint("flankingRegions.direction", "=", "both",
                             code="A")
        query.add_constraint("Gene", "LOOKUP", locus_name, "S. cerevisiae",
                             code="B")
        query.add_constraint("flankingRegions.distance", "=",
                             "{:.1f}kb".format(flanking_size / 1000.),
                             code="C")
        query.set_logic("A and B and C")

        # TODO: What to do when there's more than one result?
        # Builtin next() works on Python 2 and 3; ``.next()`` is 2-only.
        first_result = next(query.rows())
        seq = coral.DNA(first_result["flankingRegions.sequence.residues"])
        # TODO: add more metadata

    elif flanking_size == 0:
        # The view specifies the output columns
        query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                       "name", "sgdAlias", "organism.shortName",
                       "sequence.length", "sequence.residues", "description",
                       "qualifier")

        query.add_constraint("status", "IS NULL", code="D")
        query.add_constraint("status", "=", "Active", code="C")
        query.add_constraint("qualifier", "IS NULL", code="B")
        query.add_constraint("qualifier", "!=", "Dubious", code="A")
        query.add_constraint("Gene", "LOOKUP", locus_name, "S. cerevisiae",
                             code="E")

        # Non-dubious qualifier, active (or unset) status, and the lookup.
        query.set_logic("(A or B) and (C or D) and E")

        first_result = next(query.rows())
        seq = coral.DNA(first_result["sequence.residues"])

    else:
        # FIXME: Use logger module instead
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print("Problem with the flanking region size....")
        seq = coral.DNA("")

    return seq
sys.stderr.write(".") else: # To run your query # to use it you will require the intermine python client. # To install the client, run the following command from a terminal: # # sudo easy_install intermine # # For further documentation you can visit: # http://intermine.readthedocs.org/en/latest/web-services/ # The following two lines will be needed in every python script: from intermine.webservice import Service service = Service("http://yeastmine.yeastgenome.org/yeastmine/service") query = service.new_query("SequenceFeature") query.add_view( "primaryIdentifier", "featureType", "secondaryIdentifier", "description", "sgdAlias", "symbol" ) query.add_constraint("featureType", "=", "telomerase_RNA_gene", code = "Z") query.add_constraint("qualifier", "IS NULL", code = "W") query.add_constraint("qualifier", "!=", "Dubious", code = "V") query.add_constraint("status", "=", "Active", code = "U") query.add_constraint("featureType", "=", "transposable_element_gene", code = "S") query.add_constraint("featureType", "=", "telomeric_repeat", code = "R") query.add_constraint("featureType", "=", "telomere", code = "Q") query.add_constraint("featureType", "=", "tRNA_gene", code = "P") query.add_constraint("featureType", "=", "snoRNA_gene", code = "O") query.add_constraint("featureType", "=", "snRNA_gene", code = "N")
#!/usr/bin/python
from intermine.webservice import Service

# Print the symbol and name of every Gene row returned by SynBioMine.
service = Service('http://synbiomine.org/query/service')
query = service.new_query()
query.add_view('Gene.symbol', 'Gene.name')

for row in query.results():
    # Parenthesised single-argument print gives the same output on
    # Python 2 and is valid syntax on Python 3, unlike ``print row``.
    print(row)
"""Examples of querying yeastmine with intermine webservice"""

__author__ = "Aaron Brooks"
__copyright__ = "Copyright 2015, "
__credits__ = ["Aaron Brooks"]
__license__ = "GPL"
__version__ = "0.0.1"
__maintainer__ = "Aaron Brooks"
__email__ = "*****@*****.**"
__status__ = "Development"

from intermine.webservice import Service
import pandas as pd

service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

#-------------------------------------------------------------------#
# Gene Info
#-------------------------------------------------------------------#

# First gene whose symbol is HFA1. The prints below use the parenthesised
# single-argument form, which behaves the same on Python 2 and 3.
gene = service.model.Gene.where(symbol='HFA1').first()
print(gene.symbol + "\n" + gene.description)
print(gene)

#-------------------------------------------------------------------#
# Model templates
#-------------------------------------------------------------------#

# Run the "Gene_Pathways" template with its constraint A set to HFA1.
template = service.get_template("Gene_Pathways")
for row in template.results(A={"symbol": "HFA1"}):
    print(row)