Пример #1
0
def get_all_gene_annotations():
    """Download gene annotations from YeastMine and return them as a DataFrame.

    One row is kept per ``secondaryIdentifier`` (the first occurrence wins).
    Several columns are renamed to shorter labels, and a ``Gene_ORF`` column
    is added by applying ``gene_orfer`` to every row.
    """
    col_names = [
        "briefDescription", "description", "functionSummary",
        "chromosome.primaryIdentifier", "secondaryIdentifier", "symbol",
        "phenotypeSummary", "locations.strand", "locations.end",
        "locations.start"
    ]
    short_names = {
        'chromosome.primaryIdentifier': 'chromosome',
        'secondaryIdentifier': 'ORF',
        'symbol': 'Gene',
        'locations.start': 'start',
        'locations.end': 'end',
        'locations.strand': 'orf_strand'
    }

    service = Service(
        "https://yeastmine.yeastgenome.org:443/yeastmine/service")
    query = service.new_query("Gene")
    query.add_view(col_names)

    columns = {name: [] for name in col_names}
    seen_orfs = set()
    for row in query.rows():
        orf = row['secondaryIdentifier']
        # Rows are repeated in the YeastMine output; keep the first per ORF.
        if orf in seen_orfs:
            continue
        seen_orfs.add(orf)
        for name in col_names:
            columns[name].append(row[name])

    td = pd.DataFrame(columns).rename(columns=short_names)
    td['Gene_ORF'] = td.apply(gene_orfer, axis=1)
    return td
Пример #2
0
def main():
    """Connect to YeastMine and build a dictionary of ORF annotation data.

    Each result row is flattened into a dict keyed by the last segment of
    every view path (e.g. ``SequenceFeature.locations.start`` -> ``start``).
    The dict is stored under the row's ``secondaryIdentifier`` both in the
    returned mapping and in a shelve database located under
    ``settings.PROJECT_ROOT``.

    :returns: Mapping of secondary identifier -> annotation dict.
    :rtype: dict
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine")

    query = service.new_query()

    query.add_view(
        "SequenceFeature.primaryIdentifier", "SequenceFeature.featureType",
        "SequenceFeature.secondaryIdentifier", "SequenceFeature.description",
        "SequenceFeature.sgdAlias", "SequenceFeature.name", "SequenceFeature.symbol",
        "SequenceFeature.chromosome.name", "SequenceFeature.chromosome.featAttribute",
        "SequenceFeature.locations.start", "SequenceFeature.locations.end", "SequenceFeature.locations.strand"
        )
    query.add_constraint("SequenceFeature.organism.name", "=", "Saccharomyces cerevisiae", "A")
    query.add_constraint("SequenceFeature.featureType", "=", "ORF", "B")
    query.set_logic("(A and B)")

    annotation = {}
    db = shelve.open(os.path.join(settings.PROJECT_ROOT, 'apps', 'annotations', 'SGD', 'yeastmine'), 'c')
    # try/finally guarantees the shelf is closed even if the query fails
    # part-way through.
    try:
        for row in query.rows():
            data = {}
            for index, view in enumerate(row.views):
                attribute = view.split('.')[-1]
                value = row.data[index]['value']
                # NOTE(review): "SequenceFeature.name" and
                # "SequenceFeature.chromosome.name" both reduce to the key
                # 'name', so whichever comes later in row.views overwrites
                # the earlier one -- confirm this is intended.
                if attribute == 'name' and not value:
                    continue
                data[attribute] = value
            # An empty name is skipped above; make sure the key still exists.
            data.setdefault('name', None)
            annotation[data['secondaryIdentifier']] = data
            db[str(data['secondaryIdentifier'])] = data
    finally:
        db.close()
    return annotation
Пример #3
0
def get_gene_id(gene_name):
    """Retrieve systematic yeast gene name from the common name.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C), or ``None`` when
              the lookup matches nothing. If several rows match, the value
              from the last row is returned.
    :rtype: str or None

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Get a new query on the class (table) you will be querying:
    query = service.new_query("Gene")

    # The view specifies the output columns
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "sgdAlias", "crossReferences.identifier",
                   "crossReferences.source.name")

    # Restrict the lookup to S. cerevisiae genes matching the given name.
    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # Start from None so an empty result set does not leave `gid` unbound.
    gid = None
    for row in query.rows():
        gid = row["secondaryIdentifier"]
    return gid
class LiveSummaryTest(unittest.TestCase):
    """Live tests of column summaries against an InterMine test-model service."""

    # Service root; can be overridden with the TESTMODEL_URL environment variable.
    TEST_ROOT = os.getenv("TESTMODEL_URL", "http://localhost:8080/intermine-demo/service")
    SERVICE = Service(TEST_ROOT)

    # Query shared by all test cases in this class.
    QUERY = SERVICE.select("Employee.*", "department.name")

    def testNumericSummary(self):
        """A numeric column summary exposes min, max, average and stdev."""
        age_stats = self.QUERY.summarise("age")
        expectations = (
            ("min", 10),
            ("max", 74),
            ("average", 44.878787878787875),
            ("stdev", 12.075481627447155),
        )
        for key, expected in expectations:
            self.assertEqual(expected, age_stats[key])

    def testNonNumericSummary(self):
        """A non-numeric column summary maps each value to its count."""
        by_full_time = self.QUERY.summarise("fullTime")
        self.assertEqual(56, by_full_time[True])
        self.assertEqual(76, by_full_time[False])

        by_department = self.QUERY.summarise("department.name")
        self.assertEqual(18, by_department["Sales"])

    def testSummaryAsIterator(self):
        """Summary results can be iterated, and ``first`` returns the top item."""
        column = "department.name"
        shared_query = self.QUERY
        top_item = shared_query.results(summary_path = column).next()
        self.assertEqual("Accounting", top_item["item"])
        self.assertEqual(18, top_item["count"])

        self.assertEqual(top_item, shared_query.first(summary_path = column))

    def testAliasing(self):
        """``summarize`` returns the same result as ``summarise``."""
        shared_query = self.QUERY
        british = shared_query.summarise("age")
        american = shared_query.summarize("age")
        self.assertEqual(british, american)
Пример #5
0
def intermine_query(ids, organism, *args):
    """Build (without running) a Gene LOOKUP query for one organism.

    The service URL is taken from ``service_urls[organism]``. ``ids`` is the
    LOOKUP value, and ``args`` are the output columns passed to ``select``.
    Returns the configured query object.
    """
    mine = Service(service_urls[organism])
    gene_query = mine.new_query("Gene", case_sensitive=True)
    # Constraint A: identifier lookup; constraint B: restrict to the organism.
    gene_query.add_constraint("Gene", "LOOKUP", ids, code="A")
    gene_query.add_constraint("organism.name", "=", organism, code="B")
    gene_query.select(*args)
    return gene_query
Пример #6
0
class Yeast(Genome):
    """Yeast genome services"""

    # YeastMine endpoint and a client bound to it; the client is created
    # when the class body is executed.
    service_url = "https://yeastmine.yeastgenome.org:443/yeastmine/service"
    service = Service(service_url)

    # Genome label and its default directory below ``genome_dir``.
    genome_name = 'Saccharomyces_cerevisiae'
    default_genome_path = os.path.join(genome_dir, genome_name)
Пример #7
0
def wmquery():
    """Print gene, intergenic-region, homologue and transcript columns from WormMine.

    Every result row is printed on its own line, with the values in the same
    order as the view paths below.
    """
    view_paths = (
        "biotype", "length", "symbol", "primaryIdentifier",
        "downstreamIntergenicRegion.primaryIdentifier",
        "downstreamIntergenicRegion.organism.name",
        "downstreamIntergenicRegion.locations.feature.primaryIdentifier",
        "downstreamIntergenicRegion.locations.start",
        "downstreamIntergenicRegion.locations.end",
        "downstreamIntergenicRegion.locations.strand",
        "homologues.dataSets.name",
        "upstreamIntergenicRegion.primaryIdentifier",
        "upstreamIntergenicRegion.organism.name",
        "upstreamIntergenicRegion.locations.feature.primaryIdentifier",
        "upstreamIntergenicRegion.locations.start",
        "upstreamIntergenicRegion.locations.end",
        "upstreamIntergenicRegion.locations.strand",
        "transcripts.primaryIdentifier", "transcripts.symbol",
    )

    service = Service("http://intermine.wormbase.org/tools/wormmine/service")
    query = service.new_query("Gene")
    query.add_view(*view_paths)

    for row in query.rows():
        # The same tuple drives both the view and the output, so the
        # printed columns cannot drift from the requested ones.
        print(*[row[path] for path in view_paths])
Пример #8
0
def index_genes(organism, mod):
    """Fetch (or restore) the gene records of one organism and bulk-index them.

    If a pickle backup written earlier today exists it is loaded; otherwise
    the records are fetched from the mine described by ``mod`` and the backup
    is written. The records are then sent to ``es.bulk`` in batches.

    :param organism: Organism label; used in the backup file name, in log
                     output and as the document id prefix.
    :param mod: Mapping with the keys ``mine_service_url``, ``gene_fields``,
                ``mine_organism_name``, ``url_prefix`` and ``url_suffix``.
    """
    backup_filename = organism + "mine_genes_" + time.strftime("%m_%d_%Y") + ".bkp"
    if os.path.isfile(backup_filename):
        print("Restoring fetched data from today from " + organism + "mine")

        # NOTE: pickle must only ever be used on files this function wrote
        # itself; never point it at untrusted data.
        with open(backup_filename, 'rb') as backup:
            genes = pickle.load(backup)
    else:
        print("Fetching data from " + organism + "mine")
        fields = mod["gene_fields"]
        service = Service(mod["mine_service_url"])

        query = service.new_query("Gene")
        # On Python 3 dict.values() is a view, not a list; materialise it
        # so add_view receives an actual list of paths.
        query.add_view(list(fields.values()))

        query.add_constraint("organism.name", "=", mod["mine_organism_name"], code="B")

        genes = {}

        for row in query.rows():
            gene_id = row[fields["id"]]
            go_id = row[fields["go_id"]]
            go_name = row[fields["go_name"]]

            if gene_id in genes:
                # Further rows of a known gene only contribute GO terms.
                genes[gene_id]["go_ids"].append(go_id)
                genes[gene_id]["go_names"].append(go_name)
            else:
                genes[gene_id] = {
                    "name": row[fields["gene_name"]],
                    "symbol": row[fields["gene_symbol"]],
                    "synonym": row[fields["gene_synonym"]],
                    "go_ids": [go_id],
                    "go_names": [go_name],
                    "href": mod["url_prefix"] + row["primaryIdentifier"] + mod["url_suffix"],
                    "organism": organism,
                    "category": "gene"
                }

        with open(backup_filename, 'wb') as backup:
            pickle.dump(genes, backup)

    print("Indexing " + str(len(genes)) + " " + organism + " genes")

    bulk_data = []
    for gene_id, document in genes.items():
        # Each document is preceded by its action line, so two entries are
        # appended per gene.
        bulk_data.append({
            'index': {
                '_index': INDEX_NAME,
                '_type': DOC_TYPE,
                '_id': organism + "_" + gene_id
            }
        })
        bulk_data.append(document)

        if len(bulk_data) % 500 == 0:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    # Flush whatever is left after the last full batch.
    if bulk_data:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
Пример #9
0
def getInteractions():
    """Collect manually curated interactions of selected ribosomal genes from YeastMine.

    Returns a dict mapping each gene symbol to a list of dicts with the keys
    ``expt`` (experiment type), ``gene2`` (partner symbol) and ``desc``
    (partner brief description).
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    query = service.new_query("Gene")

    # Type constraints should come early - before all mentions of the paths they constrain
    query.add_constraint("goAnnotation.ontologyTerm", "GOTerm")

    # Output columns
    query.add_view(
        "symbol", "interactions.details.experimentType",
        "interactions.gene2.symbol", "interactions.gene2.briefDescription"
    )

    # Constraints, combined by the logic expression below
    query.add_constraint("goAnnotation.qualifier", "IS NULL", code = "C")
    query.add_constraint("goAnnotation.qualifier", "!=", "NOT", code = "B")
    query.add_constraint("goAnnotation.ontologyTerm.name", "=", "cytoplasmic translation", code = "A")
    query.add_constraint("name", "ONE OF", ["Ribosomal Protein of the Large subunit", "Ribosomal Protein of the Small subunit"], code = "D")
    query.add_constraint("interactions.details.annotationType", "=", "manually curated", code = "E")
    query.set_logic("A and (B or C) and E and D")

    interactions = {}
    for row in query.rows():
        partner = {
            "expt": row["interactions.details.experimentType"],
            "gene2": row["interactions.gene2.symbol"],
            "desc": row["interactions.gene2.briefDescription"],
        }
        # Group the partner records by the symbol of the queried gene.
        interactions.setdefault(row["symbol"], []).append(partner)
    return interactions
Пример #10
0
def _parse_orf_position(orf):
    """Derive ``(chromosome, signed ORF number)`` from a systematic ORF name.

    The ORF number is the first run of digits in the name (0 if there is
    none). Names starting with ``'Q'`` get chromosome 0. For all other names
    the second character is converted to a number (``'A'`` -> 1, ``'B'`` -> 2,
    ...) and the ORF number is negated when the third character is ``'L'``.
    """
    digits = re.search(r'(\d+)', orf)
    orfnum = int(digits.group(1)) if digits else 0

    if orf.startswith('Q'):
        return 0, orfnum

    chrom = ord(orf[1]) - 64
    if orf[2] == 'L':
        orfnum = -orfnum
    return chrom, orfnum


def fetch_from_sgd() -> dict:
    """Query SGD's intermine service and return an up-to-date dict of S. Cerevisiae features (genes).
    Returned is a dictionary of "SGD_ID" -> dict of feature data. Keys in feature data are:
    sgd_id, feature_qualifier, feature_type, orf, name, aliases, chromosome, chromosomal_location, start_coordinate,
    stop_coordinate, description

    The query returns one row per synonym, so a gene may appear in several
    rows; the feature dict is built from the first row and later rows only
    contribute aliases.

    :rtype: dict
    """
    service = Service("https://yeastmine.yeastgenome.org/yeastmine/service")

    query = service.new_query("Gene")
    query.add_view("primaryIdentifier", "featureType", "qualifier",
                   "secondaryIdentifier", "symbol", "chromosomeLocation.start",
                   "chromosomeLocation.end", "description", "synonyms.value")

    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="A")
    query.add_constraint("featureType", "=", "ORF", code="C")

    genes = {}

    logger.debug("Executing query on yeastmine")
    for row in query.rows():
        sgd_id = row["primaryIdentifier"]
        orf = row["secondaryIdentifier"]

        if sgd_id not in genes:
            logger.debug("Parsing new ORF: %s", orf)
            # The position only depends on the ORF name, so it is parsed
            # once per gene rather than once per synonym row.
            chrom, orfnum = _parse_orf_position(orf)
            genes[sgd_id] = {
                'sgd_id': sgd_id,
                'feature_qualifier': row["qualifier"],
                'feature_type': row['featureType'],
                'orf': orf,
                'name': row["symbol"],
                'aliases': [],
                'chromosome': chrom,
                'chromosomal_location': orfnum,
                'start_coordinate': str(row["chromosomeLocation.start"]),
                'stop_coordinate': str(row["chromosomeLocation.end"]),
                'description': row["description"],
            }

        # A synonym equal to the ORF or to the symbol is not an alias.
        synonym = row["synonyms.value"]
        if synonym not in (orf, row["symbol"]):
            genes[sgd_id]['aliases'].append(synonym)

    return genes
Пример #11
0
def get_gene_id(gene_name):
    '''Retrieve systematic yeast gene name from the common name.

    :param gene_name: Common name for yeast gene (e.g. ADE2).
    :type gene_name: str
    :returns: Systematic name for yeast gene (e.g. YOR128C), or ``None`` when
              the lookup matches nothing. If several rows match, the value
              from the last row is returned.
    :rtype: str or None

    '''
    from intermine.webservice import Service

    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # Get a new query on the class (table) you will be querying:
    query = service.new_query('Gene')

    # The view specifies the output columns
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'sgdAlias', 'crossReferences.identifier',
                   'crossReferences.source.name')

    # Restrict the lookup to S. cerevisiae genes matching the given name.
    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # Start from None so an empty result set does not leave `gid` unbound.
    gid = None
    for row in query.rows():
        gid = row['secondaryIdentifier']
    return gid
Пример #12
0
def sgd_connection(gene, p_dir, l_dir):
    """Download phenotype and literature data for one gene from YeastMine.

    Writes a tab-separated phenotype table to ``<p_dir>/<gene>.txt`` and the
    content returned by ``pubmed_connection`` for each PubMed id to
    ``<l_dir>/<gene>.txt``.

    Returns ``[gene, summary]`` where ``summary`` is the concatenation of all
    non-empty phenotype summaries.
    """
    service = Service(
        'https://yeastmine.yeastgenome.org:443/yeastmine/service')

    def gene_lookup_query(*views):
        # All three queries share the same constraints; only the views differ.
        lookup = service.new_query('Gene')
        for view in views:
            lookup.add_view(view)
        lookup.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
        lookup.add_constraint('Gene', 'LOOKUP', gene, code='A')
        return lookup

    # --- phenotype table -------------------------------------------------
    view_list = [
        'primaryIdentifier', 'symbol', 'secondaryIdentifier', 'sgdAlias',
        'qualifier', 'phenotypes.experimentType', 'phenotypes.mutantType',
        'phenotypes.observable', 'phenotypes.qualifier', 'phenotypes.allele',
        'phenotypes.alleleComment', 'phenotypes.strainBackground',
        'phenotypes.chemical', 'phenotypes.condition', 'phenotypes.details',
        'phenotypes.reporter', 'phenotypes.publications.pubMedId',
        'phenotypes.publications.citation'
    ]
    phenotype_query = gene_lookup_query(*view_list)
    header = ('Gene Primary DBID\tGene Standard Name\tGene Systematic Name\t'
              'Gene Sgd Alias\tGene Qualifier\tPhenotypes Experiment Type\t'
              'Phenotypes Mutant Type\tPhenotypes Observable\tPhenotypes Qualifier\t'
              'Phenotypes Allele\tPhenotypes Allele Comment\tPhenotypes Strain Background\t'
              'Phenotypes Chemical\tPhenotypes Condition\tPhenotypes Details\t'
              'Phenotypes Reporter\tPublications PubMed ID\tPublications Citation\n')
    p_result_file = os.path.join(p_dir, '{0}.txt'.format(gene))
    with open(p_result_file, 'w', encoding='utf-8') as f1:
        table_lines = [
            '\t'.join(str(row[column]) for column in view_list).strip() + '\n'
            for row in phenotype_query.rows()
        ]
        # Header and all rows go out in a single write once every row is read.
        f1.write(header + ''.join(table_lines))

    # --- phenotype summary -----------------------------------------------
    summary_query = gene_lookup_query('phenotypes.genes.phenotypeSummary')
    summary_parts = (row['phenotypes.genes.phenotypeSummary']
                     for row in summary_query.rows())
    summary = ''.join(part for part in summary_parts if part)
    result_list = [gene, summary]

    # --- literature ------------------------------------------------------
    pubmed_query = gene_lookup_query(
        'publicationAnnotations.publication.pubMedId')
    l_result_file = os.path.join(l_dir, '{0}.txt'.format(gene))
    with open(l_result_file, 'w', encoding='utf-8') as f2:
        for row in pubmed_query.rows():
            pubmed_id = row['publicationAnnotations.publication.pubMedId']
            if not pubmed_id:
                continue
            handle = pubmed_connection(pubmed_id, gene)
            if handle:
                f2.write(handle.read())
    return result_list
Пример #13
0
def run_queries():
    """Yield ``(name, result)`` for each callable in ``queries`` that is not a helper.

    Callables whose ``__name__`` is one of the helper names below are
    skipped. There is a one-second pause before every call.
    """
    # The client is constructed here although it is not used further below.
    service = Service('http://intermine.wormbase.org/tools/wormmine/service')

    helper_names = ('assert_result', 'Service', 'assert_greater', 'save_txt_file')
    for attr_name in dir(queries):
        candidate = getattr(queries, attr_name)
        if not callable(candidate) or candidate.__name__ in helper_names:
            continue
        time.sleep(1)
        yield attr_name, candidate()
Пример #14
0
    def parse(self, limit=None):
        """Fetch MGI gene-to-phenotype annotations from MouseMine into the graph.

        The identifier space is walked in 90 slices, one per two-digit prefix
        ``MGI:10`` .. ``MGI:99``. For every returned row a ``G2PAssoc``
        between the gene and the MP term is added to ``self.graph``; when the
        row has a PubMed id, a journal-article ``Reference`` is added as well
        and attached to the association as its source.

        :param limit: If truthy, stop after this many prefixes were processed.
        """
        # One client serves all 90 queries; it does not depend on the prefix,
        # so it is created (and the chatty loggers are silenced) only once.
        service = Service("http://www.mousemine.org/mousemine/service")
        logging.getLogger('Model').setLevel(logging.CRITICAL)
        logging.getLogger('JSONIterator').setLevel(logging.CRITICAL)

        count = 0
        for num in range(10, 100):
            fuzzy_gene = "MGI:{0}*".format(num)
            gene = "MGI:{0}".format(num)
            query = service.new_query("OntologyAnnotation")
            query.add_constraint("subject", "SequenceFeature")
            query.add_constraint("ontologyTerm", "MPTerm")
            query.add_view("subject.primaryIdentifier", "subject.symbol",
                           "subject.sequenceOntologyTerm.name",
                           "ontologyTerm.identifier", "ontologyTerm.name",
                           "evidence.publications.pubMedId",
                           "evidence.comments.type",
                           "evidence.comments.description")
            query.add_sort_order("OntologyAnnotation.ontologyTerm.name", "ASC")
            query.add_constraint("subject.organism.taxonId",
                                 "=",
                                 "10090",
                                 code="A")
            query.add_constraint("subject", "LOOKUP", fuzzy_gene, code="B")
            query.add_constraint("subject.primaryIdentifier",
                                 "CONTAINS",
                                 gene,
                                 code="C")
            query.outerjoin("evidence.comments")

            for row in query.rows():
                mgi_curie = row["subject.primaryIdentifier"]
                mp_curie = row["ontologyTerm.identifier"]
                pubmed_id = row["evidence.publications.pubMedId"]
                assoc = G2PAssoc(self.graph, self.name, mgi_curie, mp_curie)
                if pubmed_id:
                    # The CURIE is only needed when there is a publication.
                    pub_curie = "PMID:{0}".format(pubmed_id)
                    reference = Reference(
                        self.graph, pub_curie,
                        Reference.ref_types['journal_article'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)

                assoc.add_evidence('ECO:0000059')
                assoc.add_association_to_graph()

            # NOTE(review): the range in this message is built from the
            # 0-based loop counter, not from the MGI prefix `num` -- confirm
            # that this is the intended wording.
            if not count % 10 and count != 0:
                count_from = count - 10
                logger.info(
                    "{0} processed ids from MGI:{1}* to MGI:{2}*".format(
                        datetime.datetime.now(), count_from, count))

            count += 1
            if limit and count >= limit:
                break
Пример #15
0
def get_yeast_gene_location(gene_name):
    '''Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by the lookup is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, int:biostart, int:bioend, int:strand]
    :rtype location: list
    :raises StopIteration: if the lookup returns no rows.
    :raises KeyError: if the chromosome identifier is not one of
                      ``chrI`` .. ``chrXVI``.

    '''
    from intermine.webservice import Service
    service = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # Get a new query on the class (table) you will be querying:
    query = service.new_query('Gene')

    # The view specifies the output columns
    query.add_view('primaryIdentifier', 'secondaryIdentifier', 'symbol',
                   'name', 'organism.shortName',
                   'chromosome.primaryIdentifier', 'chromosomeLocation.start',
                   'chromosomeLocation.end', 'chromosomeLocation.strand')

    # Restrict the lookup to S. cerevisiae genes matching the given name.
    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene', 'LOOKUP', gene_name, code='A')

    # 'chrI' -> 1 ... 'chrXVI' -> 16
    numerals = ('I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII',
                'IX', 'X', 'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI')
    chromosomes = {
        'chr' + numeral: number
        for number, numeral in enumerate(numerals, 1)
    }

    # The builtin next() is the portable way to advance an iterator; the
    # `.next()` method is the Python 2 spelling.
    first_result = next(query.rows())

    return [
        chromosomes[first_result['chromosome.primaryIdentifier']],
        first_result['chromosomeLocation.start'],
        first_result['chromosomeLocation.end'],
        int(first_result['chromosomeLocation.strand'])
    ]
Пример #16
0
def find_max_data_items(new_list, intermine, intermine_url):
    """Return the largest row count among the given InterMine classes.

    :param new_list: Iterable of class names; each one is queried with the
                     view ``<name>.*``.
    :param intermine: Not used by this function; kept so existing callers
                      keep working.
    :param intermine_url: Base URL of the mine; ``"/service"`` is appended.
    :returns: The highest ``query.count()`` seen, or 0 if ``new_list`` is
              empty.
    """
    service = Service(intermine_url + "/service")
    largest = 0
    for class_name in new_list:
        query = service.new_query(class_name)
        query.add_view(class_name + ".*")
        # count() is evaluated once per class (presumably a request to the
        # service), and the builtin max() is no longer shadowed by a local.
        largest = max(largest, query.count())
    return largest
Пример #17
0
def query(ids):
    """Return the result rows of a TargetMine Protein query for ``ids``.

    ``ids`` must be an indexable, non-empty sequence of strings; the ids are
    joined with commas and used as the value of an ``IN`` constraint on
    ``Protein``. The output columns cover the protein itself and its
    associated compounds.
    """
    protein_views = (
        "primaryIdentifier", "primaryAccession", "name", "length",
        "compounds.compound.casRegistryNumber",
        "compounds.compound.name",
        "compounds.compound.compoundGroup.name",
    )
    service = Service("http://targetmine.nibio.go.jp/targetmine")
    protein_query = service.new_query("Protein")
    protein_query.add_view(*protein_views)

    # The value is not used afterwards, but the access makes empty or
    # non-indexable input fail here instead of producing an empty constraint.
    _first_id = ids[0]

    joined_ids = ",".join(ids)
    protein_query.add_constraint("Protein", "IN", joined_ids)
    return protein_query.rows()
Пример #18
0
def get_yeast_gene_location(gene_name):
    """Acquire the location of a gene from SGD http://www.yeastgenome.org

    Only the first row returned by the lookup is used.

    :param gene_name: Name of the gene.
    :type gene_name: string
    :returns location: [int: chromosome, int:biostart, int:bioend, int:strand]
    :rtype location: list
    :raises StopIteration: if the lookup returns no rows.
    :raises KeyError: if the chromosome identifier is not one of
                      ``chrI`` .. ``chrXVI``.

    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

    # Get a new query on the class (table) you will be querying:
    query = service.new_query("Gene")

    # The view specifies the output columns
    query.add_view("primaryIdentifier", "secondaryIdentifier", "symbol",
                   "name", "organism.shortName",
                   "chromosome.primaryIdentifier", "chromosomeLocation.start",
                   "chromosomeLocation.end", "chromosomeLocation.strand")

    # Restrict the lookup to S. cerevisiae genes matching the given name.
    query.add_constraint("organism.shortName", "=", "S. cerevisiae", code="B")
    query.add_constraint("Gene", "LOOKUP", gene_name, code="A")

    # "chrI" -> 1 ... "chrXVI" -> 16
    numerals = ("I", "II", "III", "IV", "V", "VI", "VII", "VIII",
                "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI")
    chromosomes = {
        "chr" + numeral: number
        for number, numeral in enumerate(numerals, 1)
    }

    # The builtin next() is the portable way to advance an iterator; the
    # `.next()` method is the Python 2 spelling.
    first_result = next(query.rows())

    return [
        chromosomes[first_result["chromosome.primaryIdentifier"]],
        first_result["chromosomeLocation.start"],
        first_result["chromosomeLocation.end"],
        int(first_result["chromosomeLocation.strand"])
    ]
Пример #19
0
def main():
    """Write an expression matrix for the genes listed in the file ``sys.argv[1]``.

    The input file holds one gene identifier per line. For every gene the
    RNA-seq expression levels are fetched from ThaleMine. The output,
    ``results/all_genes.csv``, is tab-separated: the header row lists every
    "<SRA accession>-<tissue>" experiment in order of first appearance, and
    each following row holds a gene and its expression level per experiment
    (``0`` where the gene has no value for that experiment).

    The output file is only opened after all data was fetched, so a failing
    query does not leave a truncated result file behind.
    """
    if not os.path.exists("results"):
        os.makedirs("results")

    service = Service("https://apps.araport.org/thalemine/service")

    experiments = []          # column order: first appearance
    seen_experiments = set()  # constant-time membership test for the above
    expression = {}           # (gene, experiment) -> first value seen
    gene_names = []

    # os.path.join keeps relative paths relative to the working directory
    # and additionally accepts absolute paths.
    with open(os.path.join(os.getcwd(), sys.argv[1])) as gene_file:
        for line in gene_file:
            gene = line.strip()
            query = service.new_query("Gene")
            query.add_view("primaryIdentifier",
                           "RNASeqExpressions.expressionLevel",
                           "RNASeqExpressions.experiment.SRAaccession",
                           "RNASeqExpressions.experiment.tissue",
                           "RNASeqExpressions.unit")
            query.add_sort_order(
                "Gene.RNASeqExpressions.experiment.SRAaccession", "DESC")
            query.add_constraint("primaryIdentifier", "=", gene, code="A")

            for row in query.rows():
                experiment_tissue = str(
                    row["RNASeqExpressions.experiment.SRAaccession"]
                ) + "-" + str(row["RNASeqExpressions.experiment.tissue"])
                expression_value = str(
                    row["RNASeqExpressions.expressionLevel"])
                if experiment_tissue not in seen_experiments:
                    seen_experiments.add(experiment_tissue)
                    experiments.append(experiment_tissue)
                # Keep the first value per (gene, experiment) pair.
                expression.setdefault((gene, experiment_tissue),
                                      expression_value)

            gene_names.append(gene)

    with open("results/all_genes.csv", "w") as out:
        out.write("".join("\t" + item for item in experiments) + "\n")
        for gene_name in gene_names:
            cells = "".join(
                "\t" + expression.get((gene_name, item), "0")
                for item in experiments)
            out.write(gene_name + cells + "\n")
Пример #20
0
class LiveResultsTest(unittest.TestCase):
    """Live check of the widget listing of an InterMine test-model service."""

    # Service root; can be overridden with the TESTMODEL_URL environment variable.
    TEST_ROOT = os.getenv("TESTMODEL_URL", "http://localhost:8080/intermine-demo/service")

    SERVICE = Service(TEST_ROOT)

    def testGetWidgets(self):
        """The service lists at least one widget, among them ``age_groups``."""
        available = self.SERVICE.widgets
        has_any = len(available) > 0
        self.assertTrue(has_any, msg = "No widgets were found")
        has_age_groups = 'age_groups' in available
        self.assertTrue(has_age_groups, msg = "Could not find age_groups")
Пример #21
0
def ratmine(gene):
    """Look up a gene in RatMine and return selected fields as a dict.

    Only the first RatMine primary identifier found on ``gene`` is used: the
    function returns at the end of the first loop iteration. If ``gene`` has
    no such identifier the function returns ``None`` implicitly.

    The fields are extracted with a regular expression from the string form
    of each result row; if several rows match, later rows overwrite earlier
    values.
    """
    # Field name as it appears in the row's string form -> key in the result.
    # NOTE(review): "ncbi_gene_number" -> "ncbiGeneNumber" looks reversed
    # compared with the other entries -- confirm against the row format.
    plain_fields = {
        "description": "description",
        "geneType": "gene_type",
        "id": "id",
        "length": "length",
        "name": "name",
        "ncbi_gene_number": "ncbiGeneNumber",
        "pharmGKBidentifier": "pharmGKB_id",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "scoreType": "score_type",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    accepted_types = [
        "ratmine primary id", "ratmine primary identifier",
        "ratmine primary gene id", "ratmine primary gene identifier"
    ]

    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(
            gene.identifiers, accepted_types):

        s = Service("http://ratmine.mcw.edu/ratmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            iden["identifier"])
        gene_object = {}
        for row in q.rows():
            pairs = re.findall(
                r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})",
                row.__str__())
            for field, raw in pairs:
                # Drop the surrounding quotes of quoted values.
                if raw[0] == "'" and raw[-1] == "'":
                    raw = raw[1:-1]
                value = raw.strip()

                if field == "briefDescription":
                    # Only this field turns the text "None" into None.
                    gene_object["brief_description"] = (
                        None if value == "None" else value)
                elif field in plain_fields:
                    gene_object[plain_fields[field]] = value

        return gene_object
Пример #22
0
def query_fishmine(intermine_url: str,
                   protein_id: str,
                   query: str = "Gene") -> IntermineResult:
    """Find the ZFIN identifiers cross-referenced to ``protein_id``.

    Queries the class named by ``query`` on the mine at ``intermine_url`` for
    entries whose primary identifier matches ``ZDB*`` and which carry a cross
    reference equal to ``protein_id``. Each hit is prefixed with ``ZFIN:`` and
    the list is passed, together with ``protein_id``, to
    ``intermine_response_factory``.
    """
    mine = Service(intermine_url)
    gene_query = mine.new_query(query)
    gene_query.add_view("primaryIdentifier")
    gene_query.add_constraint("primaryIdentifier", "CONTAINS", "ZDB*", code="A")
    gene_query.add_constraint("crossReferences.identifier",
                              "=",
                              "{}".format(protein_id),
                              code="B")

    zfin_ids = []
    for hit in gene_query.rows():
        zfin_ids.append("ZFIN:{}".format(hit['primaryIdentifier']))
    return intermine_response_factory(zfin_ids, protein_id)
Пример #23
0
def development(host):
    """Run the callables of ``testing_queries`` against a WormMine service.

    ``host == 'dev'`` selects the im-dev1 endpoint; any other value selects
    the intermine.wormbase.org endpoint. After the run, every entry of
    ``settings.to_check`` is logged as a warning and re-invoked with
    ``True`` as second argument.
    """
    if host == 'dev':
        endpoint = 'http://im-dev1.wormbase.org/tools/wormmine/service'
    else:
        endpoint = 'http://intermine.wormbase.org/tools/wormmine/service'
    service = Service(endpoint)
    logger.info('Testing ' + str(service))
    settings.init()

    # Module-level callables that are helpers rather than queries.
    helper_names = [
        'assert_result', 'Service', 'assert_greater', 'save_txt_file'
    ]
    for attr_name in dir(testing_queries):
        candidate = getattr(testing_queries, attr_name)
        if not callable(candidate):
            continue
        if candidate.__name__ in helper_names:
            continue
        candidate(service)

    logger.info(str(len(settings.to_check)) + ' query(ies) failed')
    # Entries are indexed as [0] and [1]; presumably (query name, class name)
    # pairs such as ('query_01', 'Gene') -- confirm against `settings`.
    for failed in settings.to_check:
        logger.warning(failed[0] + ' ' + failed[1])
        # NOTE(review): re-runs are resolved from `testing_development`,
        # while discovery above uses `testing_queries` -- confirm intended.
        rerun = getattr(testing_development, failed[0])
        rerun(service, True)
Пример #24
0
    def attack(self):
        """Register a throwaway user, build a random list, then deregister.

        Steps: best-effort removal of a leftover account with the same
        credentials, registration, selection of a random class that has
        class keys and at least one row, creation of a list from a random
        sample of that class's first key values, and best-effort
        deregistration. ``self.counter`` is incremented after each step.
        """
        username = "******".format(self.ident)
        password = "******"

        # Best-effort cleanup of a leftover account; failure is expected
        # when the account does not exist, so it is logged and ignored.
        try:
            s = Service(self.service.root, username, password)
            s.deregister(s.get_deregistration_token())
            self.counter.add(3)
        except Exception as exc:
            self.LOG.debug("Pre-registration cleanup skipped: %s", exc)

        s = self.service.register(username, password)
        self.LOG.debug("Registered user %s", username)
        self.counter.add(1)

        c = 0
        # random.choice needs an indexable sequence; a dict view is not one
        # on Python 3, so materialise it once.
        classes = list(s.model.classes.values())
        self.counter.add(1)

        classkeys = s._get_json('/classkeys')['classes']
        self.counter.add(1)

        # Keep drawing classes until one with keys and a non-zero row count
        # is found.
        while c == 0:
            table = random.choice(classes)
            if not (table.has_id and table.name in classkeys):
                continue
            query = s.query(table.name).select(classkeys[table.name][0])

            c = query.count()
            self.counter.add(1)

        n = random.randint(1, min(100, c))
        # random.sample requires a sequence, not a lazy map object.
        members = random.sample([r[0] for r in query.rows()], n)
        self.counter.add(1)

        self.LOG.debug("Will construct list of %s with: %r", table.name, members)

        with s.list_manager() as lm:
            l = lm.create_list(members, table.name)
            self.LOG.debug('Created list %s, size: %d', l.name, l.size)
            self.counter.add(1)

        # Best-effort teardown: a failed deregistration must not fail the run.
        try:
            s.deregister(s.get_deregistration_token())
            self.counter.add(2)
        except Exception as exc:
            self.LOG.debug("Deregistration failed: %s", exc)
Пример #25
0
def flymine(gene):
    """Collect FlyMine gene records for each Ensembl identifier of ``gene``.

    For every entry of ``gene.identifiers`` whose ``identifier_type`` is one
    of the Ensembl spellings (case-insensitive), a FlyMine ``LOOKUP`` query
    is run and each result row is converted to a dict.

    :param gene: object exposing ``identifiers``, an iterable of dicts with
                 ``identifier_type`` and ``identifier`` keys
    :return: list of dicts, one per result row; empty when nothing matched
    :raises Exception: any non-webservice error raised while reading rows is
                       reported on stdout and then re-raised
    """
    ensembl_types = (
        "ensembl", "ensembl id", "ensembl identifier", "ensembl gene id",
    )
    # (key in the returned dict, column of the result row)
    columns = (
        ('id', "id"),
        ('primary_id', "primaryIdentifier"),
        ('secondary_id', "secondaryIdentifier"),
        ('symbol', "symbol"),
        ('name', "name"),
        ('cyto_location', "cytoLocation"),
        ('brief_description', "briefDescription"),
        ('description', "description"),
        ('length', "length"),
        ('score', "score"),
        ('score_type', "scoreType"),
    )
    obj_array = []

    for ident in gene.identifiers:
        if ident["identifier_type"].lower() not in ensembl_types:
            continue

        s = Service("www.flymine.org/query")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            ident["identifier"])
        try:
            for row in q.rows():
                obj_array.append({key: row[col] for key, col in columns})
        except intermine.errors.WebserviceError:
            print(
                "A webservice error occurred. Please contact Intermine support."
            )
        except Exception:
            # The original used `else:` here, which runs on SUCCESS and so
            # printed this message after every healthy query. Report real
            # failures only, and keep propagating them as before.
            print("Something else went wrong.")
            raise
    return obj_array
Пример #26
0
def query_intermine(genes):
    """Build a MouseMine ontology-annotation query for the given genes.

    ``genes`` is joined with ``', '`` and used as the value of a ``LOOKUP``
    constraint on ``subject``. The configured query object is returned; this
    function does not iterate its rows.
    """
    lookup_value = ', '.join(genes)
    from intermine.webservice import Service

    mine = Service("http://www.mousemine.org/mousemine/service")
    annotation_query = mine.new_query("OntologyAnnotation")

    # Type constraints on the two ends of the annotation.
    annotation_query.add_constraint("ontologyTerm", "MPTerm")
    annotation_query.add_constraint("subject", "SequenceFeature")

    output_columns = (
        "subject.primaryIdentifier",
        "subject.symbol",
        "subject.sequenceOntologyTerm.name",
        "ontologyTerm.identifier",
        "ontologyTerm.name",
        "evidence.publications.pubMedId",
        "evidence.comments.type",
        "evidence.comments.description",
    )
    annotation_query.add_view(*output_columns)
    annotation_query.add_sort_order(
        "OntologyAnnotation.ontologyTerm.name", "ASC")

    annotation_query.add_constraint(
        "subject.organism.taxonId", "=", "10090", code="A")
    annotation_query.add_constraint(
        "subject", "LOOKUP", lookup_value, code="B")
    annotation_query.outerjoin("evidence.comments")
    return annotation_query
Пример #27
0
    def test_user_registration(self):
        """Register a user, check its service, then deregister it.

        Asserts that the registered service shares the root of
        ``self.SERVICE``, that it reports two lists, and that listing fails
        with ``WebserviceError`` once the user has been deregistered.
        """
        username = '******'.format(uuid.uuid4())
        password = '******'
        # Best-effort removal of a leftover account. Catch Exception rather
        # than using a bare except so Ctrl-C / SystemExit still interrupt
        # the test run.
        try:
            s = Service(self.SERVICE.root, username, password)
            s.deregister(s.get_deregistration_token())
        except Exception:
            pass

        s = self.SERVICE.register(username, password)

        self.assertEqual(s.root, self.SERVICE.root)
        self.assertEqual(2, len(s.get_all_lists()))

        drt = s.get_deregistration_token()
        s.deregister(drt)

        self.assertRaises(WebserviceError, s.get_all_lists)
Пример #28
0
    def attack(self):
        """Exercise list lookup and enrichment on a randomly chosen list.

        Picks a random non-empty list whose status is ``'CURRENT'``, fetches
        one random member, runs a ``LOOKUP`` on that member's first class
        key when the list type has keys, and finally calculates one randomly
        chosen enrichment widget that targets the list type, if any exists.
        ``self.counter`` is incremented after each step.
        """
        service = Service(self.service.root)
        self.counter.add(2)

        lists = [
            candidate for candidate in service.get_all_lists()
            if candidate.size and candidate.status == 'CURRENT'
        ]
        Lists.LOG.debug("%d lists", len(lists))
        self.counter.add(1)

        target = random.choice(lists)

        with closing(service.opener.open(service.root + "/classkeys")) as response:
            payload = json.loads(response.read())
            classkeys = payload['classes']
            Lists.LOG.debug("Classkeys for %s are %r", target.name, classkeys[target.list_type])
            self.counter.add(1)

        members_query = service.query(target.list_type).where(target.list_type, 'IN', target.name)

        # Fetch a single row at a random offset within the list.
        offset = random.randint(0, target.size - 1)
        sampled_row = next(members_query.rows(size = 1, start = offset))
        self.counter.add(1)
        Lists.LOG.debug(sampled_row)

        target_keys = classkeys[target.list_type]
        if target_keys:
            lookup_query = members_query.where(
                target.list_type, 'LOOKUP', sampled_row[target_keys[0]])
            Lists.LOG.debug("lookup q: %s", lookup_query)
            Lists.LOG.debug("%s should be one", lookup_query.count())
            self.counter.add(1)

        enrichment_widgets = [
            w for w in service.widgets.values()
            if w['widgetType'] == 'enrichment' and target.list_type in w['targets']
        ]
        self.counter.add(1)

        # Nothing to calculate when no enrichment widget targets this type.
        if not enrichment_widgets:
            return

        chosen = random.choice(enrichment_widgets)

        Lists.LOG.debug("Calculating %s of %s", chosen['name'], target.name)
        enriched = list(target.calculate_enrichment(chosen['name']))
        self.counter.add(1)

        if enriched:
            Lists.LOG.debug(enriched[0])
Пример #29
0
def wormmine(gene):
    """Fetch WormMine gene attributes for the first WormMine primary ID.

    The identifiers of ``gene`` are filtered to the WormMine primary-ID
    types; for the first one that passes, a ``LOOKUP`` query is run and the
    ``field=value`` pairs found in each row's string form are copied into a
    dict. Values are kept as stripped strings (including the literal
    ``"None"``); when several rows come back, later rows overwrite earlier
    ones.

    :param gene: object exposing ``identifiers``
    :return: dict of the recognised fields for the first matching
             identifier, or ``None`` when no identifier passes the filter
    """
    # Field name as printed by the row -> key in the returned dict.
    # The camelCase spellings are the ones the sibling parsers in this file
    # match on; the snake_case spellings this function originally tested are
    # kept so nothing that worked before stops working.
    field_to_key = {
        "id": "id",
        "lastUpdated": "last_updated",
        "length": "length",
        "name": "name",
        "operon": "operon",
        "primary_id": "primary_id",
        "primaryIdentifier": "primary_id",
        "score": "score",
        "score_type": "score_type",
        "scoreType": "score_type",
        "secondary_id": "secondary_id",
        "secondaryIdentifier": "secondary_id",
        "symbol": "symbol",
    }
    # name=('quoted text'|None|digits)
    field_pattern = re.compile(
        r"(\w+)=('[0-9A-Za-z:()\- \[\]<>\.,]{1,}'|None|[0-9]{1,})")

    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(
            gene.identifiers, [
                "wormmine primary id", "wormmine primary identifier",
                "wormmine primary gene id", "wormmine primary gene identifier"
            ]):

        s = Service("http://intermine.wormbase.org/tools/wormmine")
        Gene = s.model.Gene
        q = s.query(Gene).select("*").where("Gene", "LOOKUP",
                                            iden["identifier"])
        gene_object = {}
        for row in q.rows():
            for field, raw_value in field_pattern.findall(str(row)):
                key = field_to_key.get(field)
                if key is None:
                    continue
                # Drop the surrounding quotes of a quoted value.
                if raw_value[0] == "'" and raw_value[-1] == "'":
                    raw_value = raw_value[1:-1]
                gene_object[key] = raw_value.strip()

        # Returning inside the loop means only the first identifier is used.
        return gene_object
Пример #30
0
def query_mousemine(intermine_url: str, gene_id: str) -> IntermineResult:
    """
    Resolve a gene ID to the primary identifiers of matching mouse features.

    :param intermine_url: intermine server, eg
                          http://www.mousemine.org/mousemine/service
    :param gene_id: gene ID, eg ENSMUSG00000063180
    :return: the object built by ``intermine_response_factory`` from the
             primary identifiers found and ``gene_id``
    """
    feature_query = Service(intermine_url).new_query("SequenceFeature")
    feature_query.add_view("primaryIdentifier")
    # Constraint A looks the gene up; constraint B limits hits to M. musculus.
    feature_query.add_constraint(
        "SequenceFeature", "LOOKUP", "{}".format(gene_id), code="A")
    feature_query.add_constraint(
        "organism.shortName", "=", "M. musculus", code="B")

    primary_ids = []
    for row in feature_query.rows():
        primary_ids.append("{}".format(row['primaryIdentifier']))
    return intermine_response_factory(primary_ids, gene_id)