def fetchGene(GeneName):
    """Fetch the genomic DNA of a yeast gene from YeastMine as a SeqRecord.

    Runs the ``Gene_GenomicDNA`` template with a LOOKUP constraint on
    ``GeneName`` (extra value "S. cerevisiae") and builds the record from the
    first row returned.

    Args:
        GeneName: value handed to the LOOKUP constraint.

    Returns:
        A ``SeqRecord`` built from the first row: the sequence comes from
        ``sequence.residues``, ``id`` and ``features`` from
        ``secondaryIdentifier``, ``description`` from ``description``, and
        ``name`` is ``GeneName``.

    Raises:
        ValueError: if the lookup returns no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service seems to return multiple similar genes, but only the first
    # one is wanted, so take it and stop reading.
    first_row = next(iter(rows), None)
    if first_row is None:
        raise ValueError(
            "YeastMine returned no gene for {!r}".format(GeneName))

    descr = first_row["description"]
    GeneSeq = Seq(first_row["sequence.residues"])
    GeneSysName = first_row["secondaryIdentifier"]

    # Create a record for the gene ...
    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)

    # ... and add some more information to make it useful.
    GeneRecord.name = GeneName
    # NOTE(review): `features` normally holds a list of SeqFeature objects;
    # the systematic name is stored here unchanged because existing callers
    # may read it back from this attribute.
    GeneRecord.features = GeneSysName
    GeneRecord.description = descr

    return GeneRecord
Пример #2
0
def templates(request):
    """Render the list of templates offered by the registered InterMines.

    Reads the optional ``mines`` GET parameter ('+'-separated mine names).
    When it is present, only the named mines are queried; otherwise every
    ``InterMine`` object is queried. Templates that share a name across mines
    are merged into one entry that lists all mines offering it.

    Args:
        request: the incoming HTTP request.

    Returns:
        The rendered ``intermine_mgr/templates.html`` response. Its context
        holds ``existing_mines`` (all mine names), ``existing_templates``
        (entries sorted case-insensitively by title, each with its mines
        sorted case-insensitively) and ``user_mines`` (the parsed selection,
        or None).
    """
    # Determine available InterMines and associated templates
    selected_mines = request.GET.get('mines')
    if selected_mines is not None:
        # NOTE(review): a literal '+' in a query string is normally decoded
        # to a space before it gets here -- confirm how the links that set
        # `mines` encode the separator.
        selected_mines = selected_mines.split('+')

    existing_mines = []
    existing_templates = {}
    for im in InterMine.objects.all():
        existing_mines.append(im.name)
        if selected_mines is not None and im.name not in selected_mines:
            continue
        try:
            service = Service(im.url.rstrip('/'))
        except Exception:
            # Service is inaccessible, or some other error: skip this mine.
            # (Narrowed from a bare `except:` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.)
            continue
        for t_name in service.templates:
            entry = existing_templates.get(t_name)
            if entry is not None:
                entry['mines'].append(im.name)
                continue
            # Only fetch the template when its metadata is actually needed.
            t = service.get_template(t_name)
            existing_templates[t_name] = {
                'name': t.name,
                'title': t.title,
                'description': t.description,
                'mines': [im.name],
            }

    # Sort existing_templates properly, and convert it to a list
    for entry in existing_templates.values():
        entry['mines'].sort(key=lambda m: m.lower())
    existing_templates = sorted(existing_templates.values(),
                                key=lambda t: t['title'].lower())

    context = {
        'existing_mines': existing_mines,
        'existing_templates': existing_templates,
        'user_mines': selected_mines,
    }
    return render(request, 'intermine_mgr/templates.html', context)
Пример #3
0
def fetchGene(GeneName):
    """Look up a yeast gene in YeastMine and return it as a SeqRecord.

    The ``Gene_GenomicDNA`` template is run with a LOOKUP constraint on
    ``GeneName`` (extra value "S. cerevisiae"); only the first row of the
    result is used.

    Args:
        GeneName: value handed to the LOOKUP constraint.

    Returns:
        A ``SeqRecord`` carrying the row's ``sequence.residues`` as sequence,
        its ``secondaryIdentifier`` as ``id`` and ``features``, its
        ``description`` as ``description``, and ``GeneName`` as ``name``.

    Raises:
        ValueError: if the lookup returns no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(E={
        "op": "LOOKUP",
        "value": GeneName,
        "extra_value": "S. cerevisiae"
    })

    # This service seems to return multiple similar genes but only the first
    # one is wanted; leave the loop as soon as it has been read.
    hit = None
    for hit in rows:
        break
    if hit is None:
        raise ValueError(
            "YeastMine returned no gene for {!r}".format(GeneName))

    descr = hit["description"]
    GeneSeq = Seq(hit["sequence.residues"])
    GeneSysName = hit["secondaryIdentifier"]

    # Create a record for the gene
    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)

    # Now add some more information to make it useful
    GeneRecord.name = GeneName
    # NOTE(review): `features` is conventionally a list of SeqFeature
    # objects; the systematic name is kept here as-is in case callers read it
    # from this attribute.
    GeneRecord.features = GeneSysName
    GeneRecord.description = descr

    return GeneRecord
Пример #4
0
def fetchGene(GeneName):
    """Look up a yeast gene in YeastMine, report it, and return a SeqRecord.

    Runs the ``Gene_GenomicDNA`` template with a LOOKUP constraint on
    ``GeneName`` (extra value "S. cerevisiae"), prints the systematic name,
    description and sequence of the first row returned, and wraps that row in
    a ``SeqRecord``.

    Args:
        GeneName: value handed to the LOOKUP constraint.

    Returns:
        A ``SeqRecord`` whose sequence is the row's ``sequence.residues``,
        whose ``id`` and ``features`` are its ``secondaryIdentifier`` and
        whose ``name`` is ``GeneName``. The record's description is not set.

    Raises:
        ValueError: if the lookup returns no rows.
    """
    service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")
    template = service.get_template('Gene_GenomicDNA')

    rows = template.rows(
        E={"op": "LOOKUP", "value": GeneName, "extra_value": "S. cerevisiae"}
    )

    # The service seems to return multiple similar genes, but only the first
    # one is wanted, so take it and stop reading.
    row = next(iter(rows), None)
    if row is None:
        raise ValueError(
            "YeastMine returned no gene for {!r}".format(GeneName))

    GeneSeq = Seq(row["sequence.residues"])
    GeneSysName = row["secondaryIdentifier"]
    print(" ")
    print("I think you want...... " + row["secondaryIdentifier"])
    print(row["description"])
    print(" ")
    print(row["sequence.residues"])
    print(" ")
    print("Good choice! I have a feeling you're going to get lucky with this one.")
    print(" ")
    print("Give me a second to put some of my ducks in a circle...")

    # Create a record for the gene
    GeneRecord = SeqRecord(GeneSeq, id=GeneSysName)

    # Now add some more information to make it useful
    GeneRecord.name = GeneName
    # NOTE(review): `features` is conventionally a list of SeqFeature
    # objects; the systematic name is kept here as-is in case callers read it
    # from this attribute.
    GeneRecord.features = GeneSysName

    return GeneRecord
Пример #5
0
def get_gene_genomic_seq_as_FASTA(gene_id,
                                  extension_for_saving=extension_for_saving,
                                  return_text=False):
    '''
    Main function of script.
    Takes a gene's systematic name, standard name, or alias as defined at gene
    page at yeastgenome.org, retrieves the associated information from
    YeastMine, and saves or returns the genomic sequence of the gene in FASTA
    format.

    Use `return_text` if calling from IPython or a Jupyter notebook and you want
    the FASTA record returned as text.

    Parameters:
        gene_id: identifier handed to the template's LOOKUP constraint.
        extension_for_saving: passed, together with the gene's names, to
            `generate_output_file_name` to build the output file name.
        return_text: when truthy, return the FASTA text instead of writing a
            file.

    Returns:
        The FASTA-formatted record as a string when `return_text` is truthy;
        otherwise None, after the record has been written to the generated
        file name.

    Raises:
        IndexError: if YeastMine returns no rows for `gene_id`.

    Progress messages are written to stderr.
    '''
    # feedback (emitted before the query so it appears while the lookup runs)
    sys.stderr.write("looking up the gene associated with "
                     "{}...".format(gene_id))

    # Get gene information from YeastMine
    #---------------------------------------------------------------------------
    # Based on the template Gene_Genomic DNA available under
    # 'Gene --> Genomic DNA' when under 'Templates' on navigation bar
    # in middle of page at YeastMine. Direct link:
    # https://yeastmine.yeastgenome.org/yeastmine/template.do?name=Gene_GenomicDNA&scope=all
    service = Service(
        "https://yeastmine.yeastgenome.org:443/yeastmine/service")

    # Retrieve genomic DNA (DNA sequence with introns) for the specified gene.
    template = service.get_template('Gene_GenomicDNA')

    # You can edit the constraint values below
    # E    Gene
    results = list(template.rows(E={
        "op": "LOOKUP",
        "value": gene_id,
        "extra_value": "S. cerevisiae"
    }))
    if not results:
        # Same exception type the bare `results[0]` used to raise, but with a
        # message that tells the user what went wrong.
        raise IndexError(
            "YeastMine returned no gene matching '{}'.".format(gene_id))
    first_hit = results[0]

    # store corresponding gene genomic sequence
    genomic_seq = first_hit["sequence.residues"]

    # format gene_nom_info for making output file name or anything else needing
    # that information
    gene_nom_info = {
        'sys_nom': first_hit["secondaryIdentifier"],
        'std_nom': first_hit["symbol"],
        'aliases': first_hit["sgdAlias"],
    }

    # Make output FASTA record
    #---------------------------------------------------------------------------
    # based on handling worked out in
    # `delete_seq_following_pattern_within_multiFASTA.py`
    # NOTE(review): `generic_dna` comes from `Bio.Alphabet`, which newer
    # Biopython releases no longer ship -- confirm the installed version
    # before upgrading.
    record_description = '{}'.format(gene_nom_info['sys_nom'])
    record = SeqRecord(Seq(genomic_seq, generic_dna),
                       id=gene_nom_info['std_nom'],
                       description=record_description)  #based
    # on https://www.biostars.org/p/48797/ and `.ungap()` method, see
    # https://github.com/biopython/biopython/issues/1511 , and `description`
    # from what I've seen for `id` plus https://biopython.org/wiki/SeqIO
    sys.stderr.write("getting genomic sequence for the gene...")

    # Return text if called with `return_text = True`. Otherwise, consider
    # called from command line & save file.
    #---------------------------------------------------------------------------
    if return_text:
        # based on section 4.6 at
        #http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec:SeqRecord-format
        # Feedback
        sys.stderr.write("\nReturning genomic sequence in FASTA format.")
        return record.format("fasta")

    output_file_name = generate_output_file_name(gene_nom_info,
                                                 extension_for_saving)
    SeqIO.write(record, output_file_name, "fasta")
    # Feedback
    sys.stderr.write("\n\nFile of genomic sequence "
                     "saved as '{}'.".format(output_file_name))
    sys.stderr.write("\nFinished.\n")
import pandas as pd

# Demo of three ways to query YeastMine for the gene HFA1: the object model,
# a stored template, and a hand-built query. Results are printed to stdout.
# Output uses the single-argument print(...) form, which behaves the same
# under Python 2 and Python 3 (the former print statements were Python 2 only).
service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

#-------------------------------------------------------------------#
# Gene Info
#-------------------------------------------------------------------#
gene = service.model.Gene.where(symbol='HFA1').first()

print(gene.symbol + "\n" + gene.description)
print(gene)

#-------------------------------------------------------------------#
# Model templates
#-------------------------------------------------------------------#
template = service.get_template("Gene_Pathways")
for row in template.results(A={"symbol": "HFA1"}):
    print(row)

#-------------------------------------------------------------------#
# Query
#-------------------------------------------------------------------#
query = service.new_query("Gene")
query.add_view("primaryIdentifier", "name", "symbol", "pathways.name")
query.add_constraint("Gene", "LOOKUP", "HFA1")
for row in query.rows():
    print(row)

# The view specifies the output columns
query.add_view(
    "primaryIdentifier", "secondaryIdentifier", "symbol", "name", "sgdAlias",
Пример #7
0
def template_constraints(request):
    """Show a template's constraints and, once submitted, its query results.

    The ``q`` GET parameter is split on ``'__'`` into a template name and a
    mine name. For every constraint of that template the view collects the
    values to show in the form (``constraints``) and, for editable constraints
    the user supplied values for, the keyword arguments to run the template
    with (``kw_constraints``).

    If at least one constraint has neither an ``op<code>`` nor a
    ``gene_op<code>`` GET parameter, the form is rendered with the template's
    default values and no query is executed. Otherwise the template is run on
    every mine (or only the mine named by the ``Mine`` GET parameter), the
    rows are merged, sorted and optionally paged (``page`` / ``rows`` GET
    parameters), and rendered together with per-mine hit counts.

    Both outcomes render ``intermine_mgr/template_constraints.html``.
    """
    # Index the registered mines by name so the one named in `q` can be found.
    mines_dict = {}
    intermines = InterMine.objects.all()
    for im in intermines:
        mines_dict[im.name] = im

    # `q` has the form "<template name>__<mine name>".
    q = request.GET.get('q')
    qq = q.split('__')
    q_template = qq[0]
    q_mine_name = qq[1]
    q_mine = mines_dict[q_mine_name]
    base_url = q_mine.url.rstrip('/')
    # Note that q_service must exist if this template exists, so there is no need for a try/except here
    q_service = Service(base_url)
    selected_template = q_service.get_template(q_template)
    nc = len(selected_template.constraints
             )  # number of constraints in selected_template

    constraints = []  # per-constraint dicts handed to the HTML template
    kw_constraints = {}  # keyword arguments for template.rows(), keyed by code
    # Keep only the names of lists whose list_type is 'Gene'.
    gene_lists = q_service.get_all_list_names()
    gene_lists = [
        l for l in gene_lists if q_service.get_list(l).list_type == 'Gene'
    ]
    # Query string reproducing the current constraint values; extended below.
    base_filters_str = '?q=%s' % (q)
    use_default_constraints = False
    for i in range(nc):
        stc_i = selected_template.constraints[i]
        stc_i_dict = stc_i.to_dict()
        is_gene_related = pathIsGeneRelated(stc_i.path)
        # Constraint codes are assigned by position: 'A', 'B', 'C', ...
        ch = chr(ord('A') + i)
        # constraints - return to template
        constraint = {
            'code': ch,
            'path': stc_i.path,
            'edit': stc_i.editable,
            'gene_related': is_gene_related
        }
        # User-submitted values for this constraint, suffixed with its code.
        operator = request.GET.get('op' + ch)
        value = request.GET.get('value' + ch)
        value2 = request.GET.get('value2' + ch)
        value_list = getValueList(q_service, stc_i.path)
        if value_list is not None:
            constraint['value_list'] = value_list
        # organism_list is for the value2 (extraValue, extra_value) field in a ternary constraint
        organism_list = getValueList(q_service, 'Gene.organism.shortName')
        gene_operator = request.GET.get('gene_op' + ch)
        gene_value = request.GET.get('gene_value' + ch)
        if gene_operator is not None:
            # gene list-based constraint
            # (set operator and value for use in kw_constraints below)
            constraint['gene_op'] = operator = gene_operator
            constraint['gene_value'] = value = gene_value
            # The form itself still shows the template's own op/value.
            constraint['op'] = stc_i.op
            constraint['value'] = stc_i.value
            if 'extraValue' in stc_i_dict:
                constraint['value2'] = stc_i_dict['extraValue']
            constraints.append(constraint)
            if stc_i.editable:
                base_filters_str += '&op%s=%s&value%s=%s&gene_op%s=%s&gene_value%s=%s' % (
                    ch, stc_i.op, ch, stc_i.value, ch, gene_operator, ch,
                    gene_value)
        elif operator is not None:
            # regular (binary) constraint
            constraint['op'] = operator
            constraint['value'] = value
            if value2 is not None:
                # ternary constraint
                constraint['value2'] = value2
                # This replaces any value_list set from the constraint's path.
                if organism_list is not None:
                    constraint['value_list'] = organism_list
            constraints.append(constraint)
            if stc_i.editable:
                base_filters_str += '&op%s=%s&value%s=%s' % (ch, operator, ch,
                                                             value)
                if value2 is not None:
                    base_filters_str += '&value2%s=%s' % (ch, value2)
        else:
            # user submitted no constraints (yet), so use default values from selected template
            use_default_constraints = True
            try:
                constraint['op'] = stc_i.op
                constraint['value'] = stc_i.value
                if 'extraValue' in stc_i_dict:
                    constraint['value2'] = stc_i_dict['extraValue']
                    if organism_list is not None:
                        constraint['value_list'] = organism_list
                constraints.append(constraint)
            except:
                # ignore if any fields are missing
                pass
            continue  # so as not to submit default values to template query

        if stc_i.editable:
            # kw_constraints - submit to template query
            kw_constraints[ch] = {'op': operator, 'value': value}
            # An extra value of None or 'Any' is not forwarded to the query.
            if value2 not in [None, 'Any']:
                kw_constraints[ch]['extra_value'] = value2

    # At least one constraint had no submitted values: show the form only,
    # without running the query.
    if use_default_constraints:
        context = {
            'user_q': q,
            'user_mine': q_mine_name,
            'user_template': selected_template,
            'user_constraints': constraints,
            'gene_lists': gene_lists,
        }
        return render(request, 'intermine_mgr/template_constraints.html',
                      context)

    # Paging
    page = request.GET.get('page')
    if page is None:
        page = 1
    else:
        page = int(page)
    results_per_page = request.GET.get('rows')
    # Note that start indices are 0-based in the view, 1-based in the template
    if results_per_page is None:
        start = 0
    else:
        results_per_page = int(results_per_page)
        start = (page - 1) * results_per_page

    # Extract facet filters (only mines, for now)
    facet_filters = {}
    facet_filters_str = ''
    mine = request.GET.get('Mine')
    if mine:
        facet_filters['Mine'] = mine
        facet_filters_str += '&Mine=' + mine

    facets = {'Mine': {}}  # hit count per mine name
    total_hits = 0
    results = []
    # Run the template on every mine, or only on the one selected via `Mine`.
    for im in intermines:
        if not (mine is None or mine == im.name):
            continue
        base_url = im.url.rstrip('/')
        try:
            service = Service(base_url)
        except:
            # service is inaccessible, or some other error
            continue
        try:
            template = service.get_template(q_template)
            # Execute the (possibly modified) query
            rr = template.rows(**kw_constraints)
            for row in rr:
                rd = row.to_d()
                # Tag each row with the mine it came from.
                rd.update({'mine': im.name})
                results.append(rd)
            im_total_hits = len(rr)
            total_hits += im_total_hits
            # Aggregating facets is more complicated:
            facets['Mine'][im.name] = im_total_hits
        except:
            # For example, if the constraints do not apply to the template for this mine
            continue
    # This yields the total hits and facet information.
    # Remove any mines with no hits.
    mm = list(facets['Mine'].keys())
    for m in mm:
        if facets['Mine'][m] == 0:
            del facets['Mine'][m]

    # Sort the merged rows by the first sort-order path of the template.
    sort_tag = selected_template.get_sort_order(
    ).sort_orders[0].path  # TODO: sort by multiple columns?
    results = sorted(results, key=lambda result: safeSort(result, sort_tag))
    if results_per_page is None:
        # No paging requested: everything goes on a single page.
        end = total_hits
        last_page = 1
    else:
        end = min(start + results_per_page, total_hits)
        results = results[start:end]
        last_page = (total_hits - 1) // results_per_page + 1
        base_filters_str += '&rows=%d' % (results_per_page)
    context = {
        'user_q': q,
        'user_mine': q_mine_name,
        'user_template': selected_template,
        'user_constraints': constraints,
        'gene_lists': gene_lists,
        'base_filters_str': base_filters_str,
        'facet_filters': facet_filters,
        'facet_filters_str': facet_filters_str,
        'page': page,
        'results_per_page': results_per_page,
        'last_page': last_page,
        'start_row': start + 1,
        'end_row': end,
        'num_rows': total_hits,
        'results': results,
        'facets': facets,
    }

    return render(request, 'intermine_mgr/template_constraints.html', context)
def get_protein_seq_as_FASTA(gene_id,
    extension_for_saving = extension_for_saving, return_text = False):
    '''
    Main function of script.
    Takes a gene's systematic name, standard name, or alias as defined at gene
    page at yeastgenome.org, retrieves the associated information from
    YeastMine, and saves or returns the protein sequence in FASTA format.

    Use `return_text` if calling from IPython or a Jupyter notebook and you want
    the FASTA record returned as text.

    Parameters:
        gene_id: identifier handed to the template's LOOKUP constraint.
        extension_for_saving: passed, together with the gene's names, to
            `generate_output_file_name` to build the output file name.
        return_text: when truthy, return the FASTA text instead of writing a
            file.

    Returns:
        The FASTA-formatted record as a string when `return_text` is truthy;
        otherwise None, after the record has been written to the generated
        file name.

    Raises:
        IndexError: if YeastMine returns no rows for `gene_id`.

    Progress messages are written to stderr.
    '''
    # feedback (emitted before the query so it appears while the lookup runs)
    sys.stderr.write("looking up the gene associated with "
        "{}...".format(gene_id))

    # Get gene information from YeastMine
    #---------------------------------------------------------------------------
    # Based on the template Gene_ProteinSequence available under
    # 'Gene --> Protein Sequence' when clicking on 'Proteins' on navigation bar
    # in middle of page at YeastMine. Direct link:
    # https://yeastmine.yeastgenome.org/yeastmine/template.do?name=Gene_ProteinSequence&scope=global
    service = Service("https://yeastmine.yeastgenome.org:443/yeastmine/service")

    # Retrieve protein sequence for a specified gene.
    template = service.get_template('Gene_ProteinSequence')

    # You can edit the constraint values below
    # B    Gene
    results = list(template.rows(
        B = {"op": "LOOKUP", "value": gene_id, "extra_value": "S. cerevisiae"}
    ))
    if not results:
        # Same exception type the bare `results[0]` used to raise, but with a
        # message that tells the user what went wrong.
        raise IndexError(
            "YeastMine returned no gene matching '{}'.".format(gene_id))
    first_hit = results[0]

    # store corresponding protein sequence
    prot_seq = first_hit["proteins.sequence.residues"]

    # format gene_nom_info for making output file name or anything else needing
    # that information
    gene_nom_info = {
        'sys_nom': first_hit["secondaryIdentifier"],
        'std_nom': first_hit["symbol"],
        'aliases': first_hit["sgdAlias"],
    }

    # Make output FASTA record
    #---------------------------------------------------------------------------
    # based on handling worked out in
    # `delete_seq_following_pattern_within_multiFASTA.py`
    # NOTE(review): `generic_protein` comes from `Bio.Alphabet`, which newer
    # Biopython releases no longer ship -- confirm the installed version
    # before upgrading.
    record_description = '{}'.format(gene_nom_info['sys_nom'])
    record = SeqRecord(Seq(prot_seq, generic_protein),
            id=gene_nom_info['std_nom'], description=record_description)#based
        # on https://www.biostars.org/p/48797/ and `.ungap()` method, see
        # https://github.com/biopython/biopython/issues/1511 , and `description`
        # from what I've seen for `id` plus https://biopython.org/wiki/SeqIO
    sys.stderr.write("getting protein sequence...")

    # Return text if called with `return_text = True`. Otherwise, consider
    # called from command line & save file.
    #---------------------------------------------------------------------------
    if return_text:
        # based on section 4.6 at
        #http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec:SeqRecord-format
        # Feedback
        sys.stderr.write("\nReturning protein sequence in FASTA format.")
        return record.format("fasta")

    output_file_name = generate_output_file_name(
        gene_nom_info, extension_for_saving)
    SeqIO.write(record, output_file_name, "fasta")
    # Feedback
    sys.stderr.write("\n\nFile of protein sequence "
        "saved as '{}'.".format(output_file_name))
    sys.stderr.write("\nFinished.\n")
Пример #9
0
# To install the client, run the following command from a terminal:
#
#     sudo easy_install intermine
#
# For further documentation you can visit:
#     http://intermine.readthedocs.org/en/latest/web-services/

# The following two lines will be needed in every python script:
from intermine.webservice import Service

service = Service("https://phytozome.jgi.doe.gov/phytomine/service")

# Look up the GO terms associated with a gene, or use a saved list of genes and
# look up the GO terms associated with all genes on the list.

template = service.get_template('GO-Annotations')

# You can edit the constraint values below
# A    Gene.primaryIdentifier
#
# Reads one gene identifier per line from the file "genes_polya_frompac",
# runs the template for each identifier and prints one line per result row.
with open("genes_polya_frompac", "r") as gene_file:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in gene_file:
        gene_id = line.strip()
        if not gene_id:
            # A blank line carries no identifier to query.
            continue
        for row in template.rows(A={"op": "=", "value": gene_id}):
            # The former Python 2 print statement is a syntax error on
            # Python 3. The line is now built explicitly and printed with a
            # single-argument print(...), which parses on both versions.
            # The layout is kept exactly as the old statement emitted it:
            # fields separated by a space followed by a tab, except
            # secondaryIdentifier and length, which were only separated by a
            # single space.
            print(" \t".join([
                str(row["primaryIdentifier"]),
                str(row["secondaryIdentifier"]) + " " + str(row["length"]),
                str(row["chromosomeLocation.start"]),
                str(row["chromosomeLocation.end"]),
                str(row["goAnnotation.ontologyTerm.identifier"]),
                str(row["goAnnotation.ontologyTerm.name"]),
                str(row["goAnnotation.ontologyTerm.description"]),
                str(row["briefDescription"]),
            ]))
Пример #10
0
#
# Saccharomyces Genome Database (SGD)
# http://yeastmine.yeastgenome.org/yeastmine/api.do?subtab=python
#
# YeastMine example script
#

from intermine.webservice import Service

service = Service("http://yeastmine.yeastgenome.org/yeastmine/service")

# List all GO annotations for a specified gene. Searches for the
# primaryIdentifier (SGDID), secondaryIdentifier (Systematic Name), symbol
# (Standard Gene Name) and wild card queries (such as *YAL*) are supported.
# Manually curated, high-throughput, and computational GO annotations are
# included. Genes include Uncharacterized and Verified ORFs, pseudogenes,
# transposable element genes, RNAs, and genes Not in Systematic Sequence of
# S228C.

template = service.get_template('Gene_GO')

# You can edit the constraint values below
# A    Gene    Show GO annotations for gene:

rows = template.rows(
    A={"op": "LOOKUP", "value": "YAL018C", "extra_value": "S. cerevisiae"}
)
# Single-argument print(...) behaves the same under Python 2 and Python 3;
# the former `print row` statement was Python 2 only.
for row in rows:
    print(row)