Exemplo n.º 1
0
def download_sequences(accessions):
    """Fetch the protein sequence of every accession from ExPASy.

    Args:
        accessions: iterable of accession strings passed to
            ``ExPASy.get_sprot_raw``.

    Returns:
        dict mapping each accession to the ``sequence`` attribute of its
        parsed Swiss-Prot record.
    """
    records = {}
    for accession in accessions:
        handle = ExPASy.get_sprot_raw(accession)
        try:
            records[accession] = SwissProt.read(handle).sequence
        finally:
            # Release the connection even when parsing fails.
            handle.close()
    return records
Exemplo n.º 2
0
def access_sequence(accession):
    """Return the sequence of a Swiss-Prot entry, or None if it cannot be parsed.

    Args:
        accession: accession string passed to ``ExPASy.get_sprot_raw``.

    Returns:
        The ``sequence`` attribute of the parsed record, or ``None`` (after
        printing a warning) when ``SwissProt.read`` raises ``ValueError``.
    """
    handle = ExPASy.get_sprot_raw(accession)
    try:
        record = SwissProt.read(handle)
    except ValueError:
        # The original caught the non-existent ``ValueException`` and then fell
        # through to ``record.sequence`` with ``record`` unbound.
        print("WARNING: Accession %s not found" % accession)
        return None
    return record.sequence
Exemplo n.º 3
0
def parseBlast():
    """Collect keywords and accessions for the top BLAST hit of every query.

    Reads ``./output/blastOut.xml`` and, for each query record, looks only at
    the first alignment. Every HSP of that alignment whose e-value is below
    ``E_VALUE_THRESH`` contributes the hit's protein title and the keywords of
    its Swiss-Prot record.

    Returns:
        tuple ``(blastHits, accessions)`` where ``blastHits`` maps a query ID
        to a ``'; '``-joined keyword string and ``accessions`` maps a query ID
        to the list of hit accessions.
    """
    E_VALUE_THRESH = 1
    blastHits = {}
    accessions = {}
    # The records are read while iterating, so the file has to stay open for
    # the whole loop; the ``with`` closes it afterwards (it used to leak).
    with open("./output/blastOut.xml") as result_handle:
        blast_records = NCBIXML.parse(result_handle)
        # Loop through each protein query result
        for blast_record in blast_records:
            keyword_list = []  # running keyword list for this query
            # parse for the query protein ID
            queryID = blast_record.query.split()[0].split(':')[1]
            # Loop through the hits associated with this query
            for alignment in blast_record.alignments:
                for hsp in alignment.hsps:
                    # Hit must have e-value < threshold to be considered
                    if hsp.expect < E_VALUE_THRESH:
                        title = alignment.title  # title of hit
                        splittitle = title.split()
                        # text before 'OS' holds the descriptive part of the title
                        raw_protein_title = title.split('OS')[0]
                        protein_title = " ".join(raw_protein_title.split()[2:])
                        keyword_list.append(protein_title)
                        # parse for the accession number
                        accession = splittitle[1].split('|')[1]
                        accessions.setdefault(queryID, []).append(accession)
                        handle = ExPASy.get_sprot_raw(accession)
                        try:
                            record = SwissProt.read(handle)
                        finally:
                            handle.close()
                        keyword_list += record.keywords
                        keyword_string = '; '.join(keyword_list)
                        blastHits[queryID] = keyword_string
                break  # only take top hit for now
    return (blastHits, accessions)
Exemplo n.º 4
0
def get_SwissProt(dict, accession):
    """Store the Swiss-Prot record for ``accession`` in ``dict``.

    The record is fetched through ExPASy and saved under the accession key.
    If the lookup raises ``urllib2.HTTPError`` a message is printed and
    ``dict`` is left unchanged.
    """
    try:
        entry = SwissProt.read(ExPASy.get_sprot_raw(accession))
        dict[accession] = entry
    except urllib2.HTTPError:
        print(accession + ": protein not found on UniProt . ")
Exemplo n.º 5
0
def get_SwissProt(dict,accession):
    """Fetch ``accession`` from ExPASy and add its record to ``dict``.

    On ``urllib2.HTTPError`` nothing is stored and a "not found" message is
    printed instead.
    """
    try:
        raw_entry = ExPASy.get_sprot_raw(accession)
        parsed = SwissProt.read(raw_entry)
        dict[accession] = parsed
    except urllib2.HTTPError as error:
        message = accession + ": protein not found on UniProt . "
        print(message)
def sequence_file(*args):
    '''Save the sequence of the entered UniProt code to a FASTA file.

    The code is read from the ``code`` widget, the record is fetched from
    ExPASy, a FASTA header is built from the record and the user is asked
    where to save the file.

    If the record cannot be retrieved an error window is shown and nothing is
    written. Nothing is written either when the save dialog returns an empty
    filename.
    '''

    a = code.get()
    try:
        from Bio import ExPASy
        from Bio import SwissProt
        with ExPASy.get_sprot_raw(a) as handle:
            record = SwissProt.read(handle)
    except Exception:
        if a == "":
            open_window("No Code", "Please Insert an Uniprot Code", "#FFC3C3",
                        '200x30')
        else:
            open_window("No Valid Code", "Please Insert a valid Uniprot Code",
                        "#FFC3C3", '200x30')
        # Without a record there is nothing to save; the original fell through
        # and failed on the unbound ``record``.
        return

    descrip = record.description.split(";")[0]
    marker = descrip.find("Full=")
    if marker != -1:
        # Only cut when the marker exists; find() == -1 used to chop 4 chars.
        descrip = descrip[marker + 5:]
    fasta_header = (">sp|" + a + "|" + record.entry_name + " " + descrip +
                    " OS=" + record.organism)

    filename = filedialog.asksaveasfilename(defaultextension='.fasta',
                                            filetypes=[("fasta", "*.fasta")])
    if not filename:
        # An empty filename cannot be opened (presumably the dialog was cancelled).
        return
    with open(filename, "w") as TextFile:
        TextFile.write(fasta_header + '\n')
        TextFile.write(record.sequence)
Exemplo n.º 7
0
	def find_COG2(self):
		"""Find records from uniprotIDs without use of keggIDs.

		Fetches the Swiss-Prot record for ``self.uprotID``, extracts the gene
		name from its ``gene_name`` field and returns the raw response of the
		genome.jp ``oc`` query for that name.
		"""
		import re

		handle = ExPASy.get_sprot_raw(self.uprotID)
		try:
			record = SwissProt.read(handle)
		finally:
			handle.close()
		# str.strip("Name=;") removes any of those *characters* from both ends,
		# so it also ate leading/trailing letters of the gene name itself.
		# Take the text after "Name=" up to the next ';' or '{' instead.
		match = re.search(r"Name=([^;{]+)", record.gene_name)
		if match:
			query = match.group(1).strip()
		else:
			# No "Name=" field: keep the previous behaviour as a fallback.
			query = record.gene_name.strip("Name""="";")
		url_open = urllib.urlopen("http://rest.genome.jp/oc/?"+query)
		try:
			return url_open.read()
		finally:
			url_open.close()
Exemplo n.º 8
0
def fetch_swp_expasy(uniprot_acc):
    """
    Fetch selected fields of a SwissProt entry through ExPASy.

    Record attributes are described at
    http://biopython.org/DIST/docs/api/Bio.SwissProt.Record-class.html

    Parameters
    ----------
    uniprot_acc : str
        SwissProt accession or identifier.

    Returns
    -------
    tuple
        ``(attrib_names, swp_info_list)``: a list of attribute labels and a
        list with the corresponding values, in the same order.
    """

    # Build the record object for the requested identifier.
    entry = SwissProt.read(ExPASy.get_sprot_raw(uniprot_acc))

    # First element of the ``created`` field; also reported after conversion
    # by the ``dating`` helper.
    created_on = entry.created[0]

    labels = [
        'accessions',
        'data created',
        'date created (ISO)',
        'organism',
        'gene names',
        'description',
        'comments',
        'keywords',
    ]
    values = [
        entry.accessions,
        created_on,
        dating(created_on),
        entry.organism,
        entry.gene_name,
        entry.description,
        entry.comments,
        entry.keywords,
    ]
    return (labels, values)
Exemplo n.º 9
0
def main(input_string):
    """Print the biological-process GO terms of a Swiss-Prot entry.

    A cross-reference is printed when its database is 'GO' and its third
    field starts with 'P:'; the 'P:' prefix is removed from the output.
    """
    handle = ExPASy.get_sprot_raw(input_string)
    entry = SwissProt.read(handle)
    for xref in entry.cross_references:
        if xref[0] != 'GO':
            continue
        term = xref[2]
        if not term.startswith('P:'):
            continue
        print(term[2:])
Exemplo n.º 10
0
def gen_uniprot_features_for_pdb(infile):
  """Print UniProt annotations for PDB domains listed in a tab-separated file.

  Each line of ``infile`` must hold three tab-separated fields: the PDB
  domain, a count (unused) and a '|'-separated list of UniProt IDs. For every
  UniProt entry that cross-references the PDB domain, one tab-separated line
  is printed with the domain, the UniProt ID, its GO accessions, the last
  InterPro and EvolutionaryTrace references, the organism and the last
  'SUBCELLULAR LOCATION' comment.
  """
  # The file used to be opened inline and never closed.
  with open(infile, 'r') as rows:
    for line in rows:
      (pdb_dom, count, uniprot_ids) = line.replace('\n','').split('\t')
      for uniprot_id in uniprot_ids.split('|'):
        data = SwissProt.read(ExPASy.get_sprot_raw(uniprot_id)).__dict__
        keep = False
        go = []
        interpro = ''
        evo_trace = ''
        for xref in data['cross_references']:
          if xref[0] == 'GO':
            go.append(xref[1])
          if xref[0] == 'InterPro':
            interpro = xref[1]
          if xref[0] == 'EvolutionaryTrace':
            evo_trace = xref[1]
          if xref[0] == 'PDB' and xref[1].lower() == pdb_dom.lower():
            keep = True
        # Skip entries that do not reference this PDB domain.
        if not keep:
          continue
        organism = data['organism']
        loc = ''
        for comment in data['comments']:
          if comment.startswith('SUBCELLULAR LOCATION'):
            loc = comment
        print('%s\t%s\t%s\t%s\t%s\t%s\t%s' %(pdb_dom,uniprot_id,'|'.join(go),interpro,evo_trace,organism,loc))
Exemplo n.º 11
0
def main(filename):
    """Print the GO process names of the entry whose ID is stored in a file.

    The whole file content (stripped) is used as the ExPASy query. A
    cross-reference is kept when its database is 'GO' and its third field
    begins with 'P'; the text between the first and second ':' is printed.
    """
    with open(filename) as source:
        query = source.read().strip()
    entry = SwissProt.read(ExPASy.get_sprot_raw(query))
    names = []
    for xref in entry.cross_references:
        if xref[0] == 'GO' and xref[2][0] == 'P':
            names.append(xref[2].split(':')[1])
    for name in names:
        print(name)
Exemplo n.º 12
0
def get_records(ids):
    """Return the sequences of the given Swiss-Prot IDs, in input order."""
    return [
        SwissProt.read(ExPASy.get_sprot_raw(entry_id)).sequence
        for entry_id in ids
    ]
Exemplo n.º 13
0
def main():
    """Write and print the biological-process GO terms of one UniProt entry.

    The UniProt ID is read from ``problem_datasets/rosalind_dbpr.txt``; the
    terms are written to ``output/rosalind_dbpr_out.txt`` and printed together
    with the gene name.
    """
    # Read the UniProt ID from a txt file.
    with open('problem_datasets/rosalind_dbpr.txt', 'r') as infile:
        uni_id = infile.read().strip()

    # Retrieve the data from UniProt (separated IDs by commas).
    raw_data = ExPASy.get_sprot_raw(uni_id)
    record = SwissProt.read(
        raw_data)  # use SwissProt.parse for multiple proteins

    # Collect the relevant information. Only 'GO' cross-references are
    # considered, so a third field starting with 'P:' in another database is
    # not mistaken for a biological process.
    go = [
        xref[2][2:] for xref in record.cross_references
        if xref[0] == 'GO' and xref[2].startswith('P:')
    ]

    # Output answer.
    with open('output/rosalind_dbpr_out.txt', 'w') as outfile:
        outfile.write('\n'.join(go))

    # Optional: Print answer and gene ID/name. The first token looks like
    # "Name=XYZ;": drop the 5-character prefix and the trailing ';'.
    name = record.gene_name.split(' ')[0][5:].rstrip(';')
    print('Gene:\n',
          name,
          ' (UniProt ID = ',
          uni_id,
          ')\n\nBiological Processes:\n',
          '\n'.join(go),
          sep='')
Exemplo n.º 14
0
def swissprot_search():
    """Print the Swiss-Prot record of every accession in the accession file.

    ``output/seq_accession.txt`` is read line by line; the first line is
    skipped and every following non-blank line is used as an accession.
    """
    with open('output/seq_accession.txt') as f:
        f.readline()  # first line is not an accession; skip it
        for line in f:
            # Lines keep their trailing newline, which must not be sent as
            # part of the accession.
            accession = line.strip()
            if not accession:
                continue
            handle = ExPASy.get_sprot_raw(accession)
            try:
                record = SwissProt.read(handle)
            finally:
                handle.close()
            print(record)
Exemplo n.º 15
0
 def get(self,id):
     """Fetch a Swiss-Prot entry from ExPASy and return the parsed record.

     ``id`` is handed to ``ExPASy.get_sprot_raw`` and the returned handle is
     parsed with ``SwissProt.read``.
     """
     return SwissProt.read(ExPASy.get_sprot_raw(id))
Exemplo n.º 16
0
def getgo(id):
    """Print the GO process names of a Swiss-Prot entry, one per line.

    A cross-reference counts when its database is "GO" and its third field
    starts with "P"; the text between the first and second ":" is printed.
    """
    entry = SwissProt.read(ExPASy.get_sprot_raw(id))
    names = []
    for xref in entry.cross_references:
        if xref[0] == "GO" and xref[2].startswith("P"):
            names.append(xref[2].split(":")[1])
    print("\n".join(names))
Exemplo n.º 17
0
def main(id):
    """Print the name of every GO cross-reference whose aspect field is "P"."""
    entry = SwissProt.read(ExPASy.get_sprot_raw(id))
    for xref in entry.cross_references:
        if xref[0] != "GO":
            continue
        # Third field looks like "<aspect>:<name>".
        parts = xref[2].split(":")
        if parts[0] == "P":
            print(parts[1])
Exemplo n.º 18
0
def main(argv):
    """Read a UniProt ID from stdin and print its biological-process GO terms."""
    # input() reads one line from stdin; several IDs could be given separated
    # by commas, but SwissProt.read parses a single record (SwissProt.parse
    # handles multiple proteins).
    query = input().strip()
    record = SwissProt.read(ExPASy.get_sprot_raw(query))

    for xref in record.cross_references:
        if xref[0] == 'GO' and xref[2].startswith('P:'):
            print(xref[2][2:])
Exemplo n.º 19
0
def main():
    """Write the biological-process GO terms of the ID in ``dbpr`` to ``dbpr.out``.

    The first line of the file ``dbpr`` is used as the UniProt ID. The terms
    are written one per line, followed by a newline.

    The output file is written and closed directly. Previously ``sys.stdout``
    was rebound to the file, which was never closed and left every later
    ``print`` in the process redirected.
    """
    with open("dbpr") as f:
        uniprot_id = f.readline().strip()
    handle = ExPASy.get_sprot_raw(uniprot_id)
    record = SwissProt.read(handle)
    go_terms = [x[2] for x in record.cross_references if x[0] == 'GO']
    # Terms look like "P:<name>"; keep processes and drop the 2-char prefix.
    processes = [term[2:] for term in go_terms if term.startswith('P')]
    with open("dbpr.out", "w") as out:
        out.write("\n".join(processes) + "\n")
Exemplo n.º 20
0
    def MouseHomolog(self, dfs):
        """Flag peptides that also occur in the mouse homolog and export them.

        For each accession in ``self.accs`` the Swiss-Prot entry name is looked
        up and its species suffix replaced by ``_MOUSE``. When that mouse entry
        exists, the data frame at the same position in ``dfs`` gets a 'Mouse'
        column holding 'True'/'False' per row, depending on whether the row's
        ``Sequence`` value is a substring of the mouse sequence. All annotated
        frames are concatenated and written to ``MouseHomologPeptides.xlsx``
        in ``self.out_folder``.

        Args:
            dfs: sequence of data frames indexed in parallel with ``self.accs``.

        Returns:
            None. When no frame could be annotated a message is printed and no
            file is written.
        """
        print('\nFinding mouse homologs')
        new_dfs = []

        # enumerate keeps the position in step with ``dfs`` on every path,
        # replacing the manual counter that each branch had to bump.
        for ind, acc in enumerate(self.accs):

            try:
                handle = ExPASy.get_sprot_raw(acc)
                record = SwissProt.read(handle)
                name = record.entry_name
            except Exception:
                # Not a bare except: Ctrl-C / SystemExit must still propagate.
                print('\nNo entry for', acc, ',continuing')
                continue

            try:
                mname = name.split('_')[0] + '_MOUSE'
                mhandle = ExPASy.get_sprot_raw(mname)
                mrecord = SwissProt.read(mhandle)
                mseq = mrecord.sequence
                print(f'\nFound mouse homolog for {name}: {mname}')
            except Exception:
                print(f'\nNo mouse gene entry for {acc}-{name}, continuing')
                continue

            df = dfs[ind]
            mcol = []

            # Iterate the column values positionally, one per row.
            for pepseq in df['Sequence']:
                print(pepseq)
                mcol.append('True' if str(pepseq) in mseq else 'False')

            df['Mouse'] = mcol
            new_dfs.append(df)

        if not new_dfs:
            # pd.concat raises on an empty list; report instead of crashing.
            print('\nNo mouse homologs found, nothing to export')
            return

        df_final = pd.concat(new_dfs, sort=True)
        df_final.to_excel(self.out_folder + '/' + 'MouseHomologPeptides.xlsx',
                          index=True)
0
def protfunction(query_proteins):
    """Return the FUNCTION comment of each protein.

    Args:
        query_proteins: iterable of protein names or IDs accepted by
            ``ExPASy.get_sprot_raw``.

    Returns:
        list of ``(protein, function_text)`` tuples in input order.
        ``function_text`` is the first record comment starting with
        ``"FUNCTION: "`` with that prefix removed, or ``""`` when the record
        has no such comment.
    """
    prefix = "FUNCTION: "
    function_list = []
    for prot in query_proteins:
        with ExPASy.get_sprot_raw(prot) as handle:
            record = SwissProt.read(handle)
        # Search for the FUNCTION comment instead of assuming it is
        # comments[0]: that raised IndexError on records without comments and
        # returned a mis-sliced different comment otherwise.
        function_text = next(
            (comment[len(prefix):] for comment in record.comments
             if comment.startswith(prefix)),
            "",
        )
        function_list.append((prot, function_text))
    return function_list
Exemplo n.º 22
0
def print_bio_process(file):
    """Print the GO process terms of the UniProt ID read from ``file``.

    ``file`` is an open file-like object; its full content, right-stripped,
    is used as the ID. For 'GO' cross-references whose third field starts
    with 'P', that field minus its first two characters is printed.
    """
    query = file.read().rstrip()
    entry = SwissProt.read(ExPASy.get_sprot_raw(query))
    terms = []
    for xref in entry.cross_references:
        if xref[0] == 'GO' and xref[2].startswith('P'):
            terms.append(xref[2][2:])
    print('\n'.join(terms))
Exemplo n.º 23
0
def dbpr():
    """Print the biological-process GO terms of the ID in ``rosalind_dbpr.txt``."""
    # Close the input file deterministically instead of leaking the handle.
    with open("rosalind_dbpr.txt") as infile:
        uniprot_id = infile.read().strip()
    handle = ExPASy.get_sprot_raw(uniprot_id)
    record = SwissProt.read(handle)

    # Print the biological-process terms ('P:' prefix removed).
    for ref in record.cross_references:
        if ref[0] == 'GO' and ref[2].startswith('P:'):
            print(ref[2][2:])
Exemplo n.º 24
0
def main(argv):
    """Print the biological-process GO terms of the ID stored in ``argv[0]``.

    ``argv[0]`` is passed to ``files.read_line``; the value returned is used
    as the ExPASy query.
    """
    line = files.read_line(argv[0])
    handle = ExPASy.get_sprot_raw(line)
    record = SwissProt.read(handle)

    # Require the 'P:' prefix: a substring test also matched terms such as
    # "F:ATP:ADP antiporter activity". Slice off the prefix instead of
    # splitting on ':' so names that contain a colon stay intact.
    names = [
        x[2][2:] for x in record.cross_references
        if x[0] == 'GO' and x[2].startswith('P:')
    ]

    print('\n'.join(names))
Exemplo n.º 25
0
def DBPR(id):
    """Return the names of the GO cross-references whose aspect field is 'P'.

    The third field of a 'GO' cross-reference is split on ':'; when the first
    piece is 'P' the second piece is collected.
    """
    # Several IDs can be separated by commas, but SwissProt.read parses a
    # single record (use SwissProt.parse for multiple proteins).
    record = SwissProt.read(ExPASy.get_sprot_raw(id))
    GO = []
    for item in record.cross_references:
        if item[0] != 'GO':
            continue
        pieces = item[2].split(':')
        if pieces[0] == 'P':
            GO.append(pieces[1])
    return GO
Exemplo n.º 26
0
def find_function(prot):
    """Return the GO process terms of a protein, printing each matching xref.

    A 'GO' cross-reference matches when the first character of its third
    field is 'P'; the field minus its first two characters is collected.
    """
    # Several IDs may be given separated by commas; SwissProt.read parses one
    # record (SwissProt.parse handles multiple proteins).
    entry = SwissProt.read(ExPASy.get_sprot_raw(prot))

    functions = []
    for xref in entry.cross_references:
        if xref[0] != 'GO' or xref[2][0] != 'P':
            continue
        print(xref)
        functions.append(xref[2][2:])
    return functions
Exemplo n.º 27
0
 def acession(self):
     """Fetch the Swiss-Prot record of every ID in ``self.ids``.

     The results are stored in ``self.rec`` (one item per ID, in order) and
     that list is returned. IDs equal to 'ND' are not looked up; the string
     'ND' is stored in their place.
     """
     fetched = []
     self.rec = fetched
     for ide in self.ids:
         if ide == 'ND':
             fetched.append('ND')
             continue
         fetched.append(SwissProt.read(ExPASy.get_sprot_raw(ide)))
     return self.rec
Exemplo n.º 28
0
def BiologicalProcesses(UniProtID):
    """Return the biological-process names of an entry, one per line.

    Every cross-reference tuple that contains the element "GO" is scanned; for
    each of its fields matching ``P:.*`` the text after the last ':' is kept.
    """
    Record = SwissProt.read(ExPASy.get_sprot_raw(UniProtID))

    Processes = []
    for xref in Record.cross_references:
        if "GO" not in xref:
            continue
        for field in xref:
            if re.match("P:.*", field):
                # Text following the final colon of the field.
                Processes.append(field[field.rfind(':')+1:])
    return "\n".join(Processes)
Exemplo n.º 29
0
def fetch(acc) :
    '''Download one entry from UniProt and parse it.

    Input:
    acc: accession code of the record
    Return: the record produced by SwissProt.read for
    http://www.uniprot.org/uniprot/<acc>.txt
    '''
    base_url = 'http://www.uniprot.org/uniprot/'
    # The context manager closes the HTTP response once it has been parsed.
    with urllib.request.urlopen(base_url + acc + '.txt') as handle:
        record = SwissProt.read(handle)
    return record
Exemplo n.º 30
0
 def eachget(self, id_list):
     """Return the Swiss-Prot records of the IDs that could be retrieved.

     IDs whose download or parsing fails are reported with a printed message
     and skipped, so the result may be shorter than ``id_list``.
     """
     a = []
     for eachid in id_list:
         try:
             record = SwissProt.read(ExPASy.get_sprot_raw(eachid))
         except Exception:
             # Best effort per ID, but do not swallow KeyboardInterrupt or
             # SystemExit as the bare ``except`` did.
             print('something wrong with this id:%s\n' %
                   eachid)
         else:
             a.append(record)
     return a
Exemplo n.º 31
0
def geneontology(query_proteins):
    """Collect the GO process names of several proteins.

    Takes an iterable of protein names or IDs and returns a list of
    ``(protein, process_name)`` tuples. A cross-reference is used when its
    database is "GO" and its third field starts with "P"; the name is the
    text between the first and second ":".
    """
    gene_ontology = []
    for prot in query_proteins:
        with ExPASy.get_sprot_raw(prot) as handle:
            record = SwissProt.read(handle)
            gene_ontology.extend(
                (prot, ref[2].split(":")[1])
                for ref in record.cross_references
                if ref[0] == "GO" and ref[2].startswith("P")
            )
    return gene_ontology
Exemplo n.º 32
0
def get_keywords(lookup):
    """Fetch and parse the Swiss-Prot entry for ``lookup``.

    Exits the process with status 1 when the download fails or when
    ``SwissProt.read`` raises ``ValueError``.

    NOTE(review): the parsed record is not used or returned in the visible
    code; the function returns None on success.
    """
    try:
        handle = ExPASy.get_sprot_raw(lookup)
    except Exception:
        # Not a bare except, so Ctrl-C is not reported as an ExPASy error.
        print("Error in ExPASy")
        sys.exit(1)
    try:
        record = SwissProt.read(handle)
    except ValueError as error:
        print(error)
        sys.exit(1)
Exemplo n.º 33
0
def get_bio_processes(protein):
    """Return the GO process names of a protein as a list.

    'GO' cross-references whose third field starts with 'P' contribute the
    text between the first and second ':' of that field.
    """
    entry = SwissProt.read(ExPASy.get_sprot_raw(protein))
    return [
        xref[2].split(":")[1]
        for xref in entry.cross_references
        if xref[0] == 'GO' and xref[2].startswith('P')
    ]
Exemplo n.º 34
0
def get_keywords(lookup):
    """Fetch and parse the Swiss-Prot entry for ``lookup``.

    The process exits with status 1 if the ExPASy request fails or if
    ``SwissProt.read`` raises ``ValueError`` (the error is printed first).

    NOTE(review): nothing is returned on success; the parsed record is
    discarded in the visible code.
    """
    try:
        handle = ExPASy.get_sprot_raw(lookup)
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate.
        print("Error in ExPASy")
        sys.exit(1)
    try:
        record = SwissProt.read(handle)
    except ValueError as error:
        print(error)
        sys.exit(1)
Exemplo n.º 35
0
	def records(self):
		"""Return a dictionary of ID and swissprot records from query.

		IDs whose lookup raises ``HTTPError`` or ``AssertionError`` are
		reported with a printed message and left out of the dictionary.
		"""
		record_dict = {}
		for i in self.IDs():
			try:
				handle = ExPASy.get_sprot_raw(i)
				record_dict[i] = SwissProt.read(handle)
			# A tuple is required to catch both types; the old
			# ``except HTTPError, AssertionError`` caught only HTTPError and
			# bound it to the name AssertionError.
			except (HTTPError, AssertionError):
				print("there was a problem finding uniprotID {} \n\
					try Records_fromfile-method".format(i))
		# The docstring promises the dictionary, but it was never returned.
		return record_dict
Exemplo n.º 36
0
def accessionSearch():
    """Look up the accession typed by the user and run the motif finder on it.

    The accession is read from ``entryAccession``. On any lookup or parsing
    error a message is shown in ``lblResults``; otherwise the record's
    sequence is passed to ``motifFinder``.
    """
    try:
        sInput = entryAccession.get()  # get text field contents
        handle = ExPASy.get_sprot_raw(sInput)  # for use in SwissProt.read
        try:
            record = SwissProt.read(handle)
        finally:
            # Close the handle even when parsing fails.
            handle.close()
    except Exception:
        # Report the failure to the user; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit through.
        lblResults.configure(text="invalid accession code!\n please try again...")
    else:
        # otherwise, submit sequence to motifFinder function
        motifFinder(record.sequence)
Exemplo n.º 37
0
def main(protein_id):
    """Return the GO process names of an entry as newline-separated text.

    Every cross-reference is printed while scanning. For 'GO'
    cross-references whose third field splits on ':' into a first piece 'P',
    the second piece is collected. The joined result is stripped.
    """
    # Several IDs may be given separated by commas; SwissProt.read parses one
    # record (SwissProt.parse handles multiple proteins).
    record = SwissProt.read(ExPASy.get_sprot_raw(protein_id))

    lines = []
    for r in record.cross_references:
        print(r)
        if r[0] != "GO":
            continue
        pieces = r[2].split(":")
        if pieces[0] == 'P':
            lines.append(pieces[1] + "\n")

    return "".join(lines).strip()
Exemplo n.º 38
0
def getDataFromProt(protid):
    """Download a UniProt text entry, cache it as ``<protid>.dat`` and parse it.

    The parsed record is passed to ``getInfoTxt`` and its nine values are
    returned re-ordered as
    ``(name, id, locale, status, fmol, bio, function, length, ec)``.
    """
    url = "http://www.uniprot.org/uniprot/" + protid + ".txt"
    response = urlopen(url)
    try:
        txt = response.read()
    finally:
        response.close()

    # Each handle is now closed deterministically; the response and both file
    # handles used to be left open.
    dat_path = protid + ".dat"
    with open(dat_path, "w") as f:
        f.write(txt.decode('utf-8'))

    with open(dat_path) as handle:
        parsed = SwissProt.read(handle)

    status, locale, fmol, bio, name, entry_id, function, length, ec = getInfoTxt(
        parsed)
    return name, entry_id, locale, status, fmol, bio, function, length, ec
Exemplo n.º 39
0
def run(user_input="""Q5SLP9"""):
    """Print and return the biological-process GO terms of a UniProt ID.

    The list of term names is printed first, then the newline-joined text,
    which is also the return value.
    """
    entry = SwissProt.read(ExPASy.get_sprot_raw(user_input.strip()))

    process_names = []
    for xref in entry.cross_references:
        if xref[0] != "GO":
            continue
        term = str(xref[2])
        if term.startswith("P:"):
            process_names.append(term[2:])
    print(process_names)

    result = "\n".join(process_names)
    print(result)
    return result
Exemplo n.º 40
0
def _shell_lookup(args):
    """Build the text printed when the script is used from the command line.

    [jakni@nissen scripts]$ python unifetch.py -a A6XGL2 -ncis
    Name: A6XGL2_HUMAN
    Data class: Unreviewed
    TaxID: 9606
    Sequence: MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRRE [ ... ]

    Returns 'Not found.' when the accession is missing from the database, the
    whole decompressed entry when no output flag is set, and otherwise one
    "Title: value" line per requested field.
    """

    with _gnu.open(args.database) as database:
        data = database.get(args.accession, None)

    # Unknown accession.
    if data is None:
        return 'Not found.'

    # (title, requested?) pairs in output order.
    wanted = (
        ('Name', args.name),
        ('Date', args.date),
        ('Data class', args.dataclass),
        ('Organism', args.organism),
        ('Taxonomy', args.taxonomy),
        ('TaxID', args.taxid),
        ('Sequence', args.sequence),
    )

    raw = _gzip.decompress(data)

    # Nothing particular requested: return the entire entry as text.
    if not any(flag for _title, flag in wanted):
        return raw.decode()

    # Otherwise parse the entry and report only the requested parts.
    record = _SwissProt.read(_io.BytesIO(raw))
    species = get_species(record)
    lineage = record.organism_classification + ([species] if species else [])
    values = (
        record.entry_name,
        record.created[0],
        record.data_class,
        record.organism,
        ';'.join(lineage),
        ';'.join(record.taxonomy_id),
        record.sequence,
    )

    output = [
        '{}: {}'.format(title, value)
        for (title, flag), value in zip(wanted, values)
        if flag
    ]
    return '\n'.join(output)
Exemplo n.º 41
0
    def download_entry(self, accession):
        """Download a Swiss-Prot entry and cache it if it matches the organism.

        Args:
            accession: accession passed to ``ExPASy.get_sprot_raw``.

        Returns:
            The parsed record, also stored in ``self.records[accession]``.

        Raises:
            KeyError: if the entry cannot be downloaded or parsed, or if
                ``self.organism`` does not occur in the record's lower-cased
                organism string.
        """
        try:
            handle = ExPASy.get_sprot_raw(accession)
            try:
                record = SwissProt.read(handle)
            finally:
                handle.close()
        except Exception:
            # Map lookup failures to KeyError, but let KeyboardInterrupt and
            # SystemExit propagate (the bare ``except`` converted those too).
            raise KeyError('{}'.format(accession))

        record_org = record.organism.strip().lower()
        if self.organism not in record_org:
            print('{} ortholog of {} not found.'.format(self.organism, accession))
            raise KeyError('{} ortholog of {} not found.'.format(self.organism, accession))

        self.records[accession] = record
        return record
Exemplo n.º 42
0
def main():
    """Print the biological-process GO terms of the UniProt ID in the input file.

    The input path is taken from ``arguments['<input>']`` and resolved to an
    ID by ``get_uniprot_id_from_file``.
    """
    # Grab our input id value
    uniprot_id = get_uniprot_id_from_file(arguments['<input>'])
    # Get a handle on the data for the uniprot id
    handle = ExPASy.get_sprot_raw(uniprot_id)
    # Parse our data
    record = SwissProt.read(handle)
    handle.close()
    # Process out the stuff of interest, GO values in this case
    for ref in record.cross_references:
        if ref[0] != 'GO':
            continue
        # Split on the first ':' only. The two-way unpack of split(':') raised
        # ValueError for any GO term containing another colon, e.g.
        # "F:ATP:ADP antiporter activity".
        pre, _sep, val = ref[2].partition(':')
        if pre == 'P':
            print(val)
Exemplo n.º 43
0
def main(fichier):
	"""
		Print the biological-process GO terms of the protein whose ID is on
		the first line of the file ``fichier``.
	"""
	from Bio import ExPASy
	from Bio import SwissProt

	# Close the input file once the ID has been read.
	with open(fichier,'r') as f:
		fline = f.readline().strip()
	handle = ExPASy.get_sprot_raw(fline)
	record = SwissProt.read(handle)
	go = []
	for i in record.cross_references:
		if i[0] == 'GO' and i[2].startswith('P:'):
			# Slice off the 2-character prefix. lstrip('P:') strips a
			# character *set*, so it also removed leading 'P'/':' characters
			# of the term itself ("P:P-body assembly" -> "-body assembly").
			go.append(i[2][2:])
	print('\n'.join(go))
Exemplo n.º 44
0
def main():
    """Write and print the biological-process GO terms of one UniProt entry.

    Reads the UniProt ID from ``problem_datasets/rosalind_dbpr.txt``, writes
    the terms to ``output/rosalind_dbpr_out.txt`` and prints them together
    with the gene name.
    """
    # Read the UniProt ID from a txt file.
    with open('problem_datasets/rosalind_dbpr.txt', 'r') as infile:
        uni_id = infile.read().strip()

    # Retrieve the data from UniProt (separated IDs by commas).
    raw_data = ExPASy.get_sprot_raw(uni_id)
    record = SwissProt.read(raw_data) # use SwissProt.parse for multiple proteins

    # Collect the relevant information. Restrict the scan to 'GO'
    # cross-references so that a 'P:'-prefixed third field from another
    # database is not reported as a biological process.
    go = []
    for i in record.cross_references:
        if i[0] == 'GO' and i[2].startswith('P:'):
            go.append(i[2][2:])

    # Output answer.
    with open('output/rosalind_dbpr_out.txt', 'w') as outfile:
        outfile.write('\n'.join(go))

    # Optional: Print answer and gene ID/name. The first token looks like
    # "Name=XYZ;": drop the 5-character prefix and the trailing ';'.
    name = record.gene_name.split(' ')[0][5:].rstrip(';')
    print('Gene:\n', name, ' (UniProt ID = ', uni_id,
          ')\n\nBiological Processes:\n', '\n'.join(go), sep='')
Exemplo n.º 45
0
def snp_uniprot(uniprotname, selection='(all)', label=1, name='', quiet=0):
    '''
DESCRIPTION

    Selects all UniProt annotated nsSNPs (natural variants) in given
    structure. Does a sequence alignment of UniProt sequence and PDB
    sequence.

USAGE

    snp_uniprot uniprotname [, selection [, label [, name [, quiet]]]]

ARGUMENTS

    uniprotname = string: UniProt reference (like HBB_HUMAN or P68871)

    selection = string: atom selection

    label = 0 or 1: Label CA atoms of nsSNPs with mutation {default: 1}

    name = string: name of new selection {default: nsSNPs}

EXAMPLE

    fetch 3HBT
    snp_uniprot ACTG_HUMAN, chain A

SEE ALSO

    snp_ncbi
    '''
    # Biopython is imported only when the command is actually run.
    from Bio import ExPASy, SwissProt

    # Download and parse the UniProt entry, then hand it to the shared
    # snp_common routine together with the unchanged user options.
    uniprot_record = SwissProt.read(ExPASy.get_sprot_raw(uniprotname))
    snp_common(uniprot_record, selection, label, name, quiet)
Exemplo n.º 46
0
# Write the ID and DE fields of every parsed record to a review file and
# collect the accession codes for the per-protein analysis below.
records = KeyWList.parse(handle)
codes = []
review = open("proteinas_uniprot.txt", "w")
for record in records:
    review.write("\n"+record['ID']+"\n")
    review.write("\n"+record['DE']+"\n")
    codes.append(record['AC'][:-1])# remove the trailing ";" from each accession code
review.close()

# individual analysis of the relevant proteins (based on the code developed by groups 10 and 7)
f = open("analise_reviewed.txt","w")
for code in codes:
    data = urllib.urlopen("http://www.uniprot.org/uniprot/" + code + ".txt")
    # NOTE(review): the snippet ends inside the ``try`` below; the matching
    # except/finally clause and the exit from ``while True`` are not visible.
    while True:
         try:
             record = SwissProt.read(data)
             # NOTE(review): the block below is written once per entry in
             # record.references although it never uses ``ref`` - confirm intent.
             for ref in record.references:
                 f.write("\n\n****Informacao sobre a proteina %s****\n" %code)
                 f.write("\n\nNome: %s\n" %record.entry_name)
                 f.write("\nClasse: %s\n" %record.data_class)
                 f.write("\nTipo de molecula: %s\n" %record.molecule_type)
                 f.write("\nTamanho da sequencia: %s\n" %record.sequence_length)
                 f.write("\nCodigo de Accesso: %s\n" %record.accessions)
                 f.write("\nCriado: %s\n"% str(record.created))
                 f.write("\nAdaptacao da sequencia: %s\n" %str(record.sequence_update))
                 f.write("\nAdaptacao da anotacao: %s\n" %str(record.annotation_update))
                 f.write("\nDescricao: %s\n" %record.description)
                 f.write("\nNome do gene: %s\n" %record.gene_name)
                 f.write("\nOrganismo: %s\n" %record.organism)
                 f.write("\nOrganelo: %s\n" %record.organelle)
                 f.write("\nClassificacao do Organismo: %s\n" %record.organism_classification)
Exemplo n.º 47
0
from Bio import ExPASy
from Bio import SwissProt

id = "Q5SLP9"
handle = ExPASy.get_sprot_raw(id)
record = SwissProt.read(handle)

# Print the biological-process terms: 'GO' cross-references whose third field
# starts with 'P:'. The database check keeps 'P:'-prefixed fields of other
# databases out of the output.
for x in record.cross_references:
    if x[0] == 'GO' and x[2][0:2] == 'P:':
        print(x[2][2:])
Exemplo n.º 48
0
#!/usr/bin/env python
import sys
from Bio import ExPASy
from Bio import SwissProt

if __name__ == '__main__':
    # The UniProt ID is everything read from stdin, stripped.
    rec = SwissProt.read(ExPASy.get_sprot_raw(sys.stdin.read().strip()))
    # Keep 'GO' cross-references whose third field starts with 'P' and take
    # the text between the first and second ':'.
    process_refs = [r for r in rec.cross_references
                    if r[0] == 'GO' and r[2].startswith('P')]
    gos = [r[2].split(':')[1] for r in process_refs]
    print('\n'.join(gos))
Exemplo n.º 49
0
# http://rosalind.info/problems/dbpr/

from Bio import ExPASy, SwissProt

if __name__ == '__main__':
    # Read the UniProt ID of the protein; the file is closed right away
    # instead of being left open.
    with open('rosalind_dbpr.txt') as id_file:
        uniprot_id = id_file.read().strip()

    # Download and parse the Swiss-Prot record for that ID
    record = SwissProt.read(ExPASy.get_sprot_raw(uniprot_id))

    # Getting a list of biological processes
    processes = [r[2].split(':')[1] for r in record.cross_references
           if r[0] == 'GO' and r[2].startswith('P')]

    print('\n'.join(processes))
Exemplo n.º 50
0
def protein_record(protein):
    """Return the SwissProt record of a protein with id protein."""
    # Several IDs may be given separated by commas, but SwissProt.read parses
    # a single record; SwissProt.parse is the call for multiple proteins.
    raw_entry = ExPASy.get_sprot_raw(protein)
    record = SwissProt.read(raw_entry)
    return record
Exemplo n.º 51
0
"""
BioPython + regular expression demo
based on http://www.pasteur.fr/recherche/unites/sis/formation/python/ch11s04.html
"""

from Bio import SwissProt
import re

# Read the record and close the file as soon as it has been parsed.
with open('ceru_human.sp') as fd:  # file descriptor (handle)
    r = SwissProt.read(fd)  # record from file
print(r.entry_name)
print(r.sequence)
PS00079 = 'G.[FYW].[LIVMFYW].[CST].{8,8}G[LM]...[LIVMFYW]' # pattern for regexp
p = re.compile(PS00079) # regular expression pattern object
m = p.search(r.sequence) # matching string in sequence
if m is None:
    # search() returns None when the pattern does not occur; calling
    # m.start() would then raise AttributeError.
    print('PS00079 pattern not found')
else:
    i = m.start() # index of start of match
    j = m.end() # index of end of match
    print(i)
    print(j)
    print(r.sequence[i:j]) # print a slice of the sequence

Exemplo n.º 52
0
def get_swissrec(accession):
    """Download the entry for ``accession`` from ExPASy and return the parsed record."""
    return SwissProt.read(ExPASy.get_sprot_raw(accession))
			#print "<br/>stringWithProteins:"+str(stringWithProteins)
			arrayWithProteins=stringWithProteins.split(",");
			#print "<br/>arrayWithProteins "+str(arrayWithProteins)
			# Walk through arrayWithProteins and store each protein in the proteins table if it is not there yet. Whether or not it is already stored, we need its id_protein to build the string
			# of protein ids that will be saved in the enzymes-proteins table later on
			arrayWithIdProteins=[]
			for protein in arrayWithProteins:
				# get the proteinName for this protein
				url="http://www.uniprot.org/uniprot/"+str(protein)+".txt"
				#print "<br/>"+url
				try:
					filehandle = urllib.urlopen(url)
				except:
					# On a connection problem, emit a redirect header carrying an error code and stop.
					print "Location: "+str(redirectionKOcurated)+"error=UniprotConnectionProblem&idEvidence="+idEvidence+" \n\n"
					sys.exit()
				record = SwissProt.read(filehandle)
				#print dir(record)
				description=str(record.description)
				# The description can look like this:
				#RecName: Full=Aspartate aminotransferase, mitochondrial; Short=mAspAT; EC=2.6.1.1; AltName: Full=Fatty acid-binding protein; Short=FABP-1; AltName: Full=Glutamate oxaloacetate transaminase 2; AltName: Full=Plasma membrane-associated fatty acid-binding protein; Short=FABPpm; AltName: Full=Transaminase A; Flags: Fragment; 
				# Keep only the first part.
				arrayNombres=description.split(";")
				proteinName=arrayNombres[0]
				# proteinName now looks like this:
				##RecName: Full=Aspartate aminotransferase, mitochondrial
				# The "RecName: Full=" part has to be removed
				proteinName=proteinName.replace("RecName: Full=","")
				proteinName=proteinName.replace("SubName: Full=","")
				# All the data needed to store the protein is now available
				filehandle.close()
				# NOTE(review): the query is built by string concatenation from ``protein``; consider a parameterized query.
				selectProtein="select id_protein, id_uniprot, protein_name from proteins where id_uniprot='"+str(protein)+"'"
Exemplo n.º 54
0
    # First pass: parse ``datafile`` with SeqIO using the "swiss" format.
    print "*Using SequenceParser"
    test_handle = open(datafile)
    seq_record = SeqIO.read(test_handle, "swiss")
    test_handle.close()

    # The SeqIO result must be a SeqRecord.
    assert isinstance(seq_record, SeqRecord)

    print seq_record.id
    print seq_record.name
    print seq_record.description
    print repr(seq_record.seq)

    # Second pass: parse the same file with SwissProt.read.
    print "*Using RecordParser"
    test_handle = open(datafile)
    record = SwissProt.read(test_handle)
    test_handle.close()

    # test a couple of things on the record -- this is not exhaustive
    print record.entry_name
    print record.accessions
    print record.organism_classification
    print record.seqinfo
    
    # Print every feature of the record.
    print "***Features:"
    for feature in record.features:
        print feature

    # Print the authors of every reference of the record.
    print "***References:"
    for ref in record.references:
        print "authors:", ref.authors
Exemplo n.º 55
0
 # Keep the part of each PDB_subset entry before the first '_'.
 PDB_subset_nochain = [x.split('_')[0] for x in PDB_subset]
 # parse the pdb headers for DBREF to uniprot
 pdb_to_uniprot = find_uniprot_in_pdb(PDB_subset_nochain, pdb_folder)
 #determine the uniprot references to fetch
 # (each distinct reference is listed once, in first-seen order)
 to_fetch = []
 for entry in pdb_to_uniprot.keys():
     for ref in pdb_to_uniprot[entry].values():
         if ref not in to_fetch:
             to_fetch.append(ref)
 #fetch uniprot references as Record objects, then move them to a serializable dict
 uniprot_records = {}
 uniprot_failed = []
 for ref in to_fetch:
     try:
         with ExPASy.get_sprot_raw(ref) as handle:
             uniprot_records[ref] = {'record' : SwissProt.read(handle)}
     except (HTTPError, ValueError):
         uniprot_failed.append(ref) #deprecated uniprot entries fail on urllib problems
 # Failed references go to a sibling file: '.' in the file name becomes '_failed.'.
 serialize(uniprot_records, uniprot_folder, uniprot_file)
 serialize(uniprot_failed, uniprot_folder, uniprot_file.replace('.', '_failed.'))
 
 ###### Let's get all the pdb xreferences from the uniprot entries we have, and put them in
 # a sensible data structure
 # NOTE: dict.copy() is shallow, so clean[ref] and uniprot_records[ref] are the
 # same inner dict; the keys added below appear in both.
 clean = uniprot_records.copy()
 for ref in list(uniprot_records.keys()):
     clean[ref]['xrefs'] = {}
     clean[ref]['seq'] = uniprot_records[ref]['record'].sequence
     for xref in uniprot_records[ref]['record'].cross_references:
         #xref format ('PDB', ....... ,'A=1-451') for what we want
         if xref[0] == 'PDB':
             # possible format (1): E=28-337, F=731-744 - this will raise ValueError
Exemplo n.º 56
0
# input CSV file should be one line per protein, in the format:
# [SwissProt ID],[Domain start residue],[Domain stop],[cDNA sequence]

import re, csv, sys
from Bio import ExPASy, SwissProt, SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# NOTE(review): the file object passed to csv.reader is never closed.
reader = csv.reader(open('test.csv'))
extracted = []
j=0

for row in reader:
    # Column 0 holds the SwissProt ID used for the ExPASy lookup.
    input_prot = row[0]
    get_prot = ExPASy.get_sprot_raw(input_prot)
    prot_record = SwissProt.read(get_prot)
    get_prot.close()
    prot_seq = prot_record.sequence
    prot_gene = prot_record.gene_name
    # Columns 1 and 2 are the 1-based, inclusive domain start and stop residues.
    prot_domain = prot_seq[int(row[1])-1:int(row[2])]
    cdna = Seq(row[3], IUPAC.unambiguous_dna)

    # NOTE(review): opened in 'w' mode inside the row loop, so the output file
    # is re-created (truncated) for every input row - confirm this is intended.
    outputfile = open('cDNA_extracted.csv', 'w')
    writer = csv.writer(outputfile)
    i=0
    # Steps through each possible frame of the input cDNA
    while i < 3:
        frame = cdna[i::]
        trans = frame.translate()
        # NOTE(review): the domain sequence is used as a regular expression pattern.
        orf_find = re.search(str(prot_domain), str(trans))
        if orf_find:
Exemplo n.º 57
0
def SWAT(id):
    """Return the sequence of the Swiss-Prot entry ``id``."""
    # Several IDs can be separated by commas, but SwissProt.read parses one
    # record; SwissProt.parse is the call for multiple proteins.
    return SwissProt.read(ExPASy.get_sprot_raw(id)).sequence
Exemplo n.º 58
0
from Bio import SeqIO
from Bio import SwissProt

# ExPASy.get_sprot_raw is called below, but the imports above only provide
# SeqIO and SwissProt.
from Bio import ExPASy

# Read the file of interest
record = SeqIO.read("sequence.gb", "genbank")

# Associate a Swissprot reference with each feature (locus tag -> accession)
acess= {"lpg2594":"Q5ZSC5","lpg2608":"Q5ZSB1","lpg26158":"Q5ZSA4",
        "lpg2624":"Q5ZS95","lpg2636":"Q5ZS83","lpg2645":"Q5ZS74",
        "lpg2657":"Q5ZS62","lpg2709":"Q5ZS10","lpg2768":"Q5ZRV8",}

# Prefix removed from the description before it is written out.
recname_prefix = "RecName: Full="

# Export the relevant information; the ``with`` closes the file even when a
# lookup fails half-way through.
with open("My_Swissprot.txt", "w") as save_file:
    save_file.write("SWISSPROT REGIST" + "\n")
    save_file.write("\n")
    for f in record.features:
        if f.type != "CDS":
            continue
        locus_tag = f.qualifiers["locus_tag"][0]
        # Check whether the feature has a SwissProt entry
        if locus_tag not in acess:
            continue
        handle = ExPASy.get_sprot_raw(acess[locus_tag])
        try:
            swiss_record = SwissProt.read(handle)  # SwissProt.Record object
        finally:
            handle.close()
        # Remove the prefix itself. str.strip("RecName: Full=") strips a
        # character *set* and could also eat leading letters of the name.
        description = str(swiss_record.description)
        if description.startswith(recname_prefix):
            description = description[len(recname_prefix):]
        text1= "Gene name: " + locus_tag + "\n" + "Entry name: " + swiss_record.entry_name + "\n"
        text2= "Sequence length: " + str(swiss_record.sequence_length)+ "\n" + "Organism: " + str(swiss_record.organism) + "\n"
        text3= "Organism Classification: " + str(swiss_record.organism_classification) + "\n" + "Taxonomic ID: " + str(swiss_record.taxonomy_id[0])+ "\n"
        text4= "Description: " + description + "\n"
        save_file.write(text1+text2+text3+text4)
        save_file.write("\n")

# Done
print("Registo exportado com sucesso!")
Exemplo n.º 59
0
def getgo(id):
	"""Print the GO process names of a Swiss-Prot entry, one per line.

	Uses 'GO' cross-references whose third field starts with 'P' and prints
	the text between the first and second ':' of that field.
	"""
	entry = SwissProt.read(ExPASy.get_sprot_raw(id))
	names = []
	for xref in entry.cross_references:
		if xref[0] == "GO" and xref[2].startswith('P'):
			names.append(xref[2].split(":")[1])
	print("\n".join(names))
Exemplo n.º 60
0
#!/usr/bin/python

from Bio import ExPASy
from Bio import SwissProt
# Several IDs can be given separated by commas, but SwissProt.read parses a
# single record; SwissProt.parse is the call for multiple proteins.
handle = ExPASy.get_sprot_raw('B5ZC00')
record = SwissProt.read(handle)

# Show the attribute names available on the parsed record.
attribute_names = dir(record)
print(attribute_names)