Exemplo n.º 1
0
def getUNIPid(line):
    """Resolve the sequence id sliced out of *line* to a UniProt id and annotate it.

    The id is mapped with ``uniprot.batch_uniprot_id_mapping_pairs`` (from
    'P_REFSEQ_AC' to 'ACC'). Each candidate is then looked up on uniprot.org
    to read its review status. The first candidate with a row that does not
    contain "unreviewed" is selected; if there is none, the first candidate
    that returned any non-header row is used as a fallback.

    Args:
        line: the record holding the sequence id.
            NOTE(review): the slice end is computed from ``line[0].find(']')``,
            i.e. from the first element of ``line`` only, which looks
            unintended if ``line`` is a plain string -- kept as-is, confirm
            against the caller.

    Returns:
        A one-element list holding ``GetGoAnnotation(<selected id>)``. The
        selected id is the empty string when no candidate produced a row.
    """
    seqid = [line[11:line[0].find(']') - 1]]
    pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC', 'ACC', seqid)
    reviewedpair = ""
    # This part of the code determines which of the UniProt ids are reviewed
    # and keeps the first one in the list.
    for y in pairs:
        uniCode = y[1]
        req = urllib2.Request(
            'http://www.uniprot.org/uniprot/?query={}&sort=score&columns=reviewed&format=tab'
            .format(uniCode))
        web = urllib2.urlopen(req)
        found_reviewed = False
        try:
            for i in web:
                if i.find("Status") != -1:
                    # Skip rows containing "Status" (presumably the column header).
                    continue
                if i.find("unreviewed") == -1:
                    reviewedpair = uniCode
                    found_reviewed = True
                    break
                if reviewedpair == "":
                    # Unreviewed fallback: remember only the first one seen.
                    reviewedpair = uniCode
        finally:
            # Always release the HTTP connection, even if reading fails.
            web.close()
        if found_reviewed:
            # A reviewed id can never be replaced by a later candidate, so
            # further requests would be wasted network round-trips.
            break

    return [GetGoAnnotation(reviewedpair)]
Exemplo n.º 2
0
def map_to_refseq(seqids):
    """Map each id in *seqids* to the second element of its 'P_REFSEQ_AC' pair.

    The ids are first converted with
    ``uniprot.sequentially_convert_to_uniprot_id`` (using the scratch cache
    file "func.cache.json"), then the resulting ids are mapped from 'ACC' to
    'P_REFSEQ_AC' with ``uniprot.batch_uniprot_id_mapping_pairs``.

    Args:
        seqids: iterable of sequence identifiers.

    Returns:
        dict of seqid -> mapped id. A seqid that could not be converted, or
        whose converted id has no pair, is left out. When several pairs share
        the same first element, the last one wins.
    """
    cache_json = "func.cache.json"
    uniprot_mapping = uniprot.sequentially_convert_to_uniprot_id(seqids, cache_json)
    uniprot_ids = uniprot_mapping.values()
    pairs = uniprot.batch_uniprot_id_mapping_pairs("ACC", "P_REFSEQ_AC", uniprot_ids)
    # Index the pairs once instead of rescanning the whole list per seqid.
    refseq_by_uniprot = dict((pair[0], pair[1]) for pair in pairs)
    mapping = {}
    for seqid in seqids:
        if seqid not in uniprot_mapping:
            # Without this guard the lookup below would reuse the id of the
            # previous seqid (or hit an unbound name on the first iteration).
            continue
        uniprot_id = uniprot_mapping[seqid]
        if uniprot_id in refseq_by_uniprot:
            mapping[seqid] = refseq_by_uniprot[uniprot_id]
    # The cache file may not have been written (e.g. nothing to convert);
    # only delete it when it is actually there.
    if os.path.exists(cache_json):
        os.remove(cache_json)
    return mapping
Exemplo n.º 3
0
def map_to_refseq(seqids):
  """Return a dict mapping each seqid to its 'P_REFSEQ_AC' counterpart.

  Conversion happens in two steps: *seqids* are converted with
  ``uniprot.sequentially_convert_to_uniprot_id`` (scratch cache file
  'func.cache.json'), and the converted ids are mapped from 'ACC' to
  'P_REFSEQ_AC' with ``uniprot.batch_uniprot_id_mapping_pairs``.

  Args:
    seqids: iterable of sequence identifiers.

  Returns:
    dict of seqid -> mapped id. Seqids that were not converted, or whose
    converted id appears in no pair, are omitted. If several pairs start
    with the same id, the last pair wins.
  """
  cache_json = 'func.cache.json'
  uniprot_mapping = uniprot.sequentially_convert_to_uniprot_id(
      seqids, cache_json)
  uniprot_ids = uniprot_mapping.values()
  pairs = uniprot.batch_uniprot_id_mapping_pairs(
      'ACC', 'P_REFSEQ_AC', uniprot_ids)
  # One pass over the pairs replaces the per-seqid scan of the whole list.
  refseq_by_uniprot = dict((pair[0], pair[1]) for pair in pairs)
  mapping = {}
  for seqid in seqids:
    if seqid not in uniprot_mapping:
      # Skip unconverted seqids; otherwise the previous iteration's id
      # (or an unbound name on the first iteration) would be used.
      continue
    uniprot_id = uniprot_mapping[seqid]
    if uniprot_id in refseq_by_uniprot:
      mapping[seqid] = refseq_by_uniprot[uniprot_id]
  # Only remove the cache file if it was actually created.
  if os.path.exists(cache_json):
    os.remove(cache_json)
  return mapping
Exemplo n.º 4
0
def getUNIPid(line):
  """Pick a UniProt id for the sequence id sliced out of *line* and annotate it.

  The sliced id is mapped from 'P_REFSEQ_AC' to 'ACC' with
  ``uniprot.batch_uniprot_id_mapping_pairs``. Every candidate is queried on
  uniprot.org for its review status: the first candidate with a row that does
  not contain "unreviewed" is chosen, otherwise the first candidate that
  returned any non-header row.

  Args:
    line: the record holding the sequence id.
      NOTE(review): the slice end comes from ``line[0].find(']')``, which only
      inspects the first element of ``line``; this looks unintended if
      ``line`` is a plain string -- kept unchanged, confirm with the caller.

  Returns:
    A one-element list with ``GetGoAnnotation(<chosen id>)``; the chosen id is
    the empty string when no candidate produced a row.
  """
  # Local import so the block does not depend on a file-level import.
  from contextlib import closing

  seqid = [line[11:line[0].find(']')-1]]
  pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC','ACC',seqid)
  reviewedpair = ""
  # This part of the code determines which of the UniProt ids are reviewed
  # and keeps the first one in the list.
  for y in pairs:
    uniCode = y[1]
    req = urllib2.Request('http://www.uniprot.org/uniprot/?query={}&sort=score&columns=reviewed&format=tab'.format(uniCode))
    is_reviewed = False
    # closing() guarantees the HTTP response is released on every exit path.
    with closing(urllib2.urlopen(req)) as web:
      for i in web:
        if i.find("Status") != -1:
          # Rows containing "Status" are skipped (presumably the header).
          continue
        if i.find("unreviewed") == -1:
          reviewedpair = uniCode
          is_reviewed = True
          break
        if reviewedpair == "":
          # Keep only the first unreviewed candidate as a fallback.
          reviewedpair = uniCode
    if is_reviewed:
      # Nothing later can override a reviewed id, so stop issuing requests.
      break

  return [GetGoAnnotation(reviewedpair)]
Exemplo n.º 5
0
import os
import uniprot
import pprint
import sys

# Clean up caches
# NOTE(review): shells out to `rm`, so this assumes a POSIX-like environment;
# whether it also clears the 'cache' directory used below should be confirmed.
os.system('rm cache*')

# Example 1 - reading a fasta file
seqids, fastas = uniprot.read_fasta('example.fasta')
pprint.pprint(seqids, indent=2)

# Example 2 - map identifiers for RefSeq to Uniprot
seqids = "NP_000508.1  NP_001018081.3".split()
pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC', 'ACC', seqids)
pprint.pprint(pairs, indent=2)

# Example 3 - get UniProt metadata
# Only the second element of each pair is passed on.
uniprot_seqids = [j for i, j in pairs]
uniprot_data = uniprot.batch_uniprot_metadata(uniprot_seqids, 'cache')
pprint.pprint(uniprot_data, indent=2)

# Example 4 - parse for isoforms in metadata
# Presumably this file is written by the batch_uniprot_metadata call above
# (same 'cache' name) -- confirm. The `with` block closes the file handle
# instead of leaving it to the garbage collector.
with open('cache/metadata.0.txt') as metadata_file:
    text = metadata_file.read()
uniprot_data = uniprot.parse_isoforms(text)
pprint.pprint(uniprot_data)

# Example 5 - chaining commands to map seqids
seqids = "EFG_MYCA1 YP_885981.1 ENSG00000196176 Q91ZU6-8".split()
uniprot_data = uniprot.get_metadata_with_some_seqid_conversions(
    seqids, 'cache2')
Exemplo n.º 6
0

# Example 1 - reading a fasta file
# The result of read_fasta is unpacked into two values; only seqids is
# printed here.

seqids, fastas = uniprot.read_fasta("example.fasta")
pprint.pprint(seqids, indent=2)


# Example 2 - batch read identifier mappings with
# prespecified identifier types
# (here from "P_REFSEQ_AC" to "ACC"; the whitespace-separated ids in the
# triple-quoted string are split into a list first)

seqids = """
NP_000508.1  NP_001018081.3
""".split()

pairs = uniprot.batch_uniprot_id_mapping_pairs("P_REFSEQ_AC", "ACC", seqids)

pprint.pprint(pairs, indent=2)


# Example 3 - sequential identifier mapping to UniProt
# identifiers using robust but slow method
# NOTE(review): "cache.json" is presumably a cache file used by the
# conversion -- confirm against the uniprot module.

seqids = """
EFG_MYCA1 YP_885981.1 CpC231_1796
""".split()

mapping = uniprot.sequentially_convert_to_uniprot_id(seqids, "cache.json")

# Keep only the mapped values; the original seqids (the keys) are dropped.
uniprot_seqids = mapping.values()
Exemplo n.º 7
0
import os
import uniprot
import pprint
import sys

# Clean up caches
# NOTE(review): shells out to `rm`, so this assumes a POSIX-like environment;
# whether it also clears the 'cache' directory used below should be confirmed.
os.system('rm cache*')

# Example 1 - reading a fasta file
seqids, fastas = uniprot.read_fasta('example.fasta')
pprint.pprint(seqids, indent=2)

# Example 2 - map identifiers for RefSeq to Uniprot
seqids = "NP_000508.1  NP_001018081.3".split()
pairs = uniprot.batch_uniprot_id_mapping_pairs(
  'P_REFSEQ_AC', 'ACC', seqids)
pprint.pprint(pairs, indent=2)

# Example 3 - get UniProt metadata
# Only the second element of each pair is passed on.
uniprot_seqids = [j for i,j in pairs]
uniprot_data = uniprot.batch_uniprot_metadata(
    uniprot_seqids, 'cache')
pprint.pprint(uniprot_data, indent=2)

# Example 4 - parse for isoforms in metadata
# Presumably this file is written by the batch_uniprot_metadata call above
# (same 'cache' name) -- confirm. The `with` block closes the file handle
# instead of leaving it to the garbage collector.
with open('cache/metadata.0.txt') as metadata_file:
    text = metadata_file.read()
uniprot_data = uniprot.parse_isoforms(text)
pprint.pprint(uniprot_data)

# Example 5 - chaining commands to map seqids
seqids = "EFG_MYCA1 YP_885981.1 ENSG00000196176 Q91ZU6-8".split()