Пример #1
0
 def test_condense_xml_entry(self):
     """Condense each child of ``self.root`` and verify ``self.entry``.

     After every call to ``refparse.condense_xml_entry`` the test checks
     that ``self.entry`` still has an accession, a name, a recommended
     full protein name and a sequence (each with non-empty text), plus an
     organism element and a proteinExistence element with a ``depth``
     child.
     """
     # Text values that count as "missing" for a populated element.
     blank = [None, '']

     for entry in self.root:
         refparse.condense_xml_entry(entry)

         # NOTE(review): the assertions inspect self.entry, not the loop
         # variable `entry` -- confirm that this is intentional.
         checked = self.entry

         accession = checked.find(UP+'accession')
         self.assertNotEqual(accession, None)
         self.assertTrue(accession.text not in blank)

         name = checked.find(UP+'name')
         self.assertNotEqual(name, None)
         self.assertTrue(name.text not in blank)

         # protein -> recommendedName -> fullName; a missing intermediate
         # element surfaces as an AttributeError rather than a test failure.
         protein = checked.find(UP+'protein')
         recommended = protein.find(UP+'recommendedName')
         full_name = recommended.find(UP+'fullName')
         self.assertNotEqual(full_name, None)
         self.assertTrue(full_name.text not in blank)

         organism = checked.find(UP+'organism')
         self.assertNotEqual(organism, None)

         existence = checked.find(UP+'proteinExistence')
         self.assertNotEqual(existence, None)
         self.assertNotEqual(existence.find(UP+'depth'), None)

         sequence = checked.find(UP+'sequence')
         self.assertNotEqual(sequence, None)
         self.assertTrue(sequence.text not in blank)
Пример #2
0
    except Exception, e:
        print >> sys.stderr, "failed: %s" % e
        exit(2)

    # Reference XML/FASTA
    # Both trees start out unset; `uniprot` is filled in below when a
    # reference XML path was supplied.
    ensembl, uniprot = None, None
    if options.reference_xml != None:
        try:
            # refXml is first the absolute path, then rebound to the open
            # file object. NOTE(review): the handle is not closed anywhere
            # in the visible code.
            refXml = os.path.abspath(options.reference_xml)
            refXml = open(refXml, 'r')
            p = et.XMLParser(
                remove_blank_text=True)  #required for pretty additions
            uniprot = et.parse(refXml, p)
            uniprot_root = uniprot.getroot()
            # Condense every direct child of the root element in place.
            for entry in uniprot_root:
                refparse.condense_xml_entry(entry)
        except Exception, e:
            # Any failure while opening, parsing or condensing is reported
            # on stderr and ends the program with exit status 2.
            print >> sys.stderr, "Parsing and/or condensing reference xml failed: %s" % e
            exit(2)
    else:
        # NOTE(review): unlike the failure path above, this branch only
        # prints a message and does not exit -- confirm that is intended.
        print >> sys.stderr, "failed: no UniProt reference protein database specified"

    #Read the Ensembl fasta into an XML structure
    if options.protein_fasta != None:
        try:
            refFasta = os.path.abspath(options.protein_fasta)
            refFasta = open(refFasta, 'r')
            ensembl_root = et.Element(UP + 'uniprot', nsmap=NAMESPACE_MAP)
            ensembl = et.ElementTree(ensembl_root)
            refparse.read_fasta_to_xml(
                ensembl_root, refFasta)  #TODO: singluarize the entry here
     protein_fasta = open(protein_fasta, 'r')
     protein_fasta = refparse.read_protein_fasta(protein_fasta)
 except Exception, e:
     print >> sys.stderr, "failed: %s" % e
     exit(2)
 
 # Reference XML/FASTA
 # Build `db` (element tree) and `root` (its root element) from the
 # reference XML when given, otherwise from the protein FASTA.
 if options.reference_xml != None:
     try:
         # refXml is first the absolute path, then rebound to the open file
         # object. NOTE(review): the handle is not closed in the visible code.
         refXml = os.path.abspath(options.reference_xml)
         refXml = open(refXml, 'r')
         p = et.XMLParser(remove_blank_text=True) #required for pretty additions
         db = et.parse(refXml, p)
         root = db.getroot()
         # Condense every direct child of the root element in place.
         for entry in root:
             refparse.condense_xml_entry(entry)
     except Exception, e:
         # Report the failure on stderr and end the program with status 2.
         print >> sys.stderr, "Parsing and/or condensing reference xml failed: %s" % e
         exit(2)
 elif options.protein_fasta != None:
     try:
         # Same path-then-file-object rebinding as in the XML branch.
         refFasta = os.path.abspath(options.protein_fasta)
         refFasta = open(refFasta, 'r')
         # Start from an empty namespaced 'uniprot' root element and pass it,
         # together with the open FASTA file, to read_fasta_to_xml.
         root = et.Element(UP+'uniprot', nsmap=NAMESPACE_MAP)
         db = et.ElementTree(root)
         refparse.read_fasta_to_xml(root, refFasta)
     except Exception, e:
         # Report the failure on stderr and end the program with status 2.
         print >> sys.stderr, "Parsing reference fasta failed: %s" %e
         exit(2)
 else:
     # NOTE(review): this branch only prints a message and does not exit,
     # so `db` and `root` stay unassigned here -- confirm that is intended.
     print >> sys.stderr, "failed: no reference database specified"