示例#1
0
# Value returned by arepa.cwd(); presumably names the current repository
# directory -- TODO confirm against the arepa module.
c_repo = arepa.cwd()
# Prefix formed from c_repo followed by the literal "_taxid_".
c_strTaxid = c_repo + "_taxid_"
# Literal prefix "mode_".
c_strMode = "mode_"


def symbol(hashSymbols, strValue):
    """Return the integer code assigned to strValue, allocating one if needed.

    A value that is not yet a key of hashSymbols is stored with the current
    size of the mapping as its code, so codes are handed out as 0, 1, 2, ...
    in order of first appearance when starting from an empty mapping.
    hashSymbols is modified in place.
    """
    if strValue not in hashSymbols:
        # The next free code is the number of entries already present.
        hashSymbols[strValue] = len(hashSymbols)
    return hashSymbols[strValue]


# Command line: the first argument is required and parsed as an integer,
# the second (taxa) is optional.
# sys.argv[0] is always the script name, so the required argument is
# missing whenever fewer than two entries are present.
if len(sys.argv) < 2:
    raise Exception("Usage: string2c.py <min> [taxa] < <string.txt>")
iMin = int(sys.argv[1])
# None when no taxa argument was supplied.
strTaxa = None if (len(sys.argv) <= 2) else sys.argv[2]

# Result of arepa.taxa() for the optional taxa argument; it is later used for
# membership tests -- presumably a set of taxon IDs, TODO confirm.
setTaxa = arepa.taxa(strTaxa)

hashSymbols = {}
hashTaxa = {}
fFirst = True
for astrLine in csv.reader(sys.stdin, csv.excel_tab):
    if astrLine and astrLine[0].startswith("#"):
        continue
    if fFirst:
        fFirst = False
        continue
    strA, strB, strMode, strAction, strActor, strScore = astrLine
    strTax1, strTax2 = (re.sub(r'\..*$', "", s) for s in (strA, strB))
    if not strTax1 or (strTax1 != strTax2):
        strTax1 = "0"
    if setTaxa and (strTax1 not in setTaxa):
示例#2
0
        if strName == "experiment":
            self.m_strAccession = self.m_strSpecies = None

    def endElement(self, strName):
        """Record the current (accession, species) pair when an experiment closes.

        Nothing happens for other element names, when either field is still
        empty, or when a non-empty taxa filter does not contain the species.
        """
        if strName != "experiment":
            return
        if not (self.m_strAccession and self.m_strSpecies):
            return
        # An empty/None filter accepts every species.
        if self.m_setTaxa and (self.m_strSpecies not in self.m_setTaxa):
            return
        self.m_setIDs.add((self.m_strAccession, self.m_strSpecies))

    def characters(self, strText):
        """Store strText as accession or species, depending on the current tag.

        A field is only written while it is still empty, so the first
        non-empty text seen for a tag is kept and later text is ignored.

        NOTE(review): SAX parsers may deliver a single text node in several
        calls; only the first chunk would be kept here -- confirm that is
        acceptable for the input documents.
        """
        strTag = self.m_strTag
        if strTag == "accession" and not self.m_strAccession:
            self.m_strAccession = strText
        elif strTag == "species" and not self.m_strSpecies:
            self.m_strSpecies = strText

    def endDocument(self):
        """Print every collected ID tuple as one tab-separated line.

        Rows are emitted in sorted order: iterating the set directly yields an
        order that can change between runs because string hashing is
        randomized, which makes the output non-reproducible. An empty set
        still prints a single empty line.
        """
        astrRows = ["\t".join(astrCur) for astrCur in sorted(self.m_setIDs)]
        print("\n".join(astrRows))


# Optional first command-line argument; None when it is absent.
strTaxa = sys.argv[1] if (len(sys.argv) > 1) else None

# Result of arepa.taxa() for that argument; handed to the handler as its
# taxa filter.
setTaxa = arepa.taxa(strTaxa, True)

# Parse the XML document arriving on standard input with a CParser handler.
pSAX = xml.sax.make_parser()
pSAX.setContentHandler(CParser(setTaxa))
pSAX.parse(sys.stdin)