c_repo = arepa.cwd() c_strTaxid = c_repo + "_taxid_" c_strMode = "mode_" def symbol(hashSymbols, strValue): return hashSymbols.setdefault(strValue, len(hashSymbols)) if len(sys.argv) < 1: raise Exception("Usage: string2c.py [taxa] < <string.txt>") iMin = int(sys.argv[1]) strTaxa = None if (len(sys.argv) <= 2) else sys.argv[2] setTaxa = arepa.taxa(strTaxa) hashSymbols = {} hashTaxa = {} fFirst = True for astrLine in csv.reader(sys.stdin, csv.excel_tab): if astrLine and astrLine[0].startswith("#"): continue if fFirst: fFirst = False continue strA, strB, strMode, strAction, strActor, strScore = astrLine strTax1, strTax2 = (re.sub(r'\..*$', "", s) for s in (strA, strB)) if not strTax1 or (strTax1 != strTax2): strTax1 = "0" if setTaxa and (strTax1 not in setTaxa):
if strName == "experiment": self.m_strAccession = self.m_strSpecies = None def endElement(self, strName): if ( strName == "experiment" ) and self.m_strAccession and self.m_strSpecies and \ ( ( not self.m_setTaxa ) or ( self.m_strSpecies in self.m_setTaxa ) ): self.m_setIDs.add((self.m_strAccession, self.m_strSpecies)) def characters(self, strText): if self.m_strTag == "accession": if not self.m_strAccession: self.m_strAccession = strText elif self.m_strTag == "species": if not self.m_strSpecies: self.m_strSpecies = strText def endDocument(self): print(("\n".join("\t".join(astrCur) for astrCur in self.m_setIDs))) strTaxa = None if (len(sys.argv) <= 1) else sys.argv[1] setTaxa = arepa.taxa(strTaxa, True) pSAX = xml.sax.make_parser() pSAX.setContentHandler(CParser(setTaxa)) pSAX.parse(sys.stdin)