def start_element(name, attrs):
    """Handle an opening tag: update parser state, then record the tag.

    Dispatches on the element name to the matching ``proc*Start`` routine
    and afterwards always appends the serialized start tag to
    ``currentDoc``.

    Arguments:
        name -- the element (tag) name.
        attrs -- mapping from attribute names to attribute values.
    """
    debug('<' + name + '>' + str(attrs))

    if name == SENTENCE:
        procSentStart()
    elif CHUNK.match(name):
        procChunkStart(name)
    elif name == TOKEN:
        # Missing attributes fall back to defaults. Using .get() rather
        # than a bare ``except:`` keeps unrelated errors (and
        # KeyboardInterrupt/SystemExit) from being silently swallowed.
        pos = attrs.get(POS, "PUNCT")
        markedEvent = attrs.get(MARKEDEVENT)
        lemma = attrs.get(LEMMA)
        procTokStart(pos, markedEvent, lemma)
    elif name == CHUNKHEAD:
        procChunkHeadStart()
    elif name == CHUNKPOSS:
        procPOSStart()

    # Every start tag, whatever its name, is added to the document.
    currentDoc.addDocNode(startElementString(name, attrs))
def start_element(name, attrs):
    """Process a start tag and append it to the current document.

    The element name selects which ``proc*Start`` routine is run (if
    any); the serialized start tag is then added to ``currentDoc``
    unconditionally.

    Arguments:
        name -- the element (tag) name.
        attrs -- mapping from attribute names to attribute values.
    """
    debug('<' + name + '>' + str(attrs))

    if name == SENTENCE:
        procSentStart()
    elif CHUNK.match(name):
        procChunkStart(name)
    elif name == TOKEN:
        # Absent attributes get defaults: "PUNCT" for the part of speech,
        # None for the marked event and the lemma. .get() replaces the
        # former bare ``except:`` clauses, which hid every kind of error.
        pos = attrs.get(POS, "PUNCT")
        markedEvent = attrs.get(MARKEDEVENT)
        lemma = attrs.get(LEMMA)
        procTokStart(pos, markedEvent, lemma)
    elif name == CHUNKHEAD:
        procChunkHeadStart()
    elif name == CHUNKPOSS:
        procPOSStart()

    # The start tag is recorded for all element names.
    currentDoc.addDocNode(startElementString(name, attrs))
def start_element(name, attrs):
    """Handle an opening tag: update parser state, then record the tag.

    First dispatches on the element name to the matching ``proc*Start``
    routine. Then adds a node to ``currentDoc``: an empty-content string
    for names in ``EMPTY_TAGS``, nothing for ``TIMEML``, and a regular
    start-element string for everything else.

    Arguments:
        name -- the element (tag) name.
        attrs -- mutable mapping from attribute names to values; it is
            modified in place for 'MAKEINSTANCE' elements (see below).

    Raises KeyError if a 'MAKEINSTANCE' element has no EPOS attribute.
    """
    if name == SENTENCE:
        procSentStart()
    elif CHUNK.match(name):
        procChunkStart(name)
    elif name == TOKEN:
        # Tokens without a POS attribute are given the tag "PUNCT".
        # .get() replaces dict.has_key(), which no longer exists in
        # Python 3.
        procTokStart(attrs.get(POS, "PUNCT"))
    elif name == CHUNKHEAD:
        procChunkHeadStart()
    elif CHUNKVERBAL.match(name):
        procChunkVerbalStart()
    elif name == CHUNKPOSS:
        procPOSStart()
    elif name == EVENT:
        procEventStart(attrs)
    elif name == INSTANCE:
        procInstanceStart(attrs)
    elif name == TIMEX:
        procTimexStart(attrs)

    if name in EMPTY_TAGS:
        if name == 'MAKEINSTANCE':
            # HACK: move the value stored under EPOS to POS, to avoid
            # confusion with the lex.pos attribute.
            attrs[POS] = attrs.pop(EPOS)
        currentDoc.addDocNode(emptyContentString(name, attrs))
    elif name != TIMEML:
        # The TIMEML element itself is not added to the document.
        currentDoc.addDocNode(startElementString(name, attrs))
def start_element(name, attrs):
    """Process a start tag and add the corresponding document node.

    The element name first selects which ``proc*Start`` routine is run
    (if any). A node is then added to ``currentDoc``: an empty-content
    string for names in ``EMPTY_TAGS``, a regular start-element string
    for any other name except ``TIMEML``, for which nothing is added.

    Arguments:
        name -- the element (tag) name.
        attrs -- mutable mapping from attribute names to values; it is
            modified in place for "MAKEINSTANCE" elements (see below).

    Raises KeyError if a "MAKEINSTANCE" element has no EPOS attribute.
    """
    if name == SENTENCE:
        procSentStart()
    elif CHUNK.match(name):
        procChunkStart(name)
    elif name == TOKEN:
        # A token lacking the POS attribute defaults to "PUNCT". The
        # lookup uses .get() because dict.has_key() was removed in
        # Python 3.
        procTokStart(attrs.get(POS, "PUNCT"))
    elif name == CHUNKHEAD:
        procChunkHeadStart()
    elif CHUNKVERBAL.match(name):
        procChunkVerbalStart()
    elif name == CHUNKPOSS:
        procPOSStart()
    elif name == EVENT:
        procEventStart(attrs)
    elif name == INSTANCE:
        procInstanceStart(attrs)
    elif name == TIMEX:
        procTimexStart(attrs)

    if name in EMPTY_TAGS:
        if name == "MAKEINSTANCE":
            # HACK: store the EPOS value under POS instead, to avoid
            # confusion with the lex.pos attribute.
            attrs[POS] = attrs.pop(EPOS)
        currentDoc.addDocNode(emptyContentString(name, attrs))
    elif name != TIMEML:
        # Nothing is added to the document for the TIMEML element.
        currentDoc.addDocNode(startElementString(name, attrs))