示例#1
0
 def process(self, cas):
     """Write one CAS as a ``<document>`` element to the open output file.

     The element contains the document text (``cas.artifact``) followed by
     one ``<annotation ... />`` element per annotation returned by
     ``CasUtil.get_all_annotations(cas)``.

     Attribute values are quoted and the text content is escaped so the
     output stays well-formed XML even when the text or an annotation value
     contains ``<``, ``&`` or quote characters.

     Args:
         cas: Object exposing ``document_id`` and ``artifact``; it is also
             passed to ``CasUtil.get_all_annotations``.
     """
     # Function-scope import keeps this block self-contained; only the
     # standard library is used.
     from xml.sax.saxutils import escape, quoteattr

     out = self.f
     out.write('<document id=' + quoteattr(str(cas.document_id)) + '>\n')
     out.write('\t<text>' + escape(cas.artifact) + '</text>\n')
     out.write('\t<annotations>\n')
     for annot in CasUtil.get_all_annotations(cas):
         attrs = [
             ' begin=' + quoteattr(str(annot.begin)),
             ' end=' + quoteattr(str(annot.end)),
         ]
         # ``type`` and ``value`` are optional: as before, they are emitted
         # only when truthy.
         if annot.type:
             attrs.append(' type=' + quoteattr(annot.type))
         if annot.value:
             attrs.append(' value=' + quoteattr(str(annot.value)))
         out.write('\t\t<annotation' + ''.join(attrs) + ' />\n')
     out.write('\t</annotations>\n')
     out.write('</document>\n\n')
示例#2
0
    def process(self, cas):
        """Collect tokens from a German document and write the output files.

        Documents whose first "Language" annotation is not ``"de"`` are
        skipped, as are documents that carry no "Language" annotation at all.
        For the remaining documents every annotation is offered to
        ``self.add_to_filtered_token``; each collected token is then recorded
        (lower-cased) in ``self.unique_token`` with 1 if
        ``CasUtil.has_annotation(cas, token, 'NER')`` is truthy and 0
        otherwise. Finally ``self.write_output_files`` is called with the
        collected tokens.

        Args:
            cas: The document container handed to the ``CasUtil`` helpers.
        """
        # A default of None avoids an unhandled StopIteration when the CAS
        # has no Language annotation; such a document cannot be confirmed as
        # German, so it is skipped like any other non-German one.
        lang = next(CasUtil.get_annotations(cas, "Language"), None)
        if lang is None or lang.value != "de":
            return

        filtered_token = []
        for annot in CasUtil.get_all_annotations(cas):
            self.add_to_filtered_token(annot, filtered_token)

        for t in filtered_token:
            # A later occurrence of the same lower-cased token overwrites
            # the earlier flag.
            self.unique_token[t.lower()] = 1 if CasUtil.has_annotation(cas, t, 'NER') else 0

        self.write_output_files(cas, filtered_token)
示例#3
0
 def process(self, cas):
     """Print the document text, then each annotation with its covered text.

     Args:
         cas: Object exposing ``artifact``; it is also passed to
             ``CasUtil.get_all_annotations``.
     """
     print("Artifact:", cas.artifact)
     annotations = CasUtil.get_all_annotations(cas)
     for annotation in annotations:
         covered = annotation.get_covered_text()
         print(annotation, covered)