# Add LIWC category entities to a FoLiA document.
#
# Names read from the surrounding scope: `file_name` (input FoLiA file),
# `dir_out` (passed through to `write_folia_file`), `liwc_dict` (maps a word
# to an iterable of category keys), `liwc_categories` (maps a category key to
# the label used in the entity class), and the helpers `add_entity` and
# `write_folia_file`.

# Load document. Only 'end' events are requested, so an element is handed to
# the loop after the parser has finished reading it.
context = etree.iterparse(file_name, events=('end',), remove_blank_text=True)

annotations_tag = '{http://ilk.uvt.nl/folia}annotations'
sentence_tag = '{http://ilk.uvt.nl/folia}s'
word_tag = '{http://ilk.uvt.nl/folia}w'
text_content_tag = '{http://ilk.uvt.nl/folia}t'

for event, elem in context:
    if elem.tag == annotations_tag:
        # add entity-annotation for liwc
        annotation_attrs = {
            'annotator': 'liwc',
            'annotatortype': 'auto',
            'datetime': datetime.now().isoformat(),
            'set': 'liwc-set'
        }
        etree.SubElement(elem, 'entity-annotation', annotation_attrs)

    if elem.tag == sentence_tag:
        for word in elem.findall(word_tag):
            text_elem = word.find(text_content_tag)
            if text_elem is None:
                # A word without a text-content child has nothing to look
                # up; skip it instead of failing on `None.text`.
                continue

            w = text_elem.text
            if w in liwc_dict:
                # One entity per LIWC category the word belongs to.
                for cat in liwc_dict[w]:
                    cat_label = 'liwc-{}'.format(liwc_categories[cat])
                    add_entity(elem, cat_label, [word], text_content_tag)

# The loop above has consumed the whole input; hand the parser to the writer.
write_folia_file(context, file_name, dir_out, 'liwc')
# NOTE(review): the line breaks and indentation of this fragment were lost, and
# it appears to begin part-way through a construct whose opening lines are not
# shown here (`elem`, `context`, `add_annotation_tag` and
# `num_annotations_added` are used below without being assigned). Restore the
# original layout before editing; the nesting described here is presumed from
# the statement order and should be confirmed against the original file.
#
# Read as a sequence of statements, the fragment:
#   1. collects the children of `elem` that match `entity_annotations_tag` and
#      sets `add_annotation_tag` to False when one of them has an "annotator"
#      attribute equal to `annotator_name`;
#   2. when `add_annotation_tag` is true, appends an "entity-annotation" child
#      to `elem` carrying `annotator_name`, annotator type "manual", the
#      current timestamp and `annotator_set`;
#   3. for an element whose tag is `sentence_tag`, looks up each word's
#      `id_tag` attribute in `word_id2annotations` and calls `add_entity` once
#      per annotation found, counting the calls in `num_annotations_added`;
#   4. prints the count and writes the serialized `context.root` to
#      `folia_file`.
# The bare `print "..."` statement is Python 2 syntax.
entity_annotations = elem.findall(entity_annotations_tag) for element in entity_annotations: if element.attrib.get("annotator") == annotator_name: add_annotation_tag = False # add entity-annotation for embodied emotions if add_annotation_tag: annotation_attrs = { "annotator": annotator_name, "annotatortype": "manual", "datetime": datetime.now().isoformat(), "set": annotator_set, } etree.SubElement(elem, "entity-annotation", annotation_attrs) if elem.tag == sentence_tag: words = elem.findall(word_tag) for word in words: w_id = word.attrib.get(id_tag) if w_id in word_id2annotations.keys(): # print w_id for annotation in word_id2annotations[w_id]: cat_label = "EmbodiedEmotions-{}".format(annotation.folia_entity_class()) add_entity(elem, cat_label, None, None, annotation) num_annotations_added += 1 print " Added {} entities".format(num_annotations_added) with open(folia_file, "w") as f: f.write(etree.tostring(context.root, encoding="utf8", xml_declaration=True, pretty_print=True))