# Collect the ids of the synsets to export: a synset qualifies when it has an
# ILI together with a non-empty id, or when its id is listed in odwn_ids.
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# line; confirm that the odwn_ids test belongs at loop level.
added_synsets = set()
for synset_obj in old.synsets_get_generator():
    ili = synset_obj.get_ili()
    synset_id = synset_obj.get_id()
    if (ili is not None and synset_id) or synset_id in odwn_ids:
        added_synsets.add(synset_id)

# Add one LexicalEntry (with its Lemma child) per lexical entry whose synset
# was selected above. mw not taken into account.
# NOTE(review): nothing in the visible statements ever adds to
# added_sense_ids, so the "sense not seen yet" test is always true here.
added_sense_ids = set()
for counter, le_obj in enumerate(old.les_get_generator()):
    synset_id = le_obj.get_synset_id()
    sense_id = le_obj.get_sense_id()
    sense_is_new = sense_id not in added_sense_ids
    if synset_id and sense_is_new and synset_id in added_synsets:
        # TODO: add sense examples
        entry_attrib = {'id': 'w%s' % counter}
        lexical_entry_el = etree.SubElement(lexicon_el,
                                            'LexicalEntry',
                                            attrib=entry_attrib)
        # Only the first element of get_pos() is written as the part of speech.
        lemma_attrib = {
            'writtenForm': le_obj.get_lemma(),
            'partOfSpeech': le_obj.get_pos()[0],
        }
        etree.SubElement(lexical_entry_el, 'Lemma', attrib=lemma_attrib)
'user_input', 'Anneleen', 'synsets_5_10.bin') with open(annotation_path,'rb') as infile: annotation = pickle.load(infile) #STEP X: remove le_objs to_remove = set() [to_remove.update(value['le_ids_to_remove']) for value in annotation.values() if 'le_ids_to_remove' in value] num_to_remove = len(to_remove) logger.info('%s le ids found to remove' % num_to_remove) for le_obj in my_parser.les_get_generator(): le_id = le_obj.get_id() if le_id in to_remove: le_obj.remove_me() #STEP X: RUN STATS my_parser.get_stats(verbose=True) #STEP X: export it to version 1.2 my_parser.export(output_path) logger.info('finished conversion')
# Collect the ids of the synsets to export: a synset qualifies when it has an
# ILI together with a non-empty id, or when its id is listed in odwn_ids.
# NOTE(review): block nesting was reconstructed from a whitespace-collapsed
# line; confirm that the odwn_ids test belongs at loop level.
added_synsets = set()
for synset_obj in old.synsets_get_generator():
    ili = synset_obj.get_ili()
    synset_id = synset_obj.get_id()
    if (ili is not None and synset_id) or synset_id in odwn_ids:
        added_synsets.add(synset_id)

# Add one LexicalEntry (with its Lemma child) per lexical entry whose synset
# was selected above. mw not taken into account.
# NOTE(review): nothing in the visible statements ever adds to
# added_sense_ids, so the "sense not seen yet" test is always true here.
added_sense_ids = set()
for counter, le_obj in enumerate(old.les_get_generator()):
    synset_id = le_obj.get_synset_id()
    sense_id = le_obj.get_sense_id()
    sense_is_new = sense_id not in added_sense_ids
    if synset_id and sense_is_new and synset_id in added_synsets:
        # TODO: add sense examples
        entry_attrib = {'id': 'w%s' % counter}
        lexical_entry_el = etree.SubElement(lexicon_el,
                                            'LexicalEntry',
                                            attrib=entry_attrib)
        # Only the first element of get_pos() is written as the part of speech.
        lemma_attrib = {
            'writtenForm': le_obj.get_lemma(),
            'partOfSpeech': le_obj.get_pos()[0],
        }
        etree.SubElement(lexical_entry_el, 'Lemma', attrib=lemma_attrib)