def __init__(self):
    super(WMTestSetupWithOMG, self).__init__()
    self.ontology = Ontology()
    self.ontology.load_from_yaml_with_metadata(self.yaml_path)
    self.ontology.init_embeddings(self.embeddings_file)
    self.ontology.add_embeddings_to_nodes()
    self.ontology_mapper = OntologyMapper()
    self.ontology_mapper.load_ontology(self.internal_yaml_path)
    self.grounder = Grounder(self.ontology, self.min_groundings)
    self.grounder.build_grounder(self.keywords, self.stopwords)
    self.grounder.specify_user_root_path(self.root_path)

def __init__(self):
    super(CXTestSetupWithOMG, self).__init__()
    self.mapper = OntologyMapper()
    self.mapper.load_ontology(self.internal_yaml_path)
    self.ontology = Ontology()
    self.ontology.load_from_internal_yaml(self.internal_yaml_path)
    exemplars = Ontology.load_internal_exemplars(self.exemplars_json)
    self.ontology.add_internal_exemplars(exemplars, None, None)
    self.ontology.init_embeddings(self.embeddings_file)
    self.ontology.add_embeddings_to_nodes()
    centroids = utils.load_json(self.centroids_json)
    self.ontology.add_node_contextual_embeddings_with_name(centroids)
    self.grounder = Grounder(self.ontology, self.min_groundings)

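# A minimal sketch (not part of either fixture above) of the wiring pattern the
# two test setups share: build an Ontology, attach embeddings, then hand it to
# a Grounder. Every literal below is a hypothetical placeholder, and the empty
# keyword/stopword collections are stand-ins for the resource files the real
# fixtures load.
def _example_grounder_setup():
    ontology = Ontology()
    ontology.load_from_yaml_with_metadata("wm_metadata.yml")  # hypothetical path
    ontology.init_embeddings("embeddings.p")  # hypothetical path
    ontology.add_embeddings_to_nodes()
    grounder = Grounder(ontology, 5)  # keep up to 5 groundings per mention
    grounder.build_grounder({}, set())  # empty keywords/stopwords for brevity
    return grounder
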
def load_model(self, yaml_string=None):
    if yaml_string is None:  # default / starting ontology yaml
        with io.open(self._ontology_yaml, 'r', encoding='utf8') as yaml_file_handle:
            yaml_string = yaml_file_handle.read()

    if self.model_loaded:  # reuse the ontology's embeddings lookup
        ontology = self.model.get_ontology()
        last_time = time.time()
        print('Loading ontology from string...', end=' ')
        ontology.load_from_yaml_string_with_metadata(yaml_string)
        self.model.flush_cache()

    else:  # build from scratch
        ontology = Ontology()
        last_time = time.time()
        print('Loading embeddings...', end=' ')
        ontology.init_embeddings(self._embeddings_file)
        this_time = time.time()
        print('{}s\nLoading ontology from string...'.format(this_time - last_time),
              end=' ')
        last_time = this_time
        ontology.load_from_yaml_string_with_metadata(yaml_string)

        # mapper to add internal exemplars when possible
        ontology_mapper = OntologyMapper()
        ontology_mapper.load_ontology(self._internal_ontology_file)
        self.mapper = ontology_mapper
        # exemplars = Ontology.load_internal_exemplars(self._internal_exemplars)
        # ontology.add_internal_exemplars(exemplars, self.mapper, "WM")

        this_time = time.time()
        print('{}s\nBuilding ontology embeddings...'.format(this_time - last_time),
              end=' ')
        ontology.add_embeddings_to_nodes()

        # load keywords
        keywords = utils.read_keywords_from_bbn_annotation(
            '{}/resource/ontologies/internal/hume/'
            'keywords-anchor_annotation.txt'.format(project_root))
        # load stopwords
        stopwords = utils.read_stopwords_from_hume_resources(
            "{}/resource/ontologies/internal/hume/stopwords.list".format(
                project_root))

        self.model = Grounder(ontology, self._min_groundings)
        # self.model.specify_user_root_path(user_specified_root)
        self.model.build_grounder(keywords, stopwords)

    self.model_loaded = True

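# Usage sketch for load_model (the owning service object is not shown here; the
# attribute names come from the method body above). The first call builds the
# embeddings lookup and the Grounder from scratch; a later call with a new YAML
# string reuses the already-loaded embeddings and only flushes the grounding
# cache:
#
#     service.load_model()                 # cold start: reads self._ontology_yaml
#     service.load_model(new_yaml_string)  # hot swap: reuses embeddings lookup
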
def main():
    """Print the external ontology paths to blacklist, one quoted path per
    line, ready to paste into a Python/JSON list. See the sketch after this
    function for the traversal contract assumed of tree_iterator."""
    def build_node_set(input_set):
        def print_tree(root):
            input_set.add(root.path)
        return print_tree

    # internal_ontology = Ontology()
    # internal_ontology_path = "/home/hqiu/ld100/Hume_pipeline/Hume/resource/ontologies/internal/hume/event_ontology.yaml"
    # internal_to_external_map = dict()
    # internal_ontology.load_from_internal_yaml(internal_ontology_path, ontology_map=internal_to_external_map)
    # tree_iterator(internal_ontology.get_root(), print_tree)
    # print(internal_to_external_map)

    external_ontology = Ontology()
    external_ontology_path = "/home/criley/repos/WM_Ontologies/wm_with_flattened_interventions_metadata.yml"
    external_ontology.load_from_yaml_with_metadata(external_ontology_path)

    # collect every node in the subtrees rooted at the blacklisted concept names
    blacklisted_set = set()
    for nodes in [external_ontology.get_nodes_by_name(i)
                  for i in {"migration", "human_displacement", "interventions",
                            "intervention"}]:
        for node in nodes:
            tree_iterator(node, build_node_set(blacklisted_set))
    for i in sorted(blacklisted_set):
        print("\"{}\",".format(i))

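# tree_iterator is called above but defined elsewhere; a minimal sketch of the
# traversal contract it is assumed to satisfy (a pre-order walk applying a
# callback to every node). The get_children() accessor is an assumption, not a
# confirmed API of this repo's node class.
def _tree_iterator_sketch(node, visit):
    visit(node)  # apply the callback before descending
    for child in node.get_children():  # assumed accessor
        _tree_iterator_sketch(child, visit)
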
def read_examplers(internal_ontology_path, exemplars_path, ontology_flag):
    ontology = Ontology()
    ontology.load_from_internal_yaml(internal_ontology_path)
    exemplars = ontology.load_internal_exemplars(exemplars_path)
    ontology.add_internal_exemplars(exemplars, None, ontology_flag)
    return ontology

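# Usage sketch for read_examplers (the misspelled name is kept as-is so callers
# don't break). The paths and the "CX" flag are illustrative placeholders:
#
#     ontology = read_examplers("event_ontology.yaml", "exemplars.json", "CX")
#     root = ontology.get_root()
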
class SerifXMLGrounder(object):
    """Grounds SerifXML event mentions against an ontology: reads mention
    candidates from a document, scores them with a Grounder, writes the
    groundings back onto the event mentions, and caches the results."""

    def __init__(self, max_number_of_tokens_per_sentence: int, which_ontology,
                 keywords, stopwords, event_ontology_yaml,
                 internal_ontology_yaml, grounding_mode, embeddings, exemplars,
                 bert_centroids, bert_npz_file_list, n_best: int,
                 only_use_bert_from_root: bool, blacklist,
                 event_mention_typing_field, threshold: float, **kwargs):
        self.counter = defaultdict(int)
        self.ontology = Ontology()
        self.ontology_mapper = OntologyMapper()
        self.max_number_of_tokens_per_sentence = max_number_of_tokens_per_sentence
        self.which_ontology = which_ontology
        self.event_mention_typing_field = SerifEventMentionTypingField(
            event_mention_typing_field)
        self.threshold = threshold
        self.only_use_bert_from_root = only_use_bert_from_root
        self.n_best = n_best
        self.bert_docid_to_npz = None

        if self.which_ontology in ONTOLOGIES_REQUIRING_PRE_MAP:
            # WM-style ontologies: ground directly against the external ontology
            if "compositional" in event_ontology_yaml.lower():
                root_path = "/wm_compositional/process"  # TODO don't hard-code
            else:
                root_path = "/wm/concept/causal_factor"  # TODO don't hard-code
            kws = utils.read_keywords_from_bbn_annotation(keywords)
            stops = utils.read_stopwords_from_hume_resources(stopwords)
            self.ontology.load_from_yaml_with_metadata(event_ontology_yaml)
            self.ontology_mapper.load_ontology(internal_ontology_yaml)
            # bert_function = ontology.add_node_contextual_embeddings_with_mapper
            # bert_function_args = [ontology_mapper, args.which_ontology]
            if grounding_mode == 'fast':
                # simply map entry points to external ontology
                self.force_entry_points_into_groundings = True
                self.grounding_modes = []  # no modes --> no groundings but entry points
            elif grounding_mode == 'medium':
                # Same as 'full' mode, currently. The similarity mode specified
                # by utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXEMPLARS_AVG
                # uses the embeddings associated with the wm_metadata.yml file's
                # exemplars -- these are currently always static.
                self.force_entry_points_into_groundings = False
                self.grounding_modes = [
                    utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXEMPLARS_AVG,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_EXEMPLARS_AVG,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_TYPE_NAME,
                ]
                self.ontology.init_embeddings(embeddings)
            elif grounding_mode == 'full':
                self.force_entry_points_into_groundings = False
                self.grounding_modes = [
                    utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXEMPLARS_AVG,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_EXEMPLARS_AVG,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_TYPE_NAME,
                ]
                self.ontology.init_embeddings(embeddings)
            else:
                raise NotImplementedError(
                    "Unsupported grounding mode: {}".format(grounding_mode))
        else:
            # CX-style ontologies: ground against the internal ontology
            root_path = None
            kws = {}
            stops = utils.read_stopwords_from_hume_resources(stopwords)
            self.ontology_mapper.load_ontology(event_ontology_yaml)
            self.ontology.load_from_internal_yaml(event_ontology_yaml)
            if grounding_mode == 'fast':
                # simply map entry points to external ontology
                self.force_entry_points_into_groundings = True
                self.grounding_modes = []  # no modes --> no groundings but entry points
            elif grounding_mode == 'medium':
                self.force_entry_points_into_groundings = True
                self.grounding_modes = [
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_EXEMPLARS_AVG,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_TYPE_NAME,
                ]
                self.ontology.add_internal_exemplars_from_json(
                    exemplars, None, self.which_ontology)
                self.ontology.init_embeddings(embeddings)
            elif grounding_mode == 'full':
                self.force_entry_points_into_groundings = True
                self.grounding_modes = [
                    utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXAMPLES_AVG_USING_BERT,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_EXEMPLARS_AVG,
                    utils.SimilarityMode.COMPARE_MENTION_STRING_TO_TYPE_NAME,
                ]
                self.ontology.add_internal_exemplars_from_json(
                    exemplars, None, self.which_ontology)
                self.ontology.init_embeddings(embeddings)
                internal_name_to_float_list = utils.load_json(bert_centroids)
                self.ontology.add_node_contextual_embeddings_with_name(
                    internal_name_to_float_list)
                self.bert_docid_to_npz = utils.build_docid_to_npz_map(
                    bert_npz_file_list)
            elif grounding_mode == 'centroids_only':
                self.force_entry_points_into_groundings = True
                self.grounding_modes = [
                    utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXAMPLES_AVG_USING_BERT
                ]
                internal_name_to_float_list = utils.load_json(bert_centroids)
                self.ontology.add_node_contextual_embeddings_with_name(
                    internal_name_to_float_list)
                self.bert_docid_to_npz = utils.build_docid_to_npz_map(
                    bert_npz_file_list)
            else:
                raise NotImplementedError(
                    "Unsupported grounding mode: {}".format(grounding_mode))

        logger.info(
            "Mode: {}\nForcing entry points into output: {}\nSimilarity modes: {}"
            .format(grounding_mode, self.force_entry_points_into_groundings,
                    self.grounding_modes))

        self.top_k = self.n_best
        # due to potential for duplication, let's keep extras
        if self.which_ontology in ONTOLOGIES_REQUIRING_POST_MAP:
            self.top_k = self.top_k * 3
        self.grounder = Grounder(self.ontology, self.top_k)
        self.grounder.build_grounder(kws, stops)
        self.grounder.specify_user_root_path(root_path)

        self.remove_zero_score_groundings = False
        if self.only_use_bert_from_root:
            self.grounding_modes = [
                utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXAMPLES_AVG_USING_BERT
            ]
            self.force_entry_points_into_groundings = True
            self.remove_zero_score_groundings = True

        self.grounding_blacklist = set()
        if os.path.isfile(blacklist):
            with open(blacklist, 'r', encoding='utf8') as blacklist_fh:
                self.grounding_blacklist = set(json.load(blacklist_fh))
        self.cache = {}

    def process_doc(self, serif_doc):
        docid, mentions_and_entry_points, serif_doc = (
            self.read_serifxml_event_mentions(serif_doc))  # "grounding cache" format
        for (mention_candidate, entry_points, serifxml_entry_types,
             serif_em) in mentions_and_entry_points:
            if len(entry_points) == 0 and len(serifxml_entry_types) > 0:
                # map blacklisted node without grounding
                logger.warning(
                    "Mapping blacklisted node without grounding. Scores may "
                    "be unexpected.")
                grounded_classes = []
                for blacklisted_type in serifxml_entry_types:
                    blacklisted_paths = self.ontology_mapper.look_up_external_types(
                        blacklisted_type.event_type, self.which_ontology)
                    for path in blacklisted_paths:
                        # FIXME avoids Grounder.get_grounding_key()
                        grounded_classes.append((path, blacklisted_type.score))
                grounded_classes = sorted(grounded_classes, key=lambda x: -x[1])
            elif (len(entry_points) == 1 and entry_points[0] ==
                  self.grounder.get_ontology().get_root().get_name()):
                logger.warning("Not grounding generic {} with score {}".format(
                    serifxml_entry_types[0].event_type,
                    serifxml_entry_types[0].score))
                grounded_classes = []
            else:
                # ground normally
                grounded_classes = self.grounder.ground_mention(
                    mention_candidate,
                    entry_points,
                    self.threshold,
                    modes=self.grounding_modes,
                    force_entry_points=self.force_entry_points_into_groundings,
                    remove_groundings_with_small_score=self.remove_zero_score_groundings,
                    blacklisted_paths=self.grounding_blacklist)

            # pass through input entry points and scores
            if self.force_entry_points_into_groundings or len(grounded_classes) < 1:
                groundings_dict = {g: s for g, s in grounded_classes}
                entry_points_dict = {}
                for serifxml_entry_type in serifxml_entry_types:
                    score = serifxml_entry_type.score
                    paths_for_entry_point = {}
                    if self.which_ontology in ONTOLOGIES_REQUIRING_PRE_MAP:
                        for path in self.ontology_mapper.look_up_external_types(
                                serifxml_entry_type.event_type,
                                self.which_ontology):
                            paths_for_entry_point[path] = score
                    else:
                        nodes = self.ontology.get_nodes_by_name(
                            serifxml_entry_type.event_type)
                        for node in nodes:
                            path = node.get_path()
                            paths_for_entry_point[path] = score
                    # overwrites repeated external sources with highest score
                    utils.merge_groundings(entry_points_dict,
                                           paths_for_entry_point)
                grounded_classes = (
                    self.grounder.get_top_k_from_ep_and_grounded_dicts(
                        entry_points_dict, groundings_dict))

            if len(grounded_classes) < 1:
                if not self.only_use_bert_from_root:
                    if self.which_ontology in ONTOLOGIES_REQUIRING_PRE_MAP:
                        logger.warning("We cannot find external type mappings "
                                       "for {}".format(serifxml_entry_types))
                    # fall back to the user root with a tiny non-zero score
                    grounded_classes = self.grounder.get_top_k_from_dict(
                        {self.grounder.get_user_root_path(): 0.00001})

            # CX: map internal ontology path to external source
            if self.which_ontology in ONTOLOGIES_REQUIRING_POST_MAP:
                self.grounder.set_k(self.n_best)
                grounded_classes = (
                    self.ontology_mapper.map_internal_paths_to_k_sources(
                        grounded_classes, self.grounder, self.which_ontology))

            if len(grounded_classes) < 1 and len(serifxml_entry_types) > 0:
                raise ValueError(
                    "We're discarding {} from {}, and there will be 0 {} which"
                    " will cause downstream problems.".format(
                        serifxml_entry_types, self.event_mention_typing_field,
                        self.event_mention_typing_field))

            self.add_groundings_to_cache(grounded_classes, mention_candidate,
                                         serifxml_entry_types)
            self.maintain_serif_doc_event_mention(serif_em, grounded_classes)

    def read_serifxml_event_mentions(self, serif_doc):
        mentions = []
        docid = serif_doc.docid
        using_bert = self.bert_docid_to_npz is not None
        contextual_embeddings = None
        contextual_token_map = None
        if using_bert:
            if hasattr(serif_doc, "aux") and hasattr(serif_doc.aux, "bert_npz"):
                contextual_embeddings, contextual_token_map = (
                    utils.truncation_npz(serif_doc.aux.bert_npz))
            else:
                npz_path = self.bert_docid_to_npz.get(docid)
                if npz_path is None:
                    logger.warning("No BERT npz file for docid {}".format(docid))
                    return docid, mentions, serif_doc
                contextual_embeddings, contextual_token_map = (
                    utils.load_contextual_npz(npz_path))

        for sentence_index, sentence in enumerate(serif_doc.sentences):
            for sentence_theory in sentence.sentence_theories:
                if self.max_number_of_tokens_per_sentence > -1:
                    if len(sentence_theory.token_sequence) == 0 or len(
                            sentence_theory.token_sequence
                    ) > self.max_number_of_tokens_per_sentence:
                        logger.info(
                            "Skipping empty or overly long sentence docid: {}, "
                            "sentid: {}, sent: {}".format(
                                docid, sentence_index,
                                " ".join(token.text for token in
                                         sentence_theory.token_sequence)))
                        continue
                sentence_string = u" ".join(
                    token.text for token in sentence_theory.token_sequence)
                mention_to_embedding = dict()
                mention_to_anchor_embeddings = dict()
                if using_bert:
                    if len(sentence_theory.token_sequence) > 0:
                        mention_to_embedding, mention_to_anchor_embeddings = (
                            utils.get_serif_event_mention_to_contextual_embedding(
                                sentence_theory, sentence_index,
                                contextual_embeddings, contextual_token_map))
                for event_mention in sentence_theory.event_mention_set:
                    event_types = (
                        PGEventType.from_serif_event_typing_to_pg_event_type_list(
                            event_mention, self.event_mention_typing_field))
                    # original_score = event_mention.score
                    sentence_tokens = list(sentence.token_sequence)
                    mention_start = int(event_mention.semantic_phrase_start)
                    mention_end = int(event_mention.semantic_phrase_end)
                    semantic_phrase_tokens = (
                        sentence_tokens[mention_start:mention_end + 1])
                    mention_string = u' '.join(
                        [t.text for t in semantic_phrase_tokens])
                    if self.which_ontology in ONTOLOGIES_REQUIRING_PRE_MAP:
                        # WM grounds to external ontology: entry point is external
                        entry_point_paths = []
                        for et in event_types:
                            entry_point_paths.extend(
                                self.ontology_mapper.look_up_external_types(
                                    et.event_type, self.which_ontology))
                        entry_points = []
                        for path in entry_point_paths:
                            node = self.ontology.get_node_by_path(path)
                            if node is not None:
                                entry_points.append(node.get_name())
                    else:
                        # CX grounds to internal ontology: entry point is internal
                        entry_points = [et.event_type for et in event_types]
                    mention_candidate = MentionCandidate(
                        mention_string, sentence_string)
                    mention_candidate.add_structured_data(
                        self.grounder.get_keywords())
                    mention_candidate.remove_stopwords_from_mention(
                        self.grounder.get_stopwords())
                    contextual_embedding = mention_to_embedding.get(event_mention)
                    if contextual_embedding is not None:
                        mention_candidate.set_contextual_embedding(
                            contextual_embedding)
                    anchor_embeddings = mention_to_anchor_embeddings.get(
                        event_mention, [])
                    for anchor_embedding in anchor_embeddings:
                        mention_candidate.add_contextual_anchor_embedding(
                            anchor_embedding)
                    mentions.append((mention_candidate, entry_points,
                                     event_types, event_mention))
                    entry_type_key = tuple(
                        sorted([et.event_type for et in event_types]))
                    self.counter[(entry_type_key, mention_string)] += 1
        return docid, mentions, serif_doc

    def maintain_serif_doc_event_mention(self, serif_em, grounded_classes):
        assert isinstance(serif_em, serifxml3.EventMention)
        if (self.event_mention_typing_field ==
                SerifEventMentionTypingField.event_type):
            # NOTE: this branch treats grounded_classes as a mapping from
            # grounding to score; the branches below iterate (grounding, score)
            # pairs.
            highest_prob_event_type = max(grounded_classes.keys(),
                                          key=lambda g: grounded_classes[g])
            highest_prob_score = grounded_classes[highest_prob_event_type]
            serif_em.event_type = highest_prob_event_type
            serif_em.score = highest_prob_score
            return
        # Erase all existing types, then write the new groundings
        if (self.event_mention_typing_field ==
                SerifEventMentionTypingField.event_types):
            serif_em.event_types = list()
            for grounding, score in grounded_classes:
                serif_em.add_new_event_mention_type(grounding, score)
        elif (self.event_mention_typing_field ==
              SerifEventMentionTypingField.factor_types):
            serif_em.factor_types = list()
            for grounding, score in grounded_classes:
                serif_em.add_new_event_mention_factor_type(grounding, score)
        else:
            raise NotImplementedError

    def add_groundings_to_cache(self, groundings, mc, entry_types):
        cache_key = utils.get_legacy_cache_key(
            mc, sorted([et.event_type for et in entry_types]),
            self.grounder.get_ontology().get_root().get_name())
        self.cache[cache_key] = groundings

    def dump_cache(self, output):
        # print json.dumps(cache, indent=4, sort_keys=True, cls=ComplexEncoder)
        if output is not None:
            with io.open(output, 'w', encoding='utf8') as f:
                f.write(json.dumps(self.cache, ensure_ascii=False, indent=4,
                                   sort_keys=True))
            with io.open(output + '.frequencies', 'w', encoding='utf8') as f:
                for pair, freq in sorted(self.counter.items(), reverse=True,
                                         key=lambda x: x[1]):
                    f.write(u"{}\t{}\t{}\n".format(freq, pair[0], pair[1]))
                    cache_key = u'|||'.join(
                        ["Event", u';'.join(pair[0]), pair[1]])
                    for grounding, score in self.cache.get(cache_key, []):
                        f.write(u'\t{}\t{}\n'.format(grounding, score))

    def write_serifxmls(self, output_serifxml_dir, doc):
        if output_serifxml_dir.lower() != "none":
            os.makedirs(output_serifxml_dir, exist_ok=True)
            doc.save(os.path.join(output_serifxml_dir,
                                  "{}.xml".format(doc.docid)))