def is_both_data_or_time(mention1: MentionDataLight, mention2: MentionDataLight) -> bool:
    """
    Tell whether both mentions carry a date/time named-entity tag.

    A mention whose ``mention_ner`` is ``None`` gets its tag computed from its
    ``tokens_str`` through ``StringUtils.find_head_lemma_pos_ner``.

    Returns:
        bool: True only when each of the two tags contains 'DATE' or 'TIME'
    """
    ner_tags = []
    for mention in (mention1, mention2):
        ner_tag = mention.mention_ner
        if ner_tag is None:
            # Only the last element of the returned 4-tuple (the NER tag) is needed.
            _head, _lemma, _pos, ner_tag = StringUtils.find_head_lemma_pos_ner(
                mention.tokens_str)
        ner_tags.append(ner_tag)

    temporal_labels = ('DATE', 'TIME')
    # Evaluate the check for both mentions up front (no short-circuit between them).
    temporal_flags = [
        any(label in ner_tag for label in temporal_labels)
        for ner_tag in ner_tags
    ]
    return all(temporal_flags)
def __init__(
    self,
    tokens_str: str,
    mention_context: str = None,
    mention_head: str = None,
    mention_head_lemma: str = None,
    mention_pos: str = None,
    mention_ner: str = None,
):
    """
    Lightweight mention object that holds text values only.

    When neither ``mention_head`` nor ``mention_head_lemma`` is given (both
    falsy), head, lemma, POS and NER are all computed from ``tokens_str`` with
    ``StringUtils.find_head_lemma_pos_ner`` and the ``mention_pos`` /
    ``mention_ner`` arguments are ignored. Otherwise the four supplied values
    are stored as they are.

    Args:
        tokens_str: str the mention tokens combined into one text (joined with space)
        mention_context: str stored unchanged on the instance
        mention_head: str
        mention_head_lemma: str
        mention_pos: str stored as ``mention_head_pos``
        mention_ner: str
    """
    self.tokens_str = tokens_str
    self.mention_context = mention_context

    if mention_head or mention_head_lemma:
        # Caller supplied at least one of head / lemma: trust the given values.
        head, lemma = mention_head, mention_head_lemma
        pos, ner = mention_pos, mention_ner
    else:
        head, lemma, pos, ner = StringUtils.find_head_lemma_pos_ner(str(tokens_str))

    self.mention_head = head
    self.mention_head_lemma = lemma
    self.mention_head_pos = pos
    self.mention_ner = ner
def read_json_mention_data_line(mention_line: dict):
    """
    Build a MentionData object from a single decoded JSON mention record.

    Args:
        mention_line: a Json representation of a single mention, indexed by
            string keys. 'tokens_str' is mandatory. Every other key is
            optional and falls back to a default: None, except False for
            'is_continuous' / 'is_singleton' and -1 for 'score'.

    Returns:
        MentionData object

    Raises:
        Exception: when the record cannot be read for any reason. The
            original error is attached as ``__cause__``.
    """
    try:
        # The only mandatory field: a missing key fails the whole record.
        mention_text = mention_line['tokens_str']

        doc_id = mention_line.get('doc_id')
        # Normalise the document id so that it always carries the '.xml' suffix.
        if 'doc_id' in mention_line and '.xml' not in doc_id:
            doc_id = doc_id + '.xml'

        if 'mention_head' in mention_line and 'mention_head_lemma' in mention_line:
            # Head data was provided with the record; POS / NER stay optional.
            mention_head = mention_line['mention_head']
            mention_head_lemma = mention_line['mention_head_lemma']
            mention_pos = mention_line.get('mention_head_pos')
            mention_ner = mention_line.get('mention_ner')
        else:
            # No usable head data: compute all four values from the mention text.
            (
                mention_head,
                mention_head_lemma,
                mention_pos,
                mention_ner,
            ) = StringUtils.find_head_lemma_pos_ner(str(mention_text))

        mention_data = MentionData(
            mention_line.get('topic_id'),
            doc_id,
            mention_line.get('sent_id'),
            mention_line.get('tokens_number'),
            mention_text,
            mention_line.get('mention_context'),
            mention_head,
            mention_head_lemma,
            mention_line.get('coref_chain'),
            mention_line.get('mention_type'),
            mention_line.get('is_continuous', False),
            mention_line.get('is_singleton', False),
            mention_line.get('score', -1),
            mention_line.get('predicted_coref_chain'),
            mention_pos,
            mention_ner,
        )
    except Exception as err:
        print('Unexpected error:', sys.exc_info()[0])
        # Chain explicitly so the root cause stays attached to the wrapper error.
        raise Exception('failed reading json line-' + str(mention_line)) from err

    return mention_data
def read_json_mention_data_line(mention_line: dict):
    """
    Build a MentionData object from a single decoded JSON mention record.

    Args:
        mention_line: a Json representation of a single mention, indexed by
            string keys. "tokens_str" is mandatory. Every other key is
            optional and falls back to a default: None, except False for
            "is_continuous" / "is_singleton" and -1 for "score" and
            "mention_index".

    Returns:
        MentionData object

    Raises:
        Exception: when the record cannot be read for any reason. The
            original error is attached as ``__cause__``.
    """
    try:
        # The only mandatory field: a missing key fails the whole record.
        mention_text = mention_line["tokens_str"]

        doc_id = mention_line.get("doc_id")
        # Normalise the document id so that it always carries the ".xml" suffix.
        if "doc_id" in mention_line and ".xml" not in doc_id:
            doc_id = doc_id + ".xml"

        if "mention_head" in mention_line and "mention_head_lemma" in mention_line:
            # Head data was provided with the record; POS / NER stay optional.
            mention_head = mention_line["mention_head"]
            mention_head_lemma = mention_line["mention_head_lemma"]
            mention_pos = mention_line.get("mention_head_pos")
            mention_ner = mention_line.get("mention_ner")
        else:
            # No usable head data: compute all four values from the mention text.
            (
                mention_head,
                mention_head_lemma,
                mention_pos,
                mention_ner,
            ) = StringUtils.find_head_lemma_pos_ner(str(mention_text))

        mention_data = MentionData(
            mention_line.get("topic_id"),
            doc_id,
            mention_line.get("sent_id"),
            mention_line.get("tokens_number"),
            mention_text,
            mention_line.get("mention_context"),
            mention_head,
            mention_head_lemma,
            mention_line.get("coref_chain"),
            mention_line.get("mention_type"),
            mention_line.get("is_continuous", False),
            mention_line.get("is_singleton", False),
            mention_line.get("score", -1),
            mention_line.get("predicted_coref_chain"),
            mention_pos,
            mention_ner,
            mention_line.get("mention_index", -1),
        )
    except Exception as err:
        print("Unexpected error:", sys.exc_info()[0])
        # Chain explicitly so the root cause stays attached to the wrapper error.
        raise Exception("failed reading json line-" + str(mention_line)) from err

    return mention_data