def is_both_data_or_time(mention1: MentionDataLight,
                             mention2: MentionDataLight) -> bool:
    """
    Tell whether both mentions are date or time expressions.

    A mention qualifies when its NER label contains 'DATE' or 'TIME'.
    When a mention carries no NER label (None), the label is taken from
    StringUtils.find_head_lemma_pos_ner applied to its tokens_str.

    Returns:
        bool: True only when both mentions qualify
    """
    first_ner, second_ner = mention1.mention_ner, mention2.mention_ner

    # Fall back to computing the label from the text when it is missing.
    if first_ner is None:
        _head, _lemma, _pos, first_ner = StringUtils.find_head_lemma_pos_ner(
            mention1.tokens_str)
    if second_ner is None:
        _head, _lemma, _pos, second_ner = StringUtils.find_head_lemma_pos_ner(
            mention2.tokens_str)

    temporal_tags = ('DATE', 'TIME')
    # Both checks are evaluated before being combined.
    first_is_temporal = any(tag in first_ner for tag in temporal_tags)
    second_is_temporal = any(tag in second_ner for tag in temporal_tags)

    return first_is_temporal and second_is_temporal
Exemplo n.º 2
0
 def __init__(
     self,
     tokens_str: str,
     mention_context: str = None,
     mention_head: str = None,
     mention_head_lemma: str = None,
     mention_pos: str = None,
     mention_ner: str = None,
 ):
     """
     Create a mention described purely by text values.

     When neither mention_head nor mention_head_lemma is supplied (both
     empty or None), head, lemma, POS and NER are all derived from
     tokens_str through StringUtils.find_head_lemma_pos_ner, and any
     mention_pos / mention_ner arguments are ignored.

     Args:
         tokens_str: str the tokens combine text (join with space)
         mention_context: str, stored unchanged
         mention_head: str
         mention_head_lemma: str
         mention_pos: str, stored as mention_head_pos
         mention_ner: str
     """
     self.tokens_str = tokens_str
     self.mention_context = mention_context

     if mention_head or mention_head_lemma:
         # The caller provided head information: take all four values as given.
         head, lemma, pos, ner = (
             mention_head,
             mention_head_lemma,
             mention_pos,
             mention_ner,
         )
     else:
         # Nothing provided: derive all four values from the mention text.
         head, lemma, pos, ner = StringUtils.find_head_lemma_pos_ner(
             str(tokens_str)
         )

     self.mention_head = head
     self.mention_head_lemma = lemma
     self.mention_head_pos = pos
     self.mention_ner = ner
Exemplo n.º 3
0
    def read_json_mention_data_line(mention_line: dict):
        """
        Build a MentionData object from a single parsed JSON mention record.

        Only 'tokens_str' is mandatory. Every other field falls back to a
        default when its key is absent: None for most fields, -1 for
        'score', False for 'is_continuous' and 'is_singleton'.

        Args:
            mention_line: a parsed Json representation (key/value mapping)
                of a single mention

        Returns:
            MentionData object

        Raises:
            Exception: if the record cannot be converted; the original
                error is attached as the cause of the raised exception
        """
        try:
            mention_text = mention_line['tokens_str']

            topic_id = mention_line.get('topic_id')
            coref_chain = mention_line.get('coref_chain')
            sent_id = mention_line.get('sent_id')
            tokens_numbers = mention_line.get('tokens_number')
            mention_context = mention_line.get('mention_context')
            mention_type = mention_line.get('mention_type')
            score = mention_line.get('score', -1)
            is_continue = mention_line.get('is_continuous', False)
            is_singleton = mention_line.get('is_singleton', False)
            predicted_coref_chain = mention_line.get('predicted_coref_chain')

            doc_id = None
            if 'doc_id' in mention_line:
                doc_id = mention_line['doc_id']
                # '.xml' is appended when the id does not already contain it
                if '.xml' not in doc_id:
                    doc_id = doc_id + '.xml'

            if 'mention_head' in mention_line and 'mention_head_lemma' in mention_line:
                # Head and lemma are given: POS and NER are read only if present.
                mention_head = mention_line['mention_head']
                mention_head_lemma = mention_line['mention_head_lemma']
                mention_pos = mention_line.get('mention_head_pos')
                mention_ner = mention_line.get('mention_ner')
            else:
                # Otherwise all four values are derived from the mention text.
                mention_head, mention_head_lemma, mention_pos, \
                    mention_ner = StringUtils.find_head_lemma_pos_ner(str(mention_text))

            return MentionData(
                topic_id, doc_id, sent_id, tokens_numbers, mention_text,
                mention_context, mention_head, mention_head_lemma, coref_chain,
                mention_type, is_continue, is_singleton, score,
                predicted_coref_chain, mention_pos, mention_ner)
        except Exception as err:
            print('Unexpected error:', sys.exc_info()[0])
            # Chain explicitly so the root cause stays attached to the raised error.
            raise Exception('failed reading json line-' + str(mention_line)) from err
Exemplo n.º 4
0
    def read_json_mention_data_line(mention_line: dict):
        """
        Build a MentionData object from a single parsed JSON mention record.

        Only "tokens_str" is mandatory. Every other field falls back to a
        default when its key is absent: None for most fields, -1 for
        "score" and "mention_index", False for "is_continuous" and
        "is_singleton".

        Args:
            mention_line: a parsed Json representation (key/value mapping)
                of a single mention

        Returns:
            MentionData object

        Raises:
            Exception: if the record cannot be converted; the original
                error is attached as the cause of the raised exception
        """
        try:
            mention_text = mention_line["tokens_str"]

            topic_id = mention_line.get("topic_id")
            coref_chain = mention_line.get("coref_chain")
            sent_id = mention_line.get("sent_id")
            tokens_numbers = mention_line.get("tokens_number")
            mention_context = mention_line.get("mention_context")
            mention_type = mention_line.get("mention_type")
            score = mention_line.get("score", -1)
            is_continue = mention_line.get("is_continuous", False)
            is_singleton = mention_line.get("is_singleton", False)
            predicted_coref_chain = mention_line.get("predicted_coref_chain")
            mention_index = mention_line.get("mention_index", -1)

            doc_id = None
            if "doc_id" in mention_line:
                doc_id = mention_line["doc_id"]
                # ".xml" is appended when the id does not already contain it
                if ".xml" not in doc_id:
                    doc_id = doc_id + ".xml"

            if "mention_head" in mention_line and "mention_head_lemma" in mention_line:
                # Head and lemma are given: POS and NER are read only if present.
                mention_head = mention_line["mention_head"]
                mention_head_lemma = mention_line["mention_head_lemma"]
                mention_pos = mention_line.get("mention_head_pos")
                mention_ner = mention_line.get("mention_ner")
            else:
                # Otherwise all four values are derived from the mention text.
                (
                    mention_head,
                    mention_head_lemma,
                    mention_pos,
                    mention_ner,
                ) = StringUtils.find_head_lemma_pos_ner(str(mention_text))

            return MentionData(
                topic_id,
                doc_id,
                sent_id,
                tokens_numbers,
                mention_text,
                mention_context,
                mention_head,
                mention_head_lemma,
                coref_chain,
                mention_type,
                is_continue,
                is_singleton,
                score,
                predicted_coref_chain,
                mention_pos,
                mention_ner,
                mention_index,
            )
        except Exception as err:
            print("Unexpected error:", sys.exc_info()[0])
            # Chain explicitly so the root cause stays attached to the raised error.
            raise Exception("failed reading json line-" + str(mention_line)) from err