Example #1
    def parse_messages(self, task_result: TaskResult,
                       context: List[TaskResult],
                       language: str) -> List[Message]:
        if not task_result.processor == "Comparison":
            raise WrongResourceException()

        messages: List[Message] = []

        corpus, corpus_type = self.build_corpus_fields(task_result)
        input_processor = task_result.parameters["input_processor"]
        if input_processor == "ExtractFacets":
            input_processor += ":" + task_result.parameters["facet"]

        messages += self._jsd_message_parser(task_result, corpus, corpus_type,
                                             input_processor)
        messages += self._value_divergence_parser(task_result, corpus,
                                                  corpus_type, input_processor)

        return messages
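As a quick illustration of the facet-suffix step above, the snippet below runs the same logic on a hypothetical parameter dict; the "LANGUAGE" facet is an invented value, not taken from the example.

# Hypothetical parameters for a Comparison task whose input came from
# ExtractFacets; the facet name "LANGUAGE" is illustrative only.
parameters = {"input_processor": "ExtractFacets", "facet": "LANGUAGE"}

input_processor = parameters["input_processor"]
if input_processor == "ExtractFacets":
    input_processor += ":" + parameters["facet"]

assert input_processor == "ExtractFacets:LANGUAGE"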
Example #2
    def parse_messages(self, task_result: TaskResult,
                       context: List[TaskResult],
                       language: str) -> List[Message]:
        if not task_result.processor == "ExtractWords":
            raise WrongResourceException()

        unit: str = task_result.parameters.get("unit")
        if unit == "tokens":
            unit = "TOKEN"
        elif unit == "stems":
            unit = "STEM"
        else:
            log.error(
                "Unexpected unit '{}', expected 'tokens' or 'stems'".format(
                    task_result.parameters.get("unit")))
            raise ParsingException()
        corpus, corpus_type = self.build_corpus_fields(task_result)

        messages = []
        for word in task_result.task_result["result"]["vocabulary"]:
            interestingness = task_result.task_result["interestingness"].get(
                word, ProcessorResource.EPSILON)
            for result_idx, result_name in enumerate(
                ["Count", "RelativeCount", "TFIDF"]):
                result = task_result.task_result["result"]["vocabulary"][word][
                    result_idx]
                messages.append(
                    Message([
                        Fact(
                            corpus,  # corpus
                            corpus_type,  # corpus_type
                            None,  # timestamp_from
                            None,  # timestamp_to
                            "all_time",  # timestamp_type
                            "ExtractWords:" + result_name,  # analysis_type
                            "[{}:{}]".format(unit, word),  # result_key
                            result,  # result_value
                            interestingness,  # outlierness
                            "[LINK:{}]".format(task_result.uuid),  # uuid
                        )
                    ]))
        return messages
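Reading the structure back from the lookups above, the ExtractWords payload presumably has the shape sketched below: each vocabulary entry is a three-item list indexed as Count, RelativeCount and TFIDF, and interestingness is keyed per word. All words and numbers here are invented.

# Hypothetical ExtractWords task_result payload, inferred from the indexing
# in the parser above; the concrete words and numbers are made up.
example_task_result = {
    "result": {
        "vocabulary": {
            "press": [42, 0.012, 3.1],    # [Count, RelativeCount, TFIDF]
            "freedom": [17, 0.005, 2.4],
        },
    },
    "interestingness": {
        "press": 0.8,
        "freedom": 0.3,   # words missing here fall back to EPSILON
    },
}
example_parameters = {"unit": "tokens"}   # or "stems"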
Example #3
    def parse_messages(self, task_result: TaskResult,
                       context: List[TaskResult],
                       language: str) -> List[Message]:

        # Reduce a locale tag such as "en-GB" to the bare language code "en".
        language = language.split("-")[0]

        if not task_result.processor == "TrackNameSentiment":
            raise WrongResourceException()

        corpus, corpus_type = self.build_corpus_fields(task_result)

        entries: Dict[str, Dict[int, Tuple[float, float]]] = {}
        for entity in task_result.task_result["result"]:
            entity_name_map: Dict[
                str,
                str] = task_result.task_result["result"][entity].get("names")
            if entity_name_map is None:
                entity_name_map = {}
            # Preferred name order: requested language, English, any known
            # name, and finally the raw entity identifier.
            entity_name_priority_list = [
                entity_name_map.get(language, None),
                entity_name_map.get("en", None),
                list(entity_name_map.values())[0]
                if list(entity_name_map.values()) else None,
                entity,
            ]

            if not entity_name_map:
                entity_name_priority_list.insert(
                    0, self._resolve_name_from_solr(entity, language))

            name = next(name for name in entity_name_priority_list if name)

            years: Dict[int, Tuple[float, float]] = {}
            for year in task_result.task_result["result"][entity]:
                if year == "names":
                    # Skip the names-map
                    continue
                sentiment = task_result.task_result["result"][entity][year]
                interestingness = task_result.task_result["interestingness"][
                    entity][1][year]
                if sentiment != 0 or interestingness != 0:
                    years[int(year)] = (sentiment, interestingness)

            entries[name] = years

        messages: List[Message] = []

        for entry, years in entries.items():
            if not years:
                continue
            max_interestingness = max(
                interestingness for (sentiment, interestingness) in years.values())
            max_sentiment, max_sentiment_year = max(
                (sentiment, year) for (year, (sentiment, _)) in years.items())
            min_sentiment, min_sentiment_year = min(
                (sentiment, year) for (year, (sentiment, _)) in years.items())
            mean_sentiment = sum(
                sentiment for (sentiment, _) in years.values()) / len(years)
            min_year = min(years)
            max_year = max(years)
            year_count = len(years)

            messages.append(
                Message(
                    Fact(
                        corpus,
                        corpus_type,
                        min_year,
                        max_year,
                        "between_years",
                        "TrackNameSentiment:Mean",
                        "[ENTITY:NAME:{}]".format(entry),
                        mean_sentiment,
                        max_interestingness,
                        "[LINK:{}]".format(task_result.uuid),  # uuid
                    )))

            if len(years) > 1:
                messages.append(
                    Message(
                        Fact(
                            corpus,
                            corpus_type,
                            min_year,
                            max_year,
                            "between_years",
                            "TrackNameSentiment:CountYears",
                            "[ENTITY:NAME:{}]".format(entry),
                            year_count,
                            max_interestingness,
                            "[LINK:{}]".format(task_result.uuid),  # uuid
                        )))
                messages.append(
                    Message(
                        Fact(
                            corpus,
                            corpus_type,
                            min_sentiment_year,
                            min_sentiment_year,
                            "year",
                            "TrackNameSentiment:Min",
                            "[ENTITY:NAME:{}]".format(entry),
                            min_sentiment,
                            max_interestingness,
                            "[LINK:{}]".format(task_result.uuid),  # uuid
                        )))
                messages.append(
                    Message(
                        Fact(
                            corpus,
                            corpus_type,
                            max_sentiment_year,
                            max_sentiment_year,
                            "year",
                            "TrackNameSentiment:Max",
                            "[ENTITY:NAME:{}]".format(entry),
                            max_sentiment,
                            max_interestingness,
                            "[LINK:{}]".format(task_result.uuid),  # uuid
                        )))

        return messages
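The TrackNameSentiment payload, again inferred from the lookups above, seems to pair an optional "names" map with per-year sentiment values under "result", while "interestingness" holds, per entity, a pair whose second element maps years to scores (what index 0 contains is not visible in this example). The entity id, names and numbers below are illustrative only.

# Hypothetical TrackNameSentiment payload shaped after the lookups above.
# Index 0 of the per-entity interestingness pair is unused by this parser
# and is left as a placeholder; all values are invented.
example_task_result = {
    "result": {
        "Q1757": {                                    # hypothetical entity id
            "names": {"fi": "Helsinki", "en": "Helsinki"},
            "1905": 0.4,                              # year -> sentiment
            "1917": -0.2,
        },
    },
    "interestingness": {
        "Q1757": (None, {"1905": 0.7, "1917": 0.9}),  # index 1: year -> score
    },
}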
Example #4
    def parse_messages(self, task_result: TaskResult,
                       context: List[TaskResult],
                       language: str) -> List[Message]:
        # This thing shouldn't *actually* parse anything, ever
        raise WrongResourceException()
Example #5
    def parse_messages(self, task_result: TaskResult,
                       context: List[TaskResult],
                       language: str) -> List[Message]:

        # Reduce a locale tag such as "en-GB" to the bare language code "en".
        language = language.split("-")[0]

        if not task_result.processor == "ExtractNames":
            raise WrongResourceException()

        corpus, corpus_type = self.build_corpus_fields(task_result)

        for entity in task_result.task_result["result"]:
            entity_name_map: Dict[
                str, str] = task_result.task_result["result"][entity].get(
                    "names", {})

            # Preferred name order: requested language, English, any known
            # name, and finally the raw entity identifier.
            entity_names = [
                entity_name_map.get(language, None),
                entity_name_map.get("en", None),
                list(entity_name_map.values())[0]
                if list(entity_name_map.values()) else None,
                entity,
            ]

            if not entity_name_map:
                entity_names.insert(
                    0, self._resolve_name_from_solr(entity, language))

            task_result.task_result["result"][entity]["entity"] = next(
                name for name in entity_names if name)

        entities_with_interestingness = [
            (entity, max(interestingness.values()))
            for (entity, interestingness
                 ) in zip(task_result.task_result["result"].values(),
                          task_result.task_result["interestingness"].values())
        ]

        entities_with_interestingness = sorted(entities_with_interestingness,
                                               key=lambda pair: pair[1],
                                               reverse=True)

        max_interestingness = entities_with_interestingness[0][1]

        if max_interestingness < 0.01:
            # Keep only the single most interesting entity, as a one-element
            # list so the slicing and unpacking below still work.
            entities_with_interestingness = [entities_with_interestingness[0]]

        else:
            entities_with_interestingness = [
                (entity, interestingness)
                for (entity, interestingness) in entities_with_interestingness
                if interestingness >= 0.01
            ]

        if len(entities_with_interestingness) == 0:
            return []
        count = min(5, len(entities_with_interestingness))
        entities_with_interestingness = entities_with_interestingness[:count]

        single_or_multiple = "Single" if len(
            entities_with_interestingness) == 1 else "Multiple"

        return [
            Message(
                Fact(
                    corpus,
                    corpus_type,
                    None,
                    None,
                    "all_time",
                    "ExtractNames:" + single_or_multiple,
                    "ExtractNames",
                    "[ExtractNamesList:{}]".format("|".join([
                        "{}:{}:{}".format(entity["entity"], entity["salience"],
                                          entity["stance"])
                        for (entity,
                             interestingness) in entities_with_interestingness
                    ])),
                    task_result.task_result["interestingness"]["overall"],
                    "[LINK:{}]".format(task_result.uuid),  # uuid
                ))
        ]
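Finally, the ExtractNames payload implied by the code above: per-entity records carrying "names", "salience" and "stance", a per-entity interestingness mapping whose maximum drives the ranking, and an "overall" score read as the final outlierness. The inner interestingness keys are not visible in this example, so those below, like every value, are invented.

# Hypothetical ExtractNames payload shaped after the lookups above; the
# per-entity interestingness keys are unknown, so placeholder keys are used.
example_task_result = {
    "result": {
        "Q1757": {                                    # hypothetical entity id
            "names": {"fi": "Helsinki", "en": "Helsinki"},
            "salience": 0.62,
            "stance": 0.1,
        },
    },
    "interestingness": {
        "Q1757": {"salience": 0.62, "stance": 0.1},   # max(...) is used
        "overall": 0.62,                              # final outlierness
    },
}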