Пример #1
0
    def run_multiple(
        self,
        data: Union[str, PathLike] = None,
        parsed_data: Union[str, PathLike] = None,
        out_dir: Union[str, PathLike] = INFERENCE_OUT,
    ):
        """Run inference on every parsed document and collect the results.

        Args:
            data: Raw input to parse. Only consulted when ``parsed_data`` is
                not supplied.
            parsed_data: Location of already-parsed documents. When falsy,
                ``data`` is parsed first via ``self.parse_data`` into
                ``<out_dir>/parsed/<stem of data>``.
            out_dir: Base output directory used for the parsing step.

        Returns:
            dict: Maps each key produced by ``_load_parsed_docs_from_dir`` to
            the value returned by ``self.run`` for the matching parsed doc.

        Raises:
            RuntimeError: If parsing is required but ``self.parser`` is falsy.
            TypeError: If parsing is required but ``data`` is ``None``.
        """
        if not parsed_data:
            if not self.parser:
                raise RuntimeError("Parser not initialized (try parse=True at init)")
            if data is None:
                # Path(None) would raise an opaque TypeError; keep the type, improve the message.
                raise TypeError("Either 'data' or 'parsed_data' must be provided")
            parsed_dir = Path(out_dir) / "parsed" / Path(data).stem
            # NOTE(review): assumes parse_data returns the location of the parsed
            # docs; fall back to the directory we asked it to write to otherwise.
            parsed_data = self.parse_data(data, out_dir=parsed_dir) or parsed_dir

        # Load from the resolved parsed-docs location. The previous code loaded
        # from ``out_dir``, which ignored ``parsed_data`` entirely.
        parsed_docs = _load_parsed_docs_from_dir(parsed_data)
        # Iterating a mapping directly yields only keys, so ask for (key, doc)
        # pairs explicitly; an iterable of pairs is passed through unchanged.
        doc_pairs = parsed_docs.items() if hasattr(parsed_docs, "items") else parsed_docs

        sentiment_docs = {}
        for f, parsed_doc in tqdm(doc_pairs):
            sentiment_docs[f] = self.run(parsed_doc=parsed_doc)
        return sentiment_docs
Пример #2
0
    def acquire_lexicons(self, parsed_dir: str or PathLike):
        """Iteratively acquire opinion and aspect terms from parsed documents.

        The minimum-frequency thresholds for opinion and aspect candidates are
        first raised by ``int(total_sentences /
        NUM_OF_SENTENCES_PER_OPINION_AND_ASPECT_TERM_INC)``. Extraction then
        runs for up to ``self.max_num_of_iterations`` rounds, stopping early
        once both previous-iteration candidate lists are empty.

        NOTE(review): the annotation ``str or PathLike`` evaluates to plain
        ``str``; ``Union[str, PathLike]`` was probably intended.

        Args:
            parsed_dir (PathLike): Path to parsed documents folder.

        Returns:
            The value returned by ``_add_lemmas_aspect_lex`` for the final
            aspect candidates list.
        """
        docs_by_name = _load_parsed_docs_from_dir(parsed_dir)
        total_sentences = sum(len(doc.sentences) for doc in docs_by_name.values())

        # Larger datasets demand more frequent terms: bump both thresholds.
        threshold_bump = int(
            total_sentences / self.NUM_OF_SENTENCES_PER_OPINION_AND_ASPECT_TERM_INC
        )
        self.min_freq_opinion_candidate += threshold_bump
        self.min_freq_aspect_candidate += threshold_bump

        for round_idx in range(self.max_num_of_iterations):
            has_pending_terms = (
                len(self.opinion_candidate_list_prev_iter) != 0
                or len(self.aspects_candidate_list_prev_iter) != 0
            )
            if not has_pending_terms:
                # Nothing was carried over from the previous round.
                break

            print("\n#Iteration: {}".format(round_idx + 1))

            # A fresh iterator is needed each round; the doc count goes with it.
            doc_stream = iter(docs_by_name.values())
            doc_count = len(docs_by_name)
            self.extract_opinion_and_aspect_terms(doc_stream, doc_count)

            self._insert_new_terms_to_tables()

        final_opinions = generate_final_opinion_candidates_list(
            self.opinion_candidate_list_raw,
            self.opinion_candidates_list_final,
            self.min_freq_opinion_candidate,
        )
        self.opinion_candidates_list_final = final_opinions

        final_aspects = _generate_final_aspect_candidates_list(
            self.aspect_candidate_list_raw,
            self.aspect_candidates_list_final,
            self.min_freq_aspect_candidate,
        )
        self.aspect_candidates_list_final = final_aspects

        self._write_candidate_opinion_lex()

        return _add_lemmas_aspect_lex(self.aspect_candidates_list_final)
Пример #3
0
    def acquire_lexicons(self, parsed_dir: str or PathLike):
        """Iteratively acquire opinion and aspect terms from parsed documents.

        Both minimum-frequency thresholds are raised by ``int(total_sentences /
        NUM_OF_SENTENCES_PER_OPINION_AND_ASPECT_TERM_INC)`` before extraction.
        Extraction runs for at most ``self.MAX_NUM_OF_ITERATIONS`` rounds and
        ends early when both previous-iteration candidate lists are empty.

        NOTE(review): the annotation ``str or PathLike`` evaluates to plain
        ``str``; ``Union[str, PathLike]`` was probably intended.

        Args:
            parsed_dir (PathLike): Path to parsed documents folder.

        Returns:
            dict: Maps ``term[0]`` of every final aspect candidate to that
            candidate's ``frequency``.
        """
        docs_by_name = _load_parsed_docs_from_dir(parsed_dir)
        total_sentences = sum(len(doc.sentences) for doc in docs_by_name.values())

        # Scale the frequency thresholds with the size of the dataset.
        threshold_bump = int(
            total_sentences / self.NUM_OF_SENTENCES_PER_OPINION_AND_ASPECT_TERM_INC
        )
        self.min_freq_opinion_candidate += threshold_bump
        self.min_freq_aspect_candidate += threshold_bump

        for round_idx in range(self.MAX_NUM_OF_ITERATIONS):
            has_pending_terms = (
                len(self.opinion_candidate_list_prev_iter) != 0
                or len(self.aspects_candidate_list_prev_iter) != 0
            )
            if not has_pending_terms:
                # Previous round produced no candidates of either kind.
                break

            print("\n#Iteration: {}".format(round_idx + 1))

            # Hand over a fresh iterator plus the number of docs it will yield.
            doc_stream = iter(docs_by_name.values())
            doc_count = len(docs_by_name)
            self.extract_opinion_and_aspect_terms(doc_stream, doc_count)

            self._insert_new_terms_to_tables()

        final_opinions = generate_final_opinion_candidates_list(
            self.opinion_candidate_list_raw,
            self.opinion_candidates_list_final,
            self.min_freq_opinion_candidate,
        )
        self.opinion_candidates_list_final = final_opinions

        final_aspects = _generate_final_aspect_candidates_list(
            self.aspect_candidate_list_raw,
            self.aspect_candidates_list_final,
            self.min_freq_aspect_candidate,
        )
        self.aspect_candidates_list_final = final_aspects

        self._write_output()

        # Later candidates sharing the same leading term overwrite earlier ones.
        return {
            candidate.term[0]: candidate.frequency
            for candidate in self.aspect_candidates_list_final
        }