def run(
        self,
        aspect_lex: PathLike = None,
        opinion_lex: PathLike = None,
        data: PathLike = None,
        parsed_data: PathLike = None,
        inference_results: PathLike = None,
    ) -> Optional[pd.DataFrame]:
        """Obtain sentiment inference results and compute statistics on them.

        Results come from one of three sources, checked in this order:
        a previously saved ``inference_results`` JSON file, a
        ``parsed_data`` source (documents already parsed, stored as JSON),
        or a raw-text ``data`` source (parsed here with ``SpacyBISTParser``).
        When inference is run here, the results are also written to
        ``SENTIMENT_OUT / "inference_results.json"``.

        Args:
            aspect_lex: Aspect lexicon CSV; its first column is read.
            opinion_lex: Opinion lexicon, loaded with ``load_opinion_lex``.
            data: Source of raw-text documents.
            parsed_data: Source of already-parsed documents.
            inference_results: JSON file holding saved inference results.

        Returns:
            The value produced by ``self._compute_stats``, or ``None`` when
            none of ``data``, ``parsed_data`` or ``inference_results`` is
            given.

        Raises:
            ValueError: If the opinion lexicon or the aspect lexicon is empty.
        """
        # Both lexicons are loaded and validated before any input is examined.
        opinions = load_opinion_lex(opinion_lex)
        if not opinions:
            raise ValueError("Empty opinion lexicon!")

        aspects = pd.read_csv(aspect_lex, header=None, encoding="utf-8")[0]
        if aspects.empty:
            raise ValueError("Empty aspect lexicon!")

        if not (inference_results or data or parsed_data):
            print(
                "No input given. Please supply one of: "
                "data directory, parsed data directory, or inference results.")
            return None

        if inference_results:
            # Reuse results saved by an earlier run.
            with open(inference_results, encoding="utf-8") as results_file:
                results = json.load(results_file,
                                    object_hook=SentimentDoc.decoder)
        else:
            inference = SentimentInference(aspect_lex, opinions, parse=False)

            if parsed_data:
                data_source = parsed_data

                def to_parsed_doc(raw_doc):
                    # Documents are already parsed; just decode the JSON.
                    return json.loads(raw_doc,
                                      object_hook=CoreNLPDoc.decoder)
            else:
                # Source data is raw text, so it has to be parsed first.
                from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

                data_source = data
                to_parsed_doc = SpacyBISTParser().parse

            results = {}
            print("Running inference on data files... (Iterating data files)")
            for file_name, raw_doc in self._iterate_docs(data_source):
                sentiment_doc = inference.run(
                    parsed_doc=to_parsed_doc(raw_doc))
                # Only truthy inference results are kept.
                if sentiment_doc:
                    results[file_name] = sentiment_doc

            out_path = SENTIMENT_OUT / "inference_results.json"
            with open(out_path, "w", encoding="utf-8") as out_file:
                json.dump(results,
                          out_file,
                          cls=SentimentDocEncoder,
                          indent=4,
                          sort_keys=True)

        print("\nComputing statistics...")
        stats = self._compute_stats(results, aspects, opinions)
        print("Done.")
        return stats
示例#2
0
    def __init__(self,
                 aspect_lex: PathLike,
                 opinion_lex: Union[PathLike, dict],
                 parse: bool = True):
        """Inits SentimentInference with given aspect and opinion lexicons.

        Args:
            aspect_lex: Aspect lexicon location, passed to
                ``_load_aspect_lexicon``.
            opinion_lex: Either an already-loaded opinion lexicon (a ``dict``
                or dict subclass), used as-is, or a location passed to
                ``load_opinion_lex``.
            parse: When true, a ``SpacyBISTParser`` is created and stored in
                ``self.parser``; otherwise ``self.parser`` is ``None``.
        """
        INFERENCE_OUT.mkdir(parents=True, exist_ok=True)

        # isinstance rather than an exact type comparison, so that dict
        # subclasses (e.g. OrderedDict) count as in-memory lexicons instead
        # of being treated as a path.
        if isinstance(opinion_lex, dict):
            self.opinion_lex = opinion_lex
        else:
            self.opinion_lex = load_opinion_lex(opinion_lex)

        self.aspect_lex = _load_aspect_lexicon(aspect_lex)
        self.intensifier_lex = _read_lexicon_from_csv('IntensifiersLex.csv')
        self.negation_lex = _read_lexicon_from_csv('NegationSentLex.csv')

        if parse:
            # The import runs only when a parser is actually requested.
            from nlp_architect.pipelines.spacy_bist import SpacyBISTParser
            self.parser = SpacyBISTParser()
        else:
            self.parser = None
示例#3
0
    def __init__(
        self,
        aspect_lex: Union[str, PathLike],
        opinion_lex: Union[str, PathLike, dict],
        parse: bool = True,
    ):
        """Inits SentimentInference with given aspect and opinion lexicons.

        Args:
            aspect_lex: Aspect lexicon location; wrapped in ``Path`` and
                passed to ``_load_aspect_lexicon``.
            opinion_lex: Either an already-loaded opinion lexicon (a ``dict``
                or dict subclass), used as-is, or a location that is wrapped
                in ``Path`` and passed to ``load_opinion_lex``.
            parse: When true, a ``SpacyBISTParser`` (spaCy model ``"en"``) is
                created and stored in ``self.parser``; otherwise
                ``self.parser`` is ``None``.
        """
        INFERENCE_OUT.mkdir(parents=True, exist_ok=True)

        # isinstance rather than an exact type comparison, so that dict
        # subclasses (e.g. OrderedDict) are used directly instead of being
        # handed to Path(), which would raise TypeError.
        if isinstance(opinion_lex, dict):
            self.opinion_lex = opinion_lex
        else:
            self.opinion_lex = load_opinion_lex(Path(opinion_lex))

        self.aspect_lex = _load_aspect_lexicon(Path(aspect_lex))
        self.intensifier_lex = _read_lexicon_from_csv("IntensifiersLex.csv")
        self.negation_lex = _read_lexicon_from_csv("NegationSentLex.csv")

        if parse:
            # The import runs only when a parser is actually requested.
            from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

            self.parser = SpacyBISTParser(spacy_model="en")
        else:
            self.parser = None
示例#4
0
    def __init__(
        self,
        aspect_lex: Union[str, PathLike],
        opinion_lex: Union[str, PathLike, dict],
        parse: bool = True,
        parser: str = "spacy",
        spacy_model: str = "en_core_web_sm",
    ):
        """Inits SentimentInference with given aspect and opinion lexicons.

        Args:
            aspect_lex: Aspect lexicon location; wrapped in ``Path`` and
                passed to ``_load_aspect_lexicon``.
            opinion_lex: Either an already-loaded opinion lexicon (a ``dict``
                or dict subclass), used as-is, or a location that is wrapped
                in ``Path`` and passed to ``load_opinion_lex``.
            parse: When true, a parser object is created and stored in
                ``self.parser``; otherwise ``self.parser`` is ``None``.
            parser: Which parser to build when ``parse`` is true: ``"bist"``
                for ``SpacyBISTParser`` or ``"spacy"`` for ``SpacyInstance``.
                Always recorded in ``self.parser_name``.
            spacy_model: spaCy model name handed to the chosen parser.

        Raises:
            ValueError: If ``parse`` is true and ``parser`` is neither
                ``"bist"`` nor ``"spacy"``.
        """
        INFERENCE_OUT.mkdir(parents=True, exist_ok=True)

        # isinstance rather than an exact type comparison, so that dict
        # subclasses (e.g. OrderedDict) are used directly instead of being
        # handed to Path(), which would raise TypeError.
        if isinstance(opinion_lex, dict):
            self.opinion_lex = opinion_lex
        else:
            self.opinion_lex = load_opinion_lex(Path(opinion_lex))

        self.aspect_lex = _load_aspect_lexicon(Path(aspect_lex))
        self.intensifier_lex = _read_lexicon_from_csv("IntensifiersLex.csv")
        self.negation_lex = _read_lexicon_from_csv("NegationSentLex.csv")
        self.parser_name = parser

        if not parse:
            self.parser = None
        elif parser == "bist":
            from nlp_architect.pipelines.spacy_bist import SpacyBISTParser

            self.parser = SpacyBISTParser(spacy_model=spacy_model)
        elif parser == "spacy":
            from nlp_architect.utils.text import SpacyInstance

            # Pipeline component names passed to SpacyInstance as ``disable``.
            disable = [
                "merge_noun_chunks",
                "ner",
                "entity_linker",
                "textcat",
                "entity_ruler",
                "sentencizer",
                "merge_entities",
            ]
            self.parser = SpacyInstance(
                model=spacy_model, disable=disable, ptb_pos=True, n_jobs=1
            )
        else:
            # Previously an unknown name left ``self.parser`` unset, so the
            # failure only surfaced later as an AttributeError on first use.
            raise ValueError(
                f"Unsupported parser: {parser!r} (expected 'bist' or 'spacy')"
            )