示例#1
0
    def test_regex_name_match_selector(self) -> None:
        """Check ``RegexNameMatchSelector`` in forward and reverse mode.

        The selector is configured with a pattern matching pack names that
        end in a digit. The names of the selected packs are compared as
        complete lists rather than pairwise through ``zip``: ``zip`` stops
        at the shorter input, so a selector yielding too few (or too many)
        packs would otherwise pass unnoticed.
        """
        selector = RegexNameMatchSelector()
        selector.initialize(
            configs={"select_name": "^.*\\d$"},
        )
        selected_names = [
            pack.pack_name for pack in selector.select(self.multi_pack)
        ]
        self.assertEqual(["1", "2"], selected_names)

        # Test reverse selection: re-initialize the same selector with the
        # same pattern but with the selection inverted.
        selector.initialize(
            {"select_name": "^.*\\d$", "reverse_selection": True}
        )
        reversed_names = [
            pack.pack_name for pack in selector.select(self.multi_pack)
        ]
        self.assertEqual(["Three"], reversed_names)
示例#2
0
    def test_ir_selector(self):
        """
        Round-trip a selector through the saved pipeline configuration.

        A pipeline using ``RegexNameMatchSelector`` is saved to
        ``self._pl_config_path``; a second pipeline is then built from that
        file and run. For every pack it produces, the number of generic
        entries must be 1 when the pack is named "1" or "2" and 0 otherwise.
        """
        # Original pipeline: the processor is attached together with a
        # RegexNameMatchSelector and its configuration.
        source_pl: Pipeline = Pipeline[MultiPack]()
        source_pl.set_reader(DummyMultiPackReader())
        source_pl.add(
            DummyProcessor(),
            selector=RegexNameMatchSelector(),
            selector_config={"select_name": "^.*\\d$"},
        )
        source_pl.save(self._pl_config_path)

        # Rebuild a pipeline purely from the saved configuration.
        restored_pl: Pipeline = Pipeline[MultiPack]()
        restored_pl.init_from_config_path(self._pl_config_path)
        restored_pl.initialize()

        digit_named = ("1", "2")
        for processed in restored_pl.process_dataset():
            for _, pack in processed.iter_packs():
                expected_entries = 1 if pack.pack_name in digit_named else 0
                self.assertEqual(pack.num_generics_entries, expected_entries)
示例#3
0
 def test_regex_name_match_selector(self) -> None:
     """Check that the selector picks the packs whose id ends in a digit.

     The ``meta.doc_id`` values of the selected packs are compared as a
     complete list rather than pairwise through ``zip``: ``zip`` stops at
     the shorter input, so an empty or over-long selection would otherwise
     pass unnoticed.
     """
     selector = RegexNameMatchSelector(select_name="^.*\\d$")
     selected_ids = [
         pack.meta.doc_id for pack in selector.select(self.multi_pack)
     ]
     self.assertEqual(["1", "2"], selected_ids)
示例#4
0
    def test_regex_name_match_selector_backward_compatability(self) -> None:
        """Check the constructor-argument ways of configuring the selector.

        The pattern is passed to the constructor first as a keyword and
        then positionally, and finally the selection is reversed through
        ``initialize``. In each case the names of the selected packs are
        compared as a complete list rather than pairwise through ``zip``:
        ``zip`` stops at the shorter input, so an empty or over-long
        selection would otherwise pass unnoticed.
        """
        selector = RegexNameMatchSelector(select_name="^.*\\d$")
        selector.initialize()
        keyword_names = [
            pack.pack_name for pack in selector.select(self.multi_pack)
        ]
        self.assertEqual(["1", "2"], keyword_names)

        # Test different configuration method (backward compatibility)
        selector = RegexNameMatchSelector("^.*\\d$")
        selector.initialize()
        positional_names = [
            pack.pack_name for pack in selector.select(self.multi_pack)
        ]
        self.assertEqual(["1", "2"], positional_names)

        # Test reverse selection: only the reverse flag is passed here, on
        # the selector that was built with the positional pattern.
        selector.initialize({"reverse_selection": True})
        reversed_names = [
            pack.pack_name for pack in selector.select(self.multi_pack)
        ]
        self.assertEqual(["Three"], reversed_names)
示例#5
0
    config = Config(config, default_hparams=None)

    # Build pipeline and add the reader, which will read query from terminal.
    nlp: Pipeline = Pipeline()
    nlp.set_reader(reader=TerminalReader())

    # Start to work on multi-packs in the rest of the pipeline, so we use a
    # boxer to change this.
    nlp.add(MultiPackBoxer(), config=config.boxer)

    # Search tweets.
    nlp.add(TweetSearchProcessor(), config=config.twitter_search)

    # Conduct sentiment analysis.
    pattern = rf"{config.twitter_search.response_pack_name_prefix}_\d"
    selector_hit = RegexNameMatchSelector(select_name=pattern)
    nlp.add(
        component=VaderSentimentProcessor(),
        selector=selector_hit,
        config=config.vader_sentiment,
    )

    nlp.initialize()

    # process dataset
    m_pack: MultiPack
    for m_pack in nlp.process_dataset():
        print("The number of datapacks(including query) is", len(m_pack.packs))

        tweets, pos_sentiment, neg_sentiment, neutral_sentiment = 0, 0, 0, 0