def test_regex_name_match_selector(self) -> None:
    """Check ``RegexNameMatchSelector`` in normal and reversed mode.

    The selector is initialized with the pattern ``^.*\\d$`` and is
    expected to select the packs named ``"1"`` and ``"2"``; after being
    re-initialized with ``reverse_selection`` enabled it is expected to
    select only the pack named ``"Three"``.

    The selected names are compared as complete lists. Pairing expected
    and actual values with ``zip`` would stop at the shorter sequence, so
    a selector that returned no packs (or extra packs) would pass without
    a single assertion being executed.
    """
    selector = RegexNameMatchSelector()
    selector.initialize(
        configs={"select_name": "^.*\\d$"},
    )
    selected_names = [
        pack.pack_name for pack in selector.select(self.multi_pack)
    ]
    self.assertEqual(["1", "2"], selected_names)

    # Test reverse selection.
    selector.initialize(
        {"select_name": "^.*\\d$", "reverse_selection": True}
    )
    selected_names = [
        pack.pack_name for pack in selector.select(self.multi_pack)
    ]
    self.assertEqual(["Three"], selected_names)
def test_ir_selector(self):
    """
    Verify that a selector attached to a pipeline component still applies
    after the pipeline is saved and rebuilt from its saved configuration.
    """
    # Assemble the source pipeline with a RegexNameMatchSelector attached
    # to the processor, then save it.
    source_pipeline: Pipeline = Pipeline[MultiPack]()
    source_pipeline.set_reader(DummyMultiPackReader())
    source_pipeline.add(
        DummyProcessor(),
        selector=RegexNameMatchSelector(),
        selector_config={"select_name": "^.*\\d$"},
    )
    source_pipeline.save(self._pl_config_path)

    # Rebuild a second pipeline from the saved configuration.
    restored_pipeline: Pipeline = Pipeline[MultiPack]()
    restored_pipeline.init_from_config_path(self._pl_config_path)
    restored_pipeline.initialize()

    # Packs named "1" or "2" are expected to hold exactly one generics
    # entry; every other pack is expected to hold none.
    selected_names = ("1", "2")
    for multi_pack in restored_pipeline.process_dataset():
        for _, pack in multi_pack.iter_packs():
            expected_entries = 1 if pack.pack_name in selected_names else 0
            self.assertEqual(pack.num_generics_entries, expected_entries)
def test_regex_name_match_selector(self) -> None:
    """Check the packs chosen by a ``RegexNameMatchSelector``.

    The selector is constructed with the pattern ``^.*\\d$`` and is
    expected to select the packs whose ``meta.doc_id`` values are ``"1"``
    and ``"2"``, in that order.

    The selected ids are compared as a complete list. Pairing expected
    and actual values with ``zip`` would stop at the shorter sequence, so
    a selector that returned no packs (or extra packs) would pass without
    a single assertion being executed.
    """
    selector = RegexNameMatchSelector(select_name="^.*\\d$")
    selected_ids = [
        pack.meta.doc_id for pack in selector.select(self.multi_pack)
    ]
    self.assertEqual(["1", "2"], selected_ids)
def test_regex_name_match_selector_backward_compatability(self) -> None:
    """Check the constructor-argument way of configuring the selector.

    Three cases are exercised with the pattern ``^.*\\d$``:

    1. the pattern passed as the ``select_name`` keyword argument,
    2. the pattern passed as the first positional argument,
    3. the second selector re-initialized with ``reverse_selection``
       enabled.

    The first two cases are expected to select the packs named ``"1"``
    and ``"2"``; the reversed case is expected to select only ``"Three"``.

    The selected names are compared as complete lists. Pairing expected
    and actual values with ``zip`` would stop at the shorter sequence, so
    a selector that returned no packs (or extra packs) would pass without
    a single assertion being executed.
    """
    selector = RegexNameMatchSelector(select_name="^.*\\d$")
    selector.initialize()
    selected_names = [
        pack.pack_name for pack in selector.select(self.multi_pack)
    ]
    self.assertEqual(["1", "2"], selected_names)

    # Test different configuration method (backward compatibility)
    selector = RegexNameMatchSelector("^.*\\d$")
    selector.initialize()
    selected_names = [
        pack.pack_name for pack in selector.select(self.multi_pack)
    ]
    self.assertEqual(["1", "2"], selected_names)

    # Test reverse selection.
    selector.initialize({"reverse_selection": True})
    selected_names = [
        pack.pack_name for pack in selector.select(self.multi_pack)
    ]
    self.assertEqual(["Three"], selected_names)
config = Config(config, default_hparams=None) # Build pipeline and add the reader, which will read query from terminal. nlp: Pipeline = Pipeline() nlp.set_reader(reader=TerminalReader()) # Start to work on multi-packs in the rest of the pipeline, so we use a # boxer to change this. nlp.add(MultiPackBoxer(), config=config.boxer) # Search tweets. nlp.add(TweetSearchProcessor(), config=config.twitter_search) # Conduct sentiment analysis. pattern = rf"{config.twitter_search.response_pack_name_prefix}_\d" selector_hit = RegexNameMatchSelector(select_name=pattern) nlp.add( component=VaderSentimentProcessor(), selector=selector_hit, config=config.vader_sentiment, ) nlp.initialize() # process dataset m_pack: MultiPack for m_pack in nlp.process_dataset(): print("The number of datapacks(including query) is", len(m_pack.packs)) tweets, pos_sentiment, neg_sentiment, neutral_sentiment = 0, 0, 0, 0