Exemplo n.º 1
0
 def test_doc(self):
     """``--doc zip`` should yield a default config with "zip" added to its documents."""
     actual = generate_crawler_config(
         self.get_namespace("https://www.example.com --doc zip")
     )
     expected = CrawlerConfig()
     expected.documents.add("zip")
     # Compare attribute dictionaries so every config field is checked at once.
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 2
0
 def test_doc_and_no_docs(self):
     """``--find`` without ``docs`` plus ``--doc zip`` should leave only "zip" in documents."""
     argv_line = "https://www.example.com --find phone email social --doc zip"
     actual = generate_crawler_config(self.get_namespace(argv_line))
     expected = CrawlerConfig()
     # The whole document set is replaced, not extended.
     expected.documents = {"zip"}
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 3
0
 def test_agent(self):
     """``-a g`` should set the config's agent to the GOOGLE agent value."""
     actual = generate_crawler_config(
         self.get_namespace("https://www.example.com -a g")
     )
     expected = CrawlerConfig()
     expected.agent = arachnid_enums.Agent.GOOGLE.value
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 4
0
 def test_stealth(self):
     """``--stealth`` should match a default config after ``set_stealth()``."""
     parsed = self.get_namespace("https://www.example.com --stealth")
     actual = generate_crawler_config(parsed)
     expected = CrawlerConfig()
     expected.set_stealth()
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 5
0
 def test_stealth_modified(self):
     """``-T none --stealth`` should match stealth settings with the delay set to NONE."""
     parsed = self.get_namespace("https://www.example.com -T none --stealth")
     actual = generate_crawler_config(parsed)
     expected = CrawlerConfig()
     expected.set_stealth()
     # Applied after set_stealth() so the explicit delay is the final value.
     expected.default_delay = arachnid_enums.Delay.NONE.value
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 6
0
 def test_multiple(self):
     """``--find email phone social docs`` should enable all three scrape flags."""
     argv_line = "https://www.example.com --find email phone social docs"
     actual = generate_crawler_config(self.get_namespace(argv_line))
     expected = CrawlerConfig()
     # documents is left at whatever CrawlerConfig() initialises it to.
     for flag in ("scrape_phone_number", "scrape_email", "scrape_social_media"):
         setattr(expected, flag, True)
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 7
0
 def test_phone_number(self):
     """``--find phone`` should enable only phone scraping and empty the documents."""
     actual = generate_crawler_config(
         self.get_namespace("https://www.example.com --find phone")
     )
     expected = CrawlerConfig()
     expected.scrape_phone_number = True
     expected.scrape_email = False
     expected.scrape_social_media = False
     # NOTE(review): `{}` is an empty dict, not an empty set, while other tests
     # treat `documents` as a set -- confirm this matches what
     # generate_crawler_config actually assigns.
     expected.documents = {}
     self.assertEqual(vars(actual), vars(expected))
Exemplo n.º 8
0
 def test_delay(self):
     """``-T high`` should set the config's default delay to the HIGH delay value."""
     cli = "https://www.example.com -T high"
     namespace = self.get_namespace(cli)
     c = generate_crawler_config(namespace)
     correct_output = CrawlerConfig()
     correct_output.default_delay = arachnid_enums.Delay.HIGH.value
     # Without this comparison the test passes regardless of what the parser
     # produced; compare attribute dictionaries like the sibling tests do.
     self.assertEqual(vars(c), vars(correct_output))
Exemplo n.º 9
0
 def test_default(self):
     """A bare URL with no options should produce a config equal to the defaults."""
     actual = generate_crawler_config(
         self.get_namespace("https://www.example.com")
     )
     expected = CrawlerConfig()
     self.assertEqual(vars(actual), vars(expected))