Example #1
0
    def test_worker_add_links_in_crawled(self):
        """Re-adding the worker's seed link must leave to_crawl unchanged."""
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        # Snapshot the queue size, try to re-queue the seed URL, and verify
        # the queue did not grow.
        size_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertEqual(len(worker.to_crawl), size_before)




            def test_worker_contact(self):
                """
                Contacting a worker that has no listener must make worker.run
                raise ConnectionRefusedError.

                Fixes in this revision: the original line ended with a stray
                ':' (a syntax error) and referenced undefined names `worker`
                and `address`; both are now defined before use.
                """
                worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
                # NOTE(review): (host, port) tuple assumed — confirm against
                # handle_worker_contact's actual signature.
                address = ("127.0.0.1", 8000)
                contact = handle_worker_contact(self, worker, address)

                self.assertRaises(ConnectionRefusedError, worker.run)

            def send_mother(self):
                """Receive a payload from the worker and forward it to mother.

                NOTE(review): relies on `worker.recv`, `self.buff_size` and
                `send_to_mother` defined elsewhere in the project — confirm.
                """
                worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

                payload = worker.recv(self.buff_size)
                # This payload has no original target.
                target = None
                send_to_mother(self, payload, target)
    def test_worker_add_links_in_crawled(self):
        """Adding the worker's own seed URL again should not grow the queue."""
        seed = "https://www.reddit.com/user/Chrikelnel"
        worker = BasicUserParseWorker(seed)
        worker.crawled = []

        expected_size = len(worker.to_crawl)
        worker.add_links([seed])
        actual_size = len(worker.to_crawl)

        self.assertEqual(actual_size, expected_size)
Example #3
0
    def test_worker_duplicate_links(self):
        """A link already present in `crawled` must not be queued again."""
        link = "https://www.reddit.com/user/Chrikelnel"
        worker = BasicUserParseWorker(link)
        worker.crawled = []
        initial_size = len(worker.to_crawl)

        # Mark the link as already crawled, then attempt to queue it.
        worker.crawled.append(link)
        worker.add_links([link])

        # The queue must be exactly as long as before the attempt.
        self.assertEqual(len(worker.to_crawl), initial_size)
Example #4
0
    def test_worked_cannot_add_already_crawled_links(self):
        """
        adding a link that has already been crawled does not change the to_crawl length
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        # The link must actually be recorded as crawled for this test to
        # exercise the already-crawled path. The original skipped this step
        # and then asserted a +1 growth, contradicting both the test name
        # and its docstring (compare test_worker_duplicate_links).
        worker.crawled.append("https://www.reddit.com/user/Chrikelnel")

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        # An already-crawled link is skipped, so the queue length is unchanged.
        self.assertEqual(len_to_crawl_after, len_to_crawl_before)
    def test_add_multiple_links(self):
        """
        add_links() with three links grows to_crawl by exactly 2.

        The +2 (not +3) expectation implies the seed URL is already queued
        and add_links de-duplicates — TODO confirm against add_links.

        Renamed from `add_multiple_links`: without the `test_` prefix,
        unittest discovery never ran this test at all.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)

        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca",
            "https://hotmail.com"
        ])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
    def test_zelan_test_two(self):
        """
        A link passed to add_links() must appear in worker.to_crawl.
        """
        # Dead code removed: a pointless `worker = None` immediately before
        # the real assignment, and an unused `len_to_crawl_before` local.
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertIn("https://www.reddit.com/user/Chrikelnel",
                      worker.to_crawl)
    def test_zelan_test_one(self):
        """
        add_links() with one new link grows to_crawl by exactly 1.
        """
        # Dead code removed: `worker = None` immediately before the real
        # assignment served no purpose.
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []
        # presumably a cap on how many links may be queued — raised so the
        # new link cannot be rejected for capacity; TODO confirm semantics.
        worker.max_links = 5

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
    def test_zelan_test_four(self):
        """
        add_links() with two distinct new links grows to_crawl by exactly 2.
        """
        # Dead code removed: `worker = None` immediately before the real
        # assignment served no purpose.
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca"
        ])
        len_to_crawl_after = len(worker.to_crawl)
        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
Example #9
0
    def test_worker_cannot_add_duplicate_links(self):
        """
        calling add_links() with two identical links only adds 1 link
        """
        duplicate = "https://www.reddit.com/user/GallowBoob"
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        queue_size = len(worker.to_crawl)
        # Submit the same link twice in one call; only one copy should land.
        worker.add_links([duplicate, duplicate])

        self.assertEqual(len(worker.to_crawl), queue_size + 1)
Example #10
0
    def test_worker_add_links_in_crawled(self):
        """
        calling add_links() with one link on a worker increases the to_crawl length by 1

        This unit test was partially implemented in class but was broken
        It is now fixed
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        count_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/GallowBoob"])
        count_after = len(worker.to_crawl)

        self.assertEqual(count_after, count_before + 1)
Example #11
0
    def test_worker_crawl_links(self):
        """
        Verify to_crawl and crawled are updated correctly once links are crawled:
        afterwards to_crawl is empty and crawled has grown by the number of
        links that were queued.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        queued_count = len(worker.to_crawl)
        crawled_before = len(worker.crawled)

        # run() is expected to fail to connect, yet still drain the queue.
        self.assertRaises(ConnectionRefusedError, worker.run)

        self.assertEqual(len(worker.to_crawl), 0)
        self.assertEqual(crawled_before + queued_count, len(worker.crawled))