Пример #1
0
 def setUp(self):
     """Build three ``Index`` fixtures and publish them on the test class.

     Each fixture receives an ``OccurrenceList`` holding the URLs of the
     other two fixtures as its fourth constructor argument.
     """
     cls = self.__class__
     url1 = "www.index1.com"
     url2 = "www.index2.com"
     url3 = "www.index3.com"

     cls.index1 = Index(10010, "Index 1", url1,
                        OccurrenceList([url2, url3]))
     cls.index2 = Index(10011, "Index 2", url2,
                        OccurrenceList([url1, url3]))
     # NOTE(review): index3 reuses the first argument (10010) of index1;
     # confirm this duplication is intentional for the tests.
     cls.index3 = Index(10010, "Index 3", url3,
                        OccurrenceList([url1, url2]))
Пример #2
0
    def test_6_add_and_search_multiple_words(self):
        """Add a batch of words to the class-level trie, then search each one.

        Every word is added with a one-element ``OccurrenceList`` whose
        ``Index`` is built from the word's position in the list. The search
        phase asserts that a result exists, that the word ends with the
        result's ``key``, and that the first stored value's ``index`` ends
        with the word's position.
        """
        # Arrange
        trie = self.__class__.trie
        words = [
            "bear", "bell", "bid", "bull", "buy",
            "sell", "stock", "stop", "beat", "belly",
            "bully", "sea", "stoop", "gamma", "game",
        ]

        for position, word in enumerate(words):
            occurrences = OccurrenceList()
            entry = Index("1000{0}".format(position),
                          "Test {0}".format(position),
                          "www.test{0}.com".format(position),
                          [])
            occurrences.append(entry)
            added = trie.add(word, occurrences)
            self.assertTrue(added, "Word {0} is not added".format(word))
        print(trie)

        # Act and Assert
        for position, word in enumerate(words):
            found = trie.search(word)
            self.assertIsNotNone(found,
                                 "Word {0} is not found".format(word))
            self.assertTrue(word.endswith(found.key),
                            "Suffix not matching with {0}".format(word))
            self.assertTrue(found.value[0].index.endswith(str(position)),
                            "Value suffix not matching with {0}".format(
                                position))
Пример #3
0
    def test_4_add_with_existing_prefix(self):
        """Add "bell" to the class-level trie and expect a truthy result.

        NOTE(review): the test name suggests the trie already holds a word
        sharing a prefix with "bell" from an earlier numbered test; confirm
        against the test ordering.
        """
        # Arrange
        shared_trie = self.__class__.trie
        occurrences = OccurrenceList()
        entry = Index("10041", "Test 3", "www.test3.com", [])
        occurrences.append(entry)

        # Act
        added = shared_trie.add("bell", occurrences)

        # Assert
        self.assertTrue(added, "Word is not added")
        print(shared_trie)
Пример #4
0
    def test_3_add_again(self):
        """Add "bear" to the class-level trie and expect a truthy result.

        NOTE(review): the test name suggests "bear" was already added by an
        earlier numbered test; confirm against the test ordering.
        """
        # Arrange
        shared_trie = self.__class__.trie
        occurrences = OccurrenceList()
        entry = Index("10023", "Test 2", "www.test2.com", [])
        occurrences.append(entry)

        # Act
        added = shared_trie.add("bear", occurrences)

        # Assert
        self.assertTrue(added, "Word is not added")
        print(shared_trie)
Пример #5
0
    def populate_web(self):
        """
        Scrape every url in ``self.urls`` and record the result.

        For each page an ``Index`` (position, title, url, outgoing links) is
        appended to ``self.web``, and every keyword extracted from the page
        text is added, lower-cased, to ``self.keywords`` together with a
        one-element ``OccurrenceList`` holding the page's position.
        """
        for address in self.urls:
            scraped = scrape(address)
            page_keywords = get_keywords(scraped.text)

            # Position the new record occupies in self.web once appended.
            slot = len(self.web)
            record = Index(slot, scraped.title, scraped.url,
                           scraped.links_to)
            self.web.append(record)

            for keyword in page_keywords:
                occurrences = OccurrenceList()
                occurrences.append(slot)
                self.keywords.add(keyword.lower(), occurrences)
Пример #6
0
    def test_5_add_multiple_words(self):
        """Add a batch of words to the class-level trie, asserting each add.

        Every word is added with a one-element ``OccurrenceList`` whose
        ``Index`` is built from the word's position in the list.
        """
        # Arrange
        trie = self.__class__.trie
        words = [
            "bear", "bell", "bid", "bull", "buy", "sell",
            "stock", "stop", "beat", "sea", "stoop",
        ]

        # Act and Assert
        for position, word in enumerate(words):
            occurrences = OccurrenceList()
            entry = Index("1000{0}".format(position),
                          "Test {0}".format(position),
                          "www.test{0}.com".format(position),
                          [])
            occurrences.append(entry)
            added = trie.add(word, occurrences)
            self.assertTrue(added, "Word {0} is not added".format(word))

        print(trie)
Пример #7
0
    def test_9_validate_to_json(self):
        """Populate the class-level trie and check what ``to_json`` returns.

        After adding every word (each add is asserted), the test requires
        ``trie.to_json()`` to be non-None and of exact type ``str``.
        """
        # Arrange
        trie = self.__class__.trie
        words = [
            "bear", "bell", "bid", "bull", "buy",
            "sell", "stock", "stop", "beat", "belly",
            "bully", "sea", "stoop", "gamma", "game",
        ]

        for position, word in enumerate(words):
            occurrences = OccurrenceList()
            entry = Index("1000{0}".format(position),
                          "Test {0}".format(position),
                          "www.test{0}.com".format(position),
                          [])
            occurrences.append(entry)
            added = trie.add(word, occurrences)
            self.assertTrue(added, "Word {0} is not added".format(word))

        # Act
        serialized = trie.to_json()

        # Assert
        self.assertIsNotNone(serialized, "to_json returns None")
        # Exact type identity (not isinstance), as in the original check.
        self.assertIs(type(serialized), str,
                      "to_json doesn't return string")