Пример #1
0
 def test_import_stopwords(self):
     """Check import_stopwords against each hash table implementation.

     For every table type, the word "unless" must be retrievable by
     subscript after the import, and ``get`` must raise ``KeyError`` for
     "Parth", which the test expects to be missing.
     """
     table_types = (HashTableSepchain, HashTableLinear, HashTableQuadratic)
     for table_type in table_types:
         loaded = import_stopwords("stop_words.txt", table_type())
         self.assertEqual(loaded["unless"], "unless")
         self.assertRaises(KeyError, loaded.get, "Parth")
Пример #2
0
def main():
    """Run the interactive search prompt.

    The directory to search is taken from the first command line argument.
    Entering "q" quits the program; entering "s:{query}" hands {query} to
    the search engine. Any other input is ignored and the prompt repeats.
    """
    # The directory name comes from the command line.
    dir_name = sys.argv[1]

    # Load the stop words into a fresh hash table, then build the engine
    # for the requested directory.
    stop_table = import_stopwords("stop_words.txt", HashTable())
    search_engine = SearchEngine(dir_name, stop_table)

    print("Enter 'q' to exit program")
    print("Enter 's:{query}' to search")

    # Keep prompting until the user asks to quit.
    while True:
        raw_query = input("Enter query: ")

        if raw_query == "q":
            return

        if raw_query.startswith("s:"):
            # Everything after the "s:" marker is the query text.
            search_engine.search(raw_query[2:])
Пример #3
0
 def test_SE(self):
     """Exercise SearchEngine against the "docs" directory.

     Covers construction, search, rank, get_scores, get_wf, parse_words
     and read_file, in that order.
     """
     engine = SearchEngine(
         "docs", import_stopwords("stop_words.txt", HashTableLinear()))
     test_doc = "docs\\test.txt"
     data_doc = "docs\\data_structure.txt"

     # Construction.
     self.assertEqual(engine.doc_length.num_items, 4)
     self.assertEqual(
         engine.stopwords,
         import_stopwords("stop_words.txt", HashTableLinear()))

     # search(): check entries by name and by score rounded to 2 places.
     self.assertEqual(
         engine.search("Computer Science")[0], Pair(test_doc, 1.0))
     adt_name = engine.search("ADT")[0][0]
     self.assertEqual(adt_name, data_doc)
     adt_score = engine.search("ADT")[0][1]
     self.assertEqual(round(adt_score, 2), 0.01)
     hash_name = engine.search("Hash Table")[1][0]
     self.assertEqual(hash_name, data_doc)
     hash_score = engine.search("Hash Table")[1][1]
     self.assertEqual(round(hash_score, 2), 0.01)

     # rank(): the pairs are expected back ordered by descending second
     # field.
     unranked = [
         Pair(key, val)
         for key, val in (("P", 5), ("A", 2), ("R", 1), ("T", 4), ("H", 3))
     ]
     ranked = [
         Pair(key, val)
         for key, val in (("P", 5), ("T", 4), ("H", 3), ("A", 2), ("R", 1))
     ]
     self.assertEqual(engine.rank(unranked), ranked)

     # get_scores().
     self.assertEqual(
         engine.get_scores(["computer", "science"])[0], Pair(test_doc, 1.0))
     self.assertEqual(engine.get_scores(["every", "nothing", "few"]), [])

     # get_wf().
     self.assertEqual(round(engine.get_wf(6), 2), 2.79)
     self.assertEqual(engine.get_wf(-6), 0)

     # parse_words(): the expected output is lower-cased and omits "of",
     # "and", "afterwards" and "say".
     raw_lines = [
         "Automated information retrieval systems of ",
         "Information retrieval and afterwards say\n",
     ]
     parsed = [
         'automated', 'information', 'retrieval',
         'systems', 'information', 'retrieval',
     ]
     self.assertEqual(engine.parse_words(raw_lines), parsed)
     self.assertEqual(
         engine.parse_words(["and afterwards say\n", "much without the"]), [])

     # read_file().
     self.assertEqual(engine.read_file(test_doc), ["computer science\n"])
Пример #4
0
 def test_linear3(self):
     """Load the stop words into a HashTableLinear and check the result.

     Verifies the number of stored words, that the load factor lies in
     [0.3, 0.4], and membership for a few present and absent words.
     """
     ht = HashTableLinear()
     # The file name must be a string literal. The bare expression
     # ``stop_words.txt`` read the local ``stop_words`` before it was
     # assigned and raised UnboundLocalError before any assertion ran.
     stop_words = import_stopwords("stop_words.txt", ht)
     self.assertEqual(stop_words.size(), 305)
     self.assertTrue(0.3 <= stop_words.load_factor() <= 0.4)
     self.assertNotIn("collision", stop_words)
     self.assertIn("very", stop_words)
     self.assertNotIn("linear", stop_words)
     self.assertIn("a", stop_words)
Пример #5
0
 def test_sepchain5(self):
     """Load the stop words into a HashTableSepchain and check the result.

     Verifies the number of stored words, that the load factor does not
     exceed 1.5, and membership for a few present and absent words.
     """
     stop_words = import_stopwords(FILE, HashTableSepchain())

     # Size and load factor after the import.
     self.assertEqual(stop_words.size(), 305)
     self.assertLessEqual(stop_words.load_factor(), 1.5)

     # Membership checks.
     self.assertNotIn("collision", stop_words)
     self.assertIn("very", stop_words)
     self.assertNotIn("linear", stop_words)
     self.assertIn("a", stop_words)
Пример #6
0
def entry_point(dir_name):
    """Prompt for queries and print the search result for each one.

    Args:
        dir_name: directory handed to SearchEngine.

    The loop ends when the user enters 'q'.
    """
    engine = SearchEngine(
        dir_name, import_stopwords('stop_words.txt', HashTableLinear()))

    # iter(callable, sentinel) keeps calling input() until it returns 'q'.
    for query in iter(lambda: input('Input Search: '), 'q'):
        print(engine.search(query))
Пример #7
0
def build_stopwords(filename):
    """ Build a hash table of stop words from a text list
        Args:
            filename (str): path of stop words file
        Returns:
            whatever import_stopwords returns when given a new, empty
            HashTableLinear to fill
    """
    return import_stopwords(filename, HashTableLinear())
Пример #8
0
def main():
    """Run an interactive search loop over a directory.

    The directory name is read from the first command line argument
    (``sys.argv[1]``). The user is then prompted repeatedly:

    * a line starting with 's:' is a query; the text after the marker is
      lower-cased, stripped, searched for, and the result is printed;
    * 'q' exits the function, returning None;
    * any other input is ignored and the prompt is shown again.
    """
    search_engine = SearchEngine(
        sys.argv[1], import_stopwords("stop_words.txt", HashTableLinear()))
    while True:
        user_input = input(
            "Type 's:' and what you would like to search for or type 'q' to exit: "
        )
        if user_input == "q":
            return
        # Only a leading 's:' marks a query. A substring test would also
        # accept input such as "notes:x" and then drop its first two
        # characters, searching for "tes:x".
        if user_input.startswith("s:"):
            query = user_input[2:].lower().strip()
            print(search_engine.search(query))
    def test_whole_functionality(self):
        """ Tests the HashTableLinear functionality"""

        stop_table = import_stopwords('stop_words.txt', HashTableLinear())

        # Lookups on the imported stop words.
        self.assertRaises(KeyError, stop_table.get, 'BubbleGum')
        self.assertTrue('to' in stop_table)

        # Two tables holding the same single entry compare equal; a table
        # compares unequal to a differently filled one and to a non-table.
        first = HashTableLinear()
        first.put('three', 'three')
        twin = HashTableLinear()
        twin.put('three', 'three')
        self.assertEqual(first, twin)
        self.assertNotEqual(stop_table, first)
        self.assertNotEqual(stop_table, 5)

        # Expected repr: one line per slot, with the single entry in slot 4.
        expected = (
            "Hash_val = 0: None\n"
            "Hash_val = 1: None\n"
            "Hash_val = 2: None\n"
            "Hash_val = 3: None\n"
            "Hash_val = 4: ('three', 'three')\n"
            "Hash_val = 5: None\n"
            "Hash_val = 6: None\n"
            "Hash_val = 7: None\n"
            "Hash_val = 8: None\n"
            "Hash_val = 9: None\n"
            "Hash_val = 10: None\n"
        )
        self.assertEqual(expected, repr(first))

        # Subscript assignment and lookup.
        first['four'] = 'four'
        self.assertEqual(first['four'], 'four')
        first['five'] = 'five'
        self.assertEqual(0, stop_table.get('from'))

        # contains() and remove().
        self.assertFalse(first.contains('p'))
        self.assertTrue(first.contains('five'))
        first.remove('five')
        self.assertFalse(first.contains('five'))
        self.assertRaises(KeyError, first.remove, 'p')

        # The twin table was never touched after its single put().
        self.assertEqual(1, twin.size())
        self.assertEqual(0, twin.collisions())
def main():
    """Prompt for a directory, then serve commands until the user quits.

    Commands:
        "q": leave the loop.
        "s": ask for a query, parse it with the engine's parse_words and
            pass the result to the engine's search.
        anything else: print an error message and prompt again.
    """
    directory = input("please enter a directory name\n")

    # Built on the first search and reused afterwards. Constructing a new
    # SearchEngine (and re-importing the stop words) for every "s" command
    # repeated the same setup work for an unchanged directory.
    search = None

    while True:
        command = input("press q to exit\n"
                        "press s to search\n"
                        "What would you like to do?\n")
        if command == "q":
            break
        if command != "s":
            print("that is not a valid command\n")
            continue

        if search is None:
            search = SearchEngine(
                directory, import_stopwords("stop_words.txt",
                                            HashTableLinear()))

        new_query = [input("what would you like to search?\n")]
        query_string = search.parse_words(new_query)
        # NOTE(review): the return value of search() is discarded here;
        # presumably search() reports its own results - confirm.
        search.search(query_string)
Пример #11
0
def main():
    """Interactive entry point.

    Prints the instructions, asks for the directory to search in, then
    reads commands until the user quits. Commands are lower-cased before
    they are interpreted: 's:<query>' runs a search and prints one line
    per result, 'q:' quits, and anything else is ignored.
    """
    instructions = (
        "================== INSTRUCTIONS ==================",
        "1. Enter the name of a directory to search in.",
        "2. Enter a search query, or quit. To search,",
        "   prepend your search query with 's:'. For example,",
        "   's:Computer Science'. Type 'q:' to quit.",
    )
    for line in instructions:
        print(line)

    stopwords = import_stopwords("stop_words.txt", HashTable())
    directory = input("\nEnter a search directory: ")
    engine = SearchEngine(directory, stopwords)

    while True:
        query = input("\nEnter a command: ").lower()
        if query == 'q:':
            break
        if query.startswith('s:'):
            # Everything after the 's:' marker is the query text.
            for result in engine.search(query[2:]):
                print(f"{result[0]}: {result[1]}")
 def setUp(self):
     """Create the SearchEngine over the "docs" directory used by the tests."""
     self.dir = "docs"
     self.se = SearchEngine(
         self.dir, import_stopwords("stop_words.txt", HashTable()))