def test_difference2(self):
     """Check ``difference`` on two sets that share only "Jake" and "Shane".

     The expected size of 7 equals the number of names that occur in exactly
     one of the two inputs; the shared names must be absent from the result.
     """
     left = HashSet(["Jake", "Jeff", "Shane", "Fre"])
     right = HashSet(
         ["Melody", "Erik", "Shane", "Alan", "Bob", "Kaichi", "Jake"])
     result = left.difference(right)
     assert result.size == 7
     # (name, expected membership) pairs, checked in this order.
     expectations = [
         ("Jake", False),
         ("Shane", False),
         ("Jeff", True),
         ("Alan", True),
     ]
     for name, expected in expectations:
         assert result.contains(name) == expected
 def test_intersection(self):
     """Check that ``intersection`` keeps only the names present in both sets.

     The inputs share "Jake" and "Shane"; every other name occurs in just one
     of them and is expected to be missing from the result.
     """
     left = HashSet(["Melody", "Erik", "Jake", "Shane"])
     right = HashSet(["Jake", "Alan", "Shane"])
     common = left.intersection(right)
     assert common.size == 2
     # (name, expected membership) pairs, checked in this order.
     expectations = [
         ("Jake", True),
         ("Shane", True),
         ("Melody", False),
         ("Erik", False),
         ("Alan", False),
     ]
     for name, expected in expectations:
         assert common.contains(name) == expected
 def test_difference(self):
     """Check ``difference`` on two overlapping sets.

     "Jake" and "Shane" occur in both inputs and are expected to be absent;
     "Melody", "Erik" and "Alan" each occur in one input only and are
     expected to be present. "Bob" is in neither input.
     """
     left = HashSet(["Melody", "Erik", "Jake", "Shane"])
     right = HashSet(["Jake", "Alan", "Shane"])
     result = left.difference(right)
     assert result.size == 3
     # (name, expected membership) pairs, checked in this order.
     expectations = [
         ("Melody", True),
         ("Erik", True),
         ("Alan", True),
         ("Jake", False),
         ("Shane", False),
         ("Bob", False),
     ]
     for name, expected in expectations:
         assert result.contains(name) == expected
 def test_union(self):
     """Check that ``union`` holds every name from either set exactly once.

     "Jake" appears in both inputs, so five distinct names are expected;
     "Bob" is in neither input and must not be reported as a member.
     """
     left = HashSet(["Melody", "Erik", "Jake"])
     right = HashSet(["Jake", "Alan", "Shane"])
     combined = left.union(right)
     assert combined.size == 5
     # (name, expected membership) pairs, checked in this order.
     expectations = [
         ("Melody", True),
         ("Erik", True),
         ("Jake", True),
         ("Alan", True),
         ("Shane", True),
         ("Bob", False),
     ]
     for name, expected in expectations:
         assert combined.contains(name) == expected
 def test_is_subset(self):
     """Check ``is_subset`` with a contained and a non-contained argument.

     The method is called on the four-name set. It is expected to return
     True when every name of the argument is in that set, and False when the
     argument carries a name ("Alec") the set does not have.
     """
     base = HashSet(["Jake", "Jeff", "Shane", "Fre"])

     contained = HashSet(["Jeff", "Shane"])
     assert base.is_subset(contained) == True

     not_contained = HashSet(["Jeff", "Shane", "Alec"])
     assert base.is_subset(not_contained) == False
def _strip_to_word(token, notChars):
    """Reduce a raw token to its leading run of word characters.

    Matches of ``notChars`` at the very start of the token are dropped one at
    a time. As soon as the first match lies further in, the token is cut at
    that match, so e.g. "don't" becomes "don". Returns "" when nothing is left.
    """
    while token:
        hit = notChars.search(token)
        if hit is None:
            break
        if hit.start() == 0:
            token = token[hit.end():]
        else:
            token = token[:hit.start()]
    return token


def createConcordance(inputFile, stopWords, outputFile):
    """Build a concordance of ``inputFile`` and write it to ``outputFile``.

    Every word that is not a stop word is recorded together with the numbers
    (starting at 1) of the lines it occurs on. A word occurring several times
    on one line gets that line number appended once per occurrence.

    Args:
        inputFile: path of the text file to index.
        stopWords: path of a file with one stop word per line. Reading stops
            at the first blank line.
        outputFile: path of the file to write. Each line has the form
            ``<word> - <str() of the word's line-number list>``; words are
            emitted in the order returned by ``bst.inorder()``.
    """
    # declare data structures
    stopSet = HashSet(3500)
    dictionary = HashDictionary(10000)
    bst = BST()

    # declare regular expressions (raw strings keep the backslashes literal)
    exprSpaces = re.compile(r'\s')
    dhyp = re.compile('--')
    notChars = re.compile(r'\W|-')

    # populate hashset with stop words
    with open(stopWords, 'r') as stopFile:
        for stop in stopFile:
            # rstrip rather than slicing at a newline match: the final line
            # of the file may legitimately lack a trailing newline.
            stop = stop.rstrip('\n')
            if stop == "":
                break
            stopSet.add(stop)

    # open the input and process into words
    with open(inputFile, 'r') as f:
        for lineNum, line in enumerate(f, 1):
            # split on double hyphens first, then on single whitespace chars
            tokens = []
            for piece in dhyp.split(line):
                tokens.extend(exprSpaces.split(piece))

            for token in tokens:
                word = _strip_to_word(token.lower(), notChars)

                # skip empty leftovers and stop words
                if not word or stopSet.contains(word):
                    continue

                lineList = dictionary.search(word)
                if lineList is None:
                    # first occurrence: register the word in both structures
                    lineList = LinkedIndexedList()
                    lineList.append(lineNum)
                    dictionary.store(word, lineList)
                    bst.add(word)
                else:
                    lineList.append(lineNum)

    # open output and use BST to print out words
    #   in alphabetical order
    with open(outputFile, 'w') as output:
        for item in bst.inorder():
            output.write(item + " - " + str(dictionary.search(item)) + "\n")
Exemplo n.º 7
0
 def setUp(self):
     """Store a newly constructed, argument-less ``HashSet`` on ``self.set``."""
     # NOTE(review): presumably a unittest fixture hook; the enclosing class
     # is not visible here -- confirm.
     self.set = HashSet()
Exemplo n.º 8
0
class HashSetTest(unittest.TestCase):
    """Tests for ``HashSet`` lookup and removal.

    NOTE(review): ``test_remove`` builds its own list-initialised ``HashSet``
    and checks ``size``/``contains``, while the other tests go through
    ``self.set`` with ``add``/``find``/``remove`` on ``Phone`` objects --
    confirm that both styles target the same ``HashSet`` implementation.
    """

    def setUp(self):
        """Give every test its own empty set on ``self.set``."""
        self.set = HashSet()

    def test_find(self):
        """``find`` is truthy for every added phone and falsy for an unknown one."""
        phone1 = Phone('017-0774-1234-5678', 'Maria')
        phone2 = Phone('112-4567-1122-9740', 'Sasha')
        phone3 = Phone('150-2570-7171-7575', 'Helen')
        self.set.add(phone1)
        self.set.add(phone2)
        self.set.add(phone3)

        self.assertTrue(self.set.find(phone1))
        self.assertTrue(self.set.find(phone2))
        self.assertTrue(self.set.find(phone3))
        self.assertFalse(self.set.find(Phone('123', 'test')))

    # Renamed from ``test_remove``: the class defined that name twice, so this
    # test was silently replaced by the later definition and never ran.
    def test_remove_returns_element(self):
        """``remove`` returns the removed phone, or None for an unknown one."""
        phone1 = Phone('017-0774-1234-5678', 'Maria')
        phone2 = Phone('112-4567-1122-9740', 'Sasha')
        phone3 = Phone('150-2570-7171-7575', 'Helen')
        self.set.add(phone1)
        self.set.add(phone2)
        self.set.add(phone3)

        self.assertEqual('Maria', self.set.remove(phone1).name)
        self.assertEqual('Sasha', self.set.remove(phone2).name)
        self.assertEqual('Helen', self.set.remove(phone3).name)
        self.assertIsNone(self.set.remove(Phone('123', 'test')))

    def test_remove(self):
        """Remove three names one by one, checking size and membership each time."""
        hs = HashSet(["Shane", "Alan", "Laurel"])
        assert hs.size == 3
        assert hs.contains("Shane") == True
        assert hs.contains("Alan") == True
        assert hs.contains("Laurel") == True

        hs.remove("Alan")
        assert hs.size == 2
        assert hs.contains("Alan") == False
        assert hs.contains("Shane") == True
        assert hs.contains("Laurel") == True

        hs.remove("Shane")
        assert hs.size == 1
        assert hs.contains("Alan") == False
        assert hs.contains("Shane") == False
        assert hs.contains("Laurel") == True

        hs.remove("Laurel")
        assert hs.size == 0
        assert hs.contains("Alan") == False
        assert hs.contains("Shane") == False
        assert hs.contains("Laurel") == False
 def test_add(self):
     """Add two names to an empty set, checking size and membership after each.

     "Bob" is never added and must stay absent throughout.
     """
     names = HashSet()
     assert names.size == 0

     # One entry per add: (name to add, expected size, membership checks).
     steps = [
         ("Shane", 1, [("Shane", True), ("Alan", False), ("Bob", False)]),
         ("Alan", 2, [("Shane", True), ("Alan", True), ("Bob", False)]),
     ]
     for added, expected_size, membership in steps:
         names.add(added)
         assert names.size == expected_size
         for name, expected in membership:
             assert names.contains(name) == expected
 def test_init_edge_cases(self):
     """Build a set from an empty string, an int and ``None``.

     Two stored elements are expected, with the empty string among them.
     """
     unusual_items = ["", 1, None]
     edge_set = HashSet(unusual_items)
     assert edge_set.size == 2
     assert edge_set.contains("") == True
 def test_filled_init(self):
     """A set built from a two-name list holds those names and nothing else."""
     filled = HashSet(["Shane", "Alan"])
     assert filled.size == 2
     # (name, expected membership) pairs, checked in this order.
     for name, expected in [("Alan", True), ("Bob", False)]:
         assert filled.contains(name) == expected
 def test_empty_init(self):
     """A set built without arguments is expected to report a size of 0."""
     empty = HashSet()
     assert empty.size == 0