def build_hash_table(unique_attributes):
    """Map every entry of ``unique_attributes`` to its index.

    A ``HashTable`` is created with ``np.size(unique_attributes)`` as its
    constructor argument, then each element is stored with
    ``put(element, index)``.

    ``range`` is used instead of the Python-2-only ``xrange`` so the function
    runs on both Python 2 and Python 3.

    Returns the populated table.
    """
    count = np.size(unique_attributes)
    hash_table = HashTable(count)

    for index in range(count):
        hash_table.put(unique_attributes[index], index)

    return hash_table
Exemplo n.º 2
0
class SymbolTable:
    """Facade that forwards every operation to a private ``HashTable``."""

    def __init__(self):
        self.__hashTable = HashTable()

    def __str__(self):
        """Return the string form of the wrapped table."""
        table = self.__hashTable
        return str(table)

    def add(self, value):
        """Pass ``value`` to the table's ``add`` and return its result."""
        outcome = self.__hashTable.add(value)
        return outcome

    def get(self, value):
        """Return whatever the table's ``getID`` yields for ``value``."""
        identifier = self.__hashTable.getID(value)
        return identifier

    def getHashTable(self):
        """Return the result of the wrapped table's ``getHashTable``."""
        table = self.__hashTable
        return table.getHashTable()
Exemplo n.º 3
0
def main():
    """Run the ``test()`` entry point of each structure and algorithm in turn."""
    # Data Structures
    data_structures = (
        LinkedList,
        QueueArray,
        QueueLL,
        ResizingArray,
        HashTable,
        BinarySearchTree,
        Heap,
        Trie,
        GraphTraversal,
    )
    for structure in data_structures:
        structure.test()

    # Search / Sort
    for algorithm in (BinarySearch, BubbleSort):
        algorithm.test()
Exemplo n.º 4
0
    def test_search(self):
        """Searching an inserted key yields a node equal to ``HashNode(key, value)``."""
        ht = HashTable()
        entries = (("frog", 2), ("cow", 12), ("tiger", 1), ("ostrich", 10))
        for key, value in entries:
            ht.insert(key, value)

        found_node = ht.search("frog")
        expected_node = HashNode("frog", 2)

        print(ht)
        assert expected_node == found_node
 def setUp(self):
     """Build the tables shared by the tests, plus a labelled list of three of them."""
     self.empty = HashTable(100)

     # Constructed in the same order as the attributes are assigned.
     one, ten, hundred = HashTable(1), HashTable(10), HashTable(100)
     self.oneBucket = one
     self.tenBuckets = ten
     self.hundredBuckets = hundred

     # Each entry pairs a table with the label used in test output.
     self.hashes = [
         [one, 'One Bucket'],
         [ten, 'Ten Buckets'],
         [hundred, '100 Buckets'],
     ]
 def test_contains(self):
     """``contains`` is True for stored keys and False for a key never set."""
     ht = HashTable()
     for key, value in (('I', 1), ('V', 5), ('X', 10)):
         ht.set(key, value)

     for stored_key in ('I', 'V', 'X'):
         assert ht.contains(stored_key) is True
     assert ht.contains('A') is False
Exemplo n.º 7
0
Arquivo: main.py Projeto: wxiea/Lab4
def ReadFile(filename):
    """Load ``filename`` into a table, one whitespace-separated record per line.

    The first field of each line is the name and the remaining fields are
    its values; every record is handed to ``HashTable.insert`` together with
    the current table, and the table it returns replaces the current one.

    Returns the final table (a 5,500,000-slot list of ``None`` if the file
    has no lines).
    """
    table = [None] * 5500000
    with open(filename, encoding="utf8") as file:
        for line in file:
            fields = line.split()
            name, values = fields[0], fields[1:]
            table = HashTable.insert(table, name, values)
    return table
Exemplo n.º 8
0
 def __init__(self, folder, maxFileNumber, hashSize, hash=True):
     """Load a bag-of-words for up to ``maxFileNumber`` regular files in ``folder``.

     ``hashSize`` is passed to the ``HashTable`` constructor. When ``hash``
     equals ``True`` each file goes through ``createBowWithHash``,
     otherwise through ``createBow``; every result is appended to
     ``self.bows``.
     """
     # Only regular files are kept; sub-directories are ignored.
     files = []
     for entry in listdir(folder):
         if isfile(join(folder, entry)):
             files.append(entry)

     self.hash = hash
     self.words = []
     self.bows = []
     self.hashTable = HashTable(hashSize)

     for cnt, file in enumerate(files, start=1):
         progress = "File Num : " + str(cnt) + " Loading : " + folder + "/" + str(file)
         print(progress)
         build_bow = self.createBowWithHash if hash == True else self.createBow
         self.bows.append(build_bow(folder, file))
         # Stop once the requested number of files has been loaded.
         if cnt == maxFileNumber:
             break
Exemplo n.º 9
0
    def test_put(self):
        # Table built as HashTable(17, 2); the meaning of the two arguments is
        # not visible here (presumably size and probing step -- TODO confirm).
        ht = HashTable(17, 2)
        str_to_put = chr(10)
        # Eighteen identical one-character keys.
        cases = [chr(17) for i in range(18)]
        # Seventeen expected values followed by a trailing None.
        results = [(i * 2 + 10) % 17 for i in range(17)] + [None]

        for i, c in enumerate(cases):
            # NOTE(review): the body of this ``with`` block is missing from the
            # visible source, so the per-case assertions cannot be documented.
            with self.subTest(case=c):
Exemplo n.º 10
0
def testRemove():
    """A key added to the table exists until it is removed."""
    key, value = "Texas", "Austin"
    states = HashTable(100)

    states.add(key, value)
    assert states.exists(key)

    states.remove(key)
    assert not states.exists(key)
Exemplo n.º 11
0
class SymbolTable:
    """Symbol table that delegates storage to a private ``HashTable``."""

    def __init__(self, size):
        self.__hashTable = HashTable(size)

    def add(self, key):
        """Add ``key`` to the table; the table's return value is discarded."""
        table = self.__hashTable
        table.add(key)

    def size(self):
        """Return the value reported by the table's ``size``."""
        table = self.__hashTable
        return table.size()

    def search(self, key):
        """Return the result of the table's ``search`` for ``key``."""
        table = self.__hashTable
        return table.search(key)

    def position(self, key):
        """Return the position of ``key``, adding it first when it is missing.

        If the table's ``search`` does not find the key, the key is added and
        the result of ``add`` is returned; otherwise the existing position is
        returned.

        Input: key - a string (per the original note)
        Output: a pair (per the original note)
        """
        if not self.__hashTable.search(key):
            return self.__hashTable.add(key)
        return self.__hashTable.position(key)

    def __str__(self):
        table = self.__hashTable
        return str(table)
Exemplo n.º 12
0
def main():
    """Store two successive values under one key, printing the lookup each time."""
    myMap = HashTable()
    key = "Mike"

    # The second assignment replaces the value written by the first.
    for value in (3, 76):
        myMap[key] = value
        print(myMap[key])
Exemplo n.º 13
0
def create_table(file_name):
    """Build a hash table holding every line of ``file_name``.

    Each line is inserted with its trailing whitespace stripped.

    Returns the populated ``HashTable``. When the file does not exist a
    message is printed and ``None`` is returned.
    """
    try:
        english_words = HashTable(5000)

        # The context manager closes the file even if an insert raises;
        # previously the handle was never closed.
        with open(file_name, "r") as file:
            for line in file:
                english_words.insert(line.rstrip())

        return english_words
    # Catches the error raised when the given file does not exist
    except FileNotFoundError:
        print("File not found. Please try again.")
        return None
Exemplo n.º 14
0
    def test_find(self):
        """``find`` is None for absent keys and equals ``hash_fun(key)`` once stored."""
        table = HashTable(7, 0)
        key = 'abc'

        assert table.find(key) is None
        table.put(key)
        assert table.find(key) == table.hash_fun(key)
        assert table.find('lol') is None
        assert table.find('lol') is None
Exemplo n.º 15
0
class SymbolTable:
    """Symbol table backed by a private ``HashTable`` built with the given size."""

    def __init__(self, size) -> None:
        self.__size = size
        self.__ht = HashTable(size)

    def add(self, key):
        """Add ``key`` and return whatever the table's ``add`` returns."""
        outcome = self.__ht.add(key)
        return outcome

    def contains(self, key):
        """Return the table's ``contains`` result for ``key``."""
        present = self.__ht.contains(key)
        return present

    def remove(self, key):
        """Remove ``key`` from the table; nothing is returned."""
        table = self.__ht
        table.remove(key)

    def getPosition(self, key):
        """Return the table's ``getPosition`` result for ``key``."""
        location = self.__ht.getPosition(key)
        return location

    def __str__(self) -> str:
        table = self.__ht
        return str(table)
Exemplo n.º 16
0
 def get_indices(self):
     """Read the hash index of every table that declares a primary key.

     For each of the four known tables whose ``primary_key`` is not the
     empty string, a new ``HashTable`` is stored in ``self.indices`` and
     filled with ``record_count`` entries read from ``self.input``: a hash
     of ``HashTable.hash_size`` units followed by an integer offset of
     ``HashTable.offset_size`` units.
     """
     for table in ("article", "keywords", "author", "conference"):
         if self.tables[table].primary_key != "":
             index = HashTable()
             self.indices[table] = index
             record_count = self.tables[table].record_count
             for _ in range(record_count):
                 h = self.input.read(HashTable.hash_size)
                 o = int(self.input.read(HashTable.offset_size))
                 index.add_item(h, o)
Exemplo n.º 17
0
class Graph:
    """Graph holding a list of edges and a hash table of vertices.

    Vertices are hashed and compared through their ``obj`` attribute.
    """

    def __init__(self, edges=None, vertices=None):
        """Create the graph and load any initial vertices and edges.

        ``edges`` and ``vertices`` are optional iterables. They default to
        ``None`` (treated as empty) instead of ``List()`` instances, which
        would be created once at definition time and shared by every call.
        Vertices are added before edges because ``add_edge`` looks up both
        end vertices.
        """
        self.edges = List()
        self.vertices = HashTable(
            hash_fn=lambda x: hash(x.obj),
            compare_fn=lambda a, b: -1 if a.obj < b.obj else 1 if a.obj > b.obj else 0)
        if vertices is not None:
            for v in vertices:
                self.add_vertex(v)
        if edges is not None:
            for edge in edges:
                self.add_edge(edge)

    def add_edge(self, edge):
        """Store ``edge`` and register it as a neighbor of both end vertices."""
        self.edges.insert(edge)
        self.get_vertex(edge.v1.obj).add_neighbor(edge)
        self.get_vertex(edge.v2.obj).add_neighbor(edge)

    def add_vertex(self, vertex):
        """Add ``vertex`` to the vertex table."""
        self.vertices.add(vertex)

    def get_vertex(self, obj):
        """Return the table's lookup result for a ``Vertex`` wrapping ``obj``."""
        return self.vertices.lookup(Vertex(obj))
Exemplo n.º 18
0
def Parser(file_name):
    """
    Parse an xmlbif file into random variables and conditional probability tables.

    The root element must be ``BIF`` and its first child ``NETWORK``;
    otherwise a message is printed and the returned collections stay empty.
    Children of ``NETWORK`` are scanned from index 1 onwards:

    * ``VARIABLE`` becomes a ``RandomVariable`` stored in the variable list
      and in ``Hash_Variables`` under its name.
    * ``DEFINITION`` becomes a ``CPT`` built from its ``FOR``, ``GIVEN`` and
      ``TABLE`` children, stored in the table list and in ``Hash_CPT``.

    ``print`` is called as a function and ``range`` replaces ``xrange`` so
    the module parses and runs on Python 3 as well as Python 2.

    Returns ``(variables, cp_tables, Hash_Variables, Hash_CPT, Hash_Nodes)``.
    ``Hash_Nodes`` is created here but never filled.
    """
    tree = ET.parse(file_name)
    root = tree.getroot()

    n_prime = 67

    Hash_Variables = HashTable(n_prime)
    Hash_CPT = HashTable(n_prime)
    Hash_Nodes = HashTable(n_prime)

    variables = []
    cp_tables = []

    if root.tag != "BIF":
        print("file must be xmlbif")
    else:
        root = root[0]
        if root.tag != "NETWORK":
            print("file must have a NETWORK tag")
        else:
            # Index 0 of NETWORK is deliberately skipped, as before.
            for i in range(1, len(root)):
                element = root[i]
                if element.tag == "VARIABLE":
                    domain = getDomain(element)
                    rv = RandomVariable(element[0].text, domain)
                    variables.append(rv)
                    Hash_Variables.put(rv.name, rv)
                elif element.tag == "DEFINITION":
                    cpt = CPT()
                    for child in element:
                        if child.tag == "FOR":
                            cpt.add_for_variable(Hash_Variables.get(child.text))
                        elif child.tag == "GIVEN":
                            cpt.add_given_variable(Hash_Variables.get(child.text))
                        elif child.tag == "TABLE":
                            cpt.build_table()
                            for piece in child.text.split(" "):
                                try:
                                    p = float(piece)
                                    cpt.add_prob(p)
                                except ValueError:
                                    # Pieces that are not numbers (for example
                                    # the empty strings produced by repeated
                                    # spaces) are skipped.
                                    continue
                    cp_tables.append(cpt)
                    Hash_CPT.put(cpt.name, cpt)

    return variables, cp_tables, Hash_Variables, Hash_CPT, Hash_Nodes
Exemplo n.º 19
0
class Location(PackageHolder):
    """A numbered address that registers itself in the class-wide table on creation."""

    # statics
    locations = HashTable()

    @staticmethod
    def all_locations_str():
        """Return a report with one line per stored location under a heading."""
        lines = [f"{str(value)}\n"
                 for value in Location.locations.return_entire_table()]
        return "Locations\n\n" + "".join(lines)

    @staticmethod
    def get_all_locations():
        """Return everything held in the class-wide table."""
        everything = Location.locations.return_entire_table()
        return everything

    @staticmethod
    def max_location_number():
        """Return the largest ``value`` among the index's ordered nodes.

        The index is traversed with ``inorder()`` and its ``ordered_values``
        list is cleared before the node values are collected.
        """
        index = Location.locations.index
        index.inorder()
        index.ordered_values.clear()
        return max([node.value for node in index.ordered_nodes])

    # Overrides
    def __init__(self,
                 number,
                 address,
                 city="",
                 state="",
                 zip_code=99999,
                 distance_list=None):
        """Store the location's details and insert it into ``Location.locations``."""
        super().__init__()
        self.number = number
        self.address = address
        self.city, self.state = city, state
        self.zip_code = zip_code
        self.distance_list = distance_list
        self.packages_going_here = []
        self.cluster = 0
        # The table is keyed by the location number.
        Location.locations.insert(self, number)

    def __str__(self):
        parts = (
            f"Location ID: {self.number}",
            f"Address: {self.address}",
            f"City: {self.city}",
            f"State: {self.state}",
            f"Zip: {self.zip_code}",
            f"{super().__str__()}",
        )
        return "\n".join(parts)

    def get_path_to(self, to_location):
        """Return element 0 of this location's ``distance_list`` entry for ``to_location``."""
        entry = self.distance_list[to_location]
        return entry[0]

    def get_distance_to(self, to_location):
        """Return element 1 of this location's ``distance_list`` entry for ``to_location``."""
        entry = self.distance_list[to_location]
        return entry[1]
Exemplo n.º 20
0
    def test_remove1(self):
        """After removing key 5 from a table of 11 entries, ``get(5)`` equals None."""
        m = 11
        h = HashTable(m=m)

        # Key i is stored with value i + 1.
        for key in range(m):
            h.insert(key, key + 1)

        h.remove(5)

        looked_up = h.get(5)
        assert looked_up == None
Exemplo n.º 21
0
def test_successful_hash_add_and_delete():
    """Inserting then deleting key 5 leaves the head of its slot equal to None."""
    ht = HashTable()
    slot = ht.h(5)

    ht.CHAINED_HASH_INSERT(5)
    node = ht.CHAINED_HASH_SEARCH(5)
    ht.CHAINED_HASH_DELETE(node)

    head = ht.T[slot].head
    assert head == None
Exemplo n.º 22
0
 def test_delete(self):
     """Insert four keys and print ``is_available`` of the first table slot.

     NOTE(review): the expected layouts below are built but not compared
     in the visible part of this test.
     """
     ht = HashTable(12)
     reassignment = [
         None, None, None, None,
         HashNode('test', 5), None,
         HashNode('abc', 3), None,
         HashNode('brain', 1), None,
         HashNode('bean', 4),
         None, None, None, None, None, None, None,
         HashNode('five', 5),
         None, None, None, None, None,
     ]
     solution1 = [
         None, None, None, None,
         HashNode('test', 3), None,
         HashNode('abc', 1), HashNode('five', 5),
         None, None,
         HashNode('bean', 4), None,
     ]
     solution = [
         None, None, None, None,
         HashNode(None, None), None,
         HashNode(None, None), HashNode(None, None),
         None, None,
         HashNode(None, None), None,
     ]
     rehashed_solution = [
         None, None, None, None, None, None,
         None, None, None, None, None, None,
     ]
     for key, value in (("abc", 1), ("test", 3), ("bean", 4), ("five", 5)):
         ht.insert(key, value)
     print(ht.table[0].is_available)
Exemplo n.º 23
0
def shortest_path(graph, start, end):
    """Search ``graph`` for a path from ``start`` to ``end``.

    Candidate paths are kept in a ``Heap`` whose comparator ranks a path
    with a lower ``get_cost()`` above a costlier one (presumably so the
    cheapest path is popped first -- confirm against ``Heap``). Vertices
    already expanded are remembered in ``checked``.

    A popped path whose end is ``end`` is now returned. Previously a path
    was only recognised as finished when it was extended, so a destination
    directly adjacent to ``start`` was never reported.

    Returns the ``Path`` found, or ``None`` when ``end`` cannot be reached.
    """
    queue = Heap(compare_fn=lambda p1, p2: 1 if p1.get_cost() < p2.get_cost()
                 else -1 if p1.get_cost() > p2.get_cost() else 0)
    checked = HashTable()
    checked.add(start)

    # Seed the queue with the one-edge paths leaving ``start``.
    for edge in graph.get_vertex(start).neighbors:
        path = Path()
        # The far end of the edge is whichever endpoint is not ``start``.
        endpt = edge.v2.obj if edge.v1.obj == start else edge.v1.obj
        path.add_edge(edge, endpt)
        queue.insert(path)

    while len(queue) > 0:
        path = queue.pop()
        first = path.end
        # Covers the seed paths, which were inserted without being checked.
        if first == end:
            return path
        if first not in checked:
            checked.add(first)
            for edge in graph.get_vertex(first).neighbors:
                path2 = Path(path)
                other = edge.v2.obj if edge.v1.obj == first else edge.v1.obj
                path2.add_edge(edge, other)
                if other == end:
                    return path2
                queue.insert(path2)
    return None
Exemplo n.º 24
0
class PriorityQueue:
    """Max-priority queue: a heap of negated scores plus a score-to-item table.

    NOTE(review): items are stored in ``dictionary`` under their negated
    score, so two items with the same score share one key -- confirm how
    ``HashTable.put`` treats an existing key.
    """

    def __init__(self):
        self.current_size = 0
        self.dictionary = HashTable()
        self.queue = []
        heapq.heapify(self.queue)

    def isEmpty(self):
        """Return True when no items are queued."""
        return self.current_size == 0

    def enqueue(self, data):
        """Queue a ``(score, item)`` pair."""
        score, item = data
        # heapq is a min-heap; negating the score makes the largest pop first.
        negated = score * -1
        heapq.heappush(self.queue, negated)
        self.dictionary.put(negated, item)
        self.current_size += 1

    def dequeue(self):
        """Remove the highest score and return the item stored under it.

        Raises ``IndexError`` when the queue is empty.
        """
        if self.current_size == 0:
            raise IndexError("Queue is empty")
        top = heapq.heappop(self.queue)
        self.current_size -= 1
        return self.dictionary.get(top)

    def peek(self):
        """Return the heap's first entry (a negated score), without removing it."""
        return self.queue[0]

    def size(self):
        """Return the number of queued items."""
        return self.current_size
Exemplo n.º 25
0
    def test_find_value_in_hash_table(self):
        """A stored value is found at slot 14; a value never stored is not found."""
        table = HashTable(self.hash_size, self.hash_step)
        table.put(self.hash_value)

        found_slot = table.find(self.hash_value)
        self.assertEqual(14, found_slot)

        absent = 'goodbye world'
        self.assertIsNone(table.find(absent))
Exemplo n.º 26
0
def create_blog_post(user_id):
    """Build a blog post for ``user_id`` from the request's JSON body.

    Returns a 400 response when no user has that id; otherwise the post's
    fields are staged in a ``HashTable``, a ``BlogPost`` is built from them
    and a 200 response is returned.

    NOTE(review): the ``BlogPost`` is constructed but nothing in this block
    stores it -- confirm whether persistence happens elsewhere.
    """
    data = request.get_json()
    user = User.query.filter_by(id=user_id).first()

    if not user:
        return jsonify({"message": "user does not exist!"}), 400

    staged_fields = (
        ("title", data["title"]),
        ("body", data["body"]),
        ("date", now),
        ("user_id", user_id),
    )
    ht = HashTable(10)
    for key, value in staged_fields:
        ht.AddKeyVal(key, value)

    # Values are read back in the same order as they were staged.
    post_kwargs = {key: ht.GetVal(key) for key, _ in staged_fields}
    newBPost = BlogPost(**post_kwargs)

    return jsonify({"message": "post created"}), 200
Exemplo n.º 27
0
    def average(self, attribute=""):
        """Average ``total_declarado`` of the candidates, grouped by ``attribute``.

        Candidates are grouped by the value of the named attribute and the
        mean of ``total_declarado`` is computed for each group.

        The result is built from ``groups``; the previous code iterated an
        undefined name ``a`` and raised ``NameError`` on every valid call.

        Returns a dict mapping each group value to its average.
        Raises ``ValueError`` when ``Candidato`` has no such attribute.
        """
        if not hasattr(Candidato(), attribute):
            raise ValueError("Invalide attribute in Candidato()")

        groups = HashTable()

        for candidate in self.__candidatos:
            group = getattr(candidate, attribute)
            total = candidate.total_declarado

            if group not in groups:
                groups[group] = DoubleChainList([total])
            else:
                groups[group].append(total)

        return {k: sum(v)/len(v) for k, v in groups.items()}
Exemplo n.º 28
0
class Relation:
    """A relation with an id, a cardinality and a row/column ``HashTable``."""

    def __init__(self, id, cardinal, relation_table_schema):
        """Build the table from the schema's row and column identifiers.

        ``relation_table_schema`` must provide the keys
        ``'rows_identifiers'`` and ``'columns_identifiers'``.
        """
        self.relation_table_schema = relation_table_schema
        self.id = id
        self.cardinal = cardinal
        self.hash_table = HashTable(
            self.relation_table_schema['rows_identifiers'],
            self.relation_table_schema['columns_identifiers'])

    def populate_table(self, data):
        """Pass ``data`` to the table's ``populate``."""
        self.hash_table.populate(data)

    def __getitem__(self, item):
        """Return the cell addressed by a ``(row, col)`` pair."""
        row, col = item
        return self.hash_table.table[row][col]

    def __setitem__(self, key, value):
        """Write ``value`` into the cell addressed by a ``(row, col)`` pair."""
        row, col = key
        self.hash_table.table[row][col] = value

    def __str__(self):
        # ``str(self.id)`` keeps non-string ids (such as ints) from raising
        # TypeError during concatenation; string ids render as before.
        return (str(self.id) + '\n' + str(self.hash_table) + '\n' +
                'Cardinality: ' + str(self.cardinal))
Exemplo n.º 29
0
    def test_double_hashing(self):
        """``double_hashing`` yields the expected index for three keys.

        Three further calls are made afterwards without checking the result.
        """
        ht = HashTable()

        expected_indices = (("abc", 0), ("def", 3), ("dog", 1))
        for key, expected in expected_indices:
            index = ht.double_hashing(key, True)
            assert index == expected

        for key in ("a", "def", "bb"):
            ht.double_hashing(key, True)
Exemplo n.º 30
0
    def analyze(filename):
        """Tokenize ``filename`` into a symbol table and a program internal form.

        Each line is split on ``lr.separators``. Keywords, separators and
        operators are coded with position -1; identifiers and constants are
        added to the symbol table and coded with the position it returns.
        The first token that matches nothing stops the scan with a lexical
        error message that points at the token.

        Changes from the earlier version:
        * the file is closed as soon as it has been read;
        * the offending token is escaped before being used as a pattern, so
          tokens containing regex metacharacters no longer break the error
          report;
        * a file without tokens produces the ~Start/~End error instead of an
          ``IndexError``.

        NOTE(review): the reported line number is zero-based -- confirm that
        this is intended.

        Returns ``(errors, ST, PIF)``; ``errors`` is empty when all is well.
        """
        with open(filename, "r") as fileHandler:
            fileContent = fileHandler.readlines()
        PIF = []
        ST = HashTable()
        errors = ""
        for i, source_line in enumerate(fileContent):
            line = re.split("(" + lr.separators + ")", source_line)
            for token in line:
                token = token.strip("\n")
                if token == '':
                    continue
                if re.match(lr.keywords, token) or re.match(
                        lr.separators, token) or re.match(lr.operators, token):
                    PIF.append([CodificationTable[token], -1])
                elif re.match(lr.identifier, token):
                    pos = ST.add(token)
                    PIF.append([CodificationTable['identifier'], pos])
                elif re.match(lr.integer, token):
                    token = int(token)
                    pos = ST.add(token)
                    PIF.append([CodificationTable['constant'], pos])
                elif re.match(lr.string, token) or re.match(
                        lr.ArrayList, token):
                    pos = ST.add(token)
                    PIF.append([CodificationTable['constant'], pos])
                else:
                    errors += "Lexical error on line " + str(i) + ":\n"
                    # The token is literal text, not a pattern.
                    error = re.split("(" + re.escape(token) + ")", source_line)
                    errors += error[0] + error[1] + "\n"
                    # Caret placed under the end of the offending token.
                    errors += " " * (len(error[0]) + len(error[1])) + "^"
                    return errors, ST, PIF

        # Codes 32 and 33 are the ones the message calls ~Start and ~End.
        if not PIF or PIF[0][0] != 32 or PIF[-1][0] != 33:
            errors += "Error:\nCode should be between the ~Start and ~End tokens"

        return errors, ST, PIF
Exemplo n.º 31
0
def import_packages():
    """
    Import the package CSV data into the program.

    Reads ``src/data/PackageFile.csv`` (relative to the current working
    directory), creates one ``Package`` per row and inserts it into
    ``DataStorage.packages`` once for each of eight of its attributes.
    The number of packages created is stored in
    ``DataStorage.number_of_packages_in_data``.

    The original author's note puts the cost at 18N+6 steps, i.e. O(N).
    """
    csv_path = pathlib.Path.cwd() / "src/data/PackageFile.csv"
    with open(csv_path) as csvfile:
        imported_data = list(csv.reader(csvfile, delimiter=','))

        # The table is sized as data points per package times the number of
        # packages, so that collisions stay limited.
        data_points_per_package = 7
        DataStorage.packages = HashTable(
            len(imported_data) * data_points_per_package)

        # Attributes under which every package is inserted, in this order.
        key_attributes = ("id", "address", "city", "state", "zip",
                          "delivery_deadline", "mass_kilo", "delivery_status")

        created = 0
        for row in imported_data:
            # Columns 0-7: id, address, city, state, zip code, delivery
            # deadline, mass in kilos, special notes.
            package = Package(row[0], row[1], row[2], row[3], row[4],
                              row[5], row[6], row[7])
            for attribute in key_attributes:
                DataStorage.packages.insert(getattr(package, attribute), package)
            created += 1

        DataStorage.number_of_packages_in_data = created
Exemplo n.º 32
0
def main():
    """Demonstrate a ``HashTable(5, 10)``: print it, insert keys 0-33, search for 7."""
    ht = HashTable(5, 10)
    print(ht.toString() + "\n")

    print("INSERT\n----")
    for key in range(34):
        ht.insert(key)

    print("\nAFTER INSERT\n----")
    print(ht.toString())

    print("\nSEARCH")
    print(ht.search(7))
def is_formation_possible(lst, word):
    """Return True when ``word`` splits into a prefix and suffix both found in ``lst``.

    Every word of ``lst`` is inserted into a ``HashTable``; each split point
    from after the first character to after the last is then tried, so the
    final attempt pairs the whole word with an empty suffix.
    """
    ht = HashTable()
    for known_word in lst:
        ht.insert(known_word, True)

    for cut in range(1, len(word) + 1):
        prefix_found = ht.search(word[:cut])
        # The suffix is only looked up when the prefix was found.
        if prefix_found and ht.search(word[cut:]):
            return True

    return False
Exemplo n.º 34
0
def testHashTable():
    """Interactive console loop that exercises a ``HashTable`` until the user exits."""
    ht = HashTable()
    while True:
        print ("choose Operation: \n",
               "1 - Set Item\n",
               "2 - Get Item\n",
               "3 - GetKeys\n",
               "4 - Exit")
        choice = input("Enter choice: ")

        # Exit is handled first; every other choice loops back to the menu.
        if choice == '4':
            break

        if choice == '1':
            ht = setItem(ht)
            ht.printHashTable()
        elif choice == '2':
            getItem(ht)
            ht.printHashTable()
        elif choice == '3':
            ht.showSlots()
        else:
            print ("Bad Choice - Choose Valid Operation")
Exemplo n.º 35
0
	def nextActions(self, removeDups):
		"""Rebuild ``self.newActions`` from the story's actions and return it.

		``validActions`` is called for every action of ``self.story``. When
		``removeDups`` is truthy, actions already seen in a scratch
		``HashTable`` and actions whose ``'values'`` contain repeats are
		removed from the list afterwards.
		"""
		self.newActions = []
		for act in self.story.actions:
			self.validActions(act, removeDups)

		if not removeDups:
			return self.newActions

		ht = HashTable()
		rejected = []
		for act in self.newActions:
			# 'values' is only inspected when the action was not seen before.
			if ht.isPresent(act) or len(act['values']) != len(set(act['values'])):
				rejected.append(act)
			else:
				ht.add(act)

		# Removal happens after the scan so the list is not changed mid-iteration.
		for unwanted in rejected:
			self.newActions.remove(unwanted)
		ht.destroy()
		return self.newActions
#100,000 Buckets
#LOW:    1 entries
#HIGH:   12 entries
#TOTAL: 147644 entries
#Load Factor:   1.47644 entries per bucket
#Time to ADD ALL: 3.84 seconds

import time
from HashTable import HashTable

# Read the whole input text; the context manager closes the file afterwards
# (the handle was previously left open).
with open('../Lorem_ipsum.txt','r') as lorem_file:
    fileIn = lorem_file.read()
words = fileIn.split(' ')

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement for measuring elapsed intervals.
start1 = time.perf_counter()
print("HashTable(100) start time:%f"%start1)
ht = HashTable(100)

# Every word is stored under a key made unique by prefixing its position.
wordCount = 0
for word in words:
    ht.add(str(wordCount)+word, wordCount)
    wordCount += 1
add1 = time.perf_counter()
print("HashTable(100) add time:%f\n"%(add1-start1))

print(ht.printDistribution())

print('\n')

start2 = time.perf_counter()
print("HashTable(1000) start time:%f"%start2)
ht2 = HashTable(1000)
Exemplo n.º 37
0
 def __init__(self):
     """Start with no sentences and a new symbol table built as ``HashTable(13)``."""
     # Sentences or clauses are collected in this list.
     self.sentences = list()
     # Table meant to relate strings to Symbols.
     self.SymbolTable = HashTable(13)
class TestHashTable(unittest.TestCase):
    """Unit tests for ``HashTable`` run against tables of 1, 10 and 100 buckets.

    Expected string forms that may list two entries in either order are
    written as raw regex literals: ``\\{`` and ``\\}`` are not valid escape
    sequences in ordinary string literals.
    """

    def setUp(self):
        self.empty = HashTable(100)
        self.oneBucket = HashTable(1)
        self.tenBuckets = HashTable(10)
        self.hundredBuckets = HashTable(100)

        # Each entry pairs a table with the label printed by the tests.
        self.hashes = [[self.oneBucket,'One Bucket'], [self.tenBuckets,'Ten Buckets'], [self.hundredBuckets,'100 Buckets']]

    def _assertStrMatchesEither(self, ht, first, second):
        """Assert that ``ht``'s string form matches one of two regex alternatives."""
        self.assertRegex(ht.__str__(), ('%s|%s' %(first, second)))

    def testEmpty(self):
        """An empty table rejects delete/lookUp/updateValue and accepts an add."""
        self.assertEqual(self.empty.__str__(),"{}")
        self.assertFalse(self.empty.delete('DeleteMe'))
        self.assertFalse(self.empty.lookUp('FindMe'))
        self.assertFalse(self.empty.updateValue('UpdateMe', 'ToThis'))

        self.assertTrue(self.empty.add('AddMe',2))
        self.assertEqual(self.empty.__str__(),"{'AddMe': 2}")

        print('\ntestEmpty PASSED')

    def testAdd(self):
        """Adding an existing key replaces its value; a new key adds an entry."""
        for ht,name in self.hashes:
            self.assertTrue(ht.add('AddMe',2))
            self.assertEqual(ht.__str__(),"{'AddMe': 2}")

            self.assertTrue(ht.add('AddMe',3))
            self.assertEqual(ht.__str__(),"{'AddMe': 3}")

            self.assertTrue(ht.add('2ndItem',4))
            self._assertStrMatchesEither(
                ht,
                r"\{'AddMe': 3, '2ndItem': 4\}",
                r"\{'2ndItem': 4, 'AddMe': 3\}")

            print('\ntestAdd on %s PASSED'%name)

    def testDelete(self):
        """Deleting succeeds once per stored key and fails for a missing key."""
        for ht,name in self.hashes:
            self.assertTrue(ht.add('AddMe',2))
            self.assertEqual(ht.__str__(),"{'AddMe': 2}")

            self.assertTrue(ht.add('AddMe',3))
            self.assertEqual(ht.__str__(),"{'AddMe': 3}")

            self.assertTrue(ht.add('2ndItem',4))
            self._assertStrMatchesEither(
                ht,
                r"\{'AddMe': 3, '2ndItem': 4\}",
                r"\{'2ndItem': 4, 'AddMe': 3\}")

            self.assertTrue(ht.delete('AddMe'))
            self.assertEqual(ht.__str__(),"{'2ndItem': 4}")

            self.assertFalse(ht.delete('AddMe'))
            self.assertTrue(ht.delete('2ndItem'))

            self.assertEqual(ht.__str__(),"{}")

            print('\ntestDelete on %s PASSED'%name)

    def testLookUp(self):
        """``lookUp`` returns stored values and a falsy result for a missing key."""
        for ht,name in self.hashes:
            self.assertTrue(ht.add('AddMe',3))
            self.assertEqual(ht.__str__(),"{'AddMe': 3}")

            self.assertTrue(ht.add('2ndItem',4))
            self._assertStrMatchesEither(
                ht,
                r"\{'AddMe': 3, '2ndItem': 4\}",
                r"\{'2ndItem': 4, 'AddMe': 3\}")

            self.assertEqual(ht.lookUp('AddMe'), 3)
            self.assertEqual(ht.lookUp('2ndItem'), 4)

            self.assertFalse(ht.lookUp('missing'))

            print('\ntestLookUp on %s PASSED'%name)

    def testUpdate(self):
        """``updateValue`` changes an existing key and fails for a missing one."""
        for ht,name in self.hashes:
            self.assertTrue(ht.add('AddMe',3))
            self.assertEqual(ht.__str__(),"{'AddMe': 3}")

            self.assertTrue(ht.add('2ndItem',4))
            self._assertStrMatchesEither(
                ht,
                r"\{'AddMe': 3, '2ndItem': 4\}",
                r"\{'2ndItem': 4, 'AddMe': 3\}")

            self.assertTrue(ht.updateValue('AddMe', 4))
            self._assertStrMatchesEither(
                ht,
                r"\{'AddMe': 4, '2ndItem': 4\}",
                r"\{'2ndItem': 4, 'AddMe': 4\}")

            self.assertFalse(ht.updateValue('missing',4))

            print('\ntestUpdate on %s PASSED'%name)

    def testHashing(self):
        """``hash`` matches a base-95 polynomial of the key, modulo the bucket count."""
        key = 'test'
        # Characters are offset by 32 and weighted by increasing powers of 95.
        hashing = (ord('t')-32) + (ord('e')-32)*95 + (ord('s')-32)*95*95 + (ord('t')-32)*95*95*95
        self.assertEqual(hashing, 72775214)
        self.assertEqual(hashing%1, 0)
        self.assertEqual(hashing%10, 4)
        self.assertEqual(hashing%100, 14)

        key2 = '~ '
        hashing2 = (ord('~')-32) + (ord(' ')-32)*95
        self.assertEqual(hashing2, 94)
        for ht,name in self.hashes:
            self.assertEqual(ht.hash(''), None)
            self.assertEqual(ht.hash(key), hashing%ht.numBuckets)
            self.assertEqual(ht.hash(key2), hashing2%ht.numBuckets)

            print('\ntestHashing on %s PASSED'%name)

    def testKeyType(self):
        """The string ``'11'`` and the integer ``11`` are treated as distinct keys."""
        for ht,name in self.hashes:
            self.assertTrue(ht.add('11','2'))
            self.assertEqual(ht.__str__(),"{'11': '2'}")

            self.assertTrue(ht.add(11,3))
            self._assertStrMatchesEither(
                ht,
                r"\{'11': '2', 11: 3\}",
                r"\{11: 3, '11': '2'\}")

            self.assertEqual(ht.lookUp('11'),'2')
            self.assertEqual(ht.lookUp(11),3)

            self.assertTrue(ht.updateValue('11',2))
            self._assertStrMatchesEither(
                ht,
                r"\{'11': 2, 11: 3\}",
                r"\{11: 3, '11': 2\}")
            self.assertEqual(ht.lookUp('11'),2)

            self.assertTrue(ht.updateValue(11,'3'))
            self._assertStrMatchesEither(
                ht,
                r"\{'11': 2, 11: '3'\}",
                r"\{11: '3', '11': 2\}")
            self.assertEqual(ht.lookUp(11),'3')

            self.assertTrue(ht.delete(11))
            self.assertEqual(ht.__str__(),"{'11': 2}")

            self.assertFalse(ht.delete(11))

            self.assertTrue(ht.delete('11'))
            self.assertEqual(ht.__str__(),"{}")
            print('\ntestKeyType on %s PASSED'%name)
Exemplo n.º 39
0
class Lexer:
    """Tokenizer driven by regex token definitions plus hand-built automatons.

    Constructing a ``Lexer`` tokenizes ``text`` immediately. Afterwards:

    * ``parse_result`` is the list of ``(token, token_type)`` pairs,
    * ``program_structure`` is the list of ``(code, value)`` pairs recorded
      by :meth:`add_to_fip`,
    * ``symbol_table`` holds every identifier and constant that was seen.

    The module-level names ``keywords`` and ``constants`` are consulted while
    scanning.
    """

    # Alphabet handed to every automaton; the quoted-constant automatons also
    # accept the two quote characters.
    _WORD_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789'
    _QUOTED_CHARS = _WORD_CHARS + '\'"'

    def __init__(self, token_definition, text):
        """Build the automatons and tokenize ``text`` straight away.

        Args:
            token_definition: mapping from token type to a sequence whose
                item 0 is a regular expression and item 1 is the code that
                ends up in ``program_structure``.
            text: the source text to tokenize.
        """
        self.token_definition = token_definition
        self.symbol_table = HashTable()
        self.program_structure = []

        word = r'[a-zA-Z0-9_]'
        digit = r'[0-9]'
        quote = r'\"'

        self.identificator_fsm = self._build_fsm(
            self._WORD_CHARS,
            [('WithoutDigits', {}), ('WithDigits', {})],
            [(r'[a-zA-Z_]', 'Root', 'WithoutDigits'),
             (word, 'WithoutDigits', 'WithDigits'),
             (word, 'WithDigits', 'WithDigits')])

        self.constanta_integer_fsm = self._build_fsm(
            self._WORD_CHARS,
            [('Digit', {})],
            [(digit, 'Root', 'Digit'),
             (digit, 'Digit', 'Digit')])

        self.constanta_string_fsm = self._build_fsm(
            self._QUOTED_CHARS,
            [('LeftQuote', {'isfinal': False}),
             ('Character', {'isfinal': False}),
             ('RightQuote', {})],
            [(quote, 'Root', 'LeftQuote'),
             (word, 'LeftQuote', 'Character'),
             (word, 'Character', 'Character'),
             (quote, 'Character', 'RightQuote'),
             (quote, 'LeftQuote', 'RightQuote')])

        # NOTE(review): this automaton is delimited by double quotes exactly
        # like the string one - confirm whether a single quote was intended.
        self.constanta_char_fsm = self._build_fsm(
            self._QUOTED_CHARS,
            [('LeftQuote', {'isfinal': False}),
             ('Character', {'isfinal': False}),
             ('RightQuote', {})],
            [(quote, 'Root', 'LeftQuote'),
             (word, 'LeftQuote', 'Character'),
             (quote, 'Character', 'RightQuote')])

        # NOTE(review): nothing leaves 'Dot', the alphabet has no '.', and the
        # pattern r'.' is unescaped, so this automaton looks unable to accept
        # a fractional part - confirm against the FSM implementation.
        self.constanta_double_fsm = self._build_fsm(
            self._WORD_CHARS,
            [('Dot', {'isfinal': False}), ('Digit', {})],
            [(digit, 'Root', 'Digit'),
             (digit, 'Digit', 'Digit'),
             (r'.', 'Digit', 'Dot')])

        self.parse(text)

    @staticmethod
    def _build_fsm(alphabet, states, transitions):
        """Create an FSM, add ``states`` and then wire up ``transitions``.

        Args:
            alphabet: string of characters passed to ``FSM`` as a list.
            states: ``(name, kwargs)`` pairs forwarded to ``State``.
            transitions: ``(pattern, source_name, target_name)`` triples;
                'Root' is looked up but never added here, so it is presumably
                created by ``FSM`` itself.
        """
        fsm = FSM(list(alphabet))
        for name, options in states:
            fsm.addState(State(name, **options))
        for pattern, source, target in transitions:
            fsm.addTransition(pattern, fsm.findState(source), fsm.findState(target))
        return fsm

    def isident(self, char):
        """Return True when ``char`` is alphanumeric or an underscore."""
        return char.isalnum() or (char == '_')

    def get_token(self, text, pos):
        """Return the ``(lexeme, token_type)`` pair that starts at ``text[pos]``.

        Candidates come from every regex in ``token_definition`` and from the
        five automatons. With several candidates, the first one whose type is
        in ``keywords`` wins outright; otherwise the longest lexeme wins and,
        on equal length, the earliest candidate is kept.

        Returns:
            ``(None, None)`` when ``pos`` is past the end of ``text`` or when
            nothing matches.
        """
        if pos >= len(text):
            return None, None

        rest = text[pos:]
        matches = []
        for key in self.token_definition:
            m = re.match(self.token_definition[key][0], rest)
            # A zero-length match cannot advance the scanner (parse() would
            # spin forever on it), so it is not a usable candidate.
            if m is not None and m.group():
                matches.append((m.group(), key))

        automatons = (
            (self.identificator_fsm, 'IDENTIFICATOR'),
            (self.constanta_integer_fsm, 'NUMBER'),
            (self.constanta_double_fsm, 'REAL_NUMBER'),
            (self.constanta_string_fsm, 'STRING_CONST'),
            (self.constanta_char_fsm, 'CHAR_CONST'),
        )
        for fsm, token_type in automatons:
            prefix = fsm.longestPrefix(rest)
            if len(prefix) > 0:
                matches.append((prefix, token_type))

        if not matches:
            return None, None
        if len(matches) == 1:
            return matches[0]

        result = None, None
        for match in matches:
            if match[1] in keywords:
                return match
            if result[0] is None or len(result[0]) < len(match[0]):
                result = match
        return result

    def parse(self, text):
        """Tokenize ``text`` and store the pairs in ``self.parse_result``.

        Identifiers and constants are interned in the symbol table and logged
        with their table value; every other token except separators is logged
        with value 0.

        Raises:
            Exception: when no token can be recognized at the current position.
        """
        result = []
        pos = 0
        while pos < len(text):
            token, token_type = self.get_token(text, pos)
            if token is None:
                raise Exception('Unrecognized token ' + text[pos:])

            result.append((token, token_type))

            if (token_type in constants) or (token_type == 'IDENTIFICATOR'):
                self.add_to_symbol_table(token)
                self.add_to_fip(token_type, self.symbol_table.get_value(token))
            elif token_type != 'SEPARATOR':
                self.add_to_fip(token_type, 0)

            pos += len(token)

        self.parse_result = result

    def add_to_symbol_table(self, token):
        """Register ``token`` under ``symbol_table.num + 1`` unless already present."""
        if not self.symbol_table.contains_key(token):
            value = self.symbol_table.num + 1
            self.symbol_table.add_key(token, value)

    def add_to_fip(self, token_type, value):
        """Append ``(code of token_type, value)`` to ``program_structure``."""
        # Use the definitions this lexer was built with, not a module global
        # that merely happens to share the name.
        self.program_structure.append((self.token_definition[token_type][1], value))

    def isadmissiblechar(self, ch):
        """Return True for a letter, a digit, a space or an underscore."""
        return ch.isalpha() or ch.isdigit() or ch == ' ' or ch == '_'
Exemplo n.º 40
0
class KnowledgeBase(object):
    """
    Propositional knowledge base.

    It holds a Python list of sentences (a sentence is an object created for
    this project) and a hash table that relates a symbol's alphabetical name
    to its id. After ``build_models`` it also holds ``model_list``, a list
    with one ``Model`` per possible assignment (2^n entries for n symbols).
    """
    def __init__(self):
        self.sentences = []  # sentences / clauses, in insertion order
        self.SymbolTable = HashTable(13)  # relates symbol names to ids

    def delete_last_sentence(self):
        """Remove the most recently added sentence."""
        self.sentences.pop()

    def build_models(self):
        """
        Build every possible model; there are 2^n of them for n symbols.

        Stores the generated table in ``self.ModelTable`` and one ``Model``
        per table row in ``self.model_list``.
        """
        scount = self.SymbolTable.key_count
        symbol_int_list = self.SymbolTable.list_of_ints
        self.ModelTable = ModelTable(scount, symbol_int_list)

        self.model_list = [Model(self.ModelTable.Table[i])
                           for i in range(2 ** scount)]

    def intern(self, name):
        """
        Register ``name`` in the symbol table (when ``get`` reports -1 for it)
        and return a ``Symbol`` for it.
        """
        k = self.SymbolTable.get(name)
        if k == -1:
            self.SymbolTable.put(name)
        return Symbol(name)

    def add(self, sentence):
        """
        Add a sentence to the knowledge base. Compound sentences such as
        implications are added the same way.
        """
        self.sentences.append(sentence)

    def find_KB_models(self):
        """
        Return the indexes of every model that satisfies all stored sentences.
        """
        verified = []
        for index, candidate in enumerate(self.model_list):
            # all() stops at the first unsatisfied sentence.
            if all(sentence.isSatisfiedBy(candidate.model, self.SymbolTable)
                   for sentence in self.sentences):
                verified.append(index)
        return verified

    def verify_alpha(self, sentence_verify):
        """
        Return the indexes of every model that satisfies ``sentence_verify``.
        """
        return [index for index, candidate in enumerate(self.model_list)
                if sentence_verify.isSatisfiedBy(candidate.model, self.SymbolTable)]

    def walk_SAT(self, p, max_flips):
        """
        Search for a model satisfying every sentence with the WalkSAT method.

        Starts from a randomly chosen model. While some clause is unsatisfied,
        one unsatisfied clause is picked at random; with probability ``p`` a
        random symbol of that clause is flipped, otherwise the symbol whose
        flip satisfies the most clauses is flipped.

        Returns True (after printing the model) as soon as a satisfying model
        is found, and False when none was found within ``max_flips`` rounds.
        The models stored in ``self.model_list`` are left untouched.
        """
        start = np.random.randint(0, len(self.model_list))

        # Work on a copy so the flips do not corrupt the stored model.
        model = np.copy(self.model_list[start].model)

        for _ in range(max_flips):
            unsatisfied = [
                index for index, sentence in enumerate(self.sentences)
                if not sentence.isSatisfiedBy(model, self.SymbolTable)
            ]

            if not unsatisfied:  # the current model satisfies every sentence
                print("a model satisfies the sentences")
                print(model)
                return True

            pick = np.random.randint(0, len(unsatisfied))
            clause = self.sentences[unsatisfied[pick]]
            symbol_ids = self.get_symbol_ids(
                self.get_all_symbols_from_sentence(clause))

            if np.random.rand() < p:
                # random walk: any symbol of the selected clause
                chosen = symbol_ids[np.random.randint(0, len(symbol_ids))]
            else:
                # greedy step: the symbol whose flip satisfies most clauses
                chosen = self.get_maximize_satisfied_clauses(symbol_ids, model)
            self.flip_sign(model, chosen)

        print("No model was found")
        return False

    def test(self):
        """Run ``walk_SAT`` with p = 0.5 and 1000 flips and print the result."""
        print(self.walk_SAT(.5, 1000))

    def get_maximize_satisfied_clauses(self, list_of_symbol_ids, model):
        """
        Return the id from ``list_of_symbol_ids`` whose flip satisfies the
        largest number of sentences; ties go to the earliest id. ``model``
        itself is not modified.
        """
        satisfied_counts = np.zeros(len(list_of_symbol_ids), dtype=np.int16)

        for position, symbol_id in enumerate(list_of_symbol_ids):
            trial_model = np.copy(model)
            self.flip_sign(trial_model, symbol_id)
            for sentence in self.sentences:
                if sentence.isSatisfiedBy(trial_model, self.SymbolTable):
                    satisfied_counts[position] += 1

        return list_of_symbol_ids[np.argmax(satisfied_counts)]

    def flip_sign(self, temp_model, id):
        """Toggle, in place, the value stored in column 1 of the row matching ``id``."""
        # NOTE(review): the comparison covers every column, so an id equal to
        # a stored value could match the wrong row - confirm the id range.
        where = np.where(temp_model == id)[0][0]
        temp_model[where][1] = (temp_model[where][1] + 1) % 2

    def get_symbol_ids(self, list_of_symbols):
        """Return the symbol-table id of every name in ``list_of_symbols``."""
        return [self.SymbolTable.get(symbol) for symbol in list_of_symbols]

    def get_all_symbols_from_sentence(self, sen):
        """Return the symbol names used in ``sen``, left-hand side first."""
        if sen.grammar_type < 2:
            return [sen.sentence.atom.name]
        return (self.get_all_symbols_from_sentence(sen.sentenceLHS)
                + self.get_all_symbols_from_sentence(sen.sentenceRHS))
Exemplo n.º 41
0
    def __init__(self, token_definition, text):
        """Store the token definitions, build five automatons and parse ``text``.

        The automatons recognize identifiers, integer constants, string
        constants, char constants and real-number constants; each is kept on
        its own attribute. ``self.parse(text)`` runs as the last step.
        """
        self.token_definition = token_definition
        self.symbol_table = HashTable()
        self.program_structure = []

        word_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789'
        quoted_chars = word_chars + '\'\"'

        def assemble(alphabet, states, transitions):
            # Create the machine, register its states, then wire transitions
            # given as (pattern, source state name, target state name).
            machine = FSM(list(alphabet))
            for state_name, options in states:
                machine.addState(State(state_name, **options))
            for pattern, source, target in transitions:
                machine.addTransition(
                    pattern, machine.findState(source), machine.findState(target))
            return machine

        final = {}
        non_final = {'isfinal': False}

        self.identificator_fsm = assemble(
            word_chars,
            [('WithoutDigits', final), ('WithDigits', final)],
            [(r'[a-zA-Z_]', 'Root', 'WithoutDigits'),
             (r'[a-zA-Z0-9_]', 'WithoutDigits', 'WithDigits'),
             (r'[a-zA-Z0-9_]', 'WithDigits', 'WithDigits')])

        self.constanta_integer_fsm = assemble(
            word_chars,
            [('Digit', final)],
            [(r'[0-9]', 'Root', 'Digit'),
             (r'[0-9]', 'Digit', 'Digit')])

        self.constanta_string_fsm = assemble(
            quoted_chars,
            [('LeftQuote', non_final), ('Character', non_final), ('RightQuote', final)],
            [(r'\"', 'Root', 'LeftQuote'),
             (r'[a-zA-Z0-9_]', 'LeftQuote', 'Character'),
             (r'[a-zA-Z0-9_]', 'Character', 'Character'),
             (r'\"', 'Character', 'RightQuote'),
             (r'\"', 'LeftQuote', 'RightQuote')])

        self.constanta_char_fsm = assemble(
            quoted_chars,
            [('LeftQuote', non_final), ('Character', non_final), ('RightQuote', final)],
            [(r'\"', 'Root', 'LeftQuote'),
             (r'[a-zA-Z0-9_]', 'LeftQuote', 'Character'),
             (r'\"', 'Character', 'RightQuote')])

        self.constanta_double_fsm = assemble(
            word_chars,
            [('Dot', non_final), ('Digit', final)],
            [(r'[0-9]', 'Root', 'Digit'),
             (r'[0-9]', 'Digit', 'Digit'),
             (r'.', 'Digit', 'Dot')])

        self.parse(text)
Exemplo n.º 42
0
 def setUp(self):
     """Create the HashTable instance the tests use and store it on ``self.ht``."""
     self.ht = HashTable()
Exemplo n.º 43
0
class HashTableTest(unittest.TestCase):
    """Exercise insert, search and delete on ``HashTable``."""

    def setUp(self):
        """Give every test its own empty table."""
        self.ht = HashTable()

    def test_basic(self):
        """Inserted keys are found; unknown and deleted keys yield None."""
        self.ht.insert('entry1', 'value1')
        self.ht.insert('entry2', 'value2')

        self.assertEqual(self.ht.search('entry1'), 'value1')
        self.assertEqual(self.ht.search('entry2'), 'value2')
        self.assertIsNone(self.ht.search('entry3'))

        self.ht.delete('entry1')
        self.ht.delete('entry2')

        # Check every deleted key, not only the first one.
        self.assertIsNone(self.ht.search('entry1'))
        self.assertIsNone(self.ht.search('entry2'))

    def test_ins_del(self):
        """Ten thousand integer keys can be inserted and then deleted again."""
        count = 10000

        for key in range(count):
            self.ht.insert(key, key)
            self.assertEqual(self.ht.search(key), key)

        for key in range(count):
            self.ht.delete(key)
            self.assertIsNone(self.ht.search(key))
Exemplo n.º 44
0
def test(searchcount, invalidcount, stringcount):
    """Benchmark insert and search on a hash table, a BST and a 2-3-4 tree.

    A ``Worker`` builds the input strings and is pickled to
    ``data/<stringcount>/input-<stringcount>``. Each structure then gets its
    own CSV file in the same folder (``HT-``, ``BST-`` and
    ``234-<stringcount>.csv``) holding ten timed trials each for inserting
    ``random_strings``, searching ``test_strings`` and searching
    ``error_strings``. Every trial is also echoed to stdout.

    Args:
        searchcount: search-item count passed to ``Worker.build``.
        invalidcount: invalid-item count passed to ``Worker.build``.
        stringcount: string count passed to ``Worker.build``; also used as the
            ``N`` column and as the name of the output folder.
    """
    import pickle
    import time

    w = Worker()
    w.build(stringcount, searchcount, invalidcount)

    datadir = "data"
    filetime = str(stringcount)
    prefix = datadir + "/" + filetime + "/"
    trial_count = 10

    # Save the generated input next to the results.
    inputfilename = prefix + "input-" + filetime
    _ensure_parent_dir(inputfilename)
    with open(inputfilename, "wb") as inputfile:  # pickle streams are bytes
        pickle.dump(w, inputfile)

    # The timed loops below stay inline on purpose: routing each operation
    # through a wrapper would add call overhead to the measured region.

    # ------ HASH TABLE ------
    print("Adding items to hash table....")
    datafilename = prefix + "HT-" + filetime + ".csv"
    _ensure_parent_dir(datafilename)
    with open(datafilename, "w") as datafile:
        _write_preamble(datafile, searchcount, invalidcount, stringcount)

        _write_section(datafile, "HashTable-" + filetime + "-Insert", first=True)
        for trial in range(trial_count):
            h = HashTable(57)
            basicop = 0
            begintime = time.time()
            for i in w.random_strings:
                basicop += h.add(i)
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "insert")

        # Searches run against the table left by the last insert trial.
        _write_section(datafile, "HashTable-" + filetime + "-SearchSuccessful")
        for trial in range(trial_count):
            basicop = 0
            begintime = time.time()
            for i in w.test_strings:
                ret = h.search(i)
                basicop += ret[0]
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "search")

        _write_section(datafile, "HashTable-" + filetime + "-SearchInvalidStrings")
        for trial in range(trial_count):
            basicop = 0
            begintime = time.time()
            for i in w.error_strings:
                ret = h.search(i)
                basicop += ret[0]
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "invalid search")

    # ------ BST ------
    datafilename = prefix + "BST-" + filetime + ".csv"
    _ensure_parent_dir(datafilename)
    with open(datafilename, "w") as datafile:
        _write_preamble(datafile, searchcount, invalidcount, stringcount)

        _write_section(datafile, "BST-" + filetime + "-Insert", first=True)
        for trial in range(trial_count):
            b = bst()
            basicop = 0
            begintime = time.time()
            for i in w.random_strings:
                basicop += b.insert(i)
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "insert")

        _write_section(datafile, "BST-" + filetime + "-SearchSuccessful")
        for trial in range(trial_count):
            basicop = 0
            begintime = time.time()
            for i in w.test_strings:
                ret = b.search(i)
                basicop += ret[0]
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "search")

        _write_section(datafile, "BST-" + filetime + "-SearchInvalidStrings")
        for trial in range(trial_count):
            basicop = 0
            begintime = time.time()
            for i in w.error_strings:
                ret = b.search(i)
                basicop += ret[0]
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "invalid search")

    # ------ 2-3-4 TREE ------
    datafilename = prefix + "234-" + filetime + ".csv"
    _ensure_parent_dir(datafilename)
    with open(datafilename, "w") as datafile:
        _write_preamble(datafile, searchcount, invalidcount, stringcount)

        _write_section(datafile, "234-" + filetime + "-Insert", first=True)
        for trial in range(trial_count):
            root = twothreefour.tNode(None)
            tree = twothreefour.Tree(root)
            basicop = 0
            begintime = time.time()
            for i in w.random_strings:
                basicop += tree.insert(i, root)
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "insert")

        _write_section(datafile, "234-" + filetime + "-SearchSuccessful")
        for trial in range(trial_count):
            basicop = 0
            begintime = time.time()
            for i in w.test_strings:
                basicop += tree.search(root, i)[0]
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "search")

        _write_section(datafile, "234-" + filetime + "-SearchInvalidStrings")
        for trial in range(trial_count):
            basicop = 0
            begintime = time.time()
            for i in w.error_strings:
                basicop += tree.search(root, i)[0]
            diff = time.time() - begintime
            _record_trial(datafile, trial, stringcount, basicop, diff, "invalid search")


def _ensure_parent_dir(path):
    """Create the directory that will contain ``path`` when it is missing."""
    import os
    parent = os.path.dirname(path)
    if not os.path.exists(parent):
        os.makedirs(parent)


def _write_preamble(datafile, searchcount, invalidcount, stringcount):
    """Write the line that records the three benchmark parameters."""
    datafile.write("\n\nTest " + "SearchCount: " + str(searchcount)
                   + " Invalid Count: " + str(invalidcount)
                   + " StringCount: " + str(stringcount) + "\n")


def _write_section(datafile, title, first=False):
    """Write a section title and the CSV column header.

    Every section except the first in a file is preceded by two newlines.
    """
    if not first:
        datafile.write("\n\n")
    datafile.write(title + "\n")
    datafile.write("Trial,N,BasicOps,Time\n")


def _record_trial(datafile, trial, item_count, basicop, elapsed, label):
    """Append one CSV row for a finished trial and echo it to stdout."""
    datafile.write(str(trial + 1) + "," + str(item_count) + ","
                   + str(basicop) + "," + str(elapsed) + "\n")
    print("%s time: %s" % (label, elapsed))
    print("basic op count: %s" % basicop)
Exemplo n.º 45
0
from HashTable import HashTable
import collections
import logging

# Module-level table shared by open_file() and time() below.
h = HashTable() 

def open_file(file):
    """Add every whitespace-separated word of ``file`` to the shared table ``h``.

    Each word is handed to ``h.add_to_table`` together with the value 1.

    Returns:
        ``h.hashtable`` after all words have been added.
    """
    with open(file) as source:
        words = source.read().split()
    for word in words:
        h.add_to_table(word, 1)
    return h.hashtable

# Demo: load the sample file, then add one word explicitly and read it back.
# Call-style print with a single argument behaves the same on Python 2 and 3.
print(open_file("random.txt"))
print(h.add_to_table("the", 10))
print(h.get_value("the"))


#Performance test functions -> Speed

def time(word):
    """Return how long one ``h.get_value(word)`` lookup takes, in seconds.

    Args:
        word: the key to look up in the shared table ``h``.
    """
    # time.clock() was removed in Python 3.8; timeit.default_timer exists on
    # both Python 2 and 3 and selects a suitable clock for the platform.
    from timeit import default_timer
    start = default_timer()
    h.get_value(word)
    return default_timer() - start
    
'''
#Non-collision lookup times ------> 

print time("the")
# --> 1.79999999972e-05 
print time("a")
Exemplo n.º 46
0
from HashTable import HashTable
from Permutations import Permutations

# Demo of HashTable: one key added without a value, one with a dict value;
# key 3 was never added.
ht = HashTable()
ht.add(1)
ht.add(2, {1:"Kapil", 2:"KKA"})
# Call-style print with a single argument behaves the same on Python 2 and 3.
print(ht.get(1))
print(ht.get(2))
print(ht.get(3))
print(ht.get(2))


# Demo of Permutations on a four-element list.
a = [1, 2, 3, 4]
p = Permutations(a)
p.dummy()
p.permuter()
Exemplo n.º 47
0
import argparse
import re
import sys

from HashTable import HashTable

#text = open("20leagues.txt", "r")
#print(text.read())

# The sentence is passed as ``description``: ArgumentParser's first positional
# parameter is ``prog``, which would make it the program name in the usage line.
parser = argparse.ArgumentParser(
    description="Print frequency of words used in a text file.")
parser.add_argument('filenames', nargs='*')

args = parser.parse_args()

counter = 0

ht = HashTable()

# hashCode = ht._hashcode("abc")
# print( "hashed string to: ", hashCode)
# hashCode = ht._hashcode( ht)
# print( "hashed ht to: ", hashCode)

wordcount = 0

def process(line):
    global wordcount
    if re.match(r"CHAPTER [IVXLCDM]+$", line):
        return 0
    elif re.match(r"PART [A-Z]+$", line):
        return 0
    elif re.match(r"\n+", line):