Example #1
	def combineWithChild(self):
		#first, let's see if it's even legal for us to combine with one of our children
		if (self.verb.verb.isModal() or self.verb.verb.isHelper()):
			return

		#the first verb to compare
		first = self.child.verb
		firstFull = first.verb.get(True)[0]["full"]
		#and the second
		second = self.verb
		secondFull = second.verb.get(True)[0]["full"]

		forms = (
			(word.word(firstFull + " " + secondFull, first.sentLoc, first.clauseLoc, first.numWords), second),
			(word.word(secondFull + " " + firstFull, first.sentLoc, first.clauseLoc, first.numWords), first),
			(word.word(firstFull + secondFull, first.sentLoc, first.clauseLoc, first.numWords), second),
			(word.word(secondFull + firstFull, first.sentLoc, first.clauseLoc, first.numWords), first)
		)

		#go through all possible combinations
		for f in forms:
			if (len(f[0].translations.searchFromDB()) > 0):
				#store the conjugation and defineable forms
				self.verb = f[0]
				self.conjugation = f[1]

				#we're absorbing our child, remove him
				self.child = self.child.child

				#and store the verbs we used to return in appendVerbs()
				self.verbs = (first, second)

				#and set our flag to record that we've combined with our child
				self.isCombined = True
				break
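The method above tries every way the two verbs could combine: separated and fused, in each order, keeping the first candidate that turns up in the dictionary. With plain strings in place of the word objects (the verb pair is illustrative), the four candidates look like this:

first, second = "kennen", "lernen"
candidates = [
    first + " " + second,   # "kennen lernen"  (separated, child first)
    second + " " + first,   # "lernen kennen"  (separated, parent first)
    first + second,         # "kennenlernen"   (fused, child first)
    second + first,         # "lernenkennen"   (fused, parent first)
]
# the first candidate with a dictionary entry wins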
Example #2
    def test_ordered_dict_search(self):
        odict = self.new_ordered_dict(4)
        odict.insert(10, 110)
        odict.insert(12, 112)
        odict.insert(15, 115)
        odict.insert(13, 113)
        odict.insert(14, 114)

        # no common edges in a trie
        with self.assertRaises(KeyError):
            odict.search(0)

        # some common edges in a trie
        with self.assertRaises(KeyError):
            odict.search(11)

        # contained elements
        result = odict.search(10)
        self.assertEqual(result, 110)

        result = odict.search_node(10)
        self.assertEqual(result.key, word(10, 4))
        self.assertEqual(result.value, 110)

        result = odict.search(12)
        self.assertEqual(result, 112)

        result = odict.search_node(12)
        self.assertEqual(result.key, word(12, 4))
        self.assertEqual(result.value, 112)
Example #3
    def test_ordered_dict_insert(self):
        odict = self.new_ordered_dict(8)

        self.assertEqual(odict.size(), 0)

        q = word(12, 8)
        node = odict.insert(q, 112)
        self.assertEqual(node.key, q)
        self.assertEqual(node.value, 112)
        self.assertEqual(odict.size(), 1)

        q = word(14, 8)
        node = odict.insert(q, 114)
        self.assertEqual(node.key, q)
        self.assertEqual(node.value, 114)
        self.assertEqual(odict.size(), 2)

        q = word(13, 8)
        node = odict.insert(q, 113)
        self.assertEqual(node.key, q)
        self.assertEqual(node.value, 113)
        self.assertEqual(odict.size(), 3)

        q = word(77, 8)
        node = odict.insert(q, 177)
        self.assertEqual(node.key, q)
        self.assertEqual(node.value, 177)
        self.assertEqual(odict.size(), 4)
Example #4
    def test_insert(self):
        veb = self.new_trie(4)
        ref = self.new_reference_trie(4)

        # \
        #  0111
        a = word(0b0111, 4)
        ref.insert(a)
        veb.insert(a)

        self.assertEqualTrie(veb, ref)

        #   0111
        # /
        # \
        #   1000
        b = word(0b1000, 4)
        ref.insert(b)
        veb.insert(b)

        self.assertEqualTrie(veb, ref)

        #   0111
        # /
        # \
        #   1 - 000
        #    \
        #     - 001
        c = word(0b1001, 4)
        ref.insert(c)
        veb.insert(c)

        self.assertEqualTrie(veb, ref)
Example #5
    def test_depths(self):
        trie = self.new_trie(16)

        q = word(0b0011111011101110, 16)
        depths = list(trie._depths(q))
        expect = [q.split_fst(14), q.split_fst(12), word.epsilon]

        self.assertEqual(depths, expect)

        q = word(0b0011010111010001, 16)
        depths = list(trie._depths(q))
        expect = [q.split_fst(14), q.split_fst(12), word.epsilon]

        self.assertEqual(depths, expect)

        # repeat the same checks with an 8-bit trie
        trie = self.new_trie(8)

        q = word(0b00111110, 8)
        depths = list(trie._depths(q))
        expect = [q.split_fst(6), q.split_fst(4), word.epsilon]

        self.assertEqual(depths, expect)

        q = word(0b00110101, 8)
        depths = list(trie._depths(q))
        expect = [q.split_fst(6), q.split_fst(4), word.epsilon]

        self.assertEqual(depths, expect)
Example #6
    def test_insert_order(self):
        # seed: 6897201961525902772
        veb = self.new_trie(8)
        ref = self.new_reference_trie(8)

        a = word(0b00110111, 8)
        ref.insert(a)
        veb.insert(a)

        self.assertEqualTrie(veb, ref)

        # \
        #  00 - 011001
        #    \
        #     - 110111
        b = word(0b00011001, 8)
        ref.insert(b)
        veb.insert(b)

        self.assertEqualTrie(veb, ref)

        # \
        #  00 - 011 - 001
        #    |     \
        #    |      - 100
        #    \
        #     - 110111

        c = word(0b00011100, 8)
        ref.insert(c)
        veb.insert(c)

        self.assertEqualTrie(veb, ref)
Example #7
    def test_has_prefix(self):
        # word itself is prefix
        a = word(0b1100, 4)
        p = word(0b1100, 4)

        result = a.has_prefix(p)
        self.assertTrue(result)

        # epsilon is a prefix
        a = word(0b1100, 4)
        p = word.epsilon

        result = a.has_prefix(p)
        self.assertTrue(result)

        # prefix is a prefix
        a = word(0b1100, 4)
        p = word(0b110, 3)

        result = a.has_prefix(p)
        self.assertTrue(result)

        # word which is shorter, but is no prefix
        a = word(0b1100, 4)
        p = word(0b010, 3)

        result = a.has_prefix(p)
        self.assertFalse(result)

        # word which is longer can't be a prefix
        a = word(0b1100, 4)
        p = word(0b11001, 5)

        result = a.has_prefix(p)
        self.assertFalse(result)
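A minimal sketch of the prefix test these cases pin down, assuming a word is just an integer value plus a bit width (illustrative names, not the project's actual class):

class PrefixWord(object):
    def __init__(self, x, w):
        self.x = x  # integer value
        self.w = w  # width in bits

    def has_prefix(self, p):
        # a longer word can never be a prefix
        if p.w > self.w:
            return False
        # compare p against our p.w high-order bits; epsilon (w == 0)
        # shifts everything away and so matches any word
        return (self.x >> (self.w - p.w)) == p.x

assert PrefixWord(0b1100, 4).has_prefix(PrefixWord(0b110, 3))
assert not PrefixWord(0b1100, 4).has_prefix(PrefixWord(0b010, 3))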
Example #8
    def test_search_phase_two(self):
        trie = Mihai.Tree(16)
        trie.construct([
            0b0111111000000010, 0b1000100100010011, 0b1010101101011110,
            0b1110110010010001, 0b1111100110110010, 0b0110000011111000,
            0b0000011110101100, 0b0101101000111011, 0b0111101010010111,
            0b0001010010110101, 0b0110100011010001, 0b0101010100000001,
            0b1100101010101110, 0b1110001101101010, 0b0010001001100001,
            0b0001101011100100, 0b0111100011011101, 0b0100000010000111,
            0b1100110011100000, 0b0101010100110111, 0b1000111001111010,
            0b0000101100001000, 0b1000001010000011, 0b0010011101100011,
            0b1010110101110111, 0b0110100100101001, 0b0011101101101101,
            0b0100010000000101, 0b0000101001001101, 0b1011000111100100
        ])

        # q is below (u,v)
        q = word(0b1101110101100101, 16)
        index = trie._lca_search2(q, 2, 2, 4)
        self.assertEqual(index, 3)

        # q is in between (u,v)
        q = word(0b1000011010001001, 16)
        index = trie._lca_search2(q, 4, 4, 8)
        self.assertEqual(index, 5)

        # q is below (u,v)
        q = word(0b1010100101111010, 16)
        index = trie._lca_search2(q, 5, 5, 8)
        self.assertEqual(index, 6)

        # q is below (u,v)
        q = word(0b1100101111111111, 16)
        index = trie._lca_search2(q, 5, 5, 8)
        self.assertEqual(index, 7)

        # q is in between (u,v)
        q = word(0b1010000010100001, 16)
        index = trie._lca_search2(q, 4, 4, 8)
        self.assertEqual(index, 4)

        # q is in between (u,v)
        q = word(0b0110100011110001, 16)
        index = trie._lca_search2(q, 8, 8, 12)
        self.assertEqual(index, 10)

        # q is in between (u,v)
        q = word(0b0101010100000000, 16)
        index = trie._lca_search2(q, 12, 12, 16)
        self.assertEqual(index, 14)

        # q is in between (u,v)
        q = word(0b0000011110100101, 16)
        index = trie._lca_search2(q, 12, 12, 16)
        self.assertEqual(index, 12)

        # q is contained
        q = word(0b0111111000000010, 16)
        index = trie._lca_search2(q, 16, 16, 16)
        self.assertEqual(index, 16)
Example #9
    def test_hash(self):
        a = word(12, 8)
        b = word(12, 8)
        hashmap = {}

        hashmap[a] = False
        hashmap[b] = True

        self.assertEqual(hash(a), hash(b))
        self.assertTrue(hashmap[a], 'a should hash to the same location as b')
        self.assertTrue(hashmap[b], 'b should hash to the same location as a')
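For the two distinct word objects to share one dict slot as this test expects, the class needs value-based equality and a hash that agrees with it. A minimal sketch of that contract (illustrative, not the project's implementation):

class HashedWord(object):
    def __init__(self, x, w):
        self.x, self.w = x, w

    def __eq__(self, other):
        return (self.x, self.w) == (other.x, other.w)

    def __hash__(self):
        # equal (value, width) pairs must hash equally
        return hash((self.x, self.w))

hashmap = {}
hashmap[HashedWord(12, 8)] = False
hashmap[HashedWord(12, 8)] = True   # overwrites the first entry
assert list(hashmap.values()) == [True]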
Example #10
    def test_pred(self):
        self.assertEqual(word(3, 2).pred(), word(2, 2))
        self.assertEqual(word(2, 2).pred(), word(1, 2))
        self.assertEqual(word(1, 2).pred(), word(0, 2))

        with self.assertRaises(TypeError):
            word(0, 2).pred()
Example #11
    def insert(self, q, value=None):
        """
        Insert q into the trie
        """
        q = word(q, self.w)

        start_node = self.root.child(q)

        if start_node is None:
            node = self._insert_leaf(q, value, self.root)
            self._size += 1
            return node

        assert \
            start_node.is_leaf() or (
                start_node.left is not None and
                start_node.right is not None
            ), "start_node is either a leaf or a strict branch node"

        lca, child = self.lowest_common_ancestor(q)

        if lca.is_leaf_of(q):
            lca.value = value
            return lca

        assert \
            child is not None, \
            "the subtree in which q belongs is not empty"

        _, new_node = self._insert_node(q, value, lca, child)
        self._size += 1
        return new_node
Example #12
	def __participleMeanings(self, participles, meanings):
		#add our participles to our meanings
		for p in participles:
			presentParticiple = p.verb.isPresentParticiple()
			forms = p.verb.get(unknownHelper = True)

			#save the full form of our word for the translation
			origWord = p.verb.word

			#if we found no conjugations for the verb, then we had something like "gesehenen",
			#so we need to get a new word from the stem of the participle, then we let the
			#translator run through all its stuff and get the meaning of the verb, and then to
			#the output it goes
			if (len(forms) == 0):
				p = word.word(p.verb.getParticipleStem()[0], p.sentLoc, p.clauseLoc, p.numWords)
				forms = p.verb.get(True)

			fullForm = forms[0]["full"]
			loc = p.sentLoc

			#fix for python 2.4
			tense = "past participle"
			if (fullForm == origWord):
				tense = "infinitive"
			elif (presentParticiple):
				tense = "present participle"

			for t in p.get("verb"):
				meanings.append({
					"en": "(" + tense + ") " + t["en"],
					"de": fullForm,
					"deOrig": origWord,
					"deWordLocation": loc
				})
Example #13
 def parse(self,links):
    strwords = []
    for link in links:
       #get html
       with urllib.request.urlopen(link) as url:
          page = url.read()
       #beautiful soup object
       soup = BeautifulSoup(page, 'html.parser')
       #extract and combine paragraphs
       paragraphs = soup.find_all('p')
       for x in paragraphs:
          #separate all words (returns list)
          strwords = strwords + x.getText().split()
       
    #test regex
    regex = r'\.$|\W$'
    self.filtered = parse.test_regex(strwords,regex)
    #parse words.
    #regex: \.$ matches a period at the end of the string OR \W$ matches a
    #special char at the end (which also catches tokens that are only a
    #special char).
    #still needs work on apostrophes, missed spaces after periods, and word?word;
    #it leaves empty strings instead of deleting them, I think.
    #this should be made into a method.
    strwords[:] = [re.sub(regex,'',x) for x in strwords]
    #returns a dictionary object
    strwords = Counter(strwords)
    for x,y in strwords.items():
       #could also use dict defaultset()
       if x in self.words:
          self.words[x].incr_count(y) 
       else:
          self.words[x] = word(x,None,None,None,None,y,None)
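The caveats in the comments are easy to see with the regex alone: it strips one trailing period or non-word character per token, and a token that was only punctuation becomes an empty string rather than disappearing:

import re

regex = r'\.$|\W$'
tokens = ['end.', 'word,', 'plain', '?']
assert [re.sub(regex, '', t) for t in tokens] == ['end', 'word', 'plain', '']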
Example #14
def get_set(doc_num, num_set):
    word_set = {}
    doc_dir = os.listdir(train_path)
    w_dict = {}
    for dd in doc_dir:
        f_list = os.listdir(train_path + dd)
        print "get in the --->   " + dd + "  <---"
        for fpath in f_list:
            d_path = train_path + dd + '/' + fpath
            with open(d_path, "rb") as d_file:
                list_tmp = []
                lines = d_file.readlines()
                for line in lines:
                    tokens = nltk.regexp_tokenize(line, pattern)
                    for t in tokens:
                        if t.lower() not in stopword:
                            list_tmp.append(t.lower())
                set_tmp = set(list_tmp)
                for w in set_tmp:
                    if w in word_set:
                        word_set[w].update_dict(cat_dic[dd])
                    else:
                        #surprise! if I did not initialize the dict here, all entries would share the same dict!
                        word_set[w] = word(w, 0, 0, 0, {cat_dic[dd]: 1})
    #get the word_in_doc nums
    for idx in word_set:
        word_set[idx].get_docs()
        word_set[idx].get_widf(doc_num)
        word_set[idx].get_s(doc_num, num_set)
    return word_set
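The "surprise" that comment warns about is Python's shared-mutable-default gotcha: a default dict argument is created once, when the function is defined, so every call that relies on it mutates the same object. A standalone demonstration:

def bad(counts={}):
    counts['n'] = counts.get('n', 0) + 1
    return counts

assert bad() is bad()    # both calls return the very same dict
assert bad()['n'] == 3   # state has leaked across all three calls

Passing a fresh {cat_dic[dd]: 1} per word, as the code above does, sidesteps exactly this.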
Example #15
    def lowest_common_ancestor(self, q):
        """
        lca - lowest common ancestor
        child - child of lowest common ancestor in the direction of q

        return [lca, child]
        """
        q = word(q, self.w)
        start_node = self.root.child(q)

        # q navigates into an empty subtree of the root
        if start_node is None:
            return [self.root, None]

        curr = start_node
        c, i = q.split(curr.edge.w)

        # c == curr.edge iff q.has_prefix(curr.key)
        while not curr.is_leaf() and c == curr.edge:
            curr = curr.child(i)
            c, i = i.split(curr.edge.w)

        if c == curr.edge:
            return [curr, None]

        return [curr.parent, curr]
Example #16
    def new_node(self, q, value=None):
        q = word(q, self.w)
        node = self.root.new_node(q, value)

        # create a new loose node
        node.parent = None
        return node
Example #17
File: trie.py Project: sandeva/appspot
 def List2Trie(self, filename):
     self.CharCounts = {}
     totalCharCount = 0
     self.CharCounts = dict([(i,1) for i in range(len(variations.Chars))])
     self.allWords = {}
     self.allWords[-1] = word(self, -1)
     self.BaseWord = -1
 
     splited_words = open(os.path.join(os.path.dirname(__file__),"splited.csv")).readlines()#[:10000]
     c = 0
     self.totalWordCount = 0
     for s in splited_words:
         c+=1
         m = re.match("(.*?),([0-9]+)", s)
         if m and len(m.groups()) == 2:
             st1 = m.groups()[0]
             chrs = variations.SplitIntoChars(st1)
             cnt = int(m.groups()[1])
             self.totalWordCount += cnt
             for i in chrs:
                 self.CharCounts[i] += cnt
             totalCharCount += len(chrs) * cnt
             self.GetIndexedWord(self.BaseWord).Add(chrs,cnt, st1)
     bw=open("charprobabilities.dat",'wb')
     charprobabilities = ""
     for i in range(len(variations.Chars)):
         self.CharCounts[i] = math.log(self.CharCounts[i]) - math.log(totalCharCount)
         charprobabilities += variations.Chars[i] + "," + str(self.CharCounts[i]) + "\n"
         bw.write(pack('d', self.CharCounts[i]))
     bw.close()
     open("charprobabilities.csv",'w').write(charprobabilities)
     self.xCountCalcAll(self.BaseWord, 0)
     self.Save(filename,"modified.csv")
Example #18
    def processa_linea_vocabolo(self,line):

        splittedline = line.split(";")
        if len(splittedline) != 4:
            # every line is supposed to have four fields
            print(bcolors.FAIL, "Line: " + line + " ignored, it does not contain 4 distinct fields separated by \";\"",
                  bcolors.RESET)
            return None
        solution = splittedline[0]
        rule = splittedline[1]
        term = splittedline[2]
        group = splittedline[3]

        solution_set = {}
        solution_set["e"] = ["è", "é", "e"]
        solution_set["o"] = ["ò", "ó", "o"]
        solution_set["s"] = ["ss", "s", "sss"]
        solution_set["z"] = ["zz", "z", "zzz"]
        if group not in solution_set.keys():
            print(bcolors.FAIL, "Line: " + line + " ignored; the group: ", group, " is not valid", bcolors.RESET)
            return None
        elif solution not in solution_set[group]:
            print(bcolors.FAIL, "Line: " + line + " ignored; for the group: ", group, " the accepted solutions are: ", solution_set[group], bcolors.RESET)
            return None
        #if we get here, everything is fine
        lexic = word.word(solution, rule, term, group)
        return lexic
Example #20
    def test_ordered_dict_random_remove(self, seed=None):
        with self.random(seed) as rand:
            size = rand.randint(0, 150)
            samples = rand.sample(xrange(255), size)

            while len(samples) > 0:
                odict1 = self.new_ordered_dict(8)
                odict2 = self.new_ordered_dict(8)

                odict1.extend(samples)

                rand.shuffle(samples)
                val = samples.pop()

                result = odict1.remove(val)
                odict2.extend(samples)

                self.assertEqual(result.key, word(val, 8))

                self.assertEqual(odict1.elements(), odict2.elements())
                self.assertEqual(odict1.size(), len(samples))

                min1 = odict1.min_node()
                min2 = odict2.min_node()
                self.assertEqual(min1 and min1.key, min2 and min2.key)

                max1 = odict1.max_node()
                max2 = odict2.max_node()
                self.assertEqual(max1 and max1.key, max2 and max2.key)
Example #21
    def test_successor_with_lca(self):
        # special case trie is empty
        trie = self.new_trie(8)

        q = word(54, 8)
        result = trie.successor_with_lca(q, trie.root, None)
        self.assertIsNone(result)
Example #22
    def successor_node(self, q):
        q = word(q, self.w)

        # q is in the set
        child = self.search_node(q)
        if child is not None:
            return child.next_leaf()

        for c in self._depths(q):
            if c in self.T_d:
                lca, child = self.lowest_common_ancestor_start(q, c)
                return self.successor_with_lca(q, lca, child)

            successor = self._successor(q, c)
            if successor:
                return successor

            predecessor = self._predecessor(q, c)
            if predecessor:
                return predecessor.next_leaf()

        return None
Example #23
def buildWords(theList):
    global theWords
    with open('wordsEn.txt','r') as filetxt:
        for line in filetxt:
            for aword in line.split():
                newWord=word.word(aword.strip())
                theWords.append(newWord)
Example #24
 def insert(self, q, value=None):
     q = word(q, self.w)
     try:
         new_node = self._insert(q, q, value)
         self._size += 1
         return new_node
     except ValueError as err:
         return err.args[0]
Example #25
 def remove(self, q):
     q = word(q, self.w)
     try:
         removed = self._remove(q)
         self._size -= 1
         return removed
     except KeyError:
         raise KeyError(q.x)
Example #26
    def test_insert_3bit(self):
        veb = self.new_ordered_dict(3)
        xs = []

        for x in xrange(8):
            veb.insert(x)
            xs.append(word(x, 3))
            self.assertEqual(veb.elements(), xs)
Example #27
    def test_predecessor_query_is_in_successor_tree(self):
        items = [41, 72, 110, 150, 210]

        trie = self.new_trie(8, items)

        result = trie.predecessor(90)
        expect = word(72, 8)
        self.assertEqual(result, expect)
Example #28
    def test_search_phase_one(self):
        trie = Mihai.Tree(16)
        trie.construct([
            0b0111111000000010, 0b1000100100010011, 0b1010101101011110,
            0b1110110010010001, 0b1111100110110010, 0b0110000011111000,
            0b0000011110101100, 0b0101101000111011, 0b0111101010010111,
            0b0001010010110101, 0b0110100011010001, 0b0101010100000001,
            0b1100101010101110, 0b1110001101101010, 0b0010001001100001,
            0b0001101011100100, 0b0111100011011101, 0b0100000010000111,
            0b1100110011100000, 0b0101010100110111, 0b1000111001111010,
            0b0000101100001000, 0b1000001010000011, 0b0010011101100011,
            0b1010110101110111, 0b0110100100101001, 0b0011101101101101,
            0b0100010000000101, 0b0000101001001101, 0b1011000111100100
        ])

        q = word(0b1101110101100101, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 0)

        q = word(0b1001011010001001, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 0)

        q = word(0b1010100101111010, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 4)

        q = word(0b1010000010100001, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 4)

        q = word(0b1100101111111111, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 4)

        q = word(0b0110100011110001, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 8)

        q = word(0b0101010100000000, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 12)

        q = word(0b0000011110100101, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u - 1)
        self.assertEqual(index, 12)

        q = word(0b0111111000000010, 16)
        index = trie._lca_search1(q, 0, 0, trie.sqrt_log_u)
        self.assertEqual(index, 16)
Example #29
    def test_create(self):
        # test normal creation
        a = word(15, 4)
        self.assertIsInstance(a, word)

        # test normal creation with a word as argument
        a = word(a, 4)
        self.assertIsInstance(a, word)

        # 4095 needs 12 bits, but it can only hold 8 bits
        with self.assertRaises(TypeError):
            a = word(4095, 8)

        # you can't create a word from another word whose word size
        # doesn't match
        with self.assertRaises(TypeError):
            a = word(15, 4)
            a = word(a, 8)
Example #30
    def search_node(self, q):
        q = word(q, self.w)

        lca, child = self.lowest_common_ancestor(q)

        if lca.is_leaf_of(q):
            return lca

        return None
Example #31
    def test_split_concat(self):
        xs = [(w, x) for w in range(4) for x in range(8)]

        for w, x in xs:
            a = word(x, 3)
            c, i = a.split(w)
            b = c.concat(i)

            self.assertEqual(a, b)
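A self-contained sketch of the identity this test exercises: split(w) cuts off the top w bits, concat glues a suffix back on, and the two are inverses at every cut point. Class and field names here are assumptions:

class BitWord(object):
    def __init__(self, x, w):
        self.x, self.w = x, w

    def __eq__(self, other):
        return (self.x, self.w) == (other.x, other.w)

    def split(self, w):
        # top w bits, and the remaining (self.w - w) low bits
        hi = BitWord(self.x >> (self.w - w), w)
        lo = BitWord(self.x & ((1 << (self.w - w)) - 1), self.w - w)
        return hi, lo

    def concat(self, other):
        return BitWord((self.x << other.w) | other.x, self.w + other.w)

for w in range(4):
    for x in range(8):
        a = BitWord(x, 3)
        c, i = a.split(w)
        assert c.concat(i) == a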
Example #32
    def successor_node(self, q):
        q = word(q, self.w)

        # tree is empty
        if self.root.is_leaf():
            return None

        lca, child = self.lowest_common_ancestor(q)
        return self.successor_with_lca(q, lca, child)
Example #33
    def test_common_prefix_split(self):
        # a = 1100 = 12
        # b = 1100 = 12
        # p = 1100 = 12
        # suffix1 = 0 with word size 0
        # suffix2 = 0 with word size 0
        a = b = p = word(12, 8)
        s1 = s2 = word(0, 0)

        pre, suf1, suf2 = a.common_prefix_split(b)
        self.assertEqual(pre, p)
        self.assertEqual(suf1, s1)
        self.assertEqual(suf2, s2)

        # a = 1111 0001 0001 1100 = 61724
        # b = 1111 0010 1001 0100 = 62100
        # p = 1111 00 = 60 with word size 6
        # suffix1 = 01 0001 1100 = 284 with word size 10
        # suffix2 = 10 1001 0100 = 660 with word size 10
        a = word(61724, 16)
        b = word(62100, 16)
        p = word(60, 6)
        s1 = word(284, 10)
        s2 = word(660, 10)

        pre, suf1, suf2 = a.common_prefix_split(b)
        self.assertEqual(pre, p)
        self.assertEqual(suf1, s1)
        self.assertEqual(suf2, s2)

        # a = 1100 1101 0001 1100 = 52508
        # b = 0100 1100 1001 0100 = 19604
        # p = empty = 0 with word size 0
        # suffix1 = 1100 1101 0001 1100 = 52508
        # suffix2 = 0100 1100 1001 0100 = 19604
        a = word(52508, 16)
        b = word(19604, 16)
        p = word(0, 0)
        s1, s2 = a, b

        pre, suf1, suf2 = a.common_prefix_split(b)
        self.assertEqual(pre, p)
        self.assertEqual(suf1, s1)
        self.assertEqual(suf2, s2)
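A hedged sketch of the semantics these cases pin down, with plain (value, width) tuples standing in for word objects: find the longest common prefix of two equal-width words and return it together with both remainders:

def common_prefix_split(ax, bx, w):
    n = 0  # length of the common prefix
    while n < w and (ax >> (w - n - 1)) == (bx >> (w - n - 1)):
        n += 1
    mask = (1 << (w - n)) - 1
    return (ax >> (w - n), n), (ax & mask, w - n), (bx & mask, w - n)

# matches the worked 16-bit case above: prefix 60/6, suffixes 284/10 and 660/10
assert common_prefix_split(61724, 62100, 16) == ((60, 6), (284, 10), (660, 10))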
Example #34
	def add_word(self):
		pair = self.to_add.pop(0)
		while pair[1] in self.word_card_map:
			if pair[1] != self.word_card_map[pair[1]]:
				self.to_add.append((pair[0],pair[1] + ' 2'))
			if len(self.to_add) == 0:
				return
			pair = self.to_add.pop(0)
		w = word(pair[0], pair[1], False)
		card(self, w)
Example #35
	def test_guess_word(self):
		word.raw_input = lambda _: give_answer(.5, 'çekirge')
		word.word.say = lambda _: True
		word.display = lambda _: True
		w = word.word('çekirge', 'grasshopper')
		for i in range(2000):
			w.guess_word()
		self.assertEqual(2001, w.num_times_seen)
		self.assertEqual(True, abs(float(w.num_times_correct)/w.num_times_seen - .5) < .03)
		word.raw_input = lambda x: raw_input(x)
Example #36
    def test_ordered_dict_remove(self):
        odict = self.new_ordered_dict(8)
        odict.insert(249, 1249)

        with self.assertRaises(KeyError):
            odict.remove(12)

        result = odict.remove(249)
        self.assertEqual(result.key, word(249, 8))
        self.assertEqual(result.value, 1249)
        self.assertEqual(odict.size(), 0)
Example #37
    def search_node(self, q):
        q = word(q, self.w)

        try:
            root = self.T[q]
        except KeyError:
            return None

        # root is the parent of the searched leaf
        q = q.remove_prefix(root.key)[0]
        return root.child(q)
Example #38
    def test_ordered_dict_update_value(self):
        odict = self.new_ordered_dict(8)

        self.assertEqual(odict.size(), 0)

        q = word(12, 8)
        node1 = odict.insert(12, 15)
        node2 = odict.insert(q, 18)

        self.assertIs(node1, node2)
        self.assertEqual(node1.value, 18)
        self.assertEqual(odict.size(), 1)
Example #39
def chooseWord():
    global wordLength
    global answer
    global theWords
    global wordIdx
    global trys
    wordIdx=randint(0,len(theWords)-1)
    wordLength=theWords[wordIdx].wordLength
    answer=word.word(" ",True,wordLength)
    trys=0
Example #40
	def __doTranslations(self, fullForm):
		if (not verbNode.doTranslations):
			return

		#check if we're a `kennen lernen` type guy
		toTranslate = fullForm
		if (self.conjugation.word != self.verb.word):
			words = self.verb.word.split(" ")
			#get the original form of the word
			words[len(words) - 1] = self.conjugation.word
			toTranslate = " ".join(words)

		trans = word.word(toTranslate).get("verb")
		self.__meaning(trans, fullForm)
Example #41
	def test_heap(self):
		word.word.say = lambda _: True
		word.display = lambda _: True
		for pair in [('çekirge', 'grasshopper'), ('okul yılı', 'school year'), ('akort etmek', 'to tune'), ('oynamak', 'to perform'), ('düğmelemek', 'to button'), ('korku', 'fear'), ('memeli', 'mammal'), ('gelir', 'revenue')]:
			text, meaning = pair
			word.raw_input = lambda _: give_answer(.5, text)
			w = word.word(text, meaning)
			heap.heap_node(self.pl, w)
			self.check_values()
		for i in range(20):
			h = self.pl.heap_root
			word.raw_input = lambda _: give_answer(.5, h.word.text)
			h.word.guess_word()
			h.update()
			self.check_values()
Example #42
	def wordCount(self, text):
		#This function returns a dictionary mapping each word in the text to the number of times it occurs
		print "[-] Preparing datastructures for analysis"
		text = self.sanitise(text)
		text = text.split(' ')
		for word in text:
			word = word.replace(' ', "").strip()
			try:
				self.wordCountDict[word] = self.wordCountDict[word] + 1
			except KeyError:
				self.wordCountDict[word] = 1
		for w in self.wordCountDict:
			wordOb = wd.word(w,int(self.wordCountDict[w]))
			self.wordObjectList.append(wordOb)
		
		self.stats()
Example #43
def get_word_dict(path):
    with open(path['lex_path'],'r') as infile:
        word_dict = {}
        py_dict = {}
        for line in infile.readlines():
            str_tmp = line.rstrip().split('\t')
            word_dict[str_tmp[0]] = word(str_tmp[0],{},0)
            py_tmp = get_pinyin(str_tmp[1:])
            if py_tmp in py_dict:
                py_dict[py_tmp].append(str_tmp[0])
            else:
                py_dict[py_tmp] = [str_tmp[0]]
    print "Have got the word dict"
    #max_len = max(len(x) for x in word_dict)
    return word_dict,py_dict
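The append-or-create branching used for py_dict above is the pattern dict.setdefault collapses into one line; a tiny check with made-up data:

py_dict = {}
for py, w in [('ma', 'word_a'), ('ma', 'word_b')]:
    py_dict.setdefault(py, []).append(w)
assert py_dict == {'ma': ['word_a', 'word_b']}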
Example #44
def get_voc_set():
    word_dict = {}
    word_no = 0
    doc_dir = os.listdir(tr_data_path)
    for doc_cat in doc_dir:
        file_list = os.listdir(tr_data_path+doc_cat)
        print 'Processing files in folder: '+doc_cat
        for file_path in file_list:
            doc_f = open(tr_data_path+doc_cat+'/'+file_path,'rb')
            document = doc_f.read()
            doc_f.close()
            tokens = set(nlp.word_tokenize(document))
            for w in tokens:
                if w not in word_dict:
                    word_dict[w] = word(w,word_no)
                    word_no += 1
                word_dict[w].update_dict(cat_dict[doc_cat])
    return word_dict
Example #45
def translate(query, beAggressive):
	"""Does the hefty work of translating the input"""

	try:
		query = utf8.encode(query)

		if (sentenceFigurer.canTranslate(query)):
			s = sentenceFigurer(query)
			return s.translate(beAggressive)
		else:
			w = word.word(query)
			return w.get()
	except:
		if (app.config.get('debug', False)):
			raise
		else:
			return []
Example #46
	def __translateInheritedTense_modal(self, uberParent):
		"""
		Translates an inherited tense.  When we get here, it means we're doing something like:

			"Ich würde bleiben müssen" -> "I would have to stay"
		"""

		form = uberParent.verb.verb.get(True)[0]
		stem = uberParent.verb.verb.getStem()

		#this is the only case I can think of right now, more to come, I'm sure
		if (form["subj2"] == stem):
			self.setTense(tenses.INFINITIVE)

		#and add our translations to the output
		for v in self.conjugation.verb.get(True):
			trans = word.word(v["full"]).get("verb")
			self.__meaning(trans, v["full"])
Example #47
	def translate(self, beAggressive):
		"""Assumes we can translate it, then runs a sentence guesser on it"""

		#remove any character that can't appear in a word
		tmpClauses = [re.sub(u"[^a-zA-Z0-9ÄÖÜäöüß\s]*", "", r.strip()) for r in re.split("[,\.\?\!\;\:]*", self.query) if len(r) > 0]

		#do a first pass over the sentence to split each clause into words and count them
		words = []
		numWords = 0
		for c in tmpClauses:
			w = c.split(" ")
			numWords += len(w)
			words.append(w)

		#and now do a final pass to build up our word objects
		loc = 0
		ret = []
		for w in words:
			wLen = len(w)
			w = [word.word(t, loc + i, i, wLen) for i, t in enumerate(w)]
			ret += clauseFigurer().translate(w, beAggressive)
			loc += wLen

		return ret
Example #48
# Pygame Hangman
# hangman.py
# [email protected]
# A simple implementation of Hangman using python3 and pygame
import pygame
import word
from random import randint

#game variables
wordList=[]
trys=0
wordLength=0
wordIdx=-1
answer=word.word(" ")
complete=False
lost=False
theWords=[]
flashC = 0
isFlash=True

# Define some colors
black = ( 0, 0, 0)
white = ( 255, 255, 255)
green = ( 0, 255, 0)
red = ( 255, 0, 0)


def chooseKey(event):
    key="a"
    if event.key == pygame.K_b:
        key="b"
Example #49
	def translate(self, words, beAggressive):
		"""Given a complete clause, finds relations amongst verbs and determines their tenses."""

		#run for all the possible verbs (participles could be included in this list)
		tmpVerbs = [v for v in words if v.isVerb()]

		if (len(tmpVerbs) == 0):
			tmpVerbs = [v for v in words if v.isVerb(ignoreLocation = True)]

			if (len(tmpVerbs) == 0):
				participles = []
				meanings = []
				[participles.append(w) for w in words if w.verb.isPastParticiple()]
				[participles.append(w) for w in words if w not in participles and w.verb.isPresentParticiple()]
				self.__participleMeanings(participles, meanings)

				return meanings

		#lowercase the verbs -- we need this for our comparisons later
		for v in tmpVerbs:
			v.word = v.word.lower()
			v.verb.word = v.verb.word.lower()

		#all the possible verbs in the sentence
		possibleVerbs = [v for v in tmpVerbs if not v.verb.isPresentParticiple()]

		#the present participles that were originally mistaken for verbs -- they were excluded in
		#the above statement, so we need to grab them here
		participles = [v for v in tmpVerbs if v not in possibleVerbs]

		#only add in past participles if they're not in our list of possible verbs -- if it is really
		#a participle and included in the list of possible verbs, it will be pruned out later
		[participles.append(w) for w in words if w not in possibleVerbs and w.verb.isPastParticiple()]

		#present participles are easy -> only add them if they didn't come from the mistaken list of
		#verbs above
		[participles.append(w) for w in words if w not in participles and w.verb.isPresentParticiple()]

		#step 2: since we are in a clause, we have isolation from all other verbs, so let's
		#start building out our verb tree
		#
		#do we have a separable prefix that needs re-attaching?
		lastWord = words[len(words) - 1]
		if (lastWord.isSeparablePrefix() and len(possibleVerbs) > 0):
			#attempt to see if when we add the prefix to the verb, it is still a verb
			prefixed = word.word(lastWord.word + possibleVerbs[0].word, possibleVerbs[0].sentLoc, possibleVerbs[0].clauseLoc, possibleVerbs[0].numWords)
			if (prefixed.isVerb()):
				tmpVerbs.remove(possibleVerbs[0])
				possibleVerbs[0] = prefixed #it's a separable verb, so replace it

		#pass it onto the tree constructor to build out our verb tree
		tree = verbTree()
		tree.build(possibleVerbs)

		#do our first pass on the tree to clean out the remaining participles
		tree.translate(translate = False)

		#clear our ambiguous words
		ambi = tree.pruneAmbiguousWords(beAggressive)

		if (len(ambi) > 0):
			[tmpVerbs.remove(v) for v in ambi if v in tmpVerbs]
			[possibleVerbs.remove(v) for v in ambi if v in possibleVerbs]

			#and rebuild our tree...again
			tree.build(possibleVerbs)

			#do our second pass on the tree, if we removed some "sein"s
			tree.translate(translate = False)

		for i in (1,2):
			#add the mistaken participles to our participle list
			participles += tree.pruneParticiples()

			[possibleVerbs.remove(v) for v in participles if v in possibleVerbs]
			tree.build(possibleVerbs)

			#do a final pass (now that it's clean) for the actual tenses and translations
			tree.translate(translate = True)

		#debugging dump of the tenses and nodes
		tree.dump()

		#grab all the used verbs
		verbs = tmpVerbs[:]
		[verbs.remove(v) for v in tree.getVerbs() if v in verbs]

		#only add participles to our list if they're not already in the list (no duplicates allowed)
		#anything left over in verbs at this point was not used in the tree, so chances are it is
		#a participle
		[participles.append(v) for v in verbs if v not in participles]

		#the meanings of the used, conjugated verbs
		meanings = tree.getMeanings()
		self.__participleMeanings(participles, meanings)
		return meanings
Example #50
import word

a1 = word.word("I")
a2 = word.word_question("want")
a3 = word.word("eat")
a4 = word.word("soup")

print a1,a2,a3,a4
Example #51
	def __translateWithHelper(self, parent):
		#grab our helper's conjugations and stuff
		helperConj = parent.verb.verb.getStem()
		helper = parent.verb.verb.get(unknownHelper = True)[0]

		#if we're going for simple tenses
		if (helper["stem"] == "hab" or helper["stem"] == "sein"):
			#it's possible that we have numerous verbs that take the same past-tense form
			verbs = []
			stem = self.conjugation.verb.getStem()

			#is the verb in the right form for having a helper?
			#check here to make sure that the entered verb is in the right past-tense form
			for v in self.conjugation.verb.get(helper = helper["full"]):
				#make sure we have the right helper, too
				if (v["perfect"] == stem and v["hilfsverb"] == helper["full"]):
					verbs.append(word.word(v["full"]))

			#two loops...otherwise things get far too indented and painful
			for v in verbs:
				used = False

				#process the translation into its proper output form
				if (helperConj in (helper["third"], helper["firstPlural"], helper["first"], helper["thirdPlural"], helper["stem"])):
					self.setTense(tenses.PAST_PERFECT)
					used = True
				elif (helperConj == helper["subj2"]):
					self.setTense(tenses.CONDITIONAL_PAST)
					used = True
				elif (helperConj == helper["preterite"]):
					self.setTense(tenses.PLUSQUAM)
					used = True

				#and set the translations with the full form of our word
				#it can grab from our node the conjugated values, &etc.
				if (used):
					self.__doTranslations(v.word)

		#this is a special-case tense -> the combination of a helper and a modal...owwies
		elif (helper["stem"] == "werd"
			and self.verb.word in (word.canoo.helperHaben, word.canoo.helperSein)
			and self.child != None
			and self.child.conjugation.verb.getStem() == self.child.conjugation.verb.get(True)[0]["perfect"]
		):
			self.setTense(tenses.FUTURE2_HELPER)
			self.child.setTense(tenses.FUTURE2)
			self.child.__doTranslations(self.child.conjugation.verb.get(True)[0]["full"])
		#something going on with werden -> conditional present, passive voice
		elif (helper["stem"] == "werd"):
			conjugatedStem = self.conjugation.verb.getStem()

			#all the possible verbs (ex: gedenken + denken for gedacht)
			for v in self.conjugation.verb.get(helper["full"]):
				used = False

				#if we're looking at an unconjugated form of the verb: sehen
				if (conjugatedStem == v["perfect"]):
					if (helperConj == helper["preterite"]):
						self.setTense(tenses.PASSIVE_PAST)
						used = True
					elif (helperConj in (helper["third"], helper["firstPlural"], helper["first"], helper["thirdPlural"], helper["stem"])):
						self.setTense(tenses.PASSIVE_PRESENT)
						used = True
				elif (conjugatedStem == v["stem"]):
					if (helperConj == helper["subj2"]):
						self.setTense(tenses.CONDITIONAL)
						used = True
					elif (helperConj in (helper["third"], helper["firstPlural"], helper["first"], helper["thirdPlural"], helper["stem"])):
						self.setTense(tenses.FUTURE)
						used = True

				if (used):
					self.__doTranslations(v["full"])
Example #52
	def test_to_str(self):
		w = word.word('çöişüğıÇÖİŞIÜĞasd', 'Testing Turkish Characters', False)
		self.assertEqual(w.text, 'çöişüğıÇÖİŞIÜĞasd')
		self.assertEqual(str(w), 'çöişüğıÇÖİŞIÜĞasd')
Example #53
File: readgre.py Project: robturtle/iwords
#!/usr/bin/env python
# Filename: readgre.py
# Author:   LIU Yang
# Create Time: Sun Aug 25 03:04:20 HKT 2013
# License:     LGPL v2.0+
# Contact Me:  [email protected]

import fileinput, shelve
import word
from config import GRE_DB

wordbook = dict()

windex = 10000 # Magic number 4 GRE words, no portable issue HA HA HA!
for line in fileinput.input():
    items = line.split()
    name, mean = items[0], ' '.join(items[1:])

    wordbook[windex] = word.word(name, mean)
    windex += 1

for idx in wordbook:
    print idx, wordbook[idx]

gre_db = shelve.open(GRE_DB)
for windex in wordbook:
    gre_db[str(windex)] = wordbook[windex]
gre_db.close()
Example #54
		)
	)
	...
)
'''

parts = ['n', 'aj', 'av', 'pr', 'ab']
desc = [
    [False, False, False, True,  False],
    [True,  False, False, False, False],
    [True,  True,  True,  True,  False],
    [True,  True,  True,  True,  False],
    [True,  True,  True,  True,  False]
]
dummy = word.word('')
dummy.part = ':dnn'

def describes(head, tail):
	if (head == 'cj') or (tail == 'cj') or \
	   (head == 'dn') or (tail == 'dn'):
		return False
	if head[0] == ':':
		if head[1:3] == 'dn':
			return False
	if tail[0] == ':':
		if tail[1:3] == 'dn':
			return describes(head, tail[3:])
Example #55
		# single key
		res = match_pattern(items, partkeys)

	if res == []:
		res = match_idiom(items, combine)

	if res == []:
		res = match_idiom(items, partkeys)

	if res == []:
		sys.stderr.write('ERR: partkey not found in line: %s' % line)

	# do cache
	index, wname, mean = res
	if wordbook.has_key(index):
		wordbook[index] = wordbook[index] + word.word(wname, mean)
	else:
		wordbook[index] = word.word(wname, mean)
	# End reading from file #

# store into database
word_db = shelve.open(IBT_DB)
for idx in wordbook:
	word_db[idx] = wordbook[idx]
word_db.close()

if __name__ == '__main__':
	db_in = shelve.open(IBT_DB)
	for idx in db_in.keys():
		print db_in[idx]
	db_in.close()