Пример #1
0
    def readAllWords(self):
        """Fill the word lists with hard-coded sample entries.

        Every sample pair becomes a ``Word`` appended to ``self.words``.
        Pairs from the second table are built with ``True`` as the third
        constructor argument and are also appended to ``self.focusedWords``.
        Prints "Data Loaded" when finished.
        """
        # Samples built with False as the third Word argument.
        plain_samples = [
            ("word", "단어"),
            ("asd", "ㅁㄴㄹ"),
            ("qwer", "ㅂㅈㄷㄱ, ㅂㅈㄷㄱ"),
            ("zcxvzxcv",
             "ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ"
             ),
            ("rtyu", "ㄱ쇼ㅕ, ㄱ쇼ㅕ"),
            ("vbcbv", "ㅠㅊ풏ㅊ"),
            ("vbcbvyy", "ㅠㅊ풏ㅊㅊㅊ"),
            ("realword", "진짜 단어"),
            ("bback", "빡빡이아저씨"),
            ("aaaa", "아아아아"),
        ]
        for term, meaning in plain_samples:
            self.words.append(Word(term, meaning, False))

        # Samples built with True; these are tracked in both lists.
        focused_samples = [
            ("focus", "집중하다"),
            ("wow", "놀라운"),
            ("amazing", "엄청난"),
            ("awesome", "개쩌는"),
            ("verylonglonglongandlongword",
             "매우 길고 길고 또 길고 그리고 긴 단어를 아주 길고 길게 적는중"),
        ]
        for term, meaning in focused_samples:
            focused = Word(term, meaning, True)
            self.words.append(focused)
            self.focusedWords.append(focused)

        print("Data Loaded")
Пример #2
0
def trainTweets(type, tweet, grams):
    """Record the n-gram counts of one tweet in the global ``wordDict``.

    The tweet is split on whitespace. For every size in ``grams`` each
    distinct n-gram (tokens joined by a single space) is counted and handed
    to ``addWord(count, int(type))`` on its ``Word`` entry; the entry is
    created and stored in ``wordDict`` the first time the n-gram is seen.

    Args:
        type: Value convertible with ``int()``; forwarded to ``addWord``.
        tweet: Text of the tweet.
        grams: Iterable of n-gram sizes (expected to be positive integers).
    """
    tokens = tweet.split()
    # n-grams already recorded for this tweet, so none is added twice
    # (for example when ``grams`` repeats a size).
    seen = set()
    for gram in grams:
        # Count over the token list rather than with ``tweet.count``:
        # substring counting misses n-grams whose tokens are separated by
        # runs of whitespace, matches inside longer words, and skips
        # overlapping occurrences.
        ngrams = [" ".join(tokens[i:i + gram])
                  for i in range(len(tokens) - gram + 1)]
        counts = {}
        for ngram in ngrams:
            counts[ngram] = counts.get(ngram, 0) + 1

        for ngram in ngrams:
            if ngram in seen:
                continue
            seen.add(ngram)
            # Only the lookup sits in the try body, so a KeyError raised
            # inside addWord is not mistaken for a missing entry.
            try:
                entry = wordDict[ngram]
            except KeyError:
                entry = Word(ngram)
                entry.addWord(counts[ngram], int(type))
                wordDict[ngram] = entry
            else:
                entry.addWord(counts[ngram], int(type))
    def insert(self, value):
        """
        ---------------------------------------------------------
        Inserts value into the hashset, allows only one copy of value.
        Calls _rehash if the hashset _LOAD_FACTOR is exceeded.
        Use: inserted = hs.insert( value )
        -------------------------------------------------------
        Preconditions:
            value - a comparable data element (?)
        Postconditions:
            returns
            inserted - True if value is inserted, False otherwise.
        -------------------------------------------------------
        """
        hash_slot = self._find_slot(value)

        # The value is a duplicate only if an equal element is already in
        # its slot. Comparing against a placeholder object and requiring the
        # slot to be empty rejected every colliding (but distinct) value.
        already_present = any(item == value for item in hash_slot)

        if already_present:
            inserted = False
        else:
            hash_slot.insert(value)
            inserted = True
            self._count += 1

        # Grow the table once the element count exceeds the stored limit.
        if self._count > self._total:
            self._rehash()

        return inserted
Пример #4
0
    def integrate(self, concept: Concept) -> Concept:
        """Return this store's counterpart of ``concept``, creating it if absent.

        When ``get_concept`` already finds a match, the incoming probability
        is merged into it. Otherwise the parents are integrated recursively,
        a new ``Concept`` is built from them, attached to this store and
        registered; ``Relation.Word`` concepts get a ``Word`` entry in
        ``self._words`` if none exists under that name, and
        ``Relation.Implication`` concepts propagate probability from their
        first parent.
        """
        known = self.get_concept(concept)
        if known is not None:
            # Already stored: only fold in the incoming probability.
            known.merge_probability(concept.probability)
            return known

        # Integrate the parents first so the new concept links to stored ones.
        local_parents = [self.integrate(parent) for parent in concept.parents]

        created = Concept(concept.name, concept.relation, local_parents,
                          concept.probability)
        created.store = self
        created.register_with_parents()
        self.add_concept(created)

        if created.relation == Relation.Word:
            name = created.name
            if name not in self._words:
                entry = Word(name)
                entry.add_meaning(created)
                self._words[name] = entry
        elif created.relation == Relation.Implication:
            created.propagate_probability_from_parent(created.parents[0])

        return created
def get_senses(sense_file, words, t_to_use):
    """Collect per-word sense statistics, caching raw senses in a pickle file.

    If ``sense_file`` exists the senses are unpickled from it; otherwise they
    are built with ``Word.make_sense_list(w, 'all')`` for every word and
    pickled to ``sense_file``.

    Args:
        sense_file: Path of the pickle cache.
        words: Iterable of words to look up.
        t_to_use: Point in time tested against each sense's time ranges.

    Returns:
        ``(num_senses, first)``. ``first[w]`` is the smallest
        ``starting_time`` among the times of ``w``'s NOUN senses and
        ``num_senses[w]`` is the number of those time ranges that contain
        ``t_to_use``. Words without senses or without any time range are
        left out of both dicts.
    """
    if os.path.isfile(sense_file):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # written by this program.
        with open(sense_file, 'rb') as cache:
            senses = pickle.load(cache)
    else:
        get_sense = Word('', '')
        senses = {w: get_sense.make_sense_list(w, 'all') for w in words}
        with open(sense_file, 'wb') as cache:
            pickle.dump(senses, cache)

    num_senses, first = {}, {}
    for w in words:
        if w not in senses or len(senses[w]) == 0:
            print('No HTE entry:', w)
            continue
        processed = [hack_sense_objs(s) for s in senses[w]]
        noun_senses = [sense for pos, sense in processed if pos == NOUN]

        processed_times = []
        for sense in noun_senses:
            for t in sense['times']:
                # A period without an end is treated as ending when it starts.
                if 'ending_time' not in t:
                    t['ending_time'] = t['starting_time']
                processed_times.append((t['starting_time'], t['ending_time']))
        if not processed_times:
            continue

        first[w] = min(start for start, _ in processed_times)
        num_senses[w] = sum(
            1 for start, end in processed_times
            if start <= t_to_use <= end
        )
    return (num_senses, first)
Пример #6
0
def spellMed(word):
    """Return the best medicine-name match for ``word``.

    Candidates are tried in order: the word itself, ``Word.typos()``, then
    ``Word.double_typos()``. The first non-empty result of ``isMedicine``
    wins, and its entry with the highest ``MED_COUNTS`` value is returned.

    Returns:
        The chosen candidate encoded as UTF-8, or ``-1`` when none of the
        three candidate sets contains a known medicine.
    """
    w = Word(word)

    # Callables keep the typo sets lazy: each is generated only when every
    # earlier source produced no match.
    candidate_sources = (
        lambda: [word],
        w.typos,
        w.double_typos,
    )

    for source in candidate_sources:
        candidates = isMedicine(source())
        # Compare by value; identity tests against int literals are
        # implementation-dependent.
        if len(candidates) != 0:
            return max(candidates, key=MED_COUNTS.get).encode('utf-8')

    return -1
Пример #7
0
 def lookup(self, word, return_distances=False):
     """Return spelling suggestions for ``word``.

     Candidates are ``word`` itself plus every string yielded by
     ``Word.deletes(word, self.edit_distance_max)``. A candidate with a
     positive count in ``self._terms`` is a result, and so is each entry of
     ``self._suggestions[candidate]`` whose distance from ``word`` is at
     most ``self.edit_distance_max``.

     Results are ordered by increasing distance, then decreasing frequency.
     When ``self.best_suggestions_only`` is set, only the original word (if
     present) and the minimum-distance suggestions are kept.

     Returns:
         A list of suggestions, or of ``(suggestion, distance)`` tuples
         when ``return_distances`` is true.
     """
     results = set()
     found = set()  # names already in `results`; gives O(1) membership tests
     candidates = {(word, 0)}  # a set of tuples (candidate, candidate_distance)
     for delete in Word.deletes(word, self.edit_distance_max):
         candidates.add((delete, len(word) - len(delete)))
     # Sorted by increasing distance; pop() below takes from the end, so
     # the candidates are visited from the largest distance downwards.
     candidates = sorted(candidates, key=lambda x: x[1])
     while candidates:
         candidate, candidate_distance = candidates.pop()  # the distance of the candidate from `word`
         candidate_count = self._terms[candidate]  # the (possibly 0) no. of occurrences for candidate
         if candidate_count > 0:  # there is an entry for this item in the dictionary
             #  candidate is an original word!
             results.add((candidate, candidate_distance))
             found.add(candidate)
         suggestions = self._suggestions[candidate]  # the (possibly not existing) suggestions for candidate
         for suggestion in suggestions:
             if suggestion in found:  # already reported
                 continue
             if suggestion == word:  # suggestion _is_ the word we are looking for
                 real_distance = 0
             elif candidate_distance == 0:  # candidate _is_ the word we are looking up for
                 real_distance = len(suggestion) - len(candidate)  # suggestion_distance
             else:  # candidate is a delete edit of the word we are looking up for
                 real_distance = Word.damerau_levenshtein_distance(word, suggestion)
             if real_distance <= self.edit_distance_max:
                 results.add((suggestion, real_distance))
                 found.add(suggestion)
     # sort the results first by increasing distance, then by decreasing frequency
     results = sorted(results, key=lambda r: (r[1], -self._terms[r[0]]))
     if self.best_suggestions_only and len(results) > 1:
         # only take the original word (if present) and the suggestions with minimum distance from `word`
         min_index = 0 if results[0][1] != 0 else 1  # possibly exclude `word` from the minimum distance
         best_dist = min(results[min_index:], key=lambda r: r[1])[1]  # results[0] may be the original word
         results = [r for r in results if r[1] <= best_dist]
     if not return_distances:
         results = [r[0] for r in results]  # drop the distances and keep only the suggestions
     return results
Пример #8
0
def gameMain():
    """Run one console hangman game until the word is finished or lives run out.

    Each round prints the gallows, the current word state and the letters
    already used, then reads one letter. Input that is not exactly one
    character, or that was guessed before, is rejected without calling
    ``guess.guess``. A guess for which ``guess.guess`` returns a value equal
    to ``False`` costs one life.
    """
    guess = Guess(Word('words.txt').randFromDB())
    hangman = Hangman()

    while hangman.remainingLives > 0:
        print(hangman.currentShape())
        print('Current: ' + guess.displayCurrent())
        print('Already Used: ' + guess.displayGuessed())

        guessedChar = input('Select a letter: ')
        if len(guessedChar) != 1:
            print('One character at a time!')
            continue
        if guessedChar in guess.guessedChars:
            print('You already guessed \"' + guessedChar + '\"')
            continue

        if guess.guess(guessedChar) == False:
            hangman.decreaseLife()

        if guess.finished() == True:
            print('**** ' + guess.displayCurrent() + ' ****')
            print('Success')
            return

    # Reached only when the loop ended because no lives remain.
    print(hangman.currentShape())
    print('word [' + guess.secretWord + ']')
    print('guess [' + guess.displayCurrent() + ']')
    print('Fail')
def make_optimal_paths(transducer_input):
    """Return a copy of ``transducer_input`` whose arcs are rebuilt per segment.

    The input is copied through a pickle round trip. For every alphabet
    segment and every ordered pair of states, the segment's word transducer
    is intersected with the copy and restricted to that start/end pair. If
    the end state survives ``clear_dead_states``, an ``Arc`` is built from
    the result of ``remove_suboptimal_paths``: its range and
    ``_get_path_cost``. The collected arcs replace the copy's arcs.
    """
    transducer = pickle.loads(pickle.dumps(transducer_input, -1))
    new_arcs = []

    for segment in transducer.get_alphabet():
        word_transducer = Word(segment.get_symbol()).get_transducer()
        intersected_machine = Transducer.intersection(word_transducer, transducer)
        states = transducer.get_states()

        for state1, state2 in itertools.product(states, states):
            initial_state = word_transducer.initial_state & state1
            final_state = word_transducer.get_a_final_state() & state2

            # Work on a fresh copy so each state pair starts from the same machine.
            candidate = pickle.loads(pickle.dumps(intersected_machine, -1))
            candidate.initial_state = initial_state
            candidate.set_final_state(final_state)
            candidate.clear_dead_states()

            if final_state not in candidate.get_final_states():
                continue  # no path between this pair of states

            try:
                candidate = remove_suboptimal_paths(candidate)
                output_range = candidate.get_range()
                arc = Arc(state1, segment, output_range, _get_path_cost(candidate), state2)
                new_arcs.append(arc)
            except KeyError:
                # A KeyError from the steps above drops this arc silently.
                pass

    transducer.set_arcs(new_arcs)
    return transducer
Пример #10
0
    def setUp(self):
        """Create a DataManager holding two sample words for each test."""
        manager = DataManager()
        self.d1 = manager

        # Each word receives a (True, manager) tuple as its third argument.
        self.wd1 = Word('software', '소프트웨어', (True, manager))
        self.wd2 = Word('project', '프로젝트', (True, manager))

        manager.words = [self.wd1, self.wd2]
Пример #11
0
 def valid_close_word(self, file_path):
     """Check ``Word.preprocess`` against the expectations in a CSV file.

     For every row, ``row.word`` is preprocessed; when the result is not
     empty its first element must equal ``row.replace_word``.
     """
     frame = pd.read_csv(file_path)
     preprocessor = Word()
     for record in frame.itertuples():
         tokens = preprocessor.preprocess(record.word)
         if len(tokens) == 0:
             continue  # nothing produced for this row, nothing to compare
         assert tokens[0] == record.replace_word
Пример #12
0
def gameMain():
    """Run one console hangman game driven by ``guess.numTries``.

    Rounds continue while ``guess.numTries`` is below the life count from
    ``hangman.getLife()``. Input that is not a single character, or that
    was guessed before, is rejected. The game ends early when
    ``guess.guess`` returns a value equal to ``True``.
    """
    guess = Guess(Word('words.txt').randFromDB())

    finished = False
    hangman = Hangman()
    maxTries = hangman.getLife()

    while guess.numTries < maxTries:
        print(hangman.get(maxTries - guess.numTries))
        guess.display()

        guessedChar = input('Select a letter: ')
        if len(guessedChar) != 1:
            print('One character at a time!')
        elif guessedChar in guess.guessedChars:
            print('You already guessed \"' + guessedChar + '\"')
        else:
            finished = guess.guess(guessedChar)
            if finished == True:
                break

    if finished == True:
        print('Success')
        return

    print(hangman.get(0))
    print('word [' + guess.secretWord + ']')
    print('guess [' + guess.currentWord + ']')
    print('Fail')
Пример #13
0
    def load(self, filename):
        """ Loads input file to self._data.

        Every tab-separated row becomes a ``Word`` whose ``line`` attribute
        holds the row joined by tabs and whose items are set from the row's
        fields, keyed by ``field_names``. When ``self.scan_for_duplicates``
        is set, ``check_duplicate()`` is called on each element.

        :type filename: str
        :raises ValueError: if a row does not have as many fields as
            ``field_names``
        """
        # todo: should be easily configurable
        logger.debug("Trying to load file '%s'.", filename)
        with open(filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter='\t')

            # must match Anki fieldnames. If you want to ignore a field, just set it to ""
            field_names = ["Expression", "Kana", "Meaning", None]

            for row in reader:
                element = Word()
                element.line = '\t'.join(row)
                # NOTE(review): ``decode`` exists on Python 2 byte strings
                # only; this line needs revisiting if the module is run on
                # Python 3.
                fields = [c.decode('utf8').strip() for c in row]

                if len(fields) != len(field_names):
                    # Raise an exception instance; raising a tuple loses the
                    # message on Python 2 and is a TypeError on Python 3.
                    raise ValueError(
                        "The number of supplied field_names (%d) doesn't match the number of "
                        "fields in the file %s (%d)." %
                        (len(field_names), filename, len(fields)))

                for name, value in zip(field_names, fields):
                    element[name] = value

                if self.scan_for_duplicates:
                    element.check_duplicate()

                self._data.append(element)
def analysis_one_segment(segment):
    """Merge the tokens of ``segment`` into phrases.

    The tokens produced by ``cut(segment)`` are walked in order. While
    ``could_concatenate`` accepts the next token it is appended to the
    current phrase; otherwise the current phrase is merged, reported through
    ``add_detected_new_phrase`` and a new phrase is started from that token.

    Returns:
        ``(detected_phrases, new_cut_words)``: the phrases collected by
        ``add_detected_new_phrase`` and the list of merged phrases. Both are
        empty when ``cut`` yields no tokens.
    """
    detected_phrases = []
    new_cut_words = []

    cut_words = cut(segment)
    try:
        first_seg_word, first_seg_pos = next(cut_words)
    except StopIteration:
        # An empty token stream has nothing to analyse; return the empty
        # result instead of leaking StopIteration to the caller.
        return detected_phrases, new_cut_words

    logging.debug("{} {}".format(first_seg_word, first_seg_pos))

    new_phrase_segments = Segment(init=Word(first_seg_word, first_seg_pos))

    for word, pos in cut_words:
        logging.debug("{} {}".format(word, pos))

        _phrase, consistent = could_concatenate(new_phrase_segments,
                                                Word(word, pos))

        if _phrase:
            new_phrase_segments.append(Word(word, pos, consistent))
        else:
            # The current phrase ends here: emit it and start a new one.
            new_cut_words.append(new_phrase_segments.merge())
            detected_phrases = add_detected_new_phrase(new_phrase_segments,
                                                       detected_phrases)
            new_phrase_segments = Segment(init=Word(word, pos))

    new_cut_words.append(new_phrase_segments.merge())

    # The trailing phrase is reported only when it spans several tokens.
    if len(new_phrase_segments) > 1:
        detected_phrases = add_detected_new_phrase(new_phrase_segments,
                                                   detected_phrases)

    return detected_phrases, new_cut_words
Пример #15
0
def gameMain():
    """Run one console hangman game without any input validation.

    Rounds continue while ``guess.numTries`` is below the life count from
    ``hangman.getLife()``; every input is passed straight to
    ``guess.guess`` and a truthy return value ends the game as a success.
    On failure the secret word and the elements of ``guess.current`` are
    printed.
    """
    guess = Guess(Word('words.txt').randFromDB())

    finished = False
    hangman = Hangman()
    maxTries = hangman.getLife()

    while guess.numTries < maxTries:
        print(hangman.get(maxTries - guess.numTries))
        guess.display()

        finished = guess.guess(input('Select a letter: '))
        if finished:
            break

    if finished:
        print('Success')
        return

    print(hangman.get(0))
    print('word [' + guess.word + ']')
    print("Guess:", end=" ")
    for shown in guess.current:
        print(shown, end=" ")
    print()
    print('Fail')
Пример #16
0
class Display:
    """Ties the hangman figure, the secret word and the letter buttons together."""

    def __init__(self, word, images):
        """Build the hangman from ``images``, the word and a fresh button set."""
        self.hangman = Hangman(images)
        self.word = Word(word)
        self.letter_buttons = LetterButtons()

    def is_letter_not_in_word(self, ltr):
        """Call ``set_hangman_status`` on the hangman if ``ltr`` is not in the word."""
        if ltr in self.word.get_word():
            return
        self.hangman.set_hangman_status()

    def mouse_click(self, mouse_x, mouse_y):
        """Handle a click at (mouse_x, mouse_y) against every visible letter button."""
        for button in self.letter_buttons.get_letters():
            if not button.get_visible():
                continue
            dx = button.get_x() - mouse_x
            dy = button.get_y() - mouse_y
            # A click counts when it lands closer than RADIUS to the button position.
            if math.sqrt(dx**2 + dy**2) < RADIUS:
                self.letter_buttons.if_inside_button(button)
                self.is_letter_not_in_word(button.get_letter())

    def draw_window(self):
        """Clear the window, draw word, buttons and hangman, then update the display."""
        WINDOW.fill(WHITE)
        self.word.draw(self.letter_buttons.get_guessed())
        self.letter_buttons.draw()
        self.hangman.draw()
        pygame.display.update()

    def is_won(self):
        """Return True when every letter of the word is among the guessed letters."""
        return all(letter in self.letter_buttons.get_guessed()
                   for letter in self.word.get_word())

    def is_lost(self):
        """Return True when the hangman status equals 6."""
        return self.hangman.get_hangman_status() == 6
Пример #17
0
def gameMain():
    """Run one hangman game through the ``TextUI`` front end.

    Rounds continue while ``hangman.getLife()`` is positive. Input that is
    not a single character, or that is already in ``guess.guessedList``,
    is reported through ``UI.errorPrint`` and skipped. A ``guess.guess``
    result of 1 ends the loop and a result of 0 costs one life. The UI is
    displayed after every accepted guess and once more at the end, followed
    by ``UI.endOfGame`` with the remaining lives.
    """
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    UI = TextUI(guess, hangman)

    while hangman.getLife() > 0:
        guessedChar = input("Select a letter: ")
        # Handle invalid input.
        # Integers are compared by value: identity tests against int
        # literals are implementation-dependent and warn on Python 3.8+.
        if len(guessedChar) != 1:
            UI.errorPrint("""
            =================================
            =====Input just one character====
            =================================""")
            continue
        if guessedChar in guess.guessedList:
            UI.errorPrint("""
            =================================
            =====Input another character=====
            =================================""")
            continue
        # React to the result of the guess.
        result = guess.guess(guessedChar)
        if result == 1:
            break
        if result == 0:
            hangman.minusLife()
        UI.display()
    UI.display()
    UI.endOfGame(hangman.getLife())
Пример #18
0
 def get_basic_latin_word(self):
     """
     Consume and return the next whitespace-delimited basic-Latin word.

     Reading starts at ``self.pos`` and stops at the end of the text, at the
     first character that is not basic Latin, or at the whitespace that
     follows the word. Whitespace met before the word starts is skipped.
     Returns ``None`` when no word could be read.

     >>> a = Algorithm('abc def c')
     >>> a.get_basic_latin_word()
     abc
     >>> a.get_basic_latin_word()
     def
     >>> a.get_basic_latin_word()
     c
     >>> a = Algorithm('abc 我 c')
     >>> a.get_basic_latin_word()
     abc
     >>> a.get_basic_latin_word()
     """
     basicLatinWord = []
     while (self.pos < self.length and is_basic_latin(self.text[self.pos])):
         current_char = self.text[self.pos]
         self.pos += 1
         if current_char.isspace():
             if basicLatinWord:
                 return Word(u''.join(basicLatinWord), BASICLATIN_WORD)
             # Whitespace before the word is a separator, not part of the
             # word; without this it would be stored as a leading character.
             continue
         basicLatinWord.append(current_char)
     if basicLatinWord:
         return Word(u''.join(basicLatinWord), BASICLATIN_WORD)
     return None
Пример #19
0
def gameMain():
    """Run one console hangman game driven by ``guess.numTries``.

    Rounds continue while ``guess.numTries`` is below the life count from
    ``hangman.getLife()``. Input that is not a single character, or that
    was guessed before, is rejected. The game succeeds when ``guess.guess``
    returns a value equal to ``True``; otherwise the failure summary is
    printed once the tries are used up.
    """
    word = Word('words.txt')
    guess = Guess(word.randFromDB())

    hangman = Hangman()
    maxTries = hangman.getLife()
    # Remember the outcome instead of calling guess.guess() again after the
    # loop: the extra call repeated a guess and failed with a NameError when
    # the loop body never ran.
    finished = False

    while guess.numTries < maxTries:

        display = hangman.get(maxTries - guess.numTries)
        print(display)
        guess.display()

        guessedChar = input("Select a letter:")
        if len(guessedChar) != 1:
            print("One character at a time!")
            continue
        if guessedChar in guess.guessedChars:
            print("You already guessed \' %c \' " % (guessedChar))
            continue

        if guess.guess(guessedChar) == True:
            finished = True
            print("Success!")
            break

    if not finished:
        print(hangman.get(0))
        print("word [ %s ]" % (guess.secretWord))
        print("guess [ %s ]" % (guess.currentStatus))
        print('Fail')
Пример #20
0
    def test_clean_word(self):
        """Check ``Word.preprocess`` on replacement and exclusion samples.

        For each replacement pair the first preprocessed element must equal
        the expected word; for each excluded input the result must be empty.
        """
        # (input, expected first element of the preprocessed result)
        replacements = (
            ('慶応大学', '大学'),
            ('朝日新聞', '新聞'),
            ('JR東日本', 'JR'),
            ('高校受験', '受験'),
            ('iPhone7', 'iPhone'),
        )

        # Inputs for which preprocessing must return nothing.
        excluded = (
            '1日目',
            '2日目',
            '三日目',
            '1年生',
            '2013年',
            '1984年生まれ',
            '6月',
            '4人',
            '鹿児島出身',
            '鹿児島県',
            '58キロ',
            'ごめんなさい。',
            '要注意',
            '1万円',
            '0円',
            '2期生',
        )

        preprocessor = Word()
        for source, expected in replacements:
            assert preprocessor.preprocess(source)[0] == expected

        for source in excluded:
            assert len(preprocessor.preprocess(source)) == 0
Пример #21
0
    def count_words(self, is_canonical, list_test_pos, list_training_pos,
                    list_test_neg, list_training_neg):
        """Build the word dictionary from the positive and negative training files.

        Every word found in a training file increments ``self.nbr_pos`` or
        ``self.nbr_neg``. A word seen for the first time gets a new ``Word``
        entry; later occurrences call ``incr_pos()`` / ``incr_neg()`` on it.

        Args:
            is_canonical: Selects ``NaifBayes.find_words_tagged`` when true,
                ``NaifBayes.find_words_untagged`` otherwise.
            list_test_pos: Not used by this method.
            list_training_pos: Paths of the positive training files (UTF-8).
            list_test_neg: Not used by this method.
            list_training_neg: Paths of the negative training files (UTF-8).

        Returns:
            dict mapping each word to its ``Word`` entry.
        """
        if is_canonical:
            find_words = NaifBayes.find_words_tagged
        else:
            find_words = NaifBayes.find_words_untagged

        dict_words = dict()

        def iter_words(file_names):
            # Yield every word of every line of the given files, in order.
            for file_name in file_names:
                with codecs.open(file_name, "r", "utf-8") as handle:
                    for line in handle:
                        for word in find_words(line):
                            yield word

        # NOTE(review): the first occurrence of a word only creates its Word;
        # incr_pos()/incr_neg() is not called for it. Confirm that Word's
        # initial counts account for this.
        for word in iter_words(list_training_pos):
            self.nbr_pos += 1
            if word in dict_words:
                dict_words[word].incr_pos()
            else:
                dict_words[word] = Word(word)

        for word in iter_words(list_training_neg):
            self.nbr_neg += 1
            if word in dict_words:
                dict_words[word].incr_neg()
            else:
                dict_words[word] = Word(word)

        return dict_words
Пример #22
0
def gameMain():
    """Run one console hangman game driven by ``guess.numTries``.

    Rounds continue while ``guess.numTries`` is below the life count from
    ``hangman.getLife()``. Input that is not a single character, or that
    was guessed before, is rejected. The game ends early when
    ``guess.guess`` returns a value equal to ``True``; the secret word is
    printed in both outcomes.
    """
    # Pick a random word from the word file.
    guess = Guess(Word('words.txt').randFromDB())

    finished = False
    hangman = Hangman()
    maxTries = hangman.getLife()  # initial number of lives

    # Keep playing while tries remain.
    while guess.numTries < maxTries:
        print(hangman.get(maxTries - guess.numTries))
        guess.display()

        guessedChar = input('Select a letter: ')
        if len(guessedChar) != 1:  # not exactly one character
            print('One character at a time!')
        elif guessedChar in guess.guessedChars:  # letter was used before
            print('You already guessed \"' + guessedChar + '\"')
        else:
            finished = guess.guess(guessedChar)
            if finished == True:
                break

    if finished == True:
        print('Success')
        print('word : ' + guess.secretWord)
        return

    print(hangman.get(0))
    print('word [' + guess.secretWord + ']')
    print('guess [' + guess.currentStatus + ']')
    print('Fail')
Пример #23
0
def gameMain():
    """Run one console hangman game whose input comes from ``getChar()``.

    Each round prints the gallows, the current word state and the used
    letters, then passes ``getChar()`` to ``guess.guess``. A result equal to
    1 or 2 restarts the round without further checks; a result equal to
    ``False`` costs one life. The loop stops when lives run out or
    ``guess.finished()`` is truthy.
    """
    guess = Guess(Word('words.txt').randFromDB())
    hangman = Hangman()

    while hangman.remainingLives > 0:
        print(hangman.currentShape())
        print('Current: ' + guess.displayCurrent())
        print('Already Used: ' + guess.displayGuessed())

        success = guess.guess(getChar())
        if success == 1 or success == 2:
            continue
        if success == False:
            hangman.decreaseLife()

        if guess.finished():
            break

    if guess.finished() == True:
        print('**** ' + guess.displayCurrent() + ' ****')
        print('Success')
        return

    print(hangman.currentShape())
    print('word [' + guess.secretWord + ']')
    print('guess [' + guess.displayCurrent() + ']')
    print('Fail')
Пример #24
0
 def __init__(self, word, guess_limit):
     """Initialise a game for ``word`` with ``guess_limit`` guesses remaining."""
     self.guesses_remaining = guess_limit
     self.word = Word(word=word)
     # `settings` is neither a parameter nor a local; it is resolved from an
     # enclosing or module scope.
     self.settings = settings
     self.game_won = False
     self.game_in_progress = True
     # Starts empty.
     self.previous_guesses = set()
Пример #25
0
 def load(self):
     """
     Read the lesson's path, loading and creating words

     When the word list file exists in the path, each of its lines is read
     as ``<file name>;<word>`` and a Word is created from the pair.
     Otherwise every ``.wav`` file in the path becomes a Word named after
     the file (without its extension).

     Returns False when no path is set, True otherwise.
     """
     if self.path is None:
         return False
     word_list_path = os.path.join(self.path, self.word_list_file)
     if os.path.exists(word_list_path):
         #new loading method with a words.list
         # The with-block closes the file even if a line fails to parse.
         with open(word_list_path, "r") as ifile:
             for line in ifile:
                 if not line.strip():
                     continue  # a blank line carries no entry
                 s = line.split(";")
                 word_name = s[1].strip().decode('utf-8')
                 full_file_name = os.path.join(self.path, s[0].decode('utf-8'))
                 print("Filename: " + full_file_name)
                 print("Word: " + word_name)
                 self.words.append(Word(word_name, full_file_name))
     else:
         #legacy loading method, kept for backward compatibility
         for entry in os.listdir(self.path):
             full_file_name = os.path.join(self.path, entry)
             if (os.path.isfile(full_file_name)
                     and os.path.splitext(entry)[1] == ".wav"):
                 word_name = os.path.splitext(entry)[0]
                 self.words.append(Word(word_name, full_file_name))
     return True
Пример #26
0
    def __init__(self, data):
        """Store ``data`` and start with a fresh Score, no category and a new Word."""
        self._data = data
        self._score = Score()

        # Both start unset.
        self._category = None
        self._iter_category = None
        self._word = Word()
Пример #27
0
 def __init__(self):
     """Pick a secret word from 'words.txt' and reset the guessing state."""
     self.word = Word('words.txt')
     self.secretWord = self.word.randFromDB()
     self.numTries = 0 # game over once this reaches 7
     # `hangmanList` is not defined in this method; it comes from an
     # enclosing or module scope.
     self.hangmanList = hangmanList
     # One underscore per character of the secret word.
     self.currentStatus = "_"*len(self.secretWord)
     self.guessedChars = ""
Пример #28
0
def main(argv):
    """Parse the command line, then print a sample string and ``Word.reverse()`` of it.

    Recognised options are ``-t/--test`` (one value, default "something")
    and ``-d/--debug``; with ``--debug`` the test value is printed as well.
    """
    # Parse out the command-line arguments.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION,
        epilog=EPILOG
    )
    parser.add_argument("-t", "--test", nargs=1, default=["something"])
    parser.add_argument("-d", "--debug", action="store_true",
                        help="Enable Debug Messages")

    # NOTE(review): `argv` is not forwarded, so parse_args() falls back to
    # sys.argv.
    args = parser.parse_args()
    print("Running Script: %s" % NAME)

    if args.debug:
        print("test: %s" % str(args.test[0]))

    my_string = "hello"
    print("Orginal String: %s" % my_string)
    reversed_output = Word(my_string).reverse()
    print("Output: %s" % reversed_output)
Пример #29
0
	def addWordEncounter(self, word):
		"""Register ``word`` in the grammar's word data if it is not there yet.

		A new entry is built with ``self.dataGranularity`` and initialised
		with ``self.initialValue``.

		Returns True when an entry was created, False when the word was
		already present.
		"""
		if word in self.stochasticGrammar.wordData:
			return False
		entry = Word(word, self.dataGranularity)
		entry.initializeValues(self.initialValue)
		self.stochasticGrammar.wordData[word] = entry
		return True
def test_filter_rules():
    """``filter_rules`` must keep only the 'valid' rule for the two sample words."""
    nasalization_rule = {
        'applies': {'positive': ['nasal']},
        'conditions': {
            'negative': ['nasal'],
            'positive': ['syllabic'],
        },
        'name': 'nasalization',
    }
    valid_rule = {
        'applies': {'positive': ['tonal']},
        'conditions': {'positive': ['syllabic']},
        'name': 'valid',
    }

    first_word = Word([
        Segment(['consonantal'], ['tonal']),
        Segment(['sonorant'], ['high']),
    ])
    second_word = Word([
        Segment(['syllabic', 'low'], []),
        Segment(['high'], ['sonorant']),
    ])

    remaining = filter_rules([first_word, second_word],
                             [nasalization_rule, valid_rule])
    assert remaining == [valid_rule]
Пример #31
0
class WordTest(unittest.TestCase):
    """Tests for ``Word`` loaded from the ``words.txt`` file one directory up."""

    def setUp(self):
        here = os.path.dirname(__file__)
        words_path = os.path.normpath(os.path.join(here, '../words.txt'))
        self.word = Word(words_path)

    def test_get_word(self):
        assert self.word is not None

    def test_words_count(self):
        self.assertEqual(len(self.word.words), 19184)

    def test_get_guessed_characters(self):
        self.assertIsInstance(self.word.get_guessed_characters(), set)

    def test_guess(self):
        # Guess the whole lowercase alphabet. A result of 1 must coincide
        # with the status matching the word; any other non-None result must
        # be -1.
        for letter in string.ascii_lowercase:
            outcome = self.word.guess(letter)
            if outcome is None:
                continue
            if outcome == 1:
                self.assertEqual(self.word.current_status,
                                 self.word.get_word())
            else:
                self.assertEqual(outcome, -1)

        self.assertEqual(len(self.word.get_guessed_characters()), 26)
Пример #32
0
 def __init__(self, word, score, remove):
     """Build the question for ``word`` and store ``score`` and ``remove``."""
     self.word = Word(word)
     # The question is the result of chaining define(), option() and
     # output() on the Word.
     self.question = self.word.define().option().output()
     self.symbols = ["A", "B", "C", "D", "E"]
     self.view = View()
     self.isCorrect = False
     self.score = score
     self.remove = remove
Пример #33
0
    def __init__(self, sentence_block):
        """Build the token list from ``sentence_block``.

        Each item becomes a ``Word``. A word whose lemma is empty or ``'_'``
        takes over the lemma of the preceding token when that token's form
        is ``'_'``.
        """
        self._tokens = []
        for raw_token in sentence_block:
            word = Word(raw_token)

            lemma_missing = not word.get_lemma() or word.get_lemma() == '_'
            # A non-empty list means this is not the first token, so the
            # last stored token is the preceding one.
            if lemma_missing and self._tokens:
                previous = self._tokens[-1]
                if previous.get_form() == '_':
                    word.set_lemma(previous.get_lemma())
            self._tokens.append(word)
Пример #34
0
    def gen_word(element):
        """Build a ``Word`` from ``element``'s "word", "coarse" and "fine" entries.

        The coarse and fine values are converted to the names of their
        ``CoarsePOS`` / ``FinePOS`` members. When the enclosing
        ``has_children`` is truthy the word gets an empty ``children`` list.
        """
        text = element["word"]
        coarse_name = CoarsePOS(element["coarse"]).name
        fine_name = FinePOS(element["fine"]).name
        word = Word(word=text, coarse=coarse_name, fine=fine_name)

        if has_children:
            word.children = []

        return word
Пример #35
0
def test_apply_rule_deletion():
    """Applying the deletion rule must leave only the first sample segment."""
    deletion_rule = {
        'applies': {'positive': ['deletion']},
        'conditions': {'positive': ['syllabic']},
        'name': 'deletion',
    }

    source_word = Word([
        Segment(['nasal'], ['syllabic']),
        Segment(['syllabic'], ['nasal']),
    ])
    expected_word = Word([Segment(['nasal'], ['syllabic'])])

    assert source_word.apply_rule(deletion_rule) == expected_word
 def getWord(self, word):
     """Fetch the stored entry for ``word`` and return it as a Word.

     Returns None when ``word`` is not a string (an error is printed) or
     when the collection has no document whose "word" field equals it.
     Otherwise a Word is created and its ``__dict__`` is replaced by the
     document found.
     """
     if not isinstance(word, basestring):
         print "Error Querying Database with ", word
         return None
     record = self.collection.find_one({"word":word})
     # Identity is the reliable test for "no document found".
     if record is None:
         return None
     wn = Word(word)
     wn.__dict__ = record
     return wn
Пример #37
0
 def find_best_word_outter_function(self, position_seed, board, rotated=False):
     """Start a word search from the tile at ``position_seed``.

     For every index of ``self.tile_set`` a new Word is extended from the
     anchor position and handed to ``find_best_word_inner_function``.

     Raises ValueError when the square at ``position_seed`` holds no tile.
     """
     from word import Word
     if board.get_square(position_seed).tile is None:
         raise ValueError("Can't play off an empty square")
     for tile_index in xrange(len(self.tile_set)):
         candidate = Word(rotated=rotated)
         candidate.extend_from_anchor(position_seed, tile_index, board)
         self.find_best_word_inner_function(candidate, self.tile_set, board)
Пример #38
0
class Player(object):
    """A named player who chooses a secret word and guesses the opponent's."""

    def __init__(self, name):
        """Create a player with no opponent, no word and no misses yet."""
        self._name = name
        self._opponent = None
        self._word = None
        self._misses = []

    def set_opponent(self, opponent):
        self._opponent = opponent

    def choose_word(self):
        """Prompt until a non-empty word is entered, then scroll it off the screen."""
        while True:
            word = input(self._name + ", enter a word : ")
            if len(word) > 0:
                self._word = Word(word)
                break
            print("A word must have letters!")
        # scroll our word off the screen!
        for _ in range(29):
            print("")

    def guess_letter(self, letter):
        return self._word.guess(letter)

    def guessed_letters(self):
        return self._word.guessed_for_display()

    def all_guessed(self):
        return self._word.all_guessed()

    def guess(self):
        """Show the opponent's progress, read one letter and try it.

        Returns the result of the opponent's ``guess_letter``. When that
        result is falsy, the letter is recorded in this player's misses.
        """
        progress = self._opponent.guessed_letters()
        print("----------------------------------------------------")
        print("Word : " + progress)
        print("Incorrect guesses : ", end='')
        print(self._misses)

        # Keep asking until exactly one character is entered.
        while True:
            letter = input(self._name + ", enter a letter : ")
            if len(letter) == 1:
                break

        hit = self._opponent.guess_letter(letter)
        if not hit:
            self._misses.append(letter)
        return hit
Пример #39
0
def test_index_applicable():
    """Index 1 of a three-segment word satisfies a rule with 'before' and 'conditions' clauses."""
    syllable_rule = {
        'before': {'negative': ['syllabic']},
        'conditions': {'positive': ['syllabic']},
    }

    first_segment = Segment(['consonantal', 'voice', 'labial', 'long'],
                            ['syllabic', 'stress'])
    middle_segment = Segment(['syllabic', 'sonorant', 'continuant', 'approximant',
                              'voice', 'labial', 'round', 'dorsal', 'low', 'back'],
                             ['stress', 'long'])
    last_segment = Segment(['consonantal', 'coronal', 'anterior'],
                           ['syllabic', 'stress'])

    word = Word([first_segment, middle_segment, last_segment])

    assert word.index_applicable(1, syllable_rule)
Пример #40
0
 def parseVerseTextLine(self, line):
     """Turn one line of verse text into ``Word`` objects and file the verse.

     An empty ``line`` is ignored. Otherwise the line is split on single
     spaces, each piece becomes a ``Word(0, variant_none)`` whose
     ``surface`` is the piece, the list replaces ``self.verse.words``, and
     ``self.verse`` is appended to the verses of the last book.
     """
     if line == "":
         return

     tokens = []
     for surface in line.split(" "):
         token = Word(0,variant_none)
         token.surface = surface
         tokens.append(token)

     self.verse.words = tokens
     # Must also add it to current book
     self.books[-1].verses.append(self.verse)
Пример #41
0
    def load_words(self,files):
        """Merge word definitions from YAML files and wrap each entry in a ``Word``.

        Every file in ``files`` is parsed with ``yaml.load`` and merged into
        ``self.words`` through ``yamlhelp.merge``. Afterwards each entry of
        ``self.words`` is replaced by ``Word(word_id, **entry)`` and its
        ``color`` is converted with ``yamlhelp.load_color``.

        Each file is opened in a ``with`` block so the handle is closed even
        when parsing raises.
        """
        for file_name in files:
            with open(file_name) as f:
                # NOTE(review): yaml.load without an explicit Loader can build
                # arbitrary Python objects; if these files may come from an
                # untrusted source, switch to yaml.safe_load.
                data = yaml.load(f)
            yamlhelp.merge(data,self.words)

        # Only existing keys are reassigned, so the dict keeps its size while
        # it is being iterated.
        for word_id in self.words:
            word = Word(word_id, **self.words[word_id])
            self.words[word_id] = word

            word.color = yamlhelp.load_color(word.color)
Пример #42
0
def create_words(the_reflection):
    """
    Return the ``Word`` nodes for the tokens of a reflection's text.

    Every token of ``the_reflection.text_blob`` that is not found in
    ``string.punctuation`` is lower-cased and saved as a ``Word``; when
    saving raises ``UniqueProperty`` the already stored node is fetched
    instead. Each node is connected to the reflection and collected, so a
    repeated token yields a repeated list entry.
    """
    nodes = []
    for token in word_tokenize(the_reflection.text_blob):
        if token in string.punctuation:
            continue
        key = token.lower()
        try:
            node = Word(id=key).save()
        except UniqueProperty:
            # The word is already stored: reuse the existing node.
            node = Word.nodes.get(id=key)
        node.reflections.connect(the_reflection)
        nodes.append(node)
    return nodes
Пример #43
0
def test_index_applicable_boundaries():
    """A 'first' rule applies only at index 0 and a 'last' rule only at index 1 of a two-segment word."""
    word = Word([
        Segment(['syllabic'], ['nasal']),
        Segment(['syllabic'], ['nasal']),
    ])

    start_boundary = {
        'conditions': {'positive': ['syllabic']},
        'first': True,
    }
    assert word.index_applicable(0, start_boundary)
    assert not word.index_applicable(1, start_boundary)

    end_boundary = {
        'conditions': {'positive': ['syllabic']},
        'last': True,
    }
    assert not word.index_applicable(0, end_boundary)
    assert word.index_applicable(1, end_boundary)
Пример #44
0
 def create_entity(self, word, dictionary, transcription, translation):
     """Create a ``Word`` entity from the given fields with ``usages`` set to 1 and ``put()`` it."""
     entity = Word()
     field_values = (
         ("word", word),
         ("transcription", transcription),
         ("translation", translation),
         ("dictionary", dictionary),
         ("usages", 1),
     )
     for field, value in field_values:
         setattr(entity, field, value)
     entity.put()
Пример #45
0
 def loop(self):
     """Run rounds of the game until the players decide to stop.

     Each round the current leader types a word, which is wrapped in a
     ``Word`` and followed by a block of blank lines. The non-leader
     players then take turns until ``self.guesses`` is no longer positive
     (the leader is awarded 3 points) or ``self.guessed`` is truthy.
     Afterwards the players are asked whether to keep playing: "n" calls
     ``end_game()`` and leaves the loop, "y" calls ``clear_vars()`` and
     starts another round.
     """
     # Main loop: one iteration per round.
     while (True):
         print(str(self.current_leader) + ", pick a word!")
         self.current_word = Word(input())
         self.guesses = MAX_GUESSES
         # presumably pushes the typed word off the screen
         print("\n"*20)
         
         # Keep cycling through the guessers until the round ends.
         # NOTE(review): self.guesses and self.guessed are only read here;
         # presumably player.turn() updates them -- confirm.
         while (True):
             # Loop through the players who are not the leader
             for player in [player for player in self.players if player != self.current_leader]:
                 if (self.guesses > 0):
                     self.current_word.print_word()
                     player.turn(self.current_word)
                 else:
                     print("No guesses left, " + str(self.current_leader) + " +3 points!")
                     self.current_leader.score += 3
                     break
                 if (self.guessed): 
                     break
             else:
                 continue; # for-loop finished without break: every guesser had a turn, go around again
             break # reached only when the for-loop was left via break, i.e. the round is over
         
         print("Keep playing? y/n")
         
         choice = get_choice(("y","n"))
         if (choice == "n"):
             self.end_game()
             break
         elif (choice == "y"):
             self.clear_vars()
             continue
Пример #46
0
 def test_damerau_levenshtein_distance(self):
     """Check ``Word.damerau_levenshtein_distance`` against a table of known distances."""
     expectations = [
         ('ciao', 'ciao', 0),
         ('ciao', 'cia', 1),
         ('cia', 'ciao', 1),
         ('ciao', 'co', 2),
         ('ciao', 'c', 3),
         ('ciao', '', 4),
         ('simone', 'siomne', 1),
         ('simone', 'siomen', 2),
     ]
     for first, second, distance in expectations:
         self.assertEqual(Word.damerau_levenshtein_distance(first, second), distance)
Пример #47
0
def wordTest(inputPath, distributionGranularity, wordGranularity, tempFile):
	"""Write one configured Word to ``tempFile``, read it back, and print it around an increment.

	A ``SalaryDistribution`` is parsed from ``inputPath`` and used to
	initialise a ``Word("Potato", wordGranularity)``. Its ``configure()``
	output is written to ``tempFile``; every line of that file is then
	turned back into a ``Word``, printed, incremented and printed again.
	Lines whose processing raises ``NameError`` are skipped.
	"""
	salaryDictionary = SalaryDistribution(inputPath, distributionGranularity)
	salaryDictionary.parse()
	sample = Word("Potato", wordGranularity)
	sample.initializeValues(salaryDictionary)
	with open(tempFile, 'w') as sink:
		sink.write(sample.configure())
	with open(tempFile, 'r') as source:
		for serialized in source:
			try:
				restored = Word.fromFileString(serialized)
				print(restored.configure())
				restored.increment(8200, 3, 0.75)
				print("~~~~~~~")
				print(restored.configure())
			except NameError:
				# Skip this line and move on to the next one.
				pass
Пример #48
0
    def get_word(self):
        sections = self.get_sections()
        false = False
        from_section_level = -1
        word = Word(self.title)
        for section in sections:
            if section[1] == self.orig_section_name:
                from_section_level = section[0]
                continue
            elif from_section_level >= 0 and section[0] <= from_section_level:
                from_section_level = -1

            if from_section_level >= 0:
                if self.is_pronunce_section(section[1]):
                    ipa, audio = self._parse_pronunciation(section[2])
                    word.set_pronunciation(ipa)
                    word.set_audio(audio)
                elif self.is_partos_section(section[1]):
                    try:
                        meanings = self._parse_part_of_speech(section[2])
                    except:
                        print self.title, "!!!!!!", section[2]
                        raise
                    en_part_of_speach = self.get_en_part_of_speech(section[1])
                    word.set_part_of_speech(en_part_of_speach, meanings)
        return word
Пример #49
0
    def write_sonnet(cls):
        """Fill ``cls.my_sonnet`` with ``cls.desired_lines`` lines built back to front.

        Each line starts from a ``Word("@")`` placeholder. When
        ``cls.rhyme_lines`` holds a non-negative index for the line, the
        line's last word is a rhyme for the last word of that earlier line.
        Words are then inserted at the front, each drawn with
        ``get_random_leader`` for the word that follows it, until the
        syllable count reaches ``cls.desired_length`` or five draws in a row
        come back as "@".
        """
        cls.my_sonnet = []
        for line_no in range(cls.desired_lines):
            words = []
            successor = Word("@")
            syllable_total = 0

            rhyme_target = cls.rhyme_lines[line_no]
            if rhyme_target >= 0:
                successor = cls.word_chain.get_rhyming_word(cls.my_sonnet[rhyme_target][-1], cls.rhyme_level)
                words.insert(0, successor.get_word())
                syllable_total += successor.count_syllables()

            while syllable_total < cls.desired_length:
                candidate = cls.word_chain.get_random_leader(successor.get_word())
                # Redraw up to four times while the draw is the "@" marker.
                redraws = 0
                while redraws < 4 and candidate.get_word() == "@":
                    candidate = cls.word_chain.get_random_leader(successor.get_word())
                    redraws += 1
                if candidate.get_word() == "@":
                    break
                words.insert(0, candidate.get_word())
                successor = candidate
                successor.get_word_stress()
                syllable_total += candidate.count_syllables()

                cls.print_progress(line_no + 1, syllable_total)

            cls.my_sonnet.append(words)
Пример #50
0
	def startElement(self, name, attrs):
		"""Handle the opening tag of an Image, TextLine, Word or Character element.

		Image records its ``name`` attribute, TextLine starts a new empty
		line, Word appends a ``Word`` with its box, text and shear to the
		current line, and Character appends a ``Character`` to the last
		word of the current line. A Character without ``top``, ``bottom``
		or ``shear`` takes that value from the last word. Other element
		names are ignored.
		"""
		if name == "Image":
			self.image_name = str(attrs['name'])
		elif name == "TextLine":
			self.cur_line = []
		elif name == "Word":
			word = Word()
			for side in ('top', 'bottom', 'left', 'right'):
				setattr(word, side, int(attrs[side]))
			word.text = unicode(attrs['text'])
			word.shear = int(attrs['shear'])
			self.cur_line.append(word)
		elif name == "Character":
			char = Character()
			char.top = int(attrs['top']) if 'top' in attrs else self.cur_line[-1].top
			char.bottom = int(attrs['bottom']) if 'bottom' in attrs else self.cur_line[-1].bottom
			char.left = int(attrs['left'])
			char.right = int(attrs['right'])
			char.text = unicode(attrs['text'])
			char.shear = int(attrs['shear']) if 'shear' in attrs else self.cur_line[-1].shear
			self.cur_line[-1].characters.append(char)
Пример #51
0
	def parse(self):
		"""Load words and word pairs from their files into dicts keyed by ``text``.

		Each line of ``self.wordPath`` becomes a ``Word`` stored in
		``self.wordData``; each line of ``self.wordPairPath`` becomes a
		``WordPair`` stored in ``self.wordPairData``. A later line with the
		same ``text`` replaces the earlier entry.
		"""
		with open(self.wordPath, 'r') as wordFile:
			for serialized in wordFile:
				entry = Word.fromFileString(serialized)
				self.wordData[entry.text] = entry
		with open(self.wordPairPath, 'r') as pairFile:
			for serialized in pairFile:
				pair = WordPair.fromFileString(serialized)
				self.wordPairData[pair.text] = pair
Пример #52
0
 def choose_word(self):
     """Prompt until a non-empty word is entered, store it as ``self._word``, then print blank lines."""
     while True:
         entered = input(self._name + ", enter a word : ")
         if entered:
             self._word = Word(entered)
             break
         print("A word must have letters!")
     # scroll our word off the screen!
     for _ in range(29):
         print("")
Пример #53
0
def convert_spacy_format(text):
    """
    Parse UTF-8 encoded ``text`` with spaCy and return a list of ``Sentence`` objects.

    Each sentence keeps its string form and a list of ``Word`` objects
    carrying the token's surface string, lemma, running index, tag and
    entity type.
    """
    # instantiate Spacy's parser and run it over the decoded text
    nlp = English()
    sentences = []
    for span in nlp(unicode(text, "utf-8")).sents:
        sentence = Sentence()
        sentence.string = str(span)
        position = 0
        for token in span:
            # spaCy sometimes yields an empty token for a new line in a
            # text file; this filter drops those.
            # NOTE(review): it also drops every one-character token -- confirm
            # that is intended.
            if len(token.orth_) <= 1:
                continue
            entry = Word()
            entry.string = token.orth_
            entry.lemma = token.lemma_
            entry.index = position
            # less verbose tags are provided by "token.pos_"
            entry.tag = token.tag_
            entry.entity = token.ent_type_
            position += 1
            sentence.words.append(entry)
        sentences.append(sentence)
    return sentences
Пример #54
0
    def post(self, *args):
        """Create a ``Word`` entity from the JSON request body, or update an existing one.

        The fields ``word``, ``translation``, ``transcription`` and
        ``dictionary`` are URL-unquoted and decoded as UTF-8. If no entity
        with that word exists, ``create_entity`` runs in a transaction;
        otherwise ``update_usages`` does.
        """
        body = json.loads(self.request.body)

        def decoded(field):
            # The value is ASCII-encoded, percent-unquoted, then read as UTF-8.
            return urllib.unquote(body[field].encode("ascii")).decode("utf8")

        word = decoded("word")
        translation = decoded("translation")
        transcription = decoded("transcription")
        dictionary = decoded("dictionary")

        entity = Word.gql("where word=:1", word).get()
        if entity is not None:
            db.run_in_transaction(self.update_usages, entity, translation)
        else:
            db.run_in_transaction(self.create_entity, word, dictionary, transcription, translation)
Пример #55
0
	def __init__(self, trainingDataPath, salaryProbabilityPath, distributionGranularity,
			dataGranularity, grammar, dropOffWidth=0, stepDown=0, increment=5.0):
		"""Store the training configuration and precompute the initial word values.

		A ``SalaryDistribution`` is parsed from ``salaryProbabilityPath``.
		The initial value table is taken from a throw-away ``Word("")``
		initialised against that distribution.
		"""
		self.salaryData = SalaryDistribution(salaryProbabilityPath, distributionGranularity)
		self.salaryData.parse()

		self.trainingDataPath = trainingDataPath
		self.dataGranularity = dataGranularity
		self.stochasticGrammar = grammar
		self.dropOffWidth = dropOffWidth
		self.stepDown = stepDown
		self.increment = increment

		# A blank word supplies the starting values.
		template = Word("", self.dataGranularity)
		template.initializeNewValues(self.salaryData)
		self.initialValue = template.values
def wordTest(inputPath, distributionGranularity, wordGranularity, tempFile):
	"""Round-trip two Words through ``tempFile`` and print their value sums around an increment.

	"Potato" and "Celery" words are initialised from a parsed
	``SalaryDistribution``, written to ``tempFile`` and read back (lines
	raising ``NameError`` are skipped). The sums of both words' values are
	printed, the first word is incremented, and both sums are printed again.
	"""
	salaryDictionary = SalaryDistribution(inputPath, distributionGranularity)
	salaryDictionary.parse()

	samples = []
	for label in ("Potato", "Celery"):
		sample = Word(label, wordGranularity)
		sample.initializeNewValues(salaryDictionary)
		samples.append(sample)

	with open(tempFile, 'w') as sink:
		for sample in samples:
			sink.write(sample.configure())

	restored = []
	with open(tempFile, 'r') as source:
		for serialized in source:
			try:
				restored.append(Word.fromFileString(serialized))
			except NameError:
				continue

	def total(word):
		return sum(word.values.values())

	print(total(restored[0]))
	print(total(restored[1]))
	restored[0].increment(8250,increment = 147.25)
	print(total(restored[0]))
	print(total(restored[1]))
Пример #57
0
 def test_deletes(self):
     """Check ``Word.deletes`` against a table of (word, numeric argument, expected set)."""
     cases = [
         ('ciao', 3,
          {'iao', 'cio', 'cao', 'cia', 'co', 'ca', 'ci', 'ao', 'io', 'ia', 'c', 'i', 'a', 'o'}),
         ('ciao', 2,
          {'iao', 'cio', 'cao', 'cia', 'co', 'ca', 'ci', 'ao', 'io', 'ia'}),
         ('ciao', 1, {'iao', 'cio', 'cao', 'cia'}),
         ('ciao', 0, set()),
         ('aaa', 30, {'aa', 'a'}),
         ('bbb', 3, {'bb', 'b'}),
         ('woho', 1, {'woh', 'oho', 'who', 'woo'}),
     ]
     for word, limit, expected in cases:
         self.assertSetEqual(Word.deletes(word, limit), expected)
Пример #58
0
def populate_db():
    """Copy kanji/reading pairs from the JMdict sqlite database into the word store.

    If no file exists at ``JMDICT_PATH`` a message is printed and nothing
    else happens. Otherwise every reading row joined with its restrictions
    and the kanji elements of the same entry is examined, a ``Word`` is
    saved on the module-level ``conn`` for each pair that qualifies,
    ``conn`` is committed and the elapsed time is printed.

    The JMdict connection is closed when reading finishes or fails.
    """
    print("Filling database with word/reading data from JMdict...")

    if not os.path.exists(JMDICT_PATH):
        print("No jmdict database found at %s" % JMDICT_PATH)
        print("Cannot continue without it.")
        return

    #get connection to jmdict database
    jm_conn = sqlite3.connect(JMDICT_PATH)
    try:
        jm_conn.row_factory = sqlite3.Row

        start = time.time()

        s = '''
        SELECT  r_ele.reb AS r_ele_reb,
                r_ele.re_nokanji AS r_ele_re_nokanji,
                re_restr.keb AS re_restr_keb,
                k_ele.keb AS k_ele_keb 
        FROM r_ele
        LEFT OUTER JOIN re_restr
        ON re_restr.r_ele_id=r_ele.id
        LEFT OUTER JOIN k_ele
        ON k_ele.entry_ent_seq=r_ele.entry_ent_seq
        '''

        for r in jm_conn.execute(s):
            #some words have no kanji elements
            if r['k_ele_keb'] is None:
                continue

            restricted_keb = r['re_restr_keb']
            if restricted_keb is not None:
                #a restricted reb only applies to the corresponding keb
                applies = restricted_keb == r['k_ele_keb']
            else:
                #otherwise, all rebs apply to all kebs in this entry
                #but some readings don't use kanji, so no related kebs
                # NOTE(review): this saves the pair when re_nokanji IS set,
                # which looks inverted relative to the remark above -- confirm
                # against how the JMdict conversion fills this column.
                applies = r['r_ele_re_nokanji'] is not None

            if applies:
                # conn is not defined in this function; it is taken from
                # module scope.
                Word(conn, r['k_ele_keb'], r['r_ele_reb']).save()
    finally:
        # Using a sqlite3 connection as a context manager would only manage
        # the transaction, so the handle is closed explicitly.
        jm_conn.close()

    conn.commit()
    print('Filling database with word/reading data took '
          '%s seconds' % (time.time() - start))
Пример #59
0
    def run(self):
        """Play one hangman-style game: players take turns until the word is matched or 6 failures occur.

        A new ``Word()`` is created, then each turn the current player is
        asked for a letter (upper-cased). A letter the word does not match
        counts as a failure. The turn passes to the next player unless the
        word has just been matched. The final state and the outcome are
        printed at the end.
        """
        print("\nGetting word from Internet...")
        self.word = Word()

        # The explicit comparisons with False/True are kept as written so that
        # non-boolean values of self.correct behave exactly as before.
        while self.correct == False and self.failures < 6:
            self.printState()
            player_name = str(self.playersList[self.playerTurn]['name'])
            letter = raw_input("\nWhich letter do you choose, " + player_name + "?\n").upper()
            if self.word.matchLetter(letter):
                print("The word contains the letter '" + letter + "'\n")
            else:
                print("Oh, the word doesn't contain the letter '" + letter + "'\n")
                self.failures += 1
            self.usedLetters.add(letter)
            self.correct = self.word.matchWord()
            if self.correct != True:
                self.playerTurn = (self.playerTurn + 1) % len(self.playersList)

        self.printState()
        if self.correct == True:
            winner_name = str(self.playersList[self.playerTurn]['name'])
            print("Congratulations! " + winner_name + " have guessed the word! :)")
        else:
            print("Oh no! You haven't guessed the word :(\nThe word was " + self.word.getWord() + "\n")
Пример #60
0
def convert_pattern_format(text):
    """
    Parse ``text`` with Pattern and return a list of ``Sentence`` objects.

    Each sentence stores its string with blanks removed by
    ``remove_blanks`` and a list of ``Word`` objects carrying the
    Pattern word's string, lemma, index and type; ``entity`` is always
    the empty string.
    """
    sentences = []
    # parse text via Pattern's parser
    for parsed_sentence in Text(parse(text, relations=True, lemmata=True)):
        sentence = Sentence()
        sentence.string = remove_blanks(parsed_sentence.string)
        for parsed_word in parsed_sentence:
            entry = Word()
            entry.string = parsed_word.string
            entry.lemma = parsed_word.lemma
            entry.index = parsed_word.index
            entry.tag = parsed_word.type
            entry.entity = ""
            sentence.words.append(entry)
        sentences.append(sentence)
    return sentences