예제 #1
0
def generateResults(input, message):
    """Rate each sentence typed into the `input` text widget and display the
    per-sentence scores in the `message` widget.

    `input` is read via .get("1.0", 'end-1c') (Tk Text-like widget);
    `message` is updated via .configure(text=...) (Tk Label-like widget).
    """
    result = "Rating : "

    # Nothing to rate: leave the message widget untouched.
    # (`is None` instead of `== None` — identity check for the singleton.)
    if input is None or input == '':
        return

    sentences = tokenizer.tokenize(input.get("1.0", 'end-1c'))

    # enumerate replaces the hand-maintained counter.
    for count, sentence in enumerate(sentences, start=1):
        # Tag each token of the sentence with its universal POS tag.
        wordList = nltk.pos_tag(specialCharacter(sentence), tagset='universal')

        # Wrap tokens in Word objects; unknown tokens get weight 0, tag "X".
        words = [
            word.Word(tok, wordDataset[tok], tag) if tok in wordDataset
            else word.Word(tok, 0, "X")
            for tok, tag in wordList
        ]

        ans = generateTree(words)
        # Questions are scored neutral; otherwise clamp to [-10, 10].
        if words[-1].word == "?":
            ans = 0.00
        if ans > 10:
            ans = 10.00
        elif ans < -10:
            ans = -10.00
        result += "\nSentence {} = {}".format(count, round(ans, 2))

    message.configure(text=result)
예제 #2
0
    def test___init__(self):
        """Word() accepts a non-empty str and rejects every other type."""
        # valid input: the string becomes the first descriptor key
        word_test = word.Word("man")
        self.assertEqual(list(word_test.descriptor.keys())[0], "man")

        # every non-string input raises TypeError
        for bad in (1, 1.0, object, {"test": 123}, [1], (1, 2)):
            with self.assertRaises(TypeError):
                word.Word(bad)

        # an empty string raises ValueError
        with self.assertRaises(ValueError):
            word.Word("")
예제 #3
0
    def test_magnitude(self):
        """magnitude is the Euclidean norm of the component counts."""
        word_test = word.Word("man")

        # no components yet: magnitude is undefined
        with self.assertRaises(IndexError):
            word_test.magnitude

        # one component counted 3 times -> sqrt(9) == 3
        for _ in range(3):
            word_test.append("i")
        self.assertAlmostEqual(word_test.magnitude, 3.0)

        # second component also counted 3 times -> sqrt(9 + 9)
        for _ in range(3):
            word_test.append("am")
        self.assertAlmostEqual(word_test.magnitude, math.sqrt(18.0))

        # third component counted twice -> sqrt(9 + 9 + 4) == sqrt(22)
        for _ in range(2):
            word_test.append("a")
        self.assertAlmostEqual(word_test.magnitude, math.sqrt(22.0))
예제 #4
0
def loadWords(files):
    """Read every file in `files` and build a dict of Word objects keyed by
    the normalized (lowercased, unaccented) term.

    Side effects: prompts the user for the minimum word length C, and fills
    the global dTerms with the number of distinct terms per file.
    """
    global C
    global dTerms
    C = int(input("Insert the value of C: "))

    print(files)

    words = {}
    # Files are numbered from 1; Word.incrementOccurs records per-file counts.
    for i, fil in enumerate(files, start=1):
        dTerms[fil] = 0
        fileWords = {}
        # `with` guarantees the file is closed even if parsing raises.
        with open(fil, "r") as f:
            for line in f:
                # Split on the separators/punctuation that delimit words.
                # (renamed from `word` to avoid shadowing the module name)
                for rawWord in re.split(r'; |, |\*|\n|;|!|\?|\.|\t| ', line):
                    # Skip words shorter than the user-chosen threshold C.
                    if len(rawWord) < C:
                        continue
                    theWord = un.unidecode(rawWord.lower())
                    if theWord not in words:
                        words[theWord] = wd.Word(theWord, len(files))
                    # Count each distinct term once per file.
                    if theWord not in fileWords:
                        fileWords[theWord] = theWord
                        dTerms[fil] += 1
                    words[theWord].incrementOccurs(i)

    return words
    def __init__(self, wordname):
        """Set up parser state for extracting one word's dictionary entry.

        The *_found counters act as flags that track which nested HTML
        section the SGML parser is currently inside while scanning the page.
        """
        self._word = word.Word()
        self._word.set_name(wordname)
        SGMLParser.__init__(self)

        # phonetic transcription section
        self._phonetic_found = 0

        # "etc. translations" section and its <li> items
        self._etcTrans_found = 0

        self._etcTrans_li_found = 0

        self._p_additional_found = 0

        # synonyms section and its <a> links
        self._synonyms_found = 0
        self._synonyms_a_found = 0

        # "tPowerTrans" sense/example nesting
        self._tPowerTrans_found = 0
        self._tPowerTrans_ul_sense_ex_found = 0
        self._tPowerTrans_ul_sense_ex_p_found = 0

        # bilingual example-sentence section
        self._examplesToggle_found = 0
        self._examplesToggle_bilingual_found = 0
        self._examplesToggle_bilingual_p_found = 0
        self._examplesToggle_bilingual_p_noattrs_found = 0  # handles the attribute-less <p> (workaround for the "报错" entry)

        self._tmp_sentence_list = []
        self._tmp_sentence = ''
예제 #6
0
	def createWords(self):
		"""Solicit a word from the player for each game slot and store it.

		NOTE(review): the counter starts at 1, so this creates
		numberOfWords - 1 words — confirm that is intended.
		"""
		for _ in range(1, self.numberOfWords):
			gameWord = word.Word(self.solicitWord())
			self.gameWords.append(gameWord)
예제 #7
0
def groupWords(dir):
    """Collect the crossword words that run through crossover cells along
    direction `dir`.

    A crossover cell is a grid index appearing exactly twice in the
    module-level gridIndexs (it lies on two words). From each crossover the
    word is reconstructed by walking backwards and forwards cell-by-cell via
    isOneBeside(); each distinct word is wrapped in a crosswordAnswer.Word
    numbered from 1. NOTE(review): relies on module globals gridIndexs,
    isOneBeside and getWord — semantics inferred, confirm against callers.
    """
    group = []
    index = 1
    crossover = []
    # Indexes occurring exactly twice mark cells shared by two words.
    for i in set(gridIndexs):
        if gridIndexs.count(i) == 2 and i not in crossover:
            crossover.append(i)
    words = []
    for i in set(crossover):
        plane = []
        num = -1
        pos = i
        # Walk backwards (negative offsets) until the word's start ('').
        while pos != '':
            plane.insert(0, pos)
            pos = isOneBeside(i, dir, num)
            num -= 1
        pos = '_'
        num = 1
        # Walk forwards; '_' is a sentinel meaning "no cell fetched yet".
        while pos != '':
            if pos == '_':
                pos = isOneBeside(i, dir, num)
                num += 1
                plane.append(pos)
                pos = isOneBeside(i, dir, num)
                num += 1
            else:
                plane.append(pos)
                pos = isOneBeside(i, dir, num)
                num += 1
        word = getWord(plane)
        # Deduplicate: each word is answered only once.
        if word not in words:
            words.append(word)
            group.append(crosswordAnswer.Word(word, index, plane[0]))
            index += 1
    return group
예제 #8
0
def single_run_partA(pid, articles):
    '''
    First part of a single run of a single process: Stops after interestingness was computed.

    '''
    logging.info("Starting PID {}".format(pid))
    if pid > 0:
        # create random sample and compute correlation
        articles = mng.shuffle_publicationdates(articles)
    logging.debug("PID {}: A".format(pid))
    # Per-calendar-week word-frequency dicts and the distinct words therein.
    wordCounts = mng.getWordCounts(articles)
    distinctWords = mng.getDistinctWords(wordCounts)
    logging.debug("PID {}: B".format(pid))
    # (word, weekly-counts) tuple per distinct word.
    countsPerWeek = [(w, mng.getCountPerWeek(wordCounts, w))
                     for w in distinctWords]
    logging.debug("PID {}: C".format(pid))
    # One Word object per keyword, carrying its article time series.
    words = [wpy.Word(kw, ts_articles=timeseries.Timeseries(counts))
             for kw, counts in countsPerWeek]
    logging.debug("PID {}: D".format(pid))
    interestingWords = mng.filter_interestingness(articles, 10, 5)
    keywords = list(interestingWords)
    logging.debug("PID {}: E".format(pid))
    output.put([pid, keywords, words])
예제 #9
0
파일: train.py 프로젝트: narrowsnap/chatbot
def train():
    """Train the encoder/decoder chatbot on the dgk_shooter corpus.

    Builds question/answer vectors from the conversation files, wraps them in
    a DataLoader, and runs one epoch of seq2seq training with Adam.
    """
    w = word.Word('data/dgk_shooter.conv', 'data/dgk_segment.conv',
                  'data/dgk_segment.conv', 'model/dgk_gensim_model')
    q, a = w.QA()
    generate = w.generate_vector(q, a)
    i, q_v, a_v = next(generate)  # generate the data; i is used to detect the end
    ds = dataset.VecDataSet(q_v, a_v)
    train_loader = data.DataLoader(ds, batch_size=64, shuffle=True)
    encoder = model.EncoderRNN(q_v.shape[2], args.hidden_size, q_v.shape[2],
                               args.num_layers, args.batch_size).to(device)
    decoder = model.DecoderRNN(q_v.shape[2], args.hidden_size, q_v.shape[2],
                               args.num_layers, args.batch_size).to(device)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate)
    loss_func = nn.CrossEntropyLoss()
    for i, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        h0 = encoder.initHidden()
        output, hidden = encoder(x, (h0, h0))
        # use the encoder's output as the decoder's first input
        y = y[:, :-1, :]  # drop y's last word; could instead pad y with one fewer token at data-generation time
        y = torch.cat((output, y), 1)
        h0 = decoder.initHidden()
        output, hidden = decoder(y, (h0, h0))
        loss = loss_func(output, y)
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()
    def startPhase1(self):
        """Enter phase 1: register the background, status bar, word and shower."""
        self.phase = 1

        # NOTE(review): registration order presumably determines draw order
        # (background first, overlays after) — confirm in main.get().
        self.main.get(graphics.SwimBackground(self.main))
        self.main.status = status.Status(self.main)
        self.main.get(self.main.status)
        self.main.get(word.Word(self.main))
        self.main.get(graphics.Shower(self.main))
예제 #11
0
 def new_game(self):
     '''
         starts a new game by placing wordsOnScreen random words
     '''
     for i in range(wordsOnScreen):
         w = wordDictionary.pick_word()
         # random.randint requires both bounds; the original single-argument
         # call random.randint(scrn.width) always raised TypeError.
         x = random.randint(0, scrn.width)
         y = 0
         # Accumulate the words instead of overwriting self.words each
         # iteration (matches the append-based new_game used elsewhere).
         self.words.append(word.Word(w, x, y))
예제 #12
0
def find_random_word():
    """ Return type Word object which is random word from database """
    # Pick a record number uniformly from [1, collection length].
    number = random.randrange(1, length_of_collection() + 1)
    entry = find_word_by_number(number)
    # Wrap the raw database record in a Word object.
    return word.Word(entry['number'], entry['english_word'],
                     entry['description'], entry['polish_word'])
예제 #13
0
 def generate_word(cls):
     """Pick a random word from the asset list and prepare it for play."""
     import word
     from random import choice
     random_word = choice(assets.words)
     print('YOU GOT A NEW WORD!')
     cls.new_word = word.Word(random_word)
     # print(cls.new_word.word) # for testing only
     # The original left ".generate_attempts()" / ".display_word()" as
     # dangling statements (a SyntaxError); chain the fluent calls instead.
     cls.new_word.split_letters() \
                 .generate_attempts() \
                 .display_word()
예제 #14
0
def get_word(word_list, lookahead=1, start=None, max_length=math.inf):
    """Return a generated wordlike string.

    Arguments:
    word_list  -- list of '\\n' terminated strings
    lookahead  -- index used to match subsequent characters in the string
    start      -- string of characters to start generation
    max_length -- upper bound on the length of the generated word
    """
    # Word does the generation; we only need its string form.
    return str(word.Word(word_list, lookahead, start, max_length))
예제 #15
0
 def add_words(self):
     '''
         add words to scr if not enough are there
     '''
     # Only top up when fewer than wordsOnScreen words are active.
     if len(self.words) >= self.wordsOnScreen:
         return
     picked = self.wordDictionary.pick_word()
     # Claim a free row so two words never share a line.
     row = random.choice(self.poss_vals)
     self.poss_vals.remove(row)
     self.words.append(word.Word(picked, 1, row))
예제 #16
0
 def __init__(self, para_file):
     """Load parameters and set up the check-in parsing pipeline.

     :para_file: path to the YAML parameter file.
     """
     self.para = parameter.yaml_loader().load(para_file)
     self.io = IO(para_file)
     # CSV fields like ",4x.xxxx," / ",-7x.xxxx," — latitude and longitude.
     # NOTE(review): the hard-coded 4*/−7* prefixes restrict matches to one
     # geographic region — confirm that is intended.
     self.lat_pattern = re.compile(r',4\d\.\d+,')
     self.lng_pattern = re.compile(r',-7\d\.\d+,')
     # Word parser with stemming enabled and tokens shorter than 3 dropped.
     self.parser = word.Word(stem=True, min_length=3)
     self.parser.load_stopwords(self.io.stopword_file)
     self.checkins = {}
     self.word_dict = {}
     self.sequences = []
예제 #17
0
def add_to_vocabulary(words, post_type, vocab):
    """Count every word of `words` in `vocab`, creating entries as needed."""
    for w in words:
        index = get_word_index(w, vocab)
        if index == -1:
            # Unknown word: create it, count it once, and register it.
            entry = word.Word(w)
            entry.increment_count(post_type)
            vocab.append(entry)
        else:
            # Known word: just bump its frequency for this post type.
            vocab[index].increment_count(post_type)
예제 #18
0
def score_words(words):
  """Score each word with Scrabble values and render them best-first."""
  scrabble_words = scrabble.Scrabble("./words.txt")
  # Pair every word with its Scrabble score, then order by score descending.
  scored = [word.Word(w, scrabble_words.score_word(w)) for w in words]
  scored.sort(key=lambda x: x.score, reverse=True)
  # Concatenate the string form of every scored word.
  return "".join(str(each) for each in scored)
예제 #19
0
 def load_dictionary(self, filename):
     """Load comma-separated word entries from `filename` into the dictionary.

     Each line holds: native,alternate,latin,translation
     """
     with open(filename, encoding='utf8') as file:
         for line in file:
             parts = line.split(',')
             entry = word.Word(parts[0], parts[1], parts[2],
                               parts[3].strip())
             self.add_word(entry)
예제 #20
0
 def remove_words_out_of_bounds(self):
     """Drop every word that has scrolled past the right edge of the screen.

     Each removed word counts as missed, costs one HP, frees its row (y)
     for reuse, and is blanked out on screen.
     """
     # Iterate over a snapshot: removing from self.words while iterating it
     # directly skips the element that follows each removal.
     for wor in list(self.words):
         if wor.x + len(wor.word) > self.scrn.width - 3:
             self.stat.missedWords += 1
             self.hp -= 1
             self.poss_vals.append(wor.y)
             self.words.remove(wor)
             if wor in self.probableWords:
                 self.probableWords.remove(wor)
             # Overwrite the word's old position with spaces to erase it.
             empty = word.Word(' ' * len(wor.word), wor.x, wor.y)
             self.scrn.render_word(empty)
예제 #21
0
 def parse_lines_AccentedTischendorf(self, lines):
     """Parse Tischendorf text lines into Verse/Word objects (Python 2 code).

     Tokens beginning with a digit are "chapter:verse" references that open a
     new Verse; "{P}" / "{C}" set the break kind applied to the next word;
     "-" is dropped; every other token becomes a Word occupying one monad.
     """
     mystring = "\n".join(lines)
     words = mystring.split()
     self.end_monad += 1
     index = -1
     break_kind = "."
     for w in words:
         index += 1
         if w[0] in "0123456789":
             # "chapter:verse" marker token.
             myarr = w.split(":")
             mychapter = int(myarr[0])
             try:
                 myverse = int(myarr[1])
             except:
                 print "UP100: w = '%s' words[index:+30] = '%s'" % (
                     w, words[index:index + 30])
             chapter_end = self.end_monad - 1
             if self.chapter != mychapter:
                 # Close out the previous chapter (0 means "none seen yet").
                 if self.chapter <> 0:
                     self.parseChapter(self.chapter, chapter_end)
                 self.chapter = mychapter
             self.verse = myverse
             verse = Verse([], self.bookname, self.booknumber)
             verse.chapter = mychapter
             verse.verse = myverse
             verse.first_monad = self.end_monad
             verse.last_monad = self.end_monad
             self.verses.append(verse)
         elif w == "{P}":
             break_kind = "P"
         elif w == "{C}":
             break_kind = "C"
         elif w == "-":
             pass  # Remove silently. FIXME: Do something more intelligent!
         else:
             wd = word.Word(self.end_monad, variant_none)
             wd.surface = w
             # "surface&qere" tokens carry an alternate reading after '&'.
             if "&" in wd.surface:
                 [wd.surface, wd.qere] = wd.surface.split("&")
             else:
                 wd.qere = wd.surface
             wd.qere_noaccents = word.RemoveAccents(
                 word.BETAtoOLBtranslate(wd.qere))
             #wd.parsing = "CONJ"
             #wd.Strongs1 = 9999
             wd.makeSurfacesAccentedTischendorf()
             # The pending break kind applies to this word, then resets.
             wd.break_kind = break_kind
             break_kind = "."
             #print wd.surface, wd.accented_surface
             self.verses[-1].words.append(wd)
             self.verses[-1].last_monad = self.end_monad
             self.end_monad += 1
     # Close the final chapter.
     self.parseChapter(self.chapter, chapter_end)
예제 #22
0
 def process_linear_word(self, mybreak_kind, mysurface, myqere, mytag,
                         mystrongs, strongslemma, ANLEXlemma):
     """Append one fully-specified Word to the current (last) verse.

     The word is created at the current end monad and filled with the break
     kind, surface/qere forms, morphological tag, Strong's number and both
     lemma fields; the verse's last monad is advanced to match.
     """
     w = word.Word(self.end_monad, variant_none)
     w.break_kind = mybreak_kind
     w.surface = mysurface
     w.qere = myqere
     # The surface is reused as the accented form here.
     w.accented_surface = mysurface
     w.parsing = mytag
     w.Strongs1 = mystrongs
     w.strongslemma = strongslemma
     w.ANLEXlemma = ANLEXlemma
     self.verses[-1].words.append(w)
     self.verses[-1].last_monad = self.end_monad
예제 #23
0
    def test_descriptor_components(self):
        """descriptor_components mirrors the append counts exactly."""
        word_test = word.Word("man")
        # Append each component the given number of times, preserving the
        # order used by the original hand-written sequence.
        counts = (("i", 3), ("am", 3), ("a", 2), ("sick", 1),
                  ("spiteful", 1), ("an", 1), ("unattractive", 1))
        for component, times in counts:
            for _ in range(times):
                word_test.append(component)

        # hard coded version of the dictionary created above
        hard_coded = {"man": dict(counts)}

        # both hold the same values
        self.assertEqual(word_test.descriptor_components, hard_coded["man"])

        # one extra append desynchronizes the two
        word_test.append("unattractive")
        self.assertNotEqual(word_test.descriptor_components, hard_coded["man"])

        # catching up makes them equal again
        hard_coded["man"]["unattractive"] += 1
        self.assertEqual(word_test.descriptor_components, hard_coded["man"])

        # advancing the hard coded copy desynchronizes them once more
        hard_coded["man"]["unattractive"] += 1
        self.assertNotEqual(word_test.descriptor_components, hard_coded["man"])
예제 #24
0
    def __init__(self,
                 native_word,
                 translated_word,
                 word_known_level=0,
                 lower_case=True):
        """Initialize the object.

        :native_word: The word in the native language.
        :translated_word: The translation of the word in the new learned
        language.
        :word_known_level: The level in which the word is already known. For
                           new words this value should remain empty.
        :lower_case: True if you want to save the lower case of the word, False
                     otherwise.

        """
        # Normalize casing up front when requested.
        native = native_word.lower() if lower_case else native_word
        translated = translated_word.lower() if lower_case else translated_word
        self.native_word = word.Word(native)
        self.translated_word = word.Word(translated)
        self.word_known_level = word_known_level
        self._current_stage_in_learned_level = 0
예제 #25
0
    def new_game(self):
        '''
            starts a new game
            by choosing random words to be displayed on scr
        '''
        # Place wordsOnScreen words, each at column 1 on its own free row.
        for _ in range(self.wordsOnScreen):
            picked = self.wordDictionary.pick_word()
            row = random.choice(self.poss_vals)
            self.poss_vals.remove(row)
            self.words.append(word.Word(picked, 1, row))

        self.scrn.render_title()
        self.game_loop()
예제 #26
0
    def test_append(self):
        """append() validates its argument and tracks component counts."""
        word_test = word.Word("man")
        # a fresh word has no components
        self.assertEqual(len(word_test.descriptor_components), 0)

        # every non-string argument raises TypeError
        for bad in (1, 1.0, object, {"test": 123}, [1], (1, 2)):
            with self.assertRaises(TypeError):
                word_test.append(bad)

        # an empty string raises ValueError
        with self.assertRaises(ValueError):
            word_test.append("")

        # appending the same component twice keeps a single entry...
        word_test.append("i")
        self.assertEqual(len(word_test.descriptor_components), 1)
        word_test.append("i")
        self.assertEqual(len(word_test.descriptor_components), 1)

        # ...while a new component adds an entry
        word_test.append("am")
        self.assertEqual(len(word_test.descriptor_components), 2)

        # and incrementing it again keeps the count stable
        word_test.append("am")
        self.assertEqual(len(word_test.descriptor_components), 2)
예제 #27
0
 def add_words(self):
     '''Create word object for each word in the chunk.'''
     words = self.label.split(' ')
     self.words = []
     # i is the word's position within the chunk; empty strings produced by
     # consecutive spaces are skipped (but still consume an index).
     for i, w in enumerate(words):
         if w:
             # Each Word carries its position, chunk/file/speaker identifiers
             # and timing (st/et), plus corpus/register metadata.
             w = word.Word(w,
                           i,
                           self.chunk_number,
                           self.st,
                           self.et,
                           self.filename,
                           self.fid,
                           self.sid,
                           self.cid,
                           corpus=self.corpus,
                           register=self.register,
                           chunk=self)
             self.words.append(w)
예제 #28
0
    def game_loop(self):
        """Run the main game loop until HP runs out or the player quits.

        Returns the stat object holding the final game statistics.
        """
        while self.hp > 0:
            if (not self.paused):

                #push the words along
                new_time = datetime.datetime.now()
                micro_seconds_passed = (new_time - self.last_time).microseconds
                #move the words if enough time has passed

                # Erase every word at its current position before redrawing.
                for wor in self.words:
                    empty = word.Word(' ' * len(wor.word), wor.x, wor.y)
                    self.scrn.render_word(empty)

                # Advance all words once every 0.1 s of elapsed time.
                if micro_seconds_passed >= (0.1) * (1000000):
                    self.last_time = new_time
                    for wor in self.words:
                        wor.x += self.stat.modifier
                #render words
                for wor in self.words:
                    if (wor in self.probableWords):
                        # Candidate words show the already-typed prefix.
                        self.scrn.render_word_typed(wor, self.pos_in_word)
                    else:
                        self.scrn.render_word(wor)
                #self.scrn.render_stats()
                self.scrn.scr.refresh()

            #take in input
            key_pressed = self.process_input()

            #call whatever uses keypressed
            self.pause(key_pressed)
            self.game_mode(key_pressed)
            if (not self.should_keep_going):
                break

            #update data
            self.remove_words_out_of_bounds()
            self.add_words()

        return self.stat
예제 #29
0
 def add_to_database(self, entry):
     """Split a "left,right" line and store the word pair in the language db."""
     # Split only on the first comma so the right column may contain commas.
     left, right = entry.split(",", 1)
     pair = (word.Word(self.columnheads[0], left.rstrip()),
             word.Word(self.columnheads[1], right.rstrip()))
     self.languagedb.add(pair)
예제 #30
0
def compute_mostInterestingKeywordsTable(path_source,
                                         path_result,
                                         min_weektotal=10,
                                         min_changerate=5):
    """
    Creates a Markdown table at ``path_results`` with the most interesting keywords of the database at ``path_source``. 

    Keywords must fulfill the following two conditions in at least one (and the same) week:
    1. Keyword was mentioned at least ``min_weektotal`` in that week.
    2. Keyword has a relative changerate of mentions of ``min_changerate`` compared to the previous week.

    Parameters
    ----------
    path_source : str
        Path on local machine where article database is located.
    path_result : str
        Path on local machine where resulting Markdown table should be stored.
    min_weektotal : int
        Minimum number of total mentions in at least one week (default is 10).
    min_changerate : int
        Minimum number for the relative changerate of mentions compared to the previous week (default is 5).
    
    Returns
    -------
    None 

    """
    articles = mng.load_articles(path_source)
    # Get a dict of dicts for each calendar week with word frequencies from getWordCounts
    wordCounts = mng.getWordCounts(articles)
    # List of distinct words
    distinctWords = mng.getDistinctWords(wordCounts)
    # List of tuples containing weekly word frequency
    countsPerWeek = [(w, mng.getCountPerWeek(wordCounts, w))
                     for w in distinctWords]
    # Create list of word objects for each keyword
    words = [wpy.Word(c[0], ts_articles=timeseries.Timeseries(c[1]))
             for c in countsPerWeek]
    # Compute interestingness
    interestingWords = mng.filter_interestingness(
        articles, min_weektotal=min_weektotal, min_changerate=min_changerate)
    res = {}
    keywords = []
    for k in interestingWords:
        keywords.append(k)
        # Delete year but keep calendar week and round numbers:
        res[k] = {}
        for el in interestingWords[k]:
            res[k][el[1]] = [
                interestingWords[k][el][0],
                round(interestingWords[k][el][1], 2)
            ]
    m = matching.groupmatch(keywords, articles)
    # Context manager guarantees the file is closed even if a write fails
    # (the original leaked the handle on exceptions).
    with open(path_result, 'a', encoding="utf-8") as writer:
        writer.write(
            "|row|Keyword|week: [ total , changerate ]| computed query (advanced)  | matching result (advanced) |"
        )
        writer.write("\n|---|---|---|---|---|")
        for i in range(0, len(keywords)):
            if len(m[keywords[i]]["link"]) > 0:
                writer.write("\n| {}. | {} | {} | {} | {} |".format(
                    i + 1, keywords[i], res[keywords[i]], m[keywords[i]]["query"],
                    m[keywords[i]]["link"][1]).replace("{", "").replace("}", ""))
            else:
                writer.write("\n| {}. | {} | {} | {} | EMPTY MATCHING |".format(
                    i + 1, keywords[i], res[keywords[i]],
                    m[keywords[i]]["query"]).replace("{", "").replace("}", ""))