def mergeddictmwdictreading2mwaudio(self, mergeddictmws, noundictreading):
    """Build a reading for measure-word audio and hand it to the audio generator.

    For every merged measure word we emit <random number> <mw> <noun>, separated
    by commas, then generate audio for the whole sequence.
    """
    tokens = []
    for _, mwpinyinwords in mergeddictmws:
        # The audio field will contain <random number> <mw> <noun> for every possible MW.
        # NB: we explicitly encode the tokens rather than doing a lookup because e.g. 几 has
        # several readings, but we know precisely the one we want here and can avoid ambiguity.
        quantity = model.Word(random.choice(numbers.hanziquantitypinyin))
        tokens += [quantity]
        tokens += mwpinyinwords
        tokens += noundictreading
        # This comma doesn't currently do anything, but it might come in useful if we
        # add delay generation in the audio code later on.
        tokens += [model.Word(model.Text(", "))]
    # Only apply the sandhi generator at this point: we have carefully avoided doing it for the
    # input up to now (especially for the noundictreading). Probably doesn't make a difference
    # with the current implementation, but better safe than sorry.
    sandhied = transformations.tonesandhi(tokens)
    return generateaudio(self.notifier, self.mediamanager, self.config, sandhied)
def updatefactalways(self, fact, reading):
    """Reformat the user's freeform reading text and write it back into the
    fact's 'reading' field; facts without that field are left untouched."""
    # We better still give it a miss if the update will fail.
    if 'reading' not in fact:
        return
    # Identify probable pinyin in the user's freeform input, reformat them according to the
    # current rules, and pop the result back into the field.
    tokens = model.tokenize(reading)
    fact['reading'] = preparetokens(self.config, [model.Word(*tokens)])
def set_word_status():
    """Set the learning state of a word for the current language.

    Expects JSON of the form {"word": ..., "state": ...}. Creates the word
    row if it does not exist yet, otherwise updates its state, then commits.
    Returns a JSON success flag.
    """
    word = request.json["word"]
    state = request.json["state"]
    wordrow = model.Word.query.filter_by(word=word).first()
    if not wordrow:
        # Bug fix: new rows previously hard-coded state="learning", silently
        # ignoring the state the client asked for (and the freshly created
        # row shadowed the `word` string variable).
        wordrow = model.Word(word=word, state=state, language=language())
        db.session.add(wordrow)
    else:
        wordrow.state = state
    db.session.commit()
    return jsonify(result=True)
def finish_page(id, page_num):
    """Mark every word on the given article page as known, then redirect the
    reader to the next page of the article."""
    lang = language()  # hoisted: loop-invariant, no need to recompute per word
    page = model.Article.page(id, lang, page_num)
    for word in page.words_in_page:
        wordrow = model.Word.query.filter_by(word=word).first()
        if not wordrow:
            new_wordrow = model.Word(word=word, state="known", language=lang)
            db.session.add(new_wordrow)
    # Commit once after the loop rather than once per new word: same end
    # state, far fewer database round-trips on word-dense pages.
    db.session.commit()
    return redirect(url_for("read_article", id=id, page_num=page_num + 1))
def process_word_name(message):
    """Capitalize the incoming word, attach it to this chat's pending post,
    and prompt the user for the next step (skippable)."""
    chat_id = message.chat.id
    draft = post_dict[chat_id]
    draft.words.append(model.Word(message.text.capitalize()))
    post_dict[chat_id] = draft
    sent = bot.send_message(
        chat_id,
        'Caso Nao Envie seu Usuário A Postagem sera Nomeada Como "@DextyOficialBot"',
        reply_markup=skip_markup())
    bot.register_next_step_handler(sent, process_part_of_speech)
def process_word_name(message):
    """Store the capitalized word on the chat's draft post, then ask for a
    phonetic transcription (the user may skip via the button)."""
    new_word = model.Word(message.text.capitalize())
    post = post_dict[message.chat.id]
    post.words.append(new_word)
    post_dict[message.chat.id] = post
    prompt = bot.send_message(
        message.chat.id,
        'Send phonetic transcription. You can skip by pressing button "Skip"',
        reply_markup=skip_markup())
    bot.register_next_step_handler(prompt, process_phonetic_transcription)
def reformataudio(self, audio): output = u"" for recognised, match in utils.regexparse( re.compile(ur"\[sound:([^\]]*)\]"), audio): if recognised: # Must be a sound tag - leave it well alone output += match.group(0) else: # Process as if this non-sound tag were a reading, in order to turn it into some tags output += generateaudio(self.notifier, self.mediamanager, self.config, [model.Word(*model.tokenize(match))])
def process_word_name(message):
    """Append the capitalized word to this chat's pending post, then ask the
    user to send their username (or confirm with OK)."""
    chat_id = message.chat.id
    post = post_dict[chat_id]
    post.words.append(model.Word(message.text.capitalize()))
    post_dict[chat_id] = post
    sent = bot.send_message(
        chat_id,
        'Envie Seu Usuário exemplo : @DextyOficial [ Pos o Envio da um Click em [OK]"',
        reply_markup=skip_markup())
    bot.register_next_step_handler(sent, process_part_of_speech)
def meaningfromnumberlike(expression, dictionary):
    """Produce a meaning for a number-like expression, trying Western-style
    parsing first and falling back to Chinese-style parsing.

    Returns None (via utils.bind_none) when neither parser recognises the
    expression; otherwise a wrapped [[model.Word(...)]] meaning.
    """
    def stringify(digits):
        return u"".join(unicode(digit) for digit in digits)

    handlers = [
        lambda digits: stringify(digits),                                     # plain integer
        lambda leading, trailing: stringify(leading) + "." + stringify(trailing),  # decimal
        lambda digits: stringify(digits) + "AD",                              # year
        lambda digits: stringify(digits) + "%",                               # percentage
        lambda num, denom: stringify(num) + "/" + stringify(denom),           # fraction
    ]
    # Generates a meaning from approximately Western expressions (almost useless,
    # but does handle the nian suffix).
    text = parsewesternnumberlike(expression, *handlers)
    if not text:
        # Generate a meaning from approximately Chinese expressions.
        text = parsechinesenumberlike(expression, *handlers)
    # Wrap the result in the appropriate gumpf.
    return utils.bind_none(text, lambda nonnulltext: [[model.Word(model.Text(nonnulltext))]])
import model


def main():
    """Create a Word, initialize it with the fixed string, and pronounce it
    in a deliberate endless loop (until interrupted)."""
    word = model.Word()
    word.make("MORSEISFUN228")
    while True:
        word.pronounce()


# Guarding the entry point fixes the original's unguarded module-level side
# effects: merely importing this file used to enter an infinite loop.
if __name__ == '__main__':
    main()
def reformatreading(self, reading):
    """Tokenize the freeform reading text and render it back according to the
    current configuration's formatting rules."""
    word = model.Word(*model.tokenize(reading))
    return preparetokens(self.config, [word])
def unpreparetokens(flat):
    """Invert preparetokens: strip the HTML from a flat field value and
    tokenize what remains into a single-Word token list."""
    plain = striphtml(flat)
    return [model.Word(*model.tokenize(plain))]
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call whose opening lines are outside this view.
help='a plain text file')
args = parser.parse_args()
####################
# database connect #
####################
engine = create_engine(args.db_url)
Session = sessionmaker(bind=engine)
session = Session()
# Create any missing tables, then expose the live session to the model module.
model.Base.metadata.create_all(engine)
model.session = session
# Parse the input text and persist every sentence and its words.
tree = parsetree(args.text.read())
wid = 1  # manually assigned primary key for Word rows
for n in range(len(tree.sentences)):
    s = tree[n]
    # Reuse an existing Sentence row when one with this id is already stored.
    sentence = session.query(model.Sentence).get(s.id)
    if sentence is None:
        sentence = model.Sentence(id=s.id, sentence=s.string)
        session.add(sentence)
    for w in s.words:
        session.add(
            model.Word(id=wid, word=w.string, pos=w.pos, sentence=sentence))
        wid += 1
# Single commit at the end flushes all sentences and words together.
session.commit()
from datetime import datetime import model if __name__ == '__main__': min_word_length = 4 print "Loading words..." tstart = datetime.now() model.Word.objects.delete() with open('words.txt', 'r') as f: words = [] for line in f: line = line.strip().lower() if (len(line) >= min_word_length): words.append(line) for word in words: word_object = model.Word(word=word).save() tend = datetime.now() print "Loaded words: Total time: " + str((tend - tstart).seconds)
def delete_all_chars(word, chars): for char in chars: word = word.replace(char, '') return word if __name__ == '__main__': min_word_length = 4 print "Loading words..." tstart = datetime.now() model.Word.objects.delete() stop_words = set(stopwords.words('english')) stop_words = map(lambda x: delete_all_chars(x, string.punctuation), stop_words) with open('words.txt', 'r') as f: words = [] for line in f: line = line.strip().lower() if ((len(line) >= min_word_length) and (line not in stop_words)): word_object = model.Word(word=line).save() tend = datetime.now() print "Loaded words: Total time: " + str((tend - tstart).seconds)