def wordByWord(self, sentence, hiragana=True):
    """Get reading for every element in provided sentence.

    Returns a list of (surface, reading) tuples; the reading is empty when
    the token has no pronunciation or is already written in kana.
    """
    self.includeSurface().includeReading()
    parsed = self.parse(sentence)
    if not parsed:
        return []
    pairs = []
    for token in parsed:
        kana = token.get('pronounciation')
        surface = token.get('surface')
        if not kana:
            # Token carries no reading at all.
            reading = u''
        elif kana == surface or kata2hira(kana) == surface:
            # Already kana - annotating it would just repeat the surface.
            reading = u''
        elif hiragana:
            reading = kata2hira(kana)
        else:
            # Caller asked for the raw (katakana) reading.
            reading = kana
        pairs.append((surface, reading))
    return pairs
def word_by_word(self, sentence, hiragana=True):
    """Get reading for every element in provided sentence.

    Yields the result as a list of (surface, reading) tuples; reading is
    empty for tokens without a pronunciation or already written in kana.
    """
    results = []
    tokens = self.include('pronounciation', 'surface').parse(sentence)
    if not tokens:
        return results
    for token in tokens:
        pron = token.get('pronounciation')
        surface = token.get('surface')
        if not pron:
            # Nothing to annotate with.
            reading = u''
        elif pron == surface or kata2hira(pron) == surface:
            # Surface is already kana; skip the annotation.
            reading = u''
        else:
            reading = kata2hira(pron) if hiragana else pron
        results.append((surface, reading))
    return results
def parseReadings(self):
    """Group each kanji's example words under the kun/on reading they use.

    Returns a dict mapping a kanji character (suffixed with '_<n>' when the
    kanji has several readings) to a (reading, [words]) tuple.
    """
    items_grouped = {}
    for kanji in self.items:
        readings = {}
        try:
            lookup = self.kjd[kanji.character]
            # Kun readings: strip the okurigana dot and prefix/suffix dashes,
            # then keep every example word whose kana reading contains it.
            for kun in lookup.kun_readings:
                kun = kun.replace('.', '').replace('-', '')
                for word in kanji.word:
                    if kun in kata2hira(MecabTool.parseToReadingsKana(word.word)[0]):
                        readings.setdefault(kun, []).append(word.word)
            # On readings come as katakana; normalize to hiragana for matching.
            for on in lookup.on_readings:
                on = kata2hira(on.replace('.', '').replace('-', ''))
                for word in kanji.word:
                    if on in kata2hira(MecabTool.parseToReadingsKana(word.word)[0]):
                        readings.setdefault(on, []).append(word.word)
        except Exception as e:
            # Best-effort: a kanji missing from the dictionary is logged and skipped.
            log.error(e)
        # Store each reading under a unique key: the bare character for the
        # first reading, then '<char>_0', '<char>_1', ... for the rest.
        # Bug fix: the counter used to be reset inside the loop, so every
        # reading after the first collided on '<char>_0' and overwrote it.
        i = 0
        for reading in readings:
            if kanji.character in items_grouped:
                items_grouped[kanji.character + '_' + str(i)] = (reading, readings[reading])
                i += 1
            else:
                items_grouped[kanji.character] = (reading, readings[reading])
    # Bug fix: the grouped result was built and then silently discarded.
    return items_grouped
def kana_minus_dakuten(char):
    """Return *char* with any dakuten/handakuten removed, keeping its script."""
    if not is_katakana(char):
        return __by_dakuten.get(char, char)
    # The lookup table is keyed on hiragana, so round-trip through it.
    plain = kata2hira(char)
    return hira2kata(__by_dakuten.get(plain, plain))
def getWordPronunciationFromExample(self, item):
    """Return the hiragana reading of the word containing *item* in the current example."""
    tokens = MecabTool.parseToWordsFull(self.currentExample.sentence)
    match = self.find(lambda token: item in token['word'], tokens)
    try:
        return kata2hira(match['pronunciation'])
    except Exception:
        # No token matched (or it had no reading) - fall back to a blank reading.
        return u' '
def getCorrectAnswer(self):
    """Return the hiragana reading of the current item within the current example sentence."""
    tokens = MecabTool.parseToWordsFull(self.currentExample.sentence)
    hit = self.find(lambda entry: self.currentItem.character in entry['word'], tokens)
    try:
        return kata2hira(hit['pronunciation'])
    except Exception:
        # No matching token or no reading available - answer with a blank.
        return u' '
def getWordPronunciationFromText(query, text):
    """Return the hiragana reading of the word containing *query* in *text*, or None."""
    tokens = MecabTool.parseToWordsFull(text)
    hit = MecabTool.findUsingF(lambda token: query in token['word'], tokens)
    try:
        return kata2hira(hit['pronunciation'])
    except Exception:
        # No match, or the match carries no reading.
        return None
def getWordPronunciationFromText(query, text):
    """Find the word containing *query* in *text* and give back its reading.

    The reading is converted to hiragana; None is returned when no word
    matches or the match has no pronunciation field.
    """
    parsed = MecabTool.parseToWordsFull(text)

    def contains_query(entry):
        return query in entry['word']

    found = MecabTool.findUsingF(contains_query, parsed)
    try:
        return kata2hira(found['pronunciation'])
    except Exception:
        return None
def _reading(node):
    """Return the hiragana reading of a MeCab *node*, or None if it has none.

    The reading is the second-to-last CSV field of the node's feature string
    and arrives as katakana; '*' marks a missing reading.
    """
    # Bug fix: node.surface was decoded into a local that was never used.
    reading = node.feature.decode('utf8').split(',')[-2]
    if reading == '*':
        return None
    # MeCab emits katakana; callers want hiragana.
    return jcconv.kata2hira(reading)
def kana_plus_mini(char):
    """Yield *char* followed by its small ("mini") kana variants in the same script."""
    yield char
    kata = is_katakana(char)
    # The mini-kana table is keyed on hiragana.
    key = kata2hira(char) if kata else char
    for mini in __to_mini.get(key, ''):
        if kata:
            yield hira2kata(mini)
        else:
            yield mini
def create_idx_file(datfile, idxfile): print "Reading in dat file..." words = OrderedDict() position = 0 with codecs.open(datfile, 'r', "utf-8") as f: for line in f: # TODO create key for each ; separated value temp = line.split(' ', 1)[0] temp2 = line.split(';', 1)[0] if len(temp) < len(temp2): symbol = temp else: symbol = temp2 # vu wouldn't be converted if not reserved, but for clarity purposes ... symbol = jcconv.kata2hira(symbol, 'ヴ') if not words.get(symbol): words.update({symbol: str(position)}) else: words.update({symbol: words.get(symbol) + "," + str(position)}) symbol_in_brackets = re.search(r'\[(.*?)\]', line.split('/', 1)[0]) if symbol_in_brackets: symbol_in_brackets = symbol_in_brackets.group(1) # vu wouldn't be converted if not reserved, but for clarity purposes ... symbol_in_brackets = jcconv.kata2hira(symbol_in_brackets, 'ヴ') if words.get(symbol_in_brackets): words.update({ symbol_in_brackets: words.get(symbol_in_brackets) + "," + str(position) }) else: words.update({symbol_in_brackets: str(position)}) position += len(line) print "Finished reading in dat file, now sorting index..." words = OrderedDict(sorted(words.items(), key=lambda t: t[0])) print "Finished sorting index, now writing idx file..." with codecs.open(idxfile, 'w+', "utf-8") as f: for key, value in words.iteritems(): f.write(key + "," + value + "\n") print "Finished writing idx file"
def create_idx_file(datfile, idxfile): print "Reading in dat file..." words = OrderedDict() position = 0 with codecs.open(datfile, 'r', "utf-8") as f: for line in f: # TODO create key for each ; separated value temp = line.split(' ', 1)[0] temp2 = line.split(';', 1)[0] if len(temp) < len(temp2): symbol = temp else: symbol = temp2 # vu wouldn't be converted if not reserved, but for clarity purposes ... symbol = jcconv.kata2hira(symbol, 'ヴ') if not words.get(symbol): words.update({symbol : str(position)}) else: words.update({symbol : words.get(symbol) + "," + str(position)}) symbol_in_brackets = re.search(r'\[(.*?)\]', line.split('/', 1)[0]) if symbol_in_brackets: symbol_in_brackets = symbol_in_brackets.group(1) # vu wouldn't be converted if not reserved, but for clarity purposes ... symbol_in_brackets = jcconv.kata2hira(symbol_in_brackets, 'ヴ') if words.get(symbol_in_brackets): words.update({symbol_in_brackets : words.get(symbol_in_brackets) + "," + str(position)}) else: words.update({symbol_in_brackets : str(position)}) position += len(line) print "Finished reading in dat file, now sorting index..." words = OrderedDict(sorted(words.items(), key=lambda t: t[0])) print "Finished sorting index, now writing idx file..." with codecs.open(idxfile, 'w+', "utf-8") as f: for key, value in words.iteritems(): f.write(key + "," + value + "\n") print "Finished writing idx file"
def __getattribute__(self, key):
    # Attribute access on this dict-backed MeCab node: cached dict items
    # shadow real attributes, and the named feature fields are materialized
    # lazily from the raw CSV 'feature' string on first use.
    feature_keys = dict.__getattribute__(self, 'feature_keys')
    if key in self:
        # Already cached as a dict item.
        return self[key]
    if key in feature_keys:
        # Split the raw feature string once and cache every named field
        # (not just the requested one) as dict items.
        features = dict.__getattribute__(self, 'feature').split(',')
        for name, index in feature_keys.iteritems():
            self[name] = features[index]
        # Normalize the collected readings to hiragana, dropping the '*'
        # placeholder that marks "no reading".
        readings = dict.__getattribute__(self, 'readings')
        readings = set([jcconv.kata2hira(e) for e in readings if e not in ('*',)])
        self['readings'] = readings
    # NOTE(review): this returns the *attribute* via dict.__getattribute__,
    # not the items cached above - those are only served on the next access
    # through the 'key in self' branch. Confirm this fall-through is intended.
    return dict.__getattribute__(self, key)
def all_to_hiragana(string):
    """Convert *string* to hiragana, expanding prolonged-sound marks.

    'ー' (and the ASCII pipe stand-in '|') is replaced by the base vowel of
    the previously converted character before kana conversion.
    """
    out = u''
    # Bug fix: the enumerate() index was never used - iterate directly.
    for char in string:
        if char == u'ー' or char == u'|':
            # NOTE(review): raises IndexError when the string *starts* with a
            # prolonged-sound mark (out is still empty) - confirm inputs.
            char = char_to_base_vowel(out[-1])
        out += kata2hira(char)
    return out
def reading(self, sentence, hiragana=True):
    """Get reading for provided sentence|word.

    Returns the concatenated kana reading (hiragana by default), or None
    when the sentence could not be parsed.
    """
    self.includeReading()
    tokens = self.parse(sentence)
    if not tokens:
        return None
    pieces = [t.get('pronounciation', '') for t in tokens if t.get('pronounciation')]
    kana = u''.join(pieces)
    return kata2hira(kana) if hiragana else kana
def generate_reading(expression):
    """Run *expression* through the mecab CLI and return it with furigana attached.

    Kanji-bearing words whose reading differs from their surface are passed
    through _furiganaize(); everything else is copied through verbatim.
    """
    # MeCab speaks bytes in a configured encoding; encode on the way in,
    # decode its stdout on the way out.
    expression = expression.encode(settings.MECAB_ENCODING)
    proc = subprocess.Popen('mecab', shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    mecab_output = proc.communicate(expression)[0].decode(settings.MECAB_ENCODING)
    lines = mecab_output.split(u'\n')[:-2]  # skip the \nEOS\n
    ret = u''
    for line in lines:
        if line[0] == u',':
            # A literal comma token: MeCab puts it first on the line.
            ret += u','
            continue
        elif line[:3] == u'EOS':
            # Sentence boundary - preserve it as a newline.
            ret += u'\n'
            continue
        elif line[0].strip() == '':
            # Whitespace token: copy the single character through.
            ret += line[0]
            continue
        fields = line.split(u',')
        word = fields[0].split()[0]
        if len(fields) == 9:
            # Full feature set present; field 7 is the (katakana) reading.
            reading = fields[7]
            # Has kanji and a reading?
            if (jcconv.kata2hira(reading) != word and reading != word and
                    any(_code_page(char) != 'hiragana' and _code_page(char) != 'katakana'
                        for char in word)):
                # The reading comes in as katakana, we want hiragana.
                reading = jcconv.kata2hira(reading)
                ret += _furiganaize(word, reading, not ret)
            else:
                ret += word
        else:
            # Unknown word (short feature list) - no reading available.
            ret += word
    return ret
def get_reading(self, sentence, hiragana=True):
    """
    Get reading for provided sentence|word

    NB: for some rare words there may be no readings available!
    """
    tokens = self.include('pronounciation').parse(sentence)
    if not tokens:
        return None
    joined = u''.join(
        token.get('pronounciation', '')
        for token in tokens
        if token.get('pronounciation')
    )
    if hiragana:
        return kata2hira(joined)
    return joined
def getKanaReading(query):
    """Fetch the kana reading of *query* from the mecapi web service, as hiragana.

    Raises urllib2.URLError/HTTPError on network failure.
    """
    from urllib import quote  # local import: only needed to build the URL
    MECAPI_URL = u'http://mimitako.net/api/mecapi.cgi?sentence='
    OPTIONS = u'&response=pronounciation'
    XML_TAG = u'word/pronounciation'
    # Bug fix: the raw query (usually non-ASCII Japanese text) was interpolated
    # into the URL unescaped, producing an invalid URL; percent-encode it.
    url = MECAPI_URL + quote(query.encode('utf-8')) + OPTIONS
    result = urllib2.urlopen(url)
    tree = ElementTree.fromstring(result.read())
    reading = []
    for node in tree.findall(XML_TAG):
        # Guard against empty <pronounciation/> elements (node.text is None),
        # which would break the join below.
        if node.text:
            reading.append(node.text)
    return kata2hira(''.join(reading))
def reading(self, sentence, hiragana=True):
    """
    Get reading for provided sentence|word

    NB: for some rare words there may be no readings available!
    """
    self.include('pronounciation')
    parsed = self.parse(sentence)
    if parsed:
        collected = []
        for token in parsed:
            pron = token.get('pronounciation')
            if pron:
                collected.append(pron)
        kana = u''.join(collected)
        return kata2hira(kana) if hiragana else kana
def as_mecab(self):
    """Tokenize the card front with MeCab and attach a hiragana reading to each word.

    Words already written in kana (and the topic particle は) get an empty
    reading so no redundant furigana is shown.
    """
    parser = BakaMeCab(self.front)
    parsed_example = []
    for word, info in parser.get_info().iteritems():
        reading = u''
        if len(info) > 4:
            # The reading field's position depends on how many features MeCab emitted.
            kana = info[6] if len(info) > 6 else info[4]
            hiragana = kata2hira(kana)
            # Bug fix: 'は' was a byte string compared against a unicode word,
            # so the particle exclusion never matched; use a unicode literal.
            if kana != word and hiragana != word and word != u'は':
                reading = hiragana
        parsed_example.append({'front': word, 'reading': reading})
    return {
        'parsed': parsed_example,
        'original': self.front,
        'reading': self.reading,
        'gloss': self.gloss
    }
def getCurrentSentenceReading(self):
    """Return the full reading of the current example sentence, converted to hiragana."""
    kana_parts = MecabTool.parseToReadingsKana(self.currentExample.sentence)
    return kata2hira(''.join(kana_parts))
def getWordPronounciation(self, item):
    """Return the hiragana reading of *item*, or *item* itself when none is found.

    Falls back to the raw input on any parse failure (empty parse result,
    missing 'pronunciation' field, ...).
    """
    try:
        return kata2hira(MecabTool.parseToWordsFull(item)[0]['pronunciation'])
    except Exception:
        # Bug fix: a bare 'except:' also swallowed KeyboardInterrupt/SystemExit.
        return item
def _can_furigana(self, ma, expression):
    """Decide whether morpheme *ma* deserves furigana over its slice of *expression*."""
    span = expression[ma.position:ma.position + ma.word_length]
    # Annotate only when the surface differs from the reading (in either
    # script) and is not a bare numeral.
    return (ma.reading != span
            and kata2hira(span) != ma.reading
            and span not in u"一二三四五六七八九十0123456789")
total_freq = 0 for file_name in file_list: print file_name with codecs.open(file_name, 'r', 'utf-8') as f: for line in f: arr = line.split() base = 0 for i, e in enumerate(arr): if e[0] <= '9' and e[0] >= '0': base = i kanji = arr[base - 1][1:] freq = arr[base][:-2] kana = kata2hira(arr[base + 2][:-1]) print kanji, freq, kana if len(kana) == 0: continue elif len(kana) > 1 and kana[0] == '{': ks = kana[1:kana.index('}')].split('/') ks = [k + kana[kana.index('}') + 1:] for k in ks if len(k) > 0] else: ks = [ kana, ] freq = int(freq) for k in ks:
def doGo(self): #pressing the go button we first see which dict is selected try: theWord = str(self.ui.textEdit.toPlainText()) except: # for JDICT theWord = self.ui.textEdit.toPlainText() if self.ui.comboBox.currentText() == 'KATEGLO': start = time.clock() kx = kateglo() data = kx.getData(theWord) if data == -1: s = '\nHello : ' + theWord + ' maybe mispelled or not Indonesian or not the correct root' self.ui.textEdit_2.append(s) self.ui.textEdit_2.append('Correct and try again') else: tr = kx.translator(data) self.aline() s = '\nPhrase : ' + theWord self.ui.textEdit_2.append(s) rxc = 0 for rx in tr: if rxc == 0: s = 'Source : ' + rx self.ui.textEdit_2.append(s) else: self.ui.textEdit_2.append(rx) rxc += 1 self.aline() df = kx.definitor(data) self.ui.textEdit_2.append("\nDefinitions\n") for rx in df: self.ui.textEdit_2.append(rx) pr = kx.proverbor(data) self.ui.textEdit_2.append("\nProverbs\n") for rx in pr: self.ui.textEdit_2.append(rx) rl = kx.relator(data) self.ui.textEdit_2.append("\nRelations\n") for rx in rl: self.ui.textEdit_2.append(rx) self.ui.label_3.setText('Finished KATEGLO request ...') end = time.clock() s = 'Request duration : ' + str(end - start) + ' secs' self.ui.label_4.setText(s) elif self.ui.comboBox.currentText() == 'KBBI': start = time.clock() kb = kbbi() soup = kb.processData(theWord) sxt = soup.get_text() atitle = soup.title.string.split('- definisi kata') s = '\n' + atitle[0] self.ui.textEdit_2.append(s) self.aline() s = '\nKata : ' + atitle[1] self.ui.textEdit_2.append(s) sxts = sxt.split('Pranala (link): http://kbbi.web.id/%s' % theWord) try: sxts2 = sxts[1] sxts2 = sxts2.split('Tweet') res1 = sxts2[0] res1 = res1.split(';') s = '' s = res1[0].strip('-1').strip('-2').strip('-3') self.ui.textEdit_2.append(s) s = '' for rx in range(1, len(res1)): rc = res1[rx] s = rc + '\n' self.ui.textEdit_2.append(s) except: self.ui.textEdit_2.append('') s = theWord + ' ==> Tidak ditemukan - KBBI\n\nMaybe incorrect root word' self.ui.textEdit_2.append(s) # 
we want to get data from the Memuat section , if any sxtch = sxt.split('Memuat') try: sxtchz = sxtch[1].split('Pranala') self.ui.textEdit_2.append( 'Try with these suggestions provided by kbbi (if any) :' ) s = sxtchz[0].replace('1', '\n').replace( '2', '\n').replace( '3', '\n') # occasionaly there is are subscripts self.ui.textEdit_2.append(s) except: pass self.ui.label_3.setText('Finished KBBI request ...') end = time.clock() s = 'Request duration : ' + str(end - start) + ' secs' self.ui.label_4.setText(s) elif self.ui.comboBox.currentText() == 'GLOSBE': start = time.clock() lp = str(self.ui.comboBox_2.currentText()) al = lp.split('/') orglang = al[0] destlang = al[1] gb = glosbe() data = gb.getData(theWord, orglang, destlang) if data == -1: s = '\nHello : ' + theWord + ' maybe mispelled or not the correct root word for dicitionary lookup or wrong language code' self.ui.textEdit_2.append(s) s = 'Correct and try again' self.ui.textEdit_2.append(s) else: s = 'From : ' + data['from'] self.ui.textEdit_2.append(s) s = 'Dest : ' + data['dest'] self.ui.textEdit_2.append(s) s = 'Result : ' + data['result'] self.ui.textEdit_2.append(s) s = 'Phrases: ' + data['phrase'] self.ui.textEdit_2.append(s) self.ui.textEdit_2.append('\n') self.ui.textEdit_2.append('Translations : ') # translation results phr = data['tuc'] for item in range(0, len(phr)): try: s = data['tuc'][item]['phrase']['text'] + ' , ' self.ui.textEdit_2.append(s) except: pass self.ui.textEdit_2.append( '\n\n\n Translation + Sample Sentence\n\n') r2 = requests.get( 'http://glosbe.com/gapi/translate?from=%s&dest=%s&format=json&tm=true&phrase=%s&pretty=true' % (orglang, destlang, theWord)) try: data = json.loads(r2.text) # translation results phr = data['tuc'] #pprint(phr) if len(phr) == 0: self.ui.textEdit_2.append( ' Nothing returned from Glosbe') else: # for formatting precalc maxl maxl = 0 ll = 0 for item in range(0, len(phr)): try: if destlang == 'jpn' or destlang == 'zh' or destlang == 'rus': ll = len( 
str(phr[item]['phrase']['text']).rstrip( ' ')) else: ll = len( str(phr[item]['phrase']['text']).encode( 'UTF-8').rstrip(' ')) if ll > maxl: maxl = ll except: ll = 10 pass try: self.ui.textEdit_2.append('\nPhrase/Meanings :\n') if len(phr) == 0: self.ui.textEdit_2.append( ' Nothing returned from Glosbe') else: for item in range(0, len(phr)): try: if destlang == 'jpn' or destlang == 'zh' or destlang == 'rus': if phr[item]['phrase']['text'] <> '': s = 'Phrase : ' + phr[item][ 'phrase']['text'].replace( ''', "'").replace( '’', "'").replace( 'é', '`') self.ui.textEdit_2.append(s) for itx in range( 0, len(phr[item])): try: if phr[item]['meanings'][ itx]['text'] <> '': s = 'Meaning : ' + phr[ item]['meanings'][itx][ 'text'].replace( ''', "'" ).replace( '’', "'" ).replace( 'é', '`') self.ui.textEdit_2.append( s) except: pass else: if phr[item]['phrase']['text'].encode( 'UTF-8') <> '': s = 'Phrase : ' + phr[item][ 'phrase']['text'].encode( 'UTF-8').replace( ''', "'").replace( '’', "'").replace( 'é', '`') self.ui.textEdit_2.append(s) for itx in range( 0, len(phr[item])): try: if phr[item]['meanings'][ itx]['text'].encode( 'UTF-8') <> '': s = 'Meaning : ' + phr[ item]['meanings'][ itx]['text'].encode( 'UTF-8' ).replace( ''', "'" ).replace( '’', "'" ).replace( 'é', '`') self.ui.textEdit_2.append( s) except: pass self.aline() except: pass except: #print 'Error in Phrase/Meanings' #raise pass try: self.ui.textEdit_2.append('\n') if data['tuc'][item]['phrase']['text'] <> ' ': for ite in range(0, len(data['examples'])): if destlang == 'jpn' or destlang == 'zh' or destlang == 'rus': ss = data['examples'][ite][ 'second'].replace( '<strong class="keyword">', '') ss = ss.replace('</strong>', '').replace( '#', '').replace('|', '') sf = data['examples'][ite][ 'first'].replace( '<strong class="keyword">', '') sf = sf.replace('</strong>', '').replace( '#', '').replace('|', '') if sf <> '': s = '\nExamples for : ' + data['tuc'][ item]['phrase']['text'] self.ui.textEdit_2.append(s) s = sf 
self.ui.textEdit_2.append(s) self.ui.textEdit_2.append(ss) else: ss = data['examples'][ite][ 'second'].encode('UTF-8').replace( '<strong class="keyword">', '') ss = ss.replace('</strong>', '').replace( '#', '').replace('|', '') sf = data['examples'][ite][ 'first'].replace( '<strong class="keyword">', '') sf = sf.replace('</strong>', '').replace( '#', '').replace('|', '') if sf.encode('utf-8') <> '': s = '\nExamples for : ' + data['tuc'][ item]['phrase']['text'] self.ui.textEdit_2.append(s) s = sf.encode('utf-8') self.ui.textEdit_2.append(s) self.ui.textEdit_2.append(ss) except: #raise pass except: #raise self.ui.textEdit_2.append( 'JSon Error, maybe no data retrieved') self.ui.textEdit_2.append('Re-try') end = time.clock() s = 'Request duration : ' + str(end - start) + ' secs' self.ui.label_4.setText(s) self.ui.label_3.setText('Finished GLOSBE request ...') elif self.ui.comboBox.currentText() == 'WEBLIO': start = time.clock() try: self.oldcurrLine = self.lineNumber2() s = Weblio() lt = -1 theText = theWord # we need to limit the length: testing with 80 lt = len(theText) #firstLine = self.lineNumber2() #print 'FirstLine :',firstLine #firstPosition = self.ui.textEdit_3.textCursor().position() #print 'Firstpos :',firstPosition if (lt > 0) and (lt < 80): self.ui.textEdit_2.append('\nWeblio Results') self.ui.textEdit_2.append('for:') self.ui.textEdit_2.append(theText) self.ui.textEdit_2.append('---------------------\n') #x = self.ui.spinBox_2.value() # how many items to fetch # here we hardset to 10 examples max x = 10 res = s.examples(theText, x) # ok key = 0 for dx in res: rx = self.remove_comments2(dx[1], '<!--') rx2 = self.remove_comments2(dx[2], '<!--') rx3 = self.doMecab(rx2) key += 1 # now we check if we are ascii and print accordingly try: theText.decode('ascii') except: # we selected a japanese text rx4 = self.doMecab(rx) # oks=str(key)+' : '+rx+' '+rx2.encode('utf8')+' '+kata2hira(rx4).decode('utf8') oks = str(key) + ' : ' + rx self.ui.textEdit_2.append(oks) 
oks = str(key) + ' : ' + rx2.encode('utf8').strip( '\n') self.ui.textEdit_2.append(oks) oks = str(key) + ' : ' + kata2hira(rx4).decode( 'utf8') self.ui.textEdit_2.append(oks) else: # we selected a english text oks = str(key) + ' : ' + rx.encode('utf8') self.ui.textEdit_2.append(oks) oks = str(key) + ' : ' + rx2 self.ui.textEdit_2.append(oks) oks = str(key) + ' : ' + kata2hira(rx3).decode( 'utf8') self.ui.textEdit_2.append(oks) self.ui.textEdit_2.append('---------------------\n') else: if lt > 1: oks = 'Weblio line is too long. Length : %i' % lt self.ui.textEdit_2.append(oks) self.ui.textEdit_2.append('---------------------\n') except: pass # we try jump to the begining of the latest weblio data # for time being just jump to bottom self.ui.textEdit_2.moveCursor(QTextCursor.End) currLine = self.lineNumber2() mv = currLine - self.oldcurrLine # now move #print '\nOldCurrLine :',self.oldcurrLine #print 'CurrLine :',currLine #print 'mv :',mv #print '\n' for j in range(0, mv): self.ui.textEdit_2.moveCursor( QTextCursor.Up) #,QTextCursor.MoveAnchor) nowLine = self.lineNumber2() #print 'NowLine : ',nowLine self.oldcurrLine = currLine self.ui.label_3.setText('Finished Weblio request ....') end = time.clock() s = 'Request duration : ' + str(end - start) + ' secs' self.ui.label_4.setText(s) elif self.ui.comboBox.currentText() == 'JDICT': start = time.clock() # try translate from JDic # while ok it cud be faster self.ui.textEdit_2.clear() self.JDictToggleFlag = True if self.JDictToggleFlag == True: try: jdictranslations = JDic().lookup(unicode(theWord)) sl = 0 jdi = 0 # nrset used as divider for linefeed inserts below nrset = [ '(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)', '(8)', '(9)', '(10)', '(11)', '(12)', '(13)', '(14)', '(15)', '(16)', '(17)', '(18)', '(19)', '(20)' ] for key in jdictranslations.keys(): sl += 1 # if no key than we never come here if jdi == 0: # only append once per loop self.ui.textEdit_2.append("\nJDic Info : \n") jdi = 1 # this gives a wider view but 
still messy #self.ui.textEdit_2.append(jdictranslations[key]+"\n") # this gives a more readable view tt = '' for xs in jdictranslations[key]: if xs <> ";": # only one space or semicolon or we get empty stuff tt = tt + xs else: # below code by trial and error to have a readable representation tt = tt.replace( ' ', ' ') # note this is a space and a tab tt = tt.replace('\n', '') # get rid of linefeeds tt = tt.replace( ' ', '\n' ) # insert a linefeed if there are 2 spaces tt = tt + ';' # add the semicolon back for nx in nrset: # iterate over our nrset and insert linefeeds for better readability tt = tt.replace(nx, '\n' + nx + '\n') self.ui.textEdit_2.append(tt) tt = '' if sl == 0: self.ui.textEdit_2.append('No info from JDict for ' + theWord) except: # occasional non type objects will occure so we just skip it pass finally: # give a notice if run finished self.ui.textEdit_2.append('JDict-Finished') #TODO:: try similar wordnet , wordnet has quota so may not work as wanted end = time.clock() s = 'Request duration : ' + str(end - start) + ' secs' self.ui.label_4.setText(s)
total_freq = 0 for file_name in file_list: print file_name with codecs.open(file_name, 'r', 'utf-8') as f: for line in f: arr = line.split() base = 0 for i, e in enumerate(arr): if e[0] <= '9' and e[0] >= '0': base = i kanji = arr[base-1][1:] freq = arr[base][:-2] kana = kata2hira(arr[base+2][:-1]) print kanji, freq, kana if len(kana) == 0: continue elif len(kana) > 1 and kana[0] == '{': ks = kana[1:kana.index('}')].split('/') ks = [k + kana[kana.index('}')+1:] for k in ks if len(k) > 0] else: ks = [kana, ] freq = int(freq) for k in ks: if k not in temp_obj:
def getExamplesKana(query):
    """Fetch Jisho example sentences for the kana rendering of *query*.

    Works, but the reading can be slightly off for rare words (mecab shenanigans),
    e.g. u'軈て'.
    """
    kana = kata2hira(''.join(MecabTool.parseToReadingsKana(query)))
    return JishoClient.getExamples(kana)