def getChars(freqFile, startNo, endNo):
    """
    Read a slice of a tab-separated frequency list and convert its readings.

    All rows of ``freqFile`` are read, the rows ``startNo:endNo`` are kept
    and columns 1, 4 and 5 of every kept row are extracted. The middle
    value holds one or more ``/``-separated Pinyin readings written with
    tone numbers (a missing number is read as the fifth tone); each reading
    is passed through cjklib's Pinyin-to-Pinyin conversion.

    :param freqFile: path of the UTF-8 encoded, tab-separated input file
    :param startNo: first row to return (list slice semantics)
    :param endNo: row after the last one to return (list slice semantics)
    :return: list of ``[row[1], readings, row[5]]`` lists, where
        ``readings`` is every converted reading followed by one space
    """
    # Close the file deterministically instead of leaking the handle.
    with codecs.open(freqFile, 'rb', "utf-8") as freqStream:
        reader = unicode_csv_reader(freqStream, dialect='excel-tab')
        # The whole file is materialised so that negative slice bounds keep
        # working exactly like before.
        frequencyList = list(reader)[startNo:endNo]

    # One factory serves all rows; creating one per row is wasted work.
    pinyin = ReadingFactory()

    chars = []
    for row in frequencyList:
        templist = [row[i] for i in (1, 4, 5)]
        # Every reading keeps its trailing space, as callers received it
        # before.
        templist[1] = "".join(
            pinyin.convert(reading, 'Pinyin', 'Pinyin',
                           sourceOptions={'toneMarkType': 'numbers',
                                          'missingToneMark': 'fifth'}) + " "
            for reading in templist[1].split('/'))
        chars.append(templist)
    return chars
class PinyinICUTest(NeedsDatabaseTest, unittest.TestCase):
    """Test Pinyin tonemark conversion on ICU transformation rule."""
    CONVERSION_DIRECTION = ('Pinyin', 'Pinyin')

    def setUp(self):
        """
        Create the reading factory and, when PyICU can be imported, the ICU
        transliterators ``toNumeric`` and ``fromNumeric``.
        """
        NeedsDatabaseTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

        try:
            import PyICU

            self.toNumeric = PyICU.Transliterator.createInstance(
                "Latin-NumericPinyin", PyICU.UTransDirection.UTRANS_FORWARD)
            self.fromNumeric = self.toNumeric.createInverse()
        except ImportError:
            # Without PyICU the attributes stay unset and the test method
            # returns early.
            pass

    def testToneMarkPlacement(self):
        """Test Pinyin tonemark conversion on ICU transformation rule."""
        if not hasattr(self, 'toNumeric'):
            return

        # Entities left out of the diacritics -> numbers comparison.
        skippedDiacritics = (
            u'hn\u0304g', u'h\u0144g', u'h\u0148g', u'h\u01f9g',
            u'n\u0304g', u'\u0144g', u'\u0148g', u'\u01f9g')
        for readingEntity in self.f.getReadingEntities('Pinyin'):
            if readingEntity in skippedDiacritics:
                continue
            targetEntity = self.f.convert(
                readingEntity, 'Pinyin', 'Pinyin',
                targetOptions={'toneMarkType': 'numbers',
                               'missingToneMark': 'fifth'})
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(targetEntity,
                             self.toNumeric.transliterate(readingEntity))

        # Entities left out of the numbers -> diacritics comparison.
        skippedNumbers = (
            'hng1', 'hng2', 'hng3', 'hng4', 'ng1', 'ng2', 'ng3', 'ng4',
            u'ê1', u'ê2', u'ê3', u'ê4')
        for readingEntity in self.f.getReadingEntities(
                'Pinyin', toneMarkType='numbers', missingToneMark='fifth'):
            if readingEntity in skippedNumbers:
                continue
            targetEntity = self.f.convert(
                readingEntity, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers',
                               'missingToneMark': 'fifth'})
            self.assertEqual(targetEntity,
                             self.fromNumeric.transliterate(readingEntity))
class ReadingConversion(Base):
    """Formatting strategy converting a reading string to a target reading."""

    def __init__(self, toReading=None, targetOptions=None):
        """
        Creates the conversion strategy.

        :type toReading: str
        :param toReading: target reading; the dictionary's own reading is
            used when omitted
        :type targetOptions: dict
        :param targetOptions: options of the target reading
        """
        Base.__init__(self)
        self.toReading = toReading
        # Any false value (None, empty dict) ends up as a fresh empty dict.
        self.targetOptions = targetOptions or {}

    def setDictionaryInstance(self, dictInstance):
        """
        Binds the strategy to a dictionary and checks the conversion.

        :param dictInstance: dictionary providing ``READING``,
            ``READING_OPTIONS`` and ``db``
        :raise ValueError: if the dictionary lacks ``READING`` or
            ``READING_OPTIONS``, or if the conversion is not supported
        """
        super(ReadingConversion, self).setDictionaryInstance(dictInstance)

        dictionary = self._dictInstance
        if not (hasattr(dictionary, 'READING')
                and hasattr(dictionary, 'READING_OPTIONS')):
            raise ValueError('Incompatible dictionary')

        self.fromReading = dictionary.READING
        self.sourceOptions = dictionary.READING_OPTIONS
        self._readingFactory = ReadingFactory(dbConnectInst=dictionary.db)

        target = self.toReading or self.fromReading
        supported = self._readingFactory.isReadingConversionSupported(
            self.fromReading, target)
        if not supported:
            raise ValueError("Conversion from '%s' to '%s' not supported"
                             % (self.fromReading, target))

    def format(self, string):
        """
        Converts ``string`` from the dictionary's reading to the target one.

        :param string: reading string to convert
        :return: the converted string, or ``None`` if decomposition,
            composition or conversion fails
        """
        target = self.toReading or self.fromReading
        try:
            converted = self._readingFactory.convert(
                string, self.fromReading, target,
                sourceOptions=self.sourceOptions,
                targetOptions=self.targetOptions)
        except (exception.DecompositionError, exception.CompositionError,
                exception.ConversionError):
            return None
        else:
            return converted
def _decomposeAndRemovePinyinTones(string, type='diacritics'): if string is None: return None if not isinstance(string, unicode): string = unicode(string, 'utf-8') # print "isinstance of unique: " + keyword from cjklib.reading import ReadingFactory rf = ReadingFactory() readings = rf.decompose(string, 'Pinyin') readings = [rf.convert(string, 'Pinyin', 'Pinyin', sourceOptions={'toneMarkType': type}, targetOptions={'toneMarkType': 'none'}).lower().replace(u'ü', u'v') for string in readings] readings = [r for r in readings if r != ' ' and r != "'"] return readings
class ReadingConversion(Base):
    """Strategy that rewrites an entry's reading in another reading."""

    def __init__(self, toReading=None, targetOptions=None):
        """
        Sets up the conversion strategy.

        :type toReading: str
        :param toReading: reading to convert to; defaults to the reading of
            the dictionary when left out
        :type targetOptions: dict
        :param targetOptions: conversion options for the target reading
        """
        Base.__init__(self)
        self.toReading = toReading
        self.targetOptions = targetOptions if targetOptions else {}

    def setDictionaryInstance(self, dictInstance):
        """
        Attaches the dictionary and prepares the reading factory.

        :param dictInstance: dictionary exposing ``READING``,
            ``READING_OPTIONS`` and ``db``
        :raise ValueError: for a dictionary without ``READING`` or
            ``READING_OPTIONS`` and for an unsupported conversion
        """
        super(ReadingConversion, self).setDictionaryInstance(dictInstance)

        for attribute in ('READING', 'READING_OPTIONS'):
            if not hasattr(self._dictInstance, attribute):
                raise ValueError('Incompatible dictionary')

        self.fromReading = self._dictInstance.READING
        self.sourceOptions = self._dictInstance.READING_OPTIONS
        self._readingFactory = ReadingFactory(
            dbConnectInst=self._dictInstance.db)

        if self.toReading:
            destination = self.toReading
        else:
            destination = self.fromReading
        if self._readingFactory.isReadingConversionSupported(
                self.fromReading, destination):
            return
        raise ValueError("Conversion from '%s' to '%s' not supported"
                         % (self.fromReading, destination))

    def format(self, string):
        """
        Converts ``string`` to the target reading.

        :param string: reading string to convert
        :return: the converted string; the unchanged input if
            decomposition, composition or conversion fails
        """
        if self.toReading:
            destination = self.toReading
        else:
            destination = self.fromReading

        conversionArguments = dict(sourceOptions=self.sourceOptions,
                                   targetOptions=self.targetOptions)
        try:
            return self._readingFactory.convert(
                string, self.fromReading, destination, **conversionArguments)
        except (exception.DecompositionError, exception.CompositionError,
                exception.ConversionError):
            # Fallback of the original author ("wighack"): hand back the
            # input untouched.
            return string
class ReadingTransliterator(icu.Transliterator):
    """ICU transliterator backed by a cjklib reading converter."""

    def __init__(self, fromReading, toReading, variant=None, **options):
        """
        Build the transliterator ``<fromReading>-<toReading>[/<variant>]``.

        :param fromReading: name of the source reading
        :param toReading: name of the target reading
        :param variant: optional suffix appended to the id after a slash
        :param options: passed on to ``createReadingConverter``
        """
        identifier = '%s-%s' % (fromReading, toReading)
        if variant:
            identifier = identifier + '/' + variant
        self.id = identifier

        icu.Transliterator.__init__(self, self.id)
        factory = ReadingFactory()
        self._conv = factory.createReadingConverter(fromReading, toReading,
                                                    **options)

    def handleTransliterate(self, text, position, complete):
        """
        Convert the span ``position.start:position.limit`` of ``text`` in
        place and move the position markers past the converted text.
        """
        begin, end = position.start, position.limit
        source = str(text[begin:end])
        target = self._conv.convert(source)
        text[begin:end] = target

        # The limits shrink (or grow) by the change in length.
        shrink = len(source) - len(target)
        position.limit -= shrink
        position.contextLimit -= shrink
        position.start = position.limit

    @staticmethod
    def register(fromReading, toReading, variant=None, registerInverse=False,
                 **options):
        """
        Register a transliterator instance with ICU.

        :param registerInverse: if true, also register the opposite
            direction with ``sourceOptions`` and ``targetOptions`` swapped
        :return: id of the forward transliterator
        """
        forward = ReadingTransliterator(fromReading, toReading,
                                        variant=variant, **options)
        icu.Transliterator.registerInstance(forward)

        if registerInverse:
            swapped = dict(options)
            swapped.update(
                targetOptions=options.get('sourceOptions', {}),
                sourceOptions=options.get('targetOptions', {}))
            backward = ReadingTransliterator(toReading, fromReading,
                                             variant=variant, **swapped)
            icu.Transliterator.registerInstance(backward)

        return forward.id
def getChars(freqFile, startNo, endNo):
    """
    Return rows ``startNo:endNo`` of a frequency list with converted Pinyin.

    The UTF-8 encoded, tab-separated file is read completely and sliced.
    For every remaining row the values of columns 1, 4 and 5 are collected;
    the middle one is split on ``/`` and each part is converted from
    tone-number Pinyin (missing tone = fifth tone) through cjklib.

    :param freqFile: path of the input file
    :param startNo: slice start
    :param endNo: slice end (exclusive)
    :return: list of three-element lists; the second element is the
        concatenation of all converted readings, each followed by a space
    """
    # ``with`` guarantees the handle is released; it used to stay open.
    with codecs.open(freqFile, 'rb', "utf-8") as handle:
        rows = list(unicode_csv_reader(handle, dialect='excel-tab'))
    rows = rows[startNo:endNo]

    # Loop-invariant: one factory is shared by all rows.
    pinyin = ReadingFactory()

    chars = []
    for row in rows:
        templist = [row[column] for column in (1, 4, 5)]
        converted = [
            pinyin.convert(reading, 'Pinyin', 'Pinyin',
                           sourceOptions={'toneMarkType': 'numbers',
                                          'missingToneMark': 'fifth'})
            for reading in templist[1].split('/')]
        # Same output as the old ``+=`` loop: a space after every reading.
        templist[1] = "".join(item + " " for item in converted)
        chars.append(templist)
    return chars
class ReadingTransliterator(icu.Transliterator):
    """Transliterator for ICU that converts between cjklib readings."""

    def __init__(self, fromReading, toReading, variant=None, **options):
        """
        Create the transliterator; its id is ``fromReading-toReading``,
        followed by ``/variant`` when a variant is given.

        :param options: forwarded to ``createReadingConverter``
        """
        idParts = ['%s-%s' % (fromReading, toReading)]
        if variant:
            idParts.append(variant)
        self.id = '/'.join(idParts)

        icu.Transliterator.__init__(self, self.id)
        self._conv = ReadingFactory().createReadingConverter(
            fromReading, toReading, **options)

    def handleTransliterate(self, text, position, complete):
        """
        Replace ``text[position.start:position.limit]`` by its conversion
        and update ``position`` for the new length.
        """
        window = slice(position.start, position.limit)
        before = unicode(text[window])
        after = self._conv.convert(before)
        text[window] = after

        delta = len(before) - len(after)
        position.limit = position.limit - delta
        position.contextLimit = position.contextLimit - delta
        position.start = position.limit

    @staticmethod
    def register(fromReading, toReading, variant=None, registerInverse=False,
                 **options):
        """
        Register the transliterator, and optionally its inverse, with ICU.

        For the inverse the values of ``sourceOptions`` and
        ``targetOptions`` trade places (missing ones become ``{}``).

        :return: id of the forward transliterator
        """
        def makeAndRegister(source, target, converterOptions):
            instance = ReadingTransliterator(source, target, variant=variant,
                                             **converterOptions)
            icu.Transliterator.registerInstance(instance)
            return instance

        trans = makeAndRegister(fromReading, toReading, options)
        if not registerInverse:
            return trans.id

        inverseOptions = options.copy()
        inverseOptions['targetOptions'] = options.get('sourceOptions', {})
        inverseOptions['sourceOptions'] = options.get('targetOptions', {})
        makeAndRegister(toReading, fromReading, inverseOptions)
        return trans.id
class ChineseLessonsComMandarinPronunciation(GlobbingPronunciationBuilder):
    """
    Builds an index on the Mandarin pronunciation files provided by
    chinese-lessons.com.
    """
    PROVIDES = "Pronunciation_Pinyin"
    DEPENDS = ['PinyinSyllables']
    BASE_DIRECTORY_NAME = "chineselessionscom_cmn"

    def __init__(self, **options):
        """Pass ``options`` to the parent and create the reading factory."""
        super(ChineseLessonsComMandarinPronunciation, self).__init__(
            **options)
        self.readingFactory = ReadingFactory()

    def getReadingFromFileName(self, fileName):
        """
        Derive the Pinyin reading from an audio file name.

        The name without its extension is read as Pinyin with tone numbers
        and converted through cjklib.

        :param fileName: name of the pronunciation file
        :return: the converted reading, or ``None`` if cjklib reports an
            unsupported or failed conversion
        """
        stem = os.path.splitext(fileName)[0]
        try:
            reading = self.readingFactory.convert(
                stem, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers'})
        except (exception.UnsupportedError, exception.ConversionError):
            return None
        return reading
def handle_noargs(self, **options):
    """
    Rebuild the Chinese word ("ZH:*") and Pinyin ("PY:*") entries in Redis.

    Every non-comment line of ``settings.DICT_FILE_LOCATION`` is expected
    to look like::

        一事無成 一事无成 [yi1 shi4 wu2 cheng2] /to have achieved nothing/.../

    For each line the second column (the characters), the bracketed
    numbered Pinyin and the slash-delimited meanings are extracted.
    Meanings containing ``CL:`` are treated as measure-word lists: they are
    indexed under ``settings.MEASURE_WORD_KEY`` and removed from the stored
    meanings.

    :param options: command options (unused)
    :raise ValueError: if a data line lacks ``[...]`` or ``/.../``, or a
        measure word lacks its ``[`` reading part
    """
    # EMPTY ALL ZH + PY KEYS
    self._del_keys('ZH:*')
    self._del_keys('PY:*')

    # One connection and one reading factory serve the whole import; they
    # used to be recreated for every single line.
    r_server = _get_redis()
    reading_factory = ReadingFactory()

    item_count = 0
    # ``with`` closes the dictionary file even if a line fails to parse.
    with open(settings.DICT_FILE_LOCATION) as dict_file:
        for line in dict_file:
            # Skip comments, and blank lines that would otherwise crash
            # the column extraction below.
            if line.startswith("#") or not line.strip():
                continue

            # GATHER ALL THE MAIN VARIABLES
            columns = line.split()
            numbered_pinyin = line[line.index('[') + 1:line.index(']')]
            tonal_pinyin = reading_factory.convert(
                numbered_pinyin, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers',
                               'yVowel': 'v',
                               'missingToneMark': 'fifth'})
            meanings = line[line.index('/') + 1:line.rindex('/')]

            # REMOVE ALL THE UGLY CHARACTERS
            characters = columns[1].replace(',', '')

            # GET AND CLEAN THE MEASURE WORD
            mws = None
            if "CL:" in meanings:
                # Build a filtered copy instead of popping from the list
                # that is being enumerated, which silently skipped the
                # entry following each removed one.
                kept_meanings = []
                for val in meanings.split('/'):
                    if "CL:" not in val:
                        kept_meanings.append(val)
                        continue
                    # Collect the measure words of all CL entries rather
                    # than only those of the last one.
                    if mws is None:
                        mws = []
                    for x in val.replace('CL:', '').split(','):
                        # Drop the reading part, then keep the form after
                        # the "|" separator if there is one.
                        x = x[:x.index('[')]
                        if '|' in x:
                            x = x[x.index('|') + 1:]

                        # ADD THE MEASURE WORDS ENTRY
                        mws_key = settings.MEASURE_WORD_KEY % x
                        if r_server.exists(mws_key):
                            values = json.loads(_search_redis(mws_key))
                            values['chars'].append(characters)
                        else:
                            values = {'chars': [characters]}
                        r_server.set(mws_key, json.dumps(values))
                        mws.append(x)
                meanings = "/".join(kept_meanings)

            # NOTE(review): the division by 3 presumably turns a UTF-8 byte
            # length into a character count - confirm against the key
            # format. ``//`` keeps the integer result on every Python.
            char_key = settings.CHINESE_WORD_KEY % (
                len(characters) // 3, characters)

            # CREATE THE PRONUNCIATION/MEANING PAIR
            pair = {
                'pinyin': tonal_pinyin,
                'pinyin_numbered': _normalize_pinyin(numbered_pinyin),
                'meaning': meanings,
                'measure_words': mws,
            }

            # ADD THE PINYIN ENTRY
            py_key = settings.PINYIN_WORD_KEY % _pinyin_to_ascii(
                numbered_pinyin)
            if r_server.exists(py_key):
                values = json.loads(_search_redis(py_key))
                if smart_unicode(characters) not in values:
                    values.append(characters)
            else:
                values = [characters]
            r_server.set(py_key, json.dumps(values))

            # ADD THE CHINESE CHARACTER ENTRY
            if r_server.exists(char_key):
                values = json.loads(_search_redis(char_key))
                values['meanings'].append(pair)
            else:
                values = {'chars': characters, 'meanings': [pair]}
            r_server.set(char_key, json.dumps(values))

            item_count += 1
            print(item_count)

    print("%s Chinese items added" % item_count)
class LeoDownloader(AudioDownloader):
    """Download audio from LEO"""

    def __init__(self):
        """Set up URLs, language codes and icon addresses for LEO."""
        AudioDownloader.__init__(self)
        self.file_extension = u'.mp3'
        self.url = 'http://www.leo.org/dict/audio_{language}/{word}.mp3'
        # And, yes, they use ch for Chinese.
        # (I'm not sure if they really have anything for ru or it.)
        self.language_dict = {
            'de': 'de',
            'en': 'en',
            'es': 'es',
            'fr': 'fr',
            'it': 'it',
            'ru': 'ru',
            'zh': 'ch',
        }
        # It kind of looks like they have Swiss pronunciations, but they
        # don't.
        self.chinese_code = 'ch'
        # We should keep a number of site icons handy, with the right
        # flag for the request.
        self.site_icon_dict = {}
        self.site_file_name_encoding = 'ISO-8859-1'
        self.icon_url_dict = {
            'de': 'http://dict.leo.org/favicon.ico',
            'en': 'http://dict.leo.org/favicon.ico',
            'es': 'http://dict.leo.org/favicon_es.ico',
            'fr': 'http://dict.leo.org/favicon_fr.ico',
            'it': 'http://dict.leo.org/favicon_it.ico',
            'ru': 'http://dict.leo.org/favicon_ru.ico',
            # When we use this dict, we have already munged the 'zh' to 'ch'
            'ch': 'http://dict.leo.org/favicon_ch.ico',
        }
        # As the name implies, a hack. Try to use the cjklib TTEMPÉ
        # brings along. A system-wide installed one should work as
        # well.
        self.have_tried_cjklib_hack = False
        self.reading_factory = None

    def download_files(self, word, base, ruby, split):
        """
        Download a word from LEO

        We try to get pronunciations for the text for German, English,
        Spanish, French, Italian and Russian, and from the ruby for
        Chinese. There may not be any pronunciations available for
        Italian or Russian.

        ``self.language`` must have been set before the call; it is
        replaced by LEO's own language code. For Chinese nothing is
        downloaded unless ``split`` is true. The result is stored in
        ``self.downloads_list``.

        Raises KeyError for a language LEO does not offer and ValueError
        (from set_names) when there is nothing to look up.
        """
        self.downloads_list = []
        # Fix the language. EAFP.
        self.language = self.language_dict[self.language[:2].lower()]
        # set_names also checks the language.
        self.set_names(word, base, ruby)
        if self.chinese_code == self.language and not split:
            return
        # Only get the icon when we have a word
        # self.maybe_get_icon()
        self.get_flag_icon()
        # EAFP. If the URL is unusable, get_data_from_url will blow up.
        word_url = self.query_url(word, ruby)
        word_data = self.get_data_from_url(word_url)
        word_file_path, word_file_name = self.get_file_name()
        with open(word_file_path, 'wb') as word_file:
            word_file.write(word_data)
        # We have a file, but not much to say about it.
        self.downloads_list.append(
            (word_file_path, word_file_name, dict(Source='Leo')))

    def query_url(self, word, ruby):
        """Build query URL"""
        if self.chinese_code == self.language:
            # Chinese audio is looked up by its (numbered) pinyin.
            word = self.fix_pinyin(ruby)
        quoted_word = urllib.quote(word.encode(self.site_file_name_encoding))
        return self.url.format(language=self.language, word=quoted_word)

    def fix_pinyin(self, pinyin):
        """
        Return pinyin with tone numbers, the neutral tone written as 0.

        When cjklib cannot be imported the input is returned unchanged.
        """
        # Hacks. It is overkill to ship cjklib with this add-on. But
        # to get the tone numbers as numbers, we should use it. My
        # hope (guess) is that the typical user that will want Chinese
        # pronunciations will also have TTEMPÉ's (version of mine)
        # chinese-support-plugin installed. So try to use that and
        # don't complain if it doesn't work.
        if not self.have_tried_cjklib_hack:
            try:
                # If this works, the whole shebang is run as an Anki2
                # add-on. If not, we will still look for a system-wide
                # cjklib, but obviously not for another add-on.
                from aqt.utils import isWin
            except Exception:
                pass
            else:
                from aqt import mw
                addon_dir = mw.pm.addonFolder()
                if isWin:
                    # The isWin bit is copied from TTEMPÉ's code.
                    addon_dir = addon_dir.encode(sys.getfilesystemencoding())
                sys.path.append(os.path.join(addon_dir, "chinese"))
            # Set the flag that is actually checked above. The misspelled
            # name used before made this block run, and sys.path grow, on
            # every call.
            self.have_tried_cjklib_hack = True
        if not self.reading_factory:
            try:
                from cjklib.reading import ReadingFactory
            except ImportError:
                return pinyin
            else:
                self.reading_factory = ReadingFactory()
        numbered = self.reading_factory.convert(
            pinyin, 'Pinyin', 'Pinyin',
            targetOptions={'toneMarkType': 'numbers'})
        return numbered.replace('5', '0')

    def get_flag_icon(self):
        """
        Set self.site_icon to the right icon.

        We should use different icons, depending on the request
        language. We store these icons in self.site_icon_dict and
        download the one for the current language if we don't have it
        yet.
        """
        if not with_pyqt:
            return
        try:
            # If this works we already have it.
            self.site_icon = self.site_icon_dict[self.language]
        except KeyError:
            # We have to get it ourselves. (We know it's just 16x16, so
            # no resize. And we know the address).
            self.site_icon_dict[self.language] = \
                QImage.fromData(self.get_data_from_url(
                    self.icon_url_dict[self.language]))
            self.site_icon = self.site_icon_dict[self.language]

    def set_names(self, text, base, ruby):
        """
        Set the display text and file base name variables.

        For Chinese both are built from base and ruby, otherwise from
        text. Raises ValueError when the required value is empty.
        """
        if self.language == self.chinese_code:
            if not ruby:
                raise ValueError('Nothing to download')
            self.base_name = u"{0}_{1}".format(base, ruby)
            self.display_text = u"{1} ({0})".format(base, ruby)
        else:
            if not text:
                raise ValueError('Nothing to download')
            self.base_name = text
            self.display_text = text
f = ReadingFactory() [ 'GR', 'Pinyin', 'WadeGiles', 'MandarinBraille', 'MandarinIPA', 'ShanghaineseIPA', #'Hangul', #'Kana', 'Hiragana', 'Katakana', 'CantoneseYale', 'CantoneseIPA', 'Jyutping' ] DConv = { # Mandarin conversions ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|x-Pinyin'): lambda s: f.convert(s, 'GR', 'Pinyin'), ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Wade-Giles'): lambda s: f.convert(s, 'GR', 'WadeGiles'), ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Braille'): lambda s: f.convert(s, 'GR', 'MandarinBraille'), ('cmn_Latn|Gwoyeu Romatzyh', 'cmn_Latn|Alternative IPA'): lambda s: f.convert(s, 'GR', 'MandarinIPA'), ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|x-Pinyin'): lambda s: f.convert(s, 'Pinyin', 'Pinyin', sourceOptions={ 'toneMarkType': 'numbers' }), ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Gwoyeu Romatzyh'): lambda s: f.convert(s, 'Pinyin', 'GR', sourceOptions={ 'toneMarkType': 'numbers' }), ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Wade-Giles'): lambda s: f.convert(s, 'Pinyin', 'WadeGiles', sourceOptions={ 'toneMarkType': 'numbers' }), ('cmn_Latn|Numeric Pinyin', 'cmn_Latn|Braille'): lambda s: f.convert(s, 'Pinyin', 'MandarinBraille', sourceOptions={
Radical 9 9 4EBA man rén Radical 30 30 53E3 mouth kǒu Radical 61 61 5FC3 heart xīn Radical 3 3 4E36 dot zhù Radical 4 4 4E3F slash piě Radical 5 5 4E59 second, fishing hook yǐ Radical 6 6 4E85 hook jué Radical 7 7 4E8C two èr Radical 8 8 4EA0 lid, head tóu Radical 10 10 513F legs ér Radical 11 11 5165 enter rù Radical 12 12 516B eight bā Radical 140 140 8278 grass cǎo Radical 24 24 5341 ten shí Radical 13 13 5182 wide jiōng Radical 14 14 5196 cover mī Radical 15 15 51AB ice bīng """ from cjklib.reading import ReadingFactory f = ReadingFactory() for line in entries.split('\n'): if not line.strip(): continue _, radicalIdx, _, meaning, pinyin = line.strip('\t').split('\t') pinyinNumbers = f.convert(pinyin, 'Pinyin', 'Pinyin', targetOptions={'toneMarkType': 'numbers'}) print '%(idx)d,"%(pinyin)s","%(meaning)s"' \ % {'meaning': meaning, 'idx': int(radicalIdx), 'pinyin': pinyinNumbers}
class NTrain(Tk.Tk):
    """
    Tk flash-card trainer for Chinese vocabulary stored in an Excel sheet.

    The sheet is read with pandas; the code uses its columns ``C`` (Chinese
    characters), ``E`` (English), optionally ``E_long`` and ``Learned``
    (0 or 1). The first screen selects the file, the number of cards and
    the direction; the second screen asks the questions.
    """

    def __init__(self, *args, **kwargs):
        """Build the settings screen."""
        Tk.Tk.__init__(self, *args, **kwargs)
        self.title("Ntrain")
        # place window in the center
        self.eval('tk::PlaceWindow %s center'
                  % self.winfo_pathname(self.winfo_id()))
        self._default_font = tkFont.nametofont("TkDefaultFont")
        self._default_font.configure(size=30)
        # define default dataset
        self._defaultfile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'chinese100.xlsx')
        # load default filename into entry
        basename = os.path.basename(self._defaultfile)
        self._filename_value = Tk.StringVar()
        self._sett_fn_label = Tk.Entry(textvariable=self._filename_value,
                                       font=self._default_font, width=12)
        self._filename_value.set(basename)
        self._sett_fn_label.grid(row=1, column=0, sticky=Tk.W)
        # button to browse for datafile
        self.browse = Tk.Button(self, text="Browse", command=self._get_file)
        self.browse.grid(row=1, column=1, sticky=Tk.W)
        # Reset button to mark all cards as not learned
        self._reset_button = Tk.Button(text="Reset",
                                       command=self._reset_list)
        self._reset_button.grid(row=1, column=2)
        # label
        self._sett_label = Tk.Label(text="Number of Cards:")
        self._sett_label.grid(row=2, column=0, sticky=Tk.E)
        # entry field for number of cards
        entryText = Tk.StringVar()
        self._sett_entry = Tk.Entry(textvariable=entryText,
                                    font=self._default_font, width=3)
        entryText.set("30")
        self._sett_entry.grid(row=2, column=1, sticky=Tk.W)
        self._sett_entry.focus_set()
        # reverse option
        self._radio_val = Tk.IntVar()
        self._radio1 = Tk.Radiobutton(text="Ch to E",
                                      variable=self._radio_val, value=1)
        self._radio1.grid(row=4, column=0)
        self._radio2 = Tk.Radiobutton(text="E to Ch",
                                      variable=self._radio_val, value=2)
        self._radio2.grid(row=4, column=1)
        self._radio_val.set(1)
        # OK button to start game
        self._sett_button = Tk.Button(text="OK", command=self._start_game)
        self._sett_button.grid(columnspan=3)
        # Bind return key to start game
        self.bind('<Return>', self._start_game)
        self._p = Pinyin()
        self._f = ReadingFactory()

    def _get_file(self):
        """Let the user pick a data file and show its base name."""
        # open dialogue to choose datafile
        my_file = askopenfilename()
        # A cancelled dialogue returns an empty value; keep the current
        # file name instead of blanking it.
        if not my_file:
            return
        # NOTE(review): only the base name is kept and _start_game looks
        # for it next to this script - files from other directories will
        # not be found.
        self._filename_value.set(os.path.basename(my_file))

    def _start_game(self, *args):
        """Load the data file, replace the settings GUI and start asking."""
        # get filename
        self._datafile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            self._filename_value.get())
        # get number of cards
        self._n_cards = int(self._sett_entry.get())
        # remove previous gui components
        self._sett_fn_label.destroy()
        self.browse.destroy()
        self._reset_button.destroy()
        self._sett_label.destroy()
        self._sett_entry.destroy()
        self._sett_button.destroy()
        self._radio1.destroy()
        self._radio2.destroy()
        self._save_reminder = 0
        # load in data file
        try:
            self._vocTot = pd.read_excel(self._datafile)
        except Exception:
            tkMessageBox.showinfo("Error", "File not found!", icon='warning')
            # _restart re-executes the program; never continue without
            # data should it return.
            self._restart()
            return
        # get indices of all cards that are not learned yet
        filled_idx = self._vocTot[self._vocTot['Learned'] == 0].index.tolist()
        # NOTE(review): the first matching index is always dropped - the
        # reason is not visible here. Guarded so an empty list does not
        # raise.
        if filled_idx:
            del filled_idx[0]
        # shuffle indices
        self._renew_index(filled_idx)
        # setup new gui
        self._setup_game_gui()
        # start with first question
        self._show_next_question()

    def _setup_game_gui(self):
        """Create the widgets of the question screen."""
        # labels for chinese symbols
        self.C_labels = []
        # labels for questions
        self.Q_labels = []
        # label for correct solution
        self._sol_label_value = Tk.StringVar()
        self._sol_label = Tk.Label(textvariable=self._sol_label_value)
        self._sol_label.grid(row=3, column=2)
        # entry field for answer
        self._entry_value = Tk.StringVar()
        self._entry = Tk.Entry(textvariable=self._entry_value,
                               font=self._default_font)
        self._entry.grid(row=4, column=2)
        self._entry.focus_set()
        self.bind('<Return>', self._check_answer)
        # Check button
        self._check_button = Tk.Button(text="Check",
                                       command=self._check_answer)
        self._check_button.grid(row=1, column=1, sticky=Tk.W)
        # Save button
        self._save_button = Tk.Button(text="Save", command=self._save)
        self._save_button.grid(row=2, column=1, sticky=Tk.W)
        # Next button
        self._next_button = Tk.Button(text="Next",
                                      command=self._show_next_question)
        self._next_button.grid(row=3, column=1, sticky=Tk.W)
        # New button
        self._new_button = Tk.Button(text="New", command=self._restart)
        self._new_button.grid(row=4, column=1, sticky=Tk.W)
        # translate field
        self._tr_value = Tk.StringVar()
        self._tr = Tk.Entry(textvariable=self._tr_value,
                            font=self._default_font)
        self._tr.grid(row=5, column=2)
        self._tr_button = Tk.Button(text="E-C", command=self._translate)
        self._tr_button.grid(row=5, column=1, sticky=Tk.W)
        # initialize list of wrong cards
        self._wrong_indices = []
        # initialize current index
        self._no = 0

    def _renew_index(self, indices):
        """Shuffle ``indices`` in place and keep the first ``_n_cards``."""
        # TODO: catch too many cards chosen as input
        shuffle(indices)
        # take the first n cards
        self._indices = indices[0:self._n_cards]

    def _show_next_question(self):
        """Show the next card, or start a new round when none is left."""
        # Only the pop signals "no card left"; an IndexError raised further
        # down is a real error and is no longer mistaken for it.
        try:
            # get the next index in the list
            self._no = self._indices.pop(0)
        except IndexError:
            # start new round, when no card in list left
            self._new_round()
            return
        # empty entry field
        self._entry_value.set("")
        # empty Q and C labels
        for label in self.C_labels:
            label.destroy()
        for label in self.Q_labels:
            label.destroy()
        i = 1
        self.C_labels = []
        self.Q_labels = []
        # loop over Chinese characters
        for char in self._vocTot.C[self._no]:
            my_pinyin = self._p.get_pinyin(char, ' ')
            self.C_labels.append(Tk.Label(text=char))
            self.C_labels[-1].grid(row=2, column=i + 1)
            # colour the character by its tone
            to_tone = to_tone_number(my_pinyin)
            if "1" in to_tone:
                self.C_labels[-1].config(fg='red')
            elif "2" in to_tone:
                self.C_labels[-1].config(fg='green')
            elif "3" in to_tone:
                self.C_labels[-1].config(fg='blue')
            elif "4" in to_tone:
                self.C_labels[-1].config(fg='purple')
            else:
                self.C_labels[-1].config(fg='grey')
            if self._radio_val.get() == 1:
                self.Q_labels.append(Tk.Label(text=my_pinyin))
                self.Q_labels[-1].grid(row=1, column=i + 1)
            i += 1
        if self._radio_val.get() == 1:
            self._curr_ans = self._vocTot.E[self._no].encode('utf-8')
        elif self._radio_val.get() == 2:
            # prefer the long English text, fall back to the short one
            try:
                my_english = self._vocTot.E_long[self._no].encode('utf-8')
            except Exception:
                my_english = self._vocTot.E[self._no].encode('utf-8')
            self.Q_labels.append(Tk.Label(text=my_english))
            self.Q_labels[-1].grid(row=1, column=2, columnspan=i - 1)
            self._curr_ans = self._p.get_pinyin(self._vocTot.C[self._no],
                                                ' ')
        self._entry.grid(row=4, column=2, columnspan=i - 1)
        # set real_correct to default value of yes
        self._real_correct = 1

    def _check_answer(self, *args):
        """Compare the typed answer with the expected one."""
        # derive input
        answer = self._entry_value.get().strip().lower()
        # convert numbers, if provided, to pinyin tone marks
        if any(char.isdigit() for char in answer):
            answer = self._f.convert(
                answer, 'Pinyin', 'Pinyin',
                sourceOptions={'toneMarkType': 'numbers'}).encode('utf-8')
        # derive expected answer
        # ask for English word
        if self._radio_val.get() == 1:
            answer_to_check = self._curr_ans.encode('utf-8').lower()
        # ask for Chinese word
        elif self._radio_val.get() == 2:
            # The former "with tone marks" / "without tone marks" branches
            # were identical, so a single expression is used.
            answer_to_check = self._p.get_pinyin(
                self._vocTot.C[self._no], ' ').encode('utf-8').lower()
        # check if answer is correct
        if answer == answer_to_check:
            # mark as learned if the card was correct on first attempt
            if self._real_correct:
                # .loc writes into the frame itself; chained assignment
                # may write to a temporary copy.
                self._vocTot.loc[self._no, 'Learned'] = 1
                self._save_reminder = 1
            self._sol_label_value.set("")
            # if correct, go on to next card
            self._show_next_question()
        else:
            # if wrong:
            self._real_correct = 0
            # store index in list of wrong cards
            self._wrong_indices.append(self._no)
            # display correct answer
            self._sol_label_value.set(self._curr_ans)
            self._sol_label.grid(row=3, column=2,
                                 columnspan=len(self.C_labels))
            # clear entry field
            self._entry_value.set("")

    def _new_round(self):
        """Repeat the wrongly answered cards, or finish the session."""
        # if wrong cards still left, start new round
        if self._wrong_indices:
            # empty all display fields
            self._sol_label_value.set("New round!")
            self._renew_index(self._wrong_indices)
            # clear list of wrong indices
            self._wrong_indices = []
            # start new round with the next question
            self._show_next_question()
        else:
            # if no wrong cards left, finish the session
            self._exit()

    def _reset_list(self):
        """Set ``Learned`` to 0 for every card and write the file back."""
        self._datafile = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            self._filename_value.get())
        my_file = pd.read_excel(self._datafile)
        my_file['Learned'] = 0
        writer = ExcelWriter(self._datafile)
        my_file.to_excel(writer, 'Sheet1', index=False)
        writer.save()

    def _exit(self):
        """Save and disable the question controls."""
        try:
            self._save()
        except Exception:
            self._sol_label_value.set("Didn't work?!")
            return
        self._sol_label_value.set("")
        self._entry_value.set("")
        self._sol_label_value.set("Done!")
        self._check_button['state'] = 'disabled'
        self._save_button['state'] = 'disabled'
        self.unbind('<Return>')
        self._next_button['state'] = 'disabled'

    def _save(self, *args):
        """Write the vocabulary, including progress, to the data file."""
        writer = ExcelWriter(self._datafile)
        self._vocTot.to_excel(writer, 'Sheet1', index=False)
        writer.save()
        self._sol_label_value.set("Saved!")
        self._sol_label.grid(row=3, column=2, columnspan=len(self.C_labels))
        self._save_reminder = 0

    def _restart(self):
        """Offer to save unsaved progress, then re-execute the program."""
        if self._save_reminder:
            result = tkMessageBox.askquestion("Warning",
                                              "Save before exiting?")
            if result == 'yes':
                try:
                    self._save()
                except Exception:
                    self._sol_label_value.set("Didn't work?!")
                    return
        # replace the running process by a fresh instance of the program
        python = sys.executable
        os.execl(python, python, *sys.argv)

    def _translate(self):
        """Open Google Translate for the text in the translate field."""
        to_translate = self._tr_value.get()
        if isinstance(to_translate, unicode):
            # Todo: doesn't work
            url = 'https://translate.google.com/#zh-CN/en/' + to_translate
        else:
            to_translate = to_translate.replace(' ', '%20')
            url = 'https://translate.google.com/#en/zh-CN/' + to_translate
        webbrowser.open(url)
def handle_noargs(self, **options):
    """
    Rebuild the English ("EN:*") index in Redis from the dictionary file.

    Data lines of ``settings.DICT_FILE_LOCATION`` look like::

        一中一台 [yi1 Zhong1 yi1 Tai2] /first meaning/second meaning/

    Each meaning is reduced to a short phrase: the first parenthesised and
    the first square-bracketed part are removed, meanings containing an
    entry of ``EXCLUSIONS`` and "see XYZ" references are skipped, a leading
    "to " of short infinitives and "..." are dropped. Phrases that split
    into at most three parts on single spaces are stored under
    ``EN:<parts>W:<phrase>`` together with the characters of the line.

    :param options: command options (unused)
    :raise ValueError: if a data line contains no ``/``
    """
    r_server = _get_redis()

    # EMPTY ALL EN KEYS FROM THE DATABASE
    item_count = 0
    for key in r_server.keys('EN:*'):
        r_server.delete(key)
        item_count += 1
    print("Deleted %s items" % item_count)

    # NOW LETS START
    item_count = 0
    # ``with`` closes the file even when a line fails to parse.
    with open(settings.DICT_FILE_LOCATION) as dict_file:
        for line in dict_file:
            # Skip comments, and blank lines that have no second column.
            if line.startswith("#") or not line.strip():
                continue

            # GATHER ALL THE MAIN VARIABLES
            # The Pinyin of the line used to be extracted and converted
            # here (with a new ReadingFactory per line) although the
            # result was never used; that work has been removed.
            characters = line.split()[1]
            meanings = line[line.index('/') + 1:line.rindex('/')]

            # CREATE AN INDEX: strip as much noise as possible from each
            # definition to get a short phrase we can index on.
            for x in meanings.split('/'):
                ns = x  # new_string

                # REMOVE ANYTHING BETWEEN BRACKETS (and the brackets too)
                try:
                    ns = ns.replace(ns[ns.index('(') + 1:ns.index(')')], '')
                    ns = ns.replace('(', '').replace(')', '')
                except ValueError:
                    pass

                # REMOVE ANYTHING BETWEEN SQUARE BRACKETS (and those too)
                try:
                    ns = ns.replace(ns[ns.index('[') + 1:ns.index(']')], '')
                    ns = ns.replace('[', '').replace(']', '')
                except ValueError:
                    pass

                # IGNORE THE MEANING IF IT CONTAINS AN EXCLUDED PHRASE
                if any(phrase in ns for phrase in EXCLUSIONS):
                    continue

                # IF THE MEANING IS NOW EMPTY, IGNORE IT
                ns = ns.strip()
                if ns == '':
                    continue

                # DEAL WITH INFINITIVE VERBS LIKE "TO DO" WITH 2 WORDS
                if len(ns.split(' ')) <= 3 and ns.startswith('to '):
                    ns = ns.split(' ', 1)[1]

                # REMOVE ITEMS LIKE "SEE XYZ"
                if (ns.split(' ')[0] == 'see'
                        and ns[-1] not in string.ascii_letters):
                    continue

                # THERE'S ALSO SOME ANNOYING "..." MARKS TOO
                if "..." in ns:
                    ns = ns.replace('...', '')
                    # A meaning made up of dots only would otherwise be
                    # stored under an empty phrase.
                    if not ns:
                        continue

                # FOR NOW, JUST ADD SHORT ITEMS
                word_count = len(ns.split(' '))
                if word_count > 3:
                    continue

                key = "EN:%sW:%s" % (word_count, ns.lower())
                print(key)
                if r_server.exists(key):
                    values = json.loads(_search_redis(key))
                    values['characters'].append(characters)
                else:
                    values = {'english': x, 'characters': [characters]}
                r_server.set(key, json.dumps(values))

                item_count += 1
                print(item_count)

    print("%s English dictionary items added" % item_count)
class LeoDownloader(AudioDownloader):
    """Download audio from LEO"""

    def __init__(self):
        """Set up URLs, language codes and icon addresses for LEO."""
        AudioDownloader.__init__(self)
        self.file_extension = u'.mp3'
        self.url = 'http://www.leo.org/dict/audio_{language}/{word}.mp3'
        # And, yes, they use ch for Chinese.
        # (I'm not sure if they really have anything for ru or it.)
        self.language_dict = {'de': 'de', 'en': 'en', 'es': 'es', 'fr': 'fr',
                              'it': 'it', 'ru': 'ru', 'zh': 'ch'}
        # It kind of looks like they have Swiss pronunciations, but they
        # don't.
        self.chinese_code = 'ch'
        # We should keep a number of site icons handy, with the right
        # flag for the request.
        self.site_icon_dict = {}
        self.site_file_name_encoding = 'ISO-8859-1'
        self.icon_url_dict = {
            'de': 'http://dict.leo.org/favicon.ico',
            'en': 'http://dict.leo.org/favicon.ico',
            'es': 'http://dict.leo.org/favicon_es.ico',
            'fr': 'http://dict.leo.org/favicon_fr.ico',
            'it': 'http://dict.leo.org/favicon_it.ico',
            'ru': 'http://dict.leo.org/favicon_ru.ico',
            # When we use this dict, we have already munged the 'zh' to 'ch'
            'ch': 'http://dict.leo.org/favicon_ch.ico'}
        # As the name implies, a hack. Try to use the cjklib TTEMPÉ
        # brings along. A system-wide installed one should work as
        # well.
        self.have_tried_cjklib_hack = False
        self.reading_factory = None

    def download_files(self, word, base, ruby, split):
        """
        Download a word from LEO

        We try to get pronunciations for the text for German, English,
        Spanish, French, Italian and Russian, and from the ruby for
        Chinese. There may not be any pronunciations available for
        Italian or Russian.

        ``self.language`` must have been set before the call; it is
        replaced by LEO's own language code. ``split`` is accepted but
        not used here. The result is stored in ``self.downloads_list``.

        Raises KeyError for a language LEO does not offer and ValueError
        (from set_names) when there is nothing to look up.
        """
        self.downloads_list = []
        # Fix the language. EAFP.
        self.language = self.language_dict[self.language[:2].lower()]
        # set_names also checks the language.
        self.set_names(word, base, ruby)
        # Only get the icon when we have a word
        # self.maybe_get_icon()
        self.get_flag_icon()
        # EAFP. If the URL is unusable, get_data_from_url will blow up.
        word_url = self.query_url(word, ruby)
        word_data = self.get_data_from_url(word_url)
        word_file_path, word_file_name = self.get_file_name()
        with open(word_file_path, 'wb') as word_file:
            word_file.write(word_data)
        # We have a file, but not much to say about it.
        self.downloads_list.append(
            (word_file_path, word_file_name, dict(Source='Leo')))

    def query_url(self, word, ruby):
        """Build query URL"""
        if self.chinese_code == self.language:
            # Chinese audio is looked up by its (numbered) pinyin.
            word = self.fix_pinyin(ruby)
        return self.url.format(
            language=self.language,
            word=urllib.quote(word.encode(self.site_file_name_encoding)))

    def fix_pinyin(self, pinyin):
        """
        Return pinyin with tone numbers, the neutral tone written as 0.

        When cjklib cannot be imported the input is returned unchanged.
        """
        # Hacks. It is overkill to ship cjklib with this add-on. But
        # to get the tone numbers as numbers, we should use it. My
        # hope (guess) is that the typical user that will want Chinese
        # pronunciations will also have TTEMPÉ's (version of mine)
        # chinese-support-plugin installed. So try to use that and
        # don't complain if it doesn't work.
        if not self.have_tried_cjklib_hack:
            # Mark the attempt first, using the flag that is actually
            # tested above. The original assigned the misspelled
            # ``have_tried_cjk_hack``, so this block ran - and appended to
            # sys.path - on every call.
            self.have_tried_cjklib_hack = True
            try:
                # If this works, the whole shebang is run as an Anki2
                # add-on. If not, we will still look for a system-wide
                # cjklib, but obviously not for another add-on.
                from aqt.utils import isWin
            except Exception:
                pass
            else:
                from aqt import mw
                addon_dir = mw.pm.addonFolder()
                if isWin:
                    # The isWin bit is copied from TTEMPÉ's code.
                    addon_dir = addon_dir.encode(sys.getfilesystemencoding())
                sys.path.append(os.path.join(addon_dir, "chinese"))
        if not self.reading_factory:
            try:
                from cjklib.reading import ReadingFactory
            except ImportError:
                return pinyin
            else:
                self.reading_factory = ReadingFactory()
        return self.reading_factory.convert(
            pinyin, 'Pinyin', 'Pinyin',
            targetOptions={'toneMarkType': 'numbers'}).replace('5', '0')

    def get_flag_icon(self):
        """
        Set self.site_icon to the right icon.

        We should use different icons, depending on the request
        language. We store these icons in self.site_icon_dict and
        download the one for the current language if we don't have it
        yet.
        """
        if not with_pyqt:
            return
        try:
            # If this works we already have it.
            self.site_icon = self.site_icon_dict[self.language]
        except KeyError:
            # We have to get it ourselves. (We know it's just 16x16, so
            # no resize. And we know the address).
            self.site_icon_dict[self.language] = \
                QImage.fromData(self.get_data_from_url(
                    self.icon_url_dict[self.language]))
            self.site_icon = self.site_icon_dict[self.language]

    def set_names(self, text, base, ruby):
        """
        Set the display text and file base name variables.

        For Chinese both are built from base and ruby, otherwise from
        text. Raises ValueError when the required value is empty.
        """
        if self.language == self.chinese_code:
            if not ruby:
                raise ValueError('Nothing to download')
            self.base_name = u"{0}_{1}".format(base, ruby)
            self.display_text = u"{1} ({0})".format(base, ruby)
        else:
            if not text:
                raise ValueError('Nothing to download')
            self.base_name = text
            self.display_text = text