def posegment(self, posource, sourcelanguage, targetlanguage, stripspaces=True, onlyaligned=True):
    """Segment PO text supplied as a string, without touching any files.

    ``posource`` is parsed as a PO store, a segmenting convertor is built for
    the given source/target language codes, and the converted store is
    returned.
    """
    store = po.pofile(wStringIO.StringIO(posource))
    segmenter = posegment.segment(
        lang_factory.getlanguage(sourcelanguage),
        lang_factory.getlanguage(targetlanguage),
        stripspaces=stripspaces,
        onlyaligned=onlyaligned,
    )
    return segmenter.convertstore(store)
def segmentfile(inputfile, outputfile, templatefile, sourcelanguage="en", targetlanguage=None, stripspaces=True, onlyaligned=False):
    """Segment the store read from ``inputfile`` and serialize it to ``outputfile``.

    ``templatefile`` is never read; it is only part of the signature because
    the converter framework requires it.

    Returns 0 (writing nothing) when the input store is empty, otherwise 1.
    """
    store = factory.getobject(inputfile)
    if store.isempty():
        return 0

    segmenter = segment(
        lang_factory.getlanguage(sourcelanguage),
        lang_factory.getlanguage(targetlanguage),
        stripspaces=stripspaces,
        onlyaligned=onlyaligned,
    )
    segmenter.convertstore(store).serialize(outputfile)
    return 1
def gtk_textview_compute_optimal_height(widget, width):
    """Request a height on ``widget.parent`` that fits the text view's content.

    Does nothing when the widget is not visible. The text is laid out with
    ``rendering.make_pango_layout`` at ``width`` minus the computed border,
    and the resulting pixel height plus the border is passed to
    ``widget.parent.set_size_request``.

    When the buffer is empty, the height is estimated from the widget's
    ``_source_text`` attribute (if set), run through the target language's
    ``alter_length`` and then ``markup.escape``.
    """
    if not widget.props.visible:
        return

    buf = widget.get_buffer()
    # For border calculations, see gtktextview.c:gtk_text_view_size_request in the GTK source
    border = 2 * widget.border_width - 2 * widget.parent.border_width
    if widget.style_get_property("interior-focus"):
        border += 2 * widget.style_get_property("focus-line-width")

    buftext = buf.get_text(buf.get_start_iter(), buf.get_end_iter())
    # A good way to test height estimation is to use it for all units and
    # compare the reserved space to the actual space needed to display a unit.
    # To use height estimation for all units (not just empty units), use:
    # if True:
    if not buftext:
        text = getattr(widget, "_source_text", u"")
        if text:
            lang = factory.getlanguage(pan_app.settings.language["targetlang"])
            buftext = lang.alter_length(text)
            buftext = markup.escape(buftext)

    # Only the height of the laid-out text is needed; the width is discarded.
    _w, h = rendering.make_pango_layout(widget, buftext, width - border).get_pixel_size()
    if h == 0:
        # No idea why this bug happens, but it often happens for the first unit
        # directly after the file is opened. For now we try to guess a more
        # useful default than 0. This should look much better than 0, at least.
        h = 28

    # A scrolled-window parent with a visible shadow adds its style's
    # ythickness twice to the border. This happens after the layout above,
    # so it affects the requested height but not the wrapping width.
    parent = widget.parent
    if isinstance(parent, gtk.ScrolledWindow) and parent.get_shadow_type() != gtk.SHADOW_NONE:
        border += 2 * parent.rc_get_style().ythickness
    widget.parent.set_size_request(-1, h + border)
def test_punctranslate():
    """Check Arabic punctuation translation of commas, semicolons and question marks."""
    language = factory.getlanguage('ar')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg."),
        (u"abc, efg; d?", u"abc، efg؛ d؟"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_punctranslate():
    """Check Persian punctuation translation: question marks and quotation marks.

    Covers paired quotes being turned into guillemets, lone quote characters
    being left alone, and quotes inside HTML attributes being preserved.
    """
    language = factory.getlanguage('fa')
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg."),
        ("Delete file: %s?", "Delete file: %s؟"),
        ('"root" is powerful', "«root» is powerful"),
        ("'root' is powerful", "«root» is powerful"),
        ("`root' is powerful", "«root» is powerful"),
        ('The user "root"', "The user «root»"),
        ("The user 'root'", "The user «root»"),
        ("The user `root'", "The user «root»"),
        ('The user "root"?', "The user «root»؟"),
        ("The user 'root'?", "The user «root»؟"),
        ("The user `root'?", "The user «root»؟"),
        ('Watch the " mark', 'Watch the " mark'),
        ("Watch the ' mark", "Watch the ' mark"),
        ("Watch the ` mark", "Watch the ` mark"),
        ('Watch the “mark”', "Watch the «mark»"),
        ('The <a href="info">user</a> "root"?', 'The <a href="info">user</a> «root»؟'),
        ("The <a href='info'>user</a> 'root'?", "The <a href='info'>user</a> «root»؟"),
        # Broken because we test for equal number of ` and ' in the string:
        # (u"The <a href='info'>user</a> `root'?", u"The <a href='info'>user</a> «root»؟"),
        ("The <a href='http://koeie'>user</a>", "The <a href='http://koeie'>user</a>"),
        ("Copying `%s' to `%s'", "Copying «%s» to «%s»"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_punctranslate():
    """Tests that we can translate punctuation (Romanian).

    Sentence punctuation is expected to stay unchanged, while every style of
    paired quotation mark is expected to become the Romanian pair
    („ … ”). Lone quote characters and quotes inside HTML attributes must be
    left untouched.
    """
    language = factory.getlanguage('ro')
    assert language.punctranslate(u"") == u""
    assert language.punctranslate(u"abc efg") == u"abc efg"
    assert language.punctranslate(u"abc efg.") == u"abc efg."
    assert language.punctranslate(u"abc efg!") == u"abc efg!"
    assert language.punctranslate(u"abc efg? hij!") == u"abc efg? hij!"
    assert language.punctranslate(u"Delete file: %s?") == u"Delete file: %s?"
    assert language.punctranslate(u'"root" is powerful') == u"„root” is powerful"
    assert language.punctranslate(u"'root' is powerful") == u"„root” is powerful"
    assert language.punctranslate(u"`root' is powerful") == u"„root” is powerful"
    assert language.punctranslate(u"‘root’ is powerful") == u"„root” is powerful"
    assert language.punctranslate(u"“root” is powerful") == u"„root” is powerful"
    assert language.punctranslate(u'The user "root"') == u"The user „root”"
    assert language.punctranslate(u"The user 'root'") == u"The user „root”"
    assert language.punctranslate(u"The user `root'") == u"The user „root”"
    # The closing quote here must be ” like every other case in this test;
    # the previous expectation used a closing guillemet (»).
    assert language.punctranslate(u'The user "root"?') == u"The user „root”?"
    assert language.punctranslate(u"The user 'root'?") == u"The user „root”?"
    assert language.punctranslate(u"The user `root'?") == u"The user „root”?"
    assert language.punctranslate(u"The user ‘root’?") == u"The user „root”?"
    assert language.punctranslate(u"The user “root”?") == u"The user „root”?"
    assert language.punctranslate(u'Watch the " mark') == u'Watch the " mark'
    assert language.punctranslate(u"Watch the ' mark") == u"Watch the ' mark"
    assert language.punctranslate(u"Watch the ` mark") == u"Watch the ` mark"
    assert language.punctranslate(u'Watch the “mark”') == u"Watch the „mark”"
    assert language.punctranslate(u'The <a href="info">user</a> "root"?') == u'The <a href="info">user</a> „root”?'
    assert language.punctranslate(u"The <a href='info'>user</a> 'root'?") == u"The <a href='info'>user</a> „root”?"
    assert language.punctranslate(u"The <a href='info'>user</a> ‘root’?") == u"The <a href='info'>user</a> „root”?"
    assert language.punctranslate(u"The <a href='info'>user</a> “root”?") == u"The <a href='info'>user</a> „root”?"
    assert language.punctranslate(u"The <a href='http://koeie'>user</a>") == u"The <a href='http://koeie'>user</a>"
    assert language.punctranslate(u"Copying `%s' to `%s'") == u"Copying „%s” to „%s”"
def _on_entry_changed(self, entry):
    """Re-validate the add-term form whenever one of its entries changes.

    The add button is re-enabled and the error box hidden first. Then:

    * if the model reports a duplicate of the current source/target pair, a
      warning is shown and the add button is disabled;
    * otherwise, if the source text is non-empty and other units share it,
      their targets are listed in the warning box (the add button stays
      enabled).

    ``entry`` (the widget that emitted the signal) is not used.
    """
    self.btn_add_term.props.sensitive = True
    self.eb_add_term_errors.hide()

    src_text = self.ent_source.get_text()
    tgt_text = self.ent_target.get_text()

    dup = self.term_model.get_duplicates(src_text, tgt_text)
    if dup:
        self.lbl_add_term_errors.set_text(_('Identical entry already exists.'))
        self.eb_add_term_errors.modify_bg(gtk.STATE_NORMAL, gdk.color_parse(current_theme['warning_bg']))
        self.eb_add_term_errors.show_all()
        self.btn_add_term.props.sensitive = False
        return

    same_src_units = self.term_model.get_units_with_source(src_text)
    if src_text and same_src_units:
        # We want to separate multiple terms with the correct list
        # separator for the UI language:
        from xml.sax.saxutils import escape
        from translate.lang import factory as lang_factory
        from virtaal.common.pan_app import ui_language
        separator = lang_factory.getlanguage(ui_language).listseperator

        # The message is rendered with set_markup(), so characters such as
        # '&' and '<' in an existing term must be escaped or the markup is
        # invalid. A missing target is shown as an empty string.
        #l10n: The variable is an existing term formatted for emphasis. The default is bold formatting, but you can remove/change the markup if needed. Leave it unchanged if you are unsure.
        translations = separator.join([_('<b>%s</b>') % escape(u.target or u"") for u in same_src_units])
        errormsg = _('Existing translations: %(translations)s') % {
            'translations': translations
        }
        self.lbl_add_term_errors.set_markup(errormsg)
        self.eb_add_term_errors.modify_bg(gtk.STATE_NORMAL, gdk.color_parse(current_theme['warning_bg']))
        self.eb_add_term_errors.show_all()
        return
def __init__(self, sourcelanguage='en', targetlanguage='en', checkerstyle=None):
    """Store the language pair and look up the source language object.

    ``checkerstyle`` is currently unused: the ``init_checker`` call that
    would consume it is commented out below.
    """
    self.sourcelanguage, self.targetlanguage = sourcelanguage, targetlanguage
    self.language = factory.getlanguage(self.sourcelanguage)
    # self.init_checker(checkerstyle)
    self.classification = dict()
def test_punctranslate():
    """Check French punctuation translation.

    Covers the no-break space inserted before ``!``, ``?`` and ``:``,
    guillemets (padded with no-break spaces) replacing paired quotes, and
    lone quotes or quotes inside HTML attributes being left alone.
    """
    language = factory.getlanguage('fr')
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg."),
        ("abc efg!", "abc efg\u00a0!"),
        ("abc efg? hij!", "abc efg\u00a0? hij\u00a0!"),
        ("Delete file: %s?", "Delete file\u00a0: %s\u00a0?"),
        ('"root" is powerful', "«\u00a0root\u00a0» is powerful"),
        ("'root' is powerful", "«\u00a0root\u00a0» is powerful"),
        ("`root' is powerful", "«\u00a0root\u00a0» is powerful"),
        ('The user "root"', "The user «\u00a0root\u00a0»"),
        ("The user 'root'", "The user «\u00a0root\u00a0»"),
        ("The user `root'", "The user «\u00a0root\u00a0»"),
        ('The user "root"?', "The user «\u00a0root\u00a0»\u00a0?"),
        ("The user 'root'?", "The user «\u00a0root\u00a0»\u00a0?"),
        ("The user `root'?", "The user «\u00a0root\u00a0»\u00a0?"),
        ('Watch the " mark', 'Watch the " mark'),
        ("Watch the ' mark", "Watch the ' mark"),
        ("Watch the ` mark", "Watch the ` mark"),
        ('Watch the “mark”', "Watch the «\u00a0mark\u00a0»"),
        ('The <a href="info">user</a> "root"?', 'The <a href="info">user</a> «\u00a0root\u00a0»\u00a0?'),
        ("The <a href='info'>user</a> 'root'?", "The <a href='info'>user</a> «\u00a0root\u00a0»\u00a0?"),
        # Broken because we test for equal number of ` and ' in the string:
        # (u"The <a href='info'>user</a> `root'?", u"The <a href='info'>user</a> «\u00a0root\u00a0»\u00a0?"),
        ("The <a href='http://koeie'>user</a>", "The <a href='http://koeie'>user</a>"),
        ("Copying `%s' to `%s'", "Copying «\u00a0%s\u00a0» to «\u00a0%s\u00a0»"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Ukrainian segmentation: empty input, and no split after "Ел."."""
    language = factory.getlanguage('uk')
    cases = [
        (u"", []),
        (u"Ел. пошта", [u"Ел. пошта"]),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_sentences():
    """Check Turkish segmentation: a plain split, and no split after "1." / "2."."""
    language = factory.getlanguage('tr')
    cases = [
        (u"Normal case. Nothing interesting.", [u"Normal case.", u"Nothing interesting."]),
        (u"1. sayı, 2. sayı.", [u"1. sayı, 2. sayı."]),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_punctranslate():
    """Check Thai punctuation translation: full stops are expected to be dropped."""
    language = factory.getlanguage('th')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg"),
        (u"abc efg. hij", u"abc efg hij"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def create_default_languages():
    """Create the default languages.

    We afford this privilege to languages with reasonably complete interface
    translations for Pootle.

    Every language code known to the toolkit is looked up and fetched or
    created as a ``Language`` row; codes listed in ``default_languages`` are
    additionally saved. This is best-effort: a failure for one language is
    logged and the loop continues with the next code.
    """
    import logging

    from translate.lang import data, factory

    from pootle_language.models import Language

    default_languages = ("af", "ak", "ht", "nso", "ve", "wo", "zh_cn", "zh_hk",
                         "zh_tw", "ca_valencia", "son", "lg", "gd")

    # import languages from toolkit
    for code in data.languages:
        try:
            tk_lang = factory.getlanguage(code)
            criteria = {
                'code': code,
                'fullname': tk_lang.fullname,
                'nplurals': tk_lang.nplurals,
                'pluralequation': tk_lang.pluralequation,
                'specialchars': tk_lang.specialchars,
            }
            lang, _created = Language.objects.get_or_create(**criteria)
            if code in default_languages:
                lang.save()
        except Exception:
            # Keep going on failure, but leave a trace instead of hiding it.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            logging.getLogger(__name__).warning(
                "Could not create default language %r", code, exc_info=True)
def test_sentences():
    """Check Chinese segmentation on the ideographic full stop, dropping the trailing newline."""
    language = factory.getlanguage('zh')
    cases = [
        (u"", []),
        (
            u"這個用戶名稱已經存在。現在會寄一封信給已登記的電郵地址。\n",
            [u"這個用戶名稱已經存在。", u"現在會寄一封信給已登記的電郵地址。"],
        ),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_sentences():
    """Check Oriya segmentation: empty input and a split after the danda (।)."""
    language = factory.getlanguage('or')
    assert language.sentences(u"") == []

    text = u"ଗୋଟିଏ ଚାବିକୁ ଆଲୋକପାତ କରିବା ପାଇଁ ମାଉସ ସୂଚକକୁ ତାହା ଉପରକୁ ଘୁଞ୍ଚାନ୍ତୁ। ଚୟନ କରିବା ପାଇଁ ଗୋଟିଏ ସୁଇଚକୁ ଦବାନ୍ତୁ।"
    expected = [
        u"ଗୋଟିଏ ଚାବିକୁ ଆଲୋକପାତ କରିବା ପାଇଁ ମାଉସ ସୂଚକକୁ ତାହା ଉପରକୁ ଘୁଞ୍ଚାନ୍ତୁ।",
        u"ଚୟନ କରିବା ପାଇଁ ଗୋଟିଏ ସୁଇଚକୁ ଦବାନ୍ତୁ।",
    ]
    assert language.sentences(text) == expected
def test_punctranslate():
    """Check Oriya punctuation translation: a final full stop becomes a danda, an ellipsis does not."""
    language = factory.getlanguage('or')
    cases = [
        (u"", u""),
        (u"Document loaded", u"Document loaded"),
        (u"Document loaded.", u"Document loaded।"),
        (u"Document loaded.\n", u"Document loaded।\n"),
        (u"Document loaded...", u"Document loaded..."),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Greek segmentation: empty input and a split after the Greek question mark (;)."""
    language = factory.getlanguage('el')
    assert language.sentences(u"") == []

    text = u"Θέλετε να αποθηκεύσετε το παιχνίδι σας; (Θα σβησθούν οι Αυτόματες-Αποθηκεύσεις)"
    expected = [
        u"Θέλετε να αποθηκεύσετε το παιχνίδι σας;",
        u"(Θα σβησθούν οι Αυτόματες-Αποθηκεύσεις)",
    ]
    assert language.sentences(text) == expected
def test_punctranslate():
    """Check Greek punctuation translation: ``;`` becomes ``·`` and ``?`` becomes ``;``."""
    language = factory.getlanguage('el')
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg. hij.", "abc efg. hij."),
        ("abc efg;", "abc efg·"),
        ("abc efg? hij!", "abc efg; hij!"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_getlanguage():
    """Exercise getlanguage() with known, unknown and unusual language codes."""
    # A code that has its own language module.
    khmer = factory.getlanguage('km')
    assert khmer.code == 'km'
    assert khmer.fullname == 'Central Khmer'

    # A non-existent code.
    unknown = factory.getlanguage('zz')
    assert unknown.nplurals == 0

    # A code without a module.
    frisian = factory.getlanguage('fy')
    assert frisian.nplurals == 2
    assert frisian.fullname == "Frisian"
    assert "n != 1" in frisian.pluralequation

    # A code without a module and with a country code.
    austrian_german = factory.getlanguage('de_AT')
    assert austrian_german.nplurals == 2
    assert austrian_german.fullname == "German"

    # None as the language code.
    no_language = factory.getlanguage(None)
    assert no_language.code == ''

    # A language code that is a reserved word in Python.
    icelandic = factory.getlanguage('is')
    assert icelandic.nplurals == 2

    # A language code that contains '@'.
    valencian = factory.getlanguage('ca@valencia')
    assert valencian.nplurals == 2
def test_sentences():
    """Check Korean segmentation: empty input and a two-sentence split."""
    language = factory.getlanguage('ko')
    assert language.sentences(u"") == []

    text = u"이 연락처에 바뀐 부분이 있습니다. 바뀐 사항을 저장하시겠습니까?"
    expected = [u"이 연락처에 바뀐 부분이 있습니다.", u"바뀐 사항을 저장하시겠습니까?"]
    sentences = language.sentences(text)
    print(sentences)
    assert sentences == expected
def test_punctranslate():
    """Check Nepali punctuation translation: spaced danda for ``.`` and a space before ``?``."""
    language = factory.getlanguage('ne')
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg ।"),
        ("(abc efg).", "(abc efg) ।"),
        ("abc efg...", "abc efg..."),
        ("abc efg?", "abc efg ?"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Tests basic functionality of sentence segmentation (Armenian).

    Checks that an empty string yields no sentences and that text is split
    after the Armenian full stop (։).
    """
    language = factory.getlanguage('hy')
    sentences = language.sentences(u"")
    assert sentences == []

    sentences = language.sentences(u"Արխիվն արդեն գոյություն ունի։ Դուք ցանկանու՞մ եք կրկին գրել այն։")
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(sentences)
    assert sentences == [u"Արխիվն արդեն գոյություն ունի։", u"Դուք ցանկանու՞մ եք կրկին գրել այն։"]
def test_sentences():
    """Check Afrikaans segmentation, including "'n" starting a sentence and the abbreviation "a.g.v."."""
    language = factory.getlanguage('af')
    cases = [
        (u"Normal case. Nothing interesting.", [u"Normal case.", "Nothing interesting."]),
        (u"Wat? 'n Fout?", [u"Wat?", "'n Fout?"]),
        (u"Dit sal a.g.v. 'n fout gebeur.", [u"Dit sal a.g.v. 'n fout gebeur."]),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_punctranslate():
    """Check Nepali punctuation translation: spaced danda for ``.`` and a space before ``?``."""
    language = factory.getlanguage('ne')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg ।"),
        (u"(abc efg).", u"(abc efg) ।"),
        (u"abc efg...", u"abc efg..."),
        (u"abc efg?", u"abc efg ?"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def posegment(posource, sourcelanguage, targetlanguage, stripspaces=True, onlyaligned=True):
    """Segment PO text supplied as a string, without touching any files.

    ``posource`` is encoded to bytes and parsed as a PO store; a segmenting
    convertor for the given language codes then converts it, and the
    resulting store is returned.
    """
    store = po.pofile(BytesIO(posource.encode()))
    segmenter = posegment.segment(
        lang_factory.getlanguage(sourcelanguage),
        lang_factory.getlanguage(targetlanguage),
        stripspaces=stripspaces,
        onlyaligned=onlyaligned,
    )
    return segmenter.convertstore(store)
def test_sentences():
    """Check Spanish segmentation: empty input and a split before an inverted question mark."""
    language = factory.getlanguage('es')
    assert language.sentences(u"") == []

    text = u"El archivo <b>%1</b> ha sido modificado. ¿Desea guardarlo?"
    expected = [u"El archivo <b>%1</b> ha sido modificado.", u"¿Desea guardarlo?"]
    sentences = language.sentences(text)
    print(sentences)
    assert sentences == expected
def test_punctranslate():
    """Check Arabic punctuation translation, including swapped curly quotes."""
    language = factory.getlanguage('ar')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg."),
        (u"abc, efg; d?", u"abc، efg؛ d؟"),
        # See https://github.com/translate/translate/issues/1819
        (u"It is called “abc”", u"It is called ”abc“"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_punctranslate():
    """Check Armenian punctuation translation of ``.``, ``!`` and ``:``."""
    language = factory.getlanguage("hy")
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg։"),
        ("abc efg. hij.", "abc efg։ hij։"),
        ("abc efg!", "abc efg՜"),
        ("Delete file: %s", "Delete file՝ %s"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Tests basic functionality of sentence segmentation (Spanish).

    Checks that an empty string yields no sentences and that a statement
    followed by a ¿…? question is split in two.
    """
    language = factory.getlanguage('es')
    sentences = language.sentences(u"")
    assert sentences == []

    sentences = language.sentences(u"El archivo <b>%1</b> ha sido modificado. ¿Desea guardarlo?")
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(sentences)
    assert sentences == [u"El archivo <b>%1</b> ha sido modificado.", u"¿Desea guardarlo?"]
def project_checker(project_style, source_lang):
    """Return a checker instance for ``project_style``, or None.

    None is returned when ``project_style`` is falsy or is not a key of
    ``projectcheckers``. Otherwise the checker class is instantiated, its
    ``config.sourcelang`` is set to the language object for ``source_lang``,
    and the instance is returned.
    """
    if not project_style:
        return None

    from translate.filters.checks import projectcheckers
    checker_class = projectcheckers.get(project_style, None)
    if not checker_class:
        return None

    checker = checker_class()
    from translate.lang import factory
    checker.config.sourcelang = factory.getlanguage(source_lang)
    return checker
def test_punctranslate():
    """Check Armenian punctuation translation of ``.``, ``!`` and ``:``."""
    language = factory.getlanguage('hy')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg։"),
        (u"abc efg. hij.", u"abc efg։ hij։"),
        (u"abc efg!", u"abc efg՜"),
        (u"Delete file: %s", u"Delete file՝ %s"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_punctranslate():
    """Check Amharic punctuation translation of ``.``, ``,`` and ``;``; ``:`` and ``?`` stay as they are."""
    language = factory.getlanguage("am")
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg።"),
        ("abc efg. hij.", "abc efg። hij።"),
        ("abc efg, hij;", "abc efg፣ hij፤"),
        ("Delete file: %s?", "Delete file: %s?"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Tests basic functionality of sentence segmentation (Khmer).

    Checks that an empty string yields no sentences and that text is split
    after the Khmer full stop (។).
    """
    language = factory.getlanguage('km')
    sentences = language.sentences(u"")
    assert sentences == []

    sentences = language.sentences(u"លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។ លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។")
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(sentences)
    assert sentences == [u"លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។", u"លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។"]
def test_punctranslate():
    """Check Arabic punctuation translation, including swapped curly quotes."""
    language = factory.getlanguage("ar")
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg."),
        ("abc, efg; d?", "abc، efg؛ d؟"),
        # See https://github.com/translate/translate/issues/1819
        ("It is called “abc”", "It is called ”abc“"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_punctranslate():
    """Check Arabic punctuation translation, including swapped curly quotes."""
    language = factory.getlanguage('ar')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg."),
        (u"abc, efg; d?", u"abc، efg؛ d؟"),
        # See http://bugs.locamotion.org/show_bug.cgi?id=1819
        (u"It is called “abc”", u"It is called ”abc“"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Tests basic functionality of sentence segmentation (Amharic).

    Checks that an empty string yields no sentences and that text is split
    after the Ethiopic full stop (።).
    """
    language = factory.getlanguage('am')
    sentences = language.sentences(u"")
    assert sentences == []

    sentences = language.sentences(u"ለምልክቱ መግቢያ የተለየ መለያ። ይህ የሚጠቅመው የታሪኩን ዝርዝር ለማስቀመጥ ነው።")
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(sentences)
    assert sentences == [u"ለምልክቱ መግቢያ የተለየ መለያ።", u"ይህ የሚጠቅመው የታሪኩን ዝርዝር ለማስቀመጥ ነው።"]
def test_punctranslate():
    """Check Amharic punctuation translation of ``.``, ``,`` and ``;``; ``:`` and ``?`` stay as they are."""
    language = factory.getlanguage('am')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg።"),
        (u"abc efg. hij.", u"abc efg። hij።"),
        (u"abc efg, hij;", u"abc efg፣ hij፤"),
        (u"Delete file: %s?", u"Delete file: %s?"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Tests basic functionality of sentence segmentation (Korean).

    Checks that an empty string yields no sentences and that a statement
    followed by a question is split in two.
    """
    language = factory.getlanguage('ko')
    sentences = language.sentences(u"")
    assert sentences == []

    sentences = language.sentences(u"이 연락처에 바뀐 부분이 있습니다. 바뀐 사항을 저장하시겠습니까?")
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(sentences)
    assert sentences == [u"이 연락처에 바뀐 부분이 있습니다.", u"바뀐 사항을 저장하시겠습니까?"]
def test_sentences():
    """Check Khmer segmentation: empty input and a split after the Khmer full stop (។)."""
    language = factory.getlanguage('km')
    assert language.sentences(u"") == []

    text = u"លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។ លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។"
    expected = [
        u"លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។",
        u"លក្ខណៈនេះអាចឲ្យយើងធ្វើជាតូបនីយកម្មកម្មវិធីកុំព្យូទ័រ ។",
    ]
    sentences = language.sentences(text)
    print(sentences)
    assert sentences == expected
def test_punctranslate():
    """Check French punctuation translation.

    Covers the no-break space inserted before ``!``, ``?`` and ``:``,
    guillemets (padded with no-break spaces) replacing paired quotes, and
    lone quotes or quotes inside HTML attributes being left alone.
    """
    language = factory.getlanguage("fr")
    expectations = (
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg."),
        ("abc efg!", "abc efg\u00a0!"),
        ("abc efg? hij!", "abc efg\u00a0? hij\u00a0!"),
        ("Delete file: %s?", "Delete file\u00a0: %s\u00a0?"),
        ('"root" is powerful', "«\u00a0root\u00a0» is powerful"),
        ("'root' is powerful", "«\u00a0root\u00a0» is powerful"),
        ("`root' is powerful", "«\u00a0root\u00a0» is powerful"),
        ('The user "root"', "The user «\u00a0root\u00a0»"),
        ("The user 'root'", "The user «\u00a0root\u00a0»"),
        ("The user `root'", "The user «\u00a0root\u00a0»"),
        ('The user "root"?', "The user «\u00a0root\u00a0»\u00a0?"),
        ("The user 'root'?", "The user «\u00a0root\u00a0»\u00a0?"),
        ("The user `root'?", "The user «\u00a0root\u00a0»\u00a0?"),
        ('Watch the " mark', 'Watch the " mark'),
        ("Watch the ' mark", "Watch the ' mark"),
        ("Watch the ` mark", "Watch the ` mark"),
        ("Watch the “mark”", "Watch the «\u00a0mark\u00a0»"),
        (
            'The <a href="info">user</a> "root"?',
            'The <a href="info">user</a> «\u00a0root\u00a0»\u00a0?',
        ),
        (
            "The <a href='info'>user</a> 'root'?",
            "The <a href='info'>user</a> «\u00a0root\u00a0»\u00a0?",
        ),
        # Broken because we test for equal number of ` and ' in the string:
        # ("The <a href='info'>user</a> `root'?", "The <a href='info'>user</a> «\u00a0root\u00a0»\u00a0?"),
        (
            "The <a href='http://koeie'>user</a>",
            "The <a href='http://koeie'>user</a>",
        ),
        ("Copying `%s' to `%s'", "Copying «\u00a0%s\u00a0» to «\u00a0%s\u00a0»"),
    )
    for original, translated in expectations:
        assert language.punctranslate(original) == translated
def test_punctranslate():
    """Check Oriya punctuation translation: a final full stop becomes a danda, an ellipsis does not."""
    language = factory.getlanguage('or')
    cases = [
        (u"", u""),
        (u"Document loaded", u"Document loaded"),
        (u"Document loaded.", u"Document loaded।"),
        (u"Document loaded.\n", u"Document loaded।\n"),
        (u"Document loaded...", u"Document loaded..."),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Vietnamese segmentation, including punctuation preceded by a space."""
    language = factory.getlanguage("vi")
    cases = [
        ("", []),
        ("Normal case. Nothing interesting.", ["Normal case.", "Nothing interesting."]),
        ("Is that the case ? Sounds interesting !", ["Is that the case ?", "Sounds interesting !"]),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_punctranslate():
    """Check Arabic punctuation translation, including swapped curly quotes."""
    language = factory.getlanguage('ar')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg."),
        (u"abc, efg; d?", u"abc، efg؛ d؟"),
        # See http://bugs.locamotion.org/show_bug.cgi?id=1819
        (u"It is called “abc”", u"It is called ”abc“"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_punctranslate():
    """Check Chinese punctuation translation: ``.`` becomes ``。`` while an ellipsis is kept."""
    language = factory.getlanguage('zh')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg。"),
        (u"(abc efg).", u"(abc efg)。"),
        (u"(abc efg). hijk", u"(abc efg)。hijk"),
        (u".", u"。"),
        (u"abc efg...", u"abc efg..."),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Japanese segmentation on ``。``, ``!`` and ``?`` without separating spaces."""
    language = factory.getlanguage("ja")
    cases = [
        ("", []),
        ("明日は、明日の風が吹く。吾輩は猫である。\n", ["明日は、明日の風が吹く。", "吾輩は猫である。"]),
        ("頑張れ!甲子園に行きたいか?", ["頑張れ!", "甲子園に行きたいか?"]),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_sentences():
    """Check Afrikaans segmentation, including "'n", "a.g.v." and newline-separated sentences."""
    language = factory.getlanguage('af')
    exact_cases = [
        ("Normal case. Nothing interesting.", ["Normal case.", "Nothing interesting."]),
        ("Wat? 'n Fout?", ["Wat?", "'n Fout?"]),
        ("Dit sal a.g.v. 'n fout gebeur.", ["Dit sal a.g.v. 'n fout gebeur."]),
    ]
    for text, expected in exact_cases:
        sentences = language.sentences(text)
        assert sentences == expected

    # For the multi-line message only the number of sentences is checked.
    multiline = "Weet nie hoe om lêer '%s' te open nie.\nMiskien is dit 'n tipe beeld wat nog nie ondersteun word nie.\n\nKies liewer 'n ander prent."
    sentences = language.sentences(multiline)
    assert len(sentences) == 3
def test_punctranslate():
    """Check that Korean punctuation translation leaves every input unchanged."""
    language = factory.getlanguage('ko')
    # Nothing should be translated
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg."),
        (u"abc efg. hij.", u"abc efg. hij."),
        (u"abc efg!", u"abc efg!"),
        (u"abc efg? hij!", u"abc efg? hij!"),
        (u"Delete file: %s?", u"Delete file: %s?"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Amharic segmentation: empty input and a split after the Ethiopic full stop (።)."""
    language = factory.getlanguage("am")
    assert language.sentences("") == []

    text = "ለምልክቱ መግቢያ የተለየ መለያ። ይህ የሚጠቅመው የታሪኩን ዝርዝር ለማስቀመጥ ነው።"
    expected = ["ለምልክቱ መግቢያ የተለየ መለያ።", "ይህ የሚጠቅመው የታሪኩን ዝርዝር ለማስቀመጥ ነው።"]
    sentences = language.sentences(text)
    print(sentences)
    assert sentences == expected
def test_punctranslate():
    """Check Khmer punctuation translation of ``.``, ``!``, ``?`` and ``:``.

    The two-sentence case also prints the actual and expected values as
    UTF-8 before asserting on them.
    """
    language = factory.getlanguage('km')
    leading_cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg\u00a0។"),
    ]
    for source, expected in leading_cases:
        assert language.punctranslate(source) == expected

    print(language.punctranslate(u"abc efg. hij.").encode('utf-8'))
    print(u"abc efg\u00a0។ hij\u00a0។".encode('utf-8'))

    trailing_cases = [
        (u"abc efg. hij.", u"abc efg\u00a0។ hij\u00a0។"),
        (u"abc efg!", u"abc efg\u00a0!"),
        (u"abc efg? hij!", u"abc efg\u00a0? hij\u00a0!"),
        (u"Delete file: %s?", u"Delete file\u00a0៖ %s\u00a0?"),
    ]
    for source, expected in trailing_cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Greek segmentation.

    Covers empty input, a split after the Greek question mark (;), a split
    after a full stop followed by a capital, and no split when the next word
    starts in lower case.
    """
    language = factory.getlanguage('el')
    cases = [
        (u"", []),
        (
            u"Θέλετε να αποθηκεύσετε το παιχνίδι σας; (Θα σβησθούν οι Αυτόματες-Αποθηκεύσεις)",
            [u"Θέλετε να αποθηκεύσετε το παιχνίδι σας;", u"(Θα σβησθούν οι Αυτόματες-Αποθηκεύσεις)"],
        ),
        (u"Πρώτη πρόταση. Δεύτερη πρόταση.", [u"Πρώτη πρόταση.", u"Δεύτερη πρόταση."]),
        (u"Πρώτη πρόταση. δεύτερη πρόταση.", [u"Πρώτη πρόταση. δεύτερη πρόταση."]),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        assert sentences == expected
def test_punctranslate():
    """Check Spanish punctuation translation: inverted marks are added before questions and exclamations."""
    language = factory.getlanguage('es')
    cases = [
        (u"", u""),
        (u"abc efg", u"abc efg"),
        (u"abc efg.", u"abc efg."),
        (u"abc efg?", u"¿abc efg?"),
        (u"abc efg!", u"¡abc efg!"),
        # We have to be a bit more gentle on the code by using capitals correctly.
        # Can we be more robust with this without affecting sentence segmentation?
        (u"Abc efg? Hij.", u"¿Abc efg? Hij."),
        (u"Abc efg! Hij.", u"¡Abc efg! Hij."),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_capsstart():
    """Check that the Afrikaans indefinite article ('n) does not confuse capsstart()."""
    language = factory.getlanguage('af')
    cases = [
        ("", False),
        ("Koeie kraam koeie", True),
        ("'Koeie' kraam koeie", True),
        ("koeie kraam koeie", False),
        ("\n\nKoeie kraam koeie", True),
        ("'n Koei kraam koeie", True),
        ("'n 'Koei' kraam koeie", True),
        ("'n koei kraam koeie", False),
        ("\n\n'n Koei kraam koeie", True),
    ]
    for text, starts_with_capital in cases:
        # Only the truthiness of the result is checked, as with a bare assert.
        assert bool(language.capsstart(text)) == starts_with_capital
def test_sentences():
    """Check Oriya segmentation: empty input and a split after the danda (।)."""
    language = factory.getlanguage("or")
    assert language.sentences("") == []

    text = "ଗୋଟିଏ ଚାବିକୁ ଆଲୋକପାତ କରିବା ପାଇଁ ମାଉସ ସୂଚକକୁ ତାହା ଉପରକୁ ଘୁଞ୍ଚାନ୍ତୁ। ଚୟନ କରିବା ପାଇଁ ଗୋଟିଏ ସୁଇଚକୁ ଦବାନ୍ତୁ।"
    first = "ଗୋଟିଏ ଚାବିକୁ ଆଲୋକପାତ କରିବା ପାଇଁ ମାଉସ ସୂଚକକୁ ତାହା ଉପରକୁ ଘୁଞ୍ଚାନ୍ତୁ।"
    second = "ଚୟନ କରିବା ପାଇଁ ଗୋଟିଏ ସୁଇଚକୁ ଦବାନ୍ତୁ।"
    assert language.sentences(text) == [first, second]
def test_sentences():
    """Tests basic functionality of sentence segmentation (Arabic).

    Checks that an empty string yields no sentences and that both the full
    stop and the Arabic question mark (؟) end a sentence.
    """
    language = factory.getlanguage('ar')
    sentences = language.sentences(u"")
    assert sentences == []

    sentences = language.sentences(u"يوجد بالفعل مجلد بالإسم \"%s\". أترغب في استبداله؟")
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(sentences)
    assert sentences == [u"يوجد بالفعل مجلد بالإسم \"%s\".", u"أترغب في استبداله؟"]

    # This probably doesn't make sense: it is just the above reversed, to make sure
    # we test the '؟' as an end of sentence marker.
    sentences = language.sentences(u"أترغب في استبداله؟ يوجد بالفعل مجلد بالإسم \"%s\".")
    print(sentences)
    assert sentences == [u"أترغب في استبداله؟", u"يوجد بالفعل مجلد بالإسم \"%s\"."]
def test_punctranslate():
    """Check Vietnamese punctuation translation: spacing before ``!`` and ``:``, and guillemet quotes."""
    language = factory.getlanguage('vi')
    cases = [
        ("", ""),
        ("abc efg", "abc efg"),
        ("abc efg.", "abc efg."),
        ("abc efg!", "abc efg !"),
        ("abc efg? hij!", "abc efg? hij !"),
        ("Delete file: %s?", "Delete file : %s?"),
        ('The user "root"', "The user «\u00a0root\u00a0»"),
        # More exhaustive testing of the quoting is in test_fr.py
        ('Lưu "Tập tin"', "Lưu «\u00a0Tập tin\u00a0»"),
        ("Lưu 'Tập tin'", "Lưu «\u00a0Tập tin\u00a0»"),
        ("Lưu `Tập tin'", "Lưu «\u00a0Tập tin\u00a0»"),
    ]
    for source, expected in cases:
        assert language.punctranslate(source) == expected
def test_sentences():
    """Check Arabic segmentation: both ``.`` and the Arabic question mark (؟) end a sentence."""
    language = factory.getlanguage("ar")
    assert language.sentences("") == []

    cases = [
        (
            'يوجد بالفعل مجلد بالإسم "%s". أترغب في استبداله؟',
            ['يوجد بالفعل مجلد بالإسم "%s".', "أترغب في استبداله؟"],
        ),
        # This probably doesn't make sense: it is just the above reversed, to make sure
        # we test the '؟' as an end of sentence marker.
        (
            'أترغب في استبداله؟ يوجد بالفعل مجلد بالإسم "%s".',
            ["أترغب في استبداله؟", 'يوجد بالفعل مجلد بالإسم "%s".'],
        ),
    ]
    for text, expected in cases:
        sentences = language.sentences(text)
        print(sentences)
        assert sentences == expected