def _check_pofiles_content(self):
    """
        Check if by mistake we have included non Catalan language strings
        in the translation memories.

        Every "*.po" file that FindFiles yields for self.temp_dir is loaded
        and the characters of its localized entries are compared against a
        set of characters that are not expected in Catalan text. When a file
        has more than 100 entries and the number of such characters, taken
        as a percentage of len(poFile), exceeds THRESHOLD_PERCENTAGE,
        self.errors is incremented and a warning is printed.

        Any exception raised while scanning is printed and ends the scan;
        it is not re-raised.
    """
    # The list of invalid chars is specific to Catalan language
    invalid_chars = {
        u"á", u"ñ", u"ë", u"ù", u"â", u"ê", u"î", u"ô", u"û", u"ÿ", u"ä", u"ö"
    }
    THRESHOLD_PERCENTAGE = 1

    try:
        findFiles = FindFiles()
        for filename in findFiles.find(self.temp_dir, "*.po"):
            poFile = pofile(filename)
            invalid = 0

            for entry in poFile:
                # Only localized segments. Skips developers names,
                # untranslated country names, etc
                if entry.msgid == entry.msgstr:
                    continue

                invalid += sum(1 for char in entry.msgstr.lower()
                               if char in invalid_chars)

            if len(poFile) > 100 and invalid > 0:
                # Note: the ratio is invalid *characters* over len(poFile),
                # so it can be larger than 100.
                percentage = 100.0 * invalid / len(poFile)
                if percentage > THRESHOLD_PERCENTAGE:
                    self.errors = self.errors + 1
                    # Call form of print works on Python 2 and Python 3
                    print("Unsual number of invalid chars at {0} ({1}%)".format(
                        filename, str(percentage)))

    except Exception as detail:
        print(detail)
def _get_po_entries(self, directory):
    """
        Return the number of translated entries, summed over every file
        that FindFiles yields for the '*.po' pattern in ``directory``.
    """
    po_paths = FindFiles().find(directory, '*.po')
    return sum(len(pofile(path).translated_entries()) for path in po_paths)
def _get_po_entries(self, directory):
    """
        Count translated entries across the '*.po' files that FindFiles
        reports for ``directory`` and return the total.
    """
    total = 0
    finder = FindFiles()

    for po_path in finder.find(directory, '*.po'):
        translated = pofile(po_path).translated_entries()
        total = total + len(translated)

    return total
def process(self):
    """
        Build the per-file term dictionaries from the PO files in
        self.directory and write a log of the selected strings to corpus.txt.

        Steps:
          1. Load the stop words from terminology/stop-words/stop-words.txt
             through self._read_stop_words.
          2. For every '*.po' file yielded by FindFiles, walk its translated
             entries, clean source and target with self._clean_string and
             keep only those accepted by self._should_select_string. The
             kept target is further processed by self._clean_localized.
          3. For each kept pair, write a line to corpus.txt, add the source
             to self.source_words and collect the target under its source.
          4. Store the collected terms in self.documents[filename].

        Updates the counters self.strings, self.strings_selected and
        self.files. A failure while processing one file is logged with
        logging.error and the remaining files are still processed.
    """
    # The handle is closed right after the call; this assumes
    # _read_stop_words consumes the file before returning - TODO confirm.
    with open("terminology/stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # 'with' guarantees corpus.txt is closed even if find() or the loop
    # raises outside the per-file try block.
    with open('corpus.txt', 'w') as f:
        for filename in findFiles.find(self.directory, '*.po'):
            try:
                print("Reading: " + filename)
                po_file = polib.pofile(filename)

                terms = {}
                for entry in po_file.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    msgstr = self._clean_localized(msgstr)
                    self.strings_selected += 1

                    log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                    log = log.format(msgid, entry.msgid, msgstr,
                                     entry.msgstr, filename)
                    f.write(log)

                    self.source_words.add(msgid)
                    # One source may have several translations in a file
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
            except Exception as detail:
                logging.error("Cannot read {0}:{1}".format(
                    filename, str(detail)))
def _check_number_of_files(self, tm_filename, extensions, expected_files, minimum_size):
    """
        Verify the files that FindFiles yields for ``extensions`` in
        self.temp_dir.

        Each file smaller than ``minimum_size`` (as reported by
        os.path.getsize) counts as one error. A file count different from
        ``expected_files`` counts as one more error, reported against
        ``tm_filename``. Errors are added to self.errors and printed.
    """
    found = 0

    for path in FindFiles().find(self.temp_dir, extensions):
        found += 1
        actual_size = os.path.getsize(path)

        if actual_size < minimum_size:
            self.errors += 1
            message = "File {0} has size {1} but expected was at least {2}"
            print(message.format(path, actual_size, minimum_size))

    if found != expected_files:
        self.errors += 1
        message = "{0} expected {1} files but contains {2}"
        print(message.format(tm_filename, expected_files, found))
def process(self):
    """
        Build the per-file term dictionaries from the PO files in
        self.directory and write a log of the selected strings to corpus.txt.

        Steps:
          1. Load the stop words from terminology/stop-words/stop-words.txt
             through self._read_stop_words.
          2. For every '*.po' file yielded by FindFiles, walk its translated
             entries, clean source and target with self._clean_string and
             keep only those accepted by self._should_select_string.
          3. For each kept pair, write a line to corpus.txt, add the source
             to self.source_words and collect the target under its source.
          4. Store the collected terms in self.documents[filename].

        Updates the counters self.strings, self.strings_selected and
        self.files. A failure while processing one file is logged with
        logging.error and the remaining files are still processed.
    """
    # The handle is closed right after the call; this assumes
    # _read_stop_words consumes the file before returning - TODO confirm.
    with open("terminology/stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # 'with' guarantees corpus.txt is closed even if find() or the loop
    # raises outside the per-file try block.
    with open('corpus.txt', 'w') as f:
        for filename in findFiles.find(self.directory, '*.po'):
            try:
                print("Reading: " + filename)
                po_file = polib.pofile(filename)

                terms = {}
                for entry in po_file.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    self.strings_selected += 1

                    log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                    log = log.format(msgid, entry.msgid, msgstr,
                                     entry.msgstr, filename)
                    f.write(log)

                    self.source_words.add(msgid)
                    # One source may have several translations in a file
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
            except Exception as detail:
                logging.error("Cannot read {0}:{1}".format(filename, str(detail)))
def _check_pofiles_content(self):
    """
        Look for signs that non Catalan content slipped into the
        translation memories.

        For each "*.po" file that FindFiles yields for self.temp_dir, two
        figures are gathered: how many entries have msgid equal to msgstr
        (treated as not localized) and how many characters of the remaining
        entries belong to a set of characters not expected in Catalan.
        Files with fewer than 100 entries are not reported. For the others,
        a message is printed when a figure, as a percentage of len(poFile),
        is above its threshold.

        Any exception is printed and stops the scan; it is not re-raised.
    """
    # The list of invalid chars is specific to Catalan language
    invalid_chars = {'á', 'ñ', 'ë', 'ù', 'â', 'ê', 'î', 'ô', 'û', 'ÿ', 'ä', 'ö'}

    try:
        THRESHOLD_PERCENTAGE_INVALID_CHARS = 1
        THRESHOLD_PERCENTAGE_NOT_LOCALIZED = 30

        for filename in FindFiles().find(self.temp_dir, "*.po"):
            poFile = pofile(filename)
            invalid = 0
            not_localized = 0

            for entry in poFile:
                if entry.msgid == entry.msgstr:
                    # Developers names, untranslated country names, etc.
                    # are counted but not inspected for invalid chars
                    not_localized += 1
                else:
                    invalid += sum(1 for char in entry.msgstr.lower()
                                   if char in invalid_chars)

            total = len(poFile)
            if total < 100:
                continue

            if invalid > 0:
                invalid_pct = 100.0 * invalid / total
                if invalid_pct > THRESHOLD_PERCENTAGE_INVALID_CHARS:
                    message = "Unsual number of invalid chars at {0} ({1:.2f}%)"
                    print(message.format(filename, invalid_pct))

            if not_localized > 0:
                untranslated_pct = 100.0 * not_localized / total
                if untranslated_pct > THRESHOLD_PERCENTAGE_NOT_LOCALIZED:
                    message = "Unsual number of untranslated strings at {0} ({1:.2f}%)"
                    print(message.format(filename, untranslated_pct))

    except Exception as detail:
        print(detail)
def _check_number_of_files(self, tm_filename, extensions, expected_files,
                           minimum_size):
    """
        Check size and count of the files that FindFiles yields for
        ``extensions`` in self.temp_dir.

        One error is recorded in self.errors (and a message printed) for
        each file whose os.path.getsize is below ``minimum_size``, plus one
        if the number of files differs from ``expected_files``.
    """
    size_template = 'File {0} has size {1} but expected was at least {2}'
    count_template = '{0} expected {1} files but contains {2}'

    count = 0
    finder = FindFiles()
    for count, path in enumerate(finder.find(self.temp_dir, extensions), 1):
        size_on_disk = os.path.getsize(path)
        if size_on_disk < minimum_size:
            self.errors += 1
            print(size_template.format(path, size_on_disk, minimum_size))

    if count != expected_files:
        self.errors += 1
        print(count_template.format(tm_filename, expected_files, count))
def process(self):
    """
        Build the per-file term dictionaries from the PO files in
        self.directory and write a UTF-8 log of the selected strings to
        corpus.txt.

        Steps:
          1. Load the stop words from stop-words/stop-words.txt through
             self._read_stop_words.
          2. For every '*.po' file yielded by FindFiles, walk its translated
             entries, clean source and target with self._clean_string and
             keep only those accepted by self._should_select_string.
          3. For each kept pair, write a line to corpus.txt, add the source
             to self.source_words and collect the target under its source.
          4. Store the collected terms in self.documents[filename].

        Updates the counters self.strings, self.strings_selected and
        self.files. Exceptions are not caught here and propagate to the
        caller.
    """
    import io

    # The handle is closed right after the call; this assumes
    # _read_stop_words consumes the file before returning - TODO confirm.
    with open("stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # io.open with an explicit encoding produces the same UTF-8 bytes as
    # the previous manual encode() on Python 2, and also works on Python 3
    # where writing bytes to a text-mode file raises TypeError. 'with'
    # closes the file even when an exception propagates.
    with io.open('corpus.txt', 'w', encoding='utf-8') as f:
        for filename in findFiles.find(self.directory, '*.po'):
            print("Reading: " + filename)
            po_file = polib.pofile(filename)

            terms = {}
            for entry in po_file.translated_entries():
                self.strings += 1

                msgid = self._clean_string(entry.msgid)
                msgstr = self._clean_string(entry.msgstr)

                if not self._should_select_string(msgid, msgstr):
                    continue

                self.strings_selected += 1

                log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                log = log.format(msgid, entry.msgid, msgstr,
                                 entry.msgstr, filename)
                f.write(log)

                self.source_words.add(msgid)
                # One source may have several translations in a file
                terms.setdefault(msgid, []).append(msgstr)

            self.documents[filename] = terms
            self.files += 1
def process(self):
    """
        Build the per-file term dictionaries from the PO files in
        self.directory and write a log of the selected strings to corpus.txt.

        Steps:
          1. Load the stop words from terminology/stop-words/stop-words.txt
             through self._read_stop_words.
          2. For every "*.po" file yielded by FindFiles, walk its translated
             entries, clean source and target with self._clean_string and
             keep only those accepted by self._should_select_string.
          3. For each kept pair, write a line to corpus.txt, add the source
             to self.source_words and collect the target under its source.
          4. Store the collected terms in self.documents[filename].

        Updates the counters self.strings, self.strings_selected and
        self.files. Exceptions are not caught here and propagate to the
        caller.
    """
    # The handle is closed right after the call; this assumes
    # _read_stop_words consumes the file before returning - TODO confirm.
    with open("terminology/stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # 'with' closes corpus.txt even when an exception propagates; the
    # previous explicit close() was skipped in that case.
    with open("corpus.txt", "w") as f:
        for filename in findFiles.find(self.directory, "*.po"):
            print("Reading: " + filename)
            po_file = polib.pofile(filename)

            terms = {}
            for entry in po_file.translated_entries():
                self.strings += 1

                msgid = self._clean_string(entry.msgid)
                msgstr = self._clean_string(entry.msgstr)

                if not self._should_select_string(msgid, msgstr):
                    continue

                self.strings_selected += 1

                log = u"source:{0} ({1}) - target:{2} ({3}) - {4}\n"
                log = log.format(msgid, entry.msgid, msgstr,
                                 entry.msgstr, filename)
                f.write(log)

                self.source_words.add(msgid)
                # One source may have several translations in a file
                terms.setdefault(msgid, []).append(msgstr)

            self.documents[filename] = terms
            self.files += 1
def _check_pofiles_content(self):
    """
        Check if by mistake we have included non Catalan language strings
        in the translation memories.

        Every "*.po" file that FindFiles yields for self.temp_dir is loaded
        and the characters of its localized entries are compared against a
        set of characters that are not expected in Catalan text. When a file
        has more than 100 entries and the number of such characters, taken
        as a percentage of len(poFile), exceeds THRESHOLD_PERCENTAGE,
        self.errors is incremented and a warning is printed.

        Any exception raised while scanning is printed and ends the scan;
        it is not re-raised.
    """
    # The list of invalid chars is specific to Catalan language
    invalid_chars = {
        u'á', u'ñ', u'ë', u'ù', u'â', u'ê', u'î', u'ô', u'û', u'ÿ', u'ä', u'ö'
    }
    THRESHOLD_PERCENTAGE = 1

    try:
        findFiles = FindFiles()
        for filename in findFiles.find(self.temp_dir, "*.po"):
            poFile = pofile(filename)
            invalid = 0

            for entry in poFile:
                # Only localized segments. Skips developers names,
                # untranslated country names, etc
                if entry.msgid == entry.msgstr:
                    continue

                invalid += sum(1 for char in entry.msgstr.lower()
                               if char in invalid_chars)

            if len(poFile) > 100 and invalid > 0:
                # Note: the ratio is invalid *characters* over len(poFile),
                # so it can be larger than 100.
                percentage = 100.0 * invalid / len(poFile)
                if percentage > THRESHOLD_PERCENTAGE:
                    self.errors = self.errors + 1
                    # Call form of print works on Python 2 and Python 3
                    print("Unsual number of invalid chars at {0} ({1}%)".format(
                        filename, str(percentage)))

    except Exception as detail:
        print(detail)
def _clean_pos(self, directory):
    """
        Call remove() on every file that FindFiles yields for the '*.po'
        pattern in ``directory``.
    """
    po_paths = FindFiles().find(directory, '*.po')
    for po_path in po_paths:
        remove(po_path)
def _clean_pos(self, directory):
    """
        Remove the '*.po' files that FindFiles reports for ``directory``.
    """
    finder = FindFiles()
    matches = finder.find(directory, '*.po')

    for stale_po in matches:
        remove(stale_po)