def _check_pofiles_content(self):
        """
            Check if by mistake we have included non Catalan language
            strings in the translation memories.

            Every "*.po" file returned by FindFiles for self.temp_dir is
            loaded and the characters listed in invalid_chars are counted
            in the lower-cased msgstr of each entry whose msgstr differs
            from its msgid. For files with more than 100 entries, when the
            number of such characters per 100 entries exceeds
            THRESHOLD_PERCENTAGE, self.errors is incremented and a warning
            is printed.

            Any exception is caught and printed; because the handler wraps
            the whole loop, the remaining files are not checked after a
            failure.
        """

        # The list of invalid chars is specific to Catalan language
        invalid_chars = {u"á", u"ñ", u"ë", u"ù", u"â", u"ê", u"î", u"ô", u"û", u"ÿ", u"ä", u"ö"}

        try:

            THRESHOLD_PERCENTAGE = 1
            findFiles = FindFiles()
            for filename in findFiles.find(self.temp_dir, "*.po"):
                poFile = pofile(filename)

                invalid = 0
                for entry in poFile:
                    # Only localized segments. Skips developers names,
                    # untranslated country names, etc
                    if entry.msgid == entry.msgstr:
                        continue

                    # Lower-casing lets the lower-case set above also catch
                    # the upper-case forms of the same letters
                    invalid += sum(1 for char in entry.msgstr.lower() if char in invalid_chars)

                if len(poFile) > 100 and invalid > 0:
                    # Invalid characters per 100 entries of the file
                    percentage = 100.0 * invalid / len(poFile)
                    if percentage > THRESHOLD_PERCENTAGE:
                        self.errors = self.errors + 1
                        # print(...) with one argument behaves the same on
                        # Python 2 and Python 3
                        print("Unsual number of invalid chars at {0} ({1}%)".format(filename, str(percentage)))

        except Exception as detail:
            print(detail)
    def _get_po_entries(self, directory):
        """
            Return the total number of translated entries over all the
            '*.po' files that FindFiles reports for directory (0 when no
            file is reported).
        """
        po_paths = FindFiles().find(directory, '*.po')
        return sum(len(pofile(path).translated_entries()) for path in po_paths)
    def _get_po_entries(self, directory):
        """
            Count the translated entries of every '*.po' file that
            FindFiles reports for directory and return the sum.
        """
        finder = FindFiles()
        per_file_counts = (
            len(pofile(po_path).translated_entries())
            for po_path in finder.find(directory, '*.po')
        )
        return sum(per_file_counts)
Exemplo n.º 4
0
    def process(self):
        """
            Build the per-file term dictionaries from the PO files.

            The stop-words file is handed to self._read_stop_words, then
            every '*.po' file that FindFiles reports for self.directory is
            parsed with polib. For each translated entry the msgid and
            msgstr are cleaned, filtered with self._should_select_string,
            and the selected pairs are:

              * written as one line to 'corpus.txt',
              * added to self.source_words (msgid only),
              * collected in a dict mapping msgid to the list of its
                translations, stored at self.documents[filename].

            self.strings, self.strings_selected and self.files are
            incremented along the way. A failure while handling one file is
            logged with logging.error and processing continues with the
            next file.
        """
        # Context managers guarantee both files are closed, even when the
        # file search or the stop-words reader raises.
        with open("terminology/stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        # Explicit UTF-8: the log lines carry translated text, and with a
        # non-UTF-8 locale default the write would raise UnicodeEncodeError,
        # which the handler below would misreport as "Cannot read".
        with open('corpus.txt', 'w', encoding='utf-8') as f:
            for filename in findFiles.find(self.directory, '*.po'):

                try:
                    print("Reading: " + filename)

                    po_file = polib.pofile(filename)

                    terms = {}
                    for entry in po_file.translated_entries():
                        self.strings += 1

                        msgid = self._clean_string(entry.msgid)
                        msgstr = self._clean_string(entry.msgstr)

                        if not self._should_select_string(msgid, msgstr):
                            continue

                        msgstr = self._clean_localized(msgstr)

                        self.strings_selected += 1

                        log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                        log = log.format(msgid, entry.msgid, msgstr,
                                         entry.msgstr, filename)

                        f.write(log)

                        self.source_words.add(msgid)
                        # One list of translations per source term
                        terms.setdefault(msgid, []).append(msgstr)

                    self.documents[filename] = terms
                    self.files += 1
                except Exception as detail:
                    logging.error("Cannot read {0}:{1}".format(
                        filename, str(detail)))
    def _check_number_of_files(self, tm_filename, extensions, expected_files, minimum_size):
        """
            Verify the files that FindFiles reports for self.temp_dir and
            the given extensions pattern.

            self.errors is incremented, and a message printed, once for
            every file whose size is below minimum_size and once more when
            the number of files differs from expected_files. tm_filename is
            only used in the latter message.
        """
        found = 0
        for path in FindFiles().find(self.temp_dir, extensions):
            found += 1

            actual_size = os.path.getsize(path)
            if actual_size < minimum_size:
                message = "File {0} has size {1} but expected was at least {2}"
                self.errors += 1
                print(message.format(path, actual_size, minimum_size))

        if found == expected_files:
            return

        self.errors += 1
        print("{0} expected {1} files but contains {2}".format(tm_filename, expected_files, found))
Exemplo n.º 6
0
    def process(self):
        """
            Build the per-file term dictionaries from the PO files.

            The stop-words file is handed to self._read_stop_words, then
            every '*.po' file that FindFiles reports for self.directory is
            parsed with polib. For each translated entry the msgid and
            msgstr are cleaned with self._clean_string and filtered with
            self._should_select_string. Each selected pair is written as
            one line to 'corpus.txt', its msgid is added to
            self.source_words, and its msgstr is appended to the list kept
            for that msgid in the dict stored at self.documents[filename].

            self.strings, self.strings_selected and self.files are
            incremented along the way. A failure while handling one file is
            logged with logging.error and processing continues with the
            next file.
        """
        # Context managers guarantee both files are closed, even when the
        # file search or the stop-words reader raises.
        with open("terminology/stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        # Explicit UTF-8: the log lines carry translated text, and with a
        # non-UTF-8 locale default the write would raise UnicodeEncodeError,
        # which the handler below would misreport as "Cannot read".
        with open('corpus.txt', 'w', encoding='utf-8') as f:
            for filename in findFiles.find(self.directory, '*.po'):

                try:
                    print("Reading: " + filename)

                    po_file = polib.pofile(filename)

                    terms = {}
                    for entry in po_file.translated_entries():
                        self.strings += 1

                        msgid = self._clean_string(entry.msgid)
                        msgstr = self._clean_string(entry.msgstr)

                        if not self._should_select_string(msgid, msgstr):
                            continue

                        self.strings_selected += 1

                        log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                        log = log.format(msgid, entry.msgid, msgstr,
                                         entry.msgstr, filename)

                        f.write(log)

                        self.source_words.add(msgid)
                        # One list of translations per source term
                        terms.setdefault(msgid, []).append(msgstr)

                    self.documents[filename] = terms
                    self.files += 1
                except Exception as detail:
                    logging.error("Cannot read {0}:{1}".format(filename, str(detail)))
Exemplo n.º 7
0
    def _check_pofiles_content(self):
        """
            Sanity check of the "*.po" files that FindFiles reports for
            self.temp_dir, meant to spot non Catalan strings included in
            the translation memories by mistake.

            Files with fewer than 100 entries are ignored. For the others a
            warning is printed when the number of invalid characters, or
            the number of entries whose msgstr equals its msgid, exceeds
            its threshold (both expressed per 100 entries). Any exception
            is caught and printed, which also ends the scan.
        """

        # Characters that should not show up in Catalan text
        invalid_chars = {
            'á', 'ñ', 'ë', 'ù', 'â', 'ê', 'î', 'ô', 'û', 'ÿ', 'ä', 'ö',
        }

        MIN_ENTRIES = 100
        MAX_PCT_INVALID_CHARS = 1
        MAX_PCT_NOT_LOCALIZED = 30

        try:
            for po_path in FindFiles().find(self.temp_dir, "*.po"):
                catalog = pofile(po_path)

                bad_chars = 0
                untranslated = 0
                for entry in catalog:
                    # Entries identical to their source (developer names,
                    # untranslated country names, ...) are only counted
                    if entry.msgid == entry.msgstr:
                        untranslated += 1
                        continue

                    bad_chars += sum(
                        1 for ch in entry.msgstr.lower() if ch in invalid_chars
                    )

                total = len(catalog)
                if total < MIN_ENTRIES:
                    continue

                # A zero count yields 0.0, which never exceeds a threshold
                bad_pct = 100.0 * bad_chars / total
                if bad_pct > MAX_PCT_INVALID_CHARS:
                    warning = "Unsual number of invalid chars at {0} ({1:.2f}%)"
                    print(warning.format(po_path, bad_pct))

                untranslated_pct = 100.0 * untranslated / total
                if untranslated_pct > MAX_PCT_NOT_LOCALIZED:
                    warning = "Unsual number of untranslated strings at {0} ({1:.2f}%)"
                    print(warning.format(po_path, untranslated_pct))

        except Exception as detail:
            print(detail)
Exemplo n.º 8
0
    def _check_number_of_files(self, tm_filename, extensions, expected_files,
                               minimum_size):
        """
            Check the amount and the size of the files that FindFiles
            reports for self.temp_dir and the extensions pattern.

            Each file smaller than minimum_size, and a file count other
            than expected_files, adds one to self.errors and prints a
            message. tm_filename only appears in the count message.
        """
        too_small = 'File {0} has size {1} but expected was at least {2}'
        wrong_count = '{0} expected {1} files but contains {2}'

        seen = 0
        for path in FindFiles().find(self.temp_dir, extensions):
            seen += 1

            nbytes = os.path.getsize(path)
            if nbytes < minimum_size:
                self.errors += 1
                print(too_small.format(path, nbytes, minimum_size))

        if seen != expected_files:
            self.errors += 1
            print(wrong_count.format(tm_filename, expected_files, seen))
Exemplo n.º 9
0
    def process(self):
        """
            Build the per-file term dictionaries from the PO files.

            The stop-words file is handed to self._read_stop_words, then
            every '*.po' file that FindFiles reports for self.directory is
            parsed with polib. For each translated entry the msgid and
            msgstr are cleaned with self._clean_string and filtered with
            self._should_select_string. Each selected pair is written as
            one UTF-8 line to 'corpus.txt', its msgid is added to
            self.source_words, and its msgstr is appended to the list kept
            for that msgid in the dict stored at self.documents[filename].

            self.strings, self.strings_selected and self.files are
            incremented along the way. Errors raised while reading a file
            are not handled here and propagate to the caller.
        """
        # Local import: io.open gives a text file with an explicit encoding
        # on both Python 2 and Python 3. Writing log.encode('utf-8') to a
        # text-mode file from the builtin open() raises TypeError on
        # Python 3.
        import io

        # Context managers guarantee both files are closed, even when a PO
        # file cannot be parsed and the exception propagates.
        with open("stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        with io.open('corpus.txt', 'w', encoding='utf-8') as f:
            for filename in findFiles.find(self.directory, '*.po'):
                print("Reading: " + filename)

                po_file = polib.pofile(filename)

                terms = {}
                for entry in po_file.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    self.strings_selected += 1

                    log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                    log = log.format(msgid, entry.msgid, msgstr, entry.msgstr,
                                     filename)

                    # The file object performs the UTF-8 encoding
                    f.write(log)

                    self.source_words.add(msgid)
                    # One list of translations per source term
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
Exemplo n.º 10
0
    def process(self):
        """
            Build the per-file term dictionaries from the PO files.

            The stop-words file is handed to self._read_stop_words, then
            every "*.po" file that FindFiles reports for self.directory is
            parsed with polib. For each translated entry the msgid and
            msgstr are cleaned with self._clean_string and filtered with
            self._should_select_string. Each selected pair is written as
            one line to "corpus.txt", its msgid is added to
            self.source_words, and its msgstr is appended to the list kept
            for that msgid in the dict stored at self.documents[filename].

            self.strings, self.strings_selected and self.files are
            incremented along the way. Errors raised while reading a file
            are not handled here and propagate to the caller.
        """
        # Context managers guarantee both files are closed, even when a PO
        # file cannot be parsed and the exception propagates.
        with open("terminology/stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        # Explicit UTF-8: the log lines carry translated text, which a
        # non-UTF-8 locale default encoding may be unable to represent.
        with open("corpus.txt", "w", encoding="utf-8") as f:
            for filename in findFiles.find(self.directory, "*.po"):
                print("Reading: " + filename)

                po_file = polib.pofile(filename)

                terms = {}
                for entry in po_file.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    self.strings_selected += 1

                    log = u"source:{0} ({1}) - target:{2} ({3}) - {4}\n"
                    log = log.format(msgid, entry.msgid, msgstr, entry.msgstr, filename)

                    f.write(log)

                    self.source_words.add(msgid)
                    # One list of translations per source term
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
Exemplo n.º 11
0
    def _check_pofiles_content(self):
        """
            Check if by mistake we have included non Catalan language
            strings in the translation memories.

            Every "*.po" file returned by FindFiles for self.temp_dir is
            loaded and the characters listed in invalid_chars are counted
            in the lower-cased msgstr of each entry whose msgstr differs
            from its msgid. For files with more than 100 entries, when the
            number of such characters per 100 entries exceeds
            THRESHOLD_PERCENTAGE, self.errors is incremented and a warning
            is printed.

            Any exception is caught and printed; because the handler wraps
            the whole loop, the remaining files are not checked after a
            failure.
        """

        # The list of invalid chars is specific to Catalan language
        invalid_chars = {
            u'á', u'ñ', u'ë', u'ù', u'â', u'ê', u'î', u'ô', u'û', u'ÿ',
            u'ä', u'ö'
        }

        try:

            THRESHOLD_PERCENTAGE = 1
            findFiles = FindFiles()
            for filename in findFiles.find(self.temp_dir, "*.po"):
                poFile = pofile(filename)

                invalid = 0
                for entry in poFile:
                    # Only localized segments. Skips developers names,
                    # untranslated country names, etc
                    if entry.msgid == entry.msgstr:
                        continue

                    # Lower-casing lets the lower-case set above also catch
                    # the upper-case forms of the same letters
                    invalid += sum(1 for char in entry.msgstr.lower()
                                   if char in invalid_chars)

                if len(poFile) > 100 and invalid > 0:
                    # Invalid characters per 100 entries of the file
                    percentage = 100.0 * invalid / len(poFile)
                    if percentage > THRESHOLD_PERCENTAGE:
                        self.errors = self.errors + 1
                        # print(...) with one argument behaves the same on
                        # Python 2 and Python 3
                        print("Unsual number of invalid chars at {0} ({1}%)".
                              format(filename, str(percentage)))

        except Exception as detail:
            print(detail)
 def _clean_pos(self, directory):
     """Call remove() on every '*.po' path that FindFiles reports for directory."""
     for po_path in FindFiles().find(directory, '*.po'):
         remove(po_path)
 def _clean_pos(self, directory):
     """Delete, via remove(), each '*.po' path that FindFiles reports for directory."""
     finder = FindFiles()
     po_paths = finder.find(directory, '*.po')
     for stale_po in po_paths:
         remove(stale_po)