def _context_of_message(self, message):
    """Work out the context parts of a ``<message>`` node.

    Two values are derived:

    * ``context_name`` -- the text of the ``<name>`` element found under the
      message's parent node, passed through ``escape_context``; an empty
      list when that element has no first child or the child has no value.
    * ``comment`` -- the text of the message's own ``<comment>`` element,
      passed through ``escape_context``; an empty list when the text is
      empty or a ``LinguistParseError`` is raised while reading it.
    """
    # NOTE(review): the visible body stops after computing ``comment`` and
    # returns nothing -- the definition looks cut off at this point; confirm
    # against the complete file before relying on the return value.
    name_element = _getElementByTagName(message.parentNode, "name")
    name_child = name_element.firstChild
    context_name = []
    if name_child and name_child.nodeValue:
        context_name = escape_context([name_child.nodeValue])

    # <comment> also distinguishes entries; any parse error while reading
    # it simply means "no comment".
    try:
        comment_node = _getElementByTagName(message, "comment")
        comment_text = _getText(comment_node.childNodes)
        comment = escape_context([comment_text]) if comment_text else []
    except LinguistParseError:
        comment = []
def _context_of_message(message):
    """Work out the context parts of a ``<message>`` node.

    ``context_name`` is built from the ``<name>`` element under the
    message's parent node and ``comment`` from the message's own
    ``<comment>`` element.  Both are run through ``escape_context`` as
    one-item lists, and both fall back to an empty list when there is no
    text (or, for the comment, when a ``LinguistParseError`` is raised).
    """
    # NOTE(review): nothing is returned by the visible body -- the
    # definition appears truncated after ``comment`` is computed; confirm
    # against the complete file.
    parent = message.parentNode
    name_node = _getElementByTagName(parent, "name").firstChild
    if name_node and name_node.nodeValue:
        context_name = escape_context([name_node.nodeValue])
    else:
        context_name = []

    try:
        text = _getText(_getElementByTagName(message, "comment").childNodes)
        if not text:
            comment = []
        else:
            comment = escape_context([text])
    except LinguistParseError:
        # Reading the <comment> failed: treat it as absent.
        comment = []
def _update_plural_hashes(self, translations, content):
    """Swap the plural forms of every plural entry for hash placeholders.

    Each entry of ``self.po`` that has a ``msgid_plural`` gets its
    ``msgstr_plural`` replaced by a dict mapping index ``p`` to
    ``"<hash>_pl_<p>"``, with one index per plural rule number reported by
    ``self.language``.  The hash comes from ``hash_tag`` applied to the
    entry's msgid and its escaped context (``''`` when there is none).

    ``translations`` and ``content`` are accepted but not used here.

    Returns the unicode rendering of ``self.po``.
    """
    for entry in self.po:
        # Only pluralized entries carry msgstr_plural placeholders.
        if not entry.msgid_plural:
            continue

        # Rule numbers of the target language; one placeholder is created
        # per rule, indexed from 0 up to the last one.
        lang_rules = self.language.get_pluralrules_numbers()
        context = escape_context(entry.msgctxt) or ''
        string_hash = hash_tag(entry.msgid, context)

        entry.msgstr_plural = dict(
            (index, "%s_pl_%d" % (string_hash, index))
            for index in range(len(lang_rules))
        )

    return unicode(self.po)
for entry in self._po: pluralized = False same_nplural = True # skip obsolete entries if entry.obsolete: continue # treat fuzzy translation as nonexistent if "fuzzy" in entry.flags: if not is_source: if not entry.msgid_plural: self._add_suggestion_string( entry.msgid, entry.msgstr, context=escape_context(entry.msgctxt) or '', occurrences=self._serialize_occurrences(entry.occurrences) ) continue else: # Drop fuzzy flag from template entry.flags.remove("fuzzy") if entry.msgid_plural: pluralized = True if is_source: nplural_file = len(entry.msgstr_plural.keys()) if nplural_file != 2: raise PoParseError("Your source file is not a POT file and" " the translation file you're using has more" " than two plurals which is not supported."
# This needed to be commented out due the 'is_source' parameter. # When is_source=True we return the value of the <source> node as the # translation for the given file, instead of the <translation> node(s). #stringset.target_language = language #language = get_attribute(root, "language", die = STRICT) i = 1 # There can be many <message> elements, they might have # 'encoding' or 'numerus' = 'yes' | 'no' attributes # if 'numerus' = 'yes' then 'translation' element contains 'numerusform' elements for context in root.getElementsByTagName("context"): context_name_element = _getElementByTagName(context, "name") if context_name_element.firstChild: if context_name_element.firstChild.nodeValue: context_name = escape_context( [context_name_element.firstChild.nodeValue]) else: context_name = [] else: context_name = [] for message in context.getElementsByTagName("message"): occurrences = [] # NB! There can be zero to many <location> elements, but all # of them must have 'filename' and 'line' attributes for location in message.getElementsByTagName("location"): if location.attributes.has_key("filename") and \ location.attributes.has_key("line"): occurrences.append("%s:%i" % ( location.attributes["filename"].value,
def parse_file(self, is_source=False, lang_rules=None): """ Parses Qt file and exports all entries as GenericTranslations. """ fh = open(self.filename, "ru") buf = fh.read() fh.close() def clj(s, w): return s[:w].replace("\n", " ").ljust(w) if lang_rules: nplural = len(lang_rules) else: nplural = self.language.get_pluralrules_numbers() doc = xml.dom.minidom.parseString(buf) if hasattr(doc, 'doctype') and hasattr(doc.doctype, 'name'): if doc.doctype.name != "TS": raise LinguistParseError("Incorrect doctype!") else: raise LinguistParseError("Uploaded file has no Doctype!") root = doc.documentElement if root.tagName != "TS": raise LinguistParseError("Root element is not 'TS'") stringset = StringSet() suggestions = StringSet() # This needed to be commented out due the 'is_source' parameter. # When is_source=True we return the value of the <source> node as the # translation for the given file, instead of the <translation> node(s). #stringset.target_language = language #language = get_attribute(root, "language", die = STRICT) i = 1 # There can be many <message> elements, they might have # 'encoding' or 'numerus' = 'yes' | 'no' attributes # if 'numerus' = 'yes' then 'translation' element contains 'numerusform' elements for context in root.getElementsByTagName("context"): context_name_element = _getElementByTagName(context, "name") if context_name_element.firstChild: if context_name_element.firstChild.nodeValue: context_name = escape_context( [context_name_element.firstChild.nodeValue]) else: context_name = [] else: context_name = [] for message in context.getElementsByTagName("message"): occurrences = [] # NB! 
There can be zero to many <location> elements, but all # of them must have 'filename' and 'line' attributes for location in message.getElementsByTagName("location"): if location.attributes.has_key("filename") and \ location.attributes.has_key("line"): occurrences.append("%s:%i" % ( location.attributes["filename"].value, int(location.attributes["line"].value))) elif STRICT: raise LinguistParseError("Malformed 'location' element") pluralized = False if message.attributes.has_key("numerus") and \ message.attributes['numerus'].value=='yes': pluralized = True source = _getElementByTagName(message, "source") try: translation = _getElementByTagName(message, "translation") except LinguistParseError: translation = None try: ec_node = _getElementByTagName(message, "extracomment") extracomment = _getText(ec_node.childNodes) except LinguistParseError, e: extracomment = None # <commend> in ts files are also used to distinguish entries, # so we append it to the context to make the entry unique try: c_node = _getElementByTagName(message, "comment") comment_text = _getText(c_node.childNodes) if comment_text: comment = escape_context([comment_text]) else: comment = [] except LinguistParseError, e: comment = [] status = None if source.firstChild: sourceString = _getText(source.childNodes) else: sourceString = None # WTF? # Check whether the message is using logical id if message.attributes.has_key("id"): sourceStringText = sourceString sourceString = message.attributes['id'].value else: sourceStringText = None same_nplural = True obsolete, fuzzy = False, False messages = [] if is_source: if translation and translation.attributes.has_key("variants") and \ translation.attributes['variants'].value == 'yes': logger.error("Source file has unsupported" " variants.") raise LinguistParseError("Qt Linguist variants are" " not yet supported.") # Skip obsolete strings. 
if translation and translation.attributes.has_key("type"): status = translation.attributes["type"].value.lower() if status == "obsolete": continue translation_text = None if translation: translation_text = _getText(translation.childNodes) messages = [(5, translation_text or sourceStringText or sourceString)] # remove unfinished/obsolete attrs from template if translation and translation.attributes.has_key("type"): status = translation.attributes["type"].value.lower() if status == "unfinished": del translation.attributes["type"] if pluralized: if translation: try: numerusforms = translation.getElementsByTagName('numerusform') messages = [] for n,f in enumerate(numerusforms): if numerusforms[n].attributes.has_key("variants") and \ numerusforms[n].attributes['variants'].value == 'yes': logger.error("Source file has unsupported" " variants.") raise LinguistParseError("Source file" " could not be imported: Qt Linguist" " variants are not supported.") for n,f in enumerate(numerusforms): if numerusforms[n].attributes.has_key("variants") and \ numerusforms[n].attributes['variants'].value == 'yes': continue for n,f in enumerate(numerusforms): nf=numerusforms[n] messages.append((nplural[n], _getText(nf.childNodes) or sourceStringText or sourceString )) except LinguistParseError, e: pass else: plural_numbers = self.language.get_pluralrules_numbers() for p in plural_numbers: if p != 5: messages.append((p, sourceStringText or sourceString))
def parse_tag_trans_unit(self, trans_unit_node, is_source=False, context=None,
        source_string=None, rule=None):
    """Parse one ``<trans-unit>`` node and record it in ``self.stringset_``.

    For a source file (``is_source=True``) the unit's source text becomes
    the translation, and a ``<target>`` holding a hash placeholder
    (``<hash>_tr`` or ``<hash>_pl_<rule>``) is inserted right after
    ``<source>`` in the document.  For a translation file the text of the
    first ``<target>`` is used; units without a translation are skipped.

    Parameters:
        trans_unit_node: DOM element of the ``<trans-unit>``.
        is_source: whether the file being parsed is the source file.
        context: optional list of context strings.  When given it is
            extended in place with the values of the unit's
            ``<context-group>`` children; when omitted a fresh list is
            used for this call.
        source_string: when truthy, the unit is treated as one form of a
            plural entry and this value replaces the unit's own source.
        rule: plural rule used for pluralized units and, together with
            ``source_string``, for the plural hash placeholder.

    Returns:
        None.  The result is appended to ``self.stringset_.strings``.
    """
    # The old ``context=[]`` default was one list shared by every call, and
    # the ``extend`` below kept growing it, so context values of one unit
    # leaked into all later ones.  Use a new list per call instead.
    if context is None:
        context = []

    source_node = trans_unit_node.getElementsByTagName("source")[0]
    if len(source_node.childNodes) > 1:
        # Mixed content: keep inline markup by serialising every child.
        source = "".join(
            child.toxml() for child in source_node.childNodes)
    else:
        source = source_node.firstChild.data

    pluralized = bool(source_string)

    # Context groups are only collected for plain (non plural-form) units.
    if not source_string and not rule:
        for node in trans_unit_node.childNodes:
            if (node.nodeType == node.ELEMENT_NODE and
                    node.localName == "context-group"):
                context.extend(
                    self.parse_tag_context_group(node, is_source))
            # TODO prop-group, note, count-group, alt-trans
    # TODO seq-source
    context = escape_context(context)

    if is_source:
        translation = source
        if pluralized:
            source = source_string

        if source_string and rule:
            placeholder = "%(hash)s_pl_%(rule)s" % {
                'hash': hash_tag(source_string, context), 'rule': rule}
        else:
            placeholder = "%(hash)s_tr" % {
                'hash': hash_tag(source, context)}
        target = self.doc.createElement("target")
        target.childNodes = []
        target.appendChild(self.doc.createTextNode(placeholder))

        # Whitespace-only sources are not registered at all.
        if translation and not translation.strip():
            return

        # Reuse the node preceding <source> (normally its indentation) so
        # the new <target> lines up with it.  There is nothing to clone
        # when <source> is the first child of the unit.
        indent_node = None
        if source_node.previousSibling is not None:
            indent_node = source_node.previousSibling.cloneNode(True)

        if source_node.nextSibling:
            trans_unit_node.insertBefore(target, source_node.nextSibling)
            if indent_node is not None:
                # nextSibling is now the target, so the indent ends up
                # between <source> and <target>.
                trans_unit_node.insertBefore(
                    indent_node, source_node.nextSibling)
        else:
            if indent_node is not None:
                trans_unit_node.appendChild(indent_node)
            trans_unit_node.appendChild(target)
    else:
        if pluralized:
            source = source_string

        translation = u""
        target_list = trans_unit_node.getElementsByTagName("target")
        if target_list:
            target_node = target_list[0]
            if len(target_node.childNodes) > 1:
                translation = self._getText(target_node.childNodes)
            elif target_node.firstChild:
                translation = target_node.firstChild.data
        if not translation:
            return
        # TODO - do something with inline elements

    # Non-plural strings always use rule 5.
    self.stringset_.strings.append(GenericTranslation(
        source, translation,
        rule=rule if pluralized else 5,
        context=context, pluralized=pluralized,
        fuzzy=False, obsolete=False))
class LinguistHandler(SimpleCompilerFactory, Handler): name = "Qt4 TS parser" format = "Qt4 Translation XML files (*.ts)" method_name = 'QT' HandlerParseError = LinguistParseError HandlerCompileError = LinguistCompileError CompilerClass = QtCompiler def _escape(self, s): return xml_escape(s, {"'": "'", '"': '"'}) def _parse(self, is_source, lang_rules): """ Parses Qt file and exports all entries as GenericTranslations. """ def clj(s, w): return s[:w].replace("\n", " ").ljust(w) if lang_rules: nplural = len(lang_rules) else: nplural = self.language.get_pluralrules_numbers() try: doc = xml.dom.minidom.parseString( self.content.encode(self.format_encoding)) except Exception, e: logger.warning("QT parsing: %s" % e.message, exc_info=True) raise LinguistParseError( _("Your file doesn't seem to contain valid xml: %s!" % e.message)) if hasattr(doc, 'doctype') and hasattr(doc.doctype, 'name'): if doc.doctype.name != "TS": raise LinguistParseError(_("Incorrect doctype!")) else: raise LinguistParseError(_("Uploaded file has no Doctype!")) root = doc.documentElement if root.tagName != "TS": raise LinguistParseError(_("Root element is not 'TS'")) # This needed to be commented out due the 'is_source' parameter. # When is_source=True we return the value of the <source> node as the # translation for the given file, instead of the <translation> node(s). 
#stringset.target_language = language #language = get_attribute(root, "language", die = STRICT) i = 1 # There can be many <message> elements, they might have # 'encoding' or 'numerus' = 'yes' | 'no' attributes # if 'numerus' = 'yes' then 'translation' element contains 'numerusform' elements for context in root.getElementsByTagName("context"): context_name_element = _getElementByTagName(context, "name") if context_name_element.firstChild: if context_name_element.firstChild.nodeValue: context_name = escape_context( [context_name_element.firstChild.nodeValue]) else: context_name = [] else: context_name = [] for message in context.getElementsByTagName("message"): occurrences = [] # NB! There can be zero to many <location> elements, but all # of them must have 'filename' and 'line' attributes for location in message.getElementsByTagName("location"): if location.attributes.has_key("filename") and \ location.attributes.has_key("line"): occurrences.append( "%s:%i" % (location.attributes["filename"].value, int(location.attributes["line"].value))) elif STRICT: raise LinguistParseError( _("Malformed 'location' element")) pluralized = False if message.attributes.has_key("numerus") and \ message.attributes['numerus'].value=='yes': pluralized = True source = _getElementByTagName(message, "source") try: translation = _getElementByTagName(message, "translation") except LinguistParseError: translation = None try: ec_node = _getElementByTagName(message, "extracomment") extracomment = _getText(ec_node.childNodes) except LinguistParseError, e: extracomment = None # <commend> in ts files are also used to distinguish entries, # so we append it to the context to make the entry unique try: c_node = _getElementByTagName(message, "comment") comment_text = _getText(c_node.childNodes) if comment_text: comment = escape_context([comment_text]) else: comment = [] except LinguistParseError, e: comment = [] status = None if source.firstChild: sourceString = _getText(source.childNodes) else: 
sourceString = None # WTF? # Check whether the message is using logical id if message.attributes.has_key("id"): sourceStringText = sourceString sourceString = message.attributes['id'].value else: sourceStringText = None same_nplural = True obsolete, fuzzy = False, False messages = [] if is_source: if translation and translation.attributes.has_key("variants") and \ translation.attributes['variants'].value == 'yes': logger.error("Source file has unsupported" " variants.") raise LinguistParseError( _("Qt Linguist variants are" " not yet supported.")) # Skip obsolete strings. if translation and translation.attributes.has_key("type"): status = translation.attributes["type"].value.lower() if status == "obsolete": continue translation_text = None if translation: translation_text = _getText(translation.childNodes) messages = [(5, translation_text or sourceStringText or sourceString)] # remove unfinished/obsolete attrs from template if translation and translation.attributes.has_key("type"): status = translation.attributes["type"].value.lower() if status == "unfinished": del translation.attributes["type"] if pluralized: if translation: try: numerusforms = translation.getElementsByTagName( 'numerusform') messages = [] for n, f in enumerate(numerusforms): if numerusforms[n].attributes.has_key("variants") and \ numerusforms[n].attributes['variants'].value == 'yes': logger.error( "Source file has unsupported" " variants.") raise LinguistParseError( _("Source file" " could not be imported: Qt Linguist" " variants are not supported.")) for n, f in enumerate(numerusforms): if numerusforms[n].attributes.has_key("variants") and \ numerusforms[n].attributes['variants'].value == 'yes': continue for n, f in enumerate(numerusforms): nf = numerusforms[n] messages.append( (nplural[n], _getText(nf.childNodes) or sourceStringText or sourceString)) except LinguistParseError, e: pass else: plural_numbers = self.language.get_pluralrules_numbers( ) for p in plural_numbers: if p != 5: 
messages.append((p, sourceStringText or sourceString))