def _parse(self, is_source, lang_rules):
    """Parse DTD-style content and populate the stringset.

    Scans self.content for ``<!ENTITY name "value">`` definitions and
    ``<!-- comment -->`` blocks.  Each entity becomes a GenericTranslation;
    the comment immediately preceding an entity is attached to it.  When
    is_source is true, each entity value in the returned text is replaced
    by its hash placeholder (``"<hash>_tr"``) to form the template.
    """
    resource = self.resource
    context = ""
    text = self.content
    # XML NameStartChar / NameChar character classes (per the XML spec
    # production for names), used to build the entity-name regex.
    name_start_char = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
        u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF'+\
        u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
    name_char = name_start_char + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
    name = u'[' + name_start_char + u'][' + name_char + u']*'
    # Group 1 (via the outer re_tag group) is the whole match; the entity
    # pattern captures (name, quoted-value), the comment pattern captures
    # the comment body.
    re_entity = u'<!ENTITY\s+(' + name + u')\s+((?:\"[^\"]*\")|(?:\'[^\']*\'))\s*>'
    re_comment = u'\<!\s*--(.*?)(?:--\s*\>)'
    re_tag = re.compile("(%s|%s)" % (re_entity, re_comment), re.M|re.S|re.U)
    latest_comment = ""
    for (orig, key, value, comment) in re.findall(re_tag, text):
        if key:
            # value still carries its surrounding quotes; strip them
            # with [1:-1] before unescaping.
            self.stringset.strings.append(GenericTranslation(key,
                self._unescape(value[1:-1]),
                rule=5, # no plural forms
                context=context,
                comment=latest_comment,
                pluralized=False,
                fuzzy=False,
                obsolete=False))
            if is_source:
                # Replace the quoted value inside the matched entity with
                # the hash placeholder, then substitute the whole entity
                # back into the text.
                hashed_entity = orig.replace(value,
                    '"%(hash)s_tr"' % {'hash': hash_tag(key, context)}
                )
                text = text.replace(orig, hashed_entity)
            # A comment only applies to the entity that directly follows it.
            latest_comment = ""
        if comment:
            latest_comment = comment
    return text
def _parse(self, is_source, lang_rules):
    """Parse a .properties content and create a stringset with all
    entries in it.

    Consecutive comment lines directly above a key/value pair are joined
    and stored as that entry's comment.  When is_source is true, a
    template is built in which every value is replaced by its hash
    placeholder; the template is returned.
    """
    resource = self.resource
    context = ""
    self._find_linesep(self.content)
    template = u""
    lines = self._iter_by_line(self.content)
    comment_lines = []
    for line in lines:
        line = self._prepare_line(line)
        # Skip empty lines and comments
        if not line or line.startswith(self.comment_chars):
            if is_source:
                template += line + self.linesep
            if not line:
                # Reset comment block to zero, if newline happened.
                # That is to omit start Licence texts and such
                comment_lines = []
            else:
                # this is a comment, add it to the block
                # (drop the leading comment character)
                comment_lines.append(line[1:])
            continue
        # If the last character is a backslash
        # it has to be preceded by a space in which
        # case the next line is read as part of the
        # same property
        while line[-1] == '\\' and not self._is_escaped(line, -1):
            # Read next line
            nextline = self._prepare_line(lines.next())
            # This line will become part of the value
            line = line[:-1] + self._check_escaped_ws(nextline)
        key, value, old_value = self._key_value_from_line(line)
        if is_source:
            if not (value and value.strip()):
                template += line + self.linesep
                # Keys with no values should not be shown to translator
                continue
            else:
                # Substitute the raw (still escaped) value with the hash
                # placeholder, leaving the key/separator part untouched.
                key_len = len(key)
                template += line[:key_len] + re.sub(
                    re.escape(old_value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[key_len:]
                ) + self.linesep
        elif not SourceEntity.objects.filter(resource=resource,
                string=key).exists():
            # ignore keys with no translation
            continue
        self.stringset.add(GenericTranslation(
            key, self._unescape(value),
            context=context,
            comment="\n".join(comment_lines),
        ))
        # reset comment block, it has already been written
        comment_lines = []
    if is_source:
        # Drop the trailing line separator appended by the loop.
        template = template[:-1*(len(self.linesep))]
    return template
def parse_file(self, is_source=False, lang_rules=None):
    """
    Parse an INI file and create a stringset with all entries in the file.

    Reads self.filename as UTF-8, splits it on '\n', and collects
    ``key=value`` pairs.  When is_source is true, each value in the file
    buffer is replaced by its hash placeholder and the result is stored
    as self.template.
    """
    stringset = StringSet()
    suggestions = StringSet()
    fh = codecs.open(self.filename, "r", "utf-8")
    try:
        buf = fh.read()
    finally:
        fh.close()
    for line in buf.split('\n'):
        # Skip empty lines and comments
        if not line or line.startswith(self.comment_chars):
            continue
        try:
            source, trans = line.split('=', 1)
        except ValueError:
            # Maybe abort instead of skipping?
            logger.error('Could not parse line "%s". Skipping...' % line)
            continue
        # In versions >=1.6 translations are surrounded by double quotes. So remove them
        # Normally, if the translation starts with '"', it is a 1.6-file and must
        # end with '"', since translations starting with '"' are not allowed in 1.5.
        # But, let's check both the first and last character of the translation to be safe.
        if trans.startswith('"') and trans.endswith('"'):
            trans = trans[1:-1]
        # We use empty context
        context = ""
        if is_source:
            source_len = len(source)
            new_line = line[:source_len] + re.sub(
                re.escape(trans),
                "%(hash)s_tr" % {'hash': hash_tag(source, context)},
                line[source_len:]
            )
            # this looks fishy
            # NOTE(review): re.sub with the whole line as the pattern
            # replaces EVERY occurrence of an identical line in buf, not
            # just this one — verify duplicate lines cannot occur.
            buf = re.sub(re.escape(line), new_line, buf)
        stringset.strings.append(GenericTranslation(source, trans,
            rule=5,
            context=context,
            pluralized=False,
            fuzzy=False,
            obsolete=False))
    self.stringset = stringset
    self.suggestions = suggestions
    if is_source:
        self.template = str(buf.encode('utf-8'))
def _parse(self, is_source, lang_rules):
    """
    Parse an INI file and create a stringset with all entries in the file.

    Joomla flavour: the format version object (self.jformat) decides how
    a raw value is unescaped, and new-style (>=1.6) values are stripped
    of their surrounding double quotes.  The comment line directly above
    an entry is attached to it.  Returns the template buffer (with hash
    placeholders when is_source is true).
    """
    content = self.content
    # Detect old vs. new Joomla INI dialect from the content itself.
    self.jformat = JoomlaIniVersion.create(self.content)
    self._find_linesep(content)
    comment = ""
    buf = ''
    for line in self._iter_by_line(content):
        # Skip empty lines and comments
        if not line or line.startswith(self.comment_chars):
            if is_source:
                buf += line + self.linesep
            if line.startswith(self.comment_chars):
                # Remember the comment (minus its leading marker) for the
                # entry that follows.
                comment = line[1:] + self.linesep
            else:
                comment = ""
            continue
        try:
            source, trans = line.split('=', 1)
        except ValueError:
            # Maybe abort instead of skipping?
            logger.warning('Could not parse line "%s". Skipping...' % line)
            continue
        escaped_trans = self.jformat.get_translation(trans)
        if isinstance(self.jformat, JoomlaIniNew):
            # New-style values are wrapped in double quotes; drop them.
            trans = trans[1:-1]
        context = ""  # We use empty context
        if is_source:
            if not trans.strip():
                # Keys with empty values stay in the template as-is.
                buf += line + self.linesep
                continue
            source_len = len(source)
            new_line = line[:source_len] + re.sub(
                re.escape(trans),
                "%(hash)s_tr" % {'hash': hash_tag(source, context)},
                line[source_len:]
            )
            buf += new_line + self.linesep
        elif not SourceEntity.objects.filter(resource=self.resource,
                string=source).exists()\
                or not escaped_trans.strip():
            #ignore keys with no translation
            context = ""
            continue
        self._add_translation_string(source,
            self._unescape(escaped_trans), context=context, comment=comment)
        comment = ""
    # Trim everything after the last line separator.
    return buf[:buf.rfind(self.linesep)]
def _update_plural_hashes(self, translations, content):
    """Update plural hashes for the target language.

    For every PO entry that declares a msgid_plural, rebuild its
    msgstr_plural mapping so that rule index ``i`` maps to the
    placeholder ``"<hash>_pl_<i>"``, covering every plural rule of the
    target language.  Returns the serialized PO content.
    """
    for entry in self.po:
        # Only plural entries carry msgstr_plural.
        if not entry.msgid_plural:
            continue
        # All plural rule numbers of the target language.
        rules = self.language.get_pluralrules_numbers()
        base_hash = hash_tag(
            entry.msgid, escape_context(entry.msgctxt) or ''
        )
        placeholders = {}
        for idx in range(len(rules)):
            placeholders[idx] = "%s_pl_%d" % (base_hash, idx)
        entry.msgstr_plural = placeholders
    return unicode(self.po)
def _parse(self, is_source, lang_rules):
    """Parse a .properties content and create a stringset with all
    entries in it.

    When is_source is true, builds and returns a template in which each
    value is replaced by its hash placeholder; otherwise only keys that
    already exist as source entities are collected.
    """
    resource = self.resource
    context = ""
    self._find_linesep(self.content)
    template = u""
    lines = self._iter_by_line(self.content)
    for line in lines:
        line = self._prepare_line(line)
        # Skip empty lines and comments
        if not line or line.startswith(self.comment_chars):
            if is_source:
                template += line + self.linesep
            continue
        # If the last character is a backslash
        # it has to be preceded by a space in which
        # case the next line is read as part of the
        # same property
        while line[-1] == '\\' and not self._is_escaped(line, -1):
            # Read next line
            nextline = self._prepare_line(lines.next())
            # This line will become part of the value
            line = line[:-1] + self._check_escaped_ws(nextline)
        key, value, old_value = self._key_value_from_line(line)
        if is_source:
            if not (value and value.strip()):
                template += line + self.linesep
                # Keys with no values should not be shown to translator
                continue
            else:
                # Replace only the value portion (after the key) with the
                # hash placeholder.
                key_len = len(key)
                template += line[:key_len] + re.sub(
                    re.escape(old_value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[key_len:]) + self.linesep
        elif not SourceEntity.objects.filter(resource=resource,
                string=key).exists():
            # ignore keys with no translation
            continue
        self.stringset.add(
            GenericTranslation(key, self._unescape(value), context=context))
    if is_source:
        # Drop the trailing line separator appended by the loop.
        template = template[:-1 * (len(self.linesep))]
    return template
def _parse(self, is_source, lang_rules):
    """Parse a .properties content and create a stringset with all
    entries in it.

    When is_source is true, builds and returns a template in which each
    value is replaced by its hash placeholder; otherwise only keys that
    already exist as source entities are collected.
    """
    resource = self.resource
    context = ""
    self._find_linesep(self.content)
    template = u""
    lines = self._iter_by_line(self.content)
    for line in lines:
        line = self._prepare_line(line)
        # Skip empty lines and comments
        if not line or line.startswith(self.comment_chars):
            if is_source:
                template += line + self.linesep
            continue
        # If the last character is a backslash
        # it has to be preceded by a space in which
        # case the next line is read as part of the
        # same property
        while line[-1] == "\\" and not self._is_escaped(line, -1):
            # Read next line
            nextline = self._prepare_line(lines.next())
            # This line will become part of the value
            line = line[:-1] + self._check_escaped_ws(nextline)
        key, value, old_value = self._key_value_from_line(line)
        if is_source:
            if not (value and value.strip()):
                template += line + self.linesep
                # Keys with no values should not be shown to translator
                continue
            else:
                # Replace only the value portion (after the key) with the
                # hash placeholder.
                key_len = len(key)
                template += (
                    line[:key_len]
                    + re.sub(re.escape(old_value),
                             "%(hash)s_tr" % {"hash": hash_tag(key, context)},
                             line[key_len:])
                    + self.linesep
                )
        elif not SourceEntity.objects.filter(resource=resource,
                string=key).exists():
            # ignore keys with no translation
            continue
        self.stringset.add(
            GenericTranslation(key, self._unescape(value), context=context))
    if is_source:
        # Drop the trailing line separator appended by the loop.
        template = template[: -1 * (len(self.linesep))]
    return template
def compare_to_actual_file(self, handler, actual_file):
    """Compile the handler's template/stringset and compare the result
    against the contents of ``actual_file``.

    Mocks the compiler's source-string and translation lookups with the
    handler's in-memory stringset so no database access is needed.
    """
    template = handler.template
    compiler = handler.CompilerClass(handler.resource)
    compiler._tdecorator = Decorator(escape_func=handler._escape)
    compiler._examine_content(handler.template)
    compiler.language = handler.language
    # (index, hash) pairs stand in for DB-backed source strings.
    sources = [(idx, "%s" % hash_tag(s.source_entity, ""))
               for idx, s in enumerate(handler.stringset)]
    # Build the translations mapping directly from pairs (no
    # intermediate list needed).
    translations = dict((idx, s.translation)
                        for idx, s in enumerate(handler.stringset))
    with patch.object(compiler, '_get_source_strings') as smock:
        with patch.object(compiler, '_tset', create=True) as tmock:
            smock.return_value = sources
            tmock.return_value = translations
            compiler._compile(handler.template)
            template = compiler.compiled_template
    with open(actual_file, 'r') as f:
        actual_content = f.read()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(template, actual_content)
def _parse(self, is_source, lang_rules):
    """Parse wiki-style content into paragraph translation strings.

    Splits self.content into paragraphs on double line separators while
    skipping over ``{{...}}`` template markup.  Each paragraph is added
    to the stringset and replaced in the returned template by its hash
    placeholder.
    """
    self._find_linesep(self.content)
    par_splitter = self.linesep + self.linesep
    template_open = "{{"
    template_ends = "}}"
    template = self.content
    context = ''
    prev_split_pos = 0
    prev_text_pos = 0
    while 1:
        par_pos = self.content.find(par_splitter, prev_split_pos)
        t_open_pos = self.content.find(template_open, prev_split_pos)
        if prev_text_pos == -1:
            # The previous iteration consumed the rest of the document.
            break
        elif par_pos == -1 and t_open_pos == -1:
            # end of document: the remainder is the last paragraph
            source = trans = self.content[prev_text_pos:].strip()
            prev_text_pos = -1
        elif par_pos < t_open_pos or t_open_pos == -1:
            # A paragraph break comes before the next template opening.
            source = trans = self.content[prev_text_pos:par_pos].strip()
            if par_pos == -1:
                prev_split_pos = prev_text_pos = -1
            else:
                # BUGFIX: advance past the full separator.  The previous
                # hard-coded "+ 2" was only correct for a one-character
                # line separator and mis-parsed "\r\n" content.
                prev_split_pos = prev_text_pos = par_pos + len(par_splitter)
        else:
            # Inside {{...}} markup: jump past its closing braces.
            t_end_pos = self.content.find(template_ends, prev_split_pos + 1)
            prev_split_pos = t_end_pos
            continue
        if not source.strip('\n'):
            continue
        # (removed unused local: source_len)
        # Replace the paragraph in the template with its hash placeholder.
        template = re.sub(
            re.escape(trans),
            "%(hash)s_tr" % {'hash': hash_tag(source, context)},
            template
        )
        self.stringset.add(GenericTranslation(
            source, trans, context=context
        ))
    return template
def _parse(self, is_source, lang_rules):
    """Parse wiki-style content into paragraph translation strings.

    Splits self.content into paragraphs on double line separators while
    skipping over ``{{...}}`` template markup.  Each paragraph is added
    to the stringset and replaced in the returned template by its hash
    placeholder.
    """
    self._find_linesep(self.content)
    par_splitter = self.linesep + self.linesep
    template_open = "{{"
    template_ends = "}}"
    template = self.content
    context = ''
    prev_split_pos = 0
    prev_text_pos = 0
    while 1:
        par_pos = self.content.find(par_splitter, prev_split_pos)
        t_open_pos = self.content.find(template_open, prev_split_pos)
        if prev_text_pos == -1:
            # The previous iteration consumed the rest of the document.
            break
        elif par_pos == -1 and t_open_pos == -1:
            # end of document: the remainder is the last paragraph
            source = trans = self.content[prev_text_pos:].strip()
            prev_text_pos = -1
        elif par_pos < t_open_pos or t_open_pos == -1:
            # A paragraph break comes before the next template opening.
            source = trans = self.content[prev_text_pos:par_pos].strip()
            if par_pos == -1:
                prev_split_pos = prev_text_pos = -1
            else:
                # BUGFIX: advance past the full separator.  The previous
                # hard-coded "+ 2" was only correct for a one-character
                # line separator and mis-parsed "\r\n" content.
                prev_split_pos = prev_text_pos = par_pos + len(par_splitter)
        else:
            # Inside {{...}} markup: jump past its closing braces.
            t_end_pos = self.content.find(template_ends, prev_split_pos + 1)
            prev_split_pos = t_end_pos
            continue
        if not source.strip('\n'):
            continue
        # (removed unused local: source_len)
        # Replace the paragraph in the template with its hash placeholder.
        template = re.sub(
            re.escape(trans),
            "%(hash)s_tr" % {'hash': hash_tag(source, context)},
            template)
        self.stringset.add(
            GenericTranslation(source, trans, context=context))
    return template
def _parse(self, is_source, lang_rules):
    """Parse DTD-style content and populate the stringset.

    Scans self.content for ``<!ENTITY name "value">`` definitions and
    ``<!-- comment -->`` blocks.  Each entity becomes a GenericTranslation;
    the comment immediately preceding an entity is attached to it.  When
    is_source is true, each entity value in the returned text is replaced
    by its hash placeholder (``"<hash>_tr"``) to form the template.
    """
    resource = self.resource
    context = ""
    text = self.content
    # XML NameStartChar / NameChar character classes (per the XML spec
    # production for names), used to build the entity-name regex.
    name_start_char = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
        u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF'+\
        u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
    name_char = name_start_char + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
    name = u'[' + name_start_char + u'][' + name_char + u']*'
    re_entity = u'<!ENTITY\s+(' + name + u')\s+((?:\"[^\"]*\")|(?:\'[^\']*\'))\s*>'
    re_comment = u'\<!\s*--(.*?)(?:--\s*\>)'
    re_tag = re.compile("(%s|%s)" % (re_entity, re_comment),
                        re.M | re.S | re.U)
    latest_comment = ""
    for (orig, key, value, comment) in re.findall(re_tag, text):
        if key:
            # value still carries its surrounding quotes; strip them
            # with [1:-1] before unescaping.
            self.stringset.add(
                GenericTranslation(
                    key,
                    self._unescape(value[1:-1]),
                    rule=5,  # no plural forms
                    context=context,
                    comment=latest_comment,
                    pluralized=False,
                    fuzzy=False,
                    obsolete=False))
            if is_source:
                # Replace the quoted value inside the matched entity with
                # the hash placeholder, then substitute the whole entity
                # back into the text.
                hashed_entity = orig.replace(
                    value,
                    '"%(hash)s_tr"' % {'hash': hash_tag(key, context)})
                text = text.replace(orig, hashed_entity)
            # A comment only applies to the entity that directly follows it.
            latest_comment = ""
        if comment:
            latest_comment = comment
    return text
def compare_to_actual_file(self, handler, actual_file):
    """Compile the handler's template/stringset and compare the result
    against the contents of ``actual_file``.

    Mocks the compiler's source-string and translation lookups with the
    handler's in-memory stringset so no database access is needed.
    """
    template = handler.template
    compiler = handler.CompilerClass(handler.resource)
    compiler._tdecorator = Decorator(escape_func=handler._escape)
    compiler._examine_content(handler.template)
    compiler.language = handler.language
    # (index, hash) pairs stand in for DB-backed source strings.
    sources = [
        (idx, "%s" % hash_tag(s.source_entity, ""))
        for idx, s in enumerate(handler.stringset)
    ]
    # Build the translations mapping directly from pairs (no
    # intermediate list needed).
    translations = dict(
        (idx, s.translation) for idx, s in enumerate(handler.stringset)
    )
    with patch.object(compiler, '_get_source_strings') as smock:
        with patch.object(compiler, '_tset', create=True) as tmock:
            smock.return_value = sources
            tmock.return_value = translations
            compiler._compile(handler.template)
            template = compiler.compiled_template
    with open(actual_file, 'r') as f:
        actual_content = f.read()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(template, actual_content)
obsolete=obsolete) i += 1 if is_source: if sourceString is None: continue if message.attributes.has_key("numerus") and \ message.attributes['numerus'].value=='yes' and translation: numerusforms = translation.getElementsByTagName( 'numerusform') for n, f in enumerate(numerusforms): f.appendChild( doc.createTextNode( "%(hash)s_pl_%(key)s" % { 'hash': hash_tag(sourceString, context_name + comment), 'key': n })) else: if not translation: translation = doc.createElement("translation") # Delete all child nodes. This is usefull for xml like # strings (eg html) where the translation text is split # in multiple nodes. translation.childNodes = [] translation.appendChild( doc.createTextNode(("%(hash)s_tr" % { 'hash':
def _parse(self, is_source, lang_rules):
    """Parse an apple .strings file and create a stringset with
    all entries in the file.

    See
    http://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html
    for details.

    Between entries only /* ... */ comments and whitespace are allowed;
    anything else raises StringsParseError.  When is_source is true, a
    template is built in which each value is replaced by its hash
    placeholder.
    """
    resource = self.resource
    context = ""
    f = self.content
    # Matches one `"key" = "value";` (or `property = "value";`) entry,
    # allowing backslash escapes inside the quoted parts.
    p = re.compile(
        r'(?P<line>(("(?P<key>[^"\\]*(?:\\.[^"\\]*)*)")|(?P<property>\w+))\s*=\s*"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\s*;)',
        re.U,
    )
    # Comment and whitespace matchers for the gaps between entries.
    c = re.compile(r"\s*/\*(.|\s)*?\*/\s*", re.U)
    ws = re.compile(r"\s+", re.U)
    buf = u""
    end = 0
    start = 0
    for i in p.finditer(f):
        start = i.start()
        end_ = i.end()
        line = i.group("line")
        key = i.group("key")
        if not key:
            key = i.group("property")
        value = i.group("value")
        # Consume the gap before this entry; it must consist solely of
        # comments and whitespace.
        while end < start:
            m = c.match(f, end, start) or ws.match(f, end, start)
            if not m or m.start() != end:
                raise StringsParseError("Invalid syntax.")
            if is_source:
                buf += f[end : m.end()]
            end = m.end()
        end = end_
        if is_source:
            if not value.strip():
                # Keep empty-valued entries verbatim in the template.
                buf += line
                continue
            else:
                # Re-slice the entry from the file and replace only the
                # value span with the hash placeholder.
                line = f[start:end]
                buf += line[0 : i.start("value") - start]
                buf += re.sub(
                    re.escape(value),
                    "%(hash)s_tr" % {"hash": hash_tag(key, context)},
                    line[i.start("value") - start : i.end("value") - start],
                )
                buf += line[i.end("value") - start :]
        elif not SourceEntity.objects.filter(resource=resource,
                string=key).exists() or not value.strip():
            # ignore keys with no translation
            continue
        self.stringset.strings.append(
            GenericTranslation(
                key, self._unescape(value), rule=5, context=context,
                pluralized=False, fuzzy=False, obsolete=False
            )
        )
    # Validate (and, for sources, copy) whatever trails the last entry.
    while len(f[end:]):
        m = c.match(f, end) or ws.match(f, end)
        if not m or m.start() != end:
            raise StringsParseError("Invalid syntax.")
        if is_source:
            buf += f[end : m.end()]
        end = m.end()
        if end == 0:
            break
    return buf
def _parse(self, is_source, lang_rules):
    """Parse a .properties content and create a stringset with all
    entries in it.

    Duplicate keys are handled by removing the previously-added
    translation and keeping the last occurrence.  When is_source is
    true, builds and returns a template in which each value is replaced
    by its hash placeholder.
    """
    resource = self.resource
    context = ""
    self._find_linesep(self.content)
    template = u""
    # key_dict remembers what was already added per key so a duplicate
    # occurrence can evict the earlier one from the stringset.
    key_dict = {}
    rule = 5
    lines = self._iter_by_line(self.content)
    for line in lines:
        line = self._prepare_line(line)
        # Skip empty lines and comments
        if not line or line.startswith(self.comment_chars):
            if is_source:
                template += line + self.linesep
            continue
        # If the last character is a backslash
        # it has to be preceded by a space in which
        # case the next line is read as part of the
        # same property
        while line[-1] == '\\' and not self._is_escaped(line, -1):
            # Read next line
            nextline = self._prepare_line(lines.next())
            # This line will become part of the value
            line = line[:-1] + self._prepare_line(nextline)
        key, value = self._split(line)
        self._visit_value(value)
        if is_source:
            if not value:
                template += line + self.linesep
                # Keys with no values should not be shown to translator
                continue
            else:
                # Replace only the value portion (after the key) with the
                # hash placeholder.
                key_len = len(key)
                template += line[:key_len] + re.sub(
                    re.escape(value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[key_len:]
                ) + self.linesep
        elif not SourceEntity.objects.filter(resource=resource,
                string=key).exists():
            # ignore keys with no translation
            continue
        if key in key_dict and key_dict[key].get(rule, None):
            # Duplicate key: evict the previously stored translation.
            # NOTE(review): the stored 'translation' was already
            # unescaped when written into key_dict, so _unescape here
            # appears to run twice — verify against _unescape semantics.
            # NOTE(review): key_dict is not refreshed in this branch, so
            # a third occurrence of the same key would try to remove the
            # first (already removed) entry — confirm intended.
            g = GenericTranslation(key, self._unescape(
                    key_dict[key][rule]['translation']),
                    context=key_dict[key][rule]['context'])
            self.stringset.strings.remove(g)
        else:
            if key in key_dict:
                key_dict[key][rule] = {
                    'translation': self._unescape(value),
                    'context': context
                }
            else:
                key_dict[key] = {
                    rule: {
                        'translation': self._unescape(value),
                        'context': context
                    }
                }
        self._add_translation_string(
            key, self._unescape(value), context=context
        )
    return template
comment = None if entry.flags: flags = ', '.join( f for f in entry.flags) else: flags = None context=escape_context(entry.msgctxt) or '' self._add_translation_string( entry.msgid, msgstr[1], context=context, occurrences=self._serialize_occurrences(entry.occurrences), rule=msgstr[0], pluralized=pluralized, comment=comment, flags=flags ) if is_source: entry.msgstr = "%(hash)s_tr" % { 'hash': hash_tag(entry.msgid, context) } if entry.msgid_plural: for n, rule in enumerate(plural_keys): entry.msgstr_plural['%s' % n] = ( "%(hash)s_pl_%(key)s" % { 'hash':hash_tag(entry.msgid, context), 'key':n } ) return self._po def _generate_template(self, po): return self.get_po_contents(po)
occurrences = ";".join(occurrences), pluralized=pluralized, fuzzy=fuzzy, comment=extracomment, obsolete=obsolete) i += 1 if is_source: if sourceString is None: continue if message.attributes.has_key("numerus") and \ message.attributes['numerus'].value=='yes' and translation: numerusforms = translation.getElementsByTagName('numerusform') for n,f in enumerate(numerusforms): f.appendChild(doc.createTextNode( "%(hash)s_pl_%(key)s" % { 'hash': hash_tag(sourceString, context_name + comment), 'key': n } )) else: if not translation: translation = doc.createElement("translation") # Delete all child nodes. This is usefull for xml like # strings (eg html) where the translation text is split # in multiple nodes. translation.childNodes = [] translation.appendChild(doc.createTextNode( ("%(hash)s_tr" % {'hash': hash_tag( sourceString, context_name + comment)})
def parse_file(self, is_source=False, lang_rules=None):
    """
    Parse a java .properties file and create a stringset with all entries in the file.

    See
    http://download.oracle.com/javase/1.4.2/docs/api/java/util/PropertyResourceBundle.html,
    http://download.oracle.com/javase/1.4.2/docs/api/java/util/Properties.html#encoding
    and
    http://download.oracle.com/javase/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)
    for details.

    Lines are decoded with self.ENCODING; a decode failure raises
    JavaParseError.
    """
    resource = self.resource
    stringset = StringSet()
    suggestions = StringSet()
    context = ""
    fh = open(self.filename, "r")
    try:
        self.find_linesep(fh)
        buf = u""
        for line in fh:
            line = line.decode(self.ENCODING)
            line = self._prepare_line(line)
            # Skip empty lines and comments
            if not line or line.startswith(self.COMMENT_CHARS):
                if is_source:
                    buf += line + self._linesep
                continue
            # If the last character is a backslash
            # it has to be preceded by a space in which
            # case the next line is read as part of the
            # same property
            while line[-1] == '\\' and not self._is_escaped(line, -1):
                # Read next line
                nextline = self._prepare_line(fh.next())
                # This line will become part of the value
                line = line[:-1] + self._prepare_line(nextline)
            key, value = self._split(line)
            if is_source:
                if not value:
                    buf += line + self._linesep
                    # Keys with no values should not be shown to translator
                    continue
                else:
                    # Replace only the value portion (after the key) with
                    # the hash placeholder.
                    key_len = len(key)
                    buf += line[:key_len] + re.sub(
                        re.escape(value),
                        "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                        line[key_len:]
                    ) + self._linesep
            elif not SourceEntity.objects.filter(resource=resource,
                    string=key).exists():
                # ignore keys with no translation
                continue
            stringset.strings.append(GenericTranslation(key,
                self._unescape(value),
                rule=5,
                context=context,
                pluralized=False,
                fuzzy=False,
                obsolete=False))
    except UnicodeDecodeError, e:
        # raise JavaParseError(
        #     'Java .proeprties files must be in %s encoding.' % self.ENCODING
        # )
        raise JavaParseError(e.message)
def parse_file(self, is_source=False, lang_rules=None):
    """
    Parse an apple .strings file and create a stringset with
    all entries in the file.

    See
    http://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html
    for details.

    The file encoding is sniffed with chardet: UTF-16 variants are
    decoded as utf-16, everything else as self.ENCODING.  A decode
    failure raises StringsParseError.
    """
    resource = self.resource
    stringset = StringSet()
    suggestions = StringSet()
    context = ""
    fh = open(self.filename, "r")
    # Matches one `"key" = "value";` (or `property = "value";`) entry,
    # allowing backslash escapes inside the quoted parts.
    p = re.compile(r'(?P<line>(("(?P<key>[^"\\]*(?:\\.[^"\\]*)*)")|(?P<property>\w+))\s*=\s*"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\s*;)', re.U)
    # Comment and whitespace matchers for the gaps between entries.
    c = re.compile(r'\s*/\*(.|\s)*?\*/\s*', re.U)
    ws = re.compile(r'\s+', re.U)
    try:
        f = fh.read()
        if chardet.detect(f)['encoding'].startswith('UTF-16'):
            f = f.decode('utf-16')
        else:
            f = f.decode(self.ENCODING)
        buf = u""
        end = 0
        start = 0
        for i in p.finditer(f):
            start = i.start()
            end_ = i.end()
            line = i.group('line')
            key = i.group('key')
            if not key:
                key = i.group('property')
            value = i.group('value')
            # Consume the gap before this entry; it must consist solely
            # of comments and whitespace.
            while end < start:
                m = c.match(f, end, start) or ws.match(f, end, start)
                if not m or m.start() != end:
                    raise StringsParseError("Invalid syntax.")
                if is_source:
                    buf += f[end:m.end()]
                end = m.end()
            end = end_
            if is_source:
                if not value.strip():
                    # Keep empty-valued entries verbatim in the template.
                    buf += line
                    continue
                else:
                    # Re-slice the entry from the file and substitute the
                    # value with the hash placeholder.
                    line = f[start:end]
                    value = f[i.start('value'):i.end('value')]
                    buf += re.sub(
                        re.escape(value),
                        "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                        line
                    )
            elif not SourceEntity.objects.filter(resource=resource,
                    string=key).exists() or not value.strip():
                # ignore keys with no translation
                continue
            stringset.strings.append(GenericTranslation(key,
                self._unescape(value),
                rule=5,
                context=context,
                pluralized=False,
                fuzzy=False,
                obsolete=False))
        # Validate (and, for sources, copy) whatever trails the last entry.
        while len(f[end:]):
            m = c.match(f, end) or ws.match(f, end)
            if not m or m.start() != end:
                raise StringsParseError("Invalid syntax.")
            if is_source:
                buf += f[end:m.end()]
            end = m.end()
            if end == 0:
                break
    except UnicodeDecodeError, e:
        raise StringsParseError(e.message)
msgstr[1], context=escape_context(entry.msgctxt) or "", occurrences=", ".join([":".join([i for i in t]) for t in entry.occurrences]), rule=msgstr[0], pluralized=pluralized, ) stringset.strings.append(translation) if entry.comment: translation.comment = entry.comment if entry.flags: translation.flags = ", ".join(f for f in entry.flags) if is_source: entry.msgstr = "%(hash)s_tr" % {"hash": hash_tag(translation.source_entity, translation.context)} if entry.msgid_plural: for n, rule in enumerate(plural_keys): entry.msgstr_plural["%s" % n] = "%(hash)s_pl_%(key)s" % { "hash": hash_tag(translation.source_entity, translation.context), "key": n, } if is_source: self.template = self.get_po_contents(pofile) self.stringset = stringset self.suggestions = suggestions return pofile
def _parse(self, is_source, lang_rules):
    """Parse an apple .strings file and create a stringset with
    all entries in the file.

    See
    http://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html
    for details.

    A /* ... */ comment directly preceding an entry is captured and
    attached to it.  A leading BOM is stripped for parsing and restored
    on the returned template when is_source is true.
    """
    resource = self.resource
    context = ""
    f = self.content
    prefix = ""
    # Strip (and remember) a UTF BOM so the regexes see clean text.
    if f.startswith(u'\ufeff'):
        prefix = u'\ufeff'
        f = f.lstrip(u'\ufeff')
    #regex for finding all comments in a file
    cp = r'(?:/\*(?P<comment>(?:[^*]|(?:\*+[^*/]))*\**)\*/)'
    # Matches an optional preceding comment plus one
    # `"key" = "value";` (or `property = "value";`) entry.
    p = re.compile(r'(?:%s[ \t]*[\n]|[\r\n]|[\r]){0,1}(?P<line>(("(?P<key>[^"\\]*(?:\\.[^"\\]*)*)")|(?P<property>\w+))\s*=\s*"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\s*;)'%cp,
        re.DOTALL|re.U)
    #c = re.compile(r'\s*/\*(.|\s)*?\*/\s*', re.U)
    # Gap matcher: // line comments and /* */ block comments.
    c = re.compile(r'//[^\n]*\n|/\*(?:.|[\r\n])*?\*/', re.U)
    ws = re.compile(r'\s+', re.U)
    buf = u""
    end = 0
    start = 0
    for i in p.finditer(f):
        start = i.start('line')
        end_ = i.end()
        line = i.group('line')
        key = i.group('key')
        comment = i.group('comment') or ''
        if not key:
            key = i.group('property')
        value = i.group('value')
        # Consume the gap before this entry; it must consist solely of
        # comments and whitespace.
        while end < start:
            m = c.match(f, end, start) or ws.match(f, end, start)
            if not m or m.start() != end:
                raise StringsParseError("Invalid syntax: %s" %\
                        f[end:start])
            if is_source:
                buf += f[end:m.end()]
            end = m.end()
        end = end_
        key = self._unescape_key(key)
        if is_source:
            if not value.strip():
                # Keep empty-valued entries verbatim in the template.
                buf += line
                continue
            else:
                # Re-slice the entry from the file and replace only the
                # value span with the hash placeholder.
                line = f[start:end]
                buf += line[0:i.start('value')-start]
                buf += re.sub(
                    re.escape(value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[i.start('value')-start:i.end('value')-start]
                )
                buf += line[i.end('value')-start:]
        elif not SourceEntity.objects.filter(resource=resource,
                string=key).exists() or not value.strip():
            # ignore keys with no translation
            continue
        self.stringset.add(GenericTranslation(
            key, self._unescape(value),
            rule=5,
            context=context,
            pluralized=False,
            fuzzy=False,
            comment=comment,
            obsolete=False
        ))
    # Validate (and, for sources, copy) whatever trails the last entry.
    while len(f[end:]):
        m = c.match(f, end) or ws.match(f, end)
        if not m or m.start() != end:
            raise StringsParseError("Invalid syntax: %s" % f[end:])
        if is_source:
            buf += f[end:m.end()]
        end = m.end()
        if end == 0:
            break
    if is_source:
        # Restore the BOM that was stripped before parsing.
        buf = prefix + buf
    return buf
def parse_tag_trans_unit(self, trans_unit_node, is_source=False,
                         context=None, source_string=None, rule=None):
    """Parse one XLIFF ``<trans-unit>`` element.

    Extracts the source text (and any context-group data), and either
    injects a hash-placeholder ``<target>`` node (is_source) or reads
    the existing ``<target>`` translation.  The resulting string is
    appended to self.stringset_.

    :param trans_unit_node: the DOM node of the trans-unit.
    :param is_source: whether this file is the source language file.
    :param context: optional list of context strings to extend.
    :param source_string: the singular source string when this unit is a
        plural form; its presence marks the unit as pluralized.
    :param rule: the plural rule number for pluralized units.
    """
    # BUGFIX: the original signature used the mutable default
    # ``context=[]``; since the list is mutated below via extend(),
    # context-group data leaked across calls.  Use None as the default
    # and create a fresh list per call.
    if context is None:
        context = []
    source = ""
    source_node = trans_unit_node.getElementsByTagName("source")[0]
    if len(source_node.childNodes) > 1:
        # Mixed content (e.g. inline markup): keep the raw XML.
        for i in source_node.childNodes:
            source += i.toxml()
    else:
        source = source_node.firstChild.data
    # A supplied source_string marks this unit as a plural form.
    if source_string:
        pluralized = True
    else:
        pluralized = False
    for node in trans_unit_node.childNodes:
        if node.nodeType == node.ELEMENT_NODE and \
                node.localName == "context-group" and \
                not source_string and not rule:
            context.extend(self.parse_tag_context_group(node, is_source))
    # TODO prop-group, note, count-group, alt-trans
    # TODO seq-source
    context = escape_context(context)
    if is_source:
        translation = source
        if pluralized:
            source = source_string
        # Build a <target> carrying the hash placeholder.
        target = self.doc.createElement("target")
        target.childNodes = []
        if source_string and rule:
            target.appendChild(self.doc.createTextNode(
                ("%(hash)s_pl_%(rule)s" % {
                    'hash': hash_tag(source_string, context),
                    'rule': rule})
            ))
        else:
            target.appendChild(self.doc.createTextNode(
                ("%(hash)s_tr" % {'hash': hash_tag(source, context)})
            ))
        # Whitespace-only source text is not translatable.
        if translation and not translation.strip():
            return
        # Reuse the indentation node before <source> so the inserted
        # <target> keeps the document's formatting.
        indent_node = source_node.previousSibling.cloneNode(True)
        if source_node.nextSibling:
            trans_unit_node.insertBefore(target, source_node.nextSibling)
            trans_unit_node.insertBefore(indent_node, source_node.nextSibling)
        else:
            trans_unit_node.appendChild(indent_node)
            trans_unit_node.appendChild(target)
    else:
        if pluralized:
            source = source_string
        target_list = trans_unit_node.getElementsByTagName("target")
        if target_list:
            if len(target_list[0].childNodes) > 1:
                translation = self._getText(target_list[0].childNodes)
            else:
                if target_list[0].firstChild:
                    translation = target_list[0].firstChild.data
                else:
                    translation = u""
        else:
            translation = u""
        if not translation:
            return
    # TODO - do something with inline elements
    if pluralized:
        self.stringset_.strings.append(GenericTranslation(
            source, translation,
            rule=rule,
            context=context,
            pluralized=True,
            fuzzy=False,
            obsolete=False))
    else:
        self.stringset_.strings.append(GenericTranslation(
            source, translation,
            rule=5,
            context=context,
            pluralized=False,
            fuzzy=False,
            obsolete=False))