def convertpo(inputfile, outputfile, templatefile):
    """Merge an AMO store into a PO template and write the result.

    Returns 1 on success, 0 when the converted store is empty.
    """
    source_store = po.pofile(inputfile)
    template_store = po.pofile(templatefile)
    merged = amo2po().convertstore(source_store, template_store)
    if merged.isempty():
        return 0
    outputfile.write(str(merged))
    return 1
def create_pofile_from_babel(extracted):
    """Build a PO catalog from Babel-extracted message tuples.

    Honours settings.TOWER_ADD_HEADERS when present; otherwise (or when the
    setting is falsy) a header-less catalog is produced.
    """
    try:
        add_headers = settings.TOWER_ADD_HEADERS
    except AttributeError:
        add_headers = False
    catalog = po.pofile() if add_headers else po.pofile(inputfile="")
    for filename, lineno, message, comments in extracted:
        catalog.addunit(create_pounit(filename, lineno, message, comments))
    catalog.removeduplicates()
    return catalog
def import_file(file):
    """Import an uploaded PO file into the Pootle store it names.

    The file must carry X-Pootle-Path and X-Pootle-Revision headers; raises
    ValueError when either is missing/invalid, when the revision is stale, or
    when the store cannot be created.
    """
    pofile = po.pofile(file.read())
    header = pofile.parseheader()
    pootle_path = header.get("X-Pootle-Path")
    if not pootle_path:
        raise ValueError(_("File %r missing X-Pootle-Path header\n") % (file.name))
    rev = header.get("X-Pootle-Revision")
    if not rev or not rev.isdigit():
        raise ValueError(
            _("File %r missing or invalid X-Pootle-Revision header\n") % (file.name)
        )
    rev = int(rev)
    # BUG FIX: the broad except used to wrap the stale-revision check too, so
    # the "too old" ValueError raised below was swallowed and re-reported as
    # "Could not create ...". Keep the try narrow: only get_or_create can fail
    # with a missing Project/Language.
    try:
        store, created = Store.objects.get_or_create(pootle_path=pootle_path)
    except Exception as e:
        raise ValueError(
            _("Could not create %r. Missing Project/Language? (%s)") % (file.name, e)
        )
    if rev < store.get_max_unit_revision():
        # TODO we could potentially check at the unit level and only reject
        # units older than most recent. But that's in store.update().
        raise ValueError(
            _("File %r was rejected because its X-Pootle-Revision is too old.")
            % (file.name)
        )
    store.update(overwrite=True, store=pofile)
def po2php(self, posource):
    """helper that converts po source to .php source without requiring files"""
    po_store = po.pofile(wStringIO.StringIO(posource))
    return po2php.po2php().convertstore(po_store)
def po2web2py(self, po_source):
    """helper that converts po source to web2py source without requiring files"""
    po_store = po.pofile(wStringIO.StringIO(po_source))
    converted = po2web2py.po2pydict().convertstore(po_store, False)
    return converted.read()
def merge_store(self, template_store, input_store, blankmsgstr=False, duplicatestyle="msgctxt"):
    """converts two .rc files to a .po file...

    Units from the template become PO units; translations are looked up by
    location in the input store. With blankmsgstr=True targets are left empty.
    """
    output_store = po.pofile()
    output_header = output_store.init_headers(
        x_accelerator_marker="&",
        x_merge_on="location",
    )
    output_header.addnote("extracted from %s, %s" % (template_store.filename, input_store.filename), "developer")
    input_store.makeindex()
    for template_unit in template_store.units:
        origpo = self.convert_unit(template_unit, "developer")
        # try and find a translation of the same name...
        template_unit_name = "".join(template_unit.getlocations())
        if template_unit_name in input_store.locationindex:
            translatedrc = input_store.locationindex[template_unit_name]
            translatedpo = self.convert_unit(translatedrc, "translator")
        else:
            translatedpo = None
        # if we have a valid po unit, get the translation and add it...
        if origpo is not None:
            if translatedpo is not None and not blankmsgstr:
                origpo.target = translatedpo.source
            output_store.addunit(origpo)
        elif translatedpo is not None:
            # BUG FIX: was the Python 2 `print >> sys.stderr` statement, which
            # is a syntax error on Python 3.
            print("error converting original rc definition %s" % template_unit.name, file=sys.stderr)
    output_store.removeduplicates(duplicatestyle)
    return output_store
def po2dtd(self, posource, remove_untranslated=False):
    """helper that converts po source to dtd source without requiring files"""
    po_store = po.pofile(wStringIO.StringIO(posource))
    convertor = po2dtd.po2dtd(remove_untranslated=remove_untranslated)
    return convertor.convertstore(po_store)
def test_keep_translations(self):
    """check that we can grep unicode messages and use unicode regex search strings"""
    posource = '#: schemas.in\nmsgid "test"\nmsgstr "rest"\n'
    # With --keep-translations an inverted location match still keeps the unit.
    keep_args = ["--invert-match", "--keep-translations", "--search=locations"]
    poresult = self.pogrep(posource, "schemas.in", keep_args)
    assert poresult.index(posource) >= 0
    # Without it the inverted match drops the unit entirely.
    drop_args = ["--invert-match", "--search=locations"]
    poresult = self.pogrep(posource, "schemas.in", drop_args)
    assert headerless_len(po.pofile(poresult).units) == 0
def po2resx(self, resxsource, po_source):
    """Helper that merges po translations to .resx source without requiring files."""
    po_store = po.pofile(po_source.encode('utf-8'))
    template_file = wStringIO.StringIO(resxsource)
    merged = po2resx.po2resx(template_file, po_store).convertstore()
    return merged.decode('utf-8')
def po2lang(self, posource):
    """helper that converts po source to .lang source without requiring files"""
    po_store = po.pofile(wStringIO.StringIO(posource))
    convertor = po2mozlang.po2lang(mark_active=False)
    return bytes(convertor.convertstore(po_store)).decode('utf-8')
def convertstore(self, thepropfile, personality="java", duplicatestyle="msgctxt"):
    """converts a .properties file to a .po file...

    Comments preceding the first blank line are merged into the PO header;
    later comment runs are attached to the following unit.
    """
    self.personality = personality
    thetargetfile = po.pofile()
    if self.personality == "mozilla" or self.personality == "skype":
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    else:
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit")
    targetheader.addnote("extracted from %s" % thepropfile.filename, "developer")
    # we try and merge the header po with any comments at the start of the properties file
    appendedheader = False
    waitingcomments = []
    for propunit in thepropfile.units:
        pounit = self.convertunit(propunit, "developer")
        if pounit is None:
            waitingcomments.extend(propunit.comments)
        # FIXME the storage class should not be creating blank units
        # BUG FIX: was `pounit is "discard"` - identity comparison against a
        # str literal is unreliable (and a SyntaxWarning on modern Python).
        if pounit == "discard":
            continue
        if not appendedheader:
            if propunit.isblank():
                targetheader.addnote("\n".join(waitingcomments).rstrip(), "developer", position="prepend")
                waitingcomments = []
                pounit = None
            appendedheader = True
        if pounit is not None:
            pounit.addnote("\n".join(waitingcomments).rstrip(), "developer", position="prepend")
            waitingcomments = []
            thetargetfile.addunit(pounit)
    thetargetfile.removeduplicates(duplicatestyle)
    return thetargetfile
def mergestore(self, templatestore, inputstore, blankmsgstr=False, duplicatestyle="msgctxt"):
    """converts two .php files to a .po file...

    Template units supply the source text; translations are looked up by name
    in the input store. With blankmsgstr=True targets are left empty.
    """
    outputstore = po.pofile()
    outputheader = outputstore.header()
    outputheader.addnote("extracted from %s, %s" % (templatestore.filename, inputstore.filename), "developer")
    inputstore.makeindex()
    # loop through the original file, looking at units one by one
    for templateunit in templatestore.units:
        outputunit = self.convertunit(templateunit, "developer")
        # try and find a translation of the same name...
        if templateunit.name in inputstore.locationindex:
            translatedinputunit = inputstore.locationindex[templateunit.name]
            # Need to check that this comment is not a copy of the developer comments
            translatedoutputunit = self.convertunit(translatedinputunit, "translator")
        else:
            translatedoutputunit = None
        # if we have a valid po unit, get the translation and add it...
        if outputunit is not None:
            if translatedoutputunit is not None and not blankmsgstr:
                outputunit.target = translatedoutputunit.source
            outputstore.addunit(outputunit)
        elif translatedoutputunit is not None:
            # BUG FIX: `logger(...)` called the Logger object directly, which
            # raises TypeError; use the .error() method as sibling code does.
            logger.error("error converting original properties definition %s", templateunit.name)
    outputstore.removeduplicates(duplicatestyle)
    return outputstore
def po2lang(self, posource):
    """helper that converts po source to .lang source without requiring files"""
    po_store = po.pofile(wStringIO.StringIO(posource))
    return po2mozlang.po2lang().convertstore(po_store)
def convertstore(self, theoofile, duplicatestyle="msgctxt"):
    """converts an entire oo file to a base class format (.po or XLIFF)"""
    target_store = po.pofile()
    # The header points bug reports at the OpenOffice issue tracker,
    # pre-filled with the name of the source file.
    bug_url = 'http://qa.openoffice.org/issues/enter_bug.cgi?%s' % parse.urlencode({
        "subcomponent": "ui",
        "comment": "",
        "short_desc": "Localization issue in file: %s" % theoofile.filename,
        "component": "l10n",
        "form_name": "enter_issue",
    })
    header = target_store.init_headers(
        x_accelerator_marker="~",
        x_merge_on="location",
        report_msgid_bugs_to=bug_url,
    )
    header.addnote("extracted from %s" % theoofile.filename, "developer")
    target_store.setsourcelanguage(self.sourcelanguage)
    target_store.settargetlanguage(self.targetlanguage)
    # One oo element may yield several target units.
    for oo_unit in theoofile.units:
        for converted in self.convertelement(oo_unit):
            target_store.addunit(converted)
    target_store.removeduplicates(duplicatestyle)
    return target_store
def test_convertphpempty(self):
    """checks that the convertphp function is working with empty template"""
    posource = self.convertphp('', '', 0)
    pofile = po.pofile(wStringIO.StringIO(posource))
    assert len(pofile.units) == 0
def convertdtd(inputfile, outputfile, templatefile, includefuzzy=False, remove_untranslated=False, outputthreshold=None):
    """Convert a PO file to DTD output, optionally merging into a DTD template.

    Returns False when the translation threshold is not met, 1 otherwise.
    """
    inputstore = po.pofile(inputfile)
    if not convert.should_output_store(inputstore, outputthreshold):
        return False
    # Some of the DTD files used for Firefox Mobile are actually completely
    # different with different escaping and quoting rules. The best way to
    # identify them seems to be on their file path in the tree (based on code
    # in compare-locales).
    header_comment = ""
    input_header = inputstore.header()
    if input_header:
        header_comment = input_header.getnotes("developer")
    android_dtd = ("embedding/android" in header_comment
                   or "mobile/android/base" in header_comment)
    if templatefile is None:
        convertor = po2dtd(android=android_dtd, remove_untranslated=remove_untranslated)
    else:
        templatestore = dtd.dtdfile(templatefile, android=android_dtd)
        convertor = redtd(templatestore, android=android_dtd, remove_untranslated=remove_untranslated)
    outputstore = convertor.convertstore(inputstore, includefuzzy)
    outputfile.write(str(outputstore))
    return 1
def create_pofile_from_babel(extracted):
    """Build a header-less PO catalog from Babel-extracted message tuples."""
    catalog = po.pofile(inputfile="")
    for filename, lineno, message, comments in extracted:
        catalog.addunit(create_pounit(filename, lineno, message, comments))
    catalog.removeduplicates()
    return catalog
def mergestore(self, origpropfile, translatedpropfile, personality="java", blankmsgstr=False, duplicatestyle="msgctxt"):
    """converts two .properties files to a .po file...

    Units from the original file supply sources; translations are looked up by
    name in the translated file. With blankmsgstr=True targets are left empty.
    """
    self.personality = personality
    thetargetfile = po.pofile()
    if self.personality in ("mozilla", "skype"):
        targetheader = thetargetfile.init_headers(
            x_accelerator_marker="&",
            x_merge_on="location",
        )
    else:
        targetheader = thetargetfile.header()
    targetheader.addnote("extracted from %s, %s" % (origpropfile.filename, translatedpropfile.filename), "developer")
    translatedpropfile.makeindex()
    # we try and merge the header po with any comments at the start of
    # the properties file
    appendedheader = False
    waitingcomments = []
    # loop through the original file, looking at units one by one
    for origprop in origpropfile.units:
        origpo = self.convertunit(origprop, "developer")
        if origpo is None:
            waitingcomments.extend(origprop.comments)
        # FIXME the storage class should not be creating blank units
        # BUG FIX: was `origpo is "discard"` - identity comparison against a
        # str literal is unreliable (SyntaxWarning on modern Python).
        if origpo == "discard":
            continue
        # handle the header case specially...
        if not appendedheader:
            if origprop.isblank():
                targetheader.addnote("".join(waitingcomments).rstrip(), "developer", position="prepend")
                waitingcomments = []
                origpo = None
            appendedheader = True
        # try and find a translation of the same name...
        if origprop.name in translatedpropfile.locationindex:
            translatedprop = translatedpropfile.locationindex[origprop.name]
            # Need to check that this comment is not a copy of the
            # developer comments
            translatedpo = self.convertunit(translatedprop, "translator")
            # BUG FIX: same `is "discard"` identity comparison as above.
            if translatedpo == "discard":
                continue
        else:
            translatedpo = None
        # if we have a valid po unit, get the translation and add it...
        if origpo is not None:
            if translatedpo is not None and not blankmsgstr:
                origpo.target = translatedpo.source
            origpo.addnote("".join(waitingcomments).rstrip(), "developer", position="prepend")
            waitingcomments = []
            thetargetfile.addunit(origpo)
        elif translatedpo is not None:
            logger.error("didn't convert original property definition '%s'", origprop.name)
    if self.personality == "gaia":
        thetargetfile = self.fold_gaia_plurals(thetargetfile)
    thetargetfile.removeduplicates(duplicatestyle)
    return thetargetfile
def merge_store(template_store, input_store, blankmsgstr=False, duplicatestyle="msgctxt"):
    """converts two subtitle files to a .po file...

    Template units supply the source text; translations are looked up by
    location in the input store. With blankmsgstr=True targets are left empty.
    """
    output_store = po.pofile()
    # BUG FIX: pofile has no headers() method - sibling converters use
    # header() to fetch the header unit.
    output_header = output_store.header()
    output_header.addnote("extracted from %s, %s" % (template_store.filename, input_store.filename), "developer")
    input_store.makeindex()
    for template_unit in template_store.units:
        origpo = convert_unit(template_unit, "developer")
        # try and find a translation of the same name...
        template_unit_name = "".join(template_unit.getlocations())
        if template_unit_name in input_store.locationindex:
            translatedini = input_store.locationindex[template_unit_name]
            translatedpo = convert_unit(translatedini, "translator")
        else:
            translatedpo = None
        # if we have a valid po unit, get the translation and add it...
        if origpo is not None:
            if translatedpo is not None and not blankmsgstr:
                origpo.target = translatedpo.source
            output_store.addunit(origpo)
        elif translatedpo is not None:
            # BUG FIX: `origini` was an undefined name (NameError); report the
            # unit's location key instead, with lazy logging args.
            logger.error("error converting original subtitle definition %s", template_unit_name)
    output_store.removeduplicates(duplicatestyle)
    return output_store
def outputconflicts(self, options):
    """saves the result of the conflict match"""
    print("%d/%d different strings have conflicts" % (len(self.conflictmap), len(self.textmap)))
    # Key each conflict set on the longest word of its source string.
    reducedmap = {}
    for source, translations in six.iteritems(self.conflictmap):
        longest_word = sorted(source.split(), key=len)[-1]
        reducedmap.setdefault(longest_word, []).extend(translations)
    # Fold trivial "word"/"words" plural pairs into the singular entry.
    plurals = {word: word + "s" for word in reducedmap if word + "s" in reducedmap}
    for word, pluralword in six.iteritems(plurals):
        reducedmap[word].extend(reducedmap.pop(pluralword))
    # One PO file per conflicting source, annotated with the origin file.
    for source, translations in six.iteritems(reducedmap):
        flatsource = self.flatten(source, "-")
        fulloutputpath = os.path.join(options.output, flatsource + os.extsep + "po")
        conflictfile = po.pofile()
        for target, unit, filename in translations:
            unit.othercomments.append("# (poconflicts) %s\n" % filename)
            conflictfile.units.append(unit)
        with open(fulloutputpath, "wb") as fh:
            conflictfile.serialize(fh)
def po2xlf(inputfile, originalfile, outputfile, lang=None):
    """Merge translations from a PO file into an XLIFF tree and write it out.

    Each PO unit is matched to a trans-unit by its single msgctxt id; when
    `lang` is given the file node's target-language attribute is set.
    """
    tree = etree.parse(originalfile)
    # BUG FIX: the local was named `po`, shadowing the imported pofile module
    # name used elsewhere in this file; also close the input file handle.
    po_store = pofile()
    with open(inputfile) as po_fh:
        po_store.parse(po_fh)
    if lang is not None:
        fileNode = tree.xpath("//xlf:file", namespaces=namespaces)[0]
        fileNode.attrib['target-language'] = lang
    for po_unit in po_store.units:
        # Only translate units with exactly one msgctxt (the trans-unit id).
        if po_unit.obsolete or len(po_unit.msgctxt) != 1:
            continue
        msgctxt = po_unit.msgctxt[0]
        nodes = tree.xpath('//xlf:trans-unit[@id={0}]'.format(msgctxt), namespaces=namespaces)
        if len(nodes) != 1:
            # BUG FIX: was the Python 2 `print` statement.
            print('WARNING: XLIFF file missing trans-unit with id {0}.'.format(msgctxt))
            continue
        tu = nodes[0]
        target = tu.xpath('xlf:target', namespaces=namespaces)
        if len(target) == 0:
            target = etree.Element('target', nsmap=namespaces)
            tu.append(target)
        else:
            target = target[0]
        # BUG FIX: was the Python 2 `unicode(...)` builtin.
        target_text = str(po_unit.gettarget())
        if target_text is not None:
            target.text = target_text
    tree.write(outputfile, encoding='UTF-8')
def merge_store(self, template_store, input_store, blankmsgstr=False, duplicatestyle="msgctxt"):
    """Converts two JSON files to a PO file

    Template units supply the source text; translations are looked up by
    location in the input store. With blankmsgstr=True targets are left empty.
    """
    output_store = po.pofile()
    output_header = output_store.init_headers(charset="UTF-8", encoding="8bit")
    output_header.addnote("extracted from %s, %s" % (template_store.filename, input_store.filename), "developer")
    input_store.makeindex()
    for template_unit in template_store.units:
        origpo = self.convert_unit(template_unit, "developer")
        # try and find a translation of the same name...
        template_unit_name = "".join(template_unit.getlocations())
        if template_unit_name in input_store.locationindex:
            translatedjson = input_store.locationindex[template_unit_name]
            translatedpo = self.convert_unit(translatedjson, "translator")
        else:
            translatedpo = None
        # if we have a valid po unit, get the translation and add it...
        if origpo is not None:
            if translatedpo is not None and not blankmsgstr:
                origpo.target = translatedpo.source
            output_store.addunit(origpo)
        elif translatedpo is not None:
            # BUG FIX: was the py2 `print >> sys.stderr` statement, and
            # `origpo` is always None in this branch - report the location key.
            print("Error converting original JSON definition %s" % template_unit_name, file=sys.stderr)
    output_store.removeduplicates(duplicatestyle)
    return output_store
def web2py2po(self, web2py_source):
    """helper that converts po source to web2py source without requiring files"""
    # NOTE: eval() executes the web2py dict source - acceptable only because
    # this helper runs on trusted test fixtures.
    web2py_dict = eval(web2py_source)
    new_pofile = po.pofile()
    return web2py2po.web2py2po(new_pofile).convertstore(web2py_dict)
def test_timezones():
    pofile = po.pofile()
    # The following will only work on Unix because of tzset() and %z
    if 'tzset' not in time.__dict__:
        return

    def activate(zone):
        os.environ['TZ'] = zone
        time.tzset()

    activate('Asia/Kabul')
    assert time.timezone == -16200
    # Typically "+0430"
    assert poheader.tzstring() == time.strftime("%z")

    activate('Asia/Seoul')
    assert time.timezone == -32400
    # Typically "+0900"
    assert poheader.tzstring() == time.strftime("%z")

    activate('Africa/Johannesburg')
    assert time.timezone == -7200
    # Typically "+0200"
    assert poheader.tzstring() == time.strftime("%z")

    activate('Africa/Windhoek')
    assert time.timezone == -3600
    # Typically "+0100"
    # For some reason python's %z doesn't know about Windhoek DST
    #assert poheader.tzstring() == time.strftime("%z")

    activate('UTC')
    assert time.timezone == 0
    # Typically "+0000"
    assert poheader.tzstring() == time.strftime("%z")
def test_simplegrep_comments(self):
    """grep for a string in the comments"""
    posource = '# (review) comment\n#: test.c\nmsgid "test"\nmsgstr "rest"\n'
    # A comment match keeps the unit...
    poresult = self.pogrep(posource, "review", ["--search=comment"])
    assert poresult.index(posource) >= 0
    # ...a non-matching search drops it.
    poresult = self.pogrep(posource, "test", ["--search=comment"])
    assert headerless_len(po.pofile(poresult).units) == 0
def convertstore(self, thecsvfile):
    """converts a csvfile to a pofile, and returns it. uses templatepo if given at construction"""
    self.csvfile = thecsvfile
    if self.pofile is None:
        self.pofile = po.pofile()
        mergemode = False
    else:
        mergemode = True
    # Reuse an existing header when the template already has one.
    if self.pofile.units and self.pofile.units[0].isheader():
        targetheader = self.pofile.units[0]
        self.pofile.updateheader(content_type="text/plain; charset=UTF-8",
                                 content_transfer_encoding="8bit")
    else:
        targetheader = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
    targetheader.addnote("extracted from %s" % self.csvfile.filename, "developer")
    mightbeheader = True
    for csvunit in self.csvfile.units:
        #if self.charset is not None:
        #    csvunit.source = csvunit.source.decode(self.charset)
        #    csvunit.target = csvunit.target.decode(self.charset)
        if mightbeheader:
            # ignore typical header strings...
            mightbeheader = False
            if csvunit.match_header():
                continue
            if len(csvunit.location.strip()) == 0 and csvunit.source.find("Content-Type:") != -1:
                continue
        if mergemode:
            self.handlecsvunit(csvunit)
        else:
            self.pofile.addunit(self.convertunit(csvunit))
    self.pofile.removeduplicates(self.duplicatestyle)
    return self.pofile
def test_simplegrep_msgstr(self):
    """grep for a string in the target"""
    posource = '#: test.c\nmsgid "test"\nmsgstr "rest"\n'
    # A msgstr match keeps the unit...
    poresult = self.pogrep(posource, "rest", ["--search=msgstr"])
    assert poresult.index(posource) >= 0
    # ...a msgid-only match does not.
    poresult = self.pogrep(posource, "test", ["--search=msgstr"])
    assert headerless_len(po.pofile(poresult).units) == 0
def po2ini(self, posource):
    """helper that converts po source to .ini source without requiring files"""
    po_store = po.pofile(wStringIO.StringIO(posource))
    return po2ini.reini().convertstore(po_store)
def test_simplegrep_locations(self):
    """grep for a string in the location comments"""
    posource = '#: test.c\nmsgid "test"\nmsgstr "rest"\n'
    # A location match keeps the unit...
    poresult = self.pogrep(posource, "test.c", ["--search=locations"])
    assert poresult.index(posource) >= 0
    # ...a non-matching location drops it.
    poresult = self.pogrep(posource, "rest.c", ["--search=locations"])
    assert headerless_len(po.pofile(poresult).units) == 0
def convertstore(self, inputfile, duplicatestyle="msgctxt"):
    """Converts a .xliff file to .po format"""
    # XXX: The inputfile is converted to string because Pootle supplies
    # XXX: a PootleFile object as input which cannot be sent to PoXliffFile
    # XXX: The better way would be to have a consistent conversion API.
    if not isinstance(inputfile, (io.IOBase, wStringIO.StringIO)):
        inputfile = str(inputfile)
    xliff_store = xliff.xlifffile.parsestring(inputfile)
    target_store = po.pofile()
    target_header = target_store.header()
    # TODO: support multiple files
    for transunit in xliff_store.units:
        if transunit.isheader():
            # Fold the XLIFF header data into the PO header unit.
            target_store.updateheader(add=True, **xliff_store.parseheader())
            translator_notes = transunit.getnotes('translator')
            if translator_notes:
                target_header.addnote(translator_notes, origin='translator', position='replace')
            developer_notes = transunit.getnotes('developer')
            if developer_notes:
                target_header.addnote(developer_notes, origin='developer', position='replace')
            target_header.markfuzzy(transunit.isfuzzy())
            continue
        target_store.addunit(self.converttransunit(transunit))
    target_store.removeduplicates(duplicatestyle)
    return target_store
def convertstore(self, thedtdfile):
    """Convert a DTD store to a PO store, skipping null entities."""
    target_store = po.pofile()
    header = target_store.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    header.addnote("extracted from %s" % thedtdfile.filename, "developer")
    thedtdfile.makeindex()
    self.findmixedentities(thedtdfile)
    # go through the dtd and convert each unit
    for dtd_unit in thedtdfile.units:
        if dtd_unit.isnull():
            continue
        po_unit = self.convertdtdunit(thedtdfile, dtd_unit)
        if po_unit is not None:
            target_store.addunit(po_unit)
    target_store.removeduplicates(self.duplicatestyle)
    return target_store
def convertstore(self, thelangfile):
    """converts a file to .po format"""
    target_store = po.pofile()
    # Set up the header
    header = target_store.header()
    header.addnote("extracted from %s" % thelangfile.filename, "developer")
    # For each lang unit, make the new po unit accordingly
    for lang_unit in thelangfile.units:
        po_unit = target_store.addsourceunit(lang_unit.source)
        po_unit.settarget(lang_unit.target)
        po_unit.addlocations(lang_unit.getlocations())
        po_unit.addnote(lang_unit.getnotes(), 'developer')
    # Remove duplicates, because we can
    target_store.removeduplicates(self.duplicatestyle)
    return target_store
def convertstore(self, thepropfile, personality="java", duplicatestyle="msgctxt"):
    """converts a .properties file to a .po file...

    Comments preceding the first blank line are merged into the PO header;
    later comment runs are attached to the following unit.
    """
    self.personality = personality
    thetargetfile = po.pofile()
    if self.personality == "mozilla" or self.personality == "skype":
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    else:
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit")
    targetheader.addnote("extracted from %s" % thepropfile.filename, "developer")
    # we try and merge the header po with any comments at the start of the
    # properties file
    appendedheader = False
    waitingcomments = []
    for propunit in thepropfile.units:
        pounit = self.convertunit(propunit, "developer")
        if pounit is None:
            waitingcomments.extend(propunit.comments)
        # FIXME the storage class should not be creating blank units
        # BUG FIX: was `pounit is "discard"` - identity comparison against a
        # str literal is unreliable (SyntaxWarning on modern Python).
        if pounit == "discard":
            continue
        if not appendedheader:
            if propunit.isblank():
                targetheader.addnote("\n".join(waitingcomments).rstrip(), "developer", position="prepend")
                waitingcomments = []
                pounit = None
            appendedheader = True
        if pounit is not None:
            pounit.addnote("\n".join(waitingcomments).rstrip(), "developer", position="prepend")
            waitingcomments = []
            thetargetfile.addunit(pounit)
    thetargetfile.removeduplicates(duplicatestyle)
    return thetargetfile
def convertfile(storefile, template_store):
    """Extract translatable content from an IDML package into a PO store."""
    store = pofile()
    contents = open_idml(storefile)
    # Create it here to avoid having repeated ids.
    id_maker = IdMaker()
    for filename, translatable_file in contents.items():
        parse_state = ParseState(NO_TRANSLATE_ELEMENTS, INLINE_ELEMENTS)
        adder = make_postore_adder(store, id_maker, filename)
        build_idml_store(BytesIO(translatable_file), store, parse_state,
                         store_adder=adder)
    return store
def convertstore(self, thetikifile):
    """Converts a given (parsed) tiki file to a po file.

    :param thetikifile: a tikifile pre-loaded with input data
    """
    target_store = po.pofile()
    for tiki_unit in thetikifile.units:
        # Optionally skip strings the tiki file marks as unused.
        if not self.includeunused and "unused" in tiki_unit.getlocations():
            continue
        po_unit = po.pounit()
        po_unit.source = tiki_unit.source
        po_unit.settarget(tiki_unit.target)
        locations = tiki_unit.getlocations()
        if locations:
            po_unit.addlocations(locations)
        target_store.addunit(po_unit)
    return target_store
def convert_store(self, input_store, duplicatestyle="msgctxt"):
    """Converts a RESX file to a PO file"""
    output_store = po.pofile()
    header = output_store.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    header.addnote("extracted from %s" % input_store.filename, "developer")
    for input_unit in input_store.units:
        if not input_unit.istranslatable():
            continue
        output_unit = self.convert_unit(input_unit, "developer")
        if output_unit is not None:
            # Split out translator & dev comments before adding them
            self.split_comments(output_unit, output_unit)
            output_store.addunit(output_unit)
    output_store.removeduplicates(duplicatestyle)
    return output_store
def convertstore(self, thepropfile):
    """converts a .properties file to a .po file...

    Comments preceding the first blank line are merged into the PO header;
    Gaia-personality stores get their plural units folded afterwards.
    """
    thetargetfile = po.pofile()
    if self.personality in ("mozilla", "skype"):
        targetheader = thetargetfile.init_headers(
            x_accelerator_marker="&",
            x_merge_on="location",
        )
    else:
        targetheader = thetargetfile.header()
    targetheader.addnote("extracted from %s" % thepropfile.filename, "developer")
    thepropfile.makeindex()
    self.mixedkeys = self.mixer.match_entities(thepropfile.id_index)
    # we try and merge the header po with any comments at the start of the
    # properties file
    appendedheader = False
    waitingcomments = []
    for propunit in thepropfile.units:
        pounit = self.convertpropunit(thepropfile, propunit, "developer")
        if pounit is None:
            waitingcomments.extend(propunit.comments)
        # FIXME the storage class should not be creating blank units
        # BUG FIX: was `pounit is "discard"` - identity comparison against a
        # str literal is unreliable (SyntaxWarning on modern Python).
        if pounit == "discard":
            continue
        if not appendedheader:
            if propunit.isblank():
                targetheader.addnote("\n".join(waitingcomments).rstrip(), "developer", position="prepend")
                waitingcomments = []
                pounit = None
            appendedheader = True
        if pounit is not None:
            pounit.addnote("\n".join(waitingcomments).rstrip(), "developer", position="prepend")
            waitingcomments = []
            thetargetfile.addunit(pounit)
    if self.personality == "gaia":
        thetargetfile = self.fold_gaia_plurals(thetargetfile)
    thetargetfile.removeduplicates(self.duplicatestyle)
    return thetargetfile
def mergestore(self, origdtdfile, translateddtdfile):
    """Merge an original and a translated DTD store into a PO store."""
    target_store = po.pofile()
    header = target_store.init_headers(charset="UTF-8", encoding="8bit")
    header.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")
    origdtdfile.makeindex()
    self.findmixedentities(origdtdfile)
    translateddtdfile.makeindex()
    self.findmixedentities(translateddtdfile)
    # go through the dtd files and convert each unit
    for origdtd in origdtdfile.units:
        if origdtd.isnull():
            continue
        origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")
        # Decide which bucket to use when converting the translated side.
        if origdtd.entity in self.mixedentities:
            mixedentitydict = self.mixedentities[origdtd.entity]
            if "orig" not in mixedentitydict:
                # this means that the entity is mixed in the translation, but
                # not the original - treat as unmixed
                mixbucket = "orig"
                del self.mixedentities[origdtd.entity]
            elif mixedentitydict["orig"]:
                # the original entity is already mixed successfully
                mixbucket = "translate"
            else:
                # ??
                mixbucket = "orig"
        else:
            mixbucket = "translate"
        if origpo is None:
            # this means its a mixed entity (with accesskey) that's already
            # been dealt with
            continue
        if origdtd.entity in translateddtdfile.index:
            translateddtd = translateddtdfile.index[origdtd.entity]
            translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
        else:
            translatedpo = None
        # origpo is known non-None here (the None case continued above).
        if translatedpo is not None and not self.blankmsgstr:
            origpo.target = translatedpo.source
        target_store.addunit(origpo)
    target_store.removeduplicates(self.duplicatestyle)
    return target_store
def merge_store(self, template_store, input_store, blankmsgstr=False, duplicatestyle="msgctxt"):
    """Converts two RESX files to a PO file

    Template units supply the source text; translations are looked up by
    location in the input store. With blankmsgstr=True targets are left empty.
    """
    output_store = po.pofile()
    output_header = output_store.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
    output_header.addnote(
        "extracted from %s, %s" % (template_store.filename, input_store.filename),
        "developer")
    input_store.makeindex()
    for template_unit in template_store.units:
        origpo = self.convert_unit(template_unit, "developer")
        # try and find a translation of the same name...
        template_unit_name = "".join(template_unit.getlocations())
        if template_unit_name in input_store.locationindex:
            translatedresx = input_store.locationindex[template_unit_name]
            translatedpo = self.convert_unit(translatedresx, "translator")
        else:
            translatedpo = None
        # if we have a valid po unit, get the translation and add it...
        if origpo is not None:
            if translatedpo is not None and not blankmsgstr:
                origpo.target = translatedpo.source
            # Split out translator & dev comments before adding them
            self.split_comments(origpo, translatedpo)
            output_store.addunit(origpo)
        elif translatedpo is not None:
            # BUG FIX: `origpo` is always None in this branch so the old
            # message logged "None"; report the location key, with lazy
            # logging args instead of eager %-formatting.
            logger.error("Error converting original RESX definition %s", template_unit_name)
    output_store.removeduplicates(duplicatestyle)
    return output_store
def convertlang(inputfile, outputfile, templates, includefuzzy=False, mark_active=True, outputthreshold=None, remove_untranslated=None):
    """reads in stdin using fromfileclass, converts using convertorclass, writes to stdout"""
    inputstore = po.pofile(inputfile)
    if not convert.should_output_store(inputstore, outputthreshold):
        return False
    if inputstore.isempty():
        return 0
    converted = po2lang(mark_active=mark_active).convertstore(inputstore, includefuzzy)
    outputfile.write(str(converted))
    return 1
def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None, duplicatestyle="msgctxt"):
    """reads in inputfile using csvl10n, converts using csv2po, writes to outputfile"""
    inputstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
    if templatefile is None:
        convertor = csv2po(charset=charset, duplicatestyle=duplicatestyle)
    else:
        convertor = csv2po(po.pofile(templatefile), charset=charset,
                           duplicatestyle=duplicatestyle)
    outputstore = convertor.convertstore(inputstore)
    if outputstore.isempty():
        return 0
    outputfile.write(str(outputstore))
    return 1
def translate_po(file, sl, tl):
    """Machine-translate every empty msgstr in a PO file in place.

    Writes progress to stderr and saves the file every 50 translations and
    once at the end. `sl`/`tl` are the source and target language codes.
    """
    openfile = po.pofile(open(file))
    nb_elem = len(openfile.units)
    moves = 1
    cur_elem = 0
    for unit in openfile.units:
        # report progress
        cur_elem += 1
        s = "\r%f %% - (%d msg processed out of %d) " \
            % (100 * float(cur_elem) / float(nb_elem), cur_elem, nb_elem)
        sys.stderr.write(s)
        if not unit.isheader():
            if len(unit.msgid):
                if unit.msgstr == [u'""']:
                    moves += 1
                    unit.msgstr = ['"%s"' % htmldecode(get_translation(sl, tl, x))
                                   for x in unit.msgid]
                    # Checkpoint periodically so a crash loses little work.
                    if not bool(moves % 50):
                        # BUG FIX: was the Python 2 `print` statement.
                        print("Saving file...")
                        openfile.save()
    openfile.save()
def convertfile(self, inputfile):
    """converts a .ts file to .po format"""
    tsfile = ts.QtTsParser(inputfile)
    target_store = po.pofile()
    target_store.init_headers(charset="UTF-8", encoding="8bit")
    for contextname, messages in tsfile.iteritems():
        # Message numbers are 1-based within each context.
        for messagenum, message in enumerate(messages, start=1):
            source = tsfile.getmessagesource(message)
            translation = tsfile.getmessagetranslation(message)
            comment = tsfile.getmessagecomment(message)
            transtype = tsfile.getmessagetype(message)
            po_unit = self.convertmessage(contextname, messagenum, source,
                                          translation, comment, transtype)
            target_store.addunit(po_unit)
    target_store.removeduplicates(self.duplicatestyle)
    return target_store
def convertstore(self, dtd_store):
    """Convert a DTD store into a freshly-built PO store."""
    output = po.pofile()
    header = output.init_headers(
        x_accelerator_marker="&",
        x_merge_on="location",
    )
    header.addnote("extracted from %s" % dtd_store.filename, "developer")
    dtd_store.makeindex()
    self.mixedentities = self.mixer.match_entities(dtd_store.id_index)
    # go through the dtd and convert each unit, skipping untranslatable
    # entities and any unit the convertor rejects (returns None).
    for entity in dtd_store.units:
        if not entity.istranslatable():
            continue
        converted = self.convertdtdunit(dtd_store, entity)
        if converted is not None:
            output.addunit(converted)
    output.removeduplicates(self.duplicatestyle)
    return output
def converthtml(inputfile, outputfile, templatefile, includefuzzy=False, outputthreshold=None):
    """reads in stdin using fromfileclass, converts using convertorclass, writes to stdout"""
    store = po.pofile(inputfile)
    if not convert.should_output_store(store, outputthreshold):
        return False
    convertor = po2html()
    # HTML conversion cannot run template-less: the template supplies markup.
    if templatefile is None:
        raise ValueError("must have template file for HTML files")
    merged = convertor.mergestore(store, templatefile, includefuzzy)
    outputfile.write(merged.encode('utf-8'))
    return 1
def convertstore(self, thecsvfile):
    """converts a csvfile to a pofile, and returns it.

    uses templatepo if given at construction
    """
    self.csvfile = thecsvfile
    if self.pofile is None:
        # No template PO supplied: build a brand-new PO file from scratch.
        self.pofile = po.pofile()
        mergemode = False
    else:
        # Template supplied at construction: merge CSV rows into it.
        mergemode = True
    if self.pofile.units and self.pofile.units[0].isheader():
        # Reuse the existing header unit, normalising its charset/encoding.
        targetheader = self.pofile.units[0]
        self.pofile.updateheader(
            content_type="text/plain; charset=UTF-8",
            content_transfer_encoding="8bit",
        )
    else:
        targetheader = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
    targetheader.addnote("extracted from %s" % self.csvfile.filename, "developer")
    mightbeheader = True
    for csvunit in self.csvfile.units:
        # if self.charset is not None:
        #     csvunit.source = csvunit.source.decode(self.charset)
        #     csvunit.target = csvunit.target.decode(self.charset)
        if mightbeheader:
            # ignore typical header strings...
            # Only the very first CSV row is a header candidate.
            mightbeheader = False
            if csvunit.match_header():
                continue
            if (
                len(csvunit.location.strip()) == 0
                and csvunit.source.find("Content-Type:") != -1
            ):
                # A PO header exported into the CSV as a data row; skip it.
                continue
        if mergemode:
            self.handlecsvunit(csvunit)
        else:
            pounit = self.convertunit(csvunit)
            self.pofile.addunit(pounit)
    self.pofile.removeduplicates(self.duplicatestyle)
    return self.pofile
def mergestore(self, templatesource, inputsource, mergeblanks="yes", mergefuzzy="yes", mergecomments="yes"):
    """merges the sources of the given files and returns a new pofile object"""
    template_file = wStringIO.StringIO(templatesource)
    input_file = wStringIO.StringIO(inputsource)
    output_file = wStringIO.StringIO()
    merged_ok = pomerge.mergestore(
        input_file,
        output_file,
        template_file,
        mergeblanks=mergeblanks,
        mergefuzzy=mergefuzzy,
        mergecomments=mergecomments,
    )
    assert merged_ok
    return po.pofile(output_file.getvalue())
def convertfile(self, inputfile, filename, includeheader, includeuntagged=False, duplicatestyle="msgctxt", keepcomments=False):
    """converts a html file to .po format"""
    output = po.pofile()
    parser = html.htmlfile(includeuntaggeddata=includeuntagged, inputfile=inputfile)
    if includeheader:
        output.init_headers(charset="UTF-8", encoding="8bit")
    for htmlunit in parser.units:
        pounit = output.addsourceunit(htmlunit.source)
        pounit.addlocations(htmlunit.getlocations())
        if keepcomments:
            pounit.addnote(htmlunit.getnotes(), "developer")
    output.removeduplicates(duplicatestyle)
    return output
def run_converter(inputfile, outputfile, templatefile, includefuzzy=False):
    """Merge PO translations into a XAML template and write UTF-16LE output."""
    names = {}
    for unit in po.pofile(inputfile).units:
        usable = unit.istranslated() or (unit.isfuzzy() and includefuzzy and unit.target)
        if not usable:
            continue
        # Each unit is expected to carry exactly one "tag:name" location.
        location, = unit.getlocations()
        tag_name, name = location.split(':')
        names[name] = unit.target
    document = parse(templatefile)
    po2xaml(document, names)
    document.normalize()
    serialized = document.toxml()
    # remove prefix '<?xml version="1.0" ?>'
    serialized = serialized[serialized.find('?>') + 2:]
    outputfile.write(serialized.encode('utf-16le'))
    outputfile.write(bytes(2))  # two NUL bytes terminate the UTF-16 stream
    return 1
def convertstore(self, inputstore):
    """Converts a given .po file (Python Format) to a PHP format .po file.

    The difference being how variable substitutions work. PHP uses a %1$s
    format, and Python uses a {0} format (zero indexed). This method will
    convert::

        I have {1} apples and {0} oranges

    To::

        I have %2$s apples and %1$s oranges

    This method ignores strings with %s as both languages will recognize
    that.
    """
    output = po.pofile()
    for source_unit in inputstore.units:
        output.addunit(self.convertunit(source_unit))
    return output
def convertprop(inputfile, outputfile, templatefile, personality="java", includefuzzy=False, encoding=None, remove_untranslated=False, outputthreshold=None):
    """Merge a PO file into a .properties template and write the result."""
    store = po.pofile(inputfile)
    if not convert.should_output_store(store, outputthreshold):
        return False
    # Properties conversion always needs a template to preserve layout/keys.
    if templatefile is None:
        raise ValueError("must have template file for properties files")
    convertor = reprop(templatefile, store, personality, encoding, remove_untranslated)
    outputfile.write(convertor.convertstore(includefuzzy))
    return True
def convertstore(self, thecsvfile):
    """converts a csvfile to a pofile, and returns it.

    uses templatepo if given at construction
    """
    self.csvfile = thecsvfile
    if self.pofile is None:
        # No template PO supplied: build a brand-new PO file from scratch.
        self.pofile = po.pofile()
        mergemode = False
    else:
        # Template supplied at construction: merge CSV rows into it.
        mergemode = True
    if self.pofile.units and self.pofile.units[0].isheader():
        # Reuse the existing header, filling in charset/encoding placeholders.
        targetheader = self.pofile.units[0]
        targetheader.msgstr = [
            line.replace("CHARSET", "UTF-8").replace("ENCODING", "8bit")
            for line in targetheader.msgstr
        ]
    else:
        targetheader = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
    targetheader.addnote("extracted from %s" % self.csvfile.filename, "developer")
    mightbeheader = True
    for csvunit in self.csvfile.units:
        if self.charset is not None:
            csvunit.source = csvunit.source.decode(self.charset)
            csvunit.target = csvunit.target.decode(self.charset)
        if mightbeheader:
            # ignore typical header strings...
            # Only the very first CSV row is a header candidate.
            mightbeheader = False
            # Parenthesised tuple keeps this valid on Python 3 as well
            # (the original relied on Python 2's bare-tuple comprehension).
            if [item.strip().lower()
                    for item in (csvunit.comment, csvunit.source, csvunit.target)] == \
                    ["location", "source", "target"]:
                continue
            if len(csvunit.comment.strip()) == 0 \
                    and csvunit.source.find("Content-Type:") != -1:
                # A PO header exported into the CSV as a data row; skip it.
                continue
        if mergemode:
            self.handlecsvunit(csvunit)
        else:
            pounit = self.convertunit(csvunit)
            self.pofile.addunit(pounit)
    # Bug fix: the original fell off the end and returned None; callers of
    # the sibling convertstore implementation expect the populated PO file.
    return self.pofile
def convertfile(storefile, template_store):
    """Convert an HTML file to a PO store, pairing units with *template_store* when given."""
    store = pofile()
    # Fake input file with a blank filename
    parser = htmlfile(inputfile=BytesIOMode("", storefile.read()))
    for htmlunit in parser.units:
        locations = htmlunit.getlocations()
        if template_store:
            # Translation: look up the matching source unit by location.
            template = template_store.find_unit_mono("".join(locations))
            if template is None:
                # Skip locations not present in the source HTML file
                continue
            # Create unit with matching source
            unit = store.addsourceunit(template.source)
            unit.target = htmlunit.source
        else:
            # Source file: source and target are both the HTML text.
            unit = store.addsourceunit(htmlunit.source)
            unit.target = htmlunit.source
        unit.addlocations(locations)
        unit.addnote(htmlunit.getnotes(), "developer")
    store.removeduplicates("msgctxt")
    return store
def convertrc(inputfile, outputfile, templatefile, includefuzzy=False, charset=None, lang=None, sublang=None, outputthreshold=None):
    """Merge PO translations into an RC template; writes cp1252 output, falling back to UTF-16LE."""
    store = po.pofile(inputfile)
    if not convert.should_output_store(store, outputthreshold):
        return False
    if not lang:
        raise ValueError("must specify a target language")
    if templatefile is None:
        raise ValueError("must have template file for rc files")
    convertor = rerc(templatefile, charset, lang, sublang)
    rc_text = convertor.convertstore(store, includefuzzy)
    try:
        outputfile.write(rc_text.encode('cp1252'))
    except UnicodeEncodeError:
        # Characters outside cp1252: emit a BOM-prefixed UTF-16LE file instead.
        outputfile.write(codecs.BOM_UTF16_LE)
        outputfile.write(rc_text.encode('utf-16-le'))
    outputfile.close()
    templatefile.close()
    return 1
def convertrc(inputfile, outputfile, templatefile, includefuzzy=False, charset=None, lang=None, sublang=None, outputthreshold=None):
    """Merge PO translations into an RC template and write the converted lines."""
    store = po.pofile(inputfile)
    if not convert.should_output_store(store, outputthreshold):
        return False
    if not lang:
        raise ValueError("must specify a target language")
    if templatefile is None:
        raise ValueError("must have template file for rc files")
    convertor = rerc(templatefile, charset, lang, sublang)
    outputfile.writelines(convertor.convertstore(store, includefuzzy))
    return 1
def merge2prop(
    self,
    propsource,
    posource,
    personality="java",
    remove_untranslated=False,
    encoding="utf-8",
):
    """helper that merges po translations to .properties source without requiring files"""
    input_po = po.pofile(BytesIO(posource.encode()))
    template_bytes = propsource.encode() if isinstance(propsource, str) else propsource
    convertor = po2prop.reprop(
        BytesIO(template_bytes),
        input_po,
        personality=personality,
        remove_untranslated=remove_untranslated,
    )
    merged = convertor.convertstore()
    # Deliberately echo the raw merged bytes so failing tests show the output.
    print(merged)
    return merged.decode(encoding)
def convertfiles(self, inputfile, tmxfile, sourcelanguage='en', targetlanguage=None, comment=None):
    """converts a .po file (possibly many) to TMX file"""
    store = po.pofile(inputfile)
    for unit in store.units:
        # Only fully-translated, non-header, non-fuzzy units go into the TMX.
        skip = (unit.isheader() or unit.isblank()
                or not unit.istranslated() or unit.isfuzzy())
        if skip:
            continue
        notes = {
            'source': self.cleancomments(unit.sourcecomments, "source"),
            'type': self.cleancomments(unit.typecomments, "type"),
            'others': self.cleancomments(unit.othercomments),
        }
        tmxfile.addtranslation(unit.source, sourcelanguage, unit.target,
                               targetlanguage, notes.get(comment, None))
def convertstore(self, inputfile): """Converts a .xliff file to .po format""" # XXX: The inputfile is converted to string because Pootle supplies # XXX: a PootleFile object as input which cannot be sent to PoXliffFile. # XXX: The better way would be to have a consistent conversion API. if not isinstance(inputfile, (file, wStringIO.StringIO)): inputfile = str(inputfile) XliffFile = xliff.xlifffile.parsestring(inputfile) thetargetfile = po.pofile() targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit") # TODO: support multiple files for transunit in XliffFile.units: if transunit.isheader(): thetargetfile.updateheader(add=True, **XliffFile.parseheader()) if transunit.getnotes('translator'): targetheader.addnote(transunit.getnotes('translator'), origin='translator', position='replace') if transunit.getnotes('developer'): targetheader.addnote(transunit.getnotes('developer'), origin='developer', position='replace') targetheader.markfuzzy(transunit.isfuzzy()) continue thepo = self.converttransunit(transunit) thetargetfile.addunit(thepo) return thetargetfile
def convertdtd(inputfile, outputfile, templatefile, includefuzzy=False):
    """Convert a PO file back to DTD, auto-detecting the Android DTD dialect."""
    store = po.pofile(inputfile)
    # Some of the DTD files used for Firefox Mobile are actually completely
    # different with different escaping and quoting rules. The best way to
    # identify them seems to be on their file path in the tree (based on code
    # in compare-locales).
    android_dtd = False
    header_unit = store.header()
    if header_unit:
        notes = header_unit.getnotes("developer")
        android_dtd = ("embedding/android" in notes
                       or "mobile/android/base" in notes)
    if templatefile is None:
        convertor = po2dtd(android=android_dtd)
    else:
        template = dtd.dtdfile(templatefile, android=android_dtd)
        convertor = redtd(template, android=android_dtd)
    outputfile.write(str(convertor.convertstore(store, includefuzzy)))
    return 1
def convertstore(self, inputfile, duplicatestyle="msgctxt"):
    """Converts a .xliff file to .po format"""
    XliffFile = xliff.xlifffile.parsestring(inputfile)
    thetargetfile = po.pofile()
    # NOTE(review): header() on a freshly-created pofile may return None
    # until a header unit exists — confirm po.pofile.header() guarantees a
    # unit here, otherwise addnote/markfuzzy below can fail.
    targetheader = thetargetfile.header()
    # TODO: support multiple files
    for transunit in XliffFile.units:
        if transunit.isheader():
            # Fold XLIFF header metadata into the PO header instead of
            # emitting it as an ordinary unit.
            thetargetfile.updateheader(add=True, **XliffFile.parseheader())
            if transunit.getnotes('translator'):
                targetheader.addnote(transunit.getnotes('translator'),
                                     origin='translator', position='replace')
            if transunit.getnotes('developer'):
                targetheader.addnote(transunit.getnotes('developer'),
                                     origin='developer', position='replace')
            # Propagate the header's fuzzy state onto the PO header.
            targetheader.markfuzzy(transunit.isfuzzy())
            continue
        thepo = self.converttransunit(transunit)
        thetargetfile.addunit(thepo)
    thetargetfile.removeduplicates(duplicatestyle)
    return thetargetfile