def test_detect_encoding7(self):
    """
    Test with iso-8859-15 .mo file.
    """
    detected = polib.detect_encoding('tests/test_iso-8859-15.mo', True)
    self.assertEqual(detected, 'ISO_8859-15')
def test_detect_encoding4(self):
    """
    Test with utf8 data (no file).
    """
    with open("tests/test_utf8.po", "rb") as f:
        raw = f.read()
    # decode() is equivalent to str(raw, "utf-8") on Python 3
    data = raw.decode("utf-8")
    self.assertEqual(polib.detect_encoding(data), "UTF-8")
def test_detect_encoding4(self):
    """
    Test with utf8 data (no file).
    """
    # Use a context manager instead of the manual open/try/finally so the
    # file is closed in all cases, including assertion failure.
    with open('tests/test_utf8.po', 'r') as f:
        self.assertEqual(polib.detect_encoding(f.read()), 'UTF-8')
def test_detect_encoding4(self):
    """
    Test with utf8 data (no file).
    """
    # The original opened the file and read it BEFORE entering the
    # try/finally, so a failure in read()/decoding leaked the handle.
    # A context manager (available on both Python 2 and 3) closes the
    # file in every case.
    if polib.PY3:
        with open('tests/test_utf8.po', 'rb') as f:
            data = str(f.read(), 'utf-8')
    else:
        with open('tests/test_utf8.po', 'r') as f:
            data = f.read()
    self.assertEqual(polib.detect_encoding(data), 'UTF-8')
def parse_pofile(fn_or_string):
    """Parses a po file and attaches original poentry blocks

    When polib parses a pofile, it captures the line number of the start
    of the block, but doesn't capture the original string for the block.
    When you call str() on the poentry, it "reassembles" the block with
    textwrapped lines, so it returns something substantially different
    than the original block. This is problematic if we want to print out
    the block with the line numbers--one for each line.

    So this wrapper captures the line numbers and original text for each
    block and attaches that to the parsed poentries in an attribute named
    "original" thus allowing us to print the original text with line
    numbers.

    :param fn_or_string: a po file path or the po content as a string
    :returns: the polib pofile with ``original`` set on each entry
    """
    from polib import _is_file, detect_encoding, io, pofile

    # This parses the pofile
    parsed_pofile = pofile(fn_or_string)

    # Collect the raw source lines so we can recover each entry's
    # original text (polib reassembles blocks when str() is called).
    if _is_file(fn_or_string):
        enc = detect_encoding(fn_or_string, 'pofile')
        # Close the handle when done -- the original leaked it.
        with io.open(fn_or_string, 'rt', encoding=enc) as fp:
            raw_lines = fp.readlines()
    else:
        raw_lines = fn_or_string.splitlines(True)

    entries = list(parsed_pofile)
    for i, poentry in enumerate(entries):
        # Grab the lines that make up the poentry.
        # Note: linenum is 1-based, so we convert it to 0-based.
        try:
            lines = raw_lines[poentry.linenum - 1:entries[i + 1].linenum - 1]
        except IndexError:
            # Last entry: take everything to the end of the file.
            lines = raw_lines[poentry.linenum - 1:]

        # Nix blank lines at the end.
        while lines and not lines[-1].strip():
            lines.pop()

        # Join them and voila!
        poentry.original = ''.join(lines)

    return parsed_pofile
def parse_pofile(fn_or_string):
    """Parses a po file and attaches original poentry blocks

    When polib parses a pofile, it captures the line number of the start
    of the block, but doesn't capture the original string for the block.
    When you call str() on the poentry, it "reassembles" the block with
    textwrapped lines, so it returns something substantially different
    than the original block. This is problematic if we want to print out
    the block with the line numbers--one for each line.

    So this wrapper captures the line numbers and original text for each
    block and attaches that to the parsed poentries in an attribute named
    "original" thus allowing us to print the original text with line
    numbers.

    :param fn_or_string: a po file path or the po content as a string
    :returns: the polib pofile with ``original`` set on each entry
    """
    from polib import _is_file, detect_encoding, io, pofile

    # This parses the pofile
    parsed_pofile = pofile(fn_or_string)

    # Collect the raw source lines so we can recover each entry's
    # original text (polib reassembles blocks when str() is called).
    if _is_file(fn_or_string):
        enc = detect_encoding(fn_or_string, "pofile")
        # Close the handle when done -- the original leaked it.
        with io.open(fn_or_string, "rt", encoding=enc) as fp:
            raw_lines = fp.readlines()
    else:
        raw_lines = fn_or_string.splitlines(True)

    entries = list(parsed_pofile)
    for i, poentry in enumerate(entries):
        # Grab the lines that make up the poentry.
        # Note: linenum is 1-based, so we convert it to 0-based.
        try:
            lines = raw_lines[poentry.linenum - 1:entries[i + 1].linenum - 1]
        except IndexError:
            # Last entry: take everything to the end of the file.
            lines = raw_lines[poentry.linenum - 1:]

        # Nix blank lines at the end.
        while lines and not lines[-1].strip():
            lines.pop()

        # Join them and voila!
        poentry.original = "".join(lines)

    return parsed_pofile
def test_detect_encoding5(self):
    """
    Test with utf8 .mo file.
    """
    result = polib.detect_encoding("tests/test_utf8.mo")
    self.assertEqual(result, "UTF-8")
def test_detect_encoding5(self):
    """
    Test with utf8 .mo file.
    """
    detected = polib.detect_encoding('tests/test_utf8.mo', True)
    self.assertEqual(detected, 'UTF-8')
def test_detect_encoding2(self):
    """
    Test with a .pot file.
    """
    result = polib.detect_encoding("tests/test_merge.pot")
    self.assertEqual(result, "utf-8")
def test_detect_encoding1(self):
    """
    Test that given encoding is returned when file has no encoding
    defined.
    """
    result = polib.detect_encoding("tests/test_noencoding.po")
    self.assertEqual(result, "utf-8")
def test_detect_encoding1(self):
    """
    Test that the given encoding is returned when the file has no
    encoding defined.
    """
    self.assertEqual(polib.detect_encoding('tests/test_noencoding.po'), 'utf-8')
def test_detect_encoding7(self):
    """
    Test with iso-8859-15 .mo file.
    """
    encoding = polib.detect_encoding('tests/test_iso-8859-15.mo', True)
    self.assertEqual(encoding, 'ISO_8859-15')
def test_detect_encoding7(self):
    """
    Test with iso-8859-15 .mo file.
    """
    result = polib.detect_encoding("tests/test_iso-8859-15.mo")
    self.assertEqual(result, "ISO_8859-15")
def test_detect_encoding3(self):
    """
    Test with an utf8 .po file.
    """
    detected = polib.detect_encoding('tests/test_utf8.po')
    self.assertEqual(detected, 'UTF-8')
def mergepo_minimaldiff(target, source, options):
    """Merge translations from the po file ``source`` into ``target``.

    ``target`` is parsed with a hand-written line parser and rewritten in
    place (only when something actually changed) so that the resulting
    diff stays minimal -- str()-ing polib entries would rewrap lines.

    :param target: path of the po file to update (must be utf-8 encoded)
    :param source: path of the po file providing the translations
    :param options: passed through to ``translate()``
    :returns: the number of changed entries
    :raises RuntimeError: if ``target`` is not utf-8 encoded, or if it
        contains plural forms (``msgstr[``), which this parser does not
        implement
    """
    changed = 0
    po2 = polib.pofile(source)
    target_enc = polib.detect_encoding(target)
    # for utf8 files we can use our self written parser to minimize diffs,
    # otherwise we need to use polib
    if target_enc not in ['UTF-8', 'utf-8', 'utf_8']:
        # the original used a bare `raise` with no active exception, which
        # surfaces as an opaque RuntimeError; keep the type, add a message
        raise RuntimeError('mergepo_minimaldiff: %s is not utf-8 encoded' % target)
    # open file with universal newlines, since it can happen that we are
    # on unix, but the file has been written on windows or vice versa.
    # The context manager replaces the manual open()/close() pair.
    with io.open(target, 'r', encoding='utf_8', newline=None) as po1:
        oldlines = read(po1)
    newlines = []
    in_msgid = False
    in_msgstr = False
    flags = []
    msgstr_lines = []
    msgid_lines = []
    msgid = ''
    for line in oldlines:
        if in_msgid:
            # continuation lines of a (possibly obsolete "#~") msgid
            if line.find('"') == 0 or line.find('#~ "') == 0:
                msgid_lines.append(line)
            else:
                in_msgid = False
                msgid = parse_msg(msgid_lines)
        elif in_msgstr:
            # continuation lines of a (possibly obsolete "#~") msgstr
            if line.find('"') == 0 or line.find('#~ "') == 0:
                msgstr_lines.append(line)
            else:
                # msgstr finished: translate and flush the whole entry
                in_msgstr = False
                changed += translate(msgid, flags, msgstr_lines, po2, options)
                if len(flags) > 0:
                    flagline = u'#, ' + u', '.join(flags)
                    newlines.append(flagline)
                    flags = []
                newlines.extend(msgid_lines)
                newlines.extend(msgstr_lines)
                msgid_lines = []
                msgstr_lines = []
                msgid = ''
        if not in_msgid and not in_msgstr:
            # note: the line that *terminated* a msgid/msgstr block above
            # deliberately falls through to be classified here as well
            if line.find('#,') == 0 and len(flags) == 0:
                flags = line[2:].strip().split(u', ')
            elif line.find('msgid') == 0 or line.find('#~ msgid') == 0:
                msgid_lines.append(line)
                in_msgid = True
            elif line.find('msgstr') == 0 or line.find('#~ msgstr') == 0:
                if line.find('msgstr[') == 0 or line.find('#~ msgstr[') == 0:
                    # plural forms are not implemented (was a bare `raise`)
                    raise RuntimeError('mergepo_minimaldiff: plural forms are not supported')
                msgstr_lines.append(line)
                in_msgstr = True
            else:
                newlines.append(line)
    if msgid != '':
        # the file ended with a msgstr: flush the pending entry
        changed += translate(msgid, flags, msgstr_lines, po2, options)
        if len(flags) > 0:
            newlines.append(u'#, ' + u', '.join(flags))
        newlines.extend(msgid_lines)
        newlines.extend(msgstr_lines)
    if changed > 0:
        # we store .po files with unix line ends in git, so do always write
        # them even on windows. Use a context manager so the file is flushed
        # and closed even on error -- the original never closed this handle.
        with io.open(target, 'w', encoding='utf_8', newline='\n') as po1:
            for line in newlines:
                po1.write(line + '\n')
    return changed
def test_detect_encoding2(self):
    """
    Test with a .pot file.
    """
    detected = polib.detect_encoding('tests/test_merge.pot')
    self.assertEqual(detected, 'utf-8')