def test_utf8_file(self):
    """Check that direct and heuristic decoding of this module's source agree.

    The module's own ".py" file is decoded twice: once by handing the
    filename to decode_from_file, and once by reading the raw bytes and
    passing them to decode_heuristically.  The heuristic result is indexed
    as [0] decoded text, [1] encoding name, [2] "lossy" flag.
    """
    # Always point at the ".py" file (presumably because __file__ may name
    # a compiled file -- TODO confirm).
    source_path = splitext(__file__)[0] + ".py"

    # Route 1: the helper reads and decodes the file by itself.
    direct_text = decode_from_file(source_path)

    # Route 2: fetch the bytes first, then decode the binary string.
    raw_bytes = read_binary_file(source_path)
    heuristic_result = decode_heuristically(raw_bytes)

    # Both routes must produce the same unicode string.
    self.assertEqual(direct_text, heuristic_result[0])

    # Decoding must not have been "lossy", and the encoding must be utf_8.
    self.assertEqual(heuristic_result[2], False)
    self.assertEqual(heuristic_result[1], "utf_8")
def test_cp1252_file(self):
    """Check direct vs. heuristic decoding of the dh_cp1252.txt fixture.

    Only the decoded text and the "lossy" flag are asserted; the detected
    encoding name is deliberately left unchecked (see note below).
    """
    # The fixture is meant to be (re)written under py3.0 only, e.g.:
    #   bs = b"some cp1252 \n \x80\x81\x82\x83\x84 \nchars"
    #   write_binary_file(absname("dh_cp1252.txt"), bs)

    # Route 1: decode straight from the file.
    direct_text = decode_from_file(absname("dh_cp1252.txt"))

    # Route 2: read the bytes, then decode them heuristically.
    raw_bytes = read_binary_file(absname("dh_cp1252.txt"))
    heuristic_result = decode_heuristically(raw_bytes)

    self.assertEqual(direct_text, heuristic_result[0])

    # The detected encoding is very likely to differ between platforms,
    # so it is not asserted.  Formerly tried here:
    #   pr("test_cp1252_file", heuristic_result[1:])
    #   self.failUnless(heuristic_result[1] in ["cp1252", "mac-roman"])

    # Whatever encoding was picked, decoding must not have been "lossy".
    self.assertEqual(heuristic_result[2], False)
def redecode(self):
    """ () -> ts_raw:str

    Decode the raw template string from its file again, using this
    object's absolute path and input encoding.
    """
    # TODO: if "#..." then just use caller's ts_raw
    template_path = self.get_abspath()
    encoding = self.input_encoding
    ts_raw = decode_from_file(template_path, enc=encoding)
    return ts_raw