def getRowCount(fn, headerLines=1):
    """Count the rows in a delimited text file whose first field is non-empty.

    ``.csv`` files are parsed with the Excel dialect; ``.tsv``/``.txt`` files
    with the Excel-tab dialect (UTF-16 input is detected via a BOM on the
    first line and reopened with the ``utf-16`` codec).  For tab-delimited
    files the first line is consumed before counting (presumably a header —
    matches the original behavior) and ``headerLines`` is reset to 0.

    The parse is retried up to 5 times; on the final failed attempt the
    fallback count is ``headerLines``.

    :param fn: path of the file to inspect.
    :param headerLines: fallback row count when parsing keeps failing.
    :return: the number of counted rows (fixed: the original computed this
        value but never returned it).
    """
    fileExtension = os.path.splitext(fn)[1]
    # Try to get data at least n times (in case the server is loaded and
    # returning 504 - timeout)
    tries = 5
    count = headerLines
    for attempt in range(tries):
        # errors="replace" keeps the text-mode read alive on mixed encodings
        # (the original ran under Python 2 byte strings).
        fc = open(fn, "r", errors="replace")
        try:
            if fileExtension == '.csv':
                readerc = csv.reader((ln.replace('\0', '') for ln in fc),
                                     dialect=csv.excel)
            elif fileExtension == '.tsv' or fileExtension == '.txt':
                firstLine = next(fc)
                # '\ufffd' is what a UTF-16 BOM decodes to under
                # errors="replace"; '\xff\xfe' matches the legacy byte check.
                if firstLine.startswith(('\xff\xfe', '\ufeff', '\ufffd')):
                    fc.close()  # original leaked this handle on reopen
                    fc = open(fn, "r", encoding="utf-16", errors="replace")
                    readerc = csv.reader(fc, dialect='excel-tab',
                                         quoting=csv.QUOTE_NONE)
                else:
                    # First line stays consumed, so it is not counted —
                    # TODO confirm this header-skip is intentional.
                    readerc = csv.reader((ln.replace('\0', '') for ln in fc),
                                         dialect='excel-tab',
                                         quoting=csv.QUOTE_NONE)
                headerLines = 0
            # Guard `row` against blank lines (csv yields [] for them).
            count = sum(1 for row in readerc if row and len(row[0]) > 0)
        except csv.Error as e:
            print(fn, "csv Error:", e, fn)
            # Original compared `i == tries`, which range() never produces.
            if attempt == tries - 1:
                count = headerLines
            continue
        finally:
            fc.close()
        break
    return count
def transcode(backend_stream, backend_encoding, frontend_encoding, errors='strict'):
    """Present *backend_stream* (bytes in *backend_encoding*) as a stream of
    bytes in *frontend_encoding*.

    Reads decode with the backend codec and re-encode with the frontend
    codec; writes go the other way.  *errors* is the codec error policy.
    """
    frontend = codecs.lookup(frontend_encoding)
    backend = codecs.lookup(backend_encoding)
    return codecs.StreamRecoder(
        backend_stream,
        frontend.encode,
        frontend.decode,
        backend.streamreader,
        backend.streamwriter,
        errors,
    )
def __init__(self, f, encoding):
    """Wrap file object *f* as ``self.reader`` yielding UTF-8 content.

    On Python 3 the file is used as-is; on Python 2 it is recoded on the
    fly from *encoding* to UTF-8 via a ``StreamRecoder``.
    """
    # Always start reading from the beginning of the file.
    f.seek(0)
    if six.PY3:
        # Text-mode file objects already decode for us on Python 3.
        self.reader = f
    if six.PY2:
        utf8_encode = codecs.getencoder('utf-8')
        utf8_decode = codecs.getdecoder('utf-8')
        source_reader = codecs.getreader(encoding)
        source_writer = codecs.getwriter(encoding)
        self.reader = codecs.StreamRecoder(
            f, utf8_encode, utf8_decode, source_reader, source_writer)
def __init__(
        self,
        host=PUDB_RDB_HOST,
        port=PUDB_RDB_PORT,
        port_search_limit=100,
        out=sys.stdout,
        term_size=None,
        reverse=False,
        ):
    """Start a remote debugging session over a socket.

    Obtains a client connection via ``self.get_client`` (scanning up to
    ``port_search_limit`` ports; ``reverse`` presumably flips who connects
    to whom — confirm against ``get_client``), rebinds ``sys.stdin`` /
    ``sys.stdout`` to the socket, performs telnet option negotiation
    (skipped in reverse mode), and initializes the underlying ``Debugger``.

    :param host: interface/host for the debug connection.
    :param port: starting port number.
    :param port_search_limit: how many ports to try.
    :param out: stream for status messages (``self.say``).
    :param term_size: terminal size forwarded to ``Debugger``.
    :param reverse: use reverse-connection (e.g. ``nc``) mode.
    """
    self.active = True
    self.out = out
    # Saved so the previous stdin/stdout can be restored later.
    self._prev_handles = sys.stdin, sys.stdout
    self._client, (address, port) = self.get_client(
        host=host, port=port, search_limit=port_search_limit,
        reverse=reverse)
    self.remote_addr = ":".join(str(v) for v in address)

    self.say(SESSION_STARTED.format(self=self))

    # makefile ignores encoding if there's no buffering.
    raw_sock_file = self._client.makefile("rwb", 0)

    import codecs

    if sys.version_info[0] < 3:
        # Python 2: recode the raw byte stream through UTF-8 both ways.
        sock_file = codecs.StreamRecoder(
            raw_sock_file,
            codecs.getencoder("utf-8"),
            codecs.getdecoder("utf-8"),
            codecs.getreader("utf-8"),
            codecs.getwriter("utf-8"),
            )
    else:
        # Python 3: plain UTF-8 reader/writer pair over the socket file.
        sock_file = codecs.StreamReaderWriter(
            raw_sock_file,
            codecs.getreader("utf-8"),
            codecs.getwriter("utf-8"))

    # Route the interpreter's stdio through the socket for the session.
    self._handle = sys.stdin = sys.stdout = sock_file

    # nc negotiation doesn't support telnet options
    if not reverse:
        import telnetlib as tn

        # Offer SGA (suppress go-ahead) and ECHO; the peer must accept
        # each with IAC DO <option>.
        raw_sock_file.write(tn.IAC + tn.WILL + tn.SGA)
        resp = raw_sock_file.read(3)
        assert resp == tn.IAC + tn.DO + tn.SGA

        raw_sock_file.write(tn.IAC + tn.WILL + tn.ECHO)
        resp = raw_sock_file.read(3)
        assert resp == tn.IAC + tn.DO + tn.ECHO

    Debugger.__init__(
        self, stdin=self._handle, stdout=self._handle,
        term_size=term_size)
def __init__(self, host=PUDB_RDB_HOST, port=PUDB_RDB_PORT,
             port_search_limit=100, out=sys.stdout, term_size=None):
    """Start a remote debugging session by listening for one client.

    Binds a listening socket via ``self.get_avail_port`` (scanning up to
    ``port_search_limit`` ports), blocks until a client connects, rebinds
    ``sys.stdin`` / ``sys.stdout`` to the connection, performs telnet
    option negotiation, and initializes the underlying ``Debugger``.

    :param host: interface to listen on.
    :param port: starting port number.
    :param port_search_limit: how many ports to try.
    :param out: stream for status messages (``self.say``).
    :param term_size: terminal size forwarded to ``Debugger``.
    """
    self.active = True
    self.out = out
    # Saved so the previous stdin/stdout can be restored later.
    self._prev_handles = sys.stdin, sys.stdout
    self._sock, this_port = self.get_avail_port(host, port, port_search_limit)
    self._sock.setblocking(1)
    self._sock.listen(1)
    self.ident = '{0}:{1}'.format(self.me, this_port)
    self.host = host
    self.port = this_port
    self.say(BANNER.format(self=self))

    # Block until a debugging client connects.
    self._client, address = self._sock.accept()
    self._client.setblocking(1)
    self.remote_addr = ':'.join(str(v) for v in address)
    self.say(SESSION_STARTED.format(self=self))

    # makefile ignores encoding if there's no buffering.
    raw_sock_file = self._client.makefile("rwb", 0)

    import codecs

    if sys.version_info[0] < 3:
        # Python 2: recode the raw byte stream through UTF-8 both ways.
        sock_file = codecs.StreamRecoder(
            raw_sock_file,
            codecs.getencoder("utf-8"),
            codecs.getdecoder("utf-8"),
            codecs.getreader("utf-8"),
            codecs.getwriter("utf-8"))
    else:
        # Python 3: plain UTF-8 reader/writer pair over the socket file.
        sock_file = codecs.StreamReaderWriter(
            raw_sock_file,
            codecs.getreader("utf-8"),
            codecs.getwriter("utf-8"))

    # Route the interpreter's stdio through the socket for the session.
    self._handle = sys.stdin = sys.stdout = sock_file

    import telnetlib as tn

    # Offer SGA (suppress go-ahead) and ECHO; the peer must accept each
    # with IAC DO <option>.
    raw_sock_file.write(tn.IAC + tn.WILL + tn.SGA)
    resp = raw_sock_file.read(3)
    assert resp == tn.IAC + tn.DO + tn.SGA

    raw_sock_file.write(tn.IAC + tn.WILL + tn.ECHO)
    resp = raw_sock_file.read(3)
    assert resp == tn.IAC + tn.DO + tn.ECHO

    Debugger.__init__(
        self, stdin=self._handle, stdout=self._handle,
        term_size=term_size)
def readCoreStats(coreFile, delimiter, quoteChar, headerLines, idCol, basisCol, occCol):
    """Scan a delimited occurrence file and return summary statistics.

    :param coreFile: path of the file to scan.
    :param delimiter: field delimiter; the literal two-character string
        ``"\\t"`` selects the Excel-tab dialect.
    :param quoteChar: quote character; empty string disables quoting.
    :param headerLines: number of leading rows to skip.
    :param idCol: column index of the record id (or -1 / absent).
    :param basisCol: either a literal basis-of-record string, or a column
        index into the first data row (or -1).
    :param occCol: fallback id column index when ``idCol`` is unusable.
    :return: ``(count, uniqueIdCount, basisOfRecord, sampleGuid)`` where
        ``count`` is the number of data rows read, ``uniqueIdCount`` the
        number of distinct ids seen in the selected id column,
        ``basisOfRecord`` the resolved basis value (or ``''``), and
        ``sampleGuid`` the id from the first data row (or ``"NOID"``).
    """
    # Peek at the first line only to detect a UTF-16 BOM; the original
    # opened the file twice and leaked the first handle.
    with open(coreFile, "r", errors="replace") as peek:
        firstLine = peek.readline()

    fc = open(coreFile, "r", errors="replace")
    try:
        if "\\t" == delimiter:
            if 0 == len(quoteChar):
                # '\ufffd' is what a UTF-16 BOM decodes to under
                # errors="replace"; '\xff\xfe' matches the legacy byte check.
                if firstLine.startswith(('\xff\xfe', '\ufeff', '\ufffd')):
                    fc.close()
                    fc = open(coreFile, "r", encoding="utf-16", errors="replace")
                    readerc = csv.reader(fc, dialect='excel-tab',
                                         quoting=csv.QUOTE_NONE)
                else:
                    readerc = csv.reader((ln.replace('\0', '') for ln in fc),
                                         dialect='excel-tab',
                                         quoting=csv.QUOTE_NONE)
            else:
                readerc = csv.reader((ln.replace('\0', '') for ln in fc),
                                     dialect='excel-tab', quotechar=quoteChar)
        else:
            if 0 == len(quoteChar):
                readerc = csv.reader((ln.replace('\0', '') for ln in fc),
                                     delimiter=delimiter,
                                     quoting=csv.QUOTE_NONE)
            else:
                readerc = csv.reader((ln.replace('\0', '') for ln in fc),
                                     delimiter=delimiter, quotechar=quoteChar)

        # Skip the declared header rows, then read the first data row.
        for _ in range(0, headerLines):
            next(readerc)
        row = next(readerc)
        count = 1
        uniqIds = {}

        # basisCol may be a literal value or a column index.
        basisOfRecord = ''
        if type(basisCol) == str:
            basisOfRecord = basisCol
        elif -1 < basisCol:
            basisOfRecord = row[basisCol]

        # Choose the id column: prefer idCol, fall back to occCol; the
        # first data row's id becomes the sample GUID.
        sampleGuid = "NOID"
        selectedIdCol = -1
        if -1 < idCol and 0 < len(row[idCol]):
            sampleGuid = row[idCol]
            uniqIds[row[idCol]] = ""
            selectedIdCol = idCol
        elif -1 < occCol and 0 < len(row[occCol]):
            sampleGuid = row[occCol]
            uniqIds[row[occCol]] = ""
            selectedIdCol = occCol

        # Count every remaining row; track distinct ids when a column
        # was selected.
        for row in readerc:
            if -1 < selectedIdCol and row[selectedIdCol] not in uniqIds:
                uniqIds[row[selectedIdCol]] = ""
            count = count + 1
    finally:
        # Close on every path; the original leaked the handle on errors.
        fc.close()
    return count, len(uniqIds), basisOfRecord, sampleGuid
def test_bad_stream_exception(all_parsers, csv_dir_path):
    # see gh-13652
    #
    # This test validates that both the Python engine and C engine will
    # raise UnicodeDecodeError instead of C engine raising ParserError
    # and swallowing the exception that caused read to fail.
    path = os.path.join(csv_dir_path, "sauron.SHIFT_JIS.csv")
    parser = all_parsers
    msg = "'utf-8' codec can't decode byte"

    # Recode the raw bytes through UTF-8 both ways (the original looked up
    # the same codec twice; one lookup serves all four slots).
    utf8 = codecs.lookup("utf-8")

    # Stream must be binary UTF8.
    with open(path, "rb") as handle:
        with codecs.StreamRecoder(
                handle,
                utf8.encode,
                utf8.decode,
                utf8.streamreader,
                utf8.streamwriter) as stream:
            with pytest.raises(UnicodeDecodeError, match=msg):
                parser.read_csv(stream)
def main(args):
    """Recode ``args.src`` into ``args.out`` between UTF-8 and Shift_JIS.

    A truthy ``args.tou`` converts Shift_JIS -> UTF-8; otherwise the file is
    converted UTF-8 -> Shift_JIS.  Relies on the module-level ``utf_codec``
    and ``sjis_codec`` codec-info objects.

    :param args: parsed CLI arguments with ``src``, ``out`` and ``tou``.
    """
    src_file_path = args.src
    dest_file_path = args.out

    src_codec = utf_codec
    dest_codec = sjis_codec
    if args.tou:
        print("converting to UTF8")
        src_codec = sjis_codec
        dest_codec = utf_codec
    else:
        print("converting to SJIS")

    # Open the source and destination file objects.
    with open(src_file_path, "rb") as src, open(dest_file_path, "wb") as dest:
        # Build the conversion stream: reads are decoded with the source
        # codec and re-encoded with the destination codec, so the recoder
        # already yields bytes in the destination encoding.
        stream = codecs.StreamRecoder(
            src,                      # source byte stream
            dest_codec.encode,        # encoder applied after decoding
            src_codec.decode,         # source decoder
            src_codec.streamreader,   # source stream reader
            dest_codec.streamwriter,  # destination stream writer
        )
        reader = io.BufferedReader(stream)

        # Copy until EOF.  BUG FIX: the original called
        # ``dest_codec.streamwriter.write(data)`` on the StreamWriter
        # *class* (no instance, wrong target); the recoded bytes must be
        # written straight to the destination file.
        while True:
            data = reader.read1()
            if not data:
                break
            dest.write(data)
import sys
import codecs
import os.path


def main():
    """Recode the file named in ``sys.argv[1]`` from cp1256 to UTF-8.

    Writes the converted bytes to ``<name>-utf<ext>`` next to the input.
    Exits with a message when no argument is given or the path is not an
    existing regular file.
    """
    try:
        fansi_name = sys.argv[1]
    except IndexError:  # was a bare except: only a missing argv entry matters
        print("No input file name provided")
        sys.exit()

    if(not os.path.exists(fansi_name) or not os.path.isfile(fansi_name)):
        print("File doesn't exist")
        sys.exit()

    ofpath, ext = os.path.splitext(fansi_name)

    # `with` guarantees both handles close even on a codec error
    # (the originals were closed manually and leaked on failure).
    with open(fansi_name, mode="rb") as fansi, \
            open(ofpath + "-utf" + ext, mode="wb") as fansiout:
        # Reads decode as cp1256 and re-encode as UTF-8.
        futf = codecs.StreamRecoder(fansi,
                                    codecs.getencoder('utf-8'),
                                    codecs.getdecoder('utf-8'),
                                    codecs.getreader('cp1256'),
                                    codecs.getwriter('cp1256')
                                    )
        fansiout.write(futf.read())


# Guard so importing this module has no side effects; CLI use unchanged.
if __name__ == "__main__":
    main()