Пример #1
0
def getRowCount(fn, headerLines=1):
    """Count the rows of a delimited text file whose first field is non-empty.

    The file extension selects the parser: ``.csv`` uses the Excel dialect,
    ``.tsv``/``.txt`` use the Excel-tab dialect without quoting.  NUL
    characters are stripped from every line before parsing, except on the
    BOM path below where the file is read through a UTF-16 -> UTF-8 recoder.

    For ``.tsv``/``.txt`` the first line is consumed to look for the byte
    pair ``\\xff\\xfe`` at its start (the check is written against byte
    strings, i.e. Python 2 ``str`` semantics).  When it is found the file is
    reopened and recoded from UTF-16; otherwise the consumed line is simply
    not seen by the reader.

    fn          -- path of the file to inspect
    headerLines -- value stored in ``count`` when every attempt fails with
                   ``csv.Error``; reset to 0 for ``.tsv``/``.txt`` input

    NOTE(review): this excerpt shows no ``return`` statement and does not
    close ``fc``; the remainder of the function is presumably outside this
    view.  Any other file extension leaves ``readerc`` unbound.
    """
    fc = open(fn, "r")
    fileExtension = os.path.splitext(fn)[1]
    # Retry a bounded number of times; only csv.Error triggers a retry.
    tries = 5
    for i in range(tries):
        try:
            if fileExtension == '.csv':
                readerc = csv.reader((line.replace('\0', '') for line in fc), dialect=csv.excel)
            elif fileExtension in ('.tsv', '.txt'):
                firstLine = next(fc)
                if firstLine.startswith('\xff\xfe'):
                    # Release the probing handle before reopening from the start.
                    fc.close()
                    fc = open(fn, "r")
                    sr = codecs.StreamRecoder(fc, codecs.getencoder('utf-8'), codecs.getdecoder('utf-8'), codecs.getreader('utf-16'), codecs.getwriter('utf-16'))
                    readerc = csv.reader(sr, dialect='excel-tab', quoting=csv.QUOTE_NONE)
                    headerLines = 0
                else:
                    readerc = csv.reader((line.replace('\0', '') for line in fc), dialect='excel-tab', quoting=csv.QUOTE_NONE)
                    headerLines = 0
            count = sum(1 for row in readerc if (len(row[0]) > 0))
        except csv.Error as e:
            # Single pre-formatted string: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s csv Error: %s %s" % (fn, e, fn))
            # The last loop index is tries - 1; comparing against `tries`
            # meant this fallback could never run.
            if i == tries - 1:
                count = headerLines
            continue
        break
Пример #2
0
def transcode(backend_stream, backend_encoding, frontend_encoding,
              errors='strict'):
    """Wrap *backend_stream* in a ``codecs.StreamRecoder``.

    Data read from the returned object is decoded from *backend_encoding*
    and handed out encoded as *frontend_encoding*; data written to it is
    decoded from *frontend_encoding* and stored in the stream encoded as
    *backend_encoding*.  *errors* is passed through to the recoder.
    """
    frontend = codecs.lookup(frontend_encoding)
    backend = codecs.lookup(backend_encoding)
    return codecs.StreamRecoder(
        backend_stream,
        frontend.encode,
        frontend.decode,
        backend.streamreader,
        backend.streamwriter,
        errors,
    )
Пример #3
0
 def __init__(self, f, encoding):
     """Rewind *f* and store it as ``self.reader``.

     On Python 3 the file object is stored unchanged and *encoding* is not
     used here.  On Python 2 it is wrapped in a ``codecs.StreamRecoder``
     that reads *encoding* from the file and hands out UTF-8.
     """
     f.seek(0)
     if six.PY3:
         self.reader = f
     if six.PY2:
         utf8 = codecs.lookup('utf-8')
         source = codecs.lookup(encoding)
         self.reader = codecs.StreamRecoder(
             f,
             utf8.encode,
             utf8.decode,
             source.streamreader,
             source.streamwriter,
         )
Пример #4
0
    def __init__(
        self,
        host=PUDB_RDB_HOST,
        port=PUDB_RDB_PORT,
        port_search_limit=100,
        out=sys.stdout,
        term_size=None,
        reverse=False,
    ):
        """Obtain a client socket, redirect stdin/stdout to it and start the debugger.

        The client comes from ``self.get_client``, which receives *host*,
        *port*, *port_search_limit* (as ``search_limit``) and *reverse*.
        The previous ``sys.stdin``/``sys.stdout`` are remembered in
        ``self._prev_handles`` before both are replaced by a UTF-8 wrapper
        around the socket.  Unless *reverse* is set, a short telnet option
        exchange is performed on the raw socket file.  *out* is stored as
        ``self.out`` and *term_size* is forwarded to ``Debugger.__init__``.
        """
        self.active = True
        self.out = out

        self._prev_handles = sys.stdin, sys.stdout
        client_info = self.get_client(
            host=host,
            port=port,
            search_limit=port_search_limit,
            reverse=reverse,
        )
        self._client, (address, port) = client_info
        self.remote_addr = ":".join(map(str, address))

        self.say(SESSION_STARTED.format(self=self))

        # makefile ignores encoding if there's no buffering.
        raw_sock_file = self._client.makefile("rwb", 0)
        import codecs

        utf8_reader = codecs.getreader("utf-8")
        utf8_writer = codecs.getwriter("utf-8")
        if sys.version_info[0] >= 3:
            sock_file = codecs.StreamReaderWriter(
                raw_sock_file, utf8_reader, utf8_writer)
        else:
            sock_file = codecs.StreamRecoder(
                raw_sock_file,
                codecs.getencoder("utf-8"),
                codecs.getdecoder("utf-8"),
                utf8_reader,
                utf8_writer,
            )

        self._handle = sock_file
        sys.stdin = sock_file
        sys.stdout = sock_file

        # nc negotiation doesn't support telnet options
        if not reverse:
            import telnetlib as tn

            # Offer each option (WILL) and require the peer to accept (DO).
            for option in (tn.SGA, tn.ECHO):
                raw_sock_file.write(tn.IAC + tn.WILL + option)
                reply = raw_sock_file.read(3)
                assert reply == tn.IAC + tn.DO + option

        Debugger.__init__(
            self,
            stdin=self._handle,
            stdout=self._handle,
            term_size=term_size,
        )
Пример #5
0
    def __init__(self,
                 host=PUDB_RDB_HOST,
                 port=PUDB_RDB_PORT,
                 port_search_limit=100,
                 out=sys.stdout,
                 term_size=None):
        """Listen for one client, redirect stdin/stdout to it and start the debugger.

        A listening socket is obtained from ``self.get_avail_port`` using
        *host*, *port* and *port_search_limit*; the call then blocks in
        ``accept()`` until a client connects.  The previous
        ``sys.stdin``/``sys.stdout`` are remembered in ``self._prev_handles``
        before both are replaced by a UTF-8 wrapper around the client
        socket.  A short telnet option exchange is performed on the raw
        socket file.  *out* is stored as ``self.out`` and *term_size* is
        forwarded to ``Debugger.__init__``.
        """
        self.active = True
        self.out = out

        self._prev_handles = sys.stdin, sys.stdout

        self._sock, this_port = self.get_avail_port(
            host, port, port_search_limit)
        self._sock.setblocking(1)
        self._sock.listen(1)
        self.ident = '{0}:{1}'.format(self.me, this_port)
        self.host = host
        self.port = this_port
        self.say(BANNER.format(self=self))

        accepted = self._sock.accept()
        self._client, address = accepted
        self._client.setblocking(1)
        self.remote_addr = ':'.join(map(str, address))
        self.say(SESSION_STARTED.format(self=self))

        # makefile ignores encoding if there's no buffering.
        raw_sock_file = self._client.makefile("rwb", 0)
        import codecs

        utf8_reader = codecs.getreader("utf-8")
        utf8_writer = codecs.getwriter("utf-8")
        if sys.version_info[0] >= 3:
            sock_file = codecs.StreamReaderWriter(
                raw_sock_file, utf8_reader, utf8_writer)
        else:
            sock_file = codecs.StreamRecoder(
                raw_sock_file,
                codecs.getencoder("utf-8"),
                codecs.getdecoder("utf-8"),
                utf8_reader,
                utf8_writer,
            )

        self._handle = sock_file
        sys.stdin = sock_file
        sys.stdout = sock_file

        import telnetlib as tn

        # Offer each option (WILL) and require the peer to accept (DO).
        for option in (tn.SGA, tn.ECHO):
            raw_sock_file.write(tn.IAC + tn.WILL + option)
            reply = raw_sock_file.read(3)
            assert reply == tn.IAC + tn.DO + option

        Debugger.__init__(
            self,
            stdin=self._handle,
            stdout=self._handle,
            term_size=term_size,
        )
Пример #6
0
def readCoreStats(coreFile, delimiter, quoteChar, headerLines, idCol, basisCol, occCol):
    """Scan a delimited file and summarise its data rows.

    coreFile    -- path of the file to read
    delimiter   -- field delimiter; the two-character string ``"\\\\t"``
                   (backslash + "t") selects the Excel-tab dialect
    quoteChar   -- quote character; an empty value disables quoting
    headerLines -- number of leading rows to skip
    idCol       -- index of the preferred identifier column, or -1
    basisCol    -- either a literal string used as the basis of record, or
                   the index of the column holding it (-1 for none)
    occCol      -- index of the fallback identifier column, or -1

    Returns ``(count, uniqueIds, basisOfRecord, sampleGuid)``: the number of
    data rows, the number of distinct values in the identifier column that
    was selected from the first data row (0 when none was selected), the
    basis of record, and the first data row's identifier (``"NOID"`` when
    neither identifier column is usable).

    NUL characters are stripped from every line, except when the file is
    tab-delimited, unquoted and starts with the byte pair ``\\xff\\xfe``; it
    is then read through a UTF-16 -> UTF-8 recoder instead.  That check is
    written against byte strings (Python 2 ``str`` semantics).

    Raises StopIteration when the file has no data row after the header.
    """
    # Peek at the first line with a throwaway handle so that it is closed
    # again instead of being leaked when the file is reopened for parsing.
    with open(coreFile, "r") as probe:
        firstLine = next(probe)

    tabDelimited = "\\t" == delimiter
    if tabDelimited:
        options = {'dialect': 'excel-tab'}
    else:
        options = {'delimiter': delimiter}
    if quoteChar:
        options['quotechar'] = quoteChar
    else:
        options['quoting'] = csv.QUOTE_NONE
    recodeUtf16 = tabDelimited and not quoteChar and firstLine.startswith('\xff\xfe')

    # All reading happens inside the with-block because the csv reader pulls
    # lines from the file lazily.
    with open(coreFile, "r") as fc:
        if recodeUtf16:
            source = codecs.StreamRecoder(fc, codecs.getencoder('utf-8'), codecs.getdecoder('utf-8'), codecs.getreader('utf-16'), codecs.getwriter('utf-16'))
        else:
            source = (line.replace('\0', '') for line in fc)
        readerc = csv.reader(source, **options)

        for _ in range(headerLines):
            next(readerc)
        row = next(readerc)
        count = 1

        basisOfRecord = ''
        if isinstance(basisCol, str):
            basisOfRecord = basisCol
        elif -1 < basisCol:
            basisOfRecord = row[basisCol]

        # The first data row decides which column supplies identifiers:
        # idCol when it holds a value, otherwise occCol.
        sampleGuid = "NOID"
        selectedIdCol = -1
        if -1 < idCol and 0 < len(row[idCol]):
            selectedIdCol = idCol
        elif -1 < occCol and 0 < len(row[occCol]):
            selectedIdCol = occCol

        uniqIds = set()
        if -1 < selectedIdCol:
            sampleGuid = row[selectedIdCol]
            uniqIds.add(sampleGuid)
            for row in readerc:
                uniqIds.add(row[selectedIdCol])
                count += 1
        else:
            count += sum(1 for _ in readerc)

    return count, len(uniqIds), basisOfRecord, sampleGuid
Пример #7
0
def test_bad_stream_exception(all_parsers, csv_dir_path):
    # see gh-13652
    #
    # Both the Python engine and the C engine must surface the
    # UnicodeDecodeError itself; the C engine must not raise ParserError
    # and swallow the exception that made the read fail.
    parser = all_parsers
    msg = "'utf-8' codec can't decode byte"
    path = os.path.join(csv_dir_path, "sauron.SHIFT_JIS.csv")
    utf8 = codecs.lookup("utf-8")

    # Stream must be binary UTF8.
    with open(path, "rb") as handle:
        recoder = codecs.StreamRecoder(
            handle,
            utf8.encode,
            utf8.decode,
            utf8.streamreader,
            utf8.streamwriter,
        )
        with recoder as stream:
            with pytest.raises(UnicodeDecodeError, match=msg):
                parser.read_csv(stream)
Пример #8
0
def main(args):
    """Convert a file between the module's UTF and SJIS codecs.

    args.src -- path of the input file
    args.out -- path of the output file
    args.tou -- truthy: convert SJIS -> UTF; falsy: convert UTF -> SJIS

    The input is read through a ``codecs.StreamRecoder`` that decodes it
    with the source codec and re-encodes it with the destination codec; the
    resulting bytes are written to the output file chunk by chunk.
    """
    src_file_path = args.src
    dest_file_path = args.out

    if args.tou:
        print("converting to UTF8")
        src_codec, dest_codec = sjis_codec, utf_codec
    else:
        print("converting to SJIS")
        src_codec, dest_codec = utf_codec, sjis_codec

    # Approximate number of source bytes pulled per read() call.
    chunk_size = 64 * 1024

    # Open the file objects.
    with open(src_file_path, "rb") as src, open(dest_file_path, "wb") as dest:

        # Create the converting stream.
        stream = codecs.StreamRecoder(
            src,  # src
            dest_codec.encode,  # dest codec
            src_codec.decode,  # src codec
            src_codec.streamreader,  # src streamer
            dest_codec.streamwriter,  # dest streamer
        )

        # Write.  read() is called on the recoder itself: wrapping it in
        # io.BufferedReader would reach the raw file's readinto() through
        # StreamRecoder.__getattr__ and skip the conversion entirely.
        while True:
            data = stream.read(chunk_size)
            if not data:
                break
            # `data` is already encoded with the destination codec.
            dest.write(data)
Пример #9
0
import sys
import codecs
import os.path

# Re-encode the cp1256 file named on the command line as UTF-8, writing the
# result next to it as "<name>-utf<ext>".
try:
    fansi_name = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here; anything else should surface.
    print("No input file name provided")
    sys.exit()

# isfile() is already False for a path that does not exist.
if not os.path.isfile(fansi_name):
    print("File doesn't exist")
    sys.exit()

ofpath, ext = os.path.splitext(fansi_name)

# The with-statement closes both files even if reading or decoding fails.
with open(fansi_name, mode="rb") as fansi, \
        open(ofpath + "-utf" + ext, mode="wb") as fansiout:
    # Reading from the recoder decodes cp1256 from the file and returns
    # UTF-8 encoded bytes.
    futf = codecs.StreamRecoder(
        fansi,
        codecs.getencoder('utf-8'), codecs.getdecoder('utf-8'),
        codecs.getreader('cp1256'), codecs.getwriter('cp1256'))

    fbytes = futf.read()
    fansiout.write(fbytes)