Exemplo n.º 1
0
    def test_errors_property(self):
        """Check that ``errors`` reports the handler that was given to ``open()``."""
        import _io

        # Each case: extra keyword arguments for open() -> expected ``errors`` value.
        cases = (
            ({}, "strict"),
            ({"errors": "replace"}, "replace"),
        )
        for extra_kwargs, expected in cases:
            with _io.open(self.tmpfile, "w", **extra_kwargs) as handle:
                assert handle.errors == expected
Exemplo n.º 2
0
def test_attributes(tempfile):
    """Verify ``name`` and ``mode`` on every layer of files returned by ``_io.open``."""
    import warnings

    with _io.open(tempfile, "wb", buffering=0) as unbuffered:
        assert unbuffered.mode == "wb"

    # Opening with mode "U" is expected to warn, so record what is emitted.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        with _io.open(tempfile, "U") as text_file:
            buffered = text_file.buffer
            raw = buffered.raw
            assert text_file.name == tempfile
            assert buffered.name == tempfile
            assert raw.name == tempfile
            assert text_file.mode == "U"
            assert buffered.mode == "rb"
            assert raw.mode == "rb"
    first_warning = caught[0]
    assert isinstance(first_warning.message, DeprecationWarning)

    with _io.open(tempfile, "w+") as updating:
        assert updating.mode == "w+"
        assert updating.buffer.mode == "rb+"
        assert updating.buffer.raw.mode == "rb+"

        # Re-open the same descriptor; closefd=False leaves it owned by ``updating``.
        descriptor = updating.fileno()
        with _io.open(descriptor, "wb", closefd=False) as by_fd:
            assert by_fd.mode == "wb"
            assert by_fd.raw.mode == "wb"
            assert by_fd.name == updating.fileno()
            assert by_fd.raw.name == updating.fileno()
Exemplo n.º 3
0
    def test_errors_property(self):
        """Check that ``errors`` reports the handler that was given to ``open()``."""
        import _io

        # Each case: extra keyword arguments for open() -> expected ``errors`` value.
        cases = (
            ({}, "strict"),
            ({"errors": "replace"}, "replace"),
        )
        for extra_kwargs, expected in cases:
            with _io.open(self.tmpfile, "w", **extra_kwargs) as handle:
                assert handle.errors == expected
Exemplo n.º 4
0
def open_as_bytes_stream(filename):
    """Return a binary stream over ``filename``.

    Files smaller than ``TWO_GB`` are read completely into an in-memory
    ``BytesIO``; anything larger is returned as an open binary file object
    that uses ``bestIOBufferSize`` as its buffer size.
    """
    size_in_bytes = os.path.getsize(filename)
    if size_in_bytes >= TWO_GB:
        # Too large to hold in memory: hand back the file object itself.
        return open(filename, 'rb', buffering=bestIOBufferSize)

    with open(filename, 'rb') as source:
        payload = source.read(size_in_bytes)
    return BytesIO(payload)
Exemplo n.º 5
0
def test_open(tempfile):
    """Open ``tempfile`` in binary and in text mode and check ``name`` and ``mode``."""
    # A context manager closes the handle even when one of the assertions fails.
    with _io.open(tempfile, "rb") as f:
        assert f.name.endswith('tempfile')
        assert f.mode == 'rb'

    with _io.open(tempfile, "rt") as f:
        assert f.mode == "rt"
Exemplo n.º 6
0
def loadjson(name, objHook=None) -> dict:
    """Load JSON from the file ``name`` and return the decoded object.

    The file is read as UTF-8 first; if decoding fails with a message that
    mentions a BOM, it is read again as ``utf-8-sig``.

    Args:
        name: path of the JSON file.
        objHook: optional ``object_hook`` passed on to ``json.loads``.

    Returns:
        The decoded JSON value, or ``None`` when reading or parsing fails
        for any reason other than a leading BOM.
    """
    try:
        with _io.open(name, encoding='utf-8', errors='replace') as f:
            # json.loads() no longer accepts ``encoding`` (removed in Python 3.9);
            # the text is already decoded by the file object.
            return json.loads(f.read(), object_hook=objHook)
    except Exception as e:
        if 'BOM' in str(e):
            with _io.open(name, encoding='utf-8-sig', errors='replace') as f:
                return json.loads(f.read(), object_hook=objHook)
    # Best effort, as before: any other failure yields None instead of raising.
    return None
Exemplo n.º 7
0
def loadjson(name, objHook=None) -> dict:
    ''' Load JSON from the file ``name`` and return the decoded object.

    The file is read as UTF-8 first; if decoding fails with a message that
    mentions a BOM, it is read again as ``utf-8-sig``. ``objHook`` is passed
    to ``json.loads`` as ``object_hook``. Any other failure returns ``None``.
    '''
    try:
        with _io.open(name, encoding='utf-8', errors='replace') as f:
            # json.loads() no longer accepts ``encoding`` (removed in Python 3.9);
            # the text is already decoded by the file object.
            return json.loads(f.read(), object_hook=objHook)
    except Exception as e:
        if 'BOM' in str(e):
            with _io.open(name, encoding='utf-8-sig', errors='replace') as f:
                return json.loads(f.read(), object_hook=objHook)
    # Best effort, as before: any other failure yields None instead of raising.
    return None
Exemplo n.º 8
0
    def read(self, fn, mode='rb', encoding='utf-8-sig', errors='replace'):
        """Feed every line of ``fn`` to ``self.parse_line``.

        Each line has trailing whitespace removed before it is parsed.

        Args:
            fn: path of the file to read.
            mode: mode passed to ``open``; when it contains ``'b'`` the lines
                are bytes and ``encoding``/``errors`` are not used.
            encoding: text encoding, used only in text mode.
            errors: decode error handler, used only in text mode.
        """
        open_kwargs = {'buffering': bestIOBufferSize}
        if 'b' not in mode:
            open_kwargs.update(encoding=encoding, errors=errors)
        parser_ = self.parse_line

        # The context manager closes the file even if parse_line raises.
        with open(fn, mode, **open_kwargs) as stream:
            for line in stream:
                parser_(line.rstrip())
Exemplo n.º 9
0
def test_array_write(tempfile):
    """Writing an ``array`` to a binary file must report its full byte length."""
    a = array.array('i', range(10))
    # array.tostring() was removed in Python 3.9; tobytes() is its replacement.
    n = len(a.tobytes())
    # Unbuffered (raw) file.
    with _io.open(tempfile, "wb", 0) as f:
        res = f.write(a)
        assert res == n

    # Default-buffered file.
    with _io.open(tempfile, "wb") as f:
        res = f.write(a)
        assert res == n
Exemplo n.º 10
0
def test_truncate(tempfile):
    """Check that a file reopened with "w+" and truncated reads back empty."""
    with _io.open(tempfile, "w+") as writer:
        writer.write("abc")

    with _io.open(tempfile, "w+") as reopened:
        reopened.truncate()

    with _io.open(tempfile, "r+") as reader:
        content = reader.read()
    assert content == ""
Exemplo n.º 11
0
    def createFile(self, path, name):
        """
        Create a file with the given name in the given directory.

        The file is opened in ``'w'`` mode and closed again straight away.

        Args:
            path: path to the directory in which the file is created
            name: name of the file to create
        """
        target = os.path.join(path, name)
        with open(target, mode='w'):
            pass
Exemplo n.º 12
0
    def test_array_write(self):
        """Writing an ``array`` to a binary file must report its full byte length."""
        import _io, array
        values = array.array(b'i', range(10))
        expected = len(values.tostring())
        # First the unbuffered file (buffering 0), then the default-buffered one.
        for extra_args in ((0,), ()):
            with _io.open(self.tmpfile, "wb", *extra_args) as f:
                written = f.write(values)
                assert written == expected
Exemplo n.º 13
0
def test_opener(tempfile):
    """Check that ``open()`` reads through the descriptor a custom opener returns."""
    import os
    with _io.open(tempfile, "w") as seed:
        seed.write("egg\n")
    prepared_fd = os.open(tempfile, os.O_RDONLY)

    def reuse_prepared_fd(path, flags):
        # Ignore the requested path/flags and hand back the prepared descriptor.
        return prepared_fd

    with _io.open("non-existent", "r", opener=reuse_prepared_fd) as reader:
        content = reader.read()
        assert content == "egg\n"
Exemplo n.º 14
0
    def test_array_write(self):
        """Writing an ``array`` to a binary file must report its full byte length."""
        import _io, array
        a = array.array('i', range(10))
        # array.tostring() was removed in Python 3.9; the byte length is
        # item count times item size, which works on every Python version.
        n = len(a) * a.itemsize
        # Unbuffered (raw) file.
        with _io.open(self.tmpfile, "wb", 0) as f:
            res = f.write(a)
            assert res == n

        # Default-buffered file.
        with _io.open(self.tmpfile, "wb") as f:
            res = f.write(a)
            assert res == n
Exemplo n.º 15
0
    def test_truncate(self):
        """Check that a file reopened with "w+" and truncated reads back empty."""
        import _io

        with _io.open(self.tmpfile, "w+") as writer:
            writer.write("abc")

        with _io.open(self.tmpfile, "w+") as reopened:
            reopened.truncate()

        with _io.open(self.tmpfile, "r+") as reader:
            content = reader.read()
        assert content == ""
Exemplo n.º 16
0
    def test_truncate(self):
        """Check that a file reopened with "w+" and truncated reads back empty."""
        import _io

        with _io.open(self.tmpfile, "w+") as writer:
            writer.write(u"abc")

        with _io.open(self.tmpfile, "w+") as reopened:
            reopened.truncate()

        with _io.open(self.tmpfile, "r+") as reader:
            content = reader.read()
        assert content == ""
Exemplo n.º 17
0
 def test_seek_append_bom(self):
     """Appending after seeking to the start and back to the end adds no second BOM."""
     # Same test, but first seek to the start and then to the end
     import _io, os
     path = self.tmpfile + '_x3'
     for codec in ('utf-8-sig', 'utf-16', 'utf-32'):
         with _io.open(path, 'w', encoding=codec) as writer:
             writer.write('aaa')
         with _io.open(path, 'a', encoding=codec) as appender:
             appender.seek(0)
             appender.seek(0, os.SEEK_END)
             appender.write('xxx')
         with _io.open(path, 'rb') as reader:
             stored = reader.read()
             assert stored == 'aaaxxx'.encode(codec)
Exemplo n.º 18
0
def gen_cfg(condition):
    """Write ``encoder.cfg`` and ``decoder.cfg`` for every sequence and rate point.

    ``condition`` is split on ``'-'``; its second and third tokens select the
    geometry and attribute rate-point tables from ``ratepoint.yaml``. For each
    sequence in ``sequences.yaml`` and each attribute rate ``r`` the directory
    ``<condition>/<sequence>/<r>`` is created and two files are written:

    * ``encoder.cfg``: the condition's encoder settings with matching keys
      overridden, in this order, by the geometry rate point, the attribute
      rate point and the sequence entry;
    * ``decoder.cfg``: the condition's decoder settings, unchanged.

    Args:
        condition: key into ``condition.yaml``, made of '-'-separated tokens.
    """
    import copy

    token = condition.split('-')

    # NOTE(review): the original also opened 'condition.yaml' itself but never
    # used that handle; ordered_yaml_load is given the file name, as before.
    data = ordered_yaml_load('condition.yaml')
    data_enc = data[condition]['encoder']
    data_dec = data[condition]['decoder']

    with open('ratepoint.yaml') as f_ratepoint:
        rps = yaml.load(f_ratepoint, Loader=yaml.Loader)

    with open('sequences.yaml') as f_sequence:
        seqs = yaml.load(f_sequence, Loader=yaml.Loader)

    for seq_name in seqs:
        seq = seqs[seq_name]
        rps_geom = rps[token[1]][seq['category']]
        rps_attr = rps[token[2]]

        for r in rps_attr:
            out_dir = '{}/{}/{}'.format(condition, seq_name, r)
            mkdir_p(out_dir)

            # encoder cfg
            # Work on a copy so overrides applied for one sequence/rate do not
            # carry over into the files written for the next one.
            data_r = copy.copy(data_enc)
            # A table with a single entry is keyed by '' and shared by all rates.
            if len(rps_geom) == 1:
                rate_geom = rps_geom['']
            else:
                rate_geom = rps_geom[r]
            rate_attr = rps_attr[r]

            # Later sources win; only keys already present in the template are set.
            for overrides in (rate_geom, rate_attr, seq):
                for key in overrides:
                    if key in data_r:
                        data_r[key] = overrides[key]

            with open('{}/encoder.cfg'.format(out_dir), 'w') as f:
                ordered_yaml_dump(data_r, f, default_flow_style=False)

            # decoder cfg
            with open('{}/decoder.cfg'.format(out_dir), 'w') as f:
                ordered_yaml_dump(data_dec, f, default_flow_style=False)
Exemplo n.º 19
0
def file2matrix(filename):
    """Parse a tab-separated file into a feature matrix and a label list.

    Every line must hold at least three numeric columns followed by an
    integer class label in the last column.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: an ``(n_lines, 3)`` array
        with the first three columns of each line, and the list of integer
        labels taken from the last column.
    """
    # Read once and close the file; the original opened it twice and never
    # closed either handle.
    with open(filename) as fr:
        lines = fr.readlines()

    returnMat = zeros((len(lines), 3))  # prepare matrix to return
    classLabelVector = []  # prepare labels to return
    for index, line in enumerate(lines):
        listFromLine = line.strip().split('\t')
        returnMat[index, :] = listFromLine[0:3]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector
Exemplo n.º 20
0
def file2matrix(filename):
    """Parse a tab-separated file into a feature matrix and a label list.

    Every line must hold at least three numeric columns followed by an
    integer class label in the last column.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: an ``(n_lines, 3)`` array
        with the first three columns of each line, and the list of integer
        labels taken from the last column.
    """
    # Read once and close the file; the original opened it twice and never
    # closed either handle.
    with open(filename) as fr:
        lines = fr.readlines()

    returnMat = zeros((len(lines), 3))  # prepare matrix to return
    classLabelVector = []  # prepare labels to return
    for index, line in enumerate(lines):
        listFromLine = line.strip().split('\t')
        returnMat[index, :] = listFromLine[0:3]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector
Exemplo n.º 21
0
def test_newlines_attr(tempfile):
    """``newlines`` is None before any read and "\\n" once such lines were read."""
    with _io.open(tempfile, "r") as untouched:
        assert untouched.newlines is None

    with _io.open(tempfile, "wb") as writer:
        writer.write(b"hello\nworld\n")

    with _io.open(tempfile, "r") as reader:
        first = reader.readline()
        assert first == "hello\n"
        second = reader.readline()
        assert second == "world\n"
        seen = reader.newlines
        assert seen == "\n"
        assert type(seen) is str
Exemplo n.º 22
0
def test_append_bom(tempfile):
    """Appending to a non-empty file must not write a second BOM."""
    # The BOM is not written again when appending to a non-empty file
    for codec in ("utf-8-sig", "utf-16", "utf-32"):
        with _io.open(tempfile, "w", encoding=codec) as writer:
            writer.write("aaa")
            writer.tell()  # result unused; the call itself is kept from the original
        with _io.open(tempfile, "rb") as reader:
            stored = reader.read()
            assert stored == "aaa".encode(codec)

        with _io.open(tempfile, "a", encoding=codec) as appender:
            appender.write("xxx")
        with _io.open(tempfile, "rb") as reader:
            stored = reader.read()
            assert stored == "aaaxxx".encode(codec)
Exemplo n.º 23
0
 def test_seek_bom(self):
     """Writing after a manual seek must not insert another BOM."""
     # The BOM is not written again when seeking manually
     import _io
     path = self.tmpfile + '_x3'
     for codec in ('utf-8-sig', 'utf-16', 'utf-32'):
         with _io.open(path, 'w', encoding=codec) as writer:
             writer.write('aaa')
             end_cookie = writer.tell()
         with _io.open(path, 'r+', encoding=codec) as updater:
             # Append at the old end first, then overwrite from the start.
             updater.seek(end_cookie)
             updater.write('zzz')
             updater.seek(0)
             updater.write('bbb')
         with _io.open(path, 'rb') as reader:
             stored = reader.read()
             assert stored == 'bbbzzz'.encode(codec)
Exemplo n.º 24
0
def create_load_data_directory(triplesFileName, rootDir, outputDir):
    """Build one combined text file per (opinion holder, opinion, topic) triple.

    Each non-blank line of ``triplesFileName`` must contain three
    comma-separated fields: opinion holder, opinion and topic. Spaces in the
    holder and topic are replaced by underscores to form file names. The
    contents of ``<rootDir>/opinion_holders/<holder>`` followed by
    ``<rootDir>/topics/<topic>`` are written to
    ``<outputDir>/<opinion>/<holder>__<topic>``.

    A triple whose files cannot be read or written is reported on stdout and
    skipped.

    Args:
        triplesFileName: path of the comma-separated triples file.
        rootDir: directory containing ``opinion_holders/`` and ``topics/``.
        outputDir: directory containing one sub-directory per opinion.
    """
    with open(triplesFileName, 'r') as tf:
        for line in tf:
            if not line.strip():
                # A blank line (e.g. a trailing newline) holds no triple and
                # would otherwise break the three-way unpacking below.
                continue
            opHolder, opinion, topic = line.split(',')
            opHolder = sub(' ', '_', opHolder.strip())
            opinion = opinion.strip()
            topic = sub(' ', '_', topic.strip())

            ohfname = rootDir + '/opinion_holders/' + opHolder
            tfname = rootDir + '/topics/' + topic
            outfname = outputDir + '/' + opinion + '/' + opHolder + '__' + topic
            try:
                with codecs.open(ohfname, 'r', errors='ignore') as ohFile, \
                        codecs.open(tfname, 'r', errors='ignore') as tFile:
                    content = ohFile.read() + tFile.read()
                # The with-block closes the output file; no explicit close() needed.
                with codecs.open(outfname, 'w', errors='ignore') as outFile:
                    print('Writing...' + opHolder + ', ' + topic)
                    outFile.write(content)
            except Exception:
                print("Something wrong with either " + opHolder + ' or ' +
                      topic + '...skipping...')
Exemplo n.º 25
0
    def __init__(self, fileName=None):
        '''
        Constructor

        Creates an empty graph and, when ``fileName`` is given, fills it from
        that file. Lines are split on whitespace; lines with fewer than three
        fields are ignored.

        * ``V <name> <value>`` adds a vertex stored as ``[int(value), True]``
          and an empty adjacency dict for it.
        * ``E <a> <b> <weight>`` adds the edge in both directions, stored as
          ``[weight, weight]``, and records ``(a, b)`` in ``setOfEdges``.
          Both vertices must already have been declared, and the line needs
          all four fields (otherwise an IndexError is raised).
        '''
        self.vertex = {}
        self.edge = {}
        self.setOfEdges = set()

        if fileName is None:
            return

        # The context manager closes the file even if a line fails to parse.
        with open(fileName, 'r') as graphFile:
            # iterates through the lines of the file
            for line in graphFile:
                fields = line.split()
                if len(fields) < 3:
                    continue
                kind = fields[0]
                # Vertex
                if kind == 'V':
                    self.vertex[fields[1]] = [int(fields[2]), True]
                    self.edge[fields[1]] = {}
                # Edge
                elif kind == 'E':
                    first, second = fields[1], fields[2]
                    weight = int(fields[3])
                    self.edge[first][second] = [weight, weight]
                    self.edge[second][first] = [weight, weight]

                    self.setOfEdges.add((first, second))
Exemplo n.º 26
0
def dumpjson(obj:dict, name=None) -> str:
    ''' Serialize ``obj`` to an indented JSON string and optionally save it.

    Args:
        obj: the object to serialize.
        name: optional path; when truthy the JSON text is also written there
            as UTF-8.

    Returns:
        The JSON text (4-space indent, non-ASCII characters kept as-is).
    '''
    # Named ``text`` rather than ``str`` so the builtin is not shadowed.
    text = json.dumps(obj, indent=4, ensure_ascii=False)
    if name:
        # ensure_ascii=False can emit non-ASCII characters, so the encoding is
        # pinned instead of depending on the platform's locale default.
        with _io.open(name, 'w', encoding='utf-8') as f:
            f.write(text)
    return text
Exemplo n.º 27
0
    def UpdateStopWords(self, stopWordsFileName):
        """Count the stop words listed in a UTF-8 file into ``self.stopWords``.

        The first whitespace-separated token of each line is the word; blank
        lines are skipped. When ``self.enableStemming`` equals the string
        ``"true"`` the word is passed through ``self.stemmer.stem`` first.
        Each occurrence increments the word's count in ``self.stopWords``.

        Args:
            stopWordsFileName: path of the stop words file.
        """
        # The context manager closes the file even if stemming raises.
        with open(stopWordsFileName, 'r', encoding='utf-8') as fin:
            # Read the file line by line
            for line in fin:
                tokens = line.split()
                if not tokens:
                    # Blank line: there is no first entry to take.
                    continue

                # The word is the first entry in the line
                rawWord = tokens[0]

                # Stem the word if enabled
                if self.enableStemming == "true":
                    word = self.stemmer.stem(rawWord)
                else:
                    word = rawWord

                if word not in self.stopWords:
                    # First occurrence of this word
                    self.stopWords[word] = 1
                else:
                    # Increment the frequency
                    self.stopWords[word] += 1
Exemplo n.º 28
0
def dumpjson(obj:dict, name=None) -> str:
    """Serialize ``obj`` to an indented JSON string and optionally save it.

    Args:
        obj: the object to serialize.
        name: optional path; when truthy the JSON text is also written there
            as UTF-8.

    Returns:
        The JSON text (4-space indent, non-ASCII characters kept as-is).
    """
    jstr = json.dumps(obj, indent=4, ensure_ascii=False)
    if name:
        # ensure_ascii=False can emit non-ASCII characters, so the encoding is
        # pinned instead of depending on the platform's locale default.
        with _io.open(name, 'w', encoding='utf-8') as f:
            f.write(jstr)
    return jstr
Exemplo n.º 29
0
 def __init__(self, fileName=None):
     '''
     Constructor

     Creates an empty graph and, when ``fileName`` is given, fills it from
     that file. Lines are split on whitespace; lines with fewer than three
     fields are ignored.

     * ``V <name> <value>`` adds a vertex stored as ``[int(value), True]``
       and an empty adjacency dict for it.
     * ``E <a> <b> <weight>`` adds the edge in both directions, stored as
       ``[weight, weight]``, and records ``(a, b)`` in ``setOfEdges``.
       Both vertices must already have been declared, and the line needs
       all four fields (otherwise an IndexError is raised).
     '''
     self.vertex = {}
     self.edge = {}
     self.setOfEdges = set()

     if fileName is None:
         return

     # The context manager closes the file even if a line fails to parse.
     with open(fileName, 'r') as graphFile:
         # iterates through the lines of the file
         for line in graphFile:
             fields = line.split()
             if len(fields) < 3:
                 continue
             kind = fields[0]
             # Vertex
             if kind == 'V':
                 self.vertex[fields[1]] = [int(fields[2]), True]
                 self.edge[fields[1]] = {}
             # Edge
             elif kind == 'E':
                 first, second = fields[1], fields[2]
                 weight = int(fields[3])
                 self.edge[first][second] = [weight, weight]
                 self.edge[second][first] = [weight, weight]

                 self.setOfEdges.add((first, second))
Exemplo n.º 30
0
    def test_append_bom(self):
        """Appending to a non-empty file must not write a second BOM."""
        import _io

        # The BOM is not written again when appending to a non-empty file
        for codec in ("utf-8-sig", "utf-16", "utf-32"):
            with _io.open(self.tmpfile, "w", encoding=codec) as writer:
                writer.write("aaa")
                writer.tell()  # result unused; the call itself is kept from the original
            with _io.open(self.tmpfile, "rb") as reader:
                stored = reader.read()
                assert stored == "aaa".encode(codec)

            with _io.open(self.tmpfile, "a", encoding=codec) as appender:
                appender.write("xxx")
            with _io.open(self.tmpfile, "rb") as reader:
                stored = reader.read()
                assert stored == "aaaxxx".encode(codec)
Exemplo n.º 31
0
    def test_newlines_attr(self):
        """``newlines`` is None before any read and "\\n" once such lines were read."""
        import _io

        with _io.open(self.tmpfile, "r") as untouched:
            assert untouched.newlines is None

        with _io.open(self.tmpfile, "wb") as writer:
            writer.write("hello\nworld\n")

        with _io.open(self.tmpfile, "r") as reader:
            first = reader.readline()
            assert first == "hello\n"
            second = reader.readline()
            assert second == "world\n"
            seen = reader.newlines
            assert seen == "\n"
            assert type(seen) is unicode
Exemplo n.º 32
0
 def __init__(self, filename):
     """Read a WAV-style file: unpack its header fields and load the sample data.

     The first 72 bytes are unpacked into thirteen header fields stored as
     attributes; everything after them is kept in ``self.data``.

     Args:
         filename: path of the file to read (opened in binary mode).
     """
     self.filename = filename
     with _io.open(filename, 'rb') as wav:
         # NOTE(review): the format string has no byte-order prefix, so struct
         # uses native sizes and alignment; the 72-byte read only matches where
         # that layout is 72 bytes long - confirm the target platform.
         self.__raw_header = wav.read(72)
         self.__struct_header = struct.unpack('4sL4s4sLHHLLHH4sL', self.__raw_header)
         # NOTE(review): ``self.format`` appears twice in the target list below
         # (3rd and 6th field); the later assignment wins, so the 3rd field is
         # not retained - confirm this is intended.
         self.chunk_id, \
             self.chunk_size, \
             self.format, \
             self.sub_chunk_id_1, \
             self.sub_chunk_size_1, \
             self.format, \
             self.num_channel, \
             self.sample_rate_bit, \
             self.byte_rate, \
             self.block_align, \
             self.bit_per_sample, \
             self.sub_chunk_id_2, \
             self.sub_chunk_size_2 = self.__struct_header
         # Any format value other than 1 is treated as compressed.
         self.is_compressed = False if self.format == 1 else True
         self.byte_per_sample = int(self.bit_per_sample / 8)
         # Everything after the header is kept as the raw data bytes.
         self.data = wav.read()
     self.len = len(self.data)
     # struct code for one sample, chosen by sample width in bytes; widths
     # other than 1, 2, 4 or 8 raise KeyError here.
     self.__struct_mask = {1:'B',2:'H',4:'I',8:'Q'}[self.byte_per_sample]
     # One code per channel, i.e. the mask describes one multi-channel frame.
     self.__struct_mask = self.__struct_mask * self.num_channel
     # Size in bytes of one frame (one sample for every channel).
     self.__sample_bytes = self.byte_per_sample * self.num_channel
Exemplo n.º 33
0
def create_stdio(fd, writing, name, encoding, errors, unbuffered):
    """Wrap file descriptor ``fd`` in a text stream suitable for a std stream.

    Args:
        fd: the file descriptor to wrap; it is not closed with the stream.
        writing: true for an output stream, false for an input stream.
        name: value stored as ``name`` on the underlying raw file.
        encoding: text encoding for the wrapper.
        errors: error handler for the wrapper.
        unbuffered: request unbuffered output and line buffering.

    Returns:
        The ``TextIOWrapper`` with ``mode`` set to ``'w'`` or ``'r'``, or
        ``None`` when opening ``fd`` fails with ``EBADF``.
    """
    import _io
    mode = 'w' if writing else 'r'
    # stdin is always opened in buffered mode, first because it
    # shouldn't make a difference in common use cases, second because
    # TextIOWrapper depends on the presence of a read1() method which
    # only exists on buffered streams.
    if unbuffered and writing:
        buffering = 0
    else:
        buffering = -1

    try:
        buf = _io.open(fd, mode + 'b', buffering, closefd=False)
    except OSError as exc:
        if exc.errno == errno.EBADF:
            return None
        raise

    # With buffering disabled, open() already returned the raw file object.
    if buffering:
        raw = buf.raw
    else:
        raw = buf
    raw.name = name

    # We normally use newline='\n' below, which turns off any translation.
    # However, on Windows (independently of -u), then we must enable
    # the Universal Newline mode (set by newline = None): on input, \r\n
    # is translated into \n; on output, \n is translated into \r\n.
    # We must never enable the Universal Newline mode on POSIX: CPython
    # never interprets '\r\n' in stdin as meaning just '\n', unlike what
    # it does if you explicitly open a file in text mode.
    if sys.platform == 'win32':
        newline = None
    else:
        newline = '\n'

    use_line_buffering = unbuffered or raw.isatty()
    stream = _io.TextIOWrapper(buf,
                               encoding,
                               errors,
                               newline=newline,
                               line_buffering=use_line_buffering)
    stream.mode = mode
    return stream
Exemplo n.º 34
0
def read_csv(name: str,
             delim=',',
             withhead=False,
             strip=True,
             convert=None,
             encoding='utf-8',
             errors='strict') -> ([], []):
    """Read a delimiter-separated file and return ``(head, body)`` as lists.

    Args:
        name: path of the file, or an already open binary line iterator.
            A path is opened and closed by this function; an iterator that
            was passed in is left open for the caller.
        delim: field delimiter (text; encoded to bytes internally).
        withhead: when true, the first line is returned as ``head``.
        strip: when true, double quotes and spaces are stripped from both
            ends of every body field.
        convert: optional callable applied to every decoded body field.
        encoding: encoding used to decode body fields.
        errors: error handler used to decode body fields.

    Returns:
        ``(head, body)``: the header fields (empty without ``withhead``) and
        one list of fields per remaining line.
    """
    def make(raw_field):
        # Optional strip, then decode, then optional conversion.
        if strip:
            raw_field = raw_field.strip(b'" ')
        text = raw_field.decode(encoding, errors)
        return convert(text) if convert else text

    head, body = [], []
    delim = delim.encode()

    opened_here = isinstance(name, str)
    stream = _io.open(name, 'rb') if opened_here else name
    try:
        if withhead:
            line = next(stream).rstrip()
            head = [i.decode() for i in line.rstrip(b',').split(delim)]
        for line in stream:
            body.append([make(i) for i in line.rstrip(b'"\r\n, ').split(delim)])
    finally:
        # Only close what this function opened itself.
        if opened_here:
            stream.close()
    return head, body
 def _loadFile(self):
     """Load the books stored in ``self._fileName`` into the repository.

     Each line holds comma-separated fields: three fields build
     ``Book(id, field2, field3)``, any other count builds
     ``Book(id, field2, field3, field4)``. Reading stops at the first line
     whose length is at most 1 (end of file or an empty line).

     Raises:
         RepoError: if the file cannot be opened or a line cannot be parsed.
     """
     try:
         # Opening inside the try block lets a failing open() be reported as
         # RepoError; the original's ``finally: f.close()`` raised a NameError
         # in that case because ``f`` was never bound.
         with open(self._fileName, "r") as f:
             s = f.readline()

             while len(s) > 1:
                 tok = s.split(",")
                 # turn the split line into an entity; the last field still
                 # carries the newline, so keep only the text before it
                 if len(tok) == 3:
                     tok[2] = tok[2].split("\n")
                     book = Book(int(tok[0]), tok[1], tok[2][0])
                 else:
                     tok[3] = tok[3].split("\n")
                     book = Book(int(tok[0]), tok[1], tok[2], tok[3][0])
                 BookRepository.add(self, book)
                 s = f.readline()
     except Exception as e:
         raise RepoError("Error reading input file: " + str(e))
Exemplo n.º 36
0
def create_data_file():
    """Write the timing table for every (x, n) combination to ``outfilename``.

    For x from ``x_min`` to ``x_max`` (step ``x_step``) and n from ``n_min``
    to ``n_max`` (step ``n_step``) one tab-separated line
    ``x, n, power_fast_calculted_time(x, n), power(x, n)`` is written; a blank
    line separates the blocks of consecutive x values. ``current_opr`` is
    incremented per line. If ``iSWorking`` becomes False the job stops early
    with an error dialog; otherwise ``iSWorking`` is reset and a completion
    dialog is shown.
    """
    global x_max, x_min, x_step, n_max, n_min, n_step, nb_opr, current_opr, outfilename, iSWorking, label_stat_prc
    # The context manager also closes the file on the early "STOP" return,
    # which previously left it open.
    with open(outfilename, "w") as out:
        x = x_min
        n = n_min
        while x <= x_max:
            while n <= n_max:
                t1 = power_fast_calculted_time(x, n)
                t2 = power(x, n)
                line = str(x) + "\t" + str(n) + "\t" + str(t1) + "\t" + str(
                    t2) + "\n"
                out.write(line)
                n = n + n_step
                current_opr = current_opr + 1
                if iSWorking == False:
                    label_stat_prc.config(bg="red")
                    messagebox.showerror("Work", "STOP")
                    return
            out.write("\n")
            x = x + x_step
            n = n_min

    iSWorking = False
    messagebox.showinfo("Work", "Job Done [%s], thank you" % current_opr)
Exemplo n.º 37
0
    def test_newlines_attr(self):
        """``newlines`` is None before any read and "\\n" once such lines were read."""
        import _io

        with _io.open(self.tmpfile, "r") as untouched:
            assert untouched.newlines is None

        with _io.open(self.tmpfile, "wb") as writer:
            writer.write(b"hello\nworld\n")

        with _io.open(self.tmpfile, "r") as reader:
            first = reader.readline()
            assert first == "hello\n"
            second = reader.readline()
            assert second == "world\n"
            seen = reader.newlines
            assert seen == "\n"
            assert type(seen) is str
Exemplo n.º 38
0
 def test_fileread_line_bin(self):
     """The second line of ``test_io.py``, read in binary mode, is the expected bytes."""
     # The context manager closes the file even when the assertion fails.
     with _io.open('test_io.py', 'rb') as f:
         f.readline()    # skip 1st line
         b = f.readline()
         self.assertEqual(b,
             b'#\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n')
Exemplo n.º 39
0
def test_issue1902(tempfile):
    """Replay the operation sequence of issue 1902; it passes if nothing raises."""
    payload = b'\xff' * 13569
    with _io.open(tempfile, 'w+b', 4096) as stream:
        stream.write(payload)
        stream.flush()
        stream.seek(0, 0)   # whence 0: absolute position
        stream.read(1)
        stream.seek(-1, 1)  # whence 1: relative to the current position
        stream.write(b'')
Exemplo n.º 40
0
 def SaveModel(self):
     """Pickle ``self.languageModel`` to ``self.languageModelSerializationFileName``.

     The file is opened in ``'wb'`` mode, so an existing file is replaced.
     """
     # The context manager closes the file even if pickling fails.
     with open(self.languageModelSerializationFileName, 'wb') as serializationFile:
         # Save the model
         pickle.dump(self.languageModel, serializationFile)
Exemplo n.º 41
0
    def test_chunk_size(self):
        """``_CHUNK_SIZE`` can be read and set, and setting it to 0 raises ValueError."""
        import _io

        new_size = 4096
        with _io.open(self.tmpfile) as reader:
            assert reader._CHUNK_SIZE >= 1
            reader._CHUNK_SIZE = new_size
            assert reader._CHUNK_SIZE == new_size
            raises(ValueError, setattr, reader, "_CHUNK_SIZE", 0)
Exemplo n.º 42
0
    def test_chunk_size(self):
        """``_CHUNK_SIZE`` can be read and set, and setting it to 0 raises ValueError."""
        import _io

        new_size = 4096
        with _io.open(self.tmpfile) as reader:
            assert reader._CHUNK_SIZE >= 1
            reader._CHUNK_SIZE = new_size
            assert reader._CHUNK_SIZE == new_size
            raises(ValueError, setattr, reader, "_CHUNK_SIZE", 0)
Exemplo n.º 43
0
def write_module(module, modulename, write_path):
    """Write the output for ``module`` to ``<write_path>/<modulename>.txt``.

    The file is opened as ASCII text and published through the module-level
    ``_f`` while ``p(handle_module(module, modulename))`` runs.

    Args:
        module: the module object passed to ``handle_module``.
        modulename: module name; also the base name of the output file.
        write_path: directory in which the ``.txt`` file is created.
    """
    global _f
    try:
        with io.open(write_path + '/' + modulename + '.txt', 'w',
                     encoding="ascii") as f:
            _f = f
            p(handle_module(module, modulename))
    finally:
        # Reset on failure too, so ``_f`` never points at a closed file.
        _f = None
Exemplo n.º 44
0
 def findjsonkeyval(self, file_input_path, json_key, data_key_num):
     """Return one item from the stream that ``ijson.items`` yields for ``json_key``.

     Args:
         file_input_path: path of the JSON file.
         json_key: prefix passed to ``ijson.items``.
         data_key_num: index of the wanted item; converted with ``int``.

     Returns:
         The item at position ``data_key_num`` among the matched items.
     """
     int_data_key_num = int(data_key_num)
     # ijson.items is consumed by list() below, so that must happen while the
     # file is still open; the context manager then closes it.
     with open(file_input_path, 'r') as json_data:
         list_convert = list(ijson.items(json_data, json_key))
     return list_convert[int_data_key_num]
Exemplo n.º 45
0
 def readFileContent(self,file_name,read_op="r+"):
     """Return all lines of ``file_name`` as a list, opening it with mode ``read_op``."""
     with open(file_name, read_op) as handle:
         # Iterating the file yields the same lines as readlines().
         return list(handle)
Exemplo n.º 46
0
 def __init__(self):
     """Set up the initial state, the output text file and the MySQL connection."""
     # Paging / loop control.
     self.keepdoing = True
     self.pageNo = 1
     # Collected records.
     self.houseItem = []
     self.houseItems = []
     self.xiaoquItems = []
     # Output file, opened (and truncated) for UTF-8 text.
     self.fp = open('houseInfo.txt', 'w', encoding="utf-8")
     # Database connection; DictCursor is used as the cursor class.
     self.conn = pymysql.connect(
         host='localhost',
         user='******',
         password='',
         db='app_',
         charset='utf8mb4',
         cursorclass=pymysql.cursors.DictCursor,
     )
Exemplo n.º 47
0
    def split_by_lines(self, nline:int):
        ''' Split the source file into pieces of `nline` lines each.

        The output names come from ``self.splited_names(nfile)``, where
        ``nfile`` is computed by ``self.__howmany(self._nline, nline)``
        (presumably the number of pieces needed - confirm against that helper).
        Every piece receives exactly `nline` lines except possibly the last.

        Args:
            nline: maximum number of lines per output file.

        Returns:
            The list of output file names.
        '''
        nfile = self.__howmany(self._nline, nline)
        newnames = self.splited_names(nfile)
        inames = iter(newnames)
        fout = open(next(inames), 'wb')
        written = 0  # lines written to the current output file

        try:
            with open_as_bytes_stream(self._fname) as stream:
                for line in stream:
                    # Rotate only when the current file is full AND another
                    # line exists. The old ``n % nline`` test rotated one line
                    # early, so the first piece got ``nline - 1`` lines and the
                    # name list could run out.
                    if written == nline:
                        fout.close()
                        fout = open(next(inames), 'wb')
                        written = 0
                    fout.write(line)
                    written += 1
        finally:
            # Close the current piece even when reading or writing fails.
            fout.close()

        return newnames
Exemplo n.º 48
0
 def test_issue1902(self):
     """Replay the operation sequence of issue 1902; it passes if nothing raises."""
     import _io
     payload = b'\xff' * 13569
     with _io.open(self.tmpfile, 'w+b', 4096) as stream:
         stream.write(payload)
         stream.flush()
         stream.seek(0, 0)   # whence 0: absolute position
         stream.read(1)
         stream.seek(-1, 1)  # whence 1: relative to the current position
         stream.write(b'')
Exemplo n.º 49
0
    def test_issue1902(self):
        """Replay the operation sequence of issue 1902; it passes if nothing raises."""
        import _io

        payload = b"\xff" * 13569
        with _io.open(self.tmpfile, "w+b", 4096) as stream:
            stream.write(payload)
            stream.flush()
            stream.seek(0, 0)   # whence 0: absolute position
            stream.read(1)
            stream.seek(-1, 1)  # whence 1: relative to the current position
            stream.write(b"")
Exemplo n.º 50
0
    def test_instance(self):
        """``_io.open`` returns the stream class that matches the requested mode."""
        # (arguments for open(), expected class), checked in this order.
        cases = [
            (('test_io.py',), _io.TextIOWrapper),
            (('/tmp/foo', 'bw'), _io.BufferedWriter),
            (('/tmp/foo', 'br'), _io.BufferedReader),
            (('/tmp/foo', 'w'), _io.TextIOWrapper),
            (('/tmp/foo', 'r'), _io.TextIOWrapper),
        ]
        for open_args, expected_type in cases:
            # The context manager closes the file even when the assertion fails.
            with _io.open(*open_args) as f:
                self.assertTrue(isinstance(f, expected_type))
Exemplo n.º 51
0
def spamTest():
    """Train a naive Bayes spam classifier and print its hold-out error rate twice.

    Loads 25 spam and 25 ham e-mails from ``C:\\db\\email``, holds out 10
    randomly chosen documents as the test set and trains ``trainNB0`` on
    bag-of-words vectors of the rest. The test set is then classified once
    with ``setOfWord2Vec`` vectors and once with ``bagOfWords2VecMN`` vectors;
    every misclassified document and both error rates are printed.
    """
    docList = []
    classList = []
    for i in range(1, 26):
        # Spam (label 1) first, then ham (label 0), as in the original order.
        for folder, label in (('spam', 1), ('ham', 0)):
            path = 'C:\\db\\email\\' + folder + '\\' + str(i) + '.txt'
            # Close each e-mail file as soon as it has been read.
            with open(path) as f:
                wordList = textParse(f.read())
            docList.append(wordList)
            classList.append(label)
    vocabList = createVocabList(docList)

    # Move 10 random document indices from the training set to the test set.
    trainingSet = list(range(50))
    testSet = []
    for i in range(10):
        randIndex = int(random.uniform(0, len(trainingSet)))
        testSet.append(trainingSet[randIndex])
        del trainingSet[randIndex]

    # train the classifier (get probs) trainNB0
    trainMat = []
    trainClasses = []
    for docIndex in trainingSet:
        trainMat.append(bagOfWords2VecMN(vocabList, docList[docIndex]))
        trainClasses.append(classList[docIndex])
    p0V, p1V, pSpam = trainNB0(array(trainMat), array(trainClasses))

    # classify the remaining items, once per vectorizer
    for vectorize in (setOfWord2Vec, bagOfWords2VecMN):
        errorCount = 0
        for docIndex in testSet:
            wordVector = vectorize(vocabList, docList[docIndex])
            if classifyNB(array(wordVector), p0V, p1V, pSpam) != classList[docIndex]:
                errorCount += 1
                print("classification error", docList[docIndex])
        print('the error rate is: ', float(errorCount) / len(testSet))
Exemplo n.º 52
0
    def test_seek_and_tell(self):
        """A cookie from ``tell()`` must bring ``seek()`` back to the same position."""
        import _io

        with _io.open(self.tmpfile, "wb") as writer:
            writer.write(b"abcd")

        with _io.open(self.tmpfile) as reader:
            decoded = reader.read()

        total = len(decoded)
        # seek positions
        for start in range(total + 1):
            # read lengths
            for length in (1, 5, total - start):
                with _io.open(self.tmpfile) as reader:
                    prefix = reader.read(start)
                    assert prefix == decoded[:start]
                    cookie = reader.tell()
                    chunk = reader.read(length)
                    assert chunk == decoded[start:start + length]
                    # Going back to the cookie must replay everything after ``start``.
                    reader.seek(cookie)
                    remainder = reader.read()
                    assert remainder == decoded[start:]
Exemplo n.º 53
0
 def write(self, overwrite=False):
   """Write the header and the data to disk.

   Args:
     overwrite: when false, an existing target file raises ``IOError``.

   Returns:
     The number of bytes written (header length plus data length).

   Raises:
     IOError: if the target is found in ``os.listdir()`` and ``overwrite``
       is false.
   """
   #dirty, dirty hack
   # If the file name does not start with '0:/' or '1:/', it is prefixed with
   # the first three characters of the first os.listdir() entry.
   cwd = os.listdir()[0][0:3] if self.filename[0:3] not in ['0:/','1:/'] else ''
   target = cwd + self.filename
   if target in os.listdir() and not overwrite:
     raise IOError('File exists')
   # Build the header before opening the file, so a packing error does not
   # leave a truncated file behind.
   header = b''.join(
     struct.pack(fmt, value)
     for fmt, value in zip(self.__header_slist, self.__get_header_values())
   )
   # The context manager closes the file even if a write fails.
   with _io.open(target, 'wb') as out:
     out.write(header)
     out.write(self.data)
   return len(header) + len(self.data)
Exemplo n.º 54
0
    def test_issue1902_3(self):
        """After a one-byte overwrite, a later seek and read must leave ``tell()`` correct."""
        import _io
        chunk = 4096
        with _io.open(self.tmpfile, 'w+b', chunk) as stream:
            # Fill the file with three buffers' worth of data.
            stream.write(b'\xff' * chunk * 3)
            stream.flush()
            stream.seek(0, 0)

            # Read one byte, step back over it and overwrite it.
            stream.read(1)
            stream.seek(-1, 1)
            stream.write(b'\xff')

            stream.seek(1, 0)
            stream.read(chunk * 2)
            expected_position = 1 + chunk * 2
            assert stream.tell() == expected_position
Exemplo n.º 55
0
def write_csv(name:str, delim:str, body:list, head:list, body_format=None, head_format=None) -> int:
    """Write an optional header row and a table of values to a CSV file.

    The file is first (re)created containing only ``BOM_UTF8``, then the rows
    are appended as UTF-8 text.

    Args:
        name: path of the file to write; an existing file is replaced.
        delim: separator placed between the ``%s`` fields of generated formats.
        body: the data. When ``numpy.shape(body)`` has exactly one dimension,
            each item is written on its own line; otherwise each row is
            written through ``body_format``.
        head: header cells; ``None`` or an empty sequence writes no header.
        body_format: optional ``%``-format for one row. Defaults to one ``%s``
            per column joined by ``delim``; a trailing newline is added when
            missing.
        head_format: optional ``%``-format for the header row, treated like
            ``body_format``.

    Returns:
        The number of lines written, header included.
    """
    # Only build a header format when a header will actually be written;
    # an unconditional len(head) fails for head=None.
    if head:
        if not head_format:
            head_format = delim.join(['%s'] * len(head))
        if not head_format.endswith('\n'):
            head_format += '\n'

    # A row format is only needed (and only computable) for non-flat bodies;
    # an empty or one-dimensional body never uses it.
    shape = numpy.shape(body)
    one_dimensional = len(shape) == 1
    if not one_dimensional:
        if not body_format:
            body_format = delim.join(['%s'] * shape[1])
        if not body_format.endswith('\n'):
            body_format += '\n'

    # Binary write for the byte-order mark, then append the text rows.
    with _io.open(name, 'wb') as fout:
        fout.write(BOM_UTF8)

    nlines = 0
    with _io.open(name, 'a', encoding='utf-8') as fout:
        if head:
            fout.write(head_format % tuple(head))
            nlines += 1
        if one_dimensional:
            for item in body:
                # Wrap in a tuple so a tuple-valued item is formatted as one value.
                fout.write('%s\n' % (item,))
                nlines += 1
        else:
            for row in body:
                fout.write(body_format % tuple(row))
                nlines += 1

    return nlines
Exemplo n.º 56
0
    def test_attributes(self):
        """Check the ``name`` and ``mode`` attributes on each layer of open files.

        Covers an unbuffered binary writer, a file opened with mode ``"U"``
        (text wrapper, its ``buffer`` and that buffer's ``raw``), a ``"w+"``
        text file, and a binary writer opened on an existing file descriptor
        with ``closefd=False``.
        """
        import _io

        path = self.tmpfile

        with _io.open(path, "wb", buffering=0) as unbuffered:
            assert unbuffered.mode == "wb"

        with _io.open(path, "U") as text:
            buffered = text.buffer
            raw = buffered.raw
            # Every layer reports the path it was opened with.
            for layer in (text, buffered, raw):
                assert layer.name == path
            assert text.mode == "U"
            assert buffered.mode == "rb"
            assert raw.mode == "rb"

        with _io.open(path, "w+") as text:
            assert text.mode == "w+"
            assert text.buffer.mode == "rb+"
            assert text.buffer.raw.mode == "rb+"

            # A file opened from a descriptor uses that descriptor as its name.
            with _io.open(text.fileno(), "wb", closefd=False) as by_fd:
                assert by_fd.mode == "wb"
                assert by_fd.raw.mode == "wb"
                assert by_fd.name == text.fileno()
                assert by_fd.raw.name == text.fileno()
Exemplo n.º 57
0
    def split_by_size(self, nbyte:int):
        """Split the source file into consecutive pieces of up to `nbyte` bytes.

        The number of pieces is ``self.__howmany(self._nbyte, nbyte)`` and the
        output names come from ``self.splited_names(nfile)``. Each chunk read
        from the source is written to the next name in that sequence.

        Returns:
            The names returned by ``self.splited_names``.
        """
        nfile = self.__howmany(self._nbyte, nbyte)
        newnames = self.splited_names(nfile)
        inames = iter(newnames)

        with open_as_bytes_stream(self._fname) as stream:
            buf = stream.read(nbyte)
            # An empty read means the source is exhausted.
            while buf:
                # The context manager closes the piece even if the write fails.
                with open(next(inames), 'wb') as fout:
                    fout.write(buf)
                buf = stream.read(nbyte)

        return newnames
Exemplo n.º 58
0
    def test_io_after_close(self):
        """Every I/O method of a closed file object must raise ValueError.

        The temp file is opened and immediately closed for a range of text and
        binary modes and buffering settings; text modes additionally get
        ``encoding="ascii"``. Methods that only exist on some file types
        (``peek``, ``read1``, ``readall``, ``readinto``) are checked only when
        present.
        """
        import _io

        for kwargs in [
            {"mode": "w"},
            {"mode": "wb"},
            {"mode": "w", "buffering": 1},
            {"mode": "w", "buffering": 2},
            {"mode": "wb", "buffering": 0},
            {"mode": "r"},
            {"mode": "rb"},
            {"mode": "r", "buffering": 1},
            {"mode": "r", "buffering": 2},
            {"mode": "rb", "buffering": 0},
            {"mode": "w+"},
            {"mode": "w+b"},
            {"mode": "w+", "buffering": 1},
            {"mode": "w+", "buffering": 2},
            {"mode": "w+b", "buffering": 0},
        ]:
            # Call form instead of the print statement: identical output on
            # Python 2, and no longer a SyntaxError on Python 3.
            print(kwargs)
            if "b" not in kwargs["mode"]:
                kwargs["encoding"] = "ascii"
            f = _io.open(self.tmpfile, **kwargs)
            f.close()
            raises(ValueError, f.flush)
            raises(ValueError, f.fileno)
            raises(ValueError, f.isatty)
            raises(ValueError, f.__iter__)
            if hasattr(f, "peek"):
                raises(ValueError, f.peek, 1)
            raises(ValueError, f.read)
            if hasattr(f, "read1"):
                raises(ValueError, f.read1, 1024)
            if hasattr(f, "readall"):
                raises(ValueError, f.readall)
            if hasattr(f, "readinto"):
                raises(ValueError, f.readinto, bytearray(1024))
            raises(ValueError, f.readline)
            raises(ValueError, f.readlines)
            raises(ValueError, f.seek, 0)
            raises(ValueError, f.tell)
            raises(ValueError, f.truncate)
            # Binary files take bytes, text files take unicode.
            raises(ValueError, f.write, b"" if "b" in kwargs["mode"] else u"")
            raises(ValueError, f.writelines, [])
            raises(ValueError, next, f)
Exemplo n.º 59
0
 def test_pickling(self):
     """Pickling an open file object must raise TypeError.

     Checked for text, binary and unbuffered binary files in write, read
     and read/write modes, under every pickle protocol from 0 up to
     ``pickle.HIGHEST_PROTOCOL``.
     """
     import _io
     import pickle

     # Nine configurations in a fixed order: for each base mode the text
     # variant, the binary variant, then the unbuffered binary variant.
     open_configs = []
     for base_mode in ("w", "r", "w+"):
         binary_mode = base_mode + "b"
         open_configs.append({"mode": base_mode})
         open_configs.append({"mode": binary_mode})
         open_configs.append({"mode": binary_mode, "buffering": 0})

     for open_kwargs in open_configs:
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             with _io.open(self.tmpfile, **open_kwargs) as handle:
                 raises(TypeError, pickle.dumps, handle, proto)
Exemplo n.º 60
0
def _get_data(archive, toc_entry):
    """Return the data of one archive member described by `toc_entry`.

    `toc_entry` is an 8-item sequence; only its compression flag, stored data
    size and local-header offset are used here. The 30-byte local file header
    at that offset is validated, the variable-length name and extra fields
    are skipped, and `data_size` bytes are read. The bytes are returned as-is
    when the compression flag is 0, otherwise they are passed through the
    function returned by `_get_decompress_func()`.

    Raises:
        ZipImportError: negative data size, a failed seek, a bad local file
            header signature, or no decompression function available.
        EOFError: the local file header is shorter than 30 bytes.
        OSError: fewer than `data_size` bytes could be read.
    """
    _, compress, data_size, _, file_offset, _, _, _ = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    with _io.open(archive, 'rb') as zip_file:
        # Locate and validate the local file header.
        try:
            zip_file.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

        local_header = zip_file.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')
        if local_header[:4] != b'PK\x03\x04':
            # Signature mismatch: this is not a local file header.
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        # The member data begins after the fixed header plus the name and
        # extra fields, whose lengths are stored at bytes 26-27 and 28-29.
        name_len = _unpack_uint16(local_header[26:28])
        extra_len = _unpack_uint16(local_header[28:30])
        data_start = file_offset + 30 + name_len + extra_len
        try:
            zip_file.seek(data_start)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

        payload = zip_file.read(data_size)
        if len(payload) != data_size:
            raise OSError("zipimport: can't read data")

    if compress != 0:
        # Compressed member: decompress with zlib.
        try:
            inflate = _get_decompress_func()
        except Exception:
            raise ZipImportError("can't decompress data; zlib not available")
        return inflate(payload, -15)

    # Stored member: the bytes on disk are the data.
    return payload