def test6():
    """Read a sub-array field from one open file object in two readrows calls.

    The dtype has a float32 field ``x`` followed by a 4-element uint8 field
    ``codes``, so each text row of five values maps to one record.  The file
    is passed to ``readrows`` as an open file object: the first call asks for
    one row and the second for two, and the test expects the second call to
    return the rows that follow the one already consumed.
    """
    text = """\
1.0, 1, 2, 3, 4
10.0, 11, 12, 13, 14
100.0, 21, 22, 23, 24
"""
    dt = np.dtype([('x', np.float32), ('codes', np.uint8, 4)])
    try:
        with open(filename, 'w') as f:
            f.write(text)

        # The context manager closes the handle even when an assertion
        # fails part-way through.
        with open(filename, 'r') as f:
            a = np.array([(1.0, (1, 2, 3, 4))], dtype=dt)
            b = readrows(f, dt, delimiter=',', numrows=1)
            my_assert_array_equal(a, b)

            a = np.array([
                (10.0, (11, 12, 13, 14)),
                (100.0, (21, 22, 23, 24))], dtype=dt)
            b = readrows(f, dt, delimiter=',', numrows=2)
            my_assert_array_equal(a, b)
    finally:
        # Always remove the scratch file so a failing run does not leave
        # stale data behind for the next test.
        if os.path.exists(filename):
            os.remove(filename)
def test1():
    """Round-trip a two-field float array through np.savetxt and readrows.

    The array is written with each combination of the delimiters
    ``' '``, ``','`` and ``'|'`` and the formats ``'%.18e'``, ``'%.2f'`` and
    ``'%g'``, then read back with ``readrows`` and compared for equality.
    """
    dt = np.dtype([('x', float), ('y', float)])
    a = np.array([(1.0, 2.0), (3.0, 4.5)], dtype=dt)
    try:
        for sep in ' ,|':
            for fmt in ['%.18e', '%.2f', '%g']:
                np.savetxt(filename, a, delimiter=sep, fmt=fmt)
                b = readrows(filename, dt, delimiter=sep)
                assert_array_equal(a, b)
    finally:
        # Clean up even when a comparison fails, so the scratch file does
        # not leak into later tests.
        if os.path.exists(filename):
            os.remove(filename)
def test2():
    """Round-trip a 100-row, 50-field float64 table through savetxt/readrows.

    The data is ``arange(nrows * ncols)`` reshaped to ``(nrows, ncols)`` and
    viewed as a structured dtype with fields ``x0`` .. ``x49``.  It is written
    with each delimiter in ``' ,|'`` and each of the formats ``'%.18e'`` and
    ``'%.2f'``, then read back and compared for equality.
    """
    nrows = 100
    ncols = 50
    dt = np.dtype([('x' + str(k), np.float64) for k in range(ncols)])
    a = np.arange(float(nrows * ncols)).reshape(nrows, ncols).view(dt).squeeze()
    try:
        for sep in ' ,|':
            for fmt in ['%.18e', '%.2f']:
                np.savetxt(filename, a, delimiter=sep, fmt=fmt)
                b = readrows(filename, dt, delimiter=sep)
                assert_array_equal(a, b)
    finally:
        # Clean up even when a comparison fails, so the scratch file does
        # not leak into later tests.
        if os.path.exists(filename):
            os.remove(filename)
def test5():
    """Read two complex128 columns written in several textual forms.

    The input mixes ``a-bj`` / ``a+bj`` literals with a purely real value
    in exponent notation (``5.0e-1``); the expected array holds the
    corresponding complex numbers.
    """
    text = """\
1.0-2.0j, 3+4j
5.0e-1, 6.0+0j
"""
    dt = np.dtype([('x', np.complex128), ('y', np.complex128)])
    a = np.array([(1.0-2.0j, 3.0+4.0j), (0.5, 6.0)], dtype=dt)
    try:
        with open(filename, 'w') as f:
            f.write(text)
        b = readrows(filename, dt, delimiter=',')
        my_assert_array_equal(a, b)
    finally:
        # Remove the scratch file whether or not the comparison passed.
        if os.path.exists(filename):
            os.remove(filename)
def test3():
    """Tests datetime_fmt and usecols.

    Each input row has five comma-separated columns, the third of which is
    empty.  ``usecols=(0, 1, -2, -1)`` is passed so that the expected records
    are built from the first two and the last two columns, and the timestamp
    column is parsed with ``datetime_fmt="%Y-%m-%d %H:%M"``.
    """
    text = """\
2011-01-02 00:30,1.0,,15,FR
2011-01-02 00:45,1.25,,16,TG
2011-01-02 00:56,1.5,,17,NK
2011-01-02 01:13,1.0,,18,QQ
"""
    dt = np.dtype([('timestamp', np.datetime64), ('x', np.float32),
                   ('index', np.int16), ('code', 'S2')])
    a = np.array([
        (datetime(2011, 1, 2, 0, 30, 0, 0), 1.0, 15, 'FR'),
        (datetime(2011, 1, 2, 0, 45, 0, 0), 1.25, 16, 'TG'),
        (datetime(2011, 1, 2, 0, 56, 0, 0), 1.5, 17, 'NK'),
        (datetime(2011, 1, 2, 1, 13, 0, 0), 1.0, 18, 'QQ')], dtype=dt)
    try:
        with open(filename, 'w') as f:
            f.write(text)
        b = readrows(filename, dt, delimiter=',', usecols=(0, 1, -2, -1),
                     datetime_fmt="%Y-%m-%d %H:%M", tzoffset=None)
        assert_array_equal(a, b)
    finally:
        # Remove the scratch file whether or not the comparison passed.
        if os.path.exists(filename):
            os.remove(filename)
# Timing script: read data/big.csv (50 float32 columns) with readrows and
# report the elapsed wall-clock time.
import time
import os

import numpy as np

from textreader import readrows


filename = 'data/big.csv'

# Every print below passes a single pre-formatted string, so the script
# produces the same output under Python 2 and Python 3.
if not os.path.exists(filename):
    message = ("{name} does not exist. "
               "Run the script generate_big.py to generate {name}.")
    print(message.format(name=filename))
else:
    fields = [('f' + str(k), np.float32) for k in range(50)]
    dt = np.dtype(fields)

    t0 = time.time()
    a = readrows(filename, dtype=dt, delimiter=',')
    t1 = time.time()

    print("%s seconds" % (t1 - t0))
# Demo script: read data/sci_d.csv, passing sci='D' to readrows, into four
# float64 fields and display the result.
import numpy as np

from textreader import readrows


filename = 'data/sci_d.csv'

dt = np.dtype([('w', np.float64), ('x', np.float64),
               ('y', np.float64), ('z', np.float64)])
a = readrows(filename, dt, delimiter=',', sci='D')

# Single-argument print calls behave identically on Python 2 and Python 3.
print("a.shape = %s" % (a.shape,))
print("a.dtype = %s" % (a.dtype,))
print("a:")
print(a)
print("")
# Demo script: read data/datedata.txt into a (timestamp, x) structured
# array, parsing the timestamp column with an explicit datetime format.
import numpy as np
import os
import sys

from textreader import readrows


filename = 'data/datedata.txt'

dt = np.dtype([('timestamp', np.datetime64), ('x', np.float32)])
data = readrows(filename, delimiter=',', dtype=dt,
                datetime_fmt="%m/%d/%Y %H:%M:%S")

# Single-argument print calls behave identically on Python 2 and Python 3.
print("Read {name} into the array 'data'.".format(name=filename))
print("data.dtype = %s" % (data.dtype,))
print("data.shape = %s" % (data.shape,))
print("data:")
print(data)
def _continuePartionate(self):
    """Resume splitting the file at ``self._apath`` into ``partN.npy`` chunks.

    The last recorded portion is dropped and re-read: the file position is
    set from that portion's ``'b'`` entry, then chunks are loaded with
    ``tx.readrows``, saved with ``np.save`` into ``self._dir`` and registered
    through ``self._p.addPortion``.  A final chunk covers whatever whole
    lines remain.  Afterwards ``self._p`` is pickled to ``_ldrparams`` and
    ``self._recalcParts`` is set to the indices of the portions (re)created
    by this call.

    NOTE(review): presumably ``self._p.strs`` is the byte length of one text
    line, ``bfsz`` the chunk size in lines and ``bfin`` the overlap in lines;
    confirm against ``LDRParams``.  The ``/`` divisions below rely on
    Python 2 integer division.
    """
    print 'cprt0'
    # This handle is used only for position bookkeeping (seek/tell); the
    # rows themselves are read by tx.readrows directly from self._apath.
    f = open(self._apath)
    f.seek(self._p.zshift + (self._p.portions[-1]['b'] + self._p.bfin) * self._p.strs)
    # Discard the last portion; it is regenerated below.
    self._p.portions.pop()
    cc = len(self._p.portions) - 1
    ncp = cc + 1  # non-calculated parts
    # Keep producing full-size chunks while more than bfsz lines' worth of
    # bytes remain after the current position.
    while os.stat(self._apath).st_size - self._p.zshift - f.tell(
            ) > self._p.bfsz * self._p.strs:
        cc += 1
        # Step back by the overlap so consecutive chunks share bfin lines.
        f.seek(-self._p.bfin * self._p.strs, 1)
        curbline = (f.tell() - self._p.zshift) / self._p.strs
        numlines = self._p.bfsz + 2 * self._p.bfin
        # Advance the bookkeeping position past the chunk without reading it.
        f.seek(numlines * self._p.strs, 1)
        # p0 = StringIO(f.read(numlines*self._p.strs))
        print 'tx loading..',
        sys.stdout.flush()
        #todo: replace with tx
        # a0 = np.loadtxt(p0, dtype=self._p.dtp)
        a0 = tx.readrows(self._apath, self._p.dtp, skiprows=curbline, numrows=numlines)
        print 'OK'
        print a0.shape
        fnm = os.path.join(self._dir, 'part%d.npy' % (cc))
        # Record the line range and the first/last value of field 't'.
        self._p.addPortion(curbline, curbline + a0.shape[0], a0['t'][0], \
                           a0['t'][-1],fnm)
        np.save(fnm, a0)
        del a0
    # Final (possibly shorter) chunk.
    cc += 1
    f.seek(-self._p.bfin * self._p.strs, 1)
    fnm = os.path.join(self._dir, 'part%d.npy' % (cc))
    curbline = (f.tell() - self._p.zshift) / self._p.strs
    # Guard against reading a partial line: only a whole number of lines
    # is requested.
    print 'loading strio..',
    sys.stdout.flush()
    numlines = (
        (os.stat(self._apath).st_size - self._p.zshift - f.tell()) /
        self._p.strs)
    # p0 = StringIO(f.read( numlines * self._p.strs ))
    f.seek(numlines * self._p.strs, 1)  # skip instead of reading
    print 'SKIP'
    print 'tx.readrows from file..',
    sys.stdout.flush()
    a0 = tx.readrows(self._apath, self._p.dtp, skiprows=curbline, numrows=numlines)
    #a0 = np.loadtxt(p0, dtype=self._p.dtp)
    print 'OK'
    # update information about last processed byte
    self._p.lastfsz = (curbline + a0.shape[0]) * self._p.strs
    self._p.addPortion(curbline, curbline + a0.shape[0], a0['t'][0], \
                       a0['t'][-1],fnm)
    print 'saving..',
    sys.stdout.flush()
    np.save(fnm, a0)
    print 'OK'
    del a0
    # NOTE(review): the data-file handle opened at the top is rebound here
    # without an explicit close().
    f = open(os.path.join(self._dir, '_ldrparams'), 'w')
    pickle.dump(self._p, f, 2)
    f.close()
    self._recalcParts = range(ncp, len(self._p.portions))
    print 'cprt1'
def _partionateAll(self): f = open(self._apath, 'r') # skipt commented header if self._p.zshift == -1: l = '#' zeroshift = 0 while l[0] == '#': zeroshift = f.tell() l = f.readline() f.seek(zeroshift) N = len(l.strip().split()) print N - 1 self._p = LDRParams(N - 1) self._p.strs = len(l) self._p.zshift = zeroshift self._p.bfsz = (__blocksize__ * 1024) / self._p.strs self._p.bfin = (__intercept__ * 1024) / self._p.strs print ''' File ``%s'' is opened. Raw data begins from byte #%d. ''' % \ ( self._apath, self._p.zshift ) numlines = (self._p.bfsz + self._p.bfin) # p0 = StringIO(f.read(s)) f.seek(numlines * self._p.strs, 0) # a0 = np.loadtxt(p0, dtype=self._p.dtp) print 'tx loading..', sys.stdout.flush() a0 = tx.readrows(self._apath, self._p.dtp, numrows=numlines) print 'OK' np.save(os.path.join(self._dir, 'part0.npy'), a0) # is file longer than first buffer/ if (a0.shape[0] == self._p.bfsz + self._p.bfin): # for a long file self._p.addPortion(0, self._p.bfsz+self._p.bfin, a0['t'][0], \ a0['t'][-1], os.path.join(self._dir, 'part0.npy')) del a0 cc = 0 while os.stat(self._apath).st_size - self._p.zshift - f.tell() >\ self._p.bfsz*self._p.strs: cc += 1 f.seek(-self._p.bfin * self._p.strs, 1) curbline = (f.tell() - self._p.zshift) / self._p.strs numlines = (self._p.bfsz + 2 * self._p.bfin) #p0 = StringIO(f.read(numlines*self._p.strs)) f.seek(numlines * self._p.strs, 1) #a0 = np.loadtxt(p0, dtype=self._p.dtp) print 'tx loading..', sys.stdout.flush() a0 = tx.readrows(self._apath, self._p.dtp, skiprows=curbline, numrows=numlines) print 'OK' fnm = os.path.join(self._dir, 'part%d.npy' % (cc)) self._p.addPortion(curbline, curbline + a0.shape[0], a0['t'][0], \ a0['t'][-1],fnm) np.save(fnm, a0) del a0 cc += 1 f.seek(-self._p.bfin * self._p.strs, 1) fnm = os.path.join(self._dir, 'part%d.npy' % (cc)) curbline = (f.tell() - self._p.zshift) / self._p.strs # protect from reading non-full line. Reading integer count of lines. 
numlines = (os.stat(self._apath).st_size - self._p.zshift - f.tell()) / self._p.strs #p0 = StringIO(f.read( numlines * self._p.strs )) #a0 = np.loadtxt(p0, dtype=self._p.dtp) print 'tx loading..', sys.stdout.flush() a0 = tx.readrows(self._apath, self._p.dtp, skiprows=curbline, numrows=numlines) print 'OK' # update information about last processed byte self._p.lastfsz = (curbline + a0.shape[0]) * self._p.strs self._p.addPortion(curbline, curbline + a0.shape[0], a0['t'][0], \ a0['t'][-1],fnm) np.save(fnm, a0) del a0 else: # for a short file. Adding the ONLY portion :) self._p.addPortion(0, a0.shape[0], a0['t'][0], \ a0['t'][-1], os.path.join(self._dir, 'part0.npy')) self._p.lastfsz = self._p.zshift + a0.shape[0] * self._p.strs del a0 f = open(os.path.join(self._dir, '_ldrparams'), 'w') pickle.dump(self._p, f, 2) f.close() self._recalcParts = range(0, len(self._p.portions))
# Demo script: read data/hastext.csv, which has a text column, first with
# all three fields and then with only columns 0 and 2 via usecols.
import numpy as np

from textreader import readrows


filename = "data/hastext.csv"

# Single-argument print calls behave identically on Python 2 and Python 3.
dt = np.dtype([("name", "S12"), ("b", np.float32), ("c", np.int16)])
foo = readrows(filename, dt, delimiter=",")

print("foo.shape = %s" % (foo.shape,))
print("foo.dtype = %s" % (foo.dtype,))
print("foo:")
print(foo)
print("")

cols = (0, 2)
print("Now try usecols={}".format(cols))
print("")

# The dtype lists only the fields for the selected columns.
dt = np.dtype([("name", "S12"), ("c", np.int16)])
foo = readrows(filename, dt, delimiter=",", usecols=cols)

print("foo.shape = %s" % (foo.shape,))
print("foo.dtype = %s" % (foo.dtype,))
print("foo:")
print(foo)
# Demo script: read data/sci_d_comma.csv with ';' as the field delimiter,
# passing sci='D' and decimal=',' to readrows, and display the result.
import numpy as np

from textreader import readrows


filename = 'data/sci_d_comma.csv'

dt = np.dtype([('w', np.float64), ('x', np.float64),
               ('y', np.float64), ('z', np.float64)])
a = readrows(filename, dt, delimiter=';', sci='D', decimal=',')

# Single-argument print calls behave identically on Python 2 and Python 3.
print("a.shape = %s" % (a.shape,))
print("a.dtype = %s" % (a.dtype,))
print("a:")
print(a)
print("")
# Demo script: run readrows on data/bad_float.csv with an
# (int16, int16, float32) dtype and display whatever is returned.
import numpy as np

from textreader import readrows


filename = 'data/bad_float.csv'

dt = np.dtype([('a', np.int16), ('b', np.int16), ('x', np.float32)])
foo = readrows(filename, dt, delimiter=',')

# Single-argument print calls behave identically on Python 2 and Python 3.
print("foo.shape = %s" % (foo.shape,))
print("foo.dtype = %s" % (foo.dtype,))
print("foo:")
print(foo)
print("")
# Demo script: echo data/complex.dat, then read it as two complex64 fields.
import sys

filename = "data/complex.dat"

# The context manager closes the file even if the read fails.
with open(filename, "r") as f:
    s = f.read()

# Single-argument print calls behave identically on Python 2 and Python 3.
print("File contents between lines:")
print("-" * 25)
# Echo the file verbatim; add a newline only when the file lacks one so the
# closing rule always starts on its own line.
sys.stdout.write(s)
if s and not s.endswith("\n"):
    sys.stdout.write("\n")
print("-" * 25)
print("")

print("numpy.complex64:")
dt = np.dtype([("a", np.complex64), ("b", np.complex64)])
data = readrows(filename, dt)
print("data.shape = %s" % (data.shape,))
print("data.dtype = %s" % (data.dtype,))
print("data:")
print(data)
print("")

# The complex128 variant below is disabled by wrapping it in a string
# literal.  The literal was unterminated in the original; it is closed here
# so the script parses.  Its text is kept as found (Python 2 print syntax).
"""
print "numpy.complex128:"
dt = np.dtype([('a', np.complex128), ('b', np.complex128)])
data = readrows(filename, dt)
print "data.shape =", data.shape
print "data.dtype =", data.dtype
print "data:"
print data
print
"""
# Demo script: read the first four '|'-separated columns of data/data1.txt
# from an open file object, passing skiprows=1, and display the result.
import numpy as np

from textreader import readrows


filename = 'data/data1.txt'

dt = np.dtype([('NAME', 'S8'), ('RATIO', np.float32),
               ('ALPHA', np.float32), ('BETA', np.float32)])

# The context manager guarantees the file handle is closed after reading.
with open(filename, 'r') as f:
    data1 = readrows(f, dt, delimiter='|', usecols=range(4), skiprows=1)

# Single-argument print calls behave identically on Python 2 and Python 3.
print("data1.shape = %s" % (data1.shape,))
print("data1.dtype = %s" % (data1.dtype,))
print("data1:")
print(data1)
# Demo script: data/multi.dat holds several blocks, each introduced by a
# line containing the number of rows that follow.  Read and display each
# block in turn.
import numpy as np

from textreader import readrows


filename = 'data/multi.dat'

# The context manager guarantees the file handle is closed when done.
with open(filename, 'r') as f:
    # readline() is used instead of iterating over the file because the
    # same handle is also passed to readrows between count lines.
    line = f.readline()
    while line:
        nrows = int(line)
        a = readrows(f, np.float32, numrows=nrows, sci='D', delimiter=',')
        # Single-argument print calls behave identically on Python 2 and 3.
        print("a:")
        print(a)
        print("")
        line = f.readline()