def printRecArray(recarr, precision=8):
    """
    Print a record array in a mannerly fashion.

    The array is rendered as text by C{mlab.rec2txt} and the result is
    printed.

    @param recarr: The record array.
    @type recarr: recarray
    @keyword precision: The precision of the floats shown when printed
        (default: 8)
    @type precision: int
    """
    # Single-argument call form: valid as a Python 3 function call and as a
    # Python 2 print statement applied to a parenthesised expression.
    print(mlab.rec2txt(recarr, precision=precision))
def printRecArray(recarr, precision=8):
    """
    Print a record array in a mannerly fashion.

    The text table is produced by C{mlab.rec2txt}; this function only
    forwards the array and the requested float precision to it and prints
    what comes back.

    @param recarr: The record array.
    @type recarr: recarray
    @keyword precision: The precision of the floats shown when printed
        (default: 8)
    @type precision: int
    """
    # print(...) with one argument behaves identically on Python 2 and 3.
    print(mlab.rec2txt(recarr, precision=precision))
def __str__(self):
    """
    Return the text-table rendering of this array.

    When either ``self.names`` or ``self.size`` is falsy the literal
    string ``'[]'`` is returned instead of calling ``rec2txt``.
    """
    # Guard clause: nothing to tabulate.
    if not (self.names and self.size):
        return '[]'
    return rec2txt(self, padding=3, precision=4)
def printResult(self):
    """
    Build a text report of the mean-variance optimization result.

    Despite the name, nothing is printed here: the report is returned as a
    single string and the caller decides what to do with it.

    The report contains the optimization mode, a name/weight table rendered
    by ``rec2txt``, and the mean, optimized risk, target risk and the ratio
    ``optimizedMean / optimizedRisk`` (labelled "Sharpe").

    Reads ``self.optimizationMode``, ``self.nameList``,
    ``self.optimizedWeights``, ``self.optimizedMean``,
    ``self.optimizedRisk`` and ``self.targetRisk``.

    Returns
    -------
    str
        The formatted report.
    """
    parts = [
        "\n",
        "MeanVariance Optimization Result\n",
        "Optimization Mode : %s\n" % self.optimizationMode,
    ]

    # zip() returns a one-shot iterator on Python 3, which np.array cannot
    # turn into a structured array; materialise it as a list of tuples.
    weight_data = np.array(list(zip(self.nameList, self.optimizedWeights)),
                           dtype=[('Name', 'S20'), ('Weight', float)])

    parts += [
        "\n",
        "Optimization Result:\n",
        rec2txt(weight_data),
        "\n",
        "Optimization Metric:\n",
        " Mean: %f\n" % self.optimizedMean,
        " (optimized)Risk: %f\n" % self.optimizedRisk,
        " (targeted)Risk: %f\n" % self.targetRisk,
        " Sharpe: %f\n" % (self.optimizedMean / self.optimizedRisk),
        "\n",
    ]
    # One join instead of repeated string concatenation.
    return "".join(parts)
def __str__(self):
    """
    Return a text rendering of the table, preceded by its metadata.

    Up to three sections are joined with ``'\\r\\n'``:

    1. ``globalmeta:`` followed by one tab-indented ``key:value`` line per
       entry of ``self.globalmeta`` (only when ``self.globalmeta`` is
       truthy);
    2. ``fieldmeta:`` followed by one tab-indented ``name:meta`` line per
       field (only when at least one entry of ``self.fieldmeta`` is
       truthy);
    3. the data itself, rendered by ``rec2txt``; if that raises, the
       parent class's ``__str__`` is used instead.
    """
    sections = []

    if self.globalmeta:
        global_lines = ['globalmeta:']
        global_lines += ['\t{0}:{1}'.format(key, value)
                         for key, value in self.globalmeta.items()]
        sections.append('\r\n'.join(global_lines))

    if any(bool(fmeta) for fmeta in self.fieldmeta):
        field_lines = ['fieldmeta:']
        field_lines += ['\t{0}:{1}'.format(name, meta)
                        for name, meta in zip(self.names, self.fieldmeta)]
        sections.append('\r\n'.join(field_lines))

    try:
        table = rec2txt(self.view(recarray), padding=3, precision=4)
    except Exception:
        # Best-effort fallback when the table cannot be rendered.  A bare
        # ``except:`` would also swallow KeyboardInterrupt and SystemExit.
        table = super(LookUpTable, self).__str__()
    sections.append(table)

    return '\r\n'.join(sections)
from __future__ import print_function import numpy as np import matplotlib.mlab as mlab import matplotlib.cbook as cbook datafile = cbook.get_sample_data('aapl.csv', asfileobj=False) print('loading', datafile) r = mlab.csv2rec(datafile) r.sort() r1 = r[-10:] # Create a new array r2 = np.empty(12, dtype=[('date', '|O4'), ('high', np.float), ('marker', np.float)]) r2 = r2.view(np.recarray) r2.date = r.date[-17:-5] r2.high = r.high[-17:-5] r2.marker = np.arange(12) print("r1:") print(mlab.rec2txt(r1)) print("r2:") print(mlab.rec2txt(r2)) defaults = {'marker': -1, 'close': np.NaN, 'low': -4444.} for s in ('inner', 'outer', 'leftouter'): rec = mlab.rec_join(['date', 'high'], r1, r2, jointype=s, defaults=defaults) print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))
rsum = mlab.rec_summarize(r, summaryfuncs)

# stats is a list of (dtype_name, function, output_dtype_name).
# rec_groupby will summarize the attribute identified by the
# dtype_name over the groups in the groupby list, and assign the
# result to the output_dtype_name
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)

# All prints below use the single-argument call form, which behaves the
# same under Python 2 and Python 3.

# you can summarize over a single variable, like years or months
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years', ), stats)
print(mlab.rec2txt(ry))

print('summary by months')
rm = mlab.rec_groupby(rsum, ('months', ), stats)
print(mlab.rec2txt(rm))

# or over multiple variables like years and months
print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))

print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode', ), stats)
print(mlab.rec2txt(rv))
rsum = mlab.rec_summarize(r, summaryfuncs)

# Each stats entry is (dtype_name, function, output_dtype_name):
# rec_groupby applies the function to the dtype_name attribute within
# every group of the groupby list and stores the result under
# output_dtype_name.
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)


def _print_group_summary(title, group_keys):
    """Print *title*, group ``rsum`` by *group_keys*, print and return it."""
    print(title)
    grouped = mlab.rec_groupby(rsum, group_keys, stats)
    print(mlab.rec2txt(grouped))
    return grouped


# Summaries over a single variable, like years or months ...
ry = _print_group_summary('summary by years', ('years', ))
rm = _print_group_summary('summary by months', ('months', ))

# ... or over several variables at once, like years and months.
rym = _print_group_summary('summary by year and month', ('years', 'months'))

rv = _print_group_summary('summary by volume', ('volcode', ))
# Load the combined stock records and expose the fields as attributes.
rall = np.load(os.path.join(outdir, 'stocks_combined.npy')).view(np.recarray)

if 1:
    # Scatter of return against price, then a 20-bin histogram of prices.
    fig = plt.figure()
    plt.plot(rall.price, rall.dreturn, 'o')

    fig = plt.figure()
    plt.hist(rall.price, 20)

    # Summarise dreturn inside half-open price buckets (min, max].
    data = []
    ranges = [(0, 1), (1, 2), (2, 5), (5, 10), (10, 20), (20, 100),
              (100, np.inf)]
    for price_min, price_max in ranges:
        mask = (rall.price > price_min) & (rall.price <= price_max)
        rmask = rall[mask]
        prices = rmask.price
        dreturn = rmask.dreturn * 100  # presumably fraction -> percent
        median = np.median(dreturn)
        mean = np.mean(dreturn)
        count = len(dreturn)
        std = np.std(dreturn)
        rar = mean / std  # mean return per unit of standard deviation
        data.append((price_min, price_max, count, mean, median, std, rar))

    rsummary = np.rec.fromrecords(
        data, names='price_min,price_max,count,mean,median,std,rar')
    # Call form of print: works on both Python 2 and Python 3.
    print(mlab.rec2txt(rsummary))

plt.show()
""" Demonstrate how get_sample_data works with git revisions in the data. git clone [email protected]/matplotlib/sample_data.git and edit testdata.csv to add a new row. After committing the changes, when you rerun this script you will get the updated data (and the new git version will be cached in ~/.matplotlib/sample_data) """ import matplotlib.mlab as mlab import matplotlib.cbook as cbook # get the file handle to the cached data and print the contents datafile = 'testdir/subdir/testsub.csv' fh = cbook.get_sample_data(datafile) print fh.read() # make sure we can read it using csv2rec fh.seek(0) r = mlab.csv2rec(fh) print mlab.rec2txt(r) fh.close()
def analyze_yahoo_data(ticker, date1=datetime.date(2004, 1, 1), date2=None):
    """
    Load Yahoo data for *ticker* and print two ``estimate_var`` tables.

    The data is fetched with ``load_yahoo_data(ticker, date1, date2)``.
    ``estimate_var`` is then run twice on it, once with its defaults and
    once with ``True`` as second argument (presumably a per-year breakdown,
    judging by the variable name; confirm against ``estimate_var``), and
    both results are printed via ``rec2txt``.

    Parameters
    ----------
    ticker : ticker identifier passed through to ``load_yahoo_data``.
    date1 : datetime.date
        Start date. Defaults to 2004-01-01.
    date2 : datetime.date or None
        End date. ``None`` (the default) means "today", resolved at call
        time.
    """
    # A ``datetime.date.today()`` default in the signature is evaluated only
    # once, when the function is defined, and goes stale in a long-running
    # process; resolve it per call instead.
    if date2 is None:
        date2 = datetime.date.today()

    ticker, iso_week, data = load_yahoo_data(ticker, date1, date2)
    result_all = estimate_var(data)
    result_year = estimate_var(data, True)
    print(rec2txt(result_all))
    print(rec2txt(result_year))
def analyze_dzh_file(filename):
    """
    Load *filename* with ``load_file`` and print two ``estimate_var`` tables.

    ``load_file`` must return a ``(ticker, iso_week, data)`` triple; only
    ``data`` is used here.  ``estimate_var`` is run once with its defaults
    and once with ``True`` as second argument (presumably a per-year
    breakdown, judging by the variable name; confirm against
    ``estimate_var``).  Both results are printed via ``rec2txt``.

    Parameters
    ----------
    filename : path or name handed to ``load_file`` unchanged.
    """
    ticker, iso_week, data = load_file(filename)
    result_all = estimate_var(data)
    result_year = estimate_var(data, True)
    # Call form of print: valid on both Python 2 and Python 3.
    print(rec2txt(result_all))
    print(rec2txt(result_year))
""" Demonstrate how get_sample_data works with git revisions in the data. git clone [email protected]/matplotlib/sample_data.git and edit testdata.csv to add a new row. After committing the changes, when you rerun this script you will get the updated data (and the new git version will be cached in ~/.matplotlib/sample_data) """ from __future__ import print_function import matplotlib.mlab as mlab import matplotlib.cbook as cbook # get the file handle to the cached data and print the contents datafile = 'testdir/subdir/testsub.csv' fh = cbook.get_sample_data(datafile) print(fh.read()) # make sure we can read it using csv2rec fh.seek(0) r = mlab.csv2rec(fh) print(mlab.rec2txt(r)) fh.close()
if 1:
    # Load the combined stock records; the recarray view gives attribute
    # access to the fields (rall.price, rall.dreturn).
    rall = np.load(
        os.path.join(outdir, 'stocks_combined.npy')).view(np.recarray)

if 1:
    # Figure 1: return against price.  Figure 2: 20-bin price histogram.
    fig = plt.figure()
    plt.plot(rall.price, rall.dreturn, 'o')

    fig = plt.figure()
    plt.hist(rall.price, 20)

    # One summary row per price bucket; a record belongs to a bucket when
    # price_min < price <= price_max.
    data = []
    ranges = [(0, 1), (1, 2), (2, 5), (5, 10), (10, 20), (20, 100),
              (100, np.inf)]
    for price_min, price_max in ranges:
        mask = (rall.price > price_min) & (rall.price <= price_max)
        rmask = rall[mask]
        prices = rmask.price
        dreturn = rmask.dreturn * 100  # presumably fraction -> percent
        median = np.median(dreturn)
        mean = np.mean(dreturn)
        count = len(dreturn)
        std = np.std(dreturn)
        rar = mean / std  # mean return per unit of standard deviation
        data.append((price_min, price_max, count, mean, median, std, rar))

    rsummary = np.rec.fromrecords(
        data, names='price_min,price_max,count,mean,median,std,rar')
    # print(...) with one argument is valid on Python 2 and Python 3 alike.
    print(mlab.rec2txt(rsummary))

plt.show()
rsum = mlab.rec_summarize(r, summaryfuncs)

# stats rows are (dtype_name, function, output_dtype_name): rec_groupby
# reduces the dtype_name attribute of every group with the function and
# stores the value under output_dtype_name.  All four rows reduce
# 'dreturn', so only the reducer and the output name are spelled out.
stats = tuple(
    ('dreturn', reducer, out_name)
    for reducer, out_name in (
        (len, 'rcnt'),
        (np.mean, 'rmean'),
        (np.median, 'rmedian'),
        (np.std, 'rsigma'),
    )
)

# (heading, groupby keys): single variables first (years, months), then
# several variables at once (years and months), then the volume code.
_groupings = (
    ('summary by years', ('years',)),
    ('summary by months', ('months',)),
    ('summary by year and month', ('years', 'months')),
    ('summary by volume', ('volcode',)),
)

_grouped = []
for _heading, _keys in _groupings:
    print(_heading)
    _grouped.append(mlab.rec_groupby(rsum, _keys, stats))
    print(mlab.rec2txt(_grouped[-1]))

# Publish the four tables under their usual names.
ry, rm, rym, rv = _grouped
def volume_code(volume):
    """
    Code the continuous volume data categorically.

    Returns the ``np.searchsorted`` insertion index of *volume* into the
    fixed list of bin edges below (scalar or array, following *volume*).
    """
    # NOTE(review): 10e6 and 1e7 are the same number, so the last two edges
    # coincide and code 4 is never produced (values above 1e7 get 5).  One
    # of them was probably meant to be a different threshold; left as-is
    # because changing it would change the emitted codes.
    ind = np.searchsorted([1e5, 1e6, 5e6, 10e6, 1e7], volume)
    return ind


# (source field, function, output field) triples for rec_summarize: each
# function receives the whole source column and returns the derived column.
summaryfuncs = (
    ('date', lambda x: [thisdate.year for thisdate in x], 'years'),
    ('date', lambda x: [thisdate.month for thisdate in x], 'months'),
    ('date', lambda x: [thisdate.weekday() for thisdate in x], 'weekday'),
    ('adj_close', daily_return, 'dreturn'),
    ('volume', volume_code, 'volcode'),
)

rsum = mlab.rec_summarize(r, summaryfuncs)

# (field, reducer, output field) triples applied per group by rec_groupby.
stats = (
    ('dreturn', len, 'rcnt'),
    ('dreturn', np.mean, 'rmean'),
    ('dreturn', np.median, 'rmedian'),
    ('dreturn', np.std, 'rsigma'),
)

# Single-argument print(...) calls run unchanged on Python 2 and Python 3.
print('summary by years')
ry = mlab.rec_groupby(rsum, ('years',), stats)
print(mlab.rec2txt(ry))

print('summary by months')
rm = mlab.rec_groupby(rsum, ('months',), stats)
print(mlab.rec2txt(rm))

print('summary by year and month')
rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
print(mlab.rec2txt(rym))

print('summary by volume')
rv = mlab.rec_groupby(rsum, ('volcode',), stats)
print(mlab.rec2txt(rv))
import numpy as np
import matplotlib.mlab as mlab

# Load the AAPL sample data into a record array and sort it.
r = mlab.csv2rec('../data/aapl.csv')
r.sort()

# First table: the last ten records.
r1 = r[-10:]

# Create a new array
# 'O' replaces the platform-specific '|O4' object dtype string, and the
# builtin float replaces the np.float alias that NumPy has removed.
r2 = np.empty(12, dtype=[('date', 'O'), ('high', float),
                         ('marker', float)])
r2 = r2.view(np.recarray)
# Second table: a 12-record window that overlaps r1 on its last five rows.
r2.date = r.date[-17:-5]
r2.high = r.high[-17:-5]
r2.marker = np.arange(12)

# Single-argument print(...) calls run unchanged on Python 2 and Python 3.
print("r1:")
print(mlab.rec2txt(r1))
print("r2:")
print(mlab.rec2txt(r2))

# Defaults passed to rec_join; np.nan replaces the removed np.NaN alias.
defaults = {'marker': -1, 'close': np.nan, 'low': -4444.}

# Join the two tables on (date, high) with each join type in turn.
for s in ('inner', 'outer', 'leftouter'):
    rec = mlab.rec_join(['date', 'high'], r1, r2,
                        jointype=s, defaults=defaults)
    print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))
import numpy as np
import matplotlib.mlab as mlab

# Read and sort the AAPL records, then keep the ten at the end.
r = mlab.csv2rec('../data/aapl.csv')
r.sort()
r1 = r[-10:]

# Create a new array
# The builtin float stands in for the removed np.float alias, and 'O' for
# the platform-specific '|O4' object dtype string.
r2 = np.empty(12, dtype=[('date', 'O'), ('high', float),
                         ('marker', float)])
r2 = r2.view(np.recarray)
# Fill it from a slice of r that shares its last five rows with r1.
r2.date = r.date[-17:-5]
r2.high = r.high[-17:-5]
r2.marker = np.arange(12)

# print(...) with a single argument is valid on Python 2 and Python 3.
print("r1:")
print(mlab.rec2txt(r1))
print("r2:")
print(mlab.rec2txt(r2))

# np.nan is the spelling kept by current NumPy (np.NaN was removed).
defaults = {'marker': -1, 'close': np.nan, 'low': -4444.}

# Show the result of every join type on the (date, high) key.
for s in ('inner', 'outer', 'leftouter'):
    rec = mlab.rec_join(['date', 'high'], r1, r2,
                        jointype=s, defaults=defaults)
    print("\n%sjoin :\n%s" % (s, mlab.rec2txt(rec)))
def robustSVD(D,nMode=nMode,sigOut=sigOut,maxIt=maxIt,verbose=True):
    r"""
    Robust SVD

    D = U S V.T

    This decomposition is computed using SVD.  SVD is sensitive to
    outliers, so we perform iterative sigma clipping in the `\chi^2`
    sense, but also in the distribution of best fit parameters.

    D_fit[i] = a_1 V_1 + a_2 V_2 + ... a_nMode V_nMode

    nMode is the (small) number of principal components we wish to fit
    our data with.  If any of the a_j, or

        \chi^2_i = sum((D_fit[i] - D[i])**2) / (number of columns of D)

    is an outlier, remove that row from D.  A row counts as a \chi^2
    outlier when \chi^2_i > 3 or \chi^2_i < 0.5 (fixed thresholds).

    Parameters
    ----------
    D : Data matrix.  1-D vectors stacked vertically (row-wise).  May
        not contain nans, or masked values.  The input is not modified.
    nMode : Number of modes.  Defaults to the module-level value.
    sigOut : Clip rows with a coefficient whose
        ``abs((a - med) / mad)`` exceeds sigOut, where ``med`` and
        ``mad`` come from ``moments(A)``.  Defaults to the module-level
        value.
    maxIt : Maximum number of clipping iterations to perform before
        exiting.  Defaults to the module-level value.
    verbose : If true, print the iteration counter, the coefficient
        moments and the first ten outlier rows of every iteration.

    Returns
    -------
    U, S, V : Factors of the final SVD, computed on the rows that
        survived the earlier iterations.  S is the zero matrix shaped
        like V with the singular values on its diagonal.
    goodid : Indices into the rows of the input D that were used in the
        final fit.
    X2 : \chi^2 of the truncated fit for each of those rows.
    """
    Dnrow, Dncol = D.shape
    D = D.copy()

    gRow = np.ones(Dnrow, dtype=bool)  # Good rows (not outliers)
    goodid = np.arange(Dnrow)          # Original index of every kept row

    # Iterate SVD fits.
    count = 0
    while True:
        if verbose:
            # Progress output is debugging aid only; honour the flag.
            print(count)

        # Drop the rows flagged in the previous pass (none on the first).
        D = D[gRow]
        Dnrow, Dncol = D.shape

        U, s, V = np.linalg.svd(D, full_matrices=False)
        S = np.zeros(V.shape)
        S[:Dnrow, :Dnrow] = np.diag(s)

        A = np.dot(U, S)               # A is matrix of best fit coeff
        A = A[:, :nMode]
        Dfit = np.dot(A, V[:nMode])    # Dfit is D represented by a
                                       # truncated series of modes
        # Evaluate Chi2
        X2 = np.sum((Dfit - D)**2, axis=1) / Dncol

        rL = moments(A)
        if verbose:
            print("Moments of principle component weight")
            print((mlab.rec2txt(rL, precision=1)))

        # Determine which rows of D are outliers
        dev = (A - rL['med']) / rL['mad']

        # One flag per (row, coefficient): coefficient is an outlier.
        Aout = abs(dev) > sigOut

        # One flag per row, as a column: red-Chi2 is an outlier.
        Xout = (X2 > 3) | (X2 < 0.5)
        Xout = Xout.reshape(Xout.size, 1)

        # Per row: the Chi2 flag followed by the coefficient flags.
        out = np.hstack([Xout, Aout])

        # All coefficients must be inliers
        gRow = out.astype(int).sum(axis=1) == 0

        # If there are no outliers or we've reached the max number of
        # iterations return U,S,V,goodid,X2 of this pass.  If not, clip
        # off the outliers and repeat.  ``>=`` (rather than ``==``) also
        # terminates for a negative or non-integer maxIt.
        if gRow.all() or count >= maxIt:
            break

        names = ['ID'] + ['Chi2'] + ['a%i' % (i+1) for i in range(nMode)]
        dtype = list(zip(names, [float]*len(names)))
        routData = np.hstack([np.vstack(goodid), np.vstack(X2), dev])
        routData = [tuple(r) for r in routData]
        rout = np.array(routData, dtype=dtype)

        if verbose:
            print("First 10 a/MAD(a)")
            print((mlab.rec2txt(rout[~gRow][:10], precision=1)))
            print(("%i there are %i outliers " % (count, goodid[~gRow].size)))

        # Keep goodid aligned with the rows that survive into the next
        # pass; must happen regardless of verbosity.
        goodid = goodid[gRow]
        count += 1

    return U, S, V, goodid, X2