def load_spirals(N, plot=True):
    """
    Loads a 3-class non-linearly separable data set consisting of 'spirals'.

    Code is adapted from the Spring 2015 Stanford cs231n course:
    'Convolutional Neural Networks for Visual Recognition'.
    Link: http://cs231n.github.io/neural-networks-case-study/

    :param N: (int) Number of data points per class
    :param plot: (bool) Plot resulting 2-D dataset
    :return: (X, y) X is an (N*K)x2 matrix of inputs, y is an (N*K)-vector
        of class labels
    """
    D = 2  # dimensionality
    K = 3  # number of classes
    X = np.zeros((N * K, D))  # data matrix (each row = single example)
    y = np.zeros(N * K, dtype='uint8')  # class labels
    for j in range(K):  # 'xrange' is Python 2 only
        ix = range(N * j, N * (j + 1))
        r = np.linspace(0.0, 1, N)  # radius
        t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2  # theta
        X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        y[ix] = j
    if plot:
        plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
    return X, y
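# Usage sketch (an assumption, not part of the original snippet): the function
# expects numpy and matplotlib.pyplot to be available as np and plt.
import numpy as np
import matplotlib.pyplot as plt

X, y = load_spirals(N=100, plot=True)
plt.show()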
def print_statistics(a1, a2, a1_type, a2_type):
    '''
    Prints selected statistics and draws a Q-Q-style scatter plot of the
    two sorted samples.

    Parameters
    ==========
    a1, a2: ndarray objects
        results objects from simulation
    a1_type, a2_type: str
        axis labels for the two data sets
    '''
    sta1 = scs.describe(a1)
    sta2 = scs.describe(a2)
    print('%14s %14s %14s' % ('statistic', 'data set 1', 'data set 2'))
    print(45 * "-")
    print('%14s %14.0f %14.0f' % ('size', sta1[0], sta2[0]))
    print('%14s %14.3f %14.3f' % ('min', sta1[1][0], sta2[1][0]))
    print('%14s %14.3f %14.3f' % ('max', sta1[1][1], sta2[1][1]))
    print('%14s %14.3f %14.3f' % ('mean', sta1[2], sta2[2]))
    print('%14s %14.3f %14.3f' % ('std', np.sqrt(sta1[3]), np.sqrt(sta2[3])))
    print('%14s %14.3f %14.3f' % ('skew', sta1[4], sta2[4]))
    print('%14s %14.3f %14.3f' % ('kurtosis', sta1[5], sta2[5]))
    a1_sort = np.sort(a1)
    a2_sort = np.sort(a2)
    plt.scatter(x=a1_sort, y=a2_sort, marker='.', color='darkred')
    plt.plot(a1_sort, a1_sort, linestyle='dashed', color='darkblue', alpha=0.4)
    plt.xlabel(a1_type)
    plt.ylabel(a2_type)
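# Usage sketch (assumed imports; scs is scipy.stats, as used above):
import numpy as np
import scipy.stats as scs
import matplotlib.pyplot as plt

sample1 = np.random.standard_normal(10000)
sample2 = np.random.standard_normal(10000)
print_statistics(sample1, sample2, 'sample 1', 'sample 2')
plt.show()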
def create_plot(x, y, styles, labels, axlabels):
    # the original ignored 'styles' and 'labels', so plt.legend() had
    # nothing to show; wire them into the scatter calls
    plt.figure(figsize=(10, 6))
    plt.scatter(x[0], y[0], marker=styles[0], label=labels[0])
    plt.scatter(x[1], y[1], marker=styles[1], label=labels[1])
    plt.xlabel(axlabels[0])
    plt.ylabel(axlabels[1])
    plt.legend(loc=0)
    plt.show()
def biaozhukedu(dfc, weibiao):
    # annotate tick values; 'weibiao' is the target index label
    if weibiao == dfc.index.max():
        kedus = [dfc.loc[weibiao]]
    else:
        kedus = [dfc.loc[weibiao], dfc.loc[dfc.index.max()]]
    for ii in range(len(kedus)):
        kedu = kedus[ii]
        if len(dfc.index) > 12:
            idx = kedu.name
        else:
            idx = list(dfc.index).index(kedu.name)
        if not np.isnan(kedu.iloc[0]):
            plt.plot([idx, idx], [0, kedu.iloc[0]], 'c--')
            plt.annotate(str(kedu.name), xy=(idx, 0), xycoords='data',
                         xytext=(-20, -20), textcoords='offset points',
                         color='r',
                         arrowprops=dict(arrowstyle="->",
                                         connectionstyle="arc3,rad=0"))
        for i in range(len(kedu)):
            if np.isnan(kedu.iloc[i]):
                continue
            plt.scatter([idx], [kedu.iloc[i]], 50, color='Wheat')
            # global ywananchor: threshold above which values are shown
            # in units of 万 (10,000)
            if kedu.map(lambda x: abs(x)).max() >= ywananchor:
                kedubiaozhi = "%.1f万" % (kedu.iloc[i] / 10000)
                # major tick labels on the y-axis are rendered in units of 万
                plt.gca().yaxis.set_major_formatter(
                    FuncFormatter(lambda x, pos: "%d万" % int(x / 10000)))
            else:
                kedubiaozhi = "%d" % kedu.iloc[i]
            fontsize = 8
            # alternate the annotation offset direction to reduce overlap
            if (i % 2) == 0:
                zhengfu = -1
            else:
                zhengfu = 0.4
            plt.annotate(kedubiaozhi, xy=(idx, kedu.iloc[i]), xycoords='data',
                         xytext=(len(kedubiaozhi) * fontsize * zhengfu,
                                 int(len(kedubiaozhi) * fontsize * (-1)
                                     * zhengfu / 2)),
                         textcoords='offset points', fontsize=fontsize,
                         arrowprops=dict(arrowstyle="->",
                                         connectionstyle="arc3,rad=.2",
                                         color='Purple'))
def _plot(self):
    sum_elements = sum(len(scatter[0]) for scatter in self.scatters.values())
    for name, scatter in self.scatters.items():
        alpha_normed = min(float(self.alpha) / len(scatter[0]) * sum_elements, 1.)
        plt.scatter(scatter[0], scatter[1], s=self.size,
                    c=next(_COLOR_CYCLE), alpha=alpha_normed, label=name)
def scatter_plot(self, var1, var2, range_var, linear_regression, force=False):
    fig_name = "{}/scatterplot_{}_{}.pdf".format(self.fig_folder, var1, var2)
    if path.exists(fig_name) and not force:
        return

    print("Doing scatter plot '{}' against '{}'.".format(var2, var1))

    x = np.asarray(self.stats.data[var1])
    y = np.asarray(self.stats.data[var2])

    plt.scatter(x=x, y=y, c=self.stats.data["transportation_cost"],
                s=10, cmap=cm.plasma)
    plt.xlim(range_var[var1])
    plt.ylim(range_var[var2])
    plt.xlabel(self.format_label(var1))
    plt.ylabel(self.format_label(var2))

    if linear_regression:
        slope, intercept, r_value, p_value, std_err = linregress(x, y)
        plt.plot(x, intercept + x * slope, c="black", lw=2)

        with open("{}/stats.txt".format(self.fig_folder), "a",
                  encoding='utf-8') as f:
            to_write = "*****\n" + \
                "{} against {}\n".format(self.format_label(var2),
                                         self.format_label(var1)) + \
                "p value: {}\n".format(p_value) + \
                "intercept: {}\n".format(intercept) + \
                "slope: {}\n".format(slope) + \
                "r value: {}\n".format(r_value) + \
                "\n"
            f.write(to_write)

    plt.savefig(fig_name)
    if self.display:
        plt.show()
    plt.close()
def draw_spectrum(data_list):
    T = 3600
    amp_spec, power_spec, freq = spectrum(data_list, T)
    print(f'Max amp in spectrum: {np.max(amp_spec)}')  # f-string prefix was missing

    plt.figure(figsize=(18, 5))

    plt.subplot(131)
    x = list(range(len(data_list)))
    y = data_list
    plt.title("Observation wind data of Kyoto")
    plt.xlabel('Hours')
    plt.ylabel('Observation wind data of Kyoto')
    plt.plot(x, y, color='green')
    data_len = len(x)

    plt.subplot(132)
    plt.title("Power Spectrum of Wind")
    x = freq[int(data_len / 2):]
    y = power_spec[int(data_len / 2):]
    y[0] = 0  # zero out the 0 Hz (DC) component
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Intensity')
    plt.plot(x, y, color='orange')
    ax = plt.gca()
    x = x[1:]
    y = y[1:]
    ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))

    coeffs = np.polyfit(np.log(x), np.log(y), 1)
    beta = -coeffs[0]
    dimension = 1 + (3 - beta) / 2
    print(beta)
    print("The fractal dimension is", dimension)

    plt.subplot(133)
    plt.title("the Curve of log(power-spectrum) and log(frequency)")
    plt.scatter(np.log(x), np.log(y), marker='o', s=10, c=list(range(len(x))))
    # plt.plot(np.log(x), np.log(y), 'o', mfc='none')
    plt.plot(np.log(x), np.polyval(coeffs, np.log(x)))
    plt.xlabel('log freq')
    plt.ylabel('log intensity')

    plt.savefig("../pics/kyoto_wind.png")
    plt.show()
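# 'spectrum' is not defined in this excerpt. A minimal FFT-based sketch,
# assuming it returns (amplitude spectrum, power spectrum, frequencies) for a
# sampling interval T in seconds; fftshift ordering is assumed so that the
# caller's second-half slice picks up the non-negative frequencies:
import numpy as np

def spectrum(data_list, T):
    data = np.asarray(data_list, dtype=float)
    fft_vals = np.fft.fftshift(np.fft.fft(data))
    amp_spec = np.abs(fft_vals)
    power_spec = amp_spec ** 2
    freq = np.fft.fftshift(np.fft.fftfreq(len(data), d=T))
    return amp_spec, power_spec, freq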
def short_analysis(data, analysis_file_path, fig_root_name):
    # Suppose there are two indexes for rt
    for rt_idx in [1, 2]:
        # Convert the data to arrays for easier manipulation
        rt_column_name = "RT {}".format(rt_idx)
        rt = np.asarray(data[rt_column_name])
        rt_mt_column_name = "RT-MT {}".format(rt_idx)
        rt_mt = np.asarray(data[rt_mt_column_name])

        # Look where 'rt' and 'rt_mt' differ from zero
        cond0 = rt[:] != 0
        cond1 = rt_mt[:] != 0

        # Combine the two conditions
        idx = cond0 * cond1

        # Use the booleans as an index and make a cut in the data
        rt = rt[idx]
        rt_mt = rt_mt[idx]

        # Compute 'mt'
        mt = rt_mt - rt

        print("Short analysis.")
        print("'mt {}' is: \n".format(rt_idx), mt)

        # Save this in a new 'xlsx' file
        new_data = dict()
        new_data["RT{}".format(rt_idx)] = rt
        new_data["MT{}".format(rt_idx)] = mt
        write_a_new_file(file_path=analysis_file_path, data=new_data)

        # Do some plots
        plt.scatter(mt, rt)
        plt.xlabel("mt")
        plt.ylabel("rt")
        plt.savefig("{}_scatter_rt{}.pdf".format(fig_root_name, rt_idx))
        plt.close()

        plt.hist(mt)
        plt.xlabel("mt")
        plt.savefig("{}_hist_mt{}.pdf".format(fig_root_name, rt_idx))
        plt.close()

        plt.hist(rt)
        plt.xlabel("rt")
        plt.savefig("{}_hist_rt{}.pdf".format(fig_root_name, rt_idx))
        plt.close()
def plot_overview(self, suffix=''):
    x = self.x
    y = self.y
    r = self.radius
    cx, cy = self.center.real, self.center.imag
    ax = plt.axes()
    plt.scatter(x, y, marker='o', c='b', s=40)
    plt.axhline(y=0, color='grey', zorder=-1)
    plt.axvline(x=0, color='grey', zorder=-2)
    t = linspace(0, 2 * pi, 201)
    circx = r * cos(t) + cx
    circy = r * sin(t) + cy
    plt.plot(circx, circy, 'g-')
    plt.plot([cx], [cy], 'gx', ms=12)
    if self.ZorY == 'Z':
        philist = [self.phi_a, self.phi_p, self.phi_n]
        flist = [self.fa, self.fp, self.fn]
    elif self.ZorY == 'Y':
        philist = [self.phi_m, self.phi_s, self.phi_r]
        flist = [self.fm, self.fs, self.fr]
    for p, f in zip(philist, flist):
        if f is not None:
            xpos = cx + r * cos(p)
            ypos = cy + r * sin(p)
            xos = 0.2 * (xpos - cx)
            yos = 0.2 * (ypos - cy)
            plt.plot([0, xpos], [0, ypos], 'co-')
            ax.annotate('{:.3f} Hz'.format(f), xy=(xpos, ypos),
                        xycoords='data', xytext=(xpos + xos, ypos + yos),
                        textcoords='data',
                        arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=10))
    # plt.xlim(0, 0.16)
    # plt.ylim(-0.1, 0.1)
    plt.axis('equal')
    if self.ZorY == 'Z':
        plt.xlabel(r'resistance $R$ in Ohm')
        plt.ylabel(r'reactance $X$ in Ohm')
    if self.ZorY == 'Y':
        plt.xlabel(r'conductance $G$ in Siemens')
        plt.ylabel(r'susceptance $B$ in Siemens')
    plt.title("fitting the admittance circle with Powell's method")
    tx1 = 'best fit (fmin_powell):\n'
    tx1 += 'center at G+iB = {:.5f} + i*{:.8f}\n'.format(cx, cy)
    tx1 += 'radius = {:.5f}; '.format(r)
    tx1 += 'residue: {:.2e}'.format(self.resid)
    txt1 = plt.text(-r, cy - 1.1 * r, tx1, fontsize=8, ha='left', va='top')
    txt1.set_bbox(dict(facecolor='gray', alpha=0.25))
    idxlist = self.to_be_annotated('triple')
    ofs = self.annotation_offsets(idxlist, factor=0.1, xshift=0.15)
    for i, j in enumerate(idxlist):
        xpos, ypos = x[j], y[j]
        xos, yos = ofs[i].real, ofs[i].imag
        ax.annotate('{:.1f} Hz'.format(self.f[j]), xy=(xpos, ypos),
                    xycoords='data', xytext=(xpos + xos, ypos + yos),
                    textcoords='data',
                    arrowprops=dict(arrowstyle="->", shrinkA=0, shrinkB=10))
    if self.show:
        plt.show()
    else:
        plt.savefig(join(self.sdc.plotpath,
                         'c{}_fitted_{}_circle'.format(self.sdc.case, self.ZorY)
                         + suffix + '.png'),
                    dpi=240)
    plt.close()
def draw(weightvect):
    """
    Draws the separating line defined by the weight vector, together with
    the training points, and highlights the special point (1, 1).
    """
    axeofx = [-1, 2]
    axeofy = calculate(axeofx, weightvect)
    plt.xlim(-1, 2)
    plt.ylim(-1, 2)
    plt.plot(axeofx, axeofy, color="black")
    pointx = [0, 0, 1]
    pointy = [0, 1, 0]
    plt.scatter(pointx, pointy, color="blue")
    plt.scatter(1, 1, color="red")
    plt.ylabel("x2")
    plt.xlabel("x1")
    title1 = "Weight Vector : " + str(weightvect)
    plt.title(title1, backgroundcolor="green", color="black")
    plt.pause(0.2)
    plt.cla()
    return 0
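# 'calculate' is not defined in this snippet. A minimal sketch, assuming
# weightvect = [w0, w1, w2] with w0 the bias, and the separating line
# w0 + w1*x1 + w2*x2 = 0, i.e. x2 = -(w0 + w1*x1) / w2:
def calculate(xs, weightvect):
    w0, w1, w2 = weightvect
    return [-(w0 + w1 * x) / w2 for x in xs]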
def scatter_plot(self, var1, var2, range_var, linear_regression):
    print("Doing scatter plot '{}' against '{}'.".format(var2, var1))

    x = np.asarray(self.stats.data[var1])
    y = np.asarray(self.stats.data[var2])

    plt.scatter(x=x, y=y, color="black", s=10)
    plt.xlim(range_var[var1])
    plt.ylim(range_var[var2])
    plt.xlabel(self.format_label(var1))
    plt.ylabel(self.format_label(var2))

    if linear_regression:
        slope, intercept, r_value, p_value, std_err = linregress(x, y)
        plt.plot(x, intercept + x * slope, c="black", lw=2)

        with open("{}/stats.txt".format(self.fig_folder), "a",
                  encoding='utf-8') as f:
            to_write = "*****\n" + \
                "{} against {}\n".format(self.format_label(var2),
                                         self.format_label(var1)) + \
                "p value: {}\n".format(p_value) + \
                "intercept: {}\n".format(intercept) + \
                "slope: {}\n".format(slope) + \
                "r value: {}\n".format(r_value) + \
                "\n"
            f.write(to_write)

    plt.savefig("{}/scatterplot_{}_{}.pdf".format(self.fig_folder, var1, var2))
    if self.display:
        plt.show()
    plt.close()
def makeFig():
    plt.scatter(x, y)
w = np.random.random((1000, len(symbols)))
w = (w.T / w.sum(axis=1)).T  # normalize rows so each weight vector sums to 1
w[:5]

pvr = [(port_volatility(rets[symbols], weights),
        port_return(rets[symbols], weights))
       for weights in w]
pvr = np.array(pvr)
psr = pvr[:, 1] / pvr[:, 0]  # Sharpe ratio (no risk-free rate)

plt.figure(figsize=(10, 6))
fig = plt.scatter(pvr[:, 0], pvr[:, 1], c=psr, cmap='coolwarm')
cb = plt.colorbar(fig)
cb.set_label('Sharpe ratio')
plt.xlabel('expected volatility')
plt.ylabel('expected return')
plt.title(' | '.join(symbols))

# Import libraries
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt

# Creating dataset
z = pvr[:, 0]
x = pvr[:, 1]
y = psr
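# 'port_return' and 'port_volatility' are assumed helpers not defined in this
# excerpt. A sketch consistent with the usage above, assuming 'rets' holds
# daily log returns and 252 trading days per year:
import numpy as np

def port_return(rets, weights):
    return np.sum(rets.mean() * weights) * 252

def port_volatility(rets, weights):
    return np.sqrt(np.dot(weights.T, np.dot(rets.cov() * 252, weights)))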
def __draw_plot(xx, yy):
    plt.scatter(xx, yy)
    # start at 1: with range(0, ...), xx[i - 1] wraps around and draws an
    # extra segment from the last point back to the first
    for i in range(1, len(xx)):
        plt.plot([xx[i - 1], xx[i]], [yy[i - 1], yy[i]],
                 linestyle='-', color='r')
# mms = MinMaxScaler()
# mms.fit(data)
# data = mms.transform(data)

xlabel = 'sin(ellipticity*pi/2)^0.125: 0 - more spot, 1 - more track/worm'
ylabel = 'cos(solidity*pi/2)^0.125: 0 - more spot/track, 1 - more worm'
# xlabel = 'ellipticity: 0 - more spot, 1 - more track/worm'
# ylabel = 'solidity: 0 - more worm, 1 - more spot/track'
title = 'all devices'

if False:  # set to True to preview the raw feature scatter
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.scatter(data[:, 0], data[:, 1], s=.1)
    plt.show()
    # exit(0)

# data = mms.transform(data)

# Elbow-method sketch for choosing k (kept from the original, commented out):
# Sum_of_squared_distances = []
# K = range(1, 15)
# for k in K:
#     km = KMeans(n_clusters=k)
#     km = km.fit(data_transformed)
#     Sum_of_squared_distances.append(km.inertia_)
#
# plt.plot(K, Sum_of_squared_distances, 'bx-')
# plt.xlabel('k')
# plt.ylabel('Sum_of_squared_distances')
def display_random_data():
    random_sample = data[random.choice(data.columns)]
    plt.scatter(range(0, len(random_sample)), random_sample.values)
    plt.show()
# The dangling kwargs below are the tail of a fitted sklearn KMeans repr
# (notebook output residue); the model construction itself is missing from
# this excerpt:
#   KMeans(..., n_init=10, n_jobs=None, precompute_distances='auto',
#          random_state=0, tol=0.0001, verbose=0)

data['pos_clus'] = model.predict(data[cols])

# Given the cluster values, a position is chosen
data['pos_clus'] = np.where(data['pos_clus'] == 1, -1, 1)
data['pos_clus'].values

plt.figure(figsize=(10, 6))
plt.scatter(data[cols].iloc[:, 0], data[cols].iloc[:, 1],
            c=data['pos_clus'], cmap='coolwarm')
plt.title('Two clusters as determined by k-means algorithm')

# This approach is quite arbitrary in this context, as we didn't really
# specify what the algorithm should look for; however, it seems to do a bit
# better than the benchmark. We can see that the hit ratio - i.e. the number
# of correct predictions - is less than 50%.
data['strat_clus'] = data['pos_clus'] * data['returns']
data[['returns', 'strat_clus']].sum().apply(np.exp)
(data['direction'] == data['pos_clus']).value_counts()
data[['returns', 'strat_clus']].cumsum().apply(np.exp).plot(figsize=(10, 6))
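# A sketch of the missing model construction, assuming two clusters fitted on
# the feature columns 'cols' (consistent with the repr fragment above):
from sklearn.cluster import KMeans

model = KMeans(n_clusters=2, random_state=0)
model.fit(data[cols])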
# Low-Redshift Quasars
# hex_contour(irlzcolor, ch2lzmag, levels=[0.95, 0.9, 0.7, 0.5, 0.3, 0.1],
#             std=True, min_cnt=10, smoothing=2, hkwargs={'gridsize': 25},
#             skwargs={'color': lzs, 'alpha': 0.1, 'marker': '.'},
#             ckwargs={'colors': lzc, 'alpha': 1, 'linewidths': 2})

# GTR: Don't hard code the levels. Generate them automatically.
# JT: Something like this?
levels = np.arange(0.1, 1.0, 0.2)

# High-Redshift Quasars
# hex_contour(tdata['col1'], tdata['col2'],
#             levels=[0.1, 0.3, 0.5, 0.7, 0.9, 0.95], std=True, min_cnt=10,
#             smoothing=4, hkwargs={'gridsize': 10},
#             skwargs={'color': sc[0], 'alpha': 0.1, 'marker': '.'},
#             ckwargs={'colors': [sc[0], sc[0], sc[0]], 'alpha': 1,
#                      'linewidths': 2})
# hex_contour(tdata.s1s2, tdata.s2mag, levels=levels, std=True, min_cnt=10,
#             smoothing=4, hkwargs={'gridsize': 10},
#             skwargs={'color': sc[0], 'alpha': 0.5, 'marker': '.'},
#             ckwargs={'colors': [sc[0], sc[0], sc[0]], 'alpha': 1,
#                      'linewidths': 2})

# GTR: Is something like this better?
# JT: It is essentially the same thing. The levels are more intuitive in
# hex_contour (it does percentages; I can't figure out how plt.contour defines
# its levels). This is a good option for people who have not downloaded the
# density-plot package, though.
m1 = tdata.s1s2
m2 = tdata.s2mag
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([m1, m2])
kernel = stats.gaussian_kde(values)
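# A sketch of how the KDE above could be turned into contours with plain
# matplotlib (an assumed continuation, not part of the original excerpt):
Z = np.reshape(kernel(positions).T, X.shape)
plt.contour(X, Y, Z)
plt.scatter(m1, m2, s=2, alpha=0.1)
plt.show()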
signals = extract_activity_signals(activity, resample='existing')
power = signals['power']
balance = signals['left_right_balance']
time = signals['time']
PAvgBalance = sum(power * balance) / sum(power)

# get session info
records = activity.get_records_by_type('session')
for record in records:
    valid_field_names = record.get_valid_field_names()

# plotting
CrossPlotFig = plt.figure()
sc = plt.scatter(power, balance, s=5, c=time, cmap=plt.get_cmap('brg'),
                 edgecolors='face')
plt.colorbar(orientation='horizontal')
plt.title('Balance Vs Power over Time (sec)\n'
          + 'power-weighted average = %4.1f' % PAvgBalance)
plt.xlabel('Power (w)')
plt.ylabel('Right Balance (%)')
plt.grid(True, which='major', axis='both')  # the 'b=' keyword is deprecated
ax = plt.gca()
grids = arange(10, 100, 10)  # force a grid line at 50
ax.set_yticks(grids, minor=False)
ax.grid(True)
plt.show()
def expected_r2(tickers, start_date):
    today = pd.Timestamp.today()  # pd.datetime is deprecated in recent pandas
    # map the lookback label to a number of years ('max' capped at 30),
    # replacing the original repeated if/elif blocks
    years = {'1y': 1, '3y': 3, '5y': 5, '10y': 10, 'max': 30}[start_date]
    delta = (today - pd.DateOffset(years=years)).date().strftime('%Y-%m-%d')

    prices = ffn.get(tickers, start=delta)
    noa = len(tickers)
    global rets
    rets = np.log(prices / prices.shift(1))
    # rets.hist(bins=40, figsize=(10, 8))

    cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    bnds = tuple((0, 1) for x in range(noa))
    eweights = np.array(noa * [1. / noa])
    opts = sco.minimize(min_func_sharpe, eweights, method='SLSQP',
                        bounds=bnds, constraints=cons)

    st.write("The expected return is: {:.2f}".format(
        port_ret(opts['x'].round(3))))
    st.write("The expected volatility is: {:.2f}".format(
        port_vol(opts['x'].round(3))))
    # note: the original misplaced a parenthesis here,
    # port_ret(opts['x'] / port_vol(opts['x']))
    st.write("The Sharpe Ratio is: {:.2f}".format(
        port_ret(opts['x']) / port_vol(opts['x'])))

    st.subheader("How to best allocate the portfolio to maximize the return:")
    for i, x in enumerate(opts['x']):
        st.write(tickers[i] + ": " + str(x.round(2)))

    prets = []
    pvols = []
    for p in range(2500):
        weights = np.random.random(noa)
        weights /= np.sum(weights)
        prets.append(port_ret(weights))
        pvols.append(port_vol(weights))
    prets = np.array(prets)
    pvols = np.array(pvols)

    # efficient-frontier code kept from the original, commented out:
    # optv = sco.minimize(port_vol, eweights, method='SLSQP', bounds=bnds,
    #                     constraints=cons)
    # cons = ({'type': 'eq', 'fun': lambda x: port_ret(x) - tret},
    #         {'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    # bnds = tuple((0, 1) for x in weights)
    # trets = np.linspace(0.05, 0.3, 50)
    # tvols = []
    # for tret in trets:
    #     res = sco.minimize(port_vol, eweights, method='SLSQP', bounds=bnds,
    #                        constraints=cons)
    #     tvols.append(res['fun'])
    # tvols = np.array(tvols)
    # plt.plot(port_vol(opts['x']), port_ret(opts['x']), 'y*', markersize=15.0)
    # plt.plot(port_vol(optv['x']), port_ret(optv['x']), 'r*', markersize=15.0)

    fig, ax = plt.subplots()
    im = plt.scatter(pvols, prets, c=prets / pvols, marker='o', cmap='coolwarm')
    plt.xlabel('Expected Volatility')
    plt.ylabel('Expected Return')
    fig.colorbar(im, label='Sharpe Ratio')
    st.write(fig)
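# 'port_ret', 'port_vol', and 'min_func_sharpe' are assumed helpers not
# defined in this excerpt. A sketch consistent with the usage above
# (annualized log returns, 252 trading days, no risk-free rate):
def port_ret(weights):
    return np.sum(rets.mean() * weights) * 252

def port_vol(weights):
    return np.sqrt(np.dot(weights.T, np.dot(rets.cov() * 252, weights)))

def min_func_sharpe(weights):
    return -port_ret(weights) / port_vol(weights)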
prets = []
pvols = []
for p in range(2000):
    weights = np.random.random(noa)
    weights /= np.sum(weights)
    prets.append(port_ret(weights))
    pvols.append(port_vol(weights))
prets = np.array(prets)
pvols = np.array(pvols)
# print(prets, pvols)

plt.figure(figsize=(10, 6))
plt.scatter(pvols, prets, c=prets / pvols, marker='o', cmap='coolwarm')
plt.xlabel('expected volatility')
plt.ylabel('expected return')
plt.colorbar(label='Sharpe Ratio')
# plt.show()

# export_csv = df.to_csv(r'C:\Users\wbart\Desktop\352data\pretspvols.csv',
#                        index=None, header=True)


def min_func_sharpe(weights):
    return -port_ret(weights) / port_vol(weights)  # add in rf rate here


cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})  # weights sum to 1
print("RRRRRRRRRRR", prets) print("VVVVVVVVVVV", pvols) #print("WWWWWWWWW", pweights) # Monte Carlo simulation of portfolio weights: # With the for loop a given number of portfolios (50000) with different weights is created. We then store the # portfolio returns and the portfolio volatilities into the following arrays: # - prets consists of an array of the returns of all the portfolio created. # - pvols consists of an array of the corresponding voloatility of all the portfolio created. plt.figure(figsize=(20, 12)) plt.scatter( pvols, prets, c=prets / pvols, marker='.', cmap='Spectral', ) plt.xlabel('Expected Volatility', size=20) plt.ylabel('Expected Return', size=20) plt.colorbar(label='Sharpe Ratio') plt.title('Figure 1', size=25) plt.show() # - Illustrates the result of the Monte Carlo Simulation # - Scatter plot of 50000 portfolios created from the 3 underlying # # Optimal Portfolios def min_func_sharpe(weights):
results = pandas.read_csv(".\\data\\table16subj.csv", sep=";")
y = results[results.columns[2]]
temp_x = normalize(results[results.columns[4:]])

print("Total features results: \n")
regr_total(results, y)
print("\n Greedy Forward Selection total features results: \n")  # Python 2 print statements converted
regr_iteration(temp_x, y)
print("\n Greedy Forward Selection LeaveOneOut results: \n")
features = regr_leavoneout(temp_x, y)
print("\n Greedy Forward Selection Results Score (Test on train): \n")
regr_score(features, results, y)

'''
# Plot outputs
plt.scatter(array(temp_x['vMin']), y, color='blue')
plt.scatter(array(temp_x['nr_correct_within_item']), y, color='blue')
plt.scatter(array(temp_x['nr_pauses']), y, color='blue')
plt.scatter(array(temp_x['vMovingMin']), y, color='blue')
plt.scatter(array(temp_x['straightnessMax']), y, color='blue')
plt.scatter(array(temp_x['nrZapp']), y, color='blue')
plt.plot(new_x, regr.predict(new_x), '.y')
plt.show()
'''

temp_x = normalize(results[['vMin', 'nr_correct_within_item']])
new_x = [[] for i in arange(0, len(y))]
for jj in temp_x.columns:
    for ix in arange(0, len(y)):
        new_x[ix].append(temp_x[jj][ix])
def makeFig():
    plt.scatter(x, y)  # I think you meant this
res2 = mcs_pi_nb2(50)

%timeit res = mcs_pi_np(500000)
%timeit res1 = mcs_pi_py(500000)
%timeit res2 = mcs_pi_nb1(500000)
%timeit res2 = mcs_pi_nb2(500000)

# ............ Study ................
arr = np.empty((0, 3), float)
myr = np.arange(1, 9 + 1, 1)
ml = [10**6] * myr    # elementwise: 1e6 * [1..9] via ndarray broadcasting
mlt = [10**7] * myr
mlh = [10**8] * myr
ss = np.append(ml, mlt)
ss = np.append(ss, mlh)
sci_pi = np.pi

for k in ss:
    t0 = perf_counter_ns()
    my_pi = mcs_pi_nb2(k)
    t1 = perf_counter_ns() - t0
    arr = np.append(arr, np.array([[k, my_pi, t1]]), axis=0)
    print("At k = " + str(k) + ", time = " + str(dt.datetime.now()))

df = pd.DataFrame(arr, columns=['SampleSize', 'Estim{ Pi }', 'Estim_Time'])

plt.scatter(df['SampleSize'], df['Estim{ Pi }'], s=np.log(df['Estim_Time']))
plt.hlines(y=sci_pi, xmin=0, xmax=max(ss), linestyles='dashed',
           color='r', alpha=0.3)
plt.title('Monte Carlo Simulation: estimation of PI')
plt.xlabel('SampleSize')
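# The mcs_pi_* variants (pure Python, NumPy, Numba) are not defined in this
# excerpt. A sketch of the NumPy version, assuming it estimates pi by sampling
# uniform points in the unit square and counting those inside the unit circle:
import numpy as np

def mcs_pi_np(n):
    pts = np.random.random((int(n), 2))
    inside = (pts ** 2).sum(axis=1) <= 1.0
    return 4.0 * inside.mean()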
# title: German DAX index and PCA indices with 1 and 5 components

# In[19]:
import matplotlib as mpl
mpl_dates = mpl.dates.date2num(data.index.to_pydatetime())
mpl_dates[:10]

# In[20]:
plt.figure(figsize=(8, 4))
plt.scatter(dax['PCA_5'], dax['^GDAXI'], c=mpl_dates)
lin_reg = np.polyval(np.polyfit(dax['PCA_5'], dax['^GDAXI'], 1),
                     dax['PCA_5'])
plt.plot(dax['PCA_5'], lin_reg, 'r', lw=3)
plt.grid(True)
plt.xlabel('PCA_5')
plt.ylabel('^GDAXI')
plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
             format=mpl.dates.DateFormatter('%d %b %y'))
# tag: pca_3
# title: DAX return values against PCA return values with linear regression

# In[21]:
pvols = []
for p in range(2500):
    weights = np.random.random(noa)
    weights /= np.sum(weights)
    prets.append(np.sum(rets.mean() * weights) * 252)
    pvols.append(np.sqrt(np.dot(weights.T,
                                np.dot(rets.cov() * 252, weights))))
prets = np.array(prets)
pvols = np.array(pvols)

# In[47]:
plt.figure(figsize=(8, 4))
plt.scatter(pvols, prets, c=prets / pvols, marker='o')
plt.grid(True)
plt.xlabel('expected volatility')
plt.ylabel('expected return')
plt.colorbar(label='Sharpe ratio')
# tag: portfolio_2
# title: Expected return and volatility for different/random portfolio weights
# size: 90

# ### Portfolio Optimizations

# In[48]:
def statistics(weights):
# .flat gives a flat (1-D) iterator over the array
# print("x1=\n", x1.flat)
grid_hat = clf.predict(grid_test)
print('grid_hat = \n', grid_hat)        # predicted class values
grid_hat = grid_hat.reshape(x1.shape)   # reshape to match the input grid

mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# Plotting:
# pcolormesh(x, y, z, cmap) with x1, x2, grid_hat and cmap=cm_light draws the
# decision-region background. In scatter, edgecolors sets the point edge
# color, s the point size, and cmap the point colormap. xlim sets the plot
# bounds.
cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])  # green, red, purple
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light)
print(grid_hat)
plt.scatter(x[:, 0], x[:, 1], c=y, edgecolors='k', s=50, cmap=cm_dark)  # samples
plt.scatter(x_test[:, 0], x_test[:, 1], s=120, facecolors='none',
            zorder=10)  # circle the test-set samples
plt.xlabel(u'sepal length', fontsize=13)
plt.ylabel(u'sepal width', fontsize=13)
plt.xlim(x1_min, x1_max)
plt.ylim(x2_min, x2_max)
plt.title(u'Iris SVM two-feature classification', fontsize=15)
# plt.grid()
plt.show()
optimal_simgmoid_params[station] = {'a': a, 'b': b, 'y_scale': y_scale}

sns.set_context('notebook', font_scale=1.0, rc={"lines.linewidth": 1.5})
sns.set_palette('colorblind')
plt.rcParams['savefig.bbox'] = 'tight'

plt.figure()
for i, station in enumerate(data['station'].unique()):
    p = optimal_simgmoid_params[station]
    xvals = np.linspace(0, 20, 1000)
    ypred = fsigmoid(xvals, p['a'], p['b']) * p['y_scale']
    sub = data[data['station'] == station]
    X, y = sub['ff'], sub['power']
    color = sns.color_palette()[i]
    plt.plot(xvals, ypred, color=color, alpha=0.8)
    plt.scatter(X, y, color=color, label=station, s=4, alpha=0.6,
                edgecolors='face')
plt.ylabel('production [kW]')
plt.xlabel('windspeed [m/s]')
sns.despine()
plt.legend()
plt.savefig('plots/potential_production_fit.pdf')

pickle.dump(optimal_simgmoid_params, open('optimal_sigmoid_params.pkl', 'wb'))
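# 'fsigmoid' is not defined in this excerpt. A common parameterization,
# assumed here, is the logistic function with slope a and midpoint b:
import numpy as np

def fsigmoid(x, a, b):
    return 1.0 / (1.0 + np.exp(-a * (x - b)))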
def _plot(self):
    sum_elements = sum(len(scatter[0]) for scatter in self.scatters.values())
    for name, scatter in self.scatters.items():
        alpha_normed = min(float(self.alpha) / len(scatter[0]) * sum_elements, 1.)
        plt.scatter(scatter[0], scatter[1], s=self.size,
                    c=next(_COLOR_CYCLE),  # Python 3: next(it), not it.next()
                    alpha=alpha_normed, label=name)
def blanking(self, semi_data, int_data, nug, R):
    # def blanking(self, semi_data, int_data, prediction, nug, R):
    # =========================================================================
    # Defining and Initializing Variables
    # =========================================================================
    predictor = defaultdict(list)
    mindistance = defaultdict(list)
    color = defaultdict(list)
    frequency = []
    mean = []
    std = []
    lag = []
    classed_resi = []
    classed_dist = []
    col2del = []
    covariance = sv.Semivariogram(semi_data).isotropy(nug)
    # input("Press Enter to continue")
    # =========================================================================
    # 150 m = max. radius for Elevation (Z) in 2001
    # 350 m = max. radius for Elevation (Z) in 2011
    # 250 m = max. radius for Synthetic "Profile A" ice thickness and Profile A&B
    # 350 m = max. radius for Synthetic "Profile B" ice thickness
    #
    # R = maximum radius between the two closest data points in the dataset.
    # It is obtained by direct observation of the distribution of the dataset.
    # =========================================================================
    # R = 350
    C = 10
    r0 = R / 100
    sep = np.linspace(R / C, R, C)
    blanking_radii = np.hstack((0, r0, sep))
    # blanking_radii = np.hstack((0, r0))

    # =========================================================================
    # Estimating the interpolated value for the entire grid of points
    # (kept from the original, commented out)
    # =========================================================================
    # inter = []
    # dist_min = []
    # for k in range(len(prediction)):
    #     inter.append(kg.Kriging().ordinary(covariance, semi_data,
    #                                        prediction[k, :2], 0, 0))
    #     dist_min.append(np.min(cdist(int_data[:, :2], prediction[k, :2][None])))
    #     print(str(k) + ' ' + str(len(prediction)))
    # inter = np.hstack(inter)
    # dist_min = np.hstack(dist_min)
    # krige_mindist = pd.DataFrame(np.column_stack((prediction, inter, dist_min)))
    # w_krige_mindist = pd.ExcelWriter(
    #     str(time.localtime()[0]) + str(time.localtime()[1])
    #     + str(time.localtime()[2]) + '_' + str(time.localtime()[3])
    #     + str(time.localtime()[4]) + str(time.localtime()[5])
    #     + '_Prediction.xlsx', engine='xlsxwriter')
    # krige_mindist.to_excel(w_krige_mindist, sheet_name='Prediction')
    # w_krige_mindist.save()

    # =========================================================================
    # Blanking data inside the defined radius prior to kriging to obtain the
    # interpolation with its error
    # =========================================================================
    for i in range(len(blanking_radii)):
        krige = []
        min_dist = []
        for j in range(len(int_data)):
            unblanked = semi_data[((semi_data[:, :2] - int_data[j, :2])**2).sum(1)
                                  > blanking_radii[i]**2]
            min_dist.append(np.min(cdist(unblanked[:, :2], int_data[j, :2][None])))
            krige.append(np.hstack(kg.Kriging().ordinary(covariance, unblanked,
                                                         int_data[j, :2])))
            print(str(i) + ' ' + str(j))
        predictor[i] = np.hstack(krige)
        mindistance[i] = np.hstack(min_dist)
        del krige
        del min_dist

    mindistance = pd.DataFrame(mindistance)
    predictor = pd.DataFrame(predictor)
    color = pd.DataFrame(predictor)
    predictor = predictor.T.drop_duplicates().T
    mindistance = mindistance.T.drop_duplicates().T
    color = color.T.drop_duplicates().T
    predictor = predictor.apply(pd.Series.drop_duplicates, axis=1)
    mindistance = mindistance.apply(pd.Series.drop_duplicates, axis=1)
    color = color.apply(pd.Series.drop_duplicates, axis=1)
    # =========================================================================
    # Get the interpolator error
    # =========================================================================
    error = (np.array(predictor).transpose()
             - int_data[:, 2].transpose()).transpose()

    # =========================================================================
    # Scatter plot of minimum distance between points versus interpolator error
    # =========================================================================
    mindistance = np.array(mindistance)
    color = np.array(color)
    # color = np.random.rand(len(predictor.columns))
    for i in range(len(predictor.columns)):
        for k in range(len(predictor)):
            color[k, i] = blanking_radii[i]
    plt.scatter(mindistance, error, c=color)
    plt.xlim(0, 450)
    plt.ylim(-250, 300)
    plt.savefig('Scatter Plot.pdf', format='pdf', dpi=200)  # savefig takes 'format', not 'fmt'
    plt.show()

    for i in range(len(predictor.columns)):
        plt.scatter(mindistance[:, i], error[:, i])
        plt.xlim(0, 450)
        plt.ylim(-250, 300)
        plt.savefig('Scatter-' + str(blanking_radii[i]) + '.pdf',
                    format='pdf', dpi=200)
        plt.show()

    vecresi = np.array(error).ravel()
    vecdist = np.array(mindistance).ravel()
    sep = np.linspace(R / C, R, C)
    lags = (sep[1:] + sep[:-1]) / 2
    lags = np.hstack((0, r0, R / (2 * C), lags, 2 * lags[-1] - lags[-2]))
    count = -1
    for ilag in lags[:-1]:
        count = count + 1
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            frequency.append(np.sum((vecdist[:] >= ilag)
                                    & (vecdist[:] < lags[count + 1])))
            classed_resi.append(vecresi[(vecdist[:] >= ilag)
                                        & (vecdist[:] < lags[count + 1])])
            classed_dist.append(np.average(vecdist[(vecdist[:] >= ilag)
                                                   & (vecdist[:] < lags[count + 1])]))
            mean.append(np.average(vecresi[(vecdist[:] >= ilag)
                                           & (vecdist[:] < lags[count + 1])]))
            std.append(np.std(vecresi[(vecdist[:] >= ilag)
                                      & (vecdist[:] < lags[count + 1])]))
    classed_error = pd.DataFrame(classed_resi).transpose()
    lag = np.hstack((0, r0, sep))
    iclassed_error = pd.DataFrame(classed_resi).transpose()
    count = -1
    for i in range(len(classed_error.columns)):
        count = count + 1
        if np.count_nonzero(~np.isnan(np.array(classed_error)[:, i])) < 100:
            col2del.append(count)
            iclassed_error.drop(i, axis=1, inplace=True)
    lag = np.delete(lag, col2del)
    mean = np.delete(mean, col2del)
    std = np.delete(std, col2del)
    classed_dist = np.delete(classed_dist, col2del)
    frequency = np.delete(frequency, col2del)

    # =========================================================================
    # Export the interpolator error grouped in classes as an Excel file
    # =========================================================================
    error = pd.DataFrame(error)
    w_err_mindist = pd.ExcelWriter(
        str(time.localtime()[0]) + str(time.localtime()[1])
        + str(time.localtime()[2]) + '_' + str(time.localtime()[3])
        + str(time.localtime()[4]) + str(time.localtime()[5]) + '_Error.xlsx',
        engine='xlsxwriter')
    error.to_excel(w_err_mindist, sheet_name='Error')

    # =========================================================================
    # Export the minimum distance grouped in classes as an Excel file
    # =========================================================================
    mindistance = pd.DataFrame(mindistance)
    mindistance.to_excel(w_err_mindist, sheet_name='Minimum Distance')
    w_err_mindist.save()

    # =========================================================================
    # Scatter plot of the error grouped in classes defined by the vector "lag"
    # =========================================================================
    iclassed_error = np.array(iclassed_error)
    plt.scatter(np.tile(classed_dist, len(iclassed_error)),
                iclassed_error.flatten())
    plt.plot(classed_dist, mean, 'o', c='k')
    plt.plot(classed_dist, std, 'o', c='r')
    plt.xlabel('Distance')
    plt.ylabel('Error')
    plt.title('DBF and DEF')
    # plt.savefig(str(time.localtime()[0]) + str(time.localtime()[1])
    #             + str(time.localtime()[2]) + '_' + str(time.localtime()[3])
    #             + str(time.localtime()[4]) + str(time.localtime()[5])
    #             + '_Validation.png', format='png', dpi=200)
    plt.show()

    # weight = np.sqrt(std) ** 2
    weight = (frequency * classed_dist) / np.sum(frequency * classed_dist)
    # weight = np.sqrt(blanking_radii) ** 2

    # =========================================================================
    # Least-squares fit to obtain the coefficients of the polynomial
    # =========================================================================
    paramsMean = curve_fit(fnc.fitfunction, classed_dist, mean, sigma=weight,
                           bounds=((-np.inf, -np.inf, -np.inf, 0),
                                   (np.inf, np.inf, np.inf, 0.000001)))
    paramsStD = curve_fit(fnc.fitfunction, classed_dist, std, sigma=weight,
                          bounds=((-np.inf, -np.inf, -np.inf, 0),
                                  (np.inf, np.inf, np.inf, nug)))
    [m1, m2, m3, m4] = paramsMean[0]
    [s1, s2, s3, s4] = paramsStD[0]
    classed_dist = np.hstack((0, classed_dist))
    mean = np.hstack((0, mean))
    std = np.hstack((s4, std))
    frequency = np.hstack((0, frequency))
    x_int = np.linspace(np.min(classed_dist), np.max(classed_dist), 200)
    f_mean = np.poly1d(paramsMean[0])
    f_std = np.poly1d(paramsStD[0])
    mean_int = f_mean(x_int)
    std_int = f_std(x_int)

    _, ax = plt.subplots()
    plt.plot(x_int, mean_int)
    plt.plot(x_int, std_int)
    plt.plot(classed_dist, mean, 'o', c='k',
             label=str(m1) + "x^3 " + str(m2) + "x^2 " + str(m3) + "x")
    plt.plot(classed_dist, std, 'x', c='r',
             label=str(s1) + "x^3 " + str(s2) + "x^2 " + str(s3) + "x " + str(s4))
    for i, txt in enumerate(frequency):
        ax.annotate(txt, (classed_dist[i], mean[i]))
        ax.annotate(txt, (classed_dist[i], std[i]))
    plt.legend(loc=2, fontsize='xx-small', borderaxespad=0.)
    plt.savefig('Validation Fit.pdf', format='pdf', dpi=200)
    plt.show()
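# 'fnc.fitfunction' is external to this excerpt. Given the four fitted
# coefficients, the bounds pinning the constant term, and the np.poly1d calls
# above, it is presumably a cubic polynomial; a sketch under that assumption:
def fitfunction(x, a, b, c, d):
    return a * x**3 + b * x**2 + c * x + d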
def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    # set the figure size
    plt.rcParams['figure.figsize'] = figsize


def show_images(imgs, num_rows, num_cols, scale=2):
    '''
    Plot multiple images in a grid.
    '''
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    return axes


if __name__ == '__main__':
    num_inputs = 2
    num_examples = 1000
    true_w = [2, -3.4]
    true_b = 4.2
    features = np.random.normal(scale=1, size=(num_examples, num_inputs))
    labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
    labels += np.random.normal(scale=0.01, size=labels.shape)
    set_figsize()
    plt.scatter(features[:, 1], labels, 1)
    plt.show()
# In[46]:
def port_vol(weights):
    return np.sqrt(np.dot(weights.T, np.dot(rets.cov() * 252, weights)))


# In[47]:
prets = []
pvols = []
for p in range(2500):
    weights = np.random.random(noa)
    weights /= np.sum(weights)
    prets.append(port_ret(weights))
    pvols.append(port_vol(weights))
prets = np.array(prets)
pvols = np.array(pvols)


# In[48]:
plt.figure(figsize=(10, 6))
plt.scatter(pvols, prets, c=prets / pvols, marker='o', cmap='coolwarm')
plt.xlabel('Expected Volatility')
plt.ylabel('Expected Return')
plt.title('Expected Return and Volatility for Random Portfolio Weights '
          '(SPY, AAPL, MSFT, GLD)')
plt.colorbar(label='Sharpe Ratio')