def do_fit(
    self,
    dataframe: pd.DataFrame,
    labeled_segments: List[AnalyticSegment],
    deleted_segments: List[AnalyticSegment],
    learning_info: LearningInfo,
) -> None:
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list))
    self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
    convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
    correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
    height_list = learning_info.patterns_value

    del_conv_list = []
    delete_pattern_width = []
    delete_pattern_height = []
    delete_pattern_timestamp = []
    for segment in deleted_segments:
        delete_pattern_timestamp.append(segment.pattern_timestamp)
        deleted = utils.get_interval(data, segment.center_index, self.state.window_size)
        deleted = utils.subtract_min_without_nan(deleted)
        del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
        if len(del_conv):
            del_conv_list.append(max(del_conv))
        delete_pattern_height.append(utils.find_confidence(deleted)[1])

    self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)
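# The do_fit variants in this collection call several helpers from `utils` that are not shown
# (get_interval, subtract_min_without_nan). The sketch below is an assumption about their
# behavior, inferred only from how they are called: get_interval slices a window around a
# center index and subtract_min_without_nan rebases the window to zero while ignoring NaNs.
# It is illustrative only, not the actual library code.
import numpy as np
import pandas as pd

def get_interval(data: pd.Series, center: int, window_size: int) -> pd.Series:
    # Take `window_size` points on each side of `center`, clipped to the series bounds.
    left = max(0, center - window_size)
    right = min(len(data), center + window_size + 1)
    return data.iloc[left:right]

def subtract_min_without_nan(segment: pd.Series) -> pd.Series:
    # Rebase the segment so its smallest non-NaN value becomes zero.
    if segment.dropna().empty:
        return segment
    return segment - np.nanmin(segment.values)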
def _n_fold_validation(self, train_data, train_target, n=10):
    n_samples = len(train_data)
    perm = np.random.permutation(n_samples)
    kendall_tau = np.full((n, len(self.model_pool)), np.nan)

    for i, tst_split in enumerate(np.array_split(perm, n)):
        trn_split = np.setdiff1d(perm, tst_split, assume_unique=True)
        # loop over all considered surrogate models in the pool
        for j, model in enumerate(self.model_pool):
            acc_predictor = get_acc_predictor(model, train_data[trn_split], train_target[trn_split])
            rmse, rho, tau = utils.get_correlation(
                acc_predictor.predict(train_data[tst_split]), train_target[tst_split])
            kendall_tau[i, j] = tau

    # pick the model with the best mean-minus-std Kendall's tau across folds
    winner = int(np.argmax(np.mean(kendall_tau, axis=0) - np.std(kendall_tau, axis=0)))
    print("winner model = {}, tau = {}".format(
        self.model_pool[winner], np.mean(kendall_tau, axis=0)[winner]))
    self.winner = self.model_pool[winner]

    # re-fit the winner model with the entire data
    acc_predictor = get_acc_predictor(self.model_pool[winner], train_data, train_target)
    self.model = acc_predictor
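# _n_fold_validation above (and validate / search further down) unpack
# `rmse, rho, tau = get_correlation(prediction, target)`. The helper itself is not shown in
# this collection; the sketch below is an assumed implementation based on how the return
# values are used (RMSE plus two rank correlations), not the project's actual code.
import numpy as np
from scipy import stats

def get_correlation(prediction, target):
    # Root-mean-square error plus Spearman's rho and Kendall's tau between
    # predicted and true target values.
    prediction = np.asarray(prediction).flatten()
    target = np.asarray(target).flatten()
    rmse = np.sqrt(np.mean((prediction - target) ** 2))
    rho, _ = stats.spearmanr(prediction, target)
    tau, _ = stats.kendalltau(prediction, target)
    return rmse, rho, tau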
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict) -> None:
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    window_size = self.state['WINDOW_SIZE']
    last_pattern_center = self.state.get('pattern_center', [])
    self.state['pattern_center'] = list(set(last_pattern_center + learning_info['segment_center_list']))
    self.state['pattern_model'] = utils.get_av_model(learning_info['patterns_list'])
    convolve_list = utils.get_convolve(self.state['pattern_center'], self.state['pattern_model'], data, window_size)
    correlation_list = utils.get_correlation(self.state['pattern_center'], self.state['pattern_model'], data, window_size)
    height_list = learning_info['patterns_value']

    del_conv_list = []
    delete_pattern_width = []
    delete_pattern_height = []
    delete_pattern_timestamp = []
    for segment in deleted_segments:
        del_min_index = segment.center_index
        delete_pattern_timestamp.append(segment.pattern_timestamp)
        deleted = utils.get_interval(data, del_min_index, window_size)
        deleted = utils.subtract_min_without_nan(deleted)
        del_conv = scipy.signal.fftconvolve(deleted, self.state['pattern_model'])
        if len(del_conv):
            del_conv_list.append(max(del_conv))
        delete_pattern_height.append(utils.find_confidence(deleted)[1])
        delete_pattern_width.append(utils.find_width(deleted, False))

    self._update_fiting_result(self.state, learning_info['confidence'], convolve_list, del_conv_list, height_list)
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], learning_info: LearningInfo) -> None:
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    last_pattern_center = self.state.pattern_center
    self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list)
    self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
    convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
    correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)

    del_conv_list = []
    delete_pattern_timestamp = []
    for segment in deleted_segments:
        del_mid_index = segment.center_index
        delete_pattern_timestamp.append(segment.pattern_timestamp)
        deleted_pat = utils.get_interval(data, del_mid_index, self.state.window_size)
        deleted_pat = utils.subtract_min_without_nan(deleted_pat)
        del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state.pattern_model)
        if len(del_conv_pat):
            del_conv_list.append(max(del_conv_pat))

    self.state.convolve_min, self.state.convolve_max = utils.get_min_max(convolve_list, self.state.window_size / 3)
    self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(del_conv_list, self.state.window_size)
def do_scatter(i, j, ax):
    """ Draw single scatter plot """
    xs, ys = utils.extract(i, j, steadies)
    ax.scatter(xs, ys)

    ax.set_xlabel(r"$S_%d$" % i)
    ax.set_ylabel(r"$S_%d$" % j)

    cc = utils.get_correlation(xs, ys)
    ax.set_title(r"Corr: $%.2f$" % cc)
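# Unlike the surrogate-model snippets, the plotting helpers (do_scatter above, node_degree
# below) expect `get_correlation(xs, ys)` to return a single coefficient that can be printed
# in a title. A plausible implementation, assumed here rather than taken from the source, is
# a plain Pearson correlation:
import numpy as np

def get_correlation(xs, ys):
    # Pearson correlation coefficient between two equally long 1-D samples.
    return np.corrcoef(xs, ys)[0, 1]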
def validate(net, data, target, device):
    net.eval()
    with torch.no_grad():
        data, target = data.to(device), target.to(device)
        pred = net(data)
    pred, target = pred.cpu().detach().numpy(), target.cpu().detach().numpy()

    rmse, rho, tau = get_correlation(pred, target)
    # print("Validation RMSE = {:.4f}, Spearman's Rho = {:.4f}, Kendall’s Tau = {:.4f}".format(rmse, rho, tau))
    return rmse, rho, tau, pred, target
def optimize_for_sharpe_ratio(allocation, start_value, df, symbols):
    symbols = symbols[1:]
    dr = utils.daily_returns(df)
    df = df[symbols]
    # print(df)
    corr = np.asarray(utils.get_correlation(dr))
    # NOTE: the computed correlations are immediately overridden by hard-coded values here
    corr = np.asarray([1, -.12, .00209, -.004, -.007, .005])
    # print(symbols)
    cons = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
    bnds = tuple((0, 1) for item in allocation)
    result = op.minimize(__get_sharpe_ratio, allocation, args=(df, symbols, start_value, corr),
                         method='SLSQP', bounds=bnds, constraints=cons)
    print("Correlation to SPY: ", corr[1:])
    return np.round(result.x, 2)
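# optimize_for_sharpe_ratio minimizes `__get_sharpe_ratio`, which is not shown in this
# collection. Since SLSQP minimizes, the objective presumably returns the *negative* Sharpe
# ratio of the weighted portfolio. The sketch below is an assumption that only matches the
# call signature (allocation, df, symbols, start_value, corr); the extra arguments are
# accepted but unused, and the 252-day annualization is a conventional choice, not sourced.
import numpy as np

def __get_sharpe_ratio(allocation, df, symbols, start_value, corr):
    # Daily returns of the weighted portfolio, annualized Sharpe ratio, negated for minimization.
    prices = df[symbols]
    normed = prices / prices.iloc[0]
    port_value = (normed * allocation).sum(axis=1) * start_value
    daily_ret = port_value.pct_change().dropna()
    sharpe = np.sqrt(252) * daily_ret.mean() / daily_ret.std()
    return -sharpe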
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict) -> None:
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    last_pattern_center = self.state.get('pattern_center', [])
    self.state['pattern_center'] = list(set(last_pattern_center + learning_info['segment_center_list']))
    self.state['pattern_model'] = utils.get_av_model(learning_info['patterns_list'])
    convolve_list = utils.get_convolve(self.state['pattern_center'], self.state['pattern_model'], data, self.state['WINDOW_SIZE'])
    correlation_list = utils.get_correlation(self.state['pattern_center'], self.state['pattern_model'], data, self.state['WINDOW_SIZE'])

    del_conv_list = []
    delete_pattern_timestamp = []
    for segment in deleted_segments:
        del_mid_index = segment.center_index
        delete_pattern_timestamp.append(segment.pattern_timestamp)
        deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
        deleted_pat = utils.subtract_min_without_nan(deleted_pat)
        del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state['pattern_model'])
        if len(del_conv_pat):
            del_conv_list.append(max(del_conv_pat))

    self.state['convolve_min'], self.state['convolve_max'] = utils.get_min_max(convolve_list, self.state['WINDOW_SIZE'] / 3)
    self.state['conv_del_min'], self.state['conv_del_max'] = utils.get_min_max(del_conv_list, self.state['WINDOW_SIZE'])
def do_fit(self, dataframe: pd.DataFrame, labeled_segments: List[AnalyticSegment], deleted_segments: List[AnalyticSegment], learning_info: LearningInfo) -> None:
    data = utils.cut_dataframe(dataframe)
    data = data['value']
    window_size = self.state.window_size
    last_pattern_center = self.state.pattern_center
    self.state.pattern_center = utils.remove_duplicates_and_sort(last_pattern_center + learning_info.segment_center_list)
    self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
    convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, window_size)
    correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, window_size)
    height_list = learning_info.patterns_value

    del_conv_list = []
    delete_pattern_timestamp = []
    for segment in deleted_segments:
        segment_cent_index = segment.center_index
        delete_pattern_timestamp.append(segment.pattern_timestamp)
        deleted_stair = utils.get_interval(data, segment_cent_index, window_size)
        deleted_stair = utils.subtract_min_without_nan(deleted_stair)
        del_conv_stair = scipy.signal.fftconvolve(deleted_stair, self.state.pattern_model)
        if len(del_conv_stair) > 0:
            del_conv_list.append(max(del_conv_stair))

    self._update_fitting_result(self.state, learning_info.confidence, convolve_list, del_conv_list)
    self.state.stair_height = int(min(learning_info.pattern_height, default=1))
    self.state.stair_length = int(max(learning_info.pattern_width, default=1))
def node_degree(data, bin_num_x=100, bin_num_y=100):
    """ Compare node degree and correlation """
    # get data
    ndegs = []
    avg_corrs = []
    node_num = -1
    for syst, mat, _ in data:
        graph = nx.DiGraph(syst.jacobian)
        for i in graph.nodes():
            ndegs.append(graph.degree(i))
            ncorrs = [abs(mat[i, j]) for j in graph.neighbors(i) if i != j]
            avg_corrs.append(np.mean(ncorrs) if len(ncorrs) > 0 else 0)
        node_num = graph.number_of_nodes()
    assert node_num >= 0, 'Invalid data found'

    # plot data
    heatmap, xedges, yedges = np.histogram2d(avg_corrs, ndegs, bins=(bin_num_x, bin_num_y))
    extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
    heatmap = heatmap[::-1]

    plt.imshow(heatmap, extent=extent, interpolation='nearest',
               aspect=abs((extent[1] - extent[0]) / (extent[3] - extent[2])))
    plt.colorbar()

    cc = get_correlation(ndegs, avg_corrs)
    plt.title(r'Corr: $%.2f$' % cc)
    plt.xlabel('node degree')
    plt.ylabel('average absolute correlation to other nodes')

    plt.tight_layout()
    save_figure('images/ndegree_corr.pdf', bbox_inches='tight')
    plt.close()
def search(self):
    if self.resume:
        archive = self._resume_from_dir()
    else:
        # the following lines correspond to Algo 1 line 1-7 in the paper
        archive = []  # initialize an empty archive to store all trained CNNs

        # Design Of Experiment
        if self.iterations < 1:
            arch_doe = self.search_space.sample(self.n_doe)
        else:
            arch_doe = self.search_space.initialize(self.n_doe)

        # parallel evaluation of arch_doe
        top1_err, complexity = self._evaluate(arch_doe, it=0)

        # store evaluated / trained architectures
        for member in zip(arch_doe, top1_err, complexity):
            archive.append(member)

    # reference point (nadir point) for calculating hypervolume
    ref_pt = np.array([np.max([x[1] for x in archive]), np.max([x[2] for x in archive])])

    # main loop of the search
    for it in range(1, self.iterations + 1):
        # construct accuracy predictor surrogate model from archive
        # Algo 1 line 9 / Fig. 3(a) in the paper
        acc_predictor, a_top1_err_pred = self._fit_acc_predictor(archive)

        # search for the next set of candidates for high-fidelity evaluation (lower level)
        # Algo 1 line 10-11 / Fig. 3(b)-(d) in the paper
        candidates, c_top1_err_pred = self._next(archive, acc_predictor, self.n_iter)

        # high-fidelity evaluation (lower level)
        # Algo 1 line 13-14 / Fig. 3(e) in the paper
        c_top1_err, complexity = self._evaluate(candidates, it=it)

        # check the accuracy predictor's performance
        rmse, rho, tau = get_correlation(
            np.vstack((a_top1_err_pred, c_top1_err_pred)),
            np.array([x[1] for x in archive] + c_top1_err))

        # add to archive
        # Algo 1 line 15 / Fig. 3(e) in the paper
        for member in zip(candidates, c_top1_err, complexity):
            archive.append(member)

        # calculate hypervolume
        hv = self._calc_hv(
            ref_pt, np.column_stack(([x[1] for x in archive], [x[2] for x in archive])))

        # print iteration-wise statistics
        print("Iter {}: hv = {:.2f}".format(it, hv))
        print("fitting {}: RMSE = {:.4f}, Spearman's Rho = {:.4f}, Kendall’s Tau = {:.4f}".format(
            self.predictor, rmse, rho, tau))

        # dump the statistics
        with open(os.path.join(self.save_path, "iter_{}.stats".format(it)), "w") as handle:
            json.dump({
                'archive': archive,
                'candidates': archive[-self.n_iter:],
                'hv': hv,
                'surrogate': {
                    'model': self.predictor,
                    'name': acc_predictor.name,
                    'winner': acc_predictor.winner if self.predictor == 'as' else acc_predictor.name,
                    'rmse': rmse,
                    'rho': rho,
                    'tau': tau,
                },
            }, handle)

        if _DEBUG:
            # plot archive, evaluated candidates, and predicted candidates
            plot = Scatter(legend={'loc': 'lower right'})

            F = np.full((len(archive), 2), np.nan)
            F[:, 0] = np.array([x[2] for x in archive])  # second obj. (complexity)
            F[:, 1] = 100 - np.array([x[1] for x in archive])  # top-1 accuracy
            plot.add(F, s=15, facecolors='none', edgecolors='b', label='archive')

            F = np.full((len(candidates), 2), np.nan)
            F[:, 0] = np.array(complexity)
            F[:, 1] = 100 - np.array(c_top1_err)
            plot.add(F, s=30, color='r', label='candidates evaluated')

            F = np.full((len(candidates), 2), np.nan)
            F[:, 0] = np.array(complexity)
            F[:, 1] = 100 - c_top1_err_pred[:, 0]
            plot.add(F, s=20, facecolors='none', edgecolors='g', label='candidates predicted')

            plot.save(os.path.join(self.save_path, 'iter_{}.png'.format(it)))

    return
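# search() above delegates hypervolume computation to `self._calc_hv`, which is not shown.
# For the bi-objective minimization case used here (top-1 error and complexity, with the
# nadir-style reference point built above), a self-contained sweep over the sorted front is
# sufficient. The function below is an assumed stand-alone equivalent for illustration; the
# project may instead rely on a library implementation.
import numpy as np

def _calc_hv(ref_pt, F):
    # Keep only points that strictly dominate the reference point.
    F = np.asarray(F, dtype=float)
    F = F[np.all(F < ref_pt, axis=1)]
    if len(F) == 0:
        return 0.0
    # Sort by the first objective; the dominated area is accumulated as a sum of
    # rectangles while sweeping, skipping points dominated by earlier ones.
    F = F[np.argsort(F[:, 0])]
    hv, best_f2 = 0.0, ref_pt[1]
    for f1, f2 in F:
        if f2 < best_f2:
            hv += (ref_pt[0] - f1) * (best_f2 - f2)
            best_f2 = f2
    return hv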
def main_run():
    # h = [.01, .01, .01, .01, .01, .01, .01, .01]
    # h = pd.Series([0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025, 0.0025], dtype=float)
    #
    # x = [3., 2., 1.5, 2., 1.5, 3., 4.5, 4.9]
    # y = [4.2, 1.9, 1.9, 2.1, 2.0, 2.5, 3.9, 4.5]
    #
    # x = [1.5, 2., 1.5, 2., 1.5, 3., 4.5, 4.9]
    # y = [4.2, 1.9, 1.9, 2.1, 2.0, 2.5, 3.9, 4.5]
    # z = [0.012, 0.032, 0.035, 0.902, 0.052, 0.302, -0.909, 0.902]  # df.corr()
    # b = [1., -1., -1., 1., 1., -1., 1, -1]
    # p = [0.12, 0.55, 0.45, 0.3, 0.53, 0.57, 0.19, 0.58]
    start_value = 1000000
    # symbols = ['AC', 'ALI', 'BDO', 'BPI', 'DMC', 'GLO',
    #            'GTCAP', 'HCP', 'JFC', 'MBT', 'MPI', 'MEG',
    #            'RLC', 'SECB', 'SM', 'SMPH', 'TEL', 'URC']
    # symbols = ['AAPL', 'IBM', 'XOM', 'GLD']
    # symbols = ['goog', 'aapl', 'msft', 'amzn']
    symbols = ['AAPL', 'MSFT', 'ORCL', 'QCOM', 'BBY', 'MU', 'GILD', 'YUM', 'NFLX', 'VZ',
               'APA', 'RRC', 'MDLZ', 'CSCO', 'V', 'MET', 'SBUX', 'GGP', 'UA', 'GM']
    h = np.ones(len(symbols))
    addressable_dates = pd.date_range('2012-01-01', '2015-12-31')
    df = utils.get_data_online(symbols, addressable_dates)
    df = pd.DataFrame(df, index=df.index, dtype=np.float64)
    df = df.dropna()
    x = np.asarray(utils.daily_returns(df[symbols[1:]]).std())
    # print(x)
    y = np.asarray(utils.daily_returns(df[symbols[1:]]).mean())
    # print(y)
    # b = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
    z = np.asarray(utils.get_correlation(df)[1:])
    # print(z)
    # p = [.5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5]
    b = np.ones(len(symbols) - 1)
    # print(b)
    p = Accuracy.get_accuracy(symbols, addressable_dates)
    # print(p)
    sharpe_ratios = utils.get_sharpe_ratio(utils.daily_returns(df[symbols[1:]]))
    # print(sharpe_ratios)
    data = (x, y, z, b, p)
    x_sorted = np.sort(x)
    y_sorted = np.sort(y)
    # print("h", np.transpose(h) * y)
    # print("cost", error(h))

    # cons = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
    cons = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
    # bounds = tuple((0, 1) for it in h)
    bnds = tuple((0, 1) for it in h)
    bounds = bnds
    # bounds = (0, 1)
    min_result = spo.minimize(error, h, args=(data,), method='SLSQP',
                              bounds=bounds, constraints=cons, options={'disp': True})
    print("Parameters = {}, Y = {}".format(np.round(min_result.x, 2), np.abs(min_result.fun)))
    # for tracing
    # print("xdata", xdata)
    model_bounds = max(x.max(), y.max())
    xdata = np.linspace(0, 5, 8)
    y_main = func(xdata, model_bounds, 1, -model_bounds)
    print("bias", b * np.round(min_result.x, 3))
    print("risk: ", x)
    print("rewards: ", y)
    print("correlation: ", z)
    print("accuracy: ", p)
    print("y main", y_main)
    plt.plot(xdata, y_main)

    y = func(xdata, 2.5, 1.3, 0.5)
    ydata = y + 0.2 * np.random.normal(size=len(xdata))
    coeffs, pcov = curve_fit(func, xdata, ydata)
    yaj = func(xdata, coeffs[0], coeffs[1], coeffs[2])
    print(pcov)
    # plt.scatter(xdata, ydata)
    # plt.plot(xdata, yaj)
    # c, p = curve_fit(sigmoid, x_sorted, y_sorted)
    plt.scatter(x_sorted, y_sorted)
    plt.show()
    return None