def fit_tree(self, weights, X_train, F_train, y_train):
    """Train a surrogate decision tree on the network's rounded predictions (used to track average path length)."""
    y_train_hat = self.pred_fun(weights, X_train, F_train)
    y_train_hat_int = np.rint(y_train_hat).astype(int)
    tree = DecisionTreeClassifier(min_samples_leaf=25)
    tree.fit(X_train.T, y_train_hat_int.T)
    return tree
def predict(self, X):
    probabilities = self.feed_forward_out(X)
    # If binary category ("yes/no"), round the sigmoid output to the nearest int (0/1).
    if self.n_neurons[-1] == 1:
        return np.rint(probabilities)
    else:
        return np.argmax(probabilities, axis=1)
def HEM_predictions(model_weights, df, covariates):
    """Return a prediction column by rounding 1 - p, where p is the probability of being cured.

    This is because the cure label is 0 and the not-cured label is 1. `df` is the original
    dataframe, and `covariates` is the list of covariate names from the dataframe, in the
    same order as the corresponding entries of `model_weights`.
    """
    n_rows = len(df.index)
    intercept = np.repeat(1, n_rows)
    table = df[covariates].to_numpy()
    nonintercept_weights = model_weights[1:]
    # This is the probability of *not* being cured!
    predictions_float = 1 - sigmoid(
        model_weights[0] * intercept + np.dot(table, nonintercept_weights))
    predictions = {
        "pred": np.rint(predictions_float),
        "prob": predictions_float,
    }
    return predictions
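# A minimal usage sketch for HEM_predictions (illustrative only: the dataframe,
# covariate names, and weights below are made up, and `sigmoid` is assumed to be
# defined elsewhere in this module).
import numpy as np
import pandas as pd

example_df = pd.DataFrame({"age": [0.2, 0.5], "dose": [0.3, 0.6]})
example_weights = np.array([0.1, -0.4, 0.7])  # intercept followed by covariate weights
preds = HEM_predictions(example_weights, example_df, ["age", "dose"])
print(preds["pred"])  # rounded labels (0 = cured, 1 = not cured)
print(preds["prob"])  # probability of not being cured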
scaler = StandardScaler()
xt = scaler.fit_transform(xt)
xv = scaler.transform(xv)  # reuse the training-set statistics for the validation split

nb_in = x.shape[-1]
nb_out = y.shape[-1]

# fit neural network with numpy
nn_np = npNetwork([nb_in, 4, nb_out], nonlin='tanh', output='logistic', loss='ce')

print('using numpy network:')
nn_np.fit(yt, xt, nb_epochs=250, batch_size=64, lr=1e-2)

_yt = nn_np.forward(xt)
class_error = np.linalg.norm(yt - np.rint(_yt), axis=1)
print('numpy', 'train:', 'cost=', nn_np.cost(yt, xt),
      'class. error=', np.mean(class_error))

_yv = nn_np.forward(xv)
class_error = np.linalg.norm(yv - np.rint(_yv), axis=1)
print('numpy', 'test:', 'cost=', nn_np.cost(yv, xv),
      'class. error=', np.mean(class_error))

# fit neural network with autograd
nn_ag = agNetwork([nb_in, 4, nb_out], nonlin='tanh', output='logistic', loss='ce')

print('using autograd network:')
nn_ag.fit(yt, xt, nb_epochs=250, batch_size=64, lr=1e-2)

_yt = nn_ag.forward(xt)
class_error = np.linalg.norm(yt - np.rint(_yt), axis=1)
print('autograd', 'train:', 'cost=', nn_ag.cost(yt, xt),
      'class. error=', np.mean(class_error))
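# A small self-contained illustration (not part of the script above) of the
# classification-error metric used here: outputs are rounded with np.rint and a
# row contributes to the error whenever any rounded output differs from the target.
import numpy as np

y_true = np.array([[1.0, 0.0], [0.0, 1.0]])
y_prob = np.array([[0.9, 0.2], [0.6, 0.4]])  # second row is misclassified
row_error = np.linalg.norm(y_true - np.rint(y_prob), axis=1)
print(np.mean(row_error))  # ~0.71: the misclassified row contributes sqrt(2)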
with open('./trained_models/trained_weights_' + indicator + '_mixed_fs2.pkl', 'wb') as fp:
    pickle.dump({'gru': gru.gru.weights, 'mlp': gru.mlp.weights}, fp)
print('saved trained model to ./trained_models')

visualize(gru.tree, './trained_models/tree_' + str(indicator) + '_mixed_fs2.pdf', True)
print('saved final decision tree to ./trained_models')

print('\n')
print('name of the file: ./trained_models/trained_weights_' + indicator + '_mixed_fs2.pkl')

X_test = obs_test
F_test = fcpt_test
y_test = out_test

y_hat = gru.pred_fun(gru.weights, X_test, F_test)
y_hat_int = np.rint(y_hat).astype(int)

auc_test = roc_auc_score(y_test.T, y_hat.T)
print('Test AUC: {:.2f}'.format(auc_test))

avg_precision = average_precision_score(y_test.T, y_hat.T)
print('Test Average Precision: {:.2f}'.format(avg_precision))

fpr, tpr, thresholds = roc_curve(y_test.T, y_hat.T)

# Find the optimal probability threshold
threshold = Find_Optimal_Cutoff(y_test.T, y_hat.T)
print('Test threshold: {:.2f}'.format(threshold[0]))
# data['pred'] = data['pred_proba'].map(lambda x: 1 if x > threshold else 0)
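# Find_Optimal_Cutoff is assumed to be defined elsewhere in this repo; a common
# choice is the threshold that maximizes Youden's J statistic (tpr - fpr) on the
# ROC curve. A minimal sketch of that idea (the actual implementation may differ):
import numpy as np
from sklearn.metrics import roc_curve

def find_optimal_cutoff_sketch(y_true, y_score):
    """Return (as a one-element list) the threshold maximizing tpr - fpr."""
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    return [thresholds[np.argmax(tpr - fpr)]]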
import numpy as np


def HEM_fit(censored_inputs, noncensored_inputs, C, maxiter, initialize):
    """Fit the model by minimizing the training loss.

    The minimization is done with the constrained SLSQP method, so we need an
    initial guess for both the unknown cure labels of the censored rows and the
    covariate weights.

    Parameters
    ----------
    censored_inputs:
        A numpy array of shape (n_samples, n_covariates + 1) containing the
        censored rows, each of which begins with a '1' to account for the
        intercept term. E.g. array([[1, 0.2, 0.3], [1, 0.5, 0.6]]) has two
        samples and two covariates.
    noncensored_inputs:
        Same as above, except these are the noncensored rows. The number of
        covariates is the same as above, but the number of samples may differ.
    C:
        The strength of the quadratic regularization term (0.5 * w^2) on the
        non-intercept covariate weights.
    maxiter:
        Maximum number of iterations for the SLSQP method. Typically set to 1000.
    initialize:
        How the initial guesses for the SLSQP minimization are chosen. The
        covariate weights are initialized at random from a uniform distribution.
        For the cure labels:
        - 'censoring_rate': assume the probability of being cured is the
          censoring rate and use it to generate an initial guess for the cure
          labels of the censored rows.
        - 'use_clustering': build a single cluster from the noncensored rows and
          two clusters from the censored rows; by comparing the distance of the
          two censored cluster centers to the noncensored cluster center, cure
          labels are assigned to the censored rows as the initial guess.
        - 'use_random': since the problem is nonconvex, the first two
          initializations typically lead to solutions close to the initial
          guess. This option instead tries fifty guesses for the unknown cure
          labels, drawn at random from a uniform distribution, and keeps the
          weights and labels corresponding to the guess with the lowest
          objective.

    Returns
    -------
    A dictionary with two keys: (i) 'model_weights', an array containing the
    covariate weights, and (ii) 'unknown_cure_labels', an array of the final
    cure labels for the censored rows, in the order they were fed to the
    algorithm.
    """
    n_noncens = len(noncensored_inputs)
    n_cens = len(censored_inputs)
    n_rows = n_noncens + n_cens

    def training_loss(param):
        # Training loss is the negative log-likelihood of the MLE.
        weights, unknownlabels = (
            param[0:len(censored_inputs[0])],
            param[len(censored_inputs[0]):],
        )
        covariate_weights = weights[1:]  # do not regularize the intercept term
        reg = C * np.dot(covariate_weights, covariate_weights)  # regularization term
        known_loss = 1 - prob(weights, noncensored_inputs)  # noncensored loss term
        unknown_loss = (
            np.log(prob(weights, censored_inputs)) * (1 - unknownlabels)
            + np.log(1 - prob(weights, censored_inputs)) * unknownlabels)
        return reg - 1 / n_rows * (np.sum(np.log(known_loss)) + np.sum(unknown_loss))

    training_gradient = grad(training_loss)

    def constraint1(param):
        # Lower bound for I_i * (I_i - 1) on the censored rows. Default lower bound taken as 0.001.
        weights, unknownlabels = (
            param[0:len(censored_inputs[0])],
            param[len(censored_inputs[0]):],
        )
        return unknownlabels * (unknownlabels - 1) + 0.001  # unknownlabels

    def constraint2(param):
        # Upper bound for I_i * (I_i - 1) on the censored rows. Default upper bound taken as 0.001.
        weights, unknownlabels = (
            param[0:len(censored_inputs[0])],
            param[len(censored_inputs[0]):],
        )
        return 0.001 - unknownlabels * (unknownlabels - 1)  # -unknownlabels

    # Set the tolerances/bounds for the above constraints
    cons1 = {"type": "ineq", "fun": constraint1}
    cons2 = {"type": "ineq", "fun": constraint2}
    cons = [cons1, cons2]

    guess_weights = np.random.uniform(-0.5, 0.5, len(censored_inputs[0]))

    if initialize == "censoring_rate":
        # Use the censoring rate to initialize the minimization.
        p = 1 - n_cens / n_rows  # probability of being cured (label 0!) is the censoring rate.
        guess_unknown_labels = np.random.binomial(1, p, n_cens)  # flip a coin once, n_cens times.
        guess = np.concatenate((guess_weights, guess_unknown_labels), axis=None)
        res = minimize(
            training_loss,
            guess,
            method="SLSQP",
            jac=training_gradient,
            constraints=cons,
            options={"maxiter": maxiter},
        )
        model_weights = res.x[0:len(censored_inputs[0])]
        unknown = res.x[len(censored_inputs[0]):]
        unknown_cure_labels = np.rint(unknown)
        fit = {
            "model_weights": model_weights,
            "unknown_cure_labels": unknown_cure_labels,
        }
        return fit

    elif initialize == "use_clustering":
        # Use clustering to initialize the minimization.
        kmeans_nc = KMeans(n_clusters=1, random_state=0).fit(noncensored_inputs)
        noncensored_clust_cent = kmeans_nc.cluster_centers_[0]
        kmeans_c = KMeans(n_clusters=2, random_state=0).fit(censored_inputs)
        censored_clust_cent = kmeans_c.cluster_centers_
        cens_cluster1_cent = censored_clust_cent[0]
        cens_cluster2_cent = censored_clust_cent[1]
        c_labels = kmeans_c.labels_
        if dist(cens_cluster1_cent, noncensored_clust_cent) < dist(
                cens_cluster2_cent, noncensored_clust_cent):
            c_labels = vswap(c_labels)
        guess_unknown_labels = c_labels
        guess = np.concatenate((guess_weights, guess_unknown_labels), axis=None)
        res = minimize(
            training_loss,
            guess,
            method="SLSQP",
            jac=training_gradient,
            constraints=cons,
            options={"maxiter": maxiter},
        )
        model_weights = res.x[0:len(censored_inputs[0])]
        unknown = res.x[len(censored_inputs[0]):]
        unknown_cure_labels = np.rint(unknown)
        fit = {
            "model_weights": model_weights,
            "unknown_cure_labels": unknown_cure_labels,
        }
        return fit

    elif initialize == "use_random":
        # Try many random initializations for the unknown labels and keep the one giving the lowest objective.
        results = {}
        values = np.array([])
        for p in np.random.uniform(0.1, 0.9, 50):
            guess_unknown_labels = np.random.binomial(1, p, n_cens)  # flip a coin once, n_cens times.
            guess = np.concatenate((guess_weights, guess_unknown_labels), axis=None)
            res = minimize(
                training_loss,
                guess,
                method="SLSQP",
                jac=training_gradient,
                constraints=cons,
                options={"maxiter": maxiter},
            )
            model_weights = res.x[0:len(censored_inputs[0])]
            value = res.fun
            unknown = res.x[len(censored_inputs[0]):]
            unknown_cure_labels = np.rint(unknown)
            values = np.concatenate((values, value), axis=None)
            results[value] = [model_weights, unknown_cure_labels]
        maxim = values.max()
        minim = values.min()
        optimal_model_weights = results[minim][0]
        optimal_model_labels = results[minim][1]
        fit = {
            "model_weights": optimal_model_weights,
            "unknown_cure_labels": optimal_model_labels,
            "minvalue": minim,
            "maxvalue": maxim,
        }
        return fit

    else:
        raise ValueError(
            "Need initialize parameter to be chosen as either 'use_clustering', 'censoring_rate', or 'use_random'."
        )
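# A minimal usage sketch for HEM_fit (illustrative only: the data below is
# synthetic, and the helpers used inside HEM_fit -- prob, grad, minimize, KMeans,
# dist, vswap -- are assumed to be imported elsewhere in this module).
import numpy as np

rng = np.random.default_rng(0)
n_cov = 2
censored = np.hstack([np.ones((30, 1)), rng.normal(size=(30, n_cov))])
noncensored = np.hstack([np.ones((70, 1)), rng.normal(size=(70, n_cov))])

fit = HEM_fit(censored, noncensored, C=0.1, maxiter=1000, initialize="censoring_rate")
print(fit["model_weights"])        # intercept followed by covariate weights
print(fit["unknown_cure_labels"])  # recovered cure labels for the censored rows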
def plot_3D(*opts, fxn_name, animate_gif=True, f_name='v1'):

    def init():
        for line in lines:
            line.set_data([], [])
            line.set_3d_properties([])
        return lines

    def animate(i):
        num1 = int(x2[i])
        for lnum, (line, dot) in enumerate(zip(lines, dots)):
            if num1 < dp[lnum]:
                # set data for each line separately.
                line.set_data(xlist[lnum][:num1], ylist[lnum][:num1])
                line.set_3d_properties(zlist[lnum][:num1])
                line.set_label(nn[lnum])  # set the label and draw the legend
                plt.legend(loc="upper center", ncol=4)
        return lines

    # Get data
    xlist, ylist, zlist, nn, dp = get_dlists(opts)
    n = len(opts)

    if fxn_name == 'rosenbrock':
        x2 = np.rint(np.linspace(0, len(xlist[1]), endpoint=False, num=200))
        c = cm.jet(np.linspace(0, 1, n))
        g = 0.4
        rot_val = 245
        nr = colors.PowerNorm(gamma=g)
    elif fxn_name == 'saddle':
        rot_val = -60
        nr = None
        x2 = np.rint(np.linspace(0, len(xlist[1]), endpoint=False, num=100))
        c = cm.rainbow(np.linspace(0, 1, n))
    else:
        x2 = np.rint(np.linspace(0, len(xlist[1]), endpoint=False, num=200))
        c = cm.rainbow(np.linspace(0, 1, n))
        g = 0.25
        rot_val = 245
        nr = colors.PowerNorm(gamma=g)

    # Plot 3D surface
    fig = plt.figure(figsize=(9.6, 6.4))
    ax = plt.axes(projection='3d', azim=rot_val)
    plt.tight_layout()
    data, minima, x_lims, y_lims = plot_fxn(fxn_name, '3D')
    ax.plot_surface(*data, rstride=1, cstride=1, norm=nr, cmap='viridis',
                    edgecolor='none', alpha=1.0)
    ax.plot(*minima, 'x', markersize=12, mew=2, color='k')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    # ax.set_xlim(x_lims)
    # ax.set_ylim(y_lims)
    # c = new_cmap(np.linspace(0, 1, n))
    # c = cm.jet(np.linspace(0, 1, n))

    if animate_gif == False:
        for k in range(n):
            x_history = np.array(xlist[k])
            y_history = np.array(ylist[k])
            path = np.concatenate(
                (np.expand_dims(x_history, 1), np.expand_dims(y_history, 1)),
                axis=1).T
            ax.quiver(path[0, :-1],
                      path[1, :-1],
                      beale_fxn(path[::, :-1][0], path[::, :-1][1]),
                      path[0, 1:] - path[0, :-1],
                      path[1, 1:] - path[1, :-1],
                      beale_fxn(path[::, :-1][0], path[::, :-1][1]) -
                      beale_fxn(path[::, :-1][0], path[::, :-1][1]),
                      color=c[k],
                      label=nn[k],
                      length=1,
                      normalize=False,
                      lw=5)
        plt.legend(loc="upper left")
    else:
        line, = ax.plot([], [], [], lw=2)
        # dot, = ax.plot([], [], 'o', lw=2)
        lines = []
        dots = []
        for index in range(n):
            l_obj = ax.plot([], [], [], lw=2, color=c[index])[0]
            lines.append(l_obj)
            d_obj = ax.scatter([], [], [], marker='o', color=c[index])
            dots.append(d_obj)
        # print(x2)
        # blit=True --> only re-draw the parts that have changed.
        # dot = ax.scatter([], [], [])  # Set the dot at some arbitrary position initially
        anim = animation.FuncAnimation(fig,
                                       animate,
                                       init_func=init,
                                       frames=len(x2),
                                       interval=1,
                                       blit=True)
        anim.save('images/{}_3D_{}.gif'.format(fxn_name, f_name),
                  dpi=60,
                  writer="imagemagick")
    # ax.invert_yaxis()
    plt.show()
def plot_contour(*opts, fxn_name, animate_gif=True, fig_size=None, save_f=True,
                 f_name='v1', in_type=None):

    def init():
        for line in lines:
            line.set_data([], [])
        return lines

    def animate(i):
        num1 = int(x2[i])
        for lnum, (line, scat) in enumerate(zip(lines, scats)):
            if num1 < dp[lnum]:
                # print(i, dp[lnum])
                # set data for each line separately.
                line.set_data(xlist[lnum][:num1], ylist[lnum][:num1])
                scat.set_offsets([xlist[lnum][num1], ylist[lnum][num1]])
                line.set_label(nn[lnum])  # set the label and draw the legend
                plt.legend(loc="upper left")
        return lines

    if fig_size == 'small':
        fig, ax = plt.subplots(figsize=(6, 4))
    else:
        fig, ax = plt.subplots(figsize=(8.5, 6.8))
    plt.tight_layout()

    data, minima, x_lims, y_lims = plot_fxn(fxn_name, 'contour')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_xlim(x_lims)
    ax.set_ylim(y_lims)
    ax.contour(*data,
               levels=np.logspace(-.5, 5, 35),
               norm=LogNorm(),
               cmap='viridis',
               alpha=0.7)  # cmap=plt.cm.jet
    ax.plot(*minima[0:2], 'x', markersize=12, mew=2, color='k')

    xlist, ylist, zlist, nn, dp = get_dlists(opts, in_type)
    n = len(xlist)
    # print(len(xlist))

    if fxn_name == 'rosenbrock':
        if in_type == 'sdict':
            c = cm.rainbow_r(np.linspace(0, 1, n))
        else:
            c = cm.jet(np.linspace(0, 1, n))
    else:
        c = cm.rainbow(np.linspace(0, 1, n))

    if animate_gif == False:
        for k in range(n):
            x_history = np.array(xlist[k])
            y_history = np.array(ylist[k])
            path = np.concatenate(
                (np.expand_dims(x_history, 1), np.expand_dims(y_history, 1)),
                axis=1).T
            ax.quiver(path[0, :-1],
                      path[1, :-1],
                      path[0, 1:] - path[0, :-1],
                      path[1, 1:] - path[1, :-1],
                      scale_units='xy',
                      angles='xy',
                      width=0.003,
                      scale=1,
                      color=c[k],
                      label=nn[k])
        plt.legend(loc="upper left")
        if save_f == True:
            plt.savefig('images/{}_path.png'.format(fxn_name))
    else:
        line, = ax.plot([], [], lw=2)  # , markersize=12
        lines = []
        scats = []
        for index in range(n):
            l_obj = ax.plot([], [], lw=2, color=c[index])[0]
            lines.append(l_obj)
            s_obj = ax.scatter([], [], lw=2, color=c[index])
            scats.append(s_obj)
        num_min = int(len(xlist[0]) / 50)
        # print(num_min)
        x2 = np.rint(np.linspace(0, len(xlist[1]), endpoint=False, num=200))  # fix this
        # print(x2)
        # x2 = np.arange(len(xlist[0]))
        # blit=True --> only re-draw the parts that have changed.
        scat = ax.scatter([], [])  # Set the dot at some arbitrary position initially
        anim = animation.FuncAnimation(fig,
                                       animate,
                                       init_func=init,
                                       frames=len(x2),
                                       interval=1,
                                       blit=True)
        anim.save('images/{}_contour_{}.gif'.format(fxn_name, f_name),
                  dpi=60,
                  writer="imagemagick")
    plt.show()