def evaluate_instances(self, theta=None, prc=False):
    """Pass the instance scores and ground truth to the result analyzer
    and return the appropriate metric."""
    if not self.labeled_data:
        print('No instance data available. Set the instances and their labels first.')
        return None
    if theta is not None:  # a given theta overrides the existing one
        self.theta = theta
    ra = ResultAnalyzer()
    y_hat = af.calculate_y(self.eval_data, self.theta)
    ra.addManyResults(self.y_known, y_hat)
    if prc:
        return ra.auprc()
    else:
        return ra.accuracy(), ra.auc()
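
# Usage sketch (hypothetical driver; assumes an instance of this class with
# evaluation data and labels already set -- those setters are not shown in
# this section):
#
#     acc, auc = evaluator.evaluate_instances(theta=best_theta)
#     auprc = evaluator.evaluate_instances(theta=best_theta, prc=True)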
def get_instance_scores(self):
    """Return the model's score for every instance in the dataset."""
    x = self.data.get_instances()
    return af.calculate_y(x, self.theta)
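
# Note: af.calculate_y is assumed here to map instance embeddings to scores
# under theta. A minimal sketch of such a scoring function, assuming it is a
# logistic score (an assumption; the authoritative definition lives in the
# af module):
#
#     def calculate_y(x, theta):
#         # x: (n_instances, dim) embeddings, theta: (dim,) weight vector
#         return 1.0 / (1.0 + np.exp(-x.dot(theta)))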
def train(self):
    """Where the magic happens. Optimizes the cost function of the paper,
    based on the parameters given before. A terminating function can end
    the optimization before all epochs run, based on heuristics. Prints
    progress every 50 iterations and keeps the best theta values based on
    the group reconstruction score. At the end, prints detailed stats
    about classifying with that theta."""
    print('Optimizing for ', self._param_str)
    self.total_iterations = 0
    accs = []
    # theta = np.random.random(self.embeddings_dimension)  # random-init alternative
    theta = np.zeros(self.embeddings_dimension)
    best_theta = theta
    best_acc = 0
    Jilu = [0.0]  # "jilu" ("record"): running-best accuracy per logged iteration
    terminate = False
    for epoch in range(self.epochs):
        if terminate:
            break
        self.train_data.rewind_dataset(True)  # reset and shuffle data
        print('-------epoch ', epoch, '-----------')
        print(self._print_titles)
        X, gs, gl = self.train_data.get_next_batch()
        while X is not None:  # for each mini-batch
            # gradient-descent step: pairwise similarity matrix for the batch
            W_ij = similarity.get_sim_matrix(X, self.similarity_fn, self.sim_variance)
            # calculate y_hat and its derivative
            Y_ij = af.calculate_y(X, theta)
            Y_der_ij = af.calculate_y_der(Y_ij, X)
            # cost derivatives: instance-similarity term and group term
            similarity_cost = af.similarity_derivative(
                Y_ij, Y_der_ij, W_ij) / (X.shape[0] ** 2)
            group_cost = self.alpha_balance * af.group_derivative(
                Y_ij, Y_der_ij, gs, gl) / float(len(gs))
            theta_der = similarity_cost + group_cost
            # update theta with an epoch-decayed learning rate
            theta = theta - (1 - self.momentum_value) * self.lr / (epoch + 1) * theta_der
            self.total_iterations += 1
            # print progress and track the best theta every 50 iterations
            if self.total_iterations % 50 == 0:
                acc = self._print_progress(theta)
                accs.append(acc)
                Jilu.append(max(Jilu[-1], acc))  # extend the running best
                if acc > best_acc:
                    # save the best theta so far, based on the training set
                    best_acc = acc
                    best_theta = theta
                    io.save_theta(theta, self.output_name + self._param_str, best=True)
                if self._terminate_conditions(theta, accs):
                    terminate = True
                    break
            X, gs, gl = self.train_data.get_next_batch()
    io.save_theta(theta, self.output_name + self._param_str + '_last')
    print('\n\n\n\t\t\t---BEST THETA VALUE (in training group)---')
    self._print_progress(best_theta, print_details=True)
    return self.train_acc, self.group_acc, self.instance_acc, self.instance_auc
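
# End-to-end usage sketch (hypothetical; the class name and constructor
# arguments are assumptions, since object construction is not shown in this
# section):
#
#     trainer = Trainer(epochs=..., lr=..., alpha_balance=...)
#     train_acc, group_acc, inst_acc, inst_auc = trainer.train()
#     scores = trainer.get_instance_scores()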