def train(self, history_tuples, reg_lambda=0.01):
    self.iteration = 0
    self.h_tuples = history_tuples
    self.reg = reg_lambda
    self.dataset = None  # set by create_dataset
    self.tag_set = self.func.supported_tags  # the set of all tags
    self.create_dataset()
    self.dim = self.dataset.shape[1]
    self.num_examples = self.dataset.shape[0]
    if (self.model is None) or (self.model.shape[0] != self.dim):
        self.model = numpy.zeros(self.dim)  # initialize the model to all zeros
    dt1 = datetime.datetime.now()
    # print 'before training: ', dt1
    try:
        from scipy.optimize import minimize as mymin
        params = mymin(self.cost, self.model, method='L-BFGS-B',
                       callback=self.cb, options={'maxiter': 25})
        self.model = params.x
    except ImportError:
        # older SciPy without minimize(): fmin_l_bfgs_b returns a plain
        # (x, f, d) tuple rather than an OptimizeResult, so there is no .x
        from scipy.optimize import fmin_l_bfgs_b
        x_opt, f_opt, _ = fmin_l_bfgs_b(self.cost, self.model, fprime=self.gradient)
        self.model, self.cost_value = x_opt, f_opt
    dt2 = datetime.datetime.now()
    # print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    if self.pic_file is not None:
        with open(self.pic_file, "wb") as f:
            pickle.dump({'model': self.model, 'tag_set': self.tag_set}, f)
    return self.cost_value

def train(self, history_tuples, reg_lambda=0.01, max_iter=5):
    self.iteration = 0
    self.h_tuples = history_tuples
    self.reg = reg_lambda
    self.dataset = None  # set by create_dataset
    self.tag_set = self.func.get_supported_labels()  # the set of all tags
    self.create_dataset()
    self.dim = self.dataset.shape[1]
    self.num_examples = self.dataset.shape[0]
    if (self.model is None) or (self.model.shape[0] != self.dim):
        self.model = numpy.zeros(self.dim)  # initialize the model to all zeros
    dt1 = datetime.datetime.now()
    # print 'before training: ', dt1
    try:
        from scipy.optimize import minimize as mymin
        params = mymin(self.cost, self.model, method='L-BFGS-B',
                       callback=self.cb, options={'maxiter': max_iter})
        self.model = params.x
    except ImportError:
        # print "Importing alternate minimizer fmin_l_bfgs_b"
        from scipy.optimize import fmin_l_bfgs_b
        x_opt, f_opt, _ = fmin_l_bfgs_b(self.cost, self.model, fprime=self.gradient)
        self.model, self.cost_value = x_opt, f_opt
    dt2 = datetime.datetime.now()
    # print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    return self.cost_value

def train(self, history_tuples, reg_lambda=0.01):
    self.iteration = 0
    self.h_tuples = history_tuples
    self.reg = reg_lambda
    self.dataset = None  # set by create_dataset
    self.tag_set = self.func.supported_tags  # the set of all tags
    self.create_dataset()
    self.dim = self.dataset.shape[1]
    self.num_examples = self.dataset.shape[0]
    if (self.model is None) or (self.model.shape[0] != self.dim):
        self.model = numpy.zeros(self.dim)  # initialize the model to all zeros
    dt1 = datetime.datetime.now()
    print 'before training: ', dt1
    try:
        from scipy.optimize import minimize as mymin
        params = mymin(self.cost, self.model, method='L-BFGS-B',
                       callback=self.cb, options={'maxiter': 25})
        self.model = params.x
    except ImportError:
        print "Importing alternate minimizer fmin_l_bfgs_b"
        from scipy.optimize import fmin_l_bfgs_b
        x_opt, f_opt, _ = fmin_l_bfgs_b(self.cost, self.model, fprime=self.gradient)
        self.model, self.cost_value = x_opt, f_opt
    dt2 = datetime.datetime.now()
    print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    if self.pic_file is not None:
        with open(self.pic_file, "wb") as f:
            pickle.dump({'model': self.model, 'tag_set': self.tag_set}, f)
    return self.cost_value

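# Illustrative sketch (not from the original code): the two SciPy entry points
# used in the variants above return different types. scipy.optimize.minimize
# returns an OptimizeResult whose solution lives in .x, while the legacy
# fmin_l_bfgs_b returns a plain (x, f, d) tuple -- which is why the fallback
# branches above unpack a tuple instead of reading params.x.
import numpy
from scipy.optimize import minimize, fmin_l_bfgs_b

def _demo_cost(w):
    # simple convex bowl with its minimum at w = (1, 2)
    return (w[0] - 1.0) ** 2 + (w[1] - 2.0) ** 2

def _demo_grad(w):
    return numpy.array([2.0 * (w[0] - 1.0), 2.0 * (w[1] - 2.0)])

res = minimize(_demo_cost, numpy.zeros(2), method='L-BFGS-B')
x_new = res.x  # OptimizeResult: solution is the .x attribute
x_old, f_old, info = fmin_l_bfgs_b(_demo_cost, numpy.zeros(2), fprime=_demo_grad)
# x_old is already the solution array; a tuple has no .x attribute
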
def train(self, training_list_input, training_list_output, reg_lambda=0.01):
    self.iteration = 0
    self.training_list_input = training_list_input
    self.training_list_output = training_list_output
    self.reg = reg_lambda
    self.dataset = numpy.array(training_list_input)
    self.number_features = self.dataset.shape[1]
    self.number_training_examples = self.dataset.shape[0]
    if (self.model is None) or (self.model.shape[0] != self.number_features):
        self.model = numpy.zeros(self.number_features)  # initialize the model to all zeros
    dt1 = datetime.datetime.now()
    print 'before training: ', dt1
    from scipy.optimize import minimize as mymin
    params = mymin(self.cost, self.model, method='L-BFGS-B')
    self.model = params.x
    dt2 = datetime.datetime.now()
    print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    if self.pic_file is not None:
        with open(self.pic_file, "wb") as f:
            pickle.dump({'model': self.model,
                         'number_features': self.number_features}, f)
    return

def train(self): """ Train the classifier """ self.count = 0 # params = mymin(self.cost, self.model, method = 'L-BFGS-B', jac = self.gradient, options = {'disp' : True}) params = mymin(self.cost, self.model, method="L-BFGS-B", options={"disp": True}) self.model = params.x # help(params) print "Done Training", self.model, self.count, params.success
def train(self):
    ''' Train the classifier '''
    self.count = 0
    # params = mymin(self.cost, self.model, method='L-BFGS-B', jac=self.gradient, options={'disp': True})
    params = mymin(self.cost, self.model, method='L-BFGS-B', options={'disp': True})
    self.model = params.x
    print "Done Training", self.model, self.count, params.success

def train(self):
    dt1 = datetime.datetime.now()
    print 'before training: ', dt1
    params = mymin(self.cost, self.model, method='L-BFGS-B', options={'maxiter': 100})
    self.model = params.x
    dt2 = datetime.datetime.now()
    print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    if self.pic_file is not None:
        with open(self.pic_file, "wb") as f:
            pickle.dump(self.model, f)
    return

def train(self):
    dt1 = datetime.datetime.now()
    # print 'before training: ', dt1
    params = mymin(self.cost, self.model, method='L-BFGS-B')
    self.model = params.x
    dt2 = datetime.datetime.now()
    # print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    if self.pic_file is not None:
        with open(self.pic_file, "wb") as f:
            pickle.dump(self.model, f)
    return

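# Sketch (assumed usage, not part of the original code): a model pickled by the
# train() variants above via pic_file can be restored later for prediction.
# load_model is a hypothetical helper; the stored object is whatever train()
# dumped (a bare weight vector, or a {'model': ..., 'tag_set': ...} dict).
import pickle

def load_model(pic_file):
    # counterpart of the pickle.dump(...) calls in train()
    with open(pic_file, "rb") as f:
        return pickle.load(f)
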
def train(self):
    dt1 = time()
    # this is the optimization function. spark already has this one. we'll use
    # that. it takes the cost function and the parameter vector and updates the
    # parameter vector; the process continues until training is complete.
    self.preprocess()
    params = mymin(self.cost, self.param, method='L-BFGS-B',
                   jac=self.gradient, options={'maxiter': 1})
    self.param = params.x  # store the updated parameters
    print params
    dt2 = time()

def train(self):
    dt1 = datetime.datetime.now()
    print 'before training: ', dt1
    # this is the optimization function. spark already has this one. we'll use
    # that. it takes the cost function and the parameter vector and updates the
    # parameter vector; the process continues until training is complete.
    self.preprocess()
    params = mymin(self.cost, self.param, method='L-BFGS-B',
                   jac=self.gradient, options={'maxiter': 1})
    self.param = params.x
    print self.param
    dt2 = datetime.datetime.now()
    print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()

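# Sketch (an assumption about the intent of maxiter=1 above): each call performs
# a single L-BFGS-B step, so convergence has to be driven by an outer loop that
# warm-starts from the previous parameters, e.g.:
import numpy
from scipy.optimize import minimize

def run_steps(cost, gradient, param, steps=100, tol=1e-6):
    for _ in range(steps):
        res = minimize(cost, param, method='L-BFGS-B', jac=gradient,
                       options={'maxiter': 1})
        if numpy.allclose(res.x, param, atol=tol):  # no further progress
            break
        param = res.x  # warm start the next step
    return param
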
def train(self, tr=False):
    ''' Train the model using scipy's minimization of the cost function '''
    if tr:
        self.set_feature_vector()
        params = mymin(self.cost, self.model, method='L-BFGS-B', options={"disp": True})
        if params.success:
            self.model = params.x
        else:
            print "Training Failed"
        with open("model.pkl", "wb") as f:
            pickle.dump(self.model, f)
        return self.model
    else:
        with open("model.pkl", "rb") as f:
            self.model = pickle.load(f)
        return self.model

def train(self):
    dt1 = datetime.datetime.now()
    print 'before training: ', dt1
    # self.preprocess()
    params = mymin(self.cost, self.param, method='L-BFGS-B',
                   jac=self.gradient1, options={'maxiter': 110})
    self.param = params.x
    print self.param
    # sanity check that the two gradient implementations agree:
    # import random
    # param = [random.random() for i in range(self.dim)]
    # print self.gradient1(param) == self.gradient2(param)
    dt2 = datetime.datetime.now()
    print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    print cost_no_of_calls, grad_no_of_calls

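# Sketch (hypothetical helper): counters like cost_no_of_calls / grad_no_of_calls
# above, and num_calls_cost / tot_time_cost in the driver below, are presumably
# maintained by instrumented wrappers around the cost and gradient functions,
# along these lines:
import time

def counted(fn, stats):
    # stats is a dict with 'calls' and 'seconds' keys, updated on every call
    def wrapper(*args, **kwargs):
        t0 = time.time()
        try:
            return fn(*args, **kwargs)
        finally:
            stats['calls'] += 1
            stats['seconds'] += time.time() - t0
    return wrapper

cost_stats = {'calls': 0, 'seconds': 0.0}
# self.cost = counted(self.cost, cost_stats)  # wrap before calling mymin(...)
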
def train(self):
    ''' Train the classifier '''
    params = mymin(self.cost, self.model, method='L-BFGS-B', jac=self.gradient)
    self.model = params.x

if __name__ == '__main__':
    features = [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14]
    # features = [f1, f2, f3]
    input_data, tags = create_input_dataset()
    distributed_input_data = sqlContext.jsonFile('data.json')
    gradient_preprocess1(input_data, list(set(tags)))
    # print distributed_input_data.show()
    all_tags = sc.broadcast(list(set(tags)))
    no_of_features = sc.broadcast(len(features))
    size = sc.broadcast(len(input_data))
    param = [0 for i in range(len(features))]
    # gradient1_new(param)
    # param = [1 for i in range(len(features))]
    # gradient1_new(param)
    dt1 = datetime.datetime.now()
    print 'before training: ', dt1
    params = mymin(cost1, param, method='L-BFGS-B',
                   jac=gradient1_new, options={'maxiter': 100})
    print params.x
    print params
    dt2 = datetime.datetime.now()
    print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    print 'For', num_calls_cost, 'calls to cost total time taken is', tot_time_cost
    print 'Per call avg time taken is', tot_time_cost / num_calls_cost
    print 'For', num_calls_gradient, 'calls to gradient total time taken is', tot_time_gradient
    print 'Per call avg time taken is', tot_time_gradient / num_calls_gradient

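# Sketch (standalone PySpark example, assuming a local Spark setup): the driver
# above wraps read-only lookup data in sc.broadcast() so every executor caches
# one copy instead of shipping it with each task; workers read it via .value.
from pyspark import SparkContext

sc = SparkContext('local[*]', 'broadcast-demo')
all_tags = sc.broadcast(['NN', 'VB', 'JJ'])  # shared, read-only tag list
counts = (sc.parallelize(['NN', 'JJ', 'NN'])
            .filter(lambda t: t in all_tags.value)  # executors read .value
            .count())
print counts
sc.stop()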