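# begintest runs one labeling pass with the current models: pickled linear
# reward models (scikit-learn style, exposing coef_ / intercept_) score each
# answer, and per question type (P2P / P2G / G2G) an xgboost Booster estimates
# Q-values for both candidate actions. On the very first iteration no Q model
# exists yet, so Q-values fall back to random numbers and actions are
# effectively random.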
def begintest(self, iteration=0):
    # Reward models were previously loaded through libsvm:
    # model_R_p2p = svm_load_model(
    #     os.path.join(
    #         self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
    #         'model_r_p2p.model'))
    # model_R_p2G = svm_load_model(
    #     os.path.join(
    #         self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
    #         'model_r_p2g.model'))
    # model_R_G2G = svm_load_model(
    #     os.path.join(
    #         self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
    #         'model_r_g2g.model'))
    # They are now pickled linear classifiers; pickle files must be opened
    # in binary mode.
    with open(
            os.path.join(
                self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                'model_r_p2p.model'), 'rb') as f:
        model_R_p2p = pickle.load(f)
    with open(
            os.path.join(
                self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                'model_r_p2g.model'), 'rb') as f:
        model_R_p2G = pickle.load(f)
    with open(
            os.path.join(
                self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                'model_r_g2g.model'), 'rb') as f:
        model_R_G2G = pickle.load(f)

    # Q-value boosters only exist after the first training iteration.
    is_first_iteration = False
    model_dir = self.config.get('REID', 'REWARD_MODEL_SAVED_PATH')
    if os.path.exists(os.path.join(model_dir, 'model_q_p2p.model')):
        model_Q_p2p = xgb.Booster(
            model_file=os.path.join(model_dir, 'model_q_p2p.model'))
        model_Q_p2G = xgb.Booster(
            model_file=os.path.join(model_dir, 'model_q_p2g.model'))
        model_Q_G2G = xgb.Booster(
            model_file=os.path.join(model_dir, 'model_q_g2g.model'))
    else:
        is_first_iteration = True

    data = [[0, 0, 0]]    # one-row feature buffer for the reward models
    data_Q = [[0, 0, 0]]  # one-row buffer for the Q models ([action] + features)
    index = 0
    reward = 0
    decision = Dicision.Dicision()
    t01 = time.time()
    while self.frame.checkState(check_batch=True):
        package = self.frame.getObservation()
        index += 1
        if type(package) == int:
            print 'Done!'
            break
        data[0] = package
        # the feature length encodes the question type
        question_type = len(package)
        model = None
        if question_type == 3:
            # point-----point
            if not is_first_iteration:
                model = model_Q_p2p.copy()
            tp = 'P2P'
            # Reward Function
            # action_R, _, confidence = svm_predict([0], data, model_R_p2p,
            #                                       '-b 1 -q')
            # confidence = model_R_p2p.predict_proba(data)
            w = model_R_p2p.coef_[0]
            b = model_R_p2p.intercept_[0]
            # Q-value for action = 0
            temp = package[:]
            temp.insert(0, 0)
            data_Q[0] = temp
            DM_data = xgb.DMatrix(np.array(data_Q))
            if not is_first_iteration:
                value_0 = model_Q_p2p.predict(DM_data)
            else:
                value_0 = [random.random()]
            del temp[0]
            # Q-value for action = 1
            temp.insert(0, 1)
            data_Q[0] = temp
            DM_data = xgb.DMatrix(np.array(data_Q))
            if not is_first_iteration:
                value_1 = model_Q_p2p.predict(DM_data)
            else:
                value_1 = [random.random()]
            # choose the higher-valued action
            if value_1[0] >= value_0[0]:
                action = [1]
            else:
                action = [0]
        elif question_type == 3 + self.frame.k_size:
            # point-----group or group-----point
            if not is_first_iteration:
                model = model_Q_p2G.copy()
            tp = 'P2G'
            # Reward Function
            # action_R, _, confidence = svm_predict([0], data, model_R_p2G,
            #                                       '-b 1 -q')
            # confidence = model_R_p2G.predict_proba(data)
            w = model_R_p2G.coef_[0]
            b = model_R_p2G.intercept_[0]
            # Q-value for action = 0
            temp = package[:]
            temp.insert(0, 0)
            data_Q[0] = temp
            DM_data = xgb.DMatrix(np.array(data_Q))
            if not is_first_iteration:
                value_0 = model_Q_p2G.predict(DM_data)
            else:
                value_0 = [random.random()]
            del temp[0]
            # Q-value for action = 1
            temp.insert(0, 1)
            data_Q[0] = temp
            DM_data = xgb.DMatrix(np.array(data_Q))
            if not is_first_iteration:
                value_1 = model_Q_p2G.predict(DM_data)
            else:
                value_1 = [random.random()]
            # choose the higher-valued action
            if value_1[0] >= value_0[0]:
                action = [1]
            else:
                action = [0]
        else:
            # group-----group
            if not is_first_iteration:
                model = model_Q_G2G.copy()
            tp = 'G2G'
            # Reward Function
            # action_R, _, confidence = svm_predict([0], data, model_R_G2G,
            #                                       '-b 1 -q')
            # confidence = model_R_G2G.predict_proba(data)
            w = model_R_G2G.coef_[0]
            b = model_R_G2G.intercept_[0]
            # Q-value for action = 0
            temp = package[:]
            temp.insert(0, 0)
            data_Q[0] = temp
            DM_data = xgb.DMatrix(np.array(data_Q))
            if not is_first_iteration:
                value_0 = model_Q_G2G.predict(DM_data)
            else:
                value_0 = [random.random()]
            del temp[0]
            # Q-value for action = 1
            temp.insert(0, 1)
            data_Q[0] = temp
            DM_data = xgb.DMatrix(np.array(data_Q))
            if not is_first_iteration:
                value_1 = model_Q_G2G.predict(DM_data)
            else:
                value_1 = [random.random()]
            # choose the higher-valued action
            if value_1[0] > value_0[0]:
                action = [1]
            else:
                action = [0]

        # t-lambda processing (iteration in [1,400]): epsilon-greedy
        # exploration. P(random action) = max(0, 1 - 0.025 * iteration),
        # so the policy becomes fully greedy from iteration 40 onward.
        if random.random() >= (0.025 * iteration):
            action = [random.randint(0, 1)]
        # reward of the action: the unnormalized margin |w.x + b| of the
        # linear reward model (replaces the old libsvm confidence below)
        # reward_action = 10 * abs(2 * confidence[0] - 1)
        reward_action = abs(np.sum(np.multiply(w, package)) + b)
        # record the operation count before the action, to measure the
        # change it causes
        self.frame.Normalize_label()
        operatenum_pre = Evaluator.evaluate(self.dataset.imgID,
                                            self.frame.label, [0])
        # check whether the action was accepted
        action_result = self.frame.setPerception(action, save=False)
        if action_result == False:
            reward_action = -reward_action
        # save history
        self.puthistory(package, action, reward_action, operatenum_pre,
                        model)

    if not self.inference:
        # calculate metrics
        self.frame.Normalize_label()
        self.Recall = Evaluate.Recall(self.dataset.imgID, self.frame.label)
        self.Precision = Evaluate.Precision(self.dataset.imgID,
                                            self.frame.label)
        self.operatenum = Evaluator.evaluate(self.dataset.imgID,
                                             self.frame.label, [0])
        self.Recall_edge = Evaluate.Recall_edge(self.dataset.imgID,
                                                self.frame.label, 0)
        self.Precision_edge = Evaluate.Precision_edge(
            self.dataset.imgID, self.frame.label)
        print self.dataset.size, self.Recall_edge, self.Precision_edge, self.operatenum
        with open(
                os.path.join(
                    self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                    'xgboost_output_nstepsarsa_origin.log'), 'a') as f:
            f.write('{}, {}, {}, {}\n'.format(
                self.dataset.size, self.Recall_edge,
                self.Precision_edge, self.operatenum))


if __name__ == '__main__':
    t = test()
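
# The three question-type branches above share one decision pattern: prepend a
# candidate action (0 or 1) to the feature package, wrap the row in an
# xgb.DMatrix, and keep the action with the larger predicted Q-value. Below is
# a minimal standalone sketch of that pattern; the helper name greedy_action
# and its booster=None fallback (mirroring the untrained first iteration) are
# illustrative, not part of this module.
def greedy_action(booster, package):
    values = []
    for a in (0, 1):
        # one-row matrix: [action, feature_1, ..., feature_n]
        row = xgb.DMatrix(np.array([[a] + list(package)], dtype=float))
        if booster is not None:
            values.append(float(booster.predict(row)[0]))
        else:
            values.append(random.random())  # no trained Q model yet
    # prefer action 1 on ties, as the P2P/P2G branches do
    return [int(values[1] >= values[0])]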