Example #1
                # t-lambda processing (iteration in [1,400])
                if random.random() >= (0.025*iteration):
                    action = [random.randint(0,1)]

                # get the reward of the action
                reward_action=10*abs(2*confidence[0][0]-1)

                # record the operation count before the action is applied
                self.frame.Normalize_label()
                operatenum_pre=Evaluator.evaluate(self.dataset.imgID,self.frame.label,[1,2])

                # check whether the action was correct; negate the reward if not
                action_result=self.frame.setPerception(action)
                if not action_result:
                    reward_action=-reward_action
                # save history
                self.puthistory(package,action,reward_action,operatenum_pre)

        # calculate metrics
        self.frame.Normalize_label()
        self.Recall=Evaluate.Recall(self.dataset.imgID,self.frame.label)
        self.Precision=Evaluate.Precision(self.dataset.imgID,self.frame.label)
        self.operatenum=Evaluator.evaluate(self.dataset.imgID,self.frame.label,[1,2])
        self.Recall_edge=Evaluate.Recall_edge(self.dataset.imgID,self.frame.label,1)
        self.Precision_edge=Evaluate.Precision_edge(self.dataset.imgID,self.frame.label)
        print self.dataset.size,self.Recall_edge,self.Precision_edge,self.operatenum


if __name__=='__main__':
    t=test()
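
Both examples gate the model's greedy choice behind the same linearly decaying exploration test, random.random() >= 0.025 * iteration: a random action becomes less likely as the iteration number grows and stops altogether once 0.025 * iteration reaches 1 (around iteration 40). Below is a minimal, self-contained sketch of that schedule; the helper names take_random_action and choose_action are illustrative and not part of the original code.

import random

def take_random_action(iteration):
    # exploration probability decays linearly with the iteration number:
    # roughly 0.975 at iteration 1, and 0 once 0.025 * iteration >= 1
    return random.random() >= 0.025 * iteration

def choose_action(greedy_action, iteration):
    # keep the greedy action unless the decaying schedule asks for exploration
    if take_random_action(iteration):
        return [random.randint(0, 1)]
    return greedy_action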
Example #2
    def begintest(self, iteration=0):
        # model_R_p2p = svm_load_model(
        #     os.path.join(
        #         self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
        #         'model_r_p2p.model'))
        # model_R_p2G = svm_load_model(
        #     os.path.join(
        #         self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
        #         'model_r_p2g.model'))
        # model_R_G2G = svm_load_model(
        #     os.path.join(
        #         self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
        #         'model_r_g2g.model'))
        with open(
                os.path.join(
                    self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                    'model_r_p2p.model'), 'rb') as f:
            model_R_p2p = pickle.load(f)
        with open(
                os.path.join(
                    self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                    'model_r_p2g.model'), 'rb') as f:
            model_R_p2G = pickle.load(f)
        with open(
                os.path.join(
                    self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                    'model_r_g2g.model'), 'rb') as f:
            model_R_G2G = pickle.load(f)

        is_first_iteration = False

        model_dir = self.config.get('REID', 'REWARD_MODEL_SAVED_PATH')
        if os.path.exists(os.path.join(model_dir, 'model_q_p2p.model')):
            model_Q_p2p = xgb.Booster(
                model_file=os.path.join(model_dir, 'model_q_p2p.model'))
            model_Q_p2G = xgb.Booster(
                model_file=os.path.join(model_dir, 'model_q_p2g.model'))
            model_Q_G2G = xgb.Booster(
                model_file=os.path.join(model_dir, 'model_q_g2g.model'))
        else:
            is_first_iteration = True

        data = [[0, 0, 0]]
        data_Q = [[0, 0, 0]]
        index = 0
        reward = 0
        decision = Dicision.Dicision()
        t01 = time.time()

        while self.frame.checkState(check_batch=True):
            package = self.frame.getObservation()
            index += 1
            if isinstance(package, int):
                print 'Done!'
                break
            data[0] = package
            question_type = len(package)
            model = None
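            # dispatch on the observation length: 3 features -> point-point,
            # 3 + k_size features -> point-group, anything else -> group-group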
            if question_type == 3:  #point-----point
                if not is_first_iteration:
                    model = model_Q_p2p.copy()
                tp = 'P2P'
                #Reward Function
                # action_R, _, confidence = svm_predict([0], data, model_R_p2p,
                #                                       '-b 1 -q')
                # confidence = model_R_p2p.predict_proba(data)
                w = model_R_p2p.coef_[0]
                b = model_R_p2p.intercept_[0]
                #Reward Value Function: action = 0
                temp = package[:]
                temp.insert(0, 0)
                data_Q[0] = temp
                DM_data = xgb.DMatrix(np.array(data_Q))
                if not is_first_iteration:
                    value_0 = model_Q_p2p.predict(DM_data)
                else:
                    value_0 = [random.random()]
                del temp[0]
                #Reward Value Function: action = 1
                temp.insert(0, 1)
                data_Q[0] = temp
                DM_data = xgb.DMatrix(np.array(data_Q))
                if not is_first_iteration:
                    value_1 = model_Q_p2p.predict(DM_data)
                else:
                    value_1 = [random.random()]
                #choose the action with the higher predicted value
                if value_1[0] >= value_0[0]:
                    action = [1]
                else:
                    action = [0]

            elif question_type == 3 + self.frame.k_size:  #point-----Group or group---point
                if not is_first_iteration:
                    model = model_Q_p2G.copy()
                tp = 'P2G'
                #Reward Function
                # action_R, _, confidence = svm_predict([0], data, model_R_p2G,
                #                                       '-b 1 -q')
                # confidence = model_R_p2G.predict_proba(data)
                w = model_R_p2G.coef_[0]
                b = model_R_p2G.intercept_[0]
                #Reward Value Function: action = 0
                temp = package[:]
                temp.insert(0, 0)
                data_Q[0] = temp
                DM_data = xgb.DMatrix(np.array(data_Q))
                if not is_first_iteration:
                    value_0 = model_Q_p2G.predict(DM_data)
                else:
                    value_0 = [random.random()]
                del temp[0]
                #Reward Value Function: action = 1
                temp.insert(0, 1)
                data_Q[0] = temp
                DM_data = xgb.DMatrix(np.array(data_Q))
                if not is_first_iteration:
                    value_1 = model_Q_p2G.predict(DM_data)
                else:
                    value_1 = [random.random()]
                #choose the action with the higher predicted value
                if value_1[0] >= value_0[0]:
                    action = [1]
                else:
                    action = [0]
            else:
                if not is_first_iteration:
                    model = model_Q_G2G.copy()
                tp = 'G2G'
                #Reward Function
                # action_R, _, confidence = svm_predict([0], data, model_R_G2G,
                #                                       '-b 1 -q')
                # confidence = model_R_G2G.predict_proba(data)
                w = model_R_G2G.coef_[0]
                b = model_R_G2G.intercept_[0]
                #Reward Value Function: action = 0
                temp = package[:]
                temp.insert(0, 0)
                data_Q[0] = temp
                DM_data = xgb.DMatrix(np.array(data_Q))
                if not is_first_iteration:
                    value_0 = model_Q_G2G.predict(DM_data)
                else:
                    value_0 = [random.random()]
                del temp[0]
                #Reward Value Function: action = 1
                temp.insert(0, 1)
                data_Q[0] = temp
                DM_data = xgb.DMatrix(np.array(data_Q))
                if not is_first_iteration:
                    value_1 = model_Q_G2G.predict(DM_data)
                else:
                    value_1 = [random.random()]
                #choose the action with the higher predicted value
                if value_1[0] > value_0[0]:
                    action = [1]
                else:
                    action = [0]
            # get the original operation count
            # t-lambda processing  (iteration in [1,400])
            if random.random() >= (0.025 * iteration):
                action = [random.randint(0, 1)]

            # reward of the action: absolute value of the linear reward
            # model's decision function, |w . x + b|
            # reward_action = 10 * abs(2 * confidence[0] - 1)
            reward_action = abs(np.sum(np.multiply(w, package)) + b)

            # record the operation count before the action is applied
            self.frame.Normalize_label()
            operatenum_pre = Evaluator.evaluate(self.dataset.imgID,
                                                self.frame.label, [0])

            # check whether the action was correct; negate the reward if not
            action_result = self.frame.setPerception(action, save=False)
            if not action_result:
                reward_action = -reward_action
            #save history
            self.puthistory(package, action, reward_action, operatenum_pre,
                            model)

        if not self.inference:
            #calculate Metric
            self.frame.Normalize_label()
            self.Recall = Evaluate.Recall(self.dataset.imgID, self.frame.label)
            self.Precision = Evaluate.Precision(self.dataset.imgID,
                                                self.frame.label)
            self.operatenum = Evaluator.evaluate(self.dataset.imgID,
                                                 self.frame.label, [0])
            self.Recall_edge = Evaluate.Recall_edge(self.dataset.imgID,
                                                    self.frame.label, 0)
            self.Precision_edge = Evaluate.Precision_edge(
                self.dataset.imgID, self.frame.label)
            print self.dataset.size, self.Recall_edge, self.Precision_edge, self.operatenum
            with open(
                    os.path.join(
                        self.config.get('REID', 'REWARD_MODEL_SAVED_PATH'),
                        'xgboost_output_nstepsarsa_origin.log'), 'a') as f:
                f.write('{}, {}, {}, {}\n'.format(self.dataset.size,
                                                  self.Recall_edge,
                                                  self.Precision_edge,
                                                  self.operatenum))
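
The P2P, P2G and G2G branches above repeat the same two-action value lookup: prepend the candidate action to the observation, wrap it in an xgb.DMatrix, predict with the matching Q model, and fall back to a random value on the first iteration. One way to factor that out is sketched below, assuming the Q models are xgboost Boosters trained on [action] + package rows as in begintest; the helper name score_actions is illustrative.

import random

import numpy as np
import xgboost as xgb

def score_actions(package, model_Q, is_first_iteration):
    # return (value_0, value_1): the predicted values of taking action 0 or 1
    # in the state described by `package`; random on the first iteration,
    # mirroring the fallback used in begintest
    values = []
    for action in (0, 1):
        if is_first_iteration:
            values.append(random.random())
        else:
            row = [action] + list(package)
            values.append(model_Q.predict(xgb.DMatrix(np.array([row])))[0])
    return values[0], values[1]

# usage inside the question_type dispatch (illustrative):
#     value_0, value_1 = score_actions(package, model_Q_p2p, is_first_iteration)
#     action = [1] if value_1 >= value_0 else [0]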