Exemplo n.º 1
0
    def transfer_false_test_MF(self):
        """Rebuild the train/test data for MF, one split per selected-API count.

        The positive (mashup, API) pairs of the current training set are
        collected once, each distinct pair only once.  Then, for every count
        ``i`` in ``1..self.args.slt_item_num``, the test samples whose
        selected-API list has exactly ``i`` entries are turned into extra
        training pairs (mashup, selected API) on top of a copy of those
        positives, and the same samples are kept as the test set of that
        scenario.  Each augmented training set is also written to
        ``train_set_MF_{i}.data`` under ``self.data_root``.

        Side effects:
            ``self.train_data`` and ``self.test_data`` are replaced by lists
            holding one entry per count: a list of (mashup id, API id) pairs
            and a ``(test_mashup_id_list, test_api_id_list, grounds)`` tuple
            respectively.

        Returns:
            The new ``(self.train_data, self.test_data)``.
        """
        # Positive training pairs.  The set makes the "not seen before" check
        # effective, so a repeated (mashup, API) pair is kept only once.
        train_mashup_id_list, train_api_id_list = [], []
        seen_pairs = set()
        train_labels = self.train_data[-1]
        id_pairs = zip(self.train_data[0], self.train_data[1])
        for index, id_pair in enumerate(id_pairs):
            pair = tuple(id_pair)
            if train_labels[index] and pair not in seen_pairs:
                seen_pairs.add(pair)
                train_mashup_id_list.append(pair[0])
                train_api_id_list.append(pair[1])

        def certain_slt_num_split(train_mashup_id_list, train_api_id_list,
                                  slt_num):
            """Append the train/test split for samples with ``slt_num`` selected APIs.

            The two list arguments are extended in place, so the caller passes
            copies of the base positives.
            """
            # (mashup id, selected count) combinations already consumed: only
            # the first matching test sample of a mashup is used.
            seen_mashups = set()
            test_mashup_id_list, test_api_id_list, grounds = [], [], []
            for index, test_mashup_ids in enumerate(self.test_mashup_id_list):
                m_id = test_mashup_ids[0]
                slt_api_ids = self.test_slt_ids[index]
                if len(slt_api_ids) != slt_num:
                    continue
                key = (m_id, slt_num)
                if key in seen_mashups:
                    continue
                seen_mashups.add(key)

                # The APIs already selected in the test sample become
                # positive training pairs of this scenario.
                train_mashup_id_list.extend([m_id] * slt_num)
                train_api_id_list.extend(slt_api_ids)

                # The sample is still tested, in the original format.
                test_mashup_id_list.append(test_mashup_ids)
                test_api_id_list.append(self.test_api_id_list[index])
                grounds.append(self.grounds[index])

            # Pair list; per the original note it is consumed by get_U_V.
            self.train_data.append(
                list(zip(train_mashup_id_list, train_api_id_list)))
            self.test_data.append(
                (test_mashup_id_list, test_api_id_list, grounds))

        # Switch format: one train set and one test set per selected count.
        self.train_data, self.test_data = [], []
        for i in range(1, self.args.slt_item_num + 1):
            print('slt_num:', i)
            # NOTE(review): this reports the instance attribute, not the local
            # list of positives built above -- confirm which one is intended.
            print('before, train samples:{}'.format(
                len(self.train_mashup_id_list)))
            # Copies keep the base positives untouched between scenarios.
            certain_slt_num_split(list(train_mashup_id_list),
                                  list(train_api_id_list), i)
            augmented_train_set = self.train_data[-1]
            print('after, train samples:{}'.format(len(augmented_train_set)))
            true_train_set_path = os.path.join(
                self.data_root, 'train_set_MF_{}.data'.format(i))
            # Store the augmented training set; per the original note it is
            # processed by a Java tool afterwards.
            save_2D_list(true_train_set_path, augmented_train_set)
        print('transfer for MF,done!')
        return self.train_data, self.test_data
Exemplo n.º 2
0
 def save_true_train_data(self):
     """Collect the positive training pairs and store them if not stored yet.

     The pairs are the entries of ``self.train_mashup_api_list`` whose
     matching entry in ``self.train_labels`` is truthy.  They are written to
     ``train_set.data`` under ``self.root_path`` only when that file does
     not exist; an existing file is left untouched.

     Original author's note (translated): the file is meant for librec;
     because the selected services may differ, the same (m_id, a_id) pair
     may occur several times, so MF in the new scenario possibly should not
     use this data -- left as an open question.

     Returns:
         The list of positive (mashup, API) pairs, whether or not the file
         had to be written.
     """
     true_train_mashup_api_pairs = [
         self.train_mashup_api_list[index]
         for index, label in enumerate(self.train_labels)
         if label
     ]
     true_train_set_path = os.path.join(self.root_path, 'train_set.data')
     if not os.path.exists(true_train_set_path):
         save_2D_list(true_train_set_path, true_train_mashup_api_pairs)
     # Returned on both paths so callers never receive an implicit None.
     return true_train_mashup_api_pairs
Exemplo n.º 3
0
    def show_text_tag_features(self, train_data, show_num=10):
        """Dump intermediate text/tag features so they can be checked by eye.

        The first ``show_num`` training samples are run through a sub-model
        whose outputs are the first four inputs of the
        ``'all_content_concatenate'`` layer of ``self.model``.  The four
        outputs are saved to ``mashup_text_features.dat``,
        ``apis_text_features.dat``, ``mashup_tag_features.dat`` and
        ``apis_tag_features.dat`` under ``self.model_dir``.

        Args:
            train_data: Sequence whose last element is dropped.  The rest is
                ``(m_ids, a_ids)`` when ``self.old_new == 'old'`` and
                ``(m_ids, a_ids, slt_a_ids)`` when ``self.old_new == 'new'``.
            show_num: Number of leading samples to inspect.

        Raises:
            ValueError: If ``self.old_new`` is neither ``'old'`` nor ``'new'``.
        """
        if self.old_new == 'old':
            m_ids, a_ids = train_data[:-1]
            instances_tuple = self.get_instances(m_ids[:show_num],
                                                 a_ids[:show_num])
        elif self.old_new == 'new':
            m_ids, a_ids, slt_a_ids = train_data[:-1]
            instances_tuple = self.get_instances(m_ids[:show_num],
                                                 a_ids[:show_num],
                                                 slt_a_ids[:show_num])
        else:
            # Fail here with a clear message instead of an unbound-variable
            # error further down.
            raise ValueError(
                "old_new must be 'old' or 'new', got {!r}".format(
                    self.old_new))

        # Sub-model exposing the tensors that feed the concatenation layer.
        concat_inputs = self.model.get_layer('all_content_concatenate').input
        text_tag_middle_model = Model(inputs=[*self.model.inputs],
                                      outputs=[*concat_inputs[:4]])
        (mashup_text_features, apis_text_features, mashup_tag_features,
         apis_tag_features) = text_tag_middle_model.predict(
             [*instances_tuple], verbose=0)

        mashup_text_features_path = os.path.join(self.model_dir,
                                                 'mashup_text_features.dat')
        apis_text_features_path = os.path.join(self.model_dir,
                                               'apis_text_features.dat')
        mashup_tag_features_path = os.path.join(self.model_dir,
                                                'mashup_tag_features.dat')
        apis_tag_features_path = os.path.join(self.model_dir,
                                              'apis_tag_features.dat')

        # 'a+' is handed to save_2D_list as third argument -- presumably the
        # file mode, so repeated calls append; confirm against the helper.
        save_2D_list(mashup_text_features_path, mashup_text_features, 'a+')
        save_2D_list(apis_text_features_path, apis_text_features, 'a+')
        save_2D_list(mashup_tag_features_path, mashup_tag_features, 'a+')
        save_2D_list(apis_tag_features_path, apis_tag_features, 'a+')
Exemplo n.º 4
0
 def save(self):
     """Persist the train and test sample sets.

     Always writes the training instances with their labels, the test
     instances and the ground-truth API ids.  When
     ``new_Para.param.data_mode`` is ``'newScene'`` the selected-API ids of
     the train and test samples are written as well.
     """
     save_split_train(self.train_instances_path,
                      self.train_mashup_api_list,
                      self.train_labels)
     save_test_instance(self.test_instances_path,
                        self.test_mashup_id_list,
                        self.test_api_id_list)
     save_2D_list(self.all_ground_api_ids_path, self.grounds)

     if new_Para.param.data_mode != 'newScene':
         return

     # The new scenario additionally stores the already-selected API ids.
     save_2D_list(self.train_slt_ids_path, self.slt_api_ids_instances)
     save_2D_list(self.test_slt_ids_path, self.test_slt_ids)