def get_text_embedding_layer(self):
    """Return the customized word-embedding layer for description text.

    Built lazily on first call and cached on self; used by
    feature_extracter_from_texts.
    """
    if self.text_embedding_layer is not None:
        return self.text_embedding_layer
    # Vocabulary size is capped at MAX_NUM_WORDS; +1 because index 0 is
    # reserved for the zero-padding vector.
    vocab_size = 1 + min(self.args.MAX_NUM_WORDS,
                         len(data_repository.get_md().des_pd.word2index))
    # Per-word pretrained embedding vectors for the dictionary.
    self.text_embedding_matrix = get_embedding_matrix(
        data_repository.get_md().des_pd.word2index,
        self.args.embedding_name,
        dimension=self.args.embedding_dim)
    print('built embedding matrix, done!')
    self.text_embedding_layer = Embedding(
        vocab_size,
        self.args.embedding_dim,
        embeddings_initializer=Constant(self.text_embedding_matrix),
        embeddings_regularizer=regularizers.l2(
            self.args.embeddings_regularizer),
        input_length=self.args.MAX_SEQUENCE_LENGTH,
        mask_zero=True,  # propagate the padding mask to downstream layers
        trainable=self.args.embedding_train,
        name='text_embedding_layer')
    print('built text embedding layer, done!')
    return self.text_embedding_layer
def get_tag_embedding_layer(self):
    """Return the customized word-embedding layer for tags.

    Same scheme as the text embedding layer but over the category
    dictionary; built lazily and cached, used by tag_feature_extractor.
    """
    if self.tag_embedding_layer is not None:
        return self.tag_embedding_layer
    # Dictionary size capped at MAX_NUM_WORDS; +1 for the zero-padding row.
    vocab_size = 1 + min(self.args.MAX_NUM_WORDS,
                         len(data_repository.get_md().cate_pd.word2index))
    # Per-tag-word pretrained embedding vectors.
    self.tag_embedding_matrix = get_embedding_matrix(
        data_repository.get_md().cate_pd.word2index,
        self.args.embedding_name,
        dimension=self.args.embedding_dim)
    print('built tag embedding matrix, done!')
    self.tag_embedding_layer = Embedding(
        vocab_size,
        self.args.embedding_dim,
        embeddings_initializer=Constant(self.tag_embedding_matrix),
        embeddings_regularizer=regularizers.l2(
            self.args.embeddings_regularizer),
        input_length=self.args.MAX_TAGS_NUM,
        mask_zero=True,  # padding ids are masked out downstream
        trainable=self.args.embedding_train,
        name='tag_embedding_layer')
    print('built tag embedding layer, done!')
    return self.tag_embedding_layer
def __init__(self, wordindex2emb, gd , HIN_path='', features=None, semantic_name='', if_text_sem=True, if_tag_sem=True, if_mashup_sem=True, if_api_sem=True): self.ws = word_sim(wordindex2emb) # embedding 层参数 self.num_users = data_repository.get_md().mashup_num self.num_items = data_repository.get_md().api_num self.semantic_name = semantic_name # 输入的特征的名字 默认为空,是利用CI模型做的;其他的使用HDP等要输入 self.gd = gd # 要用到其中的编码 # 利用外部传入的content和tag的feature计算相似度 if features is not None: if len(features) == 2: if if_text_sem and not if_tag_sem: # 可以只用text的语义特征,PasRec等使用 self.mashup_texts_features, self.api_texts_features = features if if_mashup_sem and not if_api_sem: # 可以只用mashup的语义特征 HIN中使用 self.mashup_texts_features, self.mashup_tag_features = features elif len(features) == 4: self.mashup_texts_features, self.mashup_tag_features, self.api_texts_features, self.api_tag_features = features self.mashup_apis_dict = list2dict(data_repository.get_md().mashup_api_list) self.api_id2provider = data_repository.get_md().api_df['API_Provider'] # self.path= os.path.join(HIN_path,self.name) # 存放在CI文件夹下!data_repository.get_ds().data_root no_kcv_root_path self.path = HIN_path # 存放相似度的路径 kcvIndex/CIModelPath/HIN_sims/ if not os.path.exists(self.path): os.makedirs(self.path) self.p1_sims, self.p2_sims, self.p3_sims, self.p4_sims, self.p5_sims, self.p6_sims = None, None, None, None, None, None self.p1_sims_sem, self.p2_sims_sem, self.p3_sims_sem, self.p4_sims_sem = None, None, None, None self.flag1, self.flag2, self.flag3, self.flag4, self.flag5, self.flag6 = False, False, False, False, False, False self.flag1_sem, self.flag2_sem, self.flag4_sem = False, False, False
def get_default_gd(tag_times=2, mashup_only=False, strict_train=False):
    """Build the default gensim_data over mashup and api texts + tags.

    :param tag_times: how many times each tag is repeated in the corpus
    :param mashup_only: restrict the training corpus to mashup documents
    :param strict_train: train only on the training split
    :return: a gensim_data instance processing both mashups and apis uniformly
    """
    md = data_repository.get_md()
    return gensim_data(
        get_iterable_values(md.mashup_df, 'final_description'),
        get_iterable_values(md.mashup_df, 'Categories'),
        get_iterable_values(md.api_df, 'final_description'),
        get_iterable_values(md.api_df, 'Categories'),
        tag_times=tag_times,
        mashup_only=mashup_only,
        strict_train=strict_train)
def set_embedding_layers(self):
    """Create the frozen lookup layer mapping api ids to implicit factors.

    Assumes self.i_factors_matrix has been filled (see set_embedding_matrixs).
    """
    num_rows = data_repository.get_md().api_num + 1  # +1 row for the padding id
    self.api_implict_emb_layer = Embedding(
        num_rows,
        self.args.implict_feat_dim,
        embeddings_initializer=Constant(self.i_factors_matrix),
        mask_zero=False,
        trainable=False,  # pretrained factors are kept fixed
        name='api_implict_embedding_layer')
def get_binary_v(self):
    """Return binary bag-of-words matrices and per-entity word-index lists.

    :return: (mashup_binary_matrix, api_binary_matrix,
              mashup_words_list, api_words_list); each matrix has shape
              (num + 1, dict_size) with 1.0 where the word occurs.
    """
    dict_size = len(self.dct)
    md = data_repository.get_md()

    def _binarize(dow, num):
        # One row per id (0..num inclusive); words_list keeps each id's
        # tuple of occurring word indexes.
        matrix = np.zeros((num + 1, dict_size))
        words_list = []
        for eid in range(num + 1):
            word_ids, _ = zip(*dow[eid])
            words_list.append(word_ids)
            matrix[eid, list(word_ids)] = 1.0
        return matrix, words_list

    mashup_binary_matrix, mashup_words_list = _binarize(self.mashup_dow, md.mashup_num)
    api_binary_matrix, api_words_list = _binarize(self.api_dow, md.api_num)
    return mashup_binary_matrix, api_binary_matrix, mashup_words_list, api_words_list
def get_name(self):
    """Name used when recording results: data info + model info.

    :return: cached composite name '<meta-data name>_<dataset name>_<simple name>'
    """
    if not self.name:
        parts = (data_repository.get_md().name,
                 data_repository.get_ds().name,
                 self.simple_name)
        self.name = '_'.join(parts)
    return self.name
def model_pcs(self, model_name, LDA_topic_num=None):
    """Fit a gensim model on the corpus and return dense mashup/api features.

    The same corpus may be processed by different models in turn.
    HDP output looks like [(0, 0.0322...), (1, 0.0236...)].

    :param model_name: 'HDP', 'TF_IDF' or 'LDA'
    :param LDA_topic_num: topic count for LDA; None uses gensim's default
    :return: (dense_mashup_features, dense_api_features) as numpy arrays
    :raises ValueError: on an unknown model_name
    """
    # Select the training corpus according to the configured scope.
    if self.mashup_only:
        train_corpus = self.train_mashup_dow if self.strict_train else self.mashup_dow
    elif self.strict_train:
        train_corpus = self.train_mashup_dow + self.api_dow
    else:
        train_corpus = self.mashup_dow + self.api_dow

    if model_name == 'HDP':
        self.model = HdpModel(train_corpus, self.dct)
        self.num_topics = self.model.get_topics().shape[0]
        print('num_topics', self.num_topics)
    elif model_name == 'TF_IDF':
        self.model = TfidfModel(train_corpus)
        self.num_topics = len(self.dct)
    elif model_name == 'LDA':
        if LDA_topic_num is None:
            self.model = LdaModel(train_corpus)
        else:
            self.model = LdaModel(train_corpus, num_topics=LDA_topic_num)
        self.num_topics = self.model.get_topics().shape[0]
    else:
        raise ValueError('wrong gensim_model name!')

    # Sparse (topic, weight) features. mashup_dow/api_dow cover ALL
    # mashups/apis, so the resulting lists are indexed by global id.
    self.mashup_features = [self.model[doc] for doc in self.mashup_dow]
    self.api_features = [self.model[doc] for doc in self.api_dow]

    # Densify: only some topic dimensions carry values; expand to full arrays.
    md = data_repository.get_md()
    self.dense_mashup_features = np.zeros((md.mashup_num, self.num_topics))
    self.dense_api_features = np.zeros((md.api_num, self.num_topics))
    for i in range(md.mashup_num):
        for topic_id, weight in self.mashup_features[i]:
            self.dense_mashup_features[i][topic_id] = weight
    for i in range(md.api_num):
        for topic_id, weight in self.api_features[i]:
            self.dense_api_features[i][topic_id] = weight
    return self.dense_mashup_features, self.dense_api_features
def set_embedding_matrixs(self):
    """Build the id -> implicit-embedding lookup matrix for apis.

    Fills self.i_factors_matrix (shape: api_num + 1 x implict_feat_dim, the
    extra row stays all-zero for ids absent from the MF embeddings).
    """
    self.i_factors_matrix = np.zeros(
        (data_repository.get_md().api_num + 1, self.args.implict_feat_dim))
    api_emb_df = data_repository.get_ds().MF_obj.api_emb_df
    for api_id, embedding in zip(api_emb_df.index.tolist(),
                                 api_emb_df.embedding.tolist()):
        if isinstance(embedding, str):
            # NOTE(review): eval on persisted text — fine for trusted local
            # files, but ast.literal_eval would be safer if the format allows.
            embedding = eval(embedding)
        self.i_factors_matrix[api_id] = embedding
def get_topTopics(self, topTopicNum=3):
    """Return the indexes of the highest-probability topics per mashup/api.

    :param topTopicNum: how many top topics to keep for each entity
    :return: (mashup_topics, api_topics) — lists of tuples of topic indexes,
             each at most topTopicNum long, indexed by global id
    """
    def _top_topics(features):
        # features: sparse [(topic_id, weight), ...] for one mashup/api.
        sorted_feature = sorted(features, key=lambda x: x[1], reverse=True)
        try:
            topic_indexes, _ = zip(*sorted_feature)
        except ValueError:
            # The bow may be non-empty while the model feature is empty;
            # fall back to random topics.
            # FIX: sample from the topic space (self.num_topics), not from the
            # mashup/api id space as before — those are unrelated ranges.
            topic_indexes = random.sample(range(self.num_topics), topTopicNum)
        return topic_indexes[:min(len(topic_indexes), topTopicNum)]

    mashup_topics = [_top_topics(self.mashup_features[i])
                     for i in range(data_repository.get_md().mashup_num)]
    api_topics = [_top_topics(self.api_features[i])
                  for i in range(data_repository.get_md().api_num)]
    return mashup_topics, api_topics
def get_mashup_api_features(self):
    """Return text/tag features for every mashup and api.

    Features come from the feature extractor (or average pooling) and are
    indexable directly by id; used to build the text part of instances.
    Results are cached with pickle at self.ma_text_tag_feas_path.

    :return: (mashup_texts_features, mashup_tag_features,
              api_texts_features, api_tag_features)
    """
    if os.path.exists(self.ma_text_tag_feas_path):
        # Cached on disk from a previous run.
        with open(self.ma_text_tag_feas_path, 'rb') as f1:
            mashup_texts_features, mashup_tag_features, api_texts_features, api_tag_features = pickle.load(
                f1)
    else:
        # The four outputs are user_text_vec, item_text_vec, user_tag_vec, item_tag_vec:
        # tap the four inputs of the 'all_content_concatenate' layer.
        text_tag_middle_model = Model(
            inputs=[*self.model.inputs[:2]],
            outputs=[
                self.model.get_layer('all_content_concatenate').input[0],
                self.model.get_layer('all_content_concatenate').input[1],
                self.model.get_layer('all_content_concatenate').input[2],
                self.model.get_layer('all_content_concatenate').input[3]
            ])
        # Predict for all mashups, pairing each with dummy api id 0; only the
        # mashup-side outputs are kept.
        feature_mashup_ids = data_repository.get_md(
        ).mashup_df.index.tolist()
        feature_instances_tuple = self.get_instances(
            feature_mashup_ids, [0] * len(feature_mashup_ids))
        mashup_texts_features, _1, mashup_tag_features, _2 = text_tag_middle_model.predict(
            [*feature_instances_tuple], verbose=0)
        # Symmetrically for all apis with dummy mashup id 0.
        feature_api_ids = data_repository.get_md().api_df.index.tolist()
        feature_instances_tuple = self.get_instances(
            [0] * len(feature_api_ids), feature_api_ids)
        _1, api_texts_features, _2, api_tag_features = text_tag_middle_model.predict(
            [*feature_instances_tuple], verbose=0)
        with open(self.ma_text_tag_feas_path, 'wb') as f2:
            pickle.dump((mashup_texts_features, mashup_tag_features,
                         api_texts_features, api_tag_features), f2)
    return mashup_texts_features, mashup_tag_features, api_texts_features, api_tag_features
def set_mashup_api_features(self, recommend_model):
    """
    TODO Set mashup/api text and tag features, used to compute similarities
    and then the NI representation of mashups; must be called before
    get_model() and get_instances().

    :param recommend_model: CI model supplying all feature vectors
    """
    # NOTE(review): this call passes (mashup_num, api_num) while the
    # get_mashup_api_features seen elsewhere in this file takes no arguments —
    # presumably recommend_model is a different class; verify the signature.
    self.mashup_texts_features, self.mashup_tag_features, self.api_texts_features, self.api_tag_features = \
        recommend_model.get_mashup_api_features(data_repository.get_md().mashup_num,
                                                data_repository.get_md().api_num)
    # Apis need one extra all-zero row at the end (id == api_num) used to pad
    # the selected-apis (slt_apis) inputs.
    self.api_tag_features = np.vstack(
        (self.api_tag_features, np.zeros((1, self.word_embedding_dim))))
    self.api_texts_features = np.vstack(
        (self.api_texts_features, np.zeros((1, self.inception_fc_unit_nums[-1]))))
    self.features = (self.mashup_texts_features, self.mashup_tag_features,
                     self.api_texts_features, self.api_tag_features)
    self.CI_path = recommend_model.model_dir
def set_others(self):
    """Derive training-split fields; call after set_data() or read_data()."""
    # Sorted, de-duplicated ids of mashups present in the training split.
    self.his_mashup_ids = np.unique(self.train_df['mashup'].values)
    self.his_mashup_ids_set = set(self.his_mashup_ids)
    print('mashup num in training set :{}'.format(len(
        self.his_mashup_ids)))
    self.train_mashup_api_list = [
        pair for pair in data_repository.get_md().mashup_api_list
        if pair[0] in self.his_mashup_ids_set
    ]
    self.train_mashup_api_dict = list2dict(self.train_mashup_api_list)
    # Models vary with the data, so they are stored under the data folder.
    # The '{}' placeholders are filled later with simple_model_name / epoch.
    self.model_path = os.path.join(self.data_root, '{}')
    self.new_best_epoch_path = os.path.join('{}', 'best_epoch.dat')
    self.new_model_para_path = os.path.join('{}', 'weights_{}.h5')
    self.new_best_NDCG_path = os.path.join('{}', 'best_NDCG.dat')
def feature_extracter_from_texts(self, mashup_api=None):
    """Build the right-hand text feature-extraction model for descriptions.

    Both mashup and service descriptions need feature extraction; the shared
    path returns a wrapped Model reusable by both.

    :param mashup_api: None by default; only 'mashup'/'api' for the
        fixed-vector modes ('HDP'/'Bert')
    :return: a Keras model (or fixed-vector extractor) producing text features
    :raises TypeError: on a wrong mashup_api value or an unknown extractor mode
    """
    if self.args.text_extracter_mode in fixed_vector_modes and mashup_api is not None:
        if self.args.text_extracter_mode == 'Bert':
            tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            bertModel = BertModel.from_pretrained("bert-base-uncased")
            if mashup_api == 'mashup':
                if self.mashup_text_feature_extracter is None:  # not computed yet
                    mashup_texts = get_iterable_values(
                        data_repository.get_md().mashup_df,
                        'final_description',
                        return_ele_type='str')
                    dense_mashup_features = bertModel(
                        tokenizer(mashup_texts, return_tensors='tf'))
                    self.mashup_text_feature_extracter = vector_feature_extracter_from_texts(
                        'mashup', dense_mashup_features)
                return self.mashup_text_feature_extracter
            elif mashup_api == 'api':
                if self.api_text_feature_extracter is None:
                    api_texts = get_iterable_values(
                        data_repository.get_md().api_df,
                        'final_description',
                        return_ele_type='str')
                    dense_api_features = bertModel(
                        tokenizer(api_texts, return_tensors='tf'))
                    self.api_text_feature_extracter = vector_feature_extracter_from_texts(
                        'api', dense_api_features)
                return self.api_text_feature_extracter
            else:
                raise TypeError('wrong mashup_api mode!')
        else:
            # Gensim-based fixed vectors (e.g. HDP): process texts without tags.
            if self.gd is None:
                self.gd = get_default_gd(
                    tag_times=0, mashup_only=False,
                    strict_train=True)
                self.gd.model_pcs(self.args.text_extracter_mode)  #
            if mashup_api == 'mashup':
                if self.mashup_text_feature_extracter is None:  # not computed yet
                    self.mashup_text_feature_extracter = vector_feature_extracter_from_texts(
                        'mashup', self.gd.dense_mashup_features)
                return self.mashup_text_feature_extracter
            elif mashup_api == 'api':
                if self.api_text_feature_extracter is None:
                    self.api_text_feature_extracter = vector_feature_extracter_from_texts(
                        'api', self.gd.dense_api_features)
                return self.api_text_feature_extracter
            else:
                raise TypeError('wrong mashup_api mode!')
    elif self.text_feature_extracter is None:  # not built yet
        if 'trainable_bert' in self.args.text_extracter_mode.lower():
            # Use DistilBERT itself as the (optionally frozen) feature layer.
            self.text_feature_extracter = TFDistilBertModel.from_pretrained(
                "distilbert-base-uncased")  # layer
            if self.args.frozen_bert:
                self.text_feature_extracter.trainable = False
        else:
            text_input = Input(shape=(self.args.MAX_SEQUENCE_LENGTH, ),
                               dtype='int32')
            text_embedding_layer = self.get_text_embedding_layer(
            )  # parameters should eventually come from external input!
            text_embedded_sequences = text_embedding_layer(
                text_input)  # to 2D
            if self.args.text_extracter_mode in (
                    'inception', 'textCNN'):  # 2D -> 3D; third dim is channel
                # print(text_embedded_sequences.shape)
                text_embedded_sequences = Lambda(
                    lambda x: tf.expand_dims(x, axis=3))(
                        text_embedded_sequences)  # tf and keras tensors differ!!!
                print(text_embedded_sequences.shape)
            if self.args.text_extracter_mode == 'inception':
                x = inception_layer(
                    text_embedded_sequences, self.args.embedding_dim,
                    self.args.inception_channels,
                    self.args.inception_pooling)  # inception processing
                print('built inception layer, done!')
            elif self.args.text_extracter_mode == 'textCNN':
                x = textCNN_feature_extracter_from_texts(
                    text_embedded_sequences, self.args)
            elif self.args.text_extracter_mode == 'LSTM':
                x = LSTM_feature_extracter_from_texts(
                    text_embedded_sequences, self.args)
            else:
                raise TypeError('wrong extracter!')
            # Inspect the module output before the MLP transformation.
            print('text feature after inception/textCNN/LSTM whole_model,', x)
            for FC_unit_num in self.args.inception_fc_unit_nums:
                x = Dense(FC_unit_num,
                          kernel_regularizer=l2(self.args.l2_reg))(
                              x)  # , activation='relu'
                if self.args.inception_MLP_BN:
                    x = BatchNormalization(scale=False)(x)
                x = PReLU()(x)  #
                if self.args.inception_MLP_dropout:
                    x = tf.keras.layers.Dropout(0.5)(x)
            self.text_feature_extracter = Model(
                text_input, x, name='text_feature_extracter')
    return self.text_feature_extracter
def get_all_encoded_comments(self):
    """Encode mashup/api descriptions and tags into unpadded id sequences."""
    md = data_repository.get_md()
    self.unpadded_encoded_mashup_texts = self.encode(
        get_iterable_values(md.mashup_df, 'final_description'))
    self.unpadded_encoded_mashup_tags = self.encode(
        get_iterable_values(md.mashup_df, 'Categories'))
    self.unpadded_encoded_api_texts = self.encode(
        get_iterable_values(md.api_df, 'final_description'))
    self.unpadded_encoded_api_tags = self.encode(
        get_iterable_values(md.api_df, 'Categories'))
def set_text_tag_enconding_layers(self):
    """Set id->token-sequence encoding layers from the meta-data encodings.

    Builds four frozen Embedding lookups (mashup/api x text/tag) whose rows
    are the padded token-id sequences, so a single entity id fetches its
    whole encoded sequence.
    """
    all_mashup_num = data_repository.get_md().mashup_num
    all_api_num = data_repository.get_md().api_num
    if 'bert' in self.args.text_extracter_mode.lower():
        tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')

        def encode(text):
            # Encode with DistilBertTokenizer, padded/truncated to the BERT max length.
            encoded_text = tokenizer.encode(
                text,
                add_special_tokens=True,
                truncation=True,
                padding='max_length',
                max_length=self.args.MAX_BERT_SEQUENCE_LENGTH)
            return encoded_text

        mid2encoded_text = data_repository.get_md(
        ).mashup_df['Description'].tolist()
        mid2encoded_text[-1] = ''  # '' becomes nan; must convert before every use! TODO
        mid2encoded_text = np.array(
            [encode(text) for text in mid2encoded_text])
        aid2encoded_text = data_repository.get_md(
        ).api_df['Description'].tolist()
        aid2encoded_text[-1] = ''
        aid2encoded_text = np.array(
            [encode(text) for text in aid2encoded_text])
    else:
        mid2encoded_text = data_repository.get_md(
        ).mashup_df['padded_description'].tolist()
        if isinstance(mid2encoded_text[0], str):  # TODO
            # presumably sequences persisted as strings; eval restores the lists
            mid2encoded_text = list(map(eval, mid2encoded_text))
        aid2encoded_text = data_repository.get_md(
        ).api_df['padded_description'].tolist()
        if isinstance(aid2encoded_text[0], str):  # TODO
            aid2encoded_text = list(map(eval, aid2encoded_text))
    MAX_LENGTH = self.args.MAX_BERT_SEQUENCE_LENGTH if 'bert' in self.args.text_extracter_mode.lower(
    ) else self.args.MAX_SEQUENCE_LENGTH
    self.mashup_text_encoding_layer = Embedding(
        all_mashup_num + 1,
        MAX_LENGTH,
        embeddings_initializer=Constant(mid2encoded_text),
        mask_zero=True,
        input_length=1,
        trainable=False,
        name='mashup_text_encoding_layer')
    self.api_text_encoding_layer = Embedding(
        all_api_num + 1,
        MAX_LENGTH,
        embeddings_initializer=Constant(aid2encoded_text),
        mask_zero=True,
        input_length=1,
        trainable=False,
        name='api_text_encoding_layer')
    mid2encoded_tags = data_repository.get_md(
    ).mashup_df['padded_categories'].tolist()
    if isinstance(mid2encoded_tags[0], str):  # TODO
        mid2encoded_tags = list(map(eval, mid2encoded_tags))
    self.mashup_tag_encoding_layer = Embedding(
        all_mashup_num + 1,
        self.args.MAX_TAGS_NUM,
        embeddings_initializer=Constant(mid2encoded_tags),
        mask_zero=True,
        input_length=1,
        trainable=False,
        name='mashup_tag_encoding_layer')
    aid2encoded_tags = data_repository.get_md(
    ).api_df['padded_categories'].tolist()
    if isinstance(aid2encoded_tags[0], str):
        aid2encoded_tags = list(map(eval, aid2encoded_tags))
    self.api_tag_encoding_layer = Embedding(
        all_api_num + 1,
        self.args.MAX_TAGS_NUM,
        embeddings_initializer=Constant(aid2encoded_tags),
        mask_zero=True,
        input_length=1,
        trainable=False,
        name='api_tag_encoding_layer')
    return
def Samanta(topK, if_pop=2, MF_mode='node2vec', pop_mode='', text_mode='HDP', LDA_topic_num=None):
    """Run the Samanta baseline: KNN-composed MF feature x text similarity.

    :param topK: use the topK nearest mashups to compose the MF feature of a new query
    :param if_pop: how popularity is used — 0: not used; 1: rerank only; 2: rank by the full product
    :param MF_mode: matrix-factorization mode (node2vec used for simplicity)
    :param pop_mode: whether the popularity value is squashed to (0,1) via sigmoid
    :param text_mode: feature-extraction approach, LDA or HDP
    :param LDA_topic_num: topic count when text_mode == 'LDA'
    :return: None (evaluation results are written via summary())
    """
    api2pop = None
    if if_pop:
        api_co_vecs, api2pop = data_repository.get_md().get_api_co_vecs(
            pop_mode)  # TODO
    root = os.path.join(data_repository.get_ds().data_root, 'baselines')
    if not os.path.exists(root):
        os.makedirs(root)
    mashup_feature_path = os.path.join(
        root, 'mashup_{}.txt'.format(text_mode))  # ...
    api_feature_path = os.path.join(root, 'api_{}.txt'.format(text_mode))
    # Obtain (or load cached) mashup/api topic features.
    if not os.path.exists(api_feature_path):
        gd = get_default_gd()
        _mashup_features, _api_features = gd.model_pcs(text_mode, LDA_topic_num)
        np.savetxt(mashup_feature_path, _mashup_features)
        np.savetxt(api_feature_path, _api_features)
    else:
        _mashup_features = np.loadtxt(mashup_feature_path)
        _api_features = np.loadtxt(api_feature_path)
    candidate_ids_list = []
    all_predict_results = []
    test_data = data_repository.get_ds().test_data
    test_mashup_num = len(test_data.get('mashup'))
    mashup_emb_df = data_repository.get_ds().MF_obj.mashup_emb_df
    api_emb_df = data_repository.get_ds().MF_obj.api_emb_df
    for i in range(test_mashup_num):
        test_m_id = test_data.get('mashup')[i][0]  # each mashup id
        candidate_ids = test_data.get('api')[i]
        candidate_ids_list.append(candidate_ids)
        # Represent the query by a similarity-weighted sum of its neighbors'
        # latent factors.
        mid2sim = {}
        for train_m_id in mashup_emb_df.index.tolist():
            mid2sim[train_m_id] = cos_sim(_mashup_features[test_m_id],
                                          _mashup_features[train_m_id])  # TODO
        topK_ids, topK_sims = zip(*(
            sorted(mid2sim.items(), key=lambda x: x[1], reverse=True)[:topK]))
        topK_sims = np.array(topK_sims) / sum(topK_sims)  # normalize sims
        cf_feature = np.zeros((data_repository.get_args().implict_feat_dim, ))
        for z in range(len(topK_ids)):
            cf_feature += topK_sims[z] * mashup_emb_df['embedding'][
                topK_ids[z]]
        # Score every candidate api.
        predict_results = []
        temp_predict_results = []  # helper when reranking by popularity
        api_zeros = np.zeros((data_repository.get_args().implict_feat_dim))
        api_ids = set(api_emb_df.index.tolist())
        for api_id in candidate_ids:  # id
            # A test api may never appear in training; use a zero vector then.
            api_i_feature = api_emb_df['embedding'][
                api_id] if api_id in api_ids else api_zeros
            cf_score = np.sum(np.multiply(
                api_i_feature, cf_feature))  # inner product of latent factors
            sim_score = cos_sim(_mashup_features[test_m_id],
                                _api_features[api_id])  # cosine of features
            if if_pop == 1:
                temp_predict_results.append((api_id, cf_score * sim_score))
            elif if_pop == 0:
                predict_results.append(cf_score * sim_score)
            elif if_pop == 2:
                predict_results.append(cf_score * sim_score * api2pop[api_id])
        if if_pop == 1:
            # First rank by the product, then rerank the topK by popularity.
            max_k_pairs = heapq.nlargest(topK, temp_predict_results,
                                         key=lambda x: x[1])
            max_k_candidates, _ = zip(*max_k_pairs)
            max_k_candidates = set(max_k_candidates)
            predict_results = [
                api2pop[api_id] if api_id in max_k_candidates else -1
                for api_id in candidate_ids
            ]  # rerank
        all_predict_results.append(predict_results)
    print('Samanta test,done!')
    evaluate_result = evalute(
        candidate_ids_list, all_predict_results,
        data_repository.get_ds().test_data.get('all_ground_api_ids'),
        data_repository.get_args().topKs)  # evaluate
    _name = '_pop_{}'.format(if_pop)
    _name += data_repository.get_args().mf_mode
    csv_table_name = data_repository.get_ds().name + 'Samanta_model_{}'.format(
        topK) + _name + "\n"  # whole_model.name
    summary(evaluate_path, csv_table_name, evaluate_result,
            data_repository.get_args().topKs)  # record

    def divide(slt_apiNum):
        # Split the test results by the number of already-selected apis.
        test_api_id_list_, predictions_, grounds_ = [], [], []
        for i in range(test_mashup_num):
            if len(data_repository.get_ds().slt_api_ids_instances[i]
                   ) == slt_apiNum:
                test_api_id_list_.append(candidate_ids_list[i])
                predictions_.append(all_predict_results[i])
                grounds_.append(data_repository.get_ds().test_data.get(
                    'all_ground_api_ids')[i])
        return test_api_id_list_, predictions_, grounds_

    if data_repository.get_args().data_mode == 'newScene':
        # Additionally evaluate per selected-api count (1..3).
        for slt_apiNum in range(3):
            test_api_id_list_, predictions_, grounds_ = divide(slt_apiNum + 1)
            evaluate_result = evalute(test_api_id_list_, predictions_,
                                      grounds_,
                                      data_repository.get_args().topKs)
            summary(evaluate_path,
                    str(slt_apiNum + 1) + '_' + csv_table_name,
                    evaluate_result,
                    data_repository.get_args().topKs)  #