def generate_neighbours_multi_embed(embed, ent_list, k):
    """Compute the k nearest neighbours of every entity, in parallel.

    Splits ``ent_list`` into ``P.nums_threads`` fragments, scores each
    fragment's embeddings against the full embedding matrix in a worker
    process (``cal_neighbours_embed``), and merges the per-fragment
    neighbour dictionaries into a single dict.
    """
    frags = ut.div_list(np.array(ent_list), P.nums_threads)
    frag_rows = ut.div_list(np.array(range(len(ent_list))), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(frags))
    async_jobs = [
        pool.apply_async(cal_neighbours_embed,
                         (frag, np.array(ent_list), embed[rows, :], embed, k))
        for frag, rows in zip(frags, frag_rows)
    ]
    pool.close()
    pool.join()
    neighbours = dict()
    for job in async_jobs:
        neighbours = ut.merge_dic(neighbours, job.get())
    # Drop the (potentially large) embedding matrix eagerly.
    del embed
    gc.collect()
    return neighbours
def generate_neighbours_multi_embed(embed, ent_list, k, nums_threads):
    """Compute the k nearest neighbours of every entity, in parallel.

    Args:
        embed: embedding matrix; row i is the embedding of ent_list[i].
        ent_list: list of entity ids.
        k: number of neighbours to keep per entity.
        nums_threads: number of worker processes to spawn.

    Returns:
        dict mapping each entity id to its neighbours, merged from all
        worker results via ``ut.merge_dic``.
    """
    # Fix: removed dead instrumentation (t1 = time.time(), m1 =
    # psutil.virtual_memory().used) that fed only a commented-out print
    # referencing an undefined global `g`, and pulled in the third-party
    # `psutil` dependency for nothing.
    ent_frags = ut.div_list(np.array(ent_list), nums_threads)
    ent_frag_indexes = ut.div_list(np.array(range(len(ent_list))), nums_threads)
    pool = multiprocessing.Pool(processes=len(ent_frags))
    results = list()
    for i in range(len(ent_frags)):
        results.append(pool.apply_async(cal_neighbours_embed,
                                        (ent_frags[i], np.array(ent_list),
                                         embed[ent_frag_indexes[i], :], embed, k)))
    pool.close()
    pool.join()
    dic = dict()
    for res in results:
        dic = ut.merge_dic(dic, res.get())
    # Release the embedding matrix before returning; it can be large.
    del embed
    gc.collect()
    return dic
def fit_classif(X_train, y_train):
    '''Train the 3 different RF classifiers.

    Uses a CraterGenerator to augment the crater images, then fits one
    random-forest model per crater-size bucket (28, 38, 58).

    Args:
        X_train: array of images; the generator path assumes shape
            (n, 224, 224) with a channel axis added by the generator
            — TODO confirm against CraterGenerator.
        y_train: per-image crater annotations, filtered by ``verify``.

    Returns:
        (rdf_28, rdf_38, rdf_58): the three fitted classifiers.
    '''
    dic = get_dictionnary_craters(X_train, y_train)
    for key in dic:
        if (key - 8) % 10 == 0:
            print('Shape ', key, ':', len(dic[key]), 'craters')

    # Fix: the valid-sample index list was built twice (once for X, once
    # for y) with the `== True` anti-idiom; build it a single time.
    # Assumes verify() returns a bool — TODO confirm.
    valid_idx = [i for i in range(X_train.shape[0]) if verify(y_train[i])]
    j = 0
    for X_, Y_ in CraterGenerator(X_train[valid_idx], y_train[valid_idx]):
        dic2 = get_dictionnary_craters(
            np.array(X_)[:, :, :, 0].reshape((len(X_), 224, 224)), Y_)
        dic = merge_dic(dic, dic2)
        j += 1
        if j == 100:  # cap augmentation at 100 generated batches
            break
    for key in dic:
        if (key - 8) % 10 == 0:
            print('Shape ', key, ':', len(dic[key]), 'craters')

    dic_size_non_crater = get_non_craters(X_train, y_train, None)
    for key in dic_size_non_crater:
        if (key - 8) % 10 == 0:
            print('Shape ', key, ':', len(dic_size_non_crater[key]), 'craters')

    # Pool the 48/58/68 buckets together and train the 58 model on them.
    dic[58] = dic[48] + dic[58] + dic[68]
    rdf_58 = get_model_by_size(58, dic, dic_size_non_crater, 3)
    rdf_38 = get_model_by_size(38, dic, dic_size_non_crater, 3)
    rdf_28 = get_model_by_size(28, dic, dic_size_non_crater, 3)
    del dic, dic_size_non_crater
    return rdf_28, rdf_38, rdf_58
def generate_neighbours(entity_embeds, entity_list, neighbors_num, threads_num):
    """Find the nearest neighbours of each entity using a process pool.

    The entity list is split into ``threads_num`` fragments; each worker
    runs ``find_neighbours`` on its fragment's embeddings against the
    full embedding matrix, and the per-fragment dictionaries are merged
    into a single result.
    """
    entity_list = np.array(entity_list)
    frags = task_divide(entity_list, threads_num)
    row_groups = task_divide(np.array(range(len(entity_list))), threads_num)
    pool = multiprocessing.Pool(processes=len(frags))
    pending = []
    for frag, rows in zip(frags, row_groups):
        pending.append(pool.apply_async(
            find_neighbours,
            args=(frag, entity_list, entity_embeds[rows, :],
                  entity_embeds, neighbors_num)))
    pool.close()
    pool.join()
    dic = dict()
    for job in pending:
        dic = merge_dic(dic, job.get())
    # Let go of the worker handles before collecting.
    del pending
    gc.collect()
    return dic
def stable_alignment(embed1, embed2, metric, normalize, csls_k, nums_threads, cut=100, sim_mat=None):
    """Run stable (Gale-Shapley) alignment between two embedding sets.

    Candidate preference lists for both sides are built in parallel from
    the similarity matrix (computed here unless ``sim_mat`` is given),
    then matched with ``galeshapley``. Prints timing for candidate
    generation and the precision/time of the final alignment.
    """
    start = time.time()
    if sim_mat is None:
        sim_mat = sim(embed1, embed2, metric=metric, normalize=normalize, csls_k=csls_k)

    kg1_candidates = dict()
    kg2_candidates = dict()

    # Preference lists for KG1 (rows of sim_mat).
    row_count = sim_mat.shape[0]
    x_tasks = task_divide(np.array(range(row_count)), nums_threads)
    pool = multiprocessing.Pool(processes=len(x_tasks))
    futures = []
    covered = 0
    for task in x_tasks:
        covered += len(task)
        futures.append(pool.apply_async(arg_sort, (task, sim_mat[task, :], 'x_', 'y_')))
    # Sanity check: the task split must cover every row exactly once.
    assert covered == row_count
    pool.close()
    pool.join()
    for fut in futures:
        kg1_candidates = merge_dic(kg1_candidates, fut.get())

    # Preference lists for KG2 (rows of the transposed matrix).
    sim_mat = sim_mat.T
    col_count = sim_mat.shape[0]
    y_tasks = task_divide(np.array(range(col_count)), nums_threads)
    pool = multiprocessing.Pool(processes=len(y_tasks))
    futures = []
    for task in y_tasks:
        futures.append(pool.apply_async(arg_sort, (task, sim_mat[task, :], 'y_', 'x_')))
    pool.close()
    pool.join()
    for fut in futures:
        kg2_candidates = merge_dic(kg2_candidates, fut.get())

    print(
        "generating candidate lists costs time {:.3f} s ".format(time.time() - start))

    start = time.time()
    matching = galeshapley(kg1_candidates, kg2_candidates, cut)
    # Entity ids encode the gold index after the final '_'; a pair is
    # correct when both sides carry the same index.
    hits = 0
    for left, right in matching.items():
        if int(left.split('_')[-1]) == int(right.split('_')[-1]):
            hits += 1
    cost = time.time() - start
    print("stable alignment precision = {:.3f}%, time = {:.3f} s ".format(
        hits / len(matching) * 100, cost))