Example #1
0
def generate_neighbours_multi_embed(embed, ent_list, k):
    """Compute the k nearest neighbours of every entity in parallel.

    Splits ``ent_list`` into ``P.nums_threads`` fragments, scores each
    fragment's embeddings against the full embedding matrix in a worker
    process via ``cal_neighbours_embed``, and merges the per-fragment
    neighbour dictionaries into a single result.
    """
    entities = np.array(ent_list)
    frags = ut.div_list(entities, P.nums_threads)
    index_frags = ut.div_list(np.array(range(len(ent_list))), P.nums_threads)
    pool = multiprocessing.Pool(processes=len(frags))
    async_results = [
        pool.apply_async(cal_neighbours_embed,
                         (frag, np.array(ent_list), embed[idx, :], embed, k))
        for frag, idx in zip(frags, index_frags)
    ]
    pool.close()
    pool.join()
    neighbours = dict()
    for res in async_results:
        neighbours = ut.merge_dic(neighbours, res.get())
    # Release the (potentially large) embedding matrix eagerly.
    del embed
    gc.collect()
    return neighbours
Example #2
0
def generate_neighbours_multi_embed(embed, ent_list, k, nums_threads):
    """Compute the k nearest neighbours of every entity in parallel.

    Parameters
    ----------
    embed : np.ndarray
        Embedding matrix; row i corresponds to ent_list[i].
    ent_list : list
        Entity ids whose neighbours are wanted.
    k : int
        Number of neighbours to keep per entity.
    nums_threads : int
        Number of worker processes to fan the work out over.

    Returns
    -------
    dict
        Per-entity neighbour lists, merged from all workers.
    """
    ent_frags = ut.div_list(np.array(ent_list), nums_threads)
    ent_frag_indexes = ut.div_list(np.array(range(len(ent_list))), nums_threads)
    pool = multiprocessing.Pool(processes=len(ent_frags))
    results = list()
    for i in range(len(ent_frags)):
        results.append(pool.apply_async(cal_neighbours_embed,
                                        (ent_frags[i], np.array(ent_list), embed[ent_frag_indexes[i], :], embed, k)))
    pool.close()
    pool.join()
    dic = dict()
    for res in results:
        dic = ut.merge_dic(dic, res.get())
    # Removed the dead time/psutil probes (t1, m1) that only fed a
    # commented-out print.  Free the embedding matrix eagerly.
    del embed
    gc.collect()
    return dic
Example #3
0
def fit_classif(X_train, y_train):
    ''' Train the 3 differents RF classifiers. We use a CraterGenerator
    in order to get more images of craters. '''

    dic = get_dictionnary_craters(X_train, y_train)
    for key in dic:
        if (key - 8) % 10 == 0:
            print('Shape ', key, ':', len(dic[key]), 'craters')

    # Indices of samples whose labels pass verify().  Computed once: the
    # original evaluated the same comprehension twice (calling verify 2n
    # times) and compared against True explicitly.
    valid_idx = [i for i in range(X_train.shape[0]) if verify(y_train[i])]

    j = 0
    for X_, Y_ in CraterGenerator(X_train[valid_idx], y_train[valid_idx]):
        dic2 = get_dictionnary_craters(
            np.array(X_)[:, :, :, 0].reshape((len(X_), 224, 224)), Y_)
        dic = merge_dic(dic, dic2)
        j += 1
        # Cap augmentation at 100 generator batches.
        if j == 100:
            break

    for key in dic:
        if (key - 8) % 10 == 0:
            print('Shape ', key, ':', len(dic[key]), 'craters')

    dic_size_non_crater = get_non_craters(X_train, y_train, None)
    for key in dic_size_non_crater:
        if (key - 8) % 10 == 0:
            print('Shape ', key, ':', len(dic_size_non_crater[key]), 'craters')

    # Pool the 48/58/68 size buckets under key 58 so they share one model.
    dic[58] = dic[48] + dic[58] + dic[68]

    rdf_58 = get_model_by_size(58, dic, dic_size_non_crater, 3)
    rdf_38 = get_model_by_size(38, dic, dic_size_non_crater, 3)
    rdf_28 = get_model_by_size(28, dic, dic_size_non_crater, 3)

    del dic, dic_size_non_crater

    return rdf_28, rdf_38, rdf_58
Example #4
0
def generate_neighbours(entity_embeds, entity_list, neighbors_num, threads_num):
    """Find the nearest neighbours of every entity with a process pool.

    The entity list is divided into ``threads_num`` chunks; each worker
    scores its slice of ``entity_embeds`` against the full matrix via
    ``find_neighbours`` and the per-chunk dictionaries are merged into
    one mapping.
    """
    entity_list = np.array(entity_list)
    frags = task_divide(entity_list, threads_num)
    index_frags = task_divide(np.array(range(len(entity_list))), threads_num)

    pool = multiprocessing.Pool(processes=len(frags))
    async_results = [
        pool.apply_async(find_neighbours,
                         args=(frag, entity_list,
                               entity_embeds[idx, :],
                               entity_embeds, neighbors_num))
        for frag, idx in zip(frags, index_frags)
    ]
    pool.close()
    pool.join()

    neighbours = dict()
    for res in async_results:
        neighbours = merge_dic(neighbours, res.get())

    del async_results
    gc.collect()
    return neighbours
Example #5
0
def _rank_candidates(sim_mat, nums_threads, key_prefix, value_prefix):
    """Arg-sort each row of sim_mat in parallel; return the merged dict.

    Rows are split into nums_threads tasks, each passed to ``arg_sort``
    with the given key/value prefixes (e.g. 'x_'/'y_'), and the partial
    dictionaries are merged.
    """
    num = sim_mat.shape[0]
    tasks = task_divide(np.array(range(num)), nums_threads)
    pool = multiprocessing.Pool(processes=len(tasks))
    rests = list()
    total = 0
    for task in tasks:
        total += len(task)
        mat = sim_mat[task, :]
        rests.append(pool.apply_async(arg_sort, (task, mat, key_prefix, value_prefix)))
    assert total == num
    pool.close()
    pool.join()
    candidates = dict()
    for rest in rests:
        candidates = merge_dic(candidates, rest.get())
    return candidates


def stable_alignment(embed1,
                     embed2,
                     metric,
                     normalize,
                     csls_k,
                     nums_threads,
                     cut=100,
                     sim_mat=None):
    """Compute a stable (Gale-Shapley) alignment between two embedding sets.

    Builds (or reuses) the similarity matrix, generates ranked candidate
    lists for both sides in parallel, runs Gale-Shapley matching, and
    prints the precision (a pair 'x_i' -> 'y_j' counts as correct when
    i == j).

    Returns
    -------
    dict
        The matching produced by galeshapley ('x_i' -> 'y_j').  The
        original version computed this and returned None; returning it
        is backward compatible.
    """
    t = time.time()
    if sim_mat is None:
        sim_mat = sim(embed1,
                      embed2,
                      metric=metric,
                      normalize=normalize,
                      csls_k=csls_k)

    # The two candidate-generation passes were copy-pasted; they now share
    # one helper (the y-side simply ranks the transposed matrix).
    kg1_candidates = _rank_candidates(sim_mat, nums_threads, 'x_', 'y_')
    kg2_candidates = _rank_candidates(sim_mat.T, nums_threads, 'y_', 'x_')

    print(
        "generating candidate lists costs time {:.3f} s ".format(time.time() -
                                                                 t))
    t = time.time()
    matching = galeshapley(kg1_candidates, kg2_candidates, cut)
    n = 0
    for i, j in matching.items():
        if int(i.split('_')[-1]) == int(j.split('_')[-1]):
            n += 1
    cost = time.time() - t
    print("stable alignment precision = {:.3f}%, time = {:.3f} s ".format(
        n / len(matching) * 100, cost))
    return matching