Exemplo n.º 1
0
def _get_sequence_init(level, cluster_num, sequences1, sequences2, k, w):
    """Build the initial 0/1 membership matrix for ``cluster_num`` clusters.

    The first cluster seed is drawn at random.  Each further seed is drawn
    from the sequences whose distance to *every* existing seed is at least
    ``CLUSTER_DIST_THRESHOLD``; when no sequence qualifies, the non-seed
    sequence with the largest summed distance to the existing seeds is used.
    Afterwards each cluster row marks the sequences whose distance to that
    cluster's seed is among the ``k`` smallest; rows with more than ``k``
    marks (ties) are cut down to ``k`` by a seeded random draw that always
    keeps the seed itself.

    Side effects: reseeds the global ``numpy.random`` and ``random``
    generators (``RAND_SEED_INIT`` / ``RAND_SEED_K``) and prints progress.

    Args:
        level: Forwarded unchanged to ``cskfuzzy.get_distance``.
        cluster_num: Number of clusters (rows of the result).
        sequences1: First sequence collection; its length fixes the number
            of columns of the result.
        sequences2: Second sequence collection, indexed in parallel with
            ``sequences1``.
        k: Number of members each cluster row should end up with.
        w: Forwarded unchanged to ``cskfuzzy.get_distance``.

    Returns:
        Integer ``numpy`` array of shape ``(cluster_num, len(sequences1))``
        holding 1 where a sequence initially belongs to a cluster, else 0.
    """
    seq_count = len(sequences1)
    # Convert once instead of on every distance call.
    seqs1 = np.array(sequences1)
    seqs2 = np.array(sequences2)
    distance = np.ones((cluster_num, seq_count))

    np.random.seed(RAND_SEED_INIT)
    # NOTE(review): randint's upper bound is exclusive, so the last sequence
    # can never become the first seed.  Left unchanged so that seeded runs
    # stay reproducible -- confirm whether this is intended.
    init = np.random.randint(0, seq_count - 1, 1)
    print("--init 3:", init)
    distance[0, :] = cskfuzzy.get_distance(level, w, sequences1, sequences2,
                                           seqs1[init], seqs2[init])

    random.seed(RAND_SEED_INIT)
    # Pick the remaining seeds far away from the ones chosen so far.
    for c in range(1, cluster_num):
        far_enough = (distance[0:c, :] >= CLUSTER_DIST_THRESHOLD).all(axis=0)
        far_cluster = list(set(np.where(far_enough)[0]) - set(init))
        if not far_cluster:
            # Nothing clears the threshold: fall back to the non-seed
            # sequence with the largest total distance to existing seeds.
            not_init = list(set(range(seq_count)) - set(init))
            ranked = sorted(not_init,
                            key=lambda x: distance[0:c, x].sum(axis=0))
            far_cluster = [ranked[-1]]
        add_init = random.sample(far_cluster, 1)
        distance[c, :] = cskfuzzy.get_distance(level, w, sequences1,
                                               sequences2,
                                               seqs1[add_init],
                                               seqs2[add_init])
        init = np.append(init, add_init)
    print("[fuzzy c means]- get_init_u> \n-- init:", init)

    # Mark, per cluster, every sequence no farther from the seed than the
    # k-th smallest distance (ties can yield more than k marks).
    kth_smallest = np.sort(distance, axis=1)[:, k - 1]
    u = (distance <= kth_smallest[:, np.newaxis]).astype(int)

    random.seed(RAND_SEED_K)
    print("--each cluster initial # before random choose:", u.sum(axis=1))
    for i in range(cluster_num):
        if u[i].sum() > k:
            indices = set(np.where(u[i] == 1)[0])
            indices.remove(init[i])
            # random.sample() no longer accepts a set (TypeError on
            # Python 3.11+); tuple() is the conversion older versions
            # applied internally, so seeded draws are unchanged there.
            rand_k = random.sample(tuple(indices), k - 1)
            u[i, :] = 0
            u[i, rand_k] = 1
            u[i, init[i]] = 1
    print("--each cluster initial #:", u.sum(axis=1))
    return u
Exemplo n.º 2
0
def _distance_c_c(level, c, k, w, data1, data2, center):
    """Return a ``c`` x ``c`` matrix of averaged distances between clusters.

    The entries of ``data1`` / ``data2`` selected by ``center`` are handed to
    ``cskfuzzy.get_distance``; the resulting matrix is summed over blocks of
    ``k`` consecutive rows and ``k`` consecutive columns, divided by
    ``k * k``, and its diagonal is zeroed.

    Args:
        level: Forwarded unchanged to ``cskfuzzy.get_distance``.
        c: Number of clusters (size of the returned square matrix).
        k: Number of consecutive entries that make up one cluster's block.
        w: Forwarded unchanged to ``cskfuzzy.get_distance``.
        data1: First data collection, indexed by ``center``.
        data2: Second data collection, indexed by ``center``.
        center: Index used to select the entries to compare -- presumably
            ``c * k`` indices grouped cluster by cluster; verify against the
            caller.

    Returns:
        ``numpy`` array of shape ``(c, c)`` with zeros on the diagonal.
    """
    picked1 = np.array(data1)[center]
    picked2 = np.array(data2)[center]
    pairwise = cskfuzzy.get_distance(level, w, picked1, picked2)

    # Row i is 1 on columns i*k .. i*k + k - 1 and 0 elsewhere, so the two
    # products below add up each k-by-k block of ``pairwise``.
    membership = np.repeat(np.eye(c), k, axis=1)

    block_sums = np.dot(np.dot(membership, pairwise), membership.T)
    averaged = block_sums / (k * k)
    np.fill_diagonal(averaged, 0)
    return averaged
Exemplo n.º 3
0
Arquivo: index.py Projeto: nextmap/Liu
def _distance_c_c(level, c, k, w, data1, data2, center):
    """Return a ``c`` x ``c`` matrix of averaged distances between clusters.

    The entries of ``data1`` / ``data2`` selected by ``center`` are handed to
    ``cskfuzzy.get_distance``; the resulting matrix is summed over blocks of
    ``k`` consecutive rows and ``k`` consecutive columns, divided by
    ``k * k``, and its diagonal is zeroed.

    Args:
        level: Forwarded unchanged to ``cskfuzzy.get_distance``.
        c: Number of clusters (size of the returned square matrix).
        k: Number of consecutive entries that make up one cluster's block.
        w: Forwarded unchanged to ``cskfuzzy.get_distance``.
        data1: First data collection, indexed by ``center``.
        data2: Second data collection, indexed by ``center``.
        center: Index used to select the entries to compare -- presumably
            ``c * k`` indices grouped cluster by cluster; verify against the
            caller.

    Returns:
        ``numpy`` array of shape ``(c, c)`` with zeros on the diagonal.
    """
    picked1 = np.array(data1)[center]
    picked2 = np.array(data2)[center]
    pairwise = cskfuzzy.get_distance(level, w, picked1, picked2)

    # Row i is 1 on columns i*k .. i*k + k - 1 and 0 elsewhere, so the two
    # products below add up each k-by-k block of ``pairwise``.
    membership = np.repeat(np.eye(c), k, axis=1)

    block_sums = np.dot(np.dot(membership, pairwise), membership.T)
    averaged = block_sums / (k * k)
    np.fill_diagonal(averaged, 0)
    return averaged