def clusterCore(channelData1, covMatrixList1, channelData2, centroids, centroidUList, type):
    """Project two parallel lists of channel matrices into a transform domain.

    Args:
        channelData1: Indexable collection of matrices; ``np.shape(...)[0]`` is
            used as the number of pages to process.
        covMatrixList1: Covariance matrices belonging to ``channelData1``.
        channelData2: Second collection, indexed in lockstep with
            ``channelData1``.
        centroids: Cluster centroids handed to the ``kmeans`` assignment
            helpers (used by modes "C" and "U").
        centroidUList: One transform matrix per cluster; the column count of
            the first entry defines the reduced dimension.
        type: Mode selector. "C" assigns pages to clusters from their
            covariance matrices, "U" assigns them from the transform matrices
            and weights returned by ``tools.getInformations``, "S" gives every
            page its own transform truncated to the reduced dimension.

    Returns:
        ``(newChannelData1, newChannelData2)``: two lists of projected
        matrices. Both stay empty when ``type`` matches none of the modes.
    """
    projected1 = []
    projected2 = []
    reducedDim = np.shape(centroidUList[0])[1]
    pageCount = np.shape(channelData1)[0]

    def projectWithCentroids(assignment):
        # Column 0 of the assignment holds the cluster index of each page;
        # only its real part is used.
        for page in range(pageCount):
            basis = centroidUList[int(assignment[page, 0].real)]
            projected1.append(np.dot(channelData1[page], basis))
            projected2.append(np.dot(channelData2[page], basis))

    if type == "C":
        # Stack the covariance matrices into one matrix, then look up the
        # cluster of every page.
        stackedCov = tools.matrixListToMatrix(covMatrixList1)
        projectWithCentroids(kmeans.getClusterAssment(stackedCov, centroids))
    elif type == "U":
        _, sigmaList, uList = tools.getInformations(covMatrixList1)
        stackedU = tools.matrixListToMatrix_U(uList)
        stackedWeights = tools.matrixListToMatrix_U(sigmaList)
        projectWithCentroids(
            kmeans.getClusterAssment_U(stackedU, stackedWeights, centroids, reducedDim))
    elif type == "S":
        # No clustering: each page is projected with the leading columns of
        # its own transform matrix.
        covMatrixList2 = tools.getCovMatrixList(channelData2)
        ownU1 = tools.getInformations(covMatrixList1)[2]
        ownU2 = tools.getInformations(covMatrixList2)[2]
        for page in range(pageCount):
            projected1.append(np.dot(channelData1[page], ownU1[page][:, 0:reducedDim]))
            projected2.append(np.dot(channelData2[page], ownU2[page][:, 0:reducedDim]))

    return projected1, projected2
示例#2
0
def pca_U(channelDataList,
          informations,
          centroidList,
          clusterAssment,
          newDimension=1):
    """Reduce every channel matrix with the truncated transform of its cluster.

    Args:
        channelDataList: List of channel matrices.
        informations: Indexable whose element ``[0][i]`` is the reference
            value for matrix ``i`` (the denominator of the reported rate).
        centroidList: One transform matrix per cluster.
        clusterAssment: 2-D indexable; the real part of ``[i, 0]`` is the
            cluster index of matrix ``i``.
        newDimension: Number of leading columns kept from each centroid.

    Returns:
        ``(newChannelDataList, newCovMatrixList, U2s, rateList)`` where
        ``U2s`` are the truncated transforms and ``rateList`` is a one-element
        list holding a complex ``(len(channelDataList), 2)`` array. Column 1
        is the ratio of the value from ``tools.getInformations`` after
        reduction to the matching entry of ``informations``; column 0 is left
        at zero.
    """
    pageCount = len(channelDataList)
    rates = np.zeros((pageCount, 2), dtype=complex)

    # Truncated transform matrix for each cluster.
    U2s = [centroid[:, 0:newDimension] for centroid in centroidList]

    # Dimensionality reduction with the transform of the assigned cluster.
    newChannelDataList = [
        np.dot(channelDataList[page], U2s[int(clusterAssment[page, 0].real)])
        for page in range(pageCount)
    ]

    newCovMatrixList = tools.getCovMatrixList(newChannelDataList)
    reducedInformation = tools.getInformations(newCovMatrixList)[0]

    for page in range(pageCount):
        rates[page, 1] = reducedInformation[0][page] / informations[0][page]

    # The rates are wrapped in a list so they can be written out by callers.
    return newChannelDataList, newCovMatrixList, U2s, [rates]
示例#3
0
文件: elbow.py 项目: jinruimeng/keyan
def elbowCore(channelDataAll, a, k, iRate, schedule):
    """Compare three PCA strategies on every column band of the channel data.

    The columns of each page are cut into ``1 << a`` contiguous bands of
    ``n >> a`` columns. For every band the mean of the second rate column is
    collected for: PCA after clustering the covariance matrices, PCA after
    clustering the transform matrices, and PCA without clustering.

    Args:
        channelDataAll: List of 2-D arrays (pages); the column count is taken
            from the first page.
        a: Bands are ``1 << a`` in number, each ``n >> a`` columns wide.
        k: Number of clusters passed to the k-means helpers.
        iRate: Rate / dimension argument forwarded to the ``pca`` helpers.
        schedule: Mutable progress record. ``schedule[0]`` is read as the
            total number of parts, ``schedule[1]`` is incremented once per
            band.

    Returns:
        ``(rate_S, rate_C, rate_U)``: real parts of the per-band means
        averaged over all bands, for the no-clustering, covariance-clustering
        and transform-clustering variants respectively.
    """
    columnCount = np.shape(channelDataAll[0])[1]
    pageCount = len(channelDataAll)
    bandWidth = columnCount >> a
    keptC = []
    keptU = []
    keptS = []

    for band in range(1 << a):
        # Progress report: this part starts.
        schedule[1] += 1
        partNumber = schedule[1]
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) + u'部分开始!')

        # Cut the current band of columns out of every page.
        firstColumn = band * bandWidth
        channelData = [
            channelDataAll[page][:, firstColumn:firstColumn + bandWidth]
            for page in range(pageCount)
        ]

        covMatrixList = tools.getCovMatrixList(channelData)
        stackedCov = tools.matrixListToMatrix(covMatrixList)

        # Cluster the covariance matrices.
        centroids, clusterAssment = kmeans.KMeansOushi(stackedCov, k)
        centroidList = tools.matrixToMatrixList(centroids)

        # Values returned for the original channels by tools.getInformations.
        informations, SigmaList, UList = tools.getInformations(covMatrixList)

        # Evaluate PCA on the covariance clusters.
        bandRates = pca.pca(channelData, informations, centroidList, clusterAssment, iRate)[3][0][:, 1]
        keptC.append(np.mean(bandRates))

        # Cluster the transform matrices, weighted by the Sigma values.
        stackedU = tools.matrixListToMatrix_U(UList)
        stackedWeights = tools.matrixListToMatrix_U(SigmaList)
        centroids, clusterAssment = kmeans.KMeansOushi_U(stackedU, k, stackedWeights, iRate)
        centroidList = tools.matrixToMatrixList_U(centroids)

        # Evaluate PCA on the transform clusters.
        bandRates = pca.pca_U(channelData, informations, centroidList, clusterAssment, iRate)[3][0][:, 1]
        keptU.append(np.mean(bandRates))

        # PCA without any clustering.
        bandRates = pca.pca_S(SigmaList, iRate)[0][:, 1]
        keptS.append(np.mean(bandRates))

        # Progress report: this part is done.
        print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) + u'部分完成,' + u'已完成' + str(schedule[1]) + u'部分,' + u'完成度:' + '%.2f%%' % (schedule[1] / schedule[0] * 100) + u'!')

    meanC = np.mean(keptC)
    meanU = np.mean(keptU)
    meanS = np.mean(keptS)

    return meanS.real, meanC.real, meanU.real
示例#4
0
文件: elbow.py 项目: jinruimeng/keyan
def elbow2(channelDataAll, low, high, step, a, schedule):
    """Plot the mean rate reported by ``pca.pca_S`` against the kept dimension.

    The columns of every page are split into ``1 << a`` contiguous bands. For
    each band and each dimension ``low, low + step, ...`` the second column of
    ``pca.pca_S(SigmaList, dimension)[0]`` is averaged. The per-band means are
    then averaged over all bands and drawn with ``plt`` (presumably
    ``matplotlib.pyplot``; the import is outside this block).

    Args:
        channelDataAll: List of 2-D arrays (pages); the column count is taken
            from the first page.
        low: Smallest dimension to evaluate; must be greater than 0.
        high: Largest dimension to evaluate; must be smaller than the number
            of columns per band.
        step: Increment between evaluated dimensions.
        a: Bands are ``1 << a`` in number, each ``n >> a`` columns wide.
        schedule: Mutable progress record. ``schedule[0]`` is set to the
            number of bands, ``schedule[1]`` is incremented after each band.

    Returns:
        None. When a bound is invalid a message is printed and the function
        returns before touching ``schedule`` or drawing anything.
    """
    # Validate the parameters.
    if low <= 0:
        print(u'下限太低:下限小于等于0!')
        return

    # np.shape is used here, like everywhere else in this function, so the
    # check does not depend on a bare ``shape`` name being in scope.
    if high >= (np.shape(channelDataAll[0])[1] / (1 << a)):
        print(u'上限太高:降维后维度数大于原数据维度!')
        return

    # Number of evaluated dimensions and number of bands.
    time1 = int((high - low) / step + 1)
    time2 = 1 << a
    schedule[0] = time2

    # One row per band, one column per evaluated dimension.
    rates_S = np.zeros((time2, time1))

    n = np.shape(channelDataAll[0])[1]  # number of columns
    p = len(channelDataAll)  # number of pages
    sub = n >> a
    for g in range(time2):
        channelData = [
            channelDataAll[h][:, g * sub:(g + 1) * sub] for h in range(p)
        ]

        covMatrixList = tools.getCovMatrixList(channelData)

        # Only the Sigma values are needed for the no-clustering PCA.
        informations, SigmaList, UList = tools.getInformations(covMatrixList)
        for h in range(time1):
            tmpRates = pca.pca_S(SigmaList, h * step + low)[0][:, 1]
            rates_S[g, h] = np.mean(tmpRates).real

        # Progress report.
        schedule[1] += 1
        print(u'共' + str(schedule[0]) + u'轮,' + u'已完成' + str(schedule[1]) + u'轮,' + u'完成度:' + '%.2f%%' % (
                schedule[1] / schedule[0] * 100) + u'!')

    # Average over the bands for every evaluated dimension.
    SSE_S = [np.mean(rates_S[:, h]) for h in range(time1)]
    plt.xlabel(u'保留维度数k')

    X = range(low, high + 1, step)
    plt.ylabel(u'特征值保留')
    plt.plot(X, SSE_S, 'k-s')
    plt.show()
    print(u'主进程结束!')
示例#5
0
def clusterCore(channelData1, covMatrixList1, channelData2, centroids, centroidUList, type):
    """Transform two parallel lists of channel matrices according to ``type``.

    Args:
        channelData1: Indexable collection of channel matrices.
        covMatrixList1: Covariance matrices belonging to ``channelData1``
            (used by modes "C" and "U").
        channelData2: Second collection, indexed in lockstep with
            ``channelData1``.
        centroids: Cluster centroids handed to the ``kmeans`` assignment
            helpers (used by modes "C" and "U").
        centroidUList: One transform matrix per cluster; the column count of
            the first entry defines the reduced dimension.
        type: Mode selector:
            "C" - assign pages to clusters from their covariance matrices;
            "U" - assign pages from the transform matrices and weights
            returned by ``tools.getInformations``;
            "general" - delegate to ``pca.pca_general``;
            "none" - return the inputs unchanged;
            "wt" - apply ``wt.wt`` to every page.

    Returns:
        ``(newChannelData1, newChannelData2)``. Both are empty lists when
        ``type`` matches none of the modes.
    """
    newChannelData1 = []
    newChannelData2 = []
    newDimension = np.shape(centroidUList[0])[1]

    def projectByCluster(clusterAssment):
        # The page count must come from channelData1. The "C" branch used to
        # read an undefined name here and failed with NameError.
        for i in range(np.shape(channelData1)[0]):
            # Column 0 holds the cluster index; only its real part is used.
            U = centroidUList[int(clusterAssment[i, 0].real)]
            newChannelData1.append(np.dot(channelData1[i], U))
            newChannelData2.append(np.dot(channelData2[i], U))

    if type == "C":
        # Stack the covariance matrices into one matrix, then look up the
        # cluster of every page.
        allCovMatrix1 = tools.matrixListToMatrix(covMatrixList1)
        projectByCluster(kmeans.getClusterAssment(allCovMatrix1, centroids))

    elif type == "U":
        informations, SigmaList, UList = tools.getInformations(covMatrixList1)
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        projectByCluster(
            kmeans.getClusterAssment_U(allU, weights, centroids, newDimension))

    elif type == "general":
        newChannelData1 = pca.pca_general(channelData1, newDimension)
        newChannelData2 = pca.pca_general(channelData2, newDimension)

    elif type == "none":
        newChannelData1 = channelData1
        newChannelData2 = channelData2

    elif type == "wt":
        for i in range(np.shape(channelData1)[0]):
            newChannelData1.append(wt.wt(channelData1[i], newDimension))
            newChannelData2.append(wt.wt(channelData2[i], newDimension))

    return newChannelData1, newChannelData2
示例#6
0
def pca_general(data, newDimension=1):
    """Project data onto the leading right-singular vectors of its covariance.

    Args:
        data: Either a single 2-D matrix (rows are observations, columns are
            variables) or a list of such matrices.
        newDimension: Number of leading columns of the transform matrix to
            keep.

    Returns:
        For a single 2-D matrix, the projected matrix. Otherwise a list with
        one projected matrix per input matrix, each projected with its own
        transform obtained from ``tools.getInformations``.

    Raises:
        Errors raised while processing a single 2-D matrix (for example from
        ``np.linalg.svd`` or an invalid ``newDimension``) propagate to the
        caller instead of silently falling through to the list code path.
    """
    # Decide which input form we have. Only the dimensionality probe is
    # guarded: ragged lists of matrices cannot be viewed as one array and
    # make NumPy raise ValueError.
    try:
        isSingleChannel = np.ndim(data) == 2
    except ValueError:
        isSingleChannel = False

    if isSingleChannel:
        # rowvar=False: every column is one variable.
        covMatrix = np.cov(data, rowvar=False)
        # The SVD of the covariance matrix yields the transform matrix.
        U = np.transpose(np.linalg.svd(covMatrix)[2])
        return np.dot(data, U[:, 0:newDimension])

    # List input: each matrix gets its own transform.
    print(u'pca_general')
    covList = tools.getCovMatrixList(data)
    UList = tools.getInformations(covList)[2]
    return [
        np.dot(data[i], UList[i][:, 0:newDimension])
        for i in range(len(data))
    ]
示例#7
0
def pca(channelData, informations, centroidList, clusterAssment, rate=1):
    """Reduce every channel matrix with the SVD basis of its cluster centroid.

    Args:
        channelData: List of channel matrices.
        informations: Indexable whose element ``[0][i]`` is the reference
            value for matrix ``i`` (the denominator of the reported rate).
        centroidList: One centroid matrix per cluster; each is decomposed
            with ``np.linalg.svd``.
        clusterAssment: 2-D indexable; the real part of ``[i, 0]`` is the
            cluster index of matrix ``i``.
        rate: When ``<= 1``, a fraction of the singular-value sum: the
            shortest leading run of singular values whose cumulative share
            reaches ``rate`` is kept. When ``> 1``, the number of leading
            rows of ``VT`` to keep.

    Returns:
        ``(newChannelDataList, newCovMatrixList, U2s, rateList)`` where
        ``rateList`` is a one-element list holding a complex
        ``(len(channelData), 2)`` array: column 0 is the column count of each
        reduced matrix, column 1 the ratio of the value from
        ``tools.getInformations`` after reduction to the matching entry of
        ``informations``.
    """
    pageCount = len(channelData)
    rates = np.zeros((pageCount, 2), dtype=complex)

    # Build the transform matrix of every cluster.
    U2s = []
    for centroid in centroidList:
        _, Sigma, VT = np.linalg.svd(centroid)
        if rate <= 1:
            total = np.sum(Sigma)
            running = 0
            lastRow = 0
            # Count the leading singular values whose cumulative share is
            # still strictly below the requested fraction.
            for singularValue in Sigma:
                running += singularValue
                if rate - (running / total) > 0:
                    lastRow += 1
                else:
                    break
        else:
            lastRow = rate - 1
        U2s.append(np.transpose(VT[0:lastRow + 1, :]))

    # Dimensionality reduction.
    newChannelDataList = []
    for page in range(pageCount):
        basis = U2s[int(clusterAssment[page, 0].real)]
        reduced = np.dot(channelData[page], basis)
        newChannelDataList.append(reduced)
        rates[page, 0] = np.shape(reduced)[1]

    newCovMatrixList = tools.getCovMatrixList(newChannelDataList)
    reducedInformations = tools.getInformations(newCovMatrixList)[0]

    for page in range(pageCount):
        rates[page, 1] = reducedInformations[0][page] / informations[0][page]

    return newChannelDataList, newCovMatrixList, U2s, [rates]
示例#8
0
def getCentroids(schedule, path, suffix, channelData, g, k, iRate, type=u'C'):
    """Validate the arguments, then run ``getCentroidsCore`` for one part.

    Args:
        schedule: Mutable progress record. ``schedule[0]`` is read as the
            total number of parts, ``schedule[1]`` is incremented once.
        path: Forwarded unchanged to ``getCentroidsCore``.
        suffix: Forwarded unchanged to ``getCentroidsCore``.
        channelData: Channel data; its first two shape entries bound ``k``
            and ``iRate``.
        g: Forwarded unchanged to ``getCentroidsCore``.
        k: Number of cluster centroids; must be in ``1..shape[0]``.
        iRate: Dimension after reduction; must be in ``1..shape[1]``.
        type: Mode passed to ``getCentroidsCore``. ``u'total'`` runs it twice,
            first with "C" and then with "U".

    Returns:
        None. On invalid arguments a message is printed and the function
        returns before ``schedule`` is modified.
    """
    # Validate the arguments; the order of the checks decides which message
    # is printed.
    dataShape = np.shape(channelData)
    if k > dataShape[0]:
        print(u'聚类中心数量不能大于样本数量!')
        return
    # NOTE(review): this compares against the second axis of the stacked
    # input; confirm that is the dimension the message refers to.
    if iRate > dataShape[1]:
        print(u'降维后维度不能大于样本原有的维度!')
        return
    if k <= 0 or iRate <= 0:
        print(u'聚类中心数量和降维后维度不能小于1!')
        return

    schedule[1] += 1
    partNumber = schedule[1]
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) +
          u'部分开始!')

    # Covariance matrices and the values derived from them.
    covMatrixList = tools.getCovMatrixList(channelData)
    informations, SigmaList, UList = tools.getInformations(covMatrixList)

    # u'total' clusters the covariances ("C") and then the transform
    # matrices ("U"); any other value is passed through as the only mode.
    modes = ("C", "U") if type == u'total' else (type,)
    for mode in modes:
        getCentroidsCore(path, suffix, channelData, covMatrixList,
                         informations, SigmaList, UList, g, k, iRate, mode)

    # Progress report.
    print(u'共' + str(schedule[0]) + u'部分,' + u'第' + str(partNumber) +
          u'部分完成,' + u'已完成' + str(schedule[1]) + u'部分,' + u'完成度:' + '%.2f%%' %
          (schedule[1] / schedule[0] * 100) + u'!')
示例#9
0
def clusterCore(channelData1, covMatrixList1, channelData2, centroids,
                centroidUList, type,
                path=u'/Users/jinruimeng/Downloads/keyan/'):
    """Transform both channel lists, save them and return their correlation.

    Args:
        channelData1: Indexable collection of channel matrices;
            ``np.shape(...)[0]`` is used as the number of pages.
        covMatrixList1: Covariance matrices belonging to ``channelData1``
            (used by modes "C" and "U").
        channelData2: Second collection, indexed in lockstep with
            ``channelData1``.
        centroids: Cluster centroids handed to the ``kmeans`` assignment
            helpers (used by modes "C" and "U").
        centroidUList: One transform matrix per cluster; the column count of
            the first entry defines the reduced dimension.
        type: Mode selector: "C" (cluster by covariance), "U" (cluster by
            transform matrix), "general" (``pca.pca_general``) or "none"
            (inputs are used unchanged). It is also embedded in the output
            file names.
        path: Prefix prepended to the output file names. Defaults to the
            directory that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        A list with one entry per reduced dimension: the absolute value of
        the correlation coefficient between matching columns of the two
        transformed data sets, averaged over all pages.

    Side effects:
        Writes both transformed data sets through
        ``readAndWriteDataSet.write`` with a time-stamped name and the
        ".xlsx" suffix.
    """
    newChannelData1 = []
    newChannelData2 = []
    newDimension = np.shape(centroidUList[0])[1]
    p = np.shape(channelData1)[0]

    def projectByCluster(clusterAssment):
        # Column 0 holds the cluster index of each page; only its real part
        # is used.
        for i in range(p):
            U = centroidUList[int(clusterAssment[i, 0].real)]
            newChannelData1.append(np.dot(channelData1[i], U))
            newChannelData2.append(np.dot(channelData2[i], U))

    if type == "C":
        # Stack the covariance matrices into one matrix, then look up the
        # cluster of every page.
        allCovMatrix1 = tools.matrixListToMatrix(covMatrixList1)
        projectByCluster(kmeans.getClusterAssment(allCovMatrix1, centroids))

    elif type == "U":
        informations, SigmaList, UList = tools.getInformations(covMatrixList1)
        allU = tools.matrixListToMatrix_U(UList)
        weights = tools.matrixListToMatrix_U(SigmaList)
        projectByCluster(
            kmeans.getClusterAssment_U(allU, weights, centroids,
                                       newDimension))

    elif type == "general":
        newChannelData1 = pca.pca_general(channelData1, newDimension)
        newChannelData2 = pca.pca_general(channelData2, newDimension)

    elif type == "none":
        newChannelData1 = channelData1
        newChannelData2 = channelData2

    # Sum, per reduced dimension, the correlation between the matching
    # columns of the two data sets over all pages (page order is kept so the
    # floating-point accumulation is unchanged).
    allNewCorr = [0] * newDimension
    for i in range(p):
        for j in range(newDimension):
            cowCor = np.corrcoef(newChannelData1[i][:, j],
                                 newChannelData2[i][:, j])
            allNewCorr[j] += cowCor[0, 1]

    # Turn the sums into absolute mean correlations.
    allNewCorr = [abs(total / p) for total in allNewCorr]

    # time.strftime without a time tuple formats the current local time.
    nowTime = time.strftime("%Y-%m-%d.%H.%M.%S")
    pathSuffix = type + u'_' + nowTime

    newChannelData1Path = path + "clusterAddNoise_newChannelData1_" + pathSuffix
    newChannelData2Path = path + "clusterAddNoise_newChannelData2_" + pathSuffix
    readAndWriteDataSet.write(newChannelData1, newChannelData1Path, ".xlsx")
    readAndWriteDataSet.write(newChannelData2, newChannelData2Path, ".xlsx")

    return allNewCorr