Python MrGMixEmInit.MrGMixEmInit示例

编程语言: Python

命名空间/包名称: mr_GMixEmInitialize

类/类型: MrGMixEmInit

方法/功能: MrGMixEmInit

hotexamples.com的示例: 2

Python MrGMixEmInit.MrGMixEmInit - 已找到 2 个示例。以下是从开源项目中提取的、评分最高的 mr_GMixEmInitialize.MrGMixEmInit.MrGMixEmInit 真实 Python 代码示例。您可以对示例进行评分，帮助我们提高示例质量。

常用方法

显示隐藏

MrGMixEmInit(2)

make_runner(2)

示例#1

显示文件

文件： GMixEM.py 项目： slburson/mrbigdata

def main():
    """Drive the Gaussian-mixture EM jobs until the means stop moving.

    Runs ``MrGMixEmInit`` once on the input file, then repeatedly runs
    ``MrGMixEm`` on the same input.  After every job the parameter file at
    ``emPath`` is re-read and element ``[1]`` of the decoded JSON (used here
    as the list of component means) is compared with the previous iteration's
    value.  The summed ``dist`` between corresponding means is printed each
    iteration, and the loop stops once that sum is no greater than 0.01.

    NOTE(review): the jobs are presumably what (re)write ``emPath``; this
    function only reads it -- confirm against the job implementations.
    NOTE(review): ``dist`` is defined outside this block; it is assumed to
    return a non-negative distance between two mean vectors.
    """
    filePath = '//home//mike-bowles//pyWorkspace//mapReducers//src//mr_GMixEm//inputWide.txt'
    emPath = "//home//mike-bowles//pyWorkspace//mapReducers//src//mr_GMixEm//intermediateResults.txt"

    # First run the initializer to get starting centroids.
    mrJob = MrGMixEmInit(args=[filePath])
    with mrJob.make_runner() as runner:
        runner.run()

    # Pull out the centroid values to compare with values after one iteration.
    # A context manager guarantees the handle is closed even if read() fails.
    with open(emPath) as fileIn:
        oldMeans = json.loads(fileIn.read())[1]

    delta = 10  # any value above the threshold, so the loop runs at least once
    # Iterate until the total movement of the centroids is small enough.
    while delta > 0.01:
        # Run one EM iteration.
        mrJob2 = MrGMixEm(args=[filePath])
        with mrJob2.make_runner() as runner:
            runner.run()

        # Read the parameters produced by this iteration.
        with open(emPath) as fileIn:
            newMeans = json.loads(fileIn.read())[1]

        # Compare new centroids to old ones.  Indexing (rather than zip) keeps
        # the IndexError if there are fewer old means than new ones.
        delta = 0.0
        for i, newMean in enumerate(newMeans):
            delta += dist(newMean, oldMeans[i])

        # Single-argument form prints the same on Python 2 and Python 3.
        print(delta)

        # The means just read are the baseline for the next iteration, so
        # there is no need to decode the same JSON text a second time.
        oldMeans = newMeans

示例#2

显示文件

def main():
    """Run canopy generation followed by Gaussian-mixture EM to convergence.

    Steps, all operating on files under ``<cwd>/data``:

    1. Run ``MrCanopy`` on ``input.txt`` and write the value parsed from its
       output to ``canopylist.txt`` as JSON.
    2. Run ``MrGMixEmInit`` on ``input.txt``.
    3. Repeatedly run ``MrGMixEm`` on ``input.txt``; after each run re-read
       ``intermediateResults.txt`` and compare element ``[1]`` of the decoded
       JSON (used here as the list of component means) with the previous
       iteration's value.  The summed ``dist`` is printed each iteration and
       the loop stops once it is no greater than 0.01.

    Raises:
        RuntimeError: if the canopy job produces no output lines, since there
            would be nothing to write to ``canopylist.txt``.

    NOTE(review): the EM jobs are presumably what (re)write
    ``intermediateResults.txt``; this function only reads it -- confirm.
    NOTE(review): ``dist`` is defined outside this block; it is assumed to
    return a non-negative distance between two mean vectors.
    """
    # Data path parameters.
    dataDir = os.path.join(os.getcwd(), "data")
    inputPath = os.path.join(dataDir, "input.txt")  # dataset to run clustering on
    intermediatePath = os.path.join(dataDir, "intermediateResults.txt")  # intermediate file for EM
    canopyPath = os.path.join(dataDir, "canopylist.txt")  # list of canopy centers

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Canopy-EM cluster by CanEM Team')

    # Generate canopies.
    print('Generating Canopies...')
    gotCanopies = False
    canopies = None
    mrJob0 = MrCanopy(args=[inputPath])
    with mrJob0.make_runner() as runner:
        runner.run()
        for line in runner.stream_output():
            # Only one key is expected, hence only one line; if several lines
            # appear, the last one wins.
            _key, canopies = mrJob0.parse_output_line(line)
            gotCanopies = True

    # Without this guard an empty job output surfaced as an UnboundLocalError
    # on the variable holding the parsed value.
    if not gotCanopies:
        raise RuntimeError('MrCanopy produced no output; nothing to write to '
                           + canopyPath)

    # Write canopies to file.
    with open(canopyPath, 'w') as fileOut:
        fileOut.write(json.dumps(canopies))

    # Run the EM initializer to get starting centroids.
    print('Initializing...')
    mrJob = MrGMixEmInit(args=[inputPath])
    with mrJob.make_runner() as runner:
        runner.run()

    # Pull out the centroid values to compare with values after one iteration.
    with open(intermediatePath) as fileIn:
        oldMeans = json.loads(fileIn.read())[1]

    delta = 10  # any value above the threshold, so the loop runs at least once
    # Iterate until the total movement of the centroids is small enough.
    print('Iterating...')
    while delta > 0.01:
        # NOTE: a centroid-to-canopy assignment step (MrAssignCentToCan on the
        # intermediate file) used to sit here as commented-out code.

        # Run one EM iteration.
        mrJob2 = MrGMixEm(args=[inputPath])
        with mrJob2.make_runner() as runner:
            runner.run()

        # Read the parameters produced by this iteration.
        with open(intermediatePath) as fileIn:
            newMeans = json.loads(fileIn.read())[1]

        # Compare new centroids to old ones.  Indexing (rather than zip) keeps
        # the IndexError if there are fewer old means than new ones.
        delta = 0.0
        for i, newMean in enumerate(newMeans):
            delta += dist(newMean, oldMeans[i])

        print(delta)

        # The means just read are the baseline for the next iteration, so
        # there is no need to decode the same JSON text a second time.
        oldMeans = newMeans