Example #1
import sys
import numpy as np
from pmr.reducer import Reducer


def parseVector(line):
    # Parse one comma-separated line into a list of float coordinates.
    return [float(x) for x in line.split(',')]


def average(points):
    # Return the component-wise mean of a list of vectors, or [] if the list is empty.
    numVectors = len(points)
    if numVectors > 0:
        pVectors = np.array(points)
        return list(pVectors.mean(0))
    else:
        return []
        
if __name__ == "__main__":
    # Initialize Reduce job
    reduceJob = Reducer(sys.argv)
        
    # Reduce function
    clusterPoints = {}
    # CENTER_FILE_PREFIX is assumed to be defined or imported by the surrounding job code (it is not part of this snippet).
    centersFile = open(CENTER_FILE_PREFIX + reduceJob.reduce, "w")
    for pName in reduceJob.partitionFiles:
        with open(pName) as infile:
            for line in infile:
                tokens = line.split(",")
                clusterId = tokens[0]
                if clusterId not in clusterPoints:
                    clusterPoints[clusterId] = []
                # Everything after the cluster id is the point's coordinate vector.
                clusterPoints[clusterId].append(parseVector(",".join(tokens[1:])))
                reduceJob.emit(None, ",".join(tokens[1:]))

    for clusterId, points in clusterPoints.items():
        # Assumed completion: the source example is truncated at this loop; write each
        # cluster's new center (the mean of its points) to the centers file.
        center = average(points)
        centersFile.write(clusterId + "," + ",".join(str(x) for x in center) + "\n")
    centersFile.close()

    # Finalize reduce job
    reduceJob.finalize()
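As a quick sanity check, the two helpers above can be exercised on their own. This is only a sketch: the sample coordinates are made up, and it assumes it runs in the same file as Example #1 so that parseVector and average are in scope.

points = [parseVector("1.0,2.0"), parseVector("3.0,4.0")]
print(average(points))   # component-wise mean of the two vectors -> [2.0, 3.0]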
Example #2
from pmr.reducer import Reducer
import sys


if __name__ == "__main__":
    # Initialize Reduce job
    reduceJob = Reducer(sys.argv)

    # Reduce function
    count = {}
    # Walk each partition file emitted by the map phase and accumulate the word counts.
    for pName in reduceJob.partitionFiles:
        with open(pName) as infile:
            for line in infile:
                tokens = line.split(",")

                # The word itself may contain "," so the count is taken as the last token.
                value = int(tokens[-1])
                word = ",".join(tokens[:-1])

                # Add this occurrence's count to the running total for the word.
                count[word] = count.get(word, 0) + value

    for word, total in count.items():
        reduceJob.emit(word, total)

    # Finalize reduce job
    reduceJob.finalize()
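The split above relies on the count being the last comma-separated token, so the word itself may safely contain commas. A minimal standalone sketch of that parsing, with made-up input lines:

for line in ["hello,3", "a,b,2"]:
    tokens = line.split(",")
    value = int(tokens[-1])        # the count is always the last token
    word = ",".join(tokens[:-1])   # the word may itself contain commas
    print(word, value)             # prints: hello 3, then a,b 2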