示例#1
0
文件: top.py 项目: kevinchau321/cs61a
def run(k, normalize=0):
    """Emit the top K values for each key.

    For every (key, values) group that ``values_by_key`` yields from standard
    input, count the values and emit the key together with a tuple of up to K
    ``(value, count)`` pairs, most common first.

    If NORMALIZE > 0, each count is divided by the total count for its key
    and rounded to NORMALIZE digits.  K and NORMALIZE are converted with
    ``int``, so they may be passed as strings.
    """
    k = int(k)
    # Convert before testing: a string such as "0" is truthy, so checking the
    # raw argument would switch normalization on when it was asked to be off.
    # Falsy values (0, None, "") keep meaning "no normalization".
    digits = int(normalize) if normalize else 0
    for key, value_iterator in values_by_key(sys.stdin):
        counts = Counter(value_iterator)
        top = counts.most_common(k)
        if digits > 0:
            total = sum(counts.values())
            top = [(value, round(count / total, digits))
                   for value, count in top]
        emit(key, tuple(top))
示例#2
0
### Import mr library.

sys.path.append(os.path.dirname(__file__))
from mr import values_by_key

### Helper function to perform component-wise binary operations using reduce.

def helper(x, y):
    """Combine two 23-field records into one, field by field.

    Every field is combined with ``+`` except:
      * field 4, which is taken from ``x`` unchanged (``y[4]`` is never read);
      * field 12, which is kept as-is when both records hold an equal value
        and is otherwise joined as ``x[12] + '|' + y[12]``.

    Returns a new list with the 23 combined fields.
    """
    merged = []
    for index in range(23):
        if index == 4:
            field = x[index]
        elif index == 12:
            left, right = x[index], y[index]
            field = left if left == right else left + '|' + right
        else:
            field = x[index] + y[index]
        merged.append(field)
    return merged

### reduce() is a builtin only on Python 2; functools provides it on 2.6+ and 3.
from functools import reduce

for key, value_iterator in values_by_key(sys.stdin):
    ### Use our helper function to fold all records for this key into one list.
    totals = reduce(helper, value_iterator)
    ### Finalize relative position: divide field 11 by field 1.
    totals[11] /= float(totals[1])
    ### Compute number of distinct users from the '|'-joined string in field 12.
    totals[12] = len(set(totals[12].split('|')))
    ### Print the key, then the fields separated by tabs.  The fields come from
    ### the list's str() with ', ' replaced by tabs and the brackets sliced off.
    ### The call is parenthesized so it parses on both Python 2 and Python 3.
    print(str(key) + '\t' + str(totals).replace(', ', '\t')[1 : -1])

#!/usr/bin/env python3
"""Sum values for each key."""

import sys
from mr import values_by_key, emit

# Emit one (key, total) pair per key, where total is the sum of its values.
for key, values in values_by_key(sys.stdin):
    total = sum(values)
    emit(key, total)
示例#4
0
#!/usr/bin/env python

import sys
from mr import values_by_key, emit  # MapReduce module.

# values_by_key groups the values read from stdin by key; each group's values
# are summed and emitted as a (key, sum) pair.
grouped = values_by_key(sys.stdin)
for key, values in grouped:
    emit(key, sum(values))
示例#5
0
def run():
    """Emit each key from standard input paired with the set of its values."""
    for key, values in values_by_key(sys.stdin):
        distinct = set(values)
        emit(key, distinct)
示例#6
0
def run():
    """For every key group on standard input, emit the key and its distinct values."""
    grouped = values_by_key(sys.stdin)
    for key, values in grouped:
        emit(key, set(values))