class Combiner(object):
    """Map-side pre-aggregation of the values that share one key.

    Only two aggregation kinds are distinguished: 'average' and everything
    else, which is treated as a sum.
    """

    def __init__(self):
        # Config is defined elsewhere in the project; aggregations() is
        # indexed by unit name and compared against 'average' below.
        self.config = Config()

    def __call__(self, key, values):
        """Yield (key, value) pairs for one group of values.

        key    -- 4-tuple (timestamp, projection, dimensions, unit); only
                  the unit is inspected here, the key is re-emitted as is.
        values -- iterable of numbers emitted for that key.

        Sum units yield a single (key, partial_sum) pair.  Average units
        yield every value unchanged (nothing for an empty group).
        """
        # currently only supports average and sum
        _timestamp, _projection, _dimensions, unit = key

        if self.config.aggregations()[unit] == 'average':
            # A mean cannot be pre-aggregated without also carrying the
            # count: emitting the local mean here would make the reducer
            # (which averages whatever it receives) compute a mean of
            # partial means, which is wrong whenever the partial groups
            # differ in size.  Passing the values through keeps the final
            # average exact no matter how often the combiner runs.
            for item in values:
                yield key, item
        else:
            # Sums are associative, so a partial sum is safe to emit.
            yield key, sum(values)
class Reducer(object):
    """Aggregate all values of a key and emit a (rowkey, JSON payload) pair."""

    def __init__(self):
        # Config is defined elsewhere in the project; aggregations() is
        # indexed by unit name and compared against "average" below.
        self.config = Config()

    def __call__(self, key, values):
        """Reduce the values of one key to a single output record.

        key    -- 4-tuple (timestamp, projection, dimensions, unit).
        values -- iterable of numbers to aggregate.

        Yields one (rowkey, json_payload) pair, or nothing at all when the
        unit is averaged and the average equals zero.
        """
        timestamp, projection, dimensions, unit = key

        # currently only supports average and sum
        if self.config.aggregations()[unit] != "average":
            value = sum(values)
        else:
            accumulated = 0.0
            count = 0
            for sample in values:
                accumulated += sample
                count += 1
            value = accumulated / count
            if value == 0:
                # zero averages are dropped instead of being emitted.
                return

        # the rowkey interleaves each dimension name with its value,
        # following the order given by the projection, and ends with the
        # timestamp. (the ordering matters.)
        parts = []
        for name in projection:
            parts.extend((name, dimensions[name]))
        parts.append(str(timestamp))
        rowkey = "-".join(parts)
        # rowkey => 'artist-97930-track-102203-20090601'

        cfq = "unit:" + unit
        # cfq => 'unit:scrobbles'

        payload = {cfq: {"value": value}}
        # serialized => '{"unit:scrobbles": {"value": 1338}}'
        yield rowkey, json.dumps(payload)