def affinities_m():
    """Map step that keys vote-dump rows for the affinity reduction.

    Reads the vote dump produced by srrecs.pig from stdin (via
    ``mr_tools.mr_map``) and re-emits every row prefixed with a combined
    ``<account_id>_<sr_id>`` key, so that a later reduce step can group
    the votes by (account_id, sr_id).
    """
    columns = ('account_id', 'link_id', 'sr_id', 'dir')

    def emit_keyed(vote):
        # The grouping key comes first; the original columns follow untouched.
        group_key = '%s_%s' % (vote.account_id, vote.sr_id)
        yield (group_key,
               vote.account_id, vote.link_id, vote.sr_id, vote.dir)

    # Wrap the row handler with the column spec, then hand it to the mapper.
    mr_tools.mr_map(mr_tools.dataspec_m(*columns)(emit_keyed))
def obscure(secret=random.random()):
    """Turn identifiable components of vote dumps into salted hashes.

    Each row's ``account_id``, ``link_id`` and ``sr_id`` is replaced by the
    hex MD5 digest of the value followed by the salt; ``dir`` is passed
    through unchanged.

    secret -- salt mixed into every hash. Any value is accepted; it is
              converted to a byte string before hashing. NOTE: the default
              is evaluated once, when this function is defined, so it
              differs between interpreter processes. Pass an explicit
              secret if hashes must agree across separate mapper runs.
    """
    def as_bytes(value):
        # md5 only accepts byte strings. The default secret is a float, and
        # feeding it to update() directly raises TypeError, so coerce here.
        if isinstance(value, bytes):
            return value
        text = str(value)
        # On interpreters where str is already a byte string this is a no-op.
        return text if isinstance(text, bytes) else text.encode('utf-8')

    # The salt never changes between rows; convert it once.
    salt = as_bytes(secret)

    def o(d):
        m = md5(as_bytes(d))
        m.update(salt)
        return m.hexdigest()

    @mr_tools.dataspec_m('account_id', 'link_id', 'sr_id', 'dir')
    def process(aff):
        yield o(aff.account_id), o(aff.link_id), o(aff.sr_id), aff.dir

    mr_tools.mr_map(process)