if FLAGS.start_key and FLAGS.end_key: db_iter = ldb.RangeIter(FLAGS.start_key, FLAGS.end_key) else: db_iter = ldb.RangeIter() elif FLAGS.sharded_db: assert os.path.exists(FLAGS.sharded_db) db = parallel.ShardedDB.open(FLAGS.sharded_db) if FLAGS.start_key and FLAGS.end_key: db_iter = db.range_iter(FLAGS.start_key, FLAGS.end_key) else: db_iter = db.__iter__() else: print 'Must specify --level_db or --sharded_db' print FLAGS.GetHelp() sys.exit(1) cnt = 0 for (k, v) in db_iter: if FLAGS.json_value_out: v = json.dumps(cPickle.loads(v), indent=2, sort_keys=True) if FLAGS.key_only: print k elif FLAGS.value_only: print v else: print k, v cnt += 1 print("Total keys: %s", cnt) if __name__ == '__main__': app.run()
results = self.run_mr("/tmp/test-lineinput", input_data) for i in range(10): key, value = results[i] assert value == "test-line", (i, results[i]) def test_sum(self): # 10 files with 100 lines each results = self.run_mr( "/tmp/test-sum", ["\n".join([str(i) for i in range(100)]) for i in range(10)], reducer=parallel.SumReducer() ) results = set(dict(results).values()) for i in range(100): assert i * 10 in results results.remove(i * 10) assert len(results) == 0, "Unexpected output: %s" % results def test_exception(self): with self.assertRaises(parallel.MRException) as ctx: self.run_mr("/tmp/test-bad-mapper", ["hello" for i in range(10)], mapper=BadMapper()) def main(argv): unittest.main(argv=argv) if __name__ == "__main__": app.run()