def test_treasury_json_config(self):
    # Runs the job with the multi-collection splitter, describing each input
    # collection through the "mongo.input.multi_uri.json" parameter, and then
    # prints whatever landed in the output collection.  No assertion is made
    # on the results.
    database = "mongo_hadoop"

    # Load the fixture file into an additional input collection.
    mongo_manager.mongo_import(self.server_hostname, database,
                               "yield_historical.in3", JSONFILE_PATH)

    job_params = DEFAULT_PARAMETERS.copy()
    job_params["mongo.splitter.class"] = \
        "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter"

    # First input: only the FRIDAY documents, with its own splitter class.
    fridays_only = {
        "mongo.input.uri": "mongodb://%s/mongo_hadoop.yield_historical.in" % self.server_hostname,
        "query": {"dayOfWeek": "FRIDAY"},
        "mongo.splitter.class": "com.mongodb.hadoop.splitter.SingleMongoSplitter",
        "mongo.input.split.use_range_queries": True,
        "mongo.input.notimeout": True,
    }
    # Second input: the freshly imported collection, no query filter.
    whole_collection = {
        "mongo.input.uri": "mongodb://%s/mongo_hadoop.yield_historical.in3" % self.server_hostname,
        "mongo.input.split.use_range_queries": True,
        "mongo.input.notimeout": True,
    }

    # The JSON travels through a shell command line, so every double quote
    # is backslash-escaped and the whole document is wrapped in quotes.
    serialized = json.dumps([fridays_only, whole_collection])
    job_params["mongo.input.multi_uri.json"] = '"' + serialized.replace('"', '\\"') + '"'

    runjob(self.server_hostname, job_params, input_collection=None)

    results = self.server.connection()[database]["yield_historical.out"]
    print(list(results.find()))
def setUp(self):
    """Bring up a sharded cluster under /tmp and shard the input collection.

    Starts two replica sets (``num_members=3``, ``with_arbiter=True``), a
    standalone server used as the config server, and one mongos that adds
    both replica sets as shards.  The ``mongo_hadoop`` database is dropped,
    ``yield_historical.in`` is imported from ``JSONFILE_PATH`` through the
    mongos, and the collection is sharded on ``_id`` and split once.
    """
    replset_options = {"with_arbiter": True, "num_members": 3}
    namespace = "mongo_hadoop.yield_historical.in"

    self.shard1 = mongo_manager.ReplicaSetManager(home="/tmp/rs0", **replset_options)
    self.shard1.start_set(fresh=True)
    self.shard2 = mongo_manager.ReplicaSetManager(home="/tmp/rs1", **replset_options)
    self.shard2.start_set(fresh=True)

    self.configdb = mongo_manager.StandaloneManager(home="/tmp/config_db")
    self.confighost = self.configdb.start_server(fresh=True)

    self.mongos = mongo_manager.MongosManager(home="/tmp/mongos")
    self.mongos_hostname = self.mongos.start_mongos(
        self.confighost,
        [manager.get_shard_string() for manager in (self.shard1, self.shard2)],
        noauth=False,
        fresh=True,
        addShards=True,
    )

    self.mongos_connection = self.mongos.connection()
    self.mongos_connection.drop_database("mongo_hadoop")
    mongo_manager.mongo_import(self.mongos_hostname, "mongo_hadoop",
                               "yield_historical.in", JSONFILE_PATH)

    # Sharding commands are issued against the admin database of the mongos.
    admin_db = self.mongos_connection["admin"]
    admin_db.command("enablesharding", "mongo_hadoop")
    admin_db.command("shardCollection", namespace, key={"_id": 1})
    admin_db.command("split", namespace, find={"_id": 1})
def setUpClass(self): self.shard1 = mongo_manager.ReplicaSetManager(home=os.path.join(TEMPDIR, "rs0"), with_arbiter=True, num_members=3) self.shard1.start_set(fresh=True) self.shard2 = mongo_manager.ReplicaSetManager(home=os.path.join(TEMPDIR, "rs1"), with_arbiter=True, num_members=3) self.shard2.start_set(fresh=True) self.configdb = mongo_manager.StandaloneManager(home=os.path.join(TEMPDIR, 'config_db')) self.confighost = self.configdb.start_server(fresh=True) self.mongos = mongo_manager.MongosManager(home=os.path.join(TEMPDIR, 'mongos')) self.mongos_hostname = self.mongos.start_mongos(self.confighost, [h.get_shard_string() for h in (self.shard1,self.shard2)], noauth=False, fresh=True, addShards=True) self.mongos2 = mongo_manager.MongosManager(home=os.path.join(TEMPDIR, 'mongos2')) self.mongos2_hostname = self.mongos2.start_mongos(self.confighost, [h.get_shard_string() for h in (self.shard1,self.shard2)], noauth=False, fresh=True, addShards=False) self.mongos_connection = self.mongos.connection() self.mongos2_connection = self.mongos2.connection() self.mongos_connection.drop_database('mongo_hadoop') mongo_manager.mongo_import(self.mongos_hostname, "mongo_hadoop", "yield_historical.in", JSONFILE_PATH) mongos_admindb = self.mongos_connection['admin'] mongos_admindb.command("enablesharding", "mongo_hadoop") #turn off the balancer self.mongos_connection['config'].settings.update({ "_id": "balancer" }, { '$set' : { 'stopped': True } }, True ); mongos_admindb.command("shardCollection", "mongo_hadoop.yield_historical.in", key={"_id":1}) testcoll = self.mongos_connection['mongo_hadoop']['yield_historical.in'] for chunkpos in [2000, 3000, 1000, 500, 4000, 750, 250, 100, 3500, 2500, 2250, 1750]: mongos_admindb.command("split", "mongo_hadoop.yield_historical.in", middle={"_id":testcoll.find().sort("_id", 1).skip(chunkpos).limit(1)[0]['_id']}) ms_config = self.mongos_connection['config'] shards = list(ms_config.shards.find()) numchunks = ms_config.chunks.count() chunk_source 
= ms_config.chunks.find_one()['shard'] print "chunk source", chunk_source chunk_dest = [s['_id'] for s in shards if s['_id'] != chunk_source][0] print "chunk dest", chunk_dest #shuffle chunks around for i in xrange(0, numchunks/2): chunk_to_move = ms_config.chunks.find_one({"shard":chunk_source}) print "moving", chunk_to_move, "from", chunk_source, "to", chunk_dest try: mongos_admindb.command("moveChunk", "mongo_hadoop.yield_historical.in", find=chunk_to_move['min'], to=chunk_dest); except Exception, e: print e
def setUpClass(self):
    """Start a fresh standalone server with profiling and load the fixture.

    The server lives in ``TEMPDIR/standalone1``.  The ``mongo_hadoop``
    database is dropped, its profiling level is set to 2, and
    ``yield_historical.in`` is imported from ``JSONFILE_PATH``.
    """
    database = "mongo_hadoop"
    home_dir = os.path.join(TEMPDIR, "standalone1")

    self.server = mongo_manager.StandaloneManager(home=home_dir)
    self.server_hostname = self.server.start_server(fresh=True)

    # A connection is requested separately for each operation, as before.
    self.server.connection().drop_database(database)
    self.server.connection()[database].set_profiling_level(2)

    mongo_manager.mongo_import(self.server_hostname, database,
                               "yield_historical.in", JSONFILE_PATH)
    print("server is ready.")
def setUpClass(self):
    """Start a fresh standalone server in a per-run directory and load data.

    The directory name is ``standalone1_`` followed by the current value of
    the module-level ``num_runs`` counter; it is stored in ``self.homedir``.
    After the import into ``yield_historical.in`` the counter is incremented.
    """
    global num_runs

    self.homedir = "standalone1_%s" % (num_runs,)
    server_home = os.path.join(TEMPDIR, self.homedir)

    self.server = mongo_manager.StandaloneManager(home=server_home)
    self.server_hostname = self.server.start_server(fresh=True)
    self.server.connection().drop_database("mongo_hadoop")

    mongo_manager.mongo_import(self.server_hostname, "mongo_hadoop",
                               "yield_historical.in", JSONFILE_PATH)
    num_runs = num_runs + 1
    print("server is ready.")
def setUp(self):
    """Start a fresh standalone server in /tmp/standalone1 and load the fixture.

    Drops the ``mongo_hadoop`` database and imports ``yield_historical.in``
    from ``JSONFILE_PATH``.
    """
    database = "mongo_hadoop"

    standalone = mongo_manager.StandaloneManager(home="/tmp/standalone1")
    self.server = standalone
    self.server_hostname = self.server.start_server(fresh=True)

    connection = self.server.connection()
    connection.drop_database(database)

    mongo_manager.mongo_import(self.server_hostname, database,
                               "yield_historical.in", JSONFILE_PATH)
    print("server is ready.")
def setUpClass(self):
    """Start a fresh standalone server in TEMPDIR/standalone1 and load data.

    Drops the ``mongo_hadoop`` database, then imports ``yield_historical.in``
    from ``JSONFILE_PATH``.
    """
    server_home = os.path.join(TEMPDIR, "standalone1")
    import_target = ("mongo_hadoop", "yield_historical.in")

    self.server = mongo_manager.StandaloneManager(home=server_home)
    self.server_hostname = self.server.start_server(fresh=True)
    self.server.connection().drop_database(import_target[0])

    mongo_manager.mongo_import(self.server_hostname, import_target[0],
                               import_target[1], JSONFILE_PATH)
    print("server is ready.")
def setUpClass(self):
    """Start a standalone server honouring ``self.noauth`` and load the fixture.

    The server directory is ``standalone1_<num_runs>`` below ``TEMPDIR`` and
    is remembered in ``self.homedir``.  The import addresses the server as
    ``localhost:<port>`` instead of the hostname returned by
    ``start_server``.  ``num_runs`` is incremented at the end.
    """
    global num_runs

    self.homedir = "standalone1_%s" % (num_runs,)
    self.server = mongo_manager.StandaloneManager(
        home=os.path.join(TEMPDIR, self.homedir)
    )
    self.server_hostname = self.server.start_server(fresh=True, noauth=self.noauth)
    self.server.connection().drop_database("mongo_hadoop")

    import_host = "localhost:%s" % (self.server.port,)
    mongo_manager.mongo_import(import_host, "mongo_hadoop",
                               "yield_historical.in", JSONFILE_PATH)
    num_runs = num_runs + 1
def test_treasury(self):
    # Runs the job over two input collections with the multi-collection
    # splitter and checks the output against the reference results with
    # every count and sum doubled.
    # NOTE(review): the doubling presumes yield_historical.in already holds
    # the same data that is imported into yield_historical.in2 here --
    # confirm against the class fixture.
    logging.info("testing multiple collection support.")
    mongo_manager.mongo_import(self.server_hostname, "mongo_hadoop",
                               "yield_historical.in2", JSONFILE_PATH)

    job_params = DEFAULT_PARAMETERS.copy()
    job_params["mongo.splitter.class"] = \
        "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter"

    input_namespaces = [
        "mongo_hadoop.yield_historical.in",
        "mongo_hadoop.yield_historical.in2",
    ]
    runjob(self.server_hostname, job_params, input_collection=input_namespaces)

    out_col = self.server.connection()["mongo_hadoop"]["yield_historical.out"]

    # Expected output: same keys as the reference rows, with count and sum
    # doubled and the average recomputed from the doubled values.
    reference_doubled = []
    for row in check_results:
        doubled_count = row["count"] * 2
        doubled_sum = row["sum"] * 2
        reference_doubled.append({
            "_id": row["_id"],
            "count": doubled_count,
            "avg": doubled_sum / doubled_count,
            "sum": doubled_sum,
        })

    self.assertTrue(compare_results(out_col, reference_doubled))
    print(list(out_col.find()))
def test_treasury_json_config(self):
    # Feeds two differently configured input collections to the job through
    # the "mongo.input.multi_uri.json" parameter and prints the contents of
    # the output collection.  The results are not asserted.
    host = self.server_hostname
    mongo_manager.mongo_import(host, "mongo_hadoop", "yield_historical.in3", JSONFILE_PATH)

    job_config = DEFAULT_PARAMETERS.copy()
    job_config['mongo.splitter.class'] = "com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter"

    collection_settings = []
    # Input 1: FRIDAY documents only, read with its own splitter class.
    collection_settings.append({
        "mongo.input.uri": "mongodb://%s/mongo_hadoop.yield_historical.in" % host,
        "query": {"dayOfWeek": "FRIDAY"},
        "mongo.splitter.class": "com.mongodb.hadoop.splitter.SingleMongoSplitter",
        "mongo.input.split.use_range_queries": True,
        "mongo.input.notimeout": True,
    })
    # Input 2: the collection imported above, unfiltered.
    collection_settings.append({
        "mongo.input.uri": "mongodb://%s/mongo_hadoop.yield_historical.in3" % host,
        "mongo.input.split.use_range_queries": True,
        "mongo.input.notimeout": True,
    })

    # The value is passed on a shell command line: escape the double quotes
    # and wrap the JSON document in a quoted string.
    escaped_json = re.sub('"', '\\"', json.dumps(collection_settings))
    job_config["mongo.input.multi_uri.json"] = '"' + escaped_json + '"'

    runjob(host, job_config, input_collection=None)

    output = self.server.connection()['mongo_hadoop']['yield_historical.out']
    print(list(output.find()))
import sys
import traceback

import mongo_manager

# Stand-alone helper script: starts a two-shard cluster under /tmp (two
# replica sets, a config server and a mongos), imports the treasury-yield
# fixture into testdb.testcoll and shards that collection on _id.
#
# Exit status: 0 when every step succeeded, 1 when any step raised.
#
# The success exit is deliberately placed *after* the try block.
# sys.exit() works by raising SystemExit, so calling sys.exit(0) inside a
# try guarded by a bare "except:" gets caught and turned into exit status 1.
try:
    shard1 = mongo_manager.ReplicaSetManager(home="/tmp/rs0", with_arbiter=True, num_members=3)
    shard1.start_set(fresh=True)
    shard2 = mongo_manager.ReplicaSetManager(home="/tmp/rs1", with_arbiter=True, num_members=3)
    shard2.start_set(fresh=True)

    # config server
    config_server = mongo_manager.StandaloneManager(home="/tmp/config_db")
    config_host = config_server.start_server(fresh=True)

    mongos = mongo_manager.MongosManager(home="/tmp/mongos")
    mongos.start_mongos(
        config_host,
        [shard.get_shard_string() for shard in (shard1, shard2)],
        noauth=False,
        fresh=True,
        addShards=True,
    )

    # NOTE(review): the first argument is the mongos *port*, not a host
    # string -- confirm that mongo_import accepts a bare port.
    mongo_manager.mongo_import(
        mongos.port,
        "testdb",
        "testcoll",
        "/Users/mike/projects/mongo-hadoop/examples/treasury_yield/src/main/resources/yield_historical_in.json",
    )

    admin_db = mongos.connection()['admin']
    admin_db.command("enablesharding", "testdb")
    admin_db.command("shardCollection", "testdb.testcoll", key={"_id": 1})
except Exception:
    # Report why the setup failed instead of exiting silently.
    traceback.print_exc()
    sys.exit(1)

sys.exit(0)
import mongo_manager

# Helper script: start a fresh replica set under /tmp/rs0 and import the
# treasury-yield fixture into mongo_hadoop.yield_historical.in through the
# first element of what get_primary() returns.
FIXTURE_JSON = "/Users/mike/projects/mongo-hadoop/examples/treasury_yield/src/main/resources/yield_historical_in.json"

replica_set = mongo_manager.ReplicaSetManager(
    home="/tmp/rs0",
    with_arbiter=True,
    num_members=3,
)
replica_set.start_set(fresh=True)

primary = replica_set.get_primary()[0]
mongo_manager.mongo_import(primary, "mongo_hadoop", "yield_historical.in", FIXTURE_JSON)
import mongo_manager

# Helper script: bring up a fresh replica set in /tmp/rs0, then load the
# treasury-yield JSON file into mongo_hadoop.yield_historical.in via the
# first element returned by get_primary().
manager_options = {"home": "/tmp/rs0", "with_arbiter": True, "num_members": 3}
rs_manager = mongo_manager.ReplicaSetManager(**manager_options)
rs_manager.start_set(fresh=True)

primary = rs_manager.get_primary()[0]

# (database, collection, source file) passed to mongo_import after the host.
import_args = (
    "mongo_hadoop",
    "yield_historical.in",
    "/Users/mike/projects/mongo-hadoop/examples/treasury_yield/src/main/resources/yield_historical_in.json",
)
mongo_manager.mongo_import(primary, *import_args)
def setUpClass(self):
    """Start a two-shard cluster with two mongos routers and spread its chunks.

    Sequence:

    * sleep five seconds, then start two replica sets, a config server and
      two mongos processes, each in a ``TEMPDIR`` directory whose name ends
      in the current ``num_runs`` value;
    * drop ``mongo_hadoop``, import ``yield_historical.in`` from
      ``JSONFILE_PATH`` through the first mongos and enable sharding;
    * record the directory names in ``self.homedirs`` and bump ``num_runs``;
    * switch the balancer off, shard the collection on ``_id``, split it at
      a fixed list of document offsets and request that half of the chunks
      be moved to the other shard.

    Only the first mongos is started with ``addShards=True``.  A failed
    ``moveChunk`` is logged and does not abort the fixture.

    NOTE(review): takes ``self`` but is named like a unittest class fixture;
    any ``@classmethod`` decorator is outside this block -- confirm.
    """
    global num_runs

    time.sleep(5)
    run_suffix = str(num_runs)
    namespace = "mongo_hadoop.yield_historical.in"

    def home(prefix):
        # Directory for one process; the name carries the run counter.
        return os.path.join(TEMPDIR, prefix + run_suffix)

    self.shard1 = mongo_manager.ReplicaSetManager(home=home("rs0_"), with_arbiter=True, num_members=3)
    self.shard1.start_set(fresh=True)
    self.shard2 = mongo_manager.ReplicaSetManager(home=home("rs1_"), with_arbiter=True, num_members=3)
    self.shard2.start_set(fresh=True)

    self.configdb = mongo_manager.StandaloneManager(home=home("config_db_"))
    self.confighost = self.configdb.start_server(fresh=True)

    self.mongos = mongo_manager.MongosManager(home=home("mongos_"))
    self.mongos_hostname = self.mongos.start_mongos(
        self.confighost,
        [h.get_shard_string() for h in (self.shard1, self.shard2)],
        noauth=False,
        fresh=True,
        addShards=True,
    )

    # The second router joins the same cluster without re-adding the shards.
    self.mongos2 = mongo_manager.MongosManager(home=home("mongos2_"))
    self.mongos2_hostname = self.mongos2.start_mongos(
        self.confighost,
        [h.get_shard_string() for h in (self.shard1, self.shard2)],
        noauth=False,
        fresh=True,
        addShards=False,
    )

    self.mongos_connection = self.mongos.connection()
    self.mongos2_connection = self.mongos2.connection()
    self.mongos_connection.drop_database("mongo_hadoop")
    mongo_manager.mongo_import(self.mongos_hostname, "mongo_hadoop", "yield_historical.in", JSONFILE_PATH)

    mongos_admindb = self.mongos_connection["admin"]
    mongos_admindb.command("enablesharding", "mongo_hadoop")

    self.homedirs = [
        prefix + run_suffix
        for prefix in ("rs0_", "rs1_", "config_db_", "mongos_", "mongos2_")
    ]
    num_runs += 1

    # turn off the balancer (third positional argument True = upsert)
    self.mongos_connection["config"].settings.update(
        {"_id": "balancer"}, {"$set": {"stopped": True}}, True
    )
    mongos_admindb.command("shardCollection", namespace, key={"_id": 1})

    # Split at the _id found at each of these offsets in _id order.
    testcoll = self.mongos_connection["mongo_hadoop"]["yield_historical.in"]
    for chunkpos in [2000, 3000, 1000, 500, 4000, 750, 250, 100, 3500, 2500, 2250, 1750]:
        split_doc = testcoll.find().sort("_id", 1).skip(chunkpos).limit(1)[0]
        mongos_admindb.command("split", namespace, middle={"_id": split_doc["_id"]})

    ms_config = self.mongos_connection["config"]
    shards = list(ms_config.shards.find())
    numchunks = ms_config.chunks.count()
    chunk_source = ms_config.chunks.find_one()["shard"]
    # Extra logging arguments must have matching %s placeholders; without
    # them the record fails to format whenever INFO logging is enabled.
    logging.info("chunk source %s", chunk_source)
    chunk_dest = [s["_id"] for s in shards if s["_id"] != chunk_source][0]
    logging.info("chunk dest %s", chunk_dest)

    # shuffle chunks around
    for _ in range(numchunks // 2):
        chunk_to_move = ms_config.chunks.find_one({"shard": chunk_source})
        if chunk_to_move is None:
            logging.warning("no chunk left on %s to move", chunk_source)
            break
        logging.info("moving %s from %s to %s", chunk_to_move, chunk_source, chunk_dest)
        try:
            mongos_admindb.command(
                "moveChunk", namespace, find=chunk_to_move["min"], to=chunk_dest
            )
        except Exception as e:
            # Best effort: a chunk that cannot be moved must not fail setup.
            logging.warning("moveChunk failed: %s", e)
def setUpClass(self):
    """Start a two-shard cluster (auth per ``self.noauth``) and spread chunks.

    Sequence:

    * sleep five seconds and draw a random id with ``generate_id(size=6)``;
    * start two replica sets, a config server and two mongos processes in
      ``TEMPDIR`` directories named ``<prefix><random id>_<num_runs>``,
      passing ``self.noauth`` to the replica sets, the config server and
      both mongos processes;
    * drop ``mongo_hadoop``, import ``yield_historical.in`` from
      ``JSONFILE_PATH`` via ``localhost:<mongos port>`` and enable sharding;
    * record the directory names in ``self.homedirs`` and bump ``num_runs``;
    * switch the balancer off, shard the collection on ``_id``, split it at
      a fixed list of document offsets and request that half of the chunks
      be moved to the other shard.

    Only the first mongos is started with ``addShards=True``.  A failed
    ``moveChunk`` is logged and does not abort the fixture.

    NOTE(review): takes ``self`` but is named like a unittest class fixture;
    any ``@classmethod`` decorator is outside this block -- confirm.
    """
    global num_runs

    time.sleep(5)
    randstr = generate_id(size=6)
    run_suffix = randstr + "_" + str(num_runs)
    namespace = "mongo_hadoop.yield_historical.in"

    def home(prefix):
        # Directory for one process; the name carries the random id and
        # the run counter.
        return os.path.join(TEMPDIR, prefix + run_suffix)

    self.shard1 = mongo_manager.ReplicaSetManager(
        home=home("rs0_"), with_arbiter=True, num_members=3, noauth=self.noauth
    )
    self.shard1.start_set(fresh=True)
    self.shard2 = mongo_manager.ReplicaSetManager(
        home=home("rs1_"), with_arbiter=True, num_members=3, noauth=self.noauth
    )
    self.shard2.start_set(fresh=True)

    self.configdb = mongo_manager.StandaloneManager(home=home("config_db_"))
    self.confighost = self.configdb.start_server(fresh=True, noauth=self.noauth)

    self.mongos = mongo_manager.MongosManager(home=home("mongos_"))
    self.mongos_hostname = self.mongos.start_mongos(
        self.confighost,
        [h.get_shard_string() for h in (self.shard1, self.shard2)],
        noauth=self.noauth,
        fresh=True,
        addShards=True,
    )

    # The second router joins the same cluster without re-adding the shards.
    self.mongos2 = mongo_manager.MongosManager(home=home("mongos2_"))
    self.mongos2_hostname = self.mongos2.start_mongos(
        self.confighost,
        [h.get_shard_string() for h in (self.shard1, self.shard2)],
        noauth=self.noauth,
        fresh=True,
        addShards=False,
    )

    self.mongos_connection = self.mongos.connection()
    self.mongos2_connection = self.mongos2.connection()
    self.mongos_connection.drop_database('mongo_hadoop')
    mongo_manager.mongo_import(
        "localhost:" + str(self.mongos.port),
        "mongo_hadoop",
        "yield_historical.in",
        JSONFILE_PATH,
    )

    mongos_admindb = self.mongos_connection['admin']
    mongos_admindb.command("enablesharding", "mongo_hadoop")

    self.homedirs = [
        prefix + run_suffix
        for prefix in ("rs0_", "rs1_", "config_db_", "mongos_", "mongos2_")
    ]
    num_runs += 1

    # turn off the balancer (third positional argument True = upsert)
    self.mongos_connection['config'].settings.update(
        {"_id": "balancer"}, {'$set': {'stopped': True}}, True
    )
    mongos_admindb.command("shardCollection", namespace, key={"_id": 1})

    # Split at the _id found at each of these offsets in _id order.
    testcoll = self.mongos_connection['mongo_hadoop']['yield_historical.in']
    for chunkpos in [2000, 3000, 1000, 500, 4000, 750, 250, 100, 3500, 2500, 2250, 1750]:
        split_doc = testcoll.find().sort("_id", 1).skip(chunkpos).limit(1)[0]
        mongos_admindb.command("split", namespace, middle={"_id": split_doc['_id']})

    ms_config = self.mongos_connection['config']
    shards = list(ms_config.shards.find())
    numchunks = ms_config.chunks.count()
    chunk_source = ms_config.chunks.find_one()['shard']
    # Extra logging arguments must have matching %s placeholders; without
    # them the record fails to format whenever INFO logging is enabled.
    logging.info("chunk source %s", chunk_source)
    chunk_dest = [s['_id'] for s in shards if s['_id'] != chunk_source][0]
    logging.info("chunk dest %s", chunk_dest)

    # shuffle chunks around
    for _ in range(numchunks // 2):
        chunk_to_move = ms_config.chunks.find_one({"shard": chunk_source})
        if chunk_to_move is None:
            logging.warning("no chunk left on %s to move", chunk_source)
            break
        logging.info("moving %s from %s to %s", chunk_to_move, chunk_source, chunk_dest)
        try:
            mongos_admindb.command(
                "moveChunk", namespace, find=chunk_to_move['min'], to=chunk_dest
            )
        except Exception as e:
            # Best effort: a chunk that cannot be moved must not fail setup.
            logging.warning("moveChunk failed: %s", e)