def setUp(self):
    """Start the mongod processes and bring up the replica set.

    One ``Mongod`` is started per entry in ``self.ports``. ``replSetInitiate``
    is then sent through a pool connected to the first port, and the set is
    polled every ``self.__ping_interval`` seconds until it reports ready or
    ``self.__init_timeout`` seconds' worth of attempts are used up.

    The temporary pool is always disconnected, including on the timeout
    path and when ``replSetInitiate`` itself fails.

    Raises:
        Exception: if the replica set is not ready within the timeout;
            ``self.tearDown()`` is called first in that case.
    """
    self.__mongod = [Mongod(port=p, replset=self.rsname) for p in self.ports]
    yield defer.gatherResults([mongo.start() for mongo in self.__mongod])

    master_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[0])
    master = ConnectionPool(master_uri)

    # Loop invariant: member states that count as "ready".
    ok_states = set(["PRIMARY", "SECONDARY"])
    ready = False
    try:
        yield master.admin.command("replSetInitiate", self.rsconfig)

        n_tries = int(self.__init_timeout / self.__ping_interval)
        for _ in range(n_tries):
            yield self.__sleep(self.__ping_interval)

            # My practice shows that we need to query both ismaster and replSetGetStatus
            # to be sure that replica set is up and running, primary is elected and all
            # secondaries are in sync and ready to become new primary
            ismaster_req = master.admin.command("ismaster", check=False)
            replstatus_req = master.admin.command("replSetGetStatus", check=False)
            ismaster, replstatus = yield defer.gatherResults([ismaster_req, replstatus_req])

            initialized = replstatus["ok"]
            states_ready = all(m["stateStr"] in ok_states
                               for m in replstatus.get("members", []))
            ready = initialized and ismaster["ismaster"] and states_ready

            if ready:
                break
    finally:
        # Release the helper pool on every exit path.
        yield master.disconnect()

    if not ready:
        yield self.tearDown()
        raise Exception("ReplicaSet initialization took more than {0}s".format(self.__init_timeout))
def __init__(self, uri=None):
    """Create the MongoDB pool and bind the ``Youdao.Words`` collection.

    :param uri: optional MongoDB URI. When falsy, ``ConnectionPool`` is
        constructed with no arguments.
    """
    pool = ConnectionPool(uri=uri) if uri else ConnectionPool()
    self.db = pool.Youdao.Words
    self.connections = 0
    self.MAX_CONNECTIONS = 1024
def open_spider(self, spider: Spider):
    """Open the MongoDB connection and prepare the target collection.

    The database and collection names are read from ``self.settings``
    (``MONGODB_DATABASE`` / ``MONGODB_COLLECTION``). Index creation is
    delegated to ``self.create_index(spider)`` and waited on.

    :param spider: the spider being opened; forwarded to ``create_index``.
    """
    self.cnx = ConnectionPool(self.uri, codec_options=self.codec_options)
    self.db = getattr(self.cnx, self.settings[MONGODB_DATABASE])
    collection = getattr(self.db, self.settings[MONGODB_COLLECTION])
    # with_options() returns a new collection object instead of mutating
    # the receiver, so the result has to be kept for the options to apply.
    self.coll = collection.with_options(codec_options=self.codec_options)
    yield self.create_index(spider)
    logger.info('Spider opened: Open the connection to MongoDB: %s', self.uri)
def test_AutoReconnect_from_primary_step_down(self):
    """A forced ``replSetStepDown`` must surface as ``AutoReconnect``.

    The Deferred returned by ``assertFailure`` is waited on so that the
    assertion outcome is part of the test result, and the pool is
    disconnected whether or not the assertion holds.
    """
    uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
    conn = ConnectionPool(uri, max_delay=5)
    try:
        # this will force primary to step down, triggering an AutoReconnect that bubbles up
        # through the connection pool to the client
        command = conn.admin.command(SON([('replSetStepDown', 86400), ('force', 1)]))
        yield self.assertFailure(command, AutoReconnect)
    finally:
        yield conn.disconnect()
def test_SlaveOk(self):
    """Reads with ``QUERY_SLAVE_OK`` succeed; an insert fails with ``OperationFailure``.

    Connects to ``self.ports[1]`` with ``readPreference=secondaryPreferred``
    (presumably a secondary member -- confirm against the fixture).
    """
    secondary_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[1])
    conn = ConnectionPool(secondary_uri)
    try:
        found = yield conn.db.coll.find(flags=QUERY_SLAVE_OK)
        self.assertEqual(found, [])

        write_attempt = conn.db.coll.insert({'x': 42})
        yield self.assertFailure(write_attempt, OperationFailure)
    finally:
        yield conn.disconnect()
def test_ConnectionUrlParams(self):
    """``w`` and ``j`` given in the URI are passed on when inserting.

    Checks that the mock obtained from ``self.mock_gle()`` is called exactly
    once with ``('mydb', w=2, j=True)`` after a single insert.
    """
    uri = "mongodb://{0}:{1}/?w=2&j=true".format(mongo_host, mongo_port)
    conn = ConnectionPool(uri)
    coll = conn.mydb.mycol

    try:
        with self.mock_gle() as gle_mock:
            yield coll.insert({'x': 42})
            gle_mock.assert_called_once_with('mydb', w=2, j=True)
    finally:
        # Always clean up the collection and the pool.
        yield coll.drop()
        yield conn.disconnect()
def test_AutoReconnect_from_primary_step_down(self):
    """A forced ``replSetStepDown`` must surface as ``AutoReconnect``.

    ``_Connection.maxDelay`` is patched to 5 for the duration of the test.
    The Deferred returned by ``assertFailure`` is waited on so that the
    assertion outcome is part of the test result, and the pool is
    disconnected whether or not the assertion holds.
    """
    self.patch(_Connection, 'maxDelay', 5)

    uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
    conn = ConnectionPool(uri)
    try:
        # this will force primary to step down, triggering an AutoReconnect that bubbles up
        # through the connection pool to the client
        command = conn.admin.command(SON([('replSetStepDown', 86400), ('force', 1)]))
        yield self.assertFailure(command, AutoReconnect)
    finally:
        yield conn.disconnect()
def test_ConnectionUrlParams(self):
    """Write-concern options from the connection URI reach the insert path.

    After one insert, the mock from ``self.mock_gle()`` must have been
    called once with ``('mydb', w=2, j=True)``.
    """
    connection_uri = "mongodb://{0}:{1}/?w=2&j=true".format(mongo_host, mongo_port)
    conn = ConnectionPool(connection_uri)
    coll = conn.mydb.mycol

    try:
        with self.mock_gle() as recorder:
            yield coll.insert({'x': 42})
            recorder.assert_called_once_with('mydb', w=2, j=True)
    finally:
        yield coll.drop()
        yield conn.disconnect()
def test_SlaveOk(self):
    """Reads with ``QUERY_SLAVE_OK`` succeed; an insert fails.

    The expected error type depends on the server version reported by
    ``serverStatus``: ``AutoReconnect`` when the parsed version list
    compares greater than ``[4, 2]``, ``OperationFailure`` otherwise.
    """
    uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[1])
    conn = ConnectionPool(uri)
    try:
        found = yield conn.db.coll.find(flags=QUERY_SLAVE_OK)
        self.assertEqual(found, [])

        status = yield conn.admin.command("serverStatus")
        version_parts = [int(piece) for piece in status["version"].split('.')]
        if version_parts > [4, 2]:
            expected_error = AutoReconnect
        else:
            expected_error = OperationFailure

        yield self.assertFailure(conn.db.coll.insert({'x': 42}), expected_error)
    finally:
        yield conn.disconnect()
def __init__(self, config):
    """Open a MongoDB connection pool.

    :param config: a 3-item iterable unpacked as
        ``(mongo_url, mongo_db, mongo_collection)``.
    """
    # Flag read elsewhere so that a connection error is reported only once.
    self.report_connection_error = True

    url, db_name, collection_name = config
    # Accept the shorthand "mongo:" by rewriting it to "mongodb:".
    if 'mongo:' in url:
        url = url.replace('mongo:', 'mongodb:')

    # Set up the MongoDB connection and the handles derived from it.
    self.mongo_url = url
    self.connection = ConnectionPool(url, connect_timeout=5)
    self.mongo_db = self.connection[db_name]
    self.collection = self.mongo_db[collection_name]
def test_InvalidRSName(self):
    """Connecting with a wrong replica set name must fail with ``ConfigurationError``.

    ``MongoProtocol.fail`` is patched with a stub that disconnects the pool
    and resolves ``outcome``: success when the exception type is exactly
    ``ConfigurationError``, failure with the exception otherwise. The insert
    helper raises if the write ever completes.
    """
    uri = "mongodb://localhost:{0}/?replicaSet={1}_X".format(self.ports[0], self.rsname)

    outcome = defer.Deferred()

    def proto_fail(protocol, exception):
        # ``conn`` is bound below, before this stub can be invoked.
        conn.disconnect()

        if type(exception) != ConfigurationError:
            outcome.errback(exception)
            return
        outcome.callback(None)

    self.patch(MongoProtocol, "fail", proto_fail)

    conn = ConnectionPool(uri)

    @defer.inlineCallbacks
    def do_query():
        yield conn.db.coll.insert({'x': 42})
        raise Exception("You shall not pass!")

    waiting = [outcome, do_query()]
    yield defer.DeferredList(waiting, fireOnOneCallback=True, fireOnOneErrback=True)
    self.flushLoggedErrors(AutoReconnect)
def open_spider(self, spider):
    """Connect to MongoDB and pick the item processor for this spider.

    ``self._processor`` and, where applicable, ``self.db`` are selected by
    ``spider.name``. Unknown spiders get a processor that accepts anything
    and returns ``None``.

    :param spider: the spider being opened; only ``spider.name`` is read.
    """
    self.m = ConnectionPool(self.settings.get("MONGO"))
    spider_name = spider.name

    # spider name -> (processor method name, collection in the ``scripture`` db)
    collection_routes = {
        "jetsetter": ("process_jset_item", "jsets"),
        "ustravelzoo": ("process_tzoo_item", "tzoos"),
        "hcom": ("process_hotels_item", "hotels"),
        "eventbrite": ("process_eventbrite_item", "eventbrites"),
        "booking": ("process_booking_item", "bookings"),
    }

    if spider_name in collection_routes:
        processor_attr, collection_attr = collection_routes[spider_name]
        self._processor = getattr(self, processor_attr)
        self.db = getattr(self.m.scripture, collection_attr)
        if spider_name == "hcom":
            # hcom additionally keeps per-language room collections.
            self.rooms_zh_db = self.m.scripture.hcom.zh.rooms
            self.rooms_en_db = self.m.scripture.hcom.en.rooms
    elif spider_name == "distributed_spider":
        self._processor = self.distributed_spider
        # NOTE(review): ``redis_url`` and ``db`` are not defined in this
        # block; presumably module-level names -- confirm.
        self.rds = redis.StrictRedis(
            host=redis_url.host,
            port=redis_url.port,
            db=db,
            password=redis_url.password,
        )
    else:
        self._processor = lambda *args, **kwargs: None
def __init__(self, crawler):
    """Bind the crawler and open the MongoDB database named in its settings.

    Settings read: ``MONGODB_URI`` (default ``'mongodb://localhost'``) and
    ``MONGODB_DB`` (default ``'scrapy_mongo_pipeline'``).

    :param crawler: object exposing a ``settings`` mapping with ``get``.
    """
    self.crawler = crawler
    settings = self.crawler.settings

    uri = settings.get('MONGODB_URI', 'mongodb://localhost')
    database = settings.get('MONGODB_DB', 'scrapy_mongo_pipeline')

    self.ctx = ConnectionPool(uri)
    self.db = self.ctx[database]
def open_spider(self, spider):
    """Open the storage: connect, probe the collection, create indexes.

    A ``find_one`` is issued once before index creation, and one index is
    created per entry of ``self.db_index``, each waited on in turn.

    :param spider: unused here.
    """
    client = yield ConnectionPool(self.db_uri)
    self._db_client = client
    self._db = client[self.db_name]
    self._coll = self._db[self.coll_name]

    yield self._coll.find_one(timeout=True)

    for index_spec in self.db_index:
        yield self._coll.create_index(qf.sort(index_spec))

    logger.info('{storage} opened'.format(storage=self.__class__.__name__))
def onConnect(self):
    """Open the MongoDB pool, join the configured realm and derive ``_id``.

    ``self._id`` is the hex MD5 digest of ``self.title``.
    """
    self.url = 'mongodb://localhost:27017'
    self.mongo = yield ConnectionPool(self.url)
    self.join(self.config.realm)

    self.title = "Test Data"
    # hashlib digests consume bytes; passing text raises TypeError on
    # Python 3, so encode explicitly (a no-op change for ASCII on Python 2).
    self._id = hashlib.md5(self.title.encode('utf-8')).hexdigest()
def setUp(self):
    """Start the mongod processes and bring up the replica set.

    One ``Mongod`` is started per entry in ``self.ports`` and each port is
    checked with ``self.__check_reachable``. ``replSetInitiate`` is then
    sent through a pool connected to the first port, and the set is polled
    every ``self.__ping_interval`` seconds until it reports ready or
    ``self.__init_timeout`` seconds' worth of attempts are used up.

    The temporary pool is always disconnected, including on the timeout
    path and when ``replSetInitiate`` itself fails.

    Raises:
        Exception: if the replica set is not ready within the timeout;
            ``self.tearDown()`` is called first in that case.
    """
    self.__mongod = [
        Mongod(port=p, replset=self.rsname) for p in self.ports
    ]
    yield defer.gatherResults([mongo.start() for mongo in self.__mongod])

    yield defer.gatherResults(
        [self.__check_reachable(port) for port in self.ports])

    master_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(
        self.ports[0])
    master = ConnectionPool(master_uri)

    # Loop invariant: member states that count as "ready".
    ok_states = {"PRIMARY", "SECONDARY"}
    ready = False
    try:
        yield master.admin.command("replSetInitiate", self.rsconfig)

        n_tries = int(self.__init_timeout / self.__ping_interval)
        for _ in range(n_tries):
            yield self.__sleep(self.__ping_interval)

            # My practice shows that we need to query both ismaster and replSetGetStatus
            # to be sure that replica set is up and running, primary is elected and all
            # secondaries are in sync and ready to become new primary
            ismaster_req = master.admin.command("ismaster", check=False)
            replstatus_req = master.admin.command("replSetGetStatus", check=False)
            ismaster, replstatus = yield defer.gatherResults(
                [ismaster_req, replstatus_req])

            initialized = replstatus["ok"]
            states_ready = all(m["stateStr"] in ok_states
                               for m in replstatus.get("members", []))
            ready = initialized and ismaster["ismaster"] and states_ready

            if ready:
                break
    finally:
        # Release the helper pool on every exit path.
        yield master.disconnect()

    if not ready:
        yield self.tearDown()
        raise Exception(
            "ReplicaSet initialization took more than {0}s".format(
                self.__init_timeout))
def open_spider(self, spider):
    """Open the MongoDB connection and ensure the ``request_url`` index.

    Database and collection names come from ``self.settings``
    (``MONGODB_DB`` / ``MONGODB_COLL_RAW``).

    :param spider: unused here.
    """
    # Sync
    # self.client = pymongo.MongoClient(self.settings['MONGODB_URI'])
    # self.db = self.client[self.settings['MONGODB_DB']]
    # self.coll = self.db[self.settings['MONGODB_COLL_RAW']]
    # self.coll.create_index('request_url')

    # Async
    self.client = yield ConnectionPool(self.settings['MONGODB_URI'])
    self.db = self.client[self.settings['MONGODB_DB']]
    self.coll = self.db[self.settings['MONGODB_COLL_RAW']]
    # Wait for index creation so a failure propagates from open_spider
    # instead of being left on a dropped Deferred.
    yield self.coll.create_index(sort([('request_url', 1)]))
def loadHomeZones():
    """Load the id -> zone mapping from ``GsmSimulatedData.PeopleHomeZones``.

    Rebinds the module-level ``client`` to the pool it opens on ``url``.
    The mapping, built from each document's ``'id'`` and ``'zone'`` fields,
    is delivered through ``returnValue``.
    """
    global client
    client = yield ConnectionPool(url)
    home_zones = client.GsmSimulatedData.PeopleHomeZones

    print("\033[92mLoading Home Zone data.....\033[0m")

    records = yield home_zones.find()
    zone_by_id = {record['id']: record['zone'] for record in records}
    returnValue(zone_by_id)
def open_spider(self, spider: Spider):
    """Open the MongoDB connection and bind the seeds collection.

    Database and collection names are read from ``self.settings``
    (``SEEDS_MONGODB_DATABASE`` / ``SEEDS_MONGODB_COLLECTION``), both
    defaulting to ``'seeds'``.

    :param spider: unused here.
    """
    self.cnx = yield ConnectionPool(self.uri,
                                    codec_options=self.codec_options)
    self.db = yield getattr(
        self.cnx, self.settings.get(SEEDS_MONGODB_DATABASE, 'seeds'))
    collection = yield getattr(
        self.db, self.settings.get(SEEDS_MONGODB_COLLECTION, 'seeds'))
    # with_options() returns a new collection object instead of mutating
    # the receiver, so the result has to be kept for the options to apply.
    self.coll = yield collection.with_options(
        codec_options=self.codec_options)
    logger.info('Spider opened: Open the connection to MongoDB: %s',
                self.uri)
def test_AutoReconnect(self):
    """Queries are retried through ``AutoReconnect`` while a mongod is frozen.

    After one safe insert, the first mongod process is sent ``SIGSTOP``.
    ``find_one`` is then retried for as long as it raises ``AutoReconnect``,
    and the first result obtained must carry ``x == 42``. The process is
    always sent ``SIGCONT`` and the pool disconnected afterwards.
    """
    try:
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, max_delay=5)

        yield conn.db.coll.insert({'x': 42}, safe=True)

        self.__mongod[0].kill(signal.SIGSTOP)

        while True:
            try:
                doc = yield conn.db.coll.find_one()
            except AutoReconnect:
                # Not reconnected yet; try again.
                continue
            self.assertEqual(doc['x'], 42)
            break
    finally:
        self.__mongod[0].kill(signal.SIGCONT)
        yield conn.disconnect()
        self.flushLoggedErrors(AutoReconnect)
def test_AutoReconnect(self):
    """Queries are retried through ``AutoReconnect`` after a mongod is stopped.

    ``_Connection.maxDelay`` is patched to 5. After one safe insert, the
    first mongod is stopped. ``find_one`` is then retried for as long as it
    raises ``AutoReconnect``, and the first result obtained must carry
    ``x == 42``.
    """
    self.patch(_Connection, 'maxDelay', 5)

    try:
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri)

        yield conn.db.coll.insert({'x': 42}, safe=True)

        yield self.__mongod[0].stop()

        while True:
            try:
                doc = yield conn.db.coll.find_one()
            except AutoReconnect:
                # Not reconnected yet; try again.
                continue
            self.assertEqual(doc['x'], 42)
            break
    finally:
        yield conn.disconnect()
        self.flushLoggedErrors(AutoReconnect)
def test_TimeExceeded_insert(self):
    """An insert with ``timeout=2`` against a frozen mongod raises ``TimeExceeded``.

    The first mongod is sent ``SIGSTOP`` after an initial safe insert. The
    timed insert is retried while it raises ``AutoReconnect``; the test
    fails if an insert ever completes. The process is always sent
    ``SIGCONT`` and the pool disconnected afterwards.
    """
    try:
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, retry_delay=3, max_delay=5)

        yield conn.db.coll.insert({'x': 42}, safe=True)

        self.__mongod[0].kill(signal.SIGSTOP)

        while True:
            try:
                yield conn.db.coll.insert({'y': 42}, safe=True, timeout=2)
            except TimeExceeded:
                # this is what we should have returned
                break
            except AutoReconnect:
                continue
            else:
                self.fail("TimeExceeded not raised!")
    finally:
        self.__mongod[0].kill(signal.SIGCONT)
        yield conn.disconnect()
        self.flushLoggedErrors(AutoReconnect)
def test_TimeExceeded_insert(self):
    """An insert with ``timeout=2`` against a stopped mongod raises ``TimeExceeded``.

    ``_Connection.maxDelay`` is patched to 5. The first mongod is stopped
    after an initial safe insert. The timed insert is retried while it
    raises ``AutoReconnect``; the test fails if an insert ever completes.
    """
    self.patch(_Connection, 'maxDelay', 5)

    try:
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, initial_delay=3)

        yield conn.db.coll.insert({'x': 42}, safe=True)

        yield self.__mongod[0].stop()

        while True:
            try:
                yield conn.db.coll.insert({'y': 42}, safe=True, timeout=2)
            except TimeExceeded:
                # this is what we should have returned
                break
            except AutoReconnect:
                continue
            else:
                self.fail("TimeExceeded not raised!")
    finally:
        yield conn.disconnect()
        self.flushLoggedErrors(AutoReconnect)
def example():
    """Connect to MongoDB and print up to ten documents from ``foo.test``."""
    # TLS variant, kept for reference:
    # tls_ctx = ServerTLSContext(privateKeyFileName='./mongodb.key', certificateFileName='./mongodb.crt')
    # mongo = yield ConnectionPool(mongodb_uri, ssl_context_factory=tls_ctx)
    mongodb_uri = "mongodb://192.168.10.57:27017"
    mongo = yield ConnectionPool(mongodb_uri)

    collection = mongo.foo.test  # `test` collection of the `foo` database

    # fetch some documents
    # yield test.insert({"title": "sb", "content": "sb"})
    documents = yield collection.find(limit=10)
    for document in documents:
        print(document)
def example():
    """Connect to MongoDB over TLS and print up to ten documents from ``foo.test``.

    The TLS context is built from ``./mongodb.key`` and ``./mongodb.crt``.
    """
    tls_ctx = ServerTLSContext(privateKeyFileName='./mongodb.key',
                               certificateFileName='./mongodb.crt')
    mongodb_uri = "mongodb://localhost:27017"

    mongo = yield ConnectionPool(mongodb_uri, ssl_context_factory=tls_ctx)

    foo = mongo.foo  # `foo` database
    test = foo.test  # `test` collection

    # fetch some documents
    docs = yield test.find(limit=10)
    for doc in docs:
        # Call form works on both Python 2 and 3; the bare ``print doc``
        # statement is a syntax error on Python 3.
        print(doc)
def open_spider(self, spider):
    """Open the storage: connect, probe the collection, create indexes.

    If ``connection_kwargs['ssl_context_factory']`` holds a class, it is
    replaced by an instance of that class before connecting. One index is
    created per entry of ``self.db_index``.

    :param spider: unused here.
    """
    # ConnectionPool is handed an instance, so instantiate a class if given.
    factory = self.connection_kwargs.get('ssl_context_factory')
    if inspect.isclass(factory):
        self.connection_kwargs['ssl_context_factory'] = factory()

    client = yield ConnectionPool(self.db_uri, **self.connection_kwargs)
    self._db_client = client
    self._db = client[self.db_name]
    self._coll = self._db[self.coll_name]

    yield self._coll.find_one(timeout=True)

    for index_spec in self.db_index:
        yield self._coll.create_index(qf.sort(index_spec))

    self.logger.info(
        '{storage} opened'.format(storage=self.__class__.__name__))
def insert_item(spider_name: str, item: Type[Union[Item, dict]],
                connection_pool: Optional[ConnectionPool] = None):
    """Insert ``item`` into the collection that belongs to its spider.

    ``ImageItem`` instances go to ``dync_settings.IMAGE_COLLECTION``; every
    other item goes to the collection named after ``spider_name``.

    :param spider_name: name of the spider that produced the item.
    :param item: the item to store; converted with ``dict(item)``.
    :param connection_pool: pool to reuse. When falsy, a pool is opened on
        ``dync_settings['MONGO_URI']`` for this call only and disconnected
        again once the insert has finished, so it is not leaked.
    """
    owns_pool = not connection_pool
    if owns_pool:
        mongo = yield ConnectionPool(dync_settings['MONGO_URI'])
    else:
        mongo = connection_pool

    try:
        db = mongo[dync_settings.MONGO_DB_NAME]
        collection = db[spider_name]
        if isinstance(item, ImageItem):
            collection = db[dync_settings.IMAGE_COLLECTION]
        # TODO: a spider may return different item types, which need to be
        # stored in different collections.
        yield collection.insert(dict(item))
    finally:
        # Only close what this call opened; a caller-supplied pool stays up.
        if owns_pool:
            yield mongo.disconnect()
def storeLocData():
    """Copy one tower location per person into ``TestingData.PeopleLocationData``.

    For every document in ``GsmSimulatedData.PeopleHomeZones``, the first
    ``RawPackets`` document matching the person's id and home zone supplies
    the tower ``lat``/``lon``; one ``{'id', 'zone', 'loc'}`` document is
    inserted per person.
    """
    client = ConnectionPool("mongodb://localhost:27017")
    source_db = client.GsmSimulatedData
    target = client.TestingData.PeopleLocationData

    progress = ProgressBar()

    home_zones = yield source_db.PeopleHomeZones.find()
    persons = {}
    for entry in home_zones:
        persons[entry['id']] = {'zone': entry['zone']}

    for pid in progress(persons.keys()):
        person = persons[pid]
        query = {'id': pid, 'tower.zone': person['zone']}
        packets = yield source_db.RawPackets.find(spec=query, limit=1)
        tower = packets[0]['tower']
        person['loc'] = [tower['lat'], tower['lon']]
        yield target.insert_one({'id': pid, 'zone': person['zone'], 'loc': person['loc']})
def connect_mongodb(host, port):
    """
    Run :py:func:`~.setup_mongodb`. If that succeeds, connect to MongoDB via
    ``txmongo``. Return a txmongo ConnectionPool.

    :param host: host to connect to MongoDB on.
    :type host: str
    :param port: port to connect to MongoDB on.
    :type port: int
    :return: MongoDB connection pool
    :rtype: txmongo.connection.ConnectionPool
    :raises SystemExit: with code 2 if constructing the pool raises.
    """
    setup_mongodb(host, port)
    uri = 'mongodb://%s:%d' % (host, port)
    logger.info('Connecting to MongoDB via txmongo at %s', uri)
    try:
        conn = ConnectionPool(uri=uri)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not rewritten as exit code 2.
        logger.critical('Error connecting to MongoDB at %s', uri,
                        exc_info=True)
        raise SystemExit(2)
    return conn
def connect(self):
    """Open a TLS-enabled pool on ``self.connect_url`` and store it in ``self.db``.

    The TLS context is built by ``self.ServerTLSContext`` from
    ``self.privkey`` and ``self.certfile``.
    """
    context_factory = self.ServerTLSContext(
        privateKeyFileName=self.privkey,
        certificateFileName=self.certfile,
    )
    self.db = ConnectionPool(self.connect_url, ssl_context_factory=context_factory)
def getConnection(url=url):
    """Open a pool on ``url`` and bind it to the module-level ``cursor``.

    :param url: MongoDB URI; defaults to the module-level ``url`` as it was
        when this function was defined.
    """
    global cursor
    pool = yield ConnectionPool(url)
    cursor = pool
def __init__(self, db_name, pool_size=10, **kwargs):
    """Create the connection pool and select the database ``db_name``.

    :param db_name: name of the database to look up on the pool.
    :param pool_size: forwarded to ``ConnectionPool`` as ``pool_size``.
    :param kwargs: extra keyword arguments forwarded to ``ConnectionPool``.
    """
    extra_options = str(kwargs)
    logging.debug("Creating DB ConnectionPool(pool_size=%d, %s)", pool_size, extra_options)

    pool_options = dict(kwargs, pool_size=pool_size)
    self._pool = ConnectionPool(**pool_options)
    # NOTE(review): reads ``self.pool`` rather than ``self._pool``;
    # presumably a property defined elsewhere on the class -- confirm.
    self._db = self.pool[db_name]
def __check_reachable(self, port):
    """Wait until an ``ismaster`` command completes against ``localhost:port``.

    A throwaway pool is opened for the probe and always disconnected, even
    if the command raises.

    :param port: TCP port of the mongod to probe.
    """
    uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(port)
    conn = ConnectionPool(uri)
    try:
        yield conn.admin.command("ismaster", check=False)
    finally:
        yield conn.disconnect()
class TestCancelIntegrated(unittest.TestCase):
    """Integration tests for cancelling operations issued through ``ConnectionPool``.

    Each test works on ``db.coll`` of a pool created with default arguments;
    the collection is dropped and the pool disconnected in ``tearDown``.
    """

    def setUp(self):
        """Create the pool and keep handles to ``db`` and ``db.coll``."""
        self.conn = ConnectionPool()
        self.db = self.conn.db
        self.coll = self.db.coll

    @defer.inlineCallbacks
    def tearDown(self):
        """Drop the test collection, then disconnect the pool."""
        yield self.coll.drop()
        yield self.conn.disconnect()

    @defer.inlineCallbacks
    def test_integration(self):
        """Cancelled inserts never reach the server; the others do."""
        # Our ConnectionPool is not actually connected yet, so at this
        # stage operations can be safely cancelled -- they won't be
        # sent to MongoDB at all. This test checks this.
        d1 = self.coll.insert_one({'x': 1})
        d2 = self.coll.insert_one({'x': 2})
        d3 = self.coll.insert_one({'x': 3})
        d4 = self.coll.insert_one({'x': 4})

        d1.cancel()
        d3.cancel()

        # Waiting on the last insert; d2 is asserted to have fired by then.
        yield d4

        self.failureResultOf(d1, defer.CancelledError)
        self.assertTrue(d2.called)
        self.failureResultOf(d3, defer.CancelledError)

        # Only the two non-cancelled values must be stored.
        docs = yield self.coll.distinct('x')
        self.assertEqual(set(docs), {2, 4})

    @defer.inlineCallbacks
    def test_remove(self):
        """A cancelled ``remove`` leaves its matching documents in place."""
        # Let's test cancellation of some dangerous operation for the peace
        # of mind. NB: remove can be cancelled only because ConnectionPool
        # is not connected yet.
        for i in range(10):
            # Deliberately not waited on: the inserts are issued before
            # the removes below, all without yielding in between.
            self.coll.insert_one({'x': i})

        d1 = self.coll.remove({'x': {"$lt": 3}})
        d2 = self.coll.remove({'x': {"$gte": 3, "$lt": 6}})
        d3 = self.coll.remove({'x': {"$gte": 6, "$lt": 9}})

        d2.cancel()

        yield d3

        self.assertTrue(d1.called)
        self.failureResultOf(d2, defer.CancelledError)

        # 3..5 survive because their remove was cancelled; 9 matched no remove.
        x = yield self.coll.distinct('x')
        self.assertEqual(set(x), {3, 4, 5, 9})

    @defer.inlineCallbacks
    def test_no_way(self):
        """Cancelling after the query was sent does not undo the insert."""
        # If ConnectionPool picks already active connection, the query is sent
        # to MongoDB immediately and there is no way to cancel it

        # Waiting on count() first means the pool has completed a query
        # before the insert is issued.
        yield self.coll.count()

        d = self.coll.insert({'x': 42})
        d.cancel()

        yield _delay(1)

        # The Deferred reports cancellation, yet the document was stored.
        self.failureResultOf(d, defer.CancelledError)

        cnt = yield self.coll.count()
        self.assertEqual(cnt, 1)
def setUp(self):
    """Create a default pool and keep handles to ``db`` and ``db.coll``."""
    pool = ConnectionPool()
    self.conn = pool
    self.db = pool.db
    self.coll = self.db.coll