def __init__(self, uri=None):
    """Build the connection pool and initialise the connection counters.

    :param uri: optional connection URI forwarded to ``ConnectionPool``;
        when falsy the pool is constructed without arguments.
    """
    pool = ConnectionPool(uri=uri) if uri else ConnectionPool()
    # ``Words`` is looked up under ``Youdao`` on the pool
    # (presumably database ``Youdao`` / collection ``Words``).
    self.db = pool.Youdao.Words
    self.connections = 0
    self.MAX_CONNECTIONS = 1024
def open_spider(self, spider):
    """Open the MongoDB pool and pick the item processor for *spider*.

    The spider's ``name`` decides which ``process_*`` method is stored in
    ``self._processor`` and which attribute of ``self.m.scripture`` is bound
    to ``self.db``. Unknown spiders get a no-op processor.

    :param spider: the spider being opened; only ``spider.name`` is read.
    """
    self.m = ConnectionPool(self.settings.get("MONGO"))
    name = spider.name

    if name == "jetsetter":
        self.db = self.m.scripture.jsets
        self._processor = self.process_jset_item
        return

    if name == "ustravelzoo":
        self._processor = self.process_tzoo_item
        self.db = self.m.scripture.tzoos
        return

    if name == "hcom":
        self._processor = self.process_hotels_item
        self.db = self.m.scripture.hotels
        # Room data is kept separately per language.
        self.rooms_zh_db = self.m.scripture.hcom.zh.rooms
        self.rooms_en_db = self.m.scripture.hcom.en.rooms
        return

    if name == "eventbrite":
        self._processor = self.process_eventbrite_item
        self.db = self.m.scripture.eventbrites
        return

    if name == "booking":
        self._processor = self.process_booking_item
        self.db = self.m.scripture.bookings
        return

    if name == "distributed_spider":
        self._processor = self.distributed_spider
        # NOTE(review): ``redis_url`` and ``db`` are not defined in this
        # method; presumably module-level names -- confirm.
        self.rds = redis.StrictRedis(
            host=redis_url.host,
            port=redis_url.port,
            db=db,
            password=redis_url.password,
        )
        return

    # Any other spider: accept and ignore every item.
    self._processor = lambda *args, **kwargs: None
def __init__(self, crawler):
    """Store the crawler and open the MongoDB pool named in its settings.

    :param crawler: object exposing ``settings.get``; ``MONGODB_URI``
        (default ``mongodb://localhost``) and ``MONGODB_DB`` (default
        ``scrapy_mongo_pipeline``) are read from it.
    """
    self.crawler = crawler
    read_setting = self.crawler.settings.get
    uri = read_setting('MONGODB_URI', 'mongodb://localhost')
    database_name = read_setting('MONGODB_DB', 'scrapy_mongo_pipeline')

    self.ctx = ConnectionPool(uri)
    self.db = self.ctx[database_name]
def open_spider(self, spider):
    """Connect to the database, probe the collection and create indexes.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here).

    :param spider: the spider being opened (unused).
    """
    client = yield ConnectionPool(self.db_uri)
    self._db_client = client
    self._db = self._db_client[self.db_name]
    self._coll = self._db[self.coll_name]

    # One query up front, before any index is created.
    yield self._coll.find_one(timeout=True)

    for index_spec in self.db_index:
        yield self._coll.create_index(qf.sort(index_spec))

    storage_name = self.__class__.__name__
    logger.info('{storage} opened'.format(storage=storage_name))
def open_spider(self, spider: Spider):
    """Open the MongoDB connection, bind the collection and create indexes.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here).

    :param spider: the spider being opened; passed on to ``create_index``.
    """
    self.cnx = ConnectionPool(self.uri, codec_options=self.codec_options)
    self.db = getattr(self.cnx, self.settings[MONGODB_DATABASE])
    self.coll = getattr(self.db, self.settings[MONGODB_COLLECTION])
    # ``with_options`` hands back a reconfigured collection instead of
    # changing the receiver (txmongo/PyMongo semantics), so the result has
    # to be kept -- the original call discarded it and had no effect.
    self.coll = self.coll.with_options(codec_options=self.codec_options)
    yield self.create_index(spider)
    logger.info('Spider opened: Open the connection to MongoDB: %s', self.uri)
def test_AutoReconnect_from_primary_step_down(self):
    """A forced primary step-down must reach the client as AutoReconnect."""
    uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
    conn = ConnectionPool(uri, max_delay=5)

    try:
        # this will force primary to step down, triggering an AutoReconnect
        # that bubbles up through the connection pool to the client
        command = conn.admin.command(SON([('replSetStepDown', 86400), ('force', 1)]))
        # The assertion is asynchronous: without the ``yield`` the test would
        # finish before the expected failure was ever checked.
        yield self.assertFailure(command, AutoReconnect)
    finally:
        # Always release the pool, even when the assertion fails.
        yield conn.disconnect()
def loadHomeZones():
    """Load the ``id`` -> ``zone`` mapping from ``PeopleHomeZones``.

    Generator-style coroutine that hands its result back through
    ``returnValue``. The pool is stored in the module-level ``client`` so it
    stays available after this function finishes.

    :return: dict mapping each document's ``id`` to its ``zone``.
    """
    global client
    client = yield ConnectionPool(url)
    home_zones = client.GsmSimulatedData.PeopleHomeZones

    print("\033[92mLoading Home Zone data.....\033[0m")

    records = yield home_zones.find()
    returnValue({record['id']: record['zone'] for record in records})
def open_spider(self, spider):
    """Open the asynchronous MongoDB pool and index ``request_url``.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here). Reads
    ``MONGODB_URI``, ``MONGODB_DB`` and ``MONGODB_COLL_RAW`` from
    ``self.settings``.

    :param spider: the spider being opened (unused).
    """
    self.client = yield ConnectionPool(self.settings['MONGODB_URI'])
    self.db = self.client[self.settings['MONGODB_DB']]
    self.coll = self.db[self.settings['MONGODB_COLL_RAW']]
    # Wait for the index request so a failure surfaces here instead of being
    # lost in a dropped result (the original did not yield this call).
    yield self.coll.create_index(sort([('request_url', 1)]))
def test_ConnectionUrlParams(self):
    """``w`` and ``j`` given in the URI must be forwarded on insert."""
    uri = "mongodb://{0}:{1}/?w=2&j=true".format(mongo_host, mongo_port)
    pool = ConnectionPool(uri)
    collection = pool.mydb.mycol

    try:
        with self.mock_gle() as gle:
            yield collection.insert({'x': 42})
            # The URI parameters arrive parsed: ``w`` as int, ``j`` as bool.
            gle.assert_called_once_with('mydb', w=2, j=True)
    finally:
        # Clean up the collection first, then close the pool.
        yield collection.drop()
        yield pool.disconnect()
def open_spider(self, spider: Spider):
    """Open the MongoDB connection and bind the seeds collection.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here). Database and
    collection names come from ``SEEDS_MONGODB_DATABASE`` and
    ``SEEDS_MONGODB_COLLECTION`` in ``self.settings``, both defaulting to
    ``'seeds'``.

    :param spider: the spider being opened (unused).
    """
    # NOTE(review): the yielded values below do not look like Deferreds; the
    # yields are kept as in the original -- confirm whether they are needed.
    self.cnx = yield ConnectionPool(self.uri, codec_options=self.codec_options)
    self.db = yield getattr(
        self.cnx, self.settings.get(SEEDS_MONGODB_DATABASE, 'seeds'))
    self.coll = yield getattr(
        self.db, self.settings.get(SEEDS_MONGODB_COLLECTION, 'seeds'))
    # ``with_options`` hands back a reconfigured collection instead of
    # changing the receiver (txmongo/PyMongo semantics), so the result has
    # to be kept -- the original call discarded it and had no effect.
    self.coll = yield self.coll.with_options(codec_options=self.codec_options)
    logger.info('Spider opened: Open the connection to MongoDB: %s', self.uri)
def test_SlaveOk(self):
    """Reads with QUERY_SLAVE_OK work on ``ports[1]``; writes there fail."""
    uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[1])
    conn = ConnectionPool(uri)

    try:
        documents = yield conn.db.coll.find(flags=QUERY_SLAVE_OK)
        self.assertEqual(documents, [])

        # The error expected for the write depends on the server version.
        status = yield conn.admin.command("serverStatus")
        version = list(map(int, status["version"].split('.')))
        if version > [4, 2]:
            expected_error = AutoReconnect
        else:
            expected_error = OperationFailure

        write = conn.db.coll.insert({'x': 42})
        yield self.assertFailure(write, expected_error)
    finally:
        yield conn.disconnect()
def example():
    """Connect to MongoDB and print up to ten documents from ``foo.test``.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here).
    """
    mongodb_uri = "mongodb://192.168.10.57:27017"

    # A TLS variant existed here only as commented-out code: it built a
    # ServerTLSContext from ./mongodb.key and ./mongodb.crt and passed it to
    # ConnectionPool as ``ssl_context_factory``.
    mongo = yield ConnectionPool(mongodb_uri)

    # Database ``foo``, collection ``test``.
    test = mongo.foo.test

    # Fetch some documents (a sample insert of {"title": "sb", "content":
    # "sb"} was likewise left disabled in the original).
    docs = yield test.find(limit=10)
    for document in docs:
        print(document)
def insert_item(spider_name: str, item: Type[Union[Item, dict]],
                connection_pool: Optional[ConnectionPool] = None):
    """Insert *item* into the collection that belongs to *spider_name*.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here).

    :param spider_name: name of the collection used for ordinary items.
    :param item: the scraped item; it is converted with ``dict(item)``.
        ``ImageItem`` instances go to ``dync_settings.IMAGE_COLLECTION``.
    :param connection_pool: pool to reuse. When omitted a temporary pool is
        opened from ``dync_settings['MONGO_URI']`` and closed again before
        returning; a pool supplied by the caller is left open.
    """
    owns_pool = not connection_pool
    if owns_pool:
        mongo = yield ConnectionPool(dync_settings['MONGO_URI'])
    else:
        mongo = connection_pool

    try:
        db = mongo[dync_settings.MONGO_DB_NAME]
        if isinstance(item, ImageItem):
            collection = db[dync_settings.IMAGE_COLLECTION]
        else:
            collection = db[spider_name]
        # TODO: a spider may return different item types, which need to be
        # stored in different collections.
        yield collection.insert(dict(item))
    finally:
        # Only close what was opened here; otherwise every call without a
        # shared pool would leak one connection pool.
        if owns_pool:
            yield mongo.disconnect()
def setUp(self):
    """Start one mongod per port and wait until the replica set is ready.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here).

    :raises Exception: when the replica set is not ready after roughly
        ``__init_timeout`` seconds; ``tearDown`` is run first in that case.
    """
    self.__mongod = [
        Mongod(port=p, replset=self.rsname) for p in self.ports
    ]
    yield defer.gatherResults([mongo.start() for mongo in self.__mongod])
    yield defer.gatherResults(
        [self.__check_reachable(port) for port in self.ports])

    master_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(
        self.ports[0])
    master = ConnectionPool(master_uri)

    # The try/finally closes ``master`` on every path; the original left it
    # open when initialisation failed or timed out.
    try:
        yield master.admin.command("replSetInitiate", self.rsconfig)

        ready = False
        ok_states = {"PRIMARY", "SECONDARY"}
        n_tries = int(self.__init_timeout / self.__ping_interval)
        for _ in range(n_tries):
            yield self.__sleep(self.__ping_interval)

            # My practice shows that we need to query both ismaster and
            # replSetGetStatus to be sure that replica set is up and running,
            # primary is elected and all secondaries are in sync and ready
            # to become new primary
            ismaster_req = master.admin.command("ismaster", check=False)
            replstatus_req = master.admin.command("replSetGetStatus", check=False)
            ismaster, replstatus = yield defer.gatherResults(
                [ismaster_req, replstatus_req])

            initialized = replstatus["ok"]
            states_ready = all(
                m["stateStr"] in ok_states
                for m in replstatus.get("members", []))
            ready = initialized and ismaster["ismaster"] and states_ready

            if ready:
                break

        if not ready:
            # Stop the mongod processes started above before reporting failure.
            yield self.tearDown()
            raise Exception(
                "ReplicaSet initialization took more than {0}s".format(
                    self.__init_timeout))
    finally:
        yield master.disconnect()
def storeLocData():
    """Store one location per person in ``TestingData.PeopleLocationData``.

    For every document in ``GsmSimulatedData.PeopleHomeZones`` the first
    ``RawPackets`` document with the same ``id`` whose ``tower.zone`` equals
    the person's home zone is looked up, and its tower ``lat``/``lon`` are
    written as ``loc``.

    Generator-style coroutine (presumably driven by Twisted's
    ``inlineCallbacks``; the decorator is not visible here).

    :raises IndexError: when a person has no matching packet.
    """
    client = ConnectionPool("mongodb://localhost:27017")
    # The pool is local to this function, so it is closed here; the original
    # never disconnected it.
    try:
        db1 = client.GsmSimulatedData
        col = client.TestingData.PeopleLocationData

        id_to_home = yield db1.PeopleHomeZones.find()
        home_zones = {val['id']: val['zone'] for val in id_to_home}

        pbar = ProgressBar()
        for pid, zone in pbar(home_zones.items()):
            packets = yield db1.RawPackets.find(
                spec={'id': pid, 'tower.zone': zone}, limit=1)
            tower = packets[0]['tower']
            yield col.insert_one(
                {'id': pid, 'zone': zone, 'loc': [tower['lat'], tower['lon']]})
    finally:
        yield client.disconnect()
def test_AutoReconnect(self):
    """Reads must succeed again after the first mongod is suspended."""
    uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
    # Created before the ``try`` so the ``finally`` block never touches an
    # unbound ``conn`` and hides the original error behind a NameError.
    conn = ConnectionPool(uri, max_delay=5)

    try:
        yield conn.db.coll.insert({'x': 42}, safe=True)

        # Suspend (not terminate) the first mongod process.
        self.__mongod[0].kill(signal.SIGSTOP)

        # Retry the read until it stops failing with AutoReconnect.
        while True:
            try:
                result = yield conn.db.coll.find_one()
                self.assertEqual(result['x'], 42)
                break
            except AutoReconnect:
                pass
    finally:
        # Resume the suspended process so later tests see a running server.
        self.__mongod[0].kill(signal.SIGCONT)
        yield conn.disconnect()
        self.flushLoggedErrors(AutoReconnect)
def connect_mongodb(host, port):
    """
    Run :py:func:`~.setup_mongodb`. If that succeeds, connect to MongoDB via
    ``txmongo``. Return a txmongo ConnectionPool.

    :param host: host to connect to MongoDB on.
    :type host: str
    :param port: port to connect to MongoDB on.
    :type port: int
    :return: MongoDB connection pool
    :rtype: txmongo.connection.ConnectionPool
    :raises SystemExit: with exit code 2 when creating the pool fails.
    """
    setup_mongodb(host, port)
    uri = 'mongodb://%s:%d' % (host, port)
    logger.info('Connecting to MongoDB via txmongo at %s', uri)
    try:
        conn = ConnectionPool(uri=uri)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not rewritten into exit code 2.
        logger.critical('Error connecting to MongoDB at %s', uri, exc_info=True)
        raise SystemExit(2)
    return conn