Пример #1
0
    def test_disconnect(self):
        # Run two identical rounds: call disconnect() twice back-to-back and
        # then issue a count() through a collection handle that was obtained
        # before any disconnect. The test passes as long as nothing raises.
        client = MongoClient(host, port)
        collection = client.pymongo_test.bar

        for _ in range(2):
            client.disconnect()
            client.disconnect()

            collection.count()
Пример #2
0
    def test_disconnect(self):
        # The collection handle is created once, up front, and reused after
        # every pair of disconnect() calls; no assertion is needed because
        # any exception fails the test.
        mongo = MongoClient(host, port)
        bar = mongo.pymongo_test.bar

        for _round in (1, 2):
            for _call in (1, 2):
                mongo.disconnect()

            bar.count()
Пример #3
0
    def test_kill_cursors_warning(self):
        # See PYTHON-799: kill_cursors may be invoked from Cursor.__del__, so
        # while the client is disconnected it cannot risk taking the lock to
        # reconnect. This test checks that a warning is shown in that case.
        mongo = MongoClient(host, port)
        coll = mongo.pymongo_test.test
        coll.insert({} for _ in range(4))

        # batch_size(1) with four documents inserted; fetch only the first.
        query = coll.find()
        open_cursor = query.batch_size(1)
        open_cursor.next()

        mongo.disconnect()

        # Promote UserWarning to an exception so assertRaises can observe it;
        # the saved warning state is restored no matter what happens.
        saved_warnings = catch_warnings()
        try:
            warnings.simplefilter("error", UserWarning)
            self.assertRaises(UserWarning, open_cursor.close)
        finally:
            saved_warnings.exit()

        # Reconnect.
        coll.find_one()
        open_cursor.close()
Пример #4
0
    def test_kill_cursors_warning(self):
        # Closing a cursor while the client is disconnected must only warn:
        # kill_cursors can be reached from Cursor.__del__ and therefore can't
        # risk taking the lock to reconnect (PYTHON-799).
        conn = MongoClient(host, port)
        test_coll = conn.pymongo_test.test
        test_coll.insert({} for _ in range(4))

        pending = test_coll.find()
        cur = pending.batch_size(1)
        cur.next()

        conn.disconnect()

        warning_state = catch_warnings()
        try:
            # Turn the warning into an error so it is visible to assertRaises.
            warnings.simplefilter("error", UserWarning)
            self.assertRaises(UserWarning, cur.close)
        finally:
            warning_state.exit()

        # Reconnect.
        test_coll.find_one()
        cur.close()
Пример #5
0
    def test_copy_db(self):
        # Exercise MongoClient.copy_database(): argument validation, copying
        # inside and outside a request, and (on servers started with auth)
        # copying with bad and good credentials.
        c = MongoClient(host, port)
        # Due to SERVER-2329, databases may not disappear
        # from a master in a master-slave pair.
        if server_is_master_with_slave(c):
            raise SkipTest("SERVER-2329")
        # We test copy twice; once starting in a request and once not. In
        # either case the copy should succeed (because it starts a request
        # internally) and should leave us in the same state as before the copy.
        c.start_request()

        self.assertRaises(TypeError, c.copy_database, 4, "foo")
        self.assertRaises(TypeError, c.copy_database, "foo", 4)

        self.assertRaises(InvalidName, c.copy_database, "foo", "$foo")

        # Start from a clean slate so the membership checks below are valid.
        c.pymongo_test.test.drop()
        c.drop_database("pymongo_test1")
        c.drop_database("pymongo_test2")
        self.assertFalse("pymongo_test1" in c.database_names())
        self.assertFalse("pymongo_test2" in c.database_names())

        c.pymongo_test.test.insert({"foo": "bar"})

        c.copy_database("pymongo_test", "pymongo_test1")
        # copy_database() didn't accidentally end the request
        self.assertTrue(c.in_request())

        self.assertTrue("pymongo_test1" in c.database_names())
        self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])

        c.end_request()
        self.assertFalse(c.in_request())
        c.copy_database("pymongo_test", "pymongo_test2",
                        "%s:%d" % (host, port))
        # copy_database() didn't accidentally restart the request
        self.assertFalse(c.in_request())

        self.assertTrue("pymongo_test2" in c.database_names())
        self.assertEqual("bar", c.pymongo_test2.test.find_one()["foo"])

        # See SERVER-6427 for mongos
        if (version.at_least(c, (1, 3, 3, 1)) and
            not is_mongos(c) and server_started_with_auth(c)):

            c.drop_database("pymongo_test1")

            c.admin.add_user("admin", "password")
            c.admin.authenticate("admin", "password")
            try:
                # The only non-admin account that exists for the checks below.
                c.pymongo_test.add_user("mike", "password")

                # Unknown user: the copy must fail and create nothing.
                self.assertRaises(OperationFailure, c.copy_database,
                                  "pymongo_test", "pymongo_test1",
                                  username="foo", password="bar")
                self.assertFalse("pymongo_test1" in c.database_names())

                # Existing user, wrong password: must fail as well.
                self.assertRaises(OperationFailure, c.copy_database,
                                  "pymongo_test", "pymongo_test1",
                                  username="mike", password="bar")
                self.assertFalse("pymongo_test1" in c.database_names())

                # Credentials matching the add_user() call above must succeed.
                c.copy_database("pymongo_test", "pymongo_test1",
                                username="mike", password="password")
                self.assertTrue("pymongo_test1" in c.database_names())
                self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])
            finally:
                # Cleanup
                remove_all_users(c.pymongo_test)
                c.admin.remove_user("admin")
                c.disconnect()
Пример #6
0
    def test_copy_db(self):
        # Exercise MongoClient.copy_database(): argument validation, copying
        # inside and outside a request, and (on servers started with auth)
        # copying with bad and good credentials.
        c = MongoClient(host, port)
        # Due to SERVER-2329, databases may not disappear
        # from a master in a master-slave pair.
        if server_is_master_with_slave(c):
            raise SkipTest("SERVER-2329")
        # We test copy twice; once starting in a request and once not. In
        # either case the copy should succeed (because it starts a request
        # internally) and should leave us in the same state as before the copy.
        c.start_request()

        self.assertRaises(TypeError, c.copy_database, 4, "foo")
        self.assertRaises(TypeError, c.copy_database, "foo", 4)

        self.assertRaises(InvalidName, c.copy_database, "foo", "$foo")

        # Start from a clean slate so the membership checks below are valid.
        c.pymongo_test.test.drop()
        c.drop_database("pymongo_test1")
        c.drop_database("pymongo_test2")
        self.assertFalse("pymongo_test1" in c.database_names())
        self.assertFalse("pymongo_test2" in c.database_names())

        c.pymongo_test.test.insert({"foo": "bar"})

        c.copy_database("pymongo_test", "pymongo_test1")
        # copy_database() didn't accidentally end the request
        self.assertTrue(c.in_request())

        self.assertTrue("pymongo_test1" in c.database_names())
        self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])

        c.end_request()
        self.assertFalse(c.in_request())
        c.copy_database("pymongo_test", "pymongo_test2",
                        "%s:%d" % (host, port))
        # copy_database() didn't accidentally restart the request
        self.assertFalse(c.in_request())

        self.assertTrue("pymongo_test2" in c.database_names())
        self.assertEqual("bar", c.pymongo_test2.test.find_one()["foo"])

        # See SERVER-6427 for mongos
        if (version.at_least(c, (1, 3, 3, 1)) and not is_mongos(c)
                and server_started_with_auth(c)):

            c.drop_database("pymongo_test1")

            c.admin.add_user("admin", "password")
            c.admin.authenticate("admin", "password")
            try:
                # The only non-admin account that exists for the checks below.
                c.pymongo_test.add_user("mike", "password")

                # Unknown user: the copy must fail and create nothing.
                self.assertRaises(OperationFailure,
                                  c.copy_database,
                                  "pymongo_test",
                                  "pymongo_test1",
                                  username="foo",
                                  password="bar")
                self.assertFalse("pymongo_test1" in c.database_names())

                # Existing user, wrong password: must fail as well.
                self.assertRaises(OperationFailure,
                                  c.copy_database,
                                  "pymongo_test",
                                  "pymongo_test1",
                                  username="mike",
                                  password="bar")
                self.assertFalse("pymongo_test1" in c.database_names())

                # Credentials matching the add_user() call above must succeed.
                c.copy_database("pymongo_test",
                                "pymongo_test1",
                                username="mike",
                                password="password")
                self.assertTrue("pymongo_test1" in c.database_names())
                self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])
            finally:
                # Cleanup
                remove_all_users(c.pymongo_test)
                c.admin.remove_user("admin")
                c.disconnect()
Пример #7
0
class MongoDBStorage(object):
	""" MongoDB item pipeline.

	Stores every processed item in one MongoDB collection. Settings read in
	``__init__`` (defaults in parentheses):

	* ``MONGODB_DATABASE`` and ``MONGODB_COLLECTION`` -- required; the
	  pipeline raises ``NotConfigured`` when either is missing or empty.
	* ``MONGODB_URI`` (``'mongodb://localhost:27017'``)
	* ``MONGODB_FSYNC`` (``False``)
	* ``MONGODB_REPLICA_SET`` (``None``)
	* ``MONGODB_WRITE_CONCERN`` (``0``)
	* ``MONGODB_UNIQUE_KEY`` (``None``) -- when set, items are written with
	  ``update_item`` keyed on these fields instead of ``insert_item``.
	* ``MONGODB_IGNORE_NULL`` (``False``) -- drop ``None`` valued fields.
	* ``MONGODB_UPSERT`` (``True``)
	"""

	# ``basestring`` is a Python 2 builtin; fall back to ``str`` where it
	# does not exist so the string checks below keep working.
	try:
		_string_types = basestring
	except NameError:
		_string_types = str

	def __init__(self, settings, stats, **kwargs):
		""" Read the pipeline configuration from ``settings``.

		Raises ``NotConfigured`` if the database or collection name is unset.
		"""
		self.stats = stats
		if not settings.get('MONGODB_DATABASE'):
			raise NotConfigured
		if not settings.get('MONGODB_COLLECTION'):
			raise NotConfigured

		self._uri = settings.get('MONGODB_URI', 'mongodb://localhost:27017')
		self._database = settings.get('MONGODB_DATABASE')
		self._collection = settings.get('MONGODB_COLLECTION')

		self._fsync = settings.getbool('MONGODB_FSYNC', False)
		self._replica_set = settings.get('MONGODB_REPLICA_SET', None)
		self._write_concern = settings.getint('MONGODB_WRITE_CONCERN', 0)

		self._unique_key = settings.get('MONGODB_UNIQUE_KEY', None)
		self._ignore_null = settings.getbool('MONGODB_IGNORE_NULL', False)
		self._upsert = settings.getbool('MONGODB_UPSERT', True)

	@classmethod
	def from_crawler(cls, crawler):
		""" Build the pipeline from a crawler's settings and stats. """
		return cls(crawler.settings, crawler.stats)

	def open_spider(self, spider):
		""" Connect to MongoDB and, if a unique key is set, ensure its index. """
		self._build_unique_key()

		if self._replica_set is not None:
			self.connection = MongoReplicaSetClient(
				self._uri,
				replicaSet=self._replica_set,
				w=self._write_concern,
				fsync=self._fsync,
				read_preference=ReadPreference.PRIMARY_PREFERRED)
		else:
			# NOTE(review): MONGODB_WRITE_CONCERN is only passed on the
			# replica-set path above -- confirm whether this client should
			# honour it too.
			self.connection = MongoClient(
				self._uri,
				fsync=self._fsync,
				read_preference=ReadPreference.PRIMARY)

		self.database = self.connection[self._database]
		self.collection = self.database[self._collection]

		# The three values must be a tuple: "%" binds tighter than the comma.
		log.msg('Connected to MongoDB "%s", using "%s/%s"' %
			(self._uri, self._database, self._collection))

		# ensure index
		if self._unique_key:
			log.msg('Creating index for key %s' % self._unique_key)
			self.collection.ensure_index(
				list(self._unique_key.items()), unique=True, sparse=True)

	def close_spider(self, spider):
		""" Disconnect and drop the collection/database handles. """
		del self.collection
		self.connection.disconnect()
		del self.database

	def process_item(self, item, spider):
		""" Store a copy of ``item`` in MongoDB and return ``item`` unchanged.

		The copy is passed through ``verify_item``; a falsy result skips the
		write. With no unique key the copy is inserted, otherwise it is
		written through ``update_item`` using the unique-key field values as
		the query. ``DuplicateKeyError`` is counted, not propagated.
		"""
		# (duplicate counts with multiple mongo pipelines)
		self.stats.inc_value('mongostorage/total_item_count')

		mongoitem = copy.deepcopy(item) # don't modify original

		# Query document identifying this item; stays None without a key.
		lookup = None
		if self._unique_key:
			lookup = dict((k, mongoitem[k]) for k in self._unique_key)

		mongoitem = self.verify_item(mongoitem, self.collection, lookup, spider)
		if not mongoitem:
			return item

		try:
			dictitem = dict(mongoitem)
		except ValueError:
			return item

		self.stats.inc_value('mongostorage/item_count')

		if self._ignore_null:
			# do not insert None values
			dictitem = self._drop_null_fields(dictitem)

		# Branch on the lookup rather than on "_unique_key is None" so an
		# empty key setting inserts instead of updating with a None query.
		if lookup is None:
			try:
				self.insert_item(dictitem, self.collection, spider)
				log.msg('Item inserted in MongoDB database %s/%s' % (self._database, self._collection),
					level=log.DEBUG, spider=spider)
				self.stats.inc_value('mongostorage/insert_count')
			except errors.DuplicateKeyError:
				self.stats.inc_value('mongostorage/duplicate_count')
		else:
			# TODO: check return value for success?
			try:
				self.update_item(lookup, dictitem, self.collection, spider)
				log.msg('Item updated in MongoDB database %s/%s' % (self._database, self._collection),
					level=log.DEBUG, spider=spider)
				self.stats.inc_value('mongostorage/update_count')
			except errors.DuplicateKeyError:
				self.stats.inc_value('mongostorage/duplicate_count')

		self.stats.inc_value('mongostorage/success_count')

		return item

	@staticmethod
	def _drop_null_fields(fields):
		""" Return a copy of ``fields`` without the entries whose value is None. """
		return dict((name, value) for name, value in fields.items()
			if value is not None)

	def verify_item(self, item, db, idx, spider):
		""" Verify and pre-process item.

		Override in subclass. Return the item to store, or a falsy value to
		skip storing it. The default returns ``item`` unchanged.

		Example::

			if db.find_one(idx):
				log.msg('Item already exists in MongoDB for key %s' % (idx),
					level=log.DEBUG, spider=spider)
				self.stats.inc_value('mongostorage/duplicate_count')
			else:
				return item
		"""
		return item

	def insert_item(self, item, db, spider):
		""" Insert database item.

		Override in subclass.
		"""
		db.insert(item, continue_on_error=True)

	def update_item(self, key, item, db, spider):
		""" Update/upsert database item.

		Override in subclass.
		"""
		db.update(key, item, upsert=self._upsert)

	def _build_unique_key(self):
		""" Normalise ``self._unique_key`` into a ``{field: direction}`` dict.

		Accepts a single field name, a list mixing field names and
		``(field, direction)`` pairs, or a dict. Boolean directions map
		True/False to ASCENDING/DESCENDING; string directions starting with
		"a" (any case) map to ASCENDING and all others to DESCENDING; other
		values are kept as given. A falsy key is left untouched.

		Raises ``AttributeError`` for any other key type.
		"""
		if not self._unique_key:
			return

		string_types = self._string_types
		spec = self._unique_key

		if isinstance(spec, string_types):
			spec = {spec: ASCENDING}
		elif isinstance(spec, list):
			# Expand bare names explicitly; handing the list straight to
			# dict() would split a two-character name into key and value.
			spec = dict(
				(entry, ASCENDING) if isinstance(entry, string_types) else tuple(entry)
				for entry in spec)

		if not isinstance(spec, dict):
			raise AttributeError(
				'MONGODB_UNIQUE_KEY must be a string, list or dict')

		normalized = {}
		for key, value in spec.items():
			if isinstance(value, bool):
				value = ASCENDING if value else DESCENDING
			elif isinstance(value, string_types):
				value = ASCENDING if value.lower()[:1] == 'a' else DESCENDING
			# if int, assume it's ASC(1) or DESC(-1) already
			normalized[key] = value
		self._unique_key = normalized