def test_disconnect(self):
    """Check that a collection stays usable after repeated disconnects.

    Two rounds are run; in each one the client is disconnected twice in a
    row and then ``count()`` is issued on a collection obtained before the
    disconnects. The test passes when none of these calls raises.
    """
    client = MongoClient(host, port)
    bar = client.pymongo_test.bar

    for _ in range(2):
        # A second disconnect on an already-disconnected client, followed
        # by an operation on the collection.
        client.disconnect()
        client.disconnect()
        bar.count()
def test_disconnect(self):
    """Check that a collection stays usable after repeated disconnects.

    Two rounds are run; in each one the client is disconnected twice in a
    row and then ``count()`` is issued on a collection obtained before the
    disconnects. The test passes when none of these calls raises.
    """
    client = MongoClient(host, port)
    bar = client.pymongo_test.bar

    for _ in range(2):
        # A second disconnect on an already-disconnected client, followed
        # by an operation on the collection.
        client.disconnect()
        client.disconnect()
        bar.count()
def test_kill_cursors_warning(self):
    """Closing a cursor on a disconnected client must raise a UserWarning.

    If kill_cursors is called while the client is disconnected, it can't
    risk taking the lock to reconnect, in case it's being called from
    Cursor.__del__, see PYTHON-799. Test that it shows a warning in this
    case.
    """
    client = MongoClient(host, port)
    coll = client.pymongo_test.test
    coll.insert({} for _ in range(4))

    # Fetch a single document with batch_size(1); presumably this leaves
    # the cursor open with more results pending -- confirm against the
    # driver if this matters.
    open_cursor = coll.find().batch_size(1)
    open_cursor.next()
    client.disconnect()

    warning_ctx = catch_warnings()
    try:
        # Turn UserWarning into an exception so assertRaises can observe it.
        warnings.simplefilter("error", UserWarning)
        self.assertRaises(UserWarning, open_cursor.close)
    finally:
        warning_ctx.exit()

    # Reconnect.
    coll.find_one()
    open_cursor.close()
def test_kill_cursors_warning(self):
    """Closing a cursor on a disconnected client must raise a UserWarning.

    If kill_cursors is called while the client is disconnected, it can't
    risk taking the lock to reconnect, in case it's being called from
    Cursor.__del__, see PYTHON-799. Test that it shows a warning in this
    case.
    """
    client = MongoClient(host, port)
    coll = client.pymongo_test.test
    coll.insert({} for _ in range(4))

    # Fetch a single document with batch_size(1); presumably this leaves
    # the cursor open with more results pending -- confirm against the
    # driver if this matters.
    open_cursor = coll.find().batch_size(1)
    open_cursor.next()
    client.disconnect()

    warning_ctx = catch_warnings()
    try:
        # Turn UserWarning into an exception so assertRaises can observe it.
        warnings.simplefilter("error", UserWarning)
        self.assertRaises(UserWarning, open_cursor.close)
    finally:
        warning_ctx.exit()

    # Reconnect.
    coll.find_one()
    open_cursor.close()
def test_copy_db(self):
    """Exercise ``MongoClient.copy_database`` inside and outside a request.

    The test first checks argument validation, then copies ``pymongo_test``
    to ``pymongo_test1`` while in a request and to ``pymongo_test2`` after
    the request has ended, verifying each time that the request state is
    unchanged and that the copied document is readable. When the server
    runs with auth (and is not a mongos), it also checks that copying with
    bad credentials fails and that copying with the credentials of the
    user created by this test succeeds.

    Raises:
        SkipTest: if the server is a master with a slave (SERVER-2329).
    """
    c = MongoClient(host, port)
    # Due to SERVER-2329, databases may not disappear
    # from a master in a master-slave pair.
    if server_is_master_with_slave(c):
        raise SkipTest("SERVER-2329")

    # We test copy twice; once starting in a request and once not. In
    # either case the copy should succeed (because it starts a request
    # internally) and should leave us in the same state as before the copy.
    c.start_request()

    self.assertRaises(TypeError, c.copy_database, 4, "foo")
    self.assertRaises(TypeError, c.copy_database, "foo", 4)
    self.assertRaises(InvalidName, c.copy_database, "foo", "$foo")

    # Start from a clean slate so the membership checks below are reliable.
    c.pymongo_test.test.drop()
    c.drop_database("pymongo_test1")
    c.drop_database("pymongo_test2")
    self.assertFalse("pymongo_test1" in c.database_names())
    self.assertFalse("pymongo_test2" in c.database_names())

    c.pymongo_test.test.insert({"foo": "bar"})

    c.copy_database("pymongo_test", "pymongo_test1")
    # copy_database() didn't accidentally end the request
    self.assertTrue(c.in_request())

    self.assertTrue("pymongo_test1" in c.database_names())
    self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])

    c.end_request()
    self.assertFalse(c.in_request())
    c.copy_database("pymongo_test", "pymongo_test2",
                    "%s:%d" % (host, port))
    # copy_database() didn't accidentally restart the request
    self.assertFalse(c.in_request())

    self.assertTrue("pymongo_test2" in c.database_names())
    self.assertEqual("bar", c.pymongo_test2.test.find_one()["foo"])

    # See SERVER-6427 for mongos
    if (version.at_least(c, (1, 3, 3, 1)) and
            not is_mongos(c) and server_started_with_auth(c)):

        c.drop_database("pymongo_test1")

        c.admin.add_user("admin", "password")
        c.admin.authenticate("admin", "password")
        try:
            c.pymongo_test.add_user("mike", "password")

            # Unknown user: the copy must be rejected.
            self.assertRaises(OperationFailure, c.copy_database,
                              "pymongo_test", "pymongo_test1",
                              username="******", password="******")
            self.assertFalse("pymongo_test1" in c.database_names())

            # Existing user, wrong password: the copy must be rejected.
            self.assertRaises(OperationFailure, c.copy_database,
                              "pymongo_test", "pymongo_test1",
                              username="mike", password="******")
            self.assertFalse("pymongo_test1" in c.database_names())

            # The success case has to use the credentials registered with
            # add_user() above, otherwise it cannot authenticate.
            c.copy_database("pymongo_test", "pymongo_test1",
                            username="mike", password="password")
            self.assertTrue("pymongo_test1" in c.database_names())
            self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])
        finally:
            # Cleanup
            remove_all_users(c.pymongo_test)
            c.admin.remove_user("admin")
            c.disconnect()
def test_copy_db(self):
    """Exercise ``MongoClient.copy_database`` inside and outside a request.

    The test first checks argument validation, then copies ``pymongo_test``
    to ``pymongo_test1`` while in a request and to ``pymongo_test2`` after
    the request has ended, verifying each time that the request state is
    unchanged and that the copied document is readable. When the server
    runs with auth (and is not a mongos), it also checks that copying with
    bad credentials fails and that copying with the credentials of the
    user created by this test succeeds.

    Raises:
        SkipTest: if the server is a master with a slave (SERVER-2329).
    """
    c = MongoClient(host, port)
    # Due to SERVER-2329, databases may not disappear
    # from a master in a master-slave pair.
    if server_is_master_with_slave(c):
        raise SkipTest("SERVER-2329")

    # We test copy twice; once starting in a request and once not. In
    # either case the copy should succeed (because it starts a request
    # internally) and should leave us in the same state as before the copy.
    c.start_request()

    self.assertRaises(TypeError, c.copy_database, 4, "foo")
    self.assertRaises(TypeError, c.copy_database, "foo", 4)
    self.assertRaises(InvalidName, c.copy_database, "foo", "$foo")

    # Start from a clean slate so the membership checks below are reliable.
    c.pymongo_test.test.drop()
    c.drop_database("pymongo_test1")
    c.drop_database("pymongo_test2")
    self.assertFalse("pymongo_test1" in c.database_names())
    self.assertFalse("pymongo_test2" in c.database_names())

    c.pymongo_test.test.insert({"foo": "bar"})

    c.copy_database("pymongo_test", "pymongo_test1")
    # copy_database() didn't accidentally end the request
    self.assertTrue(c.in_request())

    self.assertTrue("pymongo_test1" in c.database_names())
    self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])

    c.end_request()
    self.assertFalse(c.in_request())
    c.copy_database("pymongo_test", "pymongo_test2",
                    "%s:%d" % (host, port))
    # copy_database() didn't accidentally restart the request
    self.assertFalse(c.in_request())

    self.assertTrue("pymongo_test2" in c.database_names())
    self.assertEqual("bar", c.pymongo_test2.test.find_one()["foo"])

    # See SERVER-6427 for mongos
    if (version.at_least(c, (1, 3, 3, 1)) and
            not is_mongos(c) and server_started_with_auth(c)):

        c.drop_database("pymongo_test1")

        c.admin.add_user("admin", "password")
        c.admin.authenticate("admin", "password")
        try:
            c.pymongo_test.add_user("mike", "password")

            # Unknown user: the copy must be rejected.
            self.assertRaises(OperationFailure, c.copy_database,
                              "pymongo_test", "pymongo_test1",
                              username="******", password="******")
            self.assertFalse("pymongo_test1" in c.database_names())

            # Existing user, wrong password: the copy must be rejected.
            self.assertRaises(OperationFailure, c.copy_database,
                              "pymongo_test", "pymongo_test1",
                              username="mike", password="******")
            self.assertFalse("pymongo_test1" in c.database_names())

            # The success case has to use the credentials registered with
            # add_user() above, otherwise it cannot authenticate.
            c.copy_database("pymongo_test", "pymongo_test1",
                            username="mike", password="password")
            self.assertTrue("pymongo_test1" in c.database_names())
            self.assertEqual("bar", c.pymongo_test1.test.find_one()["foo"])
        finally:
            # Cleanup
            remove_all_users(c.pymongo_test)
            c.admin.remove_user("admin")
            c.disconnect()
class MongoDBStorage(object):
    """MongoDB item pipeline.

    Settings read in ``__init__``:

    * ``MONGODB_DATABASE`` and ``MONGODB_COLLECTION`` -- required;
      ``NotConfigured`` is raised when either is missing or empty.
    * ``MONGODB_URI`` -- defaults to ``mongodb://localhost:27017``.
    * ``MONGODB_FSYNC`` (bool, default False) -- passed to the client.
    * ``MONGODB_REPLICA_SET`` (default None) -- when set, a
      ``MongoReplicaSetClient`` is used instead of ``MongoClient``.
    * ``MONGODB_WRITE_CONCERN`` (int, default 0) -- passed as ``w`` to the
      replica-set client only.
    * ``MONGODB_UNIQUE_KEY`` (default None) -- field name, list, or dict
      describing a unique index; when set, items are written with
      ``update_item`` instead of ``insert_item``.
    * ``MONGODB_IGNORE_NULL`` (bool, default False) -- drop ``None`` values
      before writing.
    * ``MONGODB_UPSERT`` (bool, default True) -- ``upsert`` flag used by
      ``update_item``.
    """

    def __init__(self, settings, stats, **kwargs):
        """Read the pipeline configuration.

        Args:
            settings: settings object providing ``get``, ``getbool`` and
                ``getint``.
            stats: stats collector providing ``inc_value``.
            **kwargs: accepted for compatibility; unused.

        Raises:
            NotConfigured: if the database or collection name is not set.
        """
        self.stats = stats
        if not settings.get('MONGODB_DATABASE'):
            raise NotConfigured
        if not settings.get('MONGODB_COLLECTION'):
            raise NotConfigured

        self._uri = settings.get('MONGODB_URI', 'mongodb://localhost:27017')
        self._database = settings.get('MONGODB_DATABASE')
        self._collection = settings.get('MONGODB_COLLECTION')

        self._fsync = settings.getbool('MONGODB_FSYNC', False)
        self._replica_set = settings.get('MONGODB_REPLICA_SET', None)
        self._write_concern = settings.getint('MONGODB_WRITE_CONCERN', 0)

        self._unique_key = settings.get('MONGODB_UNIQUE_KEY', None)
        self._ignore_null = settings.getbool('MONGODB_IGNORE_NULL', False)
        self._upsert = settings.getbool('MONGODB_UPSERT', True)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler's settings and stats."""
        return cls(crawler.settings, crawler.stats)

    def open_spider(self, spider):
        """Connect to MongoDB and, if configured, create the unique index."""
        self._build_unique_key()

        if self._replica_set is not None:
            self.connection = MongoReplicaSetClient(
                self._uri,
                replicaSet=self._replica_set,
                w=self._write_concern,
                fsync=self._fsync,
                read_preference=ReadPreference.PRIMARY_PREFERRED)
        else:
            # NOTE(review): MONGODB_WRITE_CONCERN is only forwarded to the
            # replica-set client above -- confirm whether the standalone
            # client should receive it too.
            self.connection = MongoClient(
                self._uri,
                fsync=self._fsync,
                read_preference=ReadPreference.PRIMARY)

        self.database = self.connection[self._database]
        self.collection = self.database[self._collection]

        # The three values must be passed to % as one tuple.
        log.msg('Connected to MongoDB "%s", using "%s/%s"' %
                (self._uri, self._database, self._collection))

        # ensure index
        if self._unique_key:
            log.msg('Creating index for key %s' % (self._unique_key,))
            # list() so that a real list is passed on Python 3 as well.
            self.collection.ensure_index(list(self._unique_key.items()),
                                         unique=True, sparse=True)

    def close_spider(self, spider):
        """Drop the collection/database handles and close the connection."""
        del self.collection
        self.connection.disconnect()
        del self.database

    def process_item(self, item, spider):
        """Store a copy of ``item`` in MongoDB and return the original item.

        The item is deep-copied, passed through ``verify_item`` and
        converted to a dict. Without a unique key it is inserted;
        with one it is updated, keyed on the item's unique-key fields.
        ``DuplicateKeyError`` is counted in the stats rather than raised.
        """
        self.stats.inc_value('mongostorage/total_item_count')
        # (duplicate counts with multiple mongo pipelines)

        mongoitem = copy.deepcopy(item)  # don't modify original

        idx = None
        if self._unique_key:
            idx = dict([(k, mongoitem[k]) for k in self._unique_key])

        mongoitem = self.verify_item(mongoitem, self.collection, idx, spider)
        if not mongoitem:
            return item

        try:
            dictitem = dict(mongoitem)
        except ValueError:
            return item

        self.stats.inc_value('mongostorage/item_count')

        if self._ignore_null:
            # do not insert None values; build a new dict so that nothing
            # is deleted during iteration and ``idx`` is left untouched
            dictitem = dict((field, value)
                            for field, value in dictitem.items()
                            if value is not None)

        # Same truthiness test as the one used to build ``idx`` above.
        if not self._unique_key:
            try:
                self.insert_item(dictitem, self.collection, spider)
                log.msg('Item inserted in MongoDB database %s/%s' %
                        (self._database, self._collection),
                        level=log.DEBUG, spider=spider)
                self.stats.inc_value('mongostorage/insert_count')
            except errors.DuplicateKeyError:
                self.stats.inc_value('mongostorage/duplicate_count')
        else:
            # TODO: check return value for success?
            try:
                self.update_item(idx, dictitem, self.collection, spider)
                log.msg('Item updated in MongoDB database %s/%s' %
                        (self._database, self._collection),
                        level=log.DEBUG, spider=spider)
                self.stats.inc_value('mongostorage/update_count')
            except errors.DuplicateKeyError:
                self.stats.inc_value('mongostorage/duplicate_count')

        self.stats.inc_value('mongostorage/success_count')
        return item

    def verify_item(self, item, db, idx, spider):
        """Verify and pre-process item. Override in subclass.

        Returning a falsy value makes ``process_item`` skip the write.
        The default implementation returns ``item`` unchanged.

        Example override body::

            if db.find_one(idx):
                log.msg('Item already exists in MongoDB for key %s' %
                        (idx), level=log.DEBUG, spider=spider)
                self.stats.inc_value('mongostorage/duplicate_count')
            else:
                return item
        """
        return item

    def insert_item(self, item, db, spider):
        """Insert database item. Override in subclass."""
        db.insert(item, continue_on_error=True)

    def update_item(self, key, item, db, spider):
        """Update/upsert database item. Override in subclass."""
        db.update(key, item, upsert=self._upsert)

    def _build_unique_key(self):
        """Normalize ``self._unique_key`` into a ``{field: direction}`` dict.

        Accepted inputs: a field name, a list whose entries are field
        names and/or ``(field, direction)`` pairs, or a dict. Directions
        given as bool (True is ascending) or as strings (a leading ``a``
        means ascending, case-insensitive) are converted to
        ``ASCENDING``/``DESCENDING``; any other value is kept as is and
        assumed to already be ASC(1) or DESC(-1). A falsy key is left
        untouched.

        Raises:
            AttributeError: if the key is not a string, list or dict.
        """
        key = self._unique_key
        if not key:
            return

        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3

        if isinstance(key, string_types):
            key = {key: ASCENDING}
        elif isinstance(key, list):
            # Expand bare field names explicitly; dict() on a list of
            # strings would split 2-character names into (key, value).
            key = dict([(entry, ASCENDING)
                        if isinstance(entry, string_types) else entry
                        for entry in key])

        if not isinstance(key, dict):
            raise AttributeError

        normalized = {}
        for field, direction in key.items():
            if isinstance(direction, bool):
                direction = ASCENDING if direction else DESCENDING
            elif isinstance(direction, string_types):
                if direction.lower()[:1] == 'a':
                    direction = ASCENDING
                else:
                    direction = DESCENDING
            # if int, assume it's ASC(1) or DESC(-1) already
            normalized[field] = direction
        self._unique_key = normalized