def drop_db(self):
    """Drop the MongoDB database named in the shared configuration.

    The database name, server address and port are read from
    ``common.cfg()`` under the keys ``db_name``, ``db_ip`` and ``db_port``.
    A confirmation line is printed once the drop call has returned.
    """
    target = common.cfg().get('db_name')
    address = (common.cfg().get('db_ip'), common.cfg().get('db_port'))
    MongoClient(*address).drop_database(target)
    print('\n Drop Database Done.')
def test_cert_ssl_uri_support(self):
    # Expects the server to be running with the server.pem, ca.pem
    # and crl.pem provided in mongodb and the server tests eg:
    #
    #   --sslPEMKeyFile=/path/to/pymongo/test/certificates/server.pem
    #   --sslCAFile=/path/to/pymongo/test/certificates/ca.pem
    #   --sslCRLFile=/path/to/pymongo/test/certificates/crl.pem
    #
    # Also requires an /etc/hosts entry where "server" is resolvable
    if not CERT_SSL:
        raise SkipTest("No mongod available over SSL with certs")
    if not SERVER_IS_RESOLVABLE:
        raise SkipTest(
            "No hosts entry for 'server'. Cannot validate hostname in the certificate")

    # All SSL options travel inside the connection URI itself.
    template = ("mongodb://server/?ssl=true"
                "&ssl_certfile=%s&ssl_cert_reqs=%s&ssl_ca_certs=%s")
    secured = MongoClient(template % (CLIENT_PEM, 'CERT_REQUIRED', CA_PEM))

    # Round-trip one document through a scratch collection, then clean up.
    scratch_db = secured.pymongo_ssl_test
    scratch_db.test.drop()
    scratch_db.test.insert_one({'ssl': True})
    stored = scratch_db.test.find_one()
    self.assertTrue(stored['ssl'])
    secured.drop_database('pymongo_ssl_test')
def get(self):
    """Rebuild ``test_database`` on the local MongoDB with sample records.

    Drops the database, inserts two groups, two persons, four messages,
    two schedules, two attendances and three events, then redirects the
    request to ``/``.  Ids of inserted documents are stored on the
    referencing documents in their string form.
    """
    client = MongoClient('localhost', 27017)
    client.drop_database('test_database')
    db = client.test_database
    now = datetime.now().strftime('%Y/%m/%d %H:%M:%S')

    def add(collection, document):
        # Insert one document and hand back its id as a string.
        return str(collection.insert_one(document).inserted_id)

    group1 = add(db.groups, {'name': u'東京シティブラスオルケスター', 'icon': 'tcbo.jpg'})
    group2 = add(db.groups, {'name': u'伊藤マリーンズ', 'icon': 'marines.jpg'})
    person1 = add(db.persons, {'name': u'中原和夫', 'icon': 'person1.png'})
    person2 = add(db.persons, {'name': u'足立悦子', 'icon': 'person2.png'})

    messages = (
        (u'おはよう', group1, person1),
        (u'こんにちは', group1, person2),
        (u'こんばんは', group1, person1),
        (u'また別のおはよう', group2, person2),
    )
    for body, group, person in messages:
        db.messages.insert_one({'body': body, 'created': now,
                                'groupId': group, 'personId': person})

    schedule1 = add(db.schedules, {'day': u'2015-09-28',
                                   'place': u'高橋区民センター',
                                   'note': u'指揮者不在のため、合奏はありません。',
                                   'created': now,
                                   'groupId': group1,
                                   'personId': person1})
    # The second schedule is stored but nothing below refers to it.
    add(db.schedules, {'day': u'2015-10-05',
                       'place': u'山田市営ホール',
                       'note': u'',
                       'created': now,
                       'groupId': group1,
                       'personId': person1})

    attendances = (
        (u'1', u'', person1),
        (u'2', u'15時ごろから参加します', person2),
    )
    for choice, note, person in attendances:
        db.attendances.insert_one({'choice': choice, 'note': note, 'created': now,
                                   'scheduleId': schedule1, 'groupId': group1,
                                   'personId': person})

    events = (
        (u'新規イベント1', group1),
        (u'新規イベント2', group1),
        (u'新規イベント3', group2),
    )
    for body, group in events:
        db.events.insert_one({'body': body, 'created': now, 'groupId': group})

    self.redirect('/')
class TestAuth(unittest.TestCase):
    """Checks that MongoHandler can log when given a username and password."""

    def setUp(self):
        """ Create an empty database with one user that could be used for logging """
        self.db_name, self.collection_name = '_mongolog_auth', 'log'
        self.user_name, self.password = 'MyUsername', '******'

        connection = Connection()
        database = connection[self.db_name]
        self.conn = connection
        self.db = database
        self.collection = database[self.collection_name]

        # Start clean, then register the credentials the handler will use.
        connection.drop_database(self.db_name)
        database.add_user(self.user_name, self.password)

    def tearDown(self):
        """ Drop used database """
        self.conn.drop_database(self.db_name)

    def testAuthentication(self):
        """ Logging example with authentication """
        auth_logger = logging.getLogger('authentication')
        handler = MongoHandler(self.collection_name, self.db_name,
                               username=self.user_name,
                               password=self.password)
        auth_logger.addHandler(handler)
        auth_logger.error('test')

        # The record must be retrievable from the collection the handler targets.
        stored = self.collection.find_one({'levelname': 'ERROR', 'msg': 'test'})
        self.assertEqual(stored['msg'], 'test')
def test_cert_ssl_implicitly_set(self):
    # Expects the server to be running with the server.pem, ca.pem
    # and crl.pem provided in mongodb and the server tests eg:
    #
    #   --sslPEMKeyFile=/path/to/pymongo/test/certificates/server.pem
    #   --sslCAFile=/path/to/pymongo/test/certificates/ca.pem
    #   --sslCRLFile=/path/to/pymongo/test/certificates/crl.pem
    #
    # Also requires an /etc/hosts entry where "server" is resolvable
    if not CERT_SSL:
        raise SkipTest("No mongod available over SSL with certs")

    active = ssl_client
    ismaster = ssl_client.admin.command('ismaster')
    if 'setName' in ismaster:
        # Replica set: reconnect to it, passing only a client certificate
        # (no explicit ssl=True), with w equal to the number of hosts.
        active = MongoClient(pair,
                             replicaSet=ismaster['setName'],
                             w=len(ismaster['hosts']),
                             ssl_certfile=CLIENT_PEM)

    scratch_db = active.pymongo_ssl_test
    scratch_db.test.drop()
    scratch_db.test.insert_one({'ssl': True})
    stored = scratch_db.test.find_one()
    self.assertTrue(stored['ssl'])
    active.drop_database('pymongo_ssl_test')
class MongoArchiverUnitTests(TestCase):
    """Checks the records a QueuedArchiver with a MongoWriter stores for model creates."""

    def setUp(self):
        """Connect to MongoDB, hook the archiver to the models and fix the test dates."""
        mongo = MongoClient(URL, PORT)
        self.client = mongo
        self.db = mongo[MONGO_DB]

        archiver = QueuedArchiver(writer=MongoWriter(), formatter=PythonFormatter())
        self.archiver = archiver
        archiver.connect_models()

        self.date = datetime.date.today()
        self.datetime = datetime.datetime.now()

    def tearDown(self):
        """Destroy the archiver and drop the test database."""
        self.archiver.destroy()
        self.client.drop_database(MONGO_DB)

    def test_archiver_write(self):
        parent = Parent.objects.create()
        Child.objects.create(parent=parent,
                             date_field=self.date,
                             datetime_field=self.datetime)
        # The archiver is destroyed before the collections are read back.
        self.archiver.destroy()

        # Mongo appears to save datetime in resolution of milliseconds instead of microseconds.
        stamp = self.datetime
        stamp_in_millis = stamp.replace(
            microsecond=stamp.microsecond - (stamp.microsecond % 1000))
        midnight = datetime.datetime.combine(self.date, datetime.datetime.min.time())

        expected_child = "".join([
            "{u'db_alias': u'default', u'fields': {u'parent': 3, u'char_field': u'Child CharField contents.', u'datetime_field': ",
            repr(stamp_in_millis),
            ", u'decimal_field': 3.434, u'date_field': ",
            repr(midnight),
            ", u'text_field': u'Child TextField contents.'}, u'pk': 3, u'model': u'tests.child', u'op': u'CREATE'}",
        ])
        for record in self.db['tests.child'].find():
            del record['_id']
            self.assertEqual(str(record), expected_child)

        expected_parent = "{u'db_alias': u'default', u'fields': {u'char_field': u'Parent CharField contents.'}, u'pk': 3, u'model': u'tests.parent', u'op': u'CREATE'}"
        for record in self.db['tests.parent'].find():
            del record['_id']
            self.assertEqual(str(record), expected_parent)
def test_connect_disconnect_works_on_same_document(self):
    """Ensure that the connect/disconnect works properly with a single Document"""
    first_db, second_db = 'db1', 'db2'

    # Ensure freshness of the 2 databases through pymongo
    client = MongoClient('localhost', 27017)
    for stale in (first_db, second_db):
        client.drop_database(stale)

    # Save in db1
    connect(first_db)

    class User(Document):
        name = StringField(required=True)

    john = User(name='John is in db1').save()
    disconnect()

    # Make sure save doesnt work at this stage
    with self.assertRaises(MongoEngineConnectionError):
        User(name='Wont work').save()

    # Save in db2
    connect(second_db)
    bob = User(name='Bob is in db2').save()
    disconnect()

    # Each database must hold exactly the one user saved while it was connected.
    expectations = (
        (first_db, john, 'John is in db1'),
        (second_db, bob, 'Bob is in db2'),
    )
    for db_name, saved, label in expectations:
        found = list(client[db_name].user.find())
        self.assertEqual(found, [{'_id': saved.id, 'name': label}])
def main():
    """Start the bottle server, then run one worker search against the ``test`` database.

    After ``bottle.run`` returns, three filler workers are added, the skills
    returned by ``searched()`` are matched against the database, the matches
    are passed back to ``searched`` and the database is dropped.
    """
    bottle.debug(True)
    # NOTE(review): everything below only executes once bottle.run() returns.
    bottle.run(host='0.0.0.0', port=8090)

    client = MongoClient()
    db = client.test

    #
    # filler data
    #
    filler_workers = (
        ("Steve Rogers", "avengers.com/cap", ["shield throwing", "Java"]),
        ("Natasha Romanov", "avengers.com/widow", ["assassination", "snark", "CSS"]),
        ("Sam Wilson", "avengers.com/falcon", ["bird suit", "Java", "HTML", "CSS"]),
    )
    for worker_name, worker_url, worker_skills in filler_workers:
        addToDB(db, worker_name, worker_url, worker_skills, "20055")
    #
    # /filler data
    #

    skills = searched()
    zipcode = "20055"
    skillMatchThreshold = 0.5
    matchedWorkers = searchDB(db, skills, zipcode, skillMatchThreshold)

    if len(matchedWorkers) < 10:
        # new scrape
        # add to DB
        pass

    searched(matchedWorkers)
    client.drop_database(db)
def handleList(res):
    """Destroy corpora idle for longer than ``delay`` and drop their MongoDB databases.

    Written as a generator: the result of every ``proxy.callRemote`` call is
    obtained with ``yield``.  ``delay``, ``dayms``, ``proxy`` and ``config``
    are read from the enclosing scope.

    For each corpus whose ``last_activity`` (compared against ``time() * 1000``)
    is older than ``delay``, the corpus is started, pinged and destroyed in
    that order.  A failed step writes a warning to stderr and that corpus is
    left alone.  Once all corpora are processed, the database
    ``<db_name>_<corpus id>`` of every destroyed corpus is dropped.

    :param res: reply of the listing call, a dict with ``code`` and either
        ``message`` (on failure) or ``result`` (corpus id -> corpus dict with
        at least ``last_activity`` and ``password``).
    """
    if res['code'] == 'fail':
        defer.returnValue(printError(res['message']))

    def warn(text):
        # Warnings go to stderr.  The original destroy branch used
        # `print sys.stderr, ...` (missing `>>`), which printed to stdout.
        sys.stderr.write(text + "\n")

    destroyed = []
    for cid, corpus in res['result'].items():
        since = time() * 1000 - corpus['last_activity']
        if not since > delay:
            continue
        print("REMOVING old corpus: %s %s %s days old"
              % (cid, corpus['last_activity'], int(since / dayms)))

        # The admin password is only sent for password-protected corpora.
        password = config['ADMIN_PASSWORD'] if corpus['password'] else ''
        reply = yield proxy.callRemote('start_corpus', cid, password)
        if reply['code'] == 'fail':
            warn("WARNING: could not start old corpus %s: %s" % (cid, reply['message']))
            continue

        reply = yield proxy.callRemote('ping', cid, 30)
        if reply['code'] == 'fail':
            warn("WARNING: could not ping old corpus %s: %s" % (cid, reply['message']))
            continue

        reply = yield proxy.callRemote('destroy_corpus', cid)
        if reply['code'] == 'fail':
            warn("WARNING: could not destroy old corpus %s: %s" % (cid, reply['message']))
        else:
            destroyed.append(cid)

    mongo_conf = config["mongo-scrapy"]
    c = MongoClient(mongo_conf["host"], mongo_conf["mongo_port"])
    for d in destroyed:
        c.drop_database('%s_%s' % (mongo_conf["db_name"], d))
def test01_oper_with_ks(self):
    # Start from a freshly dropped management database.
    mgmt_db_name = 'test_fabnet_mgmt_db'
    MongoClient('localhost').drop_database(mgmt_db_name)
    MgmtDatabaseManager.MGMT_DB_NAME = mgmt_db_name

    dbm = MgmtDatabaseManager('localhost')
    ManagementEngineAPI.initial_configuration(
        dbm, 'test_cluster', KS_PATH, 'mongodb://127.0.0.1/test_fabnet_ca')
    mgmt_api = ManagementEngineAPI(dbm)
    RESTHandler.setup_mgmt_api(mgmt_api)

    api = RestAPI('http://127.0.0.1:9944', 'admin', 'admin')

    # Changing roles is expected to fail until the key storage is initialised.
    with self.assertRaises(RESTException):
        api.changeUserRoles('admin', roles=[ROLE_RO, ROLE_CF, ROLE_UPGR, ROLE_NM])
    api.initKeyStorage(KS_PASSWD)
    api.changeUserRoles('admin', roles=[ROLE_RO, ROLE_CF, ROLE_UPGR, ROLE_NM])

    key = api.getSSHKey()
    self.assertTrue(len(key)>0)
    self.assertNotEqual(key, RestAPITest.key)

    release_url = 'file://%s/tests/data/valid_release.zip' % os.path.abspath('.')
    api.setRelease('DHT', release_url)
    rels = api.getReleases()
    self.assertTrue(len(rels)==1)
    only_release = rels[0]
    self.assertEqual(only_release[DBK_ID], 'DHT')
    self.assertTrue(only_release[DBK_RELEASE_URL].startswith('file://'))
    self.assertEqual(only_release[DBK_RELEASE_VERSION], '0.9a-2412')

    api.getNodesStat()
    mgmt_api.destroy()
def setUp(self):
    """
    set up DAS core module
    """
    verbosity = 0
    self.db = "test_mapping.db"

    config = deepcopy(das_readconfig())
    mongo_uri = config["mongodb"]["dburi"]
    config["logger"] = PrintManager("TestDASMapping", verbose=verbosity)
    config["verbose"] = verbosity

    mapping_db, mapping_coll = "test_mapping", "db"
    config["mappingdb"] = {
        "dburi": mongo_uri,
        "dbname": mapping_db,
        "collname": mapping_coll,
    }

    # add some maps to mapping db
    connection = MongoClient(mongo_uri)
    connection.drop_database(mapping_db)
    self.coll = connection[mapping_db][mapping_coll]

    block_fields = [
        {"ui": "Block name", "das": "block.name"},
        {"ui": "Block size", "das": "block.size"},
    ]
    self.pmap = {
        "presentation": {"block": block_fields},
        "type": "presentation",
    }
    self.coll.insert(self.pmap)

    # The token is computed over the collection contents and stored as its own record.
    token = verification_token(self.coll.find(exhaust=True))
    self.coll.insert({"verification_token": token, "type": "verification_token"})

    self.mgr = DASMapping(config)
class TestQueryJobStore(unittest.TestCase):
    """Test fixture that works on a scratch MongoDB collection.

    Host, port, database name and collection name can be overridden on the
    command line (``--host``, ``--port``, ``--dbname``, ``--collname``);
    unrecognised arguments are ignored.
    """

    def setUp(self):
        """Connect to MongoDB and require that the scratch collection does not exist yet."""
        parser = argparse.ArgumentParser()
        parser.add_argument("--host", type=str, default="localhost")
        parser.add_argument("--port", type=int, default=27017)
        parser.add_argument(
            "--dbname", type=str, default="test_sara_uw_website")
        parser.add_argument(
            "--collname", type=str, default="test_sara_uw_website")
        # parse_known_args so that the test runner's own options are tolerated.
        args, unknown = parser.parse_known_args()

        self._dbname = args.dbname
        self._collname = args.collname
        self._client = MongoClient(args.host, args.port)
        self._db = self._client[self._dbname]
        self.assertTrue(self._collname not in self._db.collection_names(),
                        "Collection {0} already exists!"
                        "".format(self._collname))
        self._collection = self._db[self._collname]

    def tearDown(self):
        """Drop the scratch collection and database, then close the client.

        The return value of each call is printed.  The handles are compared
        with ``None`` rather than truth-tested, because pymongo Database and
        Collection objects do not support ``bool()``.
        """
        if self._client is not None:
            if self._db is not None:
                if self._collection is not None:
                    print(self._db.drop_collection(self._collname))
                print(self._client.drop_database(self._dbname))
            print(self._client.close())
class Database_Connector:
    """Small helper around a MongoClient bound to one database on localhost:27017."""

    def __init__(self, db_Name):
        '''
        :param db_Name: Name of the database the helper methods operate on
        '''
        print('Connecting to database...')
        self.client = MongoClient('localhost', 27017)
        print('Connection Successful to ' + db_Name)
        self.db_name = db_Name

    def _collection_handle_(self, collection):
        '''
        :param collection: Name of the collection in database
        :return: The collection object of ``self.db_name`` with that name
        '''
        return self.client.get_database(self.db_name).get_collection(collection)

    def _delete_databse_(self, db_name):
        '''
        :param db_name: Name of the database
        :return: None
        This deletes the database
        '''
        self.client.drop_database(db_name)

    def _insert_data_(self, collection, data):
        '''
        :param collection: Name of the collection in database. Input Carefully
        :param data: BSON object which should be inserted to database
        :return: None
        Insert data to database
        '''
        self._collection_handle_(collection).insert(data)
        # One formatted string keeps the output identical under Python 2 and 3.
        print('Inserted data to :  %s  in  %s' % (collection, self.db_name))

    def _get_data_(self, collection, query):
        '''
        :param collection: Name of the collection in database. Input Carefully
        :param query: BSON query to retrieve data from the database
        :return: List of BSON objects
        '''
        return list(self._collection_handle_(collection).find(query))

    def _get_all_ids(self, collection):
        '''
        :param collection: Name of the collection in database. Input Carefully
        :return: List of the ``_id`` values of all documents in the collection
        '''
        # The list used to be built and then dropped: the method returned None.
        return [document['_id']
                for document in self._collection_handle_(collection).find()]

    def _get_count_(self, collection, query=None):
        '''
        :param collection: Name of the collection in database. Input carefully
        :param query: Optional BSON query; an empty or missing query counts
                      every document
        :return: return the number of documents in the collection
        '''
        target = self._collection_handle_(collection)
        if not query:
            return target.count()
        return target.find(query).count()
def PopulateRandomPeople():
    """Create one random person per feature vector and record the link between them.

    The ``Constants.People`` database is dropped first.  Every collection
    named in ``SamplingRates`` is then read from the
    ``Constants.FeatureVectors`` database; each vector yields a person
    (sequential ids starting at 1, diagnosis chosen by collection name)
    stored in the people collection, plus a person-to-record document.
    Indexes on both collections are created at the end.
    """
    client = MongoClient(Constants.LocalHost, Constants.MongoPort)
    vectorsDb = client.get_database(Constants.FeatureVectors)
    client.drop_database(Constants.People)
    peopleDb = client.get_database(Constants.People)
    people = peopleDb.get_collection(Constants.People)
    links = peopleDb.get_collection(Constants.PersonToRecordCollection)

    nextId = 1
    for sourceName in SamplingRates.keys():
        for vector in vectorsDb.get_collection(sourceName).find():
            # NOTE(review): a name outside these five leaves `diagnosis`
            # unchanged from the previous vector (or unbound on the first).
            if sourceName == 'mitdb':
                diagnosis = ['Arrhythmia']
            elif sourceName == 'svdb':
                diagnosis = ['Supraventricular Arrhythmia']
            elif sourceName == 'afdb':
                diagnosis = ['Atrial Fibrillation']
            elif sourceName == 'nsrdb':
                diagnosis = ['Normal Sinus Rhythm']
            elif sourceName == 'cudb':
                diagnosis = ['Ventricular Tachyarrhythmia']

            person = CreateNewRandomPerson(nextId, diagnosis)
            nextId += 1
            print(person)
            print('Assigned to record ' + str(vector[Constants.RecordNumber])
                  + ' in DB: ' + str(vector[Constants.Database]))

            people.insert_one(person._asdict())
            links.insert_one({
                Constants.ID: person.ID,
                'Database': vector[Constants.Database],
                Constants.Record: vector[Constants.RecordNumber],
            })

    people.create_index(Constants.ID)
    links.create_index(Constants.ID)
    links.create_index([(Constants.Database, pymongo.ASCENDING),
                        (Constants.Record, pymongo.ASCENDING)])
def test_copy_db(self):
    authed_client = auth_context.client
    if is_mongos(authed_client):
        raise SkipTest("SERVER-6427")

    client = MongoClient(host, port)
    authed_client.admin.add_user("admin", "password")
    client.admin.authenticate("admin", "password")

    source, target = "pymongo_test", "pymongo_test1"
    client.drop_database(source)
    client.drop_database(target)
    client.pymongo_test.test.insert({"foo": "bar"})

    try:
        client.pymongo_test.add_user("mike", "password")

        # Two copy attempts that must be rejected; neither may create the target.
        self.assertRaises(OperationFailure, client.copy_database,
                          source, target,
                          username="******", password="******")
        self.assertFalse(target in client.database_names())

        self.assertRaises(OperationFailure, client.copy_database,
                          source, target,
                          username="******", password="******")
        self.assertFalse(target in client.database_names())

        # A copy that must succeed and carry the inserted document over.
        client.copy_database(source, target,
                             username="******", password="******")
        self.assertTrue(target in client.database_names())
        self.assertEqual("bar", client.pymongo_test1.test.find_one()["foo"])
    finally:
        # Cleanup
        remove_all_users(client.pymongo_test)
        client.admin.remove_user("admin")
        client.disconnect()
def main():
    """Round-trip one DataSet through a freshly dropped ``experiments`` database.

    A ``Transform`` SON manipulator is registered on the database, a single
    "oxygen" data set is inserted under the key ``dataset``, and the first
    document found in ``datasets`` is printed.
    """
    client = MongoClient()
    client.drop_database("experiments")
    experiments = client.experiments
    experiments.add_son_manipulator(Transform())

    # Six channels, "0" through "5", all holding the same reading.
    readings = dict.fromkeys(("0", "1", "2", "3", "4", "5"), 22)
    sample = DataSet(datetime.datetime.utcnow(), "oxygen", readings)
    experiments.datasets.insert({"dataset": sample})

    print(experiments.datasets.find_one({}))
def persist_to_db(self, db_name="crawler"):
    """Replace the MongoDB database ``db_name`` with the crawler's current state.

    Any existing database of that name is dropped first.  Four collections
    are then written, in this order: ``lexicon`` (word -> word id),
    ``doc_index`` (doc id, doc and title), ``inverted_index`` (word id ->
    per-document counts) and ``page_rank`` (doc id -> score).  A table with
    no entries is skipped, because ``insert_many`` rejects an empty list.

    :param db_name: name of the database to rebuild; the handle is kept
        on ``self.db``.
    """
    # connect to MongoDB and start from an empty database
    client = MongoClient()
    client.drop_database(db_name)
    self.db = client[db_name]

    def store(collection, documents):
        # insert_many raises on an empty list, so skip empty tables.
        if documents:
            collection.insert_many(documents)

    # persist the lexicon
    lexicon = [{"word": word, "word_id": word_id}
               for word, word_id in self._word_id_cache.items()]
    store(self.db.lexicon, lexicon)

    # persist the doc index
    doc_index = [{"doc_id": doc_id,
                  "doc": doc,
                  "title": self._doc_id_titles[int(doc_id)]}
                 for doc, doc_id in self._doc_id_cache.items()]
    store(self.db.doc_index, doc_index)

    # persist the inverted index
    inverted_index = [{"word_id": word_id,
                       "doc_id_list": [{'doc_id': doc_id, 'count': count}
                                       for doc_id, count in docs.items()]}
                      for word_id, docs in self.get_inverted_index().items()]
    store(self.db.inverted_index, inverted_index)

    # persist the page ranks
    ranks = [{"doc_id": doc_id, "score": score}
             for doc_id, score in self.get_page_ranks().items()]
    store(self.db.page_rank, ranks)
def flush_db():
    """Drop the ``search_db`` and ``search_cache`` databases on the default MongoDB server."""
    client = MongoClient()
    # delete any old cache/index
    for stale in ('search_db', 'search_cache'):
        client.drop_database(stale)
def test01_operations(self):
    # Phase 1: before the database is reset, the installation is not
    # secured and hands out the key stored on the test class.
    dbm = MgmtDatabaseManager('localhost')
    mgmt_api = ManagementEngineAPI(dbm)
    pubkey = mgmt_api.get_ssh_client().get_pubkey()
    self.assertTrue(len(pubkey)>0)
    self.assertEqual(pubkey, TestManagementEngineAPI.key)
    self.assertTrue(not mgmt_api.is_secured_installation())
    mgmt_api.destroy()

    # Phase 2: reset the database and configure it with a key storage path;
    # reading the config must fail until the key storage is initialised.
    MongoClient('localhost').drop_database('test_fabnet_mgmt_db')
    dbm = MgmtDatabaseManager('localhost')
    ManagementEngineAPI.initial_configuration(
        dbm, 'test_cluster', KS_PATH, 'mongodb://127.0.0.1/test_fabnet_ca')
    mgmt_api = ManagementEngineAPI(dbm)
    session = mgmt_api.authenticate('admin', 'admin')
    with self.assertRaises(MEMgmtKSAuthException):
        mgmt_api.get_config(session, None)
    mgmt_api.logout(session)
    mgmt_api.destroy()

    # Phase 3: after initialising the key storage the installation is
    # secured and a different key is returned.
    mgmt_api = ManagementEngineAPI(dbm)
    mgmt_api.init_key_storage(KS_PASSWD)
    pubkey = mgmt_api.get_ssh_client().get_pubkey()
    self.assertTrue(mgmt_api.is_secured_installation())
    self.assertTrue(len(pubkey)>0)
    self.assertNotEqual(pubkey, TestManagementEngineAPI.key)
    mgmt_api.destroy()
def drop_database():
    """Drop ``DB_NAME`` and its ``Debug`` twin, printing the database list before and after."""
    client = MongoClient(DB_LOCAL_MONGO_IP, DB_PORT)
    print(client.database_names())
    for doomed in (DB_NAME, DB_NAME + "Debug"):
        client.drop_database(doomed)
    print(client.database_names())
    client.close()
class TestConfig(unittest.TestCase):
    """Checks MongoHandler logging after logging has been set up from a config file."""

    def setUp(self):
        """ Create an empty database that could be used for logging """
        # Logging is configured from the file sitting next to this module.
        fileConfig(join(dirname(__file__), "logging-test.config"))

        self.db_name, self.collection_name = "_mongolog_test", "log_test"

        connection = Connection()
        database = connection[self.db_name]
        self.conn = connection
        self.db = database
        self.collection = database[self.collection_name]

        connection.drop_database(self.db_name)

    def tearDown(self):
        """ Drop used database """
        self.conn.drop_database(self.db_name)

    def testLoggingFileConfiguration(self):
        example_logger = logging.getLogger("example")
        example_logger.addHandler(MongoHandler(self.collection_name, self.db_name))
        example_logger.debug("test")

        # The debug record must be retrievable from the target collection.
        stored = self.collection.find_one({"levelname": "DEBUG", "msg": "test"})
        self.assertEqual(stored["msg"], "test")
class MongoAgentDB(HasTraits):
    """Agent storage that keeps agent state in the ``agents`` collection of MongoDB.

    The backing database (``db_name``) is dropped every time an instance
    is created.
    """
    implements(IAgentStorage)

    db_name = "pynetsym_agents"

    def __init__(self):
        """Connect to the default MongoDB server and start from an empty database."""
        self.client = MongoClient()
        self.client.drop_database(self.db_name)
        self.agents_db = self.client[self.db_name].agents

    def _mktypename(self, obj):
        """Return the dotted ``module.ClassName`` of ``obj``'s type."""
        kind = type(obj)
        return "%s.%s" % (kind.__module__, kind.__name__)

    def store(self, node):
        """Upsert the state of ``node`` under its integer id.

        ``node.id`` is coerced to ``int`` in place.  The stored state is
        ``node.__getstate__()`` without ``__traits_version__`` and with the
        node's type name added under ``__agenttype__``.
        """
        node.id = int(node.id)
        snapshot = node.__getstate__()
        del snapshot["__traits_version__"]
        snapshot["__agenttype__"] = self._mktypename(node)
        selector = {"_id": node.id}
        self.agents_db.update(selector, {"$set": snapshot}, upsert=True)

    def recover(self, identifier):
        """Rebuild the agent stored under ``identifier``.

        The class named by the stored ``__agenttype__`` is loaded and called
        with the remaining stored fields (minus ``_id``) as keyword arguments.
        """
        stored = self.agents_db.find_one({"_id": int(identifier)})
        factory = jsonpickle.unpickler.loadclass(stored.pop("__agenttype__"))
        del stored["_id"]
        return factory(**stored)
class DBClient:
    """Convenience wrapper around one database of a MongoClient."""

    def __init__(self, url=None, db_name='vocabReminder'):
        """Connect to ``url`` and select ``db_name`` as the working database."""
        self.client = MongoClient(url)
        self.db_name = db_name
        self.db = self.client[db_name]

    def drop_db(self, db_name):
        """Drop the database called ``db_name`` and log that it happened."""
        self.client.drop_database(db_name)
        logging.info("Dropped DB %s" % db_name)

    def write_or_update(self, collection_name, key, content):
        """Update the document matching ``key``, or insert ``content`` if none matches.

        On update, ``content`` is applied with ``$set`` and ``lastModified``
        is set to the current date.

        :return: the id of the inserted document, or ``None`` when an
            existing document was updated.
        """
        collection = self.db[collection_name]

        already_stored = collection.find(key).count() > 0
        if not already_stored:
            result_id = collection.insert_one(content).inserted_id
            logging.info('Added {key} into {collection}. ID: {id}'.format(
                key=key, collection=collection, id=result_id))
            return result_id

        logging.info('{key} already exist in {collection}!'.format(
            key=key, collection=collection))
        changes = {"$set": content, "$currentDate": {"lastModified": True}}
        collection.update_one(key, changes)
        logging.info('Updated {key} in {collection}.'.format(
            key=key, collection=collection))
        return None

    def find(self, collection_name, key):
        """Return the result of ``find(key)`` on the named collection."""
        return self.db[collection_name].find(key)
class TestBaseMinimal(TestCase):
    """Base test case: a clean MongoDB database plus an Eve app with rest hooks."""

    def setUp(self):
        """Reset the database, then build the Eve app, its hooks and a test client."""
        here = os.path.dirname(os.path.realpath(__file__))
        settings_path = os.path.join(here, 'testsettings.py')

        self.headers = {'Content-Type': 'application/json'}
        self.setupDB()

        app = Eve(settings=settings_path)
        self.apiapp = app
        self.everh = EveRestHooks(app)
        self.local_client = app.test_client()

    def setupDB(self):
        """Drop the test database and, when a username is configured, add that user."""
        connection = MongoClient(MONGO_HOST, MONGO_PORT)
        self.connection = connection
        connection.drop_database(MONGO_DBNAME)
        if MONGO_USERNAME:
            connection[MONGO_DBNAME].add_user(MONGO_USERNAME, MONGO_PASSWORD)

    def bulk_insert(self):
        """Do nothing; placeholder in this base class."""

    def dropDB(self):
        """Drop the test database over a new connection and close that connection."""
        connection = MongoClient(MONGO_HOST, MONGO_PORT)
        self.connection = connection
        connection.drop_database(MONGO_DBNAME)
        connection.close()
def astore_teardown(proc):
    """Kill the process ``proc`` and drop the testing database.

    ``kill -9`` is sent to ``proc.pid``, followed by a five second wait
    before the database named in ``TESTING_CONFIG`` is dropped.

    :param proc: object exposing the ``pid`` of the process to kill.
    """
    kill_command = ['kill', '-9', str(proc.pid)]
    killer = Popen(kill_command)
    ttime.sleep(5)  # make sure the process is killed

    mongo = MongoClient(host=TESTING_CONFIG['mongohost'],
                        port=TESTING_CONFIG['mongoport'])
    mongo.drop_database(TESTING_CONFIG['database'])
    ttime.sleep(0.5)
def connection():
    """Recreate the ``InThatDayI`` database and return its collections.

    The database on localhost:27017 is dropped, then nine collections are
    created explicitly.

    :return: tuple of collections in the order counters, user, calendar,
        drive, gmail, plus, lastFm, twitter, facebook.
    """
    # Mongo client
    client = MongoClient('localhost', 27017)

    # Drop database, then work on a new one of the same name
    client.drop_database('InThatDayI')
    db = client['InThatDayI']

    collection_names = (
        'counters',
        'user',
        # Collections for the Google APIs
        'calendar',
        'drive',
        'gmail',
        'plus',
        # Remaining services
        'lastFm',
        'twitter',
        'facebook',
    )
    return tuple(db.create_collection(name) for name in collection_names)
def test_cert_ssl_validation_optional(self):
    # Expects the server to be running with the the server.pem, ca.pem
    # and crl.pem provided in mongodb and the server tests eg:
    #
    #   --sslPEMKeyFile=jstests/libs/server.pem
    #   --sslCAFile=jstests/libs/ca.pem
    #   --sslCRLFile=jstests/libs/crl.pem
    #
    # Also requires an /etc/hosts entry where "server" is resolvable
    if not CERT_SSL:
        raise SkipTest("No mongod available over SSL with certs")
    if not SERVER_IS_RESOLVABLE:
        raise SkipTest(
            "No hosts entry for 'server'. Cannot validate hostname in the certificate")

    # SSL options shared by the standalone and the replica-set client.
    ssl_options = {
        "ssl": True,
        "ssl_certfile": CLIENT_PEM,
        "ssl_cert_reqs": ssl.CERT_OPTIONAL,
        "ssl_ca_certs": CA_PEM,
    }
    client = MongoClient("server", **ssl_options)

    ismaster = client.admin.command("ismaster")
    if "setName" in ismaster:
        client = MongoReplicaSetClient("server",
                                       replicaSet=ismaster["setName"],
                                       w=len(ismaster["hosts"]),
                                       **ssl_options)

    scratch_db = client.pymongo_ssl_test
    scratch_db.test.drop()
    self.assertTrue(scratch_db.test.insert({"ssl": True}))
    self.assertTrue(scratch_db.test.find_one()["ssl"])
    client.drop_database("pymongo_ssl_test")
def command_create(args):
    """
    Handle ``create XX.XX.XX.XX db-test``: drop and recreate a database.

    Logs an error and returns without touching anything when port 27017 on
    ``args.ip_address`` fails the ``check_ip_address`` test.  Otherwise the
    database ``args.database`` is dropped, and three collections are created
    with an index on ``galaxy_id`` each.

    :param args: parsed arguments providing ``ip_address`` and ``database``
    :return: None
    """
    if not check_ip_address(args.ip_address, 27017):
        LOG.error('The address {0}:27017 is not available'.format(args.ip_address))
        return

    mongo = MongoClient('mongodb://{0}:27017/'.format(args.ip_address))

    # Drop the old database
    mongo.drop_database(args.database)

    # Recreate the database
    database = Database(mongo, args.database)

    # Create each collection explicitly, then index it
    for collection_name in (COLLECTION_GALAXIES,
                            COLLECTION_GALAXY_DATA_SED,
                            COLLECTION_GALAXY_DATA_ORIGINAL):
        created = Collection(database, collection_name, True)
        created.create_index('galaxy_id')
class TestJSON(TestCase):
    """Tests for JsonHandler against files in the working directory and a scratch database."""

    # Files written by setUp and removed by tearDown.
    _fixture_files = ("testfile1.json", "testfile2.JSON", "fakefile.txt")

    def setUp(self):
        """Write the fixture files and start from an empty ``unittest`` database."""
        for file_name in self._fixture_files:
            out = open(file_name, "w+")
            out.write('{"a": 1, "b": 2}')
            out.close()

        self.j = JsonHandler(".", "unittest", "unittest")
        mongo = MongoClient()
        self.client = mongo
        mongo.drop_database("unittest")
        self.coll = mongo["unittest"]["unittest"]

    def test_can_make_pathlib_object_of_json_files(self):
        # Both spellings of the .json extension are picked up; the .txt file is not.
        discovered = self.j.jsons
        self.assertIn(PosixPath("testfile1.json"), discovered)
        self.assertIn(PosixPath("testfile2.JSON"), discovered)
        self.assertNotIn("fakefile.txt", discovered)

    def test_can_parse_json(self):
        parsed = self.j.parse_one_json(self.j.jsons[0])
        self.assertEqual(parsed, {"a": 1, "b": 2})

    def test_can_put_files_in_database(self):
        self.assertEqual(self.coll.count(), 0)
        handler = self.j
        handler.put_jsons_in_database(handler.jsons, handler.db_name, handler.coll_name)
        self.assertEqual(self.coll.count(), 2)

    def test_inserted_docs_can_be_retrieved(self):
        handler = self.j
        handler.put_jsons_in_database(handler.jsons, handler.db_name, handler.coll_name)
        matching = self.coll.find({"a": 1})
        self.assertEqual(matching.count(), 2)

    def tearDown(self):
        """Delete the fixture files, drop the scratch database and close the client."""
        for file_name in self._fixture_files:
            os.remove(file_name)
        self.client.drop_database("unittest")
        self.client.close()
class DBUtil(object):
    """Process-wide singleton holding the connection to ``c3stem_database``."""

    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it the first time."""
        if cls._instance:
            return cls._instance
        cls._instance = super(DBUtil, cls).__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        """Connect once, using the ``ip`` option of the ``[DB]`` section of the config file.

        Later constructions of the singleton return immediately.
        """
        if self._initialized:
            return
        print("initializing")

        # The configuration file lives in ./config next to this module.
        conf_path = os.path.join(os.path.dirname(__file__),
                                 './config/application.conf')
        settings = ConfigParser.ConfigParser()
        settings.read(conf_path)

        self.connection = MongoClient(settings.get('DB', 'ip'))
        self.db = self.connection.c3stem_database
        self._initialized = True

    def getDatabase(self):
        """Return the ``c3stem_database`` handle."""
        return self.db

    def dropDatabase(self):
        """Drop ``c3stem_database`` on the connected server."""
        self.connection.drop_database('c3stem_database')
class MongoController:
    """
    Can be used to execute the tasks on MongoDB.
    Since most of the tasks on mongo DB cannot run in parallel and take noticeable time, chunk
    migrations are added to a queue and the call returns. The tasks from the queue run
    sequentially in a separate thread. In order to start this thread the start method should
    be called.

    If mongos cannot be reached when the controller is created, every public method
    returns without doing anything.
    """

    def __init__(self, ip, port=MONGOS_PORT):
        """
        :param ip: IP of the mongos instance to connect to
        :param port: port of the mongos instance to connect to
        """
        self._mongo_available = True
        self._mongo_client = MongoClient(ip, port, serverSelectionTimeoutMS=15000)

        # Test the connection to mongos. If mongos is not running at the specified address,
        # the command raises pymongo.errors.ConnectionFailure.
        try:
            self._mongo_client.admin.command('ismaster')
        except pymongo.errors.ConnectionFailure:
            logging.info('MongoDB is not accesible. All operations with Mongo will not work.')
            self._mongo_available = False

        # Pending migrations, consumed by _run.
        self._queue: Queue = Queue()
        # full collection name -> {shard key value -> shard currently holding that chunk}
        self._chunks: Dict[str, Dict[int, str]] = {}
        self._lock = RLock()

    def start(self) -> None:
        """
        Starts the daemon thread that executes the MongoDB tasks from the queue of this instance.
        """
        if not self._mongo_available:
            return

        # daemon=True replaces the deprecated Thread.setDaemon(True).
        operations_thread = threading.Thread(target=self._run, args=(), daemon=True)
        operations_thread.start()

    @staticmethod
    def _try_until_done(task: Callable) -> None:
        """
        Calls the task repeatedly until it stops raising pymongo.errors.OperationFailure,
        sleeping WAIT_BEFORE_RETRY between attempts.

        :param task: Callable taking no arguments
        """
        while True:
            try:
                task()
                break
            except pymongo.errors.OperationFailure:
                time.sleep(WAIT_BEFORE_RETRY)

    def _run(self) -> None:
        """
        Executes the tasks from the queue. Tasks for collections that are not (or no longer)
        tracked in the chunk table are discarded.
        """
        while True:
            args: MigrationArgs = self._queue.get(block=True)
            with self._lock:
                if args.collection in self._chunks:
                    # The key needs a chunk of its own before it can be moved.
                    if args.shard_key not in self._chunks[args.collection]:
                        self._split_chunk(args.collection, args.shard_key)
                    self._move_chunk(args)

    def _split_chunk(self, collection_name: str, key: int):
        """
        Splits the chunk of the collection at the given value of the shard key.

        :param collection_name: Collection to split
        :param key: Value of the shard key
        """
        def split_command():
            self._mongo_client.admin.command('split', collection_name, middle={SHARD_KEY: key})

        self._try_until_done(split_command)
        self._chunks[collection_name][key] = MAIN_MONGO_SHARD_NAME
        logging.info(f"MongoAgent: Split chunk of {collection_name} at {key}")

    def _move_chunk(self, args: MigrationArgs) -> None:
        """
        Moves the chunk to a given shard of MongoDB

        :param args: MigrationArgs for the chunk
        """
        def move_command():
            self._mongo_client.admin.command("moveChunk", args.collection,
                                             find={SHARD_KEY: args.shard_key},
                                             to=args.shard,
                                             _secondaryThrottle=False,
                                             _waitForDelete=True)

        self._try_until_done(move_command)
        self._chunks[args.collection][args.shard_key] = args.shard
        logging.info(f"MongoAgent: Moved chunk {args.shard_key} of collection {args.collection} to {args.shard}")

    def shard_collection(self, db_name: str, collection_name: str) -> None:
        """
        Enables sharding for the database, shards the collection on SHARD_KEY and starts
        tracking its chunks.

        :param db_name: Database of the collection
        :param collection_name: Collection to shard
        """
        if not self._mongo_available:
            return

        full_collection_name = db_name + '.' + collection_name

        def shard_command():
            self._mongo_client.admin.command('enableSharding', db_name)
            self._mongo_client.admin.command('shardCollection', full_collection_name,
                                             key={SHARD_KEY: 1}, unique=False)

        self._try_until_done(shard_command)
        self._chunks[full_collection_name] = {}

    def move_chunk(self, db_name: str, collection_name: str, shard_key: int, shard_id: str) -> None:
        """
        Moves a specified chunk of a specified collection to a specified shard. If the value of
        the shard key does not belong to a separate chunk, splits the chunk at that value first.
        This task is executed asynchronously.

        :param db_name: Database of the chunk
        :param collection_name: Collection of the chunk
        :param shard_key: Value of the shard key of the chunk
        :param shard_id: ID of the shard where the chunk has to be moved
        """
        if not self._mongo_available:
            return

        full_collection_name = db_name + '.' + collection_name
        args = MigrationArgs(full_collection_name, shard_id, shard_key)
        self._queue.put(args)

    def drop_database(self, db_name: str, collections: Iterable[str]) -> None:
        """
        Drops a database.

        :param db_name: Database to drop.
        :param collections: Collections that were created in this database. Collections that
            were never sharded through this controller are accepted and ignored.
        """
        if not self._mongo_available:
            return

        with self._lock:
            for collection in collections:
                # pop() instead of del: an untracked collection must not raise KeyError
                # and stop the database from being dropped.
                self._chunks.pop(f'{db_name}.{collection}', None)
            self._mongo_client.drop_database(db_name)

    def add_document(self, db_name, collection_name, doc) -> None:
        """
        Adds a document to a collection of MongoDB

        :param db_name: Database to add the document to
        :param collection_name: Collection to add the document to
        :param doc: Document to add
        """
        if not self._mongo_available:
            return

        collection: Collection = self._mongo_client[db_name][collection_name]
        collection.insert_one(document=doc)

    def delete_document(self, db_name, collection_name, doc) -> None:
        """
        Deletes a document from a collection of MongoDB

        :param db_name: Database to delete the document from
        :param collection_name: Collection to delete the document from
        :param doc: Document to delete
        """
        if not self._mongo_available:
            return

        collection: Collection = self._mongo_client[db_name][collection_name]
        collection.delete_one(doc)
def _drop_database(self):
    """Drop the ``testmongodbstate`` database.

    The MongoDB URI is read from ``self.config`` (key ``URI`` under the
    ``mongodb`` root). The client is closed afterwards, even when the
    drop itself fails, so no connection is left open.
    """
    mongodb_uri = self.config.get_config(None, 'URI', root='mongodb')
    client = MongoClient(mongodb_uri)
    try:
        client.drop_database('testmongodbstate')
    finally:
        # Always release the client's sockets.
        client.close()
'name': 'booo', 'marks': 60, 'grades': 'B' }) print list(collection.find()) print '\n' #delete data with pymongo print 'delete_one operator' collection.delete_one({'name': 'booo'}) print list(collection.find()) print '\n' print 'delete_many operator' r = collection.delete_many({'age': 24}) print 'deleted_count ', r.deleted_count print list(collection.find()) print '\n' print 'delete_many operator' r = collection.delete_many({}) print 'deleted_count ', r.deleted_count print list(collection.find()) print '\n' #drop a collection collection.drop() #drop a database client.drop_database(db)
class MongoDatabaseAdapter(StorageAdapter):
    """
    The MongoDatabaseAdapter is an interface that allows ChatterBot
    to store the conversation as a MongoDB database.

    Recognised keyword arguments (read from ``self.kwargs``):

    * ``database`` -- database name, default ``"chatterbot-database"``
    * ``database_uri`` -- connection URI, default
      ``"mongodb://localhost:27017/"``
    """

    def __init__(self, **kwargs):
        super(MongoDatabaseAdapter, self).__init__(**kwargs)

        self.database_name = self.kwargs.get(
            "database", "chatterbot-database"
        )
        self.database_uri = self.kwargs.get(
            "database_uri", "mongodb://localhost:27017/"
        )

        # Connect using the configured URI (local server by default)
        self.client = MongoClient(self.database_uri)

        # Specify the name of the database
        self.database = self.client[self.database_name]

        # The mongo collection of statement documents
        self.statements = self.database['statements']

        # Set a requirement for the text attribute to be unique
        self.statements.create_index('text', unique=True)

    def count(self):
        """
        Return the number of documents in the statements collection.
        """
        return self.statements.count()

    def find(self, statement_text):
        """
        Return the Statement whose text equals ``statement_text``,
        or None when no such document is stored.
        """
        values = self.statements.find_one({'text': statement_text})

        if not values:
            return None

        # The text is passed positionally, so drop it from the keyword data
        del(values['text'])

        # Build the objects for the response list
        response_list = self.deserialize_responses(
            values["in_response_to"]
        )
        values["in_response_to"] = response_list

        return Statement(statement_text, **values)

    def deserialize_responses(self, response_list):
        """
        Takes the list of response items and returns the
        list converted to Response objects.

        Note that the ``text`` key is removed from each item of
        ``response_list`` in place.
        """
        proxy_statement = Statement("")

        for response in response_list:
            text = response["text"]
            del(response["text"])

            proxy_statement.add_response(
                Response(text, **response)
            )

        return proxy_statement.in_response_to

    def filter(self, **kwargs):
        """
        Returns a list of statements in the database
        that match the parameters specified.

        A keyword of the form ``<field>__contains=<text>`` is turned into
        an ``$elemMatch`` on ``<field>`` for items whose ``text`` equals
        the given value. Any other keyword containing ``__`` is dropped
        from the query.
        """
        filter_parameters = kwargs.copy()
        contains_parameters = {}

        # Convert Response objects to data
        if "in_response_to" in filter_parameters:
            response_objects = filter_parameters["in_response_to"]
            serialized_responses = []
            for response in response_objects:
                serialized_responses.append(response.serialize())

            filter_parameters["in_response_to"] = serialized_responses

        # Exclude special arguments from the kwargs
        for parameter in kwargs:

            if "__" in parameter:
                del(filter_parameters[parameter])

                kwarg_parts = parameter.split("__")

                if kwarg_parts[1] == "contains":
                    key = kwarg_parts[0]
                    value = kwargs[parameter]

                    contains_parameters[key] = {
                        '$elemMatch': {
                            'text': value
                        }
                    }

        filter_parameters.update(contains_parameters)

        matches = self.statements.find(filter_parameters)
        matches = list(matches)

        results = []

        for match in matches:
            statement_text = match['text']
            del(match['text'])

            response_list = self.deserialize_responses(match["in_response_to"])
            match["in_response_to"] = response_list

            results.append(Statement(statement_text, **match))

        return results

    def update(self, statement):
        """
        Write ``statement`` to the database unless the adapter is
        read-only, and return the statement.

        The statement document is replaced (or inserted), and a document
        is created for every response text that is not stored yet.
        """
        from pymongo import UpdateOne, ReplaceOne

        # Do not alter the database unless writing is enabled
        if not self.read_only:
            data = statement.serialize()

            operations = []

            # Third positional argument of ReplaceOne is ``upsert``
            update_operation = ReplaceOne(
                {'text': statement.text}, data, True
            )
            operations.append(update_operation)

            # Make sure that an entry for each response is saved
            for response in statement.in_response_to:
                # $setOnInsert only applies when the upsert inserts a
                # new document; existing documents are left untouched
                update_operation = UpdateOne(
                    {'text': response.text},
                    {'$setOnInsert': {'in_response_to': []}},
                    upsert=True
                )
                operations.append(update_operation)

            self.statements.bulk_write(operations, ordered=False)

        return statement

    def get_random(self):
        """
        Returns a random statement from the database.

        Raises ``EmptyDatabaseException`` when no statements are stored.
        """
        from random import randint

        count = self.count()

        # Check for an empty collection before drawing the index:
        # randint(0, -1) would raise ValueError instead of the
        # adapter's own exception.
        if count < 1:
            raise self.EmptyDatabaseException()

        random_integer = randint(0, count - 1)

        statement = self.statements.find().limit(1).skip(random_integer)

        values = list(statement)[0]
        statement_text = values['text']

        del(values['text'])
        return Statement(statement_text, **values)

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        for statement in self.filter(in_response_to__contains=statement_text):
            statement.remove_response(statement_text)
            self.update(statement)

        self.statements.delete_one({'text': statement_text})

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        # Every text that appears inside some in_response_to list
        response_query = self.statements.distinct('in_response_to.text')

        statement_query = self.statements.find({
            'text': {
                '$in': response_query
            }
        })

        statement_list = list(statement_query)

        statement_objects = []

        for statement in statement_list:
            values = dict(statement)
            statement_text = values['text']

            del(values['text'])

            response_list = self.deserialize_responses(values["in_response_to"])
            values["in_response_to"] = response_list

            statement_objects.append(Statement(statement_text, **values))

        return statement_objects

    def drop(self):
        """
        Remove the database.
        """
        self.client.drop_database(self.database_name)
def init_test(db_name='test'):
    """Exercise ``DBConnector`` against a scratch database and print results.

    The database ``db_name`` is initialised empty, filled with generated
    status and part records for three video ids, queried in several ways
    (results are printed), and finally dropped. The drop runs in a
    ``finally`` clause so the scratch database is removed even when one of
    the steps raises.

    :param db_name: name of the scratch database to create and drop
    """
    init_empty_mongo_db(db_name)
    try:
        conn = DBConnector(db_name=db_name)

        ids = [1, 13, 24]
        status_types = [
            "finished",
            "preparing_for_download",
            "fail_on_downloading",
            "converting_to_wav",
            "fail_on_converting_to_wav",
            "recognition",
            "fail_on_recognition",
            "joining_segments",
            "downloading",
        ]

        # Fixed seed so the randomly chosen statuses are reproducible.
        seed = 42
        random.seed(seed)

        # Ten parts per video id.
        gen_parts = [
            {
                'video_id': video_id,
                "start": 2 * i,
                "end": 2 * i + 1,
                "text": f"{13 * i + 17} русский {video_id}"
            }
            for video_id in ids
            for i in range(10)
        ]

        # One randomly chosen status per video id.
        gen_status = [
            {
                "video_id": video_id,
                "status": random.choice(status_types),
                "info": {
                    "1": 1,
                    "2": 2
                }
            }
            for video_id in ids
        ]

        for status in gen_status:
            conn.update_status(status["video_id"], status["status"], status["info"])

        res = conn.get_status_table()
        print("\nStatus table:")
        for el in res:
            print(el)

        conn.insert_parts(gen_parts)

        res = conn.get_parts_table()
        print("\nParts table:")
        for el in res:
            print(el)

        conn.update_status(ids[0], "NEW SUPER STATUS", info={"speed": 424242})

        print(f"\nStatus {ids[0]} video:")
        res = conn.get_status(ids[0])
        print(res)

        print(f"\nParts {ids[0]} video:")
        res = conn.get_parts(ids[0])
        for el in res:
            print(el)

        request_text = "русский"
        print(f"\nFind {request_text} in {ids[0]} video:")
        res = conn.find_text(ids[0], request_text, limit=5)
        for el in res:
            print(el)

        # Look up a video id that was never inserted.
        print(f"\nStatus not exists {145421232} video:")
        res = conn.get_status(145421232)
        print(res)
    finally:
        # Remove the scratch database and release the cleanup connection.
        client = MongoClient()
        try:
            client.drop_database(db_name)
        finally:
            client.close()
def teardown():
    """Drop the ``fork_test`` database on the local MongoDB server.

    Connects to ``mongodb://localhost:27017`` and closes the client
    afterwards, even if the drop fails, so no connection is left open.
    """
    mongo_client = MongoClient('mongodb://localhost:27017')
    try:
        mongo_client.drop_database('fork_test')
    finally:
        mongo_client.close()
def cuckoo_clean():
    """Clean up cuckoo setup.

    It deletes logs, all stored data from file system and configured
    databases (SQL, MongoDB and ElasticSearch), and finally removes every
    compiled ``*.pyc`` file below ``CUCKOO_ROOT``.

    Failures while dropping MongoDB, connecting to ElasticSearch or
    removing files are logged as warnings and do not abort the cleanup.
    """
    # Init logging.
    # This need to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Initialize the database connection.
    db = Database()

    # Drop all tables.
    db.drop()

    # Check if MongoDB reporting is enabled and drop that if it is.
    cfg = Config("reporting")
    if cfg.mongodb and cfg.mongodb.enabled:
        from pymongo import MongoClient
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)
        mdb = cfg.mongodb.get("db", "cuckoo")
        user = cfg.mongodb.get("username", None)
        password = cfg.mongodb.get("password", None)
        try:
            # Use the resolved ``host`` so the default applies when the
            # option is missing from the configuration.
            conn = MongoClient(host, port=port, username=user,
                               password=password, authSource=mdb)
            conn.drop_database(mdb)
            conn.close()
        except Exception:
            log.warning("Unable to drop MongoDB database: %s", mdb)

    # Check if ElasticSearch is enabled and delete that data if it is.
    if cfg.elasticsearchdb and cfg.elasticsearchdb.enabled and \
            not cfg.elasticsearchdb.searchonly:
        from elasticsearch import Elasticsearch
        delidx = cfg.elasticsearchdb.index + "-*"

        # Pre-bind so a failed connection skips the deletion below
        # instead of raising NameError.
        es = None
        try:
            es = Elasticsearch(hosts=[{
                "host": cfg.elasticsearchdb.host,
                "port": cfg.elasticsearchdb.port,
            }], timeout=60)
        except Exception:
            log.warning("Unable to connect to ElasticSearch")

        if es is not None:
            analyses = es.search(
                index=delidx, doc_type="analysis", q="*"
            )["hits"]["hits"]
            if analyses:
                for analysis in analyses:
                    esidx = analysis["_index"]
                    esid = analysis["_id"]
                    # Check if behavior exists
                    if analysis["_source"]["behavior"]:
                        for process in analysis["_source"]["behavior"]["processes"]:
                            for call in process["calls"]:
                                es.delete(
                                    index=esidx,
                                    doc_type="calls",
                                    id=call,
                                )
                    # Delete the analysis results
                    es.delete(
                        index=esidx,
                        doc_type="analysis",
                        id=esid,
                    )

    # Paths to clean.
    paths = [
        os.path.join(CUCKOO_ROOT, "db"),
        os.path.join(CUCKOO_ROOT, "log"),
        os.path.join(CUCKOO_ROOT, "storage"),
    ]

    # Delete various directories.
    for path in paths:
        if os.path.isdir(path):
            try:
                shutil.rmtree(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)

    # Delete all compiled Python objects ("*.pyc").
    for dirpath, _, filenames in os.walk(CUCKOO_ROOT):
        for fname in filenames:
            if not fname.endswith(".pyc"):
                continue

            # ``dirpath`` yielded by os.walk already starts with
            # CUCKOO_ROOT; joining the root again yields a wrong path
            # whenever CUCKOO_ROOT is relative.
            path = os.path.join(dirpath, fname)

            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
class MongoStorage(Storage):
    "Storage based on MongoDB back-end"
    def __init__(self, uri, dbname='fwjr', collname='db', chunk_size=1000):
        """
        ctor with mongo uri: mongodb://host:port

        :param uri: MongoDB connection URI
        :param dbname: database holding the documents
        :param collname: default collection for documents
        :param chunk_size: number of documents/ids handled per batch
        """
        Storage.__init__(self, uri)
        self.client = MongoClient(uri, w=1)
        self.mdb = self.client[dbname]
        self.mdb.add_son_manipulator(WMASONManipulator())
        self.collname = collname
        self.coll = self.mdb[collname]
        self.jobs = self.mdb['jobs'] # separate collection for job results
        self.log(self.coll)
        self.chunk_size = chunk_size

        # read error codes
        fname = os.environ.get('WMARCHIVE_ERROR_CODES', '')
        self.codes = {} # dict of numeric codes
        if fname:
            with open(fname, 'r') as exit_codes_file:
                self.codes = json.load(exit_codes_file)

        # read performance metrics; default to an empty dict so that
        # performance() can always read self.metrics
        fname = os.environ.get('WMARCHIVE_PERF_METRICS', '')
        self.metrics = {}
        if fname:
            with open(fname, 'r') as metrics_file:
                self.metrics = json.load(metrics_file)

        # printout pymongo version
        print("### pymongo.version %s" % pymongo.version)

    def sconvert(self, spec, fields):
        "convert input spec/fields into ones suitable for MognoDB QL"
        return spec, fields

    def find_duplicates(self, wmaids):
        """
        Find duplicate FWJR ids from collection of docs.

        Returns the ``meta_data`` of every stored document whose ``wmaid``
        is in ``wmaids``.
        """
        spec = {'wmaid':{'$in':wmaids}}
        fields = ['meta_data']
        fwjr_ids = []
        for doc in self.coll.find(spec, fields):
            fwjr_ids.append(doc['meta_data'])
        return fwjr_ids

    def write(self, data, safe=None):
        """
        Write API, return ids of stored documents.

        ``data`` is a single document or a list of documents. Documents
        whose first element has ``dtype == 'job'`` go to the jobs
        collection, everything else to the default collection. Duplicates
        (within ``data`` and against the storage) are logged as warnings.
        Invalid documents and invalid operations are logged and skipped,
        duplicate keys are silently skipped, any other failure is raised
        as ``WriteError``. ``safe`` is accepted but not used.
        """
        if not isinstance(data, list):
            data = [data] # ensure that we got list of data
        coll = self.coll
        # guard against an empty list before peeking at the first document
        if data and isinstance(data[0], dict) and data[0].get('dtype', None) == 'job':
            coll = self.jobs
        wmaids = self.getids(data)
        uniqids = set(wmaids)
        sts_dup = self.find_duplicates(wmaids)
        if len(wmaids) != len(uniqids):
            fdup, wdup = set_duplicates(data)
            self.log("WARNING, found %s duplicates in given docs, FWJR ids %s, WMA ids %s, given %s, unique %s" \
                    % ( len(wmaids)-len(uniqids), json.dumps(fdup), json.dumps(wdup), len(wmaids), len(uniqids) ) )
            for wid in wdup:
                for rec in data:
                    if rec['wmaid'] == wid:
                        self.log(json.dumps(rec))
        if sts_dup:
            self.log("WARNING, found %s duplicates in STS:" % len(sts_dup))
            for rec in sts_dup:
                self.log('WARNING, duplicate record %s' % json.dumps(rec))
        for idx in range(0, len(data), self.chunk_size):
            docs = data[idx:idx+self.chunk_size]
            # insert only the documents of the current chunk, into the
            # collection selected above
            for doc in docs:
                try:
                    if pymongo.version.startswith('3.'): # pymongo 3.X
                        coll.insert(doc)
                    else:
                        coll.insert(doc, continue_on_error=True)
                except InvalidDocument as exp:
                    self.log('WARNING InvalidDocument: %s' % str(exp))
                except InvalidOperation as exp:
                    self.log('WARNING InvalidOperation: %s' % str(exp))
                except DuplicateKeyError as exp:
                    pass
                except Exception as exp:
                    raise WriteError(str(exp))
        return wmaids

    def read(self, spec, fields=None):
        """
        Read API, it reads data from MongoDB storage for provided spec.

        Returns a list of documents; any failure is raised as ``ReadError``.
        """
        try:
            gen = self.find(spec, fields)
            docs = [r for r in gen]
            return docs
        except Exception as exp:
            raise ReadError(str(exp))

    def find(self, spec, fields):
        """
        Find records in MongoDB storage for provided spec, returns generator
        over MongoDB collection

        A list spec, or a spec whose string form matches ``PAT_UID``, is
        treated as wmaid(s) and looked up in the jobs collection; any
        other spec is passed to the default collection, with ``fields``
        as projection when given.
        """
        if not spec:
            spec = {}
        if isinstance(spec, list):
            spec = {'wmaid': {'$in': spec}}
            return self.jobs.find(spec)
        elif PAT_UID.match(str(spec)):
            spec = {'wmaid': spec}
            return self.jobs.find(spec)
        if fields:
            return self.coll.find(spec, fields)
        return self.coll.find(spec)

    def ndocs(self, spec):
        "Return number of documents for given spec"
        return self.coll.find(spec).count()

    def update(self, ids, spec):
        "Update documents with given set of document ids and update spec"
        if len(ids) > self.chunk_size:
            # keep the $in list bounded by updating in chunks
            for idx in range(0, len(ids), self.chunk_size):
                sub_ids = ids[idx:idx+self.chunk_size]
                doc_query = {'wmaid' : {'$in': sub_ids}}
                self.coll.update(doc_query, spec, multi=True)
        else:
            doc_query = {'wmaid' : {'$in': ids}}
            self.coll.update(doc_query, spec, multi=True)

    def remove(self, spec=None):
        "Remove documents from MongoDB for given spec"
        if not spec:
            spec = {}
        return self.coll.remove(spec)

    def dropdb(self, dbname):
        "Remove given database from MongoDB"
        return self.client.drop_database(dbname)

    def stats(self):
        "Return statistics about MongoDB"
        return self.mdb.command("collstats", self.collname)

    def jobsids(self):
        "Return jobs ids"
        out = []
        for row in self.jobs.find():
            if 'wmaid' in row:
                out.append({'wmaid':row['wmaid']})
        return out

    def performance(self, metrics, axes, start_date=None, end_date=None, suggestions=None, **kwargs):
        """
        The performance service endpoint that the web UI relies upon to present
        aggregated performance data as documented in https://github.com/knly/WMArchiveAggregation.

        :param metrics: list of metric names to aggregate
        :param axes: list of axes to group by ('_summary', 'time' or a scope key)
        :param start_date: optional start date as YYYYMMDD string
        :param end_date: optional end date as YYYYMMDD string (inclusive)
        :param suggestions: list of scope keys to collect distinct values for
        :param kwargs: scope filters plus the optional 'verbose', 'aggDB'
            and 'aggCol' settings
        :returns: dict with 'status', 'suggestions', 'visualizations'
            and 'supplementaryData'
        """
        if suggestions is None:
            suggestions = []
        start_time = time.time()
        verbose = kwargs.get('verbose', None)

        # adjust end_date to make end_date inclusive since all records in aggregated.performance
        # are stored as full day, e.g.
        # "start_date" : ISODate("2016-12-21T00:00:00Z")
        # "end_date" : ISODate("2016-12-22T00:00:00Z")
        if end_date:
            cdt = datetime.date(int(end_date[:4]), int(end_date[4:6]), int(end_date[6:]))
            ndt = cdt+datetime.timedelta(days=1)
            end_date = ndt.strftime('%Y%m%d')

        # environment variables take precedence over the keyword arguments
        agg_db = os.environ.get('WMARCHIVE_PERF_DB', kwargs.get('aggDB', 'aggregated'))
        agg_col = os.environ.get('WMARCHIVE_PERF_COLL', kwargs.get('aggCol', 'day'))
        performance_data = self.client[agg_db][agg_col]

        def get_aggregation_result(cursor_or_dict):
            """
            Fallback for pymongo<3.0
            """
            if type(cursor_or_dict) is dict:
                return cursor_or_dict['result']
            return list(cursor_or_dict)

        # Valid keys in `stats.scope`
        scope_keys = [ 'workflow', 'task', 'host', 'site', 'jobtype', 'jobstate', 'acquisitionEra', 'exitCode', 'exitStep' ]

        # Construct scope
        scope = []
        timeframe_scope = []

        # Timeframes
        # TODO: build more robust date parsing
        if start_date is not None:
            timeframe_scope.append({
                '$match': {
                    'scope.start_date': {
                        '$gte': datetime.datetime(int(start_date[0:4]), int(start_date[4:6]), int(start_date[6:8]), 0, 0, 0)
                    },
                }
            })
        if end_date is not None:
            timeframe_scope.append({
                '$match': {
                    'scope.end_date': {
                        '$lte': datetime.datetime(int(end_date[0:4]), int(end_date[4:6]), int(end_date[6:8]), 0, 0, 0)
                    },
                }
            })
        scope += timeframe_scope

        # Scope
        filters = {}
        for scope_key in kwargs:
            if scope_key not in scope_keys or kwargs[scope_key] is None:
                continue
            if scope_key == 'exitCode':
                # we need to handle exitCodes specially since they
                # are stored in FWJR as int data-type, while on web UI
                # we fetch them as strings in WMARCHIVE_ERROR_CODES json
                # So, we take the exit code from kwargs which is a string pattern
                # take its string value, and match both on int and str data-types
                # in our records
                val = kwargs[scope_key]
                if hasattr(val, "pattern"):
                    val = val.pattern
                val2 = val
                if PAT_INT.match(val):
                    val2 = int(val)
                filters[scope_key] = {
                    '$match': {
                        '$or' : [
                            {'scope.' + scope_key: val},
                            {'scope.' + scope_key: val2}
                        ]
                    }
                }
            else:
                filters[scope_key] = {
                    '$match': {
                        'scope.' + scope_key: {
                            '$regex': kwargs[scope_key]
                        },
                    }
                }
        scope += filters.values()

        # Collect suggestions: distinct values of each requested scope key,
        # restricted by the timeframe and by all *other* active filters
        collected_suggestions = {
            scope_key: map(lambda d: d['_id'], get_aggregation_result(performance_data.aggregate(timeframe_scope + [ f for k, f in filters.iteritems() if k != scope_key ] + [
                {
                    '$group': {
                        '_id': '$scope.' + scope_key,
                    },
                },
            ]))) for scope_key in suggestions
        }
        # convert all exitCodes in suggestions to unique string to have them on web UI
        data = collected_suggestions.get('exitCode', [])
        collected_suggestions['exitCode'] = list(set([str(c) for c in data]))

        # Collect visualizations
        visualizations = {}
        ISO_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%LZ"
        for metric in metrics:
            visualizations[metric] = {}
            aggregation_key = 'performance.' + metric
            if metric == 'data.events':
                aggregation_key = 'events'
            elif metric == 'data.exitcodes':
                aggregation_key = 'exitcodes'
            elif metric == 'jobstate':
                aggregation_key = 'count'
            for axis in axes:
                # NOTE(review): for axis == '_summary' the values assigned
                # here are overwritten by the else branch below, so the
                # effective group_id is '$scope._summary'; the exit-code
                # branch further down checks for exactly that value.
                # Confirm before turning this into an if/elif chain.
                if axis == '_summary':
                    group_id = None
                    label = axis
                if axis == 'time':
                    group_id = {
                        'start_date': '$scope.start_date',
                        'end_date': '$scope.end_date'
                    }
                    label = {
                        'start_date': { '$dateToString': { 'format': ISO_DATE_FORMAT, 'date': '$_id.start_date' } },
                        'end_date': { '$dateToString': { 'format': ISO_DATE_FORMAT, 'date': '$_id.end_date' } },
                    }
                else:
                    group_id = '$scope.' + axis
                    label = '$_id'

                if metric == 'jobstate':
                    query = scope + [
                        {
                            '$group': {
                                '_id': {
                                    'axis': group_id,
                                    'jobstate': '$scope.jobstate'
                                },
                                'count': {
                                    '$sum': '$' + aggregation_key
                                }
                            }
                        },
                        {
                            '$group': {
                                '_id': '$_id.axis',
                                'jobstates': {
                                    '$push': {
                                        'jobstate': '$_id.jobstate',
                                        'count': '$count'
                                    }
                                }
                            }
                        },
                        {
                            '$project': {
                                '_id': False,
                                'label': label,
                                'jobstates': '$jobstates',
                            }
                        }
                    ]
                    aggregation_result = get_aggregation_result(performance_data.aggregate(query))
                elif metric == 'data.exitcodes':
                    if aggregation_key == 'exitcodes':
                        # NOTE(review): filters is keyed by scope keys such
                        # as 'exitCode', so 'exit' looks like it is never
                        # present -- verify the intended key.
                        # NOTE(review): scope is extended in place here, so
                        # the extra $match stage also applies to all later
                        # axes/metrics and to the status query below.
                        if not 'exit' in filters.keys():
                            scope += [{'$match': {"scope.exitCode":{'$ne':None}}}]
                    if group_id == None or group_id == '$scope._summary' or isinstance(group_id, dict):
                        sdate = { '$dateToString' : {'format': ISO_DATE_FORMAT, 'date':"$scope.start_date"} }
                        edate = { '$dateToString' : {'format': ISO_DATE_FORMAT, 'date':"$scope.end_date"} }
                        eid = {'start_date':'$sdate', 'end_date':'$edate'}
                    else:
                        sdate = group_id
                        edate = group_id
                        eid = '$sdate'
                    query = scope + [
                        {
                            '$project' : {
                                'sdate': sdate,
                                'edate': edate,
                                'dcode': '$exitCode',
                                '_id':False,
                            }
                        },
                        {
                            '$group' : {
                                '_id': eid,
                                'average': {'$sum': 1},
                                'count':{'$sum':1},
                            }
                        },
                        { '$sort':{'_id':1} },
                        {
                            '$project' : {
                                '_id': False,
                                'label': '$_id',
                                'average': '$average',
                                'count': '$count',
                            }
                        }
                    ]
                    aggregation_result = get_aggregation_result(performance_data.aggregate(query))
                else:
                    key = '%s' % aggregation_key
                    # NOTE(review): in-place extension of scope, see above
                    scope += [{"$match": {key:{"$gte":0}}}]
                    query = scope + [
                        {
                            '$group': {
                                '_id': group_id,
                                'average': {
                                    '$avg': '$' + aggregation_key
                                },
                                'count': {
                                    '$sum': '$' + aggregation_key + '_N'
                                },
                            }
                        },
                        {
                            '$project': {
                                '_id': False,
                                'label': label,
                                'average': '$average',
                                'count': '$count',
                            }
                        }
                    ]
                    aggregation_result = get_aggregation_result(performance_data.aggregate(query))

                if verbose:
                    print("### metric", metric)
                    print("### query", query)
                    print("### result", len(aggregation_result))
                    if verbose>1:
                        for row in aggregation_result:
                            print(row)
                # the summary axis yields a single record rather than a list
                if axis == '_summary':
                    aggregation_result = aggregation_result[0] if aggregation_result else None

                visualizations[metric][axis] = aggregation_result

        # Overall status of the matched records
        query = scope + [
            {
                '$group': {
                    '_id': None,
                    'count': { '$sum': '$count' },
                    'start_date': { '$min': '$scope.start_date' },
                    'end_date': { '$max': '$scope.end_date' },
                },
            },
            {
                '$project': {
                    '_id': False,
                    'totalMatchedJobs': '$count',
                    'start_date': { '$dateToString': { 'format': ISO_DATE_FORMAT, 'date': '$start_date' } },
                    'end_date': { '$dateToString': { 'format': ISO_DATE_FORMAT, 'date': '$end_date' } },
                }
            }]
        res = get_aggregation_result(performance_data.aggregate(query))
        status = (res or [ {} ])[0]
        if verbose:
            print("### query", query)
            print("### status", status)
        status["time"] = time.time() - start_time

        # Date range covered by the whole collection (no scope applied)
        query = [
            {
                '$group': {
                    '_id': None,
                    'min_date': { '$min': '$scope.start_date' },
                    'max_date': { '$max': '$scope.end_date' },
                },
            },
            {
                '$project': {
                    '_id': False,
                    'min_date': { '$dateToString': { 'format': ISO_DATE_FORMAT, 'date': '$min_date' } },
                    'max_date': { '$dateToString': { 'format': ISO_DATE_FORMAT, 'date': '$max_date' } },
                }
            }]
        res = get_aggregation_result(performance_data.aggregate(query))
        status.update((res or [ {} ])[0])
        if verbose:
            print("### query", query)
            print("### status", status)

        # Collect supplementary data
        supplementaryData = {}
        if "exitCode" in axes + suggestions:
            supplementaryData["exitCodes"] = self.codes
        if len(metrics) == 0:
            supplementaryData["metrics"] = self.metrics

        output = {
            "status": status,
            "suggestions": collected_suggestions,
            "visualizations": visualizations,
            "supplementaryData": supplementaryData,
        }
        if verbose>1:
            print("### output", json.dumps(output))
        return output
class MongoDatabaseExtAdapter(StorageAdapter):
    """
    The MongoDatabaseAdapter is an interface that allows
    ChatterBot to store statements in a MongoDB database.

    :keyword database: The name of the database you wish to connect to.
    :type database: str

    .. code-block:: python

       database='chatterbot-database'

    :keyword database_uri: The URI of a remote instance of MongoDB.
    :type database_uri: str

    .. code-block:: python

       database_uri='mongodb://example.com:8100/'

    :keyword ext_collection_name: Name of the collection that stores the
        statements. Falls back to ``'statements'`` when not given.
    :type ext_collection_name: str
    """

    def __init__(self, **kwargs):
        super(MongoDatabaseExtAdapter, self).__init__(**kwargs)
        from pymongo import MongoClient
        from pymongo.errors import OperationFailure

        self.database_name = self.kwargs.get(
            'database', 'chatterbot-database'
        )
        self.database_uri = self.kwargs.get(
            'database_uri', 'mongodb://localhost:27017/'
        )

        # Connect using the configured URI (local server by default)
        self.client = MongoClient(self.database_uri)

        # Increase the sort buffer to 42M if possible
        try:
            self.client.admin.command({'setParameter': 1, 'internalQueryExecMaxBlockingSortBytes': 44040192})
        except OperationFailure:
            pass

        # Specify the name of the database
        self.database = self.client[self.database_name]

        # The mongo collection of statement documents. The collection is
        # chosen by the 'ext_collection_name' argument; without it the
        # stock 'statements' collection is used instead of passing None
        # as a collection name.
        self.statements = self.database[
            self.kwargs.get('ext_collection_name', 'statements')
        ]

        # The mongo collection of conversation documents
        self.conversations = self.database['conversations']

        # Set a requirement for the text attribute to be unique
        self.statements.create_index('text', unique=True)

        self.base_query = Query()

    def get_statement_model(self):
        """
        Return the class for the statement model.
        """
        from chatterbot.conversation import Statement

        # Create a storage-aware statement
        statement = Statement
        statement.storage = self

        return statement

    def get_response_model(self):
        """
        Return the class for the response model.
        """
        from chatterbot.conversation import Response

        # Create a storage-aware response
        response = Response
        response.storage = self

        return response

    def count(self):
        """
        Return the number of documents in the statements collection.
        """
        return self.statements.count()

    def find(self, statement_text):
        """
        Return the Statement whose text equals ``statement_text``,
        or None when no such document is stored.
        """
        Statement = self.get_model('statement')
        query = self.base_query.statement_text_equals(statement_text)

        values = self.statements.find_one(query.value())

        if not values:
            return None

        # The text is passed positionally, so drop it from the keyword data
        del values['text']

        # Build the objects for the response list
        values['in_response_to'] = self.deserialize_responses(
            values.get('in_response_to', [])
        )

        return Statement(statement_text, **values)

    def deserialize_responses(self, response_list):
        """
        Takes the list of response items and returns
        the list converted to Response objects.

        Note that the ``text`` key is removed from each item of
        ``response_list`` in place.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')
        proxy_statement = Statement('')

        for response in response_list:
            text = response['text']
            del response['text']

            proxy_statement.add_response(
                Response(text, **response)
            )

        return proxy_statement.in_response_to

    def mongo_to_object(self, statement_data):
        """
        Return Statement object when given data
        returned from Mongo DB.
        """
        Statement = self.get_model('statement')
        statement_text = statement_data['text']
        del statement_data['text']

        statement_data['in_response_to'] = self.deserialize_responses(
            statement_data.get('in_response_to', [])
        )

        return Statement(statement_text, **statement_data)

    def filter(self, **kwargs):
        """
        Returns a list of statements in the database
        that match the parameters specified.

        ``order_by``, ``in_response_to`` and ``in_response_to__contains``
        are handled specially; all remaining keyword arguments are added
        to the query as-is.
        """
        import pymongo

        query = self.base_query

        order_by = kwargs.pop('order_by', None)

        # Convert Response objects to data
        if 'in_response_to' in kwargs:
            serialized_responses = []
            for response in kwargs['in_response_to']:
                serialized_responses.append({'text': response})

            query = query.statement_response_list_equals(serialized_responses)
            del kwargs['in_response_to']

        if 'in_response_to__contains' in kwargs:
            query = query.statement_response_list_contains(
                kwargs['in_response_to__contains']
            )
            del kwargs['in_response_to__contains']

        query = query.raw(kwargs)

        matches = self.statements.find(query.value())

        if order_by:

            direction = pymongo.ASCENDING

            # Sort so that newer datetimes appear first
            if order_by == 'created_at':
                direction = pymongo.DESCENDING

            matches = matches.sort(order_by, direction)

        results = []

        for match in list(matches):
            results.append(self.mongo_to_object(match))

        return results

    # Replacement for Statement.serialize(): the stock method does not
    # include the created_at attribute.
    def serialize(self, statement):
        """
        :returns: A dictionary representation of the statement object.
        :rtype: dict

        As a side effect ``statement.created_at`` is set to the current
        time.
        """
        data = {}

        data['text'] = statement.text
        # created_at is assigned on the statement here because it is not
        # set when the Statement is instantiated
        statement.created_at = datetime.now()
        data['created_at'] = statement.created_at.isoformat()

        data['in_response_to'] = []
        data['extra_data'] = statement.extra_data

        for response in statement.in_response_to:
            data['in_response_to'].append(response.serialize())

        return data

    def update(self, statement):
        """
        Store ``statement`` and a document for each of its responses,
        then return the statement.

        Bulk write errors are logged and not raised.
        """
        from pymongo import UpdateOne
        from pymongo.errors import BulkWriteError

        # Use the adapter's own serialize() rather than
        # statement.serialize() so that created_at is included
        data = self.serialize(statement)

        operations = []

        # Merge with the responses already stored for this text. Without
        # this, adding a different question with the same answer would
        # overwrite the previously stored question/answer relations.
        statement_in_db = self.find(statement.text)
        if statement_in_db:
            statement_in_db.in_response_to.extend(statement.in_response_to)
            data = self.serialize(statement_in_db)

        update_operation = UpdateOne(
            {'text': statement.text},
            {'$set': data},
            upsert=True
        )
        operations.append(update_operation)

        # Make sure that an entry for each response is saved
        for response_dict in data.get('in_response_to', []):
            response_text = response_dict.get('text')

            # NOTE: this is a $set with upsert, so the fields of
            # response_dict are written whether or not the document
            # already exists
            update_operation = UpdateOne(
                {'text': response_text},
                {'$set': response_dict},
                upsert=True
            )
            operations.append(update_operation)

        try:
            self.statements.bulk_write(operations, ordered=False)
        except BulkWriteError as bwe:
            # Log the details of a bulk write error
            self.logger.error(str(bwe.details))

        return statement

    def create_conversation(self):
        """
        Create a new conversation.
        """
        conversation_id = self.conversations.insert_one({}).inserted_id
        return conversation_id

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.

        None is also returned when fewer than two statements belong to
        the conversation, because the statement returned is the
        second-to-last one of the sorted result.
        """
        from pymongo import DESCENDING

        statements = list(self.statements.find({
            'conversations.id': conversation_id
        }).sort('conversations.created_at', DESCENDING))

        # Indexing [-2] on a one-element list would raise IndexError
        if len(statements) < 2:
            return None

        return self.mongo_to_object(statements[-2])

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.
        """
        from datetime import datetime, timedelta
        self.statements.update_one(
            {
                'text': statement.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        'created_at': datetime.utcnow()
                    }
                }
            }
        )
        self.statements.update_one(
            {
                'text': response.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        # Force the response to be at least one millisecond after the input statement
                        'created_at': datetime.utcnow() + timedelta(milliseconds=1)
                    }
                }
            }
        )

    def get_random(self):
        """
        Returns a random statement from the database.

        Raises ``EmptyDatabaseException`` when no statements are stored.
        """
        from random import randint

        count = self.count()

        if count < 1:
            raise self.EmptyDatabaseException()

        random_integer = randint(0, count - 1)

        statements = self.statements.find().limit(1).skip(random_integer)

        return self.mongo_to_object(list(statements)[0])

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        for statement in self.filter(in_response_to__contains=statement_text):
            statement.remove_response(statement_text)
            self.update(statement)

        self.statements.delete_one({'text': statement_text})

    def get_response_statements(self):
        """
        Return the statements that can act as questions, together with
        their vectors.

        :returns: a tuple ``(statement_objects, statement_vec)`` of two
            lists of equal length; ``statement_vec[i]`` is the vector
            looked up for the text of ``statement_objects[i]``.
        """
        # The original logic decided whether a sentence is a question via
        # the in_response_to field: a sentence that appears in another
        # sentence's in_response_to can serve as a question. That needs a
        # query for all in_response_to texts followed by a query for the
        # sentences in that set, which is very slow. With sentences tagged
        # by a "Q"/"A" prefix, questions can be matched directly with a
        # regular expression, and only the text field is returned, which
        # makes the query much faster.
        pattern = re.compile('^Q ')
        regex = Regex.from_native(pattern)
        response_query = self.statements.find({'text': {'$regex': regex}}, {'text': 1})

        statement_objects = []
        statement_vec = []
        import datetime as dt
        starttime2 = dt.datetime.now()
        for r in response_query:
            try:
                # Use the vector stored for the text in the classifier's
                # text -> vector index
                # (presumably built at system start-up -- TODO confirm)
                text_vec_indx = IntentClassifier().text_vec_indx
                vec = text_vec_indx.get(r['text'], None)
                if vec is not None:
                    # NOTE: the two lists must stay the same length.
                    # Similarity results are mapped back to the original
                    # text by index, so any offset would return the wrong
                    # answer.
                    statement_vec.append(vec)
                    statement_objects.append(self.mongo_to_object({'text': r['text']}))
            except Exception as e:
                logging.warning("出现异常%s,问题句子为:%s", str(e), r['text'])
        endtime2 = dt.datetime.now()
        logging.debug("===========get_response_statements的for循环构造向量耗时: %s秒", (endtime2 - starttime2).seconds)
        return statement_objects, statement_vec

    def drop(self):
        """
        Remove the database.
        """
        self.client.drop_database(self.database_name)
#!/usr/bin/env python
"""Load ../DATA/presidents.txt into the ``presidents`` MongoDB database.

Each line of the data file holds colon-separated fields in the order given
by FIELD_NAMES. Any existing ``presidents`` database is dropped first.
"""
import re
import sys

from pymongo import MongoClient
from pymongo.errors import PyMongoError

FIELD_NAMES = ('termnumber lastname firstname '
               'birthdate '
               'deathdate birthplace birthstate '
               'termstartdate '
               'termenddate '
               'party').split()  # <1>

mc = MongoClient()  # <2>

try:
    mc.drop_database("presidents")  # <3>
except PyMongoError as err:
    # Best effort: carry on with the load, but report why the drop failed.
    sys.stderr.write("Could not drop database 'presidents': %s\n" % err)

db = mc["presidents"]  # <4>

coll = db.presidents  # <5>

with open('../DATA/presidents.txt') as PRES:  # <6>
    for line in PRES:
        # Strip only the line terminator; slicing off the last character
        # would truncate the final field of a last line without newline.
        flds = line.rstrip('\n').split(':')
        record_dict = dict(zip(FIELD_NAMES, flds))
        coll.insert(record_dict)  # <7>

print(db.collection_names())  # <8>
print()
def cuckoo_clean():
    """Clean up cuckoo setup.

    It deletes logs, all stored data from file system and configured
    databases (SQL and MongoDB). Every failure along the way is logged as a
    warning and the clean-up continues with the next step.
    """
    # Init logging.
    # This need to init a console logger handler, because the standard
    # logger (init_logging()) logs to a file which will be deleted.
    create_structure()
    init_console_logging()

    # Initialize the database connection.
    try:
        db = Database(schema_check=False)
    except CuckooDatabaseError as e:
        # If something is screwed due to incorrect database migrations or bad
        # database SqlAlchemy would be unable to connect and operate.
        log.warning(
            "Error connecting to database: it is suggested to check "
            "the connectivity, apply all migrations if needed or purge "
            "it manually. Error description: %s", e)
    else:
        # Drop all tables.
        db.drop()

    # Check if MongoDB reporting is enabled and drop that if it is.
    cfg = Config("reporting")
    if cfg.mongodb and cfg.mongodb.enabled:
        from pymongo import MongoClient
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)
        mdb = cfg.mongodb.get("db", "cuckoo")
        try:
            conn = MongoClient(host, port)
            try:
                conn.drop_database(mdb)
            finally:
                # Release the connection even when the drop fails.
                conn.close()
        except Exception as e:
            # Still best effort, but Ctrl-C / SystemExit are no longer
            # swallowed and the reason for the failure is reported.
            log.warning("Unable to drop MongoDB database %s: %s", mdb, e)

    # Paths to clean.
    paths = [
        os.path.join(CUCKOO_ROOT, "db"),
        os.path.join(CUCKOO_ROOT, "log"),
        os.path.join(CUCKOO_ROOT, "storage"),
    ]

    # Delete various directories.
    for path in paths:
        if os.path.isdir(path):
            try:
                shutil.rmtree(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)

    # Delete all compiled Python objects ("*.pyc").
    for dirpath, dirnames, filenames in os.walk(CUCKOO_ROOT):
        for fname in filenames:
            if not fname.endswith(".pyc"):
                continue

            # We don't want to delete the Android's Agent .pyc files (as we
            # don't ship the original .py files and thus they're critical).
            if "agent/android/python_agent" in dirpath.replace("\\", "/"):
                continue

            path = os.path.join(dirpath, fname)

            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
print(output) print("\n") def show_rentals(product_id): customers = {} for rental in db.rental.find({"product_id": product_id}): customer = db.customers.find_one({"user_id": rental["user_id"]}) customers[customer["user_id"]] = { "name": customer["name"], "address": customer["address"], "phone_number": customer["phone_number"], "email": customer["email"] } print("Rentals for product_id:" + product_id + ":") print(customers) print("\n") import_data("data", "product.csv", "customer.csv", "rental.csv") show_available_products() show_rentals('prd006') show_rentals('prd002') mongo.drop_database("assignment")
from pymongo import MongoClient

# Connect to the hosted deployment (credentials are masked in the URI) with
# retryable writes switched off.
uri = 'mongodb://*****:*****@ds155164.mlab.com:55164/ubeer'
client = MongoClient(uri, retryWrites=False)

# WARNING: destructive -- this removes the entire "ubeer" database.
mydb = client.get_database("ubeer")
client.drop_database(mydb)
class TestRootLoggerHandler(unittest.TestCase):
    """Tests for MongoHandler writing log records into a scratch database."""

    def setUp(self):
        """Connect and make sure the scratch database starts out empty."""
        self.db_name = '_mongolog_test'
        self.collection_name = 'log'

        self.conn = Connection()
        self.db = self.conn[self.db_name]
        self.collection = self.db[self.collection_name]

        self.conn.drop_database(self.db_name)

    def tearDown(self):
        """Drop the scratch database again."""
        self.conn.drop_database(self.db_name)

    def _logger_with_handler(self, name):
        """Return logger ``name`` at DEBUG level with a MongoHandler attached."""
        logger = logging.getLogger(name)
        logger.setLevel(logging.DEBUG)
        logger.addHandler(MongoHandler(self.collection_name, self.db_name))
        return logger

    def testLogging(self):
        """A plain debug message ends up in the collection."""
        self._logger_with_handler('log').debug('test')

        query = {'levelname': 'DEBUG', 'msg': 'test'}
        record = self.collection.find_one(query)
        self.assertEqual(record['msg'], 'test')

    def testLoggingException(self):
        """An error logged with exc_info stores the traceback text."""
        logger = self._logger_with_handler('exception')

        try:
            1 / 0
        except ZeroDivisionError:
            logger.error('test zero division', exc_info=True)

        query = {'levelname': 'ERROR', 'msg': 'test zero division'}
        record = self.collection.find_one(query)
        self.assertTrue(record['exc_info'].startswith('Traceback'))

    def testQueryableMessages(self):
        """Dictionary messages can be queried by their nested fields."""
        logger = self._logger_with_handler('query')

        for street in ('340 N 12th St', '340 S 12th St', '1234 Market St'):
            logger.info({'address': street, 'state': 'PA', 'country': 'US'})

        # Exactly one record carries the first address.
        cursor = self.collection.find(
            {'levelname': 'INFO', 'msg.address': '340 N 12th St'})
        self.assertEqual(
            cursor.count(), 1,
            "Expected query to return 1 message; it returned %d"
            % cursor.count())
        self.assertEqual(cursor[0]['msg']['address'], '340 N 12th St')

        # All three records share the same state.
        cursor = self.collection.find({'levelname': 'INFO', 'msg.state': 'PA'})
        self.assertEqual(cursor.count(), 3,
                         "Didn't find all three documents")