def test_update_content(app):
    """Posting an edited document must expose the new title under "manual"."""
    # Seed the production collection with the unedited document.
    payload = {
        "data": {
            "id": "test_id",
            "url": "some test url 123",
            "content": {
                "title": "some_test_title",
                "keywords": [
                    {"keyword": "change_from_test", "confidence": 0.2010},
                ],
                "texts": ["some test text"],
            },
        }
    }
    prod_col = Config.get_mongo_collection("prod")
    manual_col = Config.get_mongo_collection("manual")
    factory.post_document(dict(payload["data"]), prod_col)

    try:
        # Edit the title and submit the changed document through the API.
        new_title = "title has been changed"
        payload["data"]["content"]["title"] = new_title
        body = json.dumps(payload)
        response = app.test_client().post('/v1/web/content/', data=body)

        # Read the document back and check the manual entry.
        response = app.test_client().get('/v1/web/content/?id=test_id')
        response_doc = json.loads(response.data.decode())
        print(response_doc)
        assert response_doc["manual"]["title"] == new_title
    finally:
        # Remove the test document from both collections.
        lookup = {"id": "test_id"}
        factory.delete_document(lookup, manual_col)
        factory.delete_document({"id": "test_id"}, prod_col)
def test_get_docs_from_url(app):
    """Looking up documents by URL must return the seeded document first."""
    doc_id = "test_id_for_url"
    seeded = {
        "data": {
            "id": "test_id_for_url",
            "url": "some test url",
            "content": {
                "title": "some_test_title",
                "keywords": [
                    {"keyword": "change_from_test", "confidence": 0.2010},
                ],
                "texts": ["some test text"],
            },
        }
    }
    prod_col = Config.get_mongo_collection("prod")
    factory.post_document(dict(seeded["data"]), prod_col)

    try:
        client = app.test_client()
        response = client.get('/v1/web/docs/?url=some test url')
        matches = json.loads(response.data.decode())
        first_match = matches[0]
        assert first_match["id"] == doc_id
    finally:
        # Remove the seeded document again.
        factory.delete_document({"id": doc_id}, prod_col)
def test_get_all_conflicts(app):
    """The conflict_ids endpoint must list every conflict that was inserted.

    Two conflict documents are posted to the conflicts collection, the
    endpoint is queried once, and each inserted ``conflict_id`` is expected
    among the ``id`` values of the response. The documents are deleted again
    regardless of the outcome.
    """
    # Set up two conflicts.
    conflicts = [
        {
            "conflict_id": "test_conflict_id_{}".format(i),
            "title": "test_conflict_title_{}".format(i),
        }
        for i in range(2)
    ]
    conflict_col = Config.get_mongo_collection("conflicts")

    # Post both documents to the conflicts collection.
    for conflict in conflicts:
        factory.post_document(conflict, conflict_col)

    # Every request lives inside the try block so that a failing request can
    # never skip the cleanup below and leave test data behind.
    try:
        response = app.test_client().get('/v1/web/conflict_ids')
        response_json = json.loads(response.data.decode())

        # Collect the returned ids once instead of once per conflict.
        returned_ids = [resp["id"] for resp in response_json]
        for conflict in conflicts:
            assert conflict["conflict_id"] in returned_ids
    finally:
        # Delete the test conflicts.
        for conflict in conflicts:
            factory.delete_document(
                {"conflict_id": conflict["conflict_id"]}, conflict_col)
def check_manually_changed(factory, document):
    """Resolve a document to its manually edited counterpart, if flagged.

    When ``document["manually_changed"]`` is falsy the document itself is
    returned. Otherwise the manual collection is searched for an entry with
    the same ``id``; the first match is returned, or ``None`` when there is
    no match.
    """
    if not document["manually_changed"]:
        return document

    doc_id = document["id"]
    manual_col = Config.get_mongo_collection("manual")
    manual_collection = factory.get_database().get_collection(manual_col)
    matches = manual_collection.find({"id": doc_id})
    return next(matches, None)
def get_document(self, query,
                 prod_col=Config.get_mongo_collection("prod"),
                 manual_col=Config.get_mongo_collection("manual"),
                 number_of_docs=30):
    """
    Run a MongoDB text search for the query in the prod and manual
    collections.

    Up to number_of_docs hits, sorted by text score, are read from each
    collection. Hits from prod are kept only when their manually_changed
    flag is False; every hit from manual is kept. The returned list holds
    the prod hits followed by the manual hits and is not re-sorted across
    the two collections.
    """
    def ranked_hits(collection_name):
        # Text search with the score projected, limited to the best hits.
        hits = self.get_collection(collection_name).find(
            {'$text': {'$search': query}},
            {'score': {'$meta': 'textScore'}},
        )
        hits.sort([('score', {'$meta': 'textScore'})]).limit(number_of_docs)
        return hits

    # Prod documents that were manually changed are represented by their
    # manual counterpart instead, so they are dropped here.
    docs = [
        hit for hit in ranked_hits(prod_col)
        if hit["manually_changed"] is False
    ]
    docs.extend(ranked_hits(manual_col))
    return docs
def _handle_not_found(query_text):
    '''
    Record the query text in the unknown queries collection and return the
    fallback string.
    '''
    try:
        collection_name = Config.get_mongo_collection("unknown")
        database = factory.get_database()
        unknown_queries = database.get_collection(collection_name)
        unknown_queries.insert_one({"query_text": query_text})
    except pymongo.errors.DuplicateKeyError:
        # This exact query is already stored in the unknown queries
        # collection, so there is nothing more to add.
        pass

    return NOT_FOUND
def test_get_content(app):
    """Fetching content by id must return the stored content under "prod"."""
    content_id = "test_content_id"
    seeded = {
        "id": "test_content_id",
        "content": "some_test_content",
        "url": "test_url",
    }
    prod_col = Config.get_mongo_collection("prod")
    factory.post_document(seeded, prod_col)

    try:
        client = app.test_client()
        response = client.get("/v1/web/content/?id=test_content_id")
        payload = json.loads(response.data.decode())
        assert payload["prod"] == "some_test_content"
    finally:
        # Remove the seeded document again.
        factory.delete_document({"id": content_id}, prod_col)
def insert_documents(data):
    """
    Insert all provided documents and swap them in as the production
    collection.

    The documents are first written to the temp collection. Every document
    in the manual collection that has an "id" marks the matching temp entry
    as manually changed; if both a prod and a temp document exist for that
    id and their "content" differs, the id and the new title are recorded
    as a conflict. The temp collection is then renamed to the production
    collection and the indexes are refreshed.

    data: a sized collection of documents (len() is taken of it).

    Returns the list of detected conflicts, each a dict with the keys "id"
    and "title". An empty data collection returns an empty list and leaves
    every collection untouched.
    """
    factory = ModelFactory.get_instance()
    factory.set_db()

    temp_col = Config.get_mongo_collection("temp_scraped")
    manual_col = Config.get_mongo_collection("manual")
    unknown_col = Config.get_mongo_collection("unknown")
    prod_col = Config.get_mongo_collection("prod")
    conflict_col = Config.get_mongo_collection("conflicts")

    total = len(data)
    print("Starting insertion of {} documents".format(total))
    if total == 0:
        # With nothing to insert there is no new data to promote, so stop
        # before the production swap below is attempted.
        return []

    pbar = ProgressBar()
    for doc in pbar(data):
        factory.post_document(doc, temp_col)

    # Report the count from the input size; a loop variable would be
    # unbound if the loop body never ran.
    print("Successfully inserted {} documents".format(total))

    manual_docs = factory.get_collection(manual_col).find()

    conflicts = []
    for manual_doc in manual_docs:
        if "id" not in manual_doc:
            continue
        idx = manual_doc["id"]

        # Mark corresponding entry in temp collection as manually changed
        factory.get_database() \
               .get_collection(temp_col) \
               .update_one({"id": idx}, {"$set": {"manually_changed": True}})

        prod_doc = next(factory.get_collection(prod_col).find({"id": idx}),
                        None)
        temp_doc = next(factory.get_collection(temp_col).find({"id": idx}),
                        None)

        # A conflict exists when the newly inserted content differs from
        # what is currently in production for a manually edited document.
        if prod_doc and temp_doc:
            if temp_doc["content"] != prod_doc["content"]:
                title = temp_doc["content"]["title"]
                conflicts.append({"id": idx, "title": title})

    print("Conflicts: {}".format(conflicts))
    factory.get_collection(conflict_col).create_index([("title", 1)],
                                                      unique=True)
    for conflict in conflicts:
        try:
            factory.post_document(conflict, conflict_col)
        except pymongo.errors.DuplicateKeyError:
            # In case there are duplicate, unsolved conflicts
            pass

    # Update production collection
    db = factory.get_database()
    try:
        db.get_collection(prod_col).rename("old_prod")
    except pymongo.errors.OperationFailure:
        # If the prod collection does not exist
        pass

    try:
        db.get_collection(temp_col).rename(prod_col)
    except Exception as e:
        # Promotion failed: report it and restore the previous production
        # collection from its backup name.
        print("Failed to update production db collection")
        print(e)
        db.get_collection("old_prod").rename(prod_col)
    finally:
        db.get_collection("old_prod").drop()
        db.get_collection(temp_col).drop()

    # Update all indexes
    # NOTE(review): temp_col was dropped just above; confirm that setting
    # an index on it here is intended.
    factory.set_index(prod_col)
    factory.set_index(manual_col)
    factory.set_index(temp_col)

    # Removes duplicates
    factory.get_collection(unknown_col).create_index([("query_text", 1)],
                                                     unique=True)

    return conflicts
import pytest import json from chatbot.api import server from chatbot.model.model_factory import ModelFactory from chatbot.util.config_util import Config factory = ModelFactory.get_instance() factory.set_db() prod_col = Config.get_mongo_collection("prod") manual_col = Config.get_mongo_collection("manual") conflict_col = Config.get_mongo_collection("conflicts") unknown_col = Config.get_mongo_collection("unknown") @pytest.fixture(scope='module') def client(): return server.app.test_client() def test_swagger(client): response = client.get('/') assert response.status_code == 200 def test_response(client): query = 'some test response' try: response = client.get('/v2/response/{}/'.format(query)) assert response.status_code == 200