def test_shouldCreateModelWhenNoModelFound(self):
        # Scenario: the model directory is cleared, the stub server answers
        # GET /documents/tokens, and a POST to /tagger/documents is expected
        # to leave "lda.model" and "tokens.dict" under app.model_path.
        content_store = self.stub_http_server
        self.id_list = json.dumps({"documentIds": ["1", "2", "3", "4", "5"]})
        self.clear_model(config("app.model_path"))

        # The same method/path/body triple is used to stub the request and,
        # later, to verify that it was actually received.
        tokens_request = dict(method="GET",
                              path="/documents/tokens",
                              body=self.id_list)
        content_store.response_when(
            response=json.dumps(self.document_response),
            responseType="application/json",
            **tokens_request)

        response = self.fetch("/tagger/documents",
                              method="POST",
                              body=self.id_list)

        tokens_were_fetched = content_store.request_received(**tokens_request)
        self.assertTrue(tokens_were_fetched)
        topics_were_posted = content_store.request_received(
            method="POST", path="/documents/logical_topics")
        self.assertTrue(topics_were_posted)

        # Both model artefacts must now exist on disk.
        for artefact_name in ("lda.model", "tokens.dict"):
            artefact_path = os.path.join(config("app.model_path"),
                                         artefact_name)
            self.assertTrue(os.path.exists(artefact_path))

        self.assertEqual(200, response.code)
        response_payload = json.loads(response.body)
        self.assertEqual('success', response_payload["status"])
Exemplo n.º 2
0
 def post_document_logical_topics_association(self, document_id, topics,
                                              topics_tokens_map):
     """Post one document's logical-topic mixture to the content store.

     A DocumentTopicsMixtureRequest is built from the arguments and its
     JSON form is POSTed to the URL obtained by concatenating the
     ``content_store.host`` and
     ``content_store.post_document_logical_topics`` config values.

     Raises:
         Exception: carrying ``self.STATUS_FAILED`` when the response status
             differs from ``httplib.OK``, or ``self.CONNECTION_ERROR`` when
             ``requests.ConnectionError`` is raised by the POST.
     """
     document_topics_mixture = DocumentTopicsMixtureRequest(
         document_id=document_id,
         topics=topics,
         topics_tokens_map=topics_tokens_map)
     post_document_logical_topics_url = config(
         "content_store.host") + config(
             "content_store.post_document_logical_topics")
     try:
         logger.info("Posting Document tagged with Logical Topics to %s" %
                     post_document_logical_topics_url)
         response = requests.post(post_document_logical_topics_url,
                                  data=document_topics_mixture.to_json(),
                                  headers=self.HEADERS)
         # Compare by value: identity (`is not`) on integers only works by
         # accident of the interpreter's small-int caching.
         if response.status_code != httplib.OK:
             logger.error(response.text)
             raise Exception(self.STATUS_FAILED)
     except requests.ConnectionError as e:
         logger.error(e)
         raise Exception(self.CONNECTION_ERROR)
     logger.info(
         "Successfully posted Logical Topics for Document %s. Server Response: %s"
         % (document_id, response.text))
 def generate_a_model_for_inference(self):
     """Build topics with an LDATagger from a fixed five-document corpus.

     The tagger is created with the ``app.model_path`` and
     ``app.max_topics`` config values and fed the token lists of the
     corpus below through ``build_topics``. Presumably this leaves a model
     at ``app.model_path`` for the inference tests to use - confirm against
     LDATagger.
     """
     # Token run shared by documents "1" (twice) and "4" (as its tail).
     network_tokens = [
         "content", "network", "router", "wifi", "cable", "ethernet",
         "socket", "authentication"
     ]
     corpus = [
         ("1", network_tokens * 2),
         ("2", ["java", "golang", "cool", "awesome"]),
         ("3", ["authentication", "golang", "impossible"]),
         ("4", [
             "network", "tcp", "ftp", "monitor", "reliability", "cable",
             "ethernet"
         ] + network_tokens),
         ("5", ["python", "topic", "modelling", "module"]),
     ]
     documents_response_map = [{
         "id": doc_id,
         "tokens": doc_tokens
     } for doc_id, doc_tokens in corpus]

     lda_tagger = LDATagger(model_path=config("app.model_path"),
                            num_topics=config("app.max_topics"))
     tokens_by_document = DocumentsResponse(
         documents_response_map).to_docs_tokens_map()
     lda_tagger.build_topics(tokens_by_document.values())
Exemplo n.º 4
0
 def start(self):
     """Log the startup settings, then bind and run the HTTP server.

     An HTTPServer wrapping this object is bound to the ``app.port`` config
     value and started with ``app.process_count`` (converted to int)
     processes; finally ``IOLoop.instance().start()`` is called.
     """
     startup_message = (
         "Starting Trinity with config at %s with %s sub processes." %
         (self.config_file, config("app.process_count")))
     logger.info(startup_message)
     port_message = "Listening to requests on port %s" % config("app.port")
     logger.info(port_message)

     http_server = HTTPServer(self)
     listen_port = config("app.port")
     http_server.bind(listen_port)
     process_count = int(config("app.process_count"))
     http_server.start(process_count)

     io_loop = IOLoop.instance()
     io_loop.start()
    def test_shouldSendTopicToTokensAssociationToContentStore(self):
        # Scenario: with the model directory cleared and the token lookup
        # stubbed, POSTing the id list to /tagger/documents must result in a
        # POST to /documents/logical_topics on the stub server.
        content_store = self.stub_http_server
        self.id_list = json.dumps({"documentIds": ["1", "2", "3", "4", "5"]})
        self.clear_model(config("app.model_path"))

        # Reused for both stubbing and verifying the token lookup.
        tokens_request = dict(method="GET",
                              path="/documents/tokens",
                              body=self.id_list)
        content_store.response_when(
            response=json.dumps(self.document_response),
            responseType="application/json",
            **tokens_request)

        response = self.fetch("/tagger/documents",
                              method="POST",
                              body=self.id_list)

        tokens_were_fetched = content_store.request_received(**tokens_request)
        self.assertTrue(tokens_were_fetched)
        topics_were_posted = content_store.request_received(
            method="POST", path="/documents/logical_topics")
        self.assertTrue(topics_were_posted)

        self.assertEqual(200, response.code)
        response_payload = json.loads(response.body)
        self.assertEqual('success', response_payload["status"])
    def test_shouldSendfailureResponseWhenDocumentsFetchFails(self):
        # Scenario: the stubbed token lookup answers with status 500, so the
        # tagger endpoint must reply 500/"failure" and must not post any
        # logical topics to the stub server.
        content_store = self.stub_http_server
        self.id_list = json.dumps({"documentIds": ["1", "2", "3", "4", "5"]})
        self.clear_model(config("app.model_path"))

        # Reused for both stubbing and verifying the token lookup.
        tokens_request = dict(method="GET",
                              path="/documents/tokens",
                              body=self.id_list)
        content_store.response_when(response="[]",
                                    responseType="application/json",
                                    status_code=500,
                                    **tokens_request)

        response = self.fetch("/tagger/documents",
                              method="POST",
                              body=self.id_list)

        tokens_were_fetched = content_store.request_received(**tokens_request)
        self.assertTrue(tokens_were_fetched)
        topics_were_posted = content_store.request_received(
            method="POST", path="/documents/logical_topics")
        self.assertFalse(topics_were_posted)

        self.assertEqual(500, response.code)
        response_payload = json.loads(response.body)
        self.assertEqual('failure', response_payload["status"])
Exemplo n.º 7
0
    def post(self):
        """Handle a POST asking to infer topics and tags for one document.

        Reads ``documentId`` from the JSON request body, fetches that
        document through ``self.content_store_service`` and passes it to
        ``self.document_processor.infer`` together with the absolute form of
        the ``app.model_path`` config value (defaulting to
        ``LDATagger.DEFAULT_MODEL_PATH``).

        The processor result is written as JSON. When the result is not a
        success the response status is set to
        ``httplib.INTERNAL_SERVER_ERROR``. If the document cannot be
        fetched, the value of ``self.error_response(...)`` is returned
        instead.
        """
        document_id = json.loads(self.request.body)["documentId"]
        logger.info("Request to infer topics for document %s received" %
                    document_id)

        try:
            document_response = self.content_store_service.fetch_document(
                document_id)
        except Exception as e:
            document_fetch_error = "unable to fetch Document for Tagging"
            logger.info(e)
            logger.error("%s for Id %s" % (document_fetch_error, document_id))
            return self.error_response(document_fetch_error)

        model_path = config("app.model_path", LDATagger.DEFAULT_MODEL_PATH)

        logger.info("Model Path: %s" % (model_path))
        logger.info("Inferring topics and tags for %s" % document_id)

        result = self.document_processor.infer(document_response,
                                               os.path.abspath(model_path))

        # Only report success once the result has actually been checked;
        # previously the success message was logged even for failed results.
        if result.is_success():
            logger.info("Topics and tags for %s successfully generated" %
                        document_id)
        else:
            logger.error("Failed to generate topics and tags for %s" %
                         document_id)
            self.set_status(httplib.INTERNAL_SERVER_ERROR)

        self.write(result.to_json())
        self.set_header("Content-Type", "application/json")
Exemplo n.º 8
0
    def infer(self, document_response, tagger_model_path):
        """Infer topics and tags for one document and post them onwards.

        An LDATagger created from ``tagger_model_path`` and the
        ``app.max_topics`` config value yields the document's topics and the
        topic-to-tokens map; a TagGenerator derives the tags. The logical
        topics are posted to the content store first, then the tags.

        Returns:
            ``Response(status="success", ...)`` when both posts succeed;
            otherwise the value of ``self.error_response(message)`` for the
            first post that raised.
        """
        lda_tagger = LDATagger(tagger_model_path,
                               num_topics=config("app.max_topics"))

        topics = lda_tagger.topics_for_document(document_response.tokens())
        topics_tokens_map = lda_tagger.topics_to_tokens()
        tag_generator = TagGenerator(topics_tokens_map)
        tags = tag_generator.generate_tags(topics=topics,
                                           tokens=document_response.tokens())

        # Each upload is paired with the message reported if it fails; they
        # run in order and the first failure ends processing.
        uploads = (
            (lambda: self.content_store_service.
             post_document_logical_topics_association(
                 document_response.document_id(), topics, topics_tokens_map),
             "Error updating content store for document with logical topics"),
            (lambda: self.content_store_service.
             post_document_tags_association(
                 document_response.document_id(), tags),
             "Error updating content store with Document Tags"),
        )
        for upload, failure_message in uploads:
            try:
                upload()
            except Exception as upload_error:
                logger.error(upload_error)
                return self.error_response(failure_message)

        return Response(status="success", message="Process Complete")
Exemplo n.º 9
0
    def process(self, docs_tokens_map, tagger_model_path):
        """Build or update the model for a batch of documents and post results.

        An LDATagger created from ``tagger_model_path`` and the
        ``app.max_topics`` config value is built or updated from the token
        lists in ``docs_tokens_map``. Per-document topics, the
        topic-to-tokens map and per-document tags are then derived. The
        logical topics are posted to the content store first, then the tags.

        Returns:
            ``Response(status="success", ...)`` when both posts succeed;
            otherwise the value of ``self.error_response(message)`` for the
            first post that raised.
        """
        lda_tagger = LDATagger(tagger_model_path,
                               num_topics=config("app.max_topics"))

        lda_tagger.build_or_update_model(docs_tokens_map.values())
        docs_topics_map = lda_tagger.topics_for_documents(docs_tokens_map)
        topics_tokens_map = lda_tagger.topics_to_tokens()
        tag_generator = TagGenerator(topics_tokens_map)
        docs_tags_map = tag_generator.generate_documents_tag_map(
            documents_tokens_map=docs_tokens_map,
            documents_topics_map=docs_topics_map)

        # Each upload is paired with the message reported if it fails; they
        # run in order and the first failure ends processing.
        uploads = (
            (lambda: self.content_store_service.
             post_documents_logical_topics_associations(
                 docs_topics_map, topics_tokens_map),
             "Error updating content store for documents with logical topics"),
            (lambda: self.content_store_service.
             post_documents_tags_associations(docs_tags_map),
             "Error updating content store with Documents' Tags"),
        )
        for upload, failure_message in uploads:
            try:
                upload()
            except Exception as upload_error:
                logger.error(upload_error)
                return self.error_response(failure_message)

        return Response(status="success", message="Process Complete")
Exemplo n.º 10
0
 def fetch_document(self, document_id):
     """Fetch one tokenised document from the content store.

     The URL is the ``content_store.host`` config value followed by the
     ``content_store.get_doc`` config value with ``document_id``
     interpolated via ``%``.

     Returns:
         A DocumentResponse built from the parsed JSON response body.

     Raises:
         Exception: carrying ``self.STATUS_FAILED`` when the response status
             differs from ``httplib.OK``, or ``self.CONNECTION_ERROR`` when
             ``requests.ConnectionError`` is raised by the GET.
     """
     get_doc_url = config("content_store.host") + (
         config("content_store.get_doc") % document_id)
     try:
         logger.info("Fetching %s Document to Tag from %s" %
                     (document_id, get_doc_url))
         response = requests.get(get_doc_url, headers=self.HEADERS)
         # Compare by value: identity (`is not`) on integers only works by
         # accident of the interpreter's small-int caching.
         if response.status_code != httplib.OK:
             logger.error(response.text)
             raise Exception(self.STATUS_FAILED)
     except requests.ConnectionError as e:
         logger.error(e)
         raise Exception(self.CONNECTION_ERROR)
     # Parse the body once and reuse it for both the log line and the result.
     document_json = response.json()
     logger.info("Received Tokenised Document for tagging with %s tokens." %
                 len(document_json["tokens"]))
     logger.debug(
         "Received Tokenised Document for tagging. Server Response %s" %
         response.text)
     return DocumentResponse(document_json)
Exemplo n.º 11
0
 def fetch_documents(self, documents_request):
     """Fetch a batch of tokenised documents from the content store.

     ``documents_request`` is sent as the body of a GET to the URL formed
     from the ``content_store.host`` and ``content_store.get_docs`` config
     values.

     Returns:
         A DocumentsResponse built from the parsed JSON response body.

     Raises:
         Exception: carrying ``self.STATUS_FAILED`` when the response status
             differs from ``httplib.OK``, or ``self.CONNECTION_ERROR`` when
             ``requests.ConnectionError`` is raised by the GET.
     """
     get_docs_url = config("content_store.host") + config(
         "content_store.get_docs")
     try:
         logger.info("Fetching %s Documents to Tag from %s" %
                     (documents_request, get_docs_url))
         response = requests.get(get_docs_url,
                                 data=documents_request,
                                 headers=self.HEADERS)
         # Compare by value: identity (`is not`) on integers only works by
         # accident of the interpreter's small-int caching.
         if response.status_code != httplib.OK:
             logger.error(response.text)
             raise Exception(self.STATUS_FAILED)
     except requests.ConnectionError as e:
         logger.error(e)
         raise Exception(self.CONNECTION_ERROR)
     logger.debug(
         "Received Tokenised Documents for tagging. Server Response %s" %
         response.text)
     return DocumentsResponse(response.json())
Exemplo n.º 12
0
 def post_document_tags_association(self, document_id, tags):
     """Post one document's tags to the content store.

     A DocumentTagsRequest is built from the arguments and its JSON form is
     POSTed to the URL formed from the ``content_store.host`` and
     ``content_store.post_document_tags`` config values.

     Raises:
         Exception: carrying ``self.STATUS_FAILED`` when the response status
             differs from ``httplib.OK``, or ``self.CONNECTION_ERROR`` when
             ``requests.ConnectionError`` is raised by the POST.
     """
     document_tags_request = DocumentTagsRequest(document_id, tags)
     post_document_tags_url = config("content_store.host") + config(
         "content_store.post_document_tags")
     try:
         logger.info("Posting Document-Tags association to %s" %
                     post_document_tags_url)
         response = requests.post(post_document_tags_url,
                                  data=document_tags_request.to_json(),
                                  headers=self.HEADERS)
         # Compare by value: identity (`is not`) on integers only works by
         # accident of the interpreter's small-int caching.
         if response.status_code != httplib.OK:
             logger.error(response.text)
             raise Exception(self.STATUS_FAILED)
     except requests.ConnectionError as e:
         logger.error(e)
         raise Exception(self.CONNECTION_ERROR)
     logger.info(
         "Successfully posted Document-Tags association. Server Response: %s"
         % response.text)
Exemplo n.º 13
0
def allStatuses():
    """Return a JSON string describing dependency health.

    The only dependency checked is the content store, probed through
    ``restStatus`` at ``<content_store.host>/diagnostics/humans.txt``. The
    overall ``status`` is "yellow" when any dependency reports "red" and
    "green" otherwise.
    """
    content_store_status = restStatus(
        "ContentStore HTTP connection",
        config("content_store.host") + "/diagnostics/humans.txt")
    dependencies = [content_store_status]

    # Every dependency is inspected (no early exit), as in a full fold.
    overall_status = "green"
    for dependency in dependencies:
        if dependency["status"] == "red":
            overall_status = "yellow"

    return json.dumps({
        "dependencies": dependencies,
        "status": overall_status
    })
Exemplo n.º 14
0
    def post(self):
        """Handle a POST asking to model topics for a batch of documents.

        The raw request body is forwarded to
        ``self.content_store_service.fetch_documents``. The resulting
        document-to-tokens map is passed to ``self.processor.process``
        together with the absolute form of the ``app.model_path`` config
        value (defaulting to ``LDATagger.DEFAULT_MODEL_PATH``).

        The processor result is written as JSON. When the result is not a
        success the response status is set to
        ``httplib.INTERNAL_SERVER_ERROR``. If the documents cannot be
        fetched, the value of ``self.error_response(...)`` is returned
        instead.
        """
        requested_ids = self.request.body
        logger.info("Request to model topics for documents %s received" %
                    str(requested_ids))

        try:
            documents_response = self.content_store_service.fetch_documents(
                requested_ids)
        except Exception as fetch_failure:
            failure_message = "unable to fetch Documents for Tagging"
            logger.info(fetch_failure)
            logger.error("%s for Id %s" %
                         (str(failure_message), str(requested_ids)))
            return self.error_response(failure_message)

        configured_model_path = config("app.model_path",
                                       LDATagger.DEFAULT_MODEL_PATH)
        logger.info("Model Path: %s" % (configured_model_path))

        tokens_by_document = documents_response.to_docs_tokens_map()
        outcome = self.processor.process(
            tokens_by_document, os.path.abspath(configured_model_path))

        succeeded = outcome.is_success()
        if not succeeded:
            self.set_status(httplib.INTERNAL_SERVER_ERROR)

        self.write(outcome.to_json())
        self.set_header("Content-Type", "application/json")
Exemplo n.º 15
0
 def tearDown(self):
     """Run the parent tearDown, then clear the model at app.model_path."""
     super(InferTaggingIntegrationTest, self).tearDown()
     model_path = config("app.model_path")
     self.clear_model(model_path)
Exemplo n.º 16
0
    def setUp(self):
        """Prepare a freshly generated model and a reset stub server.

        After the parent setUp, the model at ``app.model_path`` is cleared,
        a new one is generated for inference, and the stub HTTP server is
        reset.
        """
        super(InferTaggingIntegrationTest, self).setUp()

        model_path = config("app.model_path")
        self.clear_model(model_path)
        self.generate_a_model_for_inference()

        stub_server = self.stub_http_server
        stub_server.reset()