Exemplo n.º 1
0
def uncache_project_if_exists(serviceid):
    """
    Unpublish a service's active models and delete its local model folder.

    The NER model is looked up with the engine stored on the project
    document (falling back to "ICE"); the IR model is always looked up with
    ``engine=None``. Each model is unpublished only when its name appears in
    the model store's list of active models.

    :param serviceid: identifier of the service whose models are dropped.
    :return: None
    """
    store = get_model_store()
    project = ProjectManager().find_model({"serviceid": serviceid})
    active_models = store.get_active_models()

    # No project document means there is no stored engine to read.
    engine = "ICE" if project is None else get(project, "engine", "ICE")

    if get_model_name(serviceid, MODEL_TYPE_NER, engine) in active_models:
        store.unpublish(serviceid, MODEL_TYPE_NER, engine)

    if get_model_name(serviceid, MODEL_TYPE_IR, engine=None) in active_models:
        store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)

    # Remove the service's model directory from the local store.
    # NOTE(review): home_dir is not assigned inside this function; it is
    # presumably a module-level name - confirm it is defined in this module.
    service_dir = home_dir + os.sep + '.verbis/store' + os.sep + serviceid
    if os.path.exists(service_dir):
        shutil.rmtree(service_dir)

    logger.info("uncache_project_if_exists done.")
Exemplo n.º 2
0
def updateStatus(serviceid, train_ner, train_ir):
    """
    Put the requested training pipelines of a service on hold.

    Nothing is written when no project config exists for ``serviceid``.

    :param serviceid: identifier of the service to update.
    :param train_ner: when exactly ``True``, set ``ner.status`` to
        ``ProjectManager.STATUS_HOLD`` with its status message.
    :param train_ir: when exactly ``True``, set ``ir.status`` to
        ``ProjectManager.STATUS_HOLD`` with its status message.
    :return: None
    """
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    if manager.find_model(query) is None:
        return

    # Each flag is checked by identity against True and written separately.
    if train_ner is True:
        manager.update_config(query, {
            "$set": {
                "ner.status": ProjectManager.STATUS_HOLD,
                "ner.status_message":
                "Awaiting the completion of entity training."
            }
        })

    if train_ir is True:
        manager.update_config(query, {
            "$set": {
                "ir.status": ProjectManager.STATUS_HOLD,
                "ir.status_message":
                "Awaiting the completion of intent training."
            }
        })
Exemplo n.º 3
0
    def on_post(self, req, resp):
        """
        Handle POST: unpublish a service's models and reset its status.

        Reads ``serviceid`` from ``req.context['doc']``, unpublishes the NER
        and IR models if they are active, deletes the service's local model
        directory (and, only when that directory existed, the remote copies),
        then sets ``ner.status`` and ``ir.status`` to ``'new'``.

        :param req: request whose ``context['doc']`` holds the payload.
        :param resp: response; receives the header and HTTP 200 on success.
        :raises falcon.HTTPPreconditionFailed: on ``AssertionError``.
        :raises falcon.HTTPServiceUnavailable: on any other exception.
        """
        doc = req.context['doc'] or {}

        try:
            user_home = os.path.expanduser('~')
            store_root = user_home + os.sep + '.verbis/store'
            store = get_model_store()
            manager = ProjectManager()
            serviceid = doc['serviceid']
            query = {'serviceid': serviceid}
            project = manager.find_model(query)
            engine = get(project, "engine", "ICE")
            ner_model_name = get_model_name(serviceid, MODEL_TYPE_NER, engine)

            active_models = store.get_active_models()
            logger.info("model lists -  %s" % active_models)
            if ner_model_name in active_models:
                store.unpublish(serviceid, MODEL_TYPE_NER, engine)

            # The IR model is looked up and unpublished with engine=None.
            ir_model_name = get_model_name(serviceid, MODEL_TYPE_IR,
                                           engine=None)
            logger.info(ir_model_name)
            if ir_model_name in active_models:
                store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)

            service_dir = store_root + os.sep + serviceid
            logger.info("path is %s" % service_dir)
            # Remote cleanup runs only when a local copy was present.
            if os.path.exists(service_dir):
                shutil.rmtree(service_dir)
                VerbisStore().remove_models_from_remote(serviceid)
                logger.info("files removed successfully")

            # Mark both pipelines as untrained in the project config.
            manager.update_config(
                query, {"$set": {"ner.status": 'new', "ir.status": 'new'}})

        except AssertionError as ae:
            logger.exception(ae, exc_info=True)
            logger.error(traceback.format_exc())
            raise falcon.HTTPPreconditionFailed(
                'Service publish condition failed', traceback.format_exc())
        except Exception as ex:
            logger.exception(ex, exc_info=True)
            logger.error(traceback.format_exc())
            description = 'Internal Server Error, Please try again later'
            # NOTE(review): 30 is presumably the retry-after value - confirm
            # against the falcon version in use.
            raise falcon.HTTPServiceUnavailable('Service Outage', description,
                                                30)

        resp.set_header('X-Powered-By', 'USTGlobal Verbis')
        resp.status = falcon.HTTP_200
Exemplo n.º 4
0
def validate_service_id_and_cache(doc):
    """
    Validate the service id in ``doc`` and cache the models it requests.

    :param doc: request payload; its "serviceid" entry selects the project.
    :return: the project config found for the service id.
    :raises Exception: "Invalid Service ID." when no project config matches.
    """
    config = ProjectManager().find_model({"serviceid": get(doc, "serviceid")})
    if config is None:
        raise Exception("Invalid Service ID.")

    # Known service: cache the models for the services named in the request.
    cache_model(config, get_requested_services(doc))
    return config
Exemplo n.º 5
0
def get_corenlp_modelname(serviceid):
    """
    Return the "corenlp_model_name" value of a service's project config.

    :param serviceid: identifier of the service to look up.
    :return: the stored value, or ``None`` as the default passed to ``get``.
    """
    project = ProjectManager().find_model({"serviceid": serviceid})
    return get(project, "corenlp_model_name", default=None)
Exemplo n.º 6
0
def get_ner_status(serviceid):
    """
    Return the "ner.status" value of a service's project config.

    :param serviceid: identifier of the service to look up.
    :return: the stored value, or an empty list as the default passed to
        ``get``.
    """
    project = ProjectManager().find_model({"serviceid": serviceid})
    return get(project, "ner.status", default=[])
Exemplo n.º 7
0
def get_custom_class_name(serviceid):
    """
    Return the "custom_entity_model" value of a service's project config.

    :param serviceid: identifier of the service to look up.
    :return: the stored value, or an empty list as the default passed to
        ``get``.
    """
    project = ProjectManager().find_model({"serviceid": serviceid})
    return get(project, "custom_entity_model", default=[])
Exemplo n.º 8
0
    def deploy(self):
        """
        Train the intent model for ``self.serviceid`` on its dataset and save it.

        Steps, all recorded in the project config via ``ProjectManager``:

        1. Load the intent dataset and mark ``ir.status`` as training,
           storing per-intent counts in ``ir.dataset.stats``.
        2. Fit an ``IntentRecognizer`` and save it through ``VerbisStore``
           (locally and to minio).
        3. Write the trained utterances to the datasource and mark
           ``ir.status`` as trained.

        Any exception raised during these steps is caught: ``ir.status`` is
        set to ``STATUS_TRAINING_FAILED`` with a text message, and the
        traceback is printed. If no project config exists for the service,
        only an error is logged.

        :return: None
        """
        manager = ProjectManager()
        query = {
            "serviceid": self.serviceid
        }
        config = manager.find_model(query)
        if config is None:
            description = 'Unable to find project_config with given id.' \
                          'Please check your request params and retry'
            self.logger.error(description)
            return

        try:
            trained_data, df = get_ir_dataset(self.serviceid, self.logger)
            self.logger.info("Unique labels %s" % np.unique(df.intent.tolist()))

            group = df.groupby(['intent']).agg('count')
            stats = group.reset_index().to_json(orient="records")
            # The selector is enabled only when there is more than one intent.
            useSelector = len(group) > 1
            self.logger.info(stats)

            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINING,
                    "ir.status_message": "Intent training is in progress.",
                    "ir.dataset.stats": stats,
                    "ir.last_trained": datetime.datetime.utcnow()
                }
            }
            manager.update_config(query, document)

            ir = IntentRecognizer(DEFAULT_CONFIG, serviceid=self.serviceid, useSelector=useSelector)
            self.logger.info("Starting fitting for deployment")
            ir.fit(df, df.intent)
            self.logger.info("Fitting for deployment completed")

            VerbisStore().save_ir(ir)
            # Also send the model file to the minio server.
            VerbisStore().save_ir_minio(ir)
            document = {
                "$set": {
                    "utterances": trained_data,
                }
            }
            data_manager = DatasourceManager()
            data_manager.update_datasource(query, document)

            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINED,
                    "ir.status_message": "The Intent model has been successfully trained",
                    "ir.logs.deploy": ""
                }
            }
            manager.update_config(query, document)
        except Exception as e:
            self.logger.error(e)
            # Compare and store the exception *text*: an exception object is
            # never equal to a str, so matching on the object itself would
            # never select the friendly messages below.
            error_text = str(e)
            message = error_text
            if error_text in (
                    "After pruning, no terms remain. Try a lower min_df or a higher max_df.",
                    "max_df corresponds to < documents than min_df"):
                message = "Sufficient vocabulary to build the model is not available. Please add more utterances."
            elif error_text == "Invalid type float for labels":
                message = "Add more intents for intent training"
            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ir.status_message": message,
                    # NOTE(review): assumes the last logger handler exposes a
                    # ``logs`` attribute - confirm against the logger setup.
                    "ir.logs.deploy": self.logger.handlers[-1].logs
                }
            }
            manager.update_config(query, document)
            traceback.print_exc()
Exemplo n.º 9
0
    def train(self, train_intent):
        """
        Train the entity recognizer for ``self.serviceid`` and save it.

        Steps, all recorded in the project config via ``ProjectManager``:

        1. Mark ``ner.status`` as training (and, when intent training was
           also requested, put ``ir.status`` on hold).
        2. Train the recognizer built by ``self.instantiate_trainer`` on the
           service's datasource and save it through ``VerbisStore``. The
           local save is skipped for CoreNLP engines; the minio save always
           runs.
        3. Write the trained utterances to the datasource and mark
           ``ner.status`` as trained.

        Any exception raised during these steps is caught and logged, and
        ``ner.status`` is set to ``STATUS_TRAINING_FAILED`` with a text
        message. If no project config exists for the service, only an error
        is logged.

        :param train_intent: when exactly ``True``, ``ir.status`` is set to
            ``STATUS_HOLD`` while entity training runs.
        :return: None
        """
        manager = ProjectManager()
        query = {"serviceid": self.serviceid}
        config = manager.find_model(query)
        if config is None:
            description = 'Unable to find project_config with given id.' \
                          'Please check your request params and retry'
            self.logger.error(description)
            return

        try:
            status_fields = {
                "ner.status": ProjectManager.STATUS_TRAINING,
                "ner.status_message":
                "Entity training is in progress.",
                "ner.last_trained": datetime.datetime.utcnow()
            }
            if train_intent is True:
                # Intent training waits until entity training has finished.
                status_fields["ir.status"] = ProjectManager.STATUS_HOLD
                status_fields["ir.status_message"] = \
                    "Awaiting the completion of entity training."
            manager.update_config(query, {"$set": status_fields})

            # starting actual training
            data_manager = DatasourceManager()
            self.logger.info("Starting training of service %s" %
                             self.serviceid)
            corpus = data_manager.find_model(query)
            custom_entity_model = get(config, "custom_entity_model")
            entity_recognizer = self.instantiate_trainer(
                custom_entity_model)
            trained_utterances = entity_recognizer.train(corpus)
            if entity_recognizer.get_engine(
            ) not in get_all_corenlp_engines():
                VerbisStore().save_ner(entity_recognizer,
                                       model_type=MODEL_TYPE_NER)
            # Also send the model file to the minio server.
            VerbisStore().save_ner_minio(entity_recognizer,
                                         model_type=MODEL_TYPE_NER)
            document = {
                "$set": {
                    "utterances": trained_utterances,
                }
            }
            data_manager.update_datasource(query, document)

            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINED,
                    "ner.status_message":
                    "Entity training completed successfully.",
                    "ner.logs.train": ""
                }
            }
            manager.update_config(query, document)

            self.logger.info(
                "Completed training entity recognizer for service %s" %
                self.serviceid)
        except Exception as ex:
            self.logger.exception(ex, exc_info=True)
            self.logger.error(traceback.format_exc())
            # Compare and store the exception *text*: an exception object is
            # never equal to a str, so matching on the object itself would
            # never select the friendly message below.
            message = str(ex)
            if message == "Cannot have number of folds n_folds=3 greater than the number of samples: 2.":
                message = "Add more utterances for entity training"
            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ner.status_message": message,
                    # NOTE(review): assumes the last logger handler exposes a
                    # ``logs`` attribute - confirm against the logger setup.
                    "ner.logs.train": self.logger.handlers[-1].logs
                }
            }
            manager.update_config(query, document)