示例#1
0
 def __init__(self):
     """Build the training-file and classifier-file locations from config.

     Each attribute is the concatenation of a configured location and a
     configured file name.
     """
     # flat file used for training: <flatfile.location><flatfile.mongo_train_fileName>
     flatfile_dir = Config.get_config_val(key="flatfile",
                                          key_1depth="location")
     flatfile_name = Config.get_config_val(key="flatfile",
                                           key_1depth="mongo_train_fileName")
     self.train_file_location = flatfile_dir + flatfile_name

     # response classifier file: <model.file.location><model.file.response_classifier>
     model_dir = Config.get_config_val(key="model",
                                       key_1depth="file",
                                       key_2depth="location")
     model_name = Config.get_config_val(key="model",
                                        key_1depth="file",
                                        key_2depth="response_classifier")
     self.filename = model_dir + model_name
示例#2
0
class CacheService:
    """Class-level helper around a shared Redis client used as a cache.

    The settings below are read from ``Config`` once, when the class body is
    executed. Every method is a classmethod that operates on the shared
    ``cache`` client, so the class is used without being instantiated.
    """

    # retrieve caching properties from config
    cache_ttl = int(Config.get_config_val(key="cache", key_1depth="ttl"))
    # NOTE: cache_max_size is not consumed by the Redis-backed methods below;
    # it was only used by the in-memory TTLCache variant noted further down.
    cache_max_size = int(
        Config.get_config_val(key="cache", key_1depth="max_size"))
    cache = redis.from_url(
        Config.get_config_val(key="cache", key_1depth="redis_url")
    )  # instead use a wrapper and retrieve from os.environ.get("REDIS_URL")

    # Previous in-memory alternative, kept for reference:
    # cache = TTLCache(maxsize=cache_max_size, ttl=cache_ttl)

    @classmethod
    def get_object(cls, key):
        """
        retrieves the object from cache

        :param key: cache key to look up
        :return: the cached value as ``str``, or ``None`` when the key is
                 absent/expired or the cache lookup fails
        """
        try:
            # One GET instead of EXISTS followed by GET: it halves the
            # round-trips and removes the window in which the key could
            # expire between the two calls (which made ``.decode`` run on
            # ``None`` and surfaced as a bogus internal error).
            raw = cls.cache.get(key)
            if raw is None:
                return None
            # Clients configured to decode responses already hand back str.
            if isinstance(raw, bytes):
                return raw.decode("utf-8")
            return raw
        except KeyError as ke:
            logger.error("Either key expired or not present : {0}".format(ke))
            return None
        except Exception as e:
            # Best-effort cache: any failure is logged and reported as a miss.
            logger.error(
                "internal exception while using cache : {0}".format(e))
            return None

    @classmethod
    def set_object(cls, key, value):
        """
        stores a value in cache with the configured time-to-live

        :param key: cache key
        :param value: value to store
        :return: None; errors raised by the cache client propagate
        """
        logger.info("key : {0}, value : {1}".format(key, value))
        cls.cache.set(key, value, ex=cls.cache_ttl)

    @classmethod
    def remove_objects(cls, key):
        """
        removes the value stored under the given key from cache

        :param key: cache key to delete
        :return: None
        """
        try:
            logger.info("removing value from cache for : {0}".format(key))
            cls.cache.delete(key)
        except KeyError as ke:
            logger.error(
                "already removed from cache for key : {0}. Exception is : {1}".
                format(key, ke))
示例#3
0
    def predict_response(cls, token, broker_id, lang, query):
        """
        This returns the prediction for a given query

        The user is resolved from the token, then the broker, then the
        trained classifier matching the configured model/vector types. When
        any of these lookups yields ``None``, or an exception is raised, a
        plain message string is returned instead of a prediction.

        :param token: authentication session token
        :param broker_id: broker for which query is requested
        :param lang: language for which model was trained
        :param query: question being asked
        :return: response object from the classifier, or an error message string
        """

        def _fail(message):
            # Failures are logged at ERROR level; the message doubles as the response.
            logger.error(message)
            return message

        logger.info("***************inside predict_response*******************")
        try:
            # get user from token
            user = AuthService.get_logged_in_user(token=token)
            if user is None:
                return _fail("Unauthorized access/ session expired. Please re-login")
            logger.info(user)

            broker = BrokerDao.get_broker_by_id(broker_id=broker_id)
            if broker is None:
                return _fail("Could not find broker")
            logger.info(broker)

            model_type = Config.get_config_val(key="model", key_1depth="classifier", key_2depth="model")
            vector_type = Config.get_config_val(key="model", key_1depth="vectorizer", key_2depth="vector")
            logger.info(model_type)
            logger.info(vector_type)

            classifier_instance = TrainedClassifierDao.get_trained_classifier_obj_from_db(
                user=user,
                broker=broker,
                model_type=model_type,
                vector_type=vector_type,
                lang=lang)
            if classifier_instance is None:
                return _fail("could not find trained_classifier")

            response = classifier_instance.predict(user=user, broker=broker, model_type=model_type,
                                                   vector_type=vector_type, lang=lang, query=query)
        except Exception as e:
            # logger.exception keeps the traceback, which the plain error log dropped.
            logger.exception("error : {0}".format(e))
            return _fail("Error occurred")

        # A successful prediction is not an error, so it is logged at INFO level.
        logger.info(response)
        return response
def main():
    """Initialise the application, wire up the Telegram handlers and start polling."""
    initialize()

    # The bot token is stored under auth -> telegram -> token in the config.
    bot_token = Config.get_config_val(
        key="auth", key_1depth="telegram", key_2depth="token")

    # use_context=True selects the context based callbacks; per the original
    # note, this is no longer necessary after version 12 of the library.
    bot_updater = Updater(bot_token, use_context=True)
    dispatcher = bot_updater.dispatcher

    # Conversation with two states, QUERY and CANCEL. '/start' enters the
    # conversation and '/cancel' is also registered as the fallback.
    start_handlers = [CommandHandler('start', start)]
    state_handlers = {
        QUERY: [MessageHandler(Filters.text, query)],
        CANCEL: [CommandHandler('cancel', cancel)],
    }
    cancel_handlers = [CommandHandler('cancel', cancel)]
    conversation = ConversationHandler(
        entry_points=start_handlers,
        states=state_handlers,
        fallbacks=cancel_handlers,
        allow_reentry=True,
    )
    dispatcher.add_handler(conversation)

    # route every error to the shared error callback
    dispatcher.add_error_handler(error)

    bot_updater.start_polling()
    bot_updater.idle()
示例#5
0
    def __init__(self):
        """Read column names and the training-file location from config and
        start with empty training state."""

        def _column(name):
            # all data-frame column names live in the "df_columns" config section
            return Config.get_config_val(key="df_columns", key_1depth=name)

        # column names
        self.col_lang = _column("col_lang")
        self.col_category = _column("col_category")
        self.col_query = _column("col_query")
        self.col_response = _column("col_response")
        self.col_variables = _column("col_variables")
        self.col_input_circumstance = _column("col_input_circumstance")
        self.col_output_circumstance = _column("col_output_circumstance")

        # training flat file: configured location followed by configured file name
        flatfile_dir = Config.get_config_val(key="flatfile", key_1depth="location")
        flatfile_name = Config.get_config_val(key="flatfile", key_1depth="mongo_train_fileName")
        self.train_file_location = flatfile_dir + flatfile_name

        # nothing trained or loaded yet
        self.trained_classifier_obj = None
        self.train_list = []
        self.df_train_flatfile = pd.DataFrame()
示例#6
0
    def __init__(self, vector_type, model_type, train_file_location,
                 trained_classifier):
        """Store the training inputs, read column names from config and
        resolve the model and vectorizer from their factories.

        :param vector_type: passed to ``vectorizer_factory.get_vector``
        :param model_type: passed to ``model_factory.get_model``
        :param train_file_location: training file location
        :param trained_classifier: stored as-is on the instance
        """

        def _column(name):
            # all data-frame column names live in the "df_columns" config section
            return Config.get_config_val(key="df_columns", key_1depth=name)

        # training file location and the classifier handed in by the caller
        self.train_file_location = train_file_location
        self.trained_classifier = trained_classifier

        # language column (dictionary of predictors), query column, target column
        self.col_lang = _column("col_lang")
        self.col_query = _column("col_query")
        self.col_category = _column("col_category")

        # derived column with label encoding
        self.col_category_numeric = self.col_category + "_numeric"

        # columns extracted from the predicted category
        self.col_response = _column("col_response")
        self.col_variables = _column("col_variables")
        self.col_input_circumstance = _column("col_input_circumstance")
        self.col_output_circumstance = _column("col_output_circumstance")

        # model and vector details; the fitted model itself is not set yet
        self.model_type = model_type
        self.model_category = model_factory.get_model(model_type)
        self.model = None
        self.vector_type = vector_type
        self.vector = vectorizer_factory.get_vector(vector_type)

        # data frames and train/test splits start out unset
        self.train_df = self.unique_train_df = None
        self.X_train = self.X_train_vect = None
        self.X_test = self.X_test_vect = None
        self.y_train = self.y_test = None
    def __init__(self, unique_train_df, model, vector, use_decision_function,
                 decision_boundary):
        """
        Keep the trained artefacts on the instance and read column names from config.

        :param unique_train_df: unique classes
        :param model: model used for training
        :param vector: vectorizer used for training
        :param use_decision_function: whether to use decision function or not
        :param decision_boundary: threshold for decision function
        """

        def _column(name):
            # all data-frame column names live in the "df_columns" config section
            return Config.get_config_val(key="df_columns", key_1depth=name)

        self.model = model
        self.vector = vector
        self.use_decision_function = use_decision_function
        self.decision_boundary = decision_boundary
        self.unique_train_df = unique_train_df

        # log the first row of the unique-classes frame
        first_row = self.unique_train_df.head(1)
        logger.info(first_row)

        # column names
        self.col_lang = _column("col_lang")
        self.col_category = _column("col_category")
        # derived column with label encoding
        self.col_category_numeric = self.col_category + "_numeric"
        self.col_query = _column("col_query")
        self.col_response = _column("col_response")
        self.col_variables = _column("col_variables")
        self.col_input_circumstance = _column("col_input_circumstance")
        self.col_output_circumstance = _column("col_output_circumstance")
示例#8
0
from mongoengine import *
from modules.utils.yaml_parser import Config
import json
from datetime import datetime

# Read the connection settings from the "mongodb" config section (url first,
# then db) and open the MongoDB connection when this module is imported.
url, db = (
    Config.get_config_val(key="mongodb", key_1depth=setting)
    for setting in ("url", "db")
)
connect(db, host=url)


class User(Document):
    """MongoEngine document holding a user's profile and credential fields."""
    # required name fields, each limited to 100 characters
    first_name = StringField(required=True, max_length=100)
    last_name = StringField(required=True, max_length=100)
    # required; stored as a plain string (no e-mail format validation here)
    email = StringField(required=True, max_length=255)
    # NOTE(review): declared as a plain StringField - confirm that callers
    # hash the value before saving
    password = StringField(required=True, max_length=100)
    # optional demographic fields; gender is limited to a single character
    age = IntField(required=False)
    gender = StringField(required=False, max_length=1)
    # required; nothing here sets it automatically, so callers must supply it
    created_on = DateTimeField(required=True)
    # optional token, presumably tied to the Telegram integration - verify against callers
    telegram_oAuth_token = StringField(required=False, max_length=100)


# TODO: not used yet. When it is put to use, make it extend Document.
class Language:
    """Placeholder for a language entry; currently a plain class, not a Document."""
    # language code, at most 8 characters
    lang_code = StringField(required=True, max_length=8)


class Broker(Document):
    """MongoEngine document describing a broker linked to a ``User``."""
    # reference to the User document this broker is linked to
    user_id = ReferenceField(User)
    # required broker name, at most 100 characters
    broker_name = StringField(required=True, max_length=100)
    # language code used when none is given; defaults to "en-US"
    default_lang = StringField(required=True, max_length=8,
                               default="en-US")  # TODO replace with Language
示例#9
0
    def bulk_insert_documents(self):
        """Load the flat training file and save one ``Train`` document per category.

        The CSV named by the "flatfile" config section is read, three of its
        columns are renamed to the configured column names, and the rows are
        split by category. For every category a ``Train`` object is built
        from that category's queries plus the first unique language,
        circumstance, response and variables values, and then saved.

        :return: None
        :raises Exception: errors from reading the CSV, from missing columns,
            from decoding the variables JSON or from saving are not caught
        """

        def _column(name):
            # all data-frame column names live in the "df_columns" config section
            return Config.get_config_val(key="df_columns", key_1depth=name)

        # load the old file
        old_train_file_location = Config.get_config_val(key="flatfile", key_1depth="location") + Config.get_config_val(
            key="flatfile", key_1depth="mongo_train_fileName")
        consumer_ques = pd.read_csv(old_train_file_location)

        # Resolve the configured column names once instead of on every loop iteration.
        col_category = _column("col_category")
        col_query = _column("col_query")
        col_response = _column("col_response")
        col_lang = _column("col_lang")
        col_input_circumstance = _column("col_input_circumstance")
        col_output_circumstance = _column("col_output_circumstance")
        col_variables = _column("col_variables")

        # first change the column names
        consumer_ques = consumer_ques.rename(
            columns={'question-category': col_category,
                     'question': col_query,
                     'answer': col_response})

        # in order to create 1 row per category, the data is split per category:
        # 1. extract unique categories in data
        categories = consumer_ques[col_category].unique()

        # 2. iterate over each category
        for cat in categories:
            print('category : {0}'.format(cat))

            # 3. split data per category
            df = consumer_ques[consumer_ques[col_category] == cat]

            # 4. extract queries and language (the category itself is already in "cat")
            training_queries = df[col_query].tolist()
            lang = df[col_lang].unique()[0]

            # 5. create train object
            trainObj = Train(category=cat, lang=lang, training_queries=training_queries)

            # 6. create circumstance from the first unique input/output values
            trainObj.circumstance = Circumstance(
                input_circumstance=df[col_input_circumstance].unique()[0],
                output_circumstance=df[col_output_circumstance].unique()[0])

            # 7. create response holding the first unique response text
            textList = [df[col_response].unique()[0]]
            trainObj.response.append(Response(text=textList, custom=''))

            # 8. create variables; the column holds a JSON list of objects
            variables = json.loads(df[col_variables].unique()[0])
            for var in variables:
                varObj = Variables(name=var.get('name'), type=var.get('type'), value=var.get('value'),
                                   io_type=var.get('io_type'))
                trainObj.variables.append(varObj)

            # 9. save the object
            trainObj.save()