Пример #1
0
    def _set_params(self, kwargs):
        """
        Set input parameters based on the request.
        :
        :kwargs holds the execution arguments received with the request. If its length is zero
        :no arguments are parsed and every parameter keeps its default value.
        :
        :Attributes set: debug, kwargs, pass_on_kwargs (and logfile when debug is switched on).
        :The 'kwargs' column is removed from request_df before returning.
        :For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
        """

        # Default values which will be used if execution arguments are not passed
        self.debug = False

        # Always define both dictionaries, so that code running after this method can rely on
        # them being present even when the request carried no arguments
        self.kwargs = {}
        self.pass_on_kwargs = {}

        # If key word arguments were included in the request, get the parameters and values
        if len(kwargs) > 0:

            # Transform the string of arguments into a dictionary
            self.kwargs = utils.get_kwargs(kwargs)

            # Set the debug option for generating execution logs
            # Valid values are: true, false
            # The argument is popped so that it is not included in pass_on_kwargs below
            if 'debug' in self.kwargs:
                self.debug = 'true' == self.kwargs.pop('debug').lower()

                # Logging is only set up if the parameter debug = true
                if self.debug:
                    # Increment log counter for the class. Each instance of the class generates a new log.
                    self.__class__.log_no += 1

                    # Build the log file path for the instance:
                    # <current working directory>/logs/Common Functions Log <n>.txt
                    self.logfile = os.path.join(
                        os.getcwd(), 'logs',
                        'Common Functions Log {}.txt'.format(self.log_no))

                    self._print_log(1)

            # Get the rest of the parameters, converting values to the correct data type
            self.pass_on_kwargs = utils.get_kwargs_by_type(self.kwargs)

        # Second logging step, only performed if the parameter debug = true
        if self.debug:
            self._print_log(2)

        # Remove the kwargs column from the request_df
        self.request_df = self.request_df.drop(['kwargs'], axis=1)
Пример #2
0
    def _set_params(self, kwargs):
        """
        Set input parameters based on the request.
        :
        :The model name is read from the 'model_name' column in the first row of request_df,
        :if that column is available, and the column is then removed.
        :
        :Execution arguments handled here: debug, custom, base_model, blank, epochs, batch_size, drop, test.
        :Any argument that is not passed keeps the default value assigned at the top of this method.
        :The 'kwargs' column is removed from request_df before returning.
        :For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
        """

        # Default values which will be used if execution arguments are not passed
        self.debug = False
        self.model = 'en_core_web_sm'
        self.custom = False
        self.base_model = 'en_core_web_sm'
        self.blank = False
        self.epochs = 100
        self.batch_size = compounding(4.0, 32.0, 1.001)
        self.drop = 0.25
        self.test = 0

        # Always define the arguments dictionary, so that code running after this method can
        # rely on it being present even when the request carried no arguments
        self.kwargs = {}

        # Extract the model name if it was supplied with the request
        try:
            # Get the model name from the first row in the request_df
            self.model = self.request_df.loc[0, 'model_name']

            # Remove the model_name column from the request_df
            self.request_df = self.request_df.drop(['model_name'], axis=1)
        except KeyError:
            # No model name available in the request: keep the default model
            pass

        # If key word arguments were included in the request, get the parameters and values
        if len(kwargs) > 0:

            # Transform the string of arguments into a dictionary
            self.kwargs = utils.get_kwargs(kwargs)

            # Set the debug option for generating execution logs
            # Valid values are: true, false
            if 'debug' in self.kwargs:
                self.debug = 'true' == self.kwargs['debug'].lower()

                # Logging is only set up if the parameter debug = true
                if self.debug:
                    # Increment log counter for the class. Each instance of the class generates a new log.
                    self.__class__.log_no += 1

                    # Build the log file path for the instance:
                    # <current working directory>/logs/SpaCy Log <n>.txt
                    self.logfile = os.path.join(
                        os.getcwd(), 'logs',
                        'SpaCy Log {}.txt'.format(self.log_no))

                    self._print_log(1)

            # Set whether the model (if getting named entities) or base model (if retraining) is a custom model
            # i.e. not one of the pre-trained models provided by spaCy
            if 'custom' in self.kwargs:
                self.custom = 'true' == self.kwargs['custom'].lower()

            # Set the base model, i.e. an existing spaCy model to be retrained.
            # The value is converted to lower case.
            if 'base_model' in self.kwargs:
                self.base_model = self.kwargs['base_model'].lower()

            # Set the retraining to be done on a blank Language class
            if 'blank' in self.kwargs:
                self.blank = 'true' == self.kwargs['blank'].lower()

            # Set the epochs for training the model.
            # This is the number of times that the learning algorithm will work through the entire training dataset.
            # Valid values are an integer e.g. 200
            if 'epochs' in self.kwargs:
                self.epochs = utils.atoi(self.kwargs['epochs'])

            # Set the batch size to be used during model training.
            # The model's internal parameters will be updated at the end of each batch.
            # Valid values are a single integer or compounding or decaying parameters.
            if 'batch_size' in self.kwargs:
                # The batch size may be a single integer
                try:
                    self.batch_size = utils.atoi(self.kwargs['batch_size'])
                # Or a sequence of three values: start, end and rate
                except ValueError:
                    sizes = utils.get_kwargs_by_type(self.kwargs['batch_size'])

                    # If the start < end, batch sizes will be compounded
                    if sizes[0] < sizes[1]:
                        self.batch_size = compounding(sizes[0], sizes[1],
                                                      sizes[2])
                    # else batch sizes will decay during training
                    else:
                        self.batch_size = decaying(sizes[0], sizes[1],
                                                   sizes[2])

            # Set the dropout rate for retraining the model
            # This determines the likelihood that a feature or internal representation in the model will be dropped,
            # making it harder for the model to memorize the training data.
            # Valid values are a float less than 1.0 e.g. 0.35
            if 'drop' in self.kwargs:
                self.drop = utils.atof(self.kwargs['drop'])

            # Set the ratio of data to be used for testing.
            # This data will be held out from training and just used to provide evaluation metrics.
            # Valid values are a float >= zero and < 1.0 e.g. 0.3
            if 'test' in self.kwargs:
                self.test = utils.atof(self.kwargs['test'])

        # Second logging step, only performed if the parameter debug = true
        if self.debug:
            self._print_log(2)

        # Remove the kwargs column from the request_df
        self.request_df = self.request_df.drop(['kwargs'], axis=1)
Пример #3
0
    def _set_params(self, kwargs):
        """
        Read the execution arguments sent with the request and store them on the instance.
        :
        :Arguments taken out of the request here: debug, return, identifier, exclude_identifier,
        :keras_wait and keras_retries. The remaining arguments are converted to their data types
        :and kept in pass_on_kwargs, from which the 'labels' argument is then taken out.
        :The 'kwargs' column is removed from request_df before returning.
        :For details refer to the GitHub project: https://github.com/nabeel-oz/qlik-py-tools
        """

        # Transform the string of arguments into a dictionary.
        # A request without arguments results in an empty dictionary.
        if len(kwargs) == 0:
            self.kwargs = {}
        else:
            self.kwargs = utils.get_kwargs(kwargs)

        # Debug option for generating execution logs. Valid values are: true, false
        if 'debug' in self.kwargs:
            self.debug = 'true' == self.kwargs.pop('debug').lower()
        else:
            self.debug = False

        # Logging is only set up if the parameter debug = true
        if self.debug:
            # Each instance of the class generates a new log, numbered by a class level counter
            self.__class__.log_no += 1

            # Log file path: <current working directory>/logs/Common Functions Log <n>.txt
            log_name = 'Common Functions Log {}.txt'.format(self.log_no)
            self.logfile = os.path.join(os.getcwd(), 'logs', log_name)

            self._print_log(1)

        # Name of the function to be called on the model, taken from the 'return' argument.
        # This is 'predict' unless another function, such as 'predict_proba', is requested.
        if 'return' in self.kwargs:
            self.prediction_func = self.kwargs.pop('return')
        else:
            self.prediction_func = 'predict'

        # Certain models may need sorted data for predictions.
        # The identifier argument names the feature to be used for sorting.
        if 'identifier' in self.kwargs:
            self.identifier = self.kwargs.pop('identifier')
        else:
            self.identifier = None

        # Whether the identifier should be left out of the inputs to the model
        if 'exclude_identifier' in self.kwargs:
            self.exclude_identifier = self.kwargs.pop('exclude_identifier').lower() == 'true'
        else:
            self.exclude_identifier = False

        # Number of seconds to wait if a Keras model is being loaded by another thread
        if 'keras_wait' in self.kwargs:
            self.wait = utils.atoi(self.kwargs.pop('keras_wait'))
        else:
            self.wait = 2

        # Number of retries if a Keras model is being loaded by another thread
        if 'keras_retries' in self.kwargs:
            self.retries = utils.atoi(self.kwargs.pop('keras_retries'))
        else:
            self.retries = 5

        # Whatever has not been popped above is converted to the correct data type
        if len(self.kwargs) == 0:
            self.pass_on_kwargs = {}
        else:
            self.pass_on_kwargs = utils.get_kwargs_by_type(self.kwargs)

        # The predictions may need to be decoded in case of classification labels.
        # The labels can be passed as a dictionary using the 'labels' argument.
        if 'labels' in self.pass_on_kwargs:
            self.labels = self.pass_on_kwargs.pop('labels')
        else:
            self.labels = None

        # Second logging step, only performed if the parameter debug = true
        if self.debug:
            self._print_log(2)

        # Remove the kwargs column from the request_df
        self.request_df = self.request_df.drop(['kwargs'], axis=1)