def run_test(self):
    """
    Run a single data-driven number detection test case.

    NOTE(review): ``testcase``, ``language`` and ``failure_string_prefix`` are not
    defined in this block; presumably they come from an enclosing scope - confirm.

    The detector is built for ``language`` with the test case's optional
    ``unit_type``; ``min_digit`` / ``max_digit`` from the test case override the
    detector's own values when present. Every (number_dict, span) pair returned
    by the detector must appear in the expected outputs, and the counts must match.
    """
    text = testcase["message"]
    detector = NumberDetector(entity_name="number",
                              language=language,
                              unit_type=testcase.get("unit_type", None))

    # Fall back to the detector's current limits when the test case has none.
    min_digit = testcase.get('min_digit', detector.min_digit)
    max_digit = testcase.get('max_digit', detector.max_digit)
    detector.set_min_max_digits(min_digit=min_digit, max_digit=max_digit)

    found_numbers, found_spans = detector.detect_entity(text)

    wanted_numbers, wanted_spans = parse_expected_outputs(testcase["outputs"])
    expected_outputs = list(six.moves.zip(wanted_numbers, wanted_spans))

    prefix = failure_string_prefix.format(message=text, language=language)

    length_mismatch = (
        prefix +
        u"Returned numbers and original_texts have different lengths")
    self.assertEqual(len(found_numbers), len(found_spans), length_mismatch)

    count_mismatch = (
        prefix +
        u"Returned numbers and expected_outputs have different lengths")
    self.assertEqual(len(found_spans), len(expected_outputs), count_mismatch)

    for got in six.moves.zip(found_numbers, found_spans):
        not_expected = prefix + u"{got} not in {expected_outputs}".format(
            got=got, expected_outputs=expected_outputs)
        self.assertIn(got, expected_outputs, not_expected)
Пример #2
0
 def test_en_number_detection_for_integer_number(self):
     """
     Number detection for english language for integer number like '100', '2'

     Expects exactly one detection for the message: value '100' with no unit,
     matched on the original text '100'.
     """
     message = u'100 got selected for interview'
     number_detector_object = NumberDetector(entity_name=self.entity_name,
                                             language='en')
     number_dicts, original_texts = number_detector_object.detect_entity(
         message)
     # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
     zipped = list(zip(number_dicts, original_texts))
     self.assertEqual(len(zipped), 1)
     self.assertIn(({'value': '100', 'unit': None}, u'100'), zipped)
Пример #3
0
 def test_en_number_detection_for_integer_number_with_unit(self):
     """
     Number detection for english language for integer number with units like 'Rs100', '2Rs'

     Expects exactly one detection for the message: value '100' with unit
     'rupees', matched on the original text 'rs.100'.
     """
     message = u'rs.100 is the application charger'
     number_detector_object = NumberDetector(entity_name=self.entity_name,
                                             language='en')
     number_dicts, original_texts = number_detector_object.detect_entity(
         message)
     # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
     zipped = list(zip(number_dicts, original_texts))
     self.assertEqual(len(zipped), 1)
     self.assertIn(({'value': '100', 'unit': 'rupees'}, 'rs.100'), zipped)
Пример #4
0
    def test_en_number_detection_for_decimal_number_with_scale_and_unit(self):
        """
        English message containing a scaled decimal followed by a unit word ('2.3k rupees').

        The assertion expects a single detection whose value is '2300', whose unit
        is None and whose original text is '2.3k' (i.e. the unit is excluded).
        """
        detector = NumberDetector(entity_name=self.entity_name, language='en')
        detected_numbers, matched_texts = detector.detect_entity(
            'I bought a car toy for 2.3k rupees')

        detected_pairs = list(zip(detected_numbers, matched_texts))
        expected_pair = ({'value': '2300', 'unit': None}, u'2.3k')

        self.assertEqual(len(detected_pairs), 1)
        self.assertIn(expected_pair, detected_pairs)
Пример #5
0
    def test_en_number_detection_for_decimal_number_with_scale(self):
        """
        Number detection for english language for decimal number with scale like '1.2 thousand', '2.2k', '1.4m'

        Expects exactly one detection for the message: value '2200' with no unit,
        matched on the original text '2.2k'.
        """
        message = 'my monthly salary is 2.2k'
        number_detector_object = NumberDetector(entity_name=self.entity_name,
                                                language='en')
        number_dicts, original_texts = number_detector_object.detect_entity(
            message)

        # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
        zipped = list(zip(number_dicts, original_texts))
        self.assertEqual(len(zipped), 1)
        self.assertIn(({'value': '2200', 'unit': None}, u'2.2k'), zipped)
Пример #6
0
    def test_en_number_detection_for_integer_number_with_scale(self):
        """
        Number detection for english language for integer number with scale like '1 thousand', '1k', '1m'

        Expects exactly one detection for the message: value '1000' with no unit,
        matched on the original text '1 thousand'.
        """
        message = '1 thousand men were killed in war'
        number_detector_object = NumberDetector(entity_name=self.entity_name,
                                                language='en')
        number_dicts, original_texts = number_detector_object.detect_entity(
            message)

        # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
        zipped = list(zip(number_dicts, original_texts))
        self.assertEqual(len(zipped), 1)
        self.assertIn(({'value': '1000', 'unit': None}, u'1 thousand'), zipped)
Пример #7
0
    def test_en_number_detection_for_decimal_number(self):
        """
        Number detection for english language for decimal number like '100.2'

        Expects exactly one detection for the message: value '11.2' with no unit,
        matched on the original text '11.2'.
        """
        message = u'Todays temperature is 11.2 degree celsius'
        number_detector_object = NumberDetector(entity_name=self.entity_name,
                                                language='en')
        number_dicts, original_texts = number_detector_object.detect_entity(
            message)

        # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
        zipped = list(zip(number_dicts, original_texts))
        self.assertEqual(len(zipped), 1)
        self.assertIn(({'value': '11.2', 'unit': None}, u'11.2'), zipped)
    def __init__(self, entity_name, language, data_directory_path, unit_type=None):
        """
        Set up a number range detector (min / max values in range text) for one language.

        Args:
            entity_name (str): name of the entity; the detection tag is built as
                               '__' + entity_name + '__'
            language (str): language code of text
            data_directory_path (str): path of data folder for given language; handed to
                                       ``_init_regex_for_range``
            unit_type (str, optional): number unit type like weight, currency, temperature, used to
                                       restrict detection to that unit type. If None, number ranges are
                                       detected irrespective of units. Supported unit types are listed in
                                       the number detection language data file unit.csv.
        """
        # Values taken straight from the constructor arguments.
        self.entity_name = entity_name
        self.language = language
        self.unit_type = unit_type
        self.tag = '__' + entity_name + '__'

        # Text buffers all start out empty.
        self.text = self.tagged_text = self.processed_text = ''

        # Separate dict objects on purpose - these must not share state.
        self.range_variants_map = {}
        self.number_detected_map = {}

        # Range variants are unset until the regex initialisation below runs.
        self.min_range_prefix_variants = None
        self.min_range_suffix_variants = None
        self.max_range_prefix_variants = None
        self.max_range_suffix_variants = None
        self.min_max_range_variants = None

        # Inner number detector, allowed to match numbers without a unit and
        # configured with digit limits 1 and 100.
        self.number_detector = NumberDetector(
            entity_name=entity_name,
            language=language,
            unit_type=unit_type,
            detect_without_unit=True,
        )
        self.number_detector.set_min_max_digits(1, 100)

        # Method to initialise regex params
        self._init_regex_for_range(data_directory_path)

        # Default order in which the individual detectors are applied.
        self.detector_preferences = [
            self._detect_min_max_num_range,
            self._detect_min_num_range_with_prefix_variants,
            self._detect_min_num_range_with_suffix_variants,
            self._detect_max_num_range_with_prefix_variants,
            self._detect_max_num_range_with_suffix_variants,
            self._detect_absolute_number,
        ]
Пример #9
0
    def test_en_number_detection_for_decimal_number_with_scale_and_unit_and_different_unit_type_given(
            self):
        """
        English message with a scaled decimal and a 'kg' unit, detector restricted to unit type 'currency'.

        The assertion expects no detections at all for this message.
        """
        detector = NumberDetector(entity_name=self.entity_name,
                                  language='en',
                                  unit_type='currency')
        detected_numbers, matched_texts = detector.detect_entity(
            'I buys 2.3k kg mango')

        detected_pairs = list(zip(detected_numbers, matched_texts))
        self.assertEqual(len(detected_pairs), 0)
Пример #10
0
    def test_en_number_detection_for_integer_number_with_scale_and_unit(self):
        """
        Number detection for english language for integer number with scale and unit like 'Rs 1 thousand', '1k Rs'

        Expects exactly one detection for the message: value '1000' with unit
        'rupees', matched on the original text '1 thousand rupees'.
        """
        message = 'i need 1 thousand rupees'
        number_detector_object = NumberDetector(entity_name=self.entity_name,
                                                language='en')
        number_dicts, original_texts = number_detector_object.detect_entity(
            message)

        # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
        zipped = list(zip(number_dicts, original_texts))
        self.assertEqual(len(zipped), 1)
        self.assertIn(({
            'value': '1000',
            'unit': 'rupees'
        }, u'1 thousand rupees'), zipped)
Пример #11
0
    def test_en_number_detection_for_decimal_number_with_unit(self):
        """
        Number detection for english language for decimal number with unit like '10.2k rupees'

        Expects exactly one detection for the message: value '10120' with unit
        'rupees', matched on the original text '10.12k rupees'.
        """
        message = u'my monthly salary is 10.12k rupees'
        number_detector_object = NumberDetector(entity_name=self.entity_name,
                                                language='en')
        number_dicts, original_texts = number_detector_object.detect_entity(
            message)

        # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
        zipped = list(zip(number_dicts, original_texts))
        self.assertEqual(len(zipped), 1)
        self.assertIn(({
            'value': '10120',
            'unit': 'rupees'
        }, u'10.12k rupees'), zipped)
Пример #12
0
    def test_en_number_detection_for_decimal_number_with_scale_and_unit_and_unit_type_given(
            self):
        """
        Number detection for english language for decimal number with scale like '1.2 thousand rupees', 'Rupees 2.2k'

        The detector is restricted to unit type 'currency'. Expects exactly one
        detection for the message: value '2300' with unit 'rupees', matched on the
        original text '2.3k rupees'.
        """
        message = 'I bought a car toy for 2.3k rupees'
        number_detector_object = NumberDetector(entity_name=self.entity_name,
                                                language='en',
                                                unit_type='currency')
        number_dicts, original_texts = number_detector_object.detect_entity(
            message)

        # zip() is a lazy iterator on Python 3 and has no len(); materialise it.
        zipped = list(zip(number_dicts, original_texts))
        self.assertEqual(len(zipped), 1)
        self.assertIn(({
            'value': '2300',
            'unit': 'rupees'
        }, u'2.3k rupees'), zipped)
Пример #13
0
def resolve_numerals(text, language) -> str:
    """
    Replace numeric occurrences in text with the values found by NumberDetector.

    Detection is run twice on the same text: once as-is and once with
    ``language='hi'`` passed to ``detect_entity``. Each detected original text is
    then substituted (case-insensitively) by its detected value.

    Args:
        text (str): processed string with numerals and character constants fixed
        language (str): Language for NumberDetector
    Returns:
        processed_text (str): modified text
    """
    detector = NumberDetector('asr_dummy', language=language)
    # FIXME: Detection fails if text starts with '0' since number detector discards it
    numerals, matched_texts = detector.detect_entity(text=text)
    numerals_hi, matched_texts_hi = detector.detect_entity(text=text,
                                                           language='hi')
    # In-place extension of the lists returned by the first detection.
    numerals += numerals_hi
    matched_texts += matched_texts_hi

    resolved = text
    for numeral, matched in zip(numerals, matched_texts):
        # The matched text is escaped so it is treated literally, not as a pattern.
        resolved = re.sub(re.escape(matched),
                          numeral[NUMBER_DETECTION_RETURN_DICT_VALUE],
                          resolved,
                          flags=re.IGNORECASE)
    return resolved
Пример #14
0
def number(request):
    """Detect numerals for the parameters carried by a request using NumberDetector.

    The request is turned into a parameter dictionary with
    ``get_parameters_dictionary``. A ``NumberDetector`` is built from the entity
    name, source language and unit type, the digit limits are applied only when
    both ``min_digit`` and ``max_digit`` are supplied, and ``detect`` is run.

    Request params:
        message (str): natural text on which detection logic is to be run. Note: if a structured value is
                       present, detection is run on the structured value instead of the message
        entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                           if entity uses elastic-search lookup
        structured_value (str): value obtained from any structured elements
                                (for example, UI elements like form, payload, etc). Note: if a structured
                                value is present, detection is run on it instead of the message
        fallback_value (str): if the detection logic fails to detect any value either from structured_value
                              or message, the fallback_value is returned as the output
        bot_message (str): previous message from a bot/agent
        unit_type (str): restrict number detection to some unit type like 'currency', 'temperature'
        min_digit (str): min digit
        max_digit (str): max digit

    Returns:
        HttpResponse: JSON response of the form {'data': <detection output>} with content type
                      'application/json'; a response with status 500 when a TypeError is raised
                      during detection.

    Examples (taken from the endpoint's existing documentation):

        message = "I want to purchase 30 units of mobile and 40 units of Television"
        entity_name = 'number_of_unit'
        structured_value = None
        fallback_value = None
        bot_message = None
        unit_type = None
        min_digit = 1
        max_digit = 2

            >> [{'detection': 'message', 'original_text': '30', 'entity_value': {'value': '30', 'unit': None}},
                {'detection': 'message', 'original_text': '40', 'entity_value': {'value': '40', 'unit': None}}]

        message = "I want to reserve a table for 3 people"
        entity_name = 'number_of_people'
        structured_value = None
        fallback_value = None
        bot_message = None
        unit_type = None
        min_digit = 1
        max_digit = 6

            >> [{'detection': 'message', 'original_text': 'for 3 people', 'entity_value':
                                                                     {'value': '3', 'unit': 'people'}}]
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)

        detector = NumberDetector(
            entity_name=entity_name,
            language=params[PARAMETER_SOURCE_LANGUAGE],
            unit_type=params[PARAMETER_NUMBER_UNIT_TYPE],
        )

        # Digit limits are only overridden when both bounds were supplied.
        if params[PARAMETER_MIN_DIGITS] and params[PARAMETER_MAX_DIGITS]:
            lower = int(params[PARAMETER_MIN_DIGITS])
            upper = int(params[PARAMETER_MAX_DIGITS])
            detector.set_min_max_digits(min_digit=lower, max_digit=upper)

        entity_output = detector.detect(
            message=params[PARAMETER_MESSAGE],
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
            bot_message=params[PARAMETER_BOT_MESSAGE],
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))

    except TypeError as e:
        ner_logger.exception('Exception for numeric: %s ' % e)
        return HttpResponse(status=500)

    # Serialisation happens outside the try block, so its errors are not swallowed.
    payload = json.dumps({'data': entity_output})
    return HttpResponse(payload, content_type='application/json')
Пример #15
0
def get_currency(text,detected_lang):
    """
    Run number detection restricted to unit type 'currency' on the given text.

    Args:
        text: text handed to ``NumberDetector.detect_entity``
        detected_lang: language passed to ``NumberDetector``

    Returns:
        Whatever ``NumberDetector.detect_entity`` returns for ``text``.
    """
    # Imported locally, as in the original code.
    from ner_v2.detectors.numeral.number.number_detection import NumberDetector
    return NumberDetector(entity_name='number',
                          language=detected_lang,
                          unit_type='currency').detect_entity(text)