Пример #1
0
    def __init__(self,
                 entity_name,
                 data_directory_path,
                 locale=None,
                 timezone='UTC',
                 past_date_referenced=False):
        """
        Base Regex class which will be imported by language date class by giving their data folder path
        This will create standard regex and their parser to detect date for given language.

        Args:
            entity_name (str): name of the entity, also used to build the tag '__<entity_name>__'
            data_directory_path (str): path of data folder for given language, handed to
                                       init_regex_and_parser()
            locale (Optional, str): user locale default None, kept on the instance as self.locale
            timezone (Optional, str): user timezone default UTC, resolved through get_timezone()
            past_date_referenced (boolean): if the date reference is in past, this is helpful for text like 'kal',
                                          'parso' to know if the reference is past or future.
        """
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.date = []
        self.original_date_text = []
        self.entity_name = entity_name
        self.tag = '__' + entity_name + '__'
        self.timezone = get_timezone(timezone)

        # reference "now" in the resolved timezone
        self.now_date = datetime.datetime.now(tz=self.timezone)
        self.bot_message = None

        self.past_date_referenced = past_date_referenced
        # keep the locale instead of silently dropping the argument, so that it is available on the
        # instance before the regex and parsers are initialised
        self.locale = locale

        # dict to store words for date, numerals and words which comes in reference to some date
        self.date_constant_dict = {}
        self.datetime_constant_dict = {}
        self.numerals_constant_dict = {}

        # define dynamic created standard regex from language data files
        self.regex_relative_date = None
        self.regex_day_diff = None
        self.regex_date_month = None
        self.regex_date_ref_month_1 = None
        self.regex_date_ref_month_2 = None
        self.regex_date_ref_month_3 = None
        self.regex_after_days_ref = None
        self.regex_weekday_month_1 = None
        self.regex_weekday_month_2 = None
        self.regex_weekday_diff = None
        self.regex_weekday = None

        # Method to initialise value in regex
        self.init_regex_and_parser(data_directory_path)

        # Variable to define default order in which these regex will work
        self.detector_preferences = [
            self._gregorian_day_month_year_format, self._detect_relative_date,
            self._detect_date_month, self._detect_date_ref_month_1,
            self._detect_date_ref_month_2, self._detect_date_ref_month_3,
            self._detect_date_diff, self._detect_after_days,
            self._detect_weekday_ref_month_1, self._detect_weekday_ref_month_2,
            self._detect_weekday_diff, self._detect_weekday
        ]
    def __init__(self, entity_name, data_directory_path, timezone=None):
        """
        Set up the base regex time detector used by a language time class.

        The language class supplies the path of its data folder, which is handed to
        init_regex_and_parser() once all attributes below have been initialised.

        Args:
            entity_name (str): name of the entity, also used to build the tag '__<entity_name>__'
            data_directory_path (str): path of data folder for given language
            timezone (Optional, str): user timezone, resolved through get_timezone(). When falsy
                                      (default None) self.timezone stays None and now_date is created
                                      with tz=None
        """
        # text buffers and bot message start out empty
        self.text = self.tagged_text = self.processed_text = ''
        self.bot_message = None

        # dict to store words for time, numerals and words which comes in reference to some date
        self.time_constant_dict = {}
        self.datetime_constant_dict = {}
        self.numerals_constant_dict = {}

        # placeholder for the standard time regex; presumably filled by init_regex_and_parser() below
        self.regex_time = None

        # entity name and the tag derived from it
        tag_edge = '__'
        self.entity_name = entity_name
        self.tag = tag_edge + entity_name + tag_edge

        # timezone is only resolved when one was given; the reference "now" depends on it
        self.timezone = get_timezone(timezone) if timezone else None
        self.now_date = datetime.datetime.now(tz=self.timezone)

        # initialise the regex and parsers from the language data folder
        self.init_regex_and_parser(data_directory_path)

        # default order in which the detectors are listed
        self.detector_preferences = [
            self._detect_time_with_coln_format,
            self._detect_hour_minute,
        ]
Пример #3
0
    def __init__(self,
                 entity_name,
                 locale=None,
                 language=ENGLISH_LANG,
                 timezone='UTC',
                 past_date_referenced=False):
        """Set up a DateDetector that delegates to a language specific date detector.

        The module 'ner_v2.detectors.temporal.date.<language>.date_detection' is imported and its
        DateDetector is instantiated. When that raises ImportError, the DateDetector from
        'ner_v2.detectors.temporal.date.standard_date_regex' is used instead, together with the
        language data path returned by get_lang_data_path().

        Args:
            entity_name: A string by which the detected date entity substrings would be replaced with on calling
                        detect_entity(); also used to build the tag '__<entity_name>__'
            locale (Optional, str): user locale, default is None
            language (Optional, str): language code used to pick the detector module, default is ENGLISH_LANG
            timezone (Optional, str): timezone identifier string, resolved through get_timezone(),
                                      default is UTC
            past_date_referenced (bool): to know if past or future date is referenced for date text like 'kal', 'parso'
        """
        # entity name and the tag derived from it
        self.entity_name = entity_name
        self.tag = '__' + entity_name + '__'

        # language settings
        self.language = language
        self.locale = locale

        # text buffers and detection results start out empty
        self.text = self.tagged_text = self.processed_text = ''
        self.date = []
        self.original_date_text = []
        self.bot_message = None

        # resolved timezone and the reference "now" in that timezone
        self.timezone = get_timezone(timezone)
        self.now_date = datetime.datetime.now(tz=self.timezone)

        # keyword arguments common to the language specific and the fallback detector
        shared_kwargs = {
            'entity_name': self.entity_name,
            'past_date_referenced': past_date_referenced,
            'timezone': self.timezone,
            'locale': self.locale,
        }
        language_module_name = 'ner_v2.detectors.temporal.date.{0}.date_detection'.format(self.language)

        try:
            language_module = importlib.import_module(language_module_name)
            self.language_date_detector = language_module.DateDetector(**shared_kwargs)
        except ImportError:
            # fall back to the standard regex detector driven by the language data folder
            fallback_module = importlib.import_module('ner_v2.detectors.temporal.date.standard_date_regex')
            self.language_date_detector = fallback_module.DateDetector(
                data_directory_path=get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                       lang_code=self.language),
                **shared_kwargs)
    def __init__(self, entity_name='time', timezone=None, language=ENGLISH_LANG):
        """Set up a TimeDetector that delegates to a language specific time detector.

        The module 'ner_v2.detectors.temporal.time.<language>.time_detection' is imported and its
        TimeDetector is instantiated. When that raises ImportError, the TimeDetector from
        'ner_v2.detectors.temporal.time.standard_time_regex' is used instead, together with the
        language data path returned by get_lang_data_path().

        Args:
            entity_name(str): A string by which the detected time stamp substrings would be replaced with on calling
                               detect_entity(); also used to build the tag '__<entity_name>__'
            timezone(str): timezone identifier string, resolved through get_timezone(). When falsy
                            (default None) self.timezone stays None
            language(str): ISO 639 code for language of entities to be detected by the instance of this class
        """
        # supported languages are set before the superclass initialiser runs
        self._supported_languages = self.get_supported_languages()
        super(TimeDetector, self).__init__(language=language)

        # text buffers and detection results start out empty
        self.text = self.tagged_text = self.processed_text = ''
        self.time = []
        self.original_time_text = []

        # entity name and the tag derived from it
        self.entity_name = entity_name
        self.tag = '__' + entity_name + '__'

        # timezone is only resolved when one was given
        self.timezone = get_timezone(timezone) if timezone else None
        self.language = language

        language_module_name = 'ner_v2.detectors.temporal.time.{0}.time_detection'.format(self.language)
        try:
            language_module = importlib.import_module(language_module_name)
            self.language_time_detector = language_module.TimeDetector(
                entity_name=self.entity_name,
                timezone=self.timezone)
        except ImportError:
            # fall back to the standard regex detector driven by the language data folder
            fallback_module = importlib.import_module('ner_v2.detectors.temporal.time.standard_time_regex')
            self.language_time_detector = fallback_module.TimeDetector(
                entity_name=self.entity_name,
                data_directory_path=get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                       lang_code=self.language),
                timezone=self.timezone)