Пример #1
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. The span pattern accepts an
     optional "fra", a start date, one of "til", "--" or the dash
     character, and a stop date. The range pattern has the form
     "mellem <start> og <stop>". Both patterns expose the named groups
     ``start`` and ``stop`` and are case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw-string templates keep "\s" a regex escape rather than an invalid
     # Python string escape. The non-ASCII dash is interpolated from a u""
     # literal so the resulting pattern text is unchanged.
     self._span = re.compile(
         r"(fra)?\s*(?P<start>.+)\s*(til|--|%s)\s*(?P<stop>.+)" % u"–",
         re.IGNORECASE)
     self._range = re.compile(
         r"(mellem)\s+(?P<start>.+)\s+og\s+(?P<stop>.+)", re.IGNORECASE)
Пример #2
0
 def init_strings(self):
     """
     Define, in Swedish, the span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. The span pattern accepts an
     optional "från", a start date, one of "till", "--" or the dash
     character, and a stop date. The range pattern has the form
     "mellan <start> och <stop>". Both patterns expose the named groups
     ``start`` and ``stop`` and are case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw-string templates keep "\s" a regex escape rather than an invalid
     # Python string escape. Non-ASCII pieces are interpolated from u""
     # literals so the resulting pattern text is unchanged.
     self._span = re.compile(
         r"(%s)?\s*(?P<start>.+)\s*(till|--|%s)\s*(?P<stop>.+)"
         % (u"från", u"–"),
         re.IGNORECASE)
     self._range = re.compile(
         r"(mellan)\s+(?P<start>.+)\s+och\s+(?P<stop>.+)", re.IGNORECASE)
Пример #3
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. The span pattern is
     "<start> - <stop>" with whitespace required on both sides of the
     hyphen. The range pattern is "[vuosien] <start> ja <stop> välillä".
     Both patterns expose the named groups ``start`` and ``stop`` and are
     case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw-string templates keep "\s" a regex escape rather than an invalid
     # Python string escape.
     # date, whitespace, hyphen, whitespace, date
     self._span = re.compile(r"(?P<start>.+)\s+(-)\s+(?P<stop>.+)",
                             re.IGNORECASE)
     # The non-ASCII trailing keyword is interpolated from a u"" literal so
     # the resulting pattern text is unchanged.
     self._range = re.compile(
         r"(vuosien\s*)?(?P<start>.+)\s+ja\s+(?P<stop>.+)\s+%s" % u"välillä",
         re.IGNORECASE)
Пример #4
0
    def init_strings(self):
        """
        This method compiles regular expression strings for matching dates.

        Most of the re's in most languages can stay as is. span and range
        most likely will need to change. Whatever change is done, this method
        may be called first as DateParser.init_strings(self) so that the
        invariant expressions don't need to be repeatedly coded. All
        differences can be coded after the DateParser.init_strings(self)
        call; that way they override stuff from this method. See
        DateParserRU() as an example.

        Overridden here: ``_numeric``, ``_span`` ("de <start> à <stop>"),
        ``_range`` ("entre|ent.|ent <start> et <stop>") and the six
        ``_*text2`` patterns, one per month-name attribute.
        """
        DateParser.init_strings(self)

        # All patterns below are raw strings so that "\d", "\s" and "\."
        # are regex escapes rather than invalid Python string escapes.

        # This self._numeric is different from the base
        # avoid bug gregorian / french calendar conversion (+/-10 days)
        self._numeric = re.compile(
            r"((\d+)[/\. ])?\s*((\d+)[/\.])?\s*(\d+)\s*$")

        # The accented keyword is interpolated from a u"" literal so the
        # resulting pattern text is unchanged.
        self._span = re.compile(
            r"(de)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)" % u"à",
            re.IGNORECASE)
        self._range = re.compile(
            r"(entre|ent\.|ent)\s+(?P<start>.+)\s+(et)\s+(?P<stop>.+)",
            re.IGNORECASE)

        # These self._*text2 patterns are different from the base
        # by adding ".?" after the first date and removing "\s*$" at the end.
        # They all share one template; only the month alternation differs.
        text2_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'

        # gregorian and julian
        self._text2 = re.compile(
            text2_template % self._mon_str, re.IGNORECASE)

        # hebrew
        self._jtext2 = re.compile(
            text2_template % self._jmon_str, re.IGNORECASE)

        # french
        self._ftext2 = re.compile(
            text2_template % self._fmon_str, re.IGNORECASE)

        # persian
        self._ptext2 = re.compile(
            text2_template % self._pmon_str, re.IGNORECASE)

        # islamic
        self._itext2 = re.compile(
            text2_template % self._imon_str, re.IGNORECASE)

        # swedish
        self._stext2 = re.compile(
            text2_template % self._smon_str, re.IGNORECASE)
Пример #5
0
 def init_strings(self):
     """
     Compile the localized span, range and text-date regular expressions.

     DateParser.init_strings() is called first; the following attributes
     are then replaced: ``_span`` ("van <start> tot <stop>"), ``_range``
     ("tussen <start> en <stop>"), and ``_text2`` / ``_jtext2``, which are
     built around ``self._mon_str`` and ``self._jmon_str`` respectively.
     All patterns are case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw strings keep "\s" and "\d" regex escapes rather than invalid
     # Python string escapes.
     self._span = re.compile(r"(van)\s+(?P<start>.+)\s+(tot)\s+(?P<stop>.+)",
                             re.IGNORECASE)
     self._range = re.compile(r"tussen\s+(?P<start>.+)\s+en\s+(?P<stop>.+)",
                              re.IGNORECASE)
     # Both text patterns share one template; only the month alternation
     # substituted for %s differs.
     text2_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
     self._text2 = re.compile(
         text2_template % self._mon_str, re.IGNORECASE)
     self._jtext2 = re.compile(
         text2_template % self._jmon_str, re.IGNORECASE)
Пример #6
0
 def init_strings(self):
     """
     Compile the localized span, range and text-date regular expressions.

     DateParser.init_strings() is called first; the following attributes
     are then replaced: ``_span`` ("von|vom <start> bis <stop>"),
     ``_range`` ("zwischen <start> und <stop>"), and ``_text2`` /
     ``_jtext2``, which are built around ``self._mon_str`` and
     ``self._jmon_str`` respectively. All patterns are case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw strings keep "\s" and "\d" regex escapes rather than invalid
     # Python string escapes.
     self._span = re.compile(
         r"(von|vom)\s+(?P<start>.+)\s+(bis)\s+(?P<stop>.+)", re.IGNORECASE)
     self._range = re.compile(
         r"zwischen\s+(?P<start>.+)\s+und\s+(?P<stop>.+)", re.IGNORECASE)
     # Both text patterns share one template; only the month alternation
     # substituted for %s differs.
     text2_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
     self._text2 = re.compile(
         text2_template % self._mon_str, re.IGNORECASE)
     self._jtext2 = re.compile(
         text2_template % self._jmon_str, re.IGNORECASE)
Пример #7
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. Both have the shape
     "<opener> <start> <joiner> <stop>", where the opener/joiner keywords
     come from the lists below, and expose the named groups ``start`` and
     ``stop``. Matching is case-insensitive.
     """
     DateParser.init_strings(self)
     _span_1 = [u'от']
     _span_2 = [u'до']
     _range_1 = [u'между']
     _range_2 = [u'и']
     # One raw-string template serves both patterns; raw notation keeps
     # "\s" a regex escape rather than an invalid Python string escape.
     template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
     self._span = re.compile(
         template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
     self._range = re.compile(
         template % ('|'.join(_range_1), '|'.join(_range_2)), re.IGNORECASE)
Пример #8
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. Both have the shape
     "<opener> <start> <joiner> <stop>", where the opener/joiner keywords
     come from the lists below, and expose the named groups ``start`` and
     ``stop``. Matching is case-insensitive.
     """
     DateParser.init_strings(self)
     # Longer alternatives are listed before their prefixes ("dal" before
     # "da", "al" before "a"); regex alternation tries them left to right.
     _span_1 = [u'dal', u'da']
     _span_2 = [u'al', u'a']
     _range_1 = [u'tra', u'fra']
     _range_2 = [u'e']
     # One raw-string template serves both patterns; raw notation keeps
     # "\s" a regex escape rather than an invalid Python string escape.
     template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
     self._span = re.compile(
         template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
     self._range = re.compile(
         template % ('|'.join(_range_1), '|'.join(_range_2)), re.IGNORECASE)
Пример #9
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. Both have the shape
     "<opener> <start> <joiner> <stop>" ("nuo ... iki ..." for spans,
     "tarp ... ir ..." for ranges) and expose the named groups ``start``
     and ``stop``. Matching is case-insensitive.
     """
     DateParser.init_strings(self)
     _span_1 = [u'nuo']
     _span_2 = [u'iki']
     _range_1 = [u'tarp']
     _range_2 = [u'ir']
     # One raw-string template serves both patterns; raw notation keeps
     # "\s" a regex escape rather than an invalid Python string escape.
     template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
     self._span = re.compile(
         template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
     self._range = re.compile(
         template % ('|'.join(_range_1), '|'.join(_range_2)), re.IGNORECASE)
Пример #10
0
 def init_strings(self):
     """
     Compile the localized span, range and text-date regular expressions.

     DateParser.init_strings() is called first; the following attributes
     are then replaced: ``_span`` ("od <start> do <stop>"), ``_range``
     ("między <start> a <stop>", with tolerated spelling variants), and
     ``_text2`` / ``_jtext2``, which are built around ``self._mon_str``
     and ``self._jmon_str`` respectively. All patterns are
     case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw strings keep "\s" and "\d" regex escapes rather than invalid
     # Python string escapes.
     self._span = re.compile(r"(od)\s+(?P<start>.+)\s+(do)\s+(?P<stop>.+)",
                             re.IGNORECASE)
     # Also handle common mistakes: an optional "po" prefix and a plain
     # "e" typed in place of the accented letter. The accented letter is
     # interpolated from a u"" literal so the pattern text is unchanged.
     self._range = re.compile(
         r"((?:po)?mi(?:%s|e)dzy)\s+(?P<start>.+)\s+(a)\s+(?P<stop>.+)"
         % u"ę",
         re.IGNORECASE)
     # Both text patterns share one template; only the month alternation
     # substituted for %s differs.
     text2_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
     self._text2 = re.compile(
         text2_template % self._mon_str, re.IGNORECASE)
     self._jtext2 = re.compile(
         text2_template % self._jmon_str, re.IGNORECASE)
Пример #11
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. Both have the shape
     "<opener> <start> <joiner> <stop>" ("des de ... fins a ..." for
     spans; "entre", "ent." or "ent" ... "i" ... for ranges) and expose
     the named groups ``start`` and ``stop``. Matching is
     case-insensitive.
     """
     DateParser.init_strings(self)
     _span_1 = [u'des de']
     _span_2 = [u'fins a']
     # "ent\." (escaped dot) is a raw string so the backslash reaches the
     # regex engine; it precedes the bare "ent" so the dotted
     # abbreviation is tried first.
     _range_1 = [u'entre', r'ent\.', u'ent']
     _range_2 = [u'i']
     # One raw-string template serves both patterns; raw notation keeps
     # "\s" a regex escape rather than an invalid Python string escape.
     template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
     self._span = re.compile(
         template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
     self._range = re.compile(
         template % ('|'.join(_range_1), '|'.join(_range_2)), re.IGNORECASE)
Пример #12
0
 def init_strings(self):
     """
     This method compiles regular expression strings for matching dates.

     DateParser.init_strings() is called first; ``self._span`` and
     ``self._range`` are then replaced. Both have the shape
     "<opener> <start> <joiner> <stop>", where the opener/joiner keywords
     come from the lists below, and expose the named groups ``start`` and
     ``stop``. Matching is case-insensitive.
     """
     DateParser.init_strings(self)
     _span_1 = [u'من']
     _span_2 = [u'إلى']
     _range_1 = [u'بين']
     _range_2 = [u'و']
     # One raw-string template serves both patterns; raw notation keeps
     # "\s" a regex escape rather than an invalid Python string escape.
     template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
     self._span = re.compile(
         template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
     self._range = re.compile(
         template % ('|'.join(_range_1), '|'.join(_range_2)), re.IGNORECASE)
Пример #13
0
 def init_strings(self):
     """
     Compile the regular expression strings for matching dates.

     DateParser.init_strings() is called first; the following attributes
     are then replaced: ``_text2``, ``_numeric``, ``_span``
     ("od <start> do <stop>"), ``_range`` ("med <start> in <stop>") and
     ``_jtext2``. ``_text2`` and ``_jtext2`` are built around
     ``self._mon_str`` and ``self._jmon_str`` respectively.
     """
     DateParser.init_strings(self)
     # All patterns are raw strings so that "\d", "\s" and "\." are regex
     # escapes rather than invalid Python string escapes.

     # match 'Day. MONTH year.' format with or without dots
     self._text2 = re.compile(
         r'(\d+)?\.?\s*?%s\.?\s*((\d+)(/\d+)?)?\s*\.?$' % self._mon_str,
         re.IGNORECASE)
     # match Day.Month.Year.
     self._numeric = re.compile(
         r"((\d+)[/\.-])?\s*((\d+)[/\.-])?\s*(\d+)\.?$")

     self._span = re.compile(
         r"od\s+(?P<start>.+)\s+do\s+(?P<stop>.+)", re.IGNORECASE)
     self._range = re.compile(
         r"med\s+(?P<start>.+)\s+in\s+(?P<stop>.+)", re.IGNORECASE)
     self._jtext2 = re.compile(
         r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?' % self._jmon_str,
         re.IGNORECASE)
Пример #14
0
    def init_strings(self):
        """
        This method compiles regular expression strings for matching dates.

        See DateParser.init_strings()

        After the base call, ``self._span`` and ``self._range`` are
        replaced. Both have the shape "<opener> <start> <joiner> <stop>",
        expose the named groups ``start`` and ``stop``, and are
        case-insensitive.
        """
        DateParser.init_strings(self)

        _span_1 = [u'з', u'від']
        # The b.c.e. pattern also contains "до", so "до" only counts as a
        # span joiner when it is NOT followed by whitespace and "н.".
        # This is a negative lookahead; the previous spelling lacked the
        # group parentheses and therefore required a literal "!" in the
        # input, so "до" could never match. Backslashes are doubled
        # because the literal is not a raw string.
        _span_2 = [u'по', u'до(?!\\s+н\\.)']
        _range_1 = [u'між']
        _range_2 = [u'і', u'та']
        # One raw-string template serves both patterns; raw notation keeps
        # "\s" a regex escape rather than an invalid Python string escape.
        template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
        self._span = re.compile(
            template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
        self._range = re.compile(
            template % ('|'.join(_range_1), '|'.join(_range_2)),
            re.IGNORECASE)
Пример #15
0
    def init_strings(self):
        """
        Compile the regular expression strings for matching dates.

        DateParser.init_strings() is called first; ``_text2``,
        ``_numeric``, ``_span`` and ``_range`` are then replaced. Span and
        range have the shape "<opener> <start> <joiner> <stop>", accept
        the keywords in either of the two scripts listed below, and expose
        the named groups ``start`` and ``stop``.
        """
        DateParser.init_strings(self)
        # All patterns are raw strings so that "\d", "\s" and "\." are
        # regex escapes rather than invalid Python string escapes.

        # match 'Day. MONTH year.' format with or without dots
        self._text2 = re.compile(
            r'(\d+)?\.?\s*?%s\s*((\d+)(/\d+)?)?\.?\s*$' % self._mon_str,
            re.IGNORECASE)

        # match Day.Month.Year.
        self._numeric = re.compile(
            r"((\d+)[/\. ])?\s*((\d+)[/\.])?\s*(\d+)\.?$")

        _span_1 = ['od', 'од']
        _span_2 = ['do', 'до']
        _range_1 = ['između', 'између']
        _range_2 = ['i', 'и']
        # One template serves both the span and the range pattern.
        template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
        self._span = re.compile(
            template % ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
        self._range = re.compile(
            template % ('|'.join(_range_1), '|'.join(_range_2)),
            re.IGNORECASE)
Пример #16
0
 def init_strings(self):
     """
     Compile the localized span and range regular expressions.

     DateParser.init_strings() is called first; ``self._span`` is then
     replaced with "od <start> do <stop>" and ``self._range`` with
     "mezi <start> a <stop>". Both patterns expose the named groups
     ``start`` and ``stop`` and are case-insensitive.
     """
     DateParser.init_strings(self)
     # Raw strings keep "\s" a regex escape rather than an invalid Python
     # string escape.
     self._span = re.compile(r"(od)\s+(?P<start>.+)\s+(do)\s+(?P<stop>.+)",
                             re.IGNORECASE)
     self._range = re.compile(
         r"(mezi)\s+(?P<start>.+)\s+(a)\s+(?P<stop>.+)", re.IGNORECASE)