class AutoDetectLanguageTest(unittest.TestCase):
    """Tests for the languages reported by ``AutoDetectLanguage``."""

    def setUp(self):
        self.parser = AutoDetectLanguage()

    def _assert_same_items(self, expected, actual):
        """Assert that both iterables hold the same items, ignoring order.

        ``assertItemsEqual`` only exists on Python 2; Python 3 provides the
        same check under the name ``assertCountEqual``.
        """
        check = getattr(self, 'assertCountEqual', None) or self.assertItemsEqual
        check(expected, actual)

    def _assert_raises_regex(self, exception, pattern):
        """Return a context manager asserting ``exception`` matches ``pattern``.

        ``assertRaisesRegexp`` is the Python 2 spelling and was removed in
        Python 3.12; ``assertRaisesRegex`` is preferred when available.
        """
        check = getattr(self, 'assertRaisesRegex', None) or self.assertRaisesRegexp
        return check(exception, pattern)

    def test_detect_language(self):
        self._assert_same_items(
            ['es', 'pt'],
            map(attrgetter('shortname'),
                self.parser.iterate_applicable_languages('11 abril 2010')))
        self._assert_same_items(
            ['es'],
            map(attrgetter('shortname'),
                self.parser.iterate_applicable_languages('11 junio 2010')))

    @unittest.skip(
        'This test should only be testing detecting languages, not parsing them. Although tests '
        'for parsing this dates should be created separately to not reduce the coverage'
    )
    def test_should_reduce_possible_languages_and_reject_different(self):
        dates_in_spanish = [
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
        ]

        for date_string, correct_date in dates_in_spanish:
            parsed_date = self.parser.parse(date_string, None)
            self.assertEqual(correct_date.date(), parsed_date.date())

        with self._assert_raises_regex(ValueError, 'Invalid date'):
            portuguese_date = u'13 Setembro, 2014'
            self.parser.parse(portuguese_date, None)

    @unittest.skip(
        'This test should only be testing detecting languages, not parsing them. Although tests '
        'for parsing this dates should be created separately to not reduce the coverage'
    )
    def test_should_accept_dates_in_different_languages(self):
        date_fixtures = [
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
            (u'13 Setembro, 2014', datetime(2014, 9, 13)),
        ]
        parser = AutoDetectLanguage(None, allow_redetection=True)

        for date_string, correct_date in date_fixtures:
            parsed_date = parser.parse(date_string, None)
            self.assertEqual(correct_date.date(), parsed_date.date())
class DateDataParser(object):
    """
    Pick a language detector and use it to parse date strings.

    :param languages:
        Language codes to restrict parsing to, given as a list, tuple or set.
        Every code must be a key of the default language loader's language
        map. When omitted, languages are auto-detected.
    :param allow_redetect_language:
        When true, an ``AutoDetectLanguage`` detector with re-detection
        enabled is used (restricted to ``languages`` when they are given).

    :raises ValueError: if any of the given language codes is unknown.
    :raises TypeError: if ``languages`` is neither ``None`` nor a list,
        tuple or set.
    """

    def __init__(self, languages=None, allow_redetect_language=False):
        # ``collections.Set`` was an alias of ``collections.abc.Set`` that no
        # longer exists on Python 3.10+, so resolve the ABC portably.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:  # Python 2
            from collections import Set as abstract_set

        if isinstance(languages, (list, tuple, abstract_set)):
            available_language_map = default_language_loader.get_language_map()
            if all(language in available_language_map for language in languages):
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                # repr() each code so that non-string entries are reported
                # instead of making str.join() raise a TypeError.
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            self.language_detector = AutoDetectLanguage(
                languages=languages if languages else None,
                allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Return a dictionary with a date object and a period.

        Period values can be 'day' (default), 'week', 'month' or 'year'.

        The period aims to solve the following issue: a forum could display
        "2 weeks ago" in the thread list (the thread itself shows the exact
        date), so the engine translates "2 weeks ago" to a certain date. The
        next thread summary displays "3 weeks ago", which is translated to
        another date seven days before the first one. A valid date string
        between both dates won't be scraped because it's not an exact date
        match. The period field helps to build better date range detection.

        :param date_string: string to parse; it is stripped and passed
            through ``sanitize_date`` before parsing.
        :param date_formats: forwarded unchanged to
            ``_DateLanguageParser.parse``.
        :return: the first truthy result of ``_DateLanguageParser.parse``
            among the applicable languages, otherwise
            ``{'date_obj': None, 'period': 'day'}``.

        TODO: Timezone issues
        """
        date_string = sanitize_date(date_string.strip())

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}
class AutoDetectLanguageTest(unittest.TestCase):
    """Tests for the languages reported by ``AutoDetectLanguage``."""

    def setUp(self):
        self.parser = AutoDetectLanguage()

    def _assert_same_items(self, expected, actual):
        """Assert that both iterables hold the same items, ignoring order.

        ``assertItemsEqual`` only exists on Python 2; Python 3 provides the
        same check under the name ``assertCountEqual``.
        """
        check = getattr(self, 'assertCountEqual', None) or self.assertItemsEqual
        check(expected, actual)

    def _assert_raises_regex(self, exception, pattern):
        """Return a context manager asserting ``exception`` matches ``pattern``.

        ``assertRaisesRegexp`` is the Python 2 spelling and was removed in
        Python 3.12; ``assertRaisesRegex`` is preferred when available.
        """
        check = getattr(self, 'assertRaisesRegex', None) or self.assertRaisesRegexp
        return check(exception, pattern)

    def test_detect_language(self):
        self._assert_same_items(
            ['es', 'pt'],
            map(attrgetter('shortname'),
                self.parser.iterate_applicable_languages('11 abril 2010')))
        self._assert_same_items(
            ['es'],
            map(attrgetter('shortname'),
                self.parser.iterate_applicable_languages('11 junio 2010')))

    @unittest.skip(
        'This test should only be testing detecting languages, not parsing them. Although tests '
        'for parsing this dates should be created separately to not reduce the coverage'
    )
    def test_should_reduce_possible_languages_and_reject_different(self):
        dates_in_spanish = [
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
        ]

        for date_string, correct_date in dates_in_spanish:
            parsed_date = self.parser.parse(date_string, None)
            self.assertEqual(correct_date.date(), parsed_date.date())

        with self._assert_raises_regex(ValueError, 'Invalid date'):
            portuguese_date = u'13 Setembro, 2014'
            self.parser.parse(portuguese_date, None)

    @unittest.skip(
        'This test should only be testing detecting languages, not parsing them. Although tests '
        'for parsing this dates should be created separately to not reduce the coverage'
    )
    def test_should_accept_dates_in_different_languages(self):
        date_fixtures = [
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
            (u'13 Setembro, 2014', datetime(2014, 9, 13)),
        ]
        parser = AutoDetectLanguage(None, allow_redetection=True)

        for date_string, correct_date in date_fixtures:
            parsed_date = parser.parse(date_string, None)
            self.assertEqual(correct_date.date(), parsed_date.date())
class DateDataParser(object):
    """
    Pick a language detector and use it to parse date strings.

    :param languages:
        Language codes to restrict parsing to, given as a list, tuple or set.
        Every code must be a key of the default language loader's language
        map. When omitted, languages are auto-detected.
    :param allow_redetect_language:
        When true, an ``AutoDetectLanguage`` detector with re-detection
        enabled is used (restricted to ``languages`` when they are given).

    :raises ValueError: if any of the given language codes is unknown.
    :raises TypeError: if ``languages`` is neither ``None`` nor a list,
        tuple or set.
    """

    def __init__(self, languages=None, allow_redetect_language=False):
        # ``collections.Set`` was an alias of ``collections.abc.Set`` that no
        # longer exists on Python 3.10+, so resolve the ABC portably.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:  # Python 2
            from collections import Set as abstract_set

        if isinstance(languages, (list, tuple, abstract_set)):
            available_language_map = default_language_loader.get_language_map()
            if all(language in available_language_map for language in languages):
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                # repr() each code so that non-string entries are reported
                # instead of making str.join() raise a TypeError.
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            self.language_detector = AutoDetectLanguage(
                languages=languages if languages else None,
                allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Return a dictionary with a date object and a period.

        Period values can be 'day' (default), 'week', 'month' or 'year'.

        The period aims to solve the following issue: a forum could display
        "2 weeks ago" in the thread list (the thread itself shows the exact
        date), so the engine translates "2 weeks ago" to a certain date. The
        next thread summary displays "3 weeks ago", which is translated to
        another date seven days before the first one. A valid date string
        between both dates won't be scraped because it's not an exact date
        match. The period field helps to build better date range detection.

        :param date_string: string to parse; it is stripped and passed
            through ``sanitize_date`` before parsing.
        :param date_formats: forwarded unchanged to
            ``_DateLanguageParser.parse``.
        :return: the first truthy result of ``_DateLanguageParser.parse``
            among the applicable languages, otherwise
            ``{'date_obj': None, 'period': 'day'}``.

        TODO: Timezone issues
        """
        date_string = sanitize_date(date_string.strip())

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}
class DateDataParser(object):
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
        A list of two letters language codes, e.g. ['en', 'es'].
        If languages are given, it will not attempt to detect the language.
    :type languages: list

    :param allow_redetect_language:
        Enables/disables language re-detection.
    :type allow_redetect_language: bool

    :param settings:
        Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
    :type settings: dict

    :return: A parser instance

    :raises:
        ValueError - Unknown Language, TypeError - Languages argument must be a list
    """

    # Loader cached on the class; created on first use by _get_language_loader().
    language_loader = None

    @apply_settings
    def __init__(self, languages=None, allow_redetect_language=False, settings=None):
        # ``collections.Set`` was an alias of ``collections.abc.Set`` that no
        # longer exists on Python 3.10+, so resolve the ABC portably.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:  # Python 2
            from collections import Set as abstract_set

        self._settings = settings
        available_language_map = self._get_language_loader().get_language_map()

        if isinstance(languages, (list, tuple, abstract_set)):
            if all(language in available_language_map for language in languages):
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            # Without explicit languages, re-detection works over every
            # language known to the loader.
            self.language_detector = AutoDetectLanguage(
                languages if languages else list(available_language_map.values()),
                allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(
                list(available_language_map.values()), allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages and timezones.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str|unicode
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
            {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}

        :raises: ValueError - Unknown Language, TypeError - Input type must be str or unicode

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``:

            >>> DateDataParser().get_date_data(u'March 2015')
            {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data(u'2014')
            {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

        Dates with time zone indications or UTC offsets are returned in UTC time unless
        specified using `Settings`_.

            >>> DateDataParser().get_date_data(u'23 March 2000, 1:21 PM CET')
            {'date_obj': datetime.datetime(2000, 3, 23, 14, 21), 'period': 'day'}

        """
        try:
            date_string = date_string.strip()
        except AttributeError:
            # Anything without .strip() is not a string type we can parse.
            raise TypeError('Input type must be str or unicode')

        if self._settings.NORMALIZE:
            date_string = normalize_unicode(date_string)

        date_string = sanitize_date(date_string)

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True, settings=self._settings)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(
                language, date_string, date_formats, settings=self._settings)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}

    def get_date_tuple(self, *args, **kwargs):
        """
        Return the result of :meth:`get_date_data` as a ``DateData`` namedtuple
        with the fields ``date_obj`` and ``period``.

        All arguments are forwarded to :meth:`get_date_data`.
        """
        date_tuple = collections.namedtuple('DateData', 'date_obj period')
        date_data = self.get_date_data(*args, **kwargs)
        return date_tuple(**date_data)

    @classmethod
    def _get_language_loader(cls):
        """Return the class-level ``LanguageDataLoader``, creating it on first use."""
        if not cls.language_loader:
            cls.language_loader = LanguageDataLoader()
        return cls.language_loader
class AutoDetectLanguageTest(BaseTestCase):
    """Given/when/then style tests for ``AutoDetectLanguage`` detection."""

    def setUp(self):
        super(AutoDetectLanguageTest, self).setUp()

        # Placeholders until a given_*/when_* step assigns real values.
        self.parser = NotImplemented
        self.detected_languages = NotImplemented

        # Just a known subset so we can rely on test outcomes.
        # Feel free to add, but not exclude or change order.
        self.known_languages = ['en', 'fr', 'es', 'pt', 'ru', 'tr', 'cs']

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_languages=['es', 'pt']),
        param(date_strings=['11 junio 2010'], expected_languages=['es']),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_languages=['es']),
    ])
    def test_detect_languages(self, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_language='es'),
        param(date_strings=['11 junio 2010'], expected_language='es'),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_language='es'),
    ])
    def test_exclude_ineligible_languages_with_modify(self, date_strings, expected_language):
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=True)
        self.then_detected_languages_are([expected_language])
        # Everything from the detected language onwards is expected to remain.
        first_remaining = self.known_languages.index(expected_language)
        self.then_parser_languages_are(self.known_languages[first_remaining:])

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_language='es'),
        param(date_strings=['11 junio 2010'], expected_language='es'),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_language='es'),
    ])
    def test_do_not_exclude_ineligible_languages_without_modify(self, date_strings, expected_language):
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=False)
        self.then_detected_languages_are([expected_language])
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_languages=['es', 'pt']),
        param(date_strings=['11 junio 2010'], expected_languages=['es']),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_languages=['es']),
        param(date_strings=['13 Srpen, 2014'], expected_languages=['cs']),
    ])
    def test_do_not_exclude_ineligible_languages_when_all_ineligible(self, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings, modify=True)
        self.then_detected_languages_are(expected_languages)
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(language='es', date_strings=['13 Setembro, 2014']),
        param(language='cs', date_strings=["'11 Ağustos, 2014'"]),
    ])
    def test_reject_dates_in_other_languages_without_redetection(self, language, date_strings):
        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are([language])
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are([])

    @parameterized.expand([
        param(detected_languages=['es'], date_strings=['13 Juillet, 2014'], expected_languages=['fr']),
        param(detected_languages=['es'], date_strings=['11 Ağustos, 2014'], expected_languages=['tr']),
    ])
    def test_accept_dates_in_other_languages_with_redetection_enabled(
            self, detected_languages, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages, allow_redetection=True)
        self.given_parser_languages_are(detected_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    def test_accept_numeric_dates_without_redetection(self):
        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are(['es'])
        self.when_all_languages_are_detected(['13/08/2014'])
        self.then_detected_languages_are(['es'])

    def given_parser(self, languages=None, allow_redetection=False):
        """Build ``self.parser``, mapping language codes to loader entries."""
        if languages is not None:
            known = default_language_loader.get_language_map()
            languages = [known[code] for code in languages]
        self.parser = AutoDetectLanguage(languages, allow_redetection=allow_redetection)

    def given_parser_languages_are(self, languages):
        """Overwrite the parser's current languages with the given codes."""
        known = default_language_loader.get_language_map()
        self.parser.languages = [known[code] for code in languages]

    def when_all_languages_are_detected(self, date_strings, modify=False):
        """Run detection on every string; keep the full result of the last one."""
        assert not isinstance(date_strings, six.string_types)
        for text in date_strings:
            if settings.NORMALIZE:
                text = normalize_unicode(text)
            candidates = self.parser.iterate_applicable_languages(
                text, modify=modify, settings=settings)
            found = list(candidates)
        self.detected_languages = found

    def when_one_language_is_detected(self, date_strings, modify=False):
        """Run detection on every string; keep the first language of the last one."""
        for text in date_strings:
            candidates = self.parser.iterate_applicable_languages(
                text, modify=modify, settings=settings)
            first_match = next(candidates)
        self.detected_languages = [first_match]

    def then_detected_languages_are(self, expected_languages):
        """Check the detected languages' shortnames, ignoring order."""
        six.assertCountEqual(
            self, expected_languages,
            map(attrgetter('shortname'), self.detected_languages))

    def then_parser_languages_are(self, expected_languages):
        """Check the parser's remaining languages' shortnames, ignoring order."""
        six.assertCountEqual(
            self, expected_languages,
            map(attrgetter('shortname'), self.parser.languages))
class AutoDetectLanguageTest(BaseTestCase):
    """Given/when/then style tests for ``AutoDetectLanguage`` detection."""

    def setUp(self):
        super(AutoDetectLanguageTest, self).setUp()

        # Placeholders until a given_*/when_* step assigns real values.
        self.parser = NotImplemented
        self.detected_languages = NotImplemented

        # Just a known subset so we can rely on test outcomes.
        # Feel free to add, but not exclude or change order.
        self.known_languages = ['en', 'fr', 'es', 'pt', 'ru', 'tr', 'cs']

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_languages=['es', 'pt']),
        param(date_strings=['11 junio 2010'], expected_languages=['es']),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_languages=['es']),
    ])
    def test_detect_languages(self, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_language='es'),
        param(date_strings=['11 junio 2010'], expected_language='es'),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_language='es'),
    ])
    def test_exclude_ineligible_languages_with_modify(self, date_strings, expected_language):
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=True)
        self.then_detected_languages_are([expected_language])
        # Everything from the detected language onwards is expected to remain.
        first_remaining = self.known_languages.index(expected_language)
        self.then_parser_languages_are(self.known_languages[first_remaining:])

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_language='es'),
        param(date_strings=['11 junio 2010'], expected_language='es'),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_language='es'),
    ])
    def test_do_not_exclude_ineligible_languages_without_modify(self, date_strings, expected_language):
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=False)
        self.then_detected_languages_are([expected_language])
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(date_strings=['11 abril 2010'], expected_languages=['es', 'pt']),
        param(date_strings=['11 junio 2010'], expected_languages=['es']),
        param(date_strings=['13 Ago, 2014', '13 Septiembre, 2014'], expected_languages=['es']),
        param(date_strings=['13 Srpen, 2014'], expected_languages=['cs']),
    ])
    def test_do_not_exclude_ineligible_languages_when_all_ineligible(self, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings, modify=True)
        self.then_detected_languages_are(expected_languages)
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(language='es', date_strings=['13 Setembro, 2014']),
        param(language='cs', date_strings=["'11 Ağustos, 2014'"]),
    ])
    def test_reject_dates_in_other_languages_without_redetection(self, language, date_strings):
        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are([language])
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are([])

    @parameterized.expand([
        param(detected_languages=['es'], date_strings=['13 Juillet, 2014'], expected_languages=['fr']),
        param(detected_languages=['es'], date_strings=['11 Ağustos, 2014'], expected_languages=['tr']),
    ])
    def test_accept_dates_in_other_languages_with_redetection_enabled(
            self, detected_languages, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages, allow_redetection=True)
        self.given_parser_languages_are(detected_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    def test_accept_numeric_dates_without_redetection(self):
        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are(['es'])
        self.when_all_languages_are_detected(['13/08/2014'])
        self.then_detected_languages_are(['es'])

    def given_parser(self, languages=None, allow_redetection=False):
        """Build ``self.parser``, mapping language codes to loader entries."""
        if languages is not None:
            known = default_language_loader.get_language_map()
            languages = [known[code] for code in languages]
        self.parser = AutoDetectLanguage(languages, allow_redetection=allow_redetection)

    def given_parser_languages_are(self, languages):
        """Overwrite the parser's current languages with the given codes."""
        known = default_language_loader.get_language_map()
        self.parser.languages = [known[code] for code in languages]

    def when_all_languages_are_detected(self, date_strings, modify=False):
        """Run detection on every string; keep the full result of the last one."""
        assert not isinstance(date_strings, six.string_types)
        for text in date_strings:
            if settings.NORMALIZE:
                text = normalize_unicode(text)
            candidates = self.parser.iterate_applicable_languages(
                text, modify=modify, settings=settings)
            found = list(candidates)
        self.detected_languages = found

    def when_one_language_is_detected(self, date_strings, modify=False):
        """Run detection on every string; keep the first language of the last one."""
        for text in date_strings:
            candidates = self.parser.iterate_applicable_languages(
                text, modify=modify, settings=settings)
            first_match = next(candidates)
        self.detected_languages = [first_match]

    def then_detected_languages_are(self, expected_languages):
        """Check the detected languages' shortnames, ignoring order."""
        six.assertCountEqual(
            self, expected_languages,
            map(attrgetter('shortname'), self.detected_languages))

    def then_parser_languages_are(self, expected_languages):
        """Check the parser's remaining languages' shortnames, ignoring order."""
        six.assertCountEqual(
            self, expected_languages,
            map(attrgetter('shortname'), self.parser.languages))
class DateDataParser(object):
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
        A list of two letters language codes, e.g. ['en', 'es'].
        If languages are given, it will not attempt to detect the language.
    :type languages: list

    :param allow_redetect_language:
        Enables/disables language re-detection.
    :type allow_redetect_language: bool

    :param settings:
        Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
    :type settings: dict

    :return: A parser instance

    :raises:
        ValueError - Unknown Language, TypeError - Languages argument must be a list
    """

    # Loader cached on the class; created on first use by _get_language_loader().
    language_loader = None

    @apply_settings
    def __init__(self, languages=None, allow_redetect_language=False, settings=None):
        # ``collections.Set`` was an alias of ``collections.abc.Set`` that no
        # longer exists on Python 3.10+, so resolve the ABC portably.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:  # Python 2
            from collections import Set as abstract_set

        self._settings = settings
        available_language_map = self._get_language_loader().get_language_map()

        if isinstance(languages, (list, tuple, abstract_set)):
            if all(language in available_language_map for language in languages):
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            # Without explicit languages, re-detection works over every
            # language known to the loader.
            self.language_detector = AutoDetectLanguage(
                languages if languages else list(available_language_map.values()),
                allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(
                list(available_language_map.values()), allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages and timezones.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str|unicode
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
            {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}

        :raises: ValueError - Unknown Language, TypeError - Input type must be str or unicode

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``:

            >>> DateDataParser().get_date_data(u'March 2015')
            {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data(u'2014')
            {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

        Dates with time zone indications or UTC offsets are returned in UTC time unless
        specified using `Settings`_.

            >>> DateDataParser().get_date_data(u'23 March 2000, 1:21 PM CET')
            {'date_obj': datetime.datetime(2000, 3, 23, 14, 21), 'period': 'day'}

        """
        try:
            date_string = date_string.strip()
        except AttributeError:
            # Anything without .strip() is not a string type we can parse.
            raise TypeError('Input type must be str or unicode')

        if self._settings.NORMALIZE:
            date_string = normalize_unicode(date_string)

        date_string = sanitize_date(date_string)

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True, settings=self._settings)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(
                language, date_string, date_formats, settings=self._settings)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}

    def get_date_tuple(self, *args, **kwargs):
        """
        Return the result of :meth:`get_date_data` as a ``DateData`` namedtuple
        with the fields ``date_obj`` and ``period``.

        All arguments are forwarded to :meth:`get_date_data`.
        """
        date_tuple = collections.namedtuple('DateData', 'date_obj period')
        date_data = self.get_date_data(*args, **kwargs)
        return date_tuple(**date_data)

    @classmethod
    def _get_language_loader(cls):
        """Return the class-level ``LanguageDataLoader``, creating it on first use."""
        if not cls.language_loader:
            cls.language_loader = LanguageDataLoader()
        return cls.language_loader
class DateDataParser(object):
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
        A list of two letters language codes, e.g. ['en', 'es'].
        If languages are given, it will not attempt to detect the language.
    :type languages: list

    :param allow_redetect_language:
        Enables/disables language re-detection.
    :type allow_redetect_language: bool

    :return: A parser instance

    :raises:
        ValueError - Unknown Language, TypeError - Languages argument must be a list
    """

    def __init__(self, languages=None, allow_redetect_language=False):
        # ``collections.Set`` was an alias of ``collections.abc.Set`` that no
        # longer exists on Python 3.10+, so resolve the ABC portably.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:  # Python 2
            from collections import Set as abstract_set

        if isinstance(languages, (list, tuple, abstract_set)):
            available_language_map = default_language_loader.get_language_map()
            if all(language in available_language_map for language in languages):
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            self.language_detector = AutoDetectLanguage(
                languages=languages if languages else None,
                allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str|unicode
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
            {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}

        :raises: ValueError - Unknown Language

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``.

            >>> DateDataParser().get_date_data(u'March 2015')
            {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data(u'2014')
            {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

        TODO: Timezone issues
        """
        date_string = sanitize_date(date_string.strip())

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}