def detect_language(self, text, languages):
    """
    Detect the language of ``text``, optionally restricted to ``languages``.

    The detector that is built for the call is also stored on
    ``self.language_detector``.

    :param text: String whose language should be detected.
    :param languages: A list, tuple or Set of keys of
        ``self.available_language_map``, or None. When None or empty, every
        value of ``self.available_language_map`` is a candidate.
    :return: The result of ``FullTextLanguageDetector._best_language(text)``.
    :raises ValueError: if an entry of ``languages`` is not a key of
        ``self.available_language_map``.
    :raises TypeError: if ``languages`` is neither None nor a list, tuple
        or Set.
    """
    if isinstance(languages, (list, tuple, Set)):
        # Collect the unknown codes in input order, without duplicates, so
        # the error message does not depend on set iteration order.
        unsupported_languages = []
        for language in languages:
            if (language not in self.available_language_map
                    and language not in unsupported_languages):
                unsupported_languages.append(language)
        if unsupported_languages:
            raise ValueError(
                "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        languages = [self.available_language_map[language] for language in languages]
    elif languages is not None:
        raise TypeError("languages argument must be a list (%r given)" % type(languages))

    if languages:
        self.language_detector = FullTextLanguageDetector(languages=languages)
    else:
        # Nothing requested (None or empty): consider every known language.
        self.language_detector = FullTextLanguageDetector(
            list(self.available_language_map.values()))
    return self.language_detector._best_language(text)
def detect_language(self, text, languages, settings=None, detect_languages_function=None):
    """
    Detect the language of ``text``.

    When ``detect_languages_function`` is given and ``languages`` is empty or
    None, detection is delegated to that function; ``settings`` must be
    provided in this case because the confidence threshold is read from it.
    Otherwise a ``FullTextLanguageDetector`` is built (and stored on
    ``self.language_detector``) and used.

    :param text: String whose language should be detected.
    :param languages: A list, tuple or Set of keys of
        ``self.available_language_map``, or None. When None or empty, every
        value of ``self.available_language_map`` is a candidate.
    :param settings: Object exposing ``DEFAULT_LANGUAGES`` and
        ``LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD``. May be None when no
        ``detect_languages_function`` is used; no default-language fallback
        is applied then.
    :param detect_languages_function: Optional callable invoked as
        ``f(text, confidence_threshold=...)``; its result is passed through
        ``map_languages``.
    :return: The detected language, the first entry of
        ``settings.DEFAULT_LANGUAGES`` when detection yields nothing, or None.
    :raises ValueError: if an entry of ``languages`` is not a key of
        ``self.available_language_map``.
    :raises TypeError: if ``languages`` is neither None nor a list, tuple
        or Set.
    """
    if detect_languages_function and not languages:
        detected_languages = detect_languages_function(
            text, confidence_threshold=settings.LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD
        )
        detected_languages = map_languages(detected_languages) or settings.DEFAULT_LANGUAGES
        return detected_languages[0] if detected_languages else None

    if isinstance(languages, (list, tuple, Set)):
        # Collect the unknown codes in input order, without duplicates, so
        # the error message does not depend on set iteration order.
        unsupported_languages = []
        for language in languages:
            if (language not in self.available_language_map
                    and language not in unsupported_languages):
                unsupported_languages.append(language)
        if unsupported_languages:
            raise ValueError(
                "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        languages = [self.available_language_map[language] for language in languages]
    elif languages is not None:
        raise TypeError("languages argument must be a list (%r given)" % type(languages))

    if languages:
        self.language_detector = FullTextLanguageDetector(languages=languages)
    else:
        # Nothing requested (None or empty): consider every known language.
        self.language_detector = FullTextLanguageDetector(
            list(self.available_language_map.values()))

    detected_language = self.language_detector._best_language(text)
    if detected_language:
        return detected_language
    # ``settings`` defaults to None; without it there is no configured
    # fallback, so report "not detected" instead of raising AttributeError.
    if settings is None:
        return None
    return settings.DEFAULT_LANGUAGES[0] if settings.DEFAULT_LANGUAGES else None
class DateSearchWithDetection:
    """
    Class which executes language detection of string in a natural language,
    translation of a given string, search of substrings which represent date
    and/or time and parsing of these substrings.
    """

    def __init__(self):
        self.loader = LocaleDataLoader()
        self.available_language_map = self.loader.get_locale_map()
        self.search = _ExactLanguageSearch(self.loader)

    def detect_language(self, text, languages):
        """
        Detect the language of ``text``, optionally restricted to ``languages``.

        The detector that is built for the call is also stored on
        ``self.language_detector``.

        :param text: String whose language should be detected.
        :type text: str

        :param languages: A list, tuple or Set of keys of
            ``self.available_language_map``, or None. When None or empty,
            every value of ``self.available_language_map`` is a candidate.

        :return: The result of ``FullTextLanguageDetector._best_language(text)``.

        :raises: ValueError - an entry of ``languages`` is not a known language.
        :raises: TypeError - ``languages`` is neither None nor a list, tuple or Set.
        """
        if isinstance(languages, (list, tuple, Set)):
            # Collect the unknown codes in input order, without duplicates,
            # so the error message does not depend on set iteration order.
            unsupported_languages = []
            for language in languages:
                if (language not in self.available_language_map
                        and language not in unsupported_languages):
                    unsupported_languages.append(language)
            if unsupported_languages:
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
            languages = [self.available_language_map[language] for language in languages]
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(languages=languages)
        else:
            # Nothing requested (None or empty): consider every known language.
            self.language_detector = FullTextLanguageDetector(
                list(self.available_language_map.values()))
        return self.language_detector._best_language(text)

    @apply_settings
    def search_dates(self, text, languages=None, settings=None):
        """
        Find all substrings of the given string which represent date and/or time and parse them.

        :param text:
            A string in a natural language which may contain date and/or time expressions.
        :type text: str

        :param languages:
            A list of two letters language codes, e.g. ['en', 'es'].
            If languages are given, language detection is restricted to them.
        :type languages: list

        :param settings:
            Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
        :type settings: dict

        :return: a dict with the detected language under the key 'Language' and, under the
            key 'Dates', the result of the search: a list of tuples of pairs, substring
            representing date expressions and corresponding :mod:`datetime.datetime` object.
            For example:
            {'Language': 'en', 'Dates': [('on 4 October 1957', datetime.datetime(1957, 10, 4, 0, 0))]}
            If language of the string isn't recognised returns:
            {'Language': None, 'Dates': None}

        :raises: ValueError - Unknown Language
        """
        language_shortname = self.detect_language(text=text, languages=languages)
        if not language_shortname:
            return {'Language': None, 'Dates': None}
        return {'Language': language_shortname,
                'Dates': self.search.search_parse(language_shortname, text, settings=settings)}
class DateSearchWithDetection:
    """
    Class which executes language detection of string in a natural language,
    translation of a given string, search of substrings which represent date
    and/or time and parsing of these substrings.
    """

    def __init__(self):
        self.loader = LocaleDataLoader()
        self.available_language_map = self.loader.get_locale_map()
        self.search = ExactLanguageSearch(self.loader)

    def detect_language(self, text, languages):
        """
        Detect the language of ``text``, optionally restricted to ``languages``.

        The detector that is built for the call is also stored on
        ``self.language_detector``.

        :param text: String whose language should be detected.
        :type text: str|unicode

        :param languages: A list, tuple or Set of keys of
            ``self.available_language_map``, or None. When None or empty,
            every value of ``self.available_language_map`` is a candidate.

        :return: The result of ``FullTextLanguageDetector._best_language(text)``.

        :raises: ValueError - an entry of ``languages`` is not a known language.
        :raises: TypeError - ``languages`` is neither None nor a list, tuple or Set.
        """
        if isinstance(languages, (list, tuple, Set)):
            # Collect the unknown codes in input order, without duplicates,
            # so the error message does not depend on set iteration order.
            unsupported_languages = []
            for language in languages:
                if (language not in self.available_language_map
                        and language not in unsupported_languages):
                    unsupported_languages.append(language)
            if unsupported_languages:
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
            languages = [self.available_language_map[language] for language in languages]
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(languages=languages)
        else:
            # Nothing requested (None or empty): consider every known language.
            self.language_detector = FullTextLanguageDetector(
                list(self.available_language_map.values()))
        return self.language_detector._best_language(text)

    @apply_settings
    def search_dates(self, text, languages=None, settings=None):
        """
        Find all substrings of the given string which represent date and/or time and parse them.

        :param text:
            A string in a natural language which may contain date and/or time expressions.
        :type text: str|unicode

        :param languages:
            A list of two letters language codes, e.g. ['en', 'es'].
            If languages are given, language detection is restricted to them.
        :type languages: list

        :param settings:
            Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
        :type settings: dict

        :return: a dict with the detected language under the key 'Language' and, under the
            key 'Dates', the result of the search: a list of tuples of pairs, substring
            representing date expressions and corresponding :mod:`datetime.datetime` object.
            For example:
            {'Language': 'en', 'Dates': [('on 4 October 1957', datetime.datetime(1957, 10, 4, 0, 0))]}
            If language of the string isn't recognised returns:
            {'Language': None, 'Dates': None}

        :raises: ValueError - Unknown Language
        """
        language_shortname = self.detect_language(text=text, languages=languages)
        if not language_shortname:
            return {'Language': None, 'Dates': None}
        return {'Language': language_shortname,
                'Dates': self.search.search_parse(language_shortname, text, settings=settings)}
def detect_language(self, text, languages):
    """
    Detect the language of ``text``, optionally restricted to ``languages``.

    The detector that is built for the call is also stored on
    ``self.language_detector``.

    :param text: String whose language should be detected.
    :param languages: A list, tuple or Set of keys of
        ``self.available_language_map``, or None. When None or empty, every
        value of ``self.available_language_map`` is a candidate.
    :return: The result of ``FullTextLanguageDetector._best_language(text)``.
    :raises ValueError: if an entry of ``languages`` is not a key of
        ``self.available_language_map``.
    :raises TypeError: if ``languages`` is neither None nor a list, tuple
        or Set.
    """
    if isinstance(languages, (list, tuple, Set)):
        # Collect the unknown codes in input order, without duplicates, so
        # the error message does not depend on set iteration order.
        unsupported_languages = []
        for language in languages:
            if (language not in self.available_language_map
                    and language not in unsupported_languages):
                unsupported_languages.append(language)
        if unsupported_languages:
            raise ValueError(
                "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        languages = [self.available_language_map[language] for language in languages]
    elif languages is not None:
        raise TypeError("languages argument must be a list (%r given)" % type(languages))

    if languages:
        self.language_detector = FullTextLanguageDetector(languages=languages)
    else:
        # Nothing requested (None or empty): consider every known language.
        self.language_detector = FullTextLanguageDetector(
            list(self.available_language_map.values()))
    return self.language_detector._best_language(text)