示例#1
0
    def detect_language(self, text, languages):
        if isinstance(languages, (list, tuple, Set)):

            if all([
                    language in self.available_language_map
                    for language in languages
            ]):
                languages = [
                    self.available_language_map[language]
                    for language in languages
                ]
            else:
                unsupported_languages = set(languages) - set(
                    self.available_language_map.keys())
                raise ValueError("Unknown language(s): %s" %
                                 ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" %
                            type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(
                languages=languages)
        else:
            self.language_detector = FullTextLanguageDetector(
                list(self.available_language_map.values()))

        return self.language_detector._best_language(text)
示例#2
0
    def detect_language(self, text, languages, settings=None, detect_languages_function=None):
        if detect_languages_function and not languages:
            detected_languages = detect_languages_function(
                text, confidence_threshold=settings.LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD
            )
            detected_languages = map_languages(detected_languages) or settings.DEFAULT_LANGUAGES
            return detected_languages[0] if detected_languages else None

        if isinstance(languages, (list, tuple, Set)):
            if all([language in self.available_language_map for language in languages]):
                languages = [self.available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(self.available_language_map.keys())
                raise ValueError("Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(languages=languages)
        else:
            self.language_detector = FullTextLanguageDetector(list(self.available_language_map.values()))

        detected_language = self.language_detector._best_language(text) or (
            settings.DEFAULT_LANGUAGES[0] if settings.DEFAULT_LANGUAGES else None
        )
        return detected_language
示例#3
0
class DateSearchWithDetection:
    """
    Class which executes language detection of string in a natural language, translation of a given string,
    search of substrings which represent date and/or time and parsing of these substrings.

    """
    def __init__(self):
        self.loader = LocaleDataLoader()
        self.available_language_map = self.loader.get_locale_map()
        self.search = _ExactLanguageSearch(self.loader)

    def detect_language(self, text, languages):
        if isinstance(languages, (list, tuple, Set)):

            if all([language in self.available_language_map for language in languages]):
                languages = [self.available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(self.available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(languages=languages)
        else:
            self.language_detector = FullTextLanguageDetector(list(self.available_language_map.values()))

        return self.language_detector._best_language(text)

    @apply_settings
    def search_dates(self, text, languages=None, settings=None):
        """
        Find all substrings of the given string which represent date and/or time and parse them.

        :param text:
            A string in a natural language which may contain date and/or time expressions.
        :type text: str
        :param languages:
            A list of two letters language codes.e.g. ['en', 'es']. If languages are given, it will not attempt
            to detect the language.
        :type languages: list
        :param settings:
               Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
        :type settings: dict

        :return: a dict mapping keys to two letter language code and a list of tuples of pairs:
                substring representing date expressions and corresponding :mod:`datetime.datetime` object.
            For example:
            {'Language': 'en', 'Dates': [('on 4 October 1957', datetime.datetime(1957, 10, 4, 0, 0))]}
            If language of the string isn't recognised returns:
            {'Language': None, 'Dates': None}
        :raises: ValueError - Unknown Language
        """

        language_shortname = self.detect_language(text=text, languages=languages)
        if not language_shortname:
            return {'Language': None, 'Dates': None}
        return {'Language': language_shortname, 'Dates': self.search.search_parse(language_shortname, text,
                                                                                  settings=settings)}
示例#4
0
class DateSearchWithDetection:
    """
    Class which executes language detection of string in a natural language, translation of a given string,
    search of substrings which represent date and/or time and parsing of these substrings.

    """
    def __init__(self):
        self.loader = LocaleDataLoader()
        self.available_language_map = self.loader.get_locale_map()
        self.search = ExactLanguageSearch(self.loader)

    def detect_language(self, text, languages):
        if isinstance(languages, (list, tuple, Set)):

            if all([language in self.available_language_map for language in languages]):
                languages = [self.available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(self.available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(languages=languages)
        else:
            self.language_detector = FullTextLanguageDetector(list(self.available_language_map.values()))

        return self.language_detector._best_language(text)

    @apply_settings
    def search_dates(self, text, languages=None, settings=None):
        """
        Find all substrings of the given string which represent date and/or time and parse them.

        :param text:
            A string in a natural language which may contain date and/or time expressions.
        :type text: str|unicode
        :param languages:
            A list of two letters language codes.e.g. ['en', 'es']. If languages are given, it will not attempt
            to detect the language.
        :type languages: list
        :param settings:
               Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
        :type settings: dict

        :return: a dict mapping keys to two letter language code and a list of tuples of pairs:
                substring representing date expressions and corresponding :mod:`datetime.datetime` object.
            For example:
            {'Language': 'en', 'Dates': [('on 4 October 1957', datetime.datetime(1957, 10, 4, 0, 0))]}
            If language of the string isn't recognised returns:
            {'Language': None, 'Dates': None}
        :raises: ValueError - Unknown Language
        """

        language_shortname = self.detect_language(text=text, languages=languages)
        if not language_shortname:
            return {'Language': None, 'Dates': None}
        return {'Language': language_shortname, 'Dates': self.search.search_parse(language_shortname, text,
                                                                                  settings=settings)}
示例#5
0
    def detect_language(self, text, languages):
        if isinstance(languages, (list, tuple, Set)):

            if all([language in self.available_language_map for language in languages]):
                languages = [self.available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(self.available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if languages:
            self.language_detector = FullTextLanguageDetector(languages=languages)
        else:
            self.language_detector = FullTextLanguageDetector(list(self.available_language_map.values()))

        return self.language_detector._best_language(text)