Exemplo n.º 1
0
 def test_check_settings_extra_check_require_parts(self):
     # Case 1: 'time' is listed in REQUIRE_PARTS; construction is expected
     # to fail with the "invalid values" message.
     invalid_values_pattern = r'"REQUIRE_PARTS" setting contains invalid values: time'
     with self.assertRaisesRegex(SettingValidationError, invalid_values_pattern):
         DateDataParser(settings={'REQUIRE_PARTS': ['time', 'day']})

     # Case 2: 'month' is listed twice; construction is expected to fail
     # with the "repeated values" message.
     repeated_values_pattern = r'There are repeated values in the "REQUIRE_PARTS" setting'
     with self.assertRaisesRegex(SettingValidationError, repeated_values_pattern):
         DateDataParser(settings={'REQUIRE_PARTS': ['month', 'day', 'month']})
Exemplo n.º 2
0
    def test_check_settings_extra_check_parsers(self):
        # Case 1: 'no-spaces' is listed in PARSERS; construction is expected
        # to fail with the "unknown parsers" message.
        unknown_parser_pattern = r'Found unknown parsers in the "PARSERS" setting: no-spaces'
        with self.assertRaisesRegex(SettingValidationError, unknown_parser_pattern):
            DateDataParser(settings={'PARSERS': ['absolute-time', 'no-spaces']})

        # Case 2: 'absolute-time' is listed twice; construction is expected
        # to fail with the "repeated values" message.
        repeated_values_pattern = r'There are repeated values in the "PARSERS" setting'
        with self.assertRaisesRegex(SettingValidationError, repeated_values_pattern):
            DateDataParser(settings={'PARSERS': ['absolute-time', 'timestamp', 'absolute-time']})
Exemplo n.º 3
0
def test_no_spaces_strict_parsing(date_string, expected_result):
    """Check the 'no-spaces-time' parser with STRICT_PARSING off and on.

    With STRICT_PARSING disabled the parsed ``date_obj`` must equal
    ``expected_result``; with it enabled the same input must yield ``None``.
    """
    lenient_settings = {'PARSERS': ['no-spaces-time'], 'STRICT_PARSING': False}
    lenient_date = DateDataParser(settings=lenient_settings).get_date_data(date_string)['date_obj']
    assert lenient_date == expected_result

    strict_settings = {'PARSERS': ['no-spaces-time'], 'STRICT_PARSING': True}
    strict_date = DateDataParser(settings=strict_settings).get_date_data(date_string)['date_obj']
    assert strict_date is None
def _parse_date(date_string: str) -> datetime:
    """Parse ``date_string`` with a throw-away ``DateDataParser``.

    Returns the ``'date_obj'`` entry of the parser's result, or ``None``
    when the parser returns a falsy result.
    """
    from dateparser import DateDataParser

    # NOTE: a new DateDataParser is created on every call so that whatever
    # the previous call decided cannot influence this one; this keeps the
    # function stateless.
    fresh_parser = DateDataParser(try_previous_locales=False)
    parsed = fresh_parser.get_date_data(date_string, None)
    if not parsed:
        return None
    return parsed['date_obj']
Exemplo n.º 5
0
    def __init__(self, column, form):
        """Set up an operation that reformats the date values of one column.

        Parameters
        ----------
        column :
            Key used to select the column from the data frame (``df[column]``).
        form :
            ``strftime`` format string applied to every successfully parsed
            date.

        The transformation function, together with the parameter names
        ``['column', 'form']``, is handed to the parent class constructor.
        """
        # English-only parser, created once and shared by every call of fn.
        parser = DateDataParser(languages=['en'],
                                allow_redetect_language=False)

        def fn(df, column=column, format=form, parser=parser):
            """Rewrite each non-None cell of ``df[column]`` as a formatted date.

            Cells that cannot be parsed or formatted are left unchanged.
            Returns the same ``df`` object that was passed in.
            """
            # NOTE(review): the `format` argument is never read; strftime
            # uses the enclosing `form`. Kept as in the original signature.
            for i in range(df.shape[0]):
                value = df[column].iloc[i]
                # Identity check: `value != None` raises for pd.NA and for
                # array-like cells, whose truth value is ambiguous.
                if value is None:
                    continue

                try:
                    # NOTE(review): this is a chained assignment; under
                    # pandas copy-on-write it may not update `df` - confirm
                    # against the pandas version in use.
                    df[column].iloc[i] = parser.get_date_data(
                        str(value))['date_obj'].strftime(form)
                except Exception:
                    # Deliberate best-effort: keep the original cell when
                    # parsing/formatting fails (e.g. 'date_obj' is None).
                    # Unlike a bare `except:`, this does not swallow
                    # KeyboardInterrupt or SystemExit.
                    continue

            return df

        self.name = 'df = dateparse(df,' + formatString(
            column) + ',' + formatString(form) + ')'
        self.provenance = [self]

        super(DatetimeCast, self).__init__(fn, ['column', 'form'])
Exemplo n.º 6
0
    def test_check_settings(self, setting, wrong_type, wrong_value, valid_value):
        # A value of the wrong type must be rejected with a message naming
        # the setting and the offending type.
        type_error_pattern = r'"{}" must be .*, not "{}".'.format(
            setting, type(wrong_type).__name__
        )
        with self.assertRaisesRegex(SettingValidationError, type_error_pattern):
            DateDataParser(settings={setting: wrong_type})

        # When a wrong value is supplied, it must be rejected too. Square
        # brackets are escaped so that a list repr is matched literally.
        if wrong_value:
            escaped_value = str(wrong_value).replace('[', '\\[').replace(']', '\\]')
            value_error_pattern = r'"{}" is not a valid value for "{}", it should be: .*'.format(
                escaped_value, setting
            )
            with self.assertRaisesRegex(SettingValidationError, value_error_pattern):
                DateDataParser(settings={setting: wrong_value})

        # check that a valid value doesn't raise an error
        assert DateDataParser(settings={setting: valid_value})
Exemplo n.º 7
0
 def test_check_settings_extra_check_confidence_threshold(self):
     # A threshold of 1.1 is expected to be rejected with a message stating
     # that the value must lie between 0 and 1.
     out_of_range_pattern = (
         r'1.1 is not a valid value for '
         r'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD. It can take values '
         r'between 0 and 1'
     )
     with self.assertRaisesRegex(SettingValidationError, out_of_range_pattern):
         DateDataParser(settings={'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD': 1.1})
Exemplo n.º 8
0
def get_mount_number(name: str) -> int:
    """
    Return the number of the month called `name`.

    The name is parsed by `dateparser` restricted to Russian, as the string
    `1 <name>`, and the month of the resulting date is returned.

    `dateparser` is used to keep this cross-platform (developed on Mac OS,
    used on Windows): per the original author, the native solution based on
    the `calendar` module yields different names per platform, for example
    `Январь` on Windows and `января` on Mac OS.
    """
    russian_parser = DateDataParser(languages=['ru'])
    return russian_parser.get_date_data(f'1 {name}').date_obj.month
Exemplo n.º 9
0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from dateparser import DateDataParser
from functools import lru_cache

# Module-level date parser restricted to English; created once when the
# module is imported.
parser = DateDataParser(languages=['en'])


def rank(l, user):
    """Return the items of ``l`` as a new list ordered by their ``user`` attribute.

    ``user`` is accepted but not used by this implementation. The input
    iterable is not modified.
    """
    # hack hack
    ordered = list(l)
    ordered.sort(key=lambda item: item.user)
    return ordered


def uprank(l, users):
    # hack hack
    # score is a sorting order; lower comes first.
    def score(n):
        score = 0
        if n.user in users:
            # the earlier in the list a user comes, the more highly ranked it is.
            score = users.index(n.user) - len(users) - 1
Exemplo n.º 10
0
 def test_check_settings_wrong_setting_name(self):
     # 'AAAAA' is passed as a setting name; construction is expected to
     # fail with the "is not a valid setting" message.
     invalid_name_pattern = r'.* is not a valid setting'
     with self.assertRaisesRegex(SettingValidationError, invalid_name_pattern):
         DateDataParser(settings={'AAAAA': 'foo'})
Exemplo n.º 11
0
import datetime
import logging
import re

from functools import lru_cache

# coverage for date parsing
from dateparser import DateDataParser  # third-party, slow
from dateparser_data.settings import default_parsers
# Shared parser instance: prefers dates in the past, uses strict parsing, and
# runs every default parser except 'no-spaces-time', 'relative-time' and
# 'timestamp'.
# Left disabled: DATE_ORDER = 'DMY', PREFER_DAY_OF_MONTH = 'first'.
EXTERNAL_PARSER = DateDataParser(
    settings=dict(
        PREFER_DATES_FROM='past',
        STRICT_PARSING=True,
        PARSERS=[
            parser_name for parser_name in default_parsers
            if parser_name not in ('no-spaces-time', 'relative-time', 'timestamp')
        ],
    ))

from dateutil.parser import parse as dateutil_parse

# own
from .settings import CACHE_SIZE
from .validators import convert_date, date_validator

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
Exemplo n.º 12
0
from datetime import datetime
import re

from dateparser import DateDataParser
import pandas as pd

from gestion_erreurs import ajout_erreur

# create a date parser for French
DDP = DateDataParser(languages=["fr"])
# store the date of the day of execution, used to filter out badly recognised
# dates (e.g. when the extracted signature date is later than today's date)
_TODAY = datetime.now()

# Document identifier introduced by "N°". The named group `doc_id` captures:
# 4 digits, an optional "-" or "_" (optionally surrounded by single spaces),
# 4 or 5 digits, an optional "B", an optional "-", "_" or "." separator
# (again with optional single spaces), then "VDM" and an optional "A".
RE_DOC_ID = re.compile(
    r"N°[ ]*(?P<doc_id>\d{4}[ ]?[-_]?[ ]?\d{4,5}[B]?[ ]?[-_.]?[ ]?VDM[A]?)")


def extract_doc_id(doc_txt):
    """Extrait l'identifiant de l'arrêté: année_num_VDM

    année sur 4 chiffres, num sur 5 chiffres, VDM pour Ville De Marseille ?

    Parameters
    ----------
    doc_txt : string
        Texte du document

    Returns
    -------
    doc_id : string or None
Exemplo n.º 13
0
 def __init__(self):
     """Create the instance's date parser from the LANGUAGES and
     DATE_PARSER_SETTINGS attributes."""
     parser_languages = self.LANGUAGES
     parser_settings = self.DATE_PARSER_SETTINGS
     self._date_parser = DateDataParser(
         languages=parser_languages, settings=parser_settings
     )
Exemplo n.º 14
0
def test_confidence_threshold_setting_is_applied():
    """Check that LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD changes the locale.

    The same date string is parsed with two thresholds; the locale reported
    in the result is expected to be 'en' at 0.6 and 'fr' at 0.4.
    """
    for threshold, expected_locale in ((0.6, 'en'), (0.4, 'fr')):
        ddp = DateDataParser(
            detect_languages_function=detect_languages,
            settings={'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD': threshold},
        )
        assert ddp.get_date_data('21/06/2020').locale == expected_locale
Exemplo n.º 15
0
 def test_check_settings_extra_check_default_languages(self):
     # 'abcd' is passed in DEFAULT_LANGUAGES; construction is expected to
     # fail with the "invalid languages" message.
     invalid_language_pattern = "Found invalid languages in the 'DEFAULT_LANGUAGES' setting: 'abcd'"
     with self.assertRaisesRegex(SettingValidationError, invalid_language_pattern):
         DateDataParser(settings={'DEFAULT_LANGUAGES': ["abcd"]})