示例#1
0
def normalize_lay_can(date_item, rpt_date):
    """Transform non isoformat dates to isoformat.

    Supported shapes of ``date_item``:
        - an ISO-8601 string, returned untouched;
        - ``M/D <anything>`` (no year): the year comes from ``rpt_date`` and is
          shifted by one when the report is dated December and the item is in
          January, or the report is dated January and the item is in December;
        - ``M/D/YYYY HHMM``: parsed month-first; a missing or non-numeric hour
          or minute part falls back to ``00``.

    Any other shape is logged and returned unchanged.

    Examples:
        >>> normalize_lay_can('8/14 PM', '14 Aug 2019')
        '2019-08-14T00:00:00'
        >>> normalize_lay_can('2018-02-04T00:00:00', '14 Aug 2019')
        '2018-02-04T00:00:00'
        >>> normalize_lay_can('12/31 PM', '29 Dec 2019')
        '2019-12-31T00:00:00'
        >>> normalize_lay_can('12/31 PM', '01 Jan 2020')
        '2019-12-31T00:00:00'
        >>> normalize_lay_can('01/01 PM', '31 Dec 2019')
        '2020-01-01T00:00:00'
        >>> normalize_lay_can('7/06/2015 1750', '31 Dec 2019')
        '2015-07-06T17:50:00'

    Args:
        date_item (str): raw date value
        rpt_date (str): report date, parsed day-first; only consulted when
            ``date_item`` carries no year

    Returns:
        str: ISO-8601 date, or ``date_item`` itself when it cannot be handled
    """
    if is_isoformat(date_item):
        return date_item

    _date, _, _time = date_item.partition(' ')
    date_parts = _date.split('/')

    if len(date_parts) == 2:
        # the report date is only needed here, so it is only parsed here
        reported = parse_date(rpt_date, dayfirst=True)
        year = reported.year
        _month, _day = date_parts
        # rely on the parsed month rather than on 'Dec'/'Jan' substrings so
        # that the roll-over does not depend on how the report date is written
        if reported.month == 12 and _month in ('1', '01'):
            year += 1
        elif reported.month == 1 and _month == '12':
            year -= 1

        return to_isoformat(f'{_day} {_month} {year}', dayfirst=True)

    if len(date_parts) == 3:
        # time is expected as HHMM, possibly with inner spaces
        compact_time = _time.replace(' ', '')
        _hour = compact_time[:2] if compact_time[:2].isdigit() else '00'
        _min = compact_time[2:] if compact_time[2:].isdigit() else '00'
        try:
            return to_isoformat(f'{_date} {_hour}:{_min}', dayfirst=False)
        except Exception:
            logger.error('Skipping date row: %s', date_item)
            return date_item

    logger.error('Skipping date row: %s', date_item)
    return date_item
示例#2
0
def normalize_laycan(raw_laycan):
    """Normalize a laycan written as ``<day><month name><year>``.

    ISO-8601 input is returned untouched. Otherwise the value must start with
    digits, letters, digits (e.g. ``20Feb20``); a four-digit year is used as
    is, any other year is prefixed with ``20``.

    Args:
        raw_laycan (str):

    Returns:
        str | None: ISO-8601 date, or ``None`` when the value does not match
            the expected pattern or cannot be parsed

    Examples:
        >>> normalize_laycan('2020-01-01T00:00:00')
        '2020-01-01T00:00:00'
        >>> normalize_laycan('20Feb20')
        '2020-02-20T00:00:00'
        >>> normalize_laycan('20Feb2020')
        '2020-02-20T00:00:00'

    """
    if is_isoformat(raw_laycan):
        return raw_laycan

    try:
        # [A-Za-z] rather than [A-z]: the latter range also covers the
        # punctuation between 'Z' and 'a' ('[', '\', ']', '^', '_', '`')
        _match = re.match(r'(\d+)([A-Za-z]+)(\d+)', raw_laycan)
        if not _match:
            return None

        day, month, year = _match.groups()
        # blindly prefixing a four-digit year would yield e.g. '202020'
        full_year = year if len(year) == 4 else f'20{year}'
        return parse_date(f'{full_year} {month} {day}').isoformat()
    except Exception:
        # best effort: anything unparseable is reported as missing
        return None
def normalize_pc_date(date_str):
    """Clean up a portcall-related date.

    Values that are empty after ``may_strip`` or that contain any entry of
    ``STRING_BLACKLIST`` are rejected. ISO-8601 strings pass through
    untouched. For anything else, every entry of ``ABBREVIATIONS`` found by a
    regex search has its literal occurrences removed and an assumed hour is
    appended ('06:00' for 'a.m'/'am', '18:00' for any other entry); quotes,
    dots and spaces are then dropped and the result is parsed with the
    ``'%d-%b%H:%M'`` format.

    Args:
        date_str (str): raw date value

    Returns:
        str | datetime | None: the input itself when it is already ISO-8601,
            the value produced by ``datetime.strptime`` otherwise, ``None``
            for rejected input
    """
    if not may_strip(date_str):
        return None
    if any(sub in date_str for sub in STRING_BLACKLIST):
        return None

    if is_isoformat(date_str):
        return date_str

    for abbreviation in ABBREVIATIONS:
        if not re.search(abbreviation, date_str):
            continue
        date_str = date_str.replace(abbreviation, '')
        # no exact hour is given: morning is taken as 06:00, the rest as 18:00
        is_morning = abbreviation in ('a.m', 'am')
        date_str += '06:00' if is_morning else '18:00'

    cleaned = may_remove_substring(date_str, ["'", '.', ' '])
    return datetime.strptime(cleaned, '%d-%b%H:%M')
def field_mapping():
    """Describe how raw column headers map onto output fields.

    Returns:
        dict: keyed by raw header. Values are ``(output field, converter)``
            tuples, where the converter may be ``None``, or the result of
            ``ignore_key('irrelevant')`` (presumably marking columns to be
            dropped - confirm against ``ignore_key``).
    """

    def _iso_or_none(value):
        # keep the value only when it is already an ISO-8601 string
        return value if is_isoformat(value) else None

    def _number_or_none(value):
        # keep the value only when it is numeric
        return value if is_number(value) else None

    mapping = {
        'vessel': ('vessel_name', None),
        'arr': ('arrival', _iso_or_none),
        'eta': ('eta', _iso_or_none),
        'agency': ignore_key('irrelevant'),
        'shipping line': ignore_key('irrelevant'),
        'load port': ignore_key('irrelevant'),
        'origin': ignore_key('irrelevant'),
        'charterer/trader': ignore_key('irrelevant'),
        'exporter/shipper': ignore_key('irrelevant'),
        'exporter': ignore_key('irrelevant'),
        'importer': ('cargo_buyer', may_strip),
        'cargo type': ('cargo_product', None),
        'import tonnes': ('cargo_volume_dis', _number_or_none),
        'export tonnes': ('cargo_volume_load', _number_or_none),
        'remarks': ignore_key('irrelevant'),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
    return mapping
def field_mapping():
    """Describe how raw column headers map onto output fields.

    Returns:
        dict: keyed by raw header. Values are ``(output field, converter)``
            tuples, where the converter may be ``None``, or the result of
            ``ignore_key('irrelevant')`` (presumably marking columns to be
            dropped - confirm against ``ignore_key``).
    """

    def _iso_or_none(value):
        # keep the value only when it is already an ISO-8601 string
        return value if is_isoformat(value) else None

    def _number_or_none(value):
        # keep the value only when it is numeric
        return value if is_number(value) else None

    mapping = {
        'vessel': ('vessel_name', None),
        'arrival': ('arrival', _iso_or_none),
        'berthing': ('berthed', _iso_or_none),
        'departure': ('departure', _iso_or_none),
        'second atb': ('second_port_date', _iso_or_none),
        'tip(days)': ignore_key('irrelevant'),
        'load/last port': ('departure_zone', may_strip),
        'shipper': ('cargo_seller', None),
        'main receiver': ('cargo_buyer', None),
        'cargo type': ('cargo_product', None),
        'import tonnes': ('cargo_volume_disc', _number_or_none),
        'load tonnes': ('cargo_volume_load', _number_or_none),
        'next port': ignore_key('irrelevant'),
        'berth': ('raw_port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
    return mapping
示例#6
0
def normalize_lay_can(date_item):
    """Return ``date_item`` as an ISO-8601 string.

    Values already in ISO-8601 format are returned as is; everything else is
    handed to ``to_isoformat`` with day-first parsing.

    Examples:
        >>> normalize_lay_can('04-Feb-2018')
        '2018-02-04T00:00:00'
        >>> normalize_lay_can('2018-02-04T00:00:00')
        '2018-02-04T00:00:00'

    Args:
        date_item (str): raw date value

    Returns:
        str: date in ISO-8601 format
    """
    if is_isoformat(date_item):
        return date_item

    return to_isoformat(date_item, dayfirst=True)
def normalize_date(raw_date):
    """Return ``raw_date`` as an ISO-8601 string.

    Values already in ISO-8601 format are returned as is; everything else is
    handed to ``to_isoformat`` with day-first parsing.

    Examples:
        >>> normalize_date('02.02.19')
        '2019-02-02T00:00:00'

    Args:
        raw_date (str): raw date value, e.g. '02.02.19'

    Returns:
        str: date in ISO-8601 format
    """
    if is_isoformat(raw_date):
        return raw_date

    return to_isoformat(raw_date, dayfirst=True)
示例#8
0
def normalize_date(raw_date, reported_date):
    """Normalize a year-less ``<day>-<month>`` date (ETA, ETB, ETS).

    The year is borrowed from ``reported_date``. When the resulting date lies
    more than 180 days before the reported date it is moved to the next year.

    Args:
        raw_date (str): raw date such as '6-Oct', or an ISO-8601 string
        reported_date (str): date is in ISO-8601 format

    Returns:
        str | None: date in ISO-8601 format, ``None`` for empty or invalid
            input

    Examples:
        >>> normalize_date('6-Oct', '2018-09-21T00:00:00')
        '2018-10-06T00:00:00'
        >>> normalize_date('6-Jan', '2018-12-21T00:00:00')
        '2019-01-06T00:00:00'
    """
    if not raw_date:
        return None

    if is_isoformat(raw_date):
        return raw_date

    pieces = raw_date.split('-')
    if len(pieces) != 2:
        logger.warning('Not a valid date: {}'.format(raw_date))
        return None

    day, month = pieces
    # month names found in the Spanish mapping are swapped for their mapped
    # value; anything else is passed through unchanged
    month = SPANISH_MONTH_MAPPING.get(month, month)

    reference = parse_date(reported_date, dayfirst=False)

    try:
        candidate = parse_date(f'{day} {month} {reference.year}', dayfirst=True)
    except ValueError:
        logger.warning('Not a valid date: {}'.format(raw_date))
        return None

    # a date far behind the report is taken to belong to the following year
    if reference - candidate > dt.timedelta(days=180):
        candidate = candidate + relativedelta(years=1)

    return candidate.isoformat()
def normalize_dates(raw_date, raw_year):
    """Normalize a year-less ``M/D`` or ``M/D <time>`` laycan date.

    ``raw_year`` is appended to the date part before month-first parsing.
    The time part may be a clock value or one of a few markers: '2400' and
    '24:00' become '0000', 'am' becomes '0900' and 'pm' becomes '1500' (dots
    and case are ignored for am/pm).

    Args:
        raw_date (str): raw date value
        raw_year (str): year to attach to the date

    Returns:
        str: ISO-8601 date, or ``raw_date`` unchanged when it is already
            ISO-8601, has more than two space-separated parts, or cannot be
            parsed

    Examples:
        >>> normalize_dates('2/8', '2019')
        '2019-02-08T00:00:00'
        >>> normalize_dates('2/8 11:00', '2019')
        '2019-02-08T11:00:00'
    """
    if is_isoformat(raw_date):
        return raw_date

    tokens = raw_date.split(' ')

    if len(tokens) == 1:
        time_suffix = ''
    elif len(tokens) == 2:
        clock = tokens[1]
        if clock in ('2400', '24:00'):
            clock = '0000'
        else:
            # am/pm carry no exact hour, so fixed hours stand in for them
            half_day_hours = {'am': '0900', 'pm': '1500'}
            clock = half_day_hours.get(clock.replace('.', '').lower(), clock)
        time_suffix = ' ' + clock
    else:
        return raw_date

    try:
        return to_isoformat(f'{tokens[0]}/{raw_year}{time_suffix}',
                            dayfirst=False)
    except Exception:
        return raw_date
示例#10
0
def normalize_dates(raw_date):
    """Return ``raw_date`` as an ISO-8601 string when possible.

    Values already in ISO-8601 format are returned as is; everything else is
    handed to ``to_isoformat`` with day-first parsing, and any failure there
    yields ``None``.

    Args:
        raw_date (str): raw date value

    Returns:
        str | None: date in ISO-8601 format, ``None`` when conversion fails

    Examples:
        >>> normalize_dates('2019-10-10T00:00:00')
        '2019-10-10T00:00:00'
        >>> normalize_dates('11/01/2019')
        '2019-01-11T00:00:00'
        >>> normalize_dates('TBA')
    """
    if not is_isoformat(raw_date):
        try:
            raw_date = to_isoformat(raw_date, dayfirst=True)
        except Exception:
            raw_date = None

    return raw_date
示例#11
0
def normalize_dates(raw_date, rpt_date):
    """Normalize free-form date/time strings.

    The value is lower-cased and 'am'/'pm' are replaced by '0600/' and
    '1500/'. It is then handled as follows:
        - an ISO-8601 value (checked upper-cased) is returned upper-cased;
        - two numbers around a single '/' are passed to ``get_date_range``
          together with ``rpt_date``, and its first result is returned;
        - otherwise the value is split on 'hrs', '/', '(' and '-'; a numeric
          piece is taken as the time, a piece with exactly two dots as the
          date, and both are parsed day-first.

    Args:
        raw_date (str): raw date value
        rpt_date (str): reported date, only forwarded to ``get_date_range``

    Examples:
        >>> normalize_dates('1400 hrs 01.01.2020', '2020-01-01T00:00:00')
        '2020-01-01T14:00:00'
        >>> normalize_dates('2106 hrs/02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T21:06:00'
        >>> normalize_dates('2106 hrs /02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T21:06:00'
        >>> normalize_dates('02.01.2020(NOR)', '2020-01-01T00:00:00')
        '2020-01-02T00:00:00'
        >>> normalize_dates('02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T00:00:00'
        >>> normalize_dates('02.01.20', '2020-01-01T00:00:00')
        '2020-01-02T00:00:00'
        >>> normalize_dates('22.03.2020-1800', '2020-01-01T00:00:00')
        '2020-03-22T18:00:00'
        >>> normalize_dates('22.03.2020-AM', '2020-01-01T00:00:00')
        '2020-03-22T06:00:00'
        >>> normalize_dates(' am 02.01.2020', '2020-01-01T00:00:00')
        '2020-01-02T06:00:00'
        >>> normalize_dates('02.01.2020/0700 hrs', '2020-01-01T00:00:00')
        '2020-01-02T07:00:00'
        >>> normalize_dates('pm hrs 06.01.2020', '2020-01-01T00:00:00')
        '2020-01-06T15:00:00'
        >>> normalize_dates('26/01', '2020-01-01T00:00:00')
        '2020-01-26T00:00:00'
        >>> normalize_dates('26/01', '2019-12-30T00:00:00')
        '2020-01-26T00:00:00'

    Returns:
        str | None: normalized date, ``None`` when no date part is found or
            parsing fails

    """
    # turn am/pm into explicit hours followed by a separator the split below
    # understands
    raw_date = raw_date.lower().replace('am', '0600/').replace('pm', '1500/')

    as_upper = raw_date.upper()
    if is_isoformat(as_upper):
        return as_upper

    # dd/mm only: the year has to be derived from the reported date
    slash_parts = raw_date.split('/')
    if len(slash_parts) == 2 and all(is_number(part) for part in slash_parts):
        potential_date, _ = get_date_range(raw_date, '/', '-', rpt_date)
        return potential_date

    # separators matched by the capture group are kept in the split result and
    # a '-' separator yields None; empty/None pieces are dropped here, the
    # remaining separators match neither check in the loop below
    pieces = [
        may_strip(piece)
        for piece in re.split(r'(hrs /|hrs/|/ hrs|hrs|/|\()|\-', raw_date)
        if piece
    ]

    _date, _time = None, ''
    for piece in pieces:
        if is_number(piece):
            _time = piece
        elif len(piece.split('.')) == 3:
            _date = piece

    if not _date:
        return None

    try:
        return to_isoformat(may_strip(f'{_date} {_time}'), dayfirst=True)
    except Exception:
        return None