Пример #1
0
def normalize_pc_date(date_str):
    """Cleanup portcall-related date.

    Dates that are not already in ISO format have their 'am'/'pm' style
    abbreviation removed and replaced by an assumed hour of arrival, and are
    then parsed with the ``'%d-%b%H:%M'`` format.

    Args:
        date_str (str): raw portcall date

    Returns:
        str | datetime | None: ``None`` when the input is empty or contains a
            blacklisted substring; the input unchanged when ``is_isoformat``
            accepts it; otherwise the ``datetime`` parsed from the cleaned
            string.

    Raises:
        ValueError: if the cleaned, non-ISO string does not match the
            ``'%d-%b%H:%M'`` format.
    """
    if not may_strip(date_str) or any(sub in date_str
                                      for sub in STRING_BLACKLIST):
        return None

    # remove 'am' and 'pm' abbreviations in portcall-dates
    if not is_isoformat(date_str):
        for abbreviation in ABBREVIATIONS:
            # Plain substring test on purpose: abbreviations such as 'a.m'
            # contain '.', which a regex search would treat as a wildcard
            # while the removal below is a literal `str.replace`.
            if abbreviation in date_str:
                date_str = date_str.replace(abbreviation, '')
                # assume hours of arrival for 'am' and 'pm' equal to 06:00 and 18:00
                if abbreviation in ('a.m', 'am'):
                    date_str += '06:00'
                else:
                    date_str += '18:00'
        # parse raw date and format it
        date_str = may_remove_substring(date_str, ["'", '.', ' '])
        date_str = datetime.strptime(date_str, '%d-%b%H:%M')

    return date_str
Пример #2
0
def normalize_departure_zone(raw_zone):
    """Map a raw departure zone onto its normalized name.

    Persian Gulf zones collapse to the ``PERSIAN_GULF`` macro zone. Any other
    zone is cleaned of blacklisted substrings and replaced by the first
    ``ZONE_MAPPING`` entry whose alias occurs in it; when no alias matches,
    the cleaned zone itself is returned.

    Args:
        raw_zone (str):

    Returns:
        str:

    Examples:
        >>> normalize_departure_zone('BASRAH-KAA')
        'Persian Gulf'
        >>> normalize_departure_zone('BASRAH')
        'BASRAH'
        >>> normalize_departure_zone('BASRAH+1')
        'Persian Gulf'
        >>> normalize_departure_zone('BUKIT TUA+1')
        'BUKIT TUA'
        >>> normalize_departure_zone('STS YOSU')
        'YOSU'
    """
    if is_persian_gulf_zone(raw_zone):
        return PERSIAN_GULF

    without_blacklisted = may_remove_substring(raw_zone, ZONE_SUBSTR_BLACKLIST)
    cleaned = may_strip(without_blacklisted)

    # first alias contained in the cleaned zone wins, else keep the zone
    return next(
        (ZONE_MAPPING[alias]
         for alias in ZONE_MAPPING
         if alias in cleaned.lower()),
        cleaned,
    )
Пример #3
0
def normalize_date(raw_date):
    """Convert raw date to ISO 8601 format.

    The raw string is lowercased, stripped of filler tokens ('at', 'as',
    'hrs' and apostrophes), has its dashes turned into spaces and its month
    names translated before being handed to ``to_isoformat`` (day first).

    Examples:
        >>> normalize_date("03-August-2018'")
        '2018-08-03T00:00:00'
        >>> normalize_date("27-July-2018\' at 1700HRS")
        '2018-07-27T17:00:00'
        >>> normalize_date('10-Sep 2018 at 1430hrs')
        '2018-09-10T14:30:00'
        >>> normalize_date('05-out-18 1400 Hrs')
        '2018-10-05T14:00:00'
        >>> normalize_date('05-out- 18 1400 Hrs')
        '2018-10-05T14:00:00'
        >>> normalize_date('12-out-2018 as 0400HRS')
        '2018-10-12T04:00:00'

    Args:
        raw_date (str):

    Returns:
        str:

    """
    lowered = raw_date.lower()
    without_fillers = may_remove_substring(lowered, ['at', 'as', 'hrs', '\''])
    spaced = without_fillers.replace('-', ' ')
    english = translate_substrings(spaced, PORTUGESE_TO_ENGLISH_MONTHS)
    return to_isoformat(english, dayfirst=True)
Пример #4
0
def get_products_and_volumes(item):
    """Pick the product and its volume out of a raw item.

    The grade is preferred over the generic product when present. The three
    cargo keys are removed from ``item`` in the process.

    Args:
        item (Dict[str, str]):
    Returns:
        Tuple: ``(product, volume)``; the product falls back to ``[]`` when
            neither grade nor product is set, and the volume is ``[]`` unless
            both a volume and a product are available.
    Examples:
        >>> get_products_and_volumes({'cargo_grade': 'A', 'cargo_product': 'C',\
         'cargo_volume': '1000'})
        ('A', 1000.0)
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': 'C',\
         'cargo_volume': '1000'})
        ('C', 1000.0)
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': None,\
         'cargo_volume': '3000'})
        ([], [])
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': None,\
         'cargo_volume': None})
        ([], [])
    """
    grade = item.pop('cargo_grade', None)
    cargo = item.pop('cargo_product', None)
    raw_volume = item.pop('cargo_volume', None)

    # grade wins over product; nothing usable yields an empty list
    products = grade or cargo or []

    if raw_volume and products:
        volume = float(may_remove_substring(raw_volume, '.'))
    else:
        volume = []

    return products, volume
Пример #5
0
def normalize_arrival_zone(raw_zone):
    """Normalize arrival zones.

    We don't care about persian gulf macro zones when it is an arrival zone.

    The raw zone is split on '-'; each part is cleaned of blacklisted
    substrings and then replaced by the first ``ZONE_MAPPING`` entry whose
    alias occurs in it. A part with no matching alias is kept as cleaned.

    Args:
        raw_zone (str):

    Returns:
        List[str]: one normalized zone per '-'-separated part

    Examples:
        >>> normalize_arrival_zone('BASRAH-KAA')
        ['BASRAH', 'KAA']
        >>> normalize_arrival_zone('BASRAH')
        ['BASRAH']
        >>> normalize_arrival_zone('BASRAH+1')
        ['BASRAH']
        >>> normalize_arrival_zone('BUKIT TUA+1')
        ['BUKIT TUA']
        >>> normalize_arrival_zone('STS YOSU')
        ['YOSU']
    """
    arrival_zone = []
    for single_zone in raw_zone.split('-'):
        zone = may_strip(may_remove_substring(single_zone, ZONE_SUBSTR_BLACKLIST))
        for alias in ZONE_MAPPING:
            if alias in zone.lower():
                arrival_zone.append(ZONE_MAPPING[alias])
                break
        else:
            # only keep the cleaned zone when no alias matched, otherwise the
            # same zone would be listed twice (mapped name and raw name)
            arrival_zone.append(zone)

    return arrival_zone if arrival_zone else [raw_zone]
Пример #6
0
def normalize_volume_unit(raw_volume):
    """Normalize a raw volume into a float quantity and a unit.

    Commas are removed first, then the leading number (with its optional
    decimal part) is read from the start of the string. The unit is barrels
    when the raw string mentions 'BBLS' and tons otherwise.

       Args:
           raw_volume (str):
       Returns:
            float ,str: ``(None, None)`` when the input is empty, blacklisted
                or does not start with a number
       Examples:
           >>> normalize_volume_unit('N/A')
           (None, None)
           >>> normalize_volume_unit('42.0 MT')
           (42.0, 'tons')
           >>> normalize_volume_unit('42.5 MT')
           (42.5, 'tons')
           >>> normalize_volume_unit('42.0 BBLS')
           (42.0, 'barrel')
    """
    if not raw_volume or raw_volume in STRING_BLACKLIST:
        return None, None

    volume = may_remove_substring(raw_volume, ',')
    # keep the fractional part so that e.g. '42.5' is not truncated to 42
    volume_match = re.match(r'\d+(?:\.\d+)?', volume)
    if not volume_match:
        return None, None

    quantity = float(volume_match.group(0))
    unit = Unit.barrel if 'BBLS' in raw_volume else Unit.tons
    return quantity, unit
Пример #7
0
def normalize_zone(raw_zone):
    """Split a raw zone on '-' and clean every part.

    Each part has the blacklisted substrings removed and is then stripped.

    Args:
        raw_zone (str):

    Returns:
        List[str]:

    Examples:
        >>> normalize_zone('BASRAH-KAA')
        ['BASRAH', 'KAA']
        >>> normalize_zone('BASRAH')
        ['BASRAH']
        >>> normalize_zone('BASRAH+1')
        ['BASRAH']
        >>> normalize_zone('BUKIT TUA+1')
        ['BUKIT TUA']
        >>> normalize_zone('STS YOSU')
        ['YOSU']
    """
    zones = [
        may_strip(may_remove_substring(part, ZONE_SUBSTR_BLACKLIST))
        for part in raw_zone.split('-')
    ]
    return zones or [raw_zone]
Пример #8
0
def normalize_volume(raw_volume):
    """Remove ',' and 'BLS' substrings from a raw volume.

    Args:
        raw_volume (str):
    Returns:
        str|None: cleaned volume, or None when the input is empty or
            blacklisted
    """
    if raw_volume and raw_volume not in STRING_BLACKLIST:
        return may_remove_substring(raw_volume, [',', 'BLS'])

    return None
Пример #9
0
def normalize_date(date_str):
    """Normalize date information to ISO 8601 format.

    Substrings listed in ``DATESTRING_BLACKLIST`` are removed before the
    string is handed to ``to_isoformat``.

    Args:
        date_str (str): raw, fuzzy date string

    Returns:
        str:

    """
    cleaned = may_remove_substring(date_str, blacklist=DATESTRING_BLACKLIST)
    return to_isoformat(cleaned)
Пример #10
0
def normalize_berth_name(berth_str):
    """Cleanup berth name.

        Names containing a '/' are only stripped; any other name also has
        its '.' and '0' characters removed.

        Args:
            berth_str (str):

        Returns:
            str | None: string without undesired letters, or None when the
                input is empty or contains a blacklisted substring
    """
    if not may_strip(berth_str):
        return None
    if any(sub in berth_str for sub in STRING_BLACKLIST):
        return None

    berth = may_strip(berth_str)
    if '/' in berth:
        return berth
    return may_remove_substring(berth, ['.', '0'])
Пример #11
0
def split_volumes(raw_volume):
    """Split a raw volume string into its individual volumes.

    ',' and 'BLS' substrings are removed first, then the string is split on
    '/', '+' and newlines.

    Args:
        raw_volume (str):
    Returns:
        List[str]: empty when the input is empty or blacklisted
    Examples:
        >>> split_volumes('TBC')
        []
        >>> split_volumes('112/ 116')
        ['112', '116']
        >>> split_volumes('111')
        ['111']
    """
    if raw_volume and raw_volume not in STRING_BLACKLIST:
        cleaned = may_remove_substring(raw_volume, [',', 'BLS'])
        return split_by_delimiters(cleaned, '/', '+', '\n')

    return []
Пример #12
0
def arrived_field_mapping():
    """Build the field mapping for the "arrived" table.

    Returns:
        dict: raw column name mapped either to a ``(target_name, converter)``
            tuple, where the converter is a callable or None, or to the
            result of ``ignore_key`` for columns that are not used.
    """

    def _before_first_dot(x):
        # keep only what precedes the first '.'
        return x.split('.')[0]

    def _as_single_cargo(x):
        return [{'product': x}]

    def _without_ra_marker(x):
        return may_remove_substring(x, ['R/A(', ')'])

    mapping = {
        'NAMEOFVESSEL': ('vessel', None),
        'R/ON': ignore_key('r/on, not in use'),
        'LENGTH': ('vessel_length', _before_first_dot),
        'CARGO': ('cargo', _as_single_cargo),
        'LPORT': ('last_port', None),
        'FLAG': ('flag', None),
        'ARRIVALDATETIME': ('arrival', _without_ra_marker),
        # static information
        'port_name': ('port_name', None),
        'reported_date': ('reported_date', None),
        'provider_name': ('provider_name', None),
    }
    return mapping
Пример #13
0
def berthed_field_mapping():
    """Build the field mapping for the "berthed" table.

    Returns:
        dict: raw column name mapped either to a ``(target_name, converter)``
            tuple, where the converter is a callable or None, or to the
            result of ``ignore_key`` for columns that are not used.
    """

    def _without_asterisk(x):
        return may_remove_substring(x, ['*'])

    def _before_first_dot(x):
        # keep only what precedes the first '.'
        return x.split('.')[0]

    def _as_single_cargo(x):
        return [{'product': x}]

    mapping = {
        'NAMEOFVESSEL': ('vessel', _without_asterisk),
        'LENGTH': ('vessel_length', _before_first_dot),
        'CARGO': ('cargo', _as_single_cargo),
        'LASTPORT': ('last_port', None),
        'FLAG': ('flag', None),
        'LOCALAGENT': ('shipping_agent', None),
        'DTOFARRIVAL': ignore_key('arrival date, not in use'),
        'BERTHING': ('berthed_date', None),
        'LEAVING': ignore_key('leaving_date, not in use'),
        'IMPORTDISCH': ignore_key('draught, not in use'),
        # static information
        'port_name': ('port_name', None),
        'reported_date': ('reported_date', None),
        'provider_name': ('provider_name', None),
    }
    return mapping
Пример #14
0
def normalize_dept_zone(raw_dept):
    """Clean departure zone.

    Blacklisted substrings are removed and the result stripped; the first
    ``ZONE_MAPPING`` alias found in the lowercased zone decides the returned
    name, and the cleaned zone is returned when no alias matches.

    Examples:
        >>> normalize_dept_zone('RAS TAN+1')
        'Ras Tanura'
        >>> normalize_dept_zone('CHINA')
        'CHINA'

    Args:
        raw_dept (str):

    Returns:
        str:

    """
    without_blacklisted = may_remove_substring(raw_dept, ZONE_BLACKLIST)
    cleaned = may_strip(without_blacklisted)

    return next(
        (ZONE_MAPPING[alias]
         for alias in ZONE_MAPPING
         if alias in cleaned.lower()),
        cleaned,
    )
Пример #15
0
def normalize_reported_date(raw_str):
    """Normalize reported date of page at time of scraping.

    Reported date is contained as one of the page headers in this format (as of 2 April 2018):
    "PROGRAMACIÓN DE ARRIBOS AL <date> DE <spanish_month> DE <year> - 12 :00 hrs."

    The header boilerplate is removed, the remainder is split into a date
    part and a time part around ' - ', the month name is translated and the
    spaces inside the time part are dropped.

    Args:
        raw_str (str): reported date in raw form

    Returns:
        str: reported date in ISO-8601 format

    Examples:
        >>> normalize_reported_date('PROGRAMACIÓN DE ARRIBOS AL 30 DE ABRIL DE 2019 - 12 :00 hrs.')
        '2019-04-30T12:00:00'

    """
    boilerplate = ['PROGRAMACIÓN DE ARRIBOS AL', 'hrs.', 'DE']
    cleaned = may_remove_substring(raw_str, blacklist=boilerplate)

    date_part, time_part = cleaned.split(' - ')
    english_date = translate_substrings(
        date_part.lower(), translation_dict=SPANISH_TO_ENGLISH_MONTHS)
    compact_time = time_part.replace(' ', '')

    return to_isoformat(' '.join((english_date, compact_time)))
Пример #16
0
def get_products_and_volumes(item):
    """Pair the products of an item with their volumes

    Grades are preferred over generic products. Volumes are only returned
    (as floats, '.' removed) when there is exactly one per product. The
    three cargo keys are removed from ``item`` in the process.

    :param item: dict
    :returns: products and volumes; each is ``[]`` when unavailable
    Examples:
        >>> get_products_and_volumes({'cargo_grade': ['A', 'B'], 'cargo_product': ['C', 'D'],\
         'cargo_volume': ['1.000', '2.000']})
        (['A', 'B'], [1000.0, 2000.0])
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': ['C', 'D'],\
         'cargo_volume': ['1.000', '2.000']})
        (['C', 'D'], [1000.0, 2000.0])
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': ['C', 'D'],\
         'cargo_volume': ['3000']})
        (['C', 'D'], [])
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': None,\
         'cargo_volume': ['3000']})
        ([], [])
        >>> get_products_and_volumes({'cargo_grade': None, 'cargo_product': None,\
         'cargo_volume': None})
        ([], [])
    """
    grade = item.pop('cargo_grade', None)
    cargo = item.pop('cargo_product', None)
    raw_volumes = item.pop('cargo_volume', None)

    # grades win over products; nothing usable yields an empty list
    products = grade or cargo or []

    if not (raw_volumes and products):
        return products, []

    cleaned = [may_remove_substring(raw, '.') for raw in raw_volumes]
    if len(cleaned) != len(products):
        # cannot tell which volume belongs to which product
        return products, []

    return products, [float(value) for value in cleaned]
Пример #17
0
def parse_product(product_str, qty_str):
    """Parse product string into 1 or more products and corresponding quantities

    Both strings are split on '/'. When the number of products and quantities
    differ, every product gets None if any quantity is None; otherwise the
    first quantity is divided evenly across the products.
    (Does not work for rare case: k products, n quantities, where k > n and n > 1)

    If any product is listed in ``CARGO_BLACKLIST`` the whole result is
    discarded.

    Examples:
        >>> parse_product('SM/ HAVY AROMATICS', '2/1')
        (['SM', 'HAVY AROMATICS'], [2000.0, 1000.0])
        >>> parse_product('PX/MEG', '40')
        (['PX', 'MEG'], [20000.0, 20000.0])
        >>> parse_product('TIMBER', '1941 LOGS/42202 CBM')
        (['TIMBER'], [None])
        >>> parse_product('CARS', '50')
        ([], [])

    """
    products = [may_strip(part) for part in product_str.split('/')]

    quantities = []
    for raw_qty in qty_str.split('/'):
        cleaned_qty = may_strip(may_remove_substring(raw_qty, ['\'', ';']))
        quantities.append(scale_to_thousand(cleaned_qty))

    product_count = len(products)
    if product_count != len(quantities):
        if None in quantities:
            quantities = [None] * product_count
        else:
            # share the first quantity evenly between all products
            quantities = [quantities[0] / product_count] * product_count

    # filter out unwanted cargoes
    if any(prod == banned for prod in products for banned in CARGO_BLACKLIST):
        return [], []

    return products, quantities