Exemplo n.º 1
0
def portcall_mapping():
    """Describe how raw portcall fields are renamed and converted.

    Every raw key is associated either with ``ignore_key(<reason>)`` or with a
    ``(target key, converter)`` pair; the converter is ``None`` or a one-argument callable
    taking the raw value.

    Returns:
        Dict[str, Any]: mapping keyed by raw field name

    """

    def _vessel(raw):
        return {'name': raw}

    def _eta(raw):
        return to_isoformat(raw, dayfirst=True)

    def _volume(raw):
        # '-' and 'nil' (in any casing) yield no volume; otherwise commas are removed
        if raw.lower() in ('-', 'nil'):
            return None
        return raw.replace(',', '')

    def _product(raw):
        # empty values and anything containing an alias from IRRELEVANT_PRODUCTS are dropped
        if not raw:
            return None
        if any(alias in raw.lower() for alias in IRRELEVANT_PRODUCTS):
            return None
        return raw

    def _reported_date(raw):
        # only the text following the first comma is parsed
        return to_isoformat(raw.partition(',')[2])

    def _vessel_type(raw):
        if any(alias in raw.lower() for alias in IRRELEVANT_VESSEL_TYPES):
            return None
        return raw

    return {
        '0': ('vessel', _vessel),
        '1': ignore_key('shipping agent'),
        '2': ('eta', _eta),
        '3': ignore_key('time of vessel arrival, not required'),
        '4': ('cargo_volume_load', _volume),
        '5': ('cargo_product_load', _product),
        '6': ('cargo_volume_discharge', _volume),
        '7': ('cargo_product_discharge', _product),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
        'vessel_type': ('vessel_type', _vessel_type),
    }
Exemplo n.º 2
0
def grades_mapping():
    """Describe how raw grades fields are renamed and converted.

    Several spellings of the same raw header (e.g. 'Cargo', 'Cargo No', 'Cargo No.') share
    one target key and one converter.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _movement(raw):
        # values mentioning 'import' are discharges, everything else is a load
        return 'discharge' if 'import' in raw.lower() else 'load'

    def _arrival(raw):
        return to_isoformat(raw, dayfirst=True)

    def _volume(raw):
        # cargo unit is in kilobarrels
        return raw + '000' if raw else None

    def _vessel(raw):
        if 'TBN' in raw:
            return None
        return {'name': raw}

    return {
        'Cargo': ('cargo_movement', _movement),
        'Cargo No': ('cargo_movement', _movement),
        'Cargo No.': ('cargo_movement', _movement),
        'Charterer': ignore_key('charterer'),
        'Charterers': ignore_key('charterer'),
        'Date': ('arrival', _arrival),
        'Dates': ('arrival', _arrival),
        'Grade': ('cargo_product', None),
        'Next Port': ignore_key('next port'),
        'Notes': ignore_key('remarks'),
        'provider_name': ('provider_name', None),
        'port_name': ('port_name', None),
        'QTY': ('cargo_volume', _volume),
        'Quantity': ('cargo_volume', _volume),
        'reported_date': ('reported_date', None),
        'Shipper': ignore_key('irrelevant'),
        'Shipper/Receiver': ignore_key('irrelevant'),
        'Shipper/Receivers': ignore_key('irrelevant'),
        'Supp/Rcvr': ignore_key('irrelevant'),
        'Supplier': ignore_key('irrelevant'),
        'Vessel': ('vessel', _vessel),
    }
Exemplo n.º 3
0
def _build_lay_can_date(date_tuple, reported):
    """Turn a year-less lay can tuple into a pair of ISO dates.

    The year is taken from the reported date and shifted by one when the lay can month sits
    on the other side of a year boundary:
        -------------------------------------------------------
        | Lay can month   |   Reported date  |   Lay can year |
        | Dec             |   1 Jan 2019     |   2018         |
        | Jan             |   28 Dec 2018    |   2019         |
        -------------------------------------------------------

    Args:
        date_tuple (Tuple[str, str, str]): (month, start day, end day), end day might be empty
        reported (str): reported date; the rollover check looks for the literal substrings
            'Jan' / 'Dec' in it

    Returns:
        Tuple[str, Optional[str]]: first date, second date (``None`` when end day is empty)

    """
    year = parse_date(reported).year
    month, first_day, second_day = date_tuple

    if month == 'Dec' and 'Jan' in reported:
        # lay can in December, reported in January: previous year
        year -= 1
    elif month == 'Jan' and 'Dec' in reported:
        # lay can in January, reported in December: next year
        year += 1

    def _as_iso(day):
        return to_isoformat(f'{day} {month} {year}', dayfirst=True)

    lay_can_start = _as_iso(first_day)
    lay_can_end = _as_iso(second_day) if second_day else None
    return lay_can_start, lay_can_end
Exemplo n.º 4
0
def key_mapping():
    """Describe how raw port schedule fields are renamed and converted.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _installation(raw):
        # lookup is done on the lower-cased berth name; unknown berths give None
        return INSTALLATION_MAPPING.get(raw.lower())

    def _year_first_date(raw):
        return to_isoformat(raw, dayfirst=False, yearfirst=True)

    def _vessel(raw):
        return {'name': raw}

    def _cargoes(raw):
        return list(build_cargoes(raw))

    return {
        'Berth': ('installation', _installation),
        'Berth number': ignore_key('irrelevant'),
        'Destination': ignore_key('not required yet'),
        'Entry date': ('berthed', _year_first_date),
        'Expected berth': ('installation', None),
        'Forecast arrival': ('eta', _year_first_date),
        'Forecast departure': ignore_key('not required yet'),
        'From': ignore_key('not required yet'),
        'Name of vessel': ('vessel', _vessel),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'Scheduled line': ignore_key('irrelevant'),
        'Shipping agent': ignore_key('irrelevant'),
        'Shipflow-paq_id': ignore_key('irrelevant'),
        'shipflow-operation': ('cargoes', _cargoes),
    }
Exemplo n.º 5
0
def process_item(raw_item: Dict[str, str]) -> Dict[str, str]:
    """Transform raw item into a usable event.

    Depending on ``raw_item['table_label']`` one raw date column is popped, converted with
    ``to_isoformat(..., dayfirst=True)`` and stored under an event key, and the cargo column
    is parsed into ``raw_item['cargoes']``. ``raw_item`` is modified in place before being
    passed to ``map_keys``. An unknown label is only logged; mapping still happens.

    Args:
        raw_item: raw table row; must contain 'table_label'

    Returns:
        mapped item whose 'vessel_name' entry has been replaced by ``{'name': ...}``
        under 'vessel'

    """
    # (table label, raw date column, event key receiving the converted date)
    date_rules = (
        ('VESSELS EXPECTED TO ARRIVE PORT', 'ETA', 'arrival'),
        ('VESSELS AT BERTH FOR  LOADING', 'Arrival Date', 'berthed'),
        ('VESSELS AT BERTH FOR  DISCHARGE', 'Berth Date', 'berthed'),
        ('VESSELS WAITING FOR BERTH', 'Arrival Date', 'arrival'),
    )

    table_label = raw_item['table_label']
    for label, date_column, event_key in date_rules:
        if table_label == label:
            raw_date = raw_item.pop(date_column)
            raw_item[event_key] = to_isoformat(raw_date, dayfirst=True)
            raw_item['cargoes'] = process_cargo_list_items(
                raw_item['Activity / Cargo / Quantity']
            )
            break
    else:
        logger.error(f"unexpected label {table_label}")

    item = map_keys(raw_item, portcall_mapping())
    vessel_name = item.pop('vessel_name')
    item['vessel'] = {'name': vessel_name}

    return item
Exemplo n.º 6
0
def field_mapping():
    """Describe how raw (Spanish-named) portcall fields are renamed and converted.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _length(raw):
        return try_apply(raw, float, int, str)

    def _optional_date(raw):
        # falsy raw values are turned into None instead of being parsed
        return to_isoformat(raw, dayfirst=True) if raw else None

    def _gross_tonnage(raw):
        return try_apply(raw, str)

    def _cargoes(raw):
        return list(normalize_cargoes(raw))

    def _reported_date(raw):
        return to_isoformat(raw, dayfirst=True)

    return {
        'bandera': ignore_key('vessel flag'),
        'calent': ignore_key('vessel draught'),
        'codbuq': ('vessel_imo', normalize_imo),
        'desmue': ignore_key('berth'),
        'destipbuq': ignore_key('vessel type'),
        'eslora': ('vessel_length', _length),
        'fecatr': ('eta', _optional_date),
        'fecsal': ('departure', _optional_date),
        'gt': ('vessel_gross_tonnage', _gross_tonnage),
        'nombuq': ('vessel_name', None),
        'nomcsg': ignore_key('shipping agent'),
        'operaciones': ('cargoes', _cargoes),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
    }
Exemplo n.º 7
0
def normalize_date(raw_date):
    """Normalize date.

    The raw string is accepted when it matches ``MATCHING_DATE_PATTERN_EXACT`` as a whole, or
    when its first whitespace-separated token matches ``MATCHING_DATE_PATTERN_FUZZY``. In both
    cases the full raw string is parsed day-first. Anything else is logged and dropped.

    Examples:
        >>> normalize_date('03/10/2018 11:30')
        '2018-10-03T11:30:00'
        >>> normalize_date('03/10/2018')
        '2018-10-03T00:00:00'
        >>> normalize_date('SIN PROGRAMA')

    Args:
        raw_date (str):

    Returns:
        str: ISO 8601 format, or None when no pattern matches

    """
    recognised = re.match(MATCHING_DATE_PATTERN_EXACT, raw_date)

    if not recognised:
        # fall back to checking only the leading token (date given without a time)
        tokens = raw_date.split()
        recognised = bool(tokens) and re.match(MATCHING_DATE_PATTERN_FUZZY, tokens[0])

    if not recognised:
        # matching date is missing from the table
        logger.warning('Unable to match date: {}'.format(raw_date))
        return None

    return to_isoformat(raw_date, dayfirst=True)
Exemplo n.º 8
0
def portcall_mapping():
    """Describe how raw portcall fields are renamed and converted.

    Both 'Forecast' and 'Since' feed the same 'matching_date' target key.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _cargoes(raw):
        return [map_keys(cargo, cargo_mapping()) for cargo in raw]

    def _matching_date(raw):
        return to_isoformat(raw, dayfirst=False)

    def _installation(raw):
        # unknown terminals fall back to the raw (unstripped) value
        return INSTALLATION_MAPPING.get(may_strip(raw), raw)

    def _event(raw):
        return EVENT_MAPPING.get(raw)

    return {
        'ARMADOR': ignore_key('armador ?'),
        'Agent': ('shipping_agent', may_strip),
        'Call': ignore_key('internal portcall number'),
        'Call sign': ignore_key('vessel callsign'),
        'cargoes': ('cargoes', _cargoes),
        'Commander': ignore_key('commander ?'),
        'Construction year': ignore_key('vessel build year'),
        'Countermark': ignore_key('countermark ?'),
        'Destiny': ignore_key('TODO use next zone to forecast future ETAs after this port'),
        'Flag': ignore_key('vessel flag'),
        'Forecast': ('matching_date', _matching_date),
        'IMO': ('vessel_imo', may_strip),
        'Navigation type': ignore_key('navigation type ?'),
        'Nº travel': ignore_key('travel ?'),
        'Origin': ignore_key('previous port of call'),
        'Origin / Destiny': ignore_key('previous/next port of call'),
        'Port register': ignore_key('port register'),
        'port_name': ('port_name', None),
        'Protective agent': ignore_key('protective agent ?'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'Scale': ignore_key('scale ?'),
        'Ship name': ('vessel_name', may_strip),
        'Ship type': ('vessel_type', None),
        'Since': ('matching_date', _matching_date),
        'Terminal': ('installation', _installation),
        'Teus': ignore_key('TEUs; only relevant for container vessels'),
        'title': ('event', _event),
    }
Exemplo n.º 9
0
def portcall_mapping():
    """Describe how raw (Spanish-named) manifest fields are renamed and converted.

    The vessel name header is listed twice: once correctly spelled and once with the
    accented character replaced by U+FFFD, so both variants map to 'vessel_name'.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _cargoes(raw):
        return process_cargo(raw)

    def _day_first_date(raw):
        return to_isoformat(raw, dayfirst=True)

    return {
        'cargoes': ('cargoes', _cargoes),
        'Empresa de Transporte': ignore_key('shipping agent'),
        'Fecha de Descarga:': ignore_key('departure date'),
        'Fecha de Llegada:': ('eta', _day_first_date),
        'Manifiesto': ignore_key('manifest number'),
        'Matr�cula de la Nave': ('vessel_name', None),
        'Matrícula de la Nave': ('vessel_name', None),
        'Nacionalidad:': ('vessel_flag', None),
        'No Bultos:': ignore_key('irrelevant'),
        'No Detalles:': ignore_key('unknown'),
        'P.Bruto:': ignore_key('unknown'),
        'port_names': ('port_names', None),
        'provider_name': ('provider_name', None),
        'raw_cargoes': ignore_key('raw cargo; not needed anymore'),
        'reported_date': ('reported_date', _day_first_date),
    }
Exemplo n.º 10
0
def field_mapping():
    """Describe how raw berth schedule fields are renamed and converted.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _vessel_name(raw):
        # names having 'TBN' as a standalone token are discarded
        if 'TBN' in raw.split():
            return None
        return raw

    def _day_first_date(raw):
        return to_isoformat(raw, dayfirst=True)

    return {
        'Berth': ignore_key('berth'),
        'Vessel Name': ('vessel_name', _vessel_name),
        'Flag': ignore_key('ignore flag'),
        'Agent': ignore_key('agent'),
        'Cargo Inspection': ignore_key('cargo inspection'),
        'Destination(s)': ignore_key('destination'),
        'Order Number(s)': ignore_key('order number'),
        'Product Grade(s)': ('products', None),
        'Product Grade': ('products', None),
        'Loading/Discharge\nStart': ('berthed', _day_first_date),
        'Loading/Discharge\nFinished (Est.)': ('departure', _day_first_date),
        'Anchored Time': ('arrival', _day_first_date),
        'LOA': ('length', None),
        'Dead Weight': ('dwt', None),
        'ETA': ('eta', _day_first_date),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', to_isoformat),
        'port_name': ('port_name', None),
    }
Exemplo n.º 11
0
def field_mapping():
    """Describe how raw scheduled-portcall fields are renamed and converted.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _day_first_date(raw):
        return to_isoformat(raw, dayfirst=True)

    def _flag_code(raw):
        # keep only what precedes the first '-'
        return raw.partition('-')[0]

    return {
        # NOTE since we are only scraping scheduled/future portcalls, we hardcode 'eta'
        'ship_name': ('vessel_name', None),
        'pier_Assigned': ('berth', None),
        'ship_type': ignore_key('vessel type'),
        'date_arrival': ('eta', _day_first_date),
        'date_departure': ('departure', _day_first_date),
        'depth': ignore_key('depth'),
        'Ship name': ignore_key('Ship name'),
        'Indicative Radius:': ignore_key('Indicative Radius:'),
        'IMO Number:': ('imo', None),
        'GT London 1969:': ignore_key('irrelevant'),
        'GT:': ('gross_tonnage', None),
        'Country:': ('flag_code', _flag_code),
        'Consignee:': ignore_key('consignee'),
        'Shipowner:': ignore_key('companies'),
        'Ship type:': ('vessel_type', None),
        'Tanks:': ignore_key('Tanks:'),
        'Beam:': ('beam', None),
        'Length:': ('length', None),
        'Draught:': ignore_key('Draught:'),
        'provider_name': ('provider_name', None),
        'port_name': ('port_name', None),
    }
Exemplo n.º 12
0
def parse_expected_vessels(row, reported_date, current_port, provider):
    """Yield one event per cargo found in an expected-vessel row.

    Nothing is yielded when the row's movement has no truthy entry in ``MOVEMENT_MAPPING``
    or when its eta is empty after ``may_strip``. A product named 'LPG' is emitted as two
    events, 'Butane' and 'Propane', each carrying the quantity floor-divided by two.

    Args:
        row (Dict[str, str]): raw row; keys 'e_movement', 'e_eta', 'e_cargo', 'e_qty' and
            'e_vessel' are read
        reported_date (str):
        current_port (str): translated through ``PORT_MAPPING``, kept as-is when unknown
        provider (str):

    Yields:
        Dict[str, Any]:

    """
    if not MOVEMENT_MAPPING.get(row['e_movement']):
        return
    if not may_strip(row['e_eta']):
        return

    def _halve(amount):
        return amount // 2

    def _build_event(product_name, raw_quantity, *converters):
        # every field is recomputed per event, as each event is an independent dict
        return {
            'reported_date': to_isoformat(reported_date),
            'eta': normalize_date(reported_date, row['e_eta']),
            'port_name': PORT_MAPPING.get(current_port, current_port),
            'provider_name': provider,
            'cargo': {
                'product': product_name,
                'movement': MOVEMENT_MAPPING.get(row['e_movement']),
                'volume': try_apply(raw_quantity, *converters),
                'volume_unit': Unit.tons,
            },
            'vessel': {'name': row['e_vessel']},
        }

    parsed = parse_product(map_spelling(row['e_cargo']), row['e_qty'])
    for product, quantity in zip(*parsed):
        if product != 'LPG':
            yield _build_event(product, quantity, int, str)
            continue

        for split_product in ['Butane', 'Propane']:
            yield _build_event(split_product, quantity, int, _halve, str)
Exemplo n.º 13
0
def field_mapping():
    """Describe how raw import/export report fields are renamed and converted.

    Several raw headers are alternative spellings of one field (including 'Volume (mt) '
    with a trailing space) and share a target key.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _eta(raw):
        return to_isoformat(raw, dayfirst=False)

    def _port_name(raw):
        # every occurrence of the word 'Port' is removed before stripping
        return may_strip(raw.replace('Port', ''))

    def _movement(raw):
        return MOVEMENT_MAPPING.get(raw)

    def _vessel(raw):
        if raw in VESSEL_NAME_BLACKLIST:
            return None
        return {'name': raw}

    return {
        'Crude Type': ('cargo_product', None),
        'Date': ('eta', _eta),
        'Estimated Date of Arrival': ('eta', _eta),
        'Estimated Date of loading/unloading': ('eta', _eta),
        'Importer': ignore_key('not useful right now'),
        'Origin': ignore_key('not useful right now'),
        'Port': ('port_name', _port_name),
        'Product': ('cargo_product', None),
        'provider_name': ('provider_name', None),
        'Purchaser': ignore_key('not useful right now'),
        'Remarks': ignore_key('Not important right now'),
        'reported_date': ('reported_date', None),
        'Seller': ignore_key('Not important right now'),
        'Trade mode': ('cargo_movement', _movement),
        'Trade Mode': ('cargo_movement', _movement),
        'Vessel': ('vessel', _vessel),
        'Vessel Type': ignore_key('redundant'),
        'Volume': ('cargo_volume', None),
        'Volume (mt)': ('cargo_volume', None),
        'Volume (mt) ': ('cargo_volume', None),
    }
Exemplo n.º 14
0
    def get_reported_date(self, response, event_type):
        """Extract reported date from a specific area on the 1st page

        Args:
            response (scrapy.HtmlResponse):
            event_type (str): used to filter type of pdf, either 'berthed' or 'eta'

        Returns:
            str: reported date in ISO-8601 compatible format; None for any other event type

        Raises:
            IndexError: if no line of the extracted area carries the expected markers

        """
        if event_type == 'berthed':
            date_area = self.extract_pdf_table(
                response, information_parser=None, **parser.BERTHED_DATE_AREA
            )
            # Spaces are removed so that the text following 'DATED:' starts right at the date.
            # Both markers must be tested explicitly: `'DATED' and 'VESSELS' in text` only
            # checked for 'VESSELS', since a non-empty string literal is always truthy.
            date = [
                str(line).replace(' ', '').split('DATED:')
                for line in date_area
                if 'DATED' in str(line) and 'VESSELS' in str(line)
            ]
            # only the first 10 characters after the marker are parsed
            return to_isoformat(date[0][1][:10])

        elif event_type == 'eta':
            date_area = self.extract_pdf_table(
                response, information_parser=None, **parser.ETA_DATE_AREA
            )
            date = [str(line).split(':') for line in date_area if 'DATED' in str(line)]
            # the date is whatever sits between the first and second ':'
            return to_isoformat(date[0][1])

        return None
Exemplo n.º 15
0
def normalize_eta(raw_eta, month, year):
    """Normalize raw eta date to an ISO-8601 compatible string.

    The first run of six digits found in ``raw_eta`` is read as two digits for the day
    followed by four for the time. If parsing raises ``ValueError``, the previous day is
    tried once.

    Args:
        raw_eta (str):
        month (int):
        year (int):

    Returns:
        str: ISO-8601 compatible string, or None when no six-digit run is found

    Examples:
        >>> normalize_eta('010830', 8, 2018)
        '2018-08-01T08:30:00'
        >>> normalize_eta(')231500', 9, 2018)
        '2018-09-23T15:00:00'

    """
    found = re.search(r'\d{6}', raw_eta)
    if not found:
        logger.warning(f'Eta date is invalid: {raw_eta}')
        return None

    digits = found.group()
    day = digits[:2]
    time = digits[2:]

    try:
        return to_isoformat(f'{day}/{month}/{year} {time}', dayfirst=True)
    except ValueError:
        # retry with the day before
        return to_isoformat(f'{int(day) - 1}/{month}/{year} {time}', dayfirst=True)
Exemplo n.º 16
0
def portcall_mapping():
    """Describe how raw portcall fields are renamed and converted.

    Alternative raw headers for the same field (e.g. 'Agent', 'Vessel Agent',
    'Vessels Agent') share a target key.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _vessel_type(raw):
        # unknown values are kept unchanged
        return VESSEL_TYPE_MAPPING.get(raw, raw)

    def _movement(raw):
        # unknown values are kept unchanged
        return MOVEMENT_MAPPING.get(raw, raw)

    def _date(raw):
        return to_isoformat(raw, dayfirst=False)

    return {
        'Berth': ('berth', None),
        'VCN No.': ignore_key('irrelevant'),
        'Vessels name': ('vessel_name', normalize_vessel_name),
        'Cargo': ('cargo_product', None),
        'Cargo Type': ('vessel_type', _vessel_type),
        'Qty in MT': ('cargo_volume', None),
        'Qty MT': ('cargo_volume', None),
        'Type': ('cargo_movement', _movement),
        'Import Export': ('cargo_movement', None),
        'IEB': ('cargo_movement', None),
        'Agent': ('shipping_agent', None),
        'Vessel Agent': ('shipping_agent', None),
        'Vessels Agent': ('shipping_agent', None),
        'ETD': ('departure', _date),
        'ETA': ('eta', _date),
        'Anch': ('eta', _date),
        'PCS Anchored': ignore_key('irrelevant'),
        'PCS': ignore_key('irrelevant'),
        'Pilot Request Time': ignore_key('irrelevant'),
        'Remark': ignore_key('irrelevant'),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
Exemplo n.º 17
0
def charter_mapping():
    """Describe how raw charter fields are converted (keys keep their names).

    Returns:
        Dict[str, Tuple[str, Optional[Callable]]]: raw field name mapped to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _imo(raw):
        return may_apply(raw, float, int, str)

    def _integer(raw):
        return may_apply(raw, float, int)

    def _status(raw):
        # lookup is done on the lower-cased value; unknown statuses are kept unchanged
        if not raw:
            return None
        return STATUS_MAPPING.get(raw.lower(), raw)

    def _lay_can(raw):
        return to_isoformat(raw, dayfirst=False, yearfirst=True)

    def _arrival_zones(raw):
        if not raw:
            return None
        return may_strip(raw).split('-')

    def _unit(raw):
        # lookup is done on the lower-cased value; unknown units are kept unchanged
        if not raw:
            return None
        return UNIT_MAPPING.get(raw.lower(), raw)

    def _reported_date(raw):
        return parse_date(raw).strftime('%d %b %Y')

    return {
        'vessel_name': ('vessel_name', may_strip),
        'vessel_imo': ('vessel_imo', _imo),
        'vessel_length': ('vessel_length', _integer),
        'vessel_dwt': ('vessel_dwt', _integer),
        'charterer': ('charterer', may_strip),
        'status': ('status', _status),
        'lay_can_start': ('lay_can_start', _lay_can),
        'lay_can_end': ('lay_can_end', _lay_can),
        'rate_value': ('rate_value', may_strip),
        'rate_raw_value': ('rate_raw_value', may_strip),
        'departure_zone': ('departure_zone', may_strip),
        'arrival_zone': ('arrival_zone', _arrival_zones),
        'cargo_product': ('cargo_product', may_strip),
        'cargo_movement': ('cargo_movement', None),
        'cargo_volume': ('cargo_volume', None),
        'cargo_unit': ('cargo_unit', _unit),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
    }
Exemplo n.º 18
0
def grades_mapping():
    """Describe how raw grades fields are converted (keys keep their names).

    Returns:
        Dict[str, Tuple[str, Optional[Callable]]]: raw field name mapped to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _year_first_date(raw):
        return to_isoformat(raw, dayfirst=False, yearfirst=True)

    def _imo(raw):
        return may_apply(raw, float, int, str)

    def _integer(raw):
        return may_apply(raw, float, int)

    def _unit(raw):
        # lookup is done on the lower-cased value; unknown units are kept unchanged
        if not raw:
            return None
        return UNIT_MAPPING.get(raw.lower(), raw)

    return {
        'port_name': ('port_name', may_strip),
        'berthed': ('berthed', _year_first_date),
        'eta': ('eta', _year_first_date),
        'departure': ('departure', _year_first_date),
        'arrival': ('arrival', _year_first_date),
        'vessel_name': ('vessel_name', may_strip),
        'vessel_imo': ('vessel_imo', _imo),
        'vessel_length': ('vessel_length', _integer),
        'vessel_dwt': ('vessel_dwt', _integer),
        'cargo_product': ('cargo_product', may_strip),
        'cargo_movement': ('cargo_movement', may_strip),
        'cargo_volume': ('cargo_volume', may_strip),
        'cargo_unit': ('cargo_unit', _unit),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _year_first_date),
        'cargo_seller': ('cargo_seller', may_strip),
        'cargo_buyer': ('cargo_buyer', may_strip),
    }
Exemplo n.º 19
0
def normalize_arrival_date(date, time):
    """Combine date and time, convert them to ISO 8601 format.

    A date found in ``NO_VALUE_SIGN`` gives no result at all; a time found in
    ``NO_VALUE_SIGN`` is left out and only the date is parsed.

    Examples:
        >>> normalize_arrival_date('09.09.18', '0500')
        '2018-09-09T05:00:00'
        >>> normalize_arrival_date('09.09.18', '-')
        '2018-09-09T00:00:00'
        >>> normalize_arrival_date('-', '-')

    Args:
        date (str):
        time (str):

    Returns:
        str: ISO 8601 date, or None when there is no date

    """
    if date in NO_VALUE_SIGN:
        return None

    raw = date if time in NO_VALUE_SIGN else ' '.join([date, time])
    return to_isoformat(raw, dayfirst=True)
Exemplo n.º 20
0
def portcall_mapping():
    """Describe how raw portcall fields are renamed and converted.

    Named headers belong to the ships-expected tables; the numeric string keys '0'..'5'
    belong to the waiters table.

    Returns:
        Dict[str, Any]: raw field name mapped to ``ignore_key(<reason>)`` or to a
            ``(target key, converter)`` pair (converter may be ``None``)

    """

    def _day_first_date(raw):
        return to_isoformat(raw, dayfirst=True)

    return {
        # ships expected tables
        'VESSEL NAME': ('vessel_name', normalize_vessel_name),
        'VES.SCHEDULE': ignore_key('vessel schedule'),
        'CALL SIGN': ('call_sign', None),
        'VOYAGE NO.': ignore_key('voyage number'),
        'ETA': ('eta', _day_first_date),
        'LOA': ('vessel_length', normalize_numeric_value),
        'DRAFT': ignore_key('draught'),
        'AGENT': ('shipping_agent', None),
        'DISCH': ('disch_volume', normalize_numeric_value),
        'LOAD': ('load_volume', normalize_numeric_value),
        'FBW': ignore_key('unknown'),
        'BER': ignore_key('unknown'),
        'BOOKED': ignore_key('booked date'),
        'REMARKS': ('remarks', None),
        # waiters
        '0': ('arrival', _day_first_date),
        '1': ('vessel_name', None),
        '2': ('vessel_length', normalize_numeric_value),
        '3': ignore_key('draught'),
        '4': ignore_key('unknown'),
        '5': ('remarks', None),
        # meta info
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
Exemplo n.º 21
0
    def _process_barcelona_rows(self, table):
        """Process rows for barcelona port.

        Each row is copied, and rows that carry table data are yielded with one extra cell:
        the literal 'matching_date' for the very first row, otherwise the matching date of
        the section the row belongs to. Section description rows and rows containing a
        '/EXPORT' cell are not yielded.

        Known table section headers (each section has a different matching date):
            - "Buques que efectuaron operaciones durante la noche del día anterior"
            - "Buques que efectuaron operaciones durante las 20 horas del viernes 16.3.18 hata el domingo 18.3.18"  # noqa
            - "Buques que efectuaron operaciones durante el lunes 18.3.18"
            - "Ultima hora"

        Args:
            table (List[List[str]]): list of table rows from pdf

        Yields:
            List[str]:

        Raises:
            ValueError: if a "Buques que efectuaron" header has neither an explicit date nor
                the word 'anterior'

        """
        section_marker = 'Buques que efectuaron'
        last_hour_marker = 'ltima hora'

        matching_date = None
        for position, source_row in enumerate(table):
            # copy, so that appending below does not touch the row held by `table`
            cells = list(source_row)

            is_section_header = any(section_marker in cell for cell in cells)
            is_last_hour = False

            if is_section_header:
                # try deciphering matching_date of subsequent rows
                description = ''.join(cells)
                explicit_date = re.search(r'(\d{1,2}\.\d{1,2}\.\d{2})', description)
                if explicit_date:
                    # matching date is mentioned explicitly in table section description
                    matching_date = to_isoformat(explicit_date.group(1))
                elif 'anterior' in description:
                    # matching date is given in words: the day before the reported date
                    previous_day = date_parse(self.reported_date, dayfirst=False) - timedelta(
                        days=1
                    )
                    matching_date = to_isoformat(str(previous_day), dayfirst=False)
                else:
                    raise ValueError('Unable to find matching date: {}'.format(description))
                logger.debug('Found matching date: {}'.format(matching_date))
            else:
                is_last_hour = any(last_hour_marker in cell for cell in cells)
                if is_last_hour:
                    matching_date = self.reported_date
                    logger.debug('Found matching date: {}'.format(matching_date))

            # do not yield table section headers
            if '/EXPORT' in cells or is_section_header or is_last_hour:
                continue

            cells.append('matching_date' if position == 0 else matching_date)
            yield cells
Exemplo n.º 22
0
def normalize_lay_can(raw_lay_can, reported):
    """Normalize lay can date with reported year as reference.

    In this report, the lay can date can vary differently, however, we only extract below formats:
    - 31-02/02 format 1 with rollover
    - 02-04/02 format 1
    - 02/02 format 2

    Rollover dates have not been identified yet

    Examples:
        >>> normalize_lay_can('31-02/02', '22 JAN 2019')
        ('2019-01-31T00:00:00', '2019-02-02T00:00:00')
        >>> normalize_lay_can('02-04/02', '22 JAN 2019')
        ('2019-02-02T00:00:00', '2019-02-04T00:00:00')
        >>> normalize_lay_can('02/02', '22 JAN 2019')
        ('2019-02-02T00:00:00', '2019-02-02T00:00:00')

    Args:
        raw_lay_can (str)
        reported (str)

    Returns:
        Tuple[str, str]: (lay can start, lay can end); (None, None) for unknown patterns

    """
    # format 1: <start day>-<end day>/<month>
    _match_1 = re.match(r'^(\d{1,2})\-(\d{1,2})\/(\d{1,2})$', raw_lay_can)
    if _match_1:
        start_day, end_day, month = _match_1.groups()
        year = _get_year(month, reported)

        # handle typo errors in dates
        start_day = end_day if int(start_day) > 31 else start_day

        # to accomodate end dates for february: when the start day cannot be parsed in the
        # given month, it is parsed in the month before instead
        try:
            start = to_isoformat(f'{start_day} {month} {year}', dayfirst=True)
        except Exception:
            start = to_isoformat(f'{start_day} {int(month) - 1} {year}', dayfirst=True)

        end = to_isoformat(f'{end_day} {month} {year}', dayfirst=True)

        return start, end

    # format 2
    _match_2 = re.match(r'^(\d{1,2})\/(\d{1,2})$', raw_lay_can)
    if _match_2:
        # NOTE(review): the first group is read as the month here, whereas format 1 puts the
        # month after the slash - confirm which order the report really uses
        month, start_day = _match_2.groups()
        year = _get_year(month, reported)
        start = to_isoformat(f'{start_day} {month} {year}', dayfirst=True)

        return start, start

    # other undiscovered case
    # `logger.exception` is meant for `except` blocks only; outside of one it attaches an
    # empty "NoneType: None" traceback, so the same ERROR level is logged without it
    logger.error(f'Unknown lay can date pattern: {raw_lay_can}')

    return None, None
Exemplo n.º 23
0
def normalize_lay_can(raw_lay_can, reported):
    """Normalize raw lay can date with reference of reported year.

    Raw laycan inputs can be of the following formats:
        1) single day: '08-SEP'
        2) duration days: '10-11 OCT'
        3) month cross: '30 OCT-2 NOV'
        4) month only: 'AUG'

    Examples:
        >>> normalize_lay_can('08-SEP','1 Sep 2018')
        ('2018-09-08T00:00:00', '2018-09-08T00:00:00')
        >>> normalize_lay_can('10-11 OCT', '10 Oct 2018')
        ('2018-10-10T00:00:00', '2018-10-11T00:00:00')
        >>> normalize_lay_can('30 OCT-2 NOV', '10 Oct 2018')
        ('2018-10-30T00:00:00', '2018-11-02T00:00:00')
        >>> normalize_lay_can('AUG','28 Aug 2018')
        (None, None)
        >>> normalize_lay_can('28 Dec-1 Jan', '25 Dec 2018')
        ('2018-12-28T00:00:00', '2019-01-01T00:00:00')

    Args:
        raw_lay_can (str):
        reported (str): reported date

    Returns:
        Tuple[str, str]: tuple of lay can period, (lay can start, lay can end);
            (None, None) for month-only and unknown inputs

    """
    # format 1, 2
    _match = re.match(r'(^\d{1,2}).(\d{1,2}.)?([A-Za-z]{3,4}$)', raw_lay_can)
    if _match:
        # end_day, when present, still carries the separator that follows it
        start_day, end_day, month = _match.groups()
        year = _get_year(month, reported)
        start = to_isoformat(f'{start_day} {month} {year}', dayfirst=True)
        end = to_isoformat(f'{end_day} {month} {year}', dayfirst=True) if end_day else start

        return start, end

    # format 3
    _findall = re.findall(r'\d{1,2} [A-Za-z]{3,4}', raw_lay_can)
    if len(_findall) == 2:
        start_date, end_date = _findall
        # each side gets its own year, so that a period may span a year boundary
        start_year, end_year = _get_year(start_date, reported), _get_year(end_date, reported)
        start = to_isoformat(f'{start_date} {start_year}', dayfirst=True)
        end = to_isoformat(f'{end_date} {end_year}', dayfirst=True)

        return start, end

    # format 4: a month alone is too vague to build dates from
    if re.match(r'^[A-Za-z]{3,4}$', raw_lay_can):
        return None, None

    # unknown formats
    # `logger.exception` is meant for `except` blocks only; outside of one it attaches an
    # empty "NoneType: None" traceback, so the same ERROR level is logged without it
    logger.error(f'Invalid or unknown lay can format: {raw_lay_can}')
    return None, None
Exemplo n.º 24
0
def normalize_lay_can(raw_lay_can, reported):
    """Normalize lay can date with reported year as reference.

    Existing pattern found:
    1. 26-27/9, 17/10
    2. END/9

    No cross month pattern found in current reports, so we don't handle it for now until we
    encounter one, which would fail in validation and we'll know.

    The year is taken from the reported date and only shifted across a year boundary:
    a December lay can reported in January belongs to the previous year, and a January
    lay can reported in December belongs to the next year.

    Examples:
        >>> normalize_lay_can('26-27/9', '13 Sep 2018')
        ('2018-09-26T00:00:00', '2018-09-27T00:00:00')
        >>> normalize_lay_can('17/10', '13 Sep 2018')
        ('2018-10-17T00:00:00', '2018-10-17T00:00:00')
        >>> normalize_lay_can('END/9', '13 Sep 2018')
        ('2018-09-23T00:00:00', '2018-09-30T00:00:00')
        >>> normalize_lay_can('29/12', '1 Jan 2019')
        ('2018-12-29T00:00:00', '2018-12-29T00:00:00')
        >>> normalize_lay_can('END/1', '31 Dec 2018')
        ('2019-01-24T00:00:00', '2019-01-31T00:00:00')
        >>> normalize_lay_can('17/1', '5 Jan 2019')
        ('2019-01-17T00:00:00', '2019-01-17T00:00:00')

    Args:
        raw_lay_can (str): raw lay can string, matched against `LAY_CAN_PATTERN`
        reported (str): reported date, used as the year reference

    Returns:
        Tuple[str, str]: (lay can start, lay can end) in ISO format,
            or (None, None) when the raw string does not match the pattern

    """
    match = re.match(LAY_CAN_PATTERN, raw_lay_can)
    if not match:
        logger.warning(f'Invalid lay can date: {raw_lay_can}')
        return None, None

    start_day, end_day, month = match.groups()

    # year rollover for lay can start
    year = parse_date(reported).year
    if month == '12' and 'Jan' in reported:
        year -= 1
    # NOTE: a membership test is required here; the former
    # `month == '1' or month == '01' and 'Dec' in reported` bumped the year for
    # every '1' month because `and` binds tighter than `or`
    if month in ('1', '01') and 'Dec' in reported:
        year += 1

    # handle vague day cases: 'END' covers the last 7 days of the month
    if 'END' in start_day:
        end_day = get_last_day_of_current_month(month, str(year), '%m', '%Y')
        start_day = end_day - 7

    # other vague keywords are resolved through a (start_day, end_day) lookup
    if start_day in LAY_CAN_MAPPING:
        start_day, end_day = LAY_CAN_MAPPING.get(start_day)

    start = to_isoformat(f'{start_day} {month} {year}', dayfirst=True)
    # a single-day lay can has no end day: start and end are the same
    end = to_isoformat(f'{end_day} {month} {year}', dayfirst=True) if end_day else start

    return start, end
Exemplo n.º 25
0
def process_item(raw_item: Dict[str, Any]) -> Dict[str, Any]:
    """Transform a raw row into normalized items, one per cargo.

    NOTE: this is a generator function; the declared return annotation is kept
    unchanged for compatibility, but callers must iterate over the result.

    Nothing is yielded when the vessel name is empty, when there is no cargo
    product, or for cargoes whose product is 'TBN'.

    Every yielded item is a fresh shallow copy of the mapped row, so that
    consumers which collect several items do not end up with every entry
    pointing at the cargo of the last one. Nested values other than 'cargo'
    (e.g. 'vessel') are still shared between the copies.

    Args:
        raw_item (Dict[str, Any]): raw row, keyed as expected by `field_mapping()`

    Yields:
        Dict[str, Any]: mapped item with a single 'cargo' entry

    """
    item = map_keys(raw_item, field_mapping())

    # ignore empty vessels
    if not item['vessel']['name']:
        return

    # normalize dates: try a plain day-first parse first, and fall back to the
    # report-relative parser (slashes stripped) when that fails
    for date_field in ('berthed', 'arrival', 'eta'):
        if not item.get(date_field):
            continue
        try:
            item[date_field] = to_isoformat(item[date_field], dayfirst=True)
        except Exception:
            item[date_field] = normalize_date_time(
                item[date_field].replace('/', ''), item['reported_date']
            )

    # get movement details: an 'X' marker in the load/discharge column takes
    # precedence over the generic cargo movement column
    if item.get('move_load') and 'X' in item.get('move_load'):
        movement = 'load'
    elif item.get('move_dis') and 'X' in item.get('move_dis'):
        movement = 'discharge'
    elif item.get('cargo_movement'):
        movement = MOVEMENT_MAPPING.get(item.pop('cargo_movement', ''))
    else:
        movement = None

    for col in ('move_load', 'move_dis'):
        item.pop(col, None)

    if not item['cargo_product']:
        return

    # yield individual items for multiple cargos
    raw_product = item.pop('cargo_product')
    raw_volume = item.pop('cargo_volume')
    for f_cargo in split_cargo_volume(raw_product, raw_volume):
        # discard placeholder products
        if f_cargo[0] == 'TBN':
            continue

        # copy the base item so that each yielded dict owns its own cargo
        yield {
            **item,
            'cargo': {
                'product': f_cargo[0],
                'movement': movement,
                'volume': f_cargo[1],
                'volume_unit': Unit.tons,
            },
        }
Exemplo n.º 26
0
def portcall_mapping() -> Dict[str, tuple]:
    """Describe how raw port call columns translate into item fields.

    Returns:
        Dict[str, tuple]: raw column name -> (item field name, converter), where
            the converter is either a callable taking the raw value or None

    """

    def _parse_month_first(raw_date):
        # dates from this source are parsed month-first
        return to_isoformat(raw_date, dayfirst=False)

    # (raw column, item field, converter), in the order they are exposed
    rules = (
        ('arrival', 'eta', _parse_month_first),
        ('berth', 'installation', _clean_installation),
        ('cargo', 'cargo_product', None),
        ('departure', 'departure', _parse_month_first),
        ('port_name', 'port_name', None),
        ('provider_name', 'provider_name', None),
        ('reported_date', 'reported_date', None),
        ('ship', 'vessel', lambda name: {'name': name}),
        ('status', 'is_pc_relevant', _is_portcall_irrelevant),
    )
    return {raw_key: (field, converter) for raw_key, field, converter in rules}
Exemplo n.º 27
0
def _spot_charter_factory(row, **kwargs):
    """Build a spot charter item from a single report row.

    The vessel column may embed the build year as 'NAME (YYYY)', in which case
    it is split into name and build year. The voyage column is expected as
    'ORIGIN/DESTINATION'; when either side looks like '<zone>via<waypoint>',
    only the leading zone is kept. Zones are left unset when the voyage column
    does not match.

    Args:
        row (Dict[str, str]): raw row, must contain the columns read below
        **kwargs: extra fields merged into the item (they override the mapped
            fields above them, but not the vessel/zone fields set afterwards)

    Returns:
        Dict[str, Any]: spot charter item

    """
    item = {
        'lay_can_start': to_isoformat(row['lay_can_start'], dayfirst=False),
        'lay_can_end': to_isoformat(row['lay_can_end'], dayfirst=False),
        'reported_date': parse_date(row['reported_date'], dayfirst=False).strftime('%d %b %Y'),
        # 'open_date': row['open_date'],
        # 'coming_from': row['coming_from'],
        # 'fixture_id': row['fixture_id'],
        'charterer': row['charterer'],
        'seller': row['vessel_operator'],
        'status': row['charter_contract_status'],
        # 'broker_address_commission_max': row['broker_address_commission_max_percent'],
        # 'last_done_rate_value': row['last_done_rate_value'],
        'rate_value': row['rate_value'] if row['rate_value'] else None,
        # 'actual_tce_per_day_including_idle_days': row['actual_TCE_per_day_including_idle_days'],
        # 'breakeven_tce': row['breakeven_TCE_in_usd_per_day'],
        # 'voyage_raw_text': row['voyage_origin_destination'],
        # 'voyage_raw_text2': row['voyage_origin_destination2'],
        **kwargs,
    }

    # raw strings: '\w', '\s' and '\(' are invalid escapes in plain literals
    match_vessel = re.match(r'([\w\s.]+) \(([0-9]{4})\)', row['vessel_name'])
    match_origin_dest = re.match(r"(['\w]+)/(['\w]+)", row['voyage_origin_destination'])

    if match_vessel:
        vessel_name, build_year = match_vessel.groups()
        item['vessel'] = {'name': vessel_name, 'build_year': build_year}
    else:
        item['vessel'] = {'name': row['vessel_name']}

    if match_origin_dest:
        origin, destination = match_origin_dest.groups()
        # drop the 'via<waypoint>' suffix, keeping only the leading zone
        via_origin = re.match(r'(\w+)via\w+', origin)
        via_destination = re.match(r'(\w+)via\w+', destination)

        item['departure_zone'] = via_origin.group(1) if via_origin else origin
        item['arrival_zone'] = [via_destination.group(1) if via_destination else destination]

    return item
Exemplo n.º 28
0
def normalize_lay_can(date_item, rpt_date):
    """Convert a raw date string to ISO format, using the reported date for the year.

    Values already in ISO format are returned untouched. Otherwise the part
    before the first space is read as a slash-separated date:
        - 'month/day': the year comes from the reported date, shifted by one
          when the pair straddles a December/January boundary
        - 'month/day/year': the remainder is read as an 'HHMM' time, with any
          non-numeric hour or minute part replaced by '00'
    Anything else is logged and returned as-is.

    Examples:
        >>> normalize_lay_can('8/14 PM', '14 Aug 2019')
        '2019-08-14T00:00:00'
        >>> normalize_lay_can('2018-02-04T00:00:00', '14 Aug 2019')
        '2018-02-04T00:00:00'
        >>> normalize_lay_can('12/31 PM', '29 Dec 2019')
        '2019-12-31T00:00:00'
        >>> normalize_lay_can('12/31 PM', '01 Jan 2020')
        '2019-12-31T00:00:00'
        >>> normalize_lay_can('01/01 PM', '31 Dec 2019')
        '2020-01-01T00:00:00'
        >>> normalize_lay_can('7/06/2015 1750', '31 Dec 2019')
        '2015-07-06T17:50:00'

    Args:
        date_item (str): raw date value
        rpt_date (str): reported date, used as the year reference

    Returns:
        str: ISO formatted date, or the raw value when it cannot be converted
    """
    if is_isoformat(date_item):
        return date_item

    reference_year = parse_date(rpt_date, dayfirst=True).year
    date_part, _, time_part = date_item.partition(' ')
    pieces = date_part.split('/')

    # month/day only: infer the year from the reported date
    if len(pieces) == 2:
        month, day = pieces
        if month in ('1', '01') and 'Dec' in rpt_date:
            reference_year += 1
        if month == '12' and 'Jan' in rpt_date:
            reference_year -= 1

        return to_isoformat(f'{day} {month} {reference_year}', dayfirst=True)

    # full month/day/year date, optionally followed by an HHMM time
    if len(pieces) == 3:
        digits = time_part.replace(' ', '')
        hour_text, minute_text = digits[:2], digits[2:]
        hour = hour_text if hour_text.isdigit() else '00'
        minute = minute_text if minute_text.isdigit() else '00'
        try:
            return to_isoformat(f'{date_part} {hour}:{minute}', dayfirst=False)
        except Exception:
            logger.error('Skipping date row: %s', date_item)
            return date_item

    logger.error('Skipping date row: %s', date_item)
    return date_item
Exemplo n.º 29
0
def get_imports_lay_can_dates(item):
    """Get laycan dates for import vessel movements.

    Each platform in `PLATFORMS` is queried in turn for an import trade of the
    vessel between `load_disch_zone` (origin) and `current_zone` (destination),
    bounded by the item's `lay_can_start`; the first non-empty trade wins.
    The lay can window is then derived from the origin date of that trade.

    No lookup is attempted, and (None, None) is returned, when the item has no
    origin zone or no lay can start.

    Args:
        item (Dict[str, str]): must provide 'load_disch_zone' and 'lay_can_start',
            plus 'vessel_name' and 'current_zone' when a lookup is performed

    Returns:
        Tuple[str | None, str | None]: (lay_can_start, lay_can_end) in ISO format,
            or (None, None) when no trade could be matched

    """
    if not (item['load_disch_zone'] and item['lay_can_start']):
        return None, None

    import_date = item['lay_can_start']
    if isinstance(import_date, datetime):
        import_date = import_date.strftime(KP_API_DATE_PARAM_FORMAT)
    import_date = to_isoformat(import_date)

    # get trade from different platforms; initialised up front so that an
    # empty `PLATFORMS` falls through to "no trade" instead of raising
    trade = None
    for platform in PLATFORMS:
        trade = kp_api.get_session(platform, recreate=True).get_import_trade(
            vessel=item['vessel_name'],
            origin=item['load_disch_zone'],
            dest=item['current_zone'],
            end_date=import_date,
        )
        if trade:
            break

    if not trade:
        return None, None

    # lay_can_start is 2 days before origin date,
    # lay_can_end is 1 day after origin date (c.f. analysts)
    lay_can = parse_date(trade['Date (origin)'], dayfirst=False)
    lay_can_start = (lay_can - dt.timedelta(days=2)).isoformat()
    lay_can_end = (lay_can + dt.timedelta(days=1)).isoformat()

    return lay_can_start, lay_can_end
Exemplo n.º 30
0
def parse_vessel_movement(row, reported_date, current_port, provider):
    """Yield one port call item per cargo found in a vessel movement row.

    Rows whose movement has no entry in `MOVEMENT_MAPPING` yield nothing.
    An 'LPG' cargo is emitted as two items, 'Butane' and 'Propane', each with
    the quantity halved by integer division; any other product is emitted
    once with its quantity.

    Args:
        row (Dict[str, str]): raw row with the 'm_*' columns read below
        reported_date (str): reported date, also used to normalize event dates
        current_port (str): port name, translated through `PORT_MAPPING` when known
        provider (str): provider name

    Yields:
        Dict[str, Any]: port call item with a single cargo

    """
    if not MOVEMENT_MAPPING.get(row['m_movement']):
        return

    def _event_date(column):
        # blank date cells are reported as None rather than parsed
        raw_date = row[column]
        return normalize_date(reported_date, raw_date) if may_strip(raw_date) else None

    def _build_item(product_name, quantity, *volume_steps):
        # volume steps are handed to try_apply together with the raw quantity
        return {
            'reported_date': to_isoformat(reported_date),
            'arrival': _event_date('m_arrived'),
            'berthed': _event_date('m_berthed'),
            'departure': _event_date('m_sailed'),
            'port_name': PORT_MAPPING.get(current_port, current_port),
            'provider_name': provider,
            'cargo': {
                'product': product_name,
                'movement': MOVEMENT_MAPPING.get(row['m_movement']),
                'volume': try_apply(quantity, *volume_steps),
                'volume_unit': Unit.tons,
            },
            'vessel': {'name': row['m_vessel']},
        }

    cargoes = zip(*parse_product(map_spelling(row['m_cargo']), row['m_qty']))
    for product, quantity in cargoes:
        if product != 'LPG':
            yield _build_item(product, quantity, int, str)
            continue

        # LPG is split evenly between its two components
        for component in ['Butane', 'Propane']:
            yield _build_item(component, quantity, int, lambda x: x // 2, str)