def _is_same_dwt(dwt_1, dwt_2):
    """Tell whether two deadweight figures agree within the APPROXIMATION tolerance.

    String inputs are stripped and converted to integers first. The accepted
    window is centred on `dwt_1` (±5% according to the original documentation;
    the actual ratio is the module-level `APPROXIMATION` constant).

    Examples:
        >>> _is_same_dwt('45229', 44783)
        True
        >>> _is_same_dwt('-', 44000)
        False

    Args:
        dwt_1 (str | int): reference deadweight
        dwt_2 (str | int): deadweight compared against the reference

    Returns:
        bool: True only when both values are truthy and `dwt_2` lies in the window

    """
    reference = try_apply(may_strip(dwt_1), int) if isinstance(dwt_1, str) else dwt_1
    candidate = try_apply(may_strip(dwt_2), int) if isinstance(dwt_2, str) else dwt_2

    # a missing (or zero) value on either side can never be a match
    if not (reference and candidate):
        return False

    return reference * (1 - APPROXIMATION) <= candidate <= reference * (1 + APPROXIMATION)
def _is_same_year(year_1, year_2, offset=1): """Check if the build year is similar by approximation of ±1 year. We need the approximation because Clarksons often provides build year offset by one. Examples: >>> _is_same_year('2018', 2019) True >>> _is_same_year('2018', '2020') False >>> _is_same_year('-', 2019) False Args: dwt_1 (str): dwt_2 (int): Returns: Boolean: """ if isinstance(year_1, str): year_1 = try_apply(may_strip(year_1), int) if isinstance(year_2, str): year_2 = try_apply(may_strip(year_2), int) if year_1 and year_2: _lower_bound, _upper_bound = year_1 - 1, year_1 + 1 return _lower_bound <= year_2 <= _upper_bound return False
def parse_expected_vessels(row, reported_date, current_port, provider):
    """Build expected-arrival events from one raw table row.

    Nothing is yielded when the row's movement is not in MOVEMENT_MAPPING or
    when it has no ETA. One event is yielded per parsed product, except that
    'LPG' is emitted as 'Butane' and 'Propane', each with half the quantity.

    Args:
        row (Dict[str, str]): raw row with `e_*` columns
        reported_date: report date, forwarded to `to_isoformat`/`normalize_date`
        current_port (str): raw port name, translated through PORT_MAPPING
        provider (str): provider name

    Yields:
        Dict[str, Any]:

    """
    movement = MOVEMENT_MAPPING.get(row['e_movement'])
    if not movement or not may_strip(row['e_eta']):
        return

    for product, quantity in zip(*parse_product(map_spelling(row['e_cargo']), row['e_qty'])):
        if product == 'LPG':
            # LPG volume is split evenly between its two grades
            cargoes = [
                (grade, try_apply(quantity, int, lambda x: x // 2, str))
                for grade in ['Butane', 'Propane']
            ]
        else:
            cargoes = [(product, try_apply(quantity, int, str))]

        for cargo_product, cargo_volume in cargoes:
            yield {
                'reported_date': to_isoformat(reported_date),
                'eta': normalize_date(reported_date, row['e_eta']),
                'port_name': PORT_MAPPING.get(current_port, current_port),
                'provider_name': provider,
                'cargo': {
                    'product': cargo_product,
                    'movement': movement,
                    'volume': cargo_volume,
                    'volume_unit': Unit.tons,
                },
                'vessel': {'name': row['e_vessel']},
            }
def is_valid_numeric(candidate):
    """Check that the given value is numeric and not negative.

    Args:
        candidate (str | int | float):

    Returns:
        str | int | float: the candidate, unchanged, when it is valid

    Raises:
        ValidationError: if the candidate cannot be read as a number, or is negative

    Examples:
        >>> is_valid_numeric('4500')
        '4500'
        >>> is_valid_numeric(4500)
        4500
        >>> is_valid_numeric('-4500')  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ValidationError: Value must be a positive value
        >>> is_valid_numeric('foobar')  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ValidationError: Value is not numeric: foobar

    """
    # keep the `try` limited to the conversion, so the negative-value error
    # raised below is not caught and re-labelled as "not numeric"
    try:
        numeric = float(candidate)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValidationError('Value is not numeric: {}'.format(candidate)) from err

    if numeric < 0:
        raise ValidationError('Value must be a positive value')

    return candidate
def portcall_mapping():
    """Return the table translating raw page labels into item keys.

    Returns:
        Dict[str, Any]: raw label -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for labels that are not used

    """

    def _float_to_int(raw):
        return try_apply(raw, float, int)

    def _to_int(raw):
        return try_apply(raw, int)

    def _dwt_or_none(raw):
        # a literal '0' deadweight is reported as missing
        return None if raw == '0' else try_apply(raw, int)

    return {
        # main page
        'Arrived Date': ('matching_date', to_isoformat),
        'Schedule Date': ('matching_date', to_isoformat),
        'Berth': ('berth', None),
        'Vessel Name': ('vessel_name', None),
        'Vessel Type': ignore_key('alternate field for vessel type'),
        'Origin / Destination': ignore_key('FIXME not processed due to limitations on the ETL'),
        'Origin': ignore_key('ignore origin zone'),
        'Destination': ignore_key('FIXME not processed due to limitations on the ETL'),
        # vessel page
        'Type :': ('vessel_type', None),
        'Flag :': ('vessel_flag', None),
        'LOA :': ('vessel_loa', _float_to_int),
        'Bow to Bridge :': ignore_key('bow to bridge'),
        'Beam :': ('vessel_beam', _float_to_int),
        'Summer DWT :': ('vessel_dwt', _dwt_or_none),
        'Gross Tonnage :': ('vessel_gt', _to_int),
        'IMO :': ('vessel_imo', None),
        'Thrust Bow :': ignore_key('ignore'),
        'Thrust Stern :': ignore_key('ignore'),
        'Summer Draft :': ignore_key('ignore'),
        'Ramp Mid :': ignore_key('ignore'),
        'Ramp Stern :': ignore_key('ignore'),
        'Cargo Gear :': ignore_key('ignore'),
        'Inert Gas :': ignore_key('ignore'),
        'Crude Oil Wash :': ignore_key('ignore'),
        # meta info
        'event_type': ('event_type', None),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', to_isoformat),
    }
def field_mapping():
    """Return the table translating raw source fields into item keys.

    Returns:
        Dict[str, Any]: raw field -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for fields that are not used

    """

    def _length(raw):
        return try_apply(raw, float, int, str)

    def _day_first_date_or_none(raw):
        # empty dates stay empty instead of being parsed
        return to_isoformat(raw, dayfirst=True) if raw else None

    def _gross_tonnage(raw):
        return try_apply(raw, str)

    def _cargoes(raw):
        return list(normalize_cargoes(raw))

    def _reported_date(raw):
        return to_isoformat(raw, dayfirst=True)

    return {
        'bandera': ignore_key('vessel flag'),
        'calent': ignore_key('vessel draught'),
        'codbuq': ('vessel_imo', normalize_imo),
        'desmue': ignore_key('berth'),
        'destipbuq': ignore_key('vessel type'),
        'eslora': ('vessel_length', _length),
        'fecatr': ('eta', _day_first_date_or_none),
        'fecsal': ('departure', _day_first_date_or_none),
        'gt': ('vessel_gross_tonnage', _gross_tonnage),
        'nombuq': ('vessel_name', None),
        'nomcsg': ignore_key('shipping agent'),
        'operaciones': ('cargoes', _cargoes),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
    }
def field_mapping():
    """Return the table translating positional spreadsheet columns into item keys.

    Columns are keyed by their index as a string ('0' to '15').

    Returns:
        Dict[str, Any]: column -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for columns that are not used

    """

    def _float_to_int(raw):
        return try_apply(raw, float, int)

    def _excel_date_or_none(raw):
        # only float cells are converted; anything else yields None
        return xldate_as_datetime(raw, 0) if isinstance(raw, float) else None

    return {
        '0': ('vessel_name', may_strip),
        '1': ('flag', may_strip),
        '2': ('gross_tonnage', _float_to_int),
        '3': ('vessel_type', may_strip),
        '4': ('shipping_agent', may_strip),
        '5': ignore_key('ignore transit'),
        '6': ignore_key('from country'),
        '7': ('arrival', _excel_date_or_none),
        '8': ignore_key('next_zone, bacause ceyhan is not up to date'),
        '9': ('departure', _excel_date_or_none),
        '10': ('evacuation_type', None),
        '11': ('evacuation_tonnage', _float_to_int),
        '12': ignore_key('discharge at port'),
        '13': ('loading_type', None),
        '14': ('loading_tonnage', _float_to_int),
        '15': ignore_key('loading port'),
        # meta fields
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
def field_mapping():
    """Return the table translating raw column headers into item keys.

    Several header spellings point at the same target key.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _rounded_loa(raw):
        length = try_apply(raw, float, int)
        return round(length) if length else None

    def _agent_unless_tbc(raw):
        return None if 'TBC' in raw else raw

    def _cargo_information(raw):
        return normalize_cargos(raw)

    return {
        'ARRIVAL DATE': ('arrival_date', may_strip),
        'ARRIVAL': ('arrival_date', may_strip),
        'DEPART': ('departure_date', may_strip),
        'DEPARTURE': ('departure_date', may_strip),
        'AD': ignore_key('after draught'),
        'STEVEDORES': ignore_key('stevedores'),
        'PDSA': ignore_key('pds'),
        'DAILY': ignore_key('daily'),
        'ROB': ignore_key('rob'),
        'SHIP NAME': ('vessel_name', None),
        'VESSELS': ('vessel_name', None),
        'VESSEL': ('vessel_name', None),
        'VOY': ignore_key('voyage number'),
        'LOA': ('vessel_loa', _rounded_loa),
        'DRAFT': ignore_key('draft'),
        'FLAG': ignore_key('flag'),
        'AGENT': ('shipping_agent', _agent_unless_tbc),
        'AGENTS': ('shipping_agent', _agent_unless_tbc),
        'LINE': ignore_key('line'),
        'BERTH': ('berth', None),
        'OPERATIONS': ('cargo_information', _cargo_information),
        'CALL': ignore_key('call'),
        'reported_date': ('reported_date', None),
        'provider_name': ('provider_name', None),
        'port_name': ('port_name', None),
        'month_row': ('month_row', None),
    }
def field_mapping():
    """Return the table translating raw column names into item keys.

    Returns:
        Dict[str, Any]: column -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for columns that are not used

    """

    def _numeric_string(raw):
        return try_apply(raw, float, int, str)

    return {
        'berth': ('berth', None),
        'vessel': ('vessel_name', None),
        'nationality': ('vessel_flag', None),
        'agent': ('shipping_agent', None),
        'arrival_date': ('arrival_date', None),
        'arrival_time': ('arrival_time', None),
        'eta_date': ('eta_date', None),
        'eta_time': ('eta_time', None),
        'loa': ('vessel_loa', _numeric_string),
        'gt': ('vessel_gt', _numeric_string),
        'fwd': ignore_key('irrelevant'),
        'aft': ignore_key('irrelevant'),
        'berthed_date': ('berthed_date', None),
        'berthed_time': ('berthed_time', None),
        'cargo': ('cargo_product', may_strip),
        'total': ('cargo_volume', _numeric_string),
        'tonnage': ('cargo_volume', None),
        'remarks': ('cargo_movement', None),
        'purpose': ('cargo_movement', None),
        'terminal': ('installation', None),
        'position': ignore_key('irrelevant'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
def _normalize_total_volume(raw_volume, raw_unit, numdays):
    """Convert a raw volume figure into a total number of barrels.

    Daily rates are multiplied by the number of days in the period; plain
    totals are only scaled from thousands to units.

    TODO could use start/end date info to determine total volume,
    however no use case for it yet

    Args:
        raw_volume (int): figure expressed in thousand barrels (per day or total)
        raw_unit (str): 'Thousand Barrels per Day' or 'Thousand Barrels'
        numdays (int): number of days covered, only used for daily rates

    Returns:
        Tuple[int, str]: tuple of (volume, volume_unit)

    Raises:
        ValueError: if `raw_unit` is not one of the supported units

    Examples:
        >>> _normalize_total_volume(2640, 'Thousand Barrels per Day', 7)
        (18480000, 'barrel')
        >>> _normalize_total_volume(2640, 'Thousand Barrels per Day', 30)
        (79200000, 'barrel')
        >>> _normalize_total_volume(2640, 'foobar', 7)
        Traceback (most recent call last):
            ...
        ValueError: Unknown volume unit: foobar

    """
    if raw_unit not in ('Thousand Barrels per Day', 'Thousand Barrels'):
        raise ValueError(f'Unknown volume unit: {raw_unit}')

    if raw_unit == 'Thousand Barrels':
        barrels = raw_volume * 1000
    else:
        barrels = raw_volume * numdays * 1000

    return try_apply(barrels, int), 'barrel'
def parse(self, response):
    """Turn each table row of the response into a customs figure.

    The first two `<tr>` elements are skipped. The reporting period is the
    month given by `response.meta['year']` and `response.meta['month']`.

    Args:
        response: response exposing `xpath` and a `meta` dict with the keys
            'year', 'month', 'commodity' and 'subcommodity'

    Yields:
        Dict[str, Any]: one item per row passing the sanity check

    """
    for row in response.xpath('//tr')[2:]:
        cells = row.xpath('td/text()').extract()

        # build CustomsFigure model
        period_start = dt.datetime(response.meta['year'], response.meta['month'], 1)
        product = response.meta['subcommodity'] or response.meta['commodity']
        midnight = dt.datetime.utcnow().replace(hour=0, minute=0, second=0)

        export_zone = cells[0]
        # NOTE(review): presumably the source reports value in thousand USD - confirm
        value = try_apply(cells[1].replace(',', ''), int, lambda x: x * 1000)
        mass = try_apply(cells[2].replace(',', ''), int)

        # sanity check; in case no price/mass/export_zone
        if export_zone == '0' or value == 0 or mass == 0:
            continue

        yield {
            'start_utc': period_start,
            'end_utc': period_start + relativedelta(months=1),
            'product': product,
            'provider_name': self.provider,
            'reported_date': midnight.isoformat(timespec='seconds'),
            'export_zone': export_zone,
            'import_zone': 'South Korea',  # source is exclusively about korean imports
            'valuation': {
                'value': value,
                'currency': Currency.USD,
            },
            'mass': mass,
            'mass_unit': Unit.kilogram,
        }
def field_mapping():
    """Return the table translating raw vessel-page labels into item keys.

    Returns:
        Dict[str, Any]: label -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for labels that are not used

    """

    def _mmsi(raw):
        # the cell holds 'IMO / MMSI'; keep what follows the slash
        return raw.partition('/')[2].strip() if '/' in raw else None

    def _to_int(raw):
        return try_apply(raw, int)

    def _dimension(raw):
        return try_apply(raw, _handle_empty_value, int)

    return {
        'AIS Type': ignore_key('vessel AIS type'),
        'Flag': ('flag_name', None),
        'Destination': ignore_key('destination'),
        'ETA': ignore_key('eta'),
        'IMO / MMSI': ('mmsi', _mmsi),
        'Callsign': ('call_sign', None),
        'Length / Beam': ignore_key('length and beam, extracted from following'),
        'Current draught': ignore_key('current draught'),
        'Course / Speed': ignore_key('course / speed'),
        'Coordinates': ignore_key('coordinates'),
        'Last report': ignore_key('last report'),
        'IMO number': ('imo', None),
        'Vessel Name': ('name', None),
        'Ship type': ('type', None),
        'Homeport': ignore_key('homeport'),
        'Gross Tonnage': ('gross_tonnage', _to_int),
        'Summer Deadweight (t)': ('dead_weight', _to_int),
        'Length Overall (m)': ('length', _dimension),
        'Beam (m)': ('beam', _dimension),
        'Draught (m)': ignore_key('draught, no access'),
        'Year of Built': ('build_year', _to_int),
        'Builder': ignore_key('builder, no access'),
        'Place of Built': ignore_key('place of built, no access'),
        'Yard': ignore_key('yard, no access'),
        'TEU': ignore_key('teu'),
        'Crude': ignore_key('crude'),
        'Grain': ignore_key('grain'),
        'Bale': ignore_key('bale'),
        'Registered Owner': ignore_key('registered owner, no access'),
        'Manager': ignore_key('manager, no access'),
        'provider_name': ('provider_name', None),
    }
def cargo_mapping():
    """Return the table translating raw cargo fields into cargo keys.

    Returns:
        Dict[str, Tuple[str, Callable]]: raw field -> (target key, converter)

    """

    def _movement_from_manifest(raw):
        return MOVEMENT_MAPPING.get(raw[-1])

    def _movement_from_code(raw):
        return MOVEMENT_MAPPING.get(raw)

    def _constant_unit(raw):
        # the raw unit is disregarded in favour of the module-level constant
        return VOLUME_UNIT

    def _volume(raw):
        return try_apply(raw, float, int, str)

    return {
        'frghtNm': ('product', smart_split),
        # movement is indicated in last character of manifest number
        'mrnum': ('movement', _movement_from_manifest),
        'msrmntUnitSe': ('volume_unit', _constant_unit),
        'tkinSe': ('movement', _movement_from_code),
        # 'tnspotShapDc': ('product', None),
        'wt': ('volume', _volume),
        'wtTon': ('volume', _volume),
    }
def normalize_matching_date(raw_date):
    """Normalize a raw date cell into berthed, arrival and departure dates.

    The cell is either an xlrd float (a single berthed date) or a string
    holding an arrival-departure range in which only the departure part is a
    complete date:
        - 11-13/12/15       (day - day/month/year)
        - 30/07-01/08/13    (day/month - day/month/year)

    The arrival part is completed with the departure's month and year. When
    that would place the arrival after the departure, the range is taken to
    straddle a month or year boundary and the arrival is moved back one
    month (no arrival month given) or one year (arrival month given).

    Examples:
        >>> normalize_matching_date('11-13/12/15')
        (None, '2015-12-11T00:00:00', '2015-12-13T00:00:00')
        >>> normalize_matching_date('24/01-01/02/14')
        (None, '2014-01-24T00:00:00', '2014-02-01T00:00:00')
        >>> normalize_matching_date('30/12-02/01/14')
        (None, '2013-12-30T00:00:00', '2014-01-02T00:00:00')
        >>> normalize_matching_date('30-02/01/14')
        (None, '2013-12-30T00:00:00', '2014-01-02T00:00:00')
        >>> normalize_matching_date(42359.0)
        ('2015-12-21T00:00:00', None, None)

    Args:
        raw_date (str | float):

    Returns:
        Tuple[str | None, str | None, str | None]: (berthed, arrival, departure);
            all three are None when the value is empty or cannot be interpreted

    """
    if not raw_date:
        return None, None, None

    # xlrd date float
    if isinstance(raw_date, float):
        return _convert_xlrd_date(raw_date), None, None

    # date range
    _match = re.match(r'(\d{1,2})/?(\d{1,2})?-(\d{1,2}/\d{1,2}/\d{1,2})', raw_date)
    if not _match:
        logger.error(f'Date pattern is invalid: {raw_date}')
        return None, None, None

    arrival_day, arrival_month, departure = _match.groups()
    arrival_day = try_apply(arrival_day, int)
    arrival_month = try_apply(arrival_month, int)
    departure = parse_date(departure, dayfirst=True)

    year = departure.year
    month = arrival_month or departure.month

    # an arrival cannot be later than its departure: the range must have
    # started in the previous year (explicit month) or previous month
    if arrival_day and (month, arrival_day) > (departure.month, departure.day):
        if arrival_month:
            year -= 1
        elif month == 1:
            month, year = 12, year - 1
        else:
            month -= 1

    try:
        arrival = departure.replace(year=year, month=month, day=arrival_day)
    except (TypeError, ValueError):
        # missing/zero day, or a day that does not exist in the resolved month
        logger.error(f'Date pattern is invalid: {raw_date}')
        return None, None, None

    return None, arrival.isoformat(), departure.isoformat()
def parse_vessel_movement(row, reported_date, current_port, provider):
    """Build vessel movement events from one raw table row.

    Nothing is yielded when the row's movement is not in MOVEMENT_MAPPING.
    One event is yielded per parsed product, except that 'LPG' is emitted as
    'Butane' and 'Propane', each with half the quantity.

    Args:
        row (Dict[str, str]): raw row with `m_*` columns
        reported_date: report date, forwarded to `to_isoformat`/`normalize_date`
        current_port (str): raw port name, translated through PORT_MAPPING
        provider (str): provider name

    Yields:
        Dict[str, Any]:

    """
    movement = MOVEMENT_MAPPING.get(row['m_movement'])
    if not movement:
        return

    def _date_or_none(raw):
        # blank cells stay empty instead of being normalized
        return normalize_date(reported_date, raw) if may_strip(raw) else None

    for product, quantity in zip(*parse_product(map_spelling(row['m_cargo']), row['m_qty'])):
        if product == 'LPG':
            # LPG volume is split evenly between its two grades
            cargoes = [
                (grade, try_apply(quantity, int, lambda x: x // 2, str))
                for grade in ['Butane', 'Propane']
            ]
        else:
            cargoes = [(product, try_apply(quantity, int, str))]

        for cargo_product, cargo_volume in cargoes:
            yield {
                'reported_date': to_isoformat(reported_date),
                'arrival': _date_or_none(row['m_arrived']),
                'berthed': _date_or_none(row['m_berthed']),
                'departure': _date_or_none(row['m_sailed']),
                'port_name': PORT_MAPPING.get(current_port, current_port),
                'provider_name': provider,
                'cargo': {
                    'product': cargo_product,
                    'movement': movement,
                    'volume': cargo_volume,
                    'volume_unit': Unit.tons,
                },
                'vessel': {'name': row['m_vessel']},
            }
def process_item(raw_item):
    """Transform raw item into usable events, one per cargo product.

    Items without any of eta/arrival/berthed, or without a vessel name, are
    discarded with a warning. The cargo volume is assumed to be split equally
    between the products. Every yielded event is an independent dict, and
    every cargo carries the buyer/seller when one is known.

    Args:
        raw_item (Dict[str, str]):

    Yields:
        Dict[str, Any]:

    """
    item = map_keys(raw_item, grades_mapping())

    # discard items without valid dates
    if not (item.get('eta') or item.get('arrival') or item.get('berthed')):
        logger.warning(f'Item has no valid portcall dates, discarding:\n{item}')
        return

    # discard items without vessel names
    vessel_name, _ = item.pop('vessel_name_and_charter_status', (None, None))
    if not vessel_name:
        logger.warning(f'Item has no vessel name, discarding:\n{item}')
        return

    # build Vessel sub-model
    item['vessel'] = {'name': vessel_name, 'imo': item.pop('vessel_imo', None)}

    # build Cargo sub-model
    products = item.pop('cargo_product', []) or []
    movement = item.pop('cargo_movement', None)
    volume = item.pop('cargo_volume', None)
    # taken out once, up front, so that every product (not only the first)
    # can be tagged with the buyer/seller
    buyer_seller = item.pop('buyer_seller', None)

    if not products:
        return

    # NOTE assume equal volume split by number of products
    total_volume = try_apply(volume, int, float)
    split_volume = str(total_volume / len(products)) if total_volume else None

    player = None
    if movement and buyer_seller:
        player = 'seller' if movement == 'load' else 'buyer'

    for product in products:
        cargo = {
            'product': product,
            'movement': movement,
            'volume': split_volume,
            'volume_unit': Unit.tons,
        }
        if player:
            cargo[player] = {'name': buyer_seller}

        # yield a copy: re-yielding one mutated dict would make all events alias
        yield {**item, 'cargo': cargo}
def grades_mapping():
    """Return the table translating raw grades-report headers into item keys.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _imo(raw):
        return try_apply(raw, float, int, str) if raw else None

    def _movement(raw):
        return MOVEMENT_MAPPING.get(raw)

    def _volume(raw):
        return try_apply(raw, float, int) if raw else None

    def _last_party(raw):
        # 'shipper/receiver': keep what follows the last slash
        return raw.split('/')[-1] if raw else None

    def _value_or_none(raw):
        return raw or None

    def _name_and_charter_status(raw):
        # don't use the separator value
        name, _, status = raw.partition('/')
        return [may_strip(name), may_strip(status)]

    return {
        'Arrived': ('arrival', normalize_pc_date),
        'AGENT': ignore_key('shipping agent is not required'),
        'Berthed': ('berthed', normalize_pc_date),
        'BL DD': ('berthed', normalize_pc_date),
        'CHARTERER': ignore_key('charterer is not required'),
        'COUNTRY OF DEST': ignore_key('not specific enough; we already have "NEXT PORT"'),
        'ETA': ('eta', normalize_pc_date),
        'ETB': ('berthed', normalize_pc_date),
        'ETS': ignore_key('not required'),
        'GRADE DETAIL': ('cargo_product', split_cargoes),
        'GRADE GROUP': ignore_key('we already have grade detail, so we can ignore this'),
        'IMO NR': ('vessel_imo', _imo),
        'LOAD POSITION': ignore_key('irrelevant'),
        'LOAD/DISCH': ('cargo_movement', _movement),
        'NEXT PORT': ignore_key('not required for PortCall for now'),
        'PORT': ('port_name', None),
        'PRE. PORT': ignore_key('not required'),
        'provider_name': ('provider_name', None),
        'QTT IN MT': ('cargo_volume', _volume),
        'region_name': ignore_key('not required'),
        'reported_date': ('reported_date', normalize_reported_date),
        'Sailed': ignore_key('not required'),
        'SHIPPERS/RECEIVERS': ('buyer_seller', _last_party),
        'STATUS': ignore_key('irrelevant'),
        'TERMINAL': ('installation', _value_or_none),
        'VESSEL': ('vessel_name_and_charter_status', _name_and_charter_status),
    }
def grades_mapping():
    """Return the table translating raw loading-report headers into item keys.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _vessel(raw):
        return {'name': raw}

    def _product(raw):
        # unknown grades fall back to the raw value
        return PRODUCT_MAPPING.get(raw.upper(), raw)

    def _quantity(raw):
        return try_apply(raw, float, int, str)

    return {
        'Date': ('departure', to_isoformat),
        'Dock': ('berth', None),
        'Vessel': ('vessel', _vessel),
        'Charterer': ignore_key('charters spider'),
        'Grade': ('cargo_product', _product),
        'Qty Nominated': ('cargo_volume_nominated', _quantity),
        'Qty Loaded': ('cargo_volume', _quantity),
        'Loading Rate': ignore_key('irrelevant'),
        'Status': ignore_key('irrelevant'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'sheet_name': ('sheet_name', None),
    }
def grades_mapping():
    """Return the table translating raw fixture-report headers into item keys.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _vessel_name(raw):
        # 'STEAMER' is not kept as a vessel name
        return None if raw == 'STEAMER' else raw

    def _imo(raw):
        return try_apply(raw, int, str)

    def _relevant_product(raw):
        return None if raw in IRRELEVANT_PRODUCTS else raw

    def _reported_date(raw):
        return dt.datetime.strptime(raw, '%m%d%Y').isoformat()

    return {
        'ENTRY DATE': ignore_key('redundant reported date'),
        'WEEK ENDING': ignore_key('redundant'),
        'Sequence Number': ignore_key('redundant'),
        'VESSEL ENTRY NAME': ('vessel_name', _vessel_name),
        'COMMODITY LABEL': ignore_key('redundant'),
        'PORT OF ORIGIN': ('port_name', may_strip),
        'CODE ORG': ignore_key('redundant'),
        'ORG CODE': ignore_key('redundant'),
        'UNCTAD CODE LOAD': ignore_key('redundant'),
        'DESCRIPTION LOAD': ignore_key('redundant arrival zone'),
        'DESTINATION': ignore_key('redundant'),
        'CODE (DES)': ignore_key('redundant'),
        'DES CODE': ignore_key('redundant'),
        'UNCTAD CODE D': ignore_key('redundant'),
        'DESCRIPTION DEST': ignore_key('redundant departure zone'),
        'VESSEL NAME': ignore_key('redundant'),
        'VESSEL YR': ignore_key('redundant'),
        'IMO': ('vessel_imo', _imo),
        'TONNAGE': ('cargo_volume', normalize_cargo_volume),
        'SIZE': ignore_key('not a good estimate for dwt'),
        'COMMODITY': ('cargo_product', _relevant_product),
        'MRI COMM CODE': ignore_key('redundant'),
        'COMM CODE': ignore_key('redundant'),
        'Dates': ignore_key('redundant'),
        'S DATE': ('eta', None),
        'CHARTER': ignore_key('redundant'),
        'TERMS': ignore_key('redundant'),
        'RATE': ignore_key('redundant'),
        'RATE A': ignore_key('redundant'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
    }
def field_mapping():
    """Return the table translating raw report headers into item keys.

    Actual and estimated variants of a header (e.g. 'A.T.A' and 'E.T.A')
    point at the same target key.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _imo(raw):
        return try_apply(raw, float, int, str)

    def _lowercase(raw):
        return raw.lower()

    return {
        'Vessel Name': ('vessel_name', None),
        'Vessel Type': ('vessel_type', None),
        'IMO': ('vessel_imo', _imo),
        'Length': ('vessel_length', None),
        'Dimensions': ('vessel_length', None),
        'DWT': ('vessel_dwt', None),
        'Berth Number': ('berth', None),
        'Draft': ignore_key('redundant'),
        'Last Port': ignore_key('redundant'),
        'OPS': ('cargo_movement', _lowercase),
        'Operation': ('cargo_movement', _lowercase),
        'Cargo Type': ('cargo_product', may_strip),
        'Qtty': ('cargo_volume', None),
        'Est. Qtty': ('cargo_volume', None),
        'A.T.A': ('arrival', None),
        'E.T.A': ('arrival', None),
        'A.T.B': ('berthed', None),
        'E.T.B': ('berthed', None),
        'A.T.D': ('departure', None),
        'E.T.D': ('departure', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', normalize_rptd_date),
        'port_name': ('port_name', normalize_port),
        'file_name': ('file_name', None),
    }
def portcall_mapping():
    """Return the table translating raw schedule headers into item keys.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _length(raw):
        # the cell holds 'LOA / BEAM'; only the part before the slash is used
        return try_apply(raw.split('/')[0], float, int)

    def _allowed_vessel_type(raw):
        return None if raw in VESSEL_TYPE_BLACKLIST else raw

    return {
        'ETA': ignore_key('use more accurate ETB as ETA estimate instead'),
        'SHIPS': ('vessel_name', clean_vessel_name),
        'ETB': ('eta', None),
        'BERTH': ('berth', None),
        'LOA / BEAM': ('vessel_length', _length),
        'TYPE': ('vessel_type', _allowed_vessel_type),
        'OPERATOR': ('shipping_agent', None),
        'LINE': ignore_key('irrelevant'),
        'CARGO': ('cargo_product', normalize_cargo_product),
        'QUANTITY': ('cargo_movement', normalize_cargo_movement),
        'PILOT': ignore_key('irrelevant'),
        'TUG': ignore_key('irrelevant'),
        'LAST PORT': ignore_key('previous port'),
        'NEXT PORT': ignore_key('next port'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'port_name': ('port_name', None),
        'month': ('month', None),
        'year': ('year', None),
    }
def process_item(raw_item):
    """Normalize a raw item into a single portcall event carrying its cargo.

    The cargo is always reported as a 'Thermal Coal' load; the shipper, when
    present, becomes the cargo seller.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, Any] | None: the normalized item, or None when it has no
            eta, berthed or departure date

    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)

    # fallback unto finished time if departure is null (analyst request)
    if not item.get('departure'):
        item['departure'] = item.get('finished', None)

    has_date = any(item.get(key) for key in ('eta', 'berthed', 'departure'))
    if not has_date:
        return None

    shipper = item.pop('shipper', None)
    seller = {'name': shipper} if shipper else None
    volume = try_apply(item.pop('cargo_volume', None), float, int, str)

    item['cargo'] = {
        # product requested by coal analyst (Malay Trivedi)
        'product': 'Thermal Coal',
        'movement': 'load',
        'volume': volume,
        'volume_unit': Unit.tons,
        'seller': seller,
    }

    # helper columns are not part of the final item
    item.pop('finished', None)
    item.pop('sheet_mode', None)

    return item
def portcall_mapping() -> Dict[str, Tuple[str, Optional[Callable]]]:
    """Return the table translating raw portcall headers into item keys.

    Returns:
        Dict[str, Tuple[str, Optional[Callable]]]: header -> (target key,
            converter or None); unused headers hold an `ignore_key(reason)` marker

    """

    def _gt_from_hundreds(raw):
        # the 'GT 100' column is scaled by 100 to obtain the gross tonnage
        return int(raw) * 100 if try_apply(raw, int) else None

    def _reported_date(raw):
        return to_isoformat(raw, dayfirst=True)

    def _vessel_name(raw):
        return may_strip(raw).upper() if raw else None

    return {
        '#': ignore_key('internal portcall ID'),
        'Arrival': ('eta', None),
        'Departure': ('departure', None),
        'Destination': ignore_key('next zone; discuss with analysts on accuracy and value'),
        'Expected Arrival': ('eta', None),
        'Flag': ignore_key('empty'),
        'GT': ('vessel_gt', None),
        'GT 100': ('vessel_gt', _gt_from_hundreds),
        'Home Port': ignore_key('home port'),
        'Length': ('vessel_length', may_strip),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
        'Situation': ignore_key('current vessel situation'),
        'Vessel': ('vessel_name', _vessel_name),
    }
def normalize_cargoes(raw_operation):
    """Extract cargo info from raw operation list.

    Operations whose movement is not in MOVEMENT_MAPPING, whose product is
    empty, or whose product contains a PRODUCT_BLACKLIST alias are skipped.
    A product naming several cargoes (joined by ' y ' or '+') yields one cargo
    per name, with the tonnage split equally between them.

    Args:
        raw_operation (List[Dict[str, str]]):

    Yields:
        Dict[str, str]:

    """
    for raw_cargo in raw_operation:
        movement = raw_cargo['nomoperacion']
        product = normalize_cargo_name(raw_cargo['mercancia'])
        volume = raw_cargo['toneladas']

        # only keep relevant movement and relevant product; checked before any
        # splitting so that an empty product is skipped rather than crashing
        if movement not in MOVEMENT_MAPPING or not product:
            continue
        if any(alias in product.upper() for alias in PRODUCT_BLACKLIST):
            continue

        product_list = re.split(r'\sy\s|\+', product)
        if len(product_list) > 1:
            volume = float(volume) / len(product_list)

        for p in product_list:
            yield {
                'product': p,
                'movement': MOVEMENT_MAPPING[movement],
                'volume': try_apply(volume, str),
                'volume_unit': Unit.tons,
            }
def field_mapping():
    """Return the table translating raw vessel-sheet labels into item keys.

    Returns:
        Dict[str, Any]: label -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for labels that are not used

    """

    def _cargoes(raw):
        return [build_cargo(cargo) for cargo in raw]

    def _gross_tonnage(raw):
        # drop the thousands separator
        return raw.replace(',', '')

    def _length(raw):
        return try_apply(raw.replace(',', ''), float, int, str)

    return {
        'Agency:': ignore_key('shipping agent'),
        'Berth:': ignore_key('shipping agent'),
        'Data arrivo in rada': ('arrival', normalize_date),
        'cargoes': ('cargoes', _cargoes),
        'Draft:': ignore_key('vessel draught'),
        'ETA': ('eta', normalize_date),
        'EDB:': ignore_key('estimated date of berthing'),
        'Flag:': ignore_key('vessel flag'),
        'GRT:': ('vessel_gross_tonnage', _gross_tonnage),
        'IMO:': ('vessel_imo', None),
        'Length:': ('vessel_length', _length),
        'provider_name': ('provider_name', may_strip),
        'port_name': ('port_name', may_strip),
        'Receiver:': ('buyer', None),
        'reported_date': ('reported_date', None),
        'S.I.:': ignore_key('unknown'),
        'Ship:': ('vessel_name', may_strip),
    }
def field_mapping():
    """Return the table translating raw schedule headers into item keys.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _vessel_name(raw):
        # drop the 'M.V ' prefix
        return raw.replace('M.V ', '')

    def _movement(raw):
        return CARGO_MOVEMENT_MAPPING.get(raw)

    def _length(raw):
        # drop the 'M' unit letter before converting
        return try_apply(raw.replace('M', ''), float, int, str)

    def _is_container(raw):
        return raw == 'CONT'

    return {
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'VESSEL': ('vessel_name', _vessel_name),
        'QUAY': ignore_key('quay number of portcall'),
        'IMP/EXP': ('cargo_movement', _movement),
        'LOA': ('vessel_length', _length),
        'TYPE': ('is_container_vessel', _is_container),
        'AGENT': ignore_key('shipping agent'),
        'CARRIER': ignore_key('carrier'),
        'TERMINAL': ignore_key('berth terminal'),
        'TNS': ('cargo_product', normalize_cargo),
        'ATD': ('departure', normalize_date),
        'ETD': ('departure', normalize_date),
        'ATB': ('berthed', normalize_date),
        'ETB': ('berthed', normalize_date),
        'ATA': ('arrival', normalize_date),
        'ETA': ('eta', normalize_date),
    }
def portcall_mapping():
    """Return the table translating raw berthing-report headers into item keys.

    Alternative spellings of a header (e.g. 'QTY.' and 'QTY') point at the
    same target key.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """
    mapping = {
        'B.NO.': ('berth', None),
        'SL.NO': ignore_key('irrelevant'),
        'NAME OF VESSEL': ('vessel_name', normalize_vessel_name),
        'NAME OF THE VESSEL': ('vessel_name', normalize_vessel_name),
        'IND/': ignore_key('irrelevant'),
        'LOA': ('vessel_loa', lambda x: try_apply(x, float, int)),
        # dates and times
        'ETA': ('eta', None),
        'E.RT.TIME': ('eta_time', None),
        'DOA': ('eta', None),
        'RT.TIME': ('eta_time', None),
        'NOD(WAITING)': ignore_key('irrelevant'),
        'ARRIVAL': ('arrival', None),
        'BERTHING': ('berthed', None),
        # cargo
        'CARGO': ('product', None),
        'AGENT': ('shipping_agent', None),
        'RECEIVER': ignore_key('cannot be used for now'),
        'QTY.': ('volume', normalize_volume),
        'QTY': ('volume', normalize_volume),
        'DAY': ignore_key('irrelevant'),
        'TOTAL': ('volume_total', normalize_volume),
        'BALANCE': ('volume_bal', normalize_volume),
        'ETD': ('departure', None),
        'REASONS FOR WAITING': ignore_key('irrelevant'),
        'B.PREFER': ignore_key('irrelevant'),
        # meta fields
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }
    return mapping
def process_item(raw_item):
    """Transform raw item into usable events, one per cargo.

    Items without a vessel name, or without an arrival or departure date, are
    discarded with a warning. Products and volumes are paired positionally;
    the shorter list is padded with None. Every yielded event is an
    independent dict.

    Args:
        raw_item (Dict[str, str]):

    Yields:
        Dict[str, Any]:

    """
    item = map_keys(raw_item, grades_mapping())

    # discard items without vessel names
    vessel_name = item.pop('vessel_name', None)
    if not vessel_name:
        logger.warning(f'Item has no vessel name, discarding:\n{item}')
        return

    # discard items without valid dates
    if not (item.get('arrival') or item.get('departure')):
        logger.warning(f'Item has no valid portcall dates, discarding:\n{item}')
        return

    # build Vessel sub-model
    item['vessel'] = {'name': vessel_name, 'gross_tonnage': item.pop('gross_tonnage', None)}

    # build Cargo sub-model
    products, volumes = get_products_and_volumes(item)
    movement = item.pop('cargo_movement', None)
    for product, volume in zip_longest(products, volumes):
        cargo = {
            'product': product,
            'movement': movement,
            'volume': try_apply(volume, float, int),
            'volume_unit': Unit.tons,
        }
        # yield a copy: re-yielding one mutated dict would make all events alias
        yield {**item, 'cargo': cargo}
def field_mapping():
    """Return the table translating raw table headers into item keys.

    Covers the headers of the eta/arrival tables and of the berthed tables.

    Returns:
        Dict[str, Any]: header -> `(target key, converter or None)`, or an
            `ignore_key(reason)` marker for headers that are not used

    """

    def _single_cargo(raw):
        # one cargo entry per non-empty cell, none otherwise
        return [{'product': raw}] if raw else []

    def _length(raw):
        return try_apply(raw, float, int, str)

    return {
        # common to all tables
        'AGENT': ignore_key('not used in model'),
        'CARGO': ('cargoes', _single_cargo),
        'NAME OF SHIP': ('vessel_name', None),
        'Name of the ship / voyage': ('vessel_name', None),
        'Type of Cargo': ('cargoes', _single_cargo),
        'Type of Cargo ': ('cargoes', _single_cargo),
        # static info
        'event': ('event', None),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        # for eta/arrival tables
        'DRAFT': ignore_key('not used in model'),
        'ETA / ATA': ('matching_date', to_isoformat),
        'No': ignore_key('not used in model'),
        # for berthed tables
        'ARR. TEMA': ignore_key('not used in model'),
        'BOOKINGS': ignore_key('not used in model'),
        'ETB/ATB': ('matching_date', to_isoformat),
        'ETD': ignore_key('not used in model'),
        'L.O.A': ('vessel_length', _length),
        'SHIP STATUS': ignore_key('not used in model'),
        'ST\'DORE': ignore_key('not used in model'),
    }
def process_cargo(row):
    """Process cargo info given a table row.

    Not all rows contain cargo info however, and so this function is for
    extracting relevant cargo info from each row. The load columns are checked
    first, then the unload columns; the first one holding a non-zero numeric
    volume decides the movement. A product naming several cargoes (joined by
    '+' or '/') yields one cargo per name, with the volume split equally.

    Args:
        row (list): table row as a list, with each element as a table cell;
            the last element is the section number used to look up FIELD_MAPS

    Yields:
        Dict[str, str]: Cargo item

    """
    section_number = row[-1]
    field_map_section = FIELD_MAPS[section_number]

    # if movement found, collect relevant cargo info
    for movement, section_key in (('load', 'cargo_load'), ('discharge', 'cargo_unload')):
        columns = field_map_section[section_key]
        volume = row[columns['volume']].replace(',', '')
        if try_apply(volume, float):
            product = may_strip(row[columns['product']])
            break
    else:
        # neither the load nor the unload columns carry a volume
        return

    # raw string: '\+' and '\/' are invalid escapes in a plain string literal
    product_list = re.split(r'[+/]', product or '')
    if len(product_list) > 1:
        volume = float(volume) / len(product_list)

    for prod in product_list:
        yield {
            'product': may_strip(prod),
            'volume': str(volume),
            'volume_unit': Unit.tons,
            'movement': movement,
        }