Python try_apply示例，kp_scrapers.lib.parser.try_apply Python示例

示例#1

0

显示文件

文件： parser.py 项目： theHausdorffMetric/test

def _is_same_dwt(dwt_1, dwt_2):
    """Check if the dwt is similar by approximation of ±5%.

    Examples:
        >>> _is_same_dwt('45229', 44783)
        True
        >>> _is_same_dwt('-', 44000)
        False

    Args:
        dwt_1 (str):
        dwt_2 (int):

    Returns:
        Boolean:

    """
    if isinstance(dwt_1, str):
        dwt_1 = try_apply(may_strip(dwt_1), int)

    if isinstance(dwt_2, str):
        dwt_2 = try_apply(may_strip(dwt_2), int)

    if dwt_1 and dwt_2:
        _lower_bound, _upper_bound = dwt_1 * (1 - APPROXIMATION), dwt_1 * (
            1 + APPROXIMATION)
        return _lower_bound <= dwt_2 <= _upper_bound

    return False

示例#2

0

显示文件

文件： parser.py 项目： theHausdorffMetric/test

def _is_same_year(year_1, year_2, offset=1):
    """Check if the build year is similar by approximation of ±1 year.

    We need the approximation because Clarksons often provides build year offset by one.

    Examples:
        >>> _is_same_year('2018', 2019)
        True
        >>> _is_same_year('2018', '2020')
        False
        >>> _is_same_year('-', 2019)
        False

    Args:
        dwt_1 (str):
        dwt_2 (int):

    Returns:
        Boolean:

    """
    if isinstance(year_1, str):
        year_1 = try_apply(may_strip(year_1), int)

    if isinstance(year_2, str):
        year_2 = try_apply(may_strip(year_2), int)

    if year_1 and year_2:
        _lower_bound, _upper_bound = year_1 - 1, year_1 + 1
        return _lower_bound <= year_2 <= _upper_bound

    return False

示例#3

0

显示文件

def parse_expected_vessels(row, reported_date, current_port, provider):
    """Yield one expected-portcall dict per cargo found in `row`.

    Nothing is yielded when the row's movement has no entry in
    ``MOVEMENT_MAPPING`` or when its ETA is blank. A cargo named 'LPG' is
    emitted twice, as 'Butane' and 'Propane', each carrying half of the
    quantity (floor division).

    Args:
        row (Dict[str, str]): raw row; keys prefixed with ``e_`` are read
        reported_date: report date, passed to `to_isoformat` and `normalize_date`
        current_port (str): port name, translated through ``PORT_MAPPING`` when known
        provider (str): provider name copied onto every item

    Yields:
        Dict[str, Any]:

    """
    if not (MOVEMENT_MAPPING.get(row['e_movement']) and may_strip(row['e_eta'])):
        return

    def _portcall(product_name, quantity, *transforms):
        # a fresh dict is built on every call so yielded items never share state
        return {
            'reported_date': to_isoformat(reported_date),
            'eta': normalize_date(reported_date, row['e_eta']),
            'port_name': PORT_MAPPING.get(current_port, current_port),
            'provider_name': provider,
            'cargo': {
                'product': product_name,
                'movement': MOVEMENT_MAPPING.get(row['e_movement']),
                'volume': try_apply(quantity, *transforms),
                'volume_unit': Unit.tons,
            },
            'vessel': {'name': row['e_vessel']},
        }

    cargo_pairs = zip(*parse_product(map_spelling(row['e_cargo']), row['e_qty']))
    for product, quantity in cargo_pairs:
        if product != 'LPG':
            yield _portcall(product, quantity, int, str)
            continue

        # LPG is reported as its two components, half of the volume each
        for component in ('Butane', 'Propane'):
            yield _portcall(component, quantity, int, lambda x: x // 2, str)

示例#4

0

显示文件

def is_valid_numeric(candidate):
    """Validate that `candidate` is a non-negative number and return it unchanged.

    Args:
        candidate (str | int | float): value to validate

    Returns:
        str | int | float: the very same `candidate`, untouched

    Raises:
        ValidationError: if `candidate` cannot be converted to a float,
            or if it is negative

    Examples:
        >>> is_valid_numeric('4500')
        '4500'
        >>> is_valid_numeric(4500)
        4500
        >>> is_valid_numeric('-4500') # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ValidationError: ["Value must be a positive value"]
        >>> is_valid_numeric('foobar') # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ValidationError: ["Value is not numeric: foobar"]

    """
    # Only the conversion is guarded: a blanket `except` around the whole body
    # would also swallow the negative-value error below and misreport it as
    # "not numeric".
    try:
        numeric = float(candidate)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValidationError('Value is not numeric: {}'.format(candidate)) from err

    if numeric < 0:
        raise ValidationError('Value must be a positive value')

    return candidate

示例#5

0

显示文件

def portcall_mapping():
    """Build the raw-label mapping for portcall fields.

    Each raw label maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _truncated_int(raw):
        return try_apply(raw, float, int)

    def _plain_int(raw):
        return try_apply(raw, int)

    def _dwt(raw):
        # a literal '0' deadweight is treated as missing
        if raw == '0':
            return None
        return try_apply(raw, int)

    main_page = {
        'Arrived Date': ('matching_date', to_isoformat),
        'Schedule Date': ('matching_date', to_isoformat),
        'Berth': ('berth', None),
        'Vessel Name': ('vessel_name', None),
        'Vessel Type': ignore_key('alternate field for vessel type'),
        'Origin / Destination': ignore_key('FIXME not processed due to limitations on the ETL'),
        'Origin': ignore_key('ignore origin zone'),
        'Destination': ignore_key('FIXME not processed due to limitations on the ETL'),
    }
    vessel_page = {
        'Type :': ('vessel_type', None),
        'Flag :': ('vessel_flag', None),
        'LOA :': ('vessel_loa', _truncated_int),
        'Bow to Bridge :': ignore_key('bow to bridge'),
        'Beam :': ('vessel_beam', _truncated_int),
        'Summer DWT :': ('vessel_dwt', _dwt),
        'Gross Tonnage :': ('vessel_gt', _plain_int),
        'IMO :': ('vessel_imo', None),
        'Thrust Bow :': ignore_key('ignore'),
        'Thrust Stern :': ignore_key('ignore'),
        'Summer Draft :': ignore_key('ignore'),
        'Ramp Mid :': ignore_key('ignore'),
        'Ramp Stern :': ignore_key('ignore'),
        'Cargo Gear :': ignore_key('ignore'),
        'Inert Gas :': ignore_key('ignore'),
        'Crude Oil Wash :': ignore_key('ignore'),
    }
    meta_info = {
        'event_type': ('event_type', None),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', to_isoformat),
    }

    # sections share no key, so merging keeps the original key order
    return {**main_page, **vessel_page, **meta_info}

示例#6

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def field_mapping():
    """Build the raw-key mapping for this source's portcall records.

    Each raw key maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _dayfirst_iso(raw):
        return to_isoformat(raw, dayfirst=True)

    def _dayfirst_iso_or_none(raw):
        # blank dates stay None instead of being handed to the date parser
        if not raw:
            return None
        return to_isoformat(raw, dayfirst=True)

    def _length(raw):
        return try_apply(raw, float, int, str)

    def _gross_tonnage(raw):
        return try_apply(raw, str)

    def _cargoes(raw):
        return list(normalize_cargoes(raw))

    return {
        'bandera': ignore_key('vessel flag'),
        'calent': ignore_key('vessel draught'),
        'codbuq': ('vessel_imo', normalize_imo),
        'desmue': ignore_key('berth'),
        'destipbuq': ignore_key('vessel type'),
        'eslora': ('vessel_length', _length),
        'fecatr': ('eta', _dayfirst_iso_or_none),
        'fecsal': ('departure', _dayfirst_iso_or_none),
        'gt': ('vessel_gross_tonnage', _gross_tonnage),
        'nombuq': ('vessel_name', None),
        'nomcsg': ignore_key('shipping agent'),
        'operaciones': ('cargoes', _cargoes),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _dayfirst_iso),
    }

示例#7

0

显示文件

def field_mapping():
    """Build the mapping for rows keyed by column index (as strings '0'..'15').

    Each raw key maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _tonnage(raw):
        return try_apply(raw, float, int)

    def _excel_date(raw):
        # only float cells are converted; anything else yields None
        if not isinstance(raw, float):
            return None
        return xldate_as_datetime(raw, 0)

    return {
        '0': ('vessel_name', may_strip),
        '1': ('flag', may_strip),
        '2': ('gross_tonnage', _tonnage),
        '3': ('vessel_type', may_strip),
        '4': ('shipping_agent', may_strip),
        '5': ignore_key('ignore transit'),
        '6': ignore_key('from country'),
        '7': ('arrival', _excel_date),
        '8': ignore_key('next_zone, bacause ceyhan is not up to date'),
        '9': ('departure', _excel_date),
        '10': ('evacuation_type', None),
        '11': ('evacuation_tonnage', _tonnage),
        '12': ignore_key('discharge at port'),
        '13': ('loading_type', None),
        '14': ('loading_tonnage', _tonnage),
        '15': ignore_key('loading port'),
        # meta fields
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }

示例#8

0

显示文件

def field_mapping():
    """Build the mapping from upper-case table headers to item fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``. Several header spellings point at the same target.

    Returns:
        Dict[str, Any]:

    """
    def _loa(raw):
        # falsy conversions (failed or zero) are reported as None
        length = try_apply(raw, float, int)
        return round(length) if length else None

    def _agent(raw):
        # agents containing 'TBC' are dropped
        return None if 'TBC' in raw else raw

    def _cargo_information(raw):
        return normalize_cargos(raw)

    return {
        'ARRIVAL DATE': ('arrival_date', may_strip),
        'ARRIVAL': ('arrival_date', may_strip),
        'DEPART': ('departure_date', may_strip),
        'DEPARTURE': ('departure_date', may_strip),
        'AD': ignore_key('after draught'),
        'STEVEDORES': ignore_key('stevedores'),
        'PDSA': ignore_key('pds'),
        'DAILY': ignore_key('daily'),
        'ROB': ignore_key('rob'),
        'SHIP NAME': ('vessel_name', None),
        'VESSELS': ('vessel_name', None),
        'VESSEL': ('vessel_name', None),
        'VOY': ignore_key('voyage number'),
        'LOA': ('vessel_loa', _loa),
        'DRAFT': ignore_key('draft'),
        'FLAG': ignore_key('flag'),
        'AGENT': ('shipping_agent', _agent),
        'AGENTS': ('shipping_agent', _agent),
        'LINE': ignore_key('line'),
        'BERTH': ('berth', None),
        'OPERATIONS': ('cargo_information', _cargo_information),
        'CALL': ignore_key('call'),
        'reported_date': ('reported_date', None),
        'provider_name': ('provider_name', None),
        'port_name': ('port_name', None),
        'month_row': ('month_row', None),
    }

示例#9

0

显示文件

def field_mapping():
    """Build the mapping from lower-case raw column names to item fields.

    Each raw name maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _numeric_string(raw):
        # float -> int -> str, as chained by try_apply
        return try_apply(raw, float, int, str)

    return {
        'berth': ('berth', None),
        'vessel': ('vessel_name', None),
        'nationality': ('vessel_flag', None),
        'agent': ('shipping_agent', None),
        'arrival_date': ('arrival_date', None),
        'arrival_time': ('arrival_time', None),
        'eta_date': ('eta_date', None),
        'eta_time': ('eta_time', None),
        'loa': ('vessel_loa', _numeric_string),
        'gt': ('vessel_gt', _numeric_string),
        'fwd': ignore_key('irrelevant'),
        'aft': ignore_key('irrelevant'),
        'berthed_date': ('berthed_date', None),
        'berthed_time': ('berthed_time', None),
        'cargo': ('cargo_product', may_strip),
        'total': ('cargo_volume', _numeric_string),
        'tonnage': ('cargo_volume', None),
        'remarks': ('cargo_movement', None),
        'purpose': ('cargo_movement', None),
        'terminal': ('installation', None),
        'position': ignore_key('irrelevant'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }

示例#10

0

显示文件

def _normalize_total_volume(raw_volume, raw_unit, numdays):
    """Normalize raw volume data based on total date range.

    TODO could use start/end date info to determine total volume, however no use case for it yet

    Args:
        raw_volume (int):
        raw_unit (str):

    Returns:
        Tuple[str, str]: tuple of (volume, volume_unit)

    Examples:
        >>> _normalize_total_volume(2640, 'Thousand Barrels per Day', 7)
        (18480000, 'barrel')
        >>> _normalize_total_volume(2640, 'Thousand Barrels per Day', 30)
        (79200000, 'barrel')
        >>> _normalize_total_volume(2640, 'foobar', 7)
        Traceback (most recent call last):
            ...
        ValueError: Unknown volume unit: foobar
    """
    if raw_unit == 'Thousand Barrels per Day':
        volume = try_apply(raw_volume * numdays * 1000, int)
    elif raw_unit == 'Thousand Barrels':
        volume = try_apply(raw_volume * 1000, int)
    else:
        raise ValueError(f'Unknown volume unit: {raw_unit}')

    return volume, 'barrel'

示例#11

0

显示文件

    def parse(self, response):
        """Yield one customs-figure dict per table row of the response.

        The first two ``<tr>`` rows are skipped. Each figure covers one month
        starting on the first day of ``response.meta['year']`` /
        ``response.meta['month']``. Rows whose export zone is '0', or whose
        value or mass is 0, are dropped.

        Args:
            response: response exposing `xpath` and a `meta` dict holding
                'year', 'month', 'commodity' and 'subcommodity'

        Yields:
            Dict[str, Any]:

        """
        for table_row in response.xpath('//tr')[2:]:
            cells = table_row.xpath('td/text()').extract()
            meta = response.meta

            # build CustomsFigure model
            period_start = dt.datetime(meta['year'], meta['month'], 1)
            period_end = period_start + relativedelta(months=1)
            product = meta['subcommodity'] or meta['commodity']
            provider = self.provider
            midnight = dt.datetime.utcnow().replace(hour=0, minute=0, second=0)
            reported = midnight.isoformat(timespec='seconds')
            export_zone = cells[0]
            # thousands separators are removed before conversion; value is scaled by 1000
            value = try_apply(cells[1].replace(',', ''), int, lambda x: x * 1000)
            mass = try_apply(cells[2].replace(',', ''), int)

            # sanity check; in case no price/mass/export_zone
            if export_zone == '0' or value == 0 or mass == 0:
                continue

            yield {
                'start_utc': period_start,
                'end_utc': period_end,
                'product': product,
                'provider_name': provider,
                'reported_date': reported,
                'export_zone': export_zone,
                'import_zone': 'South Korea',  # source is exclusively about korean imports
                'valuation': {
                    'value': value,
                    'currency': Currency.USD,
                },
                'mass': mass,
                'mass_unit': Unit.kilogram,
            }

示例#12

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def field_mapping():
    """Build the mapping from vessel-page labels to vessel item fields.

    Each raw label maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _mmsi(raw):
        # the value after the first '/' is kept; no slash means no MMSI
        if '/' not in raw:
            return None
        return raw.partition('/')[2].strip()

    def _integer(raw):
        return try_apply(raw, int)

    def _dimension(raw):
        return try_apply(raw, _handle_empty_value, int)

    return {
        'AIS Type': ignore_key('vessel AIS type'),
        'Flag': ('flag_name', None),
        'Destination': ignore_key('destination'),
        'ETA': ignore_key('eta'),
        'IMO / MMSI': ('mmsi', _mmsi),
        'Callsign': ('call_sign', None),
        'Length / Beam': ignore_key('length and beam, extracted from following'),
        'Current draught': ignore_key('current draught'),
        'Course / Speed': ignore_key('course / speed'),
        'Coordinates': ignore_key('coordinates'),
        'Last report': ignore_key('last report'),
        'IMO number': ('imo', None),
        'Vessel Name': ('name', None),
        'Ship type': ('type', None),
        'Homeport': ignore_key('homeport'),
        'Gross Tonnage': ('gross_tonnage', _integer),
        'Summer Deadweight (t)': ('dead_weight', _integer),
        'Length Overall (m)': ('length', _dimension),
        'Beam (m)': ('beam', _dimension),
        'Draught (m)': ignore_key('draught, no access'),
        'Year of Built': ('build_year', _integer),
        'Builder': ignore_key('builder, no access'),
        'Place of Built': ignore_key('place of built, no access'),
        'Yard': ignore_key('yard, no access'),
        'TEU': ignore_key('teu'),
        'Crude': ignore_key('crude'),
        'Grain': ignore_key('grain'),
        'Bale': ignore_key('bale'),
        'Registered Owner': ignore_key('registered owner, no access'),
        'Manager': ignore_key('manager, no access'),
        'provider_name': ('provider_name', None),
    }

示例#13

0

显示文件

def cargo_mapping():
    """Build the mapping from raw cargo keys to ``(target key, converter)`` pairs.

    Returns:
        Dict[str, Tuple[str, Callable]]:

    """
    def _movement_from_manifest(raw):
        # movement is indicated in last character of manifest number
        return MOVEMENT_MAPPING.get(raw[-1])

    def _movement(raw):
        return MOVEMENT_MAPPING.get(raw)

    def _constant_unit(_raw):
        # the raw unit is disregarded in favour of the module constant
        return VOLUME_UNIT

    def _volume(raw):
        return try_apply(raw, float, int, str)

    # NOTE 'tnspotShapDc' is deliberately left unmapped
    return {
        'frghtNm': ('product', smart_split),
        'mrnum': ('movement', _movement_from_manifest),
        'msrmntUnitSe': ('volume_unit', _constant_unit),
        'tkinSe': ('movement', _movement),
        'wt': ('volume', _volume),
        'wtTon': ('volume', _volume),
    }

示例#14

0

显示文件

def normalize_matching_date(raw_date):
    """Normalize a raw matching date into (berthed, arrival, departure).

    Two inputs are understood:
        - an xlrd float date, returned as the berthed date only
        - a day-first date range such as ``11-13/12/15`` or ``30/07-01/08/13``,
          returned as arrival and departure

    Any other string (a single date without a range, for instance) does not
    match the range pattern: an error is logged and three Nones are returned.

    Examples:
        >>> normalize_matching_date('11-13/12/15')
        (None, '2015-12-11T00:00:00', '2015-12-13T00:00:00')
        >>> normalize_matching_date('24/01-01/02/14')
        (None, '2014-01-24T00:00:00', '2014-02-01T00:00:00')
        >>> normalize_matching_date('24/12-01/01/14')
        (None, '2013-12-24T00:00:00', '2014-01-01T00:00:00')
        >>> normalize_matching_date(42359.0)
        ('2015-12-21T00:00:00', None, None)

    Args:
        raw_date (str | float):

    Returns:
        str: berthed
        str: arrival date
        str: departure date

    """
    if not raw_date:
        return None, None, None

    # xlrd date float
    if isinstance(raw_date, float):
        return _convert_xlrd_date(raw_date), None, None

    # date range
    _match = re.match(r'(\d{1,2})/?(\d{1,2})?-(\d{1,2}/\d{1,2}/\d{1,2})', raw_date)
    if not _match:
        logger.error(f'Date pattern is invalid: {raw_date}')
        return None, None, None

    arrival_day, arrival_month, departure = _match.groups()
    arrival_day = try_apply(arrival_day, int)
    arrival_month = try_apply(arrival_month, int)

    departure = parse_date(departure, dayfirst=True)

    arrival = None
    if arrival_day and arrival_month:
        # the range carries a single year, which belongs to the departure; an
        # arrival month later than the departure month means the range started
        # in the previous year
        arrival_year = departure.year
        if arrival_month > departure.month:
            arrival_year -= 1
        arrival = departure.replace(year=arrival_year, month=arrival_month, day=arrival_day)
    elif arrival_day:
        arrival = departure.replace(day=arrival_day)

    # arrival stays None when the day is zero or unparsable; do not crash on it
    return None, arrival.isoformat() if arrival else None, departure.isoformat()

示例#15

0

显示文件

def parse_vessel_movement(row, reported_date, current_port, provider):
    """Yield one vessel-movement dict per cargo found in `row`.

    Nothing is yielded when the row's movement has no entry in
    ``MOVEMENT_MAPPING``. Blank arrival/berthed/sailed cells become None.
    A cargo named 'LPG' is emitted twice, as 'Butane' and 'Propane', each
    carrying half of the quantity (floor division).

    Args:
        row (Dict[str, str]): raw row; keys prefixed with ``m_`` are read
        reported_date: report date, passed to `to_isoformat` and `normalize_date`
        current_port (str): port name, translated through ``PORT_MAPPING`` when known
        provider (str): provider name copied onto every item

    Yields:
        Dict[str, Any]:

    """
    if not MOVEMENT_MAPPING.get(row['m_movement']):
        return

    def _date_or_none(raw):
        # blank cells are not sent to the date normalizer
        return normalize_date(reported_date, raw) if may_strip(raw) else None

    def _movement(product_name, quantity, *transforms):
        # a fresh dict is built on every call so yielded items never share state
        return {
            'reported_date': to_isoformat(reported_date),
            'arrival': _date_or_none(row['m_arrived']),
            'berthed': _date_or_none(row['m_berthed']),
            'departure': _date_or_none(row['m_sailed']),
            'port_name': PORT_MAPPING.get(current_port, current_port),
            'provider_name': provider,
            'cargo': {
                'product': product_name,
                'movement': MOVEMENT_MAPPING.get(row['m_movement']),
                'volume': try_apply(quantity, *transforms),
                'volume_unit': Unit.tons,
            },
            'vessel': {'name': row['m_vessel']},
        }

    cargo_pairs = zip(*parse_product(map_spelling(row['m_cargo']), row['m_qty']))
    for product, quantity in cargo_pairs:
        if product != 'LPG':
            yield _movement(product, quantity, int, str)
            continue

        # LPG is reported as its two components, half of the volume each
        for component in ('Butane', 'Propane'):
            yield _movement(component, quantity, int, lambda x: x // 2, str)

示例#16

0

显示文件

def process_item(raw_item):
    """Transform raw item into usable events, one per cargo product.

    Items without any of eta/arrival/berthed, or without a vessel name, are
    discarded with a warning. The cargo volume is split equally between the
    products. Every yielded event is an independent dict, and each of them
    carries the buyer/seller when one is known.

    Args:
        raw_item (Dict[str, str]):

    Yields:
        Dict[str, Any]:

    """
    item = map_keys(raw_item, grades_mapping())

    # discard items without valid dates
    if not (item.get('eta') or item.get('arrival') or item.get('berthed')):
        logger.warning(
            f'Item has no valid portcall dates, discarding:\n{item}')
        return

    # discard items without vessel names
    vessel_name, _ = item.pop('vessel_name_and_charter_status')
    if not vessel_name:
        logger.warning(f'Item has no vessel name, discarding:\n{item}')
        return

    # build Vessel sub-model
    item['vessel'] = {'name': vessel_name, 'imo': item.pop('vessel_imo', None)}

    # build Cargo sub-model
    products = item.pop('cargo_product', None) or []
    movement = item.pop('cargo_movement', None)
    # NOTE assume equal volume split by number of products
    volume = try_apply(item.pop('cargo_volume', None), int, float)
    # taken out once, before the loop, so that every product keeps its player
    # and the raw field never leaks into the yielded events
    buyer_seller = item.pop('buyer_seller', None)

    for product in products:
        cargo = {
            'product': product,
            'movement': movement,
            'volume': str(volume / len(products)) if volume else None,
            'volume_unit': Unit.tons,
        }
        if movement and buyer_seller:
            player = 'seller' if movement == 'load' else 'buyer'
            cargo[player] = {'name': buyer_seller}

        # yield a copy: re-yielding one mutated dict would make every event
        # held by the consumer end up with the last product's cargo
        yield {**item, 'cargo': cargo}

示例#17

0

显示文件

def grades_mapping():
    """Build the mapping from raw grades-report headers to item fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _imo(raw):
        return try_apply(raw, float, int, str) if raw else None

    def _movement(raw):
        return MOVEMENT_MAPPING.get(raw)

    def _volume(raw):
        return try_apply(raw, float, int) if raw else None

    def _last_party(raw):
        # keep what follows the last '/'
        return raw.split('/')[-1] if raw else None

    def _installation(raw):
        return raw or None

    def _name_and_status(raw):
        # don't use the separator value
        name, _separator, status = raw.partition('/')
        return [may_strip(name), may_strip(status)]

    return {
        'Arrived': ('arrival', normalize_pc_date),
        'AGENT': ignore_key('shipping agent is not required'),
        'Berthed': ('berthed', normalize_pc_date),
        'BL DD': ('berthed', normalize_pc_date),
        'CHARTERER': ignore_key('charterer is not required'),
        'COUNTRY OF DEST': ignore_key('not specific enough; we already have "NEXT PORT"'),
        'ETA': ('eta', normalize_pc_date),
        'ETB': ('berthed', normalize_pc_date),
        'ETS': ignore_key('not required'),
        'GRADE DETAIL': ('cargo_product', split_cargoes),
        'GRADE GROUP': ignore_key('we already have grade detail, so we can ignore this'),
        'IMO NR': ('vessel_imo', _imo),
        'LOAD POSITION': ignore_key('irrelevant'),
        'LOAD/DISCH': ('cargo_movement', _movement),
        'NEXT PORT': ignore_key('not required for PortCall for now'),
        'PORT': ('port_name', None),
        'PRE. PORT': ignore_key('not required'),
        'provider_name': ('provider_name', None),
        'QTT IN MT': ('cargo_volume', _volume),
        'region_name': ignore_key('not required'),
        'reported_date': ('reported_date', normalize_reported_date),
        'Sailed': ignore_key('not required'),
        'SHIPPERS/RECEIVERS': ('buyer_seller', _last_party),
        'STATUS': ignore_key('irrelevant'),
        'TERMINAL': ('installation', _installation),
        'VESSEL': ('vessel_name_and_charter_status', _name_and_status),
    }

示例#18

0

显示文件

def grades_mapping():
    """Build the mapping from raw loading-sheet headers to item fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _vessel(raw):
        return {'name': raw}

    def _product(raw):
        # unknown grades fall back to the raw value
        return PRODUCT_MAPPING.get(raw.upper(), raw)

    def _quantity(raw):
        return try_apply(raw, float, int, str)

    return {
        'Date': ('departure', to_isoformat),
        'Dock': ('berth', None),
        'Vessel': ('vessel', _vessel),
        'Charterer': ignore_key('charters spider'),
        'Grade': ('cargo_product', _product),
        'Qty Nominated': ('cargo_volume_nominated', _quantity),
        'Qty Loaded': ('cargo_volume', _quantity),
        'Loading Rate': ignore_key('irrelevant'),
        'Status': ignore_key('irrelevant'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'sheet_name': ('sheet_name', None),
    }

示例#19

0

显示文件

def grades_mapping():
    """Build the mapping from raw report headers to item fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.

    Returns:
        Dict[str, Any]:

    """
    def _vessel_name(raw):
        # the 'STEAMER' entry is treated as no name
        return None if raw == 'STEAMER' else raw

    def _imo(raw):
        return try_apply(raw, int, str)

    def _product(raw):
        return None if raw in IRRELEVANT_PRODUCTS else raw

    def _reported_date(raw):
        # raw value is formatted as month, day, four-digit year without separators
        return dt.datetime.strptime(raw, '%m%d%Y').isoformat()

    return {
        'ENTRY DATE': ignore_key('redundant reported date'),
        'WEEK ENDING': ignore_key('redundant'),
        'Sequence Number': ignore_key('redundant'),
        'VESSEL ENTRY NAME': ('vessel_name', _vessel_name),
        'COMMODITY LABEL': ignore_key('redundant'),
        'PORT OF ORIGIN': ('port_name', may_strip),
        'CODE ORG': ignore_key('redundant'),
        'ORG CODE': ignore_key('redundant'),
        'UNCTAD CODE LOAD': ignore_key('redundant'),
        'DESCRIPTION LOAD': ignore_key('redundant arrival zone'),
        'DESTINATION': ignore_key('redundant'),
        'CODE (DES)': ignore_key('redundant'),
        'DES CODE': ignore_key('redundant'),
        'UNCTAD CODE D': ignore_key('redundant'),
        'DESCRIPTION DEST': ignore_key('redundant departure zone'),
        'VESSEL NAME': ignore_key('redundant'),
        'VESSEL YR': ignore_key('redundant'),
        'IMO': ('vessel_imo', _imo),
        'TONNAGE': ('cargo_volume', normalize_cargo_volume),
        'SIZE': ignore_key('not a good estimate for dwt'),
        'COMMODITY': ('cargo_product', _product),
        'MRI COMM CODE': ignore_key('redundant'),
        'COMM CODE': ignore_key('redundant'),
        'Dates': ignore_key('redundant'),
        'S DATE': ('eta', None),
        'CHARTER': ignore_key('redundant'),
        'TERMS': ignore_key('redundant'),
        'RATE': ignore_key('redundant'),
        'RATE A': ignore_key('redundant'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _reported_date),
    }

示例#20

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def field_mapping():
    """Build the mapping from raw line-up headers to item fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``. Actual and estimated columns (A.T.x / E.T.x) share
    the same target key.

    Returns:
        Dict[str, Any]:

    """
    def _imo(raw):
        return try_apply(raw, float, int, str)

    def _lowercase(raw):
        return raw.lower()

    return {
        'Vessel Name': ('vessel_name', None),
        'Vessel Type': ('vessel_type', None),
        'IMO': ('vessel_imo', _imo),
        'Length': ('vessel_length', None),
        'Dimensions': ('vessel_length', None),
        'DWT': ('vessel_dwt', None),
        'Berth Number': ('berth', None),
        'Draft': ignore_key('redundant'),
        'Last Port': ignore_key('redundant'),
        'OPS': ('cargo_movement', _lowercase),
        'Operation': ('cargo_movement', _lowercase),
        'Cargo Type': ('cargo_product', may_strip),
        'Qtty': ('cargo_volume', None),
        'Est. Qtty': ('cargo_volume', None),
        'A.T.A': ('arrival', None),
        'E.T.A': ('arrival', None),
        'A.T.B': ('berthed', None),
        'E.T.B': ('berthed', None),
        'A.T.D': ('departure', None),
        'E.T.D': ('departure', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', normalize_rptd_date),
        'port_name': ('port_name', normalize_port),
        'file_name': ('file_name', None),
    }

示例#21

0

显示文件

def portcall_mapping():
    """Build the mapping from raw schedule headers to portcall fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``. Note that the raw 'ETB' column feeds the 'eta'
    field, while the raw 'ETA' column is ignored.

    Returns:
        Dict[str, Any]:

    """
    def _length(raw):
        # the part before the first '/' of 'LOA / BEAM' is the length
        return try_apply(raw.split('/')[0], float, int)

    def _vessel_type(raw):
        return None if raw in VESSEL_TYPE_BLACKLIST else raw

    return {
        'ETA': ignore_key('use more accurate ETB as ETA estimate instead'),
        'SHIPS': ('vessel_name', clean_vessel_name),
        'ETB': ('eta', None),
        'BERTH': ('berth', None),
        'LOA / BEAM': ('vessel_length', _length),
        'TYPE': ('vessel_type', _vessel_type),
        'OPERATOR': ('shipping_agent', None),
        'LINE': ignore_key('irrelevant'),
        'CARGO': ('cargo_product', normalize_cargo_product),
        'QUANTITY': ('cargo_movement', normalize_cargo_movement),
        'PILOT': ignore_key('irrelevant'),
        'TUG': ignore_key('irrelevant'),
        'LAST PORT': ignore_key('previous port'),
        'NEXT PORT': ignore_key('next port'),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'port_name': ('port_name', None),
        'month': ('month', None),
        'year': ('year', None),
    }

示例#22

0

显示文件

文件： normalize_movement.py 项目： theHausdorffMetric/test

def process_item(raw_item):
    """Map a raw item and attach its cargo sub-model.

    The departure falls back to the 'finished' value when empty. Items with
    none of eta/berthed/departure are dropped.

    Args:
        raw_item (Dict[str, str]):

    Returns:
        Dict[str, Any] | None: the normalized item, or None when it is dropped
    """
    item = map_keys(raw_item, field_mapping(), skip_missing=True)

    # fallback unto finished time if departure is null (analyst request)
    if not item.get('departure'):
        item['departure'] = item.get('finished', None)

    has_date = any(item.get(field) for field in ('eta', 'berthed', 'departure'))
    if not has_date:
        return

    seller = item.pop('shipper', None)
    volume = try_apply(item.pop('cargo_volume', None), float, int, str)
    item['cargo'] = {
        # product requested by coal analyst (Malay Trivedi)
        'product': 'Thermal Coal',
        'movement': 'load',
        'volume': volume,
        'volume_unit': Unit.tons,
        'seller': {'name': seller} if seller else None,
    }

    # helper columns are not part of the final item
    item.pop('finished', None)
    item.pop('sheet_mode', None)

    return item

示例#23

0

显示文件

def portcall_mapping() -> Dict[str, Tuple[str, Optional[Callable]]]:
    """Build the mapping from raw portcall headers to item fields.

    Each raw header maps either to a ``(target key, converter)`` pair, where a
    ``None`` converter means no conversion is declared, or to the result of
    ``ignore_key(reason)``.
    """
    def _gt_from_hundreds(raw):
        # 'GT 100' is multiplied by 100; falsy conversions give None
        if not try_apply(raw, int):
            return None
        return int(raw) * 100

    def _dayfirst_iso(raw):
        return to_isoformat(raw, dayfirst=True)

    def _upper_name(raw):
        if not raw:
            return None
        return may_strip(raw).upper()

    return {
        '#': ignore_key('internal portcall ID'),
        'Arrival': ('eta', None),
        'Departure': ('departure', None),
        'Destination': ignore_key('next zone; discuss with analysts on accuracy and value'),
        'Expected Arrival': ('eta', None),
        'Flag': ignore_key('empty'),
        'GT': ('vessel_gt', None),
        'GT 100': ('vessel_gt', _gt_from_hundreds),
        'Home Port': ignore_key('home port'),
        'Length': ('vessel_length', may_strip),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', _dayfirst_iso),
        'Situation': ignore_key('current vessel situation'),
        'Vessel': ('vessel_name', _upper_name),
    }

示例#24

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def normalize_cargoes(raw_operation):
    """Yield a cargo dict for each relevant product of each raw operation.

    A product name combining several products (joined by ' y ' or '+') is
    split, and the tonnage is divided equally between the parts. An operation
    is kept only when its movement is in ``MOVEMENT_MAPPING`` and its full
    product name contains no ``PRODUCT_BLACKLIST`` alias.

    Args:
        raw_operation (List[Dict[str, str]]):

    Yields:
        Dict[str, str]:

    """
    for raw_cargo in raw_operation:
        movement = raw_cargo['nomoperacion']
        product = normalize_cargo_name(raw_cargo['mercancia'])
        volume = raw_cargo['toneladas']

        parts = re.split(r'\sy\s|\+', product)
        if len(parts) > 1:
            volume = float(volume) / len(parts)

        # only keep relevant movement and relevant product; the check looks at
        # the whole product name, so it holds for every part or for none
        is_relevant = (
            movement in MOVEMENT_MAPPING
            and product
            and not any(alias in product.upper() for alias in PRODUCT_BLACKLIST)
        )
        if not is_relevant:
            continue

        for part in parts:
            yield {
                'product': part,
                'movement': MOVEMENT_MAPPING[movement],
                'volume': try_apply(volume, str),
                'volume_unit': Unit.tons,
            }

示例#25

0

显示文件

def field_mapping():
    """Build the raw-field to normalized-field mapping.

    Returns:
        dict: keyed by raw field name. Each value is either the result of
            ``ignore_key(<reason>)`` or a ``(target_key, converter)`` pair whose
            second element is a callable or None.

    """
    def _build_cargoes(raw_cargoes):
        # one built cargo per raw cargo entry
        return [build_cargo(cargo) for cargo in raw_cargoes]

    def _without_commas(raw):
        # drop thousands separators
        return raw.replace(',', '')

    def _parse_length(raw):
        return try_apply(raw.replace(',', ''), float, int, str)

    return {
        'Agency:': ignore_key('shipping agent'),
        # NOTE(review): the reason text below duplicates the 'Agency:' one and looks
        # copy-pasted; confirm whether it should describe the berth instead
        'Berth:': ignore_key('shipping agent'),
        'Data arrivo in rada': ('arrival', normalize_date),
        'cargoes': ('cargoes', _build_cargoes),
        'Draft:': ignore_key('vessel draught'),
        'ETA': ('eta', normalize_date),
        'EDB:': ignore_key('estimated date of berthing'),
        'Flag:': ignore_key('vessel flag'),
        'GRT:': ('vessel_gross_tonnage', _without_commas),
        'IMO:': ('vessel_imo', None),
        'Length:': ('vessel_length', _parse_length),
        'provider_name': ('provider_name', may_strip),
        'port_name': ('port_name', may_strip),
        'Receiver:': ('buyer', None),
        'reported_date': ('reported_date', None),
        'S.I.:': ignore_key('unknown'),
        'Ship:': ('vessel_name', may_strip),
    }

示例#26

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def field_mapping():
    """Build the raw-field to normalized-field mapping.

    Returns:
        dict: keyed by raw column name. Each value is either the result of
            ``ignore_key(<reason>)`` or a ``(target_key, converter)`` pair whose
            second element is a callable or None.

    """
    def _clean_vessel_name(raw):
        # remove every occurrence of the 'M.V ' marker
        return raw.replace('M.V ', '')

    def _to_movement(raw):
        # unknown codes resolve to None
        return CARGO_MOVEMENT_MAPPING.get(raw)

    def _parse_loa(raw):
        # drop any 'M' characters before attempting the numeric conversions
        return try_apply(raw.replace('M', ''), float, int, str)

    def _is_container(raw):
        return raw == 'CONT'

    return {
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        'VESSEL': ('vessel_name', _clean_vessel_name),
        'QUAY': ignore_key('quay number of portcall'),
        'IMP/EXP': ('cargo_movement', _to_movement),
        'LOA': ('vessel_length', _parse_loa),
        'TYPE': ('is_container_vessel', _is_container),
        'AGENT': ignore_key('shipping agent'),
        'CARRIER': ignore_key('carrier'),
        'TERMINAL': ignore_key('berth terminal'),
        'TNS': ('cargo_product', normalize_cargo),
        # actual (A*) and estimated (E*) columns share the same target key
        'ATD': ('departure', normalize_date),
        'ETD': ('departure', normalize_date),
        'ATB': ('berthed', normalize_date),
        'ETB': ('berthed', normalize_date),
        'ATA': ('arrival', normalize_date),
        'ETA': ('eta', normalize_date),
    }

示例#27

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def portcall_mapping():
    """Build the mapping from raw portcall table headers to normalized fields.

    Returns:
        dict: keyed by raw header. Each value is either the result of
            ``ignore_key(<reason>)`` or a ``(target_key, converter)`` pair whose
            second element is a callable or None.

    """
    def _parse_loa(raw):
        return try_apply(raw, float, int)

    return {
        'B.NO.': ('berth', None),
        'SL.NO': ignore_key('irrelevant'),
        # both header spellings feed the same target key
        'NAME OF VESSEL': ('vessel_name', normalize_vessel_name),
        'NAME OF THE VESSEL': ('vessel_name', normalize_vessel_name),
        'IND/': ignore_key('irrelevant'),
        'LOA': ('vessel_loa', _parse_loa),
        'ETA': ('eta', None),
        'E.RT.TIME': ('eta_time', None),
        'DOA': ('eta', None),
        'RT.TIME': ('eta_time', None),
        'NOD(WAITING)': ignore_key('irrelevant'),
        'ARRIVAL': ('arrival', None),
        'BERTHING': ('berthed', None),
        'CARGO': ('product', None),
        'AGENT': ('shipping_agent', None),
        'RECEIVER': ignore_key('cannot be used for now'),
        'QTY.': ('volume', normalize_volume),
        'QTY': ('volume', normalize_volume),
        'DAY': ignore_key('irrelevant'),
        'TOTAL': ('volume_total', normalize_volume),
        'BALANCE': ('volume_bal', normalize_volume),
        'ETD': ('departure', None),
        'REASONS FOR WAITING': ignore_key('irrelevant'),
        'B.PREFER': ignore_key('irrelevant'),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
    }

示例#28

0

显示文件

def process_item(raw_item):
    """Transform raw item into usable events, one per cargo.

    Items without a vessel name, or with neither an arrival nor a departure date, are
    logged and discarded. Otherwise one event is yielded per (product, volume) pair;
    each event is an independent dict, so consumers may safely collect them.

    Args:
        raw_item (Dict[str, str]):

    Yields:
        Dict[str, Any]:

    """
    item = map_keys(raw_item, grades_mapping())

    # discard items without vessel names (a missing key is treated like an empty name)
    vessel_name = item.pop('vessel_name', None)
    if not vessel_name:
        logger.warning(f'Item has no vessel name, discarding:\n{item}')
        return

    # discard items without valid dates
    if not (item.get('arrival') or item.get('departure')):
        logger.warning(f'Item has no valid portcall dates, discarding:\n{item}')
        return

    # build Vessel sub-model
    item['vessel'] = {'name': vessel_name, 'gross_tonnage': item.pop('gross_tonnage', None)}

    # build Cargo sub-model
    products, volumes = get_products_and_volumes(item)
    movement = item.pop('cargo_movement', None)
    # zip_longest pads the shorter of the two sequences with None
    for product, volume in zip_longest(products, volumes):
        # yield a fresh dict per cargo: re-yielding the same mutated `item` would make
        # every previously yielded event point at the last cargo once they are stored
        yield {
            **item,
            'cargo': {
                'product': product,
                'movement': movement,
                'volume': try_apply(volume, float, int),
                'volume_unit': Unit.tons,
            },
        }

示例#29

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def field_mapping():
    """Build the raw-header to normalized-field mapping shared by all tables.

    Returns:
        dict: keyed by raw header. Each value is either the result of
            ``ignore_key(<reason>)`` or a ``(target_key, converter)`` pair whose
            second element is a callable or None.

    """
    def _as_cargoes(raw_product):
        # wrap a non-empty product in a single-cargo list, otherwise no cargo at all
        if raw_product:
            return [{'product': raw_product}]
        return []

    def _parse_length(raw):
        return try_apply(raw, float, int, str)

    return {
        # common to all tables
        'AGENT': ignore_key('not used in model'),
        'CARGO': ('cargoes', _as_cargoes),
        'NAME OF SHIP': ('vessel_name', None),
        'Name of the ship / voyage': ('vessel_name', None),
        'Type of Cargo': ('cargoes', _as_cargoes),
        # same header with a trailing space, kept as a distinct key
        'Type of Cargo ': ('cargoes', _as_cargoes),
        # static info
        'event': ('event', None),
        'port_name': ('port_name', None),
        'provider_name': ('provider_name', None),
        'reported_date': ('reported_date', None),
        # for eta/arrival tables
        'DRAFT': ignore_key('not used in model'),
        'ETA / ATA': ('matching_date', to_isoformat),
        'No': ignore_key('not used in model'),
        # for berthed tables
        'ARR. TEMA': ignore_key('not used in model'),
        'BOOKINGS': ignore_key('not used in model'),
        'ETB/ATB': ('matching_date', to_isoformat),
        'ETD': ignore_key('not used in model'),
        'L.O.A': ('vessel_length', _parse_length),
        'SHIP STATUS': ignore_key('not used in model'),
        'ST\'DORE': ignore_key('not used in model'),
    }

示例#30

0

显示文件

文件： normalize.py 项目： theHausdorffMetric/test

def process_cargo(row):
    """Process cargo info given a table row.

    Not all rows contain cargo info however, and so this function is for extracting relevant
    cargo info from each row. The load section is checked first, then the unload section;
    the first one whose volume cell passes ``try_apply(..., float)`` as a truthy value
    determines the movement. Rows with no such volume yield nothing.

    Args:
        row (list): table row as a list, with each element as a table cell; the last
            element is used as the key into ``FIELD_MAPS``

    Yields:
        Dict[str, str]: one cargo per product found in the row

    """
    field_map_section = FIELD_MAPS[row[-1]]

    # if movement found, collect relevant cargo info
    for section, movement in (('cargo_load', 'load'), ('cargo_unload', 'discharge')):
        columns = field_map_section[section]
        # strip thousands separators before testing for a number
        volume = row[columns['volume']].replace(',', '')
        if try_apply(volume, float):
            product = may_strip(row[columns['product']])
            break
    else:
        # neither section holds a usable volume
        return

    # several products may share one cell, separated by `+` or `/`
    product_list = re.split(r'[+/]', product)
    if len(product_list) > 1:
        # the volume covers the whole cell; share it equally between the products
        volume = float(volume) / len(product_list)

    for prod in product_list:
        yield {
            'product': may_strip(prod),
            'volume': str(volume),
            'volume_unit': Unit.tons,
            'movement': movement,
        }