Пример #1
0
    def start_requests(self):
        """Start the Equasis crawl for every active vessel of the Kpler fleet.

        The IMO numbers of all vessels whose ``status`` equals ``'Active'`` are
        stored on ``self._requested_imos`` and handed to ``self.parse_vessels``;
        the value returned by that call is the only item yielded.
        """

        def _is_active(vessel):
            # only vessels flagged as active are worth scraping
            return vessel['status'] == 'Active'

        # the fleet is fetched with the cache disabled
        active_fleet = fetch_kpler_fleet(is_eligible=_is_active, disable_cache=True)
        self._requested_imos = [vessel['imo'] for vessel in active_fleet]

        message = 'Going to scrape for %d active vessels' % len(self._requested_imos)
        logger.info(message)

        yield self.parse_vessels(self._requested_imos)
Пример #2
0
    def __init__(self, *args, **kwargs):
        """Build the SQL query retrieving the records of vessels in Kpler's fleet.

        All positional and keyword arguments are forwarded untouched to the
        parent initialiser.

        Sets:
            _fleet (tuple): IMO of every vessel returned by `fetch_kpler_fleet`
                (no filtering, cache disabled).
            query (str): the `retrieve-records.sql` script followed by a
                `WHERE ... IN (...)` clause restricted to `_fleet`.
        """
        super().__init__(*args, **kwargs)

        # cross reference Kpler's fleet with Gibson's, and update our fleet
        self._fleet = tuple(
            v['imo']
            for v in fetch_kpler_fleet(lambda _: True, disable_cache=True))

        # Render the IN-list by hand instead of relying on `repr(tuple)`: a
        # one-element tuple is rendered as `('x',)` and that trailing comma is
        # not valid SQL. For two or more IMOs (and for an empty fleet) the
        # output is character-for-character what `repr(self._fleet)` produced.
        imo_list = ', '.join(repr(imo) for imo in self._fleet)
        self.query = ' '.join((
            api.VesselsDB._sql_script('retrieve-records.sql'),
            f'WHERE EAGKplerVesselMain.IMONumber IN ({imo_list});',
        ))
Пример #3
0
    def get_fleet(self):
        """Yield the identifiers of the vessels to work on.

        Two scenarios are supported:

            - `self.forced` is set: each item of that comma-separated string is
              yielded as is
            - otherwise: the `self.match_key` value of the vessels returned by
              `fetch_kpler_fleet(is_eligible)`; only the first `self.fleet_limit`
              vessels are considered, and those with a missing or empty value
              are skipped

        Yields:
            identifier of a vessel (the kind of identifier depends on
            `self.match_key`)

        """
        if not self.forced:
            # by default
            self.logger.debug('scenario: default (no mmsi, internal fleet)')
            for position, candidate in enumerate(fetch_kpler_fleet(is_eligible)):
                # the limit applies to the position in the fleet, not to the
                # number of identifiers actually yielded
                within_limit = position < self.fleet_limit
                if within_limit and candidate.get(self.match_key):
                    yield candidate[self.match_key]
            return

        requested_keys = self.forced.split(',')
        self.logger.debug(
            'scenario: forced list of {}: {}'.format(self.match_key, requested_keys))
        for key in requested_keys:
            yield key
0
    def __init__(self, token, api, query_by='imo', **kwargs):
        """Configure the spider from its command line arguments.

        Args:
            token: stored as is on `self.token`
            api: key of `MAPPERS`; an unknown value raises on purpose
            query_by: name of the vessel attribute used as identifier; also the
                name of the optional keyword argument carrying the ids
            **kwargs: forwarded to the parent initialiser; the optional keys
                read here are `skip_validation`, `batch`, `message_similarity`,
                `slice_size`, `since`, `limit` and the one named by `query_by`

        """
        super().__init__(**kwargs)

        self.token = token
        self.api = api
        self.query_by = query_by
        # used to only yield one position per vessel (imo being the id)
        self._cache = set()
        self.messages_counter = 0

        # crash on purpose if `api` is invalid
        self.mapper = MAPPERS[api]

        skip_flag = kwargs.get('skip_validation')
        self.skip_validation = bool_flag(skip_flag)

        self.batch_size = int(kwargs.get('batch', FLEET_BATCH))

        # ids given explicitly win over the ones derived from the fleet
        self.fleet = list(fetch_kpler_fleet(is_eligible))
        requested_ids = kwargs.get(query_by)
        if not requested_ids:
            requested_ids = reduce_fleet_on(query_by, self.fleet)
        if isinstance(requested_ids, six.string_types):
            # a comma-separated string: strip spaces and split it into a list
            requested_ids = requested_ids.replace(' ', '').split(',')
        self.vessel_ids = requested_ids

        similarity = kwargs.get('message_similarity', MIN_MESSAGE_SIMILARITY)
        self.message_similarity = float(similarity)

        slice_size = kwargs.get('slice_size')
        if slice_size:
            self.logger.info(f"slicing scraping scope to {slice_size}")
            self.vessel_ids = self.divide_work(self.vessel_ids, int(slice_size))

        # TODO on `message type`, use since with last run date
        # self.forced_since = compute_since(minutes=int(since)) if since else None

        # filter/limit messages
        # `since` is the name exposed on the cli because, from a user point of view, it is the
        # same thing. Programmatically it is not the `since` used by Spire pagination: it is
        # the filter available to the `messages` api
        self.window = kwargs.get('since')  # in minutes (default will be set by Spire to 3h)
        self.messages_limit = int(kwargs.get('limit', DEFAULT_MESSAGES_LIMIT))
Пример #5
0
    def __init__(self, *args, **kwargs):
        """Build the SQL query retrieving vessels missing from Kpler's fleet.

        All positional and keyword arguments are forwarded untouched to the
        parent initialiser.

        Sets:
            _fleet (tuple): IMO of every vessel returned by `fetch_kpler_fleet`
                (no filtering, cache disabled).
            query (str): the `retrieve-records.sql` script followed by the
                filters excluding known vessels and incomplete/old records.
        """
        super().__init__(*args, **kwargs)

        # cross reference Kpler's fleet with Gibson's
        self._fleet = tuple(
            v['imo']
            for v in fetch_kpler_fleet(lambda _: True, disable_cache=True))

        # Render the NOT IN-list by hand instead of relying on `repr(tuple)`: a
        # one-element tuple is rendered as `('x',)` and that trailing comma is
        # not valid SQL. For two or more IMOs (and for an empty fleet) the
        # output is character-for-character what `repr(self._fleet)` produced.
        known_imos = ', '.join(repr(imo) for imo in self._fleet)

        # `utcnow()` is deprecated; an aware UTC datetime yields the same year
        min_built_year = dt.datetime.now(dt.timezone.utc).year - 1

        self.query = ' '.join((
            api.VesselsDB._sql_script('retrieve-records.sql'),
            # prevent yielding of vessels we already have
            f'WHERE EAGKplerVesselMain.IMONumber NOT IN ({known_imos})',
            # take only active and under construction vessels
            'AND EAGKplerTradingStatus.tradingcategorycode IN ("New", "Live")',
            'AND EAGKplerVesselMain.vesseltypecode IN ("LNG", "LPG", "Offs", "Tank", "Chem", "Comb", "Bulk", "Gen", "Misc")',  # noqa
            # sanity check
            'AND EAGKplerVesselMain.DWT != ""',
            'AND EAGKplerVesselMain.GT != ""',
            'AND EAGKplerVesselMain.LOA != ""',
            'AND EAGKplerVesselMain.BeamMoulded != ""',
            'AND EAGKplerVesselMain.flagcode != ""',
            'AND EAGKplerVesselMain.builtyear != ""',
            # take only vessels from last year onwards
            f'AND cast(EAGKplerVesselMain.builtyear AS int) >= {min_built_year}',
        ))
Пример #6
0
    def __init__(self,
                 fleet_name,
                 getkey,
                 setkey='',
                 removal='',
                 dry_run='',
                 read_only='',
                 imos=None):
        """Validate the cli arguments and load the fleet to reflect on MT servers.

        Args:
            fleet_name: must be one of the keys of `api.FLEETS`
            getkey: forwarded to `api.MTClient`
            setkey: forwarded to `api.MTClient`; mandatory unless `read_only`
                is given
            removal: `'true'` (case-insensitive) sets `self._allow_removal`
            dry_run: `'true'` (case-insensitive) sets `self.dry_run`
            read_only: `'true'` (case-insensitive) sets `self.read_only`
            imos: optional comma-separated IMOs used instead of the internal
                fleet

        Raises:
            InvalidCliRun: unknown `fleet_name`, or neither `setkey` nor
                `read_only` provided

        """

        def _is_true(flag):
            # cli flags come in as strings; only 'true' (any case) enables them
            return flag.lower() == 'true'

        # validate cli
        if fleet_name not in api.FLEETS:
            raise InvalidCliRun("fleet_name", fleet_name)
        # NOTE(review): the raw `read_only` string is checked here, so any
        # non-empty value (even 'false') satisfies it - confirm this is intended
        if not (setkey or read_only):
            self.logger.error(
                "will try to sync fleet but no `setkey` was provided")
            raise InvalidCliRun("setkey", setkey)

        self.logger.debug("loading static vessels")
        # internal fleet is what we want to reflect on MT servers
        # load the vessels supposed to be tracked by MT
        if not imos:
            provided_by_fleet = fetch_kpler_fleet(
                lambda v: fleet_name in v['providers'])
            self.internal_fleet = list(provided_by_fleet)
        else:
            # build the same layout of vessels as in our internal fleet list
            self.internal_fleet = [
                dict(imo=imo, status='Active') for imo in imos.split(',')
            ]

        self.read_only = _is_true(read_only)
        self.dry_run = _is_true(dry_run)
        self._allow_removal = _is_true(removal)

        self.client = api.MTClient(fleet_name, getkey=getkey, setkey=setkey)

        fleet_tag = 'fleet:{}'.format(fleet_name.lower())
        self.tags = [fleet_tag]
Пример #7
0
 def test_fleet_selection_without_filter(self):
     """Every vessel returned with an accept-all filter must be truthy."""
     # TODO ensure we're dealing with the mock dataset in debug mode
     whole_fleet = static_data.fetch_kpler_fleet(lambda _vessel: True)
     for entry in whole_fleet:
         self.assertTrue(entry)
Пример #8
0
 def test_fleet_selection_not_eligible(self):
     """A filter rejecting every vessel must produce an empty fleet."""
     rejected = list(static_data.fetch_kpler_fleet(lambda _vessel: False))
     self.assertEqual(len(rejected), 0)