Пример #1
0
    def scrape(self) -> List[Any]:
        """Scrape the configured source and return the items found.

        The document at ``self.config.source`` is parsed and every node
        matched by ``self.config.xpath_root`` is examined. A node is skipped
        when its title or comments element is missing, when the cleaned
        title is empty, or when the link has no host. A node whose
        processing raises is logged at debug level and ignored, so one bad
        entry does not abort the whole scrape.

        Returns:
            The items built from the nodes that were processed successfully,
            in iteration order.
        """
        items = []

        document = html.parse(self.config.source)
        for node in document.xpath(self.config.xpath_root):
            # Reset per node: the except handler below logs the name, so it
            # must never be unbound or left over from the previous node.
            name = None
            try:
                title = node.find(self.config.xpath_title)
                comments = node.find(self.config.xpath_comments)
                if title is None or comments is None:
                    continue

                name = self.__zeroblank(title.text)
                href = parse_url(title.get('href'))
                if not name or not href.host:
                    continue

                info = comments.text_content()
                item = deadition(href.url, name)

                # The presence of the starred element bumps the ranking.
                star = node.find(self.config.xpath_starred)
                if star is not None:
                    item.meta.ranking += 1

                self.__available(item)
                self.__parseinfo(item, info)
                self.__ranking(item, href)
                self.__touched(item, 'ia')

                items.append(item)
                logger().debug('Scraped deadition %s', name)
            except Exception:
                # Best effort per node. Only ordinary errors are swallowed;
                # KeyboardInterrupt and SystemExit still propagate.
                logger().debug('Ignoring deadition %s', name)

        return items
Пример #2
0
    def _parse(self, params: List[str]) -> None:
        """Load the configuration file named by the first argument.

        Args:
            params: Arguments given to this parameter; ``params[0]`` is the
                path of the config file to read into the global config.

        Raises:
            SystemExit: if the file cannot be opened or parsed.
        """
        try:
            # The context manager closes the handle even if parsing fails.
            with open(params[0]) as handle:
                instance.config.read_file(handle)
        except Exception:
            # Ordinary errors only, so Ctrl-C is not reported as a bad file.
            exit('Invalid config file {}'.format(params[0]))

        logger().info('Read config file %s', params[0])
Пример #3
0
  def __init__(self) -> None:
    """Load the configuration section named after the concrete class.

    The section's items are read from the global configuration and stored
    on ``self.config``.

    Raises:
      RuntimeError: if the global configuration has no section with the
        class name.
    """
    section = self.__class__.__name__

    if instance.config.has_section(section):
      # Second positional argument is passed through unchanged (True).
      options = instance.config.items(section, True)
      self.config = dotdict(options)
      logger().debug('Initialized mode %s', section)
      return

    raise RuntimeError('Missing {} configuration section'.format(section))
Пример #4
0
    def _parse(self, params: List[str]) -> None:
        """Apply the log level named by the first argument.

        The level is set on the logger and also recorded under the
        ``logging`` section of the global configuration.

        Args:
            params: Arguments given to this parameter; ``params[0]`` is the
                level name.

        Raises:
            SystemExit: if the level cannot be applied to the logger.
        """
        try:
            logger().setLevel(getLevelName(params[0]))
        except Exception:
            # Ordinary errors only, so Ctrl-C is not reported as a bad level.
            exit('Invalid log level {}'.format(params[0]))

        instance.config.set('logging', 'level', params[0])
        logger().info('Set log level to %s', params[0])
Пример #5
0
    def _parse(self, params: List[str]) -> None:
        """Select the run mode named by the first argument.

        The mode is looked up as the attribute ``<name>`` of the module
        ``seacliff.modes.<name>`` and stored on ``instance.mode``.

        Args:
            params: Arguments given to this parameter; ``params[0]`` is the
                mode name.

        Raises:
            SystemExit: if the module or the same-named attribute in it
                cannot be resolved.
        """
        try:
            module = mod('seacliff.modes.{}'.format(params[0]))
            mode = module.__dict__[params[0]]
        except Exception:
            # Ordinary errors only, so Ctrl-C is not reported as a bad mode.
            exit('Invalid mode {}'.format(params[0]))

        instance.mode = mode
        logger().info('Set mode to %s', params[0])
Пример #6
0
    def main(self) -> None:
        """Run seacliff: install defaults, apply CLI parameters, run the mode.

        The global configuration is seeded from ``defaultconfig()`` and the
        mode defaults to ``dummy``. Every ``--name`` entry in ``argv`` is
        dispatched to the attribute ``name`` of ``seacliff.params.name``,
        which is called with ``argv``. The selected mode is then
        instantiated and executed.

        Keyboard interrupts, ordinary exceptions and exit requests are all
        handled here; start and stop are always logged.
        """
        instance.config = ConfigParser()
        instance.config.read_dict(defaultconfig())
        instance.mode = dummy

        try:
            logger().info('Started seacliff with pid %s', getpid())

            # Snapshot the flags first: argv itself is handed to every
            # parameter handler below, which may consume entries from it.
            flags = [arg for arg in argv if arg.startswith('--')]
            for flag in flags:
                name = flag[2:]
                try:
                    module = mod('seacliff.params.{}'.format(name))
                    module.__dict__[name](argv)
                except Exception:
                    # Ordinary errors only. A SystemExit raised by the
                    # handler keeps its own message and reaches the outer
                    # handler instead of being replaced by this generic one.
                    exit('Invalid parameter or argument to {}'.format(name))

            mode = instance.mode()
            mode.execute()

        except KeyboardInterrupt:
            print('\N{bomb}')
        except Exception as exception:
            logger().exception(exception)
        except SystemExit as exception:
            logger().critical(str(exception))

        finally:
            logger().info('Stopped seacliff with pid %s', getpid())
Пример #7
0
  def _parse(self, params: List[str]) -> None:
    """Merge configuration given inline as the first argument.

    Args:
      params: Arguments given to this parameter; ``params[0]`` is the
        configuration text read into the global config.

    Raises:
      SystemExit: if the text cannot be parsed as configuration.
    """
    try:
      instance.config.read_string(params[0])
    except Exception:
      # Ordinary errors only, so Ctrl-C is not reported as a bad string.
      exit('Invalid config string {}'.format(params[0]))

    logger().info('Read config string %s', params[0])
Пример #8
0
 def __init__(self, params: List[str]) -> None:
   """Consume this parameter and its arguments from ``params``.

   The first entry is removed and logged without its first two characters.
   Following entries are then removed up to, but not including, the next
   one starting with ``--`` and handed to ``self._parse``. ``params`` is
   modified in place.

   Args:
     params: Remaining command-line entries, starting with this parameter.
   """
   collected = []
   flag = params.pop(0)
   logger().debug('Parsing cli param %s', flag[2:])

   while params:
     if params[0].startswith('--'):
       break
     collected.append(params.pop(0))

   logger().debug('Passing cli args %s', str(collected))
   self._parse(collected)
Пример #9
0
 def __init__(self, configuration: Dict[str, str]) -> None:
   """Store the scraper configuration on ``self.config``.

   Args:
     configuration: Settings for this scraper; wrapped in ``dotdict``
       before being stored.
   """
   self.config = dotdict(configuration)
   logger().debug('Initialized scraper %s', self.__class__.__name__)