def scrape(self) -> List[Any]:
    """Scrape the configured source and return the items that were built.

    The document at ``self.config.source`` is parsed and every node matched
    by ``self.config.xpath_root`` is examined. A node is skipped when it has
    no title or comments element, when its cleaned-up title text is empty,
    or when the title link has no host. A node that raises while being
    processed is logged at debug level and ignored, so one bad entry does
    not abort the whole scrape.

    Returns:
        The items built from the usable nodes, in the order the nodes were
        matched.
    """
    items = []
    document = html.parse(self.config.source)
    for node in document.xpath(self.config.xpath_root):
        # Reset per node so the failure log below never reports a stale name
        # from a previous iteration or trips over an unbound local.
        name = None
        try:
            title = node.find(self.config.xpath_title)
            comments = node.find(self.config.xpath_comments)
            if title is None or comments is None:
                continue
            name = self.__zeroblank(title.text)
            href = parse_url(title.get('href'))
            if not name or not href.host:
                continue
            info = comments.text_content()
            item = deadition(href.url, name)
            # A node carrying the "starred" marker gets a ranking bonus.
            star = node.find(self.config.xpath_starred)
            if star is not None:
                item.meta.ranking += 1
            self.__available(item)
            self.__parseinfo(item, info)
            self.__ranking(item, href)
            self.__touched(item, 'ia')
            items.append(item)
            logger().debug('Scraped deadition %s', name)
        except Exception:
            # Deliberately best effort, but limited to ordinary errors so
            # that Ctrl-C and SystemExit still reach the caller.
            logger().debug('Ignoring deadition %s', name)
    return items
def _parse(self, params: List[str]) -> None:
    """Load the config file named by the first argument into the shared config.

    Args:
        params: Arguments given to this parameter; ``params[0]`` is the path
            of the file to read.

    Raises:
        SystemExit: If no path was given, or the file cannot be opened or
            parsed.
    """
    if not params:
        exit('Missing config file argument')
    path = params[0]
    try:
        # The context manager closes the handle even when parsing fails.
        with open(path) as handle:
            instance.config.read_file(handle)
    except Exception:
        exit('Invalid config file {}'.format(path))
    logger().info('Read config file %s', path)
def __init__(self) -> None:
    """Bind the config section named after this class to ``self.config``.

    Raises:
        RuntimeError: If the shared config has no section with that name.
    """
    section = self.__class__.__name__
    if instance.config.has_section(section):
        # The positional True is ConfigParser.items' ``raw`` flag.
        options = instance.config.items(section, True)
        self.config = dotdict(options)
        logger().debug('Initialized mode %s', section)
        return
    raise RuntimeError('Missing {} configuration section'.format(section))
def _parse(self, params: List[str]) -> None:
    """Apply the log level named by the first argument and record it.

    The level is set on the logger returned by ``logger()`` and stored under
    ``logging``/``level`` in the shared config.

    Args:
        params: Arguments given to this parameter; ``params[0]`` is the
            level name.

    Raises:
        SystemExit: If no level was given or the logger rejects it.
    """
    if not params:
        exit('Missing log level argument')
    level = params[0]
    try:
        logger().setLevel(getLevelName(level))
    except Exception:
        # Only ordinary errors mean "bad level"; interrupts must propagate.
        exit('Invalid log level {}'.format(level))
    instance.config.set('logging', 'level', level)
    logger().info('Set log level to %s', level)
def _parse(self, params: List[str]) -> None:
    """Select the run mode named by the first argument.

    The module ``seacliff.modes.<name>`` is loaded through ``mod`` and the
    attribute of the same name in it is stored as ``instance.mode``.

    Args:
        params: Arguments given to this parameter; ``params[0]`` is the
            mode name.

    Raises:
        SystemExit: If no mode was given, or the module or attribute cannot
            be resolved.
    """
    if not params:
        exit('Missing mode argument')
    name = params[0]
    try:
        module = mod('seacliff.modes.{}'.format(name))
        mode = module.__dict__[name]
    except Exception:
        # Only ordinary errors mean "bad mode"; interrupts must propagate.
        exit('Invalid mode {}'.format(name))
    instance.mode = mode
    logger().info('Set mode to %s', name)
def main(self) -> None:
    """Run seacliff: load defaults, apply cli parameters, execute the mode.

    Each ``--name`` entry in ``argv`` is resolved to the attribute ``name``
    of module ``seacliff.params.<name>`` and called with ``argv``. After
    that the selected ``instance.mode`` is instantiated and executed.
    Interrupts, errors and exit requests are reported here and not
    propagated; the stop message is always logged.
    """
    instance.config = ConfigParser()
    instance.config.read_dict(defaultconfig())
    instance.mode = dummy
    try:
        logger().info('Started seacliff with pid %s', getpid())
        # Snapshot the flag names first: each handler receives argv itself
        # and may mutate it while we iterate.
        flags = [arg[2:] for arg in argv if arg.startswith('--')]
        for flag in flags:
            try:
                handler = mod('seacliff.params.{}'.format(flag)).__dict__[flag]
                handler(argv)
            except Exception:
                # Ordinary failures only: a SystemExit raised by the handler
                # carries a more specific message and must reach the outer
                # handler untouched, as must KeyboardInterrupt.
                exit('Invalid parameter or argument to {}'.format(flag))
        mode = instance.mode()
        mode.execute()
    except KeyboardInterrupt:
        print('\N{bomb}')
    except Exception as exception:
        logger().exception(exception)
    except SystemExit as exception:
        logger().critical(str(exception))
    finally:
        logger().info('Stopped seacliff with pid %s', getpid())
def _parse(self, params: List[str]) -> None:
    """Merge the config text given as the first argument into the shared config.

    Args:
        params: Arguments given to this parameter; ``params[0]`` is the
            configuration text to parse.

    Raises:
        SystemExit: If no text was given or it cannot be parsed.
    """
    if not params:
        exit('Missing config string argument')
    text = params[0]
    try:
        instance.config.read_string(text)
    except Exception:
        # Only ordinary errors mean "bad config"; interrupts must propagate.
        exit('Invalid config string {}'.format(text))
    logger().info('Read config string %s', text)
def __init__(self, params: List[str]) -> None:
    """Consume this parameter and its arguments from ``params`` and parse them.

    The first entry is removed and logged without its two leading
    characters. Following entries are removed and collected until the list
    is empty or the next entry starts with ``--``. The collected arguments
    are handed to ``self._parse``.

    Args:
        params: Remaining command-line entries; mutated in place.
    """
    flag = params.pop(0)
    logger().debug('Parsing cli param %s', flag[2:])
    args = []
    while params:
        if params[0].startswith('--'):
            break
        args.append(params.pop(0))
    logger().debug('Passing cli args %s', str(args))
    self._parse(args)
def __init__(self, configuration: Dict[str, str]) -> None:
    """Store the given settings as ``self.config`` wrapped in a ``dotdict``.

    Args:
        configuration: Settings for this scraper.
    """
    self.config = dotdict(configuration)
    logger().debug('Initialized scraper %s', self.__class__.__name__)