def main():
    """Entry point for the crawl-frontier worker.

    Parses command-line options, falls back to values from the settings
    module for host/port when flags are absent, then starts the server
    with the requested log level.
    """
    arg_parser = ArgumentParser(description="Crawl frontier worker.")
    arg_parser.add_argument(
        '--config', type=str,
        help='Settings module name, should be accessible by import.')
    arg_parser.add_argument(
        '--hostname', type=str,
        help='Hostname or IP address to bind. Default is 127.0.0.1')
    arg_parser.add_argument(
        '--log-level', '-L', type=str, default='INFO',
        help='Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL. Default is INFO.')
    arg_parser.add_argument(
        '--port', type=int,
        help='Base port number, server will bind to 6 ports starting from base. Default is 5550')
    options = arg_parser.parse_args()

    cfg = Settings(module=options.config)
    # CLI flags win over the settings module when provided.
    bind_host = options.hostname or cfg.get("ZMQ_HOSTNAME")
    base_port = options.port or cfg.get("ZMQ_BASE_PORT")

    srv = Server(bind_host, base_port)
    srv.logger.setLevel(options.log_level)
    srv.start()
def __init__(self, manager):
    """Wire this backend to the message bus described by the manager's settings.

    Builds a producer for the spider log, a consumer for this partition's
    slice of the spider feed, codecs for request/response messages, and an
    overuse buffer wrapping the feed-polling callable.
    """
    self._manager = manager
    # Re-create a Settings object from the manager's attribute dict so this
    # component holds its own view of the configuration.
    settings = Settings(attributes=manager.settings.attributes)
    # MESSAGE_BUS names the bus class (dotted path); instantiate it with settings.
    messagebus = load_object(settings.get('MESSAGE_BUS'))
    self.mb = messagebus(settings)
    # Whether request bodies are serialized along with metadata.
    store_content = settings.get('STORE_CONTENT')
    self._encoder = Encoder(manager.request_model, send_body=store_content)
    self._decoder = Decoder(manager.request_model, manager.response_model)
    # Outbound stream: events produced onto the spider log.
    self.spider_log_producer = self.mb.spider_log().producer()
    # Inbound stream: requests consumed from this worker's feed partition.
    spider_feed = self.mb.spider_feed()
    self.partition_id = settings.get('SPIDER_PARTITION_ID')
    self.consumer = spider_feed.consumer(partition_id=self.partition_id)
    # Max seconds to block waiting for messages on each poll.
    self._get_timeout = float(settings.get('KAFKA_GET_TIMEOUT', 5.0))
    # Buffer that throttles overused hosts; delegates real fetching to
    # self._get_next_requests and logs via the manager's debug logger.
    self._buffer = OverusedBuffer(self._get_next_requests, manager.logger.manager.debug)
def __init__(self, manager):
    """Set up message-bus plumbing for this backend.

    Creates the spider-log producer, a spider-feed consumer bound to the
    configured partition, request/response codecs, and an overuse buffer
    in front of the feed-polling method.
    """
    self._manager = manager

    conf = Settings(attributes=manager.settings.attributes)

    # Resolve and instantiate the configured message bus implementation.
    bus_cls = load_object(conf.get('MESSAGE_BUS'))
    self.mb = bus_cls(conf)

    # Codecs: bodies are only serialized when STORE_CONTENT is enabled.
    keep_body = conf.get('STORE_CONTENT')
    self._encoder = Encoder(manager.request_model, send_body=keep_body)
    self._decoder = Decoder(manager.request_model, manager.response_model)

    self.spider_log_producer = self.mb.spider_log().producer()

    feed = self.mb.spider_feed()
    self.partition_id = conf.get('SPIDER_PARTITION_ID')
    self.consumer = feed.consumer(partition_id=self.partition_id)

    self._get_timeout = float(conf.get('KAFKA_GET_TIMEOUT', 5.0))
    self._buffer = OverusedBuffer(self._get_next_requests, manager.logger.manager.debug)
def main():
    """Launch the crawl-frontier worker from the command line.

    Command-line values take precedence; missing host/port fall back to
    the ZMQ_* entries of the imported settings module.
    """
    cli = ArgumentParser(description="Crawl frontier worker.")
    cli.add_argument('--config', type=str,
                     help='Settings module name, should be accessible by import.')
    cli.add_argument('--hostname', type=str,
                     help='Hostname or IP address to bind. Default is 127.0.0.1')
    cli.add_argument('--log-level', '-L', type=str, default='INFO',
                     help='Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL. Default is INFO.')
    cli.add_argument('--port', type=int,
                     help='Base port number, server will bind to 6 ports starting from base. Default is 5550')
    parsed = cli.parse_args()

    conf = Settings(module=parsed.config)
    if parsed.hostname:
        host = parsed.hostname
    else:
        host = conf.get("ZMQ_HOSTNAME")
    if parsed.port:
        port = parsed.port
    else:
        port = conf.get("ZMQ_BASE_PORT")

    worker = Server(host, port)
    worker.logger.setLevel(parsed.log_level)
    worker.start()
def test_override():
    """A value overridden in the custom settings module must take effect."""
    settings = Settings()
    assert settings.get("SPIDER_FEED_PARTITIONS") == 2
def test_frontera():
    """Core frontera defaults should be present in a fresh Settings object."""
    settings = Settings()
    assert settings.get("TEST_MODE") is not None
    assert settings.get("MAX_REQUESTS") is not None
def test_override():
    """Custom settings module overrides the default partition count."""
    assert Settings().get("SPIDER_FEED_PARTITIONS") == 2
def test_frontera():
    """Default frontera settings expose the expected keys."""
    s = Settings()
    for key in ("TEST_MODE", "MAX_REQUESTS"):
        assert s.get(key) is not None