Пример #1
0
def main():
    parser = ArgumentParser(description="Crawl frontier worker.")
    parser.add_argument(
        '--config',
        type=str,
        help='Settings module name, should be accessible by import.')
    parser.add_argument(
        '--hostname',
        type=str,
        help='Hostname or IP address to bind. Default is 127.0.0.1')
    parser.add_argument(
        '--log-level',
        '-L',
        type=str,
        default='INFO',
        help=
        'Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL. Default is INFO.')
    parser.add_argument(
        '--port',
        type=int,
        help=
        'Base port number, server will bind to 6 ports starting from base. Default is 5550'
    )
    args = parser.parse_args()

    settings = Settings(module=args.config)
    hostname = args.hostname if args.hostname else settings.get("ZMQ_HOSTNAME")
    port = args.port if args.port else settings.get("ZMQ_BASE_PORT")
    server = Server(hostname, port)
    server.logger.setLevel(args.log_level)
    server.start()
Пример #2
0
 def __init__(self, manager):
     self._manager = manager
     settings = Settings(attributes=manager.settings.attributes)
     messagebus = load_object(settings.get('MESSAGE_BUS'))
     self.mb = messagebus(settings)
     store_content = settings.get('STORE_CONTENT')
     self._encoder = Encoder(manager.request_model, send_body=store_content)
     self._decoder = Decoder(manager.request_model, manager.response_model)
     self.spider_log_producer = self.mb.spider_log().producer()
     spider_feed = self.mb.spider_feed()
     self.partition_id = settings.get('SPIDER_PARTITION_ID')
     self.consumer = spider_feed.consumer(partition_id=self.partition_id)
     self._get_timeout = float(settings.get('KAFKA_GET_TIMEOUT', 5.0))
     self._buffer = OverusedBuffer(self._get_next_requests,
                                   manager.logger.manager.debug)
 def __init__(self, manager):
     self._manager = manager
     settings = Settings(attributes=manager.settings.attributes)
     messagebus = load_object(settings.get('MESSAGE_BUS'))
     self.mb = messagebus(settings)
     store_content = settings.get('STORE_CONTENT')
     self._encoder = Encoder(manager.request_model, send_body=store_content)
     self._decoder = Decoder(manager.request_model, manager.response_model)
     self.spider_log_producer = self.mb.spider_log().producer()
     spider_feed = self.mb.spider_feed()
     self.partition_id = settings.get('SPIDER_PARTITION_ID')
     self.consumer = spider_feed.consumer(partition_id=self.partition_id)
     self._get_timeout = float(settings.get('KAFKA_GET_TIMEOUT', 5.0))
     self._buffer = OverusedBuffer(self._get_next_requests,
                                   manager.logger.manager.debug)
Пример #4
0
def main():
    parser = ArgumentParser(description="Crawl frontier worker.")
    parser.add_argument('--config', type=str,
                        help='Settings module name, should be accessible by import.')
    parser.add_argument('--hostname', type=str,
                        help='Hostname or IP address to bind. Default is 127.0.0.1')
    parser.add_argument('--log-level', '-L', type=str, default='INFO',
                        help='Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL. Default is INFO.')
    parser.add_argument('--port', type=int,
                        help='Base port number, server will bind to 6 ports starting from base. Default is 5550')
    args = parser.parse_args()

    settings = Settings(module=args.config)
    hostname = args.hostname if args.hostname else settings.get("ZMQ_HOSTNAME")
    port = args.port if args.port else settings.get("ZMQ_BASE_PORT")
    server = Server(hostname, port)
    server.logger.setLevel(args.log_level)
    server.start()
def test_override():
    s = Settings()
    assert s.get("SPIDER_FEED_PARTITIONS") == 2
def test_frontera():
    s = Settings()
    assert s.get("TEST_MODE") is not None
    assert s.get("MAX_REQUESTS") is not None
Пример #7
0
def test_override():
    s = Settings()
    assert s.get("SPIDER_FEED_PARTITIONS") == 2
Пример #8
0
def test_frontera():
    s = Settings()
    assert s.get("TEST_MODE") is not None
    assert s.get("MAX_REQUESTS") is not None