Example #1
    def reelect_app(self, request, app):
        """tries to connect to the same app on differnet host from dist-info"""

        # disconnect app explicitly to break possibly existing connection
        app.disconnect()
        endpoints_size = len(app.locator.endpoints)

        # try once per endpoint in the app's locator, plus one extra pass for the common-pool fallback below
        for _ in xrange(0, endpoints_size + 1):
            # last chance to take app from common pool
            if len(app.locator.endpoints) == 0:
                request.logger.info(
                    "giving up on connecting to dist-info hosts, falling back to common pool processing"
                )
                app = yield self.proxy.reelect_app(request, app)
                raise gen.Return(app)

            try:
                # always create a new locator to avoid locking, since we connect with a timeout
                # (the lock can still be held for the duration of a TCP timeout)
                locator = Locator(endpoints=app.locator.endpoints)
                request.logger.info("connecting to locator %s",
                                    locator.endpoints[0])

                # first try to connect to locator only on remote host with timeout
                yield gen.with_timeout(self.service_connect_timeout,
                                       locator.connect())
                request.logger.debug("connected to locator %s for %s",
                                     locator.endpoints[0], app.name)
                app = Service(app.name,
                              locator=locator,
                              timeout=RESOLVE_TIMEOUT)

                # try to resolve and connect to application itself
                yield gen.with_timeout(self.service_connect_timeout,
                                       app.connect())
                request.logger.debug("connected to application %s via %s",
                                     app.name, app.endpoints)
            except gen.TimeoutError:
                # on timeout, move on to the next endpoint
                request.logger.warning(
                    "timed out while connecting to application")
                continue
            except ServiceError as err:
                request.logger.warning("got error while resolving app - %s",
                                       err)
                if err.category in LOCATORCATEGORY and err.code == ESERVICENOTAVAILABLE:
                    # if the application is down - also try next endpoint
                    continue
                else:
                    raise err
            finally:
                # drop the first endpoint so the next attempt starts from a different one;
                # the locator's default connection-retry logic does not fit here
                app.locator.endpoints = app.locator.endpoints[1:]
            # return connected app
            raise gen.Return(app)
        raise PluginApplicationError(42, 42,
                                     "could not connect to application")
Example #2
def test_locator():
    io = IOLoop.current()
    locator = Locator(endpoints=[["localhost", 10053]], io_loop=io)
    chan = io.run_sync(lambda: locator.resolve("storage"))
    endpoint, version, api = io.run_sync(chan.rx.get, timeout=4)
    assert version == 1, "invalid version number %s" % version
    assert isinstance(endpoint, (list, tuple)), "invalid endpoint type %s" % type(endpoint)
    assert isinstance(api, dict)
Example #3
def test_locator():
    io = CocaineIO.instance()
    locator = Locator("localhost", 10053, loop=io)
    chan = locator.resolve("storage").wait(4)
    endpoint, version, api = chan.rx.get().wait(1)
    assert version == 1, "invalid version number %s" % version
    assert isinstance(endpoint, (list, tuple)), "invalid endpoint type %s" % type(endpoint)
    assert isinstance(api, dict)
Example #4
def test_locate():
    io = IOLoop.current()
    locator = Locator()
    res = io.run_sync(common.Locate(locator, "locator").execute, timeout=2)
    assert isinstance(res, dict)
    assert "api" in res
    assert "version" in res
    assert "endpoints" in res
Example #5
def test_on_close():
    io = IOLoop.current()
    locator = Locator(endpoints=[["localhost", 10053]], io_loop=io)
    locator.disconnect()

    locator = Locator(endpoints=[["localhost", 10053]], io_loop=io)
    io.run_sync(locator.connect)
    io.run_sync(locator.connect)
    locator.disconnect()
Example #6
def test_on_close():
    io = CocaineIO.instance()
    locator = Locator("localhost", 10053, loop=io)
    locator.disconnect()

    locator = Locator("localhost", 10053, loop=io)
    locator.connect().wait(4)
    locator.connect().wait(4)
    locator.disconnect()
Example #7
    def create_service(self, name):
        if name not in self._cache:
            if name == 'locator':
                service = Locator(endpoints=self._endpoints)
            else:
                service = Service(name, endpoints=self._endpoints)
            self._cache[name] = service
        return self._cache[name]
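The method above is a plain per-name cache: the first call constructs a `Locator` or `Service`, later calls return the same instance. A hedged usage sketch, assuming the method belongs to the `PooledServiceFactory` seen in Examples #13 and #14 below:

pool = PooledServiceFactory([["localhost", 10053]])
storage = pool.create_service("storage")
# the second lookup hits the cache and returns the very same object
assert storage is pool.create_service("storage")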
Example #8
    def __init__(self, locators=("localhost:10053",),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="", sticky_header="X-Cocaine-Sticky",
                 forcegen_request_header=False,
                 default_tracing_chance=DEFAULT_TRACING_CHANCE,
                 configuration_service="unicorn",
                 tracing_conf_path="/zipkin_sampling",
                 ioloop=None, **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.service_cache_count = cache
        self.spool_size = int(self.service_cache_count * 1.5)
        self.refresh_period = config.get("refresh_timeout", DEFAULT_REFRESH_PERIOD)
        self.timeouts = config.get("timeouts", {})
        self.locator_endpoints = [parse_locators_endpoints(i) for i in locators]
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)
        # used to reply to the `ping` method
        self.locator_status = False

        # active applications
        self.cache = collections.defaultdict(list)

        self.logger = logging.getLogger("cocaine.proxy.general")
        self.access_log = logging.getLogger("cocaine.proxy.access")
        self.access_log.propagate = False
        self.logger.info("locators %s",
                         ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header

        self.logger.info("conf path in `%s` configuration service: %s",
                         configuration_service, tracing_conf_path)
        self.unicorn = Service(configuration_service, locator=self.locator)
        self.sampled_apps = {}
        self.default_tracing_chance = default_tracing_chance
        self.tracing_conf_path = tracing_conf_path

        self.io_loop.add_future(self.on_sampling_updates(),
                                lambda x: self.logger.error("the sample updater must not exit"))

        if request_id_header:
            self.get_request_id = functools.partial(get_request_id, request_id_header,
                                                    force=forcegen_request_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(self.on_routing_groups_update(),
                                lambda x: self.logger.error("the updater must not exit"))
        # run the infinite locator health-status check
        self.locator_health_check()
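`parse_locators_endpoints` turns entries such as `"localhost:10053"` from the `locators` argument into `(host, port)` pairs. Its implementation is not shown in these examples; a minimal sketch of such a parser (an assumption under a hypothetical name, not the library's actual code):

def parse_hostport(value):
    # split on the rightmost colon so the port is always the last field
    host, sep, port = value.rpartition(":")
    if not sep:
        raise ValueError("expected host:port, got %r" % value)
    return host, int(port)

assert parse_hostport("localhost:10053") == ("localhost", 10053)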
Example #9
    def reelect_app(self, request, app):
        """tries to connect to the same app on differnet host from dist-info"""

        # store current endpoints of locator
        locator_endpoints = app.locator.endpoints

        # disconnect app explicitly to break possibly existing connection
        app.disconnect()
        endpoints_size = len(locator_endpoints)

        # try once per endpoint in the app's locator
        for _ in xrange(0, endpoints_size):
            try:
                # move the first endpoint to the end so the next attempt starts from a different one;
                # the locator's default connection-retry logic does not fit here
                locator_endpoints = locator_endpoints[1:] + locator_endpoints[:1]

                # always create a new locator to avoid locking, since we connect with a timeout
                # (the lock can still be held for the duration of a TCP timeout)
                locator = Locator(endpoints=locator_endpoints)
                request.logger.info("connecting to locator %s", locator.endpoints[0])

                # first try to connect to locator only on remote host with timeout
                yield gen.with_timeout(self.service_connect_timeout, locator.connect())
                request.logger.debug("connected to locator %s for %s", locator.endpoints[0], app.name)
                app = Service(app.name, locator=locator, timeout=RESOLVE_TIMEOUT)

                # try to resolve and connect to application itself
                yield gen.with_timeout(self.service_connect_timeout, app.connect())
                request.logger.debug("connected to application %s via %s", app.name, app.endpoints)
            except gen.TimeoutError:
                # on timeout, move on to the next endpoint
                request.logger.warning("timed out while connecting to application")
                continue
            except ServiceError as err:
                request.logger.warning("got error while resolving app - %s", err)
                if err.category in LOCATORCATEGORY and err.code == ESERVICENOTAVAILABLE:
                    # if the application is down - also try next endpoint
                    continue
                else:
                    raise err
            # return connected app
            raise gen.Return(app)
        raise PluginApplicationError(42, 42, "could not connect to application")
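Unlike Example #1, which consumes the endpoint list by dropping its head, this variant rotates it, so the full endpoint set is preserved for the fresh locator built on each pass; only the starting host changes. The rotation itself is a single slice-and-concatenate:

endpoints = [["a", 10053], ["b", 10053], ["c", 10053]]
endpoints = endpoints[1:] + endpoints[:1]
# endpoints is now [["b", 10053], ["c", 10053], ["a", 10053]]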
Example #10
    def locator(self):
        if self._locator:
            return self._locator
        else:
            try:
                locator = Locator(endpoints=self.endpoints)
                self._locator = locator
                return locator
            except Exception as err:
                raise ToolsError(err)
Example #11
    def process(self, request):
        mds_request_headers = httputil.HTTPHeaders()
        if "Authorization" in request.headers:
            mds_request_headers["Authorization"] = request.headers[
                "Authorization"]

        traceid = getattr(request, "traceid", None)
        if traceid is not None:
            mds_request_headers["X-Request-Id"] = traceid

        key = request.headers["X-Srw-Key"]

        name, event = extract_app_and_event(request)
        self.proxy.setup_tracing(request, name)
        timeout = self.proxy.get_timeout(name, event)
        name = self.proxy.resolve_group_to_version(name)
        if self.is_stid_request(request):
            url = "%s/gate/dist-info/%s?primary-only" % (
                self.dist_info_endpoint, key)
            request.logger.debug(
                "fetching endpoints via mulcagate dist-info - %s", url)
            srw_request = HTTPRequest(url,
                                      method="GET",
                                      headers=mds_request_headers,
                                      allow_ipv6=True,
                                      request_timeout=timeout)
        else:
            url = "%s/dist-info-%s/%s" % (self.mds_dist_info_endpoint,
                                          request.headers["X-Srw-Namespace"],
                                          key)
            request.logger.debug("fetching endpoints via mds dist-info - %s",
                                 url)
            srw_request = HTTPRequest(url,
                                      method="GET",
                                      headers=mds_request_headers,
                                      allow_ipv6=True,
                                      request_timeout=timeout)

        endpoints = yield self.fetch_mds_endpoints(request, srw_request)
        locator = Locator(endpoints=endpoints)
        app = Service(name, locator=locator, timeout=RESOLVE_TIMEOUT)
        request.logger.info("connecting to app %s", name)
        app = yield self.reelect_app(request, app)
        # TODO: attempts should be configurable
        yield self.proxy.process(request, name, app, event,
                                 pack_httprequest(request), self.reelect_app,
                                 4, timeout)
Example #12
    def __init__(self, locators=("localhost:10053",),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="", sticky_header="X-Cocaine-Sticky",
                 ioloop=None, **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.serviceCacheCount = cache
        self.spoolSize = int(self.serviceCacheCount * 1.5)
        self.refreshPeriod = config.get("refresh_timeout", DEFAULT_REFRESH_PERIOD)
        self.timeouts = config.get("timeouts", {})
        self.locator_endpoints = map(parse_locators_endpoints, locators)
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)

        # active applications
        self.cache = collections.defaultdict(list)

        self.logger = ContextAdapter(logging.getLogger("cocaine.proxy"), {"id": "0" * 16})
        self.tracking_logger = logging.getLogger("cocaine.proxy.tracking")
        self.logger.info("locators %s", ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header

        if request_id_header:
            self.get_request_id = functools.partial(get_request_id, request_id_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(self.on_routing_groups_update(),
                                lambda x: self.logger.error("the updater must not exit"))
Example #13
    def __init__(self, locators=("localhost:10053",),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="", sticky_header="X-Cocaine-Sticky",
                 forcegen_request_header=False,
                 default_tracing_chance=DEFAULT_TRACING_CHANCE,
                 configuration_service="unicorn",
                 client_id=0,
                 client_secret='',
                 mapped_headers=[],
                 tracing_conf_path="/zipkin_sampling",
                 timeouts_conf_path="/proxy_apps_timeouts",
                 srw_config=None,
                 allow_json_rpc=True,
                 ioloop=None, **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.service_cache_count = cache
        self.spool_size = int(self.service_cache_count * 1.5)
        self.refresh_period = config.get("refresh_timeout", DEFAULT_REFRESH_PERIOD)
        self.locator_endpoints = [parse_locators_endpoints(i) for i in locators]
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)
        # used to reply to the `ping` method
        self.locator_status = False

        # active applications
        self.cache = collections.defaultdict(list)
        # routing groups from Locator service
        self.current_rg = {}

        self.logger = logging.getLogger("cocaine.proxy.general")
        self.access_log = logging.getLogger("cocaine.proxy.access")
        self.access_log.propagate = False
        self.logger.info("locators %s",
                         ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header
        self.mapped_headers = mapped_headers
        self.logger.info("mapping headers - %s", str(self.mapped_headers))

        self.plugins = []
        if srw_config:
            for config in srw_config:
                name, cfg = config["type"], config["args"]
                self.logger.info("initialize plugin %s", name)
                self.plugins.append(load_plugin(name, self, cfg))

        if allow_json_rpc:
            self.plugins.append(load_plugin('cocaine.proxy.jsonrpc.JSONRPC', self, {}))

        self.logger.info("conf path in `%s` configuration service: %s",
                         configuration_service, tracing_conf_path)
        repo = PooledServiceFactory(self.locator_endpoints)
        repo.secure = TVM(repo, client_id, client_secret)

        if client_id == 0 or client_secret == '':
            self.logger.info("using non-authenticated unicorn access")
            self.unicorn = repo.create_service(configuration_service)
        else:
            self.logger.info("using authenticated unicorn access")
            self.unicorn = repo.create_secure_service(configuration_service)
        self.sampled_apps = {}
        self.default_tracing_chance = default_tracing_chance
        self.tracing_conf_path = tracing_conf_path

        self.io_loop.add_future(self.on_sampling_updates(),
                                lambda x: self.logger.error("the sample updater must not exit"))

        self.timeouts_conf_path = timeouts_conf_path
        self.timeouts = {}
        self.io_loop.add_future(self.on_timeouts_updates(),
                                lambda x: self.logger.error("the timeouts updater must not exit"))

        if request_id_header:
            self.get_request_id = functools.partial(get_request_id, request_id_header,
                                                    force=forcegen_request_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(self.on_routing_groups_update(),
                                lambda x: self.logger.error("the updater must not exit"))
        # run the infinite locator health-status check
        self.locator_health_check()
Example #14
class CocaineProxy(object):
    def __init__(self, locators=("localhost:10053",),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="", sticky_header="X-Cocaine-Sticky",
                 forcegen_request_header=False,
                 default_tracing_chance=DEFAULT_TRACING_CHANCE,
                 configuration_service="unicorn",
                 client_id=0,
                 client_secret='',
                 mapped_headers=[],
                 tracing_conf_path="/zipkin_sampling",
                 timeouts_conf_path="/proxy_apps_timeouts",
                 srw_config=None,
                 allow_json_rpc=True,
                 ioloop=None, **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.service_cache_count = cache
        self.spool_size = int(self.service_cache_count * 1.5)
        self.refresh_period = config.get("refresh_timeout", DEFAULT_REFRESH_PERIOD)
        self.locator_endpoints = [parse_locators_endpoints(i) for i in locators]
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)
        # used to reply to the `ping` method
        self.locator_status = False

        # active applications
        self.cache = collections.defaultdict(list)
        # routing groups from Locator service
        self.current_rg = {}

        self.logger = logging.getLogger("cocaine.proxy.general")
        self.access_log = logging.getLogger("cocaine.proxy.access")
        self.access_log.propagate = False
        self.logger.info("locators %s",
                         ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header
        self.mapped_headers = mapped_headers
        self.logger.info("mapping headers - %s", str(self.mapped_headers))

        self.plugins = []
        if srw_config:
            for config in srw_config:
                name, cfg = config["type"], config["args"]
                self.logger.info("initialize plugin %s", name)
                self.plugins.append(load_plugin(name, self, cfg))

        if allow_json_rpc:
            self.plugins.append(load_plugin('cocaine.proxy.jsonrpc.JSONRPC', self, {}))

        self.logger.info("conf path in `%s` configuration service: %s",
                         configuration_service, tracing_conf_path)
        repo = PooledServiceFactory(self.locator_endpoints)
        repo.secure = TVM(repo, client_id, client_secret)

        if client_id == 0 or client_secret == '':
            self.logger.info("using non-authenticated unicorn access")
            self.unicorn = repo.create_service(configuration_service)
        else:
            self.logger.info("using authenticated unicorn access")
            self.unicorn = repo.create_secure_service(configuration_service)
        self.sampled_apps = {}
        self.default_tracing_chance = default_tracing_chance
        self.tracing_conf_path = tracing_conf_path

        self.io_loop.add_future(self.on_sampling_updates(),
                                lambda x: self.logger.error("the sample updater must not exit"))

        self.timeouts_conf_path = timeouts_conf_path
        self.timeouts = {}
        self.io_loop.add_future(self.on_timeouts_updates(),
                                lambda x: self.logger.error("the timeouts updater must not exit"))

        if request_id_header:
            self.get_request_id = functools.partial(get_request_id, request_id_header,
                                                    force=forcegen_request_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(self.on_routing_groups_update(),
                                lambda x: self.logger.error("the updater must not exit"))
        # run the infinite locator health-status check
        self.locator_health_check()

    @gen.coroutine
    def locator_health_check(self, period=5):
        wait_timeout = datetime.timedelta(seconds=period)
        while True:
            try:
                self.logger.debug("check health status of locator via cluster method")
                channel = yield gen.with_timeout(wait_timeout, self.locator.cluster())
                cluster = yield gen.with_timeout(wait_timeout, channel.rx.get())
                self.locator_status = True
                self.logger.debug("dumped cluster %s", cluster)
                yield gen.sleep(period)
            except Exception as err:
                self.logger.error("health status check failed: %s", err)
                self.locator_status = False
                yield gen.sleep(1)

    @gen.coroutine
    def on_routing_groups_update(self):
        uid = gen_uid()
        self.logger.info("generate new unique id %s", uid)
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        while True:
            self.current_rg = {}
            try:
                self.logger.info("subscribe to updates with id %s", uid)
                channel = yield self.locator.routing(uid, True)
                timeout = 1
                while True:
                    new = yield channel.rx.get()
                    if isinstance(new, EmptyResponse):
                        # it means that the cocaine runtime has been stopped
                        self.logger.error("locator sends close")
                        break
                    updates = scan_for_updates(self.current_rg, new)
                    # replace current
                    self.current_rg = new
                    if len(updates) == 0:
                        self.logger.info("locator sends an update message, "
                                         "but no updates have been found")
                        continue

                    self.logger.info("%d routing groups have been refreshed %s",
                                     len(updates), updates)
                    for group in updates:
                        # if we have not created an instance of
                        # the group it is absent in cache
                        if group not in self.cache:
                            self.logger.debug("nothing to update in group %s", group)
                            continue

                        for app in self.cache[group]:
                            self.logger.debug("%s: move %s to the inactive queue to refresh"
                                              " routing group", app.id, app.name)
                            self.migrate_from_cache_to_inactive(app, group)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                self.logger.error("error occurred while watching for group updates %s. Sleep %d",
                                  err, timeout)
                yield gen.sleep(timeout)

    @gen.coroutine
    def watch_app(self, name, path):
        version = 0
        self.sampled_apps[name] = self.default_tracing_chance
        try:
            self.logger.info("start watching for sampling updates of %s", name)
            watch_channel = yield self.unicorn.subscribe(path, version)
            while True:
                value, version = yield watch_channel.rx.get()
                self.logger.info("got sampling updates for %s: version %d value %.2f", name, version, value)
                try:
                    weight = float(value)
                    self.sampled_apps[name] = weight
                except ValueError as err:
                    self.logger.error("sample value %s for %s can NOT be converted: %s. Use %f",
                                      value, name, err, self.default_tracing_chance)
                    self.sampled_apps[name] = self.default_tracing_chance
        except ServiceError as err:
            # verify that the err is `zookeeper: no node [-101]`
            if err.code != -101:
                self.logger.error("watching of `%s` raised an unexpected service error (cat. %d): %s", name, err.category, err)
        except Exception as err:
            self.logger.error("watching of %s error: %s", name, err)
        finally:
            self.logger.info("stop watching for sampling updates of %s", name)
            self.sampled_apps.pop(name, None)
            try:
                watch_channel.tx.close()
            except Exception:
                pass

    @gen.coroutine
    def on_sampling_updates(self):
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        listing_version = 0

        while True:
            try:
                listing_channel = yield self.unicorn.children_subscribe(self.tracing_conf_path, listing_version)
                while True:
                    listing_version, apps = yield listing_channel.rx.get()
                    self.logger.info("on_sampling_updates: version %d value %s", listing_version, apps)
                    for app in (i for i in apps if i not in self.sampled_apps):
                        self.watch_app(app, self.tracing_conf_path + "/" + app)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                listing_version = 0
                self.logger.error("error occurred while subscribing for sampling updates %s. Sleep %d",
                                  err, timeout)
                yield gen.sleep(timeout)

    @gen.coroutine
    def watch_app_timeouts(self, name, path):
        version = 0
        self.timeouts[name] = {}
        try:
            self.logger.info("start watching for timeouts updates of %s", name)
            watch_channel = yield self.unicorn.subscribe(path, version)
            while True:
                value, version = yield watch_channel.rx.get()
                self.logger.info("got timeouts updates for %s: version %d value %s", name, version, value)
                if isinstance(value, dict):
                    self.timeouts[name] = value
                else:
                    self.logger.error("timeout value %s for %s is not dict", value, name)
                    self.timeouts[name] = {}
        except ServiceError as err:
            # verify that the err is `zookeeper: no node [-101]`
            if err.code != -101:
                self.logger.error("watching of `%s` raised an unexpected service error (cat. %d): %s", name, err.category, err)
        except Exception as err:
            self.logger.error("watching of %s error: %s", name, err)
        finally:
            self.logger.info("stop watching for timeouts updates of %s", name)
            self.timeouts.pop(name, None)
            try:
                watch_channel.tx.close()
            except Exception:
                pass

    @gen.coroutine
    def on_timeouts_updates(self):
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        listing_version = 0

        while True:
            try:
                listing_channel = yield self.unicorn.children_subscribe(self.timeouts_conf_path, listing_version)
                while True:
                    listing_version, apps = yield listing_channel.rx.get()
                    self.logger.info("on_timeouts_updates: version %d value %s", listing_version, apps)
                    for app in (i for i in apps if i not in self.timeouts):
                        self.watch_app_timeouts(app, self.timeouts_conf_path + "/" + app)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                listing_version = 0
                self.logger.error("error occurred while subscribing for sampling updates %s. Sleep %d",
                                  err, timeout)
                yield gen.sleep(timeout)

    def get_timeout(self, name, event=''):
        if name in self.timeouts:
            tmts = self.timeouts[name]
            return tmts.get(event) or tmts.get('', DEFAULT_TIMEOUT)

        return DEFAULT_TIMEOUT

    def migrate_from_cache_to_inactive(self, app, name):
        try:
            drop_app_from_cache(self.cache, app, name)
        except Exception as err:
            self.logger.error("app %s %s: drop cache error %s", app, name, err)

        # dispose the service after 3x the timeout,
        # assuming all requests will have finished by then
        self.io_loop.call_later(self.get_timeout(name) * 3,
                                functools.partial(self.dispose, app, name))
        self.logger.info("app %s %s is scheduled to dispose", app, name)

    def move_to_inactive(self, app, name):
        @gen.coroutine
        def wrapper():
            active_apps = len(self.cache[name])
            self.logger.info("%s: preparing to moving %s %s to an inactive queue (active %d)",
                             app.id, app.name, "{0}:{1}".format(*app.address), active_apps)

            try:
                new_app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
                self.logger.info("%s: creating an instance of %s", new_app.id, name)
                yield new_app.connect()
                self.logger.info("%s: connect to an app %s endpoint %s ",
                                 new_app.id, new_app.name, "{0}:{1}".format(*new_app.address))
                timeout = (1 + random.random()) * self.refresh_period
                self.io_loop.call_later(timeout, self.move_to_inactive(new_app, name))
                # add to cache only after successfully connected
                self.cache[name].append(new_app)
            except Exception as err:
                self.logger.error("%s: unable to connect to `%s`: %s", new_app.id, name, err)
                # schedule later
                self.io_loop.call_later(self.get_timeout(name), self.move_to_inactive(app, name))
            else:
                self.logger.info("%s: move %s %s to an inactive queue",
                                 app.id, app.name, "{0}:{1}".format(*app.address))
                # current active app will be dropped here
                self.migrate_from_cache_to_inactive(app, name)

        return wrapper

    def dispose(self, app, name):
        self.logger.info("dispose service %s %s", name, app.id)
        app.disconnect()

    def resolve_group_to_version(self, name, value=None):
        """ Pick a version from a routing group using a random or provided value
            A routing group looks like (weight, version):
            {"APP": [[29431330, 'A'], [82426238, 'B'], [101760716, 'C'], [118725487, 'D'], [122951927, 'E']]}
        """
        if name not in self.current_rg:
            return name

        routing_group = self.current_rg[name]
        if len(routing_group) == 0:
            self.logger.warning("empty rounting group %s", name)
            return name

        value = value or random.randint(0, 1 << 32)
        index = upper_bound(routing_group, value)
        return routing_group[index if index < len(routing_group) else 0][1]

    def ping(self, request):
        if self.locator_status:
            fill_response_in(request, httplib.OK, "OK", "OK")
            return

        fill_response_in(request, httplib.SERVICE_UNAVAILABLE,
                         httplib.responses[httplib.SERVICE_UNAVAILABLE],
                         "Failed", proxy_error_headers())

    def setup_tracing(self, request, name):
        if getattr(request, "traceid", None) is not None:
            tracing_chance = self.sampled_apps.get(name, self.default_tracing_chance)
            rolled_dice = random.uniform(0, 100)
            request.logger.debug("tracing_chance %f, rolled dice %f", tracing_chance, rolled_dice)
            if tracing_chance < rolled_dice:
                request.logger.info('stop tracing the request')
                request.logger = NULLLOGGER
                request.tracebit = False
        else:
            request.tracebit = False

    @context
    @gen.coroutine
    def __call__(self, request):
        for plugin in self.plugins:
            if plugin.match(request):
                request.logger.info('processed by %s plugin', plugin.name())
                try:
                    yield plugin.process(request)
                except PluginNoSuchApplication as err:
                    fill_response_in(request, NO_SUCH_APP, "No such application",
                                     str(err), proxy_error_headers())
                except PluginApplicationError:
                    message = "application error"
                    fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                     httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                     message, proxy_error_headers())
                except ProxyInvalidRequest:
                    if request.path == "/ping":
                        self.ping(request)
                    else:
                        fill_response_in(request, httplib.NOT_FOUND, httplib.responses[httplib.NOT_FOUND],
                                         "Invalid url", proxy_error_headers())
                except Exception as err:
                    request.logger.exception('plugin %s returned error: %s', plugin.name(), err)
                    message = "unknown error"
                    fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                     httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                     message, proxy_error_headers())
                return

        try:
            name, event = extract_app_and_event(request)
        except ProxyInvalidRequest:
            if request.path == "/ping":
                self.ping(request)
            else:
                fill_response_in(request, httplib.NOT_FOUND, httplib.responses[httplib.NOT_FOUND],
                                 "Invalid url", proxy_error_headers())
            return

        self.setup_tracing(request, name)

        if self.sticky_header in request.headers:
            seed = request.headers.get(self.sticky_header)
            seed_value = header_to_seed(seed)
            request.logger.info('sticky_header has been found: name %s, value %s, seed %d', name, seed, seed_value)
            name = self.resolve_group_to_version(name, seed_value)

        app = yield self.get_service(name, request)

        if app is None:
            message = "current application %s is unavailable" % name
            fill_response_in(request, NO_SUCH_APP, "No Such Application",
                             message, proxy_error_headers(name))
            return

        try:
            # TODO: attempts should be configurable
            yield self.process(request, name, app, event, pack_httprequest(request), self.reelect_app, 2)
        except Exception as err:
            request.logger.exception("error during processing request %s", err)
            fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                             httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                             "UID %s: %s" % (request.traceid, str(err)), proxy_error_headers(name))

        request.logger.info("exit from process")

    def info(self):
        return {'services': {'cache': dict(((k, len(v)) for k, v in self.cache.items()))},
                'requests': {'inprogress': self.requests_in_progress,
                             'total': self.requests_total},
                'errors': {'disconnections': self.requests_disconnections},
                'sampling': self.sampled_apps}

    @gen.coroutine
    def reelect_app(self, request, app):
        cache_size = len(self.cache[app.name])
        if cache_size < self.spool_size:
            request.logger.info("spool is not full. Create a new application instance")
            app = yield self.get_service(app.name, request)
        elif cache_size == 1:
            # NOTE: if we have spool_size 1, the same app will be picked
            # Probably we can create a new one and mark the old one inactive
            request.logger.warning("spool size is limited by 1, cannot pick a new instance of th app. Use the old one")
            # pass
        else:
            request.logger.info("pick a random instance of the application")
            try:
                index = self.cache[app.name].index(app)
                request.logger.info("the app is located in cache at pos %d", index)
                if cache_size == 2:  # shortcut
                    picked = (index + 1) % 2
                else:
                    picked = index
                    while picked == index:
                        picked = random.randint(0, cache_size - 1)

                request.logger.info("an instance at pos %d has been picked", index)
                app = self.cache[app.name][picked]
            except ValueError:
                app = random.choice(self.cache[app.name])
        raise gen.Return(app)

    @gen.coroutine
    def process(self, request, name, app, event, data, reelect_app_fn, attempts, timeout=None):
        if timeout is None:
            timeout = self.get_timeout(name, event)
        request.logger.info("start processing event `%s` for an app `%s` (appid: %s) after %.3f ms with timeout %f",
                            event, app.name, app.id, request.request_time() * 1000, timeout)
        parentid = 0

        if request.traceid is not None:
            traceid = int(request.traceid, 16)
            trace = Trace(traceid=traceid, spanid=traceid, parentid=parentid)
        else:
            trace = None

        headers = {
            'trace_bit': '{:d}'.format(request.tracebit),
        }
        if 'authorization' in request.headers:
            headers['authorization'] = request.headers['authorization']

        for mapped in self.mapped_headers:
            if mapped in request.headers:
                headers[mapped] = request.headers[mapped]

        def on_error(app, err, extra_msg, code=httplib.INTERNAL_SERVER_ERROR):
            if len(extra_msg) > 0 and not extra_msg.endswith(' '):
                extra_msg += ' '
            request.logger.error("%s: %s%s", app.id, extra_msg, err)

            message = "UID %s: application `%s` error: %s" % (request.traceid, app.name, str(err))
            fill_response_in(request, code, httplib.responses[code], message, proxy_error_headers(app.name))

        def check_attempts(app, err):
            if attempts > 0:
                return True
            # no attempts left, so quit here
            on_error(app, err, '(no attempts left) ')
            return False

        while attempts > 0:
            attempts -= 1
            processor = None
            try:
                request.logger.debug("%s: enqueue event (attempt %d)", app.id, attempts)
                channel = yield app.enqueue(event, trace=trace, **headers)
                request.logger.debug("%s: send event data (attempt %d)", app.id, attempts)
                yield channel.tx.write(msgpack.packb(data), trace=trace)
                yield channel.tx.close(trace=trace)
                request.logger.debug("%s: waiting for a code and headers (attempt %d)",
                                     app.id, attempts)
                code_and_headers = yield channel.rx.get(timeout=timeout)
                request.logger.debug("%s: code and headers have been received (attempt %d)",
                                     app.id, attempts)
                code, raw_headers = msgpack.unpackb(code_and_headers)
                headers = httputil.HTTPHeaders(raw_headers)

                cocaine_http_proto_version = headers.get(X_COCAINE_HTTP_PROTO_VERSION)
                if cocaine_http_proto_version is None or cocaine_http_proto_version == "1.0":
                    cocaine_http_proto_version = "1.0"

                    def stop_condition(body):
                        return isinstance(body, EmptyResponse)
                elif cocaine_http_proto_version == "1.1":
                    def stop_condition(body):
                        return isinstance(body, EmptyResponse) or len(body) == 0
                else:
                    raise Exception("unsupported X-Cocaine-HTTP-Proto-Version: %s" % cocaine_http_proto_version)

                processor = BodyProcessor.make_processor(
                    headers.get('Content-Length'),
                    request, name, code, headers)

                while True:
                    body = yield channel.rx.get(timeout=timeout)
                    if stop_condition(body):
                        request.logger.info("%s: body finished (attempt %d)", app.id, attempts)
                        break

                    request.logger.debug("%s: received %d bytes as a body chunk (attempt %d)",
                                         app.id, len(body), attempts)

                    processor.swallow(body)

            except gen.TimeoutError as err:
                on_error(app, err, '', httplib.GATEWAY_TIMEOUT)

            except (DisconnectionError, StreamClosedError) as err:
                self.requests_disconnections += 1
                # Probably it's dangerous to retry requests all the time.
                # We need a way to determine whether the failure happened while writing
                # or while reading the reply, and retry only write failures.
                request.logger.error("%s: %s", app.id, err)
                if not check_attempts(app, err):
                    return

                # It seems the on_close callback is not called when connecting through IPVS.
                # We detect the disconnection here to avoid unnecessary errors.
                # Try to reconnect and give the request another go.
                try:
                    start_time = time.time()
                    reconn_timeout = timeout - request.request_time()
                    request.logger.info("%s: connecting with timeout %.fms", app.id, reconn_timeout * 1000)
                    yield gen.with_timeout(start_time + reconn_timeout, app.connect(request.traceid))
                    reconn_time = time.time() - start_time
                    request.logger.info("%s: connecting took %.3fms", app.id, reconn_time * 1000)
                except Exception as err:
                    request.logger.error("%s: unable to reconnect: %s (%d attempts left)", err, attempts)
                # We still have an attempt to process the request.
                # Jump to the beginning of `while attempts > 0`, whether we connected
                # successfully or failed to connect.
                continue

            except ServiceError as err:
                if not check_attempts(app, err):
                    return

                # if the application has been restarted, we get broken pipe code
                # and system category
                if err.category in SYSTEMCATEGORY and err.code == EAPPSTOPPED:
                    request.logger.error("%s: the application has been restarted", app.id)
                    app.disconnect()
                    continue

                elif err.category in OVERSEERCATEGORY and err.code == EQUEUEISFULL:
                    request.logger.error("%s: queue is full. Pick another application instance", app.id)
                    try:
                        app = yield reelect_app_fn(request, app)
                    except Exception as reelect_err:
                        on_error(app, reelect_err, '(could not reelect app)')
                        return
                    request.logger.info("fetched new app from reelect_app_fn")
                    continue

                on_error(app, err, '')

            except Exception as err:
                on_error(app, err, '(unknown error) ')

            else:
                if processor:
                    processor.finish()

            # return on any error except disconnection,
            # or after receiving a good reply
            return

    @gen.coroutine
    def get_service(self, name, request):
        # cache isn't full for the current application
        if len(self.cache[name]) < self.spool_size:
            logger = request.logger
            try:
                app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
                logger.info("%s: creating an instance of %s", app.id, name)
                self.cache[name].append(app)
                yield app.connect(request.traceid)
                logger.info("%s: connect to an app %s endpoint %s ",
                            app.id, app.name, "{0}:{1}".format(*app.address))

                timeout = (1 + random.random()) * self.refresh_period
                self.io_loop.call_later(timeout, self.move_to_inactive(app, name))
            except Exception as err:
                logger.error("%s: unable to connect to `%s`: %s", app.id, name, err)
                drop_app_from_cache(self.cache, app, name)
                raise gen.Return()
            else:
                raise gen.Return(app)

        # get an instance from cache
        chosen = random.choice(self.cache[name])
        raise gen.Return(chosen)
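The weighted pick in `resolve_group_to_version` above relies on the weights in a routing group being cumulative and ascending: a random value in [0, 2**32) is mapped to the first entry whose weight exceeds it. A standalone sketch, assuming `upper_bound` behaves like `bisect.bisect_right` over the weight column:

import bisect
import random

def pick_version(routing_group, value=None):
    # routing_group: [[cumulative_weight, version], ...] sorted by weight,
    # as in the docstring of resolve_group_to_version
    if value is None:
        value = random.randint(0, 1 << 32)
    weights = [weight for weight, _ in routing_group]
    index = bisect.bisect_right(weights, value)
    # wrap around if the rolled value exceeds the largest weight
    return routing_group[index if index < len(routing_group) else 0][1]

rg = [[29431330, 'A'], [82426238, 'B'], [101760716, 'C']]
assert pick_version(rg, 50000000) == 'B'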
Example #15
class CocaineProxy(object):
    def __init__(self, locators=("localhost:10053",),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="", sticky_header="X-Cocaine-Sticky",
                 ioloop=None, **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.serviceCacheCount = cache
        self.spoolSize = int(self.serviceCacheCount * 1.5)
        self.refreshPeriod = config.get("refresh_timeout", DEFAULT_REFRESH_PERIOD)
        self.timeouts = config.get("timeouts", {})
        self.locator_endpoints = map(parse_locators_endpoints, locators)
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)

        # active applications
        self.cache = collections.defaultdict(list)

        self.logger = ContextAdapter(logging.getLogger("cocaine.proxy"), {"id": "0" * 16})
        self.tracking_logger = logging.getLogger("cocaine.proxy.tracking")
        self.logger.info("locators %s", ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header

        if request_id_header:
            self.get_request_id = functools.partial(get_request_id, request_id_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(self.on_routing_groups_update(),
                                lambda x: self.logger.error("the updater must not exit"))

    @gen.coroutine
    def on_routing_groups_update(self):
        uid = gen_uid()
        self.logger.info("generate new uniqque id %s", uid)
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        while True:
            current = {}
            try:
                self.logger.info("subscribe to updates with id %s", uid)
                channel = yield self.locator.routing(uid, True)
                timeout = 1
                while True:
                    new = yield channel.rx.get()
                    if isinstance(new, EmptyResponse):
                        # it means that the cocaine runtime has been stopped
                        self.logger.info("locator sends close")
                        break
                    updates = scan_for_updates(current, new)
                    # replace current
                    current = new
                    if len(updates) == 0:
                        self.logger.info("locator sends an update message, "
                                         "but no updates have been found")
                        continue

                    self.logger.info("%d routing groups have been refreshed %s",
                                     len(updates), updates)
                    for group in updates:
                        # if we have not created an instance of
                        # the group it is absent in cache
                        if group not in self.cache:
                            self.logger.info("nothing to update in group %s", group)
                            continue

                        for app in self.cache[group]:
                            self.logger.info("%d: move %s to the inactive queue to refresh"
                                             " routing group", app.id, app.name)
                            self.migrate_from_cache_to_inactive(app, group)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                self.logger.error("error occured while watching for group updates %s. Sleep %d",
                                  err, timeout)
                yield gen.sleep(timeout)

    def get_timeout(self, name):
        return self.timeouts.get(name, DEFAULT_TIMEOUT)

    def migrate_from_cache_to_inactive(self, app, name):
        try:
            self.cache[name].remove(app)
        except ValueError as err:
            self.logger.error("broken cache: %s", err)
        except KeyError as err:
            self.logger.error("broken cache: no such key %s", err)

        self.io_loop.call_later(self.get_timeout(name) * 3,
                                functools.partial(self.dispose, app, name))

    def move_to_inactive(self, app, name):
        def wrapper():
            active_apps = len(self.cache[name])
            if active_apps < self.serviceCacheCount:
                self.io_loop.call_later(self.get_timeout(name), self.move_to_inactive(app, name))
                return

            self.logger.info("%s: move %s %s to an inactive queue (active %d)",
                             app.id, app.name, "{0}:{1}".format(*app.address), active_apps)
            self.migrate_from_cache_to_inactive(app, name)
        return wrapper

    def dispose(self, app, name):
        self.logger.info("dispose service %s %s", name, app.id)
        app.disconnect()

    @context
    @gen.coroutine
    def __call__(self, request):
        if "X-Cocaine-Service" in request.headers and "X-Cocaine-Event" in request.headers:
            request.logger.debug('dispatch by headers')
            name = request.headers['X-Cocaine-Service']
            event = request.headers['X-Cocaine-Event']
        else:
            request.logger.debug('dispatch by uri')
            match = URL_REGEX.match(request.uri)
            if match is None:
                if request.path == "/ping":
                    try:
                        yield self.locator.connect()
                        fill_response_in(request, httplib.OK, "OK", "OK")
                    except Exception as err:
                        request.logger.error("unable to conenct to the locator: %s", err)
                        fill_response_in(request, httplib.SERVICE_UNAVAILABLE,
                                         httplib.responses[httplib.SERVICE_UNAVAILABLE],
                                         "locator is unavailable")
                elif request.path == '/__info':
                    # TODO: maybe we should remove keys whose values have len == 0 from the cache
                    # to avoid memory consumption for the strings and the dict
                    body = json.dumps({
                        'services': {
                            'cache': dict(((k, len(v)) for k, v in self.cache.items())),
                        },
                        'requests': {
                            'inprogress': self.requests_in_progress,
                            'total': self.requests_total,
                        },
                        'errors': {
                            'disconnections': self.requests_disconnections,
                        }
                    }, sort_keys=True)
                    headers = httputil.HTTPHeaders({"Content-Type": "application/json"})
                    fill_response_in(request, httplib.OK, httplib.responses[httplib.OK],
                                     body, headers)
                else:
                    fill_response_in(request, httplib.NOT_FOUND,
                                     httplib.responses[httplib.NOT_FOUND], "Invalid url")
                return

            name, event, other = match.groups()
            if name == '' or event == '':
                fill_response_in(request, httplib.BAD_REQUEST,
                                 httplib.responses[httplib.BAD_REQUEST], "Proxy invalid request")
                return

            # Drop the app name and event name from the query
            if not other.startswith('/'):
                other = "/" + other
            request.uri = other
            request.path, _, _ = other.partition("?")

        if self.sticky_header not in request.headers:
            app = yield self.get_service(name, request)
        else:
            seed = request.headers.get(self.sticky_header)
            request.logger.info('sticky_header has been found: %s', seed)
            app = yield self.get_service_with_seed(name, seed, request)

        if app is None:
            message = "current application %s is unavailable" % name
            fill_response_in(request, NO_SUCH_APP,
                             "No Such Application", message)
            return

        try:
            request.logger.debug("%s: processing request app: `%s`, event `%s`",
                                 app.id, app.name, event)
            yield self.process(request, name, app, event, pack_httprequest(request))
        except Exception as err:
            request.logger.error("error during processing request %s", err)
            fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                             httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                             "UID %s: %s" % (request.traceid, str(err)))

        request.logger.info("exit from process")

    @gen.coroutine
    def process(self, request, name, app, event, data):
        request.logger.info("start processing request after %.3f ms", request.request_time() * 1000)
        timeout = self.get_timeout(name)
        # allow to reconnect this amount of times.
        attempts = 2  # make it configurable

        parentid = 0

        if request.traceid is not None:
            traceid = int(request.traceid, 16)
            trace = Trace(traceid=traceid, spanid=traceid, parentid=parentid)
        else:
            trace = None

        while attempts > 0:
            headers = {}
            body_parts = []
            attempts -= 1
            try:
                request.logger.info("%s: enqueue event (attempt %d)", app.id, attempts)
                channel = yield app.enqueue(event, trace=trace)
                request.logger.debug("%s: send event data (attempt %d)", app.id, attempts)
                yield channel.tx.write(msgpack.packb(data), trace=trace)
                yield channel.tx.close(trace=trace)
                request.logger.debug("%s: waiting for a code and headers (attempt %d)",
                                     app.id, attempts)
                code_and_headers = yield channel.rx.get(timeout=timeout)
                request.logger.debug("%s: code and headers have been received (attempt %d)",
                                     app.id, attempts)
                code, raw_headers = msgpack.unpackb(code_and_headers)
                headers = tornado.httputil.HTTPHeaders(raw_headers)
                while True:
                    body = yield channel.rx.get(timeout=timeout)
                    if isinstance(body, EmptyResponse):
                        request.logger.info("%s: body finished (attempt %d)", app.id, attempts)
                        break

                    request.logger.debug("%s: received %d bytes as a body chunk (attempt %d)",
                                         app.id, len(body), attempts)
                    body_parts.append(body)
            except gen.TimeoutError as err:
                request.logger.error("%s %s:  %s", app.id, name, err)
                message = "UID %s: application `%s` error: %s" % (request.traceid, name, str(err))
                fill_response_in(request, httplib.GATEWAY_TIMEOUT,
                                 httplib.responses[httplib.GATEWAY_TIMEOUT], message)

            except (DisconnectionError, StreamClosedError) as err:
                self.requests_disconnections += 1
                # Probably it's dangerous to retry requests all the time.
                # I must find a way to determine whether it failed while writing
                # or while reading the reply, and retry only write failures.
                request.logger.error("%s: %s", app.id, err)
                if attempts <= 0:
                    request.logger.info("%s: no more attempts", app.id)
                    fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                     httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                     "UID %s: Connection problem" % request.traceid)
                    return

                # It seems the on_close callback is not called when connecting through IPVS.
                # We detect the disconnection here to avoid unnecessary errors.
                # Try to reconnect and give the request another go.
                try:
                    start_time = time.time()
                    reconn_timeout = timeout - request.request_time()
                    request.logger.info("%s: connecting with timeout %.fms", app.id, reconn_timeout * 1000)
                    yield gen.with_timeout(start_time + reconn_timeout, app.connect(request.logger.traceid))
                    reconn_time = time.time() - start_time
                    request.logger.info("%s: connecting took %.3fms", app.id, reconn_time * 1000)
                except Exception as err:
                    if attempts <= 0:
                        # no attempts left, so quit here
                        request.logger.error("%s: %s (no attempts left)", app.id, err)
                        message = "UID %s: application `%s` error: %s" % (request.traceid, name, str(err))
                        fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                         httplib.responses[httplib.INTERNAL_SERVER_ERROR], message)
                        return

                    request.logger.error("%s: unable to reconnect: %s (%d attempts left)", err, attempts)
                # We have another attempt to process the request.
                # Jump to the beginning of `while attempts > 0`, whether we connected
                # successfully or failed to connect.
                continue

            except ServiceError as err:
                # if the application has been restarted, we get broken pipe code
                # and system category
                if err.code == errno.EPIPE and err.category == ESYSTEMCATEGORY:
                    request.logger.error("%s: the application has been restarted", app.id)
                    app.disconnect()
                    continue

                request.logger.error("%s: %s", app.id, err)
                message = "UID %s: application `%s` error: %s" % (request.traceid, name, str(err))
                fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                 httplib.responses[httplib.INTERNAL_SERVER_ERROR], message)

            except Exception as err:
                request.logger.error("%s: %s", app.id, err)
                message = "UID %s: unknown `%s` error: %s" % (request.traceid, name, str(err))
                fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                 httplib.responses[httplib.INTERNAL_SERVER_ERROR], message)
            else:
                message = ''.join(body_parts)
                fill_response_in(request, code,
                                 httplib.responses.get(code, httplib.OK),
                                 message, headers)
            # to return from all errors except Disconnection
            # or receiving a good reply
            return

    @gen.coroutine
    def get_service(self, name, request):
        # cache isn't full for the current application
        if len(self.cache[name]) < self.spoolSize:
            logger = request.logger
            try:
                app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
                logger.info("%s: creating an instance of %s", app.id, name)
                self.cache[name].append(app)
                yield app.connect(request.traceid)
                logger.info("%s: connect to an app %s endpoint %s ",
                            app.id, app.name, "{0}:{1}".format(*app.address))

                timeout = (1 + random.random()) * self.refreshPeriod
                self.io_loop.call_later(timeout, self.move_to_inactive(app, name))
            except Exception as err:
                logger.error("%s: unable to connect to `%s`: %s", app.id, name, err)
                if app in self.cache[name]:
                    self.cache[name].remove(app)
                raise gen.Return()
            else:
                raise gen.Return(app)

        # get an instance from cache
        chosen = random.choice(self.cache[name])
        raise gen.Return(chosen)
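
    # Spool pattern used above: connect new instances lazily until spoolSize
    # warm connections exist for the app, then balance requests across the
    # pool with random.choice.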

    @gen.coroutine
    def get_service_with_seed(self, name, seed, request):
        logger = request.logger
        app = Service(name, seed=seed, locator=self.locator)
        try:
            logger.info("%s: creating an instance of %s, seed %s", app.id, name, seed)
            yield app.connect(logger.traceid)
        except Exception as err:
            logger.error("%s: unable to connect to `%s`: %s", app.id, name, err)
            raise gen.Return()

        raise gen.Return(app)
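
The process() coroutine above drives the usual cocaine channel round-trip: enqueue the event, write the msgpack-packed request, close the tx stream, then read the (code, headers) frame and body chunks from rx until EmptyResponse. A minimal sketch of that round-trip, reusing only calls that appear in the snippets (the app and event names are placeholders; EmptyResponse is the framework's end-of-stream marker, imported as in the examples):

import msgpack
from tornado import gen
from cocaine.services import Service

@gen.coroutine
def call_app(name, event, payload, timeout=5.0):
    # resolve and connect via the default locator, as in Example #20
    app = Service(name)
    channel = yield app.enqueue(event)
    # request: a single msgpack-packed frame, then close the tx stream
    yield channel.tx.write(msgpack.packb(payload))
    yield channel.tx.close()
    # reply: the first frame carries (code, raw_headers), the rest are body chunks
    frame = yield channel.rx.get(timeout=timeout)
    code, raw_headers = msgpack.unpackb(frame)
    body = []
    while True:
        chunk = yield channel.rx.get(timeout=timeout)
        if isinstance(chunk, EmptyResponse):  # end-of-stream marker, as above
            break
        body.append(chunk)
    raise gen.Return((code, raw_headers, ''.join(body)))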
Example #19
class CocaineProxy(object):
    def __init__(self,
                 locators=("localhost:10053", ),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="",
                 sticky_header="X-Cocaine-Sticky",
                 forcegen_request_header=False,
                 default_tracing_chance=DEFAULT_TRACING_CHANCE,
                 configuration_service="unicorn",
                 tracing_conf_path="/zipkin_sampling",
                 timeouts_conf_path="/proxy_apps_timeouts",
                 srw_config=None,
                 allow_json_rpc=True,
                 ioloop=None,
                 **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.service_cache_count = cache
        self.spool_size = int(self.service_cache_count * 1.5)
        self.refresh_period = config.get("refresh_timeout",
                                         DEFAULT_REFRESH_PERIOD)
        self.locator_endpoints = [
            parse_locators_endpoints(i) for i in locators
        ]
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)
        # it's used to reply to the `ping` method
        self.locator_status = False

        # active applications
        self.cache = collections.defaultdict(list)
        # routing groups from Locator service
        self.current_rg = {}

        self.logger = logging.getLogger("cocaine.proxy.general")
        self.access_log = logging.getLogger("cocaine.proxy.access")
        self.access_log.propagate = False
        self.logger.info(
            "locators %s",
            ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header

        self.plugins = []
        if srw_config:
            for config in srw_config:
                name, cfg = config["type"], config["args"]
                self.logger.info("initialize plugin %s", name)
                self.plugins.append(load_plugin(name, self, cfg))

        if allow_json_rpc:
            self.plugins.append(
                load_plugin('cocaine.proxy.jsonrpc.JSONRPC', self, {}))

        self.logger.info("conf path in `%s` configuration service: %s",
                         configuration_service, tracing_conf_path)
        self.unicorn = Service(configuration_service, locator=self.locator)
        self.sampled_apps = {}
        self.default_tracing_chance = default_tracing_chance
        self.tracing_conf_path = tracing_conf_path

        self.io_loop.add_future(
            self.on_sampling_updates(),
            lambda x: self.logger.error("the sample updater must not exit"))

        self.timeouts_conf_path = timeouts_conf_path
        self.timeouts = {}
        self.io_loop.add_future(
            self.on_timeouts_updates(),
            lambda x: self.logger.error("the timeouts updater must not exit"))

        if request_id_header:
            self.get_request_id = functools.partial(
                get_request_id,
                request_id_header,
                force=forcegen_request_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(
            self.on_routing_groups_update(),
            lambda x: self.logger.error("the updater must not exit"))
        # run the infinite locator health status check
        self.locator_health_check()

    @gen.coroutine
    def locator_health_check(self, period=5):
        wait_timeout = datetime.timedelta(seconds=period)
        while True:
            try:
                self.logger.debug(
                    "check health status of locator via cluster method")
                channel = yield gen.with_timeout(wait_timeout,
                                                 self.locator.cluster())
                cluster = yield gen.with_timeout(wait_timeout, channel.rx.get())
                self.locator_status = True
                self.logger.debug("dumped cluster %s", cluster)
                yield gen.sleep(period)
            except Exception as err:
                self.logger.error("health status check failed: %s", err)
                self.locator_status = False
                yield gen.sleep(1)
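
    # Note: gen.with_timeout accepts either a datetime.timedelta (relative) or
    # an absolute deadline, so every cluster() round-trip above is bounded by
    # `period` seconds.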

    @gen.coroutine
    def on_routing_groups_update(self):
        uid = gen_uid()
        self.logger.info("generate new unique id %s", uid)
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        while True:
            self.current_rg = {}
            try:
                self.logger.info("subscribe to updates with id %s", uid)
                channel = yield self.locator.routing(uid, True)
                timeout = 1
                while True:
                    new = yield channel.rx.get()
                    if isinstance(new, EmptyResponse):
                        # it means the cocaine runtime has been stopped
                        self.logger.error("locator sends close")
                        break
                    updates = scan_for_updates(self.current_rg, new)
                    # replace current
                    self.current_rg = new
                    if len(updates) == 0:
                        self.logger.info("locator sends an update message, "
                                         "but no updates have been found")
                        continue

                    self.logger.info(
                        "%d routing groups have been refreshed %s",
                        len(updates), updates)
                    for group in updates:
                        # if we have not created an instance of
                        # the group it is absent in cache
                        if group not in self.cache:
                            self.logger.debug("nothing to update in group %s",
                                              group)
                            continue

                        for app in self.cache[group]:
                            self.logger.debug(
                                "%s: move %s to the inactive queue to refresh"
                                " routing group", app.id, app.name)
                            self.migrate_from_cache_to_inactive(app, group)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                self.logger.error(
                    "error occurred while watching for group updates %s. Sleep %d",
                    err, timeout)
                yield gen.sleep(timeout)

    @gen.coroutine
    def watch_app(self, name, path):
        version = 0
        self.sampled_apps[name] = self.default_tracing_chance
        try:
            self.logger.info("start watching for sampling updates of %s", name)
            watch_channel = yield self.unicorn.subscribe(path, version)
            while True:
                value, version = yield watch_channel.rx.get()
                self.logger.info(
                    "got sampling updates for %s: version %d value %.2f", name,
                    version, value)
                try:
                    weight = float(value)
                    self.sampled_apps[name] = weight
                except ValueError as err:
                    self.logger.error(
                        "sample value %s for %s can NOT be converted: %s. Use %f",
                        value, name, err, self.default_tracing_chance)
                    self.sampled_apps[name] = self.default_tracing_chance
        except ServiceError as err:
            # verify that the err is `zookeeper: no node [-101]`
            if err.code != -101:
                self.logger.error(
                    "watching of `%s` raised an unexpected service error (cat. %d): %s",
                    name, err.category, err)
        except Exception as err:
            self.logger.error("watching of %s error: %s", name, err)
        finally:
            self.logger.info("stop watching for sampling updates of %s", name)
            self.sampled_apps.pop(name, None)
            try:
                watch_channel.tx.close()
            except Exception:
                pass

    @gen.coroutine
    def on_sampling_updates(self):
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        listing_version = 0

        while True:
            try:
                listing_channel = yield self.unicorn.children_subscribe(
                    self.tracing_conf_path, listing_version)
                while True:
                    listing_version, apps = yield listing_channel.rx.get()
                    self.logger.info(
                        "on_sampling_updates: version %d value %s",
                        listing_version, apps)
                    for app in (i for i in apps if i not in self.sampled_apps):
                        self.watch_app(app, self.tracing_conf_path + "/" + app)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                listing_version = 0
                self.logger.error(
                    "error occurred while subscribing for sampling updates %s. Sleep %d",
                    err, timeout)
                yield gen.sleep(timeout)

    @gen.coroutine
    def watch_app_timeouts(self, name, path):
        version = 0
        self.timeouts[name] = {}
        try:
            self.logger.info("start watching for timeouts updates of %s", name)
            watch_channel = yield self.unicorn.subscribe(path, version)
            while True:
                value, version = yield watch_channel.rx.get()
                self.logger.info(
                    "got timeouts updates for %s: version %d value %s", name,
                    version, value)
                if isinstance(value, dict):
                    self.timeouts[name] = value
                else:
                    self.logger.error("timeout value %s for %s is not dict",
                                      value, name)
                    self.timeouts[name] = {}
        except ServiceError as err:
            # verify that the err is `zookeeper: no node [-101]`
            if err.code != -101:
                self.logger.error(
                    "watching of `%s` raised an unexpected service error (cat. %d): %s",
                    name, err.category, err)
        except Exception as err:
            self.logger.error("watching of %s error: %s", name, err)
        finally:
            self.logger.info("stop watching for timeouts updates of %s", name)
            self.timeouts.pop(name, None)
            try:
                watch_channel.tx.close()
            except Exception:
                pass

    @gen.coroutine
    def on_timeouts_updates(self):
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        listing_version = 0

        while True:
            try:
                listing_channel = yield self.unicorn.children_subscribe(
                    self.timeouts_conf_path, listing_version)
                while True:
                    listing_version, apps = yield listing_channel.rx.get()
                    self.logger.info(
                        "on_timeouts_updates: version %d value %s",
                        listing_version, apps)
                    for app in (i for i in apps if i not in self.timeouts):
                        self.watch_app_timeouts(
                            app, self.timeouts_conf_path + "/" + app)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                listing_version = 0
                self.logger.error(
                    "error occurred while subscribing for sampling updates %s. Sleep %d",
                    err, timeout)
                yield gen.sleep(timeout)

    def get_timeout(self, name, event=''):
        if name in self.timeouts:
            tmts = self.timeouts[name]
            return tmts.get(event) or tmts.get('', DEFAULT_TIMEOUT)

        return DEFAULT_TIMEOUT
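
    # For example, with self.timeouts = {"app": {"": 5.0, "upload": 30.0}}
    # (hypothetical values):
    #   get_timeout("app", "upload")   -> 30.0
    #   get_timeout("app", "download") -> 5.0   (falls back to the '' default)
    #   get_timeout("other")           -> DEFAULT_TIMEOUT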

    def migrate_from_cache_to_inactive(self, app, name):
        try:
            drop_app_from_cache(self.cache, app, name)
        except Exception as err:
            self.logger.error("app %s %s: drop cache error %s", app, name, err)

        # dispose service after 3 x timeouts
        # assume that all requests will be finished
        self.io_loop.call_later(
            self.get_timeout(name) * 3,
            functools.partial(self.dispose, app, name))
        self.logger.info("app %s %s is scheduled to dispose", app, name)

    def move_to_inactive(self, app, name):
        @gen.coroutine
        def wrapper():
            active_apps = len(self.cache[name])
            self.logger.info(
                "%s: preparing to moving %s %s to an inactive queue (active %d)",
                app.id, app.name, "{0}:{1}".format(*app.address), active_apps)

            try:
                new_app = Service(name,
                                  locator=self.locator,
                                  timeout=RESOLVE_TIMEOUT)
                self.logger.info("%s: creating an instance of %s", new_app.id,
                                 name)
                yield new_app.connect()
                self.logger.info("%s: connect to an app %s endpoint %s ",
                                 new_app.id, new_app.name,
                                 "{0}:{1}".format(*new_app.address))
                timeout = (1 + random.random()) * self.refresh_period
                self.io_loop.call_later(timeout,
                                        self.move_to_inactive(new_app, name))
                # add to cache only after successfully connected
                self.cache[name].append(new_app)
            except Exception as err:
                self.logger.error("%s: unable to connect to `%s`: %s",
                                  new_app.id, name, err)
                # schedule later
                self.io_loop.call_later(self.get_timeout(name),
                                        self.move_to_inactive(app, name))
            else:
                self.logger.info("%s: move %s %s to an inactive queue", app.id,
                                 app.name, "{0}:{1}".format(*app.address))
                # current active app will be dropped here
                self.migrate_from_cache_to_inactive(app, name)

        return wrapper

    def dispose(self, app, name):
        self.logger.info("dispose service %s %s", name, app.id)
        app.disconnect()

    def resolve_group_to_version(self, name, value=None):
        """ Pick a version from a routing group using a random or provided value
            A routing group looks like (weight, version):
            {"APP": [[29431330, 'A'], [82426238, 'B'], [101760716, 'C'], [118725487, 'D'], [122951927, 'E']]}
        """
        if name not in self.current_rg:
            return name

        routing_group = self.current_rg[name]
        if len(routing_group) == 0:
            self.logger.warning("empty rounting group %s", name)
            return name

        value = value or random.randint(0, 1 << 32)
        index = upper_bound(routing_group, value)
        return routing_group[index if index < len(routing_group) else 0][1]
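
    # A worked pick with the docstring's data, assuming the weights are
    # ascending cumulative values and upper_bound behaves like
    # bisect.bisect_right:
    #   value = 90000000  -> index 2 -> version 'C'
    #   value = 130000000 -> index 5 (past the end) -> wraps to version 'A'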

    def ping(self, request):
        if self.locator_status:
            fill_response_in(request, httplib.OK, "OK", "OK")
            return

        fill_response_in(request, httplib.SERVICE_UNAVAILABLE,
                         httplib.responses[httplib.SERVICE_UNAVAILABLE],
                         "Failed", proxy_error_headers())

    @context
    @gen.coroutine
    def __call__(self, request):
        for plugin in self.plugins:
            if plugin.match(request):
                request.logger.info('processed by %s plugin', plugin.name())
                try:
                    yield plugin.process(request)
                except PluginNoSuchApplication as err:
                    fill_response_in(request, NO_SUCH_APP,
                                     "No such application", str(err),
                                     proxy_error_headers())
                except PluginApplicationError:
                    message = "application error"
                    fill_response_in(
                        request, httplib.INTERNAL_SERVER_ERROR,
                        httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                        message, proxy_error_headers())
                except ProxyInvalidRequest:
                    if request.path == "/ping":
                        self.ping(request)
                    else:
                        fill_response_in(request, httplib.NOT_FOUND,
                                         httplib.responses[httplib.NOT_FOUND],
                                         "Invalid url", proxy_error_headers())
                except Exception as err:
                    request.logger.exception('plugin %s returned error: %s',
                                             plugin.name(), err)
                    message = "unknown error"
                    fill_response_in(
                        request, httplib.INTERNAL_SERVER_ERROR,
                        httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                        message, proxy_error_headers())
                return

        try:
            name, event = extract_app_and_event(request)
        except ProxyInvalidRequest:
            if request.path == "/ping":
                self.ping(request)
            else:
                fill_response_in(request, httplib.NOT_FOUND,
                                 httplib.responses[httplib.NOT_FOUND],
                                 "Invalid url", proxy_error_headers())
            return

        if getattr(request, "traceid", None) is not None:
            tracing_chance = self.sampled_apps.get(name,
                                                   self.default_tracing_chance)
            rolled_dice = random.uniform(0, 100)
            request.logger.debug("tracing_chance %f, rolled dice %f",
                                 tracing_chance, rolled_dice)
            if tracing_chance < rolled_dice:
                request.logger.info('stop tracing the request')
                request.logger = NULLLOGGER
                request.traceid = None

        if self.sticky_header in request.headers:
            seed = request.headers.get(self.sticky_header)
            seed_value = header_to_seed(seed)
            request.logger.info(
                'sticky_header has been found: name %s, value %s, seed %d',
                name, seed, seed_value)
            name = self.resolve_group_to_version(name, seed_value)

        app = yield self.get_service(name, request)

        if app is None:
            message = "current application %s is unavailable" % name
            fill_response_in(request, NO_SUCH_APP, "No Such Application",
                             message, proxy_error_headers(name))
            return

        try:
            yield self.process(request, name, app, event,
                               pack_httprequest(request), self.reelect_app)
        except Exception as err:
            request.logger.exception("error during processing request %s", err)
            fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                             httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                             "UID %s: %s" % (request.traceid, str(err)),
                             proxy_error_headers(name))

        request.logger.info("exit from process")

    def info(self):
        return {
            'services': {
                'cache': dict(((k, len(v)) for k, v in self.cache.items()))
            },
            'requests': {
                'inprogress': self.requests_in_progress,
                'total': self.requests_total
            },
            'errors': {
                'disconnections': self.requests_disconnections
            },
            'sampling': self.sampled_apps
        }

    @gen.coroutine
    def reelect_app(self, request, app):
        cache_size = len(self.cache[app.name])
        if cache_size < self.spool_size:
            request.logger.info(
                "spool is not full. Create a new application instance")
            app = yield self.get_service(app.name, request)
        elif cache_size == 1:
            # NOTE: if we have spool_size 1, the same app will be picked
            # Probably we can create a new one and mark the old one inactive
            request.logger.warning(
                "spool size is limited by 1, cannot pick a new instance of th app. Use the old one"
            )
            # pass
        else:
            request.logger.info("pick a random instance of the application")
            try:
                index = self.cache[app.name].index(app)
                request.logger.info("the app is located in cache at pos %d",
                                    index)
                if cache_size == 2:  # shortcut
                    picked = (index + 1) % 2
                else:
                    picked = index
                    while picked == index:
                        picked = random.randint(0, cache_size - 1)

                request.logger.info("an instance at pos %d has been picked",
                                    index)
                app = self.cache[app.name][picked]
            except ValueError:
                app = random.choice(self.cache[app.name])
        raise gen.Return(app)
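
    # Re-election above: with exactly two cached instances the (index + 1) % 2
    # shortcut deterministically flips to the other one; with more, random
    # indices are re-rolled until they differ from the current position.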

    @gen.coroutine
    def process(self,
                request,
                name,
                app,
                event,
                data,
                reelect_app_fn,
                timeout=None):
        if timeout is None:
            timeout = self.get_timeout(name, event)
        request.logger.info(
            "start processing event `%s` for an app `%s` (appid: %s) after %.3f ms with timeout %f",
            event, app.name, app.id,
            request.request_time() * 1000, timeout)
        # allow to reconnect this amount of times.
        attempts = 2  # make it configurable

        parentid = 0

        if request.traceid is not None:
            traceid = int(request.traceid, 16)
            trace = Trace(traceid=traceid, spanid=traceid, parentid=parentid)
        else:
            trace = None

        headers = {}
        if 'authorization' in request.headers:
            headers['authorization'] = request.headers['authorization']

        while attempts > 0:
            body_parts = []
            attempts -= 1
            try:
                request.logger.debug("%s: enqueue event (attempt %d)", app.id,
                                     attempts)
                channel = yield app.enqueue(event, trace=trace, **headers)
                request.logger.debug("%s: send event data (attempt %d)",
                                     app.id, attempts)
                yield channel.tx.write(msgpack.packb(data), trace=trace)
                yield channel.tx.close(trace=trace)
                request.logger.debug(
                    "%s: waiting for a code and headers (attempt %d)", app.id,
                    attempts)
                code_and_headers = yield channel.rx.get(timeout=timeout)
                request.logger.debug(
                    "%s: code and headers have been received (attempt %d)",
                    app.id, attempts)
                code, raw_headers = msgpack.unpackb(code_and_headers)
                headers = httputil.HTTPHeaders(raw_headers)

                cocaine_http_proto_version = headers.get(
                    X_COCAINE_HTTP_PROTO_VERSION)
                if cocaine_http_proto_version is None or cocaine_http_proto_version == "1.0":
                    cocaine_http_proto_version = "1.0"

                    def stop_condition(body):
                        return isinstance(body, EmptyResponse)
                elif cocaine_http_proto_version == "1.1":

                    def stop_condition(body):
                        return isinstance(body,
                                          EmptyResponse) or len(body) == 0
                else:
                    raise Exception(
                        "unsupported X-Cocaine-HTTP-Proto-Version: %s" %
                        cocaine_http_proto_version)

                while True:
                    body = yield channel.rx.get(timeout=timeout)
                    if stop_condition(body):
                        request.logger.info("%s: body finished (attempt %d)",
                                            app.id, attempts)
                        break

                    request.logger.debug(
                        "%s: received %d bytes as a body chunk (attempt %d)",
                        app.id, len(body), attempts)
                    body_parts.append(body)
            except gen.TimeoutError as err:
                request.logger.error("%s %s:  %s", app.id, name, err)
                message = "UID %s: application `%s` error: TimeoutError" % (
                    request.traceid, name)
                fill_response_in(request, httplib.GATEWAY_TIMEOUT,
                                 httplib.responses[httplib.GATEWAY_TIMEOUT],
                                 message, proxy_error_headers(name))

            except (DisconnectionError, StreamClosedError) as err:
                self.requests_disconnections += 1
                # Probably it's dangerous to retry requests all the time.
                # I must find a way to determine whether it failed while writing
                # or while reading the reply, and retry only write failures.
                request.logger.error("%s: %s", app.id, err)
                if attempts <= 0:
                    request.logger.error("%s: no more attempts", app.id)
                    fill_response_in(
                        request, httplib.INTERNAL_SERVER_ERROR,
                        httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                        "UID %s: Connection problem" % request.traceid,
                        proxy_error_headers(name))
                    return

                # It seems the on_close callback is not called when connecting through IPVS.
                # We detect the disconnection here to avoid unnecessary errors.
                # Try to reconnect and give the request another go.
                try:
                    start_time = time.time()
                    reconn_timeout = timeout - request.request_time()
                    request.logger.info("%s: connecting with timeout %.fms",
                                        app.id, reconn_timeout * 1000)
                    yield gen.with_timeout(start_time + reconn_timeout,
                                           app.connect(request.traceid))
                    reconn_time = time.time() - start_time
                    request.logger.info("%s: connecting took %.3fms", app.id,
                                        reconn_time * 1000)
                except Exception as err:
                    if attempts <= 0:
                        # no attempts left, so quit here
                        request.logger.error("%s: %s (no attempts left)",
                                             app.id, err)
                        message = "UID %s: application `%s` error: %s" % (
                            request.traceid, name, str(err))
                        fill_response_in(
                            request, httplib.INTERNAL_SERVER_ERROR,
                            httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                            message, proxy_error_headers(name))
                        return

                    request.logger.error(
                        "%s: unable to reconnect: %s (%d attempts left)",
                        app.id, err, attempts)
                # We have another attempt to process the request.
                # Jump to the beginning of `while attempts > 0`, whether we connected
                # successfully or failed to connect.
                continue

            except ServiceError as err:
                # if the application has been restarted, we get broken pipe code
                # and system category
                if err.category in SYSTEMCATEGORY and err.code == EAPPSTOPPED:
                    request.logger.error(
                        "%s: the application has been restarted", app.id)
                    app.disconnect()
                    continue

                elif err.category in OVERSEERCATEGORY and err.code == EQUEUEISFULL:
                    request.logger.error(
                        "%s: queue is full. Pick another application instance",
                        app.id)
                    app = yield reelect_app_fn(request, app)
                    continue

                request.logger.error("%s: service error: [%d, %d] %s", app.id,
                                     err.category, err.code, err.reason)
                message = "UID %s: application `%s` error: %s" % (
                    request.traceid, name, str(err))
                fill_response_in(
                    request, httplib.INTERNAL_SERVER_ERROR,
                    httplib.responses[httplib.INTERNAL_SERVER_ERROR], message,
                    proxy_error_headers(name))

            except Exception as err:
                request.logger.exception("%s: %s", app.id, err)
                message = "UID %s: unknown `%s` error: %s" % (request.traceid,
                                                              name, str(err))
                fill_response_in(
                    request, httplib.INTERNAL_SERVER_ERROR,
                    httplib.responses[httplib.INTERNAL_SERVER_ERROR], message,
                    proxy_error_headers(name))
            else:
                message = ''.join(body_parts)
                headers['X-Cocaine-Application'] = name
                fill_response_in(request, code,
                                 httplib.responses.get(code, httplib.OK),
                                 message, headers)
            # to return from all errors except Disconnection
            # or receiving a good reply
            return

    @gen.coroutine
    def get_service(self, name, request):
        # cache isn't full for the current application
        if len(self.cache[name]) < self.spool_size:
            logger = request.logger
            try:
                app = Service(name,
                              locator=self.locator,
                              timeout=RESOLVE_TIMEOUT)
                logger.info("%s: creating an instance of %s", app.id, name)
                self.cache[name].append(app)
                yield app.connect(request.traceid)
                logger.info("%s: connect to an app %s endpoint %s ", app.id,
                            app.name, "{0}:{1}".format(*app.address))

                timeout = (1 + random.random()) * self.refresh_period
                self.io_loop.call_later(timeout,
                                        self.move_to_inactive(app, name))
            except Exception as err:
                logger.error("%s: unable to connect to `%s`: %s", app.id, name,
                             err)
                drop_app_from_cache(self.cache, app, name)
                raise gen.Return()
            else:
                raise gen.Return(app)

        # get an instance from cache
        chosen = random.choice(self.cache[name])
        raise gen.Return(chosen)
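
CocaineProxy.__call__ accepts a raw request object, which matches tornado's callable request-callback interface, so the proxy can presumably be served directly by an HTTPServer. A minimal wiring sketch (the port and locator hosts are placeholders; the exact production setup is outside this listing):

import tornado.httpserver
import tornado.ioloop

proxy = CocaineProxy(locators=["host1:10053", "host2:10053"])
server = tornado.httpserver.HTTPServer(proxy)
server.listen(8080)
tornado.ioloop.IOLoop.current().start()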
Example #20
 def __init__(self):
     self.storage = Service("storage")
     self.node = Service("node")
     self.locator = Locator()
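     # no locator endpoints are passed here, so these services presumably fall
     # back to the framework default (localhost:10053, as elsewhere above)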
Example #21
def test_service_attribute_error():
    io = IOLoop.current()
    locator = Locator([("localhost", 10053)], io_loop=io)
    locator.random_attribute().get()
Example #23
class CocaineProxy(object):
    def __init__(self, locators=("localhost:10053",),
                 cache=DEFAULT_SERVICE_CACHE_COUNT,
                 request_id_header="", sticky_header="X-Cocaine-Sticky",
                 forcegen_request_header=False,
                 default_tracing_chance=DEFAULT_TRACING_CHANCE,
                 configuration_service="unicorn",
                 tracing_conf_path="/zipkin_sampling",
                 ioloop=None, **config):
        # stats
        self.requests_in_progress = 0
        self.requests_disconnections = 0
        self.requests_total = 0

        self.io_loop = ioloop or tornado.ioloop.IOLoop.current()
        self.service_cache_count = cache
        self.spool_size = int(self.service_cache_count * 1.5)
        self.refresh_period = config.get("refresh_timeout", DEFAULT_REFRESH_PERIOD)
        self.timeouts = config.get("timeouts", {})
        self.locator_endpoints = [parse_locators_endpoints(i) for i in locators]
        # it's initialized after start
        # to avoid an io_loop creation before fork
        self.locator = Locator(endpoints=self.locator_endpoints)
        # it's used to reply to the `ping` method
        self.locator_status = False

        # active applications
        self.cache = collections.defaultdict(list)

        self.logger = logging.getLogger("cocaine.proxy.general")
        self.access_log = logging.getLogger("cocaine.proxy.access")
        self.access_log.propagate = False
        self.logger.info("locators %s",
                         ','.join("%s:%d" % (h, p) for h, p in self.locator_endpoints))

        self.sticky_header = sticky_header

        self.logger.info("conf path in `%s` configuration service: %s",
                         configuration_service, tracing_conf_path)
        self.unicorn = Service(configuration_service, locator=self.locator)
        self.sampled_apps = {}
        self.default_tracing_chance = default_tracing_chance
        self.tracing_conf_path = tracing_conf_path

        self.io_loop.add_future(self.on_sampling_updates(),
                                lambda x: self.logger.error("the sample updater must not exit"))

        if request_id_header:
            self.get_request_id = functools.partial(get_request_id, request_id_header,
                                                    force=forcegen_request_header)
        else:
            self.get_request_id = generate_request_id

        # post the watcher for routing groups
        self.io_loop.add_future(self.on_routing_groups_update(),
                                lambda x: self.logger.error("the updater must not exit"))
        # run the infinite locator health status check
        self.locator_health_check()

    @gen.coroutine
    def locator_health_check(self, period=5):
        wait_timeout = datetime.timedelta(seconds=period)
        while True:
            try:
                self.logger.debug("check health status of locator via cluster method")
                channel = yield gen.with_timeout(wait_timeout, self.locator.cluster())
                cluster = yield gen.with_timeout(wait_timeout, channel.rx.get())
                self.locator_status = True
                self.logger.debug("dumped cluster %s", cluster)
                yield gen.sleep(period)
            except Exception as err:
                self.logger.error("health status check failed: %s", err)
                self.locator_status = False
                yield gen.sleep(1)

    @gen.coroutine
    def on_routing_groups_update(self):
        uid = gen_uid()
        self.logger.info("generate new uniqque id %s", uid)
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        while True:
            current = {}
            try:
                self.logger.info("subscribe to updates with id %s", uid)
                channel = yield self.locator.routing(uid, True)
                timeout = 1
                while True:
                    new = yield channel.rx.get()
                    if isinstance(new, EmptyResponse):
                        # it means the cocaine runtime has been stopped
                        self.logger.error("locator sends close")
                        break
                    updates = scan_for_updates(current, new)
                    # replace current
                    current = new
                    if len(updates) == 0:
                        self.logger.info("locator sends an update message, "
                                         "but no updates have been found")
                        continue

                    self.logger.info("%d routing groups have been refreshed %s",
                                     len(updates), updates)
                    for group in updates:
                        # if we have not created an instance of
                        # the group it is absent in cache
                        if group not in self.cache:
                            self.logger.debug("nothing to update in group %s", group)
                            continue

                        for app in self.cache[group]:
                            self.logger.debug("%s: move %s to the inactive queue to refresh"
                                              " routing group", app.id, app.name)
                            self.migrate_from_cache_to_inactive(app, group)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                self.logger.error("error occured while watching for group updates %s. Sleep %d",
                                  err, timeout)
                yield gen.sleep(timeout)

    @gen.coroutine
    def watch_app(self, name, path):
        version = 0
        self.sampled_apps[name] = self.default_tracing_chance
        try:
            self.logger.info("start watching for sampling updates of %s", name)
            watch_channel = yield self.unicorn.subscribe(path, version)
            while True:
                value, version = yield watch_channel.rx.get()
                self.logger.info("got sampling updates for %s: version %d value %.2f", name, version, value)
                try:
                    weight = float(value)
                    self.sampled_apps[name] = weight
                except ValueError as err:
                    self.logger.error("sample value %s for %s can NOT be converted: %s. Use %f",
                                      value, name, err, self.default_tracing_chance)
                    self.sampled_apps[name] = self.default_tracing_chance
        except ServiceError as err:
            # verify that the err is `zookeeper: no node [-101]`
            if err.code != -101:
                self.logger.error("watching of `%s` raised an unexpected service error (cat. %d): %s", name, err.category, err)
        except Exception as err:
            self.logger.error("watching of %s error: %s", name, err)
        finally:
            self.logger.info("stop watching for sampling updates of %s", name)
            self.sampled_apps.pop(name, None)
            try:
                watch_channel.tx.close()
            except Exception:
                pass

    @gen.coroutine
    def on_sampling_updates(self):
        maximum_timeout = 32  # sec
        timeout = 1  # sec
        listing_version = 0

        while True:
            try:
                listing_channel = yield self.unicorn.children_subscribe(self.tracing_conf_path, listing_version)
                while True:
                    listing_version, apps = yield listing_channel.rx.get()
                    self.logger.info("on_sampling_updates: version %d value %s", listing_version, apps)
                    for app in (i for i in apps if i not in self.sampled_apps):
                        self.watch_app(app, self.tracing_conf_path + "/" + app)
            except Exception as err:
                timeout = min(timeout << 1, maximum_timeout)
                listing_version = 0
                self.logger.error("error occured while subscribing for sampling updates %s. Sleep %d",
                                  err, timeout)
                yield gen.sleep(timeout)

    def get_timeout(self, name):
        return self.timeouts.get(name, DEFAULT_TIMEOUT)

    def migrate_from_cache_to_inactive(self, app, name):
        try:
            drop_app_from_cache(self.cache, app, name)
        except Exception as err:
            self.logger.error("app %s %s: drop cache error %s", app, name, err)

        # dispose service after 3 x timeouts
        # assume that all requests will be finished
        self.io_loop.call_later(self.get_timeout(name) * 3,
                                functools.partial(self.dispose, app, name))
        self.logger.info("app %s %s is scheduled to dispose", app, name)

    def move_to_inactive(self, app, name):
        @gen.coroutine
        def wrapper():
            active_apps = len(self.cache[name])
            self.logger.info("%s: preparing to moving %s %s to an inactive queue (active %d)",
                             app.id, app.name, "{0}:{1}".format(*app.address), active_apps)

            try:
                new_app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
                self.logger.info("%s: creating an instance of %s", new_app.id, name)
                yield new_app.connect()
                self.logger.info("%s: connect to an app %s endpoint %s ",
                                 new_app.id, new_app.name, "{0}:{1}".format(*new_app.address))
                timeout = (1 + random.random()) * self.refresh_period
                self.io_loop.call_later(timeout, self.move_to_inactive(new_app, name))
                # add to cache only after successfully connected
                self.cache[name].append(new_app)
            except Exception as err:
                self.logger.error("%s: unable to connect to `%s`: %s", new_app.id, name, err)
                # schedule later
                self.io_loop.call_later(self.get_timeout(name), self.move_to_inactive(app, name))
            else:
                self.logger.info("%s: move %s %s to an inactive queue",
                                 app.id, app.name, "{0}:{1}".format(*app.address))
                # current active app will be dropped here
                self.migrate_from_cache_to_inactive(app, name)

        return wrapper

    def dispose(self, app, name):
        self.logger.info("dispose service %s %s", name, app.id)
        app.disconnect()

    @context
    @gen.coroutine
    def __call__(self, request):
        if "X-Cocaine-Service" in request.headers and "X-Cocaine-Event" in request.headers:
            request.logger.debug('dispatch by headers')
            name = request.headers['X-Cocaine-Service']
            event = request.headers['X-Cocaine-Event']
        else:
            request.logger.debug('dispatch by uri')
            match = URL_REGEX.match(request.uri)
            if match is None:
                if request.path == "/ping":
                    if self.locator_status:
                        fill_response_in(request, httplib.OK, "OK", "OK")
                    else:
                        fill_response_in(request, httplib.SERVICE_UNAVAILABLE,
                                         httplib.responses[httplib.SERVICE_UNAVAILABLE],
                                         "Failed", proxy_error_headers())
                else:
                    fill_response_in(request, httplib.NOT_FOUND,
                                     httplib.responses[httplib.NOT_FOUND],
                                     "Invalid url", proxy_error_headers())
                return

            name, event, other = match.groups()
            if name == '' or event == '':
                fill_response_in(request, httplib.BAD_REQUEST,
                                 httplib.responses[httplib.BAD_REQUEST],
                                 "Proxy invalid request", proxy_error_headers())
                return

            # Drop the app name and the event name from the query
            if not other.startswith('/'):
                other = "/" + other
            request.uri = other
            request.path, _, _ = other.partition("?")

        if getattr(request, "traceid", None) is not None:
            tracing_chance = self.sampled_apps.get(name, self.default_tracing_chance)
            rolled_dice = random.uniform(0, 100)
            request.logger.debug("tracing_chance %f, rolled dice %f", tracing_chance, rolled_dice)
            if tracing_chance < rolled_dice:
                request.logger.info('stop tracing the request')
                request.logger = NULLLOGGER
                request.traceid = None

        if self.sticky_header not in request.headers:
            app = yield self.get_service(name, request)
        else:
            seed = request.headers.get(self.sticky_header)
            request.logger.info('sticky_header has been found: %s', seed)
            app = yield self.get_service_with_seed(name, seed, request)

        if app is None:
            message = "current application %s is unavailable" % name
            fill_response_in(request, NO_SUCH_APP, "No Such Application",
                             message, proxy_error_headers())
            return

        try:
            yield self.process(request, name, app, event, pack_httprequest(request))
        except Exception as err:
            request.logger.error("error during processing request %s", err)
            fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                             httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                             "UID %s: %s" % (request.traceid, str(err)), proxy_error_headers())

        request.logger.info("exit from process")

    def info(self):
        return {'services': {'cache': dict(((k, len(v)) for k, v in self.cache.items()))},
                'requests': {'inprogress': self.requests_in_progress,
                             'total': self.requests_total},
                'errors': {'disconnections': self.requests_disconnections},
                'sampling': self.sampled_apps}

    @gen.coroutine
    def process(self, request, name, app, event, data):
        request.logger.info("start processing event `%s` for an app `%s` (appid: %s) after %.3f ms",
                            event, app.name, app.id, request.request_time() * 1000)
        timeout = self.get_timeout(name)
        # allow to reconnect this amount of times.
        attempts = 2  # make it configurable

        parentid = 0
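        # the proxy starts a fresh trace: span id equals trace id, parent id stays zero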

        if request.traceid is not None:
            traceid = int(request.traceid, 16)
            trace = Trace(traceid=traceid, spanid=traceid, parentid=parentid)
        else:
            trace = None

        while attempts > 0:
            headers = {}
            body_parts = []
            attempts -= 1
            try:
                request.logger.debug("%s: enqueue event (attempt %d)", app.id, attempts)
                channel = yield app.enqueue(event, trace=trace)
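                # tx carries the request body to the app; rx yields the status/headers frame first, then body chunks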
                request.logger.debug("%s: send event data (attempt %d)", app.id, attempts)
                yield channel.tx.write(msgpack.packb(data), trace=trace)
                yield channel.tx.close(trace=trace)
                request.logger.debug("%s: waiting for a code and headers (attempt %d)",
                                     app.id, attempts)
                code_and_headers = yield channel.rx.get(timeout=timeout)
                request.logger.debug("%s: code and headers have been received (attempt %d)",
                                     app.id, attempts)
                code, raw_headers = msgpack.unpackb(code_and_headers)
                headers = httputil.HTTPHeaders(raw_headers)

                cocaine_http_proto_version = headers.get(X_COCAINE_HTTP_PROTO_VERSION)
                if cocaine_http_proto_version is None or cocaine_http_proto_version == "1.0":
                    cocaine_http_proto_version = "1.0"

                    def stop_condition(body):
                        return isinstance(body, EmptyResponse)
                elif cocaine_http_proto_version == "1.1":
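                    # proto 1.1: an empty chunk also marks the end of the body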
                    def stop_condition(body):
                        return isinstance(body, EmptyResponse) or len(body) == 0
                else:
                    raise Exception("unsupported X-Cocaine-HTTP-Proto-Version: %s" % cocaine_http_proto_version)

                while True:
                    body = yield channel.rx.get(timeout=timeout)
                    if stop_condition(body):
                        request.logger.info("%s: body finished (attempt %d)", app.id, attempts)
                        break

                    request.logger.debug("%s: received %d bytes as a body chunk (attempt %d)",
                                         app.id, len(body), attempts)
                    body_parts.append(body)
            except gen.TimeoutError as err:
                request.logger.error("%s %s:  %s", app.id, name, err)
                message = "UID %s: application `%s` error: TimeoutError" % (request.traceid, name)
                fill_response_in(request, httplib.GATEWAY_TIMEOUT,
                                 httplib.responses[httplib.GATEWAY_TIMEOUT],
                                 message, proxy_error_headers())

            except (DisconnectionError, StreamClosedError) as err:
                self.requests_disconnections += 1
                # Retrying unconditionally may be dangerous: we should determine whether
                # the failure happened while writing the request or while reading the
                # reply, and retry only write failures.
                request.logger.error("%s: %s", app.id, err)
                if attempts <= 0:
                    request.logger.error("%s: no more attempts", app.id)
                    fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                     httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                     "UID %s: Connection problem" % request.traceid,
                                     proxy_error_headers())
                    return

                # The on_close callback does not seem to fire when connecting through IPVS,
                # so the disconnection is detected here to avoid spurious errors.
                # Try to reconnect and give the request another go.
                try:
                    start_time = time.time()
                    reconn_timeout = timeout - request.request_time()
                    request.logger.info("%s: connecting with timeout %.fms", app.id, reconn_timeout * 1000)
                    yield gen.with_timeout(start_time + reconn_timeout, app.connect(request.traceid))
                    reconn_time = time.time() - start_time
                    request.logger.info("%s: connecting took %.3fms", app.id, reconn_time * 1000)
                except Exception as err:
                    if attempts <= 0:
                        # no attempts left, so give up here
                        request.logger.error("%s: %s (no attempts left)", app.id, err)
                        message = "UID %s: application `%s` error: %s" % (request.traceid, name, str(err))
                        fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                         httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                         message, proxy_error_headers())
                        return

                    request.logger.error("%s: unable to reconnect: %s (%d attempts left)",
                                         err, attempts)
                # We still have an attempt left to process the request.
                # Jump back to the beginning of `while attempts > 0`, whether the
                # reconnect succeeded or failed.
                continue

            except ServiceError as err:
                # if the application has been restarted, we get broken pipe code
                # and system category
                if err.category in SYSTEMCATEGORY and err.code == EAPPSTOPPED:
                    request.logger.error("%s: the application has been restarted", app.id)
                    app.disconnect()
                    continue

                elif err.category in OVERSEERCATEGORY and err.code == EQUEUEISFULL:
                    request.logger.error("%s: queue is full. Pick another application instance", app.id)
                    cache_size = len(self.cache[app.name])
                    if cache_size < self.spool_size:
                        request.logger.info("spool is not full. Create a new application instance")
                        app = yield self.get_service(app.name, request)
                    elif cache_size == 1:
                        # NOTE: with spool_size == 1 the same app will be picked again.
                        # We could instead create a new instance and mark the old one inactive.
                        request.logger.warning("spool size is limited to 1, cannot pick a new instance of the app; using the old one")
                    else:
                        request.logger.info("pick a random instance of the application")
                        try:
                            index = self.cache[app.name].index(app)
                            request.logger.info("the app is located in cache at pos %d", index)
                            if cache_size == 2:  # shortcut
                                picked = (index + 1) % 2
                            else:
                                picked = index
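                                # re-roll until an index different from the current one comes up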
                                while picked == index:
                                    picked = random.randint(0, cache_size - 1)

                            request.logger.info("an instance at pos %d has been picked", index)
                            app = self.cache[app.name][picked]
                        except ValueError:
                            app = random.choice(self.cache[app.name])

                    continue

                request.logger.error("%s: service error: [%d, %d] %s", app.id, err.category, err.code, err.reason)
                message = "UID %s: application `%s` error: %s" % (request.traceid, name, str(err))
                fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                 httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                 message, proxy_error_headers())

            except Exception as err:
                request.logger.error("%s: %s", app.id, err)
                message = "UID %s: unknown `%s` error: %s" % (request.traceid, name, str(err))
                fill_response_in(request, httplib.INTERNAL_SERVER_ERROR,
                                 httplib.responses[httplib.INTERNAL_SERVER_ERROR],
                                 message, proxy_error_headers())
            else:
                message = ''.join(body_parts)
                fill_response_in(request, code,
                                 httplib.responses.get(code, httplib.OK),
                                 message, headers)
            # return on any error except a disconnection,
            # or after receiving a good reply
            return

    @gen.coroutine
    def get_service(self, name, request):
        # the cache for this application is not yet full: create a new instance
        if len(self.cache[name]) < self.spool_size:
            logger = request.logger
            try:
                app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
                logger.info("%s: creating an instance of %s", app.id, name)
                self.cache[name].append(app)
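                # the instance is registered before connecting and dropped from the cache below if the connect fails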
                yield app.connect(request.traceid)
                logger.info("%s: connect to an app %s endpoint %s ",
                            app.id, app.name, "{0}:{1}".format(*app.address))

                timeout = (1 + random.random()) * self.refresh_period
                self.io_loop.call_later(timeout, self.move_to_inactive(app, name))
            except Exception as err:
                logger.error("%s: unable to connect to `%s`: %s", app.id, name, err)
                drop_app_from_cache(self.cache, app, name)
                raise gen.Return()
            else:
                raise gen.Return(app)

        # get an instance from cache
        chosen = random.choice(self.cache[name])
        raise gen.Return(chosen)

    @gen.coroutine
    def get_service_with_seed(self, name, seed, request):
        logger = request.logger
        app = Service(name, seed=seed, locator=self.locator)
        try:
            logger.info("%s: creating an instance of %s, seed %s", app.id, name, seed)
            yield app.connect(request.traceid)
        except Exception as err:
            logger.error("%s: unable to connect to `%s`: %s", app.id, name, err)
            raise gen.Return()

        raise gen.Return(app)
Example #24
0
def test_service_attribute_error():
    io = CocaineIO.instance()
    locator = Locator("localhost", 10053, loop=io)
    locator.random_attribute().get()
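
# A hedged sketch, not part of the original example: under pytest the expected
# AttributeError could be asserted explicitly instead of letting it propagate
# (assuming pytest is available and AttributeError is indeed what the call raises).
import pytest

def test_service_attribute_error_explicit():
    io = CocaineIO.instance()
    locator = Locator("localhost", 10053, loop=io)
    # hypothetical variant of the test above
    with pytest.raises(AttributeError):
        locator.random_attribute().get()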