Exemplo n.º 1
0
def _setup(_halt_event, state):
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an execption event from setup
    state["event-push-client"] = EventPushClient(state["zmq-context"], "data_writer")

    log.info("binding resilient-server to %s" % (_data_writer_address,))
    state["resilient-server"] = ResilientServer(state["zmq-context"], _data_writer_address, state["receive-queue"])
    state["resilient-server"].register(state["pollster"])

    state["queue-dispatcher"] = DequeDispatcher(state, state["receive-queue"], _dispatch_table)

    central_connection = get_central_connection()
    state["cluster-row"] = get_cluster_row(central_connection)
    state["node-rows"] = get_node_rows(central_connection, state["cluster-row"].id)
    central_connection.close()

    state["node-id-dict"] = dict([(node_row.name, node_row.id) for node_row in state["node-rows"]])

    state["database-connection"] = get_node_local_connection()

    # Ticket #1646 mark output value files as closed at startup
    mark_value_files_as_closed(state["database-connection"])

    state["writer"] = Writer(state["database-connection"], _repository_path)

    state["stats-reporter"] = StatsReporter(state)

    state["event-push-client"].info("program-start", "data_writer starts")

    return [
        (state["pollster"].run, time.time()),
        (state["queue-dispatcher"].run, time.time()),
        (state["stats-reporter"].run, state["stats-reporter"].next_run()),
    ]
def _setup(_halt_event, state):
    log = logging.getLogger("_setup")
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an execption event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "anti_entropy_server"
    )

    state["central-database-connection"] = get_central_connection()
    state["local-database-connection"] = get_node_local_connection()

    state["cluster-row"] = get_cluster_row(
        state["central-database-connection"] 
    )

    local_anti_entropy_server_address = None
    for node_name, address in zip(_node_names, _anti_entropy_server_addresses):
        if node_name == _local_node_name:
            local_anti_entropy_server_address = address
            break
    assert local_anti_entropy_server_address is not None

    log.info("binding resilient-server to %s" % (
        local_anti_entropy_server_address, 
    ))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        local_anti_entropy_server_address,
        state["receive-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    log.info("binding pull-server to %s" % (
        _anti_entropy_server_pipeline_address, 
    ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _anti_entropy_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    state["anti-entropy-clients"] = list()
    for node_name, anti_entropy_server_address in zip(
        _node_names, _anti_entropy_server_addresses
    ):
        resilient_client = ResilientClient(
                state["zmq-context"],
                state["pollster"],
                node_name,
                anti_entropy_server_address,
                _client_tag,
                _anti_entropy_server_pipeline_address
            )
        state["anti-entropy-clients"].append(resilient_client)
        status_checkers.append(
            (resilient_client.run, time.time() + random.random() * 60.0, )
        )        

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["collection-list-requestor"] = CollectionListRequestor(state)
    state["consistency-check-starter"] = ConsistencyCheckStarter(
        state, _start_consistency_check
    )
    state["retry-manager"] = RetryManager(
        state, _start_consistency_check
    )
    state["state-cleaner"] = StateCleaner(state)

    state["event-push-client"].info(
        "program-start", "anti_entropy_server starts"
    )  

    # start the collection list requestor right away
    # start the consistency check starter a little later, when
    # we presumably have some collection ids
    timer_driven_callbacks = [
        (state["pollster"].run, time.time(), ), 
        (state["queue-dispatcher"].run, time.time(), ), 
        (state["collection-list-requestor"].run, time.time(), ), 
        (state["consistency-check-starter"].run, time.time()+60.0, ), 
        (state["retry-manager"].run, state["retry-manager"].next_run(), ), 
        (state["state-cleaner"].run, state["state-cleaner"].next_run(), ), 
    ] 
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
def _setup(_halt_event, state):
    log = logging.getLogger("_setup")
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an execption event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "handoff_server"
    )

    central_connection = get_central_connection()
    state["cluster-row"] = get_cluster_row(central_connection)
    state["node-rows"] = get_node_rows(
        central_connection, state["cluster-row"].id
    )
    central_connection.close()

    state["node-id-dict"] = dict(
        [(node_row.name, node_row.id, ) for node_row in state["node-rows"]]
    )
    state["node-name-dict"] = dict(
        [(node_row.id, node_row.name, ) for node_row in state["node-rows"]]
    )

    state["database-connection"] = get_node_local_connection()
    for node_row, handoff_server_address in zip(
        state["node-rows"], _handoff_server_addresses
    ):
        if node_row.name == _local_node_name:
            log.info("binding resilient-server to %s" % (
                handoff_server_address, 
            ))
            state["resilient-server"] = ResilientServer(
                state["zmq-context"],
                handoff_server_address,
                state["receive-queue"]
            )
            state["resilient-server"].register(state["pollster"])
        else:
            handoff_server_client = ResilientClient(
                state["zmq-context"],
                state["pollster"],
                node_row.name,
                handoff_server_address,
                _client_tag,
                _handoff_server_pipeline_address
            )
            state["handoff-server-clients"].append(handoff_server_client)
            # don't run all the status checkers at the same time
            status_checkers.append(
                (handoff_server_client.run, 
                 time.time() + random.random() * 60.0, )
            )        

    log.info("binding pull-server to %s" % (_handoff_server_pipeline_address, ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _handoff_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    for node_row, data_reader_address in zip(
        state["node-rows"], _data_reader_addresses
    ):
        data_reader_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_reader_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["reader-client-dict"][data_reader_client.server_node_name] = \
                data_reader_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_reader_client.run, time.time() + random.random() * 60.0, )
        )        

    for node_row, data_writer_address in zip(
        state["node-rows"], _data_writer_addresses
    ):
        data_writer_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_writer_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["writer-client-dict"][data_writer_client.server_node_name] = \
                data_writer_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_writer_client.run, time.time() + random.random() * 60.0, )
        )        

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["handoff-requestor"] = HandoffRequestor(state, _local_node_name)
    state["handoff-starter"] = HandoffStarter(
        state, _local_node_name, state["event-push-client"]
    )

    state["event-push-client"].info("program-start", "handoff_server starts")  

    timer_driven_callbacks = [
        (state["handoff-starter"].run, state["handoff-starter"].next_run(), ),
        (state["pollster"].run, time.time(), ), 
        (state["queue-dispatcher"].run, time.time(), ), 
        # try to spread out handoff polling, if all nodes start together
        (state["handoff-requestor"].run,
            time.time() + random.random() * handoff_polling_interval)
    ] 
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
Exemplo n.º 4
0
    def __init__(self):
        self._log = logging.getLogger("WebServer")
        authenticator = SqlAuthenticator()

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._unified_id_factory = UnifiedIDFactory(
            self._central_connection,
            _get_shard_id(self._central_connection, self._cluster_row.id)
        )
        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_server_pipeline_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        # message sent to data readers and writers telling them the server
        # is (re)starting, thereby invalidating any archvies or retrieved
        # that are in progress for this node
        timestamp = create_timestamp()
        start_message = {
            "message-type"              : "web-server-start",
            "priority"                  : create_priority(),
            "unified-id"                : self._unified_id_factory.next(),
            "timestamp-repr"            : repr(timestamp),
            "source-node-name"          : _local_node_name,
        }

        self._data_writer_clients = list()
        for node_name, address in zip(_node_names, _data_writer_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_writer_clients.append(resilient_client)

        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(
                node_name, resilient_client
            )
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-server"
        )

        self._watcher = Watcher(
            _stats, 
            self._data_reader_clients,
            self._data_writer_clients,
            self._event_push_client
        )

        id_translator_keys_path = os.path.join(
            _repository_path, "id_translator_keys.pkl"
        )
        with open(id_translator_keys_path, "r") as input_file:
            id_translator_keys = pickle.load(input_file)

        self._id_translator = InternalIDTranslator(
            id_translator_keys["key"],
            id_translator_keys["hmac_key"], 
            id_translator_keys["iv_key"],
            id_translator_keys["hmac_size"]
        )
        self.application = Application(
            self._central_connection,
            self._node_local_connection,
            self._cluster_row,
            self._unified_id_factory,
            self._id_translator,
            self._data_writer_clients,
            self._data_readers,
            authenticator,
            self._accounting_client,
            self._event_push_client,
            _stats
        )
        self.wsgi_server = WSGIServer(
            (_web_server_host, _web_server_port), 
            application=self.application,
            backlog=_wsgi_backlog
        )
    def setUp(self):
        if not hasattr(self, "_log"):
            self._log = logging.getLogger("TestHandoffServer")

        self.tearDown()
        database_connection = get_central_connection()
        cluster_row = get_cluster_row(database_connection)
        node_rows = get_node_rows(database_connection, cluster_row.id)
        database_connection.close()

        self._key_generator = generate_key()

        self._event_publisher_processes = list()
        self._data_writer_processes = list()
        self._data_reader_processes = list()
        self._handoff_server_processes = list()

        for i in xrange(_node_count):
            node_name = _generate_node_name(i)
            repository_path = _repository_path(node_name)
            os.makedirs(repository_path)
            
            process = start_event_publisher(
                node_name, 
                _event_publisher_pull_addresses[i],
                _event_publisher_pub_addresses[i]
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._event_publisher_processes.append(process)
            time.sleep(1.0)

            process = start_data_writer(
                _cluster_name,
                node_name, 
                _data_writer_addresses[i],
                _event_publisher_pull_addresses[i],
                repository_path
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_writer_processes.append(process)
            time.sleep(1.0)

            process = start_data_reader(
                node_name, 
                _data_reader_addresses[i],
                _event_publisher_pull_addresses[i], 
                repository_path
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._data_reader_processes.append(process)
            time.sleep(1.0)

            process = start_handoff_server(
                _cluster_name,
                node_name, 
                _handoff_server_addresses,
                _handoff_server_pipeline_addresses[i],
                _data_reader_addresses,
                _data_writer_addresses,
                _event_publisher_pull_addresses[i], 
                _repository_path(node_name)
            )
            poll_result = poll_process(process)
            self.assertEqual(poll_result, None)
            self._handoff_server_processes.append(process)
            time.sleep(1.0)

        self._context = zmq.context.Context()
        self._pollster = GreenletZeroMQPollster()
        self._deliverator = Deliverator()

        self._pull_server = GreenletPULLServer(
            self._context, 
            _client_address,
            self._deliverator
        )
        self._pull_server.register(self._pollster)

        backup_nodes = random.sample(node_rows[1:], 2)
        self._log.debug("backup nodes = %s" % (
            [n.name for n in backup_nodes], 
        ))

        self._resilient_clients = list()        
        for node_row, address in zip(node_rows, _data_writer_addresses):
            if not node_row in backup_nodes:
                continue
            resilient_client = GreenletResilientClient(
                self._context,
                self._pollster,
                node_row.name,
                address,
                _local_node_name,
                _client_address,
                self._deliverator,
            )
            self._resilient_clients.append(resilient_client)
        self._log.debug("%s resilient clients" % (
            len(self._resilient_clients), 
        ))

        self._data_writer_handoff_client = DataWriterHandoffClient(
            node_rows[0].name,
            self._resilient_clients
        )

        self._pollster.start()