def setUp(self):
    self.tearDown()
    os.makedirs(_repository_path)
    self._key_generator = generate_key()
    self._database_connection = get_node_local_connection()

    self._event_publisher_process = start_event_publisher(
        _local_node_name,
        _event_publisher_pull_address,
        _event_publisher_pub_address
    )
    poll_result = poll_process(self._event_publisher_process)
    self.assertEqual(poll_result, None)

    self._data_writer_process = start_data_writer(
        _cluster_name,
        _local_node_name,
        _data_writer_address,
        _event_publisher_pull_address,
        _repository_path
    )
    poll_result = poll_process(self._data_writer_process)
    self.assertEqual(poll_result, None)

    self._data_reader_process = start_data_reader(
        _local_node_name,
        _data_reader_address,
        _event_publisher_pull_address,
        _repository_path
    )
    poll_result = poll_process(self._data_reader_process)
    self.assertEqual(poll_result, None)

def __init__(self, halt_event, node_id_dict, message_queue, push_client):
    Thread.__init__(self, name="WriterThread")
    self._halt_event = halt_event
    self._node_id_dict = node_id_dict
    self._message_queue = message_queue
    self._database_connection = get_node_local_connection()
    self._active_segments = dict()
    self._completions = list()
    self._writer = None
    self._reply_pusher = push_client
    self._dispatch_table = {
        "archive-key-entire"        : self._handle_archive_key_entire,
        "archive-key-start"         : self._handle_archive_key_start,
        "archive-key-next"          : self._handle_archive_key_next,
        "archive-key-final"         : self._handle_archive_key_final,
        "archive-key-cancel"        : self._handle_archive_key_cancel,
        "destroy-key"               : self._handle_destroy_key,
        "start-conjoined-archive"   : self._handle_start_conjoined_archive,
        "abort-conjoined-archive"   : self._handle_abort_conjoined_archive,
        "finish-conjoined-archive"  : self._handle_finish_conjoined_archive,
        "web-writer-start"          : self._handle_web_writer_start,
        "sync-value-file"           : self._handle_sync_value_file,
    }

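# A minimal sketch of how a dispatch table like the one above is typically
# driven: pop a message off the queue and route it by its "message-type".
# This is illustrative only, not the project's run() loop; the (message, data)
# item layout and the use of a standard queue.Queue are assumptions.
from queue import Empty

def drain_message_queue(message_queue, dispatch_table, halt_event):
    """Route each queued (message, data) pair to its handler until halted."""
    while not halt_event.is_set():
        try:
            message, data = message_queue.get(timeout=1.0)
        except Empty:
            continue
        handler = dispatch_table.get(message["message-type"])
        if handler is not None:
            handler(message, data)
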
def _volume_name_by_space_id():
    """
    The control process creates a pool of worker processes of configurable
    size (default 2) for each distinct file space. However, if multiple file
    spaces have the same "volume name" value, one worker process pool handles
    read requests for all of the file spaces with that volume name. In other
    words, there will be a pool of workers for each non-null volume name.

    Null values are never equal to other null values, so if no volume names
    are specified for the file spaces, there will be one read worker pool per
    file space.

    So we assign a volume name to each space_id, creating a 'null-nn' name
    if the volume is null.
    """
    connection = get_node_local_connection()
    file_space_info = load_file_space_info(connection)
    connection.close()
    file_space_sanity_check(file_space_info, _repository_path)

    volume_name_by_space_id = dict()
    null_count = 0
    for file_space_row_list in file_space_info.values():
        for file_space_row in file_space_row_list:
            if file_space_row.volume is None:
                null_count += 1
                volume_name = "null-{0}".format(null_count)
            else:
                volume_name = file_space_row.volume
            volume_name_by_space_id[file_space_row.space_id] = volume_name

    return volume_name_by_space_id

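# To make the pooling rule above concrete: inverting the returned mapping
# gives, for each distinct volume name, the space_ids that a single worker
# pool would serve. An illustrative sketch, not code from the project;
# _space_ids_by_volume is a hypothetical helper.
from collections import defaultdict

def _space_ids_by_volume(volume_name_by_space_id):
    """Group space_ids under their (possibly synthesized) volume name."""
    space_ids_by_volume = defaultdict(list)
    for space_id, volume_name in volume_name_by_space_id.items():
        space_ids_by_volume[volume_name].append(space_id)
    return dict(space_ids_by_volume)

# example: two file spaces share volume "vol-1"; a third has a null volume,
# giving {'vol-1': [1, 2], 'null-1': [3]} -- one worker pool per key
print(_space_ids_by_volume({1: "vol-1", 2: "vol-1", 3: "null-1"}))
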
def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    options = get_options()

    try:
        connection = get_node_local_connection()
    except Exception as instance:
        log.exception("Exception connecting to database {0}".format(instance))
        return -1

    zmq_context = zmq.Context()

    event_push_client = EventPushClient(zmq_context, "garbage_collector")
    event_push_client.info("program-start", "garbage_collector starts")

    return_code = 0

    collectable_segment_ids = io.StringIO()

    partition_count = 0
    collectable_count = 0
    try:
        versioned_collections = get_versioned_collections()
        for partition in generate_candidate_partitions(connection):
            partition_count += 1
            versioned_collection = \
                partition[0].collection_id in versioned_collections
            count = _evaluate_partition(collectable_segment_ids,
                                        partition,
                                        versioned_collection)
            collectable_count += count
        archive_collectable_segment_rows(connection,
                                         collectable_segment_ids,
                                         options.max_node_offline_time)
        collectable_segment_ids.close()
    except Exception:
        log.exception("_garbage_collection")
        return_code = -2
    else:
        log.info("found {0:,} candidates, collected {1:,} segments".format(
            partition_count, collectable_count))
        log.info("program terminates normally")

    connection.close()
    event_push_client.close()
    zmq_context.term()

    return return_code

def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    options = get_options()

    try:
        connection = get_node_local_connection()
    except Exception as instance:
        log.exception("Exception connecting to database")
        return -1

    zmq_context = zmq.Context()

    event_push_client = EventPushClient(zmq_context, "garbage_collector")
    event_push_client.info("program-start", "garbage_collector starts")

    return_code = 0

    try:
        total_unused_value_file_size = unlink_totally_unused_value_files(
            connection, _repository_path)
        unreachable_value_file_size = unlink_unreachable_value_files(
            connection, _repository_path)
        ref_generator = generate_value_file_references(options, connection)
        savings = rewrite_value_files(
            options, connection, _repository_path, ref_generator)
    except Exception:
        log.exception("_garbage_collection")
        return_code = -2
    else:
        log.info("program terminates normally")
        event_push_client.info(
            "rewrite complete",
            "garbage_collector finished",
            unused_value_file_bytes_reclaimed=total_unused_value_file_size,
            unreachable_value_file_bytes_reclaimed=unreachable_value_file_size,
            rewrite_value_file_savings=savings
        )

    connection.close()
    event_push_client.close()
    zmq_context.term()

    return return_code

def delete_all_motoboto_test_segments():
    central_conn = get_central_connection()
    local_conn = get_node_local_connection()

    collection_id_rows = central_conn.fetch_all_rows(
        _test_collections_query, [])
    central_conn.close()

    local_conn.begin_transaction()
    local_conn.execute(
        "create temp table tmp_motoboto_collection_ids (id int4 not null)",
        [])
    for row in collection_id_rows:
        local_conn.execute(
            "insert into tmp_motoboto_collection_ids values (%s)", row)
    for query in _delete_test_collections_data:
        rowcount = local_conn.execute(query, [])
        if rowcount:
            print("Deleted %s via %s" % (rowcount, query.split("\n", 1)[0]))
    local_conn.commit()

def _setup(_halt_event, state):
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "data_reader"
    )

    log.info("binding resilient-server to %s" % (_data_reader_address, ))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        _data_reader_address,
        state["receive-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["state-cleaner"] = StateCleaner(state)

    state["database-connection"] = get_node_local_connection()
    state["reader"] = Reader(
        state["database-connection"],
        _repository_path
    )

    state["stats-reporter"] = StatsReporter(state)

    state["event-push-client"].info("program-start", "data_reader starts")

    return [
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        (state["state-cleaner"].run, state["state-cleaner"].next_run(), ),
        (state["stats-reporter"].run, state["stats-reporter"].next_run(), ),
    ]

def _setup(_halt_event, state):
    log = logging.getLogger("_setup")

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(state["zmq-context"],
                                                 "data_writer")

    log.info("binding resilient-server to %s" % (_data_writer_address, ))
    state["resilient-server"] = ResilientServer(state["zmq-context"],
                                                _data_writer_address,
                                                state["receive-queue"])
    state["resilient-server"].register(state["pollster"])

    state["queue-dispatcher"] = DequeDispatcher(state,
                                                state["receive-queue"],
                                                _dispatch_table)

    central_connection = get_central_connection()
    state["cluster-row"] = get_cluster_row(central_connection)
    state["node-rows"] = get_node_rows(central_connection,
                                       state["cluster-row"].id)
    central_connection.close()

    state["node-id-dict"] = dict([(node_row.name, node_row.id)
                                  for node_row in state["node-rows"]])

    state["database-connection"] = get_node_local_connection()

    # Ticket #1646 mark output value files as closed at startup
    mark_value_files_as_closed(state["database-connection"])

    state["writer"] = Writer(state["database-connection"], _repository_path)

    state["stats-reporter"] = StatsReporter(state)

    state["event-push-client"].info("program-start", "data_writer starts")

    return [
        (state["pollster"].run, time.time()),
        (state["queue-dispatcher"].run, time.time()),
        (state["stats-reporter"].run, state["stats-reporter"].next_run()),
    ]

def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    global _max_value_file_time

    initialize_logging(_log_path)
    log = logging.getLogger("main")

    try:
        _max_value_file_time = parse_timedelta_str(_max_value_file_time_str)
    except Exception as instance:
        log.exception("Unable to parse '{0}' {1}".format(
            _max_value_file_time_str, instance))
        return -1

    log.info("program starts; max_value_file_time = {0}".format(
        _max_value_file_time))

    zmq_context = zmq.Context()

    event_push_client = EventPushClient(zmq_context, "node_inspector")
    event_push_client.info("program-start", "node_inspector starts")

    try:
        connection = get_node_local_connection()
    except Exception as instance:
        log.exception("Exception connecting to database {0}".format(instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return -1

    known_value_files = dict()

    connection.begin_transaction()
    try:
        for batch in generate_work(connection):
            _process_work_batch(connection, known_value_files, batch)
    except Exception as instance:
        connection.rollback()
        log.exception("Exception processing batch {0} {1}".format(
            batch, instance))
        event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__
        )
        return -1
    else:
        connection.commit()
    finally:
        connection.close()
        event_push_client.close()
        zmq_context.term()

    log.info("program terminates normally")
    return 0

def setUp(self):
    self._connection = get_node_local_connection()
    _clear_test_data(self._connection)

def __init__(self):
    self._log = logging.getLogger("WebInternalReader")

    memcached_client = memcache.Client(_memcached_nodes)

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_internal_reader_pipeline_address,
        self._deliverator
    )
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_internal_reader_pipeline_address,
            self._deliverator,
            connect_messages=[]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(
            node_name,
            resilient_client
        )
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address
    )
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception
    )

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client
    )

    self._event_push_client = EventPushClient(
        self._zeromq_context,
        "web-internal-reader"
    )

    # message sent to data readers telling them the server
    # is (re)starting, thereby invalidating any archives or retrievals
    # that are in progress for this node
    timestamp = create_timestamp()
    self._event_push_client.info("web-reader-start",
                                 "web reader (re)start",
                                 timestamp_repr=repr(timestamp),
                                 source_node_name=_local_node_name)

    self._watcher = Watcher(
        _stats,
        self._data_reader_clients,
        self._event_push_client
    )

    self.application = Application(
        memcached_client,
        self._central_connection,
        self._node_local_connection,
        self._cluster_row,
        self._data_readers,
        self._accounting_client,
        self._event_push_client,
        _stats
    )
    self.wsgi_server = WSGIServer(
        (_web_internal_reader_host, _web_internal_reader_port),
        application=self.application,
        backlog=_wsgi_backlog
    )

def __init__(self):
    self._log = logging.getLogger("WebServer")
    authenticator = SqlAuthenticator()

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._unified_id_factory = UnifiedIDFactory(
        self._central_connection,
        _get_shard_id(self._central_connection, self._cluster_row.id)
    )
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_server_pipeline_address,
        self._deliverator
    )
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    # message sent to data readers and writers telling them the server
    # is (re)starting, thereby invalidating any archives or retrievals
    # that are in progress for this node
    timestamp = create_timestamp()
    start_message = {
        "message-type"      : "web-server-start",
        "priority"          : create_priority(),
        "unified-id"        : self._unified_id_factory.next(),
        "timestamp-repr"    : repr(timestamp),
        "source-node-name"  : _local_node_name,
    }

    self._data_writer_clients = list()
    for node_name, address in zip(_node_names, _data_writer_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_server_pipeline_address,
            self._deliverator,
            connect_messages=[start_message, ]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_writer_clients.append(resilient_client)

    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_server_pipeline_address,
            self._deliverator,
            connect_messages=[start_message, ]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(
            node_name,
            resilient_client
        )
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address
    )
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception
    )

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client
    )

    self._event_push_client = EventPushClient(
        self._zeromq_context,
        "web-server"
    )

    self._watcher = Watcher(
        _stats,
        self._data_reader_clients,
        self._data_writer_clients,
        self._event_push_client
    )

    id_translator_keys_path = os.path.join(
        _repository_path, "id_translator_keys.pkl"
    )
    with open(id_translator_keys_path, "r") as input_file:
        id_translator_keys = pickle.load(input_file)

    self._id_translator = InternalIDTranslator(
        id_translator_keys["key"],
        id_translator_keys["hmac_key"],
        id_translator_keys["iv_key"],
        id_translator_keys["hmac_size"]
    )
    self.application = Application(
        self._central_connection,
        self._node_local_connection,
        self._cluster_row,
        self._unified_id_factory,
        self._id_translator,
        self._data_writer_clients,
        self._data_readers,
        authenticator,
        self._accounting_client,
        self._event_push_client,
        _stats
    )
    self.wsgi_server = WSGIServer(
        (_web_server_host, _web_server_port),
        application=self.application,
        backlog=_wsgi_backlog
    )

def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    signal.signal(signal.SIGTERM, _create_signal_handler(halt_event))

    zmq_context = zmq.Context()

    event_push_client = EventPushClient(zmq_context, "defragger")
    event_push_client.info("program-start", "defragger starts")

    connection = None

    while not halt_event.is_set():

        # if we don't have an open database connection, get one
        if connection is None:
            try:
                connection = get_node_local_connection()
            except Exception as instance:
                exctype, value = sys.exc_info()[:2]
                event_push_client.exception(
                    "database exception",
                    str(value),
                    exctype=exctype.__name__
                )
                log.exception("Exception connecting to database")
                halt_event.wait(_database_retry_interval)
                continue

        # start a transaction
        connection.execute("begin")

        # try one defrag pass
        bytes_defragged = 0
        try:
            bytes_defragged = _defrag_pass(connection, event_push_client)
        except KeyboardInterrupt:
            halt_event.set()
            connection.rollback()
        except Exception as instance:
            log.exception(str(instance))
            event_push_client.exception(
                unhandled_exception_topic,
                str(instance),
                exctype=instance.__class__.__name__
            )
            connection.rollback()
        else:
            connection.commit()

        log.info("bytes defragged = {0:,}".format(bytes_defragged))

        # if we didn't do anything on this pass...
        if bytes_defragged == 0:

            # close the database connection
            if connection is not None:
                connection.close()
                connection = None

            # wait and try again
            try:
                halt_event.wait(_defrag_check_interval)
            except KeyboardInterrupt:
                halt_event.set()

    if connection is not None:
        connection.close()

    event_push_client.close()
    zmq_context.term()

    log.info("program terminates normally")
    return 0

def main():
    """
    main entry point
    return 0 for success (exit code)
    """
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zmq_context = zmq.Context()

    event_push_client = EventPushClient(zmq_context, "defragger")
    event_push_client.info("program-start", "defragger starts")

    connection = None
    file_space_info = None

    while not halt_event.is_set():

        # if we don't have an open database connection, get one
        if connection is None:
            try:
                connection = get_node_local_connection()
            except Exception as instance:
                exctype, value = sys.exc_info()[:2]
                event_push_client.exception(
                    "database exception",
                    str(value),
                    exctype=exctype.__name__
                )
                log.exception("Exception connecting to database")
                halt_event.wait(_database_retry_interval)
                continue

            file_space_info = load_file_space_info(connection)
            file_space_sanity_check(file_space_info, _repository_path)

        # try one defrag pass
        bytes_defragged = 0
        connection.begin_transaction()
        try:
            bytes_defragged = _defrag_pass(connection,
                                           file_space_info,
                                           event_push_client)
        except KeyboardInterrupt:
            halt_event.set()
            connection.rollback()
        except Exception as instance:
            log.exception(str(instance))
            event_push_client.exception(
                unhandled_exception_topic,
                str(instance),
                exctype=instance.__class__.__name__
            )
            connection.rollback()
        else:
            connection.commit()

        log.info("bytes defragged = {0:,}".format(bytes_defragged))

        # if we didn't do anything on this pass...
        if bytes_defragged == 0:

            # exit if we're done and asked to do single pass
            if int(os.environ.get('NIMBUSIO_EXIT_WHEN_DONE', '0')):
                halt_event.set()

            # close the database connection
            if connection is not None:
                connection.close()
                connection = None

            # wait and try again
            try:
                halt_event.wait(_defrag_check_interval)
            except KeyboardInterrupt:
                halt_event.set()

    if connection is not None:
        connection.close()

    event_push_client.close()
    zmq_context.term()

    log.info("program terminates normally")
    return 0

def setUp(self):
    self.tearDown()
    self._connection = get_node_local_connection()

def make_batch_key(entry):
    return (entry.unified_id, entry.conjoined_part, entry.segment_num, )

def generate_work(connection):
    """
    generate batches for inspection
    """
    prev_key = None
    batch = list()
    for raw_entry in connection.generate_all_rows(_work_query, []):
        entry = _entry_template._make(raw_entry)
        batch_key = make_batch_key(entry)
        if prev_key is None:
            prev_key = batch_key

        if batch_key != prev_key:
            yield batch
            batch = list()
            prev_key = batch_key

        batch.append(entry)

    # yield the final batch, if any
    if len(batch) > 0:
        yield batch

if __name__ == "__main__":
    """
    test the generator independently
    """
    from tools.database_connection import get_node_local_connection
    connection = get_node_local_connection()
    for entry in generate_work(connection):
        print(entry)

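# The batching above can also be expressed with the standard library. A
# minimal sketch, not the project's code; it reuses make_batch_key() and,
# like the hand-rolled version, assumes the rows arrive grouped by batch key.
from itertools import groupby

def generate_work_grouped(entries):
    """Yield one list of entries per distinct batch key (illustrative)."""
    for _batch_key, group in groupby(entries, key=make_batch_key):
        yield list(group)
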
def setUp(self):
    self.tearDown()
    os.makedirs(_test_dir)
    self._database_connection = get_node_local_connection()

def _setup(_halt_event, state):
    log = logging.getLogger("_setup")
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "handoff_server"
    )

    central_connection = get_central_connection()
    state["cluster-row"] = get_cluster_row(central_connection)
    state["node-rows"] = get_node_rows(
        central_connection, state["cluster-row"].id
    )
    central_connection.close()

    state["node-id-dict"] = dict(
        [(node_row.name, node_row.id, ) for node_row in state["node-rows"]]
    )
    state["node-name-dict"] = dict(
        [(node_row.id, node_row.name, ) for node_row in state["node-rows"]]
    )

    state["database-connection"] = get_node_local_connection()

    for node_row, handoff_server_address in zip(
        state["node-rows"], _handoff_server_addresses
    ):
        if node_row.name == _local_node_name:
            log.info("binding resilient-server to %s" % (
                handoff_server_address, ))
            state["resilient-server"] = ResilientServer(
                state["zmq-context"],
                handoff_server_address,
                state["receive-queue"]
            )
            state["resilient-server"].register(state["pollster"])
        else:
            handoff_server_client = ResilientClient(
                state["zmq-context"],
                state["pollster"],
                node_row.name,
                handoff_server_address,
                _client_tag,
                _handoff_server_pipeline_address
            )
            state["handoff-server-clients"].append(handoff_server_client)
            # don't run all the status checkers at the same time
            status_checkers.append(
                (handoff_server_client.run,
                 time.time() + random.random() * 60.0, )
            )

    log.info("binding pull-server to %s" % (
        _handoff_server_pipeline_address, ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _handoff_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    for node_row, data_reader_address in zip(
        state["node-rows"], _data_reader_addresses
    ):
        data_reader_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_reader_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["reader-client-dict"][data_reader_client.server_node_name] = \
            data_reader_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_reader_client.run, time.time() + random.random() * 60.0, )
        )

    for node_row, data_writer_address in zip(
        state["node-rows"], _data_writer_addresses
    ):
        data_writer_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_row.name,
            data_writer_address,
            _client_tag,
            _handoff_server_pipeline_address
        )
        state["writer-client-dict"][data_writer_client.server_node_name] = \
            data_writer_client
        # don't run all the status checkers at the same time
        status_checkers.append(
            (data_writer_client.run, time.time() + random.random() * 60.0, )
        )

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["handoff-requestor"] = HandoffRequestor(state, _local_node_name)
    state["handoff-starter"] = HandoffStarter(
        state, _local_node_name, state["event-push-client"]
    )

    state["event-push-client"].info("program-start", "handoff_server starts")

    timer_driven_callbacks = [
        (state["handoff-starter"].run, state["handoff-starter"].next_run(), ),
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        # try to spread out handoff polling, if all nodes start together
        (state["handoff-requestor"].run,
         time.time() + random.random() * handoff_polling_interval)
    ]
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks

def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    worker_number = int(sys.argv[1])

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         worker_number,
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    event_source_name = "rs_dbpool_worker_{0}".format(worker_number)
    event_push_client = EventPushClient(zeromq_context, event_source_name)

    dealer_socket = zeromq_context.socket(zmq.DEALER)
    dealer_socket.setsockopt(zmq.LINGER, 1000)
    log.debug("connecting to {0}".format(db_controller_router_socket_uri))
    dealer_socket.connect(db_controller_router_socket_uri)

    log.debug("opening local database connection")
    database_connection = get_node_local_connection()

    try:
        _send_initial_work_request(dealer_socket)
        while not halt_event.is_set():
            _process_one_transaction(dealer_socket,
                                     database_connection,
                                     event_push_client)
    except InterruptedSystemCall:
        if halt_event.is_set():
            log.info("program terminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "Interrupted zeromq system call",
                                        exctype="InterruptedSystemCall")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program terminates normally")
    finally:
        database_connection.close()
        dealer_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return return_value

def _setup(_halt_event, state):
    log = logging.getLogger("_setup")
    status_checkers = list()

    # do the event push client first, because we may need to
    # push an exception event from setup
    state["event-push-client"] = EventPushClient(
        state["zmq-context"],
        "anti_entropy_server"
    )

    state["central-database-connection"] = get_central_connection()
    state["local-database-connection"] = get_node_local_connection()

    state["cluster-row"] = get_cluster_row(
        state["central-database-connection"]
    )

    local_anti_entropy_server_address = None
    for node_name, address in zip(_node_names, _anti_entropy_server_addresses):
        if node_name == _local_node_name:
            local_anti_entropy_server_address = address
            break
    assert local_anti_entropy_server_address is not None

    log.info("binding resilient-server to %s" % (
        local_anti_entropy_server_address, ))
    state["resilient-server"] = ResilientServer(
        state["zmq-context"],
        local_anti_entropy_server_address,
        state["receive-queue"]
    )
    state["resilient-server"].register(state["pollster"])

    log.info("binding pull-server to %s" % (
        _anti_entropy_server_pipeline_address, ))
    state["pull-server"] = PULLServer(
        state["zmq-context"],
        _anti_entropy_server_pipeline_address,
        state["receive-queue"]
    )
    state["pull-server"].register(state["pollster"])

    state["anti-entropy-clients"] = list()
    for node_name, anti_entropy_server_address in zip(
        _node_names, _anti_entropy_server_addresses
    ):
        resilient_client = ResilientClient(
            state["zmq-context"],
            state["pollster"],
            node_name,
            anti_entropy_server_address,
            _client_tag,
            _anti_entropy_server_pipeline_address
        )
        state["anti-entropy-clients"].append(resilient_client)
        status_checkers.append(
            (resilient_client.run, time.time() + random.random() * 60.0, )
        )

    state["queue-dispatcher"] = DequeDispatcher(
        state,
        state["receive-queue"],
        _dispatch_table
    )

    state["collection-list-requestor"] = CollectionListRequestor(state)
    state["consistency-check-starter"] = ConsistencyCheckStarter(
        state, _start_consistency_check
    )
    state["retry-manager"] = RetryManager(
        state, _start_consistency_check
    )
    state["state-cleaner"] = StateCleaner(state)

    state["event-push-client"].info(
        "program-start", "anti_entropy_server starts"
    )

    # start the collection list requestor right away
    # start the consistency check starter a little later, when
    # we presumably have some collection ids
    timer_driven_callbacks = [
        (state["pollster"].run, time.time(), ),
        (state["queue-dispatcher"].run, time.time(), ),
        (state["collection-list-requestor"].run, time.time(), ),
        (state["consistency-check-starter"].run, time.time()+60.0, ),
        (state["retry-manager"].run, state["retry-manager"].next_run(), ),
        (state["state-cleaner"].run, state["state-cleaner"].next_run(), ),
    ]
    timer_driven_callbacks.extend(status_checkers)
    return timer_driven_callbacks
