def _destroy(self, collection_id, key, unified_id_to_delete, timestamp,
             segment_num):
    """send a destroy-key message to the data writer and return the reply"""
    user_request_id = uuid.uuid1().hex
    archive_priority = create_priority()
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()
    message = {
        "message-type": "destroy-key",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id-to-delete": unified_id_to_delete,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "segment-num": segment_num,
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message)
    self.assertEqual(reply["message-type"], "destroy-key-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    return reply
def test_increasing_ids(self):
    """test that unified ids increase monotonically"""
    unified_id_factory = UnifiedIDFactory(1)
    prev_id = None
    for _ in range(1000):
        unified_id = unified_id_factory.next()
        if prev_id is not None:
            self.assertTrue(unified_id > prev_id)
        prev_id = unified_id
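# --- illustration only ----------------------------------------------------
# A minimal sketch of the property the test above exercises: a factory that
# packs a millisecond timestamp, a shard id, and a per-process sequence
# counter into one monotonically increasing integer. This shows the general
# snowflake-style technique; it is NOT the actual UnifiedIDFactory
# implementation, and the bit widths below are assumptions.
import time

class _SketchUnifiedIDFactory(object):
    def __init__(self, shard_id):
        self._shard_id = shard_id
        self._last_timestamp = 0
        self._sequence = 0

    def next(self):
        timestamp = int(time.time() * 1000)
        if timestamp == self._last_timestamp:
            # same millisecond: bump the counter so ids still increase
            self._sequence += 1
        else:
            # a real implementation must also guard against the clock
            # moving backwards; this sketch does not
            self._last_timestamp = timestamp
            self._sequence = 0
        # high bits: time, middle bits: shard, low bits: sequence
        return (timestamp << 23) | (self._shard_id << 13) | self._sequence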
# the "xxx" prefix keeps unittest from collecting these tests
def xxxtest_archive_key_entire_with_meta(self):
    """
    test archiving a key in a single message, including meta data
    """
    file_size = 10 * 64 * 1024
    content_item = random_string(file_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2
    meta_key = "".join([nimbus_meta_prefix, "test_key"])
    meta_value = "pork"
    file_adler32 = zlib.adler32(content_item)
    file_md5 = hashlib.md5(content_item)
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()
    message = {
        "message-type": "archive-key-entire",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "conjoined-part": 0,
        "segment-num": segment_num,
        "segment-size": file_size,
        "zfec-padding-size": 4,
        "segment-adler32": file_adler32,
        "segment-md5-digest": b64encode(file_md5.digest()),
        "file-size": file_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
        meta_key: meta_value,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message,
                                       data=content_item)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def xxxtest_destroy_nonexistent_key(self):
    """test destroying a key that does not exist, with no complications"""
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()
    collection_id = 1001
    key = self._key_generator.next()
    segment_num = 4
    timestamp = create_timestamp()
    reply = self._destroy(collection_id, key, unified_id, timestamp,
                          segment_num)
    self.assertEqual(reply["result"], "success", reply["error-message"])
def xxxtest_large_archive(self):
    """
    test archiving a file that needs more than one message.
    For example, a 10 MB file: each node would get ten 120KB zfec shares.
    """
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)
    user_request_id = uuid.uuid1().hex
    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0
    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)
    slice_start = 0
    slice_end = slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    unified_id_factory = UnifiedIDFactory(1)
    unified_id = unified_id_factory.next()

    # the first slice opens the archive
    message = {
        "message-type": "archive-key-start",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "conjoined-part": 0,
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "zfec-padding-size": 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32": segment_adler32,
        "sequence-num": sequence_num,
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message,
                                       data=test_data[slice_start:slice_end])
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])

    # the middle slices continue the archive
    for _ in range(slice_count - 2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
        message = {
            "message-type": "archive-key-next",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "zfec-padding-size": 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32": segment_adler32,
            "sequence-num": sequence_num,
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

    # the final slice closes the archive and carries the whole-file totals
    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])
    message = {
        "message-type": "archive-key-final",
        "priority": archive_priority,
        "user-request-id": user_request_id,
        "collection-id": collection_id,
        "key": key,
        "unified-id": unified_id,
        "timestamp-repr": repr(timestamp),
        "conjoined-part": 0,
        "segment-num": segment_num,
        "segment-size": len(test_data[slice_start:slice_end]),
        "zfec-padding-size": 4,
        "segment-md5-digest": b64encode(segment_md5.digest()),
        "segment-adler32": segment_adler32,
        "sequence-num": sequence_num,
        "file-size": total_size,
        "file-adler32": file_adler32,
        "file-hash": b64encode(file_md5.digest()),
        "source-node-name": _local_node_name,
        "handoff-node-name": None,
    }
    reply = send_request_and_get_reply(_local_node_name,
                                       _data_writer_address,
                                       _local_node_name,
                                       _client_address,
                                       message,
                                       data=test_data[slice_start:slice_end])
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["user-request-id"], user_request_id)
    self.assertEqual(reply["result"], "success", reply["error-message"])
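# --- illustration only ----------------------------------------------------
# The test above drives the three-phase archive protocol by hand:
# "archive-key-start" for the first slice, "archive-key-next" for each middle
# slice, and "archive-key-final" (carrying the whole-file size, adler32, and
# md5) for the last one. A hypothetical helper like the one below could
# produce the (sequence_num, slice) pairs; the name _iter_slices is ours and
# does not appear in the test suite.
def _iter_slices(data, slice_size):
    """yield (sequence_num, slice) pairs covering all of data in order"""
    for sequence_num, slice_start in enumerate(
            range(0, len(data), slice_size)):
        yield sequence_num, data[slice_start:slice_start + slice_size]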
def __init__(self):
    self._log = logging.getLogger("WebServer")
    authenticator = SqlAuthenticator()

    self._central_connection = get_central_connection()
    self._cluster_row = get_cluster_row(self._central_connection)
    self._node_local_connection = get_node_local_connection()
    self._unified_id_factory = UnifiedIDFactory(
        self._central_connection,
        _get_shard_id(self._central_connection, self._cluster_row.id)
    )
    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_server_pipeline_address,
        self._deliverator
    )
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    # message sent to data readers and writers telling them the server
    # is (re)starting, thereby invalidating any archives or retrievals
    # that are in progress for this node
    timestamp = create_timestamp()
    start_message = {
        "message-type": "web-server-start",
        "priority": create_priority(),
        "unified-id": self._unified_id_factory.next(),
        "timestamp-repr": repr(timestamp),
        "source-node-name": _local_node_name,
    }

    self._data_writer_clients = list()
    for node_name, address in zip(_node_names, _data_writer_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_server_pipeline_address,
            self._deliverator,
            connect_messages=[start_message]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_writer_clients.append(resilient_client)

    self._data_reader_clients = list()
    self._data_readers = list()
    for node_name, address in zip(_node_names, _data_reader_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_server_pipeline_address,
            self._deliverator,
            connect_messages=[start_message]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_reader_clients.append(resilient_client)
        data_reader = DataReader(node_name, resilient_client)
        self._data_readers.append(data_reader)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address
    )
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception
    )

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client
    )

    self._event_push_client = EventPushClient(
        self._zeromq_context, "web-server"
    )

    self._watcher = Watcher(
        _stats,
        self._data_reader_clients,
        self._data_writer_clients,
        self._event_push_client
    )

    id_translator_keys_path = os.path.join(
        _repository_path, "id_translator_keys.pkl"
    )
    with open(id_translator_keys_path, "r") as input_file:
        id_translator_keys = pickle.load(input_file)

    self._id_translator = InternalIDTranslator(
        id_translator_keys["key"],
        id_translator_keys["hmac_key"],
        id_translator_keys["iv_key"],
        id_translator_keys["hmac_size"]
    )

    self.application = Application(
        self._central_connection,
        self._node_local_connection,
        self._cluster_row,
        self._unified_id_factory,
        self._id_translator,
        self._data_writer_clients,
        self._data_readers,
        authenticator,
        self._accounting_client,
        self._event_push_client,
        _stats
    )
    self.wsgi_server = WSGIServer(
        (_web_server_host, _web_server_port),
        application=self.application,
        backlog=_wsgi_backlog
    )
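# --- illustration only ----------------------------------------------------
# Both WebServer and WebWriter load their translation keys from
# "id_translator_keys.pkl". A hypothetical generator script might look like
# the sketch below; the entry names come from the loading code above, but the
# key and tag sizes are assumptions, not taken from real deployment tooling.
import os
import pickle

def _write_id_translator_keys(path):
    keys = {
        "key": os.urandom(32),        # assumed symmetric-cipher key size
        "hmac_key": os.urandom(32),   # assumed HMAC key size
        "iv_key": os.urandom(16),     # assumed IV-derivation key size
        "hmac_size": 16,              # assumed truncated HMAC tag length
    }
    with open(path, "wb") as output_file:
        pickle.dump(keys, output_file)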
def __init__(self, halt_event):
    self._log = logging.getLogger("WebWriter")
    memcached_client = memcache.Client(_memcached_nodes)

    self._interaction_pool = gdbpool.interaction_pool.DBInteractionPool(
        get_central_database_dsn(),
        pool_name=_central_pool_name,
        pool_size=_database_pool_size,
        do_log=True
    )

    authenticator = InteractionPoolAuthenticator(memcached_client,
                                                 self._interaction_pool)

    # Ticket #25: must run database operation in a greenlet
    greenlet = gevent.Greenlet.spawn(_get_cluster_row_and_node_row,
                                     self._interaction_pool)
    greenlet.join()
    self._cluster_row, node_row = greenlet.get()

    self._unified_id_factory = UnifiedIDFactory(node_row.id)

    self._deliverator = Deliverator()

    self._zeromq_context = zmq.Context()

    self._pull_server = GreenletPULLServer(
        self._zeromq_context,
        _web_writer_pipeliner_address,
        self._deliverator
    )
    self._pull_server.link_exception(self._unhandled_greenlet_exception)

    self._data_writer_clients = list()
    for node_name, address in zip(_node_names, _data_writer_addresses):
        resilient_client = GreenletResilientClient(
            self._zeromq_context,
            node_name,
            address,
            _client_tag,
            _web_writer_pipeliner_address,
            self._deliverator,
            connect_messages=[]
        )
        resilient_client.link_exception(self._unhandled_greenlet_exception)
        self._data_writer_clients.append(resilient_client)

    self._space_accounting_dealer_client = GreenletDealerClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_server_address
    )
    self._space_accounting_dealer_client.link_exception(
        self._unhandled_greenlet_exception
    )

    push_client = GreenletPUSHClient(
        self._zeromq_context,
        _local_node_name,
        _space_accounting_pipeline_address,
    )

    self._accounting_client = SpaceAccountingClient(
        _local_node_name,
        self._space_accounting_dealer_client,
        push_client
    )

    self._event_push_client = EventPushClient(self._zeromq_context,
                                              "web-server")

    # message sent to data writers telling them the server
    # is (re)starting, thereby invalidating any archives
    # that are in progress for this node
    unified_id = self._unified_id_factory.next()
    timestamp = create_timestamp()
    self._event_push_client.info("web-writer-start",
                                 "web writer (re)start",
                                 unified_id=unified_id,
                                 timestamp_repr=repr(timestamp),
                                 source_node_name=_local_node_name)

    id_translator_keys_path = os.environ.get(
        "NIMBUS_IO_ID_TRANSLATION_KEYS",
        os.path.join(_repository_path, "id_translator_keys.pkl")
    )
    with open(id_translator_keys_path, "r") as input_file:
        id_translator_keys = pickle.load(input_file)

    self._id_translator = InternalIDTranslator(
        id_translator_keys["key"],
        id_translator_keys["hmac_key"],
        id_translator_keys["iv_key"],
        id_translator_keys["hmac_size"]
    )

    redis_queue = gevent.queue.Queue()

    self._redis_sink = OperationalStatsRedisSink(halt_event,
                                                 redis_queue,
                                                 _local_node_name)
    self._redis_sink.link_exception(self._unhandled_greenlet_exception)

    self.application = Application(
        self._cluster_row,
        self._unified_id_factory,
        self._id_translator,
        self._data_writer_clients,
        authenticator,
        self._accounting_client,
        self._event_push_client,
        redis_queue
    )
    self.wsgi_server = WSGIServer(
        (_web_writer_host, _web_writer_port),
        application=self.application,
        backlog=_wsgi_backlog
    )