def stat(self, collection_id, key, version_id=None):
    """Return the current status rows for a key.

    When *version_id* is given, look up that specific version;
    otherwise look up the current status of (collection_id, key).
    """
    self._log.debug("collection_id=%s, key=%s, version_id=%r" % (
        collection_id, key, version_id
    ))

    # a specific version takes precedence over the key lookup
    if version_id is not None:
        return current_status_of_version(
            self._node_local_connection, version_id
        )

    return current_status_of_key(
        self._node_local_connection,
        collection_id,
        key,
    )
def retrieve_meta(connection, collection_id, key, version_id=None):
    """get a dict of meta data associated with the segment"""
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong

    # resolve the segment status: either a specific version, or the
    # current version of the key
    if version_id is not None:
        status_rows = current_status_of_version(connection, version_id)
    else:
        status_rows = current_status_of_key(connection, collection_id, key)

    # no meta data unless the segment exists and is in its final state
    if not status_rows:
        return None
    if status_rows[0].seg_status != segment_status_final:
        return None

    meta_rows = connection.fetch_all_rows(
        _retrieve_meta_query, [collection_id, status_rows[0].seg_id, ]
    )
    return dict(meta_rows)
def destroy(self, timeout=None):
    """Send destroy_key to every data writer and wait for completion.

    Returns the total seg_file_size of the status rows being destroyed.
    Raises AlreadyInProgress if another operation is pending, and
    DestroyFailedError if no status rows are found.
    """
    if self._pending:
        raise AlreadyInProgress()

    # TODO: find a non-blocking way to do this
    # look up status by explicit unified id when we have one,
    # otherwise by (collection_id, key)
    if self.unified_id_to_delete is not None:
        status_rows = current_status_of_version(
            self._node_local_connection, self.unified_id_to_delete
        )
    else:
        status_rows = current_status_of_key(
            self._node_local_connection,
            self.collection_id,
            self.key
        )

    if not status_rows:
        raise DestroyFailedError("no status rows found")

    file_size = sum(row.seg_file_size for row in status_rows)

    # fan out one destroy_key per data writer; segment numbers are 1-based
    for segment_num, data_writer in enumerate(self.data_writers, 1):
        self._spawn(
            data_writer.destroy_key,
            self.collection_id,
            self.key,
            self.unified_id_to_delete,
            self._unified_id,
            self.timestamp,
            segment_num,
            _local_node_name,
        )

    self._join(timeout)
    self._done = []

    return file_size
def retrieve(self, timeout):
    """Generator: retrieve the key's data from the data readers.

    For each status row (one per conjoined part), repeatedly spawns
    retrieve_key_start / retrieve_key_next tasks against every
    connected data reader, waits for the node replies, and yields one
    result_dict per sequence until the nodes report completion.

    Raises RetrieveFailedError if the key is not found or not yet
    available.
    """
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong
    if self._version_id is None:
        status_rows = current_status_of_key(
            self._node_local_connection, self._collection_id, self._key,
        )
    else:
        status_rows = current_status_of_version(
            self._node_local_connection, self._version_id
        )

    if len(status_rows) == 0:
        raise RetrieveFailedError("key not found %s %s" % (
            self._collection_id, self._key,
        ))

    # availability: a plain segment is available once its status is
    # final; a conjoined upload (con_create_timestamp set) is available
    # only once its complete timestamp has been recorded
    is_available = False
    if status_rows[0].con_create_timestamp is None:
        is_available = status_rows[0].seg_status == segment_status_final
    else:
        is_available = status_rows[0].con_complete_timestamp is not None

    if not is_available:
        raise RetrieveFailedError("key is not available %s %s" % (
            self._collection_id, self._key,
        ))

    for status_row in status_rows:
        # spawn retrieve_key start, then spawn retrieve key next
        # until we are done
        start = True
        while True:
            # each pass through the loop is one request/reply sequence
            self._sequence += 1
            self._log.debug("retrieve: %s %s %s" % (
                self._sequence,
                status_row.seg_unified_id,
                status_row.seg_conjoined_part,
            ))

            # send a request to all node
            for i, data_reader in enumerate(self._data_readers):
                if not data_reader.connected:
                    self._log.warn("ignoring disconnected reader %s" % (
                        str(data_reader),
                    ))
                    continue
                segment_number = i + 1
                # first sequence uses retrieve_key_start, all later
                # sequences use retrieve_key_next
                if start:
                    function = data_reader.retrieve_key_start
                else:
                    function = data_reader.retrieve_key_next
                # NOTE(review): self._pending.spawn looks like a
                # gevent-style greenlet group — confirm; the task is
                # tagged so _process_node_replies can attribute replies
                task = self._pending.spawn(
                    function,
                    status_row.seg_unified_id,
                    status_row.seg_conjoined_part,
                    segment_number
                )
                task.link(self._done_link)
                task.segment_number = segment_number
                task.data_reader = data_reader
                task.sequence = self._sequence

            # wait for, and process, replies from the nodes
            result_dict, completed = self._process_node_replies(timeout)
            self._log.debug("retrieve: completed sequence %s" % (
                self._sequence,
            ))

            yield result_dict
            if completed:
                break
            if start:
                start = False
def test_retrieve_large_content(self):
    """test retrieving content that fits in a multiple messages"""
    # archive the content as slice_count slices of slice_size bytes:
    # one archive-key-start, (slice_count - 2) archive-key-next, and
    # one archive-key-final; then read it back with retrieve-key-start
    # / retrieve-key-next and verify the round trip
    slice_size = 1024 * 1024
    slice_count = 10
    total_size = slice_size * slice_count
    test_data = random_string(total_size)

    collection_id = 1001
    archive_priority = create_priority()
    timestamp = create_timestamp()
    key = self._key_generator.next()
    segment_num = 4
    sequence_num = 0

    # checksums over the whole file, for the final message
    file_adler32 = zlib.adler32(test_data)
    file_md5 = hashlib.md5(test_data)

    # first slice: archive-key-start
    slice_start = 0
    slice_end = slice_size

    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

    message_id = uuid.uuid1().hex
    message = {
        "message-type"          : "archive-key-start",
        "message-id"            : message_id,
        "priority"              : archive_priority,
        "collection-id"         : collection_id,
        "key"                   : key,
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "timestamp-repr"        : repr(timestamp),
        "segment-num"           : segment_num,
        "segment-size"          : len(test_data[slice_start:slice_end]),
        "segment-adler32"       : segment_adler32,
        "segment-md5-digest"    : b64encode(segment_md5.digest()),
        "sequence-num"          : sequence_num,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end]
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-start-reply")
    self.assertEqual(reply["result"], "success")

    # middle slices: archive-key-next for each of the
    # (slice_count - 2) interior slices
    for _ in range(slice_count-2):
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type"          : "archive-key-next",
            "message-id"            : message_id,
            "priority"              : archive_priority,
            "collection-id"         : collection_id,
            "key"                   : key,
            "conjoined-unified-id"  : None,
            "conjoined-part"        : 0,
            "timestamp-repr"        : repr(timestamp),
            "segment-num"           : segment_num,
            "segment-size"          : len(
                test_data[slice_start:slice_end]
            ),
            "segment-adler32"       : segment_adler32,
            "segment-md5-digest"    : b64encode(segment_md5.digest()),
            "sequence-num"          : sequence_num,
        }

        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-next-reply")
        self.assertEqual(reply["result"], "success")

    # last slice: archive-key-final, carrying the whole-file checksums
    sequence_num += 1
    slice_start += slice_size
    slice_end += slice_size
    # sanity check: the final slice must end exactly at total_size
    self.assertEqual(slice_end, total_size)

    segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
    segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

    message_id = uuid.uuid1().hex
    message = {
        "message-type"          : "archive-key-final",
        "message-id"            : message_id,
        "priority"              : archive_priority,
        "collection-id"         : collection_id,
        "key"                   : key,
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "timestamp-repr"        : repr(timestamp),
        "segment-num"           : segment_num,
        "segment-size"          : len(test_data[slice_start:slice_end]),
        "segment-adler32"       : segment_adler32,
        "segment-md5-digest"    : b64encode(segment_md5.digest()),
        "sequence-num"          : sequence_num,
        "file-size"             : total_size,
        "file-adler32"          : file_adler32,
        "file-hash"             : b64encode(file_md5.digest()),
        "handoff-node-name"     : None,
    }

    reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        message,
        data=test_data[slice_start:slice_end]
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "archive-key-final-reply")
    self.assertEqual(reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key
    )

    self.assertEqual(len(segment_rows), 1)

    # now read the content back, one sequence at a time
    retrieved_data_list = list()

    message_id = uuid.uuid1().hex
    message = {
        "message-type"          : "retrieve-key-start",
        "message-id"            : message_id,
        "collection-id"         : collection_id,
        "key"                   : key,
        "timestamp-repr"        : repr(timestamp),
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "segment-num"           : segment_num
    }

    reply, data = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        message
    )
    self.assertEqual(reply["message-id"], message_id)
    self.assertEqual(reply["message-type"], "retrieve-key-reply")
    # more than one message, so the first reply must not be complete
    self.assertEqual(reply["completed"], False)
    print "sequence-num =", reply["sequence-num"]
    retrieved_data_list.append(data)

    # keep issuing retrieve-key-next until the reader reports completed
    while True:
        message_id = uuid.uuid1().hex
        message = {
            "message-type"          : "retrieve-key-next",
            "message-id"            : message_id,
            "collection-id"         : collection_id,
            "key"                   : key,
            "timestamp-repr"        : repr(timestamp),
            "conjoined-unified-id"  : None,
            "conjoined-part"        : 0,
            "segment-num"           : segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address,
            _local_node_name,
            _client_address,
            message
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        retrieved_data_list.append(data)
        print "sequence-num =", reply["sequence-num"]
        if reply["completed"]:
            break

    # the reassembled content must match what was archived
    retrieved_data = "".join(retrieved_data_list)
    self.assertEqual(len(retrieved_data), len(test_data))
    self.assertEqual(retrieved_data, test_data)
def test_retrieve_small_content(self):
    """test retrieving content that fits in a single message"""
    # archive the content in one archive-key-entire message, then
    # read it back with a single retrieve-key-start
    content_size = 10 * 64 * 1024
    content = random_string(content_size)

    collection_id = 1001
    key = self._key_generator.next()
    archive_priority = create_priority()
    timestamp = create_timestamp()
    segment_num = 2

    # single segment, so file and segment checksums are the same
    content_adler32 = zlib.adler32(content)
    content_md5 = hashlib.md5(content)

    archive_message_id = uuid.uuid1().hex
    archive_message = {
        "message-type"          : "archive-key-entire",
        "message-id"            : archive_message_id,
        "priority"              : archive_priority,
        "collection-id"         : collection_id,
        "key"                   : key,
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "timestamp-repr"        : repr(timestamp),
        "segment-num"           : segment_num,
        "segment-size"          : content_size,
        "segment-adler32"       : content_adler32,
        "segment-md5-digest"    : b64encode(content_md5.digest()),
        "file-size"             : content_size,
        "file-adler32"          : content_adler32,
        "file-hash"             : b64encode(content_md5.digest()),
        "handoff-node-name"     : None,
    }

    archive_reply = send_request_and_get_reply(
        _local_node_name,
        _data_writer_address,
        _local_node_name,
        _client_address,
        archive_message,
        data=content
    )
    self.assertEqual(archive_reply["message-id"], archive_message_id)
    self.assertEqual(
        archive_reply["message-type"], "archive-key-final-reply"
    )
    self.assertEqual(archive_reply["result"], "success")

    # get file info from the local database
    _conjoined_row, segment_rows = current_status_of_key(
        self._database_connection, collection_id, key
    )
    self.assertEqual(len(segment_rows), 1)

    retrieve_message_id = uuid.uuid1().hex
    retrieve_message = {
        "message-type"          : "retrieve-key-start",
        "message-id"            : retrieve_message_id,
        "collection-id"         : collection_id,
        "key"                   : key,
        "timestamp-repr"        : repr(timestamp),
        "conjoined-unified-id"  : None,
        "conjoined-part"        : 0,
        "segment-num"           : segment_num
    }

    retrieve_reply, retrieved = send_request_and_get_reply_and_data(
        _local_node_name,
        _data_reader_address,
        _local_node_name,
        _client_address,
        retrieve_message
    )
    self.assertEqual(retrieve_reply["message-id"], retrieve_message_id)
    self.assertEqual(retrieve_reply["message-type"], "retrieve-key-reply")
    # small content fits in one message, so the first reply is complete
    self.assertEqual(retrieve_reply["completed"], True)

    self.assertEqual(len(retrieved), len(content))
    self.assertEqual(retrieved, content)