Exemplo n.º 1
0
    def stat(self, collection_id, key, version_id=None):
        """
        Return the current status rows for *key* in *collection_id*,
        or for one specific version when *version_id* is given.
        """
        self._log.debug("collection_id=%s, key=%s, version_id=%r" % (
            collection_id, key, version_id
        ))

        # An explicit version_id pins the lookup to that version;
        # otherwise we report on whatever version is current.
        if version_id is not None:
            return current_status_of_version(
                self._node_local_connection,
                version_id
            )

        return current_status_of_key(
            self._node_local_connection,
            collection_id,
            key,
        )
Exemplo n.º 2
0
def retrieve_meta(connection, collection_id, key, version_id=None):
    """
    get a dict of meta data associated with the segment
    """
    # TODO: find a non-blocking way to do this
    # TODO: don't just use the local node, it might be wrong

    if version_id is not None:
        status_rows = current_status_of_version(connection, version_id)
    else:
        status_rows = current_status_of_key(connection, collection_id, key)

    # Meta data only exists for a segment that reached final status.
    if not status_rows:
        return None
    if status_rows[0].seg_status != segment_status_final:
        return None

    meta_rows = connection.fetch_all_rows(
        _retrieve_meta_query, [collection_id, status_rows[0].seg_id, ]
    )
    return dict(meta_rows)
Exemplo n.º 3
0
    def destroy(self, timeout=None):
        """
        Send a destroy-key request to every data writer and wait up to
        *timeout* for the replies.

        Returns the total file size of the segments being destroyed.
        Raises AlreadyInProgress if another operation is pending, and
        DestroyFailedError if no status rows exist for the key/version.
        """
        if self._pending:
            raise AlreadyInProgress()

        # TODO: find a non-blocking way to do this
        if self.unified_id_to_delete is not None:
            status_rows = current_status_of_version(
                self._node_local_connection, 
                self.unified_id_to_delete
            )
        else:
            status_rows = current_status_of_key(
                self._node_local_connection, 
                self.collection_id,
                self.key
            )

        if not status_rows:
            raise DestroyFailedError("no status rows found")

        file_size = sum(row.seg_file_size for row in status_rows)

        # One destroy request per data writer; segment numbers are
        # 1-based, matching the writer's position in the list.
        for index, data_writer in enumerate(self.data_writers):
            self._spawn(
                data_writer.destroy_key,
                self.collection_id,
                self.key,
                self.unified_id_to_delete,
                self._unified_id,
                self.timestamp,
                index + 1,
                _local_node_name,
            )
        self._join(timeout)
        self._done = []

        return file_size
Exemplo n.º 4
0
    def retrieve(self, timeout):
        """
        Generator that retrieves the data for a key (or for one specific
        version when self._version_id is set) by fanning requests out to
        the connected data readers.

        Yields one result_dict per completed sequence of node replies.
        Raises RetrieveFailedError if the key is unknown or not yet
        available for retrieval.
        """
        # TODO: find a non-blocking way to do this
        # TODO: don't just use the local node, it might be wrong
        if self._version_id is None:
            status_rows = current_status_of_key(
                self._node_local_connection,
                self._collection_id, 
                self._key,
            )
        else:
            status_rows = current_status_of_version(
                self._node_local_connection, self._version_id
            )

        if len(status_rows) == 0:
            raise RetrieveFailedError("key not found %s %s" % (
                self._collection_id, self._key,
            ))

        # Availability check: a row with no conjoined create timestamp is
        # a plain segment and must have reached final status; a conjoined
        # row is available once the conjoined upload has been completed.
        is_available = False
        if status_rows[0].con_create_timestamp is None:
            is_available = status_rows[0].seg_status == segment_status_final
        else:
            is_available = status_rows[0].con_complete_timestamp is not None

        if not is_available:
            raise RetrieveFailedError("key is not available %s %s" % (
                self._collection_id, self._key,
            ))

        for status_row in status_rows:
            # spawn retrieve_key start, then spawn retrieve key next
            # until we are done
            start = True
            while True:
                self._sequence += 1
                self._log.debug("retrieve: %s %s %s" % (
                    self._sequence, 
                    status_row.seg_unified_id, 
                    status_row.seg_conjoined_part,
                ))
                # send a request to all node
                for i, data_reader in enumerate(self._data_readers):
                    if not data_reader.connected:
                        self._log.warn("ignoring disconnected reader %s" % (
                            str(data_reader),
                        ))
                        continue

                    # segment numbers are 1-based, matching the reader's
                    # position in the list
                    segment_number = i + 1
                    if start:
                        function = data_reader.retrieve_key_start
                    else:
                        function = data_reader.retrieve_key_next
                    task = self._pending.spawn(
                        function, 
                        status_row.seg_unified_id,
                        status_row.seg_conjoined_part,
                        segment_number
                    )
                    task.link(self._done_link)
                    # bookkeeping attached to the task; presumably read
                    # back by _done_link / _process_node_replies to match
                    # replies to requests -- TODO confirm
                    task.segment_number = segment_number
                    task.data_reader = data_reader
                    task.sequence = self._sequence

                # wait for, and process, replies from the nodes
                result_dict, completed = self._process_node_replies(timeout)
                self._log.debug("retrieve: completed sequence %s" % (
                    self._sequence,
                ))

                yield result_dict
                if completed:
                    break

                # after the first pass, subsequent requests use
                # retrieve_key_next instead of retrieve_key_start
                if start:
                    start = False
Exemplo n.º 5
0
    def test_retrieve_large_content(self):
        """test retrieving content that fits in a multiple messages"""
        # archive slice_count slices of slice_size bytes each:
        # one archive-key-start, (slice_count - 2) archive-key-next,
        # and one archive-key-final; then retrieve and compare.
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key  = self._key_generator.next()
        segment_num = 4
        sequence_num = 0

        # checksums over the whole file; per-slice checksums are
        # computed separately below
        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        slice_start = 0
        slice_end = slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        # first slice: archive-key-start
        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "archive-key-start",
            "message-id"                : message_id,
            "priority"                  : archive_priority,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "segment-size"              : len(test_data[slice_start:slice_end]),
            "segment-adler32"           : segment_adler32,
            "segment-md5-digest"        : b64encode(segment_md5.digest()),
            "sequence-num"              : sequence_num,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-start-reply")
        self.assertEqual(reply["result"], "success")

        # middle slices: archive-key-next for each
        for _ in range(slice_count-2):
            sequence_num += 1
            slice_start += slice_size
            slice_end += slice_size
            
            segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
            segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

            message_id = uuid.uuid1().hex
            message = {
                "message-type"              : "archive-key-next",
                "message-id"                : message_id,
                "priority"                  : archive_priority,
                "collection-id"             : collection_id,
                "key"                       : key, 
                "conjoined-unified-id"      : None,
                "conjoined-part"            : 0,
                "timestamp-repr"            : repr(timestamp),
                "segment-num"               : segment_num,
                "segment-size"              : len(
                    test_data[slice_start:slice_end]
                ),
                "segment-adler32"           : segment_adler32,
                "segment-md5-digest"        : b64encode(segment_md5.digest()),
                "sequence-num"              : sequence_num,
            }
            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address, 
                _local_node_name,
                _client_address,
                message, 
                data=test_data[slice_start:slice_end]
            )
            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "archive-key-next-reply")
            self.assertEqual(reply["result"], "success")
        
        # last slice: archive-key-final, which also carries the
        # whole-file size and checksums
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        self.assertEqual(slice_end, total_size)

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "archive-key-final",
            "message-id"                : message_id,
            "priority"                  : archive_priority,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "segment-size"              : len(test_data[slice_start:slice_end]),
            "segment-adler32"           : segment_adler32,
            "segment-md5-digest"        : b64encode(segment_md5.digest()),
            "sequence-num"              : sequence_num,
            "file-size"                 : total_size,
            "file-adler32"              : file_adler32,
            "file-hash"                 : b64encode(file_md5.digest()),
            "handoff-node-name"         : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key
        )

        self.assertEqual(len(segment_rows), 1)

        retrieved_data_list = list()

        # retrieve the first chunk with retrieve-key-start
        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "retrieve-key-start",
            "message-id"                : message_id,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "timestamp-repr"            : repr(timestamp),
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "segment-num"               : segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address, 
            _local_node_name,
            _client_address,
            message 
        )
        
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        # more chunks must follow for multi-message content
        self.assertEqual(reply["completed"], False)
        print "sequence-num =", reply["sequence-num"]

        retrieved_data_list.append(data)

        # retrieve remaining chunks with retrieve-key-next until the
        # reader reports completion
        while True:
            message_id = uuid.uuid1().hex
            message = {
                "message-type"              : "retrieve-key-next",
                "message-id"                : message_id,
                "collection-id"             : collection_id,
                "key"                       : key, 
                "timestamp-repr"            : repr(timestamp),
                "conjoined-unified-id"      : None,
                "conjoined-part"            : 0,
                "segment-num"               : segment_num
            }

            reply, data = send_request_and_get_reply_and_data(
                _local_node_name,
                _data_reader_address, 
                _local_node_name,
                _client_address,
                message 
            )
            
            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "retrieve-key-reply")
            retrieved_data_list.append(data)
            print "sequence-num =", reply["sequence-num"]

            if reply["completed"]:
                break

        # the reassembled chunks must equal the original content
        retrieved_data = "".join(retrieved_data_list)
        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertEqual(retrieved_data, test_data)
Exemplo n.º 6
0
    def test_retrieve_small_content(self):
        """test retrieving content that fits in a single message"""
        # single-message archive: one archive-key-entire request,
        # then one retrieve-key-start that returns everything
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size) 
        collection_id = 1001
        key  = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        # for a single-message archive the segment checksums equal
        # the whole-file checksums
        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "archive-key-entire",
            "message-id"                : message_id,
            "priority"                  : archive_priority,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "segment-size"              : file_size,
            "segment-adler32"           : file_adler32,
            "segment-md5-digest"        : b64encode(file_md5.digest()),
            "file-size"                 : file_size,
            "file-adler32"              : file_adler32,
            "file-hash"                 : b64encode(file_md5.digest()),
            "handoff-node-name"         : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=file_content
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key
        )

        self.assertEqual(len(segment_rows), 1)

        # retrieve the content back from the data reader
        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "retrieve-key-start",
            "message-id"                : message_id,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "timestamp-repr"            : repr(timestamp),
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "segment-num"               : segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address, 
            _local_node_name,
            _client_address,
            message 
        )

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        # a single reply delivers the whole content
        self.assertEqual(reply["completed"], True)
        self.assertEqual(len(data), len(file_content))
        self.assertEqual(data, file_content)