Example #1
def _send_message_receive_reply(data_writers, message, error_tag):
    log = logging.getLogger(error_tag)
    sender_list = list()
    message["priority"] = create_priority()
    pending_group = gevent.pool.Group()
    for data_writer in data_writers:
        # send a copy of the message, so each one gets a separate message-id
        sender = MessageGreenlet(data_writer, message.copy())
        sender_list.append(sender)
        pending_group.start(sender)

    pending_group.join(timeout=_conjoined_timeout)

    for sender in sender_list:
        if not sender.ready():
            log.error("incomplete")
            raise ConjoinedFailedError("%s incomplete" % (error_tag, ))

        if not sender.successful():
            try:
                sender.get()
            except Exception as instance:
                log.exception("")
                raise ConjoinedFailedError("%s %s" % (error_tag, instance, ))

        reply = sender.get()

        if reply["result"] != "success":
            log.error("%s" % (reply, ))
            raise ConjoinedFailedError("%s %s" % (
                error_tag, reply["error-message"]))
Example #2
def _send_message_receive_reply(data_writers, message, error_tag):
    log = logging.getLogger(error_tag)
    sender_list = list()
    message["priority"] = create_priority()
    pending_group = gevent.pool.Group()
    for data_writer in data_writers:
        # send a copy of the message, so each one gets a separate message-id
        sender = MessageGreenlet(data_writer, message.copy())
        sender_list.append(sender)
        pending_group.start(sender)

    pending_group.join(timeout=_conjoined_timeout)

    for sender in sender_list:
        if not sender.ready():
            log.error("incomplete")
            raise ConjoinedFailedError("%s incomplete" % (error_tag, ))

        if not sender.successful():
            try:
                sender.get()
            except Exception as instance:
                log.exception("")
                raise ConjoinedFailedError("%s %s" % (
                    error_tag,
                    instance,
                ))

        reply = sender.get()

        if reply["result"] != "success":
            log.error("%s" % (reply, ))
            raise ConjoinedFailedError("%s %s" %
                                       (error_tag, reply["error-message"]))
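
Examples #1 and #2 rely on a MessageGreenlet class that this excerpt never defines. A minimal sketch follows, assuming the resilient-client API visible in the later examples (queue_message_for_send() returning a delivery channel whose get() yields a (reply, data) pair); only the usage above comes from the source, the body here is an assumption.

import gevent

class MessageGreenlet(gevent.Greenlet):
    """send one message to one data writer and return the reply"""
    def __init__(self, data_writer, message):
        gevent.Greenlet.__init__(self)
        self._data_writer = data_writer
        self._message = message

    def _run(self):
        # queue_message_for_send is assumed to stamp a fresh
        # message-id, which is why the caller passes message.copy()
        delivery_channel = \
            self._data_writer.queue_message_for_send(self._message)
        reply, _data = delivery_channel.get()
        return reply

The greenlet's return value becomes sender.get(), which is how the loop above inspects each reply after pending_group.join().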
Example #3
    def test_handoff_small_content(self):
        """test retrieving content that fits in a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size)
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 5

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        message = {
            "message-type": "archive-key-entire",
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "file-size": file_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "handoff-node-name": None,
        }
        g = gevent.spawn(self._send_message_get_reply, message, file_content)
        g.join(timeout=10.0)
        self.assertEqual(g.ready(), True)
        reply = g.value
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        print >> sys.stderr, "archive successful"
        print >> sys.stderr, "press [Enter] to continue"
        raw_input()
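
random_string() is another helper the excerpt assumes. A plausible stand-in, given that the tests only need file_size bytes of arbitrary content to checksum (the real test utility may differ):

import os

def random_string(size):
    # under Python 2, os.urandom returns a str of raw bytes, which
    # is exactly what zlib.adler32 and hashlib.md5 accept above
    return os.urandom(size)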
Example #4
    def _destroy(self, collection_id, key, unified_id_to_delete, timestamp,
                 segment_num):
        user_request_id = uuid.uuid1().hex
        archive_priority = create_priority()

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type": "destroy-key",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id-to-delete": unified_id_to_delete,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name, _client_address,
                                           message)
        self.assertEqual(reply["message-type"], "destroy-key-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)

        return reply
Example #5
    def destroy_key(
        self,
        collection_id,
        key,
        unified_id_to_delete,
        unified_id,
        timestamp,
        segment_num,
        source_node_name
    ):
        message = {
            "message-type"              : "destroy-key",
            "priority"                  : create_priority(),
            "collection-id"             : collection_id,
            "key"                       : key,
            "unified-id-to-delete"      : unified_id_to_delete,
            "unified-id"                : unified_id,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "source-node-name"          : source_node_name,
            "handoff-node-name"         : None,
        }
        delivery_channel = \
                self._resilient_client.queue_message_for_send(message)

        self._log.debug(
            '%(message-type)s: '
            'key = %(key)r '
            'segment_num = %(segment-num)d' % message
            )
        reply, _data = delivery_channel.get()
        if reply["result"] != "success":
            self._log.error("failed: %s" % (reply, ))
            raise DestroyFailedError(reply["error-message"])
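
Every outgoing message in these examples carries a "priority" produced by create_priority(), which the excerpt never shows. The queue code in Examples #18 and #19 only requires that priorities order correctly under heapq, so a time-derived integer is one plausible sketch (the real tools.data_definitions implementation may differ):

import time

def create_priority():
    # a lower value sorts first in the heap-based message queue,
    # so deriving priority from the clock drains older traffic first
    return int(time.time())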
Example #6
    def _destroy(self, collection_id, key, unified_id_to_delete, timestamp,
                 segment_num):
        user_request_id = uuid.uuid1().hex
        archive_priority = create_priority()

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type"          : "destroy-key",
            "priority"              : archive_priority,
            "user-request-id"       : user_request_id,
            "collection-id"         : collection_id,
            "key"                   : key,
            "unified-id-to-delete"  : unified_id_to_delete,
            "unified-id"            : unified_id,
            "timestamp-repr"        : repr(timestamp),
            "segment-num"           : segment_num,
            "source-node-name"      : _local_node_name,
            "handoff-node-name"     : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message
        )
        self.assertEqual(reply["message-type"], "destroy-key-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        
        return reply
Example #7
    def test_handoff_small_content(self):
        """test retrieving content that fits in a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size) 
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 5

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        message = {
            "message-type"      : "archive-key-entire",
            "priority"          : archive_priority,
            "collection-id"     : collection_id,
            "key"               : key, 
            "timestamp-repr"    : repr(timestamp),
            "segment-num"       : segment_num,
            "file-size"         : file_size,
            "file-adler32"      : file_adler32,
            "file-hash"         : b64encode(file_md5.digest()),
            "handoff-node-name" : None,
        }
        g = gevent.spawn(self._send_message_get_reply, message, file_content)
        g.join(timeout=10.0)
        self.assertEqual(g.ready(), True)
        reply = g.value
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        print >> sys.stderr, "archive successful"
        print >> sys.stderr, "press [Enter] to continue" 
        raw_input()
Example #8
    def destroy_key(
        self,
        collection_id,
        key,
        unified_id_to_delete,
        unified_id,
        timestamp,
        segment_num,
        source_node_name,
        user_request_id
    ):
        message = {
            "message-type"              : "destroy-key",
            "priority"                  : create_priority(),
            "user-request-id"           : user_request_id,
            "collection-id"             : collection_id,
            "key"                       : key,
            "unified-id-to-delete"      : unified_id_to_delete,
            "unified-id"                : unified_id,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "source-node-name"          : source_node_name,
            "handoff-node-name"         : None,
        }
        delivery_channel = \
                self._resilient_client.queue_message_for_send(message)

        self._log.debug("request {user-request-id}: {message-type}: " \
                        "key = {key} " \
                        "timestamp = {timestamp-repr} " \
                        "segment_num = {segment-num}".format(**message))
        reply, _data = delivery_channel.get()
        return reply
Example #9
    def xxxtest_archive_key_entire_with_meta(self):
        """
        test archiving a key in a single message, including meta data
        """
        file_size = 10 * 64 * 1024
        content_item = random_string(file_size) 
        user_request_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        meta_key = "".join([nimbus_meta_prefix, "test_key"])
        meta_value = "pork"

        file_adler32 = zlib.adler32(content_item)
        file_md5 = hashlib.md5(content_item)

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type"      : "archive-key-entire",
            "priority"          : archive_priority,
            "user-request-id"   : user_request_id,
            "collection-id"     : collection_id,
            "key"               : key, 
            "unified-id"        : unified_id,
            "timestamp-repr"    : repr(timestamp),
            "conjoined-part"    : 0,
            "segment-num"       : segment_num,
            "segment-size"      : file_size,
            "zfec-padding-size" : 4,
            "segment-adler32"   : file_adler32,
            "segment-md5-digest": b64encode(file_md5.digest()),
            "file-size"         : file_size,
            "file-adler32"      : file_adler32,
            "file-hash"         : b64encode(file_md5.digest()),
            "source-node-name"  : _local_node_name,
            "handoff-node-name" : None,
            meta_key            : meta_value
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=content_item
        )
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])
Example #10
    def xxxtest_destroy_tombstone(self):
        """test destroying a key that has already been destroyed"""
        file_size = 10 * 64 * 1024
        content_item = random_string(file_size) 
        message_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        archive_timestamp = create_timestamp()
        destroy_1_timestamp = archive_timestamp + timedelta(seconds=1)
        destroy_2_timestamp = destroy_1_timestamp + timedelta(seconds=1)
        segment_num = 2

        file_adler32 = zlib.adler32(content_item)
        file_md5 = hashlib.md5(content_item)

        message = {
            "message-type"      : "archive-key-entire",
            "message-id"        : message_id,
            "priority"          : archive_priority,
            "collection-id"     : collection_id,
            "key"               : key, 
            "timestamp-repr"    : repr(archive_timestamp),
            "segment-num"       : segment_num,
            "segment-size"      : file_size,
            "segment-adler32"   : file_adler32,
            "segment-md5-digest": b64encode(file_md5.digest()),
            "file-size"         : file_size,
            "file-adler32"      : file_adler32,
            "file-hash"         : b64encode(file_md5.digest()),
            "handoff-node-name" : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=content_item
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        reply = self._destroy(
            collection_id, key, destroy_1_timestamp, segment_num
        )
        self.assertEqual(reply["result"], "success", reply["error-message"])

        reply = self._destroy(
            collection_id, key, destroy_2_timestamp, segment_num
        )
        self.assertEqual(reply["result"], "success", reply["error-message"])
Example #11
    def xxxtest_archive_key_entire_with_meta(self):
        """
        test archiving a key in a single message, including meta data
        """
        file_size = 10 * 64 * 1024
        content_item = random_string(file_size)
        user_request_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        meta_key = "".join([nimbus_meta_prefix, "test_key"])
        meta_value = "pork"

        file_adler32 = zlib.adler32(content_item)
        file_md5 = hashlib.md5(content_item)

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type": "archive-key-entire",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": file_size,
            "zfec-padding-size": 4,
            "segment-adler32": file_adler32,
            "segment-md5-digest": b64encode(file_md5.digest()),
            "file-size": file_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
            meta_key: meta_value
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name,
                                           _client_address,
                                           message,
                                           data=content_item)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])
Example #12
    def archive_key_entire(
        self,
        collection_id,
        key,
        unified_id,
        timestamp,
        conjoined_part,
        meta_dict,
        segment_num,
        zfec_padding_size,
        file_size,
        file_adler32,
        file_md5,
        segment,
        source_node_name,
    ):
        segment_md5 = hashlib.md5()
        segment_md5.update(segment)

        message = {
            "message-type"              : "archive-key-entire",
            "priority"                  : create_priority(),
            "collection-id"             : collection_id,
            "key"                       : key, 
            "unified-id"                : unified_id,
            "timestamp-repr"            : repr(timestamp),
            "conjoined-part"            : conjoined_part,
            "segment-num"               : segment_num,
            "segment-size"              : len(segment),
            "zfec-padding-size"         : zfec_padding_size,
            "segment-md5-digest"        : b64encode(segment_md5.digest()),
            "segment-adler32"           : zlib.adler32(segment),
            "file-size"                 : file_size,
            "file-adler32"              : file_adler32,
            "file-hash"                 : b64encode(file_md5),
            "source-node-name"          : source_node_name,
            "handoff-node-name"         : None,
        }
        message.update(meta_dict)
        delivery_channel = self._resilient_client.queue_message_for_send(
            message, data=segment
        )
        self._log.debug(
            '%(message-type)s: '
            'key = %(key)r '
            'timestamp = %(timestamp-repr)r '
            'segment_num = %(segment-num)d' % message
            )
        reply, _data = delivery_channel.get()
        if reply["result"] != "success":
            self._log.error("failed: %s" % (reply, ))
            raise ArchiveFailedError(reply["error-message"])
Example #13
def _send_archive_cancel(user_request_id, unified_id, conjoined_part, clients):
    # message sent to data writers telling them to cancel the archive
    for i, client in enumerate(clients):
        if not client.connected:
            continue
        cancel_message = {
            "message-type"      : "archive-key-cancel",            
            "priority"          : create_priority(),
            "user-request-id"   : user_request_id,
            "unified-id"        : unified_id,
            "conjoined-part"    : conjoined_part,
            "segment-num"       : i+1,
        }
        client.queue_message_for_broadcast(cancel_message)
Example #14
    def archive_key_entire(
        self,
        collection_id,
        key,
        unified_id,
        timestamp,
        conjoined_part,
        meta_dict,
        segment_num,
        zfec_padding_size,
        file_size,
        file_adler32,
        file_md5,
        segment,
        source_node_name,
        user_request_id
    ):
        segment_size, segment_adler32, segment_md5 = \
                _segment_properties(segment)

        message = {
            "message-type"              : "archive-key-entire",
            "priority"                  : create_priority(),
            "user-request-id"           : user_request_id,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "unified-id"                : unified_id,
            "timestamp-repr"            : repr(timestamp),
            "conjoined-part"            : conjoined_part,
            "segment-num"               : segment_num,
            "segment-size"              : segment_size,
            "zfec-padding-size"         : zfec_padding_size,
            "segment-md5-digest"        : b64encode(segment_md5.digest()),
            "segment-adler32"           : segment_adler32,
            "file-size"                 : file_size,
            "file-adler32"              : file_adler32,
            "file-hash"                 : b64encode(file_md5),
            "source-node-name"          : source_node_name,
            "handoff-node-name"         : None,
        }
        message.update(meta_dict)
        delivery_channel = self._resilient_client.queue_message_for_send(
            message, data=segment
        )
        self._log.debug("request {user-request-id}: {message-type}: " \
                        "key = {key} " \
                        "timestamp = {timestamp-repr} " \
                        "segment_num = {segment-num}".format(**message))
        reply, _data = delivery_channel.get()
        return reply
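
Example #14 folds the inline checksum work of Example #12 into a _segment_properties() helper that the excerpt omits. Reconstructed from Example #12, a sketch would look like this (the details of the real helper are an assumption):

import hashlib
import zlib

def _segment_properties(segment):
    # size, adler32 checksum, and an md5 object; the caller
    # base64-encodes segment_md5.digest(), as in Example #12
    segment_md5 = hashlib.md5()
    segment_md5.update(segment)
    return len(segment), zlib.adler32(segment), segment_md5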
Example #15
def _send_archive_cancel(user_request_id, unified_id, conjoined_part, clients):
    # message sent to data writers telling them to cancel the archive
    for i, client in enumerate(clients):
        if not client.connected:
            continue
        cancel_message = {
            "message-type": "archive-key-cancel",
            "priority": create_priority(),
            "user-request-id": user_request_id,
            "unified-id": unified_id,
            "conjoined-part": conjoined_part,
            "segment-num": i + 1,
        }
        client.queue_message_for_broadcast(cancel_message)
Example #16
def _handle_archive_reply(state, message, _data):
    log = logging.getLogger("_handle_archive_reply")

    #TODO: we need to squawk about this somehow
    if message["result"] != "success":
        error_message = "%s failed (%s) %s %s" % (
            message["message-type"], 
            message["result"], 
            message["error-message"], 
            message,
        )
        log.error(error_message)
        raise HandoffError(error_message)

    result = state["forwarder"].send(message)

    if result is not None:
        state["forwarder"] = None

        segment_row, source_node_names = result

        description = "handoff complete %s %s %s %s" % (
            segment_row.collection_id,
            segment_row.key,
            segment_row.timestamp,
            segment_row.segment_num,
        )
        log.info(description)

        state["event-push-client"].info(
            "handoff-complete",
            description,
            backup_sources=source_node_names,
            collection_id=segment_row.collection_id,
            key=segment_row.key,
            timestamp_repr=repr(segment_row.timestamp)
        )
        
        # purge the handoff source(s)
        message = {
            "message-type"      : "purge-handoff-source",
            "priority"          : create_priority(),
            "collection-id"     : segment_row.collection_id,
            "unified-id"        : segment_row.unified_id,
            "handoff-node-id"   : segment_row.handoff_node_id
        }
        for source_node_name in source_node_names:
            writer_client = state["writer-client-dict"][source_node_name]
            writer_client.queue_message_for_send(message)
Example #17
    def xxxtest_destroy_tombstone(self):
        """test destroying a key that has already been destroyed"""
        file_size = 10 * 64 * 1024
        content_item = random_string(file_size)
        message_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        archive_timestamp = create_timestamp()
        destroy_1_timestamp = archive_timestamp + timedelta(seconds=1)
        destroy_2_timestamp = destroy_1_timestamp + timedelta(seconds=1)
        segment_num = 2

        file_adler32 = zlib.adler32(content_item)
        file_md5 = hashlib.md5(content_item)

        message = {
            "message-type": "archive-key-entire",
            "message-id": message_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(archive_timestamp),
            "segment-num": segment_num,
            "segment-size": file_size,
            "segment-adler32": file_adler32,
            "segment-md5-digest": b64encode(file_md5.digest()),
            "file-size": file_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name,
                                           _client_address,
                                           message,
                                           data=content_item)
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        reply = self._destroy(collection_id, key, destroy_1_timestamp,
                              segment_num)
        self.assertEqual(reply["result"], "success", reply["error-message"])

        reply = self._destroy(collection_id, key, destroy_2_timestamp,
                              segment_num)
        self.assertEqual(reply["result"], "success", reply["error-message"])
Example #18
    def append(self, message_tuple):
        """
        add a message to the queue

        The message must be created by tools.data_definitions.message_format

        It must have a message["priority"] entry
        """
        try:
            priority = message_tuple[0]["priority"]
        except KeyError:
            self._log.error("message lacks priority %s" % (message_tuple[0], ))
            priority = create_priority()

        heapq.heappush(
            self._internal_queue, 
            (priority, self._counter.next(), message_tuple, )
        )
Example #19
    def append(self, message_tuple):
        """
        add a message to the queue

        The message must be created by tools.data_definitions.message_format

        It must have a message["priority"] entry
        """
        try:
            priority = message_tuple[0]["priority"]
        except KeyError:
            self._log.error("message lacks priority %s" % (message_tuple[0], ))
            priority = create_priority()

        heapq.heappush(self._internal_queue, (
            priority,
            self._counter.next(),
            message_tuple,
        ))
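
The (priority, counter, message_tuple) heap entry is worth a note: the counter preserves FIFO order among equal priorities and guarantees heapq never falls back to comparing the message dicts themselves. A self-contained demonstration (class name and values hypothetical):

import heapq
import itertools

class DemoQueue(object):
    def __init__(self):
        self._internal_queue = list()
        self._counter = itertools.count()

    def append(self, message_tuple):
        priority = message_tuple[0]["priority"]
        heapq.heappush(
            self._internal_queue,
            (priority, self._counter.next(), message_tuple, )
        )

    def pop(self):
        _priority, _count, message_tuple = \
            heapq.heappop(self._internal_queue)
        return message_tuple

queue = DemoQueue()
queue.append(({"priority": 2, "message-type": "later"}, None, ))
queue.append(({"priority": 1, "message-type": "sooner"}, None, ))
assert queue.pop()[0]["message-type"] == "sooner"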
Example #20
    def test_archive_key_entire(self):
        """test archiving all data for a key in a single message"""
        file_size = 10 * 64 * 1024
        content_item = random_string(file_size) 
        message_id = uuid.uuid1().hex
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        file_adler32 = zlib.adler32(content_item)
        file_md5 = hashlib.md5(content_item)

        message = {
            "message-type"      : "archive-key-entire",
            "message-id"        : message_id,
            "priority"          : archive_priority,
            "collection-id"     : collection_id,
            "key"               : key, 
            "timestamp-repr"    : repr(timestamp),
            "segment-num"       : segment_num,
            "segment-size"      : file_size,
            "segment-adler32"   : file_adler32,
            "segment-md5-digest": b64encode(file_md5.digest()),
            "file-size"         : file_size,
            "file-adler32"      : file_adler32,
            "file-hash"         : b64encode(file_md5.digest()),
            "handoff-node-name" : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=content_item
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")
Example #21
    def _purge(self, collection_id, key, timestamp, segment_num):
        message_id = uuid.uuid1().hex
        archive_priority = create_priority()
        message = {
            "message-type"      : "purge-key",
            "message-id"        : message_id,
            "priority"          : archive_priority,
            "collection-id"     : collection_id,
            "key"               : key,
            "timestamp-repr"    : repr(timestamp),
            "segment-num"       : segment_num,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "purge-key-reply")

        return reply
Example #22
    def test_retrieve_large_content(self):
        """test retrieving content that fits in a multiple messages"""
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4
        sequence_num = 0

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        slice_start = 0
        slice_end = slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "archive-key-start",
            "message-id"                : message_id,
            "priority"                  : archive_priority,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "segment-size"              : len(test_data[slice_start:slice_end]),
            "segment-adler32"           : segment_adler32,
            "segment-md5-digest"        : b64encode(segment_md5.digest()),
            "sequence-num"              : sequence_num,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-start-reply")
        self.assertEqual(reply["result"], "success")

        for _ in range(slice_count-2):
            sequence_num += 1
            slice_start += slice_size
            slice_end += slice_size
            
            segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
            segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

            message_id = uuid.uuid1().hex
            message = {
                "message-type"              : "archive-key-next",
                "message-id"                : message_id,
                "priority"                  : archive_priority,
                "collection-id"             : collection_id,
                "key"                       : key, 
                "conjoined-unified-id"      : None,
                "conjoined-part"            : 0,
                "timestamp-repr"            : repr(timestamp),
                "segment-num"               : segment_num,
                "segment-size"              : len(
                    test_data[slice_start:slice_end]
                ),
                "segment-adler32"           : segment_adler32,
                "segment-md5-digest"        : b64encode(segment_md5.digest()),
                "sequence-num"              : sequence_num,
            }
            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address, 
                _local_node_name,
                _client_address,
                message, 
                data=test_data[slice_start:slice_end]
            )
            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "archive-key-next-reply")
            self.assertEqual(reply["result"], "success")
        
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        self.assertEqual(slice_end, total_size)

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "archive-key-final",
            "message-id"                : message_id,
            "priority"                  : archive_priority,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "segment-size"              : len(test_data[slice_start:slice_end]),
            "segment-adler32"           : segment_adler32,
            "segment-md5-digest"        : b64encode(segment_md5.digest()),
            "sequence-num"              : sequence_num,
            "file-size"                 : total_size,
            "file-adler32"              : file_adler32,
            "file-hash"                 : b64encode(file_md5.digest()),
            "handoff-node-name"         : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key
        )

        self.assertEqual(len(segment_rows), 1)

        retrieved_data_list = list()

        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "retrieve-key-start",
            "message-id"                : message_id,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "timestamp-repr"            : repr(timestamp),
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "segment-num"               : segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address, 
            _local_node_name,
            _client_address,
            message 
        )
        
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        self.assertEqual(reply["completed"], False)
        print "sequence-num =", reply["sequence-num"]

        retrieved_data_list.append(data)

        while True:
            message_id = uuid.uuid1().hex
            message = {
                "message-type"              : "retrieve-key-next",
                "message-id"                : message_id,
                "collection-id"             : collection_id,
                "key"                       : key, 
                "timestamp-repr"            : repr(timestamp),
                "conjoined-unified-id"      : None,
                "conjoined-part"            : 0,
                "segment-num"               : segment_num
            }

            reply, data = send_request_and_get_reply_and_data(
                _local_node_name,
                _data_reader_address, 
                _local_node_name,
                _client_address,
                message 
            )
            
            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "retrieve-key-reply")
            retrieved_data_list.append(data)
            print "sequence-num =", reply["sequence-num"]

            if reply["completed"]:
                break

        retrieved_data = "".join(retrieved_data_list)
        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertEqual(retrieved_data, test_data)
Example #23
    def test_retrieve_small_content(self):
        """test retrieving content that fits in a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size)
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        message_id = uuid.uuid1().hex
        message = {
            "message-type": "archive-key-entire",
            "message-id": message_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "segment-size": file_size,
            "segment-adler32": file_adler32,
            "segment-md5-digest": b64encode(file_md5.digest()),
            "file-size": file_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(_local_node_name,
                                           _data_writer_address,
                                           _local_node_name,
                                           _client_address,
                                           message,
                                           data=file_content)
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key)

        self.assertEqual(len(segment_rows), 1)

        message_id = uuid.uuid1().hex
        message = {
            "message-type": "retrieve-key-start",
            "message-id": message_id,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(timestamp),
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "segment-num": segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name, _data_reader_address, _local_node_name,
            _client_address, message)

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        self.assertEqual(reply["completed"], True)
        self.assertEqual(len(data), len(file_content))
        self.assertEqual(data, file_content)
Example #24
    def test_retrieve_large_content(self):
        """test retrieving content that fits in a multiple messages"""
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4
        sequence_num = 0

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        slice_start = 0
        slice_end = slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type": "archive-key-start",
            "message-id": message_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "segment-adler32": segment_adler32,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "sequence-num": sequence_num,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-start-reply")
        self.assertEqual(reply["result"], "success")

        for _ in range(slice_count - 2):
            sequence_num += 1
            slice_start += slice_size
            slice_end += slice_size

            segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
            segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

            message_id = uuid.uuid1().hex
            message = {
                "message-type": "archive-key-next",
                "message-id": message_id,
                "priority": archive_priority,
                "collection-id": collection_id,
                "key": key,
                "conjoined-unified-id": None,
                "conjoined-part": 0,
                "timestamp-repr": repr(timestamp),
                "segment-num": segment_num,
                "segment-size": len(test_data[slice_start:slice_end]),
                "segment-adler32": segment_adler32,
                "segment-md5-digest": b64encode(segment_md5.digest()),
                "sequence-num": sequence_num,
            }
            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address,
                _local_node_name,
                _client_address,
                message,
                data=test_data[slice_start:slice_end])
            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "archive-key-next-reply")
            self.assertEqual(reply["result"], "success")

        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size
        self.assertEqual(slice_end, total_size)

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message_id = uuid.uuid1().hex
        message = {
            "message-type": "archive-key-final",
            "message-id": message_id,
            "priority": archive_priority,
            "collection-id": collection_id,
            "key": key,
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "timestamp-repr": repr(timestamp),
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "segment-adler32": segment_adler32,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "sequence-num": sequence_num,
            "file-size": total_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key)

        self.assertEqual(len(segment_rows), 1)

        retrieved_data_list = list()

        message_id = uuid.uuid1().hex
        message = {
            "message-type": "retrieve-key-start",
            "message-id": message_id,
            "collection-id": collection_id,
            "key": key,
            "timestamp-repr": repr(timestamp),
            "conjoined-unified-id": None,
            "conjoined-part": 0,
            "segment-num": segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name, _data_reader_address, _local_node_name,
            _client_address, message)

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        self.assertEqual(reply["completed"], False)
        print "sequence-num =", reply["sequence-num"]

        retrieved_data_list.append(data)

        while True:
            message_id = uuid.uuid1().hex
            message = {
                "message-type": "retrieve-key-next",
                "message-id": message_id,
                "collection-id": collection_id,
                "key": key,
                "timestamp-repr": repr(timestamp),
                "conjoined-unified-id": None,
                "conjoined-part": 0,
                "segment-num": segment_num
            }

            reply, data = send_request_and_get_reply_and_data(
                _local_node_name, _data_reader_address, _local_node_name,
                _client_address, message)

            self.assertEqual(reply["message-id"], message_id)
            self.assertEqual(reply["message-type"], "retrieve-key-reply")
            retrieved_data_list.append(data)
            print "sequence-num =", reply["sequence-num"]

            if reply["completed"]:
                break

        retrieved_data = "".join(retrieved_data_list)
        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertEqual(retrieved_data, test_data)
Example #25
    def xxxtest_large_archive(self):
        """
        test archiving a file that needs more than one message.
        For example, a 10 MB file: each node would get ten 120KB
        zfec shares.
        """
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        user_request_id = uuid.uuid1().hex

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4
        sequence_num = 0

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        slice_start = 0
        slice_end = slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type"      : "archive-key-start",
            "priority"          : archive_priority,
            "user-request-id"   : user_request_id,
            "collection-id"     : collection_id,
            "key"               : key, 
            "unified-id"        : unified_id,
            "timestamp-repr"    : repr(timestamp),
            "conjoined-part"    : 0,
            "segment-num"       : segment_num,
            "segment-size"      : len(test_data[slice_start:slice_end]),
            "zfec-padding-size" : 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32"   : segment_adler32,
            "sequence-num"      : sequence_num,
            "source-node-name"  : _local_node_name,
            "handoff-node-name" : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=test_data[slice_start:slice_end]
        )
        self.assertEqual(reply["message-type"], "archive-key-start-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

        for _ in range(slice_count-2):
            sequence_num += 1
            slice_start += slice_size
            slice_end += slice_size

            segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
            segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

            message_id = uuid.uuid1().hex
            message = {
                "message-type"      : "archive-key-next",
                "priority"          : archive_priority,
                "user-request-id"   : user_request_id,
                "collection-id"     : collection_id,
                "key"               : key, 
                "unified-id"        : unified_id,
                "timestamp-repr"    : repr(timestamp),
                "conjoined-part"    : 0,
                "segment-num"       : segment_num,
                "segment-size"      : len(test_data[slice_start:slice_end]),
                "zfec-padding-size" : 4,
                "segment-md5-digest": b64encode(segment_md5.digest()),
                "segment-adler32"   : segment_adler32,
                "sequence-num"      : sequence_num,
                "source-node-name"  : _local_node_name,
                "handoff-node-name" : None,
            }
            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address, 
                _local_node_name,
                _client_address,
                message, 
                data=test_data[slice_start:slice_end]
            )
            self.assertEqual(reply["message-type"], "archive-key-next-reply")
            self.assertEqual(reply["user-request-id"], user_request_id)
            self.assertEqual(reply["result"], "success", reply["error-message"])
        
        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message = {
            "message-type"      : "archive-key-final",
            "priority"          : archive_priority,
            "user-request-id"   : user_request_id,
            "collection-id"     : collection_id,
            "key"               : key, 
            "unified-id"        : unified_id,
            "timestamp-repr"    : repr(timestamp),
            "conjoined-part"    : 0,
            "segment-num"       : segment_num,
            "segment-size"      : len(test_data[slice_start:slice_end]),
            "zfec-padding-size" : 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32"   : segment_adler32,
            "sequence-num"      : sequence_num,
            "file-size"         : total_size,
            "file-adler32"      : file_adler32,
            "file-hash"         : b64encode(file_md5.digest()),
            "source-node-name"  : _local_node_name,
            "handoff-node-name" : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=test_data[slice_start:slice_end]
        )

        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])
Example #26
def forwarder_coroutine(node_dict, segment_row, writer_socket, reader_socket):
    """
    manage the message traffic for retrieving and re-archiving 
    a segment that was handed off to us
    """
    log = logging.getLogger("forwarder_coroutine")
    user_request_id = str(uuid.uuid4())
    archive_priority = create_priority()
    retrieve_id = uuid.uuid1().hex
    retrieve_sequence = 0

    # start retrieving from our reader
    message = {
        "message-type"              : "retrieve-key-start",
        "user-request-id"           : user_request_id,
        "retrieve-id"               : retrieve_id,
        "retrieve-sequence"         : retrieve_sequence,
        "collection-id"             : segment_row["collection_id"],
        "key"                       : segment_row["key"],
        "segment-unified-id"        : segment_row["unified_id"],
        "segment-conjoined-part"    : segment_row["conjoined_part"],
        "segment-num"               : segment_row["segment_num"],
        "handoff-node-id"           : segment_row["handoff_node_id"],
        "block-offset"              : 0,
        "block-count"               : None,
    }

    log.debug("request {0}: " \
              "sending retrieve-key-start {1} {2}".format(
              user_request_id,
              segment_row["unified_id"], 
              segment_row["segment_num"]))
    
    reader_socket.send(message)
    try:
        reader_socket.wait_for_ack()
    except ReqSocketAckTimeOut as instance:
        log.error("request {0}: " \
                  "timeout waiting ack {1} {2}".format(user_request_id,
                                                       str(reader_socket), 
                                                       str(instance)))
        raise

    reply, data = yield

    assert reply["message-type"] == "retrieve-key-reply", reply
    assert reply["result"] == "success", reply
    completed = reply["completed"]

    sequence = 1

    if completed:
        message = {
            "message-type"      : "archive-key-entire",
            "user-request-id"   : user_request_id,
            "priority"          : archive_priority,
            "collection-id"     : segment_row["collection_id"],
            "key"               : segment_row["key"], 
            "unified-id"        : segment_row["unified_id"],
            "conjoined-part"    : segment_row["conjoined_part"],
            "timestamp-repr"    : repr(segment_row["timestamp"]),
            "segment-num"       : segment_row["segment_num"],
            "segment-size"      : reply["segment-size"],
            "zfec-padding-size" : reply["zfec-padding-size"],
            "segment-adler32"   : reply["segment-adler32"],
            "segment-md5-digest": reply["segment-md5-digest"],
            "file-size"         : segment_row["file_size"],
            "file-adler32"      : segment_row["file_adler32"],
            "file-hash"         : segment_row["file_hash"],
            "source-node-name"  : node_dict[segment_row["source_node_id"]],
            "handoff-node-name" : None,
        }
    else:
        message = {
            "message-type"      : "archive-key-start",
            "user-request-id"   : user_request_id,
            "priority"          : archive_priority,
            "collection-id"     : segment_row["collection_id"],
            "key"               : segment_row["key"], 
            "unified-id"        : segment_row["unified_id"],
            "conjoined-part"    : segment_row["conjoined_part"],
            "timestamp-repr"    : repr(segment_row["timestamp"]),
            "segment-num"       : segment_row["segment_num"],
            "segment-size"      : reply["segment-size"],
            "zfec-padding-size" : reply["zfec-padding-size"],
            "segment-adler32"   : reply["segment-adler32"],
            "segment-md5-digest": reply["segment-md5-digest"],
            "sequence-num"      : sequence,
            "source-node-name"  : node_dict[segment_row["source_node_id"]],
            "handoff-node-name" : None,
        }
            
    writer_socket.send(message, data=data)
    try:
        writer_socket.wait_for_ack()
    except ReqSocketAckTimeOut as instance:
        log.error("request {0}: " \
                  "timeout waiting ack {1} {2}".format(user_request_id,
                                                       str(writer_socket), 
                                                       str(instance)))
        raise
    reply, _ = yield

    if completed:
        yield "done"
        return 

    assert reply["message-type"] == "archive-key-start-reply", reply
    assert reply["result"] == "success", reply

    # send the intermediate segments
    while not completed:
        retrieve_sequence += 1
        sequence += 1

        message = {
            "message-type"              : "retrieve-key-next",
             "user-request-id"          : user_request_id,
            "retrieve-id"               : retrieve_id,
            "retrieve-sequence"         : retrieve_sequence,
            "collection-id"             : segment_row["collection_id"],
            "key"                       : segment_row["key"],
            "segment-unified-id"        : segment_row["unified_id"],
            "segment-conjoined-part"    : segment_row["conjoined_part"],
            "segment-num"               : segment_row["segment_num"],
            "handoff-node-id"           : segment_row["handoff_node_id"],
            "block-offset"              : 0,
            "block-count"               : None,
        }
        reader_socket.send(message)
        try:
            reader_socket.wait_for_ack()
        except ReqSocketAckTimeOut as instance:
            log.error("request {0}: " \
                      "timeout waiting ack {1} {2}".format(user_request_id,
                                                           str(reader_socket), 
                                                           str(instance)))
            raise

        reply, data = yield
        assert reply["message-type"] == "retrieve-key-reply", reply
        assert reply["result"] == "success", reply
        completed = reply["completed"]

        if completed:
            message = {
                "message-type"      : "archive-key-final",
                "user-request-id"   : user_request_id,
                "priority"          : archive_priority,
                "collection-id"     : segment_row["collection_id"],
                "key"               : segment_row["key"],
                "unified-id"        : segment_row["unified_id"],
                "conjoined-part"    : segment_row["conjoined_part"],
                "timestamp-repr"    : repr(segment_row["timestamp"]),
                "segment-num"       : segment_row["segment_num"],
                "segment-size"      : reply["segment-size"],
                "zfec-padding-size" : reply["zfec-padding-size"],
                "segment-adler32"   : reply["segment-adler32"],
                "segment-md5-digest": reply["segment-md5-digest"],
                "sequence-num"      : sequence,
                "file-size"         : segment_row["file_size"],
                "file-adler32"      : segment_row["file_adler32"],
                "file-hash"         : segment_row["file_hash"],
                "source-node-name"  : node_dict[segment_row["source_node_id"]],
                "handoff-node-name" : None,
            }
        else:
            message = {
                "message-type"      : "archive-key-next",
                "user-request-id"   : user_request_id,
                "priority"          : archive_priority,
                "collection-id"     : segment_row["collection_id"],
                "key"               : segment_row["key"],
                "unified-id"        : segment_row["unified_id"],
                "conjoined-part"    : segment_row["conjoined_part"],
                "timestamp-repr"    : repr(segment_row["timestamp"]),
                "segment-num"       : segment_row["segment_num"],
                "segment-size"      : reply["segment-size"],
                "zfec-padding-size" : reply["zfec-padding-size"],
                "segment-adler32"   : reply["segment-adler32"],
                "segment-md5-digest": reply["segment-md5-digest"],
                "sequence-num"      : sequence,
                "source-node-name"  : node_dict[segment_row["source_node_id"]],
                "handoff-node-name" : None,
            }
        
        writer_socket.send(message, data=data)
        try:
            writer_socket.wait_for_ack()
        except ReqSocketAckTimeOut as instance:
            log.error("request {0}: " \
                      "timeout waiting ack {1} {2}".format(user_request_id,
                                                           str(writer_socket), 
                                                           str(instance)))
            raise
        reply, _ = yield
        assert reply["result"] == "success", reply

    yield "done"
Example #27
0
    # the "xxx" prefix keeps unittest's default collector from running
    # this test; rename it back to test_large_archive to enable it
    def xxxtest_large_archive(self):
        """
        test archiving a file that needs more than one message.
        For example, a 10 MB file: each node would get ten 120 KB
        zfec shares.
        """
        slice_size = 1024 * 1024
        slice_count = 10
        total_size = slice_size * slice_count
        test_data = random_string(total_size)

        user_request_id = uuid.uuid1().hex

        collection_id = 1001
        archive_priority = create_priority()
        timestamp = create_timestamp()
        key = self._key_generator.next()
        segment_num = 4
        sequence_num = 0

        file_adler32 = zlib.adler32(test_data)
        file_md5 = hashlib.md5(test_data)

        slice_start = 0
        slice_end = slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        unified_id_factory = UnifiedIDFactory(1)
        unified_id = unified_id_factory.next()

        message = {
            "message-type": "archive-key-start",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "zfec-padding-size": 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32": segment_adler32,
            "sequence-num": sequence_num,
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])
        self.assertEqual(reply["message-type"], "archive-key-start-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

        for _ in range(slice_count - 2):
            sequence_num += 1
            slice_start += slice_size
            slice_end += slice_size

            segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
            segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

            message = {
                "message-type": "archive-key-next",
                "priority": archive_priority,
                "user-request-id": user_request_id,
                "collection-id": collection_id,
                "key": key,
                "unified-id": unified_id,
                "timestamp-repr": repr(timestamp),
                "conjoined-part": 0,
                "segment-num": segment_num,
                "segment-size": len(test_data[slice_start:slice_end]),
                "zfec-padding-size": 4,
                "segment-md5-digest": b64encode(segment_md5.digest()),
                "segment-adler32": segment_adler32,
                "sequence-num": sequence_num,
                "source-node-name": _local_node_name,
                "handoff-node-name": None,
            }
            reply = send_request_and_get_reply(
                _local_node_name,
                _data_writer_address,
                _local_node_name,
                _client_address,
                message,
                data=test_data[slice_start:slice_end])
            self.assertEqual(reply["message-type"], "archive-key-next-reply")
            self.assertEqual(reply["user-request-id"], user_request_id)
            self.assertEqual(reply["result"], "success",
                             reply["error-message"])

        sequence_num += 1
        slice_start += slice_size
        slice_end += slice_size

        segment_adler32 = zlib.adler32(test_data[slice_start:slice_end])
        segment_md5 = hashlib.md5(test_data[slice_start:slice_end])

        message = {
            "message-type": "archive-key-final",
            "priority": archive_priority,
            "user-request-id": user_request_id,
            "collection-id": collection_id,
            "key": key,
            "unified-id": unified_id,
            "timestamp-repr": repr(timestamp),
            "conjoined-part": 0,
            "segment-num": segment_num,
            "segment-size": len(test_data[slice_start:slice_end]),
            "zfec-padding-size": 4,
            "segment-md5-digest": b64encode(segment_md5.digest()),
            "segment-adler32": segment_adler32,
            "sequence-num": sequence_num,
            "file-size": total_size,
            "file-adler32": file_adler32,
            "file-hash": b64encode(file_md5.digest()),
            "source-node-name": _local_node_name,
            "handoff-node-name": None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address,
            _local_node_name,
            _client_address,
            message,
            data=test_data[slice_start:slice_end])

        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["user-request-id"], user_request_id)
        self.assertEqual(reply["result"], "success", reply["error-message"])

    def test_retrieve_small_content(self):
        """test retrieving content that fits in a single message"""
        file_size = 10 * 64 * 1024
        file_content = random_string(file_size) 
        collection_id = 1001
        key = self._key_generator.next()
        archive_priority = create_priority()
        timestamp = create_timestamp()
        segment_num = 2

        file_adler32 = zlib.adler32(file_content)
        file_md5 = hashlib.md5(file_content)

        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "archive-key-entire",
            "message-id"                : message_id,
            "priority"                  : archive_priority,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "timestamp-repr"            : repr(timestamp),
            "segment-num"               : segment_num,
            "segment-size"              : file_size,
            "segment-adler32"           : file_adler32,
            "segment-md5-digest"        : b64encode(file_md5.digest()),
            "file-size"                 : file_size,
            "file-adler32"              : file_adler32,
            "file-hash"                 : b64encode(file_md5.digest()),
            "handoff-node-name"         : None,
        }
        reply = send_request_and_get_reply(
            _local_node_name,
            _data_writer_address, 
            _local_node_name,
            _client_address,
            message, 
            data=file_content
        )
        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "archive-key-final-reply")
        self.assertEqual(reply["result"], "success")

        # get file info from the local database
        _conjoined_row, segment_rows = current_status_of_key(
            self._database_connection, collection_id, key
        )

        self.assertEqual(len(segment_rows), 1)

        message_id = uuid.uuid1().hex
        message = {
            "message-type"              : "retrieve-key-start",
            "message-id"                : message_id,
            "collection-id"             : collection_id,
            "key"                       : key, 
            "timestamp-repr"            : repr(timestamp),
            "conjoined-unified-id"      : None,
            "conjoined-part"            : 0,
            "segment-num"               : segment_num
        }

        reply, data = send_request_and_get_reply_and_data(
            _local_node_name,
            _data_reader_address, 
            _local_node_name,
            _client_address,
            message 
        )

        self.assertEqual(reply["message-id"], message_id)
        self.assertEqual(reply["message-type"], "retrieve-key-reply")
        self.assertEqual(reply["completed"], True)
        self.assertEqual(len(data), len(file_content))
        self.assertEqual(data, file_content)
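For completeness, the retrieved payload could also be checked against the checksums computed at archive time; a minimal sketch using names already in scope in this test (these assertions are not part of the original):

        self.assertEqual(zlib.adler32(data), file_adler32)
        self.assertEqual(hashlib.md5(data).digest(), file_md5.digest())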
Example #28
0
def forwarder_coroutine(
    node_name_dict, 
    segment_row, 
    source_node_names, 
    writer_client, 
    reader_client
):
    """
    manage the message traffic for retrieving and re-archiving 
    a segment that was handed off to us
    """
    log = logging.getLogger("forwarder_coroutine")
    archive_priority = create_priority()

    # start retrieving from our reader
    message_id = uuid.uuid1().hex
    message = {
        "message-type"      : "retrieve-key-start",
        "message-id"        : message_id,
        "segment-unified-id": segment_row.unified_id,
        "segment-num"       : segment_row.segment_num,
    }

    log.debug("sending retrieve-key-start %s %s" % (
        segment_row.unified_id, 
        segment_row.segment_num
    ))
    
    reader_client.queue_message_for_send(message, data=None)
    reply, data = yield

    assert reply["message-type"] == "retrieve-key-reply", reply
    assert reply["result"] == "success", reply
    completed = reply["completed"]

    sequence = 1

    message_id = uuid.uuid1().hex
    if completed:
        message = {
            "message-type"      : "archive-key-entire",
            "message-id"        : message_id,
            "priority"          : archive_priority,
            "collection-id"     : segment_row.collection_id,
            "key"               : segment_row.key, 
            "unified-id"        : segment_row.unified_id,
            "conjoined-part"    : segment_row.conjoined_part,
            "timestamp-repr"    : repr(segment_row.timestamp),
            "segment-num"       : segment_row.segment_num,
            "segment-size"      : reply["segment-size"],
            "zfec-padding-size" : reply["zfec-padding-size"],
            "segment-adler32"   : reply["segment-adler32"],
            "segment-md5-digest": reply["segment-md5-digest"],
            "file-size"         : segment_row.file_size,
            "file-adler32"      : segment_row.file_adler32,
            "file-hash"         : b64encode(segment_row.file_hash),
            "source-node-name"  : node_name_dict[segment_row.source_node_id],
            "handoff-node-name" : None,
        }
    else:
        message = {
            "message-type"      : "archive-key-start",
            "message-id"        : message_id,
            "priority"          : archive_priority,
            "collection-id"     : segment_row.collection_id,
            "key"               : segment_row.key, 
            "unified-id"        : segment_row.unified_id,
            "conjoined-part"    : segment_row.conjoined_part,
            "timestamp-repr"    : repr(segment_row.timestamp),
            "segment-num"       : segment_row.segment_num,
            "segment-size"      : reply["segment-size"],
            "zfec-padding-size" : reply["zfec-padding-size"],
            "segment-adler32"   : reply["segment-adler32"],
            "segment-md5-digest": reply["segment-md5-digest"],
            "sequence-num"      : sequence,
            "source-node-name"  : node_name_dict[segment_row.source_node_id],
            "handoff-node-name" : None,
        }
            
    writer_client.queue_message_for_send(message, data=data)
    reply = yield

    if completed:
        # we give back the segment_row and source node names as our last yield
        yield (segment_row, source_node_names, )
        return 

    assert reply["message-type"] == "archive-key-start-reply", reply
    assert reply["result"] == "success", reply

    # send the intermediate segments
    while not completed:
        sequence += 1

        message_id = uuid.uuid1().hex
        message = {
            "message-type"      : "retrieve-key-next",
            "message-id"        : message_id,
            "segment-unified-id": segment_row.unified_id,
            "segment-num"       : segment_row.segment_num,
        }
        reader_client.queue_message_for_send(message, data=None)
        reply, data = yield
        assert reply["message-type"] == "retrieve-key-reply", reply
        assert reply["result"] == "success", reply
        completed = reply["completed"]

        message_id = uuid.uuid1().hex
        if completed:
            message = {
                "message-type"      : "archive-key-final",
                "message-id"        : message_id,
                "priority"          : archive_priority,
                "collection-id"     : segment_row.collection_id,
                "key"               : segment_row.key,
                "unified-id"        : segment_row.unified_id,
                "conjoined-part"    : segment_row.conjoined_part,
                "timestamp-repr"    : repr(segment_row.timestamp),
                "segment-num"       : segment_row.segment_num,
                "segment-size"      : reply["segment-size"],
                "zfec-padding-size" : reply["zfec-padding-size"],
                "segment-adler32"   : reply["segment-adler32"],
                "segment-md5-digest": reply["segment-md5-digest"],
                "sequence-num"      : sequence,
                "file-size"         : segment_row.file_size,
                "file-adler32"      : segment_row.file_adler32,
                "file-hash"         : b64encode(segment_row.file_hash),
                "source-node-name"  : node_name_dict[
                    segment_row.source_node_id],
                "handoff-node-name" : None,
            }
        else:
            message = {
                "message-type"      : "archive-key-next",
                "message-id"        : message_id,
                "priority"          : archive_priority,
                "collection-id"     : segment_row.collection_id,
                "key"               : segment_row.key,
                "unified-id"        : segment_row.unified_id,
                "conjoined-part"    : segment_row.conjoined_part,
                "timestamp-repr"    : repr(segment_row.timestamp),
                "segment-num"       : segment_row.segment_num,
                "segment-size"      : reply["segment-size"],
                "zfec-padding-size" : reply["zfec-padding-size"],
                "segment-adler32"   : reply["segment-adler32"],
                "segment-md5-digest": reply["segment-md5-digest"],
                "sequence-num"      : sequence,
                "source-node-name"  : node_name_dict[
                    segment_row.source_node_id],
                "handoff-node-name" : None,
            }
        
        writer_client.queue_message_for_send(message, data=data)
        reply = yield
        assert reply["result"] == "success", reply

    # we give back the segment_row and source node names as our last yield
    yield (segment_row, source_node_names, )
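A sketch of the driver loop this variant assumes; reply_queue is a hypothetical stand-in for however the handoff server collects replies from its resilient clients:

forwarder = forwarder_coroutine(node_name_dict, segment_row,
                                source_node_names, writer_client,
                                reader_client)
forwarder.next()                        # sends retrieve-key-start
while True:
    reply, data = reply_queue.get()     # hypothetical reply source
    if reply["message-type"] == "retrieve-key-reply":
        result = forwarder.send((reply, data, ))
    else:
        result = forwarder.send(reply)
    if result is not None:
        # the final yield hands back the coroutine's work product
        segment_row, source_node_names = result
        break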
Example #29
0
    def __init__(self):
        self._log = logging.getLogger("WebServer")
        authenticator = SqlAuthenticator()

        self._central_connection = get_central_connection()
        self._cluster_row = get_cluster_row(self._central_connection)
        self._node_local_connection = get_node_local_connection()
        self._unified_id_factory = UnifiedIDFactory(
            self._central_connection,
            _get_shard_id(self._central_connection, self._cluster_row.id)
        )
        self._deliverator = Deliverator()

        self._zeromq_context = zmq.Context()

        self._pull_server = GreenletPULLServer(
            self._zeromq_context, 
            _web_server_pipeline_address,
            self._deliverator
        )
        self._pull_server.link_exception(self._unhandled_greenlet_exception)

        # message sent to data readers and writers telling them the server
        # is (re)starting, thereby invalidating any archives or retrieves
        # that are in progress for this node
        timestamp = create_timestamp()
        start_message = {
            "message-type"              : "web-server-start",
            "priority"                  : create_priority(),
            "unified-id"                : self._unified_id_factory.next(),
            "timestamp-repr"            : repr(timestamp),
            "source-node-name"          : _local_node_name,
        }

        self._data_writer_clients = list()
        for node_name, address in zip(_node_names, _data_writer_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_writer_clients.append(resilient_client)

        self._data_reader_clients = list()
        self._data_readers = list()
        for node_name, address in zip(_node_names, _data_reader_addresses):
            resilient_client = GreenletResilientClient(
                self._zeromq_context, 
                node_name,
                address,
                _client_tag,
                _web_server_pipeline_address,
                self._deliverator,
                connect_messages=[start_message, ]
            )
            resilient_client.link_exception(self._unhandled_greenlet_exception)
            self._data_reader_clients.append(resilient_client)
            data_reader = DataReader(
                node_name, resilient_client
            )
            self._data_readers.append(data_reader)

        self._space_accounting_dealer_client = GreenletDealerClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_server_address
        )
        self._space_accounting_dealer_client.link_exception(
            self._unhandled_greenlet_exception
        )

        push_client = GreenletPUSHClient(
            self._zeromq_context, 
            _local_node_name, 
            _space_accounting_pipeline_address,
        )

        self._accounting_client = SpaceAccountingClient(
            _local_node_name,
            self._space_accounting_dealer_client,
            push_client
        )

        self._event_push_client = EventPushClient(
            self._zeromq_context,
            "web-server"
        )

        self._watcher = Watcher(
            _stats, 
            self._data_reader_clients,
            self._data_writer_clients,
            self._event_push_client
        )

        id_translator_keys_path = os.path.join(
            _repository_path, "id_translator_keys.pkl"
        )
        # pickle data should be read in binary mode
        with open(id_translator_keys_path, "rb") as input_file:
            id_translator_keys = pickle.load(input_file)

        self._id_translator = InternalIDTranslator(
            id_translator_keys["key"],
            id_translator_keys["hmac_key"], 
            id_translator_keys["iv_key"],
            id_translator_keys["hmac_size"]
        )
        self.application = Application(
            self._central_connection,
            self._node_local_connection,
            self._cluster_row,
            self._unified_id_factory,
            self._id_translator,
            self._data_writer_clients,
            self._data_readers,
            authenticator,
            self._accounting_client,
            self._event_push_client,
            _stats
        )
        self.wsgi_server = WSGIServer(
            (_web_server_host, _web_server_port), 
            application=self.application,
            backlog=_wsgi_backlog
        )
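A minimal sketch of bringing the server up under gevent; the rest of the WebServer class is not shown in this excerpt, so this assumes wsgi_server is started directly:

def main():
    web_server = WebServer()
    try:
        web_server.wsgi_server.serve_forever()
    except KeyboardInterrupt:
        pass
    return 0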