示例#1
0
def _partition_snd(kvs, name, tag, total_size, partitions, mq_names, mq):
    """Serialize one partition's key/value pairs and ship them in batches.

    A batch is flushed through ``_send_kv`` whenever the buffered entry
    count exceeds ``MESSAGE_MAX_SIZE``; whatever remains (possibly an
    empty list) is flushed once more at the end so the receiver always
    sees a final batch for this partition.

    Returns ``[1]`` because Spark's ``mapPartitions`` expects an iterable.
    """
    LOGGER.debug(
        f"[rabbitmq._partition_send]total_size:{total_size}, partitions:{partitions}, mq_names:{mq_names}, mq:{mq}."
    )
    channel_infos = _get_channels(mq_names=mq_names, mq=mq)

    buffered = []
    buffered_count = 0
    for key, value in kvs:
        buffered.append({'k': p_dumps(key).hex(), 'v': p_dumps(value).hex()})
        buffered_count += 1
        if buffered_count > MESSAGE_MAX_SIZE:
            _send_kv(name=name,
                     tag=tag,
                     data=buffered,
                     channel_infos=channel_infos,
                     total_size=total_size,
                     partitions=partitions)
            buffered_count = 0
            buffered.clear()

    # flush the tail batch (may be empty)
    _send_kv(name=name,
             tag=tag,
             data=buffered,
             channel_infos=channel_infos,
             total_size=total_size,
             partitions=partitions)

    return [1]
示例#2
0
    def _partition_send(
        self,
        index,
        kvs,
        name,
        tag,
        partitions,
        mq_names,
        mq,
        maximun_message_size,
        connection_conf: dict,
    ):
        """Send one RDD partition's key/value pairs over MQ in size-bounded chunks.

        Every chunk carries a message key ``<index><_SPLIT_><chunk-number>`` so
        the receiver can order/de-duplicate messages.  Intermediate chunks send
        ``partition_size=-1``; the final chunk reports the true element count.

        :param index: partition index, used as the message-key prefix
        :param kvs: iterable of (key, value) pairs for this partition
        :param maximun_message_size: soft upper bound on one message's payload
        :param connection_conf: MQ connection configuration
        :return: ``[1]`` (``mapPartitionsWithIndex`` requires an iterable result)
        """
        channel_infos = self._get_channels_index(
            index=index,
            mq_names=mq_names,
            mq=mq,
            connection_conf=connection_conf)

        datastream = Datastream()
        base_message_key = str(index)
        message_key_idx = 0
        count = 0

        for k, v in kvs:
            count += 1
            el = {"k": p_dumps(k).hex(), "v": p_dumps(v).hex()}
            # roughly calculate the size of package to avoid serialization ;)
            if (datastream.get_size() + sys.getsizeof(el["k"]) +
                    sys.getsizeof(el["v"]) >= maximun_message_size):
                print(
                    f"[rabbitmq._partition_send]The size of message is: {datastream.get_size()}"
                )
                message_key_idx += 1
                # BUG FIX: build the key with _SPLIT_ (as the final chunk below
                # does) instead of a hard-coded "_", so all chunks of one
                # partition share a consistent key format.
                message_key = _SPLIT_.join(
                    [base_message_key, str(message_key_idx)])
                self._send_kv(
                    name=name,
                    tag=tag,
                    data=datastream.get_data(),
                    channel_infos=channel_infos,
                    partition_size=-1,
                    partitions=partitions,
                    message_key=message_key,
                )
                datastream.clear()
            datastream.append(el)

        message_key_idx += 1
        message_key = _SPLIT_.join([base_message_key, str(message_key_idx)])

        # final chunk: partition_size=count marks end-of-partition
        self._send_kv(
            name=name,
            tag=tag,
            data=datastream.get_data(),
            channel_infos=channel_infos,
            partition_size=count,
            partitions=partitions,
            message_key=message_key,
        )

        return [1]
示例#3
0
def moca_dumps(obj: Any) -> bytes:
    """Serialize *obj*; compress payloads larger than 1024 bytes.

    Compressed output is prefixed with the ``b'moca'`` magic marker so the
    matching loader can distinguish the two encodings.
    """
    raw = p_dumps(obj)
    if len(raw) <= 1024:
        return raw
    return b'moca' + compress(raw)
示例#4
0
    def remote(self, v, name: str, tag: str, parties: typing.List[Party],
               gc: GarbageCollectionABC) -> typing.NoReturn:
        """Push *v* to every party in *parties* over RabbitMQ.

        A ``Table`` is shipped partition-by-partition via Spark's
        ``mapPartitions``; any other value is pickled and sent whole.
        """
        log_str = f"rabbitmq.remote(name={name}, tag={tag}, parties={parties})"
        mq_names = self._get_mq_names(parties)
        LOGGER.debug(f"[rabbitmq.remote]mq_names: {mq_names}")

        if not isinstance(v, Table):
            # plain object: pickle and ship over the per-party channels
            LOGGER.debug(f"[{log_str}]start to remote obj")
            channel_infos = self._get_channels(mq_names=mq_names)
            LOGGER.debug(
                f"[rabbitmq.remote]got channel_infos: {channel_infos}")
            _send_obj(name=name,
                      tag=tag,
                      data=p_dumps(v),
                      channel_infos=channel_infos)
        else:
            total_size = v.count()
            partitions = v.partitions
            LOGGER.debug(
                f"[{log_str}]start to remote RDD, total_size={total_size}, partitions={partitions}"
            )
            sender = _get_partition_send_func(name,
                                              tag,
                                              total_size,
                                              partitions,
                                              mq_names,
                                              mq=self._mq)
            # noinspection PyProtectedMember
            v._rdd.mapPartitions(sender).count()
        LOGGER.debug(f"[{log_str}]finish to remote")
示例#5
0
    def _partition_send(self, index, kvs, name, tag, partitions,
                        party_topic_infos, mq, maximun_message_size,
                        conf: dict):
        """Stream one partition's serialized k/v pairs to pulsar in bounded chunks.

        Chunks are keyed ``<index><_SPLIT_><chunk-number>``; intermediate
        chunks send ``partition_size=-1`` and the final chunk carries the
        true element count.  Returns ``[1]`` for ``mapPartitionsWithIndex``.
        """
        channel_infos = self._get_channels_index(
            index=index, party_topic_infos=party_topic_infos, mq=mq, conf=conf)
        # a single Datastream is reused in case message size is limited in pulsar
        stream = Datastream()
        key_prefix = str(index)
        chunk_idx = 0
        total = 0

        for key, value in kvs:
            total += 1
            entry = {'k': p_dumps(key).hex(), 'v': p_dumps(value).hex()}
            # rough size estimate — avoids serializing just to measure
            projected = (stream.get_size() + sys.getsizeof(entry['k']) +
                         sys.getsizeof(entry['v']))
            if projected >= maximun_message_size:
                print(
                    f'[pulsar._partition_send]The size of message is: {stream.get_size()}'
                )
                chunk_idx += 1
                self._send_kv(name=name,
                              tag=tag,
                              data=stream.get_data().encode(),
                              channel_infos=channel_infos,
                              partition_size=-1,
                              partitions=partitions,
                              message_key=_SPLIT_.join(
                                  [key_prefix, str(chunk_idx)]))
                stream.clear()
            stream.append(entry)

        chunk_idx += 1

        # final chunk reports the real partition size to the receiver
        self._send_kv(name=name,
                      tag=tag,
                      data=stream.get_data().encode(),
                      channel_infos=channel_infos,
                      partition_size=total,
                      partitions=partitions,
                      message_key=_SPLIT_.join([key_prefix, str(chunk_idx)]))

        return [1]
示例#6
0
    def remote(
        self,
        v,
        name: str,
        tag: str,
        parties: typing.List[Party],
        gc: GarbageCollectionABC,
    ) -> typing.NoReturn:
        """Send *v* to every destination party over RabbitMQ.

        Before the payload, a one-time "dtype announcement" per
        (role, party_id, name, tag) tells receivers whether a ``Table`` or a
        plain pickled object follows.  Tables are then shipped
        partition-by-partition via Spark; anything else is pickled whole.

        :param v: a ``Table`` or any picklable object
        :param name: transfer-variable name used for routing
        :param tag: transfer tag
        :param parties: destination parties
        :param gc: garbage-collection registry; a ``__del__`` action for the
            table is registered here before the partition send
        """
        log_str = f"[rabbitmq.remote](name={name}, tag={tag}, parties={parties})"

        # if not _remote_tag_not_duplicate(name, tag, parties):
        #     raise ValueError(f"[{log_str}]remote to {parties} with duplicate tag")

        # one cache key per destination party; used to send the dtype
        # announcement only once per (role, party_id, name, tag)
        _name_dtype_keys = [
            _SPLIT_.join([party.role, party.party_id, name, tag, "remote"])
            for party in parties
        ]

        if _name_dtype_keys[0] not in self._name_dtype_map:
            mq_names = self._get_mq_names(parties, dtype=NAME_DTYPE_TAG)
            channel_infos = self._get_channels(mq_names=mq_names)
            # announce what is coming: a table (with partition count) or an object
            if isinstance(v, Table):
                body = {
                    "dtype": FederationDataType.TABLE,
                    "partitions": v.partitions
                }
            else:
                body = {"dtype": FederationDataType.OBJECT}

            LOGGER.debug(
                f"[rabbitmq.remote] _name_dtype_keys: {_name_dtype_keys}, dtype: {body}"
            )
            self._send_obj(
                name=name,
                tag=_SPLIT_.join([tag, NAME_DTYPE_TAG]),
                data=p_dumps(body),
                channel_infos=channel_infos,
            )

            # remember that the announcement went out for these parties
            for k in _name_dtype_keys:
                if k not in self._name_dtype_map:
                    self._name_dtype_map[k] = body

        if isinstance(v, Table):
            total_size = v.count()
            partitions = v.partitions
            LOGGER.debug(
                f"[{log_str}]start to remote RDD, total_size={total_size}, partitions={partitions}"
            )

            mq_names = self._get_mq_names(parties, name, partitions=partitions)
            # schedule the table for deletion once this tag is cleaned up
            gc.add_gc_action(tag, v, "__del__", {})

            send_func = self._get_partition_send_func(
                name,
                tag,
                partitions,
                mq_names,
                mq=self._mq,
                maximun_message_size=self._max_message_size,
                connection_conf=self._rabbit_manager.runtime_config.get(
                    "connection", {}),
            )
            # noinspection PyProtectedMember
            v._rdd.mapPartitionsWithIndex(send_func).count()
        else:
            LOGGER.debug(f"[{log_str}]start to remote obj")
            mq_names = self._get_mq_names(parties, name)
            channel_infos = self._get_channels(mq_names=mq_names)
            self._send_obj(name=name,
                           tag=tag,
                           data=p_dumps(v),
                           channel_infos=channel_infos)

        LOGGER.debug(f"[{log_str}]finish to remote")
示例#7
0
 def serialize(_obj):
     """Return the pickled byte representation of *_obj*."""
     payload = p_dumps(_obj)
     return payload
示例#8
0
def upload_opml(md5_key: str, show_list: list) -> str:
    """Pickle *show_list*, upload it as an 'opml' feed, and return the presigned URL."""
    payload = p_dumps(show_list)
    return upload_feed(md5_key, payload, 'opml')
示例#9
0
    def remote(self, v, name: str, tag: str, parties: typing.List[Party],
               gc: GarbageCollectionABC) -> typing.NoReturn:
        """Send *v* to every destination party over Pulsar.

        A one-time dtype announcement per (role, party_id, name, tag) tells
        receivers whether a ``Table`` or a plain pickled object follows.
        Tables are then sent partition-by-partition through Spark; anything
        else is pickled and sent whole.

        :param v: a ``Table`` or any picklable object
        :param name: transfer-variable name used for routing
        :param tag: transfer tag
        :param parties: destination parties
        :param gc: garbage-collection registry; a ``__del__`` action for the
            table is registered here before the partition send
        """
        log_str = f"[pulsar.remote](name={name}, tag={tag}, parties={parties})"

        # one key per destination party, so the dtype announcement below is
        # sent only once per (role, party_id, name, tag)
        _name_dtype_keys = [
            _SPLIT_.join([party.role, party.party_id, name, tag, 'remote'])
            for party in parties
        ]

        # tell the receiver what sender is going to send.

        if _name_dtype_keys[0] not in self._name_dtype_map:
            party_topic_infos = self._get_party_topic_infos(
                parties, dtype=NAME_DTYPE_TAG)
            channel_infos = self._get_channels(
                party_topic_infos=party_topic_infos)
            # table payloads also announce their partition count
            if isinstance(v, Table):
                body = {
                    "dtype": FederationDataType.TABLE,
                    "partitions": v.partitions
                }
            else:
                body = {"dtype": FederationDataType.OBJECT}

            LOGGER.debug(
                f"[pulsar.remote] _name_dtype_keys: {_name_dtype_keys}, dtype: {body}"
            )
            self._send_obj(name=name,
                           tag=_SPLIT_.join([tag, NAME_DTYPE_TAG]),
                           data=p_dumps(body),
                           channel_infos=channel_infos)

            # remember that the announcement went out for these parties
            for k in _name_dtype_keys:
                if k not in self._name_dtype_map:
                    self._name_dtype_map[k] = body

        if isinstance(v, Table):
            total_size = v.count()
            partitions = v.partitions
            LOGGER.debug(
                f"[{log_str}]start to remote RDD, total_size={total_size}, partitions={partitions}"
            )

            party_topic_infos = self._get_party_topic_infos(
                parties, name, partitions=partitions)
            # schedule the table for deletion once this tag is cleaned up
            gc.add_gc_action(tag, v, '__del__', {})

            send_func = self._get_partition_send_func(
                name,
                tag,
                partitions,
                party_topic_infos,
                mq=self._mq,
                maximun_message_size=self._max_message_size,
                conf=self._pulsar_manager.runtime_config)
            # noinspection PyProtectedMember
            v._rdd.mapPartitionsWithIndex(send_func).count()
        else:
            LOGGER.debug(f"[{log_str}]start to remote obj")
            party_topic_infos = self._get_party_topic_infos(parties, name)
            channel_infos = self._get_channels(
                party_topic_infos=party_topic_infos)
            self._send_obj(name=name,
                           tag=tag,
                           data=p_dumps(v),
                           channel_infos=channel_infos)

        LOGGER.debug(f"[{log_str}]finish to remote")
示例#10
0
    def _partition_send(
        self,
        index,
        kvs,
        name,
        tag,
        partitions,
        party_topic_infos,
        mq,
        maximun_message_size,
        conf: dict,
    ):
        """Ship one partition's serialized k/v pairs to pulsar in bounded chunks.

        Chunks are keyed ``<index><_SPLIT_><chunk-number>``.  Intermediate
        chunks send ``partition_size=-1``; the final chunk carries the true
        element count.  Returns ``[1]`` for ``mapPartitionsWithIndex``.
        """
        channel_infos = self._get_channels_index(
            index=index, party_topic_infos=party_topic_infos, mq=mq, conf=conf)
        # a single Datastream is reused in case message size is limited in pulsar
        stream = Datastream()
        key_prefix = str(index)
        chunk_idx = 0
        total = 0
        in_chunk = 0

        for key, value in kvs:
            total += 1
            in_chunk += 1
            entry = {"k": p_dumps(key).hex(), "v": p_dumps(value).hex()}
            # rough size estimate — avoids serializing just to measure
            projected = (stream.get_size() + sys.getsizeof(entry["k"]) +
                         sys.getsizeof(entry["v"]))
            if projected >= maximun_message_size:
                LOGGER.debug(
                    f"[pulsar._partition_send]The count of message is: {in_chunk}"
                )
                LOGGER.debug(
                    f"[pulsar._partition_send]The total count of message is: {total}"
                )
                in_chunk = 0
                chunk_idx += 1
                self._send_kv(
                    name=name,
                    tag=tag,
                    data=stream.get_data().encode(),
                    channel_infos=channel_infos,
                    partition_size=-1,
                    partitions=partitions,
                    message_key=_SPLIT_.join([key_prefix, str(chunk_idx)]),
                )
                stream.clear()
            stream.append(entry)

        chunk_idx += 1

        # final chunk reports the real partition size to the receiver
        self._send_kv(
            name=name,
            tag=tag,
            data=stream.get_data().encode(),
            channel_infos=channel_infos,
            partition_size=total,
            partitions=partitions,
            message_key=_SPLIT_.join([key_prefix, str(chunk_idx)]),
        )

        return [1]
示例#11
0
    def pickle(self, line: str = ''):
        """
        Pickles a variable and copies it to the clipboard or un-pickles clipboard contents and prints or stores it.

        `%pickle` unpickle clipboard and print
        `%pickle v` pickle variable `v` and store in clipboard
        `%pickle _` pickle last line's output and store in clipboard
        `%pickle -o my_var` unpickle clipboard contents and store in `my_var`"""
        ip = self.shell
        args = magic_args.parse_argstring(self.pickle, line)
        if bool(args.output) and bool(args.var):
            msg = (
                'Incorrect usage, you can either pickle a variable, or unpickle, but not both at the same time.\n'
                f'\n`%pickle {args.var}` to pickle the contents of `{args.var}` and send them to your clipboard'
                f'\n`%pickle -o {args.output[0]}` to unpickle clipboard contents and send them to `{args.output[0]}`'
                f'\n`%pickle` to unpickle your clipboard contents and print')
            ip.write_err(msg)
            return None

        if not line or args.output:  # user wants to unpickle from clipboard
            content: str = pypaste()
            # BUG FIX: check for an empty clipboard *before* indexing into it;
            # the old order evaluated content[1]/content[-1] first and raised
            # IndexError on an empty string.
            if not content:  # clipboard is empty
                sys.stderr.write(r'Your clipboard is empty.')
                return None
            # A valid pickle literal looks like b'...' or b"...": it must
            # start with 'b' AND have matching first/last quote characters.
            # BUG FIX: the old test joined the negated conditions with `and`
            # (a misapplied De Morgan), so strings such as "a'x'" slipped
            # through as "valid".
            format_error = (len(content) < 3 or not content.startswith('b')
                            or content[1] != content[-1])
            if format_error:  # clipboard doesn't have a valid pickle string
                sys.stderr.write(
                    r'''Your clipboard doesn't have a bytes-like string (ie. b'\x80\x03N.' or 
                b"\x80\x03N.")''')
                return None

            try:
                # NOTE(security): unpickling clipboard data executes arbitrary
                # code if the clipboard is attacker-controlled — inherent to
                # %pickle's design, left as-is.
                unpickled = p_loads((literal_eval(content)))
            except (KeyError, UnpicklingError, PickleError):
                sys.stderr.write(
                    r'Your clipboard contents could not be unpickled because the data is not valid.'
                )
            else:
                if args.output:  # user wants to unpickle into a variable
                    ip.user_ns[args.output[0]] = unpickled

                else:  # user wants to unpickle and print
                    sys.stdout.write(str(unpickled))

        else:  # user wants to pickle a var
            try:
                pickled_data = str(p_dumps(ip.user_ns.get(args.var)))
            except RuntimeError:
                sys.stderr.write(
                    "Your data could not be pickled because it may be highly recursive.\n"
                    "For more information on what can be (un)pickled checkout "
                    "https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled"
                )
            except PicklingError:
                sys.stderr.write(
                    "The object you are trying to pickle is unpickable.\n"
                    "For more information on what can be (un)pickled checkout "
                    "https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled"
                )
            else:
                pycopy(pickled_data)
示例#12
0
def save_messages_pickle(events, path):
    """Pickle ``events.to_dict()`` and write the bytes to *path*."""
    payload = p_dumps(events.to_dict())
    with open(path, 'wb') as out:
        out.write(payload)
示例#13
0
def save_channel_pickle(dict_channel, dirname):
    """Pickle *dict_channel* into ``<dirname>pickles/channel_info.pickle``.

    *dirname* is concatenated as-is (it is expected to end with a path
    separator), and the ``pickles/`` subdirectory must already exist —
    TODO confirm both with callers.
    """
    target = dirname + "pickles/" + "channel_info.pickle"
    with open(target, 'wb') as out:
        out.write(p_dumps(dict_channel))