Exemplo n.º 1
0
    def test_rabbitmq_consuming(self):
        """Fill the consuming worker's broker with 50,000 messages.

        Integration test for the consuming worker - this side only
        publishes, using Celery ``send_task``; a worker elsewhere
        drains the queue.
        """
        total = 50000
        sent = 0
        outbound = []
        # msg_id -> consumed flag (bookkeeping for the consumer side)
        outbound_by_id = {}
        still_publishing = True

        payload = {"test_name": "large messages"}

        # build every message up front so the loop below is publish-only
        if not outbound:
            while len(outbound) != total:
                msg = self.build_user_conversion_event_msg(payload)
                outbound.append(msg)
                outbound_by_id[msg["msg_id"]] = False
        # end of building messages before slower publishing calls

        broker_url = ev("RELAY_WORKER_BROKER_URL",
                        "pyamqp://*****:*****@localhost:5672//")
        config_module = "ecomm_app.ecommerce.celeryconfig_pub_sub"

        app = ecomm_app.ecommerce.tasks.get_celery_app(
            name="demo",
            auth_url=broker_url,
            path_to_config_module=config_module)

        task_name = "ecomm_app.ecommerce.tasks.handle_user_conversion_events"

        source_id = {"msg_proc": ev("TEST_RELAY_NAME",
                                    "test_ecomm_relay")}
        result = None

        log.info(("Sending broker={}").format(app.conf.broker_url))

        while still_publishing:

            # progress trace every 1000 publishes
            if sent > 0 and sent % 1000 == 0:
                log.info(("Published {} for "
                          "{}/{} messages").format(
                              get_percent_done(sent, total),
                              sent,
                              total))

            body = outbound[sent] if sent < len(outbound) else None

            result = app.send_task(task_name, (body, source_id))

            sent += 1

            if sent >= total:
                log.info(("Published {} ALL "
                          "{}/{} messages").format(
                              get_percent_done(sent, total),
                              sent,
                              total))

                still_publishing = False
            elif sent >= len(outbound):
                log.info(("Published {} all "
                          "{}/{} messages result={}").format(
                              get_percent_done(sent, len(outbound)),
                              sent,
                              total,
                              result))

                still_publishing = False
        # end of publish loop

        assert(sent == total)

        log.info("")
        os.system("list-queues.sh")
        log.info("")
Exemplo n.º 2
0
# Scan every cell of the training dataframe for a target value
# (here a MAC address) and report the 1-based row where it appears.
find_this = "08:00:27:89:2d:02"

total_records = len(df.index)

# 1-based row counter used for progress reporting
ridx = 1

for idx, row in df.iterrows():
    for h in df.columns.values:
        if str(row[h]) == find_this:
            log.info(("found={} row={} column_header={}").format(
                find_this, ridx, h))
            # stop immediately on the first match
            # (the old trailing `break` after sys.exit was unreachable)
            sys.exit(0)
    # end of all columns
    perc_done = get_percent_done(ridx, total_records)

    if ridx % 1000 == 0:
        log.info(("done {} - {}/{} rows").format(perc_done, ridx,
                                                 total_records))
    ridx += 1
# end of all rows

# ridx was incremented past the last row; subtract one so the final
# report shows rows-scanned/total instead of (total+1)/total (>100%)
perc_done = get_percent_done(ridx - 1, total_records)

log.info(("Done {} - {}/{} rows").format(perc_done, ridx - 1, total_records))

log.info(
    ("Did not find={} in training_data={}").format(find_this, training_file))

sys.exit(0)
    def test_rabbitmq_consuming(self):
        """Load the subscriber processor's queue with 50,000 messages.

        Integration test - this side only publishes; the subscriber
        processor consumes from the queue separately.
        """
        total = 50000
        sent = 0
        pending = []
        # msg_id -> consumed flag (bookkeeping for the consumer side)
        pending_by_id = {}

        self.exchange_name = ev("LOAD_TEST_EXCHANGE", "reporting")
        self.routing_key = ev("LOAD_TEST_ROUTING_KEY", "reporting.accounts")
        self.queue_name = ev("LOAD_TEST_QUEUE", "reporting.accounts")

        log.info(("Publishing {}/{} "
                  "ex={} rk={} broker={}").format(
                      sent,
                      total,
                      self.exchange_name,
                      self.routing_key,
                      self.pub_auth_url))

        retry_publishes = True
        still_publishing = True

        seed_values = {"test_name": "large messages"}

        # build every message up front so the loop below is publish-only
        if not pending:
            while len(pending) != total:
                msg = self.build_user_conversion_event_msg(seed_values)
                pending.append(msg)
                pending_by_id[msg["msg_id"]] = False
        # end of building messages before slower publishing calls

        while still_publishing:

            # progress trace every 1000 publishes
            if sent > 0 and sent % 1000 == 0:
                log.info(("Published {} for "
                          "{}/{} messages").format(
                              get_percent_done(sent, total),
                              sent, total))

            body = pending[sent] if sent < len(pending) else None

            self.publish(body=body,
                         exchange=self.exchange_name,
                         routing_key=self.routing_key,
                         queue=self.queue_name,
                         priority=0,
                         ttl=None,
                         serializer=self.pub_serializer,
                         retry=retry_publishes)

            sent += 1

            if sent >= total:
                log.info(("Published {} ALL "
                          "{}/{} messages").format(
                              get_percent_done(sent, total),
                              sent, total))

                still_publishing = False
            elif sent >= len(pending):
                log.info(("Published {} all "
                          "{}/{} messages").format(
                              get_percent_done(sent, len(pending)),
                              sent, total))

                still_publishing = False
        # end of publish loop

        assert (sent == total)

        os.system("list-queues.sh")

        log.info("")
        log.info(("display messages in the queues "
                  "with routing_key={} again with:").format(self.routing_key))
        log.info("list-queues.sh")
        log.info("")
    def test_consuming_large_number_of_messages(self):
        """End-to-end publish/consume of 50,000 messages.

        Publishes 50,000 uniquely-identified messages, consumes them
        back off the same queue, and verifies every published
        ``msg_id`` was seen by the consumer. Defaults to the rabbitmq
        broker.
        """
        num_to_consume = 50000
        num_sent = 0
        num_to_send = num_to_consume
        num_consumed = 0
        msgs_to_send = []
        msgs_received = []

        # msg_id -> True once the consumer sees that message
        msgs_by_id = {}

        self.exchange_name = "test_large_num_1"
        self.routing_key = "test_large_num_1.orders"
        self.queue_name = "test_large_num_1.orders"

        class TestMessageProcessor:
            """Consumer callback holder - records and acks messages."""

            def __init__(self,
                         should_consume=1,
                         test_id=None,
                         stop_after_num=-1,
                         validate_for_test=False):

                self.num_consumed = 0
                self.should_consume = should_consume
                self.stop_after_num = stop_after_num
                self.expected_test_id = test_id

                self.recv_msgs = []
                self.validate_for_test = validate_for_test

            # end of __init__

            def process_message(self, body, message):
                """Record one consumed message body and ack it."""

                self.num_consumed += 1

                test_id = "unknown"
                if "test_id" in body:
                    test_id = body["test_id"]

                # validate we're not crossing test streams
                if self.validate_for_test and test_id != self.expected_test_id:
                    log.error(("Test={} consumed a message "
                               "from another test={} - please "
                               "restart rabbitmq or clean up "
                               " the queue and try again").format(
                                   self.expected_test_id, test_id))

                    assert (test_id == self.expected_test_id)
                # end of validating the test_id matches the msg's test_id

                log.debug(("test={} Consumed message "
                           "{}/{} acking").format(test_id, self.num_consumed,
                                                  self.should_consume))

                # track on the instance too so get_received_messages()
                # actually returns something (recv_msgs was previously
                # never populated)
                self.recv_msgs.append(body)
                msgs_received.append(body)

                message.ack()

            # end of process_message

            def get_received_messages(self):
                return self.recv_msgs

            # end of get_received_messages

        # end of TestMessageProcessor

        log.info(("Publishing {}/{} "
                  "broker={}").format(num_sent, num_to_send,
                                      self.pub_auth_url))

        pub_retry = True
        not_done_publishing = True
        not_done_subscribing = True

        test_values = {"test_name": "large messages"}

        # build every message up front so the loop below is publish-only
        if len(msgs_to_send) == 0:
            while len(msgs_to_send) != num_to_send:
                test_msg = self.build_msg(test_values)
                msgs_to_send.append(test_msg)
                msgs_by_id[test_msg["msg_id"]] = False
        # end of building messages before slower publishing calls

        while not_done_publishing:

            # progress trace every 1000 publishes
            if (num_sent % 1000 == 0) and num_sent > 0:
                log.info(("Published {} for "
                          "{}/{} messages").format(
                              get_percent_done(num_sent, num_to_send),
                              num_sent, num_to_send))
            # end of if print for tracing

            msg_body = None
            if num_sent < len(msgs_to_send):
                msg_body = msgs_to_send[num_sent]

            self.publish(body=msg_body,
                         exchange=self.exchange_name,
                         routing_key=self.routing_key,
                         queue=self.queue_name,
                         priority=0,
                         ttl=None,
                         serializer=self.pub_serializer,
                         retry=pub_retry)

            num_sent += 1

            if num_sent >= num_to_send:
                log.info(("Published {} ALL "
                          "{}/{} messages").format(
                              get_percent_done(num_sent, num_to_send),
                              num_sent, num_to_send))

                not_done_publishing = False
            elif num_sent >= len(msgs_to_send):
                log.info(("Published {} all "
                          "{}/{} messages").format(
                              get_percent_done(num_sent, len(msgs_to_send)),
                              num_sent, num_to_send))

                not_done_publishing = False
            # if should stop

        # end of not_done_publishing

        log.info(("Creating Consumer "
                  "{}/{} messages").format(num_consumed, num_to_consume))

        test_consumer = TestMessageProcessor(should_consume=num_to_consume,
                                             test_id=self.test_id,
                                             stop_after_num=-1)

        log.info(("Starting Consume "
                  "{}/{} messages").format(num_consumed, num_to_consume))

        last_num_done = 0

        while not_done_subscribing:

            # progress trace every 1000 consumes
            if (num_consumed % 1000 == 0) and num_consumed > 0:
                log.info(("Consumed {} for "
                          "{}/{} messages").format(
                              get_percent_done(num_consumed, num_to_consume),
                              num_consumed, num_to_consume))
            # end of if print for tracing

            self.consume(callback=test_consumer.process_message,
                         queue=self.queue_name,
                         exchange=None,
                         routing_key=None,
                         forever=False,
                         serializer=self.sub_serializer,
                         heartbeat=60,
                         time_to_wait=2.0)

            num_consumed = len(msgs_received)

            # done only when the target count is hit AND a consume pass
            # produced nothing new (queue fully drained)
            if num_consumed >= num_to_consume and last_num_done == num_consumed:
                log.info(("Consumed {} ALL "
                          "{}/{} messages").format(
                              get_percent_done(num_consumed, num_to_consume),
                              num_consumed, num_to_consume))

                not_done_subscribing = False
            else:

                # was there something in the queue already?
                if last_num_done == num_consumed:
                    # if not sleep it out
                    log.info(("Consumed {} "
                              "{}/{} messages").format(
                                  get_percent_done(num_consumed,
                                                   num_to_consume),
                                  num_consumed, num_to_consume))
                # end of checking if something was found or not

            # if should stop

            last_num_done = num_consumed

        # end of not_done_subscribing

        log.info(("test={} consumed={} "
                  "out of queue={}").format(self.test_id, len(msgs_received),
                                            self.queue_name))

        # mark every published msg_id that came back off the queue
        for consumed_msg in msgs_received:
            consumed_msg_id = consumed_msg["msg_id"]
            if consumed_msg_id in msgs_by_id:
                msgs_by_id[consumed_msg_id] = True
        # end of for all consumed messages

        num_valid_messages = 0

        # fix: check the consumed flag (the dict VALUE) - the old code
        # tested the msg_id key, which is a non-empty string and was
        # always truthy, so missing messages were never detected
        for msg_id, was_consumed in msgs_by_id.items():
            if not was_consumed:
                log.error(
                    ("FAILED to find "
                     "test_id={} msg_id={}").format(self.test_id, msg_id))
                assert (was_consumed)
            else:
                num_valid_messages += 1
        # end of for validating all the messages were found in the queue

        assert (num_valid_messages == num_to_consume)
    log.error(("No columns={} found in training dataset={}").format(
        len(df.columns.values), training_file))
    sys.exit(3)

# Report the attack / non-attack class balance of the labeled dataset.
log.info(
    ("found rows={} columns={} in dataset={}").format(len(df.index),
                                                      len(df.columns.values),
                                                      training_file))

# label_value encodes the class: 1 = attack, 0 = non-attack
ATTACK_VALUE = 1
NON_ATTACK_VALUE = 0

attack_mask = (df["label_value"] == ATTACK_VALUE)
nonattack_mask = (df["label_value"] == NON_ATTACK_VALUE)

df_attacks = df[attack_mask]
df_nonattacks = df[nonattack_mask]

num_attacks = len(df_attacks.index)
num_nonattacks = len(df_nonattacks.index)
total_records = len(df.index)

percent_attack = get_percent_done(num_attacks, total_records)
percent_nonattack = get_percent_done(num_nonattacks, total_records)

log.info(("total records={} attack={} nonattack={} "
          "percent_attack={}% percent_nonattack={}%").format(
              total_records, num_attacks, num_nonattacks, percent_attack,
              percent_nonattack))

sys.exit(0)
Exemplo n.º 6
0
def run_publisher(broker_url,
                  exchange=None,     # kombu.Exchange object
                  routing_key=None,  # string
                  msgs=[],
                  num_per_batch=-1,
                  priority="high",
                  priority_routing={},
                  serializer="json",
                  ssl_options={},
                  transport_options={},
                  send_method=None,
                  silent=True,
                  publish_silent=False,
                  log_label="pub",
                  *args,
                  **kwargs):
    """Publish every message in ``msgs`` over one broker connection.

    Opens a single kombu ``Connection`` to ``broker_url`` and sends each
    message with ``send_method`` (defaulting to
    ``celery_connectors.mixin_send_task_msg.mixin_send_task_msg``),
    retrying failed sends with a backoff sleep until all are sent.

    :param broker_url: broker connection string for ``Connection``
    :param exchange: kombu.Exchange to publish to
    :param routing_key: routing key string
    :param msgs: list of message payloads to publish
    :param num_per_batch: batch-size hint (not used in the visible body)
    :param priority: priority label passed to the send method
    :param priority_routing: priority-to-route mapping for the send method
    :param serializer: kombu serializer name (e.g. ``"json"``)
    :param ssl_options: ssl options dict for ``Connection``
    :param transport_options: transport options dict for ``Connection``
    :param send_method: callable that sends one message; ``None`` selects
        the default mixin send method
    :param silent: when ``True``, suppress debug logging
    :param publish_silent: when ``True``, hide per-publish logging
        (except the periodic progress line)
    :param log_label: label the send method uses when logging

    NOTE(review): the mutable default arguments (``msgs=[]``,
    ``priority_routing={}``, ``ssl_options={}``, ``transport_options={}``)
    are shared across calls if a caller mutates them - consider switching
    to ``None`` defaults with in-body initialization.
    """

    verbose = not silent

    if verbose:
        log.debug("connecting")

    with Connection(broker_url,
                    ssl=ssl_options,
                    transport_options=transport_options) as conn:

        num_to_send = len(msgs)

        # nothing to do - log and bail out early
        if num_to_send == 0:
            log.info(("no msgs={} to publish")
                     .format(num_to_send))
            return

        use_send_method = send_method
        # use the default method for sending if one is not passed in
        if not use_send_method:
            use_send_method = celery_connectors.mixin_send_task_msg.mixin_send_task_msg

        if verbose:
            log.debug(("publishing ex={} rk={} "
                       "msgs={} send_method={}")
                      .format(exchange,
                              routing_key,
                              num_to_send,
                              use_send_method.__name__))

        num_sent = 0
        not_done = True
        num_fails = 0     # consecutive failures, drives the backoff timer

        while not_done:

            cur_msg = msgs[num_sent]

            # every 200 sends, force a progress line even in silent mode
            hide_logs = publish_silent
            if num_sent > 1 and num_sent % 200 == 0:
                hide_logs = False
                log.info(("{} send done "
                          "msg={}/{} ex={} rk={}")
                         .format(get_percent_done(
                                    num_sent,
                                    num_to_send),
                                 num_sent,
                                 num_to_send,
                                 exchange.name,
                                 routing_key))

            send_res = use_send_method(conn=conn,
                                       data=cur_msg,
                                       exchange=exchange,
                                       routing_key=routing_key,
                                       priority=priority,
                                       priority_routing=priority_routing,
                                       serializer=serializer,
                                       silent=hide_logs,
                                       log_label=log_label)

            if send_res["status"] == SUCCESS:
                # success resets the failure streak; advance to next msg
                num_fails = 0
                num_sent += 1
                if num_sent >= num_to_send:
                    not_done = False
            else:
                # failed send: back off and retry the same message
                num_fails += 1
                sleep_duration = calc_backoff_timer(num_fails)
                log.info(("publish failed - {} - exch={} rk={} "
                          "sleep={} seconds retry={}")
                         .format(send_res["error"],
                                 exchange,
                                 routing_key,
                                 sleep_duration,
                                 num_fails))

                # cap the streak so the backoff timer does not grow forever
                if num_fails > 100000:
                    num_fails = 1

                time.sleep(sleep_duration)