def test_qps():
    # Create the producer.
    p = Producer(ip=g_ip, user=g_user, password=g_password)
    p.producer_declare()
    p.create_exchange(g_exchange, "topic")

    # Create the consumers: three per queue name.
    consumers = []
    for queue_name in g_queue_name:
        for i in range(0, 3):
            consumers.append(Consumer(ip=g_ip, user=g_user, password=g_password))
            consumers[-1].start_consumer(g_exchange, queue_name + str(i), queue_name + str(i))
    time.sleep(10)  # Wait 10 seconds for the consumers to finish binding.

    log.info("[test_qps] starting ...")
    try:
        target_time = g_test_secs
        start = time.time()
        stop = False
        while not stop:
            for queue_name in g_queue_name:
                for i in range(0, 3):
                    time.sleep(g_sleep_secs)
                    p.publish(g_exchange, queue_name + str(i), '{"msg":"this is a test!"}')
                    curr = time.time()
                    if (curr - start) >= target_time:
                        stop = True
                        break
                if stop:
                    break
    except Exception as err:
        log.error("[test_qps] error: " + str(err))
    finally:
        # Tell every consumer to quit, then close the producer connection.
        for queue_name in g_queue_name:
            for i in range(0, 3):
                p.publish(g_exchange, queue_name + str(i), "quit")
        p.close()

    received = 0
    last_time = 0.0
    for c in consumers:
        c.join()
        received += c.number_of_msg()
        if c.stop_consume_time() > last_time:
            last_time = c.stop_consume_time()

    log.info("[test_qps] %d msg have been sent, start at %f"
             % (p.number_of_msg(), p.start_publish_time()))
    log.info("[test_qps] %d msg have been received, end at %f" % (received, last_time))
    log.info("[test_qps] QPS: %f" % (received / (last_time - p.start_publish_time())))
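# The Producer/Consumer helper classes used by these RabbitMQ tests are not
# shown in this section. Below is a minimal, hypothetical sketch of what the
# Producer side could look like, assuming the pika client; the method names,
# message counter, and timing bookkeeping are assumptions, not the actual
# implementation.
import time

import pika


class Producer:
    def __init__(self, ip, user, password):
        self._ip = ip
        self._credentials = pika.PlainCredentials(user, password)
        self._connection = None
        self._channel = None
        self._count = 0    # messages published so far
        self._start = 0.0  # time of the first publish

    def producer_declare(self):
        # Open a blocking connection and a channel to the broker.
        params = pika.ConnectionParameters(host=self._ip, credentials=self._credentials)
        self._connection = pika.BlockingConnection(params)
        self._channel = self._connection.channel()

    def create_exchange(self, exchange, exchange_type):
        self._channel.exchange_declare(exchange=exchange, exchange_type=exchange_type)

    def publish(self, exchange, routing_key, body):
        if self._count == 0:
            self._start = time.time()
        self._channel.basic_publish(exchange=exchange, routing_key=routing_key, body=body)
        self._count += 1

    def number_of_msg(self):
        return self._count

    def start_publish_time(self):
        return self._start

    def close(self):
        self._connection.close()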
class Crawler:
    MAX_URL = 10

    def __init__(self):
        self.url_counter = 1
        self.document_client = DocumentClient()
        self.indexing_client = IndexingClient()
        self.pagerank_client = PagerankClient()
        self.producer = Producer('url_queue')
        self.consumer = Consumer('url_queue')

    def run(self):
        self.consumer.subscribe(self.run_for_url)

    def run_for_url(self, ch, method, properties, body):
        doc_url = body.decode("utf-8")
        print("[Crawler] Received %r" % doc_url)

        # Fetch the page and hash its text so content changes can be detected.
        document_text = WebScraper.get_text(doc_url)
        document_links = WebScraper.get_links(doc_url)
        hash_object = hashlib.sha256(document_text.encode("utf-8"))
        digest = hash_object.hexdigest()

        # Create the document record if it does not exist yet.
        doc_record = self.document_client.get_by_url(doc_url)
        if "id" not in doc_record:
            doc_record = self.document_client.create(doc_url, digest)

        # Index the document if it has not been indexed yet.
        doc_indexed = self.indexing_client.get_by_id(doc_record["id"])
        if "url" not in doc_indexed:
            self.indexing_client.index(doc_record["id"], doc_url, document_text)

        # If the content changed since the last crawl, refresh the digest and the index.
        if doc_record["digest"] != digest:
            self.document_client.update_digest(doc_record["id"], digest)
            self.indexing_client.update_content(doc_record["id"], document_text)

        # Enqueue outgoing links (up to MAX_URL) and record the link graph.
        for link in document_links:
            if self.url_counter < Crawler.MAX_URL:
                self.url_counter += 1
                child_doc_record = self.document_client.get_by_url(link.geturl())
                if "id" not in child_doc_record:
                    child_doc_record = self.document_client.create(link.geturl(), "digest")
                self.document_client.create_link(doc_record["id"], child_doc_record["id"])
                self.producer.publish(link.geturl())

        self.pagerank_client.update(doc_record["id"])
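# The 'url_queue'-backed Producer and Consumer that the Crawler depends on are
# not defined in this section. A minimal sketch of what they might look like
# with pika against a local RabbitMQ broker follows; the host, ack mode, and
# class internals are assumptions.
import pika


class Producer:
    def __init__(self, queue_name):
        self._queue_name = queue_name
        self._connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        self._channel = self._connection.channel()
        self._channel.queue_declare(queue=queue_name)

    def publish(self, message):
        # Route through the default exchange straight to the named queue.
        self._channel.basic_publish(exchange='', routing_key=self._queue_name, body=message)


class Consumer:
    def __init__(self, queue_name):
        self._queue_name = queue_name
        self._connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        self._channel = self._connection.channel()
        self._channel.queue_declare(queue=queue_name)

    def subscribe(self, callback):
        # Block and hand each message to callback(ch, method, properties, body),
        # which matches the signature of Crawler.run_for_url above.
        self._channel.basic_consume(queue=self._queue_name,
                                    on_message_callback=callback,
                                    auto_ack=True)
        self._channel.start_consuming()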
def publish():
    # Flask-style request handler: read the payload from the request body and
    # publish it to the configured topic.
    try:
        producer = Producer(bootstrap_servers, url, topic)
        values = request.json["val"]
        response = producer.publish(key=values, values=values)
        del producer
        return response
    except Exception as e:
        return str(e)
def test_keep_alive():
    # Create the producer.
    p = Producer(ip=g_ip, user=g_user, password=g_password)
    p.producer_declare()
    p.create_exchange(g_exchange, "topic")

    # Create the consumer.
    c = Consumer(ip=g_ip, user=g_user, password=g_password)
    c.start_consumer(g_exchange, "test1", "test1")
    time.sleep(5)  # Wait 5 seconds for the queue to be ready.

    # Stay idle without sending any messages.
    log.info("[test_keep_alive] start sending nothing test ...")
    secs = 0
    while secs < g_test_secs:
        time.sleep(5)
        secs += 5

    try:
        # Send one message to check that the connection is still usable.
        log.info("[test_keep_alive] test connection alive???")
        p.publish(g_exchange, "docx2pdf", '{"msg":"this is a test!"}')
        log.info("[test_keep_alive] connection alive!!!")

        log.info("[test_keep_alive] start sending msg test ...")
        secs = 0
        while secs < g_test_secs:
            time.sleep(1)
            p.publish(g_exchange, "test1", '{"msg":"this is a test!"}')
            secs += 1
    except Exception as err:
        log.error("[test_keep_alive] error: " + str(err))
        log.error("exit [test_keep_alive]")
    finally:
        p.publish(g_exchange, "test1", "quit")
        p.close()  # Close the producer connection.
        c.join()   # Wait for the consumer thread to finish.
import time

from producer import Producer

KAFKA_BROKER = 'kafka:9092'
KAFKA_TOPIC = 'thing-data'
KAFKA_GROUP = 'thing-cockpit'

if __name__ == "__main__":
    producer = Producer(KAFKA_BROKER)
    while True:
        data = {
            "timestamp": 1593834720000,
            "thing_id": 1,
            "company_id": 2,
            "data": {
                "production": 100,
            },
        }
        producer.publish(KAFKA_TOPIC, data)
        time.sleep(1)
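# The Producer wrapper imported above from producer.py is not shown here. A
# minimal sketch assuming the kafka-python client and JSON-serialized values
# follows; the constructor and publish() signature mirror the usage above, but
# the internals are assumptions.
import json

from kafka import KafkaProducer


class Producer:
    def __init__(self, bootstrap_servers):
        # JSON-serialize dict payloads before handing them to Kafka.
        self._producer = KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            value_serializer=lambda v: json.dumps(v).encode("utf-8"),
        )

    def publish(self, topic, data):
        # send() is asynchronous; flush() blocks until the broker acknowledges.
        self._producer.send(topic, data)
        self._producer.flush()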
#!/usr/bin/env python
from producer import Producer
from crawler import Crawler

# Seed the queue with a starting URL, then start the crawler loop.
producer = Producer('url_queue')
producer.publish('https://makeitreal.camp/')

crawler = Crawler()
crawler.run()