Example #1
def produce_consume():
    real_path, word_path, config_path = paths()
    check_paths(word_path, config_path)
    config = get_config(config_path)
    try:
        error = check_config(config)
    except Exception as e:
        print(type(e).__name__, e)
        exit(1)
    else:
        if error is not None:
            print(error)
            exit(1)
    q = Queue()
    consumer = Consumer(q)
    for i in range(16):
        t = Thread(target=consumer.consume_domains)
        t.daemon = True
        t.start()
    Producer(q, config, word_path).get_doms()
    q.join()
    if config['write_to_file']:
        print_red('writing to domains.json')
        p = Process(target=add_data, args=(real_path, consumer.get_domains()))
        p.start()
    print_red('sleeping zzzzz...')
    sleep(config['interval'])
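
The loop above relies on each consumer thread calling task_done() for every queued item; otherwise q.join() would block forever. Below is a minimal sketch of a Consumer that satisfies that contract (an assumption for illustration, not the project's actual class).

from queue import Queue
from threading import Thread

class Consumer:
    """Sketch only: drain the shared queue and collect results."""
    def __init__(self, q):
        self._q = q
        self._domains = []

    def consume_domains(self):
        while True:
            domain = self._q.get()            # blocks until the producer puts an item
            try:
                self._domains.append(domain)  # stand-in for the real per-domain work
            finally:
                self._q.task_done()           # required for q.join() to return

    def get_domains(self):
        return self._domains

if __name__ == '__main__':
    q = Queue()
    c = Consumer(q)
    Thread(target=c.consume_domains, daemon=True).start()
    for name in ('example.com', 'example.org'):
        q.put(name)
    q.join()                                  # returns once both items are marked done
    print(c.get_domains())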
Example #2
class Specialist(HospitalWorker):
    """
    Receives and processes specialized requests.
    Every specialist must be able to process exactly two
    out of three injury types (hip / knee / elbow).
    """
    def __init__(self, specializations):
        """
        Initializes connection structures, binds to each queue
        corresponding to specializations.
        """
        super().__init__()

        self._requests_consumer = Consumer('hospital', 'topic', 'localhost')

        for spec in specializations:
            self._requests_consumer.add_queue(queue_name=spec,
                                              routing_key='hosp.' + spec,
                                              callback=self.process_request)

        self._requests_consumer.start(new_thread=True)

    def process_request(self, ch, method, properties, body):
        """
        Simulates processing injury examination by sleeping random
        number of seconds (between 1 and 5) and sending back 'results' message.
        """
        body = body.decode()
        request_id = properties.correlation_id
        target = properties.reply_to

        log = colored(
            'processing request: ' + body + ' (request id: ' + request_id[:8] +
            ')', 'green')
        print(log, end='', flush=True)

        time_to_sleep = randint(1, 5)
        for _ in range(time_to_sleep):
            print(colored('.', 'green'), end='', flush=True)
            sleep(1)

        print('')

        message_opts = {
            'properties': pika.BasicProperties(correlation_id=request_id)
        }
        message = body + ' done'

        self._producer.send_message(routing_key=target,
                                    message=message,
                                    **message_opts)

        ch.basic_ack(delivery_tag=method.delivery_tag)
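
A hypothetical way to run the class above (Specialist, HospitalWorker, and Consumer come from the project's own modules and are not shown here): a specialist handling hip and knee injuries binds two queues and then serves requests from its background consumer thread.

if __name__ == '__main__':
    from time import sleep

    specialist = Specialist(['hip', 'knee'])   # binds 'hosp.hip' and 'hosp.knee'
    while True:
        sleep(1)                               # keep the main thread alive for callbacks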
Example #3
    def __init__(self, specializations):
        """
        Initializes connection structures, binds to each queue
        corresponding to specializations.
        """
        super().__init__()

        self._requests_consumer = Consumer('hospital', 'topic', 'localhost')

        for spec in specializations:
            self._requests_consumer.add_queue(queue_name=spec,
                                              routing_key='hosp.' + spec,
                                              callback=self.process_request)

        self._requests_consumer.start(new_thread=True)
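
The Consumer wrapper used in these snippets is not shown. Below is a rough sketch of what add_queue and start could look like on top of pika; this is an assumption for illustration, not the project's implementation, and a production version would have to keep all channel access on a single thread.

import threading

import pika

class Consumer:
    """Sketch only: one exchange, one channel, queues bound on demand."""
    def __init__(self, exchange, exchange_type, host):
        self._connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=host))
        self._channel = self._connection.channel()
        self._channel.exchange_declare(exchange=exchange,
                                       exchange_type=exchange_type)
        self._exchange = exchange

    def add_queue(self, queue_name='', routing_key='', callback=None):
        # An empty name asks the broker for an exclusive, auto-named queue.
        result = self._channel.queue_declare(queue=queue_name,
                                             exclusive=(queue_name == ''))
        name = result.method.queue
        self._channel.queue_bind(exchange=self._exchange, queue=name,
                                 routing_key=routing_key)
        self._channel.basic_consume(queue=name, on_message_callback=callback)
        return name

    def start(self, new_thread=False):
        if new_thread:
            threading.Thread(target=self._channel.start_consuming,
                             daemon=True).start()
        else:
            self._channel.start_consuming()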
Example #4
def crawl2db(getsession):
    """多线程只使用一个连接会存在一些问题,建立一个session池每个线程一个session
    视频访问速率有很严格的限制,请调大sleepsec"""
    global update_round
    print(
        "now start update round %d at %s" %
        (update_round, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    Q = Queue()
    mythreads = []
    aids = get_update_aids()
    print("try update videos with aids " + str(aids))
    pthread = Producer3(Q, video_aids=aids, func=lambda x: (x, ), sleepsec=0.5)
    mythreads.append(pthread)
    consumer_num = 4  # four consumer threads
    sessions = [getsession() for _ in range(consumer_num)]
    for i in range(consumer_num):
        db_session = sessions[i]  # one session per thread
        cthread = Consumer(Q,
                           session=db_session,
                           func=TddUpdateFocusVideo.store_video,
                           sleepsec=0.5)
        mythreads.append(cthread)
    with Timer() as t:
        for thread in mythreads:
            thread.start()
        for thread in mythreads:
            thread.join()
    for session in sessions:
        session.close()

    # db_session.close()
    print("update round %d finished, runtime: %s" % (update_round, t.elapsed))
    update_round += 1
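
getsession is only required to hand out an independent session per call. If the storage layer is SQLAlchemy (an assumption; the real factory is not shown), a matching factory could be as small as:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///tdd.db')   # placeholder URL for illustration
Session = sessionmaker(bind=engine)

def getsession():
    return Session()   # every call hands the requesting thread its own session

# crawl2db(getsession)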
Example #5
    def __init__(self):
        """
        Initializes all data structures that will be used
        for receiving processed examination requests.
        """
        super().__init__()

        self._results_consumer = Consumer('hospital', 'topic', 'localhost')
        self._results_queue = self._results_consumer.add_queue(
            callback=self.process_results)
        self._results_consumer.start(new_thread=True)

        # Set of unique ids for every sent request
        # that hasn't been processed yet
        self._pending_requests = set()
        self._requests_lock = threading.Lock()
Example #6
def crawl2csv(filename, start, end):
    """sleep sec 可以用random生成在一个范围的正态分布更好些
    start, end: up主mid范围"""
    Q = Queue()

    with open(filename, 'w', encoding='utf8', newline='') as fwriter:
        mycsvwriter = csv.writer(fwriter)
        mythreads = []
        pthread = Producer(Q,
                           start=start,
                           end=end,
                           func=BiliUser.getVideoList,
                           sleepsec=0.1)
        mythreads.append(pthread)
        consumer_num = 4  # four consumer threads
        for _ in range(consumer_num):
            cthread = Consumer(Q,
                               csvwriter=mycsvwriter,
                               func=BiliVideo.store_video_simpleajax,
                               sleepsec=0.01)
            mythreads.append(cthread)
        with Timer() as t:
            for thread in mythreads:
                thread.start()
            for thread in mythreads:
                thread.join()

        print('runtime - (%i_%i) - : %s' % (start, end, t.elapsed))
        print('======= All Done! =======')
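
The same csv.writer instance is shared by four consumer threads, so whatever the real Consumer does internally, writes have to be serialized. A minimal sketch of a consumer thread along those lines (assumed shape; the actual class and the signature of store_video_simpleajax are not shown):

import threading
from queue import Empty
from time import sleep

class Consumer(threading.Thread):
    """Sketch only: pop an item, let func turn it into rows, write under a lock."""
    _csv_lock = threading.Lock()    # csv writers are not thread-safe on their own

    def __init__(self, q, csvwriter, func, sleepsec=0.01):
        super().__init__(daemon=True)
        self._q = q
        self._writer = csvwriter
        self._func = func
        self._sleepsec = sleepsec

    def run(self):
        while True:
            try:
                item = self._q.get(timeout=5)   # exit once the producer has finished
            except Empty:
                return
            for row in self._func(item):        # assumes func yields csv rows
                with Consumer._csv_lock:
                    self._writer.writerow(row)
            sleep(self._sleepsec)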
Example #7
def crawl2db(getsession, start, end):
    """多线程只使用一个连接会存在一些问题,建立一个session池每个线程一个session
    视频访问速率有很严格的限制,请调大sleepsec"""
    Q = Queue()
    mythreads = []
    pthread = Producer(Q,
                       start=start,
                       end=end,
                       func=BiliUser.getVideoList,
                       sleepsec=0.1)
    mythreads.append(pthread)
    consumer_num = 4  # four consumer threads
    sessions = [getsession() for _ in range(consumer_num)]
    for i in range(consumer_num):
        db_session = sessions[i]  # one session per thread
        cthread = Consumer(Q,
                           session=db_session,
                           func=BiliVideo.store_video_simpleajax,
                           sleepsec=0.01)
        mythreads.append(cthread)
    with Timer() as t:
        for thread in mythreads:
            thread.start()
        for thread in mythreads:
            thread.join()
    for session in sessions:
        session.close()

    # db_session.close()
    print('runtime - (%i_%i) - : %s' % (start, end, t.elapsed))
    print('======= All Done! =======')
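
The producing side can be just as small. A sketch of a Producer matching the call above (assumed shape, for illustration only): it walks the [start, end) mid range, fetches each uploader's video list with func, and pauses sleepsec between requests because of the rate limit the docstring warns about.

import threading
from time import sleep

class Producer(threading.Thread):
    """Sketch only: enqueue every video found for every mid in the range."""
    def __init__(self, q, start, end, func, sleepsec=0.1):
        super().__init__()
        self._q = q
        self._start = start
        self._end = end
        self._func = func
        self._sleepsec = sleepsec

    def run(self):
        for mid in range(self._start, self._end):
            for video in self._func(mid):   # e.g. BiliUser.getVideoList(mid)
                self._q.put(video)
            sleep(self._sleepsec)           # respect the API rate limit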
Example #8
class Administrator:
    """
    Receives and logs every message sent within the whole system.
    Is allowed to broadcast messages to all other workers.
    """
    def __init__(self):
        self._log_consumer = Consumer('hospital', 'topic', 'localhost')
        self._log_queue = self._log_consumer.add_queue(
            routing_key='#', callback=self.process_log)
        self._log_consumer.start(new_thread=True)
        self._info_producer = Producer('info', 'fanout', 'localhost')

    def send_info(self, message):
        print('sending info: ', message)
        self._info_producer.send_message(message=message)

    def process_log(self, ch, method, properties, body):
        body = body.decode()
        log = colored('LOG: ' + body, 'yellow')
        print(log)
        ch.basic_ack(delivery_tag=method.delivery_tag)
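
Hypothetical usage of the class above: the '#' binding delivers every message published to the 'hospital' topic exchange to the administrator's log queue, while send_info broadcasts through the separate 'info' fanout exchange.

if __name__ == '__main__':
    admin = Administrator()
    admin.send_info('ward 3 closes at 20:00')   # fanned out to every worker
    input('logging all hospital traffic, press Enter to quit\n')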
Example #9
def crawl2csv(filename, start, end):
    """sleep sec 可以用random生成在一个范围的正态分布更好些
    start,end: aid范围"""
    Q = Queue()
    
    with open(filename, 'w', encoding='utf8', newline='') as fwriter:
        mycsvwriter = csv.writer(fwriter)
        mythreads = []
        pthread = Producer(Q, start=start, end=end, func=lambda x: (x, ), sleepsec=0.5)
        mythreads.append(pthread)
        consumer_num = 4 # four consumer threads
        for _ in range(consumer_num):
            cthread = Consumer(Q, csvwriter=mycsvwriter, func=BiliVideo.store_video, sleepsec=0.5)
            mythreads.append(cthread)
        with Timer() as t:
            for thread in mythreads:
                thread.start()
            for thread in mythreads:
                thread.join()
        
        print('runtime: %s' % t.elapsed)
        print('======= All Done! =======')
Example #10
def crawl2db(getsession, start, end):
    """多线程只使用一个连接会存在一些问题,建立一个session池每个线程一个session
    视频访问速率有很严格的限制,请调大sleepsec"""
    Q = Queue()
    mythreads = []
    pthread = Producer(Q, start=start, end=end, func=lambda x: (x,), sleepsec=0.5)
    mythreads.append(pthread)
    consumer_num = 4 # four consumer threads
    sessions = [getsession() for _ in range(consumer_num)]
    for i in range(consumer_num):
        db_session = sessions[i] # one session per thread
        cthread = Consumer(Q, session=db_session, func=TddAddFocusVideo.store_video, sleepsec=0.5)
        mythreads.append(cthread)
    with Timer() as t:
        for thread in mythreads:
            thread.start()
        for thread in mythreads:
            thread.join()
    for session in sessions:
        session.close()
        
    # db_session.close()
    print('runtime: %s' % t.elapsed)
    print('======= All Done! =======')
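
A hypothetical driver for crawl2csv above: crawling the aid space in fixed-size chunks keeps individual runs short and lets a failed chunk be re-run on its own.

if __name__ == '__main__':
    chunk = 1000
    for begin in range(1, 10001, chunk):
        crawl2csv('videos_%d_%d.csv' % (begin, begin + chunk),
                  begin, begin + chunk)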
Example #11
class Medic(HospitalWorker):
    """
    Creates and sends specialized examination requests and receives
    the results. Every request must be of one of following types:
        - hip examination
        - knee examination
        - elbow examination
    """
    def __init__(self):
        """
        Initializes all data structures that will be used
        for receiving processed examination requests.
        """
        super().__init__()

        self._results_consumer = Consumer('hospital', 'topic', 'localhost')
        self._results_queue = self._results_consumer.add_queue(
            callback=self.process_results)
        self._results_consumer.start(new_thread=True)

        # Set of unique ids for every sent request
        # that hasn't been processed yet
        self._pending_requests = set()
        self._requests_lock = threading.Lock()

    def send_request(self, patient_name, injury_type):
        """
        Creates new request message and sends it to proper injury queue.
        Adds unique request id to pending requests set.
        """
        request_id = str(uuid4())
        routing_key = 'hosp.' + injury_type
        message = patient_name + ' ' + injury_type

        message_opts = {
            'properties':
            pika.BasicProperties(reply_to=self._results_queue,
                                 correlation_id=request_id)
        }

        with self._requests_lock:
            self._pending_requests.add(request_id)
            self._producer.send_message(routing_key, message, **message_opts)

        log = colored(
            'sending request: ' + message + ' (request id: ' + request_id[:8] +
            ')', 'blue')
        print(log)

    def process_results(self, ch, method, properties, body):
        """
        Removes request_id corresponding to received results
        from pending requests set.
        """
        body = body.decode()
        ignore = False
        request_id = properties.correlation_id

        with self._requests_lock:

            if request_id in self._pending_requests:
                self._pending_requests.remove(request_id)

            else:
                ignore = True

        if not ignore:
            log = colored(
                'received results: ' + body + ' (request id: ' +
                request_id[:8] + ')', 'green')
            print(log)

        ch.basic_ack(delivery_tag=method.delivery_tag)
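
A hypothetical way to exercise the class above: each send_request records a correlation id, and process_results only reports replies whose id is still pending, so stale or duplicate replies are ignored silently.

if __name__ == '__main__':
    from time import sleep

    medic = Medic()
    medic.send_request('John Doe', 'knee')
    medic.send_request('Jane Roe', 'hip')
    sleep(15)   # leave time for the specialists to examine and reply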
Example #12
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/11/7 3:48 PM
# @Author  : yinxin<*****@*****.**>
# @Site    :
# @File    : application
# @Software: PyCharm

from utils import Consumer, send_data, crawl_url
from common import delay
import time

# init consumer
consumer = Consumer.get_consumer()


def main():
    msg = consumer.get_msg()
    url = bytes.decode(msg.value)
    html = crawl_url(url)
    send_data(url, html)


if __name__ == '__main__':
    while True:
        main()
        time.sleep(delay)
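
crawl_url, send_data, and Consumer.get_consumer live in the project's utils module and are not shown. For orientation, a plausible crawl_url built on the requests library could look like this (an assumption; the real helper may add headers, retries, or proxies):

import requests

def crawl_url(url, timeout=10):
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()   # fail loudly instead of storing an error page
    return resp.text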
Example #13
    def __init__(self):
        self._log_consumer = Consumer('hospital', 'topic', 'localhost')
        self._log_queue = self._log_consumer.add_queue(
            routing_key='#', callback=self.process_log)
        self._log_consumer.start(new_thread=True)
        self._info_producer = Producer('info', 'fanout', 'localhost')