Example #1
# Imports assumed for this example; get_logger and DEFAULT_LOG_LEVEL come
# from the surrounding package's logging helpers.
import pickle

import gevent
import zmq
from gevent.queue import Queue, Empty
class Server(object):
    def __init__(self, address, size=None, log_level=DEFAULT_LOG_LEVEL):
        self.daemon = True
        self.started = False
        self.size = size
        self.queue = Queue(maxsize=size)
        self.address = address

        self.context = zmq.Context(1)
        self.server = None
        self.logger = get_logger(self, log_level)

        self._has_fetched_jobs = False

    def send(self, cmd, data=''):
        self.server.send_multipart([cmd, data])

    def recv(self):
        reply = self.server.recv_multipart()

        assert len(reply) == 2

        return reply

    def bind(self):
        if self.server:
            self.server.close()

        self.server = self.context.socket(zmq.REP)
        self.server.bind(self.address)

    def start(self):
        self.started = True

        self.logger.info("Taskmaster binding to %r", self.address)
        self.bind()

        while self.started:
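            # Yield so other greenlets can run between requests.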
            gevent.sleep(0)
            cmd, data = self.recv()
            if cmd == 'GET':
                if not self.has_work():
                    self.send('QUIT')
                    continue

                try:
                    job = self.queue.get_nowait()
                except Empty:
                    self.send('WAIT')
                    continue

                self.send('OK', pickle.dumps(job))

            elif cmd == 'DONE':
                self.queue.task_done()
                if self.has_work():
                    self.send('OK')
                else:
                    self.send('QUIT')

            else:
                self.send('ERROR', 'Unrecognized command')

        self.logger.info('Shutting down')
        self.shutdown()

    def mark_queue_filled(self):
        self._has_fetched_jobs = True

    def put_job(self, job):
        return self.queue.put(job)

    def first_job(self):
        return self.queue.queue[0]

    def get_current_size(self):
        return self.queue.qsize()

    def get_max_size(self):
        return self.size

    def has_work(self):
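        # Until mark_queue_filled() signals that all jobs have been queued,
        # report work even if the queue is momentarily empty.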
        if not self._has_fetched_jobs:
            return True
        return not self.queue.empty()

    def is_alive(self):
        return self.started

    def shutdown(self):
        if not self.started:
            return
        self.server.close()
        self.context.term()
        self.started = False
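
The server speaks a simple two-frame request/reply protocol over its REP socket: a worker sends `GET` to fetch a pickled job and `DONE` to acknowledge completion, and the server answers `OK`, `WAIT`, or `QUIT`. A minimal worker-side sketch in the same Python 2 style (the `run_worker` helper, address handling, and polling loop are illustrative, not part of the original):

import pickle
import time

import zmq


def run_worker(address):
    # REQ pairs with the server's REP socket: every send expects a reply.
    context = zmq.Context(1)
    client = context.socket(zmq.REQ)
    client.connect(address)

    while True:
        client.send_multipart(['GET', ''])
        cmd, data = client.recv_multipart()
        if cmd == 'QUIT':
            break
        if cmd == 'WAIT':
            time.sleep(0.1)  # queue momentarily empty; poll again
            continue
        job = pickle.loads(data)
        # ... process the job here ...
        client.send_multipart(['DONE', ''])
        cmd, _ = client.recv_multipart()
        if cmd == 'QUIT':
            break

    client.close()
    context.term()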
Example #2
# Imports assumed for this example:
import multiprocessing
import time

from django.contrib.staticfiles.management.commands import collectstatic
from gevent import joinall, monkey, spawn
from gevent.queue import Queue as GeventQueue
class Command(collectstatic.Command):
    """
    This command extends Django's `collectstatic` with a `--faster` argument for parallel file copying using gevent.
    The speed improvement is especially helpful for remote storage backends like S3.
    """
    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        self.counter = 0
        self.task_queue = None
        self.worker_spawn_method = None
        self.use_multiprocessing = False

    def add_arguments(self, parser):
        super(Command, self).add_arguments(parser)
        parser.add_argument('--faster',
                            action='store_true',
                            default=False,
                            help='Collect static files simultaneously')
        parser.add_argument('--workers',
                            action='store',
                            default=20,
                            help='Number of simultaneous workers (default=20)')
        parser.add_argument(
            '--use-multiprocessing',
            action='store_true',
            default=False,
            help='Use multiprocessing library instead of gevent')

    def set_options(self, **options):
        self.faster = options.pop('faster')
        self.queue_worker_amount = int(options.pop('workers'))
        self.use_multiprocessing = options.pop('use_multiprocessing')

        if self.use_multiprocessing:
            self.task_queue = multiprocessing.JoinableQueue()
            self.worker_spawn_method = self.mp_spawn
        else:
            self.task_queue = GeventQueue()
            self.worker_spawn_method = self.gevent_spawn

        super(Command, self).set_options(**options)

    def handle(self, **options):
        start_time = time.time()
        super(Command, self).handle(**options)
        self.log('%s static files copied asynchronously in %is.' %
                 (self.counter, time.time() - start_time),
                 level=1)

    def copy_file(self, path, prefixed_path, source_storage):
        self.file_handler('copy', path, prefixed_path, source_storage)

    def link_file(self, path, prefixed_path, source_storage):
        self.file_handler('link', path, prefixed_path, source_storage)

    def file_handler(self, handler_type, path, prefixed_path, source_storage):
        """
        Create a dict with all kwargs of the `copy_file` or `link_file` method of the super class and add it to
        the queue for later processing.
        """
        if self.faster:
            self.task_queue.put({
                'handler_type': handler_type,
                'path': path,
                'prefixed_path': prefixed_path,
                'source_storage': source_storage
            })
            self.counter += 1
        else:
            if handler_type == 'link':
                super(Command, self).link_file(path, prefixed_path,
                                               source_storage)
            else:
                super(Command, self).copy_file(path, prefixed_path,
                                               source_storage)

    def delete_file(self, path, prefixed_path, source_storage):
        """
        We skip the file_exists check because every file is overwritten anyway.
        """
        if self.faster:
            return True
        else:
            return super(Command, self).delete_file(path, prefixed_path,
                                                    source_storage)

    def collect(self):
        """
        Run the standard collection pass (which fills the task queue), then spawn concurrent workers to drain it.
        """
        result = super(Command, self).collect()
        if self.faster:
            self.worker_spawn_method()
        return result

    def gevent_spawn(self):
        """ Spawn worker threads (using gevent) """
        monkey.patch_all(thread=False)
        joinall([
            spawn(self.gevent_worker) for x in range(self.queue_worker_amount)
        ])

    def gevent_worker(self):
        """
        Process one task after another by calling the handler (`copy_file` or `link_file`) method of the parent class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')

            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

    def mp_spawn(self):
        """ Spawn worker processes (using multiprocessing) """
        processes = []
        for x in range(self.queue_worker_amount):
            process = multiprocessing.Process(target=self.mp_worker)
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

    def mp_worker(self):
        """
        Process one task after another by calling the handler (`copy_file` or `link_file`) method of the parent class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')

            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

            self.task_queue.task_done()
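
Once the command module shadows the stock `collectstatic` in an app's `management/commands/` package, it can be invoked like the original. A hypothetical invocation with illustrative flag values:

from django.core.management import call_command

# Equivalent to: python manage.py collectstatic --noinput --faster --workers 40
call_command('collectstatic', interactive=False, faster=True, workers=40)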
Example #3
# Imports as in Example #2, plus OrderedDict for tracking collected files:
from collections import OrderedDict

class Command(collectstatic.Command):
    """
    This command extends Django's `collectstatic` with a `--faster` argument for parallel file copying using gevent.
    The speed improvement is especially helpful for remote storage backends like S3.
    """
    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        self.counter = 0
        self.task_queue = None
        self.worker_spawn_method = None
        self.use_multiprocessing = False
        self.found_files = OrderedDict()

    def add_arguments(self, parser):
        super(Command, self).add_arguments(parser)
        parser.add_argument('--faster',
                            action='store_true',
                            default=False,
                            help='Collect static files simultaneously')
        parser.add_argument('--workers',
                            action='store',
                            default=20,
                            help='Number of simultaneous workers (default=20)')
        parser.add_argument(
            '--use-multiprocessing',
            action='store_true',
            default=False,
            help='Use multiprocessing library instead of gevent')

    def set_options(self, **options):
        self.faster = options.pop('faster')
        self.queue_worker_amount = int(options.pop('workers'))
        self.use_multiprocessing = options.pop('use_multiprocessing')

        if self.use_multiprocessing:
            self.task_queue = multiprocessing.JoinableQueue()
            self.worker_spawn_method = self.mp_spawn
        else:
            self.task_queue = GeventQueue()
            self.worker_spawn_method = self.gevent_spawn

        super(Command, self).set_options(**options)

        if self.faster:
            # Django's stock collectstatic collects all files and calls the
            # storage backend's post_process method within the same method.
            # Because copying is deferred to a task queue here, post-processing
            # would start before all files had been collected, so disable it
            # and run it manually after the workers finish.
            self.post_process_original = self.post_process
            self.post_process = False

    def handle(self, **options):
        start_time = time.time()
        super(Command, self).handle(**options)
        self.log('%s static files copied asynchronously in %is.' %
                 (self.counter, time.time() - start_time),
                 level=1)

    def copy_file(self, path, prefixed_path, source_storage):
        self.file_handler('copy', path, prefixed_path, source_storage)

    def link_file(self, path, prefixed_path, source_storage):
        self.file_handler('link', path, prefixed_path, source_storage)

    def file_handler(self, handler_type, path, prefixed_path, source_storage):
        """
        Create a dict with all kwargs of the `copy_file` or `link_file` method of the super class and add it to
        the queue for later processing.
        """
        if self.faster:
            if prefixed_path not in self.found_files:
                self.found_files[prefixed_path] = (source_storage, path)

            self.task_queue.put({
                'handler_type': handler_type,
                'path': path,
                'prefixed_path': prefixed_path,
                'source_storage': source_storage
            })
            self.counter += 1
        else:
            if handler_type == 'link':
                super(Command, self).link_file(path, prefixed_path,
                                               source_storage)
            else:
                super(Command, self).copy_file(path, prefixed_path,
                                               source_storage)

    def delete_file(self, path, prefixed_path, source_storage):
        """
        We skip the file_exists check because every file is overwritten anyway.
        """
        if self.faster:
            return True
        else:
            return super(Command, self).delete_file(path, prefixed_path,
                                                    source_storage)

    def collect(self):
        """
        Run the standard collection pass (which fills the task queue), then spawn concurrent workers to drain it.
        """
        collected = super(Command, self).collect()
        if self.faster:
            self.worker_spawn_method()
            self.post_processor()
        return collected

    def post_processor(self):
        # Here we check if the storage backend has a post_process
        # method and pass it the list of modified files.
        if self.post_process_original and hasattr(self.storage,
                                                  'post_process'):
            processor = self.storage.post_process(self.found_files,
                                                  dry_run=self.dry_run)
            for original_path, processed_path, processed in processor:
                if isinstance(processed, Exception):
                    self.stderr.write("Post-processing '%s' failed!" %
                                      original_path)
                    # Add a blank line before the traceback, otherwise it's
                    # too easy to miss the relevant part of the error message.
                    self.stderr.write("")
                    raise processed
                if processed:
                    self.log("Post-processed '%s' as '%s'" %
                             (original_path, processed_path),
                             level=1)
                    self.post_processed_files.append(original_path)
                else:
                    self.log("Skipped post-processing '%s'" % original_path)

    def gevent_spawn(self):
        """ Spawn worker threads (using gevent) """
        monkey.patch_all(thread=False)
        joinall([
            spawn(self.gevent_worker) for x in range(self.queue_worker_amount)
        ])

    def gevent_worker(self):
        """
        Process one task after another by calling the handler (`copy_file` or `link_file`) method of the parent class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')

            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

    def mp_spawn(self):
        """ Spawn worker processes (using multiprocessing) """
        processes = []
        for x in range(self.queue_worker_amount):
            process = multiprocessing.Process(target=self.mp_worker)
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

    def mp_worker(self):
        """
        Process one task after another by calling the handler (`copy_file` or `link_file`) method of the parent class.
        """
        while not self.task_queue.empty():
            task_kwargs = self.task_queue.get()
            handler_type = task_kwargs.pop('handler_type')

            if handler_type == 'link':
                super(Command, self).link_file(**task_kwargs)
            else:
                super(Command, self).copy_file(**task_kwargs)

            self.task_queue.task_done()
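
The deferred `post_processor` above drives Django's standard storage `post_process` contract: it passes the `found_files` mapping of `prefixed_path -> (source_storage, path)` and consumes `(original_path, processed_path, processed)` tuples. A minimal hypothetical backend showing that contract:

from django.core.files.storage import FileSystemStorage


class NoopPostProcessStorage(FileSystemStorage):
    """Hypothetical backend illustrating the post_process interface."""

    def post_process(self, paths, dry_run=False, **options):
        for prefixed_path, (storage, path) in paths.items():
            if dry_run:
                yield prefixed_path, None, False
                continue
            # A real backend would hash or rewrite the file here.
            yield prefixed_path, prefixed_path, True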
Example #4
# Imports assumed for this example:
import pickle

import gevent
import zmq
from gevent.queue import Queue, Empty
class Server(object):
    def __init__(self, address, size=None):
        self.daemon = True
        self.started = False
        self.size = size
        self.queue = Queue(maxsize=size)
        self.address = address

        self.context = zmq.Context(1)
        self.server = None

    def send(self, cmd, data=""):
        self.server.send_multipart([cmd, data])

    def recv(self):
        reply = self.server.recv_multipart()

        assert len(reply) == 2

        return reply

    def bind(self):
        if self.server:
            self.server.close()

        print "Taskmaster binding to %r" % self.address
        self.server = self.context.socket(zmq.REP)
        self.server.bind(self.address)

    def start(self):
        self.started = True

        self.bind()

        while self.started:
            gevent.sleep(0)
            cmd, data = self.recv()
            if cmd == "GET":
                if not self.has_work():
                    self.send("QUIT")
                    continue

                try:
                    job = self.queue.get_nowait()
                except Empty:
                    self.send("WAIT")
                    continue

                self.send("OK", pickle.dumps(job))

            elif cmd == "DONE":
                self.queue.task_done()
                if self.has_work():
                    self.send("OK")
                else:
                    self.send("QUIT")

            else:
                self.send("ERROR", "Unrecognized command")

        self.shutdown()

    def put_job(self, job):
        return self.queue.put(job)

    def first_job(self):
        return self.queue.queue[0]

    def get_current_size(self):
        return self.queue.qsize()

    def get_max_size(self):
        return self.size

    def has_work(self):
        return not self.queue.empty()

    def is_alive(self):
        return self.started

    def shutdown(self):
        if not self.started:
            return
        self.server.close()
        self.context.term()
        self.started = False
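
On the producing side, the server typically runs in its own greenlet so jobs can be queued while it serves requests. A sketch with an illustrative address and payloads:

import gevent

# A real producer would also coordinate shutdown on top of the
# GET/DONE handshake before exiting.
server = Server("tcp://0.0.0.0:3050", size=1000)
gevent.spawn(server.start)

for i in xrange(10):
    server.put_job({"id": i})  # blocks once `size` jobs are queued

# Wait for consumers to drain the queue.
while server.get_current_size():
    gevent.sleep(0.1)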