Example #1
import queue
import time
from multiprocessing import Event, JoinableQueue, Process, Value


class TaskControl:
    def __init__(self, cls_worker, count, *args, **kwargs):
        self.queue = JoinableQueue()
        self.stopped = Event()
        self.count_processed = Value('i', 0)

        # workers are Process subclasses that receive the control object
        self.processes = [cls_worker(self, *args, **kwargs) for _ in range(count)]
        for p in self.processes:
            p.start()

    def is_active(self):
        return not self.stopped.is_set()

    def is_alive(self):
        alive = [p for p in self.processes if p.is_alive()]
        print('---- %d child processes are still alive' % len(alive))
        return alive

    def stop(self):
        self.stopped.set()
        self.queue.close()
        print('-- waiting for processes to finish')
        for p in self.processes:
            p.join()
        self.queue.cancel_join_thread()

    def send_chunk(self, items):
        for item in items:
            self.queue.put(item)
        print('--- waiting for queue to complete')
        while self.get_stats()[1] and self.is_alive():
            time.sleep(1)

    def get(self):
        while self.is_active():
            try:
                yield self.queue.get(timeout=1)
            except queue.Empty:
                pass

    def tick(self):
        self.queue.task_done()
        # += on a shared Value is not atomic; take its lock explicitly
        with self.count_processed.get_lock():
            self.count_processed.value += 1
        if not self.count_processed.value % 20:
            print('%d items processed' % self.count_processed.value)
        time.sleep(0.5)

    def get_stats(self):
        stats = self.count_processed.value, self.queue.qsize()
        print('--- %d items processed, %d queued' % stats)
        return stats
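A minimal usage sketch (the EchoWorker class below is hypothetical; TaskControl only assumes a Process subclass whose constructor receives the control object):

class EchoWorker(Process):
    # Hypothetical worker: consumes items from the control's queue.
    def __init__(self, control):
        super().__init__()
        self.control = control

    def run(self):
        for item in self.control.get():  # yields until control.stop()
            print('processing %r' % item)
            self.control.tick()          # marks the task done


if __name__ == '__main__':
    control = TaskControl(EchoWorker, count=4)
    control.send_chunk(range(20))
    control.stop()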
Example #2
import multiprocessing
from multiprocessing import JoinableQueue, Queue


class multiprocessing_mapping(object):
    def __init__(self, num_workers=4, transform=read_img) -> None:
        # `transform` must be a worker function that takes (path_queue,
        # data_queue), consumes paths and puts loaded results; read_img is
        # defined elsewhere in the source module.
        super().__init__()
        self.num_workers = num_workers
        self.transform = transform
        self.data_queue = Queue()
        self.path_queue = JoinableQueue()
        # don't block interpreter exit on the queue's feeder thread
        self.path_queue.cancel_join_thread()
        self.workers = [
            multiprocessing.Process(target=self.transform,
                                    args=(self.path_queue, self.data_queue))
            for _ in range(self.num_workers)
        ]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

    def __call__(self, img_path_list):
        for i in img_path_list:
            self.path_queue.put(i)
        # blocks until the workers have called task_done() for every path
        self.path_queue.join()
        # results arrive in completion order, not input order
        return [self.data_queue.get() for _ in range(len(img_path_list))]
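A sketch of the expected transform and a call site; read_img here is a hypothetical stand-in (in a runnable module it has to be defined before the class, since it is used as a default argument):

def read_img(path_queue, data_queue):
    # Hypothetical stand-in: pull a path, load its bytes, signal completion.
    while True:
        path = path_queue.get()
        with open(path, 'rb') as f:
            data_queue.put((path, f.read()))
        path_queue.task_done()


if __name__ == '__main__':
    loader = multiprocessing_mapping(num_workers=4)
    images = loader(['a.jpg', 'b.jpg', 'c.jpg'])  # (path, bytes) pairs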
Example #3
import os
import signal
from multiprocessing import JoinableQueue, Queue, cpu_count

# BaseAnalyzer, Worker, AnalyzeUtil, AnalysisStatsView, Util, STOP_SENTINEL
# and log are defined elsewhere in the source project.
class ParallelAnalyzer(BaseAnalyzer):
    ''' Parallel analyzer which uses the `multiprocessing` module. '''

    def __init__(self,
                 storage, script_list, script_hashes, min_script_needs, apks_or_paths,
                 concurrency=None):
        '''
        See :py:meth:`.BaseAnalyzer.__init__` for details on the first attributes.

        Other Parameters
        ----------------
        concurrency : int, optional (default is number of cpu cores)
            Number of workers to spawn.
        '''
        super(ParallelAnalyzer, self).__init__(storage, script_list, script_hashes, min_script_needs, apks_or_paths)

        # parallelization parameters
        if concurrency is None:
            concurrency = cpu_count()

        self.__concurrency = concurrency

        log.info("concurrency: %s", self.concurrency)
        log.info("Using processes")

        # parallel stuff, concerning processes
        # must be joinable: _analyze() calls .join()/.task_done() on it
        self.__work_queue = JoinableQueue()
        self.__work_queue.cancel_join_thread()
        self.__workers = []
        
        self.__analyzed_apks = Queue()

    def get_analyzed_apks(self):
        return self.__analyzed_apks

    def set_analyzed_apks(self, value):
        self.__analyzed_apks = value

    def del_analyzed_apks(self):
        del self.__analyzed_apks

    def get_work_queue(self):
        return self.__work_queue

    def get_concurrency(self):
        return self.__concurrency

    def get_workers(self):
        return self.__workers

    def set_workers(self, value):
        self.__workers = value

    def del_workers(self):
        del self.__workers

    analyzed_apks = property(get_analyzed_apks, set_analyzed_apks, del_analyzed_apks, "Queue<FastAPK> : APKs analyzed so far")
    concurrency = property(get_concurrency, None, None, "int : Number of workers to spawn.")
    workers = property(get_workers, set_workers, del_workers, "list<Worker> : List of workers.")
    work_queue = property(get_work_queue, None, None, "Queue<str> : Queue with paths to apks which shall be analyzed.")

    def _analyze(self):
        ''' See doc of :py:meth:`.BaseAnalyzer.analyze`. '''
        try:
            work_queue = self.work_queue

            # create worker pool
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                p = Worker(self.script_list, self.script_hashes,
                           self.min_script_needs, work_queue, self.storage,
                           self.cnt_analyzed_apks, self.analyzed_apks,
                           self.storage_results)
                self.workers.append(p)
                p.daemon = True

            # start workers
            for p in self.workers:
                p.start()

            # queue has size limit -> start workers first then enqueue items
            log.info("Loading apk paths into work queue ...")
            for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # task is apk with all scripts
                work_queue.put(apk_stuff)

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            # progress view for cli
            av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks, self.analyzed_apks)
            av.daemon = True
            av.start()
            
            # block until workers finished
            work_queue.join()
            av.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # try a hot shutdown first
        except KeyboardInterrupt:
            log.warning("Hot shutdown ... ")
            try:
                # don't rely on the local name: the interrupt may arrive
                # before `work_queue` was bound in the try block above
                work_queue = self.work_queue
                log.warning("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warning("cleared work queue ... ")

                for _ in range(self.concurrency):
                    # signal end-of-work
                    work_queue.put(STOP_SENTINEL)

                for worker in self.workers:
                    worker.join()
                log.warning("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # if the user really wants a cold shutdown -> kill the processes
            except KeyboardInterrupt:
                log.warning("Cold shutdown ... ")
                log.warning("Hard shutdown wanted! Killing all workers!")

                # kill processes via SIGINT -> send CTRL-C
                for w in self.workers:
                    try:
                        os.kill(w.pid, signal.SIGINT)
                    except OSError:
                        pass

                return self.cnt_analyzed_apks.value
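The shutdown protocol above (one STOP_SENTINEL per worker, then a join on the work queue) is the standard sentinel pattern for multiprocessing; a minimal self-contained sketch, with a hypothetical worker in place of the project's Worker class:

from multiprocessing import JoinableQueue, Process

STOP_SENTINEL = None  # assumed sentinel value; any unique marker works


def analyze_worker(q):
    # Hypothetical worker: process items until the sentinel arrives.
    while True:
        item = q.get()
        if item is STOP_SENTINEL:
            q.task_done()
            break
        print('analyzing %r' % item)  # stand-in for the real analysis
        q.task_done()


if __name__ == '__main__':
    q = JoinableQueue()
    workers = [Process(target=analyze_worker, args=(q,), daemon=True)
               for _ in range(4)]
    for w in workers:
        w.start()
    for item in ('a.apk', 'b.apk', 'c.apk'):
        q.put(item)
    for _ in workers:
        q.put(STOP_SENTINEL)  # one sentinel per worker signals end-of-work
    q.join()                  # returns once every item was task_done()'d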