Пример #1
0
class ThreadingManager(object):
    """
    Dispatch named callables to a pool owned by a manager.

    Callables are registered by name through the constructor's keyword
    arguments; every executor method looks the callable up by that name
    and forwards ``*args``/``**kwargs`` untouched to the matching pool
    method.  The ``*queued*`` variants additionally push each result item
    onto ``self.queue`` (one queue shared by all calls) and return it.

    NOTE(review): ``Manager`` and ``cpu_count`` are presumably the
    ``multiprocessing`` ones -- confirm against the file's imports.

    Usage:
    tm = ThreadingManager(2, get_detections=cerberus_api.get_detection)
    u = tm.queued_executor('get_detections', [777, 125])

    """
    def __init__(self, cpu_count=cpu_count(), **kwargs):
        """
        :param cpu_count: size passed to the manager's ``Pool``; the default
            is computed once, when the class body is executed.
        :param kwargs: ``name -> callable`` registry used by the executors.
        """
        self.m = Manager()
        self.pool = self.m.Pool(cpu_count)
        self.kwargs = kwargs
        self.queue = self.m.Queue()
        self.log_queue = self.m.Queue()

    def _enqueue(self, items):
        """Put every element of ``items`` on ``self.queue``; return the queue."""
        for item in items:
            self.queue.put(item)
        return self.queue

    def executor(self, function, *args, **kwargs):
        """Run ``pool.map`` with the callable registered as ``function``."""
        return self.pool.map(self.kwargs[function], *args, **kwargs)

    def queued_executor(self, function, *args, **kwargs):
        """Like :meth:`executor`, but push the results onto ``self.queue``."""
        return self._enqueue(
            self.pool.map(self.kwargs[function], *args, **kwargs))

    def async_executor(self, function, *args, **kwargs):
        """Run ``pool.map_async`` and return its result handle."""
        result = self.pool.map_async(self.kwargs[function], *args, **kwargs)
        sys.stdout.flush()
        return result

    def star_executor(self, function, *args, **kwargs):
        """Run ``pool.starmap`` with the callable registered as ``function``."""
        return self.pool.starmap(self.kwargs[function], *args, **kwargs)

    def star_queued_executor(self, function, *args, **kwargs):
        """Like :meth:`star_executor`, but push results onto ``self.queue``."""
        return self._enqueue(
            self.pool.starmap(self.kwargs[function], *args, **kwargs))

    def async_star_executor(self, function, *args, **kwargs):
        """Run ``pool.starmap_async`` and return its result handle."""
        return self.pool.starmap_async(self.kwargs[function], *args,
                                       **kwargs)

    def async_star_queued_executor(self, function, *args, **kwargs):
        """
        Run ``pool.starmap_async`` and push the results onto ``self.queue``.

        The handle returned by ``starmap_async`` is not iterable, so the
        values are fetched with ``get()``; this waits for the whole batch
        before the queue is returned.
        """
        result = self.pool.starmap_async(self.kwargs[function], *args,
                                         **kwargs)
        return self._enqueue(result.get())

    def close(self, exception_type=None, exception_value=None,
              traceback=None):
        """
        Stop accepting work and wait for the pool to finish.

        The three parameters are accepted but not used; they now default
        to ``None`` so a plain ``close()`` works as well.
        """
        # NOTE(review): the former call to the private ``_maintain_pool()``
        # was dropped -- a manager pool proxy does not expose it, so it
        # raised before the pool could be closed.
        self.pool.close()
        self.pool.join()
Пример #2
0
def check_open_ports(url: str,
                     ip: str,
                     path: Optional[str] = None) -> List[Result]:
    """
    Probe every record of a port list in parallel and collect the findings.

    One ``_is_port_open(url, ip, rec, queue)`` task is submitted per record
    of the JSON file; once all tasks have finished, every non-``None``
    value found on the shared queue is returned.

    :param url: passed through to ``_is_port_open``.
    :param ip: passed through to ``_is_port_open``.
    :param path: JSON file to read the records from; when ``None`` the
        ``resources/common_ports.json`` resource of the ``yawast`` package
        is used.
    :return: the non-``None`` values taken from the queue (presumably put
        there by ``_is_port_open`` -- verify against that helper).
    """
    # read the data in from the data directory *before* any process is
    # started, so a missing or malformed file cannot leave workers behind
    if path is None:
        file_path = pkg_resources.resource_filename(
            "yawast", "resources/common_ports.json")
    else:
        file_path = path

    with open(file_path) as json_file:
        data = json.load(json_file)

    results = []

    # os.cpu_count() may return None when the count cannot be determined
    worker_count = (os.cpu_count() or 1) * 2

    # the manager runs its own server process; the ``with`` block shuts it
    # down once the queue has been drained
    with Manager() as mgr:
        queue = mgr.Queue()

        # create processing pool
        pool = Pool(worker_count)
        try:
            for rec in data:
                pool.apply_async(_is_port_open, (url, ip, rec, queue))

            pool.close()
            pool.join()
        finally:
            # harmless after a clean join; reclaims the workers on error
            pool.terminate()

        while not queue.empty():
            val = queue.get()
            if val is not None:
                results.append(val)

    return results
Пример #3
0
    def calc(self, computeOptions, **args):
        """
        Build the longitude Hovmoeller result for the requested box.

        Tiles are fetched for the bounding box, first dataset and time range
        taken from ``computeOptions``.  ``longitude_time_hofmoeller_stats``
        is evaluated for every tile, either inline (``maxprocesses == 1``)
        or through ``pool_worker`` processes fed by manager queues.

        :param computeOptions: source of bounds, dataset and time range.
        :param args: accepted but not used here.
        :return: ``HoffMoellerResults`` of type ``LONGITUDE`` built from the
            per-tile results sorted by their ``"time"`` entry.
        :raises NexusProcessingException.NoDataException: no tiles found.
        :raises NexusProcessingException: a worker reported an ``'error'``.
        """
        tiles = self._get_tile_service().get_tiles_bounded_by_box(
            computeOptions.get_min_lat(), computeOptions.get_max_lat(),
            computeOptions.get_min_lon(), computeOptions.get_max_lon(),
            computeOptions.get_dataset()[0],
            computeOptions.get_start_time(),
            computeOptions.get_end_time())

        if len(tiles) == 0:
            raise NexusProcessingException.NoDataException(reason="No data found for selected timeframe")

        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = LongitudeHofMoellerCalculator()
            for x, tile in enumerate(tiles):
                results.append(
                    calculator.longitude_time_hofmoeller_stats(tile, x))
        else:
            manager = Manager()
            try:
                work_queue = manager.Queue()
                done_queue = manager.Queue()
                for x, tile in enumerate(tiles):
                    work_queue.put(
                        ('longitude_time_hofmoeller_stats', tile, x))
                # one SENTINEL per worker (presumably the stop marker
                # pool_worker looks for -- verify against that helper)
                for _ in range(maxprocesses):
                    work_queue.put(SENTINEL)

                # Start new processes to handle the work
                pool = Pool(maxprocesses)
                try:
                    for _ in range(maxprocesses):
                        pool.apply_async(
                            pool_worker, (LONGITUDE, work_queue, done_queue))
                    pool.close()

                    # Collect the results: one queue item per tile
                    for _ in tiles:
                        result = done_queue.get()
                        try:
                            error_str = result['error']
                        except KeyError:
                            results.append(result)
                        else:
                            logger.error(error_str)
                            raise NexusProcessingException(reason="Error calculating longitude_time_hofmoeller_stats.")
                finally:
                    # runs on the error path too, so workers never linger
                    pool.terminate()
            finally:
                manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        results = self.applyDeseasonToHofMoeller(results, pivot="lons")

        result = HoffMoellerResults(results=results, computeOptions=computeOptions, type=HoffMoellerResults.LONGITUDE)
        return result
    def get_daily_difference_average_for_box(self, min_lat, max_lat, min_lon,
                                             max_lon, dataset1, dataset2,
                                             start_time, end_time):
        """
        Evaluate ``calc_average_diff_on_day`` for every day found in the box.

        The days come from ``find_days_in_range_asc`` queried with
        ``dataset1``.  With ``maxprocesses == 1`` the calculation runs
        inline; otherwise it is spread over ``pool_worker`` processes fed
        by manager queues.

        NOTE: this block uses Python 2 constructs (``xrange``,
        ``print >>``), which are kept as they are.

        :return: list of ``(result[0], result[1])`` tuples, one per day;
            in the multi-process case in the order the workers finish.
        :raises NexusProcessingException: a worker reported ``'error'`` as
            the first element of its result.
        """
        daysinrange = self._tile_service.find_days_in_range_asc(
            min_lat, max_lat, min_lon, max_lon, dataset1, start_time, end_time)

        maxprocesses = int(
            self.algorithm_config.get("multiprocessing", "maxprocesses"))

        averagebyday = []
        if maxprocesses == 1:
            calculator = DailyDifferenceAverageCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_diff_on_day(
                    min_lat, max_lat, min_lon, max_lon, dataset1, dataset2,
                    dayinseconds)
                averagebyday.append((result[0], result[1]))
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            try:
                work_queue = manager.Queue()
                done_queue = manager.Queue()
                for dayinseconds in daysinrange:
                    work_queue.put(
                        ('calc_average_diff_on_day', min_lat, max_lat,
                         min_lon, max_lon, dataset1, dataset2, dayinseconds))
                # one SENTINEL per worker (presumably the stop marker
                # pool_worker looks for -- verify against that helper)
                for _ in xrange(0, maxprocesses):
                    work_queue.put(SENTINEL)

                # Start new processes to handle the work
                pool = Pool(maxprocesses)
                try:
                    for _ in xrange(0, maxprocesses):
                        pool.apply_async(pool_worker,
                                         (work_queue, done_queue))
                    pool.close()

                    # Collect the results as
                    # [(day (in ms), average difference for that day)]
                    for _ in xrange(0, len(daysinrange)):
                        result = done_queue.get()
                        if result[0] == 'error':
                            print >> sys.stderr, result[1]
                            raise NexusProcessingException(
                                reason="Error calculating average by day.")
                        averagebyday.append((result[0], result[1]))
                finally:
                    # runs on the error path too, so workers never linger
                    pool.terminate()
            finally:
                manager.shutdown()

        return averagebyday
Пример #5
0
    def getTimeSeriesStatsForBoxSingleDataSet(self,
                                              min_lat,
                                              max_lat,
                                              min_lon,
                                              max_lon,
                                              ds,
                                              start_time=0,
                                              end_time=-1,
                                              applySeasonalFilter=True,
                                              applyLowPass=True):
        """
        Evaluate ``calc_average_on_day`` for each day of ``ds`` in the box.

        With ``maxprocesses == 1`` the calculation runs inline; otherwise
        it is spread over ``pool_worker`` processes fed by manager queues.
        The per-day results are sorted by their ``"time"`` entry and then
        passed to ``filt.applyAllFiltersOnField`` for the ``'mean'``,
        ``'max'`` and ``'min'`` fields.

        :param applySeasonalFilter: forwarded as ``applySeasonal``.
        :param applyLowPass: forwarded as ``applyLowPass``.
        :return: tuple ``(results, {})``.
        :raises NoDataException: no days were found for the request.
        :raises NexusProcessingException: a worker reported an ``'error'``.
        """
        daysinrange = self._get_tile_service().find_days_in_range_asc(
            min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time)

        if len(daysinrange) == 0:
            raise NoDataException(
                reason="No data found for selected timeframe")

        maxprocesses = int(
            self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                results.append(calculator.calc_average_on_day(
                    min_lat, max_lat, min_lon, max_lon, ds, dayinseconds))
        else:
            # Create a task to calc the average for each day
            manager = Manager()
            try:
                work_queue = manager.Queue()
                done_queue = manager.Queue()
                for dayinseconds in daysinrange:
                    work_queue.put(('calc_average_on_day', min_lat, max_lat,
                                    min_lon, max_lon, ds, dayinseconds))
                # one SENTINEL per worker (presumably the stop marker
                # pool_worker looks for -- verify against that helper)
                for _ in range(maxprocesses):
                    work_queue.put(SENTINEL)

                # Start new processes to handle the work
                pool = Pool(maxprocesses)
                try:
                    for _ in range(maxprocesses):
                        pool.apply_async(pool_worker,
                                         (work_queue, done_queue))
                    pool.close()

                    # Collect the results: one queue item per day
                    for _ in range(len(daysinrange)):
                        result = done_queue.get()
                        try:
                            error_str = result['error']
                        except KeyError:
                            results.append(result)
                        else:
                            logger.error(error_str)
                            raise NexusProcessingException(
                                reason="Error calculating average by day.")
                finally:
                    # runs on the error path too, so workers never linger
                    pool.terminate()
            finally:
                manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        for field in ('mean', 'max', 'min'):
            filt.applyAllFiltersOnField(results,
                                        field,
                                        applySeasonal=applySeasonalFilter,
                                        applyLowPass=applyLowPass)

        return results, {}
Пример #6
0
    def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds, start_seconds_from_epoch,
                                              end_seconds_from_epoch,
                                              apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):
        """
        Evaluate ``calc_average_on_day`` for each day of ``ds`` in the polygon.

        Days are looked up with the polygon's ``bounds``; the calculation
        itself receives ``bounding_polygon.wkt``.  With ``maxprocesses == 1``
        it runs inline, otherwise through ``pool_worker`` processes fed by
        manager queues.  Falsy per-day results are dropped; the rest is
        sorted by ``"time"``.

        When ``apply_seasonal_cycle_filter`` is set, each result gets
        ``meanSeasonal``/``minSeasonal``/``maxSeasonal`` = its value minus
        the matching value from ``calculate_monthly_average``.  The
        low-pass pass over those seasonal fields is best effort: a failure
        is logged and ignored.

        :return: tuple ``(results, {})``.
        :raises NoDataException: no days were found for the request.
        :raises NexusProcessingException: a worker reported an ``'error'``.
        """
        the_time = datetime.now()
        bounds = bounding_polygon.bounds
        daysinrange = self._get_tile_service().find_days_in_range_asc(bounds[1],
                                                                      bounds[3],
                                                                      bounds[0],
                                                                      bounds[2],
                                                                      ds,
                                                                      start_seconds_from_epoch,
                                                                      end_seconds_from_epoch)
        logger.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        the_time = datetime.now()
        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
                if result:
                    results.append(result)
        else:
            # Create a task to calc the average for each day
            manager = Manager()
            try:
                work_queue = manager.Queue()
                done_queue = manager.Queue()
                for dayinseconds in daysinrange:
                    work_queue.put(
                        ('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
                # one SENTINEL per worker (presumably the stop marker
                # pool_worker looks for -- verify against that helper)
                for _ in range(maxprocesses):
                    work_queue.put(SENTINEL)

                # Start new processes to handle the work
                pool = Pool(maxprocesses)
                try:
                    for _ in range(maxprocesses):
                        pool.apply_async(pool_worker, (work_queue, done_queue))
                    pool.close()

                    # Collect the results: one queue item per day
                    for _ in range(len(daysinrange)):
                        result = done_queue.get()
                        try:
                            error_str = result['error']
                        except KeyError:
                            if result:
                                results.append(result)
                        else:
                            logger.error(error_str)
                            raise NexusProcessingException(reason="Error calculating average by day.")
                finally:
                    # runs on the error path too, so workers never linger
                    pool.terminate()
            finally:
                manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])
        logger.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
                result['meanSeasonal'] = result['mean'] - month_mean
                result['minSeasonal'] = result['min'] - month_min
                result['maxSeasonal'] = result['max'] - month_max
            logger.info(
                "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        the_time = datetime.now()
        for field in ('mean', 'max', 'min'):
            filtering.applyAllFiltersOnField(results, field, applySeasonal=False,
                                             applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                for field in ('meanSeasonal', 'minSeasonal', 'maxSeasonal'):
                    filtering.applyFiltersOnField(results, field, applySeasonal=False, applyLowPass=True,
                                                  append="LowPass")
            except Exception:
                # If it doesn't work log the error but ignore it
                tb = traceback.format_exc()
                # ``warning`` replaces the deprecated ``warn`` alias
                logger.warning("Error calculating SeasonalLowPass filter:\n%s" % tb)

        logger.info(
            "LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        return results, {}