def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(self):
        if not mock:
            self.skipTest('unittest.mock is not available')

        with mock.patch('logging.getLogger') as getLogger:
            install_mp_handler(self.logger)

            self.assertEqual(0, getLogger.call_count)

    def test_when_no_logger_is_specified_then_it_uses_the_root_logger(self):
        if not mock:
            self.skipTest('unittest.mock is not available')

        with mock.patch('logging.getLogger') as getLogger:
            getLogger.return_value = self.logger

            install_mp_handler()

            getLogger.assert_called_once_with()

        wrapper_handler, = self.logger.handlers
        self.assertIsInstance(wrapper_handler, MultiProcessingHandler)
        self.assertIs(wrapper_handler.sub_handler, self.handler)
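
A minimal usage sketch of the API these tests exercise (not taken from any of
the listed projects), assuming the multiprocessing-logging package is
installed and a fork-based start method such as the Linux default: configure
logging in the parent, call install_mp_handler() once, then start workers.

import logging
import multiprocessing as mp

from multiprocessing_logging import install_mp_handler


def work(i):
    # Safe inside workers: records are relayed back to the parent process.
    logging.info('processing item %d', i)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # Wrap the root logger's handlers in MultiProcessingHandler before any
    # worker processes are started.
    install_mp_handler()
    with mp.Pool(2) as pool:
        pool.map(work, range(4))
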
Example #3
def parse_args(description, ParamsClass=SieverParams, **kwds):
    """
    Parse command line arguments.

    The command line parser accepts the standard parameters as printed by calling it with
    ``--help``.  All other parameters are used to construct params objects.  For example::

        ./foo 80 --workers 4 --trials 2 -S 1337 --a 1 2 --b 3 4

    would operate on dimension 80 with parameters (a: 1, b: 3), (a: 1, b: 4), (a: 2, b: 3), (a: 2,
    b: 4), i.e. the Cartesian product of all parameters.  It will run two trials each using four
    workers. Note that each worker may use several threads, too. The starting seed is `1337`.

    :param description: help message
    :param kwds: default parameters

    """

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'lower_bound',
        type=int,
        help="lowest lattice dimension to consider (inclusive)")
    parser.add_argument(
        '-u',
        '--upper-bound',
        type=int,
        dest="upper_bound",
        default=0,
        help="upper bound on lattice dimension to consider (exclusive)")
    parser.add_argument('-s',
                        '--step-size',
                        type=int,
                        dest="step_size",
                        default=2,
                        help="increment lattice dimension in these steps")
    parser.add_argument('-t',
                        '--trials',
                        type=int,
                        dest="trials",
                        default=1,
                        help="number of experiments to run per dimension")
    parser.add_argument('-w',
                        '--workers',
                        type=int,
                        dest="workers",
                        default=1,
                        help="number of parallel experiments to run")
    parser.add_argument('-p',
                        '--pickle',
                        action='store_true',
                        dest="pickle",
                        help="pickle statistics")
    parser.add_argument('-S',
                        '--seed',
                        type=int,
                        dest="seed",
                        default=0,
                        help="randomness seed")
    parser.add_argument(
        '--dry-run',
        dest="dry_run",
        action='store_true',
        help=
        "Show parameters that would be used but don't run any actual experiments."
    )
    parser.add_argument('--show-defaults',
                        dest="show_defaults",
                        action='store_true',
                        help="Show default parameters and exit.")
    parser.add_argument('--loglvl',
                        type=str,
                        help="Logging level (one of DEBUG, WARN, INFO)",
                        default="INFO")
    parser.add_argument('--log-filename',
                        dest="log_filename",
                        type=str,
                        help="Logfile filename",
                        default=None)
    args, unknown = parser.parse_known_args()

    kwds_ = OrderedDict()
    for k, v in six.iteritems(kwds):
        k_ = k.replace("__", "/")
        kwds_[k_] = v
    kwds = kwds_

    if args.show_defaults:
        pp = ParamsClass(**kwds)
        slen = max(len(p) for p in pp) + 1
        fmt = "{key:%ds}: {value}" % slen
        for k, v in six.iteritems(pp):
            print(fmt.format(key=k, value=v))
        exit(0)

    all_params = OrderedDict([("", ParamsClass(**kwds))])

    unknown_args = OrderedDict()
    unknown = apply_aliases(unknown)

    # NOTE: This seems like the kind of thing the standard library can do (better)
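    # Collect the remaining "--key v1 v2 ..." options: each key gathers every
    # value up to the next flag, values are eval()'d where possible (with BKZ
    # in scope) and otherwise kept as strings, and a bare flag becomes [True].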
    i = 0
    while i < len(unknown):
        k = unknown[i]
        if not (k.startswith("--") or k.startswith("-")):
            raise ValueError("Failure to parse command line argument '%s'" % k)
        k = re.match("^-+(.*)", k).groups()[0]
        k = k.replace("-", "_")
        unknown_args[k] = []
        i += 1
        for i in range(i, len(unknown)):
            v = unknown[i]
            if v.startswith("--") or v.startswith("-"):
                i -= 1
                break
            try:
                v = eval(v, {"BKZ": BKZ})
            except (NameError, SyntaxError):
                pass
            if not isinstance(v, (list, tuple)):
                v = [v]
            unknown_args[k].extend(v)
        i += 1
        if not unknown_args[k]:
            unknown_args[k] = [True]

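    # Build the Cartesian product of all collected parameter values: every
    # existing params object is copied once per value and stored under a
    # human-readable key.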
    for k, v in six.iteritems(unknown_args):
        all_params_ = OrderedDict()
        for p in all_params:
            for v_ in v:
                p_ = copy.copy(all_params[p])
                p_[k] = v_
                all_params_[p + "'%s': %s, " % (k, v_)] = p_
        all_params = all_params_

    log_filename = args.log_filename
    if log_filename is None:
        log_filename = log_filenamef()

    multiprocessing_logging.install_mp_handler()

    if not os.path.isdir("logs"):
        os.makedirs("logs")
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(levelname)5s:%(name)12s:%(asctime)s: %(message)s',
        datefmt='%Y/%m/%d %H:%M:%S %Z',
        filename=log_filename)

    console = logging.StreamHandler()
    console.setLevel(getattr(logging, args.loglvl.upper()))
    console.setFormatter(logging.Formatter('%(name)s: %(message)s', ))
    logging.getLogger('').addHandler(console)

    if args.dry_run:
        for params in all_params:
            print(params)
        exit(0)

    return args, all_params
Example #4
def main():
    args = parse_arguments()

    logging.basicConfig(
        level=getattr(logging, args.log_level.upper()),
        format='%(asctime)s - %(process)s - %(levelname)s - %(message)s')
    install_mp_handler()

    os.nice(20)
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    input_files = collect_inputs(args.input_dirs)
    logging.info('Scheduled {} files for shuffling.'.format(len(input_files)))
    if not input_files:
        logging.critical('No input files!')
        sys.exit(1)

    output_files = [
        os.path.join(args.output_dir, os.path.basename(f)) for f in input_files
    ]

    with openall(input_files[0]) as inf:
        header = inf.readline().strip()

    with Pool(args.processes) as inpool, Pool(args.processes) as outpool:
        m = Manager()
        queue = m.Queue(maxsize=1000)
        num_readers = m.Value('I', args.processes)
        lock = m.Lock()
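        # The bounded queue provides backpressure between readers and writers;
        # num_readers and lock are shared by producers and consumers so the
        # two sides can coordinate.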

        # Each worker gets a chunk of all input / output files
        input_chunks = list(split_into(input_files, args.processes))
        output_chunks = list(split_into(output_files, args.processes))

        producer_f = partial(producer,
                             queue=queue,
                             num_readers=num_readers,
                             lock=lock)
        inresult = inpool.map_async(producer_f, input_chunks)
        consumer_f = partial(consumer,
                             queue=queue,
                             header=header,
                             documents=args.documents,
                             num_readers=num_readers,
                             lock=lock)
        outresult = outpool.map_async(consumer_f, output_chunks)

        docs_read, docs_written = sum(inresult.get()), sum(outresult.get())

        logging.debug('Joining processes...')
        inpool.close()
        outpool.close()
        inpool.join()
        outpool.join()
        logging.debug('Joined processes.')

        if docs_read != docs_written:
            logging.error(f'The number of documents read ({docs_read}) and '
                          f'the number of documents written ({docs_written}) '
                          f'differs!')

    logging.info('Done.')
Example #5
    def test_when_a_logger_is_passed_then_it_wraps_all_handlers(self):
        install_mp_handler(self.logger)

        wrapper_handler, = self.logger.handlers
        self.assertIsInstance(wrapper_handler, MultiProcessingHandler)
        self.assertIs(wrapper_handler.sub_handler, self.handler)
Example #6
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Args:
        dataset_path:  Path to dataset file containing URLs
                       (Type: str)

        data_dir:      Output directory where video will be saved if output
                       path is not explicitly given
                       (Type: str)

        ffmpeg_path:   Path to ffmpeg executable
                       (Type: str)

        ffprobe_path:  Path to ffprobe executable
                       (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to "flickr-soundnet-dl.log"
                          (Type: str or None)

        verbose:          If True, prints detailed messages to console
                          (Type: bool)

        disable_logging:  If True, does not log to a file
                          (Type: bool)

        num_workers:      Number of multiprocessing workers used to download videos
                          (Type: int)

        **ffmpeg_cfg:     ffmpeg configurations
    """

    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    print(verbose)
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')

    if not os.path.isdir(audio_dir):
        os.makedirs(audio_dir)

    if not os.path.isdir(video_dir):
        os.makedirs(video_dir)

    pool = mp.Pool(num_workers)
    try:
        with open(dataset_path, 'r') as f:
            for line_idx, line in enumerate(f):
                url = line.strip()
                media_filename = extract_flickr_id(url)
                video_filepath = os.path.join(data_dir, 'video', media_filename + '.' + ffmpeg_cfg.get('video_format', 'mp4'))
                audio_filepath = os.path.join(data_dir, 'audio', media_filename + '.' + ffmpeg_cfg.get('audio_format', 'flac'))

                if os.path.exists(video_filepath) and os.path.exists(audio_filepath):
                    info_msg = 'Already downloaded video {}. Skipping.'
                    LOGGER.info(info_msg.format(media_filename))
                    continue

                worker_args = [url, data_dir, ffmpeg_path, ffprobe_path]
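                # Queue one asynchronous download per URL: the ffmpeg settings
                # are bound via partial(), the per-video arguments are passed
                # positionally.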
                pool.apply_async(partial(download_flickr_video, **ffmpeg_cfg), worker_args)

    except KeyboardInterrupt:
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
Example #7
import logging
import multiprocessing
from pathlib import Path
from typing import List
import warnings

from astropy.io import fits
from astropy.wcs import WCS, FITSFixedWarning
import mocpy
import multiprocessing_logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)-25s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
multiprocessing_logging.install_mp_handler()
logger = logging.getLogger(__name__)


def stokes_type(stokes_str: str) -> List[str]:
    STOKES_PARAMS = ("I", "Q", "U", "V")
    stokes_str = stokes_str.upper()
    for char in stokes_str:
        if char not in STOKES_PARAMS:
            raise ValueError(
                f"Stokes parameter must be one of {''.join(STOKES_PARAMS)}")
    return [char for char in stokes_str]


def get_moc_output_dir(image_path: Path) -> Path:
    output_dir_name = image_path.parent.name.replace("IMAGES", "MOCS")
Example #8
    def test_lj_sim_manager_openmm_integration_run(
        self,
        class_tmp_path_factory,
        boundary_condition_class,
        resampler_class,
        work_mapper_class,
        platform,
        lj_params,
        lj_omm_sys,
        lj_integrator,
        lj_reporter_classes,
        lj_reporter_kwargs,
        lj_init_walkers,
        lj_openmm_runner,
        lj_unbinding_bc,
        lj_wexplore_resampler,
        lj_revo_resampler,
    ):
        """Run all combinations of components in the fixtures for the smallest
        amount of time, just to make sure they all work together and don't give errors."""

        logging.getLogger().setLevel(logging.DEBUG)
        install_mp_handler()
        logging.debug("Starting the test")

        print("starting the test")

        # the configuration class gives us a convenient way to
        # parametrize our reporters for the locale
        from wepy.orchestration.configuration import Configuration

        # the runner
        from wepy.runners.openmm import OpenMMRunner

        # mappers
        from wepy.work_mapper.mapper import Mapper
        from wepy.work_mapper.worker import WorkerMapper
        from wepy.work_mapper.task_mapper import TaskMapper

        # the worker types for the WorkerMapper
        from wepy.work_mapper.worker import Worker
        from wepy.runners.openmm import OpenMMCPUWorker, OpenMMGPUWorker

        # the walker task types for the TaskMapper
        from wepy.work_mapper.task_mapper import WalkerTaskProcess
        from wepy.runners.openmm import OpenMMCPUWalkerTaskProcess, OpenMMGPUWalkerTaskProcess

        n_cycles = 1
        n_steps = 2
        num_workers = 2

        # generate the reporters and temporary directory for this test
        # combination

        tmpdir_template = 'lj_fixture_{plat}-{wm}-{res}-{bc}'
        tmpdir_name = tmpdir_template.format(plat=platform,
                                             wm=work_mapper_class,
                                             res=resampler_class,
                                             bc=boundary_condition_class)

        # make a temporary directory for this configuration to work with
        tmpdir = str(class_tmp_path_factory.mktemp(tmpdir_name))

        # make a config so that the reporters get parametrized properly
        reporters = Configuration(
            work_dir=tmpdir,
            reporter_classes=lj_reporter_classes,
            reporter_partial_kwargs=lj_reporter_kwargs).reporters

        steps = [n_steps for _ in range(n_cycles)]

        # choose the components based on the parametrization
        boundary_condition = None
        resampler = None

        walker_fixtures = [lj_init_walkers]
        runner_fixtures = [lj_openmm_runner]
        boundary_condition_fixtures = [lj_unbinding_bc]
        resampler_fixtures = [lj_wexplore_resampler, lj_revo_resampler]

        walkers = lj_init_walkers

        boundary_condition = [
            boundary_condition
            for boundary_condition in boundary_condition_fixtures
            if type(boundary_condition).__name__ == boundary_condition_class
        ][0]
        resampler = [
            resampler for resampler in resampler_fixtures
            if type(resampler).__name__ == resampler_class
        ][0]

        assert boundary_condition is not None
        assert resampler is not None

        # generate the work mapper given the type and the platform

        work_mapper_classes = {
            mapper_class.__name__: mapper_class
            for mapper_class in [Mapper, WorkerMapper, TaskMapper]
        }

        # # select the right one given the option
        # work_mapper_type = [mapper_type for mapper_type in work_mapper_classes
        #                     if type(mapper_type).__name__ == work_mapper_class][0]

        # decide based on the platform and the work mapper which
        # platform dependent components to build
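        # GPU platforms (CUDA/OpenCL) get explicit device_ids and CUDA also
        # uses the 'spawn' start method; the CPU variants pin one thread per
        # worker and the Reference platform uses the plain worker types.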
        if work_mapper_class == 'Mapper':
            # then there is no settings
            work_mapper = Mapper()

        elif work_mapper_class == 'WorkerMapper':

            if platform == 'CUDA':
                work_mapper = WorkerMapper(num_workers=num_workers,
                                           worker_type=OpenMMGPUWorker,
                                           device_ids={
                                               '0': 0,
                                               '1': 1
                                           },
                                           proc_start_method='spawn')

            elif platform == 'OpenCL':
                work_mapper = WorkerMapper(
                    num_workers=num_workers,
                    worker_type=OpenMMGPUWorker,
                    device_ids={
                        '0': 0,
                        '1': 1
                    },
                )

            elif platform == 'CPU':
                work_mapper = WorkerMapper(
                    num_workers=num_workers,
                    worker_type=OpenMMCPUWorker,
                    worker_attributes={'num_threads': 1})

            elif platform == 'Reference':
                work_mapper = WorkerMapper(
                    num_workers=num_workers,
                    worker_type=Worker,
                )

        elif work_mapper_class == 'TaskMapper':

            if platform == 'CUDA':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=OpenMMGPUWalkerTaskProcess,
                    device_ids={
                        '0': 0,
                        '1': 1
                    },
                    proc_start_method='spawn')

            elif platform == 'OpenCL':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=OpenMMGPUWalkerTaskProcess,
                    device_ids={
                        '0': 0,
                        '1': 1
                    })

            elif platform == 'CPU':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=OpenMMCPUWalkerTaskProcess,
                    worker_attributes={'num_threads': 1})

            elif platform == 'Reference':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=WalkerTaskProcess,
                )

        else:
            raise ValueError(
                "Work mapper class {} not recognized".format(work_mapper_class))

        # initialize the runner with the platform
        runner = OpenMMRunner(lj_omm_sys.system,
                              lj_omm_sys.topology,
                              lj_integrator,
                              platform=platform)

        logging.debug("Constructing the manager")

        manager = Manager(walkers,
                          runner=runner,
                          boundary_conditions=boundary_condition,
                          resampler=resampler,
                          work_mapper=work_mapper,
                          reporters=reporters)

        # since different work mappers need different process start
        # methods for different platforms i.e. CUDA and linux fork
        # vs. spawn we choose the appropriate one for each method.

        logging.debug("Starting the simulation")

        walkers, filters = manager.run_simulation(n_cycles,
                                                  steps,
                                                  num_workers=num_workers)
Example #10
def main():
    parser = argparse.ArgumentParser(description='''Starts a BLEva Gateway
            service on the device.''',
                                     epilog='''Note: This requires a BLED112
            dongle from Bluegiga.''')
    parser.add_argument('-u',
                        '--url',
                        help='''URL of BLEva server''',
                        required=True)
    parser.add_argument('-d',
                        '--debug',
                        help='Logging level (10-50)',
                        type=int,
                        default=20,
                        choices=[10, 20, 30, 40, 50])
    args = parser.parse_args()
    url = args.url
    print url
    tty_paths = util.get_tty_paths()
    FORMAT = '%(asctime)s - %(name)s - %(processName)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=FORMAT, filename='bled112.log')
    logger = logging.getLogger('BLEva')
    logger.setLevel(args.debug)
    import multiprocessing_logging
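    # install_mp_handler() wraps the root logger's handlers (the FileHandler
    # configured by basicConfig above) so records from child processes are
    # relayed through the main process.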
    multiprocessing_logging.install_mp_handler()
    logger.info('\n--------------------')
    logger.info('BLEva has started')
    logger.info('\n--------------------')
    while True:
        logger.info('\n--------------------')
        logger.info('BLEva is waiting for new benchmark')
        print "BLEva is waiting for new benchmarks"
        b = getBenchmark(url + '/benchmark')
        print b
        if b != '':
            logger.info('BLEva received new benchmark')
            print "got new benchmark"
            j = json.loads(b)
            instances = []
            for dongle in j['dongles']:
                gap_role = dongle['gap_role']
                gatt_role = dongle['gatt_role']
                replicas = dongle['replicas']
                print replicas
                logger.debug("Replicas: " + str(replicas))
                if replicas > len(tty_paths):
                    raise Exception("Too few dongles connected.")
                for replica in xrange(0, replicas):
                    if gap_role in ['broadcaster', 'peripheral']:
                        a = dongle['steps']
                        steps = []
                        for v in a:
                            s = Step()
                            s.time = v['time']
                            print "json time " + str(['time'])
                            s.ble_operation = v['ble_operation']
                            # s.adv_data = map(ord, v['adv_data'][2:].decode("hex"))
                            # s.short_name = util.pad_truncate(s.short_name, 5)
                            # s.long_name = util.pad_truncate(s.long_name, 12)
                            s.long_name = v['long_name']
                            if replica < 10:
                                s.short_name = v['short_name'] + str(0) + str(
                                    replica)
                                if s.long_name != "":
                                    s.long_name = v['long_name'] + str(
                                        0) + str(replica)
                            else:
                                s.short_name = v['short_name'] + str(replica)
                                if s.long_name != "":
                                    s.long_name = v['long_name'] + str(replica)
                            s.short_name = util.pad_truncate(s.short_name, 7)
                            if s.long_name != "":
                                s.long_name = util.pad_truncate(
                                    s.long_name, 14)
                            logger.debug("Replica Short Name: " + s.short_name)
                            logger.debug("Replica Long Name: " + s.long_name)
                            s.major = int(
                                v['major'],
                                0)  # NOTE base=0 guesses base from string
                            s.minor = int(v['minor'], 0)
                            s.adv_interval_min = int(v['adv_interval_min'], 0)
                            s.adv_interval_max = int(v['adv_interval_max'], 0)
                            s.adv_channels = int(v['adv_channels'], 0)
                            s.gap_discoverable_mode = ble_codes.gap_discoverable_mode[
                                v['gap_discoverable_mode']]
                            s.gap_connectable_mode = ble_codes.gap_connectable_mode[
                                v['gap_connectable_mode']]
                            if "connection_interval_min" in v:
                                s.connection_interval_min = v[
                                    "connection_interval_min"]
                            if "connection_interval_max" in v:
                                s.connection_interval_max = v[
                                    "connection_interval_max"]
                            if "slave_latency" in v:
                                s.slave_latency = v["slave_latency"]
                            if "supervision_timeout" in v:
                                s.supervision_timeout = v[
                                    "supervision_timeout"]
                            steps.append(s)
                        peripheral = Peripheral(logger=logger,
                                                steps=steps,
                                                port_name=tty_paths[replica],
                                                gap_role=gap_role,
                                                gatt_role=gatt_role)
                        instances.append(peripheral)
            logger.info('BLEva is starting benchmark now')
            print "BLEva is starting benchmark now"
            processes = []
            logger.debug('Telling Phone to start')
            print "notifying phone"
            urllib2.urlopen(url + '/benchmark/sync/dongle').read()
            print "done notified"
            if not IBEACON:
                for i in instances:
                    print i
                    p = mp.Process(target=i.start_benchmark,
                                   name=i.steps[0].short_name)
                    p.start()
                    processes.append(p)
                for p in processes:
                    p.join()
            else:
                time.sleep(40)
            print "finished one benchmark"
            logger.info('BLEva finished one benchmark'
                        )  # FIXME fix logger to also log spawned processes
        if b == '':
            print "BLEva server not available, sleeping a while and try again."
            logger.info(
                'BLEva server not available, sleeping a while and try again.'
            )  # FIXME fix logger to also log spawned processes
            time.sleep(
                10)  # sleep and then try again until server is available
Example #11
def main():
    config = get_config()

    logging.basicConfig(
        format=
        '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s',
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=logging.DEBUG)
    multiprocessing_logging.install_mp_handler()
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    health_file = None
    if config.zac.health_file is not None:
        health_file = os.path.abspath(config.zac.health_file)

    logging.info("Main start (%d) version %s", os.getpid(), __version__)

    stop_event = multiprocessing.Event()
    state_manager = multiprocessing.Manager()
    processes = []

    source_hosts_queues = []
    source_collectors = get_source_collectors(config)
    for source_collector in source_collectors:
        source_hosts_queue = multiprocessing.Queue()
        process = processing.SourceCollectorProcess(source_collector["name"],
                                                    state_manager.dict(),
                                                    source_collector["module"],
                                                    source_collector["config"],
                                                    source_hosts_queue)
        source_hosts_queues.append(source_hosts_queue)
        processes.append(process)

    try:
        process = processing.SourceHandlerProcess("source-handler",
                                                  state_manager.dict(),
                                                  config.zac.db_uri,
                                                  source_hosts_queues)
        processes.append(process)

        process = processing.SourceMergerProcess("source-merger",
                                                 state_manager.dict(),
                                                 config.zac.db_uri,
                                                 config.zac.host_modifier_dir)
        processes.append(process)

        process = processing.ZabbixHostUpdater("zabbix-host-updater",
                                               state_manager.dict(),
                                               config.zac.db_uri,
                                               config.zabbix)
        processes.append(process)

        process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater",
                                                    state_manager.dict(),
                                                    config.zac.db_uri,
                                                    config.zabbix)
        processes.append(process)

        process = processing.ZabbixTemplateUpdater("zabbix-template-updater",
                                                   state_manager.dict(),
                                                   config.zac.db_uri,
                                                   config.zabbix)
        processes.append(process)
    except exceptions.ZACException as e:
        logging.error("Failed to initialize child processes. Exiting: %s",
                      str(e))
        sys.exit(1)

    for process in processes:
        process.start()

    with processing.SignalHandler(stop_event):
        status_interval = 60
        next_status = datetime.datetime.now()

        while not stop_event.is_set():
            if next_status < datetime.datetime.now():
                if health_file is not None:
                    write_health(health_file, processes, source_hosts_queues,
                                 config.zabbix.failsafe)
                log_process_status(processes)
                next_status = datetime.datetime.now() + datetime.timedelta(
                    seconds=status_interval)

            dead_process_names = [
                process.name for process in processes
                if not process.is_alive()
            ]
            if dead_process_names:
                logging.error("A child has died: %s. Exiting",
                              ', '.join(dead_process_names))
                stop_event.set()

            time.sleep(1)

        logging.debug(
            "Queues: %s",
            ", ".join([str(queue.qsize()) for queue in source_hosts_queues]))

        for process in processes:
            logging.info("Terminating: %s(%d)", process.name, process.pid)
            process.terminate()

        alive_processes = [
            process for process in processes if process.is_alive()
        ]
        while alive_processes:
            process = alive_processes[0]
            logging.info("Waiting for: %s(%d)", process.name, process.pid)
            log_process_status(processes)  # TODO: Too verbose?
            process.join(10)
            if process.exitcode is None:
                logging.warning(
                    "Process hanging. Signaling new terminate: %s(%d)",
                    process.name, process.pid)
                process.terminate()
            time.sleep(1)
            alive_processes = [
                process for process in processes if process.is_alive()
            ]

    logging.info("Main exit")
Example #12
import multiprocessing as mp
from collections import defaultdict
from queue import Full, Empty

import logging
import numpy as np
from multiprocessing_logging import install_mp_handler

from .base_iterator import BaseIterator
from ...routines.mp_routines import ArrayDictQueue
from ... import ROOT_LOGGER_NAME, ROOT_LOGGER_LEVEL
logger = logging.getLogger('{}.{}'.format(ROOT_LOGGER_NAME, __name__))
logger.setLevel(ROOT_LOGGER_LEVEL)
install_mp_handler(logger=logger)


class MultiProcessIterator(BaseIterator):
    """Iterates through data with base iterator interface, implementing in-batch parallelism"""
    def __init__(self, num_processes, max_tasks=100, max_results=100, use_shared=False, *args, **kwargs):
        """
        :param num_processes: number of processes to be used by the iterator
        :param max_tasks: max number of tasks to be put in tasks queue
        :param max_results: max volume of output queue
        :param use_shared: whether to use array queue for passing big arrays without pickling them
        """

        super(MultiProcessIterator, self).__init__(*args, **kwargs)
        self._num_processes = num_processes
        self._max_tasks = max(max_tasks, self._batch_size)
        self._max_results = max(max_results, self._batch_size)
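        # Queue capacities are raised to at least one full batch so a complete
        # batch can always be staged at once.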
Example #13
 def __init__(self,
              producer_func,
              producer_config_args,
              pipe_funcs,
              pipe_funcs_config_args,
              pipe_n_procs,
              accumulator_object,
              accumulator_func,
              accumulator_config_args,
              worker_get_limit=5):
     # enforce the contract.
     try:
         assert isinstance(worker_get_limit, int) and worker_get_limit > 1
     except AssertionError:
         raise AssertionError('worker_get_limit must be an integer > 1')
     # allow multiple producers
     self._multiple_producers = isinstance(producer_func, tuple)
     # check functions
     try:
         if self._multiple_producers:
             for func in producer_func:
                 assert callable(func)
         else:
             assert callable(producer_func)
     except AssertionError:
         raise AssertionError(
             'must provide a callable function for producer')
     try:
         if self._multiple_producers:
             for func in producer_func:
                 assert isgeneratorfunction(func)
         else:
             assert isgeneratorfunction(producer_func)
     except AssertionError:
         raise AssertionError(
             'producer function(s) must (all) be a generator function')
     try:
         assert isinstance(pipe_funcs, tuple)
     except AssertionError:
         raise AssertionError(
             'must supply a tuple of callable functions for pipe_funcs')
     for pf in pipe_funcs:
         try:
             assert callable(pf)
         except AssertionError:
             raise AssertionError(
                 'all elements inside of pipe_funcs must be callable functions'
             )
     # check arguments
     try:
         if self._multiple_producers:
             for args in producer_config_args:
                 assert isinstance(args, tuple)
         else:
             assert isinstance(producer_config_args, tuple)
     except AssertionError:
         raise AssertionError(
             'function arguments must be provided as a tuple')
     try:
         assert isinstance(pipe_funcs_config_args, tuple)
         for pfa in pipe_funcs_config_args:
             assert isinstance(pfa, tuple)
     except AssertionError:
         raise AssertionError(
             'pipe function arguments must be provided as a tuple of tuples'
         )
     # check procs
     try:
         assert isinstance(pipe_n_procs, tuple)
         for n in pipe_n_procs:
             assert isinstance(n, int)
     except AssertionError:
         raise AssertionError('must provide a tuple of integers')
     # check agreement between correlated inputs
     try:
         assert len(pipe_funcs) == len(pipe_funcs_config_args) and len(
             pipe_funcs) == len(pipe_n_procs)
     except AssertionError:
         raise AssertionError(
             'must provide one tuple of arguments and a number of processes for each pipe function'
         )
     try:
         assert len(pipe_funcs) != 0
     except AssertionError:
         raise AssertionError('must provide work for the pipe to do')
     # check accumulator function
     try:
         assert callable(accumulator_func)
     except AssertionError:
         raise AssertionError(
             'must provide callable function for accumulator_func')
     # check accumulator args
     try:
         assert isinstance(accumulator_config_args, tuple)
     except AssertionError:
         raise AssertionError(
             'must provide a tuple of arguments for accumulator_config_args')
     # contract satisfied
     self.N = len(pipe_funcs)  # used all over in here
     # setup handlers to send child process logs into main thread's logger
     install_mp_handler()
     self.producer_func = producer_func
     self.producer_config_args = producer_config_args
     self.pipe_funcs = pipe_funcs
     self.pipe_funcs_config_args = pipe_funcs_config_args
     self.pipe_n_procs = pipe_n_procs
     self.accumulator_object = accumulator_object
     self.accumulator_func = accumulator_func
     self.accumulator_config_args = accumulator_config_args
     self.worker_get_limit = worker_get_limit
     # use a manager server to make cleanup easy
     self._sync_server = Manager()
     # 1 manager for each pipe func
     self._managers = [None for _ in range(self.N)]
     self._error_flag = self._sync_server.Value('i', int(False))
     # 1 producer finished flag for each manager, 1 for the consumer
     self._flags = [
         self._sync_server.Value('i', 0) for _ in range(self.N + 1)
     ]
     # 1 out(in) queue per pipe_func, + 1 extra in(out)
     self._queues = [self._sync_server.Queue() for _ in range(self.N + 1)]
     self._total_produced = self._sync_server.Value('i', 0)
     self._total_consumed = 0
Example #14
def excepthook(exctype, value, traceback):
    for p in multiprocessing.active_children():
        p.terminate()
    sys.__excepthook__(exctype, value, traceback)

sys.excepthook = excepthook


with open('config.logging.json', 'rt') as f:
    config = json.load(f)

logging.config.dictConfig(config)

import multiprocessing_logging
multiprocessing_logging.install_mp_handler()
multiprocessing_logging.install_mp_handler(logging.getLogger('overseer.quality'))
multiprocessing_logging.install_mp_handler(logging.getLogger('protocol'))

logger = logging.getLogger('overseer')

config = rc_config()       
overseer_uuid = '%s' % uuid.uuid4()
site_uuid = config.site_uuid


logger.info('Overseer %s initializing' % (overseer_uuid))
logger.info('Site UUID: %s' % site_uuid)


demods = {}
Example #15
def emmet(spec_or_dbfile, run, issue, sbatch, bb, yes, no_dupe_check, verbose):
    """Command line interface for emmet"""
    logger.setLevel(logging.DEBUG if verbose else logging.INFO)
    ctx = click.get_current_context()
    ctx.ensure_object(dict)

    if not sbatch and bb:
        raise EmmetCliError(
            "Burst buffer only available in SBatch mode (--sbatch).")

    if spec_or_dbfile:
        client = calcdb_from_mgrant(spec_or_dbfile)
        ctx.obj["CLIENT"] = client
        # ctx.obj["MONGO_HANDLER"] = BufferedMongoHandler(
        #    host=client.host,
        #    port=client.port,
        #    database_name=client.db_name,
        #    username=client.user,
        #    password=client.password,
        #    level=logging.WARNING,
        #    authentication_db=client.db_name,
        #    collection="emmet_logs",
        #    buffer_periodical_flush_timing=False,  # flush manually
        # )
        # logger.addHandler(ctx.obj["MONGO_HANDLER"])
        # coll = ctx.obj["MONGO_HANDLER"].collection
        # ensure_indexes(SETTINGS.log_fields, [coll])

    if run:
        if not issue:
            raise EmmetCliError(f"Need issue number via --issue!")

        ctx.obj["LOG_STREAM"] = StringIO()
        memory_handler = logging.StreamHandler(ctx.obj["LOG_STREAM"])
        formatter = logging.Formatter(
            "%(asctime)s %(name)-12s %(levelname)-8s %(message)s")
        memory_handler.setFormatter(formatter)
        logger.addHandler(memory_handler)

        CREDENTIALS = os.path.join(os.path.expanduser("~"),
                                   ".emmet_credentials")
        if not os.path.exists(CREDENTIALS):
            user = click.prompt("GitHub Username")
            password = click.prompt("GitHub Password", hide_input=True)
            auth = authorize(
                user,
                password,
                ["user", "repo", "gist"],
                "emmet CLI",
                two_factor_callback=opt_prompt,
            )
            with open(CREDENTIALS, "w") as fd:
                fd.write(auth.token)

        with open(CREDENTIALS, "r") as fd:
            token = fd.readline().strip()
            ctx.obj["GH"] = login(token=token)
    else:
        click.secho("DRY RUN! Add --run flag to execute changes.", fg="green")

    install_mp_handler(logger=logger)
Example #16
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Args:
        dataset_path:  Path to dataset file containing URLs
                       (Type: str)

        data_dir:      Output directory where video will be saved if output
                       path is not explicitly given
                       (Type: str)

        ffmpeg_path:   Path to ffmpeg executable
                       (Type: str)

        ffprobe_path:  Path to ffprobe executable
                       (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to "flickr-soundnet-dl.log"
                          (Type: str or None)

        verbose:          If True, prints detailed messages to console
                          (Type: bool)

        disable_logging:  If True, does not log to a file
                          (Type: bool)

        num_workers:      Number of multiprocessing workers used to download videos
                          (Type: int)

        **ffmpeg_cfg:     ffmpeg configurations
    """

    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')

    if not os.path.isdir(audio_dir):
        os.makedirs(audio_dir)

    if not os.path.isdir(video_dir):
        os.makedirs(video_dir)
    ffmpeg_cfg_gpu = dict.copy(ffmpeg_cfg)
    ffmpeg_cfg_gpu["video_codec"] += "_nvenc"
    url_queue = mp.Queue(3*num_workers)
    #lock = mp.Lock()
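    # Two condition variables: the main process notifies a worker after
    # queueing a URL (cv_main_to_worker) and waits on cv_worker_to_main when
    # the queue is full.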
    cv_main_to_worker = mp.Condition()
    cv_worker_to_main = mp.Condition()
    p_list = []
    try:
        worker_args = (url_queue, cv_main_to_worker, cv_worker_to_main, data_dir, ffmpeg_path, ffprobe_path)
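        # The first two workers get the NVENC (GPU) ffmpeg configuration; the
        # remaining workers use the plain CPU configuration.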
        for i in range(num_workers):
            if i < 2:
                p = mp.Process(target=download_flickr_video, args=worker_args, kwargs=ffmpeg_cfg_gpu)
            else:
                p = mp.Process(target=download_flickr_video, args=worker_args, kwargs=ffmpeg_cfg)
            p.start()
            p_list.append(p)
        with open(dataset_path, 'r') as f:
            for line in f:
                url = line.strip()
                if url:
                    media_filename = extract_flickr_id(url)
                    video_filepath = os.path.join(data_dir, 'video', media_filename + '.' + ffmpeg_cfg.get('video_format', 'mp4'))
                    skip_audio = ffmpeg_cfg.get("skip_audio", True)
                    if not skip_audio:
                        audio_filepath = os.path.join(data_dir, 'audio', media_filename + '.' + ffmpeg_cfg.get('audio_format', 'flac'))
                    else:
                        audio_filepath = None

                    if (skip_audio and os.path.exists(video_filepath)) or (not skip_audio and os.path.exists(video_filepath) and os.path.exists(audio_filepath)):
                        info_msg = 'Already downloaded video {}. Skipping.'
                        LOGGER.info(info_msg.format(media_filename))
                        continue
                    
                    while True:
                        try:
                            url_queue.put(url, False)
                            break
                        except queue.Full:
                            with cv_worker_to_main:
                                cv_worker_to_main.wait(5.0)
                    with cv_main_to_worker:
                        cv_main_to_worker.notify()
                    LOGGER.info('Notify a worker {}'.format(url))
        with cv_main_to_worker:
            for i in range(num_workers):
                url_queue.put("#END#")
            cv_main_to_worker.notify_all()
        LOGGER.info('End of enqueue')

    except KeyboardInterrupt:
        LOGGER.info("Received KeyboardInterrupt")
        with cv_main_to_worker:
            cv_main_to_worker.notify_all()
        for p in p_list:
            p.join()
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        try:
            for p in p_list:
                p.join()
        except KeyboardInterrupt:
            LOGGER.info("Received KeyboardInterrupt")
            with cv_main_to_worker:
                cv_main_to_worker.notify_all()
            for p in p_list:
                p.join()
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
Example #17
def make_app(config_file):

    fileConfig(config_file)
    multiprocessing_logging.install_mp_handler()

    with codecs.open(config_file, 'r', 'utf8') as f:
        config = konfig.Config(f)

    bottle.debug(config['henet'].get('debug', DEFAULT_DEBUG))
    app = bottle.app()

    # bottle config used by bottle-utils
    csrf_config = dict([('csrf.%s' % key, val) for key, val in
                        config.items('csrf')])
    app.config.update(csrf_config)

    # setting up languages
    default_locale = config['henet'].get('default_locale', 'fr_FR')
    langs = [[p.strip() for p in lang.split(',')] for lang
             in config['henet'].get('langs', ['fr_FR'])]

    app = I18NPlugin(app, langs=langs, default_locale=default_locale,
                     locale_dir=LOCALES_PATH)

    cats = []

    config_cats = config['henet']['categories']
    if not isinstance(config_cats, list):
        config_cats = [config_cats]

    for cat in config_cats:
        values = dict(config[cat].items())
        # defaults
        if 'can_create' not in values:
            values['can_create'] = True
        cats.append((cat, values))

    pages = []

    config_pages = config['henet']['pages']
    if not isinstance(config_pages, list):
        config_pages = [config_pages]

    for page in config_pages:
        values = dict(config[page].items())
        # defaults
        if 'can_create' not in values:
            values['can_create'] = True
        pages.append((page, values))

    use_comments = config['henet'].get('comments', True)
    use_media = config['henet'].get('media', True)

    app_stack.vars = app.vars = {'pages': pages,
                                 'categories': cats,
                                 'get_alerts': get_alerts,
                                 'site_url': config['henet']['site_url'],
                                 'use_comments': use_comments,
                                 'use_media': use_media,
                                 'langs': langs}

    app_stack.view = partial(view, **app.vars)
    app_stack._config = app._config = config
    app_stack.workers = app.workers = MemoryWorkers()
    app_stack.use_comments = app.use_comments = use_comments
    app_stack.use_media = app.use_media = use_media
    app_stack.add_alert = app.add_alert = add_alert

    smtp_config = dict(config.items('smtp'))

    def _send_email(*args):
        args = list(args) + [smtp_config]
        app.workers.apply_async('send-email', send_email, args)

    app_stack.send_email = app.send_email = _send_email

    from henet import views  # NOQA

    def _close_workers(*args):
        app.workers.close()
        sys.exit(0)

    subscribe(ALL_EVENTS, add_alert)
    signal.signal(signal.SIGINT, _close_workers)

    return app
Example #18
def sync(config, logs):
    """
    Main Sync process
    """

    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')
    starttime = datetime.now()
    modify = {}
    workers = {}  # this is the array of running pnns
    pnns = None  # this is the array of pnn to be launched
    pool = None

    pcli = PhEDEx()

    install_mp_handler()

    conf = _load_config(config, modify, starttime)

    pnns = []

    size = conf['main']['pool']

    logging.summary('Starting')

    while conf['main']['run']:

        if pool is None:
            logging.notice('Started pool of size %d', size)
            pool = multiprocessing.NDPool(size)

        add = [
            pnn for pnn, sec in conf.items() if pnn != 'main' if sec['run']
            if pnn not in workers if pnn not in pnns
        ]

        pnns += add

        random.shuffle(pnns)

        if not _ping():
            logging.warning('Cannot ping, not launching workers')
        else:
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []

        _poll_workers(workers, pnns)

        conf = _load_config(config, modify, starttime)

        if not conf['main']['run'] or\
            conf['main']['pool'] != size:

            # trigger draining of all workers, close the pool and wait
            # for the task to be over
            conf = _load_config(config, {'default': {'run': False}}, starttime)
            _drain_up(workers, pnns)
            workers = {}
            pool.close()
            pool = None
            size = conf['main']['pool']

        else:
            time.sleep(conf['main']['sleep'])

    logging.summary('Exiting.')

    return config
Example #19
def download_audioset(data_dir,
                      ffmpeg_path,
                      ffprobe_path,
                      eval_segments_path,
                      balanced_train_segments_path,
                      unbalanced_train_segments_path,
                      disable_logging=False,
                      verbose=False,
                      num_workers=4,
                      log_path=None,
                      **ffmpeg_cfg):
    """
    Download AudioSet files

    Args:
        data_dir:                       Directory where dataset files will
                                        be saved
                                        (Type: str)

        ffmpeg_path:                    Path to ffmpeg executable
                                        (Type: str)

        ffprobe_path:                   Path to ffprobe executable
                                        (Type: str)

        eval_segments_path:             Path to evaluation segments file
                                        (Type: str)

        balanced_train_segments_path:   Path to balanced train segments file
                                        (Type: str)

        unbalanced_train_segments_path: Path to unbalanced train segments file
                                        (Type: str)

    Keyword Args:
        disable_logging:                Disables logging to a file if True
                                        (Type: bool)

        verbose:                        Prints verbose information to stdout
                                        if True
                                        (Type: bool)

        num_workers:                    Number of multiprocessing workers used
                                        to download videos
                                        (Type: int)

        log_path:                       Path where log file will be saved. If
                                        None, saved to './audiosetdl.log'
                                        (Type: str or None)

        **ffmpeg_cfg:                   Configuration for audio and video
                                        downloading and decoding done by ffmpeg
                                        (Type: dict[str, *])
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    download_subset(eval_segments_path, "evaluation", data_dir, ffmpeg_path,
                    ffprobe_path, num_workers, **ffmpeg_cfg)
    download_subset(balanced_train_segments_path, "balanced_train", data_dir,
                    ffmpeg_path, ffprobe_path, num_workers, **ffmpeg_cfg)
Example #20
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    class ArgumentParserWithDefaults(argparse.ArgumentParser):
        '''
        From https://stackoverflow.com/questions/12151306/argparse-way-to-include-default-values-in-help
        '''
        def add_argument(self, *args, help=None, default=None, **kwargs):
            if help is not None:
                kwargs['help'] = help
            if default is not None and args[0] != '-h':
                kwargs['default'] = default
                if help is not None:
                    kwargs['help'] += ' (default: {})'.format(default)
            super().add_argument(*args, **kwargs)

    parser = ArgumentParserWithDefaults(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("--debug",
                        dest="debug",
                        help="Enable interactive debugger on error",
                        action='store_true')
    parser.add_argument("-c",
                        "--chart_output",
                        dest="chart_output",
                        help="Chart output directory",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory",
                        required=True)
    parser.add_argument("-s",
                        "--sim-output",
                        dest="sim_output",
                        help="Sim output directory",
                        required=True)
    parser.add_argument("-w",
                        "--window-size",
                        dest="window_size",
                        help="Minutes over which to collect data",
                        default=3,
                        type=int)
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)
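    # Only install the multiprocessing-aware log handler when the
    # multiprocessing module has actually been imported by something above.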
    if 'multiprocessing' in sys.modules:
        import multiprocessing_logging
        multiprocessing_logging.install_mp_handler()

    if args.debug:
        import pdb, traceback
        try:
            return main_method(args)
        except:
            extype, value, tb = sys.exc_info()
            traceback.print_exc()
            pdb.post_mortem(tb)
    else:
        return main_method(args)
Example #21
filename = os.path.basename(__file__)
logfile = os.path.splitext(filename)[0] + '.log'
fh = logging.FileHandler(logfile, mode='w')
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handlers to logger
logger.addHandler(ch)
logger.addHandler(fh)
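# Passing logger= wraps only this logger's handlers (ch and fh above) in
# MultiProcessingHandler wrappers; the root logger's handlers are left untouched.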
multiprocessing_logging.install_mp_handler(logger=logger)

# ==================================== SALE ORDER LINE ====================================


def update_sale_order_line(pid, data_pool, product_ids, uom_ids,
                           order_tax_code_ids):
    sock = xmlrpclib.ServerProxy(URL, allow_none=True)
    while data_pool:
        try:
            data = data_pool.pop()
            order_id = data.get('order_id')
            order_lines = sock.execute(DB, UID, PSW, 'sale.order.line',
                                       'search_read',
                                       [('order_id', '=', order_id)],
                                       ['product_id', 'product_uom'])
Example #22
def update_database():
    with logging_redirect_tqdm():
        install_mp_handler()

        latest_update = Update.get_latest_update(success=False)

        if latest_update.status not in [
                Update.Status.ERROR, Update.Status.SUCCESS
        ]:
            print(
                'Last update revision=%s has not finished yet (it may have hung or crashed). '
                'Start a new update (y) or continue the previous one (N)?'
                % latest_update.id)
            response = input()
            if response.lower() == 'y':
                latest_update.status = 'error'
                latest_update.save()

                latest_update = Update.objects.create(
                    status=Update.Status.IN_PROGRESS)
        else:
            latest_update = Update.objects.create(
                status=Update.Status.IN_PROGRESS)

        # scraping level 3 indexes first
        level_3_koatuu = list(
            set([koatuu for koatuu in _get_indexes() if koatuu.level <= 2]))
        # sort unique ids then (stable sort, so level is still sorted)
        level_3_koatuu.sort(key=attrgetter('unique_id'))
        # sort level from 1 to 3 keeping stable unique_id
        level_3_koatuu.sort(key=attrgetter('level'))

        if latest_update.latest_koatuu:
            logging.info("Searching for the latest koatuu scraped")
            latest_koatuu_obj = next(
                (koatuu for koatuu in level_3_koatuu
                 if koatuu.unique_id == latest_update.latest_koatuu), None)
            if latest_koatuu_obj is None:
                level_3_koatuu = []
            else:
                logging.info("Found latest koatuu scraped %s",
                             latest_koatuu_obj)
                level_3_koatuu = level_3_koatuu[
                    level_3_koatuu.index(latest_koatuu_obj):]
                logging.info('Koatuu to scrape only %s', len(level_3_koatuu))

        if level_3_koatuu:
            _download_and_insert(latest_update, level_3_koatuu)
        logging.info('All insert l1 operations ended')
        # process level 4 indexes only for regions where parcels
        # number is more than 100000

        annotated = Landuse.objects.all().values('koatuu').filter(
            revision=latest_update.id).annotate(
                total=Count('koatuu')).order_by('-total')

        level_4_koatuu = []
        all_koatuu = list(set([koatuu for koatuu in _get_indexes()]))
        for result in annotated:
            if result['total'] < 100000:
                continue
            koatuu_obj = next(koatuu for koatuu in all_koatuu
                              if koatuu.unique_id == str(result['koatuu']))

            if koatuu_obj.level == 2:
                level_3_koatuus = [
                    *set([
                        koatuu for koatuu in all_koatuu if koatuu.level == 3
                        and str(koatuu.parent) == koatuu_obj.unique_id
                    ])
                ]
                level_4_koatuu.extend(level_3_koatuus)
                for level_3_koatuu in level_3_koatuus:
                    level_4_koatuu.extend([
                        *set([
                            koatuu
                            for koatuu in all_koatuu if koatuu.level == 4
                            and str(koatuu.parent) == level_3_koatuu.unique_id
                        ])
                    ])

            if koatuu_obj.level == 3:
                level_4_koatuu.extend([
                    *set([
                        koatuu for koatuu in all_koatuu if koatuu.level == 4
                        and str(koatuu.parent) == koatuu_obj.unique_id
                    ])
                ])

        level_4_koatuu.sort(key=attrgetter('unique_id'))
        _download_and_insert(latest_update, level_4_koatuu)

    # detecting changes to create analysis table
    create_changeset(
        revision=Update.objects.get(id=latest_update.id),
        previous=Update.objects.get(id=Update.get_latest_update().id),
    )

    # everything is ok => success status
    Update.objects.filter(id=latest_update.id).update(
        status=Update.Status.SUCCESS)
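
# A minimal, hypothetical sketch of the tqdm + multiprocessing-logging combination used
# above: logging_redirect_tqdm() is entered first, so install_mp_handler() wraps the
# tqdm-aware handlers and worker log records no longer break the progress bar.
import logging
import multiprocessing

from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
from multiprocessing_logging import install_mp_handler


def scrape(koatuu_id):
    logging.info('scraping %s', koatuu_id)
    return koatuu_id


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    with logging_redirect_tqdm():
        install_mp_handler()
        with multiprocessing.Pool(processes=4) as pool:
            for _ in tqdm(pool.imap_unordered(scrape, range(100)), total=100):
                pass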
Example #23
if __name__ == "__main__":

    import os
    import shutil
    import sys
    import logging
    from multiprocessing_logging import install_mp_handler

    from wepy_tools.sim_makers.openmm.lennard_jones import LennardJonesPairOpenMMSimMaker

    OUTPUT_DIR = "_output/sim_maker_run"

    logging.getLogger().setLevel(logging.DEBUG)
    install_mp_handler()

    if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
        print(
            "arguments: n_cycles, n_steps, n_walkers, n_workers, platform, resampler"
        )
        exit()
    else:
        n_cycles = int(sys.argv[1])
        n_steps = int(sys.argv[2])
        n_walkers = int(sys.argv[3])
        n_workers = int(sys.argv[4])
        platform = sys.argv[5]
        resampler = sys.argv[6]

        print("Number of steps: {}".format(n_steps))
        print("Number of cycles: {}".format(n_cycles))
Example #24
def main():
    config = get_config()

    logging.basicConfig(
        format=
        '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s',
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=logging.DEBUG)
    multiprocessing_logging.install_mp_handler()
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    zabbix_config = dict(config["zabbix"])
    zabbix_config["failsafe"] = int(zabbix_config.get("failsafe", "20"))
    if zabbix_config["dryrun"] == "false":
        zabbix_config["dryrun"] = False
    elif zabbix_config["dryrun"] == "true":
        zabbix_config["dryrun"] = True
    else:
        raise Exception("zabbix config option 'dryrun' must be 'true' or 'false'")

    logging.info("Main start (%d) version %s", os.getpid(), __version__)

    stop_event = multiprocessing.Event()
    processes = []

    source_hosts_queues = []
    source_collectors = get_source_collectors(config)
    for source_collector in source_collectors:
        source_hosts_queue = multiprocessing.Queue()
        process = processing.SourceCollectorProcess(source_collector["name"],
                                                    source_collector["module"],
                                                    source_collector["config"],
                                                    source_hosts_queue)
        source_hosts_queues.append(source_hosts_queue)
        processes.append(process)
        process.start()

    process = processing.SourceHandlerProcess("source-handler",
                                              config["zac"]["db_uri"],
                                              source_hosts_queues)
    process.start()
    processes.append(process)

    process = processing.SourceMergerProcess(
        "source-merger", config["zac"]["db_uri"],
        config["zac"]["host_modifier_dir"])
    process.start()
    processes.append(process)

    process = processing.ZabbixHostUpdater("zabbix-host-updater",
                                           config["zac"]["db_uri"],
                                           zabbix_config)
    process.start()
    processes.append(process)

    process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater",
                                                config["zac"]["db_uri"],
                                                zabbix_config)
    process.start()
    processes.append(process)

    process = processing.ZabbixTemplateUpdater("zabbix-template-updater",
                                               config["zac"]["db_uri"],
                                               zabbix_config)
    process.start()
    processes.append(process)

    with processing.SignalHandler(stop_event):
        status_interval = 60
        next_status = datetime.datetime.now()

        while not stop_event.is_set():
            if next_status < datetime.datetime.now():
                log_process_status(processes)
                next_status = datetime.datetime.now() + datetime.timedelta(
                    seconds=status_interval)

            dead_process_names = [
                process.name for process in processes
                if not process.is_alive()
            ]
            if dead_process_names:
                logging.error("A child has died: %s. Exiting",
                              ', '.join(dead_process_names))
                stop_event.set()

            time.sleep(1)

        logging.debug(
            "Queues: %s",
            ", ".join([str(queue.qsize()) for queue in source_hosts_queues]))

        for process in processes:
            logging.info("Terminating: %s(%d)", process.name, process.pid)
            process.terminate()

        alive_processes = [
            process for process in processes if process.is_alive()
        ]
        while alive_processes:
            process = alive_processes[0]
            logging.info("Waiting for: %s(%d)", process.name, process.pid)
            log_process_status(processes)  # TODO: Too verbose?
            process.join(10)
            if process.exitcode is None:
                logging.warning(
                    "Process hanging. Signaling new terminate: %s(%d)",
                    process.name, process.pid)
                process.terminate()
            time.sleep(1)
            alive_processes = [
                process for process in processes if process.is_alive()
            ]

    logging.info("Main exit")
    def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(
            self):
        with mock.patch('logging.getLogger') as getLogger:
            install_mp_handler(self.logger)

            self.assertEqual(0, getLogger.call_count)
Example #26
def main(argv):
    parser = argparse.ArgumentParser(
        description='Perform segmentation on .svg and .png files.')
    parser.add_argument('dirs',
                        nargs='+',
                        help='Directories that store .svg and .png files.')
    parser.add_argument(
        '--num-workers',
        default=0,
        type=int,
        dest='num_workers',
        help='Number of processes. 0 for all available cpu cores.')
    parser.add_argument('--log',
                        default='segmentation.log',
                        type=str,
                        dest='log_file',
                        help='Path to log file.')
    parser.add_argument('--conf',
                        default='seg_conf.json',
                        type=str,
                        dest='confidence_file',
                        help='Path to segmentation confidence file.')
    parser.add_argument(
        '--no-optimize',
        default=True,
        action='store_false',
        dest='optimize',
        help=
        "Dont't use svgo optimization. This will produce larger svg files but cost much less time."
    )
    parser.add_argument('--export-contour',
                        default=False,
                        action='store_true',
                        dest='export_contour',
                        help='Export contour segmentation results.')
    parser.add_argument('--export-mask',
                        default=False,
                        action='store_true',
                        dest='export_mask',
                        help='Export morphed mask for debug use.')
    args = parser.parse_args(argv[1:])

    global logger
    logger = get_logger('segmentation',
                        args.log_file,
                        echo=False,
                        multiprocessing=True)
    install_mp_handler(logger)

    global USE_OPTIMIZE, EXPORT_CONTOUR_RESULTS, EXPORT_MASK
    USE_OPTIMIZE = args.optimize
    EXPORT_CONTOUR_RESULTS = args.export_contour
    EXPORT_MASK = args.export_mask

    num_workers = args.num_workers
    if num_workers == 0:
        num_workers = multiprocessing.cpu_count()
    logger.info('Using {} processes.'.format(num_workers))

    src_dirs = args.dirs
    for src_dir in src_dirs:
        if not osp.isdir(src_dir):
            continue
        logger.info('Processing {} ...'.format(src_dir))
        tgt_dir = src_dir
        tgts = []
        for f in glob.glob(osp.join(src_dir, '*.eps')):
            _id, _ = osp.splitext(osp.basename(f))
            svg_file, png_file = osp.join(src_dir, _id + '.svg'), osp.join(
                src_dir, _id + '.png')
            if osp.exists(svg_file) and osp.exists(png_file):
                tgts.append({
                    'id': _id,
                    'svg_file': svg_file,
                    'png_file': png_file
                })
        conf = seg(tgts, tgt_dir, num_workers=num_workers)
        with open(args.confidence_file, 'w') as f:
            f.write(
                json.dumps([{
                    'id': tgt['id'],
                    'score': s
                } for tgt, s in zip(tgts, conf)]))
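
# A minimal, hypothetical sketch of the named-logger variant used above: the logger gets
# its own handlers, install_mp_handler(logger) wraps just those handlers, and the pool
# workers then log through the same module-level logger.
import logging
import multiprocessing

from multiprocessing_logging import install_mp_handler

logger = logging.getLogger('segmentation')


def process_image(path):
    logger.info('segmenting %s', path)


if __name__ == '__main__':
    handler = logging.FileHandler('segmentation.log')
    handler.setFormatter(
        logging.Formatter('%(asctime)s %(processName)s %(levelname)s %(message)s'))
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    install_mp_handler(logger)  # wraps only this logger's handlers
    with multiprocessing.Pool(processes=4) as pool:
        pool.map(process_image, ['a.svg', 'b.svg'])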
Example #27
from collections import OrderedDict
import logging
from multiprocessing import current_process

import numpy as np

import click

from data_iterator import TextIterator
from params import load_params


logging.basicConfig(level=logging.WARN,
                    format="%(asctime)s - %(levelname)s %(module)s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")

import multiprocessing_logging
multiprocessing_logging.install_mp_handler()


def error_process(params, device, **model_options):

    import theano
    import theano.sandbox.cuda
    from build_model import build_model

    theano.sandbox.cuda.use(device)

    tparams = OrderedDict()
    for param_name, param in params.items():
        tparams[param_name] = theano.shared(param, name=param_name)

    process_name = current_process().name
Example #28
    def processYears(self, years: List[int]) -> None:
        '''
        Parameters
        ----------
        years : List[int]
            list of years to (re-)process
        '''

        # If necessary, create indexes for the data collection.
        self._createDataColIndex()
        # Does the grid collection exist?
        col_grid = self._createMongoConn(cfg=self.cfg)['col_grid']
        ndoc = col_grid.count_documents(filter={})
        if ndoc == 0:
            logging.info('Creation of the grid collection...')
            # Does the reference netCDF file exist?
            fname = self.downloadDir + 'era5_masks.nc'
            if not os.path.exists(fname):
                print('Downloading mask data...')
                self.getMasks()
            self.createGridCollection(mask=False)
        # Get all grid_ids
        self.all_ids = col_grid.distinct(key='id_grid')

        for year in years:
            self.year = int(year)
            logging.info(f' --- PROCESSING YEAR {year} ---')

            if self.download:
                logging.info('Proceeding with downloads...')
                today = datetime.today()
                if (year == today.year):
                    months = np.arange(1, today.month + 1).tolist()
                else:
                    months = np.arange(1, 12 + 1).tolist()

                # Are these months present as nc files?
                # List this year's nc files
                try:
                    ncfiles = self.listNetCDFfiles(year)
                except FileNotFoundError:
                    logging.info(f'No ERA5T files downloaded for {year} yet.')
                    months_to_download = months
                else:
                    fmonths_present = sorted(
                        int(x[x.find("-") + 1:x.find(".nc")]) for x in ncfiles)
                    fmonths_needed = months
                    # Months that are needed but not yet present:
                    missing_months = list(
                        set(fmonths_needed) - set(fmonths_present))
                    months_to_download = missing_months
                    # months_to_download = list(
                    #     set(missing_months + months))  # distinct months

                if len(months_to_download) > 0:
                    logging.info(f'Downloading files for YEAR {year}....\n' +
                                 f'Months: {months_to_download}')
                    # Parallel download of monthly data:
                    install_mp_handler()
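                    # Note: install_mp_handler() wraps handlers so they can be used from
                    # separate *processes*; the ThreadPool below uses threads, where the
                    # stdlib logging module is already thread-safe, so this call is
                    # likely unnecessary at this point.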
                    p = ThreadPool(processes=12)  # one thread per month
                    p.map(lambda m: self.getFiles(year=year, month=m),
                          months_to_download)
                    p.close()
                    p.join()
                    logging.info(f'Downloading files for YEAR {year} Done.')
                else:
                    logging.info(f'All files already present for year {year}.')
            else:
                logging.info('Proceeding without downloads')

            # List all the current year's nc files after download
            nc_local = self.listNetCDFfiles(year=year)

            # Open them all in one ds object
            # arrays will be loaded in chronological order
            try:
                ds = xr.open_mfdataset(nc_local, combine='by_coords')
            except Exception as e:
                logging.info(e)
            else:
                self.df_missing_dates = self.findMissingDates(ds)
                # Create the tile (chunks) elements
                # This operation starts to be useful at high grid resolution
                # i.e., from 0.25 x 0.25. For coarser grid (i.e., 0.1 x 0.1)
                # this is not really essential.
                delta = 30  # grid chunk size in degrees (should evenly divide
                # both 360 and 180)
                # ERA5 longitudes are in the range [0, 360], not [-180, 180]
                ilons = np.arange(0, 360, delta)
                ilats = np.arange(-60, 90, delta)
                elements = itertools.product(*[ilons, ilats])

                # Explore the grid chunks and select
                # those containing grid cells
                def worker_initializer00():
                    global col_grid
                    cons = self._createMongoConn(cfg=self.cfg)
                    col_grid = cons['col_grid']

                p = ThreadPool(processes=self.nthreads,
                               initializer=worker_initializer00)
                res = p.map(
                    lambda e: self.exploreChunks(ilon_chunk=e[0],
                                                 ilat_chunk=e[1],
                                                 delta=delta,
                                                 mask_query=None,
                                                 retrn='ndocs',
                                                 col_grid=col_grid), elements)
                p.close()
                p.join()
                df_e = pd.DataFrame(res)
                df_e = df_e.query('n > 0').sort_values(by='n').reset_index(
                    drop=True)

                # Do the insertion
                N = df_e.shape[0]
                for i in np.arange(N):
                    logging.info(f'Year {year}: processing chunk {i}/{N}')
                    ilon = df_e.loc[i, 'ilon_chunk']
                    ilat = df_e.loc[i, 'ilat_chunk']
                    n = df_e.loc[i, 'n']
                    self.insertChunk(ilon, ilat, delta, ds, 'insert')
                    logging.info(f'{n} documents inserted')
                logging.info(f' --- PROCESSING YEAR {year} DONE! ---')