Example #1
def parallel_run():
    """
    Start parallel engines to run
    """
    from IPython.parallel import Client

    c = Client()   # here is where the client establishes the connection
    lv = c.load_balanced_view()   # this object represents the engines (workers)


    rays = []
    maxs = 25
    bounding = AABA(xmin=0, ymin=0, zmin=0, xmax=maxs, ymax=maxs, zmax=maxs,)
    gridd = np.zeros((maxs,maxs,maxs))
    # spectrum for red to nir leaves
    red_nir_leaves = spectrum(np.array([0.5, 0.85]), np.array([0.1, 0.6]), np.array([0.5, 0.1]))
    # spectrum for soil
    red_nir_soil = spectrum(np.array([0.5, 0.85]), np.array([0.3, 0.4]), np.array([0.0, 0.0]))


    # scattering setup
    scatt = BRDSF(red_nir_leaves, 0.0)
    lf = leaf(55.0, 0.8) # leaf angle distribution and leaf area density


    tasks = []
    for x in xrange(maxs):
        for y in xrange(maxs):
            tasks.append(lv.apply(prun, x,y, maxs, gridd, scatt, red_nir_soil, bounding, lf))

    results = [task.get() for task in tasks]  # blocks until all results are back

    return results
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('db_fname',
                        help="Provide the filename of the HDF database "
                             "file here.")
    args = parser.parse_args()

    image_names = get_image_names_from_db(args.db_fname)
    logging.info('Found {} image_names'.format(len(image_names)))

    c = Client()
    dview = c.direct_view()
    lbview = c.load_balanced_view()

    dview.push({'do_clustering': do_clustering,
                'dbfile': args.db_fname})
    results = lbview.map_async(process_image_name, image_names)
    import time
    import sys
    import os
    dirname = os.path.join(os.environ['HOME'], 'data/planet4/catalog_2_and_3')
    while not results.ready():
        print("{:.1f} %".format(100 * results.progress / len(image_names)))
        sys.stdout.flush()
        time.sleep(10)
    for res in results.result:
        print(res)
    logging.info('Catalog production done. Results in {}.'.format(dirname))
Example #3
def add_engines(n=1, profile='iptest', total=False):
    """add a number of engines to a given profile.
    
    If total is True, then already running engines are counted, and only
    the additional engines necessary (if any) are started.
    """
    rc = Client(profile=profile)
    base = len(rc)
    
    if total:
        n = max(n - base, 0)
    
    eps = []
    for i in range(n):
        ep = TestProcessLauncher()
        ep.cmd_and_args = ipengine_cmd_argv + ['--profile=%s'%profile, '--log-level=50']
        ep.start()
        launchers.append(ep)
        eps.append(ep)
    tic = time.time()
    while len(rc) < base+n:
        if any([ ep.poll() is not None for ep in eps ]):
            raise RuntimeError("A test engine failed to start.")
        elif time.time()-tic > 15:
            raise RuntimeError("Timeout waiting for engines to connect.")
        time.sleep(.1)
        rc.spin()
    rc.close()
    return eps
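A hypothetical invocation of the helper above, ensuring that at least four engines are connected before a test suite runs (engines already running count toward the total because total=True):

engine_launchers = add_engines(4, profile='iptest', total=True)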
Example #4
def run_jobs_on_ipythoncluster(worker, task_list, shutdown_ipengines_after_done=False):

    t0 = time.time()
    rc = Client(CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    print "\t# nodes in use: {}".format(len(lview.targets or rc.ids))
    lview.block = False

    print "\t# of tasks: {}".format(len(task_list))
    print "\tsubmitting...",
    job = lview.map_async(worker, task_list)
    print "done."
    try:
        job.wait_interactive()
    except KeyboardInterrupt:
        #handle "Ctrl-C"
        if ask("\nAbort all submitted jobs?") == 'Y':
            lview.abort()
            print "Aborted, all submitted jobs are cancelled."
        else:
            print "Aborted, but your jobs are still running on the cluster."
        return

    if len(job.result) != len(task_list):
        print "WARNING:\t# of results returned ({}) != # of tasks ({}).".format(len(job.result), len(task_list))
    print "\ttotal time: {}".format(timesofar(t0))

    if shutdown_ipengines_after_done:
        print "\tshuting down all ipengine nodes...",
        lview.shutdown()
        print 'Done.'
    return job.result
Example #5
def analyze_log_file_in_phases(file_id, nstates, trials, iter):
    print_n_flush("Starting phase by phase analysis...")
    # id_to_log = lambda x: "logs/%s.exp.log" % x
    filename_log = id_to_log(file_id)
    responses, tests, responses_t, tests_t, images = toCSV(filename_log)
    from IPython.parallel import Client
    #     from functools import partial
    from rpy2 import rinterface
    from rpy2.rinterface import initr

    rinterface.set_initoptions(("--max-ppsize=100000",))
    initr()
    client = Client(profile="default")
    # client[:].push(dict(initr=initr))
    # client[:].apply_sync(lambda: initr())
    lview = client.load_balanced_view()  # default load-balanced view
    lview.block = True
    # func = lambda args: train_hmm_n_times(file_id=args[0], nstates=args[1], trials=args[2], iter=args[3])
    # trials = 4
    client[:].push(dict(train_hmm_once=train_hmm_once))
    # args = [(file_id, nstates, trials, 1000) for nstates in range(5,10)]
    # results = lview.map(func, args)# hmm, d, results = train_hmm_n_times(file_id, nstates, trials=20, iter=1000)
    # pool.join()
    results = {}
    for i in range(3):
        results[i] = train_hmm_n_times(file_id, nstates=nstates, trials=trials, iter=iter, phase=i)
    return results
Example #6
    def __init__(self,mdl):
        """The constructor.
                mdl : model (of class Red3 or Red6)
        """
        IntGen.__init__(self,mdl)
        #find the engine processes
        rc = Client(profile='mpi')
        rc.clear()
        #Create a view of the processes
        self.view = rc[:]

        #number of clients
        nCl = len(rc.ids)

        if mdl.Y.ndim >2:
            #divisors of nCl
            div = [i for i in range(1,nCl+1) if nCl%i==0]
            ldiv = len(div)
            #the surface will be divided into nbx rows and nby columns
            if ldiv %2 == 0:
                self.nbx = div[ldiv/2]
                self.nby = div[ldiv/2-1]
            else:
                self.nbx = self.nby = div[ldiv/2]
        else:
            self.nbx = nCl
            self.nby = 0
Example #7
def map(r,func, args=None, modules=None):
	"""
	Before you run parallel.map, start your cluster (e.g. ipcluster start -n 4)
	
	map(r,func, args=None, modules=None):
	args=dict(arg0=arg0,...)
	modules='numpy, scipy'    
	
	examples:
	func= lambda x: numpy.random.rand()**2.
	z=parallel.map(r_[0:1000], func, modules='numpy, numpy.random')
	plot(z)
	
	A=ones((1000,1000));
	l=range(0,1000)
	func=lambda x : A[x,l]**2.
	z=parallel.map(r_[0:1000], func, dict(A=A, l=l))
	z=array(z)
	
	"""
	mec = Client()
	mec.clear()
	lview=mec.load_balanced_view()
	for k in mec.ids:
		mec[k].activate()
		if args is not None:
			mec[k].push(args)
		if modules is not None:
			mec[k].execute('import '+modules)
	z=lview.map(func, r)
	out=z.get()
	return out
Example #8
def remove_duplicates(df):
    logging.info('Removing duplicates.')

    image_names = df.image_name.unique()

    def process_image_name(image_name):
        data = df[df.image_name == image_name]
        data = remove_duplicates_from_image_name_data(data)
        data.to_hdf(get_temp_fname(image_name), 'df')

    # parallel approach; you need to launch an ipcluster/controller for this to work!
    c = Client()
    dview = c.direct_view()
    dview.push({'remove_duplicates_from_image_name_data':
                remove_duplicates_from_image_name_data,
                'data_root': data_root})
    lbview = c.load_balanced_view()
    lbview.map_sync(process_image_name, image_names)

    df = []
    for image_name in image_names:
        try:
            df.append(pd.read_hdf(get_temp_fname(image_name), 'df'))
        except OSError:
            continue
        else:
            os.remove(get_temp_fname(image_name))
    df = pd.concat(df, ignore_index=True)
    logging.info('Duplicates removal complete.')
    return df
Example #9
def ipythonMP(m):
    from IPython.parallel import Client

    cli = Client()
    dview = cli[:]
    lbview = cli.load_balanced_view()
    return dview.map_sync(factorize,range(m))
Example #10
def subsample(cache_dir, image_sets, ipython_profile):
    parameters = [(cache_dir, images) for images in image_sets]

    if ipython_profile:
        from IPython.parallel import Client, LoadBalancedView
        client = Client(profile='lsf')
        lview = client.load_balanced_view()
        generator = lview.imap(_compute_group_subsample, parameters)
    elif ipython_profile == False:
        generator = (_compute_group_subsample(p) for p in parameters)
    else:
        from multiprocessing import Pool
        lview = Pool()
        generator = lview.imap(_compute_group_subsample, parameters)
    progress = progressbar.ProgressBar(widgets=['Subsampling:',
                                                progressbar.Percentage(), ' ',
                                                progressbar.Bar(), ' ', 
                                                progressbar.Counter(), '/', 
                                                str(len(parameters)), ' ',
                                                progressbar.ETA()],
                                       maxval=len(parameters))
    results = list(generator)

    subsample = []
    for i, (p, r) in enumerate(zip(parameters, results)):
        if r is None:
            print >>sys.stderr, '#### There was an error, recomputing locally: %s' % parameters[i][1]
            results[i] = _compute_group_subsample(p) # just to see throw the exception
        subsample.extend(r)

    print "the subsampling set contains %d items" % len(subsample)
    return subsample
Example #11
    def test_run_from_multiple_files_without_cache_on_ipy_cluster(self):
        try:
            from IPython.parallel import Client
            client = Client()

            pool = client.load_balanced_view()
        except:
            raise unittest.SkipTest("Cluster connection failed")

        models = [self.transport]
        p = Point(self.start_lon, self.start_lat)
        model = IPythonClusterModelController(geometry=p,
                                              depth=self.start_depth,
                                              start=self.start_time,
                                              step=self.time_step,
                                              nstep=self.num_steps,
                                              npart=self.num_particles,
                                              models=models,
                                              use_bathymetry=False,
                                              use_shoreline=False,
                                              pool=pool)

        model.setup_run("/data/lm/tests/pws_das_2014*.nc")
        model.run(output_formats=self.output_formats, output_path=self.output_path)

        self.assertTrue(os.path.exists(os.path.join(self.output_path, "simple_trackline.geojson")))
        self.draw_trackline(os.path.join(self.output_path, "simple_trackline.geojson"))
        # Not a caching controller, no cache path should exist
        self.assertFalse(os.path.exists(self.cache_path))
Example #12
    def __init__(self, config_filename=None, profile=None, seed=None, sshkey=None, packer='json'):
        """Initialize a IPClusterEngine

        Do IPython.parallel operations to set up cluster and generate mapper.

        """
        super(IPClusterEngine, self).__init__(seed=seed)
        rc = Client(config_filename, profile=profile, sshkey=sshkey, packer=packer)
        # FIXME: add a warning if environment in direct view is not 'empty'?
        #        else, might become dependent on an object created in
        #        environment in a prior run
        dview = rc.direct_view()
        lview = rc.load_balanced_view()
        with dview.sync_imports(local=True):
            import crosscat
        mapper = lambda f, tuples: self.lview.map(f, *tuples)
        # if you're trying to debug issues, consider clearing to start fresh
        # rc.clear(block=True)
        #
        self.rc = rc
        self.dview = dview
        self.lview = lview
        self.mapper = mapper
        self.do_initialize = None
        self.do_analyze = None
        return
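A minimal sketch of how the mapper built above might be called (add and the data are stand-ins); because the lambda unpacks *tuples, each positional-argument sequence is passed to lview.map separately and zipped across engines:

def add(x, y):
    return x + y

engine = IPClusterEngine(profile='default')
async_result = engine.mapper(add, ([1, 2, 3], [10, 20, 30]))
print(async_result.get())  # [11, 22, 33]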
Example #13
def _is_up(profile, cluster_id, n):
    try:
        #client = Client(profile=profile, cluster_id=cluster_id)
        client = Client(profile=profile)
        up = len(client.ids)
        client.close()
    except IOError, msg:
        return False
    else:
        return up >= n
Example #14
def _test_wrapper_remote(func):
    """Execute a function on a remote ipengine"""
    from IPython.parallel import Client
    from qiita_core.configuration_manager import ConfigurationManager
    config = ConfigurationManager()
    c = Client(profile=config.ipython_default)
    bv = c.load_balanced_view()
    return _ipy_wait(bv.apply_async(func))
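_ipy_wait is a project helper not shown in this snippet; a plausible sketch, assuming it simply blocks on the AsyncResult and hands back its payload:

def _ipy_wait(async_result):
    """Block until the remote call completes, then return its result."""
    async_result.wait()
    return async_result.get()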
Example #15
def cluster_view(scheduler, queue, num_jobs, cores_per_job=1, profile=None,
                 start_wait=16, extra_params=None, retries=None):
    """Provide a view on an ipython cluster for processing.

      - scheduler: The type of cluster to start (lsf, sge, pbs, torque).
      - num_jobs: Number of jobs to start.
      - cores_per_job: The number of cores to use for each job.
      - start_wait: How long to wait for the cluster to startup, in minutes.
        Defaults to 16 minutes. Set to longer for slow starting clusters.
      - retries: Number of retries to allow for failed tasks.
    """
    if extra_params is None:
        extra_params = {}
    delay = 10
    max_delay = start_wait * 60
    # Increase default delay without changing max_delay for back compatibility
    delay = delay * 3
    max_tries = 10
    if profile is None:
        has_throwaway = True
        profile = create_throwaway_profile()
    else:
        # ensure we have an .ipython directory to prevent issues
        # creating it during parallel startup
        cmd = [sys.executable, "-c", "from IPython import start_ipython; start_ipython()",
               "profile", "create"]
        subprocess.check_call(cmd)
        has_throwaway = False
    num_tries = 0

    cluster_id = str(uuid.uuid4())
    url_file = get_url_file(profile, cluster_id)

    while 1:
        try:
            _start(scheduler, profile, queue, num_jobs, cores_per_job, cluster_id, extra_params)
            break
        except subprocess.CalledProcessError:
            if num_tries > max_tries:
                raise
            num_tries += 1
            time.sleep(delay)
    try:
        client = None
        slept = 0
        while not _is_up(url_file, num_jobs):
            time.sleep(delay)
            slept += delay
            if slept > max_delay:
                raise IOError("Cluster startup timed out.")
        client = Client(url_file, timeout=60)
        yield _get_balanced_blocked_view(client, retries)
    finally:
        if client:
            client.close()
        _stop(profile, cluster_id)
        if has_throwaway:
            delete_profile(profile)
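Because cluster_view yields a view and tears everything down in its finally block, the source presumably wraps it with contextlib.contextmanager; a hypothetical call (process_sample and samples are stand-ins):

with cluster_view('sge', 'all.q', num_jobs=8, cores_per_job=1) as view:
    results = view.map(process_sample, samples)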
Example #16
def calibrate_multiple():
    num_runs = 12
    rc = Client()
    lview = rc.load_balanced_view()
    lview.block = True
    print(datetime.datetime.now())
    res = lview.map(lambda q: calibrate_mh(), range(num_runs))
    print(datetime.datetime.now())
    1/0
Example #17
def _is_up(url_file, n):
    try:
        client = Client(url_file)
        up = len(client.ids)
        client.close()
    except IOError:
        return False
    else:
        return up >= n
Example #18
    def main_loop(self, time_budget=None, parallel=False, client_kwargs=None,
                  view_flags=None):
        """
        Run main_loop of each trainer.

        Note: if you get PickleErrors when running in parallel, make sure
        you have `dill` installed.

        Parameters
        ----------
        time_budget : int, optional
            The maximum number of seconds before interrupting
            training. Default is `None`, no time limit.
        parallel : bool, optional
            Whether to train subtrainers in parallel using
            IPython.parallel (default False).
        client_kwargs : dict, optional
            Keyword arguments for IPython.parallel Client.
        view_flags : dict, optional
            Flags for IPython.parallel LoadBalancedView.
        """
        self.setup()
        if parallel:
            from IPython.parallel import Client

            def _train(trainer, time_budget=None):
                """
                Run main_loop of this trainer.

                Parameters
                ----------
                trainer : Train object
                    Train object.
                time_budget : int, optional
                    The maximum number of seconds before interrupting
                    training. Default is `None`, no time limit.
                """
                trainer.main_loop(time_budget)
                return trainer

            if client_kwargs is None:
                client_kwargs = {}
            if view_flags is None:
                view_flags = {}
            client = Client(**client_kwargs)
            view = client.load_balanced_view()
            view.set_flags(**view_flags)
            call = view.map(_train,
                            self.trainers[self.skip_folds:],
                            [time_budget] * len(self.trainers[self.skip_folds:]),
                            block=False)
            self.trainers = call.get()
        else:
            for trainer in self.trainers[self.skip_folds:]:
                trainer.main_loop(time_budget)
        self.save()
Example #19
def main():
    parser = OptionParser()
    parser.add_option('-d','--dataset',dest='dataset',help='path to dataset')
    parser.add_option('--input_format',dest='input_format',help='format of training dataset(s) tsv | csv | mm (matrixmarket) | fsm (fast_sparse_matrix)')
    parser.add_option('--l1_min',dest='l1_min',type='float',help='min l1 constant to try (expected to be a power of 10)')
    parser.add_option('--l1_max',dest='l1_max',type='float',help='max l1 constant to try (expected to be a power of 10)')
    parser.add_option('--l2_min',dest='l2_min',type='float',help='min l2 constant to try (expected to be a power of 10)')
    parser.add_option('--l2_max',dest='l2_max',type='float',help='max l2 constant to try (expected to be a power of 10)')
    parser.add_option('--max_sims',dest='max_sims',type='int',default=2000,help='max desired number of positive item similarity weights (default: %default)')
    parser.add_option('--min_sims',dest='min_sims',type='int',default=15,help='min desired number of positive item similarity weights (default: %default)')
    parser.add_option('--max_sparse',dest='max_sparse',type='float',default=0.01,help='max allowable proportion of items with less than min_sims positive similarity weights (default: %default)')
    parser.add_option('--num_samples',dest='num_samples',type='int',default=100,help='number of sample items to evaluate for each regularization setting')
    parser.add_option('--packer',dest='packer',default='json',help='packer for IPython.parallel (default: %default)')
    parser.add_option('--add_module_paths',dest='add_module_paths',help='comma-separated list of paths to append to pythonpath to enable import of uninstalled modules')

    (opts,args) = parser.parse_args()
    if not opts.dataset or not opts.input_format or not opts.l1_min or not opts.l1_max or not opts.l2_min or not opts.l2_max:
        parser.print_help()
        raise SystemExit

    logging.basicConfig(level=logging.INFO,format='[%(asctime)s] %(levelname)s: %(message)s')

    dataset = load_fast_sparse_matrix(opts.input_format,opts.dataset)

    params = {'l1_reg':pow_range(opts.l1_min,opts.l1_max),
              'l2_reg':pow_range(opts.l2_min,opts.l2_max)}
    num_items = dataset.shape[1]
    sample_items = random.sample(xrange(num_items),opts.num_samples)

    logging.info('preparing tasks for a grid search of these values:')
    logging.info(params)
    tasks = [(args,dataset,opts.min_sims,sample_items) for args in ParameterGrid(params)]

    c = Client(packer=opts.packer)
    view = c.load_balanced_view()

    if opts.add_module_paths:
        c[:].execute('import sys')
        for path in opts.add_module_paths.split(','):
            logging.info('adding {0} to pythonpath on all engines'.format(path))
            c[:].execute("sys.path.append('{0}')".format(path))

    logging.info('running {0} tasks in parallel...'.format(len(tasks)))
    results = view.map(estimate_sparsity,tasks,ordered=False)

    candidates = [(args,nsims,nsparse,nneg) for args,nsims,nsparse,nneg in results if nsims <= opts.max_sims and nsparse <= opts.max_sparse]

    if candidates:
        best = min(candidates,key=itemgetter(1))

        print 'best parameter setting: {0}'.format(best[0])
        print 'mean # positive similarity weights per item = {0:.3}'.format(best[1])
        print 'proportion of items with fewer than {0} positive similarity weights = {1:.3}'.format(opts.min_sims,best[2])
        print 'mean # negative similarity weights per item = {0:.3}'.format(best[3])
    else:
        print 'no parameter settings satisfied the conditions, try increasing --min_sims, --max_sims or --max_sparse'
Example #20
    def test_run_from_multiple_files_without_cache_on_ipy_cluster(self):
        try:
            from IPython.parallel import Client
            client = Client()

            pool = client.load_balanced_view()
        except:
            raise unittest.SkipTest("Cluster connection failed")

        self.test_run_from_multiple_files_without_cache(pool=pool)
Example #21
 def connect_client(self):
     """connect a client with my Context, and track its sockets for cleanup"""
     c = Client(profile='iptest', context=self.context)
     c.wait = lambda *a, **kw: self.client_wait(c, *a, **kw)
     
     for name in filter(lambda n:n.endswith('socket'), dir(c)):
         s = getattr(c, name)
         s.setsockopt(zmq.LINGER, 0)
         self.sockets.append(s)
     return c
Example #22
def init(profile="mpi"):
    """Initialize pyDive.

    :param str profile: The name of the cluster profile of *IPython.parallel*. Has to be an MPI-profile.\
        Defaults to 'mpi'.
    """
    # init direct view
    global view

    client = Client(profile=profile)
    client.clear()
    view = client[:]
    view.block = True
    view.execute(
        """\
        import numpy as np
        from mpi4py import MPI
        import h5py as h5
        import os, sys
        import psutil
        import math
        os.environ["onTarget"] = 'True'
        from pyDive import structured
        from pyDive import algorithm
        from pyDive.distribution import interengine
        try:
            import pyDive.arrays.local.h5_ndarray
        except ImportError:
            pass
        try:
            import pyDive.arrays.local.ad_ndarray
        except ImportError:
            pass
        try:
            import pyDive.arrays.local.gpu_ndarray
            import pycuda.autoinit
        except ImportError:
            pass
         """
    )

    # get number of processes per node (ppn)
    def hostname():
        import socket

        return socket.gethostname()

    hostnames = view.apply(interactive(hostname))
    global ppn
    ppn = max(Counter(hostnames).values())

    # mpi ranks
    get_rank = interactive(lambda: MPI.COMM_WORLD.Get_rank())
    all_ranks = view.apply(get_rank)
    view["target2rank"] = all_ranks
Example #23
def _is_up(url_file, n):
    try:
        client = Client(url_file, timeout=60)
        up = len(client.ids)
        client.close()
    except iperror.TimeoutError:
        return False
    except IOError:
        return False
    else:
        return up >= n
Example #24
def main():
    partial_results=[]
    c=Client(profile='default')
    print c.ids
    view=c.load_balanced_view()
    ar = view.map_async(func, range(10))
    #print ar.get_dict(timeout=0)
    print ar.msg_ids
    for i, r in enumerate(ar):
        print r[1]
    print ar.get()
Example #25
def importanceSampling_parallel4(generateSample,args_generateSample,likelihoodRatio,args_likelihoodRatio, N):  
    # not working
    temp = 3
    from IPython.parallel import Client
    p = Client()[:]
    p.use_dill()
    print('This is the parallel output')
    p.map_sync(generateSample,np.arange(6))
    #sample = p.map_sync(generateSample,np.array((10,)))
    #print(sample)
    return(1)
Example #26
def run_parallel_jobs(jobs, job_fn, ipython_profile=None):
    # IPython will error out if jobs is empty.
    if jobs:
        if ipython_profile is None:
            c = Client()
        else:
            c = Client(profile=ipython_profile)

        lview = c.load_balanced_view()
        lview.block = True
        lview.map(job_fn, jobs)
Example #27
def run_nb(beliefs, meta, params, num_samples):
    if params['do_parallel']:
        rc = Client()
        lview = rc.load_balanced_view()
        lview.block = True
    else:
        lview = None
    print(datetime.datetime.now().time())
    results = aggregation.importance_multiple(beliefs, meta, params,
                                              num_samples, lview)
    print(datetime.datetime.now().time())
    return results
Example #28
  def featurize(self, mols, parallel=False, client_kwargs=None,
                view_flags=None, verbosity=None, log_every_n=1000):
    """
    Calculate features for molecules.

    Parameters
    ----------
    mols : iterable
        RDKit Mol objects.
    parallel : bool, optional
        Whether to featurize molecules in parallel using
        IPython.parallel (default False).
    client_kwargs : dict, optional
        Keyword arguments for IPython.parallel Client.
    view_flags : dict, optional
        Flags for IPython.parallel LoadBalancedView.
    """
    if self.conformers and isinstance(mols, types.GeneratorType):
      mols = list(mols)
    assert verbosity in [None, "low", "high"]

    if parallel:
      from IPython.parallel import Client

      if client_kwargs is None:
          client_kwargs = {}
      if view_flags is None:
          view_flags = {}
      client = Client(**client_kwargs)
      client.direct_view().use_dill()  # use dill
      view = client.load_balanced_view()
      view.set_flags(**view_flags)
      call = view.map(self._featurize, mols, block=False)
      features = call.get()

      # get output from engines
      call.display_outputs()

    else:
      features = []
      for i, mol in enumerate(mols):
        if verbosity is not None and i % log_every_n == 0:
          log("Featurizing %d / %d" % (i, len(mols)))
        if mol is not None:
          features.append(self._featurize(mol))
        else:
          features.append(np.array([]))

    if self.conformers:
      features = self.conformer_container(mols, features)
    else:
      features = np.asarray(features)
    return features
Example #29
def calibrate():
    num_runs = 12
    rc = Client()
    lview = rc.load_balanced_view()
    lview.block = True
    print(datetime.datetime.now())
    res = lview.map(lambda q: all_together_q(), range(num_runs))
    print(datetime.datetime.now())
    act = np.array([r[0] for r in res])
    mh =  np.array([r[1] for r in res])
    nb =  np.array([r[2] for r in res])
    gp =  np.array([r[3] for r in res])
    1/0
Example #30
def cluster_view(scheduler, queue, num_jobs, cores_per_job=1, profile=None,
                 start_wait=16, extra_params=None, retries=None):
    """Provide a view on an ipython cluster for processing.

      - scheduler: The type of cluster to start (lsf, sge, pbs, torque).
      - num_jobs: Number of jobs to start.
      - cores_per_job: The number of cores to use for each job.
      - start_wait: How long to wait for the cluster to startup, in minutes.
        Defaults to 16 minutes. Set to longer for slow starting clusters.
      - retries: Number of retries to allow for failed tasks.
    """
    if extra_params is None:
        extra_params = {}
    delay = 10
    max_delay = start_wait * 60
    max_tries = 10
    if profile is None:
        has_throwaway = True
        profile = create_throwaway_profile()
    else:
        has_throwaway = False
    num_tries = 0

    cluster_id = str(uuid.uuid4())
    url_file = get_url_file(profile, cluster_id)
    #cluster_id = ""
    while 1:
        try:
            _start(scheduler, profile, queue, num_jobs, cores_per_job, cluster_id, extra_params)
            break
        except subprocess.CalledProcessError:
            if num_tries > max_tries:
                raise
            num_tries += 1
            time.sleep(delay)
    try:
        client = None
        slept = 0
        while not _is_up(url_file, num_jobs):
            time.sleep(delay)
            slept += delay
            if slept > max_delay:
                raise IOError("Cluster startup timed out.")
        client = Client(url_file)
        yield _get_balanced_blocked_view(client, retries)
    finally:
        if client:
            client.close()
        _stop(profile, cluster_id)
        if has_throwaway:
            delete_profile(profile)
Example #31
def main():
    N = 500
    base = ['a', 'c', 'g', 't']
    seqPairs = []

    np.random.seed(1234)

    for i in xrange(8):
        seqA = ''.join([base[np.random.randint(0, 4)] for i in xrange(N)])
        seqB = ''.join([base[np.random.randint(0, 4)] for i in xrange(N)])
        seqPairs.append([seqA, seqB])

    client = Client()

    # Note: for ipclustergenomic to be loaded on the cluster side, engines
    # must have been started in the same directory as ipclustergenomic.py
    with client[:].sync_imports():
        from ipclustergenomic import editDistance

    printRes(client[:].map_sync(edProxy, seqPairs))
Example #32
class ClusterPool(object):
    def __init__(self, *args, **kwargs):
        self.client = Client(*args, **kwargs)
        self.dview = self.client.direct_view()  # direct view, needed by map_with_shared_data
        self.lbview = self.client.load_balanced_view()
        self.chunksize = 1

    def map_with_shared_data(self, func, shared_data, args, chunksize=None):
        """Map a function over a set of arguments, also passing a constant shared variable to each invocation."""
        # no imap with shared data, since we couldn't guarantee the integrity of FUNC and SHARED_DATA
        self.dview.push(dict(FUNC=func, SHARED_DATA=shared_data), block=True)
        return self.lbview.map_sync(func_int,
                                    args,
                                    chunksize=chunksize or self.chunksize,
                                    ordered=True)

    def map(self, func, args, chunksize=None):
        map = self.lbview.map
        return iter(
            map(func,
                args,
                chunksize=chunksize or self.chunksize,
                block=True,
                ordered=True))

    def imap(self, func, args, chunksize=None):
        map = self.lbview.map
        return iter(
            map(func,
                args,
                chunksize=chunksize or self.chunksize,
                block=False,
                ordered=True))

    def imap_unordered(self, func, args, chunksize=None):
        map = self.lbview.map
        return iter(
            map(func,
                args,
                chunksize=chunksize or self.chunksize,
                block=False,
                ordered=False))
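A minimal usage sketch for ClusterPool, assuming an ipcluster is already running (square is a stand-in task):

def square(x):
    return x * x

pool = ClusterPool()
print(list(pool.map(square, range(10))))          # ordered, blocking
for value in pool.imap_unordered(square, range(10)):
    print(value)                                  # yielded as results arrive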
Example #33
class MulticoreJob(object):
    def __init__(self):
        self.tasks = {}
        self.client = Client()
        self.lb_view = self.client.load_balanced_view()

    def apply(self, f, named_tasks):
        """named_tasks: dict of {nametask: taskparams}
        """
        self.tasks = {
            tname: self.lb_view.apply(f, **param)
            for (tname, param) in named_tasks.items()
        }
        return self

    def isready(self):
        return all([t.ready() for t in self.tasks.values()])

    def progress(self):
        return np.mean([t.ready() for t in self.tasks.values()])

    def partial_result(self):
        return {
            tname: tresult.get()
            for (tname, tresult) in self.tasks.items() if tresult.ready()
        }

    def wait(self):
        for (tname, tresult) in self.tasks.items():
            tresult.wait()
        return self

    def abort(self):
        for (tname, tresult) in self.tasks.items():
            if not tresult.ready():
                try:
                    tresult.abort()
                except:
                    pass
        return self
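A hypothetical driver for MulticoreJob, polling progress until every named task finishes (fit_model and the task names are stand-ins; an ipcluster must already be running):

import time

def fit_model(n_iter):
    return sum(range(n_iter))  # stand-in for real remote work

job = MulticoreJob().apply(fit_model, {'small': {'n_iter': 10},
                                       'large': {'n_iter': 10000}})
while not job.isready():
    print("{:.0%} done".format(job.progress()))
    time.sleep(1)
print(job.wait().partial_result())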
Example #34
def run_parallel_map():
    """ Run a test with synthetic data and MCMC inference
    """
    # Parse command line args
    (options, args) = parse_cmd_line_args()

    # Load the data
    data = load_data(options)
    # Get a model for the data
    model_type = 'standard_glm'
    model = make_model(model_type, N=data['N'])

    # Get parallel clients
    rc = Client(profile="sge")
    dview = rc[:]
    # dview = get_engines(n_workers=8)

    # Load imports on the client
    load_imports_on_client(dview)

    # Initialize population objects on the clients
    dview.apply_sync(initialize_client, (model_type, data['N'], data))
Example #35
 def create_from_options(cls, parser, options):
     noptions = ((options.ipython_profile and 1 or 0) +
                 (options.lsf_directory and 1 or 0) +
                 (options.multiprocessing and 1 or 0))
     if noptions > 1:
         parser.error(
             'You can only specify one of --ipython-profile, --lsf-directory, and --multiprocessing.'
         )
     if options.lsf_directory:
         from . import lsf
         return lsf.LSF(options.njobs,
                        options.lsf_directory,
                        memory=options.memory,
                        job_array_name=options.jobname)
     elif options.ipython_profile:
         from IPython.parallel import Client, LoadBalancedView
         client = Client(profile=options.ipython_profile)
         return IPython(client)
     elif options.multiprocessing:
         return Multiprocessing()
     else:
         return Uniprocessing()
Example #36
def main():   
    parser = optparse.OptionParser(
        usage="""Usage: %prog [options] infolder outfolder
        
        infolder: input folder containing .jpg files
        outfolder: output folder to contain collection
        """)

    # implementing this means dependence on IPython
    parser.add_option("-m", "--mec", dest="mec", default=False,
                  help="Use IPython parallel, say 'default'")
    
    parser.add_option("-n", "--name", dest="name", default='dzc', 
                  help="collection name, default 'dzc'")
    (options, args) = parser.parse_args()

    if len(args)!=2:
        parser.print_help()
        sys.exit(1)
    infolder,outfolder = map(
        lambda d:os.path.abspath(os.path.expanduser(os.path.expandvars(d))),args
        )

    for folder in (infolder, outfolder):
        if not os.path.exists(folder):
            print "Folder %s not found" % folder
            sys.exit(1)
    mec = None 
    if options.mec:
        from IPython.parallel import Client 
        mec = Client(profile=options.mec)
        assert len(mec)>0, 'No engines found'
        
    print infolder, '-->', outfolder
    mc = MakeCollection(infolder, outfolder, collection_name=options.name)
    mc.convert(mec)
    mc.collect()
Example #37
File: utils.py Project: spolakh/rep
def map_on_cluster(parallel_profile, *args, **kw_args):
    """
    The same as map, but the first argument is the parallel profile. Distributes the task over an IPython cluster.

    :param parallel_profile: the IPython cluster profile to use.
    :type parallel_profile: None or str
    :param list args: function, arguments
    :param dict kw_args: kwargs for LoadBalancedView.map_sync

    :return: the result of mapping
    """
    if parallel_profile is None:
        return map(*args)
    elif str.startswith(parallel_profile, 'threads-'):
        n_threads = int(parallel_profile[len('threads-'):])
        pool = ThreadPool(processes=n_threads)
        func, params = args[0], args[1:]
        return pool.map(_threads_wrapper, zip(itertools.cycle([func]),
                                              *params))
    else:
        from IPython.parallel import Client

        return Client(profile=parallel_profile).load_balanced_view().map_sync(
            *args, **kw_args)
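Hypothetical calls covering the three dispatch modes the docstring describes (square is a stand-in; the last form assumes a cluster is running under the named profile):

def square(x):
    return x ** 2

serial = map_on_cluster(None, square, range(5))            # plain built-in map
threaded = map_on_cluster('threads-4', square, range(5))   # local thread pool
remote = map_on_cluster('default', square, range(5))       # IPython cluster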
Example #38
def test_dask_client_from_ipclient():
    c = Client()
    dask_client = dask_client_from_ipclient(c)

    # data
    a = np.arange(100).reshape(10, 10)
    d = da.from_array(a, ((5, 5), (5, 5)))

    try:
        # test array.mean
        expected = a.mean(axis=0)
        d1 = d.mean(axis=0)
        result = d1.compute(get=dask_client.get)
        assert_array_almost_equal(result, expected)

        # test ghosting
        d2 = da.ghost.ghost(d, depth=1, boundary='reflect')
        d3 = da.ghost.trim_internal(d2, {0: 1, 1: 1})
        result1 = d3.compute(get=dask_client.get)
        assert_array_almost_equal(result1, a)

    finally:
        # close the workers
        dask_client.close(close_scheduler=True)
Example #39
def parallel_directview_generic(PROFILE_NAME, module_names, FILE_REPOSITORY,
                                LOG_LEVEL, FILES_TO_PROCESS, input_dir,
                                output_dir, reports_dir, COMMENT,
                                MAKE_KML_FILES):
    '''sets up worker namespaces for ipython parallel runs'''
    print "Run 'ipcluster start -n 10' from the command line first!"
    #from IPython import parallel
    from IPython.parallel import Client
    c = Client()  #c=Client(debug=True)
    print c.ids
    #engine_count = len(c.ids)
    dview = c[:]  #DirectView list of engines
    dview.clear()  #clean up the namespaces on the eng
    dview.block = True

    #build parallel namespace
    dview['module_names'] = module_names
    dview['PROFILE_NAME'] = PROFILE_NAME
    dview['COMMENT'] = COMMENT
    dview['LOG_LEVEL'] = LOG_LEVEL
    dview.scatter('files_to_process', FILES_TO_PROCESS)
    dview['file_repository'] = FILE_REPOSITORY
    dview['MAKE_KML_FILES'] = MAKE_KML_FILES

    reports_dir = settings.PROFILE_REPORTS_PATH
    output_dir = settings.PROFILE_DATA_PATH + PROFILE_NAME + '/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dview['input_dir'] = input_dir
    dview['output_dir'] = output_dir
    dview['reports_dir'] = reports_dir

    #logger = logging.Logger('ipcluster')
    #logger.setLevel(LOG_LEVEL)
    #dview['logger'] = logger
    return dview
Example #40
def fit(df, alpha=0.1, eta0=0.01, power_t=0.25, epochs=3,
        latent_dimensions=10):

    rc = Client()
    dview = rc[:]
    k = float(len(rc))

    with dview.sync_imports():
        import random 

        from numpy.linalg import norm
        from numpy import dot, isnan
        from numpy.random import normal
        from pandas import DataFrame

    dview.scatter('df', df)
    res = dview.apply_sync(sgd, alpha=alpha, eta0=eta0, power_t=power_t,
            epochs=epochs, latent_dimensions=latent_dimensions)

    add = lambda a,b: a.add(b, fill_value=0)
    U = reduce(add, (r[0] for r in res))/k
    V = reduce(add, (r[1] for r in res))/k

    return U, V
Example #41
    def create_runners(self):
        # Check the status of runners and the cluster process
        # If cluster process dead (non-None return to p.poll)
        # kill and re-start (add config for this?) and then
        # create an in-process kernel and pop it on the queue
        # to keep things ticking

        if self.p is None:
            # Use the in-process runner for now
            # self.runners = [self.in_process_runner]
            self.start_cluster()

        elif self.p.poll() is None:
            # Create matching runners for the client
            # note that these may already exist; we need to check
            if self.client is None:
                self.client = Client(timeout=5)

                # FIXME: Inline plots are fine as long as we don't do it on the cluster+the interactive kernel; this results
                # in an image cache being generated that breaks the pickle

            for e in self.client:
                found = False
                for r in self.runners:
                    if e.targets == r.e.targets:
                        found = True

                if not found:
                    runner = ClusterRunner(e)
                    runner.e.execute('%reset -f')
                    runner.e.execute('%matplotlib inline')
                    self.runners.append(runner)

        else:
            # We've got a non-None value from poll: the process has terminated; this will trigger a restart on the next poll
            self.stop_cluster()
Example #42
    def featurize(self,
                  mols,
                  parallel=False,
                  client_kwargs=None,
                  view_flags=None):
        """
        Calculate features for molecules.

        Parameters
        ----------
        mols : iterable
            RDKit Mol objects.
        parallel : bool, optional (default False)
            Featurize molecules in parallel using IPython.parallel.
        client_kwargs : dict, optional
            Keyword arguments for IPython.parallel Client.
        view_flags : dict, optional
            Flags for IPython.parallel LoadBalancedView.
        """
        if parallel:
            from IPython.parallel import Client

            if client_kwargs is None:
                client_kwargs = {}
            if view_flags is None:
                view_flags = {}
            client = Client(**client_kwargs)
            client.direct_view().use_dill()  # use dill
            view = client.load_balanced_view()
            view.set_flags(**view_flags)
            call = view.map(self._featurize,
                            np.array_split(mols, len(client.direct_view())),
                            block=False)
            features = call.get()
            features = np.concatenate(features)

            # get output from engines
            call.display_outputs()

        else:
            features = self._featurize(mols)

        return np.asarray(features)
Example #43
def add_engines(n=1, profile='iptest'):
    rc = Client(profile=profile)
    base = len(rc)
    eps = []
    for i in range(n):
        ep = TestProcessLauncher()
        ep.cmd_and_args = ipengine_cmd_argv + ['--profile=%s'%profile, '--log-level=50']
        ep.start()
        launchers.append(ep)
        eps.append(ep)
    tic = time.time()
    while len(rc) < base+n:
        if any([ ep.poll() is not None for ep in eps ]):
            raise RuntimeError("A test engine failed to start.")
        elif time.time()-tic > 10:
            raise RuntimeError("Timeout waiting for engines to connect.")
        time.sleep(.1)
        rc.spin()
    rc.close()
    return eps
Example #44
def add_engines(n=1, profile='iptest'):
    rc = Client(profile=profile)
    base = len(rc)
    eps = []
    for i in range(n):
        ep = Popen(
            ['ipengine'] +
            ['--profile', profile, '--log-level', '10', '--log-to-file'],
            stdout=blackhole,
            stderr=STDOUT)
        # ep.start()
        processes.append(ep)
        eps.append(ep)
    tic = time.time()
    while len(rc) < base + n:
        if any([ep.poll() is not None for ep in eps]):
            raise RuntimeError("A test engine failed to start.")
        elif time.time() - tic > 10:
            raise RuntimeError("Timeout waiting for engines to connect.")
        time.sleep(.1)
        rc.spin()
    rc.close()
    return eps
Example #45
def starmap(func, iterable, **kwargs):
    """
    A dynamic load balancing parallel implementation of itertools.starmap for IPython.parallel.
    
    The reason for its existence is twofold.
    First, the desire to easily submit a 'map' onto inputs
      already grouped in tuples in IPython.parallel.
    Second, the ability to submit a 'map' onto very large,
      potentially infinite, sequences.
    This function allows one to do that.  It is a generator function, so it is iterable.
    It maintains an internal list of returned results that are removed once yielded.
    The iterable passed as an argument need only have a next() method and raise StopIteration
      when it is finished iterating.

    Arguments
    ---------
    *func*   -   The function to be called (remotely) on each iterable.next()
    *iterable* - An iterable, generator, generator function...etc.  Something with a .next() that
                 will raise StopIteration when finished
    *profile*  -  (optional keyword argument.  Default = None) The ipython parallel cluster profile.
                  This function expects the cluster to already be 'up'.  Under the default of None,
                  this will start a client and load balanced view under the default profile, if
                  possible.  If the profile specified is not running, an IO error will be raised.
                  (Ignored if client keyword argument is specified)
    *client*   -  (optional keyword argument.  Default = None) An instance of
                  IPython.parallel.Client
    *max_fill*  - (optional keyword argument.  Default = 50000) The maximum number of
                  'jobs' to submit to the cluster before waiting for earlier jobs to finish.
    *wait*      - (optional keyword argument.  Default = 1)  Number of seconds to wait when
                  submission queue is full, and no further output may be yielded.
    *kwargs*    - Additional keyword arguments are treated as keyword arguments to func.


    A note on the profile and client keyword arguments:  If client is specified, the profile
    kwarg will be ignored.
    
    """
    profile = kwargs.pop('profile', None)
    rc = kwargs.pop('client', None)
    max_fill = kwargs.pop('max_fill', 50000)
    wait = kwargs.pop('wait', 1)
    if rc is None:
        rc = Client(profile=profile)
    elif not isinstance(rc, Client):
        raise ValueError(
            'client keyword value expected an instance of IPython.parallel.Client'
        )
    lbv = rc.load_balanced_view()

    async_results_list = []  #This will serve as our output queue

    while True:  #GO until StopIteration is raised

        if n_queued_jobs(lbv) < max_fill:
            #If there are less than the maximum number of jobs waiting to run,
            #submit the next job, unless we cannot.
            try:
                async_results_list.append(
                    lbv.apply(func, *iterable.next(), **kwargs))

            except StopIteration:
                if len(async_results_list) == 0:
                    raise

        while len(async_results_list) > 0 and async_results_list[0].ready():
            #If there are results ready to be read, pop them off
            yield async_results_list.pop(0).get()

        if n_queued_jobs(lbv) >= max_fill:
            time.sleep(wait)
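A usage sketch for starmap, assuming a cluster is up under the default profile; pairs is a generator of argument tuples that starmap consumes lazily, so it may be arbitrarily long:

def add(x, y):
    return x + y

pairs = ((i, i + 1) for i in xrange(10000))
for total in starmap(add, pairs):
    print total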
Example #46
import os
import pickle
from datetime import datetime
from ConfigParser import ConfigParser
from IPython.parallel import Client

config = ConfigParser()
config.read('{0}/config.ini'.format(os.path.dirname(
    os.path.realpath(__file__))))

rc = Client(packer="pickle")
dview = rc[:]
print rc.ids


@dview.remote(block=True)
def fetch():
    import os
    os.chdir(node)
    for i, f in enumerate(files):
        fname = f.split("/")[-1].split(".")[0]
        if not os.path.exists("{0}.xml".format(fname)):
            os.system("wget {0}".format(f))
            os.system("unzip {0}.zip".format(fname))


fname = open("urls.pickle", "rb")
urls = pickle.load(fname)

master = config.get('directory', 'home')
node = config.get('directory', 'local')
Example #47
                                     Reference('it' + name))
        # This causes the StopIteration exception to be raised.
        except RemoteError, e:
            if e.ename == 'StopIteration':
                raise StopIteration
            else:
                raise e
        else:
            yield result


# Main, interactive testing
if __name__ == '__main__':

    from IPython.parallel import Client, Reference
    rc = Client()
    view = rc[:]
    print 'Engine IDs:', rc.ids

    # Make a set of 'sorted datasets'
    a0 = range(5, 20)
    a1 = range(10)
    a2 = range(15, 25)

    # Now, imagine these had been created in the remote engines by some long
    # computation.  In this simple example, we just send them over into the
    # remote engines.  They will all be called 'a' in each engine.
    rc[0]['a'] = a0
    rc[1]['a'] = a1
    rc[2]['a'] = a2
Example #48
# encoding: utf-8
# ---------------------------------------------------------------------------
#  Copyright (C) 2008-2014, IPython Development Team and Enthought, Inc.
#  Distributed under the terms of the BSD License.  See COPYING.rst.
# ---------------------------------------------------------------------------
"""
Calculate pi using a Monte Carlo method using IPython Parallel.
"""

from IPython.parallel import Client, interactive

from util import timer

client = Client()
view = client[:]
view.execute('import numpy')


@interactive  # this runs on the engines
def calc_pi_on_engines(n):
    x = numpy.random.rand(n)
    y = numpy.random.rand(n)
    r = numpy.hypot(x, y)
    return 4. * (r < 1.).sum() / n


@timer
def calc_pi(n):
    """Estimate pi using IPython.parallel."""
    n_engines = n / len(view)
    results = view.apply_sync(calc_pi_on_engines, n_engines)
Example #49
    def fit(self, X, y, sample_weight=None):
        """
        Run fit with all sets of parameters.

        :param X: array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and n_features is the number of features.

        :param y: array-like, shape = [n_samples] or [n_samples, n_output], optional
        :param sample_weight: array-like, shape = [n_samples], weight
        """
        X, y, sample_weight = check_inputs(X,
                                           y,
                                           sample_weight=sample_weight,
                                           allow_none_weights=True)

        if self.parallel_profile is None:
            while self.evaluations_done < self.params_generator.n_evaluations:
                state_indices, state_dict = self.params_generator.generate_next_point(
                )
                status, value = apply_scorer(self.scorer, state_dict,
                                             self.base_estimator, X, y,
                                             sample_weight)
                assert status == 'success', 'Error during grid search ' + str(
                    value)
                self.params_generator.add_result(state_indices, value)
                self.evaluations_done += 1
                state_string = ", ".join(
                    [k + '=' + str(v) for k, v in state_dict.items()])
                self._log('{}: {}'.format(value, state_string))
        else:
            if str.startswith(self.parallel_profile, 'threads'):
                _, n_threads = str.split(self.parallel_profile, '-')
                portion = int(n_threads)
                print("Performing grid search in {} threads".format(portion))
            else:
                from IPython.parallel import Client

                direct_view = Client(
                    profile=self.parallel_profile).direct_view()
                portion = len(direct_view)
                print(
                    "There are {0} cores in cluster, the portion is equal {1}".
                    format(len(direct_view), portion))

            while self.evaluations_done < self.params_generator.n_evaluations:
                state_indices_array, state_dict_array = self.params_generator.generate_batch_points(
                    size=portion)
                current_portion = len(state_indices_array)
                result = map_on_cluster(
                    self.parallel_profile, apply_scorer,
                    [self.scorer] * current_portion, state_dict_array,
                    [self.base_estimator] * current_portion,
                    [X] * current_portion, [y] * current_portion,
                    [sample_weight] * current_portion)
                assert len(
                    result
                ) == current_portion, "The length of result is very strange"
                for state_indices, state_dict, (status, score) in zip(
                        state_indices_array, state_dict_array, result):
                    params = ", ".join(
                        [k + '=' + str(v) for k, v in state_dict.items()])
                    if status != 'success':
                        message = 'Fail during training on the node \nException {exc}\n Parameters {params}'
                        self._log(message.format(exc=score, params=params),
                                  level=40)
                    else:
                        self.params_generator.add_result(state_indices, score)
                        self._log("{}: {}".format(score, params))
                self.evaluations_done += current_portion
                print("%i evaluations done" % self.evaluations_done)
        return self
Example #50
def _ipython_map(func, iterable, cfg):

    import IPython

    if IPython.version_info[0] < 4:
        from IPython.parallel import Client
    else:
        from ipyparallel import Client

    rc = Client()
    rc[:].clear()


    ### Make modules for all dependencies on the engines
    for dep in cfg['dependencies']:
        mod_name = os.path.splitext(
            os.path.basename(dep)
        )[0]


        with open(dep) as f:
            code = f.read()

        code = code.encode('string_escape')

        rc[:].execute(
"""
import imp
import sys

_mod = imp.new_module('{mod_name}')
sys.modules['{mod_name}'] = _mod

exec '''{code}''' in _mod.__dict__

del _mod
""".format(code=code, mod_name=mod_name),
            block=True
        )




    ### Make sure all definitions surrounding the func are present on
    ### the engines (evaluate the code from the file of the func)
    fname = inspect.getfile(func)
    with open(fname) as f:
        code = f.read()


    logger.info("IPython engine IDs: {}".format(rc.ids))


    ## Need to escape all ' and " in order to embed the code into
    ## execute string
    # code = code.replace("\'", "\\\'")
    # code = code.replace("\"", "\\\"")

    code = code.encode('string_escape')


    ## The trick with `exec in {}' is done because we want to avoid
    ## executing `__main__'
    rc[:].execute(
"""
_tmp_dict = dict()
exec '''{code}''' in _tmp_dict
globals().update(_tmp_dict)
del _tmp_dict
""".format(code=code),
        block=True
    )
    # status.wait()

    # res = rc[:].apply(dir)
    # print(res.get())

    wrap = _FuncWrap(func)
    pool = rc.load_balanced_view()

    results = []
    for args in iterable:
        results.append( pool.apply_async(wrap, args) )


    for result in results:
        yield result.get()
Example #51
def load_client():
    global client, view
    client = Client()
    view = client.load_balanced_view()
    client.block = False
    client[:].use_dill()
Example #52
def runScan(options):
    import os, sys, pickle
    import numpy as np

    scan_settings = pickle.load(
        open(
            os.path.join(user.scans_dir, options.NAME,
                         'OscFit_ScanSettings.pckl')))
    print '\nLLH scan: Running with the following fit settings'
    for one_key in scan_settings['fit_settings']:
        print '\t', one_key, '\t', scan_settings['fit_settings'][one_key]

    total_jobs = scan_settings['dm31_map'].size
    print '\nLLH scan: Total jobs ', total_jobs
    job_script = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'oscFit_oneJob_mctest.py')

    if len(options.JOBS) == 0:
        job_range = range(0, total_jobs)
    else:
        job_range = np.array(options.JOBS, dtype=int) - 1

    if options.TEST:
        print 'LLH scan: Running first point as a test!'
        print '\tYou will be asked if you wish to continue before moving on'
        os.system(' '.join(['python', job_script, '1', '1', options.NAME]))

        print 'LLH scan: Continue execution? ... '
        raw_input()

    if options.MODE == 'uge_farm':
        #print job_script
        farm_multiplicity = 1

        if len(options.JOBS) == 0:
            total_jobs = np.ceil(total_jobs * 1. / farm_multiplicity)
            job_array = '1-' + "%i" % total_jobs
            qsub_line = '  '.join([
                'qsub -t', job_array, './oscFit_farmScript.sh', job_script,
                "%i" % farm_multiplicity, options.NAME
            ])
            print qsub_line
            os.system(qsub_line)
        else:
            for job_array in options.JOBS:
                qsub_line = '  '.join([
                    'qsub -t', job_array, './oscFit_farmScript.sh', job_script,
                    "1", options.NAME
                ])
                print qsub_line
                os.system(qsub_line)

    elif options.MODE == 'iparallel':
        from IPython.parallel import Client, interactive
        import iparallel
        rc = Client(profile='sge')
        lview = rc.load_balanced_view()
        result = lview.map_async(doOscFit, [options.NAME] * len(job_range),
                                 job_range)
        iparallel.waitOn(result)

    elif options.MODE == 'local':
        for i in job_range:
            os.system('  '.join(
                ['python', job_script,
                 "%i" % i, '1', options.NAME]))
    print 'Finished!'
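
# A hypothetical driver for runScan: the _Options container only mimics the
# attributes the function reads (NAME, JOBS, TEST, MODE).
class _Options(object):
    NAME = 'my_scan'   # scan folder under user.scans_dir (assumed to exist)
    JOBS = []          # empty list means "run every grid point"
    TEST = False
    MODE = 'local'     # or 'uge_farm' / 'iparallel'

runScan(_Options())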
Exemplo n.º 53
0
    grid = ns.grid
    partition = ns.partition
    Lx = ns.Lx
    Ly = ns.Ly
    c = ns.c
    tstop = ns.tstop
    if ns.save:
        user_action = wave_saver
    else:
        user_action = None

    num_cells = 1.0*(grid[0]-1)*(grid[1]-1)
    final_test = True

    # create the Client
    rc = Client(profile=ns.profile)
    num_procs = len(rc.ids)

    if partition is None:
        partition = [1,num_procs]

    assert partition[0]*partition[1] == num_procs, "can't map partition %s to %i engines"%(partition, num_procs)

    view = rc[:]
    print("Running %s system on %s processes until %f" % (grid, partition, tstop))

    # functions defining initial/boundary/source conditions
    def I(x,y):
        from numpy import exp
        return 1.5*exp(-100*((x-0.5)**2+(y-0.5)**2))
    def f(x,y,t):
        return 0.0


Exemplo n.º 54
0
import os
from time import time
from IPython.parallel import Client

md = '/lcrc/group/earthscience/radar/nexrad/chicago_floods/'
idir = md
filelist = os.listdir(md)
good_files = []
for fl in filelist:
    if 'KLOT' in fl:
        good_files.append(idir + fl)
good_files.sort()
t1 = time()
My_Cluster = Client()
My_View = My_Cluster[:]
print My_View
print len(My_View)

#Turn off blocking so all engines can work async
My_View.block = False

#On all engines, set up matplotlib for headless (non-interactive) plotting
My_View.execute('import matplotlib')
My_View.execute('matplotlib.use("agg")')

#Map the code and input to all workers
result = My_View.map_async(do_grid_map_gates_to_grid, good_files)

#Reduce the result to get a list of output
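#A minimal sketch of that reduction (an assumption here: each task is taken
#to return one gridded radar object); get() blocks until all engines finish.
grids = result.get()
print len(grids), 'files gridded in', time() - t1, 'seconds'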
Exemplo n.º 55
0
    def client_wait(self, client, jobs=None, timeout=-1):
        """my wait wrapper, sets a default finite timeout to avoid hangs"""
        if timeout < 0:
            timeout = self.timeout
        return Client.wait(client, jobs, timeout)
Exemplo n.º 56
0
import numpy as np
from IPython.parallel import Client
c = Client(profile='mpi')
view = c[:]
view.activate() # enable magics
view.run('psum.py')
view.scatter('a',np.arange(16,dtype='float'))
view['a']
%px totalsum = psum(a)
view['totalsum']
Exemplo n.º 57
0
# <codecell>

strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

# <markdowncell>

# ## Parallel computation across strike prices and volatilities

# <markdowncell>

# The Client is used to set up the calculation and communicates with all of the engines.

# <codecell>

c = Client(profile=cluster_profile)

# <markdowncell>

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.

# <codecell>

view = c.load_balanced_view()

# <codecell>

print("Strike prices: ", strike_vals)
print("Volatilities: ", sigma_vals)
Exemplo n.º 58
0
def proc_data(data_folder,
              h5name,
              multiproc=False,
              chunk_size=4,
              filetype='aia',
              reffile=None,
              fittype=None,
              calfile=None,
              picts=False,
              **kwargs):

    if filetype == 'aia':
        GcmsObj = gcf.AiaFile
        ends = ('CDF', 'AIA', 'cdf', 'aia')
    else:
        raise ValueError("Unknown filetype: {}".format(filetype))

    files = os.listdir(data_folder)
    files = [f for f in files if f.endswith(ends)]
    files = [os.path.join(data_folder, f) for f in files]

    ref = None
    if reffile:
        if reffile.endswith(('txt', 'TXT')):
            ref = gcr.TxtReference(reffile, **kwargs)

    fit = None
    if fittype:
        if fittype.lower() == 'nnls':
            fit = gcfit.Nnls(**kwargs)

    h5 = gcd.GcmsStore(h5name, **kwargs)

    if multiproc:
        try:
            client = Client()
        except Exception:
            error = "ERROR! You do not have an IPython Cluster running.\n\n"
            error += "Start cluster with: ipcluster start -n # &\n"
            error += "Where # == the number of processors.\n\n"
            error += "Stop cluster with: ipcluster stop"
            print(error)
            h5.close()
            return

        dview = client[:]
        dview.block = True
        dview['ref'] = ref
        dview['fit'] = fit
        dview['GcmsObj'] = GcmsObj
        chunk_size = len(dview)

    # Chunk the data so lots of data files aren't opened in memory.
    for chunk in _chunker(files, chunk_size):
        if multiproc:
            datafiles = dview.map_sync(_proc_file,
                                       [(i, kwargs) for i in chunk])
        else:
            datafiles = [GcmsObj(f, **kwargs) for f in chunk]
            if ref:
                ref(datafiles)
            if fit:
                fit(datafiles)

        h5.append_gcms(datafiles)

    if calfile:
        cal = gcc.Calibrate(h5, **kwargs)
        cal.curvegen(calfile, picts=picts, **kwargs)
        cal.datagen(picts=picts, **kwargs)

    h5.compress()
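
# A hypothetical invocation of proc_data (folder and file names are
# placeholders; multiproc=True assumes a cluster started with
# `ipcluster start -n 4 &`):
proc_data('data/run01', 'run01.h5', multiproc=True,
          reffile='ref.txt', fittype='nnls', calfile='cal.csv')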
Exemplo n.º 59
0
class Bakapara:
    """A "bakapara" client for IPython cluster.

    Args: identical to IPython.parallel.Client

    """
    def __init__(self, **args):
        self.rc = Client(**args)
        self.lview = None
        self.ar = None
        self.jobs = None
        self.indices = None
        self.finished = set()
        # Obtain the host names and PIDs of engines.
        self.pids = self.rc[:].apply(os.getpid).get_dict()
        self.hosts = self.rc[:].apply(socket.getfqdn).get_dict()
        # Change the working directory of each engine.
        self.rc[:].apply(os.chdir, os.getcwd())

    def run(self, jobs, targets=None):
        """Runs the jobs on the cluster.

        Args:
            jobs (list): list of dictionary objects describing the jobs.
            targets (int, list of ints, 'all', or None): the engine(s) on which the jobs will run.

        Returns:
            bool: True if successful, False otherwise (e.g., jobs are running).

        """
        if self.ar is not None and not self.ar.ready():
            return False
        self.lview = self.rc.load_balanced_view(targets)
        self.ar = self.lview.map_async(runjob, jobs)
        self.jobs = jobs
        self.indices = dict([(k, v) for v, k in enumerate(self.ar.msg_ids)])
        self.finished = set()
        return True

    def wait(self, timeout=1e-3):
        """Waits for the jobs to complete and writes job results.

        Args:
            timeout (float): a time in seconds, after which to give up.

        """
        if self.ar is None:
            return

        # Find finished msg_ids.
        pending = set(self.ar.msg_ids)
        try:
            self.rc.wait(pending, timeout)
        except TimeoutError:
            pass
        finished = pending.difference(self.rc.outstanding)

        # Overwrite the results in the job array.
        for msg_id in finished:
            i = self.indices[msg_id]
            if i in self.finished:
                continue
            job = self.jobs[i]
            meta = self.rc.metadata[msg_id]
            result = self.rc.results[msg_id][0]
            for key in ('submitted', 'started', 'completed', 'received'):
                result[key] = meta[key].isoformat()
            for key in ('engine_id', 'pyerr', 'pyout', 'status', 'msg_id'):
                result[key] = meta[key]
            result['elapsed'] = str(meta['completed'] - meta['started'])
            result['host'] = self.hosts[meta['engine_id']]
            job['result'] = result
            self.finished.add(i)

    def ready(self):
        """Returns whether the jobs have completed."""
        return self.ar is not None and self.ar.ready()

    def successful(self):
        """Returns whether the jobs completed without raising an exception.

        Raises:
            AssertionError: the result is not ready.

        """
        return self.ar is not None and self.ar.successful()

    def abort(self, **args):
        """Aborts jobs.

        Args: identical to IPython.parallel.client.view.LoadBalancedView.abort()

        """
        if self.lview is not None:
            self.lview.abort(**args)

    def interrupt(self):
        """Sends SIGINT signal to engines (experimental).

        http://mail.scipy.org/pipermail/ipython-dev/2014-March/013426.html
        """
        self.abort()
        for i in self.rc.ids:
            host = self.hosts[i]
            pid = self.pids[i]
            if host == socket.getfqdn():
                os.kill(pid, signal.SIGINT)
            else:
                os.system('ssh {} kill -INT {}'.format(host, pid))

    def shutdown(self, **args):
        """Terminates one or more engine processes, optionally including the hub.

        Args: identical to IPython.parallel.Client.shutdown

        """
        if self.lview is None:
            return False
        self.lview.shutdown(**args)

    def status(self, interval=1., timeout=-1, fo=sys.stdout):
        """Waits for the jobs, printing progress at regular intervals

        Args:
            interval (float): a time in seconds, after which to print the progress.
            timeout (float): a time in seconds, after which to give up waiting.
            fo (file): a file object to which the progress is printed.

        """
        if self.ar is None:
            return
        if timeout is None:
            timeout = -1

        # Make sure to write the job results into the job objects.
        self.wait(1e-3)

        tic = time.time()
        while not self.ar.ready() and (timeout < 0
                                       or time.time() - tic <= timeout):
            self.wait(interval)
            clear_output(wait=True)
            dt = datetime.timedelta(seconds=self.ar.elapsed)
            fo.write('{}/{} tasks finished after {}'.format(
                self.ar.progress, len(self.ar), str(dt)))
            fo.flush()
        else:
            fo.write('\n')
        dt = datetime.timedelta(seconds=self.ar.elapsed)
        clear_output(wait=True)
        fo.write('{} tasks completed in {}\n'.format(len(self.ar), str(dt)))
        fo.flush()

    def __len__(self):
        """Returns the number of engines."""
        return len(self.rc)
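
# A hypothetical session (assumes engines started with `ipcluster start` and
# a top-level `runjob(job)` function importable on the engines, as referenced
# in run() above):
bp = Bakapara(profile='default')
jobs = [{'cmd': 'task-{}'.format(i)} for i in range(10)]
if bp.run(jobs):
    bp.status()               # prints progress until every job finishes
    print(jobs[0]['result'])  # each job dict now carries its result record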
Exemplo n.º 60
0
from IPython.parallel import Client
from numpy import array, savez, percentile, nan

from arch.compat.python import range, lmap

# Time in seconds to sleep before checking if ready
SLEEP = 10
# Number of repetitions
EX_NUM = 500
# Number of simulations per exercise
EX_SIZE = 200000
# Approximately controls memory use, in MiB
MAX_MEMORY_SIZE = 100

rc = Client()
dview = rc.direct_view()
with dview.sync_imports():
    from numpy import ones, vstack, arange, cumsum, sum, dot, zeros
    from numpy.random import RandomState, seed, random_integers
    from numpy.linalg import pinv


def clear_cache(client, view):
    """Cache-clearing function from mailing list"""
    assert not client.outstanding, "don't clear history when tasks are outstanding"
    client.purge_results('all')  # clears controller
    client.results.clear()
    client.metadata.clear()
    view.results.clear()
    client.history = []