Example #1
    def initialize_ipengines(self):
        """ Initialize ipengines, load environ vars, etc.
        """
        from IPython.kernel import client  # 20091202 added

        self.mec = client.MultiEngineClient()
        # THE FOLLOWING LINE IS DANGEROUS WHEN OTHER TYPES OF TASKS MAY BE OCCURRING:
        self.mec.reset(targets=self.mec.get_ids())  # Reset the namespaces of all engines
        self.tc = client.TaskClient()
        self.tc.clear()  # This supposedly clears the list of finished task objects in the task-client
        self.mec.flush()  # This doesn't seem to do much in our system.

        #import pdb; pdb.set_trace() # DEBUG
        #import os,sys
        #import classification_interface
        #import plugin_classifier
        #import ptf_master
        #import MySQLdb
        #import get_classifications_for_ptf_srcid_and_class_schema
        #Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str=self.schema_str)

        exec_str = """import os,sys
import classification_interface
import plugin_classifier
import ptf_master
import MySQLdb
import get_classifications_for_ptf_srcid_and_class_schema
Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str="%s")
        """ % (self.schema_str)
        self.mec.execute(exec_str)
Example #2
def engines_get_from_bucket(bucket, mec=None, filestring='*', dir_root='/mnt'):
    if not mec:
        mec = client.MultiEngineClient()

    engines_import('boto', mec)

    raise NotImplementedError('unfinished!')
Example #3
    def __init__(self,
                 dict_iterable={},
                 func=None,
                 task_furl=None,
                 multiengine_furl=None,
                 engine_furl=None):
        """
        Sets the function to be called and the list of parameter dictionaries,
        connects to the IPython controller, distributes the tasks to the 
        engines and collects the results.
        
        Requires that ipcontroller and ipengine(s) are set up. If no FURLs are 
        given, the default location from the ipython setup is used.
        
        Parameters:
        dict_iterable - list of parameter dictionaries
        func - function to call with parameter dictionaries
        task_furl - FURL for task clients to connect to.
        multiengine_furl - FURL for multiengine clients to connect to
        engine_furl - FURL for ipengines to connect to
        """
        ParameterSearcher.__init__(self, dict_iterable, func)
        self.task_furl = task_furl
        self.multiengine_furl = multiengine_furl
        self.engine_furl = engine_furl
        from IPython.kernel import client
        self.mec = client.MultiEngineClient(furl_or_file=multiengine_furl)
        self.tc = client.TaskClient(furl_or_file=task_furl)

        # know which tasks we'll have to retrieve
        self.taskids = []
        # we keep track of failed tasks
        self.failed_tasks = []
Example #4
def pinit(message="Hello CASA Cluster"):
    casalog.post("Setting up the connection to the remote nodes...", origin="PDeconv::pinit")
    mec = client.MultiEngineClient()
    ids = mec.get_ids()
    print "Connected to IDs ", ids
#    tt = 'print ' + "'" + message + "'"
    mec.activate()
#    mec.execute(tt)
    return mec
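
A hedged usage sketch (hypothetical; assumes a CASA session where casalog is available and ipengines are already attached to the controller):

mec = pinit("starting parallel deconvolution")
mec.execute("import numpy")  # runs on every connected engine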
Example #5
    def __init__(self, site):
        self.tc = client.TaskClient()
        self.rc = client.MultiEngineClient()
        self.rc.execute(fetchParse)

        self.allLinks = []
        self.linksWorking = {}
        self.linksDone = {}

        self.site = site
Example #6
def fill_instance_engines(mec=None, eng_per_proc=2, **kwargs):
    if not mec:
        mec = client.MultiEngineClient()

    engines_import('os', mec)

    ids_by_ip = engine_ids_by_ip(mec)
    for ip, ids in ids_by_ip.items():
        mec.execute("p = int(os.environ['NUMPROCS'])")
        [procs] = mec.pull('p', ids[0])
        for i in range((procs * eng_per_proc) - len(ids)):
            launch_engine(ip, mec=mec, **kwargs)
Example #7
def make_controller(controller_command='ipcontroller',
                    furl_dir=None,
                    max_wait=10.):
    """
    Start an ipcontroller.

    Parameters:
    controller_command - path to the command to invoke the controller with.
                         Default requires the controller to be in the path.
    furl_dir - the directory to create FURLs in. Default is to create them
               in the system's default temp directory as returned by
               tempfile.gettempdir().
    max_wait - maximum number of seconds to wait for the controller to
               become accessible. It is polled three times a second during
               that time.

    Returns: Dictionary with keys:
    contr_obj - the controller's Popen object
    task_furl - path to the FURL for task clients
    multiengine_furl - path to the FURL for multiengine clients
    engine_furl - path to the FURL for engines
    """
    import subprocess, tempfile
    if furl_dir is None:
        furl_dir = tempfile.gettempdir()
    (fd, engine_furl) = tempfile.mkstemp(dir=furl_dir,
                                         prefix='furl_engine_')
    (fd, multiengine_furl) = tempfile.mkstemp(dir=furl_dir,
                                              prefix='furl_multiengine_')
    (fd, task_furl) = tempfile.mkstemp(dir=furl_dir, prefix='furl_task_')
    contr = subprocess.Popen(args=[
        controller_command,
        '--engine-furl-file=%s' % engine_furl,
        '--multiengine-furl-file=%s' % multiengine_furl,
        '--task-furl-file=%s' % task_furl
    ])
    # wait until the controller is accessible
    import time
    t = time.time()
    from IPython.kernel import client
    while True:
        try:
            mec = client.MultiEngineClient(furl_or_file=multiengine_furl)
            time.sleep(0.5)
            break
        except Exception, e:
            if (time.time() - t) < max_wait:
                print "can't connect to controller yet. Retrying..."
                time.sleep(0.33)
            else:
                print "No connection after %f seconds. Giving up..." % max_wait
                raise e
    return dict(contr_obj=contr,
                task_furl=task_furl,
                multiengine_furl=multiengine_furl,
                engine_furl=engine_furl)
Example #8
    def __init__(self, pars={}):
        self.pars = pars
        # TODO:             - initialize ipython modules
        self.mec = client.MultiEngineClient()
        #self.mec.reset(targets=self.mec.get_ids()) # Reset the namespaces of all engines
        self.tc = client.TaskClient()
        self.task_id_list = []

        #### 2011-01-21 added:
        self.mec.reset(targets=self.mec.get_ids())
        self.mec.clear_queue()
        self.mec.clear_pending_results()
        self.tc.task_controller.clear()
Example #9
def launch_bootstrap_engine(target_ip, mec=None, **kwargs):
    '''launches an engine on target_ip, first killing any engines currently
    attached from that IP, so that at most one bootstrap engine runs per host
    '''
    if not mec:
        mec = client.MultiEngineClient()
    try:
        ids_on_ip = engine_ids_by_ip(mec)[target_ip]
    except KeyError:
        pass
    else:
        mec.kill(targets=ids_on_ip)

    return launch_engine(target_ip, mec=mec, **kwargs)
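
A hedged usage sketch (hypothetical IPs; assumes a running controller and the engine_ids_by_ip helper shown in Example #13):

for ip in ['10.0.0.5', '10.0.0.6']:
    launch_bootstrap_engine(ip)  # leaves exactly one fresh engine per host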
Example #10
def engines_put_to_bucket_by_s3fs(filestring,
                                  mount_root='/mnt/s3fs',
                                  mec=None,
                                  s3_conn=None,
                                  bucket=None,
                                  gz=True):
    '''transfers files in filestring (can be glob) to bucket
    if bucket is none, attempts to transfer to a bucket named the last dir in the path of filestring
    creates bucket if it doesn't exist
    '''
    import boto

    if not mec:
        mec = client.MultiEngineClient()

    if not bucket:
        bucket = filestring.split('/')[-2]

    print 'bucket:', bucket

    if not s3_conn:
        aws_keys = get_keys_from_file()
        s3_conn = boto.connect_s3(**aws_keys)

    if bucket not in [b.name for b in s3_conn.get_all_buckets()]:
        print 'create bucket:', bucket
        s3_conn.create_bucket(bucket)

    engines_import('os', mec)

    if gz:
        zip_cmd = "os.system('gzip %s')" % filestring  # renamed to avoid shadowing the builtin zip
        print zip_cmd
        filestring = filestring + '.gz'

        mec.execute(zip_cmd)

    mount = os.path.join(mount_root, bucket)
    makeMdirs = "os.makedirs('%s')" % mount
    print makeMdirs

    mountbucket = "os.system('s3fs %s %s')" % (bucket, mount)
    print mountbucket

    copy = "os.system('cp %s %s')" % (filestring, mount)
    print copy

    mec.execute(makeMdirs)
    mec.execute(mountbucket)
    mec.execute(copy)
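
A hedged usage sketch (hypothetical paths; assumes every engine has s3fs installed and AWS keys readable via get_keys_from_file):

# Gzips each engine's /mnt/results/*.out files and copies them into an S3
# bucket named 'results', mounted at /mnt/s3fs/results.
engines_put_to_bucket_by_s3fs('/mnt/results/*.out')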
Example #11
def launch_engine(target_ip,
                  username=os.environ['USER'],
                  mec=None,
                  key_file='~/.ssh/gsg-keypair'):
    '''launches an engine on target_ip
    '''
    if not mec:
        mec = client.MultiEngineClient()


#    ssh = r'ssh %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no %s%s ipengine 2\> /dev/null \> /dev/null \& 2> /dev/null' % \
    ssh = r'ssh %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no %s%s ipengine \& &' % \
        ((key_file and '-i '+key_file or ''),
         (username and username+'@' or ''), target_ip)
    return os.system(ssh)
Example #12
File: parallel.py  Project: jhsa26/pymc
    def __init__(self, input, db='ram', chains=2):
        try:
            mec = client.MultiEngineClient()
        except:
            p = subprocess.Popen('ipcluster -n %d' % chains, shell=True)
            p.wait()
            mec = client.MultiEngineClient()

        # Check everything is alright.
        nproc = len(mec.get_ids())
        assert chains <= nproc

        Sampler.__init__(self, input, db=db)

        # Import the individual models in each process
        #mec.pushModule(input)

        proc = range(chains)

        try:
            mec.execute('import %s as input' % input.__name__, proc)
        except:
            mec.execute('import site', proc)
            mec.execute('site.addsitedir(%r)' % os.getcwd(), proc)
            mec.execute('import %s as input; reload(input)' % input.__name__,
                        proc)

        # Instantiate Sampler instances in each process
        mec.execute('from pymc import MCMC', proc)
        #mec.execute('from pymc.database.parallel import Database')
        #for i in range(nproc):
        #    mec.execute(i, 'db = Database(%d)'%i)
        mec.execute("S = MCMC(input, db='txt')", proc)

        self.mec = mec
        self.proc = proc
Example #13
def engine_ids_by_ip(mec=None, interface='eth0'):
    if not mec:
        mec = client.MultiEngineClient()
    if not mec.get_ids():
        return {}
    engines_import('net', mec)
    mec.execute("ip = net.get_ip_address('%s')" % interface)
    ids_by_ip = {}
    for id in mec.get_ids():
        [ip] = mec.pull('ip', [id])
        try:
            ids_by_ip[ip].append(id)
        except KeyError:
            ids_by_ip[ip] = [id]
    return ids_by_ip
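
A hedged usage sketch of the mapping this returns (hypothetical addresses):

ids_by_ip = engine_ids_by_ip()
for ip, ids in ids_by_ip.items():
    print ip, '->', ids  # e.g. 10.0.0.5 -> [0, 1, 2, 3]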
Example #14
File: ipython.py  Project: mfkiwl/lofar-1
    def _get_cluster(self):
        """
        Return task and multiengine clients connected to the running
        pipeline's IPython cluster.
        """
        self.logger.info("Connecting to IPython cluster")
        try:
            tc = IPclient.TaskClient(self.config.get('cluster', 'task_furl'))
            mec = IPclient.MultiEngineClient(self.config.get('cluster', 'multiengine_furl'))
        except NoSectionError:
            self.logger.error("Cluster not defined in configuration")
            raise ClusterError
        except:
            self.logger.error("Unable to initialise cluster")
            raise ClusterError
        return tc, mec
Example #15
    def initialize_clients(self):
        """ Instantiate ipython1 clients, import all module dependencies.
        """
        from IPython.kernel import client
        self.mec = client.MultiEngineClient()
        exec_str = """
import sys
import os
sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + 'Software/ingest_tools'))
import ptf_master
import analyze_iterative_tutor_classification

pars = analyze_iterative_tutor_classification.pars
aitc = analyze_iterative_tutor_classification.Analyze_Iterative_Tutor_Classification(pars)
aitc.connect_to_db()
"""
        print self.mec.execute(exec_str)  # Do we get an echo during execution?
Example #16
def engines_get_from_bucket_by_s3fs(bucket,
                                    mec=None,
                                    filestring='*',
                                    mount_root='/mnt/s3fs',
                                    target_root='/mnt',
                                    gz=False):
    '''uses s3fs to mount bucket at mount_root/<bucket>, 
    creates directory target_root/<bucket>,
    does cp -R mount_root/<bucket>/filestring target_root/<bucket>
    if gz does gunzip target_root/<bucket>/*.gz

    NB: should be run with bootstrap engines only (i.e. 1 per host)
    '''

    if not mec:
        mec = client.MultiEngineClient()

    engines_import('os', mec)

    target = os.path.join(target_root, bucket)
    mount = os.path.join(mount_root, bucket)

    makeTdirs = "os.makedirs('%s')" % target
    print makeTdirs
    makeMdirs = "os.makedirs('%s')" % mount
    print makeMdirs

    mountbucket = "os.system('s3fs %s %s')" % (bucket, mount)
    print mountbucket

    cpsource = os.path.join(mount, filestring)
    copy = "os.system('cp -R %s %s')" % (cpsource, target)
    print copy

    mec.execute(makeTdirs)
    mec.execute(makeMdirs)
    mec.execute(mountbucket)
    mec.execute(copy)

    if gz:
        zipfiles = os.path.join(target, '*.gz')
        unzip = "os.system('gunzip %s')" % zipfiles
        print unzip

        mec.execute(unzip)
Example #17
def write_runclusterconfig(host_list,
                           controller_host=None,
                           mec=None,
                           eng_per_proc=1,
                           **kwargs):
    '''takes a list of hostnames and number of engines per processor, 
    makes an engine description dict and calls write_clusterconfig
    '''
    if not mec:
        mec = client.MultiEngineClient()

    engines_import('os', mec)

    mec.execute("n = int(os.environ['NUMPROCS'])")
    engines_dict = dict(
        zip(host_list, [i * eng_per_proc for i in mec.gather("n")]))

    write_clusterconfig(engines_dict, controller_host, **kwargs)
Example #18
	def test_ipy_island(self):
		from PyGMO import ipy_island, algorithm, problem
		try:
			from IPython.kernel import client
			mec = client.MultiEngineClient()
			if len(mec) == 0:
				raise RuntimeError()
		except ImportError as ie:
			return
		except BaseException as e:
			print('\nThere is a problem with parallel IPython setup. The error message is:')
			print(e)
			print('Tests for ipy_island will not be run.')
			return
		isl_type = ipy_island
		algo_list = [algorithm.py_example(1), algorithm.de(5)]
		prob_list = [problem.py_example(), problem.dejong(1)]
		for algo in algo_list:
			for prob in prob_list:
				self.__test_impl(isl_type,algo,prob)
Example #19
    def main(self):
        """ Main function for Testing.
        """
        # This tests the Multi-engine interface:
        mec = client.MultiEngineClient()
        exec_str = """import os
os.environ['TCP_SEX_BIN']=os.path.expandvars('$HOME/bin/sex')
os.environ['TCP_WCSTOOLS_DIR']=os.path.expandvars('$HOME/src/install/wcstools-3.6.4/bin/')
os.environ['TCP_DIR']=os.path.expandvars('$HOME/src/TCP/')
os.environ['TCP_DATA_DIR']=os.path.expandvars('$HOME/scratch/TCP_scratch/')
os.environ['CLASSPATH']=os.path.expandvars('$HOME/src/install/weka-3-5-7/weka.jar')

        """
        #if os.path.exists(os.path.expandvars("$HOME/.ipython/custom_configs")): execfile(os.path.expandvars("$HOME/.ipython/custom_configs"))
        mec.execute(exec_str)

        # This tests the task client interface:
        tc = client.TaskClient()
        task_list = []

        n_iters_total = 8
        n_iters_per_clear = 10
        for i in xrange(n_iters_total):
            task_str = """cat = os.getpid()"""  # os.getpid() # os.environ
            taskid = tc.run(client.StringTask(task_str, pull="cat"))
            task_list.append(taskid)
            ### NOTE: This can be used to thin down the ipcontroller memory storage of
            ###       finished tasks, but afterwards you cannot retrieve values (below):
            #if (i % n_iters_per_clear == 0):
            #    tc.clear()
        print '!!! NUMBER OF TASKS STILL SCHEDULED: ', tc.queue_status()['scheduled']
        for i, taskid in enumerate(task_list):
            ### NOTE: The following retrieval doesn't work if
            ###       tc.clear() was called earlier:
            task_result = tc.get_task_result(taskid, block=True)
            print task_result['cat']
        print 'done'
        print tc.queue_status()
Example #20
def scatter_and_run(queue, sleeptime=60, mec=None, verbose=False):
    '''not tested, probably not finished :)
    '''
    from time import sleep
    if not mec:
        mec = client.MultiEngineClient()
    mec.scatter('q', queue)

    mec.execute('res = AWS.run_queue(q)', block=False)

    while any([i[1]['pending'] != 'None' for i in mec.queue_status()]):
        done = 0
        for i in mec.get_ids():
            if verbose: print >> sys.stderr, 'Engine %s:' % i,
            [status] = mec.pull(['on', 'tot'], [i])
            if verbose: print >> sys.stderr, '[%s/%s]' % tuple(status)
            done += status[0] - 1

        if verbose:
            print >> sys.stderr, 'total progress: %s of %s' % (done,
                                                               len(queue))
        sleep(sleeptime)

    return mec.gather('res')
Example #21
The dataset we have been using for this is the 200 million digit one here:
ftp://pi.super-computing.org/.2/pi200m/
"""

from IPython.kernel import client
from matplotlib import pyplot as plt
import numpy as np
from pidigits import *
from timeit import default_timer as clock

# Files with digits of pi (10m digits each)
filestring = 'pi200m-ascii-%(i)02dof20.txt'
files = [filestring % {'i': i} for i in range(1, 16)]

# Connect to the IPython cluster
mec = client.MultiEngineClient(profile='mycluster')
mec.run('pidigits.py')

# Run 10m digits on 1 engine
mapper = mec.mapper(targets=0)
t1 = clock()
freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
t2 = clock()
digits_per_second1 = 10.0e6 / (t2 - t1)
print "Digits per second (1 core, 10m digits):   ", digits_per_second1

# Run 150m digits on 15 engines (8 cores)
t1 = clock()
freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
freqs150m = reduce_freqs(freqs_all)
t2 = clock()
Example #22
def main():
    parser = OptionParser()
    parser.set_defaults(n=100)
    parser.set_defaults(tmin=1)
    parser.set_defaults(tmax=60)
    parser.set_defaults(controller='localhost')
    parser.set_defaults(meport=10105)
    parser.set_defaults(tport=10113)

    parser.add_option("-n",
                      type='int',
                      dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t",
                      type='float',
                      dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T",
                      type='float',
                      dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-c",
                      type='string',
                      dest='controller',
                      help='the address of the controller')
    parser.add_option("-p",
                      type='int',
                      dest='meport',
                      help="the port on which the controller listens "
                           "for the MultiEngine/RemoteController client")
    parser.add_option("-P",
                      type='int',
                      dest='tport',
                      help="the port on which the controller listens "
                           "for the TaskClient client")

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = client.MultiEngineClient()
    tc = client.TaskClient()
    print tc.task_controller
    rc.block = True
    nengines = len(rc.get_ids())
    rc.execute('from IPython.utils.timing import time')

    # the jobs should take a random time within a range
    times = [
        random.random() * (opts.tmax - opts.tmin) + opts.tmin
        for i in range(opts.n)
    ]
    tasks = [client.StringTask("time.sleep(%f)" % t) for t in times]
    stime = sum(times)

    print "executing %i tasks, totalling %.1f secs on %i engines" % (
        opts.n, stime, nengines)
    time.sleep(1)
    start = time.time()
    taskids = [tc.run(t) for t in tasks]
    tc.barrier(taskids)
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print "executed %.1f secs in %.1f secs" % (stime, ptime)
    print "%.3fx parallel performance on %i engines" % (scale, nengines)
    print "%.1f%% of theoretical max" % (100 * scale / nengines)
Example #23
#from __future__ import with_statement

# XXX This file is currently disabled to preserve 2.4 compatibility.

#def test_simple():
if 0:

    # XXX - for now, we need a running cluster to be started separately.  The
    # daemon work is almost finished, and will make much of this unnecessary.
    from IPython.kernel import client
    mec = client.MultiEngineClient(('127.0.0.1', 10105))

    try:
        mec.get_ids()
    except ConnectionRefusedError:
        import os, time
        os.system('ipcluster -n 2 &')
        time.sleep(2)
        mec = client.MultiEngineClient(('127.0.0.1', 10105))

    mec.block = False

    import itertools
    c = itertools.count()

    parallel = RemoteMultiEngine(mec)

    mec.pushAll()

    ## with parallel as pr:
    ##     # A comment
Example #24
def engines_import(module, mec=None):
    '''imports module (or more than one, separated by commas) on all engines
    '''
    if not mec:
        mec = client.MultiEngineClient()
    return mec.execute('import %s' % module)
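
A hedged usage sketch (assumes "from IPython.kernel import client" and a running controller):

mec = client.MultiEngineClient()
engines_import('os,sys', mec)  # executes "import os,sys" on every engine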
Example #25
# Parallel Python in IPython
# In a shell, start a local cluster with six engines:
#   ipcluster local -n 6
from IPython.kernel import client
mec = client.MultiEngineClient()
mec.get_ids()

# Additional engines can be attached later by running `ipengine` in a shell.

# Shut down the engines and the controller:
mec.kill(controller=True)
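
A minimal end-to-end sketch using the same old IPython.kernel API (the scatter/gather computation is a hypothetical illustration):

from IPython.kernel import client
mec = client.MultiEngineClient()
mec.scatter('xs', range(12))             # partition a list across engines
mec.execute('ys = [x * x for x in xs]')  # each engine squares its slice
print mec.gather('ys')                   # reassembled: [0, 1, 4, ..., 121]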
Example #26
min_strike = ask_question("Min strike price", float, 90.0)
max_strike = ask_question("Max strike price", float, 110.0)
n_sigmas = ask_question("Number of volatility values", int, 5)
min_sigma = ask_question("Min volatility", float, 0.1)
max_sigma = ask_question("Max volatility", float, 0.4)

strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The MultiEngineClient is used to set up the calculation and works with all
# engines.
mec = client.MultiEngineClient(profile=cluster_profile)

# The TaskClient is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
tc = client.TaskClient(profile=cluster_profile)

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.
mec.run('mcpricer.py')

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals