def segmentTesting(thisModel, Ysample, Lnum, verbose, label, serialMode=False, optimise=100, calibrate=False): """ Method to test multiple samples at a time. Args: thisModel : SAMObject model to recall from. Ysample : Novel feature vector to test. Lnum : Ground truth labels to compare with. verbose : Enable or disable logging to stdout. label : Label for the current segments being tested. serialMode : Boolean to test serially or in parallel. optimise : Number of optimisation iterations to perform during recall. calibrate : Indicate calibration mode when True which requires a different return. Returns: labelList, confMatrix, ret, variancesKnown, variancesUnknown if calibrate is `True`. labelList, confMatrix, labelComparisonDict if calibrate is `False`. labelList : List of classification labels confMatrix : Numpy array with the confusion matrix ret : Classification object variancesKnown : Variances returned during calibration for known training instances variancesUnknown : Variances returned during calibration for unknown training instances labelComparisonDict : Dictionary with two items `'original'` and `'results'`. """ def testFunc(data, lab): d = testSegment(thisModel, data, verbose, visualiseInfo=None, optimise=optimise) if verbose: if lab == d[0]: res = True else: res = False logging.info('Actual ' + str(lab).ljust(11) + ' Classification: ' + str(d[0]).ljust(11) + ' with ' + \ str(d[1])[:6] + ' confidence: ' + str(res) + '\n') return d logging.info('') if type(Lnum).__module__ == np.__name__: useModelLabels = True else: useModelLabels = False if len(thisModel) > 1: labelList = copy.deepcopy(thisModel[0].textLabels) labelList.append('unknown') else: labelList = copy.deepcopy(thisModel[0].textLabels) labelList.append('unknown') confMatrix = np.zeros((len(labelList), len(labelList))) numItems = len(Ysample) off1 = 11 off2 = 8 off3 = len(str(numItems)) if useModelLabels: Lsample = [ thisModel[0].textLabels[int(Lnum[i])] for i in range(len(Lnum)) ] else: Lsample = Lnum if numItems < 1500: serialMode = True c = None logging.info('serialMode: ' + str(serialMode)) if not serialMode and thisModel[0].parallelOperation: try: logging.info('Trying engines ...') c = ipp.Client() numWorkers = len(c._engines) logging.info('Number of engines: ' + str(numWorkers)) except: logging.error("Parallel workers not found") thisModel[0].parallelOperation = False numWorkers = 1 else: logging.info(str(serialMode) + '= True') thisModel[0].parallelOperation = False numWorkers = 1 logging.info('Number of engines: ' + str(numWorkers)) # average 5 classifications before providing this time vTemp = copy.deepcopy(verbose) verbose = False if len(Lsample) < 400: numTrials = len(Lsample) * 0.1 numTrials = max(1, int(numTrials)) else: numTrials = 20 t0 = time.time() for j in range(numTrials): testFunc(Ysample[j], Lsample[j]) t1 = time.time() verbose = vTemp thisModel[0].avgClassTime = (t1 - t0) / numTrials logging.info('classification rate: ' + str(1.0 / thisModel[0].avgClassTime) + 'fps') logging.info('estimated time: ' + str(thisModel[0].avgClassTime * numItems / (60 * numWorkers)) + 'mins for ' + str(numItems) + ' items with ' + str(numWorkers) + ' workers') t0 = time.time() logging.info(t0) # check size of model # modelSize is size in megabytes modelSize = deep_getsizeof(thisModel, set()) / 1024.0 / 1024.0 logging.info("modelSize: " + str(modelSize)) logging.warning("required testing size: " + str((modelSize * numWorkers * 2) + 400) + " MB") # check available system memory in megabytes freeSystemMem = 
float(psutil.virtual_memory()[4]) / 1024.0 / 1024.0 logging.info("free memory: " + str(freeSystemMem) + " MB") if modelSize > 100 or not thisModel[0].parallelOperation or serialMode: # serial testing logging.warning('Testing serially') ret = [] for j in range(len(Lsample)): logging.info(str(j) + '/' + str(len(Lsample))) ret.append(testFunc(Ysample[j], Lsample[j])) else: # parallel testing logging.info('Testing in parallel') dview = c[:] # not load balanced lb = c.load_balanced_view() # load balanced # with dview.sync_imports(): # from SAM.SAM_Core import utils # if not thisModel[0].modelLoaded : dview.push({'thisModel': thisModel}) dview.push({'verbose': verbose}) dview.push({'optimise': optimise}) # thisModel[0].modelLoaded = True syn = lb.map_async(testFunc, Ysample, Lsample) wait_watching_stdout(syn, dt=1, truncate=1000) ret = syn.get() # maybe these are upsetting the ipcluster # dview.clear() # dview.purge_results('all') t1 = time.time() logging.info(t1) logging.info('Actual time taken = ' + str(t1 - t0)) if calibrate: variancesKnown = [] variancesUnknown = [] for i in range(len(ret)): currLabel = Lsample[i] if verbose: if currLabel == ret[i][0]: result = True else: result = False logging.info( str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) + ' Model: ' + ret[i][0].ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) + ' confidence: ' + str(result)) if currLabel in thisModel[0].textLabels and currLabel != "unknown": knownLabel = True else: knownLabel = False currLabel = 'unknown' if knownLabel: variancesKnown.append(ret[i][1]) else: variancesUnknown.append(ret[i][1]) confMatrix[labelList.index(currLabel), labelList.index(ret[i][0])] += 1 return labelList, confMatrix, ret, variancesKnown, variancesUnknown else: labelComparisonDict = dict() labelComparisonDict['original'] = [] labelComparisonDict['results'] = [] for i in range(len(ret)): currLabel = Lsample[i] retLabel = ret[i][0] if currLabel not in thisModel[0].textLabels: currLabel = 'unknown' if verbose: if currLabel == retLabel: result = True else: result = False logging.info( str(i).rjust(off3) + '/' + str(numItems) + ' Truth: ' + currLabel.ljust(off1) + ' Model: ' + retLabel.ljust(off1) + ' with ' + str(ret[i][1])[:6].ljust(off2) + ' confidence: ' + str(result)) labelComparisonDict['original'].append(Lsample[i]) labelComparisonDict['results'].append(retLabel) confMatrix[labelList.index(currLabel), labelList.index(retLabel)] += 1 return labelList, confMatrix, labelComparisonDict
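# --- Illustrative sketch (not from the original source): the parallel branch of
# segmentTesting() above follows a common ipyparallel pattern -- push shared,
# read-only state to every engine with a DirectView, then dispatch per-sample
# work through a load-balanced view. `classify_one`, `samples` and `threshold`
# are hypothetical stand-ins for testFunc, Ysample and the pushed model state.
import ipyparallel as ipp

rc = ipp.Client()
dview = rc[:]                      # direct view: talks to every engine
lbview = rc.load_balanced_view()   # load-balanced view: a task queue

dview.push({'threshold': 0.75})    # shared state, visible in the engine namespace


def classify_one(sample):
    # `threshold` resolves against the engine namespace populated by push()
    return sample > threshold


samples = [0.1, 0.9, 0.5]
async_result = lbview.map_async(classify_one, samples)
async_result.wait_interactive()    # simple progress display while tasks finish
print(async_result.get())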
import ipyparallel as ipp

client = ipp.Client()


# this will only run on machines that can import numpy:
@ipp.require('numpy')
def norm(A):
    from numpy.linalg import norm
    return norm(A, 2)


def checkpid(pid):
    """check whether this engine's pid matches the given pid"""
    import os
    return os.getpid() == pid


def checkhostname(host):
    import socket
    return socket.gethostname() == host


def getpid():
    import os
    return os.getpid()


pid0 = client[0].apply_sync(getpid)
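# A possible follow-up using the helpers defined above (assumes at least one
# engine with numpy installed). Since pid0 was fetched from engine 0, checkpid
# run on that same engine should return True.
import numpy as np

matrix_norm = client[0].apply_sync(norm, np.random.random((4, 4)))
same_process = client[0].apply_sync(checkpid, pid0)   # expected: True
print(matrix_norm, same_process)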
#!/usr/bin/env python3

try:
    if (directview):
        pass
except:
    import ipyparallel
    c = ipyparallel.Client(profile="mpi_slurm", cluster_id="Azure_cluster_0")
    directview = c[:]
    directview.block = True

with directview.sync_imports():
    import numpy
    import mpi4py
    from mpi4py import MPI


class rankinfo(object):
    '''This holds a few "global" values of our problem, like stencil width
    and problem size.

    Parameters
    ----------
    sizes : tuple of numbers of lattice points without ghost points along
            the coordinate axes (in x,y,z order)

    Attributes
    ----------
    rank : the rank of this class in MPI.COMM_WORLD
    size : the number of ranks in MPI.COMM_WORLD
    ndim : how many physical dimensions does our lattice have
if arguments.timeseries_name is None:
    directory_full_path = arguments.directory
else:
    directory_full_path = os.path.join(arguments.directory,
                                       arguments.timeseries_name)

if not os.path.exists(directory_full_path):
    raise Exception("Directory %s does not exist." % directory_full_path)

if arguments.cluster_sample_count < 1:
    raise Exception("Cluster sample count must be greater than zero.")
_numSamples = arguments.cluster_sample_count

try:
    pool = ipyparallel.Client(profile=arguments.profile)[:]
except:
    raise Exception("A running IPython parallel cluster is required to run this script.")


def tardir(path):
    # tarh is the tarfile handle
    with tarfile.open(os.path.join(path, 'slycat-timeseries.tar.gz'), 'w:gz') as tarh:
        for root, dirs, files in os.walk(path):
            for file in files:
                if file != 'slycat-timeseries.tar.gz':
                    tarh.add(os.path.join(root, file), arcname=file)


# Compute the model.
try:
    print("Examining and verifying data.")
    """
    Find number of timeseries and accurate cluster sample count before starting model
    """
def __init__(self, wrapper, backend='multiprocessing', n_cpus=-1, verbosity=10, dask_args=None): # -1 cpus means all available cpus - 1 for the scheduler if n_cpus == -1: import multiprocessing n_cpus = multiprocessing.cpu_count() - 1 self.n_cpus = n_cpus self.wrapper = wrapper self.verbosity = verbosity self.dask_args = dask_args # This configures how to run single point simulations on the model: self._exec = self.wrapper ot.OpenTURNSPythonFunction.__init__(self, self.wrapper.getInputDimension(), self.wrapper.getOutputDimension()) self.setInputDescription(self.wrapper.getInputDescription()) self.setOutputDescription(self.wrapper.getOutputDescription()) assert backend in [ 'ipython', 'ipyparallel', 'multiprocessing', 'pathos', 'joblib', 'dask' ], "Unknown backend" # This configures how to run samples on the model : if self.n_cpus == 1: self._exec_sample = self.wrapper elif (backend == 'ipython') or (backend == 'ipyparallel'): # Check that ipyparallel is installed try: import ipyparallel as ipp # If it is, see if there is a cluster running try: rc = ipp.Client() ipy_backend = True except (ipp.error.TimeoutError, IOError) as e: ipy_backend = False import logging logging.warning('Unable to connect to an ipython cluster.') except ImportError: ipy_backend = False import logging logging.warning('ipyparallel package missing.') if ipy_backend: self._exec_sample = _exec_sample_ipyparallel( self.wrapper, self.getInputDimension(), self.getOutputDimension()) else: logging.warning('Using multiprocessing backend instead') self._exec_sample = _exec_sample_multiprocessing( self.wrapper, self.n_cpus) elif backend == 'joblib': # Check that joblib is installed try: import joblib joblib_backend = True except ImportError: try: from sklearn.externals import joblib joblib_backend = True except ImportError: joblib_backend = False import logging logging.warning('joblib package missing.') if joblib_backend: self._exec_sample = _exec_sample_joblib( self.wrapper, self.n_cpus, self.verbosity) else: logging.warning('Using multiprocessing backend instead') self._exec_sample = _exec_sample_multiprocessing( self.wrapper, self.n_cpus) elif backend == 'multiprocessing': self._exec_sample = _exec_sample_multiprocessing( self.wrapper, self.n_cpus) elif backend == 'pathos': self._exec_sample = _exec_sample_pathos(self.wrapper, self.n_cpus) elif backend == 'dask': assert 'scheduler' in self.dask_args, 'dask_args must have "scheduler" as key' assert 'workers' in self.dask_args, 'dask_args must have "workers" as key' self._exec_sample, self.dask_cluster, self.dask_client = _exec_sample_dask( self.wrapper, self.dask_args, self.verbosity) def close_dask(): from time import sleep self.dask_client.close() sleep(1) self.dask_cluster.close() self.close_dask = close_dask
import ipyparallel as ipp
import pandas as pd
import tables as tb
import os
import argparse

parser = argparse.ArgumentParser(description='Model and year to estimate.')
parser.add_argument('model', type=str, nargs='?', default='gpin')
parser.add_argument('year', type=int, nargs='?', default=2014)
args = parser.parse_args()
print(vars(args))

rc = ipp.Client(cluster_id="{0}-{1}".format(args.model, args.year))
print(len(rc))
dv = rc[:]
dv.push(vars(args))
lv = rc.load_balanced_view()

h5 = tb.open_file('/scratch/nyu/hue/taqdf_1319.h5', mode='r')
df = h5.get_node('/data/table')
idx = list(
    set(
        filter(lambda x: x[1] == args.year,
               zip(df.col('permno'), df.col('yyyy')))))


@ipp.interactive
def est(x):
    import os
    import pandas as pd
    import tables as tb
def get_client(cluster_id, profile, engines, timeout, cores, quiet, **kwargs): """ Creates a client to view ipcluster engines for a given profile and returns it with at least one engine spun up and ready to go. If no engines are found after nwait amount of time then an error is raised. If engines==MPI it waits a bit longer to find engines. If the number of engines is set then it waits even longer to try to find that number of engines. """ ## save stds for later, we're gonna hide them to prevent external printing devnull = open(os.devnull, 'w') save_stdout = sys.stdout save_stderr = sys.stderr sys.stdout = devnull sys.stderr = devnull ## get cluster_info print string connection_string = " establishing parallel connection:" ## wrapped search for ipcluster try: ## are we looking for a running ipcluster instance? if profile not in [None, "default"]: args = {'profile': profile, "timeout": timeout} else: clusterargs = [cluster_id, profile, timeout] argnames = ["cluster_id", "profile", "timeout"] args = {key:value for key, value in zip(argnames, clusterargs)} ## get connection within timeout window of wait time and hide messages ipyclient = ipp.Client(**args) sys.stdout = save_stdout sys.stderr = save_stderr ## check that all engines have connected if (engines == "MPI") or ("pta-cli-" in cluster_id): if not quiet: print(connection_string) for _ in range(6000): initid = len(ipyclient) time.sleep(0.01) ## If MPI then wait for all engines to start so we can report ## how many cores are on each host. If Local then only wait for ## one engine to be ready and then just go. if (engines == "MPI") or ("pta-cli-" in cluster_id): ## wait for cores to be connected if cores: time.sleep(0.1) if initid == cores: break if initid: time.sleep(3) if len(ipyclient) == initid: break else: if cores: if initid == cores: break else: if initid: break except KeyboardInterrupt as inst: raise inst ## This is raised if ipcluster is not running ------------ except IOError as inst: if "pta-cli-" in cluster_id: raise PTAError(NO_IPCLUSTER_CLI) else: raise PTAError(NO_IPCLUSTER_API) except (ipp.TimeoutError, ipp.NoEnginesRegistered) as inst: raise inst except Exception as inst: raise inst finally: ## ensure that no matter what we reset the stds sys.stdout = save_stdout sys.stderr = save_stderr return ipyclient
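# Hedged usage sketch for get_client() above; the argument values are
# illustrative assumptions, not taken from the original project.
if __name__ == "__main__":
    ipyclient = get_client(
        cluster_id="",        # default ipcluster id
        profile=None,         # fall back to the default profile
        engines="Local",      # or "MPI" to wait for all MPI engines
        timeout=30,
        cores=4,              # expected number of engines
        quiet=False,
    )
    print(len(ipyclient), "engines connected")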
def setup(self, number_of_engines, is_coalescing, depth):
    self.client = ipp.Client(profile='asv', cluster_id=f'depth_{depth}')
    self.view = self.client.broadcast_view(is_coalescing=is_coalescing)
    self.view.targets = list(range(number_of_engines))
    wait_for(lambda: len(self.client) >= number_of_engines)
def setup(self, number_of_engines, number_of_bytes):
    self.client = ipp.Client(profile='asv')
    self.view = get_view(self)
    self.view.targets = list(range(number_of_engines))
    wait_for(lambda: len(self.client) >= number_of_engines)
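# The two benchmark setup() methods above call a wait_for() helper that is not
# shown in this excerpt. A minimal polling implementation (an assumption, not
# the project's own code) could look like this:
import time


def wait_for(condition, timeout=60.0, interval=0.1):
    """Block until condition() is truthy, or raise TimeoutError."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if condition():
            return
        time.sleep(interval)
    raise TimeoutError("condition was not met within %.1f s" % timeout)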
# import IPython.parallel as ipp  # Python 2
import ipyparallel as ipp  # Python 3

rc = ipp.Client(profile='default', cluster_id='')
rc.ids

dview = rc[:]
dview.block = True
dview.apply(lambda: "Hello, World")

lview = rc.load_balanced_view()
lview.block = True

import pandas
dat = pandas.read_csv('/global/scratch/paciorek/bayArea.csv',
                      header=None, encoding='latin1')
dat.columns = ('Year', 'Month', 'DayofMonth', 'DayOfWeek', 'DepTime', 'CRSDepTime',
               'ArrTime', 'CRSArrTime', 'UniqueCarrier', 'FlightNum', 'TailNum',
               'ActualElapsedTime', 'CRSElapsedTime', 'AirTime', 'ArrDelay', 'DepDelay',
               'Origin', 'Dest', 'Distance', 'TaxiIn', 'TaxiOut', 'Cancelled', 'CancellationCode',
               'Diverted', 'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay', 'LateAircraftDelay')

dview.execute('import statsmodels.api as sm')

dat2 = dat.loc[:, ('DepDelay', 'Year', 'Dest', 'Origin')]
dests = dat2.Dest.unique()

mydict = dict(dat2=dat2, dests=dests)
dview.push(mydict)


def f(id):
    sub = dat2.loc[dat2.Dest == dests[id], :]
    sub = sm.add_constant(sub)
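# The worker f() above is cut off in this excerpt. A hedged sketch of how such a
# per-destination regression might look and how it could be dispatched through
# the load-balanced view; the OLS formulation and the name fit_dest are
# assumptions, not the original code.
def fit_dest(id):
    # dat2, dests and sm are available on the engines via push()/execute() above
    sub = dat2.loc[dat2.Dest == dests[id], ['DepDelay', 'Year']].dropna()
    X = sm.add_constant(sub[['Year']])
    return sm.OLS(sub['DepDelay'], X).fit().params


# lview is blocking, so this returns one parameter Series per destination
results = lview.map(fit_dest, range(len(dests)))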
def run(func_parallel_loop, func_gen_args, func_init=None, base_dir=None, results_dir=None, description=None): ''' Runs the simulation Parameters ---------- func_parallel_loop: function The function that should be parallelized func_gen_args: function The function that will generate all the different inputs for func_parallel_loop func_init: function, optional A function that will be run before the simulation starts. This might generate some data or import some files for example base_dir: str, optional The location of the base directory for the simulation results_dir: str, optional The name of the directory where to save results description: str, optional A short description of the simulation for the help function ''' import os, json if description is None: description = 'Generic simulation script' if base_dir is None: base_dir = './' base_dir = os.path.abspath(base_dir) if results_dir is None: results_dir = os.path.join(base_dir, 'data/') elif not os.path.isabs(results_dir): results_dir = os.path.join(base_dir, results_dir) # create results directory if it doesn't exist if not os.path.exists(results_dir): os.mkdir(results_dir) parser = argparse.ArgumentParser(description=description) parser.add_argument('-d', '--dir', type=str, help='directory to store sim results') parser.add_argument('-p', '--profile', type=str, help='ipython profile of cluster') parser.add_argument('-t', '--test', action='store_true', help='test mode, runs a single loop of the simulation') parser.add_argument('-s', '--serial', action='store_true', help='run in a serial loop, ipyparallel not called') parser.add_argument( '--dummy', action='store_true', help= 'tags the directory as dummy, can be used for running small batches') parser.add_argument('parameters', type=str, help='JSON file containing simulation parameters') cli_args = parser.parse_args() ipcluster_profile = cli_args.profile test_flag = cli_args.test serial_flag = cli_args.serial dummy_flag = cli_args.dummy data_dir_name = None parameter_file = cli_args.parameters # Check the state of the github repository if dummy_flag: tag = 'dummy' else: # Not a dummy run, try to get the git hash try: tag = get_git_hash(base_dir, length=10) except DirtyGitRepositoryError: if test_flag: import warnings warnings.warn( 'The git repo has uncommited modifications. Going ahead for test.' ) tag = 'test' else: raise ValueError( 'The git repo has uncommited modifications. Aborting simulation.' 
) except InvalidGitRepositoryError: tag = '' # get all the parameters with open(parameter_file, 'r') as f: parameters = json.load(f) # if no name is given, use the parameters file name if 'name' not in parameters: name = os.path.splitext(os.path.basename(parameter_file))[0] parameters['name'] = name else: name = parameters['name'] # record date and time date = time.strftime("%Y%m%d-%H%M%S") # for convenient access to parameters: p = collections.namedtuple('Struct', parameters.keys())(*parameters.values()) # Save the result to a directory if data_dir_name is None: ttag = '_' + tag if tag != '' else tag data_dir = os.path.join( results_dir, data_dir_format.format(date=date, name=name, tag=ttag)) else: data_dir = data_dir_name data_file_name = os.path.join(data_dir, data_file) # create directory if it doesn't exist if not os.path.exists(data_dir): os.mkdir(data_dir) # add a few practical things to the parameters parameters['_git_sha'] = tag parameters['_date'] = date parameters['_base_dir'] = base_dir parameters['_results_dir'] = data_dir parameters['_parallel'] = not serial_flag # Save the arguments in a json file param_file_name = os.path.join(data_dir, param_file) with open(param_file_name, "w") as f: json.dump(parameters, f, indent=2) f.close() # run the user provided init method if func_init is not None: func_init(parameters) # generate all the arguments to simulate arguments = func_gen_args(parameters) # Save the arguments in a json file args_file_name = os.path.join(data_dir, args_file) with open(args_file_name, "w") as f: json.dump(arguments, f, indent=0) f.close() # There is the option to only run one loop for test if test_flag: print('Running one test loop only.') arguments = arguments[:2] # Prepare a few things for the status line n_tasks = len(arguments) digits = int(math.log10(n_tasks) + 1) dformat = '{:' + str(digits) + 'd}' status_line = (' ' + dformat + '/' + dformat + (' tasks done. ' 'Forecast end {:>20s}. ' 'Ellapsed: {:>8s} Remaining: {:>8s}')) print('/!\\ the time estimate will only be correct ' 'when all tasks take about the same time to finish /!\\') forecast = 'NA' time_remaining = 'NA' # Main processing loop if serial_flag: # add parameters to builtins so that it is accessible in the namespace # of the calling script import builtins builtins.parameters = parameters print('Running everything in a serial loop.') # record start timestamp then = time.time() start_time = datetime.datetime.now() # Serial processing for i, ag in enumerate(arguments): result = func_parallel_loop(ag) # save the new result! 
json_append(data_file_name, result) # Now format some timing estimation n_remaining = n_tasks - (i + 1) ellapsed = int(time.time() - then) ellapsed_fmt = '{:02}:{:02}:{:02}'.format(ellapsed // 3600, ellapsed % 3600 // 60, ellapsed % 60) # estimate remaining time if ellapsed > 0: rate = (i + 1) / ellapsed # tasks per second delta_finish_min = int(rate * n_remaining / 60) + 1 tdelta = datetime.timedelta(minutes=delta_finish_min) end_date = datetime.datetime.now() + tdelta # convert to strings forecast = end_date.strftime('%Y-%m-%d %H:%M:%S') s = int(tdelta.total_seconds()) time_remaining = '{:02}:{:02}:{:02}'.format( s // 3600, s % 3600 // 60, s % 60) formatted_status_line = status_line.format(i + 1, n_tasks, forecast, ellapsed_fmt, time_remaining) print(formatted_status_line, end='\r') # clean the output print(' ' * len(formatted_status_line)) all_loops = int(time.time() - then) all_loops_format = '{:02}:{:02}:{:02}'.format(all_loops // 3600, all_loops % 3600 // 60, all_loops % 60) print('Total actual processing time: {} ({} s)'.format( all_loops_format, all_loops)) else: # Parallel processing code import ipyparallel as ip print('Using ipyparallel processing.') # Start the parallel processing c = ip.Client(profile=ipcluster_profile) NC = len(c.ids) print(NC, 'workers on the job') # Clear the engines namespace c.clear(block=True) # Push the global config to the workers var_space = dict(parameters=parameters, ) c[:].push(var_space, block=True) # record start timestamp then = time.time() start_time = datetime.datetime.now() # use a load balanced view lbv = c.load_balanced_view() # dispatch to workers ar = lbv.map_async(func_parallel_loop, arguments) # We use a try here so that if something happens, # we can catch it and abort the jobs on all engines try: for i, result in enumerate(ar): # save the new result! json_append(data_file_name, result) # Now format some timing estimation n_remaining = n_tasks - ar.progress ellapsed = int(time.time() - then) ellapsed_fmt = '{:02}:{:02}:{:02}'.format( ellapsed // 3600, ellapsed % 3600 // 60, round(ellapsed % 60)) if ar.progress > NC and n_remaining > NC: # estimate remaining time rate = ellapsed / ar.progress # tasks per second delta_finish_min = int(rate * n_remaining / 60) + 1 tdelta = datetime.timedelta(minutes=delta_finish_min) end_date = datetime.datetime.now() + tdelta # convert to strings forecast = end_date.strftime('%Y-%m-%d %H:%M:%S') s = int(tdelta.total_seconds()) time_remaining = '{:02}:{:02}:{:02}'.format( s // 3600, s % 3600 // 60, s % 60) formatted_status_line = status_line.format( ar.progress, n_tasks, forecast, ellapsed_fmt, time_remaining) print(formatted_status_line, end='\r') # clean the output print(' ' * len(formatted_status_line)) print('Show all output from nodes, if any:') ar.display_outputs() except: # so here, things went south. Show the traceback # and abort all the jobs scheduled import traceback traceback.print_exc() print('Aborting all remaining jobs...') c.abort(block=True) all_loops = int(time.time() - then) all_loops_format = '{:02}:{:02}:{:02}'.format(all_loops // 3600, all_loops % 3600 // 60, all_loops % 60) print('Total actual processing time: {} ({} s)'.format( all_loops_format, all_loops)) print('Saved data to folder: ' + data_dir)
import ipyparallel

ipp_client = ipyparallel.Client(
    url_file="/groups/turaga/home/grisaitisw/.ipython/profile_greentea/security/ipcontroller-client.json",
    timeout=60 * 60  # 1 hour
)

executor = ipp_client.load_balanced_view()
executor.set_flags(retries=100000)
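# Hypothetical follow-up showing how tasks might be submitted through the
# retrying load-balanced view above; process_block and its argument are
# placeholders, not part of the original script.
def process_block(block_id):
    # stand-in for the real per-block work
    return block_id * 2


async_results = [executor.apply_async(process_block, i) for i in range(10)]
outputs = [ar.get() for ar in async_results]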
def main(): """ main function """ ## turn off traceback for the CLI ip.__interactive__ = 0 ## Check for a new version on anaconda _check_version() ## parse params file input (returns to stdout if --help or --version) args = parse_command_line() ## Turn the debug output written to ipyrad_log.txt up to 11! ## Clean up the old one first, it's cleaner to do this here than ## at the end (exceptions, etc) if os.path.exists(ip.__debugflag__): os.remove(ip.__debugflag__) if args.debug: print("\n ** Enabling debug mode ** ") ip._debug_on() atexit.register(ip._debug_off) ## create new paramsfile if -n if args.new: ## Create a tmp assembly, call write_params to make default params.txt try: tmpassembly = ip.Assembly(args.new, quiet=True, cli=True) tmpassembly.write_params("params-{}.txt".format(args.new), force=args.force) except Exception as inst: print(inst) sys.exit(2) print("\n New file 'params-{}.txt' created in {}\n".\ format(args.new, os.path.realpath(os.path.curdir))) sys.exit(2) ## if params then must provide action argument with it if args.params: if not any([args.branch, args.results, args.steps]): print(""" Must provide action argument along with -p argument for params file. e.g., ipyrad -p params-test.txt -r ## shows results e.g., ipyrad -p params-test.txt -s 12 ## runs steps 1 & 2 e.g., ipyrad -p params-test.txt -b newbranch ## branch this assembly """) sys.exit(2) if not args.params: if any([args.branch, args.results, args.steps]): print(""" Must provide params file for branching, doing steps, or getting results. e.g., ipyrad -p params-test.txt -r ## shows results e.g., ipyrad -p params-test.txt -s 12 ## runs steps 1 & 2 e.g., ipyrad -p params-test.txt -b newbranch ## branch this assembly """) ## if branching, or merging do not allow steps in same command ## print spacer if any([args.branch, args.merge]): args.steps = "" print("") ## always print the header when doing steps header = \ "\n -------------------------------------------------------------"+\ "\n ipyrad [v.{}]".format(ip.__version__)+\ "\n Interactive assembly and analysis of RAD-seq data"+\ "\n -------------------------------------------------------------" ## Log the current version. End run around the LOGGER ## so it'll always print regardless of log level. with open(ip.__debugfile__, 'a') as logfile: logfile.write(header) logfile.write("\n Begin run: {}".format(time.strftime("%Y-%m-%d %H:%M"))) logfile.write("\n Using args {}".format(vars(args))) logfile.write("\n Platform info: {}".format(os.uname())) ## if merging just do the merge and exit if args.merge: print(header) merge_assemblies(args) sys.exit(1) ## create new Assembly or load existing Assembly, quit if args.results elif args.params: parsedict = parse_params(args) if args.branch: branch_assembly(args, parsedict) elif args.steps: ## print header print(header) ## Only blank the log file if we're actually going to run a new ## assembly. This used to be in __init__, but had the side effect ## of occasionally blanking the log file in an undesirable fashion ## for instance if you run a long assembly and it crashes and ## then you run `-r` and it blanks the log, it's crazymaking. 
if os.path.exists(ip.__debugfile__): if os.path.getsize(ip.__debugfile__) > 50000000: with open(ip.__debugfile__, 'w') as clear: clear.write("file reset") ## run Assembly steps ## launch or load assembly with custom profile/pid data = getassembly(args, parsedict) ## set CLI ipcluster terms data._ipcluster["threads"] = args.threads ## if ipyclient is running (and matched profile) then use that one if args.ipcluster: ipyclient = ipp.Client(profile=args.ipcluster) data._ipcluster["cores"] = len(ipyclient) ## if not then we need to register and launch an ipcluster instance else: ## set CLI ipcluster terms ipyclient = None data._ipcluster["cores"] = args.cores if args.cores else detect_cpus() data._ipcluster["engines"] = "Local" if args.MPI: data._ipcluster["engines"] = "MPI" if not args.cores: raise IPyradWarningExit("must provide -c argument with --MPI") ## register to have a cluster-id with "ip- name" data = register_ipcluster(data) ## set to print headers data._headers = 1 ## run assembly steps steps = list(args.steps) data.run( steps=steps, force=args.force, preview=args.preview, show_cluster=1, ipyclient=ipyclient) if args.results: showstats(parsedict)
def __init__(self, profile=None, cluster_id=None):
    self.client = ipp.Client(profile=profile, cluster_id=cluster_id)
    self.statusDict = {}
    self.sleepSeconds = SLEEP_SECONDS
    self.keyField = 'key'
def calculate_background( filename, output_dir, *, check_validity_channel=False, th_factor=3.0, above_threshold_pixel_ratio_max=0.05, below_threshold_pixel_ratio_max=0.05, valid_ratio_threshold=0.4, intensity_bin_size=25, thumbnail_size=20, quantile=0.001, ipcluster_nproc=1, ): params_dict = locals() cli = ipp.Client(profile="default") dview = cli[:] dview.clear() bview = cli.load_balanced_view() dview.execute(""" import javabridge import bioformats as bf import pycziutils javabridge.start_vm(class_path=bf.JARS) """) os.makedirs(output_dir, exist_ok=True) log_dir = path.join(output_dir, "calcluate_background_log") os.makedirs(log_dir, exist_ok=True) def savefig(fig, name): fig.savefig(path.join(log_dir, name), bbox_inches="tight") ############## Load files ################ meta = pycziutils.get_tiled_omexml_metadata(filename) with open(path.join(output_dir, "metadata.xml"), "w") as f: f.write(meta) reader = pycziutils.get_tiled_reader(filename) sizeS, sizeT, sizeC, sizeX, sizeY, sizeZ = pycziutils.summarize_image_size( reader) pixel_sizes = pycziutils.parse_pixel_size(meta) assert pixel_sizes[1] == "µm" channels = pycziutils.parse_channels(meta) channel_names = [c["@Fluor"] for c in channels] print(channel_names) params_dict.update({ "channel_names": channel_names, }) if check_validity_channel: check_validity_channel_index = [ j for j, c in enumerate(channels) if check_validity_channel in c["@Fluor"] ][0] planes_df = pycziutils.parse_planes(meta) null_indices = planes_df.isnull().any(axis=1) params_dict["null_indices"] = list(planes_df[null_indices].index) planes_df = planes_df.loc[~null_indices, :] planes_df["S_index"] = planes_df["image"] if check_validity_channel: ############## Summarize image intensities ################ send_variable(dview, "filename", path.abspath(filename)) send_variable(dview, "read_image", read_image) send_variable(dview, "summarize_image", summarize_image) dview.execute("_reader = pycziutils.get_tiled_reader(filename)") check_ipcluster_variable_defined(dview, "_reader", timeout=120) sleep(10) check_ipcluster_variable_defined(dview, "read_image", timeout=120) check_ipcluster_variable_defined(dview, "summarize_image", timeout=120) @ipp.require(summarize_image) def _summarize_image(row): return summarize_image(row, _reader, thumbnail_size, quantile) # pylint: disable=undefined-variable res = bview.map_async(_summarize_image, [row for _, row in list(planes_df.iterrows())]) res.wait_interactive() keys = ["thumbnail", "max", "min", "mean", "median", "stdev"] for i, k in enumerate(keys): planes_df[k] = [r[i] for r in res.get()] display(planes_df) ############## Calculate most frequent "standard" mean and stdev for a image ############## mean_mode = {} stdev_mode = {} for iC, grp in planes_df.groupby("C_index"): fig, ax = plt.subplots(1, 1, figsize=(10, 10)) c_name = channel_names[iC] h, *edges, im = ax.hist2d(grp["mean"], grp["stdev"], bins=intensity_bin_size) mean_mode[iC], stdev_mode[iC] = [ float((edge[x[0]] + edge[x[0] + 1]) / 2.0) for edge, x in zip(edges, np.where(h == np.max(h))) ] ax.plot(mean_mode[iC], stdev_mode[iC], "ro") ax.set_xlabel("mean intensity") ax.set_ylabel("stdev intensity") ax.set_title(c_name) savefig(fig, f"1_mean_and_stdev_instensities_{iC}_{c_name}.pdf") m, s = ( mean_mode[check_validity_channel_index], stdev_mode[check_validity_channel_index], ) th_low = m - th_factor * s th_high = m + th_factor * s params_dict.update({ "mean_mode": mean_mode, "stdev_mode": stdev_mode, "ph_th_low": float(th_low), "ph_th_high": float(th_high), }) ph_planes_df 
= planes_df[planes_df["C_index"] == check_validity_channel_index].copy() thumbail_output_name = "2_thresholded_thumbnail" thumbail_output_path = path.join(log_dir, thumbail_output_name) os.makedirs(thumbail_output_path, exist_ok=True) for iS, grp in ph_planes_df.groupby("S_index"): fig, axes = plt.subplots(1, 2, figsize=(10, 5)) img_mean = grp["thumbnail"].iloc[0] axes[0].imshow(img_mean, vmin=th_low, vmax=th_high) axes[1].hist(img_mean.flatten(), bins=20, range=(0, 8000)) axes[1].set_xlabel("intensity") axes[1].set_ylabel("freq") fig.suptitle("series " + str(iS) + " below th count: " + str(np.sum(img_mean < m - th_factor * s)) + " above th count: " + str(np.sum(img_mean > m + th_factor * s))) savefig( fig, path.join(thumbail_output_name, f"2_thresholded_thumbnails_{iS}.pdf"), ) plt.close("all") sigma = 20 / float(pixel_sizes[0]) params_dict.update({"sigma": sigma}) send_variable(dview, "threshold_image", threshold_image) res = bview.map_async( lambda row: threshold_image(row, _reader, sigma, th_low, th_high), # pylint: disable=undefined-variable [row for _, row in list(ph_planes_df.iterrows())], ) res.wait_interactive() print("ok") ph_planes_df["below_th_count"] = [r[0] for r in res.get()] ph_planes_df["above_th_count"] = [r[1] for r in res.get()] ph_planes_df[ "below_th_ratio"] = ph_planes_df["below_th_count"] / sizeX / sizeY ph_planes_df[ "above_th_ratio"] = ph_planes_df["above_th_count"] / sizeX / sizeY print("ok") ph_planes_df.drop("thumbnail", axis=1).to_csv(path.join(log_dir, "ph_planes_df.csv")) ############## judge if the position is valid to calculate background ############## fig, ax = plt.subplots(1, 1, figsize=(5, 5)) ph_planes_df["is_valid"] = ( ph_planes_df["below_th_ratio"] < below_threshold_pixel_ratio_max ) & (ph_planes_df["above_th_ratio"] < above_threshold_pixel_ratio_max) ax.scatter( ph_planes_df["below_th_ratio"], ph_planes_df["above_th_ratio"], c=ph_planes_df["is_valid"], s=1, marker="o", cmap=plt.get_cmap("viridis"), alpha=0.3, ) ax.set_xlabel("below threshold ratios") ax.set_ylabel("above threshold ratios") fig.tight_layout() savefig(fig, f"4_threshold_results.pdf") series_df = pd.DataFrame() for Si, grp in ph_planes_df.groupby("S_index"): X = grp["X"].iloc[0] assert np.all(X == grp["X"]) Y = grp["Y"].iloc[0] assert np.all(Y == grp["Y"]) series_df = series_df.append( pd.DataFrame( { "thumbnail": [np.mean(grp["thumbnail"], axis=0)], "is_valid_ratio": grp["is_valid"].sum() / len(grp), "X": X, "Y": Y, }, index=[Si], )) fig, axes = plt.subplots(1, 2, figsize=(10, 5)) im = axes[0].scatter(series_df["X"], series_df["Y"], c=series_df["is_valid_ratio"]) axes[0].set_title("valid_ratio") fig.colorbar(im, ax=axes[0]) axes[1].scatter( series_df["X"], series_df["Y"], c=series_df["is_valid_ratio"] > valid_ratio_threshold, ) axes[1].set_title("thresholded") fig.tight_layout() savefig(fig, f"5_valid_positions.pdf") series_df[ "is_valid"] = series_df["is_valid_ratio"] > valid_ratio_threshold series_df.drop("thumbnail", axis=1).to_csv(path.join(log_dir, "series_df.csv")) valid_series = series_df[series_df["is_valid"]].index planes_df["is_valid"] = planes_df["S_index"].isin(valid_series) else: planes_df["is_valid"] = True valid_planes_df = planes_df[planes_df["is_valid"]] print("valid_positions:", len(valid_planes_df)) planes_df.drop("thumbnail", axis=1, errors="ignore").to_csv( path.join(output_dir, "planes_df.csv")) ############## calclulate backgrounds ############## # t.c.z.y.x median_images = np.empty((sizeT, sizeC, sizeZ, sizeY, sizeX)) mean_images = np.empty((sizeT, sizeC, 
sizeZ, sizeY, sizeX)) median_images[...] = np.nan mean_images[...] = np.nan print(sizeT) # assert np.array_equal(valid_planes_df["T_index"].unique(),np.arange(sizeT)) # assert np.array_equal(valid_planes_df["C_index"].unique(),np.arange(sizeC)) # assert np.array_equal(valid_planes_df["Z_index"].unique(),np.arange(sizeZ)) for (iC, iT, iZ), grp in tqdm( valid_planes_df.groupby(["C_index", "T_index", "Z_index"])): imgs = [] for i, row in grp.iterrows(): imgs.append(read_image(row, reader)) imgs = np.array(imgs) lq = np.quantile(imgs, quantile, axis=0) hq = np.quantile(imgs, 1.0 - quantile, axis=0) mask = np.logical_or(imgs < lq, imgs > hq) imgs_trunc = ma.array(imgs, mask=mask) median_images[iT, iC, iZ, ...] = np.median(imgs, axis=0) mean_images[iT, iC, iZ, ...] = imgs_trunc.mean(axis=0) print("saving background...") with h5py.File(path.join(output_dir, "background_per_tile.hdf5"), "w") as h5f: h5f.create_dataset("median_images", data=median_images) h5f.create_dataset("mean_images", data=mean_images) # h5f.attrs["channels"]=channels h5f.attrs["dimension_order"] = "tczyx" print("saved background") ############## check correlation of backgrounds ############## for iC, iZ in itertools.product(range(sizeC), range(sizeZ)): c_name = channel_names[iC] for img_key, img in zip(["median", "mean"], [median_images, mean_images]): fig, axes = plt.subplots(1, 6, figsize=(18, 3)) ps = [] j = sizeT // 2 ims = [img[i, iC, iZ] for i in (0, j, -1)] ps.append(axes[0].imshow(ims[0])) ps.append(axes[1].imshow(ims[1])) ps.append(axes[2].imshow(ims[2])) ps.append(axes[3].imshow(ims[1] - ims[0])) ps.append(axes[4].imshow(ims[1] - ims[2])) for p, ax in zip(ps, axes): fig.colorbar(p, ax=ax) axes[5].plot(ims[0].flatten(), ims[-1].flatten(), ".") axes[0].set_title("at time 0") axes[1].set_title(f"at time {j}") axes[2].set_title(f"at time {iT-1}") axes[3].set_title(f"diff at time {j} and 0") axes[4].set_title(f"diff at time {j} and {iT-1}") fig.tight_layout() savefig( fig, f"6_background_correlation_C{iC}_{c_name}_Z{iZ}_{img_key}.png") plt.close("all") ############## summarize and save backgrounds ############## background_directory = path.join(output_dir, "averaged_background") os.makedirs(background_directory, exist_ok=True) for iC, iZ in itertools.product(range(sizeC), range(sizeZ)): c_name = channel_names[iC] for img_key, img in zip(["median", "mean"], [median_images, mean_images]): filename = f"{img_key}_C{iC}_{c_name}_Z{iZ}" averaged_img = np.nanmean(img[:, iC, iZ], axis=0) fig, ax = plt.subplots(1, 1, figsize=(5, 5)) p = ax.imshow(averaged_img) fig.colorbar(p, ax=ax) savefig(fig, f"7_time_averaged_background_{filename}.pdf") plt.close("all") io.imsave( path.join(background_directory, filename + ".tiff"), averaged_img, check_contrast=False, ) params_path = path.join(background_directory, "calculate_background_params.yaml") with open(params_path, "w") as f: yaml.dump(params_dict, f) image_props = { "channel_names": channel_names, "pixel_sizes": pixel_sizes, "sizeS": sizeS, "sizeT": sizeT, "sizeC": sizeC, "sizeZ": sizeZ, "sizeY": sizeY, "sizeX": sizeX, } image_props_path = path.join(output_dir, "image_props.yaml") with open(image_props_path, "w") as f: yaml.dump(image_props, f)
##########################################################################
# Example: Initializing IPyParallel
#
# This example demonstrates how to access the individual
# ipython engines running within the cluster.
#
import ipyparallel
import os
import socket

# Create a client instance, used to connect the controller to the remote engines
rc = ipyparallel.Client(profile='crestone-cpu')
nengines = len(rc)

# create a direct view into all engines
all_proc = rc[:]  # all_proc is a DirectView spanning every engine

# Only the controller prints this
print('\n ', nengines, " Python engines are active.\n")

# Each Python engine calls the gethostname and getpid functions
hostnames = all_proc.apply_sync(socket.gethostname)
pids = all_proc.apply_sync(os.getpid)

for i in range(nengines):
    istr = '{:02d}'.format(i)  # a 2-digit string whose value is i
    pstr = str(pids[i])
    hstr = str(hostnames[i])
    msg = 'Engine ' + istr + ': pid = ' + pstr + '; hostname = ' + hstr
    print(msg)
grid = ns.grid
partition = ns.partition
Lx = ns.Lx
Ly = ns.Ly
c = ns.c
tstop = ns.tstop
if ns.save:
    user_action = wave_saver
else:
    user_action = None

num_cells = 1.0 * (grid[0] - 1) * (grid[1] - 1)
final_test = True

# create the Client
rc = ipp.Client(profile=ns.profile)
num_procs = len(rc.ids)

if partition is None:
    partition = [1, num_procs]

assert partition[0] * partition[1] == num_procs, \
    "can't map partition %s to %i engines" % (partition, num_procs)

view = rc[:]
print("Running %s system on %s processes until %f" % (grid, partition, tstop))


# functions defining initial/boundary/source conditions
def I(x, y):
    from numpy import exp
def svd4tet(data, nboots=100, method="all", nquarts=None, force=False): """ API wrapper for svd4tet analysis data ipyrad Assembly object nboots number of non-parametric bootstrap replicates to run method all, random, or equal. Default is all, which samples all possible quartets. For very large trees (>50 tips) this may take too long, in which case you should use random or equal. The arg nquarts determines how many quartets will be samples. In random, nquarts are sampled and used. In equal, a starting tree is inferred and the random quartets are drawn so that they are spread ~equally across splits of the tree. nquarts The numer of random quartets sampled in random or equal method. Default is 10000, or all if all < 10000. force Overwrite existing """ ## check that method was entered correctly assert method in ["all", "random", "equal"], \ "method type not recognized, must be one of ['all', 'random', 'equal']" if method != "all": ## require nquarts if method not all assert nquarts, "if method != all, must enter a value for nquarts" ## don't allow nquarts to be greater than all totalquarts = n_choose_k(len(data.samples), 4) if nquarts > totalquarts: print(" nquarts > total quartets, switching to method='all'") method = "all" if nquarts < 500: print(" few possible quartets, only method='all' available") method = "all" ## launch ipclient, assumes ipyparallel is running ipyclient = ipp.Client(timeout=10) ## protects it from KBD try: run(data, nboots, method, nquarts, force, ipyclient) except (KeyboardInterrupt, SystemExit): ## protect from KBD while saving try: ## cancel submitted jobs #ipyclient.abort() ## kill running jobs #ipyclient.close() ## remove any abandoned tmp arrays abandon = glob.glob(os.path.join(data.dirs.svd, "*_tmp_*.h5")) for afile in abandon: os.remove(afile) except KeyboardInterrupt: pass finally: ## checkpoint the state and save LOGGER.info("\n saving checkpoints to [Assembly].svd") LOGGER.info(" array checkpoint: %s", data.svd.checkpoint_arr) LOGGER.info(" boot checkpoint: %s", data.svd.checkpoint_boot) data.save()
def wait_for_connection(self): """ Creates a client to view ipcluster engines for a given profile and returns it with at least one engine spun up and ready to go. If no engines are found after nwait amount of time then an error is raised. If engines==MPI it waits a bit longer to find engines. If the number of engines is set then it waits even longer to try to find that number of engines. """ # save stds for later, hide here to prevent ipp enforced print() save_stdout = sys.stdout save_stderr = sys.stderr sys.stdout = StringIO() sys.stderr = StringIO() # wrapped search for ipcluster try: args = { "profile": self.tool.ipcluster["profile"], "timeout": self.tool.ipcluster["timeout"], "cluster_id": self.tool.ipcluster["cluster_id"], } ipyclient = ipp.Client(**args) # restore std printing now that Client print statement has passed # sys.stdout = save_stdout # sys.stderr = save_stderr # allow time to find the connection; count cores to break for _ in range(6000): # how many cores can we find right now? ncores = len(ipyclient) self.update_message( "Establishing parallel connection: {} cores" .format(ncores)) time.sleep(0.01) # If we know ncores, then wait for all print(self.tool.ipcluster["cores"]) if self.tool.ipcluster["cores"]: time.sleep(0.1) if ncores == self.tool.ipcluster["cores"]: break # Looking for all available cores, auto stop else: # If MPI and not all found break if no more in 3 secs if self.tool.ipcluster["engines"] == "MPI": # are any cores found yet? do long wait. if ncores: time.sleep(3) if len(ipyclient) == ncores: break # if Local then wait 1 second between checks else: if ncores: time.sleep(1.) if len(ipyclient) == ncores: break except KeyboardInterrupt as inst: raise inst except (IOError, OSError, ipp.TimeoutError, ipp.NoEnginesRegistered): raise IPyradError( "\nipcluster not found, use 'auto=True' or see docs.") finally: # no matter what we reset the stds sys.stdout = save_stdout sys.stderr = save_stderr # self.update_message( # "Parallel connection: {}".format(len(ipyclient))) return ipyclient
        agent.salience = random.random()
    new_model.step()
    return new_model


# Load data
book_data = pd.read_csv("BDM_ColdWar.csv")
book_data.Position = (book_data.Position + 100) / 200

agents = []
for i, row in book_data.iterrows():
    new_agent = BDMActor(row.Country, row.Capability, row.Position, 1)
    new_agent.decision_model.Q = 0.5
    new_agent.decision_model.T = 0.5
    agents.append(new_agent)

model = NegotiationModel_(agents)

clients = ipyparallel.Client()
print(clients.ids)
dview = clients[:]
with dview.sync_imports():
    import copy
    import random

all_models = dview.map_sync(run_model, [model] * 10, [25] * 10)
all_model_out = [Model_Output(m) for m in all_models]

with open("ColdWar_Experiment2_1.pickle", "wb") as f:
    pickle.dump(all_model_out, f)

print("Done!")
def parallel_serv_start():
    import subprocess
    subprocess.Popen(['ipcluster', 'start'])


import ipyparallel as ipp
rc = ipp.Client()
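# ipcluster is launched asynchronously above, so a client created immediately
# afterwards may see zero engines. A simple way to block until engines register
# (this helper is an assumption, not part of the original snippet):
import time


def wait_for_engines(client, n_engines=1, timeout=60.0):
    deadline = time.time() + timeout
    while len(client) < n_engines:
        if time.time() > deadline:
            raise TimeoutError("engines did not register in time")
        time.sleep(0.5)
    return len(client)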
'''
TODO!!!

Could we use doxygen or similar to convert docstrings to .org and thus avoid
the need to manually synchronise the code and the slides?
'''

'''This simply fills the non-ghosted part of the lattice with squares of
consecutive integers, starting from rank number, does the ghost exchange,
computes gradients (2nd order central differences) in the non-ghosted area,
calculates maxima of local gradients, and Allreduces the global maximum.

Along the way, it prints some diagnostics about the lattice and eventually
of the gradients.
'''

try:
    import ipyparallel
    c = ipyparallel.Client(profile="mpi")
    directview = c[:]
    directview.block = True
except IOError:
    try:
        del ipyparallel
    except:
        pass
    import mpi4py
    from mpi4py import MPI
except ImportError:
    try:
        del ipyparallel
    except:
        pass
    import mpi4py
# ======== Header for ipyparallel kernels ========
import os, sys, types
import ipyparallel as ipp

# -------- Parallel kernels --------
print("Initializing cluster ...")

# variables
global kernels, cluster, nKernels
kernels = ipp.Client()
print("  Client variable \'kernels\'")
cluster = kernels[:]
print("  Cluster Direct View variable \'cluster\'")
nKernels = len(kernels.ids)
print("  Variable \'nKernels\' =", nKernels)


# change cluster current working directory
def f(cwd):
    os.chdir(cwd)
    print(os.getcwd())
    return


cwd = os.getcwd()
cwdList = []
for i in range(nKernels):
    cwdList.append(cwd)

with cluster.sync_imports():
    import os
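# Presumably the header then pushes the controller's working directory to every
# engine using the objects built above; the actual call is not shown in this
# excerpt, so the two lines below are an assumption.
cluster.map_sync(f, cwdList)            # chdir every engine to the controller's cwd
print(cluster.apply_sync(os.getcwd))    # confirm where the engines ended up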
dataHDDM["stim"] = dataHDDM.apply(lambda row: 1 if row['stim'] == 'Right' else 0, axis=1) dataHDDM["response"] = dataHDDM.apply(lambda row: 1 if row['givenResp'] == 'Right' else 0, axis=1) def v_link_func(x, data=dataHDDM): stim = (np.asarray(dmatrix('0 + C(s, [[1], [-1]])', {'s': data.stim.ix[x.index]}))) return x*stim if id < 4: ############## M1 LM = [{'model':'t ~ SAT + FC + contrast + SAT:FC + SAT:contrast + FC:contrast + SAT:FC:contrast', 'link_func': lambda x: x} , {'model':'v ~ contrast', 'link_func':v_link_func} , {'model':'a ~ FC + SAT + SAT:FC', 'link_func': lambda x: x} ] deps = {'sz' : 'SAT'} inc = ['sv','sz','st','z'] model_name = "Joint_t0" else : return np.nan() name = 'light_reg_PMT_%s' %str(id) m = hddm.HDDMRegressor(dataHDDM, LM , depends_on = deps, include=inc, group_only_nodes=['sv', 'sz','st', "sz_SAT"], group_only_regressors=False, keep_regressor_trace=True) m.find_starting_values() m.sample(iter=10000, burn=8500, thin=1, dbname='DDM/traces/db_%s'%name, db='pickle') m.save('DDM/Fits/%s'%name) return m v = ipyparallel.Client(profile="reg_PMT")[:]#sept jobs = v.map(run_model, range(4 * 1))#4 chains for each model wait_watching_stdout(jobs) models = jobs.get()
iter_list = create_list()
scores = list(dview.map(parallel_method, iter_list).get())
# score_parameter_pairs = zip(scores, iter_list)
# print(iter_list)

from neuronunit import tests
# from deap import hypervolume

# test_0_run_exhaust()

os.system('ipcluster start -n 8 --profile=default & sleep 5;')
import ipyparallel as ipp
rc = ipp.Client(profile='default')
rc[:].use_cloudpickle()
dview = rc[:]


class ReducedModelTestCase(unittest.TestCase):
    """Test instantiation of the reduced model"""
    """Testing model optimization"""

    def setUp(self):
        # import sys
        # sys.path.append('../')
        # import neuronunit
        from neuronunit.models.reduced import ReducedModel
        # self.ReducedModel = ReducedModel
        # path = ReducedModelTestCase().path
def run_cnmfe(tiff_files, param_file, output_file): """ Run the CNMFe algorithm through CaImAn. :param tiff_files: A list of .tiff files corresponding to a calcium imaging movie. :param param_file: A .yaml parameter file, containing values for the following parameters: num_processes : int The number of processes to run in parallel. The more parallel processes, the more memory that is used. rf : array-like An array [half-width, half-height] that specifies the size of a patch. stride : int The amount of overlap in pixels between patches. K : int The maximum number of cells per patch. gSiz : int The expected diameter of a neuron in pixels. gSig : int The standard deviation a high pass Gaussian filter applied to the movie prior to seed pixel search, roughly equal to the half-size of the neuron in pixels. min_pnr : float The minimum peak-to-noise ratio that is taken into account when searching for seed pixels. min_corr : float The minimum pixel correlation that is taken into account when searching for seed pixels. min_SNR : float Cells with an signal-to-noise (SNR) less than this are rejected. rval_thr : float Cells with a spatial correlation of greater than this are accepted. decay_time : float The expected decay time of a calcium event in seconds. ssub_B : int The spatial downsampling factor used on the background term. merge_threshold : float Cells that are spatially close with a temporal correlation of greater than merge_threshold are automatically merged. :param output_file: The path to a .hdf5 file that will be written to contain the traces, footprints, and deconvolved events identified by CNMFe. """ for tiff_file in tiff_files: if not os.path.exists(tiff_file): raise FileNotFoundError(tiff_file) if not os.path.exists(param_file): raise FileNotFoundError(param_file) with open(param_file, 'r') as f: params = yaml.load(f) expected_params = [ 'gSiz', 'gSig', 'K', 'min_corr', 'min_pnr', 'rf', 'stride', 'decay_time', 'min_SNR', 'rval_thr', 'merge_threshold', 'ssub_B', 'frame_rate', 'num_rows', 'num_cols', 'num_frames', 'num_processes' ] for pname in expected_params: if pname not in params: raise ValueError('Missing parameter {} in file {}'.format( pname, param_file)) gSiz = params['gSiz'] gSig = params['gSig'] K = params['K'] min_corr = params['min_corr'] min_pnr = params['min_pnr'] rf = params['rf'] stride = params['stride'] decay_time = params['decay_time'] min_SNR = params['min_SNR'] rval_thr = params['rval_thr'] merge_threshold = params['merge_threshold'] ssub_B = params['ssub_B'] frame_rate = params['frame_rate'] num_rows = params['num_rows'] num_cols = params['num_cols'] num_frames = params['num_frames'] num_processes = params['num_processes'] # write memmapped file print('Exporting .isxd to memmap file...') mmap_file = _export_movie_to_memmap(tiff_files, num_frames, num_rows, num_cols, overwrite=False) print('Wrote .mmap file to: {}'.format(mmap_file)) # open memmapped file Yr, dims, T = load_memmap(mmap_file) Y = Yr.T.reshape((T, ) + dims, order='F') # grab parallel IPython handle dview = None if num_processes > 1: import ipyparallel as ipp c = ipp.Client() dview = c[:] print('Running using parallel IPython, # clusters = {}'.format( len(c.ids))) num_processes = len(c.ids) # initialize CNMFE parameter object and set user params cnmfe_params = CNMFParams() if gSiz is None: raise ValueError( 'You must set gSiz to an integer, ideally roughly equal to the expected half-cell width.' ) gSiz = _turn_into_array(gSiz) if gSig is None: raise ValueError( 'You must set gSig to a non-zero integer. 
The default value is 5.') gSig = _turn_into_array(gSig) cnmfe_params.set('preprocess', {'p': 1}) cnmfe_params.set( 'init', { 'K': K, 'min_corr': min_corr, 'min_pnr': min_pnr, 'gSiz': gSiz, 'gSig': gSig }) if rf is None: cnmfe_params.set('patch', {'rf': None, 'stride': 1}) else: cnmfe_params.set('patch', {'rf': np.array(rf), 'stride': stride}) cnmfe_params.set('data', {'decay_time': decay_time}) cnmfe_params.set('quality', {'min_SNR': min_SNR, 'rval_thr': rval_thr}) cnmfe_params.set('merging', {'merge_thr': merge_threshold}) # set parameters that force CNMF into one-photon mode with no temporal or spatial downsampling, # except for the background term cnmfe_params.set( 'init', { 'center_psf': True, 'method_init': 'corr_pnr', 'normalize_init': False, 'nb': -1, 'ssub_B': ssub_B, 'tsub': 1, 'ssub': 1 }) cnmfe_params.set( 'patch', { 'only_init': True, 'low_rank_background': None, 'nb_patch': -1, 'p_tsub': 1, 'p_ssub': 1 }) cnmfe_params.set('spatial', { 'nb': -1, 'update_background_components': False }) cnmfe_params.set('temporal', {'nb': -1, 'p': 1}) # construct and run CNMFE print('Running CNMFe...') cnmfe = CNMF(num_processes, dview=dview, params=cnmfe_params) cnmfe.fit(Y) # run auto accept/reject print('Estimating component quality...') idx_components, idx_components_bad, comp_SNR, r_values, pred_CNN = estimate_components_quality_auto( Y, cnmfe.estimates.A, cnmfe.estimates.C, cnmfe.estimates.b, cnmfe.estimates.f, cnmfe.estimates.YrA, frame_rate, cnmfe_params.get('data', 'decay_time'), cnmfe_params.get('init', 'gSiz'), cnmfe.dims, dview=None, min_SNR=cnmfe_params.get('quality', 'min_SNR'), use_cnn=False) save_cnmfe(cnmfe, output_file, good_idx=idx_components)
def optimize( self, method, quantiles=(0.1, 0.3, 0.5, 0.7, 0.9), n_runs=3, n_bootstraps=0, parallel_profile=None, ): """ Optimize model using ML, chi^2 or G^2. :Input: method : str Optimization method ('ML', 'chisquare' or 'gsquare'). quantiles : tuple A sequence of quantiles to be used for chi^2 and G^2. Default values are the ones used by Ratcliff (.1, .3, .5, .7, .9). n_runs : int <default=3> Number of attempts to optimize. n_bootstraps : int <default=0> Number of bootstrap iterations. parrall_profile : str <default=None> IPython profile for parallelization. :Output: results <dict> - a results dictionary of the parameters values. :Note: The values of the nodes in single subject model is updated according to the results. The nodes of group models are not updated """ results = self._run_optimization(method=method, quantiles=quantiles, n_runs=n_runs) # bootstrap if requested if n_bootstraps == 0: return results # init DataFrame to save results res = pd.DataFrame(np.zeros((n_bootstraps, len(self.values))), columns=list(self.values.keys())) # prepare view for parallelization if parallel_profile is not None: # create view client = ipyparallel.Client(profile=parallel_profile) view = client.load_balanced_view() runs_list = [None] * n_bootstraps else: view = None # define single iteration bootstrap function def single_bootstrap( data, accumulator_class=self.__class__, class_kwargs=self._kwargs, method=method, quantiles=quantiles, n_runs=n_runs, ): # resample data new_data = data.iloc[np.random.randint(0, len(data), len(data))] new_data = new_data.set_index(pd.Index(list(range(len(data))))) h = accumulator_class(new_data, **class_kwargs) # run optimization h._run_optimization(method=method, quantiles=quantiles, n_runs=n_runs) return pd.Series(h.values, dtype=np.float) # bootstrap iterations for i_strap in range(n_bootstraps): if view is None: res.iloc[i_strap] = single_bootstrap(self.data) else: # append to job queue runs_list[i_strap] = view.apply_async(single_bootstrap, self.data) # get parallel results if view is not None: view.wait(runs_list) for i_strap in range(n_bootstraps): res.iloc[i_strap] = runs_list[i_strap].get() # get statistics stats = res.describe() for q in [2.5, 97.5]: stats = stats.append( pd.DataFrame(res.quantile(q / 100.0), columns=[repr(q) + "%"]).T) self.bootstrap_stats = stats.sort_index() return results
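# Hedged usage sketch based on the optimize() docstring above; `model` stands
# for an already-constructed instance of the class that defines this method,
# and the argument values are illustrative assumptions.
results = model.optimize(
    "chisquare",
    quantiles=(0.1, 0.3, 0.5, 0.7, 0.9),
    n_runs=3,
    n_bootstraps=100,
    parallel_profile="default",   # requires a running ipcluster with this profile
)
print(model.bootstrap_stats)      # populated only when n_bootstraps > 0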
    G['industry_imputed']
except:
    G.vs['industry_imputed'] = [x == 'nan' for x in G.vs['industry']]

industry_dist = np.array(
    [x['industry'] for x in G.vs if not x['industry_imputed']])
imputed_industry = np.random.choice(industry_dist,
                                    len(G.vs(industry_imputed_eq=True)),
                                    replace=True)
for v, s in zip(G.vs(industry_imputed_eq=True), imputed_industry):
    v['industry'] = s

has_ipyparallel = True
try:
    # This should be global (or a singleton) to avoid an error with too many
    # files open https://github.com/ipython/ipython/issues/6039
    dv = ipyparallel.Client()[:]
    dv.block = False
    dv.use_dill()
except:
    has_ipyparallel = False
    print("Loading without ipyparallel support")

callbacks = [
    some_terminal_suppliers_reachable,
    percent_terminal_suppliers_reachable,
]


def failure_reachability_single(r, G, med_suppliers=False,
import bubbles

# Dynesty imports
import pickle
import dynesty
from dynesty import plotting as dyplot
from dynesty import DynamicNestedSampler
from dynesty import utils as dyfunc
from multiprocessing import Pool
import ipyparallel as ipp
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

# To use multiprocessing run following command:
# > ipcluster start -n 7 &
rc = ipp.Client()
nprocs = len(rc.ids)
print(rc.ids)
dview = rc[:]
dview.use_dill()

# =====================================================================
import argparse  # argument managing

# ==============================================================================
# Managing arguments with argparse (see http://docs.python.org/howto/argparse.html)
parser = argparse.ArgumentParser()
# ---- required arguments ---- :
parser.add_argument("file_name", type=str,
                    help="File name, saved in ../chains/")
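# One plausible way to hand the ipyparallel view above to dynesty as its worker
# pool. This is an assumption about the part of the script not shown here;
# loglike, prior_transform and ndim are placeholders for the real problem.
def prior_transform(u):
    return 10.0 * u - 5.0            # map the unit cube to [-5, 5)


def loglike(theta):
    return -0.5 * float((theta ** 2).sum())


ndim = 3
dview.block = True                   # dynesty expects pool.map to return results
dsampler = DynamicNestedSampler(loglike, prior_transform, ndim,
                                pool=dview, queue_size=nprocs)
dsampler.run_nested()
results = dsampler.results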
# Main processing loop
if serial_flag:
    print 'Running everything in a serial loop.'

    # Serial processing
    out = []
    for ag in args:
        out.append(parallel_loop(algo_names, parameters, ag))

else:
    import ipyparallel as ip

    print 'Using ipyparallel processing.'

    # Start the parallel processing
    c = ip.Client()
    NC = len(c.ids)
    print NC, 'workers on the job'

    # replicate some parameters
    algo_names_ls = [algo_names] * len(args)
    params_ls = [parameters] * len(args)

    # evaluate the runtime
    then = time.time()
    out1 = c[:].map_sync(parallel_loop, algo_names_ls[:NC], params_ls[:NC], args[:NC])
    now = time.time()
    one_loop = now - then
    print 'Total estimated processing time:', len(args) * one_loop / len(c[:])