def CalculateNumberOfActiveThreads(numberOfTasks):
    if cpu_count() == 2:
        return cpu_count()
    elif numberOfTasks < cpu_count():
        return numberOfTasks
    else:
        return cpu_count()
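# Illustrative calls of the helper above; the results depend on the machine,
# e.g. on an 8-core box 3 tasks -> 3 threads and 20 tasks -> 8 threads.
print(CalculateNumberOfActiveThreads(3))
print(CalculateNumberOfActiveThreads(20))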
def __init__(self, **kwargs):
    """
    Base estimator with the following allowed keyword args

        memory (bool/str/joblib.Memory): The path or Memory for caching the
            computational results, default None means no cache.
        verbose (bool): Whether to show the progress of feature calculations.
        n_jobs (int): The number of parallel jobs. 0 means no parallel computations.
            If this value is set to negative or greater than the total cpu
            then n_jobs is set to the number of cpu on system.

    Args:
        **kwargs: keyword args that contain possibly
            memory (str/joblib.Memory),
            verbose (bool),
            n_jobs (int)
    """
    allowed_kwargs = ['memory', 'verbose', 'n_jobs']
    for k, v in kwargs.items():
        if k not in allowed_kwargs:
            raise TypeError("%s not allowed as kwargs" % (str(k)))

    memory = kwargs.get("memory", None)
    if isinstance(memory, bool):
        memory = tempfile.mkdtemp()
        logger.info("Created temporary directory %s" % memory)
    verbose = kwargs.get("verbose", False)
    n_jobs = kwargs.get("n_jobs", 0)
    self.memory = check_memory(memory)
    self.verbose = verbose

    # find out the number of parallel jobs
    if (n_jobs < 0) or (n_jobs > cpu_count()):
        n_jobs = cpu_count()
        logger.info(f"Using {n_jobs} jobs for computation")
    self.n_jobs = n_jobs
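# Hypothetical usage of the constructor above; `BaseFeaturizer` stands in for
# whatever class actually defines this __init__ (the class name is not shown
# in the snippet).
from joblib import cpu_count

featurizer = BaseFeaturizer(memory=True, verbose=True, n_jobs=-1)
# memory=True is replaced by a temporary directory wrapped in joblib.Memory,
# and the negative n_jobs falls back to the machine's cpu_count().
assert featurizer.n_jobs == cpu_count()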
def test_effective_n_jobs_with_context():
    assert_equal(threaded.effective_n_jobs_with_context(), 1, "Default to 1 job")
    assert_equal(
        threaded.effective_n_jobs_with_context(-1),
        joblib.cpu_count(),
        "Use all cores with num_jobs=-1",
    )
    assert_equal(threaded.effective_n_jobs_with_context(2), 2, "Use n_jobs if specified")

    with joblib.parallel_backend("threading"):
        assert_equal(
            threaded.effective_n_jobs_with_context(),
            joblib.cpu_count(),
            "Use all cores with context manager",
        )

    with joblib.parallel_backend("threading", n_jobs=3):
        assert_equal(
            threaded.effective_n_jobs_with_context(),
            3,
            "Use n_jobs from context manager",
        )

    with joblib.parallel_backend("threading", n_jobs=3):
        assert_equal(
            threaded.effective_n_jobs_with_context(2),
            2,
            "Use n_jobs specified rather than from context manager",
        )
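# A minimal sketch of a helper with the behaviour the test above expects,
# built on joblib's parallel API; the real `threaded` module may differ, and
# get_active_backend's location/return value can vary across joblib versions.
import joblib
from joblib.parallel import get_active_backend


def effective_n_jobs_with_context(n_jobs=None):
    """Resolve n_jobs, honouring an active joblib.parallel_backend context."""
    if n_jobs is None:
        # Outside any context this yields joblib's default of 1 job;
        # inside parallel_backend(...) it yields the context's n_jobs.
        _, n_jobs = get_active_backend()
    return joblib.effective_n_jobs(n_jobs)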
def upload_chunks(vol, files, bin_paths, parallel=True):
    """Push tif images into vols with or without joblib Parallel

    Arguments:
        vol {cloudvolume.CloudVolume} -- volume that will contain image data
        files {list} -- strings of tif image filepaths
        bin_paths {list} -- binary paths to tif files
        parallel {bool} -- True to use parallel version, false otherwise
    """
    # all tifs will be this size, should be 528x400x208 for mouselight
    chunk_size = vol.info["scales"][-1]["size"]
    num_workers = len(files) if len(files) < cpu_count() else cpu_count()
    if parallel:
        print("Doing parallel stuff")
        for f, bin_path in tqdm(
            zip(chunks(files, num_workers), chunks(bin_paths, num_workers)),
            total=len(files) // num_workers,
            desc="uploading tiffs",
        ):
            parallel_upload_chunks(vol, f, bin_path, chunk_size, num_workers)
    else:
        print("Not paralleling")
        for f, bin_path in zip(files, bin_paths):
            if vol.mip == len(vol.info["scales"]) - 1:
                img = np.squeeze(tf.imread(f))
                vol[:, :, :] = img.T
            else:
                ranges = get_data_ranges(bin_path, chunk_size)
                img = np.squeeze(tf.imread(f))
                upload_chunk(vol, ranges, img)
def process_dataset(prefix: str, batch_size=8): features = make_lazy_features( data_path=f'data/{prefix}_set.csv', metadata_path=f'data/{prefix}_set_metadata.csv') pool = jl.Parallel(n_jobs=jl.cpu_count(), backend='multiprocessing') with open(f'data/processed_{prefix}.csv', 'w') as out: current_keys = None is_finished = False while not is_finished: batch = fetch_batch_from_gen(batch_size=batch_size * jl.cpu_count(), g=features) for obj_id, keys, values in pool(batch): if current_keys is None: keys = [ 'object_id', ] + list(keys) line = ';'.join(keys) + '\n' out.write(line) current_keys = keys else: assert tuple( current_keys[1:] ) == keys, f'{tuple(current_keys[1:])[:10]}, {keys[:10]}' values = [ str(obj_id), ] + list(values) line = ';'.join(map(str, values)) + '\n' out.write(line) if len(batch) < batch_size: is_finished = True
def load_data(wavelet, scales, sampling_rate, filename="./dataset/mitdb.pkl"): import pickle from sklearn.preprocessing import RobustScaler with open(filename, "rb") as f: train_data, test_data = pickle.load(f) cpus = 22 if joblib.cpu_count( ) > 22 else joblib.cpu_count() - 1 # for multi-process # for training x1_train, x2_train, y_train, groups_train = [], [], [], [] with ProcessPoolExecutor(max_workers=cpus) as executor: for x1, x2, y, groups in executor.map( partial(worker, wavelet=wavelet, scales=scales, sampling_period=1. / sampling_rate), train_data): x1_train.append(x1) x2_train.append(x2) y_train.append(y) groups_train.append(groups) x1_train = np.expand_dims(np.concatenate(x1_train, axis=0), axis=1).astype(np.float32) x2_train = np.concatenate(x2_train, axis=0).astype(np.float32) y_train = np.concatenate(y_train, axis=0).astype(np.int64) groups_train = np.concatenate(groups_train, axis=0) # for test x1_test, x2_test, y_test, groups_test = [], [], [], [] with ProcessPoolExecutor(max_workers=cpus) as executor: for x1, x2, y, groups in executor.map( partial(worker, wavelet=wavelet, scales=scales, sampling_period=1. / sampling_rate), test_data): x1_test.append(x1) x2_test.append(x2) y_test.append(y) groups_test.append(groups) x1_test = np.expand_dims(np.concatenate(x1_test, axis=0), axis=1).astype(np.float32) x2_test = np.concatenate(x2_test, axis=0).astype(np.float32) y_test = np.concatenate(y_test, axis=0).astype(np.int64) groups_test = np.concatenate(groups_test, axis=0) # normalization scaler = RobustScaler() x2_train = scaler.fit_transform(x2_train) x2_test = scaler.transform(x2_test) return (x1_train, x2_train, y_train, groups_train), (x1_test, x2_test, y_test, groups_test)
def bootstrap(data, n_boot=1000, alpha=0.05, n_cores=1, func=np.sum):
    if n_cores is None or n_cores > cpu_count():
        n_cores = cpu_count() - 1

    def sample(X):
        idx = np.random.choice(X.shape[0], size=X.shape[0], replace=True)
        return func(X[idx, :], axis=0)

    results = (Parallel(n_jobs=n_cores, verbose=8)(delayed(sample)(data)
                                                   for _ in range(n_boot)))
    means = np.vstack(results)
    lower = np.percentile(means, 100 * alpha / 2, axis=0)
    upper = np.percentile(means, 100 * (1 - alpha / 2), axis=0)
    return np.mean(means, axis=0), lower, upper
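# Example run of the bootstrap above on small synthetic data (illustrative
# sizes only); it returns the bootstrap mean and a 95% percentile interval.
import numpy as np

rng = np.random.RandomState(0)
data = rng.normal(loc=1.0, scale=2.0, size=(200, 3))
est, lower, upper = bootstrap(data, n_boot=500, n_cores=2, func=np.mean)
print(est, lower, upper)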
def do_the_job(dset, feats, model, calibration=None, lso=True,
               regression_model=('linreg', LinearRegression),
               results_dir=op.join(MANYSOURCES_DATA_ROOT, 'results', 'loss_by_cooc'),
               n_jobs=None,
               by_source=False):
    rm_name, rm_factory = regression_model
    results_dir = op.join(results_dir,
                          'dset=%s' % dset,
                          'feats=%s' % feats,
                          'model=%s' % model,
                          'calibration=%s' % calibration,
                          'LSO=%r' % lso,
                          'reg_model=%s' % rm_name,
                          'bysource=%r' % by_source)
    ensure_dir(results_dir)
    _, molids, _, _ = molecules_coocurrences_df(dset=dset, feats=feats, model=model, lso=lso)
    if n_jobs is None:
        n_jobs = cpu_count()
    Parallel(n_jobs=n_jobs)(delayed(do_for_one_molid)(calibration, dset, feats, lso, model,
                                                      molid, results_dir, rm_factory, by_source)
                            for molid in sorted(molids))
def fit(self, **kwargs):
    # Handle the number of jobs and the time for them
    if self.n_jobs is None or self.n_jobs == 1:
        self._n_jobs = 1
    elif self.n_jobs == -1:
        self._n_jobs = joblib.cpu_count()
    else:
        self._n_jobs = self.n_jobs

    # Automatically set the cutoff time per task
    if self.per_run_time_limit is None:
        self.per_run_time_limit = self._n_jobs * self.time_left_for_this_task // 10

    seed = self.seed
    self.automl_ = self.build_automl(
        seed=seed,
        ensemble_size=self.ensemble_size,
        initial_configurations_via_metalearning=(
            self.initial_configurations_via_metalearning),
        tmp_folder=self.tmp_folder,
        output_folder=self.output_folder,
    )
    self.automl_.fit(load_models=True, **kwargs)

    return self
def getXY(e, n, yvec, d, t, extent): print "getting point cloud ..." if os.name == 'nt': o = Parallel(n_jobs=cpu_count(), verbose=0)( delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in xrange(len(n))) #eating, northing, distance to sonar, depth, heading X, Y = zip(*o) else: X = [] Y = [] for k in xrange(len(n)): out1, out2 = xyfunc(e[k], n[k], yvec, d[k], t[k], extent) X.append(out1) Y.append(out2) # merge flatten and stack X = np.asarray(X, 'float').T X = X.flatten() # merge flatten and stack Y = np.asarray(Y, 'float').T Y = Y.flatten() return X, Y
def glms_from_glm(glm_design, Q, n_jobs, return_w, voxels): """ Performs a GLM-separate from a GLM design matrix as input Needs a numpy array (no sparse matrix) as input **Note** output is unnormalized """ n_basis = Q.shape[1] glms_design = classic_to_obo(glm_design, n_basis) if n_jobs == -1: n_jobs = cpu_count() glms_split = np.array_split(glms_design, n_jobs, axis=0) out = Parallel(n_jobs=n_jobs)( delayed(_separate_innerloop)(glms_i, n_basis, voxels) for glms_i in glms_split) betas = [] w = [] for o in out: betas.append(o[0]) w.append(o[1]) full_betas = np.concatenate(betas, axis=1) full_w = np.concatenate(w, axis=1) hrfs = full_betas.T norm = np.sqrt((hrfs * hrfs).sum(-1)) hrfs /= norm[..., None] betas = norm if return_w: hrfs_w = full_w.T.dot(Q.T) norm_w = np.sqrt((hrfs_w * hrfs_w).sum(-1)) hrfs_w = hrfs_w / norm_w[..., None] betas_w = norm_w return hrfs.T, betas.T, betas_w.T return hrfs.T, betas.T
def __init__(self, features=[], n_jobs=1, indexing_type='label', **kwargs):

    logging.info("comparing - initialize {} class".format(
        self.__class__.__name__))

    self.features = []
    self.add(features)

    # public
    if n_jobs == -1:
        self.n_jobs = cpu_count()
    else:
        self.n_jobs = n_jobs
    self.indexing_type = indexing_type  # label of position

    # logging
    self._i = 1
    self._i_max = None
    self._n = []
    self._eta = []
    self._output_log_total = True

    # private
    self._compare_functions = []

    if isinstance(features, (pandas.MultiIndex, pandas.Index)):
        warnings.warn(
            "It seems you are using the older version of the Compare API, "
            "see the documentation about how to update to the new API. "
            "http://recordlinkage.readthedocs.io/"
            "en/latest/ref-compare.html", DeprecationWarning)
def _parallel_learning(self, X, Y, w):
    n_samples = len(X)
    objective, positive_slacks = 0, 0
    verbose = max(0, self.verbose - 3)
    if self.batch_size is not None:
        raise ValueError("If n_jobs != 1, batch_size needs to "
                         "be None")
    # generate batches of size n_jobs
    # to speed up inference
    if self.n_jobs == -1:
        n_jobs = cpu_count()
    else:
        n_jobs = self.n_jobs

    n_batches = int(np.ceil(float(len(X)) / n_jobs))
    slices = gen_even_slices(n_samples, n_batches)
    for batch in slices:
        X_b = X[batch]
        Y_b = Y[batch]
        candidate_constraints = Parallel(
            n_jobs=self.n_jobs,
            verbose=verbose)(delayed(find_constraint)(self.model, x, y, w)
                             for x, y in zip(X_b, Y_b))
        djoint_feature = np.zeros(self.model.size_joint_feature)
        for x, y, constraint in zip(X_b, Y_b, candidate_constraints):
            y_hat, delta_joint_feature, slack, loss = constraint
            if slack > 0:
                objective += slack
                djoint_feature += delta_joint_feature
                positive_slacks += 1
        w = self._solve_subgradient(djoint_feature, n_samples, w)
    return objective, positive_slacks, w
def simus(nmtx, ncriteria, nweights, rank_by=1, b=None, solver="pulp", njobs=None):
    # determine the njobs
    njobs = njobs or joblib.cpu_count()

    t_nmtx = nmtx.T

    # check the b array and complete the missing values
    b = np.asarray(b)
    if None in b:
        mins = np.min(t_nmtx, axis=1)
        maxs = np.max(t_nmtx, axis=1)

        auto_b = np.where(ncriteria == MAX, maxs, mins)
        b = np.where(b.astype(bool), b, auto_b)

    # multiprocessing environment
    with joblib.Parallel(n_jobs=njobs) as jobs:
        # create and execute the stages
        stages, stage_results = solve_stages(
            t_nmtx=t_nmtx, b=b, ncriteria=ncriteria,
            solver=solver, jobs=jobs)

        # first methods points
        points1 = first_method(stage_results)
        points2, tita_j_p, tita_j_d, doms, dom_by_crit = second_method(
            stage_results, jobs)

    points = [points1, points2][rank_by - 1]
    ranking = rank.rankdata(points, reverse=True)

    return (
        ranking, stages, stage_results,
        points1, points2,
        tita_j_p, tita_j_d, doms, dom_by_crit)
def getXY(e, n, yvec, d, t, extent):
    print("getting point cloud ...")

    #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    o = Parallel(n_jobs=cpu_count(), verbose=0)(
        delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    # easting, northing, distance to sonar, depth, heading
    X, Y, D, h, t = zip(*o)

    # merge flatten and stack
    X = np.asarray(X, 'float').T
    X = X.flatten()

    # merge flatten and stack
    Y = np.asarray(Y, 'float').T
    Y = Y.flatten()

    # merge flatten and stack
    D = np.asarray(D, 'float').T
    D = D.flatten()

    # merge flatten and stack
    h = np.asarray(h, 'float').T
    h = h.flatten()

    # merge flatten and stack
    t = np.asarray(t, 'float').T
    t = t.flatten()

    return X, Y, D, h, t
def parallel_distance_computation(A, B, distance, n_jobs=-1, granularity=2,
                                  verbose=False, job_size_min=1000):
    """Computes the distance matrix between all objects in A and all
    objects in B in parallel over all cores.

    This function can be partially instantiated with a given distance, in
    order to obtain the parallel version of a distance function with the
    same signature as the distance function. Example:
    distance_parallel = functools.partial(parallel_distance_computation, distance=distance)
    """
    if (len(A) > job_size_min) and joblib_available and (n_jobs != 1):
        if n_jobs is None or n_jobs == -1:
            n_jobs = cpu_count()

        if verbose:
            print("Parallel computation of the distance matrix: %s cpus." % n_jobs)

        if n_jobs > 1:
            tmp = np.linspace(0, len(A), granularity * n_jobs + 1).astype(int)
        else:  # corner case: joblib detected 1 cpu only.
            tmp = (0, len(A))

        chunks = zip(tmp[:-1], tmp[1:])
        dissimilarity_matrix = np.vstack(Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(distance)(A[start:stop], B) for start, stop in chunks))
    else:
        dissimilarity_matrix = distance(A, B)

    if verbose:
        print("Done.")

    return dissimilarity_matrix
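# The functools.partial pattern mentioned in the docstring above, sketched
# with scipy's cdist as the pairwise distance (an illustrative choice); it
# assumes the module-level joblib imports used by the function are available.
import functools

import numpy as np
from scipy.spatial.distance import cdist

distance_parallel = functools.partial(parallel_distance_computation, distance=cdist)

A = np.random.rand(5000, 3)
B = np.random.rand(100, 3)
D = distance_parallel(A, B)  # same signature and result as cdist(A, B)
print(D.shape)               # (5000, 100)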
def tree_parallel_query(my_tree, A, k=None, r=None, n_jobs=-1, query_radius=False):
    """Parallel query of the global Tree 'tree'.
    """
    global tree
    tree = my_tree
    tmp = cpu_count()
    if (n_jobs is None or n_jobs == -1) and A.shape[0] >= tmp:
        n_jobs = tmp

    if n_jobs > 1:
        tmp = np.linspace(0, A.shape[0], n_jobs + 1).astype(int)
    else:  # corner case: joblib detected 1 cpu only.
        tmp = (0, A.shape[0])

    chunks = zip(tmp[:-1], tmp[1:])
    print("chunks: %s" % chunks)
    if query_radius:
        if r is None:
            r = estimate_radius(tree, A, k)

        results = Parallel(n_jobs=n_jobs)(delayed(worker_query_radius)(A[start:stop, :], r)
                                          for start, stop in chunks)
        D, I = zip(*results)
        D = np.concatenate(D)
        I = np.concatenate(I)
    else:
        results = Parallel(n_jobs=n_jobs)(delayed(worker_query)(A[start:stop, :], k)
                                          for start, stop in chunks)
        worker = worker_query
        D, I = zip(*results)
        D = np.vstack(D)
        I = np.vstack(I)

    return D, I
def symbolize_signal(self, signal, parallel = None, n_jobs = -1):
    """
    Symbolize whole time-series signal to a sentence (vector of words),
    parallel can be {None, "ipython"}
    """
    window_index = self.sliding_window_index(len(signal))
    if parallel == None:
        return map(lambda wi: self.symbolize_window(signal[wi]), window_index)
    elif parallel == "ipython":
        ## too slow
        raise NotImplementedError("parallel parameter %s not supported" % parallel)
        #return self.iparallel_symbolize_signal(signal)
    elif parallel == "joblib":
        with tempfile.NamedTemporaryFile(delete=False) as f:
            tf = f.name
        print "save temp file at %s" % tf
        tfiles = joblib.dump(signal, tf)
        xs = joblib.load(tf, "r")
        n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs
        window_index = list(window_index)
        batch_size = len(window_index) / n_jobs
        batches = chunk(window_index, batch_size)
        symbols = Parallel(n_jobs)(delayed(joblib_symbolize_window)(self, xs, batch) for batch in batches)
        for f in tfiles:
            os.unlink(f)
        return sum(symbols, [])
    else:
        raise NotImplementedError("parallel parameter %s not supported" % parallel)
def finetune(config_path): with open(config_path, 'r') as f: config = yaml.load(f) expname = config['experiment_desc'] import os os.system(f'rm "{expname}"/*') batch_size = config.pop('batch_size') get_dataloader = partial(DataLoader, batch_size=batch_size, num_workers=cpu_count() // 2, shuffle=False, drop_last=True, pin_memory=True) import blurdata sigma = config['train']['sigma'] print('sigma:', sigma) tr = blurdata.get_transform(256, sigma, circular=False) datasets = ( blurdata.SyntheticDatasetFromFiles( glob.glob('/mnt/cdisk/anger/hdr+/trainresize/*'), transform=tr), blurdata.SyntheticDatasetFromFiles( glob.glob('/mnt/cdisk/anger/hdr+/trainresize/*'), transform=tr, val=True), ) train = get_dataloader(datasets[0], batch_size=batch_size) val = get_dataloader(datasets[1], batch_size=1) trainer = Trainer(config, train=train, val=val) trainer.load_checkpoint(config['load_checkpoint']) trainer.train()
def _get_n_jobs(n_jobs):
    """Get number of jobs for the computation.

    This function reimplements the logic of joblib to determine the actual
    number of jobs depending on the cpu count. If -1 all CPUs are used.
    If 1 is given, no parallel computing code is used at all, which is useful
    for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.
    Thus for n_jobs = -2, all CPUs but one are used.

    Parameters
    ----------
    n_jobs : int
        Number of jobs stated in joblib convention.

    Returns
    -------
    n_jobs : int
        The actual number of jobs as positive integer.
    """
    if n_jobs < 0:
        return max(cpu_count() + 1 + n_jobs, 1)
    elif n_jobs == 0:
        raise ValueError('Parameter n_jobs == 0 has no meaning.')
    else:
        return n_jobs
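# Quick standalone check of the convention implemented above:
# -1 -> all CPUs, -2 -> all CPUs but one, positive values pass through.
from joblib import cpu_count

for requested in (1, 4, -1, -2):
    resolved = max(cpu_count() + 1 + requested, 1) if requested < 0 else requested
    print(requested, "->", resolved)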
def parallel_upload_chunks(vol, files, bin_paths, chunk_size, num_workers):
    """Push tif images as chunks in CloudVolume object in Parallel

    Arguments:
        vol {cloudvolume.CloudVolume} -- volume that will contain image data
        files {list} -- strings of tif image filepaths
        bin_paths {list} -- binary paths to tif files
        chunk_size {list} -- 3 ints for original tif image dimensions
        num_workers {int} -- max number of concurrently running jobs
    """
    tiff_jobs = int(num_workers / 2) if num_workers == cpu_count() else num_workers
    with tqdm_joblib(tqdm(desc="Load tiffs", total=len(files))) as progress_bar:
        tiffs = Parallel(tiff_jobs, timeout=1800, backend="multiprocessing", verbose=50)(
            delayed(tf.imread)(i) for i in files)
    with tqdm_joblib(tqdm(desc="Load ranges", total=len(bin_paths))) as progress_bar:
        ranges = Parallel(tiff_jobs, timeout=1800, backend="multiprocessing", verbose=50)(
            delayed(get_data_ranges)(i, chunk_size) for i in bin_paths)
    print("loaded tiffs and bin paths")
    vol_ = CloudVolume(vol.layer_cloudpath, parallel=False, mip=vol.mip)
    with tqdm_joblib(tqdm(desc="Upload chunks", total=len(ranges))) as progress_bar:
        Parallel(tiff_jobs, timeout=1800, backend="multiprocessing", verbose=50)(
            delayed(upload_chunk)(vol_, r, i) for r, i in zip(ranges, tiffs))
def importers(self): print('\nVersions:') print("Keras :", keras.__version__) print("Tensorflow :", tf.__version__) if self.workers < 0: self.workers = joblib.cpu_count() + self.workers print('\nNegative workers means all available except N-1 (%d)' % self.workers) if not self.gpu: tf.config.threading.set_inter_op_parallelism_threads(self.workers) tf.config.threading.set_intra_op_parallelism_threads(self.workers) print('\nGPU disabled!') os.environ["CUDA_VISIBLE_DEVICES"] = "-1" if len(tf.config.list_physical_devices('GPU')) > 0: print('You have at least one available GPU device not in use!') print('Using %d CPU workers' % self.workers) else: if len(tf.config.list_physical_devices('GPU')) < 1: print('\nGPU was not found. Using CPU instead!') else: print('\nGPU enabled!') if len(tf.config.list_physical_devices( 'GPU')) < self.workers or self.workers < 0: self.workers = len(tf.config.list_physical_devices('GPU')) print( 'Number of workers adjusted to fit the GPUs available') print('Using %d GPU workers' % self.workers) if self.gpu_test: self.testGPU() self.multicore = True if self.workers > 1 else False print('Multiprocessing status:', self.multicore)
def parallelIterateOnMemMap(function, data, result, iterations, moveTo=None, cleanup=True, n_jobs=cpu_count()): #try: #temporary files folder = tempfile.mkdtemp() data_name = os.path.join(folder, 'data') result_name = os.path.join(folder, 'result') #result memmap if isinstance(result, np.memmap): result_mmap = result if isinstance(result, tuple): # if shape create temp result memmap result_mmap = np.memmap(result_name, dtype=data.dtype, shape=result, mode='w+') elif isinstance(result, np.ndarray): result_mmap = np.memmap(result_name, dtype=data.dtype, shape=result.shape, mode='w+') else: raise RuntimeError('result should be array, memmap or tuple') #input data memmap dump(data, data_name) data_mmap = load(data_name, mmap_mode='r') # Fork the worker processes to perform computation concurrently #Parallel(n_jobs=n_jobs)(delayed(function)(data_mmap, result_mmap, i) for i in iterations) Parallel(n_jobs=n_jobs)(delayed(function)(data_mmap, result_mmap, i) for i in iterations) #except: # print("Exception inparallel processing!") # try: # shutil.rmtree(folder) # except: # print("Failed to delete: " + folder) if moveTo is None: result = np.array(result_mmap) else: result_mmap.flush() shutil.move(result_name, moveTo) result = np.memmap(moveTo, dtype=data.dtype, shape=result) if cleanup: try: shutil.rmtree(folder) except: print("Failed to delete: " + folder) return result
def getXY(e, n, yvec, d, t, extent):
    print("getting point cloud ...")

    #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    o = Parallel(n_jobs=cpu_count(), verbose=0)(
        delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    # easting, northing, distance to sonar, depth, heading
    X, Y, D, h, t = zip(*o)

    # merge flatten and stack
    X = np.asarray(X, 'float').T
    X = X.flatten()

    # merge flatten and stack
    Y = np.asarray(Y, 'float').T
    Y = Y.flatten()

    # merge flatten and stack
    D = np.asarray(D, 'float').T
    D = D.flatten()

    # merge flatten and stack
    h = np.asarray(h, 'float').T
    h = h.flatten()

    # merge flatten and stack
    t = np.asarray(t, 'float').T
    t = t.flatten()

    return X, Y, D, h, t
def _fit_multiclass_task(self, X, y, sample_weight, params):
    if params['init_model'] is not None:
        max_digits = len(str(len(self._classes)))
        init_model_filenames = ['{}.{}'.format(params['init_model'],
                                               str(i + 1).zfill(max_digits))
                                for i in range(self._n_classes)]
    ovr_list = [None] * self._n_classes
    for i, cls_num in enumerate(self._classes):
        if params['init_model'] is not None:
            params['init_model'] = init_model_filenames[i]
        self._classes_map[i] = cls_num
        ovr_list[i] = (y == cls_num).astype(int)
        self._estimators[i] = RGFExecuter(**params)

    n_jobs = self.n_jobs if self.n_jobs > 0 else cpu_count() + self.n_jobs + 1
    substantial_n_jobs = max(n_jobs, self.n_classes_)
    if substantial_n_jobs < n_jobs and self.verbose:
        print('n_jobs = {0}, but RGFClassifier uses {1} CPUs because '
              'classes_ is {2}'.format(n_jobs, substantial_n_jobs, self.n_classes_))

    self._estimators = Parallel(n_jobs=self.n_jobs)(
        delayed(utils.fit_ovr_binary)(self._estimators[i], X, ovr_list[i], sample_weight)
        for i in range(self._n_classes))
def correct_scans2(fp, TL):
    if np.ndim(fp) == 2:
        return c_scans2(fp, TL)
    else:
        return Parallel(n_jobs=cpu_count(), verbose=0)(
            delayed(c_scans2)(fp[p], TL[p]) for p in xrange(len(fp)))
def _job_chunks(l, n_jobs):
    n_chunks = n_jobs
    if n_jobs < 0:
        # so, have n chunks if we are using all n cores/cpus
        n_chunks = cpu_count() + 1 - n_jobs

    return _chunks(l, n_chunks)
def ingest_image_stack(s3_path, voxel_size, img_stack, extension, dtype):
    if extension == "tif":
        img = tf.imread(os.path.expanduser(img_stack))
    else:
        tmp = sitk.ReadImage(os.path.expanduser(img_stack))
        img = sitk.GetArrayFromImage(tmp)

    img = np.asarray(img, dtype=dtype)
    img_size = img.shape[::-1]
    vol = create_cloud_volume(s3_path, img_size, voxel_size, dtype=dtype)

    mem = virtual_memory()
    num_procs = min(math.floor(mem.total / (img.shape[0] * img.shape[1] * 8)),
                    joblib.cpu_count())
    print(f"num processes: {num_procs}")
    print(f"layer path: {vol.layer_cloudpath}")

    global layer_path, num_mips
    num_mips = 3
    layer_path = vol.layer_cloudpath

    data = [(i, img.T[:, :, i]) for i in range(img.shape[0])]
    files = [i[1] for i in data]
    zs = [i[0] for i in data]
    Parallel(num_procs)(delayed(process)(z, f)
                        for z, f in tqdm(zip(zs, files), total=len(zs)))
def fit_and_save_with_grid(data: Union[np.ndarray, pd.DataFrame], grid_path: str, type: str = 'umap', output_dir: str = '.', n_jobs: int = -1): type = type.lower() scaler = StandardScaler() data = scaler.fit_transform(data) os.makedirs(output_dir, exist_ok=True) joblib.dump(scaler, filename=os.path.join(output_dir, 'scaler.joblib')) if grid_path: with open(grid_path) as config_file: grid_dict = json.load(config_file) param_grid = ParameterGrid(grid_dict) if (n_jobs == -1) and (len(param_grid) > cpu_count()): n_jobs = len(param_grid) if n_jobs == 1: for params in param_grid: fit_and_save(data=data, output_dir=output_dir, n_jobs=1, **params) else: Parallel( n_jobs=n_jobs, backend='multiprocessing' )(delayed(fit_and_save) (data=data, output_dir=output_dir, type=type, n_jobs=1, **params) for params in param_grid) else: fit_and_save(data=data, output_dir=output_dir, n_jobs=n_jobs)
def pmap(pickleable_fn, data, n_jobs=None, verbose=1, **kwargs):
    """Parallel map using joblib.

    Parameters
    ----------
    pickleable_fn : callable
        Function to map over data.
    data : iterable
        Data over which we want to parallelize the function call.
    n_jobs : int, optional
        The maximum number of concurrently running jobs. By default, it is one less than
        the number of CPUs.
    verbose: int, optional
        The verbosity level. If nonzero, the function prints the progress messages.
        The frequency of the messages increases with the verbosity level. If above 10,
        it reports all iterations. If above 50, it sends the output to stdout.
    kwargs
        Additional arguments for :attr:`pickleable_fn`.

    Returns
    -------
    list
        The i-th element of the list corresponds to the output of applying
        :attr:`pickleable_fn` to :attr:`data[i]`.
    """
    if n_jobs is None:
        n_jobs = cpu_count() - 1

    return Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(pickleable_fn)(d, **kwargs) for d in data)
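# Example use of the pmap helper above: apply a simple function with an extra
# keyword argument over a small range (the function name is illustrative).
def _scaled_square(x, scale=1.0):
    return scale * x * x

print(pmap(_scaled_square, range(8), n_jobs=2, scale=0.5))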
def select(self, x: np.ndarray, y: np.ndarray, options: Optional[Dict] = None) -> np.ndarray: """ L0 combinatorial optimization Args: x (np.ndarray): design matrix y (np.ndarray): target vector options: Returns: """ n, p = x.shape index_array = list(range(p)) def _lstsq(c): x_comb = x[:, c] beta = lstsq(x_comb, y)[0] res = 1. / 2 * np.mean((x_comb.dot(beta) - y) ** 2) penalty = self.lambd * len(c) res += penalty return res indices = [] for p_temp in range(1, p + 1): for comb in combinations(index_array, p_temp): indices.append(comb) loss = Parallel(n_jobs=cpu_count())(delayed(_lstsq)(comb) for comb in indices) argmin = np.argmin(loss) self.indices = np.array(indices[argmin]) x_temp = x[:, self.indices] self.coef_ = np.zeros_like(x[0, :]) self.coef_[self.indices] = lstsq(x_temp, y)[0] return self.indices
def main(): """Do the gridsearch over several configuration files.""" parser = ap.ArgumentParser() parser.add_argument('-n', '--name', required=True) parser.add_argument('--cpu', type=int, default=cpu_count() // 2) parser.add_argument('--single-job', action="store_true") parser.add_argument('--outdir', type=str) parser.add_argument('confdir', type=str) parser.add_argument('--no-desc', dest='edit_desc', action='store_false') args = parser.parse_args() # /!\ If single-job is used, outdir has to be defined if args.single_job: with open(args.confdir, 'r') as f: conf = json.load(f) start_run(args.name, conf, args.outdir) else: start = datetime.datetime.now() packed_run_name = '{}_{}'.format(args.name, start.strftime('%y%m%d_%H%M%S')) res_grid_path = join('res', packed_run_name) os.makedirs(res_grid_path) shutil.copy('desc.template.md', join(res_grid_path, 'desc.md')) if args.edit_desc: subprocess.call([EDITOR, join(res_grid_path, 'desc.md')]) print('Using {} cpu'.format(args.cpu)) logging.basicConfig(level=logging.WARNING) Parallel(n_jobs=args.cpu)( delayed(start_run)(run_name, conf, res_grid_path) for run_name, conf in get_confs(args.confdir)) logger.info('All over!') print('took', datetime.datetime.now() - start)
def get_sys_info():
    "Return a dictionary with info from the current system."
    # Imports are nested to avoid long import time when func is not called
    import scipy
    import psutil
    import numpy as np
    from joblib import cpu_count

    info = {}

    # Info on the env
    info["env-OMP_NUM_THREADS"] = os.environ.get('OMP_NUM_THREADS')

    # Info on the OS
    info["platform"] = platform.system()
    info["platform-architecture"] = platform.machine()
    info["platform-release"] = platform.release()
    info["platform-version"] = platform.version()

    # Info on the hardware
    info["system-cpus"] = cpu_count()
    info["system-processor"] = _get_processor_name()
    info["system-ram (GB)"] = round(psutil.virtual_memory().total / (1024.0**3))

    # Info on dependency libs
    info["version-cuda"] = get_cuda_version()
    info["version-numpy"] = (np.__version__, _get_numpy_libs())
    info["version-scipy"] = scipy.__version__

    # Info on benchmark version
    info["benchmark-git-tag"] = _get_git_tag()

    return info
def get_grid(mode, orig_def, targ_def, merge, influence, minX, maxX, minY, maxY,
             res, nn, sigmas, eps, shape, numstdevs, trans, humlon, humlat):

    if mode == 1:

        wf = None

        complete = 0
        while complete == 0:
            try:
                try:
                    dat = pyresample.kd_tree.resample_nearest(
                        orig_def, merge.flatten(), targ_def,
                        radius_of_influence=res * 20, fill_value=None,
                        nprocs=cpu_count() - 2)
                except:
                    dat = pyresample.kd_tree.resample_nearest(
                        orig_def, merge.flatten(), targ_def,
                        radius_of_influence=res * 20, fill_value=None,
                        nprocs=1)
                if 'dat' in locals():
                    complete = 1
            except:
                print 'Something went wrong with resampling...'

    dat = dat.reshape(shape)

    return dat, res
def dissimilarity(tracks, prototypes, distance=bundles_distances_mam,
                  n_jobs=-1, verbose=False):
    """Compute the dissimilarity (distance) matrix between tracks and
    given prototypes. This function supports parallel (multicore)
    computation.

    Parameters
    ----------
    tracks : list or array of objects
        an iterable of streamlines.
    prototypes : iterable of objects
        The prototypes.
    distance : function
        Distance function between groups of streamlines.
    prototype_policy : string
        Shortname for the prototype selection policy. The default value
        is 'sff'.
    n_jobs : int
        If joblib is available, split the dissimilarity computation in
        n_jobs. If n_jobs is -1, then all available cpus/cores are used.
        The default value is -1.
    verbose : bool
        If true prints some messages. Default is False.

    Returns
    -------
    dissimilarity_matrix : array (N, num_prototypes)

    See Also
    --------
    furthest_first_traversal, subset_furthest_first

    Notes
    -----
    """
    if verbose:
        print("Computing the dissimilarity matrix.")

    if joblib_available and n_jobs != 1:
        if n_jobs is None or n_jobs == -1:
            n_jobs = cpu_count()

        if verbose:
            print("Parallel computation of the dissimilarity matrix: %s cpus." % n_jobs)

        if n_jobs > 1:
            tmp = np.linspace(0, len(tracks), n_jobs + 1).astype(int)
        else:  # corner case: joblib detected 1 cpu only.
            tmp = (0, len(tracks))

        chunks = zip(tmp[:-1], tmp[1:])
        dissimilarity_matrix = np.vstack(Parallel(n_jobs=n_jobs)(
            delayed(distance)(tracks[start:stop], prototypes)
            for start, stop in chunks))
    else:
        dissimilarity_matrix = distance(tracks, prototypes)

    if verbose:
        print("Done.")

    return dissimilarity_matrix
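# Illustrative call of the dissimilarity function above on synthetic
# streamlines; the default distance (bundles_distances_mam) assumes dipy is
# installed and float32 streamline arrays.
import numpy as np

rng = np.random.RandomState(0)
tracks = [rng.rand(rng.randint(10, 20), 3).astype(np.float32) for _ in range(100)]
prototypes = tracks[:5]

dm = dissimilarity(tracks, prototypes, n_jobs=2, verbose=True)
print(dm.shape)  # (100, 5)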
def save_pcaed_feat(drivers_list, X):
    if len(drivers_list) == 0:
        raise Exception("empty")
    n_proc = cpu_count()
    Parallel(n_jobs=n_proc)(delayed(p_save_reduced)(d, X[ind*200:(ind+1)*200, :])
                            for (ind, d) in enumerate(drivers_list))
    return True
def extract_features_drivers(drivers_list):
    if len(drivers_list) == 0:
        raise Exception("empty")
    features = np.memmap("output", dtype="float64",
                         shape=(len(drivers_list), 200, _N_F), mode='w+')
    n_proc = cpu_count()
    Parallel(n_jobs=n_proc)(delayed(p_getinfo)(d, features, i)
                            for i, d in enumerate(drivers_list))
    return features
def main(): # ------------------------------------------------------------------------------- # Parameters # the script will most likely work if we swap the TEXTS variable # with any iterable of text (where one element represents a document, # and the whole iterable is the corpus) newsgroups_train = fetch_20newsgroups(subset = 'train') TEXTS = newsgroups_train.data # spacy's english model for text preprocessing NLP = spacy.load('en') # a set of stopwords built-in to spacy, we can always # expand this set for the problem that we are working on, # here we include python built-in string punctuation mark STOPWORDS = spacy.en.STOP_WORDS | set(punctuation) | set(ENGLISH_STOP_WORDS) # create a directory called 'model' to store all outputs in later section MODEL_DIR = 'model' UNIGRAM_PATH = os.path.join(MODEL_DIR, 'unigram.txt') PHRASE_MODEL_CHECKPOINT = os.path.join(MODEL_DIR, 'phrase_model') BIGRAM_PATH = os.path.join(MODEL_DIR, 'bigram.txt') WORD2VEC_CHECKPOINT = os.path.join(MODEL_DIR, 'word2vec') # ------------------------------------------------------------------------------- logger.info('job started') if not os.path.isdir(MODEL_DIR): os.mkdir(MODEL_DIR) if not os.path.exists(UNIGRAM_PATH): logger.info('preprocessing text') export_unigrams(UNIGRAM_PATH, texts = TEXTS, parser = NLP, stopwords = STOPWORDS) if os.path.exists(PHRASE_MODEL_CHECKPOINT): phrase_model = Phrases.load(PHRASE_MODEL_CHECKPOINT) else: logger.info('training phrase model') # use LineSetence to stream text as oppose to loading it all into memory unigram_sentences = LineSentence(UNIGRAM_PATH) phrase_model = Phrases(unigram_sentences) phrase_model.save(PHRASE_MODEL_CHECKPOINT) if not os.path.exists(BIGRAM_PATH): logger.info('converting words to phrases') export_bigrams(UNIGRAM_PATH, BIGRAM_PATH, phrase_model) if os.path.exists(WORD2VEC_CHECKPOINT): word2vec = Word2Vec.load(WORD2VEC_CHECKPOINT) else: logger.info('training word2vec') sentences = LineSentence(BIGRAM_PATH) word2vec = Word2Vec(sentences, workers = cpu_count()) word2vec.save(WORD2VEC_CHECKPOINT) logger.info('job completed')
def execute():
    dims = (100000, 4)
    x = createSharedNumpyArray(dims)
    x[:] = np.random.rand(dims[0], dims[1])

    res = Parallel(n_jobs=cpu_count())(delayed(su)(x, i) for i in range(dims[0]))
def denoise(self, videoIn):
    halfWindowSize = self.searchWindowSize / 2
    halfTemplate = self.templateWindowSize / 2
    delta = halfWindowSize + halfTemplate

    shape = tuple(np.add(videoIn.shape, (0, 2*delta, 2*delta)))
    video = np.zeros(shape)
    for i in xrange(0, videoIn.shape[0]):
        video[i] = cv2.copyMakeBorder(videoIn[i], delta, delta, delta, delta,
                                      cv2.BORDER_REFLECT_101)

    out = video.copy()
    outLBP = video.copy()
    outM = video.copy()
    outNonUni = video.copy()
    outNonUniXY = video.copy()
    #out = np.ones(video.shape)

    nFrames = video.shape[0]
    nRows = video.shape[1]
    nCols = video.shape[2]

    aux = np.zeros((self.templateWindowSize, self.templateWindowSize, self.templateWindowSize))
    aux[halfTemplate, halfTemplate, halfTemplate] = 1
    gaussian = ndimage.filters.gaussian_filter(aux, self.sigma)

    ranges = [range(delta, nFrames - delta),
              range(delta, nRows - delta),
              range(delta, nCols - delta)]
    self.coordinates = list(itertools.product(*ranges))

    lbpTop = LBPTOP(LBP(8, uniform=True, rotation_invariant=True), \
                    LBP(8, uniform=True, rotation_invariant=True), \
                    LBP(8, uniform=True, rotation_invariant=True))

    lbpVideos = lbpTop.generateCodes(video)
    sizeXY = lbpTop.getMaxXY()
    sizeXT = lbpTop.getMaxXT()
    sizeYT = lbpTop.getMaxYT()

    # Fast Non Local Means
    avg, avgGrad = self.neighborhoodFeatures(video)

    ncpus = joblib.cpu_count()
    results = Parallel(n_jobs=ncpus, max_nbytes=2e9)(
        delayed(processPixel)(video, t, i, j, self.h, halfWindowSize, halfTemplate,
                              gaussian, lbpVideos, sizeXY, sizeXT, sizeYT)
        for t, i, j in self.coordinates)

    for idx in xrange(0, len(results)):
        out[self.coordinates[idx][0], self.coordinates[idx][1], self.coordinates[idx][2]] = results[idx][0]
        outLBP[self.coordinates[idx][0], self.coordinates[idx][1], self.coordinates[idx][2]] = results[idx][1]
        outM[self.coordinates[idx][0], self.coordinates[idx][1], self.coordinates[idx][2]] = results[idx][2]
        outNonUni[self.coordinates[idx][0], self.coordinates[idx][1], self.coordinates[idx][2]] = results[idx][3]
        outNonUniXY[self.coordinates[idx][0], self.coordinates[idx][1], self.coordinates[idx][2]] = results[idx][4]

    return out[:, delta: -delta, delta: -delta], \
        outLBP[:, delta: -delta, delta: -delta], \
        outM[:, delta: -delta, delta: -delta], \
        outNonUni[:, delta: -delta, delta: -delta], \
        outNonUniXY[:, delta: -delta, delta: -delta]
def signal_to_paa_vector(self, signal, n_jobs = -1):
    window_index = self.sliding_window_index(len(signal))
    with tempfile.NamedTemporaryFile(delete=False) as f:
        tf = f.name
    print "save temp file at %s" % tf
    tfiles = joblib.dump(signal, tf)
    xs = joblib.load(tf, "r")
    n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs
    window_index = list(window_index)
    batch_size = len(window_index) / n_jobs
    batches = chunk(window_index, batch_size)
    vecs = Parallel(n_jobs)(delayed(joblib_paa_window)(self, xs, batch) for batch in batches)
    for f in tfiles:
        os.unlink(f)
    return np.vstack(vecs)
def compute_dissimilarity(data, distance, prototype_policy, num_prototypes, verbose=False, size_limit=500000, n_jobs=None): """Compute dissimilarity matrix given data, distance, prototype_policy and number of prototypes. """ print "Computing dissimilarity data for the original data:", data_original = data num_proto = num_prototypes if data.shape[0] > size_limit: print print "Datset too big: subsampling to %s entries only!" % size_limit data = data[np.random.permutation(data.shape[0])[:size_limit], :] print prototype_policy print "number of prototypes:", num_proto stdout.flush() if verbose: print("Generating %s prototypes as" % num_proto), # Note that we use the original dataset here, not the subsampled one! if prototype_policy=='random': if verbose: print("random subset of the initial data.") prototype_idx = np.random.permutation(data_original.shape[0])[:num_proto] prototype = [data_original[i] for i in prototype_idx] elif prototype_policy=='fft': prototype_idx = furthest_first_traversal(data_original, num_proto, distance) prototype = [data_original[i] for i in prototype_idx] elif prototype_policy=='sff': prototype_idx = subset_furthest_first(data_original, num_proto, distance) prototype = [data_original[i] for i in prototype_idx] else: raise Exception if verbose: print("Computing dissimilarity matrix.") if joblib_available and n_jobs != 1: if n_jobs is None or n_jobs == -1: n_jobs = cpu_count() print("Parallel computation of the dissimilarity matrix: %s cpus." % n_jobs) if n_jobs > 1: tmp = np.linspace(0, data.shape[0], n_jobs).astype(np.int) else: # corner case: joblib detected 1 cpu only. tmp = (0, data.shape[0]) chunks = zip(tmp[:-1], tmp[1:]) data_dissimilarity = np.vstack(Parallel(n_jobs=n_jobs)(delayed(distance)(data[start:stop], prototype) for start, stop in chunks)) else: data_dissimilarity = distance(data, prototype) print return data_dissimilarity
def compute_preferences(MQD, click_models, cutoff, output_filepath):
    preferences = Parallel(n_jobs=cpu_count())(
        delayed(compute_preferences_parallel)(
            click_model_name, query,
            MQD[click_model_name][query]['model'],
            MQD[click_model_name][query]['relevances'],
            cutoff)
        for click_model_name in click_models
        for query in MQD[click_model_name].keys())

    for stats in preferences:
        click_model_name, query, prefs = stats
        MQD[click_model_name][query]['preferences'] = prefs

    with open(output_filepath, 'wb') as ofile:
        pickle.dump(MQD, ofile, protocol=-1)
def ecfps_mp(numjobs=None, dest_dir=None):
    """Python-parallel computation of ECFPs.
    Parameters:
      - numjobs: the number of threads to use (None=all in the machine).
      - dest_dir: the directory to which the fingerprints will be written, in weird fp format(TM).
    """
    dest_dir = _MALARIA_ECFPS_PARALLEL_RESULTS_DIR if dest_dir is None else dest_dir
    ensure_dir(dest_dir)
    numjobs = cpu_count() if numjobs is None else int(numjobs)
    Parallel(n_jobs=numjobs)(delayed(_molidsmiles_it_ecfp)
                             (start=start,
                              step=numjobs,
                              output_file=op.join(dest_dir,
                                                  'all__fcfp=%r__start=%d__step=%d.weirdfps' %
                                                  (fcfp, start, numjobs)),
                              fcfp=fcfp)
                             for start, fcfp in product(range(numjobs), (True, False)))
def electre1(nmtx, ncriteria, nweights, p, q, njobs=None):
    # determine the njobs
    njobs = njobs or joblib.cpu_count()

    # get the concordance and discordance info
    # multiprocessing environment
    with joblib.Parallel(n_jobs=njobs) as jobs:
        mtx_concordance = concordance(nmtx, ncriteria, nweights, jobs)
        mtx_discordance = discordance(nmtx, ncriteria, jobs)

    with np.errstate(invalid='ignore'):
        outrank = (
            (mtx_concordance >= p) & (mtx_discordance <= q))

    kernel_mask = ~outrank.any(axis=0)
    kernel = np.where(kernel_mask)[0]

    return kernel, outrank, mtx_concordance, mtx_discordance
def getXY(e, n, yvec, d, t, extent):
    print("getting point cloud ...")

    #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    if os.name == 'posix':
        o = Parallel(n_jobs=cpu_count(), verbose=0)(
            delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

        # easting, northing, distance to sonar, depth, heading
        X, Y, D, H, T = zip(*o)

    else:
        X = []; Y = []; D = []; H = []; T = []
        for k in range(len(n)):
            out1, out2, out3, out4, out5 = xyfunc(e[k], n[k], yvec, d[k], t[k], extent)
            X.append(out1); Y.append(out2)
            D.append(out3); H.append(out4); T.append(out5)

    # merge flatten and stack
    X = np.asarray(X, 'float').T
    X = X.flatten()

    # merge flatten and stack
    Y = np.asarray(Y, 'float').T
    Y = Y.flatten()

    # merge flatten and stack
    D = np.asarray(D, 'float').T
    D = D.flatten()

    # merge flatten and stack
    H = np.asarray(H, 'float').T
    H = H.flatten()

    # merge flatten and stack
    T = np.asarray(T, 'float').T
    T = T.flatten()

    return X, Y, D, H, T
def getXY(e, n, yvec, d, t, extent):
    print("getting point cloud ...")

    #o = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(getxy)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    try:
        o = Parallel(n_jobs=cpu_count(), verbose=0)(
            delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))
    except:
        o = Parallel(n_jobs=1, verbose=0)(
            delayed(xyfunc)(e[k], n[k], yvec, d[k], t[k], extent) for k in range(len(n)))

    X, Y = zip(*o)

    # X flatten and stack
    X = np.asarray(X, 'float')
    X = X.flatten()

    # Y flatten and stack
    Y = np.asarray(Y, 'float')
    Y = Y.flatten()

    return X, Y
def __init__(self, n_jobs, function, args=None, kwargs=None): """ Parameters ---------- n_jobs : int Number of cores to be used for parallel calculation. If -1 use all available cores. function : object that supports __call__, as functions function to be run in parallel. args : list of tuples Arguments for function; see the ParallelCalculation class description. kwargs : list of dicts or None kwargs for function; see the ParallelCalculation class description. """ # args[i] should be a list of args, one for each run self.n_jobs = n_jobs if self.n_jobs == -1: self.n_jobs = cpu_count() self.functions = function if not hasattr(self.functions, '__iter__'): self.functions = [self.functions] * len(args) if len(self.functions) != len(args): self.functions = self.functions[:] * (len(args) // len(self.functions)) # Arguments should be present if args is None: args = [] self.args = args # If kwargs are not present, use empty dicts if kwargs: self.kwargs = kwargs else: self.kwargs = [{} for i in self.args] self.nruns = len(args)
def clean_corpus(texts, parser, stopwords, batch_size, n_jobs):
    """
    Generator function using spaCy to parse reviews:
    - lemmatize the text
    - remove punctuation, whitespace and number
    - remove pronoun, e.g. 'it'
    - remove tokens that are shorter than 2
    """
    n_threads = cpu_count()
    if n_jobs > 0 and n_jobs < n_threads:
        n_threads = n_jobs

    # use the .pipe to process texts as a stream;
    # this functionality supports using multi-threads
    for parsed_text in parser.pipe(texts, n_threads=n_threads, batch_size=batch_size):
        tokens = []
        for token in parsed_text:
            if valid_word(token) and token.lemma_ not in stopwords:
                tokens.append(token.lemma_)

        cleaned_text = ' '.join(tokens)
        yield cleaned_text
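# Sketch of driving the generator above; it relies on spaCy's older
# parser.pipe(n_threads=...) signature (spaCy 1.x/2.x), the 'en' model name
# used elsewhere in this codebase, and a valid_word helper defined in the
# same module.
import spacy
from joblib import cpu_count

nlp = spacy.load('en')
docs = ["This is the first review.", "And here is another one!"]
for cleaned in clean_corpus(docs, parser=nlp, stopwords={'the', 'and'},
                            batch_size=32, n_jobs=cpu_count()):
    print(cleaned)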
def compute_lambdas(MQD, click_models, n_repeats, n_impressions, compute_lambdas_method, ranking_sampler, cutoff, store_impressions, output_filepath): # Run the computation of lambdas in paralell with the specified # `compute_lambdas_method` method and `ranking_sampler`. lambdas_counts = Parallel(n_jobs=cpu_count())( delayed(compute_lambdas_method)( click_model_name, query, MQD[click_model_name][query]['model'], MQD[click_model_name][query]['relevances'], n_impressions, n_repeats, ranking_sampler, store_impressions, cutoff) for click_model_name in click_models for query in MQD[click_model_name].keys()) # Copy the lambdas and counts into a dictionary. Note that Parallel # preserves the order of the results, but we still keep track of the # associated click model and queries names not to mix something up. for stats in lambdas_counts: click_model_name, query, cutoff, n_imps, ranking_sampler_name,\ lambdas, total_counts, viewed_counts, total_lambdas, viewed_lambdas,\ impressions = stats MQD[click_model_name][query]['stats'] = {} for i, n in enumerate(n_imps): MQD[click_model_name][query]['stats'][n] = {'lambdas': lambdas[i], 'total_lambdas': total_lambdas[i], 'viewed_lambdas': viewed_lambdas[i], 'total_counts': total_counts[i], 'viewed_counts': viewed_counts[i], 'cutoff': cutoff, 'impressions': impressions, 'ranking_sampler': ranking_sampler_name} with open(output_filepath, 'wb') as ofile: pickle.dump(MQD, ofile, protocol=-1)
def texture_slic(humfile, sonpath, doplot=1, numclasses=4, maxscale=20, notes=4): ''' Create a texture lengthscale map using the algorithm detailed by Buscombe et al. (2015) This textural lengthscale is not a direct measure of grain size. Rather, it is a statistical representation that integrates over many attributes of bed texture, of which grain size is the most important. The technique is a physically based means to identify regions of texture within a sidescan echogram, and could provide a basis for objective, automated riverbed sediment classification. Syntax ---------- [] = PyHum.texture(humfile, sonpath, doplot, numclasses, maxscale, notes) Parameters ---------- humfile : str path to the .DAT file sonpath : str path where the *.SON files are doplot : int, *optional* [Default=1] if 1, make plots, otherwise do not make plots numclasses : int, *optional* [Default=4] number of 'k means' that the texture lengthscale will be segmented into maxscale : int, *optional* [Default=20] Max scale as inverse fraction of data length for wavelet analysis notes : int, *optional* [Default=100] notes per octave for wavelet analysis Returns ------- sonpath+base+'_data_class.dat': memory-mapped file contains the texture lengthscale map sonpath+base+'_data_kclass.dat': memory-mapped file contains the k-means segmented texture lengthscale map References ---------- .. [1] Buscombe, D., Grams, P.E., and Smith, S.M.C., 2015, Automated riverbed sediment classification using low-cost sidescan sonar. Journal of Hydraulic Engineering 10.1061/(ASCE)HY.1943-7900.0001079, 06015019. ''' # prompt user to supply file if no input file given if not humfile: print('An input file is required!!!!!!') Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing humfile = askopenfilename(filetypes=[("DAT files","*.DAT")]) # prompt user to supply directory if no input sonpath is given if not sonpath: print('A *.SON directory is required!!!!!!') Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing sonpath = askdirectory() # print given arguments to screen and convert data type where necessary if humfile: print('Input file is %s' % (humfile)) if sonpath: print('Sonar file path is %s' % (sonpath)) if numclasses: numclasses = np.asarray(numclasses,int) print('Number of sediment classes: %s' % (str(numclasses))) if maxscale: maxscale = np.asarray(maxscale,int) print('Max scale as inverse fraction of data length: %s' % (str(maxscale))) if notes: notes = np.asarray(notes,int) print('Notes per octave: %s' % (str(notes))) if doplot: doplot = int(doplot) if doplot==0: print("Plots will not be made") print('[Default] Number of processors is %s' % (str(cpu_count()))) ######################################################## ######################################################## # start timer if os.name=='posix': # true if linux/mac or cygwin on windows start = time.time() else: # windows start = time.clock() # if son path name supplied has no separator at end, put one on if sonpath[-1]!=os.sep: sonpath = sonpath + os.sep base = humfile.split('.DAT') # get base of file name for output base = base[0].split(os.sep)[-1] # remove underscores, negatives and spaces from basename base = humutils.strip_base(base) meta = loadmat(os.path.normpath(os.path.join(sonpath,base+'meta.mat'))) ft = 1/loadmat(sonpath+base+'meta.mat')['pix_m'] #pix_m = np.squeeze(meta['pix_m']) #dep_m = np.squeeze(meta['dep_m']) dist_m = np.squeeze(meta['dist_m']) ### port print("processing port side ...") # load memory 
mapped scan ... port shape_port = np.squeeze(meta['shape_port']) if shape_port!='': if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_port_lar.dat'))): port_fp = io.get_mmap_data(sonpath, base, '_data_port_lar.dat', 'float32', tuple(shape_port)) else: port_fp = io.get_mmap_data(sonpath, base, '_data_port_la.dat', 'float32', tuple(shape_port)) #port_fp2 = io.get_mmap_data(sonpath, base, '_data_port_l.dat', 'float32', tuple(shape_port)) ### star print("processing starboard side ...") # load memory mapped scan ... port shape_star = np.squeeze(loadmat(sonpath+base+'meta.mat')['shape_star']) if shape_star!='': if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_star_lar.dat'))): star_fp = io.get_mmap_data(sonpath, base, '_data_star_lar.dat', 'float32', tuple(shape_star)) else: star_fp = io.get_mmap_data(sonpath, base, '_data_star_la.dat', 'float32', tuple(shape_star)) #star_fp2 = io.get_mmap_data(sonpath, base, '_data_star_l.dat', 'float32', tuple(shape_star)) if len(shape_star)>2: shape = shape_port.copy() shape[1] = shape_port[1] + shape_star[1] else: shape = [] shape.append(1) shape.append(shape_port[0]) shape.append(shape_port[1]) shape[1] = shape_port[0] + shape_star[0] #work on the entire scan #im = humutils.rescale(np.vstack((np.flipud(np.hstack(port_fp)), np.hstack(star_fp))),0,1) im = np.vstack((np.flipud(np.hstack(port_fp)), np.hstack(star_fp))) im[np.isnan(im)] = 0 im = humutils.rescale(im,0,1) #get SLIC superpixels segments_slic = slic(im, n_segments=int(im.shape[0]/10), compactness=.1) #pre-allocate texture lengthscale array tl = np.zeros(im.shape, dtype = "float64") #cycle through each segment and compute tl for k in np.unique(segments_slic): mask = np.zeros(im.shape[:2], dtype = "uint8") mask[segments_slic == k] = 255 cmask, cim = crop_toseg(mask, im) tl[segments_slic == k] = parallel_me(cim, maxscale, notes, np.shape(cim)[0]) R_fp = io.get_mmap_data(sonpath, base, '_data_range.dat', 'float32', tuple(shape_star)) R = np.vstack((np.flipud(np.hstack(R_fp)), np.hstack(R_fp))) R = R/np.max(R) #correct for range and scale tl = tl * np.cos(R) * (1/ft) tl[im==0] = np.nan tl[np.isnan(im)] = np.nan # create memory mapped file for Sp with open(os.path.normpath(os.path.join(sonpath,base+'_data_class.dat')), 'w+') as ff: fp = np.memmap(ff, dtype='float32', mode='w+', shape=tuple(shape)) counter = 0 if len(shape_star)>2: for p in range(len(port_fp)): if p==0: n,m = np.shape(np.vstack((np.flipud(port_fp[p]), star_fp[p]))) else: n,m = np.shape(np.vstack((np.flipud(port_fp[p]), star_fp[p]))) Sp = tl[:n, counter:counter+m] counter = counter+m fp[p] = Sp.astype('float32') del Sp del fp # flush data to file class_fp = io.get_mmap_data(sonpath, base, '_data_class.dat', 'float32', tuple(shape)) else: with open(os.path.normpath(os.path.join(sonpath,base+'_data_class.dat')), 'w+') as ff: np.save(ff, np.squeeze(Sp).astype('float32')) with open(os.path.normpath(os.path.join(sonpath,base+'_data_class.dat')), 'r') as ff: class_fp = np.load(ff) dist_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dist_m']) ######################################################## if doplot==1: if len(shape_star)>2: for p in range(len(star_fp)): plot_class(dist_m, shape_port, port_fp[p], star_fp[p], class_fp[p], ft, humfile, sonpath, base, p) else: plot_class(dist_m, shape_port, port_fp, star_fp, class_fp, ft, humfile, sonpath, base, 0) if len(shape_star)>2: for p in range(len(star_fp)): plot_contours(dist_m, shape_port, class_fp[p], ft, humfile, sonpath, base, numclasses, p) else: 
plot_contours(dist_m, shape_port, class_fp, ft, humfile, sonpath, base, numclasses, 0) ####################################################### # k-means if len(shape_star)>2: with open(os.path.normpath(os.path.join(sonpath,base+'_data_kclass.dat')), 'w+') as ff: fp = np.memmap(ff, dtype='float32', mode='w+', shape=tuple(shape)) for p in range(len(port_fp)): wc = get_kclass(class_fp[p].copy(), numclasses) fp[p] = wc.astype('float32') del wc del fp kclass_fp = io.get_mmap_data(sonpath, base, '_data_kclass.dat', 'float32', tuple(shape)) else: wc = get_kclass(class_fp.copy(), numclasses) with open(os.path.normpath(os.path.join(sonpath,base+'_data_kclass.dat')), 'w+') as ff: np.save(ff, np.squeeze(wc).astype('float32')) del wc with open(os.path.normpath(os.path.join(sonpath,base+'_data_kclass.dat')), 'r') as ff: kclass_fp = np.load(ff) ######################################################## if doplot==1: if len(shape_star)>2: for p in range(len(star_fp)): plot_kmeans(dist_m, shape_port, port_fp[p], star_fp[p], kclass_fp[p], ft, humfile, sonpath, base, p) else: plot_kmeans(dist_m, shape_port, port_fp, star_fp, kclass_fp, ft, humfile, sonpath, base, 0) if os.name=='posix': # true if linux/mac elapsed = (time.time() - start) else: # windows elapsed = (time.clock() - start) print("Processing took "+str(elapsed)+"seconds to analyse") print("Done!")
def resample(orig_def, target_def, ss):
    ''' Calculates Numpy Array for Raster Generation '''
    result = kd_tree.resample_nearest(orig_def, ss, target_def,
                                      radius_of_influence=1,
                                      fill_value=None,
                                      nprocs=cpu_count() - 1)
    return result
from keras.preprocessing import sequence from keras.models import model_from_json models_dir = '.' clf = model_from_json(open('%s/classifier_arc.json' % models_dir).read()) clf.load_weights('%s/classifier_weights.h5' % models_dir) from joblib import Parallel, delayed, cpu_count n_jobs = cpu_count() def generator(vectors): for idx, vector in enumerate(vectors): print idx yield vector def processing(vector): maxlen=40 vector = sequence.pad_sequences([vector,], maxlen=maxlen) predicted_label = clf.predict_classes(vector, verbose=0)[0][0] return predicted_label vectors = [[96, 838, 766, 246, 246, 29, 319, 2, 150, 1, 369, 304, 3684, 33, 1324, 1, 213, 51, 12, 35, 192, 1140, 17, 9903, 1, 506, 835, 266, 141, 19, 7384, 8689, 81536, 180, 291, 4, 33, 18, 1100, 1, 1565, 464, 506, 180, 1604, 7, 835, 17, 2, 599, 6, 38, 14, 7, 277, 1220, 191, 33, 133, 1604, 14, 1513, 24, 1111, 33, 115, 5984, 95, 18, 2, 1081, 822, 6, 5, 1397, 33, 39, 863, 0, 308, 0, 17, 2421, 6, 7822, 0, 418, 0, 37, 6, 4455, 36737, 0, 418, 0, 487, 12427, 33, 36, 7, 11090, 2666, 11, 2, 1165, 4, 115, 7254, 58, 38437, 3809, 34, 31029, 141, 16, 399, 835, 438, 160, 0, 418, 0, 17, 2421, 6, 7822, 25, 1711, 6, 99, 1051, 17, 634, 6, 5, 1397, 29, 212, 2161, 4, 2714, 136, 2704, 4, 229, 117, 23, 1745, 29, 319, 141, 2, 150, 4, 29, 314, 2, 441, 231, 261, 346, 2034, 3, 8, 14, 7, 4279, 606, 8, 14, 7, 642, 1775, 9, 1222, 24026, 606, 206, 2099, 1, 15, 141, 2, 835, 675, 12, 418, 403, 1211, 19, 4429, 81, 344, 564, 168, 3, 23248, 7, 443, 833, 386, 3, 3, 8, 14, 7, 642, 1775, 9, 52, 24026, 606, 287, 5, 29, 319, 2, 585, 1, 369, 1979, 2021], [96, 838, 766, 506, 835, 55, 304, 3684, 33, 1324, 1, 213, 51, 12, 35, 192, 1140, 17, 9903, 1, 506, 835, 180, 4, 33, 18, 1100, 1, 57103, 1604, 7, 835, 17, 2, 599, 6, 38, 14, 7, 277, 1220, 191, 33, 133, 1604, 14, 1513, 24, 1111, 33, 115, 5984, 95, 18, 2, 1081, 822, 6, 5, 1397, 33, 39, 863, 588, 418, 965, 17, 2421, 6, 7822, 346, 418, 2121, 37, 6, 4455, 36737, 513, 418, 2446, 487, 12427, 33, 36, 7, 11090, 2666, 11, 2, 1165, 4, 115, 7254, 58, 38437, 3809, 34, 31029, 141, 16, 399, 835, 438, 160, 612, 418, 2587, 17, 2421, 6, 7822, 25, 1711, 6, 99, 1051, 17, 634, 6, 5, 1397, 29, 212, 2161, 4, 2714, 136, 2704, 4, 229, 117, 23, 1745, 1979, 2021],[96, 766, 246, 246, 120, 15785, 30, 92, 3, 133, 31, 39, 19331, 1054, 2438, 457, 460, 27, 1688, 12, 2, 794, 1338, 221, 0, 3, 2, 1113, 7, 4209, 6, 822, 8, 45, 2286, 82, 7, 1635, 2312, 5376, 77, 1215, 86, 1764, 25, 31, 623, 708, 1113, 427, 222, 8617, 40, 68, 3599, 1, 2, 633, 139, 787, 7, 1764, 25355, 1308, 82, 478, 116, 7, 794, 1, 708, 68, 427, 1646, 227, 3, 8, 1707, 19, 7, 1391, 327, 2872, 9, 3647, 65, 0, 3, 7, 36, 347, 15, 1642, 159, 2, 4539, 65, 445, 209, 3028, 15, 450, 1, 1113, 3, 8, 1502, 1982, 38, 3047, 2, 248, 1930, 57613, 1057, 135, 6169, 459, 1, 2, 380, 25, 1136, 54, 292, 31, 39, 576, 7, 5582, 794, 538, 24, 2361, 38, 1, 281, 1822, 6, 42913, 2525, 819, 1257, 1046, 7, 0, 347, 674, 209, 8, 14, 70, 4709, 2551, 979, 7822, 630, 138, 380, 589, 3868, 8114, 3, 134, 1113, 1707, 15, 450, 16, 569, 4539, 105, 0, 35957, 8, 782, 1308, 460, 23408, 4, 27, 23832, 1, 20002, 4, 794, 1081, 692, 11, 199, 6, 2653, 2, 5317, 589, 938, 31, 597, 116, 1, 2892, 83, 106, 80, 3437, 2, 1642, 850, 159, 7, 794, 4200, 34, 20, 56908, 11, 199, 6, 1046, 33, 287, 0, 881, 794, 4706, 6, 0, 15533, 25, 179, 1882, 1845, 13, 5159, 13, 27, 24, 14, 1050, 29678, 3, 133, 11, 8, 12252, 25, 1113, 39, 65, 86, 529, 1054, 500, 445, 4, 524, 0, 3, 3002, 209, 155, 14, 24289, 1050, 116, 9, 7, 0, 1827, 2, 5679, 6, 604, 197, 36, 3647, 1189, 12, 
1454, 1156, 1, 4249, 51, 4, 2199, 51, 2, 19879, 25, 419, 1189, 3647, 12, 70, 860, 860, 3626, 4, 338, 2, 1113, 287, 0, 8, 8199, 14963, 51, 2, 19879, 1998, 12, 191, 1156, 36, 2, 248, 2558, 178, 248, 96819, 4084, 178, 1016, 548, 20, 27, 1024, 221, 86, 1263, 548, 1755, 7, 794, 1, 8017, 3, 25, 23, 14, 7, 4232, 24, 604, 56, 3647, 6269, 1118, 1156, 209, 6819, 116, 7, 794, 672, 8, 14, 7, 4232, 3, 2488, 25, 380, 14, 1189, 2, 3647, 1113, 12, 1104, 694, 4259, 23, 133, 3204, 2, 0, 3, 6269, 24, 147, 342, 51, 12, 2123, 694, 3047, 33, 63383, 8, 4232, 632, 4, 2, 1113, 287, 2495, 25, 1454, 1784, 12, 91, 1156, 18, 1189, 4, 5042, 502, 91, 1784, 9, 0, 0, 58, 394, 94, 124, 325, 2188, 2112, 743, 607, 17, 425, 1817, 4196, 4858, 1453, 540, 3989, 4385, 1619, 5101, 396, 602, 467, 24582, 552, 4336, 560, 8, 15, 555, 45, 503, 452, 4, 410, 64, 4, 14, 107, 9, 2, 392, 87, 6, 2, 437, 13, 476, 1, 523, 23, 14, 527, 25, 5, 18, 22, 2, 107, 177, 13, 444, 453, 1, 433, 8, 32, 1, 2, 107, 177, 13, 76, 8, 32, 159, 574, 391, 29, 52, 1, 8, 32, 1, 479, 2, 429, 24, 2, 32, 46, 581, 76, 21, 5, 4, 209, 539, 384, 2, 32, 16, 30, 426, 278, 222, 402, 13, 578, 7, 333, 6, 2, 32, 12, 571, 418, 403, 1211, 19, 4429, 81, 344, 564, 168, 3, 3, 23248, 7, 443, 833, 386, 3, 3, 12, 571, 418, 403, 1211, 19, 24789, 115, 10276, 41126, 168, 3, 3, 3, 25, 383, 826, 14, 2677, 33, 2772, 2, 794, 147, 36, 2, 1734, 6, 16590, 2, 15, 459, 6, 7, 39338, 6, 15, 1249, 135, 7, 50211, 6, 604, 1009, 856, 2, 445, 12, 2, 694, 147, 1098, 2, 101, 9, 6806, 2388, 1, 106, 71, 380, 17, 2243, 15, 1249, 151, 36, 1009, 206, 164, 25, 24, 147, 20, 7, 2005, 13, 7, 0, 3, 3, 3, 3, 277, 340, 180, 34, 36, 1, 12692, 11, 12, 80, 356, 88, 1989, 2, 15785, 2438, 4, 0, 23, 1792, 16, 1454, 15, 1249, 9, 2, 383, 826, 3692, 133, 31, 39, 19331, 1054, 2438, 457, 460, 27, 1688, 12, 2, 794, 1338, 221, 157, 2, 1113, 7, 4209, 6, 822, 3, 3, 659, 794, 3204, 500, 9, 347, 15, 287, 1755, 1736, 794, 1470, 31, 1390, 20, 610, 1, 164, 752, 31, 43, 1054, 149, 51, 4, 1189, 136, 2399, 0, 3, 3, 697, 340, 71, 2075, 3133, 6, 1054, 3972, 556, 1113, 4, 794, 8462, 16, 1811, 191, 18, 12493, 14, 1, 36, 1113, 6505, 7, 277, 409, 6, 2, 524, 385, 16, 15, 459, 266, 141, 2691, 835, 19, 0, 1016, 25, 102, 2, 1113, 287, 2005, 16, 136, 6, 2, 1054, 149, 1189, 8624, 9, 2, 27, 3662, 2065, 24, 212, 1527, 2491, 3166, 3585, 568, 2, 27, 8195, 31, 43, 2, 27, 2075, 116, 1, 1698, 3082, 4500, 1, 27, 1113, 427, 3, 3, 1009, 3, 3, 3, 31, 39, 1288, 50, 18175, 3039, 1, 7, 7963, 1609, 0, 15417, 23, 5449, 82, 8, 3, 3, 7963, 1609, 5935, 37, 50, 2411, 1609, 7, 626, 1338, 221, 22660, 2, 1609, 1289, 105, 11, 2, 367, 149, 0, 4086, 2, 1609, 16369, 4517, 4, 1638, 1, 3441, 46770, 1609, 147, 503, 71, 18175, 985, 626, 191, 14, 2, 3039, 1, 367, 55049, 147, 521, 848, 1942, 24, 234, 0, 9, 2085, 4, 876, 657, 4, 266, 7963, 5939, 24, 234, 1117, 8208, 0, 147, 20, 71, 3406, 626, 2947, 2796, 17, 2, 1609, 24, 147, 5019, 2, 7640, 1117, 997, 9, 367, 3, 3, 1291, 2007, 5548, 3, 3, 1009, 3, 3, 3, 3, 1031, 10, 833, 12, 10450, 7, 794, 3, 3, 21338, 51, 35957, 445, 0, 2772, 11, 7, 1778, 0, 2, 380, 88, 1454, 1249, 2, 338, 147, 36, 1, 293, 311, 437, 49, 4492, 11881, 17, 7, 794, 2, 338, 287, 36, 7, 1221, 35957, 3039, 1, 2, 794, 4, 2, 794, 147, 2524, 2, 19485, 6, 8420, 92, 16, 2, 2598, 1249, 7, 380, 45, 36, 3, 3, 3, 8, 1707, 19, 7, 1391, 327, 2872, 9, 3647, 65, 0, 3, 7, 36, 347, 15, 1642, 159, 2, 4539, 65, 445, 209, 3028, 15, 450, 1, 1113, 764, 17, 1054, 1811, 4, 91, 7286, 0, 385, 9, 2085, 464, 35957, 0, 37, 0, 3, 3, 3, 8, 1502, 1982, 38, 3047, 2, 248, 1930, 57613, 1057, 135, 6169, 459, 
1, 2, 380, 77, 133, 1754, 27, 794, 0, 3, 3, 134, 1113, 1707, 15, 450, 16, 569, 4539, 105, 0, 35957, 1113, 39, 2000, 556, 569, 1249, 628, 2, 10291, 209, 1707, 1054, 252, 7286, 9, 99, 92, 16, 3647, 4539, 464, 0, 3647, 4539, 14813, 10227, 15, 450, 16, 569, 4539, 4, 4010, 236, 3, 3, 3, 8, 1502, 1754, 1113, 1, 8017, 15, 32, 2728, 17, 2, 3647, 7286, 2728, 77, 1754, 24289, 1050, 8402, 16, 4539, 133, 11, 8, 12252, 25, 1113, 39, 65, 86, 529, 1054, 500, 445, 4, 524, 187, 3002, 209, 155, 14, 24289, 1050, 116, 9, 7, 0, 3, 3, 3, 3, 3, 3, 3, 4959, 144, 4712, 1720, 4010, 0, 13, 1113, 287, 2524, 8, 0, 206, 56, 7, 2005, 1332, 292, 3, 3, 0, 3, 3, 3, 3, 3, 1727, 708, 389, 0, 1302, 1, 20, 700, 21, 7, 794, 2488, 2, 338, 39, 3204, 86, 529, 30233, 708, 389, 1407, 666, 389, 3, 3, 6819, 39, 20, 700, 1332, 292, 1113, 287, 3204, 666, 389, 421, 12, 2, 7286, 23, 7542, 13, 9614, 0, 3, 3, 3, 3, 3, 4959, 144, 4712, 6269, 46770, 794, 147, 116, 1, 139, 2, 6269, 102, 24, 151, 342, 51, 67, 2, 380, 3445, 153, 91, 694, 3, 3, 25, 23, 14, 7, 4232, 24, 604, 56, 3647, 6269, 1118, 1156, 209, 6819, 116, 7, 794, 2488, 25, 380, 14, 1189, 2, 3647, 1113, 12, 1104, 694, 4259, 23, 133, 3204, 2, 438, 6269, 24, 147, 342, 51, 12, 2123, 694, 3047, 33, 63383, 8, 4232, 3, 3, 134, 3, 3, 3, 1009, 3, 3, 3, 3, 3, 4, 672, 25, 2, 380, 6035, 153, 694, 11, 7, 9912, 2244, 4, 2, 852, 14, 84, 3707, 12, 2, 694, 30, 24, 852, 14, 36579, 3, 3, 10276, 3, 3, 3, 3, 3, 12, 418, 562, 1211, 19, 15494, 81, 94, 124, 168, 3, 3, 2753, 11, 3266, 4, 10276, 183, 8, 237, 3, 0, 29, 56, 31558, 1956, 1109, 386, 25, 31, 18, 610, 1, 189, 7963, 9535, 9, 0, 6, 2, 1054, 5939, 31, 116, 23, 548, 117, 1778, 9, 5, 1397, 1, 149, 2, 18175, 3039, 9, 8, 227, 30, 6, 8, 2096, 50, 2856, 1, 782, 37, 460, 6, 2, 1054, 201, 12, 2, 694, 37, 938, 3, 3, 0, 39, 5, 1132, 12, 2, 1086, 6, 25, 31, 18, 610, 1, 708, 97, 248, 13, 30, 1054, 445, 201, 1, 2, 694, 95, 65, 31, 116, 4539, 9, 419, 28356, 24, 31, 116, 236, 9, 3, 3, 3, 21338, 51, 35957, 445, 3, 4959, 144, 4712, 1720, 4010, 2067, 2066, 50, 15, 16, 1095, 1, 173, 223, 1191, 156, 3, 1727, 708, 389, 3, 4959, 144, 4712, 6269, 3, 3, 374, 95, 25, 2, 604, 694, 14, 0, 151, 34, 6005, 30, 1811, 6269, 0, 1206, 3009, 1016, 1016, 7345, 30, 445, 2732, 4, 8071, 12, 694, 1659, 3, 3, 3, 394, 3, 3, 94, 124, 3, 3, 325, 3, 2188, 2112, 3, 743, 607, 17, 425, 1817, 3, 3, 4196, 4858, 1453, 540, 3989, 3, 4385, 1619, 5101, 3, 3, 396, 602, 3, 467, 24582, 3, 552, 4336, 3, 3, 3, 560, 8, 15, 555, 45, 503, 452, 4, 410, 64, 4, 14, 107, 9, 2, 392, 87, 6, 2, 437, 13, 476, 1, 523, 23, 14, 527, 25, 5, 18, 22, 2, 107, 177, 13, 444, 453, 1, 433, 8, 32, 1, 2, 107, 177, 13, 76, 8, 32, 159, 574, 391, 29, 52, 1, 8, 32, 1, 479, 2, 429, 24, 2, 32, 46, 581, 76, 21, 5, 4, 209, 539, 384, 2, 32, 16, 30, 426, 278, 222, 402, 13, 578, 7, 333, 6, 2, 32, 3, 12, 685, 418, 562, 1211, 19, 8430, 81, 180, 291, 4283, 168, 3, 3, 3, 304, 1397, 3, 3, 3, 419, 399, 2150, 16, 13303, 742, 77, 33, 1604, 24, 11, 2, 10245, 33, 287, 387, 1965, 17, 5, 136, 496, 33, 36, 3038, 8, 1165, 3, 3, 3, 3466, 203, 9, 18175, 383, 826, 9535, 33, 3038, 24, 2, 138, 292, 1, 212, 147, 20, 1, 87, 7963, 17223, 4, 6392, 18175, 6200, 1, 502, 7963, 9535, 1, 7462, 17, 236, 3, 3, 3, 155, 18, 7, 298, 6, 7963, 6806, 9535, 24, 33, 36, 158, 8420, 4, 5926, 17, 2, 1054, 1206, 4, 5344, 24, 70, 3048, 149, 1675, 3, 3, 3, 37, 667, 37, 33, 53, 13303, 742, 9, 375, 149, 1267, 319, 5, 2, 132, 6, 1054, 1206, 7882, 17, 7963, 9535, 24, 1708, 502, 1206, 4, 5344, 37, 229, 37, 7, 132, 6, 30, 2, 7963, 9535, 24, 33, 36, 702, 3, 3, 3, 3, 670, 3, 180, 3, 3, 3, 3, 3, 
180, 291, 4283, 3, 3, 85321, 3, 3, 3, 3, 12, 871, 573, 507, 308, 507, 1211, 19, 34986, 180, 291, 4283, 168, 3, 3, 755, 3, 1009, 3, 31, 172, 387, 17, 7, 132, 6, 2, 1054, 1463, 1302, 1, 3568, 3647, 56, 814, 716, 178, 12, 367, 56, 95, 399, 1008, 1, 20, 3157, 4, 212, 16, 155, 3, 3, 180, 14, 8, 583, 5, 13, 865, 23477, 287, 24378, 3, 3, 33, 39, 117, 544, 7, 132, 4, 836, 1, 189, 54, 9535, 24, 18, 178, 9, 7640, 1117, 24, 39, 65, 2, 828, 1267, 133, 1237, 1, 2021, 1, 905, 1134, 1, 103, 141, 17, 8, 1009, 3, 3, 33, 2129, 164, 6, 0, 0, 17069, 102, 787, 7, 277, 707, 1, 387, 417, 3, 3, 3, 670, 3, 180, 3, 1009, 3, 3, 3, 12, 871, 308, 573, 1211, 344, 564, 168, 3, 3, 3, 3, 8815, 229, 419, 22, 254, 25, 2, 217, 94, 14, 1882, 61, 36, 6106, 11, 20476, 149, 82, 8, 23, 330, 34970, 511, 1, 2310, 77, 287, 20, 7, 3464, 930, 1, 43, 227, 3, 3, 33, 147, 914, 86, 938, 77, 9043, 12, 80, 8195, 99, 9535, 18, 4, 25, 155, 14, 447, 149, 178, 9, 850, 3, 3, 155, 18, 136, 1054, 3668, 9, 367, 544, 37, 0, 997, 77, 33, 36, 58, 1220, 80, 277, 151, 201, 4, 95, 710, 151, 737, 3, 1009, 3, 33, 115, 590, 1, 27073, 8, 201, 83, 31, 172, 387, 17, 7, 132, 6, 2, 1054, 1463, 1302, 1, 3568, 3647, 56, 814, 716, 178, 12, 367, 56, 95, 399, 1008, 1, 20, 3157, 4, 212, 16, 155, 3, 89, 16, 70, 338, 3, 12, 418, 573, 1211, 19, 13905, 81, 180, 291, 4283, 168, 3, 3, 3, 3, 419, 22, 50, 2545, 11, 8, 1666, 6, 201, 77, 33, 2772, 2, 84, 1027, 31, 287, 65, 14, 836, 1, 189, 71, 6, 99, 1530, 94, 1018, 4, 3578, 17, 236, 0, 20476, 544, 9535, 151, 147, 462, 38, 7, 1355, 1623, 1, 8, 536, 787, 1884, 153, 6106, 3, 3, 3, 95, 65, 5, 312, 3, 3, 3, 670, 3, 180, 3, 3, 3, 3, 3, 180, 291, 4283, 3, 3, 85321, 3, 3, 12, 871, 573, 507, 308, 507, 1211, 19, 6751, 94, 124, 168, 3, 220, 9, 2, 742, 1397, 3, 3, 120, 20476, 149, 1, 367, 8, 147, 20, 7, 1764, 35963, 19324, 25640, 4, 6133, 9, 38, 3, 3, 95, 25, 31, 1726, 83, 8, 20476, 201, 155, 18, 4416, 6, 20696, 1530, 24, 287, 65, 2, 20476, 201, 1659, 3, 3, 12, 871, 308, 573, 1211, 180, 291, 4283, 168, 3, 3, 3, 304, 3, 3, 12, 871, 573, 507, 308, 507, 1211, 19, 37101, 344, 564, 168, 3, 3, 755, 1397, 3, 3, 33, 1255, 17, 3920, 24, 1054, 445, 347, 15, 287, 43, 36184, 478, 36, 1, 1075, 136, 2399, 3607, 1, 2495, 80, 460, 478, 36, 1, 2613, 51, 9, 8, 787, 341, 33, 36, 25542, 778, 2, 1054, 149, 51, 4, 1189, 12, 1388, 6635, 6, 15, 6012, 829, 1, 43, 7, 681, 9, 95, 31, 18, 8587, 17, 3, 344, 88, 7, 277, 340, 31, 18, 320, 395, 1, 659, 7, 1813, 188, 167, 6, 2, 1054, 9, 8, 4, 31, 172, 1075, 502, 3607, 1, 56, 80, 2, 538, 9443, 3, 3, 3, 3, 77, 33, 133, 56, 2, 535, 6, 8, 790, 4, 24, 428, 604, 147, 122, 1054, 1811, 30, 2, 55, 22, 106, 95, 5, 2434, 155, 14, 2, 5679, 6, 31008, 8, 790, 25, 86, 2040, 1345, 4, 10888, 794, 1897, 24, 292, 2879, 865, 86, 73, 9, 95, 5, 2434, 77, 116, 1, 525, 7, 1494, 9, 1022, 6, 2, 0, 3, 8, 287, 20, 7, 2011, 1, 7948, 2, 2370, 6, 15785, 30, 2006, 92, 1009, 3, 3, 3, 3, 3, 3, 1301, 9, 729, 4539, 287, 20, 7232, 17, 5161, 729, 37, 651, 6, 2, 68, 582, 2992, 77, 34, 36, 1, 293, 3, 3, 33, 133, 1255, 23, 147, 20, 4454, 1, 65, 37, 460, 445, 12, 2, 204, 1113, 37, 938, 155, 18, 7, 1931, 13209, 1, 8, 3, 31, 206, 122, 1, 11191, 511, 2, 1937, 2558, 445, 30, 2, 0, 3, 7, 277, 409, 6, 2, 1054, 445, 149, 1675, 3048, 9535, 3157, 21, 2, 1054, 711, 135, 461, 20476, 8, 1, 367, 147, 248, 1975, 20, 0, 15610, 3, 33, 65, 36, 3569, 26803, 24, 31, 147, 451, 610, 1, 4088, 7, 3476, 409, 6, 24, 149, 1, 1075, 12, 367, 1156, 13755, 2, 383, 826, 9535, 31, 18, 226, 3, 133, 155, 18, 136, 2158, 6, 149, 191, 1075, 12, 3334, 21, 2, 292, 82, 5225, 60597, 24, 408, 20, 2796, 
12, 471, 447, 1452, 222, 2089, 7, 1520, 31, 287, 19473, 12, 35, 529, 7, 538, 82, 5225, 60597, 77, 24, 147, 234, 1511, 1010, 6, 8091, 201, 3, 3, 3, 394, 3, 3, 94, 124, 3, 3, 325, 3, 2188, 2112, 3, 743, 607, 17, 425, 1817, 3, 3, 4196, 4858, 1453, 540, 3989, 3, 4385, 1619, 5101, 3, 3, 396, 602, 3, 467, 24582, 3, 552, 4336, 3, 3, 3, 560, 8, 15, 555, 45, 503, 452, 4, 410, 64, 4, 14, 107, 9, 2, 392, 87, 6, 2, 437, 13, 476, 1, 523, 23, 14, 527, 25, 5, 18, 22, 2, 107, 177, 13, 444, 453, 1, 433, 8, 32, 1, 2, 107, 177, 13, 76, 8, 32, 159, 574, 391, 29, 52, 1, 8, 32, 1, 479, 2, 429, 24, 2, 32, 46, 581, 76, 21, 5, 4, 209, 539, 384, 2, 32, 16, 30, 426, 278, 222, 402, 13, 578, 7, 333, 6, 2, 32, 3, 3, 3, 3, 3, 3, 3, 3]] output = Parallel(n_jobs=n_jobs, verbose=0, pre_dispatch='1.5*n_jobs')(delayed(processing)(vector) for vector in generator(vectors)) print output
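# A minimal, self-contained sketch (not from the original) of the joblib
# pattern used above: a generator feeding delayed() calls through Parallel
# with pre_dispatch. pad_and_predict is a stand-in for the Keras classifier,
# so the snippet runs without a trained model or the large vectors literal.
from joblib import Parallel, delayed, cpu_count

def pad_and_predict(vector, maxlen=40):
    # stand-in for sequence.pad_sequences + clf.predict_classes
    padded = ([0] * (maxlen - len(vector)) + vector)[-maxlen:]
    return int(sum(padded) % 2)   # dummy "label"

def vector_generator(vectors):
    for idx, vector in enumerate(vectors):
        print(idx)
        yield vector

vectors = [[1, 2, 3], [4, 5, 6, 7], [8, 9]]
labels = Parallel(n_jobs=cpu_count(), pre_dispatch='1.5*n_jobs')(
    delayed(pad_and_predict)(v) for v in vector_generator(vectors))
print(labels)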
def ss_plot():
    # Pandas method of importing data frame and getting extents
    db_connect = "dbname='reach_4a' user='******' host='localhost' port='9000'"
    conn = psycopg2.connect(db_connect)
    df = pd.read_sql_query('SELECT * from mb_may_2012_1m tt inner join ( SELECT s.easting, s.northing, s.texture, s.sidescan_intensity FROM ss_2012_05 s) ss on tt.easting=ss.easting and tt.northing=ss.northing;', con=conn)
    minE = df['easting'].min()[0]
    maxE = df['easting'].max()[0]
    minN = df['northing'].min()[0]
    maxN = df['northing'].max()[0]
    conn.close()
    print 'Done Importing Data from Database'

    # Create grid for contourf plot
    res = 1
    grid_x, grid_y = np.meshgrid(np.arange(np.floor(minE), np.ceil(maxE), res),
                                 np.arange(np.floor(minN), np.ceil(maxN), res))
    grid_lon, grid_lat = trans(grid_x, grid_y, inverse=True)

    # Re-sampling procedure
    m_lon, m_lat = trans(df['easting'].values.flatten(), df['northing'].values.flatten(), inverse=True)
    orig_def = geometry.SwathDefinition(lons=m_lon, lats=m_lat)
    target_def = geometry.SwathDefinition(lons=grid_lon.flatten(), lats=grid_lat.flatten())
    print 'Now Resampling...'
    result = kd_tree.resample_nearest(orig_def, df['sidescan_intensity'].values.flatten(), target_def,
                                      radius_of_influence=1, fill_value=None, nprocs=cpu_count())
    print 'Done Resampling!!!'

    # format side scan intensities grid for plotting
    gridded_result = np.reshape(result, np.shape(grid_lon))
    gridded_result = np.squeeze(gridded_result)
    gridded_result[np.isinf(gridded_result)] = np.nan
    gridded_result[gridded_result <= 0] = np.nan
    grid2plot = np.ma.masked_invalid(gridded_result)
    print 'Now mapping...'

    # Create Figure
    fig = plt.figure(frameon=True)
    ax = plt.subplot(1, 1, 1)
    map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1],
                  llcrnrlon=np.min(grid_lon) - 0.0009, llcrnrlat=np.min(grid_lat) - 0.0009,
                  urcrnrlon=np.max(grid_lon) + 0.0009, urcrnrlat=np.max(grid_lat) + 0.0009)
    gx, gy = map.projtran(grid_lon, grid_lat)
    map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery',
                    xpixels=1000, ypixels=None, dpi=1200)
    im = map.pcolormesh(gx, gy, grid2plot, cmap='gray', vmin=0.1, vmax=30)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    cbr = plt.colorbar(im, cax=cax)
    cbr.set_label('Sidescan Intensity [dBw]', size=8)
    for t in cbr.ax.get_yticklabels():
        t.set_fontsize(8)
    plt.savefig(r'C:\workspace\Texture_Classification\output\May2012_1m_sidescan_intensity.png')
# write the per-frame results into the memory-mapped output arrays
pdat = np.load(posfile, mmap_mode='r+')
pdat[t, 0, :] = pos_center
pdat[t, 1, :] = pos_head
pdat[t, 2, :] = pos_tail
#pdat.close();

cdat = np.load(curfile, mmap_mode='r+')
cdat[t, 0] = curvature_mean
cdat[t, 1] = curvature_variation
#cdat.close();

from joblib import Parallel, delayed, cpu_count

# run the per-frame shape analysis in parallel, over all frames or selected ranges
Parallel(n_jobs=cpu_count())(delayed(analyse_shape_at_time)(t) for t in xrange(ntime))
Parallel(n_jobs=cpu_count())(delayed(analyse_shape_at_time)(t) for t in xrange(3212, 3300))
Parallel(n_jobs=cpu_count())(delayed(analyse_shape_at_time)(t) for t in xrange(3300, ntime))
Parallel(n_jobs=cpu_count())(delayed(analyse_shape_at_time)(t) for t in xrange(500000, ntime))

# single-frame checks
analyse_shape_at_time(500000)
analyse_shape_at_time(513477)

i = 0
i += 1
analyse_shape_at_time(513466 + i)
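# Self-contained sketch (not from the original) of the pattern above: each
# worker re-opens the .npy file as a writable memmap and fills in the row
# for its frame index. The file name, shapes, and stand-in values are made up.
import numpy as np
from joblib import Parallel, delayed, cpu_count

ntime = 16
posfile = 'positions.npy'
np.save(posfile, np.zeros((ntime, 3, 2)))   # centre / head / tail, 2-D points

def analyse_shape_at_time(t):
    pdat = np.load(posfile, mmap_mode='r+')
    pdat[t, 0, :] = [t, t]        # stand-in for pos_center
    pdat[t, 1, :] = [t, t + 1]    # stand-in for pos_head
    pdat[t, 2, :] = [t, t - 1]    # stand-in for pos_tail
    pdat.flush()

Parallel(n_jobs=cpu_count())(delayed(analyse_shape_at_time)(t) for t in range(ntime))
print(np.load(posfile)[:3])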
def texture(humfile, sonpath, win, shift, doplot, density, numclasses, maxscale, notes): ''' Create a texture lengthscale map using the algorithm detailed by Buscombe et al. (forthcoming) This textural lengthscale is not a direct measure of grain size. Rather, it is a statistical representation that integrates over many attributes of bed texture, of which grain size is the most important. The technique is a physically based means to identify regions of texture within a sidescan echogram, and could provide a basis for objective, automated riverbed sediment classification. Syntax ---------- [] = PyHum.texture(humfile, sonpath, win, shift, doplot, density, numclasses, maxscale, notes) Parameters ---------- humfile : str path to the .DAT file sonpath : str path where the *.SON files are win : int, *optional* [Default=100] pixel in pixels of the moving window shift : int, *optional* [Default=10] shift in pixels for moving window operation doplot : int, *optional* [Default=1] if 1, make plots, otherwise do not make plots density : int, *optional* [Default=win/2] echogram will be sampled every 'density' pixels numclasses : int, *optional* [Default=4] number of 'k means' that the texture lengthscale will be segmented into maxscale : int, *optional* [Default=20] Max scale as inverse fraction of data length for wavelet analysis notes : int, *optional* [Default=100] notes per octave for wavelet analysis Returns ------- sonpath+base+'_data_class.dat': memory-mapped file contains the texture lengthscale map sonpath+base+'_data_kclass.dat': memory-mapped file contains the k-means segmented texture lengthscale map References ---------- .. [1] Buscombe, D., Grams, P.E., and Smith, S.M.C., Automated riverbed sediment classification using low-cost sidescan sonar. submitted to Journal of Hydraulic Engineering ''' # prompt user to supply file if no input file given if not humfile: print 'An input file is required!!!!!!' Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing inputfile = askopenfilename(filetypes=[("DAT files","*.DAT")]) # prompt user to supply directory if no input sonpath is given if not sonpath: print 'A *.SON directory is required!!!!!!' 
Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing sonpath = askdirectory() # print given arguments to screen and convert data type where necessary if humfile: print 'Input file is %s' % (humfile) if sonpath: print 'Sonar file path is %s' % (sonpath) if win: win = np.asarray(win,int) print 'Window is %s square pixels' % (str(win)) if shift: shift = np.asarray(shift,int) print 'Min shift is %s pixels' % (str(shift)) if density: density = np.asarray(density,int) print 'Image will be sampled every %s pixels' % (str(density)) if numclasses: numclasses = np.asarray(numclasses,int) print 'Number of sediment classes: %s' % (str(numclasses)) if maxscale: maxscale = np.asarray(maxscale,int) print 'Max scale as inverse fraction of data length: %s' % (str(maxscale)) if notes: notes = np.asarray(notes,int) print 'Notes per octave: %s' % (str(notes)) if doplot: doplot = int(doplot) if doplot==0: print "Plots will not be made" print '[Default] Number of processors is %s' % (str(cpu_count())) if not win: win = 100 print '[Default] Window is %s square pixels' % (str(win)) if not shift: shift = 10 print '[Default] Min shift is %s pixels' % (str(shift)) if not density: density = win/2 print '[Default] Echogram will be sampled every %s pixels' % (str(density)) if not numclasses: numclasses = 4 print '[Default] Number of sediment classes: %s' % (str(numclasses)) if not maxscale: maxscale = 20 print '[Default] Max scale as inverse fraction of data length: %s ' % (str(maxscale)) if not notes: notes = 4 print '[Default] Notes per octave: %s ' % (str(notes)) if not doplot: if doplot != 0: doplot = 1 print "[Default] Plots will be made" ######################################################## ######################################################## # start timer if os.name=='posix': # true if linux/mac or cygwin on windows start = time.time() else: # windows start = time.clock() # if son path name supplied has no separator at end, put one on if sonpath[-1]!=os.sep: sonpath = sonpath + os.sep base = humfile.split('.DAT') # get base of file name for output base = base[0].split(os.sep)[-1] ft = 1/loadmat(sonpath+base+'meta.mat')['pix_m'] pix_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['pix_m']) dep_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dep_m']) dist_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dist_m']) ### port print "processing port side ..." # load memory mapped scan ... port shape_port = np.squeeze(loadmat(sonpath+base+'meta.mat')['shape_port']) if shape_port!='': port_fp = np.memmap(sonpath+base+'_data_port_la.dat', dtype='float32', mode='r', shape=tuple(shape_port)) port_fp2 = np.memmap(sonpath+base+'_data_port_l.dat', dtype='float32', mode='r', shape=tuple(shape_port)) ### star print "processing starboard side ..." # load memory mapped scan ... 
port shape_star = np.squeeze(loadmat(sonpath+base+'meta.mat')['shape_star']) if shape_star!='': star_fp = np.memmap(sonpath+base+'_data_star_la.dat', dtype='float32', mode='r', shape=tuple(shape_star)) star_fp2 = np.memmap(sonpath+base+'_data_star_l.dat', dtype='float32', mode='r', shape=tuple(shape_star)) shape = shape_port.copy() shape[1] = shape_port[1] + shape_star[1] # create memory mapped file for Sp fp = np.memmap(sonpath+base+'_data_class.dat', dtype='float32', mode='w+', shape=tuple(shape)) #SRT = [] for p in xrange(len(port_fp)): Z,ind = humutils.sliding_window(np.vstack((np.flipud(port_fp[p]), star_fp[p])),(win,win),(shift,shift)) try: print "%s windows to process with a density of %s" % (str(len(Z)), str(density)) #% (str(len(Z)), str(density)) # do the wavelet clacs and get the stats d = Parallel(n_jobs = -1, verbose=0)(delayed(parallel_me)(Z[k], maxscale, notes, win, density) for k in xrange(len(Z))) except: print "memory error: trying serial" d = Parallel(n_jobs = 1, verbose=0)(delayed(parallel_me)(Z[k], maxscale, notes, win, density) for k in xrange(len(Z))) srt = np.reshape(d , ( ind[0], ind[1] ) ) del d try: print "%s windows to process with a density of %s" % (str(len(Z)), str(density)) #% (str(len(Z)), str(density)) # do the wavelet clacs and get the stats d = Parallel(n_jobs = -1, verbose=0)(delayed(parallel_me)(Z[k].T, maxscale, notes, win, density) for k in xrange(len(Z))) except: print "memory error: trying serial" d = Parallel(n_jobs = 1, verbose=0)(delayed(parallel_me)(Z[k].T, maxscale, notes, win, density) for k in xrange(len(Z))) srt2 = np.reshape(d , ( ind[0], ind[1] ) ) del d Z = None SRT = srt+srt2 del srt, srt2 Snn = SRT.copy() del SRT # replace nans using infilling algorithm rn = replace_nans.RN(Snn.astype('float64'),1000,0.01,2,'localmean') Snn = rn.getdata() del rn Ny, Nx = np.shape( np.vstack((np.flipud(port_fp[p]), star_fp[p])) ) Snn = median_filter(Snn,(int(Nx/100),int(Ny/100))) Sp = humutils.im_resize(Snn,Nx,Ny) del Snn Sp[np.isnan(np.vstack((np.flipud(port_fp[p]), star_fp[p])))] = np.nan Sp[np.isnan(np.vstack((np.flipud(port_fp2[p]), star_fp2[p])))] = np.nan extent = shape_port[1] Zdist = dist_m[shape_port[-1]*p:shape_port[-1]*(p+1)] yvec = np.linspace(pix_m,extent*pix_m,extent) d = dep_m[shape_port[-1]*p:shape_port[-1]*(p+1)] R_fp = np.memmap(sonpath+base+'_data_range.dat', dtype='float32', mode='r', shape=tuple(shape_star)) #R = np.ones(np.shape(Sp)) #for k in range(len(d)): # R[:,k] = np.hstack((np.flipud(d[k]/yvec), d[k]/yvec)) #if len(d)<np.shape(port_fp[p])[1]: # d = np.append(d,d[-1]) #Zbed = np.squeeze(d*ft) #R1 = R[extent:,:] #R2 = np.flipud(R[:extent,:]) ## shift proportionally depending on where the bed is #for k in xrange(np.shape(R1)[1]): # R1[:,k] = np.r_[R1[Zbed[k]:,k], np.zeros( (np.shape(R1)[0] - np.shape(R1[Zbed[k]:,k])[0] ,) )] #for k in xrange(np.shape(R2)[1]): # R2[:,k] = np.r_[R2[Zbed[k]:,k], np.zeros( (np.shape(R2)[0] - np.shape(R2[Zbed[k]:,k])[0] ,) )] #R = np.vstack((np.flipud(R2),R1)) #del R1, R2 R = np.vstack((np.flipud(R_fp[0]),R_fp[0])) R[R>0.8] = np.nan rn = replace_nans.RN(R.astype('float64'),1000,0.01,2,'localmean') R = rn.getdata() del rn Sp = (Sp**2) * np.cos(R) / shift**2 fp[p] = Sp.astype('float32') del Sp del fp # flush data to file class_fp = np.memmap(sonpath+base+'_data_class.dat', dtype='float32', mode='r', shape=tuple(shape)) dist_m = np.squeeze(loadmat(sonpath+base+'meta.mat')['dist_m']) ######################################################## ######################################################## if 
doplot==1: for p in xrange(len(star_fp)): plot_class(dist_m, shape_port, port_fp[p], star_fp[p], class_fp[p], ft, humfile, sonpath, base, p) for p in xrange(len(star_fp)): plot_contours(dist_m, shape_port, class_fp[p], ft, humfile, sonpath, base, numclasses, p) ####################################################### # k-means fp = np.memmap(sonpath+base+'_data_kclass.dat', dtype='float32', mode='w+', shape=tuple(shape)) for p in xrange(len(port_fp)): Sk = class_fp[p].copy() Sk[np.isnan(Sk)] = 0 wc, values = humutils.cut_kmeans(Sk,numclasses+1) wc[Sk==0] = np.nan del Sk fp[p] = wc.astype('float32') del wc del fp kclass_fp = np.memmap(sonpath+base+'_data_kclass.dat', dtype='float32', mode='r', shape=tuple(shape)) ######################################################## if doplot==1: for p in xrange(len(star_fp)): plot_kmeans(dist_m, shape_port, port_fp[p], star_fp[p], kclass_fp[p], ft, humfile, sonpath, base, p) if os.name=='posix': # true if linux/mac elapsed = (time.time() - start) else: # windows elapsed = (time.clock() - start) print "Processing took ", elapsed , "seconds to analyse" print "Done!"
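# Hypothetical usage sketch, not taken from the PyHum documentation: the file
# names below are invented, and it assumes texture() is exposed as
# PyHum.texture as the Syntax line in its docstring indicates. The keyword
# values mirror the defaults assigned inside the function.
import PyHum

humfile = 'test.DAT'      # assumed Humminbird .DAT file
sonpath = 'test_data/'    # assumed folder containing the *.SON files

PyHum.texture(humfile, sonpath,
              win=100, shift=10, doplot=1, density=50,
              numclasses=4, maxscale=20, notes=4)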
def getgrid_lm(humlon, humlat, merge, influence, minX, maxX, minY, maxY, res, mode, trans, nn, wf, sigmas, eps):
    complete = 0
    while complete == 0:
        try:
            grid_x, grid_y, res = getmesh(minX, maxX, minY, maxY, res)
            longrid, latgrid = trans(grid_x, grid_y, inverse=True)
            shape = np.shape(grid_x)
            targ_def = pyresample.geometry.SwathDefinition(lons=longrid.flatten(), lats=latgrid.flatten())
            del longrid, latgrid
            orig_def = pyresample.geometry.SwathDefinition(lons=humlon.flatten(), lats=humlat.flatten())

            if mode == 1:
                try:
                    dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def,
                                                              radius_of_influence=res*10, fill_value=None,
                                                              nprocs=cpu_count())
                except:
                    dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def,
                                                              radius_of_influence=res*10, fill_value=None,
                                                              nprocs=1)
                stdev = None
                counts = None
            elif mode == 2:
                try:
                    dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(), targ_def,
                                                                            radius_of_influence=res*10, neighbours=nn,
                                                                            weight_funcs=wf, fill_value=None,
                                                                            with_uncert=True, nprocs=cpu_count())
                except:
                    dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(), targ_def,
                                                                            radius_of_influence=res*10, neighbours=nn,
                                                                            weight_funcs=wf, fill_value=None,
                                                                            with_uncert=True, nprocs=1)
            else:
                try:
                    dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def,
                                                                           radius_of_influence=res*10, neighbours=nn,
                                                                           sigmas=sigmas, fill_value=None,
                                                                           with_uncert=np.nan, nprocs=cpu_count(),
                                                                           epsilon=eps)
                except:
                    dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def,
                                                                           radius_of_influence=res*10, neighbours=nn,
                                                                           sigmas=sigmas, fill_value=None,
                                                                           with_uncert=np.nan, nprocs=1,
                                                                           epsilon=eps)

            if 'dat' in locals():
                complete = 1
        except:
            print("memory error: trying grid resolution of %s" % (str(res*2)))
            res = res*2

    return dat, stdev, counts, res, complete, shape
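# Generic sketch (not PyHum code) of the retry idiom used in getgrid_lm above:
# attempt the gridding and, on a memory error, double the grid resolution
# (i.e. coarsen the grid) and try again. grid_fn is a stand-in for whatever
# resampling call is being protected.
def grid_with_backoff(grid_fn, res, max_tries=5):
    for _ in range(max_tries):
        try:
            return grid_fn(res), res
        except MemoryError:
            print("memory error: trying grid resolution of %s" % str(res * 2))
            res = res * 2
    raise RuntimeError("gridding still failing at resolution %s" % str(res))

# example with a dummy gridding function that only succeeds on a coarse grid
def dummy_grid(res):
    if res < 4:
        raise MemoryError
    return [[0.0] * 4 for _ in range(4)]

dat, final_res = grid_with_backoff(dummy_grid, 1)   # succeeds once res reaches 4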
def make_map(e, n, t, d, dat_port, dat_star, data_R, pix_m, res, cs2cs_args, sonpath, p, mode, nn, numstdevs, c, dx, use_uncorrected, scalemax): #dogrid, influence,dowrite, thres=5 trans = pyproj.Proj(init=cs2cs_args) mp = np.nanmean(dat_port) ms = np.nanmean(dat_star) if mp>ms: merge = np.vstack((dat_port,dat_star*(mp/ms))) else: merge = np.vstack((dat_port*(ms/mp),dat_star)) del dat_port, dat_star merge[np.isnan(merge)] = 0 merge = merge[:,:len(n)] ## actual along-track resolution is this: dx times dy = Af tmp = data_R * dx * (c*0.007 / 2) #dx = np.arcsin(c/(1000*meta['t']*meta['f'])) res_grid = np.sqrt(np.vstack((tmp, tmp))) del tmp res_grid = res_grid[:np.shape(merge)[0],:np.shape(merge)[1]] #if use_uncorrected != 1: # merge = merge - 10*np.log10(res_grid) res_grid = res_grid.astype('float32') merge[np.isnan(merge)] = 0 merge[merge<0] = 0 merge = merge.astype('float32') merge = denoise_tv_chambolle(merge.copy(), weight=.2, multichannel=False).astype('float32') R = np.vstack((np.flipud(data_R),data_R)) del data_R R = R[:np.shape(merge)[0],:np.shape(merge)[1]] # get number pixels in scan line extent = int(np.shape(merge)[0]/2) yvec = np.squeeze(np.linspace(np.squeeze(pix_m),extent*np.squeeze(pix_m),extent)) X, Y, D, h, t = getXY(e,n,yvec,np.squeeze(d),t,extent) X = X.astype('float32') Y = Y.astype('float32') D = D.astype('float32') h = h.astype('float32') t = t.astype('float32') X = X.astype('float32') D[np.isnan(D)] = 0 h[np.isnan(h)] = 0 t[np.isnan(t)] = 0 X = X[np.where(np.logical_not(np.isnan(Y)))] merge = merge.flatten()[np.where(np.logical_not(np.isnan(Y)))] res_grid = res_grid.flatten()[np.where(np.logical_not(np.isnan(Y)))] Y = Y[np.where(np.logical_not(np.isnan(Y)))] D = D[np.where(np.logical_not(np.isnan(Y)))] R = R.flatten()[np.where(np.logical_not(np.isnan(Y)))] h = h[np.where(np.logical_not(np.isnan(Y)))] t = t[np.where(np.logical_not(np.isnan(Y)))] Y = Y[np.where(np.logical_not(np.isnan(X)))] merge = merge.flatten()[np.where(np.logical_not(np.isnan(X)))] res_grid = res_grid.flatten()[np.where(np.logical_not(np.isnan(X)))] X = X[np.where(np.logical_not(np.isnan(X)))] D = D[np.where(np.logical_not(np.isnan(X)))] R = R.flatten()[np.where(np.logical_not(np.isnan(X)))] h = h[np.where(np.logical_not(np.isnan(X)))] t = t[np.where(np.logical_not(np.isnan(X)))] X = X[np.where(np.logical_not(np.isnan(merge)))] Y = Y[np.where(np.logical_not(np.isnan(merge)))] merge = merge[np.where(np.logical_not(np.isnan(merge)))] res_grid = res_grid.flatten()[np.where(np.logical_not(np.isnan(merge)))] D = D[np.where(np.logical_not(np.isnan(merge)))] R = R[np.where(np.logical_not(np.isnan(merge)))] h = h[np.where(np.logical_not(np.isnan(merge)))] t = t[np.where(np.logical_not(np.isnan(merge)))] X = X[np.where(np.logical_not(np.isinf(merge)))] Y = Y[np.where(np.logical_not(np.isinf(merge)))] merge = merge[np.where(np.logical_not(np.isinf(merge)))] res_grid = res_grid.flatten()[np.where(np.logical_not(np.isinf(merge)))] D = D[np.where(np.logical_not(np.isinf(merge)))] R = R[np.where(np.logical_not(np.isinf(merge)))] h = h[np.where(np.logical_not(np.isinf(merge)))] t = t[np.where(np.logical_not(np.isinf(merge)))] print("writing point cloud") #if dowrite==1: ## write raw bs to file outfile = os.path.normpath(os.path.join(sonpath,'x_y_ss_raw'+str(p)+'.asc')) ##write.txtwrite( outfile, np.hstack((humutils.ascol(X.flatten()),humutils.ascol(Y.flatten()), humutils.ascol(merge.flatten()), humutils.ascol(D.flatten()), humutils.ascol(R.flatten()), humutils.ascol(h.flatten()), humutils.ascol(t.flatten()) )) ) 
np.savetxt(outfile, np.hstack((humutils.ascol(X.flatten()),humutils.ascol(Y.flatten()), humutils.ascol(merge.flatten()), humutils.ascol(D.flatten()), humutils.ascol(R.flatten()), humutils.ascol(h.flatten()), humutils.ascol(t.flatten()) )) , fmt="%8.6f %8.6f %8.6f %8.6f %8.6f %8.6f %8.6f") del D, R, h, t sigmas = 0.1 #m eps = 2 print("gridding ...") #if dogrid==1: if 2>1: if res==99: resg = np.min(res_grid[res_grid>0])/2 print('Gridding at resolution of %s' % str(resg)) else: resg = res tree = KDTree(np.c_[X.flatten(),Y.flatten()]) complete=0 while complete==0: try: grid_x, grid_y, res = getmesh(np.min(X), np.max(X), np.min(Y), np.max(Y), resg) longrid, latgrid = trans(grid_x, grid_y, inverse=True) longrid = longrid.astype('float32') latgrid = latgrid.astype('float32') shape = np.shape(grid_x) ## create mask for where the data is not if pykdtree==1: dist, _ = tree.query(np.c_[grid_x.ravel(), grid_y.ravel()], k=1) else: try: dist, _ = tree.query(np.c_[grid_x.ravel(), grid_y.ravel()], k=1, n_jobs=cpu_count()) except: #print ".... update your scipy installation to use faster kd-tree queries" dist, _ = tree.query(np.c_[grid_x.ravel(), grid_y.ravel()], k=1) dist = dist.reshape(grid_x.shape) targ_def = pyresample.geometry.SwathDefinition(lons=longrid.flatten(), lats=latgrid.flatten()) del longrid, latgrid humlon, humlat = trans(X, Y, inverse=True) orig_def = pyresample.geometry.SwathDefinition(lons=humlon.flatten(), lats=humlat.flatten()) del humlon, humlat if 'orig_def' in locals(): complete=1 except: print("memory error: trying grid resolution of %s" % (str(resg*2))) resg = resg*2 if mode==1: complete=0 while complete==0: try: try: dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = cpu_count(), reduce_data=1) except: dat = pyresample.kd_tree.resample_nearest(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = 1, reduce_data=1) try: r_dat = pyresample.kd_tree.resample_nearest(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = cpu_count(), reduce_data=1) except: r_dat = pyresample.kd_tree.resample_nearest(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, fill_value=None, nprocs = 1, reduce_data=1) stdev = None counts = None if 'dat' in locals(): complete=1 except: del grid_x, grid_y, targ_def, orig_def wf = None humlon, humlat = trans(X, Y, inverse=True) dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, merge, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps) r_dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, res_grid, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps) del humlon, humlat elif mode==2: # custom inverse distance wf = lambda r: 1/r**2 complete=0 while complete==0: try: try: dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(),targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = True, nprocs = cpu_count(), reduce_data=1) except: dat, stdev, counts = pyresample.kd_tree.resample_custom(orig_def, merge.flatten(),targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = True, nprocs = 1, reduce_data=1) try: r_dat = pyresample.kd_tree.resample_custom(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = False, 
nprocs = cpu_count(), reduce_data=1) except: r_dat = pyresample.kd_tree.resample_custom(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, weight_funcs=wf, fill_value=None, with_uncert = False, nprocs = 1, reduce_data=1) if 'dat' in locals(): complete=1 except: del grid_x, grid_y, targ_def, orig_def humlon, humlat = trans(X, Y, inverse=True) dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, merge, res*2, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps) r_dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, res_grid, res*2, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps) del humlat, humlon del stdev_null, counts_null elif mode==3: wf = None complete=0 while complete==0: try: try: dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = True, nprocs = cpu_count(), epsilon = eps, reduce_data=1) except: dat, stdev, counts = pyresample.kd_tree.resample_gauss(orig_def, merge.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = True, nprocs = 1, epsilon = eps, reduce_data=1) try: r_dat = pyresample.kd_tree.resample_gauss(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = False, nprocs = cpu_count(), epsilon = eps, reduce_data=1) except: r_dat = pyresample.kd_tree.resample_gauss(orig_def, res_grid.flatten(), targ_def, radius_of_influence=res*20, neighbours=nn, sigmas=sigmas, fill_value=None, with_uncert = False, nprocs = 1, epsilon = eps, reduce_data=1) if 'dat' in locals(): complete=1 except: del grid_x, grid_y, targ_def, orig_def humlon, humlat = trans(X, Y, inverse=True) dat, stdev, counts, resg, complete, shape = getgrid_lm(humlon, humlat, merge, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps) r_dat, stdev_null, counts_null, resg, complete, shape = getgrid_lm(humlon, humlat, res_grid, res*10, min(X), max(X), min(Y), max(Y), resg*2, mode, trans, nn, wf, sigmas, eps) del humlat, humlon del stdev_null, counts_null humlon, humlat = trans(X, Y, inverse=True) del X, Y, res_grid, merge dat = dat.reshape(shape) dat[dist>res*30] = np.nan del dist r_dat = r_dat.reshape(shape) r_dat[r_dat<1] = 1 r_dat[r_dat > 2*np.pi] = 1 r_dat[np.isnan(dat)] = np.nan dat = dat + r_dat #np.sqrt(np.cos(np.deg2rad(r_dat))) #dat*np.sqrt(r_dat) + dat del r_dat if mode>1: stdev = stdev.reshape(shape) counts = counts.reshape(shape) mask = dat.mask.copy() dat[mask==1] = np.nan #dat[mask==1] = 0 if mode>1: dat[(stdev>numstdevs) & (mask!=0)] = np.nan dat[(counts<nn) & (counts>0)] = np.nan #if dogrid==1: dat[dat==0] = np.nan dat[np.isinf(dat)] = np.nan dat[dat<thres] = np.nan datm = np.ma.masked_invalid(dat) glon, glat = trans(grid_x, grid_y, inverse=True) #del grid_x, grid_y try: from osgeo import gdal,ogr,osr proj = osr.SpatialReference() proj.ImportFromEPSG(int(cs2cs_args.split(':')[-1])) #26949) datout = np.squeeze(np.ma.filled(dat))#.astype('int16') datout[np.isnan(datout)] = -99 driver = gdal.GetDriverByName('GTiff') #rows,cols = np.shape(datout) cols,rows = np.shape(datout) outFile = os.path.normpath(os.path.join(sonpath,'geotiff_map'+str(p)+'.tif')) ds = driver.Create( outFile, rows, cols, 1, gdal.GDT_Float32, [ 'COMPRESS=LZW' ] ) if proj is not None: ds.SetProjection(proj.ExportToWkt()) xmin, ymin, xmax, ymax = 
[grid_x.min(), grid_y.min(), grid_x.max(), grid_y.max()] xres = (xmax - xmin) / float(rows) yres = (ymax - ymin) / float(cols) geotransform = (xmin, xres, 0, ymax, 0, -yres) ds.SetGeoTransform(geotransform) ss_band = ds.GetRasterBand(1) ss_band.WriteArray(np.flipud(datout)) #datout) ss_band.SetNoDataValue(-99) ss_band.FlushCache() ss_band.ComputeStatistics(False) del ds except: print("error: geotiff could not be created... check your gdal/ogr install") try: # ========================================================= print("creating kmz file ...") ## new way to create kml file pixels = 1024 * 10 fig, ax = humutils.gearth_fig(llcrnrlon=glon.min(), llcrnrlat=glat.min(), urcrnrlon=glon.max(), urcrnrlat=glat.max(), pixels=pixels) cs = ax.pcolormesh(glon, glat, datm, vmax=scalemax, cmap='gray') ax.set_axis_off() fig.savefig(os.path.normpath(os.path.join(sonpath,'map'+str(p)+'.png')), transparent=True, format='png') del fig, ax # ========================================================= fig = plt.figure(figsize=(1.0, 4.0), facecolor=None, frameon=False) ax = fig.add_axes([0.0, 0.05, 0.2, 0.9]) cb = fig.colorbar(cs, cax=ax) cb.set_label('Intensity [dB W]', rotation=-90, color='k', labelpad=20) fig.savefig(os.path.normpath(os.path.join(sonpath,'legend'+str(p)+'.png')), transparent=False, format='png') del fig, ax, cs, cb # ========================================================= humutils.make_kml(llcrnrlon=glon.min(), llcrnrlat=glat.min(), urcrnrlon=glon.max(), urcrnrlat=glat.max(), figs=[os.path.normpath(os.path.join(sonpath,'map'+str(p)+'.png'))], colorbar=os.path.normpath(os.path.join(sonpath,'legend'+str(p)+'.png')), kmzfile=os.path.normpath(os.path.join(sonpath,'GroundOverlay'+str(p)+'.kmz')), name='Sidescan Intensity') except: print("error: map could not be created...") #y1 = np.min(glat)-0.001 #x1 = np.min(glon)-0.001 #y2 = np.max(glat)+0.001 #x2 = np.max(glon)+0.001 print("drawing and printing map ...") fig = plt.figure(frameon=False) map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1], resolution = 'i', #h #f llcrnrlon=np.min(humlon)-0.001, llcrnrlat=np.min(glat)-0.001, urcrnrlon=np.max(humlon)+0.001, urcrnrlat=np.max(glat)+0.001) try: map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery', xpixels=1000, ypixels=None, dpi=300) except: map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='ESRI_Imagery_World_2D', xpixels=1000, ypixels=None, dpi=300) #finally: # print "error: map could not be created..." 
#if dogrid==1: gx,gy = map.projtran(glon, glat) ax = plt.Axes(fig, [0., 0., 1., 1.], ) ax.set_axis_off() fig.add_axes(ax) #if dogrid==1: if 2>1: if datm.size > 25000000: print("matrix size > 25,000,000 - decimating by factor of 5 for display") map.pcolormesh(gx[::5,::5], gy[::5,::5], datm[::5,::5], cmap='gray', vmin=np.nanmin(datm), vmax=scalemax) #vmax=np.nanmax(datm) else: map.pcolormesh(gx, gy, datm, cmap='gray', vmin=np.nanmin(datm), vmax=scalemax) #vmax=np.nanmax(datm) del datm, dat else: ## draw point cloud x,y = map.projtran(humlon, humlat) map.scatter(x.flatten(), y.flatten(), 0.5, merge.flatten(), cmap='gray', linewidth = '0') #map.drawmapscale(x1+0.001, y1+0.001, x1, y1, 200., units='m', barstyle='fancy', labelstyle='simple', fontcolor='k') #'#F8F8FF') #map.drawparallels(np.arange(y1-0.001, y2+0.001, 0.005),labels=[1,0,0,1], linewidth=0.0, rotation=30, fontsize=8) #map.drawmeridians(np.arange(x1, x2, 0.002),labels=[1,0,0,1], linewidth=0.0, rotation=30, fontsize=8) custom_save2(sonpath,'map_imagery'+str(p)) del fig del humlat, humlon return res #return the new resolution
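# Stand-alone sketch (not PyHum code) of the GeoTIFF export step used in
# make_map above, with an invented array and extent. It follows the same GDAL
# calls: Create, SetProjection, SetGeoTransform, WriteArray, SetNoDataValue.
import numpy as np
from osgeo import gdal, osr

datout = np.random.rand(200, 300).astype('float32')    # rows x cols
datout[0, 0] = -99                                      # pretend nodata pixel
xmin, ymax, xres, yres = 500000.0, 4.1e6, 1.0, 1.0      # made-up projected extent

rows, cols = datout.shape
driver = gdal.GetDriverByName('GTiff')
ds = driver.Create('example_map.tif', cols, rows, 1, gdal.GDT_Float32, ['COMPRESS=LZW'])

proj = osr.SpatialReference()
proj.ImportFromEPSG(26949)                  # same style of EPSG code as cs2cs_args
ds.SetProjection(proj.ExportToWkt())
ds.SetGeoTransform((xmin, xres, 0, ymax, 0, -yres))   # north-up raster

band = ds.GetRasterBand(1)
band.WriteArray(datout)
band.SetNoDataValue(-99)
band.FlushCache()
ds = None   # close and flush the dataset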