def run_parallel(self, test_suites, test_runner, result_type=None,
                 results_path=None):

    exit_code = 0
    proc = None
    unittest.installHandler()
    processes = []
    manager = Manager()
    results = manager.dict()
    start = time.time()
    test_mapping = {}
    for test_suite in test_suites:
        # Give each test suite a uuid so it can be
        # matched to the correct test result
        test_id = str(uuid.uuid4())
        test_mapping[test_id] = test_suite

        proc = Process(
            target=self.execute_test,
            args=(test_runner, test_id, test_suite, results))
        processes.append(proc)
        proc.start()

    for proc in processes:
        proc.join()

    finish = time.time()

    errors, failures, _ = self.dump_results(start, finish, results)

    if result_type is not None:
        all_results = []
        for test_id, result in list(results.items()):
            tests = test_mapping[test_id]
            result_parser = SummarizeResults(
                vars(result), tests, (finish - start))
            all_results += result_parser.gather_results()

        reporter = Reporter(
            result_parser=result_parser, all_results=all_results)
        reporter.generate_report(
            result_type=result_type, path=results_path)

    if failures or errors:
        exit_code = 1

    return exit_code

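The runner above is an instance of a pattern that recurs throughout these examples: tag each unit of work with a uuid, let every worker process deposit its result in a shared Manager dict under that tag, and read the dict back only after joining all the workers. A stripped-down sketch of just that pattern (the worker function and its payload are illustrative, not taken from the code above):

import uuid
from multiprocess import Manager, Process

def worker(results, job_id, payload):
    results[job_id] = payload * 2                # stand-in for running a test suite

if __name__ == '__main__':
    manager = Manager()
    results = manager.dict()
    mapping = {}
    procs = []
    for payload in (1, 2, 3):
        job_id = str(uuid.uuid4())               # key used to match result to input
        mapping[job_id] = payload
        p = Process(target=worker, args=(results, job_id, payload))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()
    for job_id, value in results.items():
        print(mapping[job_id], '->', value)
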
def query(query_lst):
    manager = Manager()
    hits = manager.dict()

    results = []
    for q in query_lst:
        r = requests.get('http://dblp.uni-trier.de/search/publ/api', params={
            'q': q,
            'h': 100,
            'format': 'json'
        })
        if r.status_code == 429:
            raise Error
        json_answer = r.json()
        res = json_answer["result"]["hits"].get("hit", None)
        if res is None:
            continue
        results += res

    def f(d, hit, n):
        if hit is None:
            return
        authors = hit["info"].pop("authors")
        if isinstance(authors["author"], dict):
            hit["info"]["authors"] = authors["author"]["text"]
        else:
            hit["info"]["authors"] = [
                fullname(a["text"]) for a in authors["author"]
            ]
        hit["info"]["bibtex"] = get_bib(hit["info"]["key"])
        d[n] = hit["info"]

    job = [
        Process(target=f, args=(hits, hit, n))
        for n, hit in enumerate(results)
    ]
    _ = [p.start() for p in job]
    _ = [p.join() for p in job]

    return dict(hits)

def get_city_states(self):
    """
    Creates city states from start time to end time
    :param:
    :return:
    """
    city_states = []
    start_time = self.start_time
    end_time = self.end_time

    # Create array of time slice values between the start and end time
    business_days = self.config['city_state_creator']['business_days']
    business_hours_start = self.config['city_state_creator'][
        'business_hours_start']
    business_hours_end = self.config['city_state_creator'][
        'business_hours_end']
    index = pd.date_range(start=start_time, end=end_time,
                          freq=str(self.time_unit_duration) + 'min')

    # Filter only the required days and hours
    index = index[index.day_name().isin(business_days)]
    index = index[(index.hour >= business_hours_start)
                  & (index.hour <= business_hours_end)]

    time_slice_starts = index - timedelta(
        minutes=self.time_slice_duration / 2)
    time_slice_ends = index + timedelta(
        minutes=self.time_slice_duration / 2)

    # Create arguments dictionary for parallelization
    self.parallel_args = self.create_parallel_args(
        index, time_slice_starts, time_slice_ends)

    # Create city states
    manager = Manager()
    city_states = manager.dict()
    N = len(index.values)

    # Create parallel pool
    self.logger.info("Creating parallelization pool")
    pool = ProcessPool(nodes=25)
    pool.map(self.get_city_state, ([city_states, t] for t in range(N)))

    pool.close()
    pool.join()
    pool.clear()

    self.logger.info("Finished creating city states")

    return dict(city_states)

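Unlike most of the other examples, this one hands the Manager dict to a pathos ProcessPool (note the nodes= argument) rather than to bare Process objects. The pool.clear() call is pathos-specific and discards the pool instance that pathos keeps cached; the standard multiprocessing.Pool has no such method.
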
def run_post_process():
    es = ES(FLAGS.configfile_name)
    manager = Manager()
    lock = manager.Lock()
    shared_dict = manager.dict({'time': 0, "id": ""})
    process_num = int(cpu_count() - 2)
    generator_list = []
    for i in range(process_num):
        generator_list.append(_generator(lock, shared_dict, es))
    #%%
    p = []
    for i in range(process_num):
        p.append(Process(target=_process_unknown_record,
                         args=(generator_list[i],)))
        p[i].start()

    for q in p:
        q.join()

latitude = check_env_var("HZN_LAT", printerr=True)
longitude = check_env_var("HZN_LON", printerr=True)
pws_units = check_env_var("PWS_UNITS", default='us', printerr=True)    # weewx recommends only using 'us'
pws_wu_loc = check_env_var("PWS_WU_LOC", default='', printerr=True)
pws_wu_rapidfire = check_env_var("PWS_WU_RPDF", default='False', printerr=True)

# Deal with a potential lower-case (boolean value from Horizon) or erroneous value
if pws_wu_rapidfire == "true" or pws_wu_rapidfire == "True":
    pws_wu_rapidfire = "True"
else:
    pws_wu_rapidfire = "False"

## Shared data structure (dict for flask server to read & serve)
manager = Manager()
sdata = manager.dict()
standard_params = ["wu_id", "stationtype", "model", "latitude", "longitude", "units", "location"]
standard_values = [pws_wu_id, pws_station_type, pws_model, latitude, longitude, pws_units, pws_wu_loc]
sdata["r"] = dict(zip(["status"], ["Station initializing..."]))
sdata["t"] = str(int(time.time()))                          # Timestamp
sdata["i"] = dict(zip(standard_params, standard_values))    # Station Info

## Flask HTTPserver ----------------------------------------------------------
## Start simple flask server at localhost:port and pass in shared data dict
p_flask = Process(target=fl.run_server, args=('0.0.0.0', 8357, sdata))
p_flask.start()

## Weewx service -------------------------------------------------------------
# Modify the weewx configuration file with our env var settings
weemod = weewx_mod(weewx_config_file, pws_station_type)
weemod.wee_config_script = "/home/weewx/bin/wee_config"

from multiprocess import Manager, Process


def fun(d, l):
    d[1] = '1'
    d[2] = 2
    d[0.25] = None
    l.reverse()


if __name__ == '__main__':
    manager = Manager()

    d = manager.dict()
    l = manager.list(range(10))

    p = Process(target=fun, args=(d, l))
    p.start()
    p.join()

    print(d)
    print(l)

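Run as a script, the example above prints the populated dict and the reversed list, e.g. {1: '1', 2: 2, 0.25: None} (key order may vary across Python versions) followed by [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]. Because multiprocess mirrors the standard library API, the same code also works with from multiprocessing import Manager, Process.
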
def _multi_channel_apply_disk_parallel(self, function, cleanup_function,
                                       output_path, from_time, to_time,
                                       channels, cast_dtype,
                                       pass_batch_info, pass_batch_results,
                                       processes, **kwargs):

    self.logger.debug('Starting parallel operation...')

    if pass_batch_results:
        raise NotImplementedError("pass_batch_results is not "
                                  "implemented on 'disk' mode")

    # need to convert to a list, otherwise it cannot be pickled
    data = list(self.multi_channel(from_time, to_time, channels,
                                   return_data=False))
    n_batches = self.indexer.n_batches(from_time, to_time, channels)

    self.logger.info('Data will be split in %s batches', n_batches)

    output_path = Path(output_path)

    # create local variables to avoid pickling problems
    _path_to_recordings = copy(self.path_to_recordings)
    _dtype = copy(self.dtype)
    _n_channels = copy(self.n_channels)
    _data_order = copy(self.data_order)
    _loader = copy(self.loader)
    _buffer_size = copy(self.buffer_size)

    reader = partial(RecordingsReader,
                     path_to_recordings=_path_to_recordings,
                     dtype=_dtype,
                     n_channels=_n_channels,
                     data_order=_data_order,
                     loader=_loader,
                     return_data_index=True)

    m = Manager()
    mapping = m.dict()
    next_to_write = m.Value('i', 0)

    def parallel_runner(element):
        i, _ = element

        res = util.batch_runner(element, function, reader, pass_batch_info,
                                cast_dtype, kwargs, cleanup_function,
                                _buffer_size, save_chunks=False,
                                output_path=output_path)

        if i == 0:
            mapping['dtype'] = str(res.dtype)

        while True:
            if next_to_write.value == i:
                with open(str(output_path), 'wb' if i == 0 else 'ab') as f:
                    res.tofile(f)
                next_to_write.value += 1
                break

    # run jobs
    self.logger.debug('Creating processes pool...')

    p = Pool(processes)
    res = p.map_async(parallel_runner, enumerate(data))

    finished = 0

    if self.show_progress_bar:
        pbar = tqdm(total=n_batches)

    if self.show_progress_bar:
        while True:
            if next_to_write.value > finished:
                update = next_to_write.value - finished
                pbar.update(update)
                finished = next_to_write.value

            if next_to_write.value == n_batches:
                break

        pbar.close()
    else:
        res.get()

    # save metadata
    params = util.make_metadata(channels, self.n_channels,
                                mapping['dtype'], output_path)

    return output_path, params

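The closure passed to Pool.map above keeps the output file in batch order by spinning on a shared Manager().Value counter: each worker waits until the counter reaches its own index, appends its result to the file, then increments the counter. Below is a minimal, self-contained sketch of that idiom (the function and file names are illustrative, not part of the library above); it assumes multiprocess's dill-based pickling, which is what allows a closure to be shipped to pool workers in the first place.

from multiprocess import Manager, Pool
import time

def write_in_order(chunks, path):
    m = Manager()
    next_to_write = m.Value('i', 0)              # shared "ticket" counter

    def worker(element):                         # closure: shipped to workers via dill
        i, chunk = element
        payload = bytes(x * 2 for x in chunk)    # stand-in for the real batch work
        while next_to_write.value != i:          # spin until it is this batch's turn
            time.sleep(0.001)
        mode = 'wb' if i == 0 else 'ab'          # first batch truncates, the rest append
        with open(path, mode) as f:
            f.write(payload)
        next_to_write.value += 1                 # hand the ticket to batch i + 1

    with Pool(3) as pool:
        pool.map(worker, enumerate(chunks))

if __name__ == '__main__':
    write_in_order([[1, 2], [3, 4], [5, 6]], 'ordered.bin')
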
class Storage(object):
    ''' Storage system '''

    def __init__(self):
        # The given page size
        self._PAGE_SIZE = 4096
        # The given size for data blocks
        self._BLOCK_SIZE = 1 * self._PAGE_SIZE
        # Meta data about datasets
        self._dataset_table = {}
        # Read/write head position
        self._position = 0
        # Manager for concurrency
        self.manager = Manager()
        # Job-queue for reading data
        self.job_queue = self.manager.list()
        # Data queues
        self.data_queues = self.manager.dict()
        # Path to storage file
        _path = 'data.data'
        # Size of storage (Default 200 mb)
        self._SIZE = 4096 * 256 * 200
        # Amount of blocks
        self._BLOCKS = math.floor(self._SIZE / self._BLOCK_SIZE)

        # Check whether a storage file exists, else create one
        if not os.path.exists(_path):
            print('Writing storage file')
            f = open(_path, 'w+b')
            f.write(b'?' * self._SIZE)
            f.close()

        # Open storage and create a MMAP
        try:
            storage = open(_path, 'a+b')
        except:
            print('Cannot open storage file!')

        # Create MMAP to file
        self.datamap = mmap.mmap(storage.fileno(), 0)

        # Free space vector
        self.free_space = [(0, self._BLOCKS)]

    def _write_data(self, address, data_block, flush=True):
        ''' Writes a data block to the page at the given address '''
        print('¤ Writing data block at ' + str(address))
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address
            # Write the block
            self.datamap.write(bytes(data_block, 'utf-8'))
        except:
            print('! Could not write data block to ' + str(address) +
                  '. Not enough space.')

        # Flush the written data to the file
        if flush:
            try:
                self.datamap.flush()
            except:
                print("Cannot flush data with mmap!")
                pass

    def _read_block(self, address):
        ''' Reads a data block from the given address '''
        print('+ Reading data from ' + str(address))
        data = ''
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address
            # Read the data
            data = self.datamap.read(self._PAGE_SIZE)
        except:
            print('Could not read data block from ' + str(address))

        return data

    def _worst_fit(self, n_blocks):
        ''' Data block allocation using worst-fit '''
        # Get the largest free segment
        #! Faster to use max-heaps
        largest_segment = sorted(self.free_space, key=lambda x: x[1],
                                 reverse=True)[0]
        blocks_amount = largest_segment[1]
        assert blocks_amount >= n_blocks

        # Construct a list of free datablocks
        free_blocks = []
        current_block = largest_segment[0]
        for _ in range(n_blocks):
            free_blocks.append(current_block)
            current_block += self._BLOCK_SIZE

        # Remove the free space and add the remaining
        # free space after allocation
        self.free_space.remove(largest_segment)
        self.free_space.append((current_block, blocks_amount - n_blocks))

        return free_blocks

    def _request_blocks(self, n_blocks):
        return self._worst_fit(n_blocks)

    def get_size(self, dataset_id):
        ''' Get the amount of blocks in a dataset '''
        return self._dataset_table[dataset_id].size

    def append_data(self, dataset_id, data_block, address, flush=True):
        ''' Append data to an existing dataset '''
        # Check if there is any more allocated space
        # for the dataset
        if self._dataset_table[dataset_id].space_left():
            # Write data block and increment size
            self._write_data(address, data_block, flush)
            self._dataset_table[dataset_id].size += 1
        return address

    def add_dataset(self, dataset_id, dataset, size=None):
        ''' Add a new dataset to the storage '''
        # Add metadata about the dataset
        if size:
            current_size = size
        else:
            current_size = len(dataset)
        self._dataset_table[dataset_id] = Dataset(current_size)
        requested_blocks = self._request_blocks(current_size)
        assert len(requested_blocks) >= len(dataset)

        # Write the data blocks to a file
        block_index = 0
        for data_block in dataset:
            self.append_data(dataset_id, data_block,
                             requested_blocks[block_index], flush=False)
            self._dataset_table[dataset_id].append_block_index(
                requested_blocks[block_index])
            block_index += 1

        try:
            self.datamap.flush()
        except:
            print("Cannot flush data with mmap!")
            pass

    def read_data(self, dataset_id, data_queue):
        ''' Run the execution-queue for a given dataset '''
        # Generate a random id (6 characters)
        data_id = ''.join(random.SystemRandom().choice(
            string.ascii_uppercase + string.digits) for _ in range(6))

        dataset = self._dataset_table[dataset_id]
        self.data_queues[data_id] = data_queue

        for address in dataset.datablocks:
            self.job_queue.append((address, data_id))

        return dataset.datablocks

    def reader(self):
        ''' A reading process, which serves data block requests from read_data '''
        while True:
            # Sort the list of jobs by their address
            jobs = sorted(self.job_queue, key=lambda x: x[0])
            try:
                # Find the job with the closest higher address
                (address, data_id) = next(
                    x for x in jobs if x[0] >= self._position)
                # Read the data from disk
                data = self._read_block(address)
                # Serve data to the requesting process
                self.data_queues[data_id].put(data)
                # Remove the job from the list
                self.job_queue.remove((address, data_id))
            except:
                # No jobs found. Start from position 0.
                self._position = 0
                time.sleep(0.01)

def generate_data(self, data_dir, tmp_dir, task_id=-1):
    test = 20
    train_paths = self.training_filepaths(
        data_dir, self.num_shards, shuffled=False)
    dev_paths = self.dev_filepaths(
        data_dir, self.num_dev_shards, shuffled=False)
    test_paths = self.test_filepaths(
        data_dir, self.num_test_shards, shuffled=True)
    try_num = 0
    if test:
        try_num = test

    manager = Manager()
    lock = manager.Lock()
    # shared_dict = manager.dict({'current_id': id_init, "current_last_updated": 0, "record_num": 0, "source_index": 0})

    def process_files(train_paths, datasets, num_run, shared_dict):
        total_file_num = len(train_paths)
        num_per_partition = int(math.floor(total_file_num / num_run))
        train_paths_list = []
        for i in range(num_run):
            if i == num_run - 1:
                train_paths_list.append(train_paths[i * num_per_partition:])
            else:
                train_paths_list.append(
                    train_paths[i * num_per_partition:(i + 1) *
                                num_per_partition])

        generator_list = []
        for i in range(num_run):
            generator_list.append(
                self.generator(data_dir, tmp_dir, datasets, lock,
                               shared_dict, how_many=try_num))

        p = []
        for i in range(num_run):
            p.append(
                Process(target=generator_utils.generate_files,
                        args=(generator_list[i], train_paths_list[i],
                              try_num)))
            p[i].start()
        my_logger.error("Time: {} All processes started".format(
            str(datetime.datetime.now())))
        for q in p:
            q.join()
        my_logger.error("Time: {} All processes ended".format(
            str(datetime.datetime.now())))

    shared_dict = manager.dict({
        'current_id': id_init,
        "current_last_updated": 0,
        "record_num": 0,
        "source_index": 0
    })
    num_run = min(self.process_num, self.num_shards)
    process_files(train_paths, self.train_sources, num_run, shared_dict)

    if len(self.eval_sources) == 0:
        generator_utils.shuffle_dataset(train_paths)
    else:
        shared_dict["current_id"] = id_init
        shared_dict["current_last_updated"] = 0
        shared_dict["record_num"] = 0
        shared_dict["source_index"] = 0
        num_run = min(self.process_num, self.num_dev_shards)
        my_logger.error("Time: {} process dev dataset".format(
            str(datetime.datetime.now())))
        process_files(dev_paths, self.eval_sources, num_run, shared_dict)
        my_logger.error("Time: {} shuffle dataset".format(
            str(datetime.datetime.now())))
        generator_utils.shuffle_dataset(train_paths + dev_paths)

    shared_dict["current_id"] = id_init
    shared_dict["current_last_updated"] = 0
    shared_dict["record_num"] = 0
    shared_dict["source_index"] = 0
    num_run = min(self.process_num, self.num_test_shards)
    process_files(test_paths, self.test_sources, num_run, shared_dict)

import pandas as pd
import json
#import numba
#from numba import jit, float64
import time
import shapely
from shapely.geometry import LineString
#from multiprocess import Process, Manager, Pool
from scipy import sparse
import copy
from ast import literal_eval

import pyrocko

from multiprocess import Process, Manager, Pool

manager = Manager()        # Multiprocess manager
shared_d = manager.dict()  # shared-memory dictionary

# definitions of slip types
dip_d = {'Normal-Sinistral': 70.,
         'Sinistral': 90.,
         'Dextral Normal': 70.,
         'Normal': 50.,
         'Thrust': 25.,
         'Dextral': 90.,   # None, not sure how to handle this
         'Sinistral-Normal': 70.}

rake_d = {'Normal-Sinistral': -45.,
          'Sinistral': 0.,
          'Dextral Normal': -135.,
          'Normal': -90.,
          'Thrust': 90.,

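In this example the Manager and the shared dict are created at module import time, and multiprocess (the dill-based fork of the standard multiprocessing package, maintained in the pathos ecosystem) is imported instead of the standard library module. In practice the switch matters because dill can serialize lambdas, closures, and interactively defined functions that the stock pickler refuses, which is useful if such callables are later handed to the imported Pool.
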
def getSubClusters(self, driver, mainClusterDict, mainClusterNo, zipcode,
                   timeout=120):
    mode = self.subClusterMode
    i = mainClusterNo
    mainClusterUrl = driver.current_url
    subClusters = self.getClusters(driver)
    numSubClusters = len(subClusters)
    logging.info(
        'Found {0} subclusters in cluster {1} in zipcode {2}.'.format(
            numSubClusters, i + 1, zipcode))
    clusterDict = mainClusterDict['clusters'][i]
    clusterDict['numSubClusters'] = numSubClusters
    count = clusterDict['count']
    allListingUrls = []
    if mode == 'parallel':
        manager = Manager()
        parallelDict = manager.dict()
        jobs = []
        timeouts = []
        goodJobs = []
    else:
        seriesDict = dict()
    for j in range(len(subClusters)):
        if ('subClusters' in clusterDict.keys()) and \
           (j in clusterDict['subClusters'].keys()) and \
           (clusterDict['subClusters'][j]['complete']):
            continue
        else:
            if mode == 'parallel':
                proc = Process(
                    target=self.scrapeSubClusterUrls,
                    args=(parallelDict, mainClusterUrl, i,
                          numSubClusters, j, count))
                proc.start()
                jobs.append(proc)
            else:
                self.scrapeSubClusterUrls(
                    seriesDict, mainClusterUrl, i, numSubClusters, j, count)
    if mode == 'parallel':
        for j, job in enumerate(jobs):
            job.join(timeout)
            if job.is_alive():
                job.terminate()
                timeouts.append(j)
                logging.info(
                    'Subcluster {0}.{1} timed out. Had to terminate.'.format(
                        i + 1, j + 1))
            else:
                goodJobs.append(j)
        parallelDict = dict(parallelDict)
        clusterDict['subClusters'] = dict()
        for j in timeouts:
            clusterDict['subClusters'][j] = self.formatSubClusterDict(
                False, None, False, None, [])
        for j in goodJobs:
            clusterDict['subClusters'][j] = parallelDict[j]
    else:
        clusterDict['subClusters'] = seriesDict
    subClustersDict = clusterDict['subClusters']
    subClustersOver350 = [j for j in subClustersDict.keys()
                          if subClustersDict[j]['count'] > 345]
    numSubClustersOver350 = len(subClustersOver350)
    subClustersNotClicked = [j for j in subClustersDict.keys()
                             if not subClustersDict[j]['clickable']]
    numSubClustersNotClicked = len(subClustersNotClicked)
    for j in subClustersDict.keys():
        allListingUrls += subClustersDict[j]['listingUrls']
    uniqueUrls = set(allListingUrls)
    pctObtained = round(len(uniqueUrls) / count * 100.0, 1)
    clusterDict.update(
        {'subClustersOver350': subClustersOver350,
         'numSubClustersOver350': numSubClustersOver350,
         'subClustersNotClicked': subClustersNotClicked,
         'numSubClustersNotClicked': numSubClustersNotClicked,
         'pctObtained': pctObtained,
         'listingUrls': uniqueUrls})
    return

def solve_optimization_problem(self):
    sorted_vehicles = self.sorted_vehicles
    obs_infos = self.obs_infos
    old_ey = self.old_ey
    old_direction_flag = self.old_direction_flag
    bezier_xcurvs = self.bezier_xcurvs
    bezier_funcs = self.bezier_funcs
    xcurv_ego = self.xcurv_ego
    num_horizon = self.racing_game_param.num_horizon_planner
    num_veh = len(self.sorted_vehicles)
    ego = self.vehicles[self.agent_name]
    veh_length = ego.param.length
    veh_width = ego.param.width
    track = self.track
    safety_margin = 0.15
    manager = Manager()
    dict_traj = manager.dict()
    dict_solve_time = manager.dict()
    dict_cost = manager.dict()
    list_opti = []
    for index in range(num_veh + 1):
        list_opti.append(
            Process(
                target=self.generate_traj_per_region,
                args=(index, dict_traj, dict_solve_time, dict_cost),
            ))
    for index in range(num_veh + 1):
        list_opti[index].start()
    for index in range(num_veh + 1):
        list_opti[index].join()
    costs = []
    solution_xvar = np.zeros((num_veh + 1, X_DIM, num_horizon + 1))
    solve_time = np.zeros(num_veh + 1)
    for index in range(num_veh + 1):
        solution_xvar[index, :, :] = dict_traj[index]
        costs.append(dict_cost[index])
        solve_time[index] = dict_solve_time[index]
    cost_selection = []
    for index in range(num_veh + 1):
        cost_selection.append(0)
    for index in range(num_veh + 1):
        cost_selection[index] = -10 * (solution_xvar[index, 4, -1] -
                                       solution_xvar[index, 4, 0])
        if index == 0:
            pass
        else:
            name = sorted_vehicles[index - 1]
            obs_traj = obs_infos[name]
            for j in range(num_horizon + 1):
                while obs_traj[4, j] > track.lap_length:
                    obs_traj[4, j] = obs_traj[4, j] - track.lap_length
                diffs = solution_xvar[index, 4, j] - obs_traj[4, j]
                diffey = solution_xvar[index, 5, j] - obs_traj[5, j]
                if diffs**2 + diffey**2 - veh_length**2 - veh_width**2 >= 0:
                    cost_selection[index] += 0
                else:
                    cost_selection[index] += 100
        if index == num_veh:
            pass
        else:
            name = sorted_vehicles[index]
            obs_traj = obs_infos[name]
            for j in range(num_horizon + 1):
                while obs_traj[4, j] > track.lap_length:
                    obs_traj[4, j] = obs_traj[4, j] - track.lap_length
                diffs = solution_xvar[index, 4, j] - obs_traj[4, j]
                diffey = solution_xvar[index, 5, j] - obs_traj[5, j]
                if diffs**2 + diffey**2 - veh_length**2 - veh_width**2 >= 0:
                    cost_selection[index] += 0
                else:
                    cost_selection[index] += 100
        if old_direction_flag is None:
            pass
        elif old_direction_flag == index:
            pass
        else:
            cost_selection[index] += 100
    direction_flag = cost_selection.index(min(cost_selection))
    traj_xcurv = solution_xvar[direction_flag, :, :].T
    return traj_xcurv, direction_flag, solve_time, solution_xvar