def split_4d_for_all_pat(files_paths, split_folder):
    p = pool.Pool(8)
    p.map(
        split_4d_parallel,
        zip(files_paths, [split_folder] * len(files_paths),
            [None] * len(files_paths)))
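# A minimal sketch (added for illustration, not part of the original snippet)
# of the kind of worker split_4d_for_all_pat expects: Pool.map passes each
# zipped triple as a single argument, so the (hypothetical) worker unpacks it.
def split_4d_parallel_sketch(task):
    file_path, split_folder, override_name = task  # one tuple per task
    print("splitting", file_path, "into", split_folder, override_name)

if __name__ == '__main__':
    from multiprocessing import pool
    p = pool.Pool(2)
    p.map(split_4d_parallel_sketch,
          zip(['a.nii', 'b.nii'], ['out'] * 2, [None] * 2))
    p.close()
    p.join()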
if __name__ == '__main__':
    import glob
    # file = glob.glob("C:/Users/lily/Downloads/aliwood_product_dataset/aliwood_product_dataset/*")[1:]
    file = glob.glob('/home/liuchang/TaobaoItem/results/*/')
    import os.path
    import pickle
    import sys
    import time
    import math

    worker = 32
    pool = pool.Pool(worker)
    le_dic = {}
    lfile = open('/home/liuchang/PA/length', 'rb')
    lfile = pickle.load(lfile)
    for i in range(len(lfile) // 2):
        le_dic[lfile[2 * i]] = lfile[2 * i + 1]

    for material_dir in file:
        results = []
        dynamic_list = []
        count = 0
        id = material_dir.split('/')[-2]
        print(id)
        S = le_dic[material_dir]
        print(S)
        file_list = get_all_image(material_dir)
def create_vm_tpu_pair(vm_name, tpu_name, reuse_if_exists=True, skip_confirmation=False): """Create a VM and paired TPU instance. Args: vm_name: str, name for VM. tpu_name: str, name for TPU instance. reuse_if_exists: bool, if True, this will act as a get or create. If False and vm_name or tpu_name already exists, will error. skip_confirmation: bool, whether to skip launch confirmations. Returns: tuple: (vm_ip, tpu_ip) Raises: ValueError: if instance exists but reuse_if_exists=False. """ vm_info = list_vm_names_and_ips() tpu_info = list_tpu_names_and_ips() vm_names = list(zip(*vm_info))[0] if vm_info else [] tpu_names = list(zip(*tpu_info))[0] if tpu_info else [] make_vm = False vm_ip = None if vm_name in vm_names: if not reuse_if_exists: raise ValueError("VM %s already exists and reuse_if_exists=False" % vm_name) tf.logging.info("VM %s already exists, reusing.", vm_name) vm_ip = vm_info[vm_names.index(vm_name)][1] else: print("Creating VM %s" % vm_name) if not skip_confirmation: assert confirm() make_vm = True make_tpu = False tpu_ip = None if tpu_name in tpu_names: if not reuse_if_exists: raise ValueError( "TPU instance %s already exists and reuse_if_exists=False" % tpu_name) tf.logging.info("TPU %s already exists, reusing.", tpu_name) tpu_ip = tpu_info[tpu_names.index(tpu_name)][1] else: print("Creating TPU instance %s" % tpu_name) if not skip_confirmation: assert confirm() make_tpu = True # Create VM and TPU in parallel pool = mp.Pool(2) vm_res = None tpu_res = None if make_vm: vm_res = pool.apply_async(create_vm, (vm_name, )) if make_tpu: tpu_res = pool.apply_async(create_tpu, (tpu_name, tpu_info)) if vm_res is not None: vm_ip = vm_res.get() if tpu_res is not None: tpu_ip = tpu_res.get() tf.logging.info("VM (Name, IP): %s, %s", vm_name, vm_ip) tf.logging.info("TPU (Name, IP): %s, %s", tpu_name, tpu_ip) tf.logging.info("To delete the VM, run: %s", Gcloud.DELETE_VM.format(name=vm_name)) tf.logging.info("To delete the TPU instance, run: %s", Gcloud.DELETE_TPU.format(name=tpu_name)) return vm_ip, tpu_ip
"depth": depth_map, 'deg_reduce': deg_reduce }, do_compression=False, ) parser = argparse.ArgumentParser() parser.add_argument("--datadir", dest='datadir', default=osp.join(DATA_ROOT, 'cityscapes')) parser.add_argument("--outname", default='offset_gt/dt_offset') parser.add_argument('--split', nargs='+', default=['val', 'train']) parser.add_argument("--ksize", type=int, default=5) parser.add_argument('--metric', default='euc', choices=['euc', 'taxicab']) args = parser.parse_args() ksize = args.ksize sobel_x, sobel_y = (sobel_kernel((ksize, ksize), i) for i in (0, 1)) sobel_ker = torch.cat([sobel_y, sobel_x], dim=0).view(2, 1, ksize, ksize).float() for dataset in args.split: indir = osp.join(args.datadir, dataset, 'label') outdir = osp.join(args.datadir, dataset, args.outname) os.makedirs(outdir, exist_ok=True) args_to_apply = [(indir, outdir, osp.basename(basename)) for basename in glob(osp.join(indir, "*.png"))] mpp.Pool(processes=mp.cpu_count() // 2).map(process, args_to_apply)
# t = Thread(target=loop)
# # t = Process(target=loop)
# print "the index is: ", index, time.time()
# t.start()
# while True:
#     pass

import time

# start_time = time.time()
# my_list = range(100000000)
# result = [f(x) for x in my_list]
# end_time = time.time()
# print "before: ", end_time - start_time

from multiprocessing import pool

p = pool.Pool(4)
# lst = range(100000000)
# p.map(f, lst)
# print "after: ", time.time() - end_time

my_parameter_list = [(index, index + 1) for index in range(100)]
start_time = time.time()
result = p.map(my_add, my_parameter_list)
# result = p.starmap(local_add, my_parameter_list)
print(result)
print(time.time() - start_time)

# print "*****************************************************"
# p = Pool(4)
# result = p.starmap(local_add, my_parameter_list)  # starmap is new in Python 3.3
# print result
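# A minimal sketch (added for illustration) of the difference the comments
# above point at: Pool.map passes each tuple as one argument, while
# Pool.starmap (Python 3.3+) unpacks the tuple into separate arguments.
# The bodies of my_add/local_add are assumptions, not from the original file.
from multiprocessing import pool

def my_add(pair):      # map-style worker: receives the whole tuple
    a, b = pair
    return a + b

def local_add(a, b):   # starmap-style worker: receives unpacked arguments
    return a + b

if __name__ == '__main__':
    pairs = [(i, i + 1) for i in range(5)]
    with pool.Pool(2) as p:
        print(p.map(my_add, pairs))         # [1, 3, 5, 7, 9]
        print(p.starmap(local_add, pairs))  # same result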
) + '/stuffthingmaps/train2017/'

# Create a folder under the current directory to store the intermediate map results
if not os.path.exists(stuffmap_file_path):
    os.makedirs(stuffmap_file_path)  # creates nested directories

IMAGE_DIR = Path("./map/")  # directory of map images to process
im_files = [f for f in IMAGE_DIR.iterdir()]

# Progress bar
w = progressbar.widgets
widgets = [
    'Progress: ', w.Percentage(), ' ', w.Bar('#'), ' ', w.Timer(), ' ',
    w.ETA(), ' ', w.FileTransferSpeed()
]
progress = progressbar.ProgressBar(widgets=widgets)


def write_image(im_file):
    img_cv = cv2.imread(str(im_file), 0)
    cv2.imwrite(stuffmap_file_path + str(im_file.stem) + ".png", img_cv)


myPool = pool.Pool(processes=4)  # process the images in parallel
for im_file in progress(im_files):
    myPool.apply_async(func=write_image, args=(im_file, ))
myPool.close()
myPool.join()
def main():
    parser = argparse.ArgumentParser(
        description="Evaluate algorithm based on features")
    parser.add_argument('--model_name', required=True, nargs='+',
                        help='Model Name')
    parser.add_argument('--root', default="../data/ALISC",
                        help="Root folder of all data. default in ../data/")
    parser.add_argument('--weights', nargs='+',
                        help="Weights for each model in multiple-model case")
    c = vars(parser.parse_args())
    model_names = c['model_name']

    # read groundtruth
    DATA_ROOT = c['root']
    print("1. Reading groundtruth...")
    valid_fn = os.path.join(DATA_ROOT, 'eval_tags/valid_image.txt')
    valid_ids, gt = read_alisc_groundtruth(valid_fn)

    dis_mats = []
    for model_name in model_names:
        print("**********************Model {}********************".format(
            model_name))
        print("2. Reading features for query images for model [{}]...".format(
            model_name))
        valid_features = np.load(os.path.join(
            DATA_ROOT, 'query_features', model_name, 'feature.npy'))
        print("3. Reading features for eval images for model [{}]...".format(
            model_name))
        eval_ids, eval_features = read_feature(
            os.path.join(DATA_ROOT, 'eval_features', model_name))
        print("4. Calculating Distances...")
        distance_type = 'cosine'
        dis_mat_ = calculate_distance_mat(
            valid_features, eval_features, distance_type=distance_type)
        dis_mats.append(dis_mat_)

    if c['weights'] is not None:
        weights = [float(x) for x in c['weights']]
    else:
        # take average weights
        weights = np.ones(len(model_names)) / len(model_names)
    print("Weights", weights)

    # merge distance mats
    dis_mat = np.zeros(dis_mats[0].shape)
    for i in range(len(weights)):
        dis_mat += weights[i] * dis_mats[i]

    return_lists = {}
    knn = []
    aps = []
    print("5. Evaluate each image...")
    valid_MAP = 0
    pool = mp.Pool(16)
    results = [
        pool.apply_async(search_k_smallest,
                         args=(dis_mat[i, ...].ravel(), 20))
        for i in range(len(valid_ids))
    ]
    for r in results:
        knn.append([x[0] for x in r.get()])
    for i in range(len(knn)):
        valid_id = valid_ids[i]
        top_k = [eval_ids[x] for x in knn[i]]
        return_lists[valid_id] = top_k
        ap = eval_ap(top_k, gt[i])
        print("ImageID: {} / AP = {}".format(valid_id, ap))
        valid_MAP += ap
        aps.append(ap)
    valid_MAP /= len(valid_ids)

    # save results
    with open('./{}_MAP.txt'.format("_".join(model_names)), 'w') as fp:
        fp.write('\n'.join([str(x) for x in aps]))
        fp.write('\n')
        fp.write('MAP: ')
        fp.write(str(valid_MAP))
    print("MAP= ", valid_MAP)
    with open('./{}_list.txt'.format("_".join(model_names)), 'w') as fp:
        # pickle.dump(return_lists, fp)
        for key, value in return_lists.items():
            fp.write('{},{}\n'.format(key, ';'.join(value)))
def get_pool(processes=None, initializer=None, initargs=(),
             maxtasksperchild=None):
    return pool.Pool(processes, initializer, initargs, maxtasksperchild)
def get_pool():
    return pool.Pool(1, init_pool)
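# A minimal, self-contained sketch (added for illustration, not from the
# original sources) of how a get_pool-style factory like the two above is
# typically used: build the pool once, hand work to it, then close and join.
from multiprocessing import pool

def get_pool(processes=None, initializer=None, initargs=(),
             maxtasksperchild=None):
    return pool.Pool(processes, initializer, initargs, maxtasksperchild)

def square(x):  # hypothetical worker function
    return x * x

if __name__ == '__main__':
    p = get_pool(processes=2, maxtasksperchild=10)
    try:
        print(p.map(square, range(8)))  # [0, 1, 4, 9, 16, 25, 36, 49]
    finally:
        p.close()
        p.join()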
if __name__ == '__main__':
    # uncomment next lines to set the random seed static instead of different on every run
    # seed = 7857863  # some integer
    # random.seed(a=seed)
    # np.random.seed(seed=seed)

    ns = [10, 20, 50, 100, 200, 400, 1000, 2000, 4000]
    m = 100
    for n in ns:
        w = np.random.pareto(a=1.5, size=n)
        w.sort()
        func = partial(worker, w=w)
        p = pool.Pool(processes=10)
        degrees = np.array(p.map(func, range(m)))

        avg = np.zeros((n, ), dtype=np.float)
        var = np.zeros((n, ), dtype=np.float)
        for i in range(n):
            dis = degrees[:, i]
            avg[i] = np.average(dis)
            var[i] = np.var(dis)

        plt.figure()
        for i in range(n):
            dis = degrees[:, i]
            c = Counter(dis)
            maxval = max(c.values())
            counts = np.array([(k, v) for (k, v) in c.items()])
def cesareans_output():
    # pull the review text from the input field and store it
    review_text = request.args.get('text_review')
    city_state_dict = [dict(city=city) for city in city_state_list]
    city, state = request.args.get('sel_city').split(",")
    # SearchBusinesses(review)
    top_n = 5  # Number of topics to choose for top of list.
    rev_topic = np.array(vectorsearch.GetDocTopic(review_text))
    # Get the top few topics for this review.
    top_n_topics = rev_topic.argsort()[-top_n:][::-1]
    # print(rev_topic)  # Print the topic vector.
    with open("query_history.txt", "a") as myfile:
        myfile.write("\n!@# " + review_text)
    bus_ids_in_city_state = get_bus_ids_city_state(city.strip(), state.strip())
    topic_listings = [" ".join(vectorsearch.GetTopicWords(topic, ))
                      for topic in top_n_topics]
    start = time.time()
    # top_bus_id, top_bus_sim = vectorsearch.FindBusinessSimilarityLDA(rev_topic, business_ids=bus_ids_in_city_state, method='Hel', top_n=30)
    top_bus_id, top_bus_sim = vectorsearch.FindBusinessSimilaritydoc2vec(
        review_text, bus_ids_in_city_state, top_n=50)
    print("Similarity took", time.time() - start, "seconds")
    # print(topic_listings)

    # Check that the names are not already included.
    names, valid_biz, biz_sims = [], [], []
    for i_bus, bus_id in enumerate(top_bus_id):
        name = df_businesses.name[df_businesses.business_id == bus_id].values[0]
        if name not in names:
            names.append(name)
            valid_biz.append(bus_id)
            biz_sims.append(top_bus_sim[i_bus])
            print(name, top_bus_sim[i_bus])
    top_bus_id = valid_biz
    top_bus_sim = biz_sims

    # Visualize the search query.....
    img_path_query = '/images/insight/query_' + str(uuid.uuid4()) + '.png'
    # vectorsearch.visualize_topic(rev_topic, num_topics=top_n, save_path='/home/carlson/web/'+img_path_query)

    # Find the top businesses.
    top_businesses = []
    words_paths = []
    for i, bus_id in enumerate(top_bus_id[:20]):
        # This is the full topic array for the business.
        bus_topic_vec = vectorsearch.bus_lda_topics[
            vectorsearch.bus_lda_topics.business_id == bus_id].topic_vector.values[0]
        img_path = '/images/insight/' + bus_id + '.png'
        # print('Generating image ', img_path)
        lat = df_businesses.latitude[df_businesses.business_id == bus_id].values[0]
        lon = df_businesses.longitude[df_businesses.business_id == bus_id].values[0]
        URL = df_businesses.URL[df_businesses.business_id == bus_id].values[0]
        image_URL = df_businesses.image_URL[df_businesses.business_id == bus_id].values[0]
        words = bus_reviews[bus_id]
        words_paths.append((words, img_path))
        # vectorsearch.visualize_topic(bus_topic_vec, num_topics=top_n, save_path='/home/carlson/web/'+img_path, top_topics=top_n_topics)
        # Append to list that gets passed to web page...
        top_businesses.append(dict(
            bus_id=bus_id, similarity=top_bus_sim[i],
            image_path='http://planck.ucsc.edu/' + img_path,
            bus_name="%i. " % (i + 1) +
                     df_businesses.name[df_businesses.business_id == bus_id].values[0],
            lat=lat, lon=lon, URL=URL, image_URL=image_URL))

    # Generate word clouds
    p = pool.Pool(12)
    p.map(gen_word_cloud, words_paths)
    p.close()
    p.join()

    centroid_lat = np.average([biz['lat'] for biz in top_businesses])
    centroid_lon = np.average([biz['lon'] for biz in top_businesses])

    # Generate map....
    map_path = img_path[:-4] + '.html'
    print("\nPATH TO MAP, lat, lon", map_path, '\n', centroid_lat, centroid_lon)
    map_osm = folium.Map(
        location=[centroid_lat, centroid_lon], zoom_start=13,
        detect_retina=True, tiles='stamentoner',
        attr='Map tiles by <a href="http://stamen.com">Stamen Design</a>, '
             'under <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a>. '
             'Data by <a href="http://openstreetmap.org">OpenStreetMap</a>, '
             'under <a href="http://creativecommons.org/licenses/by-sa/3.0">CC BY SA</a>.')
    # map_osm = folium.Map(location=[centroid_lat, centroid_lon], zoom_start=13, detect_retina=True,
    #                      tiles='http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png',
    #                      attr='© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> © <a href="http://cartodb.com/attributions">CartoDB</a>')
    # map_osm.add_tile_layer(tile_url='http://tile.stamen.com/toner-labels/{z}/{x}/{y}.png', attr='labels',
    #                        active=True, overlay=True)
    for business in top_businesses[:]:
        html = r'''<div align="center">
        <font size="4"><a href="''' + business['URL'] + '''">
        <b>''' + business['bus_name'] + '''</b></a></font>
        <br><img src="''' + business['image_path'] + '''" alt="NOPE" style="width:250px;height:125px;"></div>'''
        iframe = folium.element.IFrame(html=html, width=300, height=175)
        popup = folium.Popup(html=iframe)
        icon = folium.Icon(color="blue", icon="ok")
        marker = folium.Marker(location=[business['lat'], business['lon']],
                               popup=popup, icon=icon)
        map_osm.add_children(marker)

    heatmap_events = [
        (df_businesses.latitude[df_businesses.business_id == bus_id].values[0],
         df_businesses.longitude[df_businesses.business_id == bus_id].values[0],
         -top_bus_sim[i] + top_bus_sim[0]) for i, bus_id in enumerate(top_bus_id)]
    lats = np.array(heatmap_events)[:, 0]
    lons = np.array(heatmap_events)[:, 1]
    sims_array = np.array(heatmap_events)[:, 2]
    scale = top_bus_sim[6] - top_bus_sim[0]
    sims_array = ((1 - 1 / (np.exp(sims_array / scale) + 1)) * 50).astype(np.int32)
    heatmap = []
    for i, sim in enumerate(sims_array):
        for j in range(sim):
            heatmap += [[lats[i] + .00001 * j, lons[i]]]
    map_osm.add_children(plugins.HeatMap(heatmap, max_zoom=18, radius=25, max_val=20))
    map_osm.save('/home/carlson/web/' + map_path)
    append_mousemove_js('/home/carlson/web/' + map_path)

    return render_template("output.html", review_text=review_text,
                           topic_listings=topic_listings,
                           top_businesses=top_businesses,
                           image_path_query='http://planck.ucsc.edu/' + img_path_query,
                           map_path='http://planck.ucsc.edu/' + map_path,
                           city_state_list=city_state_dict)
from .bookkeeping import PointIds, InMemorySessionInterface as IMSI
from .schemas import oms as schema

app = flask.Flask(__name__)
# For production deployment: generate a different one via Python's `os.urandom`
# and store it in a safe place.
# See: http://flask.pocoo.org/docs/0.11/quickstart/#sessions
app.secret_key = b"DON'T USE THIS IN PRODUCTION! " + b'\xdb\xcd\xb4\x8cp'
app.session_interface = IMSI()

# Set up a pool of workers to which jobs can be submitted and a dictionary
# which stores the asynchronous result objects (a usage sketch follows this
# snippet).
app.workers = mpp.Pool(4)
app.results = {}


##### Utility Functions #######################################################
#
# Some functions used throughout this module (and maybe even elsewhere.)
#
# This should probably go into its own module but I'm putting it all here for
# now, as some parts need to stay in this module while some parts can be
# factored out later. The 'factoring out' part can be considered an open TODO.
#
###############################################################################

def xml_response(template):
    response = flask.make_response(template)
    response.headers['Content-Type'] = 'text/xml'
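# A minimal sketch (added for illustration; the route names and job function
# are hypothetical, not from the original module) of the pattern described
# above: a request submits work to app.workers and stashes the AsyncResult in
# app.results so a later request can poll it.
import uuid

def slow_job(n):  # hypothetical long-running task
    return sum(i * i for i in range(n))

@app.route('/submit/<int:n>')
def submit(n):
    job_id = str(uuid.uuid4())
    app.results[job_id] = app.workers.apply_async(slow_job, (n,))
    return job_id

@app.route('/status/<job_id>')
def status(job_id):
    result = app.results[job_id]
    return str(result.get()) if result.ready() else 'still running'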
        simple_shp_2 = New_shp(boundbox[0], boundbox[1], boundbox[2],
                               boundbox[3], boundbox[4], boundbox[5],
                               boundbox[6], boundbox[7], boundbox[8])
        shp_2 = BRepAlgoAPI_Cut(shp, simple_shp_2).Shape()
        converter.export_shape_to_svg(shape=shp_2,
                                      filename=MotherDic + item + ".svg",
                                      proj_ax=converter.DIRS["2"], scale=sc)
        return 1
    except Exception as re:
        shutil.rmtree(MotherDic)
        print(MotherDic + " has been removed")
        print(fname + ' failed, due to: {}'.format(re))
        return 0


p = pool.Pool(processes=args.n_cores)
f = partial(Generate_task)
t0 = time.time()
mask = p.map(f, fnames)
Mask = np.asarray(mask)

label_valid = np.delete(label, np.where(Mask == 0))
label_valid = [int(i) for i in label_valid]
dirs_valid = np.delete(np.array(dirs), np.where(Mask == 0))
Answer = dict(zip(dirs_valid, label_valid))
fname_answer = os.path.join(pathwrite, 'answer.json')
def optimize_population(N, sigmas, fixed_args, gamma_init=1. / 250., parallel=True): """ Function for optimizing the population :param N: int Number of neurons. :param sigmas: np.array White noise amplitude values (will not be optimized). :param fixed_args: list All fixed parameters of the optimization procedure. :param gamma_init: float Initial value of process jump rate (kHz). Default=1. / 250. :param parallel: bool Whether one wants to parallelize the procedure (approximate, but much faster) or do updates sequentially (slow, but less approximative). Default=True :return: list Optimal model variables. """ proc_params = gamma_init, 0., 1. pop_params = np.zeros(N), -6.5 * np.ones(N), sigmas model_variables = proc_params, pop_params print('Optimize Mu') if parallel: #num_cpu = 10 num_cpu = np.amin([N, int(np.floor(.9*cpu_count()))]) p = pool.Pool(num_cpu) mu_results = p.map(partial(parallel_simplex_mu_fit, model_variables=model_variables, fixed_args=fixed_args), range(N)) p.close() proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params mus = np.array(mu_results) pop_params = Cs, mus, sigmas model_variables = proc_params, pop_params else: for ineuron in range(N): print('Neuron %d' %ineuron) opt_res = minimize_scalar(simplex_mu_fit_wrapper_mllk, bracket=[-6.5,-4.], bounds=[-8.,-3.], method='brent', args=(ineuron, model_variables, fixed_args), options={'xtol': 1e-3}) opt_mu = opt_res.x proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params mus[ineuron] = opt_mu pop_params = Cs, mus, sigmas model_variables = proc_params, pop_params print('Optimize C/gamma shared') converged = False mllk_cur = -np.inf opt_shared_C = .5 opt_gamma = 1./500. while not converged: mllk_old = mllk_cur print('Optimize C') if parallel: #num_cpu = 10 num_cpu = np.amin([N, int(np.floor(.9 * cpu_count()))]) p = pool.Pool(num_cpu) Cs_fit = p.map(partial(parallel_simplex_C_fit, C_init=opt_shared_C, model_variables=model_variables, fixed_args=fixed_args), range(N)) p.close() else: Cs_fit = np.empty(N) for ineuron in range(N): print('Neuron %d' % ineuron) opt_res = minimize_scalar(simplex_C_fit_wrapper_mllk, bracket=[.5 * opt_shared_C, 2. 
* opt_shared_C], bounds=[0., 2.], method='brent', args=(ineuron, model_variables, fixed_args), options={'xtol': 1e-3}) opt_C = opt_res.x Cs_fit[ineuron] = opt_C proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params pop_params = np.array(Cs_fit), mus, sigmas model_variables = proc_params, pop_params print('Optimize tau') opt_res = minimize_scalar(simplex_gamma_fit_wrapper_mllk, bracket=[.5*opt_gamma, 2.*opt_gamma], bounds=[1./2e3, 1./50.], method='brent', args=(model_variables, fixed_args), options={'xtol': 1e-3}) opt_gamma = opt_res.x mllk_cur = -opt_res.fun proc_params, pop_params = model_variables gamma_jump, x_bar_jump, zeta_jump = proc_params proc_params = opt_gamma, x_bar_jump, zeta_jump model_variables = proc_params, pop_params convergence = -(mllk_cur - mllk_old)/mllk_cur converged = convergence < 1e-3 print('Optimize mu and C') if parallel: #num_cpu = 10 num_cpu = np.amin([N, int(np.floor(.9 * cpu_count()))]) p = pool.Pool(num_cpu) mus_Cs_fit = p.map(partial(parallel_simplex_mu_C_fit, model_variables=model_variables, fixed_args=fixed_args), range(N)) p.close() mus_fit, Cs_fit = np.empty(N), np.empty(N) for ineuron in range(N): mus_fit[ineuron] = mus_Cs_fit[ineuron][0] Cs_fit[ineuron] = mus_Cs_fit[ineuron][1] else: mus_fit, Cs_fit = np.empty(N), np.empty(N) for ineuron in range(N): print('Neuron %d' % ineuron) init_variables = np.array([-4, .5]) initial_simplex = np.array([[-3., .1], [-7., .5], [-5., .7]]) opt_res = minimize(simplex_mu_C_fit_wrapper_mllk, x0=init_variables, method='Nelder-Mead', args=(ineuron, model_variables, fixed_args), tol=1e-3, options={ 'initial_simplex': initial_simplex}) opt_mu, opt_C = opt_res.x[0], opt_res.x[1] mus_fit[ineuron] = opt_mu Cs_fit[ineuron] = opt_C proc_params, pop_params = model_variables Cs, mus, sigmas = pop_params pop_params = np.array(Cs_fit), np.array(mus_fit), sigmas model_variables = proc_params, pop_params return model_variables
def main(): coco_output = { "info": INFO, "licenses": LICENSES, "categories": CATEGORIES, "images": [], "annotations": [] } image_id = 1 annotation_id = 1 im_files = [f for f in IMAGE_DIR.iterdir()] im_files.sort(key=lambda f: f.stem, reverse=True) an_files = [f for f in ANNOTATION_DIR.iterdir()] an_files.sort(key=lambda f: f.stem, reverse=True) assert len(an_files) == len(im_files), \ "#images does not equal to #labels, please run diff_two_folder.py,and delete the mis-match file." for im_file, an_file in zip(im_files, an_files): image = Image.open(im_file) im_info = pycococreatortools.create_image_info(image_id, im_file.name, image.size) coco_output['images'].append(im_info) myPool = pool.Pool(processes=16) annotation_info_list = [] with open(an_file, 'r') as f: datas = json.load(f) for i in range(len(datas)): data = datas[i] # print(data) bounding_box = get_info(data)[0] segmentation = get_info(data)[1] class_id = 1 print(bounding_box, segmentation) area = bounding_box[-1] * bounding_box[-2] an_infos = pycococreatortools.mask_create_annotation_info( annotation_id=annotation_id, image_id=image_id, category_id=class_id, area=area, image_size=image.size, bounding_box=bounding_box, segmentation=segmentation) annotation_info_list.append(an_infos) annotation_id += 1 myPool.close() myPool.join() for annotation_info in annotation_info_list: if annotation_info is not None: coco_output['annotations'].append(annotation_info) image_id += 1 print("[INFO]: Saving annotations") output_json = Path(RESULT_JSON_DIR) with output_json.open('w', encoding='utf-8') as f: json.dump(coco_output, f) print("[INFO]: Annotations JSON file saved in:", str(output_json))
def ga(): # STUDENT Feel free to play with this parameter pop_limit = 240 # Code to parallelize some computations batches = os.cpu_count() if pop_limit % batches != 0: print("It's ideal if pop_limit divides evenly into " + str(batches) + " batches.") batch_size = int(math.ceil(pop_limit / batches)) with mpool.Pool(processes=os.cpu_count()) as pool: init_time = time.time() # STUDENT (Optional) change population initialization population = [ Individual.random_individual() if random.random() < 0.9 else Individual.empty_individual() for _g in range(pop_limit) ] # But leave this line alone; we have to reassign to population because we get a new population that has more cached stuff in it. population = pool.map(Individual.calculate_fitness, population, batch_size) init_done = time.time() print("Created and calculated initial population statistics in:", init_done - init_time, "seconds") generation = 0 start = time.time() now = start print("Use ctrl-c to terminate this loop manually.") try: while True: now = time.time() # Print out statistics if generation > 0: best = max(population, key=Individual.fitness) print("Generation:", str(generation)) print("Max fitness:", str(best.fitness())) print("Average generation time:", (now - start) / generation) print("Net time:", now - start) print() with open("levels/last.txt", 'w') as f: for row in best.to_level(): f.write("".join(row) + "\n") generation += 1 # STUDENT Determine stopping condition stop_condition = False if stop_condition: break # STUDENT Also consider using FI-2POP as in the Sorenson & Pasquier paper gentime = time.time() next_population = generate_successors(population) gendone = time.time() print("Generated successors in:", gendone - gentime, "seconds") # Calculate fitness in batches in parallel next_population = pool.map(Individual.calculate_fitness, next_population, batch_size) popdone = time.time() print("Calculated fitnesses in:", popdone - gendone, "seconds") population = next_population except KeyboardInterrupt: pass return population
    def lookup(self, words, batch=1000, epochs=10000, skip=True, threads=None):
        """
        Look up the word vectors for the given words; words that are not found
        are assigned a random vector.
        :param words: word collection, must be a list
        :param batch: number of file lines read per epoch
        :param epochs: number of lookup epochs
        :param skip: whether to skip the first word (it may be a placeholder)
        :param threads: number of parallel processes, only effective in
            fast_mode; if None, it equals the number of CPU threads
        :return:
        """
        self.skip = skip
        if not isinstance(words, list):
            raise TypeError("words must be list type.")
        self.words_dict = {word: index for index, word in enumerate(words)}
        if not self.fast_mode:
            words = set(words)
            vectors = [[] for i in range(len(words))]
            self._build()
            left_num = 0
            flag = 0
            if skip:
                left_num = 1
            for epoch in range(epochs):
                lines = [[] for i in range(batch)]
                for bat in range(batch):
                    line = self.filehead.readline()
                    if line:
                        lines[bat] = line
                    else:
                        lines = lines[:bat]
                        flag = 1
                        print("Epoch: {}, The End of File, So Break.".format(
                            epoch))
                        break
                vector_ = self._process(lines)
                """for word in words:
                    try:
                        vectors[self.words[word]] = vector_[word]
                    except KeyError:
                        left_words.append(word)"""
                words_int = words.intersection(vector_.keys())
                words.difference_update(words_int)
                for word in words_int:
                    vectors[self.words_dict[word]] = vector_[word]
                if len(words) <= left_num:
                    print("Epoch: {}, All Words Are Mapped, So Break.".format(
                        epoch))
                    print("All words are mapped.")
                    return vectors
                if flag == 1:
                    vectors = self._free_padding(vectors)
                    return vectors
                if ((epoch + 1) * batch) % 100000 == 0:
                    print("Epoch: {}/{}, Complete.".format(epoch + 1, epochs))
            print("Epoch is Enough, So Return.")
            vectors = self._free_padding(vectors)
            return vectors
        else:
            # fast_mode: look up in parallel across the split files.
            # First, split the query words into per-initial word dictionaries
            # keyed by the symbols in split_label; each value is itself a dict
            # mapping a word to its index in words.
            prepro = PreprocessVector(need_pro=self.need_pro)
            prepro.save_path(self.preprocessed_vector_path)
            prepro.get_path(filename=self.filename, path=self.path)
            prepro.subfile_name(self.split_label)
            prepro.process()
            vectors = [[] for i in range(len(words))]
            words = self._split()
            self._build()
            if threads is None:
                threads = cpu_count()
            p = pool.Pool(threads)
            labels = [label for label in self.split_label.keys()]
            words_dicts = [words[label] for label in labels]
            batch_bag = [batch for i in range(len(labels))]
            epochs_bag = [epochs for i in range(len(labels))]
            skip_bag = [skip for i in range(len(labels))]
            vectors_dicts = p.map(
                self._swap,
                zip(labels, words_dicts, batch_bag, epochs_bag, skip_bag))
            p.close()
            for vectors_dict in vectors_dicts:
                for key in vectors_dict.keys():
                    vectors[key] = vectors_dict[key]
            return self._free_padding(vectors)
def main():
    # Information stored in the COCO label file (e.g. training2017.json)
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    # Initialize the ids (incremented as we go)
    image_id = 1
    annotation_id = 1

    # Load the image list
    im_files = [f for f in IMAGE_DIR.iterdir()]
    # Sort to keep images and labels aligned
    im_files.sort(key=lambda f: f.stem, reverse=True)
    # print("im-length:", len(im_files), "\n im_files:", im_files)

    myPool = pool.Pool(processes=4)  # process annotations in parallel

    for im_file in im_files:
        # Record the image info (id, file name, size); ids start at 1
        image = Image.open(im_file)
        im_info = pycococreatortools.create_image_info(image_id, im_file.name,
                                                       image.size)
        coco_output['images'].append(im_info)

        annotation_info_list = []  # holds the annotation info

        # Used to build the stuff-thing map
        img_cv = cv2.imread(str(im_file))  # read with OpenCV so we can draw the mask and save the result
        rectangle = np.zeros(img_cv.shape[0:3], dtype="uint8")  # blank image
        # Fill the image area; 0 is black, 255 is white
        rectangle.fill(125)  # 125 = gray

        # Parse the label info: top-left, bottom-right and the four corner points (used for segmentation)
        bounding_box, segmentation = get_info(im_file)
        class_id = 1  # numeric class id, e.g. 1 (fixed to 1 because "license plate" is the only class)

        # Log progress
        print(bounding_box, segmentation)

        # Build the stuff-thing map
        color = random_color(class_id)  # class colour, kept identical across images
        make_seg_mask(rectangle, segmentation, color)

        # area = bounding_box[-1] * bounding_box[-2]  # bounding-box area, width * height
        area = compute_polygon_area(
            segmentation)  # segmentation area (more precise than the bounding box)

        myPool.apply_async(func=pycococreatortools.mask_create_annotation_info,
                           args=(annotation_id, image_id, class_id, area,
                                 image.size, bounding_box, segmentation),
                           callback=annotation_info_list.append)
        # an_infos = pycococreatortools.mask_create_annotation_info(annotation_id=annotation_id, image_id=image_id,
        #                                                           category_id=class_id, area=area,
        #                                                           image_size=image.size, bounding_box=bounding_box,
        #                                                           segmentation=segmentation)
        # annotation_info_list.append(an_infos)

        cv2.imwrite(file_path + str(im_file.stem) + ".png", rectangle)

        # All annotations for this image are collected above; store them once per image
        for annotation_info in annotation_info_list:
            if annotation_info is not None:
                coco_output['annotations'].append(annotation_info)

        image_id += 1

    myPool.close()
    myPool.join()

    # Save as JSON
    print("[INFO] Storing annotations json file...")
    output_json = Path(f'ccpd_annotations.json')
    with output_json.open('w', encoding='utf-8') as f:
        json.dump(coco_output, f)
    print("[INFO] Annotations JSON file saved in:", str(output_json))
    def process(self, batch=10000, encoding='utf-8', sorted=False,
                threads=None, split_label='&cut&', end_label='000000000',
                remove=True):
        """
        Split the file.
        :param batch: number of lines read per pass; a suitable value maximises
            CPU utilisation (default 1000). Too large leaves the CPU idle, too
            small wastes disk throughput. A small batch also roughly preserves
            the original line order, at the cost of more memory.
        :param encoding: file encoding
        :param sorted: whether to re-sort the split files so that lines keep the
            same order as in the source file. Not recommended: it is slow, and
            the split data is already close to the original order.
        :param threads: number of processes; must not exceed the number of CPU
            threads, otherwise it slows things down. Defaults to cpu_count().
        :param split_label: separator used for splitting when sorted is True
        :param end_label: marker placed at the head and tail of a line, used to
            mark correctly processed lines
        :param remove: whether to delete temporary files (only when sorted is True)
        Notes:
            1. If sorted is True, each line of every split file is first prefixed
               with its index in the source file, then sort() re-orders it.
            2. Processing speed is bounded by disk throughput, so it is better to
               first write the split files to an SSD via save_path and then copy()
               them to the target folder.
            3. The parallel writes occasionally fail: a line is randomly split
               across two lines, which breaks later processing, so the output
               cannot be used directly and the broken lines must be removed. The
               only way to avoid this is pool.Pool().apply, which is also the
               slowest option.
            4. end_label marks every line; only lines carrying end_label at both
               the head and the tail are treated as well-formed and processed.
        """
        self.batch = batch
        self.sorted = sorted
        self.encoding = encoding
        self.split_label = split_label
        self.end_label = end_label
        if threads is None:
            threads = cpu_count()
        start_time = time.time()
        print(
            "Start Preprocessing Vectors: This line only shows when 'process' in class PreprocessVector is called."
        )
        if self.need_pro or not self._search():
            if not os.path.exists(self.subfile_path):
                os.makedirs(self.subfile_path)
            if self.need_pro:
                dirs = os.listdir(self.subfile_path)
                for i in dirs:
                    os.remove(self.subfile_path + '/' + i)
            file = open(self.path + '/' + self.filename, 'r',
                        encoding=encoding)
            p = pool.Pool(threads)
            # The first line may be a comment for the whole file, so handle it here.
            tof = file.readline()
            if len(tof.split()) > 10:  # 10 is arbitrary; a normal line has more than 10 fields
                file.seek(0)
            tem = [[file.readline() for x in range(self.batch)]
                   for y in range(threads)]
            subfiles = [[line.strip() for line in lines if line]
                        for lines in tem if lines[0]]
            start = 0
            while len(subfiles) > 0:
                starts = []
                for lines in subfiles:
                    starts.append(start)
                    start = start + len(lines)
                for f, s in zip(subfiles, starts):
                    p.apply(self._process, args=(f, s))
                # p.map_async(self._wrap_p, zip(subfiles, starts))
                tem = [[file.readline() for x in range(self.batch)]
                       for y in range(threads)]
                subfiles = [[line.strip() for line in lines if line]
                            for lines in tem if lines[0]]
            p.close()
            # p.join()
            file.close()
        print("Preprocessing Operation is Completed. Cost Time is {:.2f}s".
              format(time.time() - start_time))
        if sorted is True:
            self.sort(encoding=encoding, remove=remove)
    return fn(*args)


def run_star(args):
    return run_download(*args)


if __name__ == '__main__':
    kaggle_int = 'kaggle.ini'
    if not os.path.exists(kaggle_int):
        print("Please create kaggle.ini first. See kaggle.ini.sample.")
        exit()
    competition, destination = read_args()
    username, password = read_config(kaggle_int)
    if username == "*****@*****.**" or password == "KAGGLE_PASSWORD":
        print(
            "Please setup kaggle.ini using your kaggle username and password.")
    else:
        session = login(username, password)
        data_url_list = get_data_url_by_name(competition)
        pool = pool.Pool()
        tasks = [(download, (url, session, destination))
                 for url in data_url_list]
        results = pool.map_async(run_star, tasks)
        results.wait()
def fit(self, model: SupervisedHeterogeneousNodeClassificationModel, dataset: Dataset) -> None: args = self.args self.device = "cpu" if not torch.cuda.is_available( ) or args.cpu else args.device_id[0] self.data = preprocess_dataset(dataset) global graph_pool graph_pool = self.data self.target_type = "def" self.train_target_nodes = self.data.train_target_nodes self.valid_target_nodes = self.data.valid_target_nodes self.test_target_nodes = self.data.test_target_nodes self.types = self.data.get_types() self.criterion = torch.nn.NLLLoss() self.stats = [] self.res = [] self.best_val = 0 self.train_step = 0 self.pool = mp.Pool(args.n_pool) self.st = time.time() self.jobs = prepare_data( args, self.data, self.target_type, self.train_target_nodes, self.valid_target_nodes, self.pool, ) """ Initialize GNN (model is specified by conv_name) and Classifier """ self.gnn = GNN( conv_name=args.conv_name, in_dim=len( self.data.node_feature[self.target_type]["emb"].values[0]), n_hid=args.n_hid, n_heads=args.n_heads, n_layers=args.n_layers, dropout=args.dropout, num_types=len(self.types), num_relations=len(self.data.get_meta_graph()) + 1, prev_norm=args.prev_norm, last_norm=args.last_norm, use_RTE=False, ) if args.use_pretrain: self.gnn.load_state_dict(load_gnn( torch.load(args.pretrain_model_dir)), strict=False) print("Load Pre-trained Model from (%s)" % args.pretrain_model_dir) self.classifier = Classifier(args.n_hid, self.data.y.max().item() + 1) self.model = torch.nn.Sequential(self.gnn, self.classifier).to(self.device) self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-4) if args.scheduler == "cycle": self.scheduler = torch.optim.lr_scheduler.OneCycleLR( self.optimizer, pct_start=0.02, anneal_strategy="linear", final_div_factor=100, max_lr=args.max_lr, total_steps=args.n_batch * args.n_epoch + 1, ) elif args.scheduler == "cosine": self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( self.optimizer, 500, eta_min=1e-6) else: assert False self.train_data = [job.get() for job in self.jobs[:-1]] self.valid_data = self.jobs[-1].get() self.pool.close() self.pool.join() self.et = time.time() print("Data Preparation: %.1fs" % (self.et - self.st)) for epoch in np.arange(self.args.n_epoch) + 1: """ Prepare Training and Validation Data """ train_data = [job.get() for job in self.jobs[:-1]] valid_data = self.jobs[-1].get() self.pool.close() self.pool.join() """ After the data is collected, close the pool and then reopen it. 
""" self.pool = mp.Pool(self.args.n_pool) self.jobs = prepare_data( self.args, self.data, self.target_type, self.train_target_nodes, self.valid_target_nodes, self.pool, ) self.et = time.time() print("Data Preparation: %.1fs" % (self.et - self.st)) """ Train """ self.model.train() train_losses = [] for ( node_feature, node_type, edge_time, edge_index, edge_type, x_ids, ylabel, ) in train_data: node_rep = self.gnn.forward( node_feature.to(self.device), node_type.to(self.device), edge_time.to(self.device), edge_index.to(self.device), edge_type.to(self.device), ) res = self.classifier.forward(node_rep[x_ids]) loss = self.criterion(res, ylabel.to(self.device)) self.optimizer.zero_grad() torch.cuda.empty_cache() loss.backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.clip) self.optimizer.step() train_losses += [loss.cpu().detach().tolist()] self.train_step += 1 self.scheduler.step(self.train_step) del res, loss """ Valid """ self.model.eval() with torch.no_grad(): ( node_feature, node_type, edge_time, edge_index, edge_type, x_ids, ylabel, ) = valid_data node_rep = self.gnn.forward( node_feature.to(self.device), node_type.to(self.device), edge_time.to(self.device), edge_index.to(self.device), edge_type.to(self.device), ) res = self.classifier.forward(node_rep[x_ids]) loss = self.criterion(res, ylabel.to(self.device)) """ Calculate Valid F1. Update the best model based on highest F1 score. """ valid_f1 = f1_score(ylabel.tolist(), res.argmax(dim=1).cpu().tolist(), average="micro") if valid_f1 > self.best_val: self.best_val = valid_f1 # torch.save( # self.model, # os.path.join( # self.args.model_dir, # self.args.task_name + "_" + self.args.conv_name, # ), # ) self.best_model_dict = deepcopy(self.model.state_dict()) print("UPDATE!!!") self.st = time.time() print(( "Epoch: %d (%.1fs) LR: %.5f Train Loss: %.2f Valid Loss: %.2f Valid F1: %.4f" ) % ( epoch, (self.st - self.et), self.optimizer.param_groups[0]["lr"], np.average(train_losses), loss.cpu().detach().tolist(), valid_f1, )) self.stats += [[ np.average(train_losses), loss.cpu().detach().tolist() ]] del res, loss del train_data, valid_data self.model.load_state_dict(self.best_model_dict) best_model = self.model.to(self.device) # best_model = torch.load( # os.path.join( # self.args.model_dir, self.args.task_name + "_" + self.args.conv_name # ) # ).to(self.device) best_model.eval() gnn, classifier = best_model with torch.no_grad(): test_res = [] for _ in range(10): ( node_feature, node_type, edge_time, edge_index, edge_type, x_ids, ylabel, ) = node_classification_sample( self.args, self.target_type, randint(), self.test_target_nodes, {1: True}, ) paper_rep = gnn.forward( node_feature.to(self.device), node_type.to(self.device), edge_time.to(self.device), edge_index.to(self.device), edge_type.to(self.device), )[x_ids] res = classifier.forward(paper_rep) test_acc = accuracy_score(ylabel.tolist(), res.argmax(dim=1).cpu().tolist()) test_res += [test_acc] return dict(Acc=np.average(test_res))
import pickle

from multiprocessing import pool

data = pd.read_csv(
    'all-prediction-matrix.csv'
).values  # global matrix accessed by all threads. bad, bad, bad coding practice, but research


def compute_disagreement_row_in_upper_triangular(i):
    # i is the reference column to compute disagreement with
    right_results = [
        np.logical_xor(data[:, i], data[:, j]).sum()
        for j in range(i + 1, data.shape[1])
    ]  # only compute for columns on the right of i
    results = np.zeros(data.shape[1])
    results[i + 1:] = right_results[:]  # pad with zeros on the left side
    return i, results


if __name__ == "__main__":
    poo = pool.Pool()
    res = poo.map(compute_disagreement_row_in_upper_triangular,
                  range(data.shape[1]))
    results = np.vstack([j for i, j in sorted(res, key=lambda x: x[0])
                         ])  # sort and combine rows
    results += results.T  # copy upper triangular to lower triangular
    results = results / float(data.shape[0])  # the fraction of disagreements
    results = 1. - results  # the fraction of agreements
    pickle.dump(results, open("results.p", "wb"))
    print(this_file, 'download finish')


def download_file_given_file_name(file_name):
    url = get_video_url(file_name)
    ret = requests.get(url)
    contents = ret.content
    if ret.status_code == 404:
        raise ValueError('Stream file missing %s' % (file_name))
    file_path = os.path.join(DATA_DIR, file_name)
    with open(file_path, 'wb+') as f:
        f.write(contents)


def download_file(url, filename):
    '''
    downloads the contents of the provided url to a local file
    '''
    contents = requests.get(url).content
    with open(filename, 'wb+') as f:
        f.write(contents)


if __name__ == "__main__":
    from multiprocessing import pool
    p = pool.Pool(20)
    p.map(download_all_videos, [None] * 20)
    # download_all_videos()
def main():
    cli.setup_logging()
    parser = argparse.ArgumentParser(
        description='Plot query response time histogram from answers stored '
        'in LMDB')
    parser.add_argument(
        '-o', '--output', type=str, default='histogram',
        help='output directory for image files (default: histogram)')
    parser.add_argument('-f', '--format', type=str, default='png',
                        help='output image format (default: png)')
    parser.add_argument('-c', '--config', default='respdiff.cfg',
                        dest='cfgpath',
                        help='config file (default: respdiff.cfg)')
    parser.add_argument('envdir', type=str,
                        help='LMDB environment to read answers from')
    args = parser.parse_args()

    config = cfg.read_cfg(args.cfgpath)
    servers = config['servers']['names']
    dnsreplies_factory = DNSRepliesFactory(servers)

    with LMDB(args.envdir, readonly=True) as lmdb_:
        adb = lmdb_.open_db(LMDB.ANSWERS)
        try:
            MetaDatabase(lmdb_, servers, create=False)  # check version and servers
        except NotImplementedError as exc:
            logging.critical(exc)
            sys.exit(1)

        with lmdb_.env.begin(adb) as txn:
            data = load_data(txn, dnsreplies_factory)

    def get_filepath(filename) -> str:
        return os.path.join(args.output, filename + '.' + args.format)

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    create_histogram({k: [tup[0] for tup in d] for (k, d) in data.items()},
                     get_filepath('all'), 'all', config)

    # rcode-specific queries
    with pool.Pool() as p:
        fargs = []
        for rcode in range(HISTOGRAM_RCODE_MAX + 1):
            rcode_text = dns.rcode.to_text(rcode)
            filepath = get_filepath(rcode_text)
            fargs.append((data, filepath, rcode_text, config, rcode))
        p.starmap(histogram_by_rcode, fargs)
    filepath = get_filepath('unparsed')
    histogram_by_rcode(data, filepath, 'unparsed queries', config, None)
# Asynchronous example
from multiprocessing import pool
import time
import os


def test():
    print("---worker process in the pool---pid=%d,ppid=%d" % (os.getpid(), os.getppid()))
    for i in range(3):
        print("---%d---" % i)
        time.sleep(1)
    return "haha"


# args receives the return value of test()
def test2(args):
    print("---callback func--pid=%d" % os.getpid())
    print("---callback func--args=%s" % args)


po = pool.Pool(3)
'''
When the child process finishes test(), the main process is woken up to run
the callback test2(), which makes the call asynchronous.
'''
po.apply_async(func=test, callback=test2)  # the callback makes this asynchronous

while True:
    time.sleep(1)
    print("---main process---pid=%d" % os.getpid())
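# A variant sketch (added for illustration, with hypothetical worker and
# callback names): instead of the busy while-loop above, the main process can
# wait for the pool with close() and join(); the callback still fires
# asynchronously when each task returns.
from multiprocessing import pool
import os
import time


def work(n):
    time.sleep(1)
    return "result-%d from pid %d" % (n, os.getpid())


def on_done(result):  # runs in the main process when a task finishes
    print("callback got:", result)


if __name__ == '__main__':
    po = pool.Pool(3)
    for n in range(3):
        po.apply_async(func=work, args=(n,), callback=on_done)
    po.close()   # no more tasks will be submitted
    po.join()    # block until all submitted tasks (and their callbacks) are done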