def csv_to_solr(fl, endpoint='http://dev-search:8983/solr/main',
                num_topics=999, reset_callback=None):
    if reset_callback is not None:
        print "Resetting (no way back now!)"
        reset_callback()
    print 'generating updates'
    initialize_doc = dict([('topic_%d_tf' % i, {'set': 0})
                           for i in range(1, num_topics)])
    p = Pool(processes=8)
    line_groupings = [[]]
    grouping_counter = 0
    total_lines = 0
    for line in fl:
        line_groupings[grouping_counter].append(line)
        if len(line_groupings[grouping_counter]) >= 10000:
            if grouping_counter == 3:
                curr_lines = sum(map(len, line_groupings))
                total_lines += curr_lines
                print 'processing line groups for', curr_lines, 'lines', total_lines, 'total'
                groupings = [(endpoint, initialize_doc, line_groupings[i])
                             for i in range(0, len(line_groupings))]
                print p.map_async(process_linegroup, groupings).get()
                grouping_counter = 0
                line_groupings = [[]]
            else:
                grouping_counter += 1
                line_groupings.append([])
    # flush whatever is left over after the loop
    groupings = [(endpoint, initialize_doc, line_groupings[i])
                 for i in range(0, len(line_groupings))]
    print p.map_async(process_linegroup, groupings).get()
    print "Committing..."
    requests.post('%s/update?commit=true' % endpoint,
                  headers={'Content-type': 'application/json'})
    return True
def convert_document(pdf_filename, resolution=200, delete_files=True,
                     page_delineation='\n--------\n', verbose=False,
                     temp_dir=str(uuid.uuid4()), password='',
                     thumb_prefix='thumb_page_', pool_count=2):
    just_pdf_filename = os.path.basename(pdf_filename)
    temp_pdf_filename = '{0}/{1}'.format(temp_dir, just_pdf_filename)
    shutil.copyfile(pdf_filename, temp_pdf_filename)
    filename = decrypt_pdf(temp_pdf_filename, temp_dir, password)
    filenames = split_pdf(filename, temp_dir)
    for filename in filenames:
        __pdf_filenames.put(filename)
    pool = Pool()
    pool.map_async(
        _yapot_worker,
        [(tid, just_pdf_filename, temp_dir, resolution)
         for tid in range(0, pool_count)],
    )
    while __text_filenames.qsize() != len(filenames):
        time.sleep(1)
    text_filenames = []
    try:
        while (1):
            text_filenames.append(__text_filenames.get_nowait())
    except:
        pass
    text = build_output_text(text_filenames, page_delineation)
    if delete_files:
        cleanup_yapot(temp_dir)
    return text
def filter_wids(wids, refresh=False):
    p = Pool(processes=8)
    wids = [x[0] for x in p.map_async(exists, wids).get() if x[1]]
    if not refresh:
        wids = [x[0] for x in p.map_async(not_processed, wids).get() if x[1]]
    return wids
def main():
    args = parse_options()
    viewpoint = args.observation_point
    viewpoint = Vector(*viewpoint)
    vertex_count, face_count, lines_count = importutils.analyze_file(args.input_file)
    print 'Vertices: %d, Primitives: %d' % (vertex_count, face_count)
    faces = importutils.get_faces(args.input_file)
    print 'File imported.'
    pool = Pool(args.jobs)
    try:
        result = pool.map_async(geometryutils.build_triangles, faces, 10000)
    except KeyboardInterrupt:
        pool.terminate()
        print 'Program stopped.'
        return
    triangles = result.get()
    triangles = chain.from_iterable(triangles)
    print 'Triangles generated.'
    try:
        process_data = ((t, viewpoint, args.wavelength) for t in triangles)
        result = pool.map_async(processor.try_process_triangle, process_data)
    except KeyboardInterrupt:
        pool.terminate()
        print 'Program stopped.'
        return
    data = result.get()
    data = filter(lambda x: x, data)
    print 'Model processed.'
    processor.write_triangles_data(data, args.output_file)
    print 'Data written into %s' % args.output_file
def rc(rf, alphabet, numOfThreads):
    tryn = 0
    counterTmp = 0
    printCounter = 1000
    listBasic = []
    if rf.endswith('.rar'):
        funcChosen = unrar
    elif rf.endswith('.zip') or rf.endswith('.7z'):
        funcChosen = zipFileUnzip
    for a in range(1, len(alphabet) + 1):
        for b in itertools.product(alphabet, repeat=a):
            k = "".join(b)
            k = re.escape(k)
            listBasic.append(k)
            tryn += 1
            if len(listBasic) == numOfThreads:
                pool = Pool(numOfThreads)
                pool.map_async(funcChosen, listBasic, callback=exitPass)
                pool.close()
                if resultPass:
                    timeWasted = time.time() - start
                    print 'Found! Password is ' + resultPass
                    print "It took " + str(round(time.time() - start, 3)) + " seconds"
                    print "Speed: " + str(round(tryn / float(timeWasted), 2)) + " passwords/sec"
                    print "Tried " + str(tryn) + " passwords"
                    exit()
                listBasic = []
            counterTmp += 1
            if counterTmp >= printCounter:
                print 'Trying combination number ' + str(tryn) + ':' + str(k)
                timeWasted = round(time.time() - start, 2)
                if timeWasted > 0:
                    print "It took already " + str(timeWasted) + " seconds. Speed: " + str(round(tryn / float(timeWasted), 2)) + " passwords/sec"
                counterTmp = 0
def _get_images_from_pdf(pdf_filename, resolution, verbose, delete_files, temp_dir, make_thumbs, thumb_size, thumb_dir, thumb_prefix, pool_count=1): success = False try: if verbose == True: print "Splitting PDF into multiple pdf's for processing ..." # make sure there is a place to put our temporary pdfs if not os.path.exists(temp_dir): os.makedirs(temp_dir) # make sure if we are going to make thumbs, the folde rexists if make_thumbs == True: if not os.path.exists(thumb_dir): os.makedirs(thumb_dir) # read input pdf inputpdf = PdfFileReader(open(pdf_filename, "rb")) if inputpdf.getIsEncrypted(): inputpdf.decrypt('') if verbose == True: print "Writing out %i pages ..." % inputpdf.numPages # create all of the temporary pdfs for i in xrange(inputpdf.numPages): output = PdfFileWriter() output.addPage(inputpdf.getPage(i)) #print output.resolvedObjects filename = "{0}/document-page-{1}.pdf".format(temp_dir,i) with open(filename, "wb") as outputStream: output.write(outputStream) __pdf_queue.put(i) if verbose == True: print "Dispatching pdf workers ..." # spin up our workers to convert the pdfs to images #pool_count = 4 pool = Pool() pool.map_async( _pdf_converter_worker, [(x, resolution, verbose, delete_files, temp_dir, make_thumbs, thumb_size, thumb_dir, thumb_prefix) for \ x in range(pool_count)] ) while __pdf_texts.qsize() != inputpdf.numPages: time.sleep(.25) if verbose == True: print "Done converting PDF." success = True except Exception, e: print str(e)
def download(urls, targets, num_proc=1, username=None, password=None,
             recursive=False, filetypes=None):
    """
    Download the urls and store them at the target filenames.

    Parameters
    ----------
    urls: iterable
        iterable over url strings
    targets: iterable
        paths where to store the files
    num_proc: int, optional
        Number of parallel downloads to start
    username: string, optional
        Username to use for login
    password: string, optional
        Password to use for login
    recursive: boolean, optional
        If set then no exact filenames can be given.
        The data will then be downloaded recursively and stored in the
        target folder.
    filetypes: list, optional
        list of file extensions to download, any others will not be downloaded
    """
    p = Pool(num_proc)
    # partial function for Pool.map
    cookie_file = tempfile.NamedTemporaryFile()
    dlfunc = partial(wget.map_download, username=username, password=password,
                     cookie_file=cookie_file.name, recursive=recursive,
                     filetypes=filetypes)
    p.map_async(dlfunc, zip(urls, targets)).get(9999999)
    cookie_file.close()
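# A minimal usage sketch for the download() helper above. It is only an
# illustration: the URLs, target paths, and credentials are hypothetical, and
# it assumes the module-level `wget.map_download` helper that download() wraps
# accepts the (url, target) tuples produced by zip().
if __name__ == '__main__':
    sample_urls = ['http://example.com/data/file_%02d.nc' % i for i in range(4)]
    sample_targets = ['/tmp/file_%02d.nc' % i for i in range(4)]
    download(sample_urls, sample_targets, num_proc=4,
             username='user', password='secret')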
def correction_terms_threaded(self):
    '''Finds the correction terms associated to the quadratic form,
    for each of the equivalence classes it finds the maximum by
    iterating through the relation vectors of the group.

    Uses multiprocessing.'''
    print 'Using multiprocessing'
    pool = Pool()  # default: processes=None => uses cpu_count()
    manager = Manager()
    start_time = time.time()
    coef_lists = lrange(self.group.structure)
    # representatives = elements of C_1(V) (np.matrix)
    representatives = map(lambda l: self.find_rep(l), coef_lists)
    # list of maxes
    lst = manager.list([None for i in xrange(len(representatives))])
    alphalist = list(self.get_alpha())  # cannot pickle generators
    pool.map_async(functools.partial(process_alpha_outside, self,
                                     representatives, lst),
                   alphalist)
    pool.close()
    pool.join()  # wait for pool to finish
    # get corrterms via (|alpha|^2+b)/4
    print 'Computed from quadratic form in %g seconds' \
          % (time.time() - start_time)
    return [Fraction(Fraction(alpha, self.int_inverse[1]) + self.b, 4) \
            for alpha in lst]
def _mp_compile(self, sources, output_dir=None, macros=None,
                include_dirs=None, debug=0, extra_preargs=None,
                extra_postargs=None, depends=None):
    """Compile one or more source files.

    see distutils.ccompiler.CCompiler.compile for comments.
    """
    # A concrete compiler class can either override this method
    # entirely or implement _compile().
    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    pool = Pool(MAX_PROCS)
    try:
        print("Building using %d processes" % pool._processes)
    except:
        pass
    arr = [(self, obj, build, cc_args, extra_postargs, pp_opts)
           for obj in objects]
    pool.map_async(_mp_compile_one, arr)
    pool.close()
    pool.join()

    # Return *all* object filenames, not just the ones we just built.
    return objects
def bruteUser(userlist, psize, hosti, pathi, porti, securei, userfound):
    global host
    host = hosti
    global port
    port = porti
    global secure
    secure = securei
    global userout
    userout = userfound
    global path
    path = pathi
    f = open(userout, "w").close()
    usersize = len(userlist)
    # manage pool
    if usersize <= psize:
        chunksize = 1
    else:
        chunksize = (usersize / psize) + (usersize % psize)
    print("Userlist size: %d\tChunk size: %d\tPool size: %d" % (usersize, chunksize, psize))
    print("Bruteforcing usernames")
    pool = Pool(processes=psize)
    for chunk in itertools.izip(grouper(userlist, chunksize)):
        pool.map_async(worker, chunk)
    pool.close()
    try:
        while len(active_children()) > 0:  # how many active children do we have
            sleep(2)
            ignore = active_children()
    except KeyboardInterrupt:
        exit("CTRL^C caught, exiting...\n\n")
    print("Username bruteforce complete")
def pool_multiprocess_index(file_or_urls, shard_size=10000):
    pool = Pool(processes=3)
    for file_ref in file_or_urls:
        reader = pymarc.MARCReader(open(file_ref, 'rb'))
        print("Start-up multiprocess pool")
        # shard_size is passed positionally as map_async's chunksize
        pool.map_async(index_shard, reader, shard_size)
    print("Finished multiprocess")
def __init__(self, subID):
    """
    THINGS TO UNDERSTAND:
    - when adding simulation attributes, why is the list nested
      (need to call simObjList[0])??
    """
    # Create instance of class from superclass
    Subject.__init__(self, subID)
    # Prepare to process in parallel
    # Simulation descriptors
    simDescriptors = ['A_SD2F_RepGRF', 'A_SD2F_RepKIN',
                      'A_SD2S_RepGRF', 'A_SD2S_RepKIN',
                      'A_Walk_RepGRF', 'A_Walk_RepKIN',
                      'U_SD2F_RepGRF', 'U_SD2F_RepKIN',
                      'U_SD2S_RepGRF', 'U_SD2S_RepKIN',
                      'U_Walk_RepGRF', 'U_Walk_RepKIN']
    # List of simulation names
    simNames = [subID + '_' + descriptor for descriptor in simDescriptors]
    # Initialize global variable for simulation objects
    initializeSimList()
    # Start worker pool
    pool = Pool(processes=12)
    # Run parallel processes to process simulations and append object to global list
    pool.map_async(runParallel, simNames, callback=updateSimList)
    # Clean up spawned processes
    pool.close()
    pool.join()
    # Add simulations as attributes to subject object
    for simObj in simObjList[0]:
        setattr(self, simObj.simName, simObj)
    # Display message to user
    print 'Time elapsed for processing subject ' + self.subID + ': ' + str(int(time.time() - self.startTime)) + ' seconds'
def process_batch(self): global batch_list # Wait for splitting to finish and reinitialize new Pool self.split_pool.close() self.split_pool.join() self.split_pool = Pool(processes=self.pool_size) # Filter array for None values batch_list = [x for x in batch_list if x is not None] # Get category of each word based on keywords process_pool = Pool(processes=self.pool_size) keyword_result = process_pool.map_async(get_keyword_categories, batch_list) # Get category of each word using conceptnet #conceptnet_pool = Pool(processes=self.pool_size) conceptnet_result = process_pool.map_async(get_conceptnet_categories, batch_list) # Get NER categories #NER_pool = Pool(processes=self.pool_size) NER_result = process_pool.map_async(get_NER_categories, batch_list) # Wait for processes in the batch to finish print "Keyword" sys.stdout.flush() keyword_result = keyword_result.get() #while(not conceptnet_result.ready()): # print conceptnet_result._number_left print "NER" sys.stdout.flush() NER_result = NER_result.get() print "Concept net" sys.stdout.flush() conceptnet_result = conceptnet_result.get() #conceptnet_result = process_pool.map(get_conceptnet_categories, batch_list) # Merge results from each type of category for i in range(0,len(keyword_result)): keyword_result[i].update(conceptnet_result[i]) keyword_result[i].update(NER_result[i]) # Build category values based on values of other columns keyword_result[i].update(get_columnval_categories(keyword_result[i])) # Build and write column values for CSV file for i in range(0,len(self.temp_row)): val_row = [] for column in self.headers: val_row.append(keyword_result[i][column]) cur_row = self.temp_row[i] + val_row self.file_writer.writerow(cur_row) # Reset temporary containers self.temp_row = [] batch_list = [None] * self.batch
def run_ntuples(analysis, channel, period, samples, loglevel, **kwargs):
    '''Run a given analyzer for the analysis'''
    logger = logging.getLogger(__name__)
    test = kwargs.pop('test', False)
    metShift = kwargs.pop('metShift', '')
    ntup_dir = './ntuples/%s_%iTeV_%s' % (analysis, period, channel)
    python_mkdir(ntup_dir)
    root_dir, sample_names = get_sample_names(analysis, period, samples, **kwargs)
    filelists = {}
    for sample in sample_names:
        sampledir = '%s/%s' % (root_dir, sample)
        filelists[sample] = ['%s/%s' % (sampledir, x) for x in os.listdir(sampledir)]
    if len(sample_names) == 1 or test:  # only one, or it's a test: don't use map
        name = sample_names[0]
        outname = "%s/%s.root" % (ntup_dir, name)
        if test:
            outname = 'test.root'
        run_analyzer((analysis, channel, name, filelists[name], outname,
                      period, metShift, loglevel))
        return 0
    p = Pool(8)
    try:
        p.map_async(run_analyzer,
                    [(analysis, channel, name, filelists[name],
                      "%s/%s.root" % (ntup_dir, name), period, metShift, loglevel)
                     for name in sample_names]).get(999999)
    except KeyboardInterrupt:
        p.terminate()
        logger.info('Analyzer cancelled')
        sys.exit(1)
    return 0
def process_albums(albums, output, vk_session):
    """
    :param albums:
    :param output:
    :return:
    """
    logger.info('Begin downloading %s album(s)', len(albums))
    for album in albums:
        offset = 0
        album_folder = path.join(output, album['title'])
        if not path.exists(album_folder):
            mkdir(album_folder)
        logger.debug('Album Size: %s', album['size'])
        while offset <= album['size']:
            photo_urls = get_album_photos(album=album, offset=offset,
                                          vk_session=vk_session)
            logger.debug('Got URLs for %s photo(s)', len(photo_urls))
            f = partial(download_photo, album_folder)
            pool = Pool(processes=8)
            pool.map_async(f, photo_urls)
            # And wait till end
            pool.close()
            pool.join()
            offset += 1000
        logger.info(u'Album "%s" [%d] downloaded.', album['title'], album['size'])
    logger.info('%d photo(s) downloaded.' % sum([album['size'] for album in albums]))
def run_pipeline_local(threads, reads_list):
    # for read_data in reads_list:
    #     multiprocessing(read_data)
    p = Pool(int(threads))
    p.map_async(multiprocessing, reads_list).get(9999999)
    p.close()
    p.join()
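# Several of these snippets call map_async(...).get(<huge timeout>) rather than
# the simpler pool.map(). A minimal sketch of the reason (nothing here is
# project-specific; `work` stands for any picklable top-level function): on
# Python 2, a bare Pool.map() blocks in a way that swallows KeyboardInterrupt,
# while AsyncResult.get() with a finite timeout leaves Ctrl-C working.
def _interruptible_map(work, items, processes=4):
    pool = Pool(processes)
    try:
        results = pool.map_async(work, items).get(timeout=999999)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        raise
    pool.close()
    pool.join()
    return results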
def multi_download(bucket_name, key_name, dest_file_path, num_processes=10,
                   force_overwrite=True):
    # Check that dest does not exist
    if os.path.exists(dest_file_path) and force_overwrite:
        os.remove(dest_file_path)
    elif os.path.exists(dest_file_path):
        raise ValueError("Destination file '{0}' exists".format(dest_file_path))

    # Touch the file
    fd = os.open(dest_file_path, os.O_CREAT)
    os.close(fd)

    conn = get_conn()
    bucket = conn.lookup(bucket_name, validate=False)
    key = bucket.get_key(key_name)
    size = key.size
    num_parts = num_processes

    def arg_iterator(num_parts):
        for min_byte, max_byte in gen_byte_ranges(size, num_parts):
            yield (bucket.name, key.name, dest_file_path, min_byte, max_byte)

    s = size / (1024.0 * 1024.0)
    try:
        start_time = time.time()
        pool = Pool(processes=num_processes)
        pool.map_async(do_part_download, arg_iterator(num_parts)).get(9999999)
        time_diff = time.time() - start_time
        log("Finished downloading %0.2fM in %0.2fs (%0.2fMbps)" %
            (s, time_diff, s / time_diff))
    except KeyboardInterrupt:
        log("User terminated")
    except Exception, err:
        log(err)
def main(): logging.basicConfig(level=logging.INFO) args = parser.parse_args() log.debug("Got args: %s" % args) # Check that src is a valid S3 url split_rs = urlparse.urlsplit(args.src) if split_rs.scheme != "s3": raise ValueError("'%s' is not an S3 url" % args.src) # Check that dest does not exist if os.path.exists(args.dest): if args.force: os.remove(args.dest) else: raise ValueError("Destination file '%s' exists, specify -f to" " overwrite" % args.dest) # Split out the bucket and the key s3 = boto.connect_s3() bucket = s3.lookup(split_rs.netloc) key = bucket.get_key(split_rs.path) # Determine the total size and calculate byte ranges conn = boto.connect_s3() resp = conn.make_request("HEAD", bucket=bucket, key=key) size = int(resp.getheader("content-length")) logging.info("Got headers: %s" % resp.getheaders()) # Skipping multipart if file is less than 1mb if size < 1024 * 1024: t1 = time.time() key.get_contents_to_filename(args.dest) t2 = time.time() - t1 log.info("Finished single-part download of %0.2fM in %0.2fs (%0.2fMbps)" % (size, t2, size/t2)) else: # Touch the file fd = os.open(args.dest, os.O_CREAT) os.close(fd) num_parts = args.num_processes def arg_iterator(num_parts): for min_byte, max_byte in gen_byte_ranges(size, num_parts): yield (bucket.name, key.name, args.dest, min_byte, max_byte) s = size / 1024 / 1024. try: t1 = time.time() pool = Pool(processes=args.num_processes) pool.map_async(do_part_download, arg_iterator(num_parts)).get(9999999) t2 = time.time() - t1 log.info("Finished downloading %0.2fM in %0.2fs (%0.2fMbps)" % (s, t2, s/t2)) except KeyboardInterrupt: log.info("User terminated") except Exception, err: log.error(err)
def load_crawlers(database_config_file, data_sources_file):
    pool = Pool(processes=10)
    while True:
        f = open(data_sources_file, 'r')
        xml_string = f.read()
        config = BeautifulSoup(xml_string, "xml")
        f.close()
        crawlers = []
        # print config
        dom = xml.dom.minidom.parseString(xml_string)
        sources = dom.getElementsByTagName("source")
        for source in sources:
            crawler = source.getElementsByTagName("crawler")[0]
            if getText(crawler.getElementsByTagName("enable")[0].childNodes) == '1':
                crawler_data = CrawlerData()
                crawler_data.domain = getText(source.getElementsByTagName("domain")[0].childNodes)
                crawler_data.encode = getText(source.getElementsByTagName("encoding")[0].childNodes)
                crawler_data.source_name = getText(source.getElementsByTagName("source_name")[0].childNodes)
                crawler_data.database_config_file = database_config_file
                crawler_data.request_interval_seconds = int(getText(crawler.getElementsByTagName("request_interval_seconds")[0].childNodes))
                crawlers.append(crawler_data)
        for c in crawlers:
            print c
        pool.map_async(create_crawler, crawlers).get(999999)
        time.sleep(20)
    pool.close()
    pool.join()
def brutePlugin(pluginlist,foundplug,hosti,pathi,porti,securei,psize): global host host = hosti global port port = porti global secure secure = securei global plugfound plugfound = foundplug global path path = pathi f = open(plugfound,'w').close() listsize = (len(pluginlist)) # manage pool if (psize == 0): psize = 5 if (list <= psize): chunksize = 1 else: chunksize = ((listsize / psize) + (listsize % psize)) print("Plugin list size: %d\tChunk size: %d\tPool size: %d" % ((listsize),chunksize,psize)) print("Plugin bruteforcing started") pool = Pool(processes=psize) for chunk in itertools.izip(grouper(pluginlist,chunksize)): pool.map_async(worker,chunk) pool.close() try: while(len(active_children()) > 0): # how many active children do we have sleep(2) ignore = active_children() except KeyboardInterrupt: exit('CTRL^C caught, exiting...\n\n') print("Plugin bruteforce complete")
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = parse_command_line(argv)
    loglevel = getattr(logging, args.log)
    logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s',
                        level=loglevel, datefmt='%Y-%m-%d %H:%M:%S')
    branchingPoints = ['ee100', 'em100', 'mm100', 'et100', 'mt100', 'tt100',
                       'BP1', 'BP2', 'BP3', 'BP4']
    masses = _3L_MASSES if args.analysis == 'Hpp3l' else _4L_MASSES
    if args.do4l:
        masses = _4L_MASSES
    if args.doBoth:
        masses = _4L_MASSES
    if not args.allMasses:
        masses = [args.mass]
    if not args.allBranchingPoints:
        branchingPoints = [args.branchingPoint]
    poolArgs = [[m, b] for m in masses for b in branchingPoints]
    if len(poolArgs) == 1:
        job = poolArgs[0]
        BPWrapper((args.analysis, args.channel, args.period, job[0], job[1],
                   args.bgMode, args.scaleFactor, args.doAlphaTest, args.unblind,
                   args.do4l, args.doBoth, args.cut, args.skipTau))
    else:
        p = Pool(8)
        try:
            p.map_async(BPWrapper,
                        [(args.analysis, args.channel, args.period, job[0], job[1],
                          args.bgMode, args.scaleFactor, args.doAlphaTest, args.unblind,
                          args.do4l, args.doBoth, args.cut, args.skipTau)
                         for job in poolArgs]).get(999999)
        except KeyboardInterrupt:
            p.terminate()
            print 'limits cancelled'
            sys.exit(1)
    return 0
def wzlimits(analysis, region, period, **kwargs):
    cut = kwargs.pop('cut', '1')
    scalefactor = kwargs.pop(
        'scalefactor',
        'event.gen_weight*event.pu_weight*event.lep_scale*event.trig_scale')
    datacardDir = kwargs.pop('datacardDir', './datacards')
    mode = kwargs.pop('mode', 'all')
    poolArgs = []
    for chan in ['eee', 'eem', 'mme', 'mmm']:
        poolArgs += [(analysis, region, period, chan, chan, cut, scalefactor,
                      datacardDir, mode)]
    if len(poolArgs) == 1:
        job = poolArgs[0]
        wzLimitWrapper(job)
    else:
        p = Pool(8)
        try:
            p.map_async(wzLimitWrapper, poolArgs).get(999999)
        except KeyboardInterrupt:
            p.terminate()
            print 'limits cancelled'
            sys.exit(1)
    return 0
def generate(self, options):
    if getattr(settings, "MEDUSA_MULTITHREAD", False):
        # Upload up to ten items at once via `multiprocessing`.
        from multiprocessing import Pool, cpu_count

        print("Generating with up to %d processes..." % cpu_count())
        pool = Pool(cpu_count())

        pool.map_async(
            _disk_render_path,
            ((None, path, None) for path in self.paths),
            chunksize=5
        )
        pool.close()
        pool.join()
    else:
        # Use standard, serial upload.
        self.client = Client()
        if options['medusa_host']:
            host = options['medusa_host']
        elif hasattr(settings, 'MEDUSA_HTTP_HOST'):
            host = settings.MEDUSA_HTTP_HOST
        else:
            host = None
        self.host = host
        for path in self.paths:
            self.render_path(path=path, host=options['medusa_host'])
def run(self):
    tasks = self.get_all_tasks_config()
    self.create_reports(tasks)
    self.init_rally_config()
    self.create_deployment()
    logging.info("Start to run tasks...")
    process_num = 2
    try:
        cpu_num = multiprocessing.cpu_count()
        # use the CPU count when it exceeds the default of 2
        process_num = [process_num, cpu_num][process_num < cpu_num]
    except Exception:
        logging.info("cpu_count() has not been implemented!")
    logging.info("The number of processes will be %s." % process_num)
    try:
        pool = Pool(processes=process_num)
        pool.map_async(run_task, zip([self] * len(tasks), tasks))
        pool.close()
        pool.join()
    except Exception as ex:
        logging.info("processing pool get exception: '%s'" % ex)
    finally:
        clean_pidfile()
def get_feature_data(args):
    bucket = connect_s3().get_bucket('nlp-data')
    widlines = bucket.get_key('datafiles/topwams.txt').get_contents_as_string().split("\n")
    wids = filter(lambda x: x, widlines)[:args.num_wikis]
    log("Loading entities and heads for ", len(wids), "wikis")
    pool = Pool(processes=args.num_processes)
    r = pool.map_async(get_data, wids)
    r.wait()
    wiki_data = defaultdict(dict, r.get())
    log("Getting data from API")
    widstrings = [','.join(wids[i:i + 20]) for i in range(0, len(wids), 20)]
    r = pool.map_async(get_wiki_data_from_api, widstrings)
    for grouping in r.get():
        if type(grouping) != dict:
            continue
        for wiki_id, api_data in grouping.items():
            wiki_data[wiki_id]['api_data'] = api_data
    log("Turning data into features")
    wiki_ids, data_dicts = zip(*wiki_data.items())
    log("Working on", len(wiki_ids))
    r = pool.map_async(data_to_features, data_dicts)
    r.wait()
    wid_to_features = zip(wiki_ids, r.get())
    log(len(set([value for _, values in wid_to_features for value in values])), "features")
    return dict(wid_to_features)
def brutePasses(userlist,passlist,hosti,pathi,porti,securei,psize,loginsi): global host host = hosti global port port = porti global secure secure = securei global logins logins = loginsi global path path = pathi global usernames usernames = userlist usersize = len(userlist) passsize = len(passlist) # manage pool if (psize == 0): psize = 5 if ((usersize*passsize) <= psize): chunksize = 1 else: chunksize = (((usersize*passsize) / psize) + ((usersize*passsize) % psize)) #print("%s" % ((ceil(float((usersize*passsize)) / psize)) + ((usersize*passsize) % psize))) print("Userlist size: %d\tPassword size: %d\tChunk size: %d\tPool size: %d" % (usersize,passsize,chunksize,psize)) pool = Pool(processes=psize) for chunk in itertools.izip(grouper(passlist,chunksize)): pool.map_async(worker,chunk) pool.close() try: while(len(active_children()) > 0): # how many active children do we have sleep(2) ignore = active_children() except KeyboardInterrupt: exit('CTRL^C caught, exiting...\n\n') print("Password bruteforce attempts completed")
def SGDNOMAD(data, movies_, eta_ = 0.01, lambduh_ = 0.1, lambduh_w_ = 0.1, rank = 10, maxit = 10): global latentShape, weightShape, biasShape, userOffset, movieOffset, mp_arr, mp_w, mp_b, eta, lambduh, lambduh_w, counter, qsize, movies movies = movies_.tocsr() t1 = time.time() eta = eta_ lambduh = lambduh_ lambduh_w = lambduh_w_ userOffset = 0 movieOffset = data.shape[0] # Allocate shared memory across processors for latent variable latentShape = (sum(data.shape), rank) mp_arr = mp.Array(ctypes.c_double, latentShape[0] * latentShape[1]) latent = np.frombuffer(mp_arr.get_obj()).reshape(latentShape) weightShape = (latentShape[0], movies.shape[1]) mp_w = mp.Array(ctypes.c_double, weightShape[0] * weightShape[1]) weights = np.frombuffer(mp_w.get_obj()).reshape(weightShape) biasShape = (latentShape[0], 1) mp_b = mp.Array(ctypes.c_double, biasShape[0] * biasShape[1]) biases = np.frombuffer(mp_b.get_obj()).reshape(biasShape) counter = mp.Value('i', 0) qsize = mp.Array('i', [0] * FLAGS.cores) # Initialize latent variable so that expectation equals average rating avgRating = data.sum() / data.nnz latent[:] = np.random.rand(latentShape[0], latentShape[1]) * math.sqrt(avgRating / rank / 0.25) weights[:] = np.zeros(weightShape) biases[:] = np.zeros(biasShape) slices = slice(data, FLAGS.cores) rowSlices = rowSlice(data, FLAGS.cores) p2 = Pool(FLAGS.cores) p = Pool(FLAGS.cores) it = 0 printLog(0, 0, time.time() - t1, RMSE2(slices, data.nnz, p2)) manager = mp.Manager() queues = [manager.Queue() for x in range(FLAGS.cores)] colList = np.round(np.linspace(0, data.shape[1], (FLAGS.cores) * 20 + 1)).astype(int) #for i in range(data.shape[1]): #queues[np.random.randint(0, FLAGS.cores)].put(i) for i in range(len(colList) - 1): r = np.random.randint(0, FLAGS.cores) queues[r].put((colList[i], colList[i+1])) qsize[r] += 1 p.map_async(updateNOMAD, [(i, a, b, queues) for i, a, b in rowSlices]) countPerEpoch = FLAGS.cores * (len(colList) - 1) start = time.time() #print [q.qsize() for q in queues] print [q for q in qsize]
def start(self): """Starts the automatic manual analysis""" if self.mainConfiguration is None: raise Exception("The main configuration of the manual analysis has failed, cannot start it.") if self.reportingConfiguration is None: raise Exception("No reporting configuration found, cannot start the analysis.") if self.analysisConfiguration is None: raise Exception("The configuration of the manual analysis has failed, cannot start it.") self._logger.info(str(self)) # first step is to create the templates for all our emulators self._logger.debug("Create {0} templates, one for each emulator".format(self.analysisConfiguration.maxNumberOfEmulators)) AVDEmulator.createTemplates(self.mainConfiguration, self.analysisConfiguration) # Create a queue of threads distributedQueueManager = Manager() listOfAPKs = distributedQueueManager.Queue() pool = Pool(processes=self.analysisConfiguration.maxNumberOfEmulators) # Tell threads to analyze APKs which are push to the queue workerArgs = [(listOfAPKs, iEmulator, self.mainConfiguration, self.analysisConfiguration, self.reportingConfiguration) for iEmulator in range(self.analysisConfiguration.maxNumberOfEmulators)] self._logger.debug(workerArgs) try: pool.map_async(executeExperiment, workerArgs) # Continuously scan the directory and add identified APKs to ensure at least next emulators round is ready while True: if listOfAPKs.qsize() > self.analysisConfiguration.maxNumberOfEmulators*2: time.sleep(5) continue for directory in self.analysisConfiguration.apkFiles: #self._logger.info("Analyzing directory: {0}".format(directory)) filenames = os.listdir(directory) if len(filenames) == 0: self._logger.debug("All APKs have been pushed to the analyzing queue, sleeping 5 secs...") time.sleep(5) else: apkFileName = random.choice(filenames) apkFileInputPath = os.path.join(directory, apkFileName) if not os.access(apkFileInputPath, os.R_OK): self._logger.error("You don't have read access to file {0}, not pushing file to queue.".format(apkFileInputPath)) continue # compute Sha1 on name the file with it sha1=self._computeSha1(apkFileInputPath) apkFileOutputPath = os.path.join(self.analysisConfiguration.outputDirectory, sha1+".apk") # move APK to output dir shutil.move(apkFileInputPath, apkFileOutputPath) self._logger.info("Pushing APK {0} in queue.".format(apkFileOutputPath)) listOfAPKs.put(apkFileOutputPath) except KeyboardInterrupt: self._logger.error("Automatic analysis interrupted by a keyboard Exception.")
def test2():
    for n in range(100000):
        def test3(i):
            n += i
    pool = Pool(processes=1)
    pool.map_async(test3, range(100000))
    pool.close()
    pool.join()
def main():
    args, _ = get_args()
    db, cursor = get_db_and_cursor(args)
    p = Pool(processes=args.num_processes)
    cursor.execute(u"SELECT wiki_id, url FROM wikis ")
    for i in range(0, cursor.rowcount, 500):
        print i
        p.map_async(get_pageviews_for_wiki,
                    [Namespace(row=row, **vars(args))
                     for row in cursor.fetchmany(500)]).get()
print("..Pulling %s RDS samples from both worlds..." % NUM_RDS_SAMPLES) pullRDSsamples(thisWorldDir) pullRDSsamples(thisWorldDir + ", noepi") # queue up all jobs across a maximum of 4 cores print("Queueing network simulation processes") print( "Putting %s of these in the queue in a few seconds, and then waiting forever" % len(paramCombinations)) parallel = True if parallel: #THIS IS AWESOME! pool = Pool(processes=8) pool.map_async(OutbreakAndRDS, paramCombinations) import time time.sleep(1500000) else: for x in paramCombinations: OutbreakAndRDS(x) # just do this somewhere else # # Analyze how this world did. # print("Computing RDS statistics!") # executeCommand( "Rscript --vanilla analysis.R %s" % thisWorldDir ) # IT WON'T CONTINUE UNLESS WE JUST WAIT AND WAIT!
if __name__ == "__main__": gt_folder = sys.argv[1] pred_folder = sys.argv[2] gt_items = get_xml_files(gt_folder) pred_items = get_xml_files(pred_folder) all_paths = [] all_paths.extend(list(gt_items.values())) all_paths.extend(list(pred_items.values())) p = Pool(processes=None) m = Manager() q = m.Queue() results = p.map_async(convert_xml, all_paths) left = float('inf') while not results.ready(): if left != results._number_left: print "Left {}".format(results._number_left) left = results._number_left time.sleep(1) results = list(results.get()) print "Successfuly Ran {}/{}".format( sum([1 if r == True else 0 for r in results]), len(results)) pred_file = "pred.lst" gt_file = "gt.lst"
def optimize_for_order(conv_params, pool_kernel=None, pool_stride=None, sequential=True): # Generate permutations for the order loops = ['B/b', 'OW/ow', 'OH/oh', 'IC/ic', 'OC/oc'] order = set(permutations(loops)) return_dict = {} acc_obj, K, O, S, IC, OC, B, iprec, wprec, im2col, energy_cost = conv_params #print('optimizing for convolution layer: weights {}x{}x{}x{}'.format(OC,IC,K,K)) #print('Batch size: {}'.format(B)) if pool_kernel is None: pool_kernel = (1, 1, 1, 1) if pool_stride is None: pool_stride = (1, 1, 1, 1) conv_params_with_pool = acc_obj, K, O, S, IC, OC, B, iprec, wprec, im2col, energy_cost, pool_kernel, pool_stride if not sequential: _bound_optimizer_method = functools.partial(_optimize_for_order, conv_params_with_pool) try: pool = Pool(cpu_count()) results = pool.map_async(_bound_optimizer_method, order).get(10000) pool.close() pool.join() # for o in order: # _bound_optimizer_method(o) # exit() best_cycles = None best_energy = None min_cycles = min([x[-4] for x in results]) min_energy = min([x[-3] for x in results]) cycles_list = [x[-2] for x in results] energy_list = [x[-1] for x in results] energy_array = np.stack(energy_list) cycles_array = np.stack(cycles_list) for r in results: tiling, order_type, cycles, energy, _, _ = r # print('{}:\n{}\n\t{:1.2f}, {:1.2f}'.format(order_type, tiling, cycles/float(min_cycles), energy/float(min_energy))) if best_cycles is None or best_cycles > cycles or ( best_cycles == cycles and best_energy > energy): best_cycles = cycles best_energy = energy best_tiling = tiling best_order = order_type return best_tiling, best_order, cycles_array, energy_array except KeyboardInterrupt: pool.terminate() pool.join() return else: best_cycles = None best_energy = None best_tiling = None best_order = None for o in order: tiling, order_type, cycles, energy, _, _ = _optimize_for_order( conv_params_with_pool, o) if best_cycles is None or best_cycles > cycles: best_cycles = cycles best_energy = energy best_tiling = tiling best_order = order_type elif best_cycles == cycles and best_energy > energy: best_cycles = cycles best_energy = energy best_tiling = tiling best_order = order_type return best_tiling, best_order, None, None
def correct_strings(partition_alignments, seq_to_acc, ccs_dict, step, nr_cores = 1, verbose = False): S_prime = {} S_prime_quality = {} partition_unique_seq_to_acc = {} for m, partition in partition_alignments.items(): partition_unique_seq_to_acc[m] = {} partition_unique_seq_to_acc[m][m] = seq_to_acc[m] for s in partition: if s in seq_to_acc: s_accessions = seq_to_acc[s] partition_unique_seq_to_acc[m][s] = s_accessions if ccs_dict: partitioned_ccs_dict = {} for m, partition in partition_alignments.items(): partitioned_ccs_dict[m] = {} for s in partition: if s in seq_to_acc: s_accessions = seq_to_acc[s] for s_acc in s_accessions: partitioned_ccs_dict[m][s_acc] = ccs_dict[s_acc] else: partitioned_ccs_dict = {} for m, partition in partition_alignments.items(): partitioned_ccs_dict[m] = {} if nr_cores == 1: for m, partition in sorted(partition_alignments.items()): S_prime_partition, S_prime_quality_vectors = correct_to_consensus_helper( ((m, partition, partition_unique_seq_to_acc[m], step, verbose, partitioned_ccs_dict[m]), {}) ) for acc, s in S_prime_partition.items(): assert acc not in S_prime S_prime[acc] = s for acc, qual_vector in S_prime_quality_vectors.items(): S_prime_quality[acc] = qual_vector else: ####### parallelize statistical tests ######### # pool = Pool(processes=mp.cpu_count()) original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) signal.signal(signal.SIGINT, original_sigint_handler) pool = Pool(processes=nr_cores) try: res = pool.map_async(correct_to_consensus_helper, [ ( (m, partition, partition_unique_seq_to_acc[m], step, verbose, partitioned_ccs_dict[m]), {}) for m, partition in partition_alignments.items() if len(partition) > 1 ] ) S_prime_partition_dicts =res.get(999999999) # Without the timeout this blocking call ignores all signals. except KeyboardInterrupt: print("Caught KeyboardInterrupt, terminating workers") pool.terminate() sys.exit() else: # print("Normal termination") pool.close() pool.join() for S_prime_partition, S_prime_quality_vectors in S_prime_partition_dicts: for acc, s in S_prime_partition.items(): assert acc not in S_prime S_prime[acc] = s for acc, qual_vector in S_prime_quality_vectors.items(): S_prime_quality[acc] = qual_vector return S_prime, S_prime_quality
firstIndex = 9251
lastIndex = 14871


def scrapePage(i):
    try:
        url = baseUrl + str(i)
        page = urllib2.urlopen(url)
        dom = BeautifulSoup(page, 'html.parser')
        crosswordElement = dom.find('div', attrs={'class': 'js-crossword'})
        crosswordDataJson = crosswordElement.get('data-crossword-data')
        crosswordData = json.loads(crosswordDataJson)
        print crosswordData['name']
        return crosswordData
    except:
        print 'Scraping crossword data failed:'
        print i


def outputResults(results):
    sanitisedResults = list(filter(lambda x: x is not None, results))
    output = open('crosswords.txt', 'w')
    outputJson = json.dumps(sanitisedResults)
    output.write(outputJson)
    output.close()
    print 'Done! Total crosswords parsed:'
    print len(results)


if __name__ == '__main__':
    pool = Pool()
    r = pool.map_async(scrapePage, range(firstIndex, lastIndex + 1),
                       callback=outputResults)
    r.wait()
#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# @author: Drizzle_Zhang
# @file: pool.py
# @time: 2020/3/24 10:43

from time import time
from multiprocessing import Pool


def func(x):
    return x * x


if __name__ == '__main__':
    time_start = time()
    pool = Pool(3)
    list_input = [1, 2, 3, 4, 5, 6]
    # res = [pool.apply_async(func, (i, )) for i in list_input]
    res = pool.map_async(func, list_input)
    pool.close()
    pool.join()
    list_output = res.get()
    time_end = time()
    print(time_end - time_start)
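# The script above only calls get() after join(); a small sketch of the
# non-blocking side of the same stdlib API (no project code assumed): poll
# ready()/wait() or attach a callback while the main process keeps working.
from multiprocessing import Pool


def _square(x):
    return x * x


if __name__ == '__main__':
    with Pool(3) as pool:
        async_result = pool.map_async(_square, range(6),
                                      callback=lambda res: print('done:', res))
        while not async_result.ready():
            # the parent stays free to do other work between polls
            async_result.wait(timeout=0.1)
        print(async_result.get())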
progress_per_part = 70 / len(ont_class_list)
counter = Value('f', float(20))
max_processes = (len(ont_class_list)
                 if len(ont_class_list) <= celery.conf.MAX_MULTIPROCESSING
                 else celery.conf.MAX_MULTIPROCESSING)
pool = Pool(max_processes, p_q_initializer, [counter])

if len(ont_id_list) > 1:
    fetch_msg = 'Fetching %s' % ', '.join(ont_id_list[:-1])
    fetch_msg += ' and %s...' % ont_id_list[-1]
else:
    fetch_msg = 'Fetching %s...' % ont_id_list[0]

pool_result = pool.map_async(
    fetch_and_save_by_class_ont_wrapper,
    zip(ont_class_list, repeat(config_graph), repeat(data_graph),
        repeat(ltw_data_graph), repeat(progress_per_part)))
pool.close()

try:
    while not pool_result.ready():
        current_task.update_state(state='PROGRESS',
                                  meta={
                                      'progress_percent': int(counter.value),
                                      'progress_msg': fetch_msg
                                  })
        sleep(1)
    pool_result.wait()
except Exception, e:
def make_oligomer(input_file, largest_oligo_complexes, report, args, residue_index_mapping=None): global workdir global input_name global verbosity global g_input_file global g_args global best_oligo_template_code global renamed_chains_file g_input_file = input_file g_args = args verbosity = args.verbosity workdir = os.getcwd() symmetry = args.symmetry # Subsection 2[a] ####################################################################### if args.sequence_mode is False: input_name = os.path.basename(input_file).split(".pdb")[0].replace( '.', '_') candidate_qscores = {} # Select structurally best oligomeric template using GESAMT pctools.print_section(2, 'OLIGOMER ASSEMBLING') pctools.print_subsection('2[a]', 'Structural template selection') if args.multiprocess is True: p = Pool() for hitchain, average_qscore, output in p.map_async( analyse_largest_complexes, largest_oligo_complexes.items()).get(): candidate_qscores[hitchain] = average_qscore report['hits'][hitchain]['qscore'] = round(average_qscore, 3) print(output) p.close() p.join() else: for item in largest_oligo_complexes.items(): hitchain, average_qscore, output = analyse_largest_complexes( item) candidate_qscores[hitchain] = average_qscore report['hits'][hitchain]['qscore'] = round(average_qscore, 3) print(output) best_oligo_template = max(candidate_qscores.keys(), key=(lambda x: candidate_qscores[x])) if candidate_qscores[best_oligo_template] >= args.qscore_cutoff: print('Structurally, the best template is: ' + clrs['y'] + best_oligo_template + clrs['n'] + '. Using that!\n') report['best_template'] = best_oligo_template.split(':')[0] report['best_id'] = report['hits'][best_oligo_template]['id'] report['best_cov'] = report['hits'][best_oligo_template][ 'coverage'] report['best_qscore'] = report['hits'][best_oligo_template][ 'qscore'] report['best_nchains'] = report['hits'][best_oligo_template][ 'final_homo_chains'] else: print('No template had an average Q-score above cut-off of ' + clrs['c'] + str(args.qscore_cutoff) + clrs['n'] + '\nTry lowering the cutoff or running in sequence mode.\n') report['exit'] = '4' return None, None, report report['topology_figure'] = './' + best_oligo_template.replace( ':', '_') + '_CHOIR_Topology.png' template_chains = largest_oligo_complexes[best_oligo_template] best_oligo_template_code = best_oligo_template.split(':')[0] clean_template_file = make_local_template(best_oligo_template_code) elif args.sequence_mode is True: if input_file.endswith('.pdb'): input_name = os.path.basename(input_file).split(".pdb")[0].replace( '.', '_') input_file = os.path.join( workdir, input_name + '_CHOIR_MonomerSequence.fasta') g_input_file = input_file elif input_file.endswith('_CHOIR_MonomerSequence.fasta'): input_name = os.path.basename(input_file).split( "_CHOIR_MonomerSequence.fasta")[0] pctools.print_section(2, 'OLIGOMER ASSEMBLING - SEQUENCE MODE') print(clrs['y'] + "Skipping section 2[a] - Structural template selection" + clrs['n'] + "\n") attempt = 0 while attempt < len(largest_oligo_complexes): try: best_oligo_template = list(largest_oligo_complexes)[attempt] report['best_template'] = best_oligo_template.split(':')[0] report['best_id'] = report['hits'][best_oligo_template]['id'] report['best_cov'] = report['hits'][best_oligo_template][ 'coverage'] report['best_qscore'] = 'NA' report['best_nchains'] = report['hits'][best_oligo_template][ 'final_homo_chains'] report['topology_figure'] = './' + best_oligo_template.replace( ':', '_') + '_CHOIR_Topology.png' template_chains = 
largest_oligo_complexes[best_oligo_template] best_oligo_template_code = best_oligo_template.split(':')[0] clean_template_file = make_local_template( best_oligo_template_code) break except: attempt += 1 if attempt < len(largest_oligo_complexes): print('Attempt ' + str(attempt) + ' failed, trying a differente template candidate.') if attempt == len(largest_oligo_complexes): print('Failed to find templates in local databases.') report['exit'] = '5' return None, None, report relevant_chains_file = extract_relevant_chains(clean_template_file, template_chains) if args.generate_report is True: report['template_figure'], pymol_output = pctools.pymol_screenshot( relevant_chains_file, args) print(pymol_output) renamed_chains_file, chains_dict = rename_relevant_chains( relevant_chains_file) relevant_chains = [ chains_dict[template_chain] for template_chain in template_chains ] # Subsection 2[b] ####################################################################### pctools.print_subsection('2[b]', 'Generating alignment') # Generate per chain alignment files alignment_files = [] if args.sequence_mode is False: if args.multiprocess is True: p = Pool() for qscore, rmsd, fasta_out, gesamt_output in p.map_async( run_gesamt_parallel, chains_dict.values()).get(): alignment_files.append(fasta_out) print(gesamt_output) p.close() p.join() else: for chain in chains_dict.values(): qscore, rmsd, fasta_out, gesamt_output = run_gesamt_parallel( chain) alignment_files.append(fasta_out) print(gesamt_output) elif args.sequence_mode is True: if args.multiprocess is True: p = Pool() for fasta_out, output in p.map_async(alignment_from_sequence, chains_dict.values()).get(): alignment_files.append(fasta_out) print(output) else: for current_chain in chains_dict.values(): fasta_out, output = alignment_from_sequence(current_chain) alignment_files.append(fasta_out) print(output) print('Alignment files:\n' + clrs['g'] + ('\n').join([os.path.basename(i) for i in alignment_files]) + clrs['n']) # Generate final alignment which will be the input for Modeller final_alignment, full_residue_mapping = generate_ali( alignment_files, best_oligo_template_code, residue_index_mapping, args) # Score said alignment and enforce treshold report[ 'relative_alignment_score'], relative_wscores, nchains = score_alignment( final_alignment) print('\nFinal average relative score for alignment: ' + str(round(report['relative_alignment_score'], 2)) + '%') bad_streches = 0 for wscore in relative_wscores: if wscore < args.similarity_cutoff: bad_streches += 1 if bad_streches >= args.bad_streches * nchains: if args.sequence_mode is True: print( '\nThe alignment score was unacceptable for ' + clrs['r'] + str(bad_streches) + clrs['n'] + ' 30-res segments of the protein complex.\nTry running the default (structure) mode.\n' ) else: print( '\nThe alignment score was unacceptable for ' + clrs['r'] + str(bad_streches) + clrs['n'] + ' 30-res segments of the protein complex.\nTry increasing the number of candidate templates or tweaking the similarity cut-offs.\n' ) report['exit'] = '6' return None, None, report # Subsection 2[c] ####################################################################### pctools.print_subsection('2[c]', 'Generating models') genmodel_file, expected_models = create_genmodel(final_alignment, best_oligo_template_code, relevant_chains, args) run_modeller(genmodel_file) # Record list of oligomers built nmodels = 0 built_oligomers = [] for model in expected_models: built_oligomers.append( restore_chain_identifiers(model, chains_dict, 
full_residue_mapping)) nmodels += 1 print(clrs['b'] + 'ProtCHOIR' + clrs['n'] + ' built ' + clrs['c'] + str(nmodels) + clrs['n'] + ' model oligomers:') for model in built_oligomers: print(clrs['g'] + model + clrs['n']) return best_oligo_template, built_oligomers, report
def ricciCurvature(G, alpha=0.5, weight=None, compute_nc=True, proc=cpu_count(), edge_list=None, method="OTD", verbose=False): """ Compute ricci curvature for all nodes and edges in G. Node ricci curvature is defined as the average of all it's adjacency edge. :param G: A connected NetworkX graph. :param alpha: The parameter for the discrete ricci curvature, range from 0 ~ 1. It means the share of mass to leave on the original node. eg. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr. :param weight: The edge weight used to compute Ricci curvature. :param compute_nc: True if the average node curvatures should be computed. :param proc: Number of processing used for parallel computing :param edge_list: Target edges to compute curvature :param method: Transportation method, OTD for Optimal transportation Distance, ATD for Average transportation Distance. :param verbose: Set True to output the detailed log. :return: G: A NetworkX graph with Ricci Curvature with edge attribute "ricciCurvature" """ # Construct the all pair shortest path lookup if importlib.util.find_spec("networkit") is not None: import networkit as nk t0 = time.time() Gk = nk.nxadapter.nx2nk(G, weightAttr=weight) apsp = nk.distance.APSP(Gk).run().getDistances() length = {} for i, n1 in enumerate(G.nodes()): length[n1] = {} for j, n2 in enumerate(G.nodes()): length[n1][n2] = apsp[i][j] print(time.time() - t0, " sec for all pair by NetworKit.") else: print( "NetworKit not found, use NetworkX for all pair shortest path instead." ) t0 = time.time() length = dict(nx.all_pairs_dijkstra_path_length(G, weight=weight)) print(time.time() - t0, " sec for all pair.") t0 = time.time() # compute edge ricci curvature p = Pool(processes=proc) # if there is no assigned edges to compute, compute all edges instead if not edge_list: edge_list = G.edges() args = [(G, source, target, alpha, length, verbose, method) for source, target in edge_list] result = p.map_async(_wrapRicci, args) result = result.get() p.close() p.join() # assign edge Ricci curvature from result to graph G for rc in result: for k in list(rc.keys()): source, target = k G[source][target]['ricciCurvature'] = rc[k] # compute node Ricci curvature if compute_nc: for n in G.nodes(): rcsum = 0 # sum of the neighbor Ricci curvature if G.degree(n) != 0: for nbr in G.neighbors(n): if 'ricciCurvature' in G[n][nbr]: rcsum += G[n][nbr]['ricciCurvature'] # assign the node Ricci curvature to be the average of node's adjacency edges G.node[n]['ricciCurvature'] = rcsum / G.degree(n) if verbose: print("node %d, Ricci Curvature = %f" % (n, G.node[n]['ricciCurvature'])) print(time.time() - t0, " sec for Ricci curvature computation.") return G
def analyze(problem, Y, calc_second_order=True, num_resamples=100, conf_level=0.95, print_to_console=False, parallel=False, n_processors=None): """Perform Sobol Analysis on model outputs. Returns a dictionary with keys 'S1', 'S1_conf', 'ST', and 'ST_conf', where each entry is a list of size D (the number of parameters) containing the indices in the same order as the parameter file. If calc_second_order is True, the dictionary also contains keys 'S2' and 'S2_conf'. Parameters ---------- problem : dict The problem definition Y : numpy.array A NumPy array containing the model outputs calc_second_order : bool Calculate second-order sensitivities (default True) num_resamples : int The number of resamples (default 100) conf_level : float The confidence interval level (default 0.95) print_to_console : bool Print results directly to console (default False) References ---------- .. [1] Sobol, I. M. (2001). "Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates." Mathematics and Computers in Simulation, 55(1-3):271-280, doi:10.1016/S0378-4754(00)00270-6. .. [2] Saltelli, A. (2002). "Making best use of model evaluations to compute sensitivity indices." Computer Physics Communications, 145(2):280-297, doi:10.1016/S0010-4655(02)00280-1. .. [3] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and S. Tarantola (2010). "Variance based sensitivity analysis of model output. Design and estimator for the total sensitivity index." Computer Physics Communications, 181(2):259-270, doi:10.1016/j.cpc.2009.09.018. Examples -------- >>> X = saltelli.sample(problem, 1000) >>> Y = Ishigami.evaluate(X) >>> Si = sobol.analyze(problem, Y, print_to_console=True) """ # determining if groups are defined and adjusting the number # of rows in the cross-sampled matrix accordingly if not problem.get('groups'): D = problem['num_vars'] else: D = len(set(problem['groups'])) if calc_second_order and Y.size % (2 * D + 2) == 0: N = int(Y.size / (2 * D + 2)) elif not calc_second_order and Y.size % (D + 2) == 0: N = int(Y.size / (D + 2)) else: raise RuntimeError(""" Incorrect number of samples in model output file. Confirm that calc_second_order matches option used during sampling.""") if conf_level < 0 or conf_level > 1: raise RuntimeError("Confidence level must be between 0-1.") # normalize the model output Y = (Y - Y.mean()) / Y.std() A, B, AB, BA = separate_output_values(Y, D, N, calc_second_order) r = np.random.randint(N, size=(N, num_resamples)) Z = norm.ppf(0.5 + conf_level / 2) if not parallel: S = create_Si_dict(D, calc_second_order) for j in range(D): S['S1'][j] = first_order(A, AB[:, j], B) S['S1_conf'][j] = Z * first_order(A[r], AB[r, j], B[r]).std(ddof=1) S['ST'][j] = total_order(A, AB[:, j], B) S['ST_conf'][j] = Z * total_order(A[r], AB[r, j], B[r]).std(ddof=1) # Second order (+conf.) 
if calc_second_order: for j in range(D): for k in range(j + 1, D): S['S2'][j, k] = second_order(A, AB[:, j], AB[:, k], BA[:, j], B) S['S2_conf'][j, k] = Z * second_order( A[r], AB[r, j], AB[r, k], BA[r, j], B[r]).std(ddof=1) else: tasks, n_processors = create_task_list(D, calc_second_order, n_processors) func = partial(sobol_parallel, Z, A, AB, BA, B, r) pool = Pool(n_processors) S_list = pool.map_async(func, tasks) pool.close() pool.join() S = Si_list_to_dict(S_list.get(), D, calc_second_order) # Print results to console if print_to_console: print_indices(S, problem, calc_second_order) # Add problem context and override conversion method for special case S.problem = problem S.to_df = MethodType(to_df, S) return S
from multiprocessing import Pool, cpu_count
import numpy as np
import time
import math


def great_circle(args):
    lon1, lat1, lon2, lat2 = args
    radius = 3956  # earth radius in miles
    x = math.pi / 180.0
    a = (90.0 - lat1) * (x)
    b = (90.0 - lat2) * (x)
    theta = (lon2 - lon1) * (x)
    c = math.acos((math.cos(a) * math.cos(b)) +
                  (math.sin(a) * math.sin(b) * math.cos(theta)))
    return radius * c


lon1, lat1, lon2, lat2 = 42, 0.5, -13, -32
n = int(1e06)  # array shapes must be integers
mat = np.zeros((n, 4), )
mat = mat + [lon1, lat1, lon2, lat2]

timeStart = time.time()
po = Pool(processes=cpu_count() - 1)
_results = po.map_async(great_circle, (mat[i, :] for i in range(mat.shape[0])))
results = _results.get()
print time.strftime('%H:%M:%S', time.gmtime(time.time() - timeStart))
print 'done'
def f(x):
    return x * x


pool = Pool(processes=cpu_count())
result = pool.apply_async(f, [1, ])
print "get=%s" % result.get(timeout=2)

pool = Pool(processes=cpu_count())
print "map=%s" % pool.map(f, range(10))

a = []
results = [pool.apply_async(f, [i, ], callback=a.append) for i in range(10)]
print "a=%s" % (str(a))
pool.close()
pool.join()
print "a=%s" % (str(a))

pool = Pool(processes=cpu_count())
b = []
results = pool.map_async(f, range(10), callback=b.append, chunksize=10)
print "b=%s" % (str(b))
results.wait()
print "b=%s" % (str(b))
pool.close()
pool.join()
import os
from multiprocessing import Pool

mmin = 110.0
mmax = 150.0
nm = 81
card = "testcard.txt"

cmdlist = []
for i in range(nm):
    mass = mmin + i * (mmax - mmin) / float(nm - 1)
    obsexec = "combine -d %s -m %g -U -M Asymptotic --rRelAcc=0.001 --rAbsAcc=0.001 --minimizerStrategy=0 --rMax=30 --run=expected -n LimitsFromGridObs" % (
        card, mass)
    print obsexec
    cmdlist.append(obsexec)

pool = Pool(processes=20)
pool.map_async(os.system, cmdlist)
pool.close()
pool.join()

os.system("hadd smrel.root higgsCombineLimitsFromGridObs*.root")
os.system("rm higgsCombineLimitsFromGrid*.root")
"target": "i686-pc-windows-msvc-cp36", "extension": "zip" }, ] def retrieve(url): sess = requests.Session() print("Getting %s" % urlsplit(url).path.split("/")[-1]) retrieved = sess.get(url, stream=True) # don't continue if something's wrong retrieved.raise_for_status() try: raw_zip = zipfile.ZipFile(io.BytesIO(retrieved.content)) raw_zip.extractall(path) except zipfile.BadZipfile: # it's a tar tar = tarfile.open(mode="r:gz", fileobj=io.BytesIO(retrieved.content)) tar.extractall(path) urls = (url.format(**release) for release in releases) # let's do this in parallel pool = Pool(processes=len(releases)) # we could use map, but it consumes the entire iterable (doesn't matter for small n) res = pool.map_async(retrieve, urls) # need these if we use _async pool.close() pool.join()
def next(self): """For python 2.x. # Returns The next batch. """ # index_array: # current_index # current_batch_size with self.lock: index_array, current_index, current_batch_size = next( self.index_generator) # print("Index array: ") # print(index_array) # print("current_index: ", current_index) # print("current_batch_size: ", current_batch_size) # build batch of image data batch_x = np.zeros((current_batch_size, ) + (self.frames_per_step, ) + self.image_shape, dtype=K.floatx()) # # my addition of +(1,) grayscale = self.color_mode == 'grayscale' # print(index_array) # print(chunks) chunks = [index_array[i::4] for i in range(4)] pool = Pool(processes=4) batch_x = pool.map_async(get_action_tube, chunks) print(batch_x) # # Computes the batch for training # for i in range(len(index_array)): # action_tube_dir = self.filenames[index_array[i]] # frame_count = 0 # for fname in sorted(os.listdir(os.path.join(self.directory, action_tube_dir))): # if (frame_count >= self.frames_per_step): # break # very shit # img = load_img(os.path.join(os.path.join(self.directory, action_tube_dir), fname), # grayscale=grayscale, # target_size=self.target_size) # x = img_to_array(img, data_format=self.data_format) # batch_x[i,frame_count] = x # print(frame_count) # frame_count +=1 # for kk in range(self.frames_per_step): # for i in range(int(len(index_array)/self.frames_per_step)): # fname = self.filenames[index_array[i]] # img = load_img(os.path.join(self.directory, fname), # grayscale=grayscale, # target_size=self.target_size) # x = img_to_array(img, data_format=self.data_format) # x = self.image_data_generator.random_transform(x) # x = self.image_data_generator.standardize(x) # x = self.image_data_generator.change_dims(x) # my addition # # batch_x[i,kk] = x # optionally save augmented images to disk for debugging purposes if self.save_to_dir: for i in range(current_batch_size): img = array_to_img(batch_x[i], self.data_format, scale=True) fname = '{prefix}_{index}_{hash}.{format}'.format( prefix=self.save_prefix, index=current_index + i, hash=np.random.randint(1e4), format=self.save_format) img.save(os.path.join(self.save_to_dir, fname)) # build batch of labels if self.class_mode == 'input': batch_y = batch_x.copy() elif self.class_mode == 'sparse': batch_y = self.classes[index_array] elif self.class_mode == 'binary': batch_y = self.classes[index_array].astype(K.floatx()) elif self.class_mode == 'categorical': batch_y = np.zeros((len(batch_x), self.num_class), dtype=K.floatx()) for i, label in enumerate(self.classes[index_array]): batch_y[i, label] = 1. else: return batch_x return batch_x, batch_y
    processes = int(sys.argv[i + 1])
else:
    processes = 2

t = int(sys.stdin.readline())
if verbose:
    print >> sys.stderr, "Solving %d test cases" % t

# read input
test_cases = [read() for i in xrange(t)]

# solve
if parallelize:
    process_pool = Pool(processes=processes)
    if check_mode:
        test_results = process_pool.map_async(check, test_cases).get(9999999)
    else:
        test_results = process_pool.map_async(solve, test_cases).get(9999999)
else:
    if check_mode:
        test_results = [check(data) for data in test_cases]
    else:
        test_results = [solve(data) for data in test_cases]

if verbose:
    sys.stderr.write("\n")
    sys.stderr.flush()

# write output
except urllib2.HTTPError: pass if verbose: print 'Checking for %s... Missing!' % relative_file else: sys.stdout.write(u'✗') return sdk_artifact if not verbose: print 'Checking for %s artifacts on remote...' % len(sdk_artifacts), pool = Pool(20) missing_artifacts = [] pool.map_async(remote_has_artifact, sdk_artifacts, callback=missing_artifacts.extend).wait(999999) missing_artifacts = filter(lambda x: x is not None, missing_artifacts) if not verbose: print print '%s of %s artifacts missing from remote.' % (len(missing_artifacts), len(sdk_artifacts)) if verbose: print deployed = 0 for index, missing_artifact in enumerate(missing_artifacts): cmd = [ 'mvn', 'deploy:deploy-file', '-DrepositoryId=' + repo_id, '-Durl=' + repo_url, '-Dfile=' + missing_artifact['file'],
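# The artifact check above relies on map_async's callback: the callback fires once,
# with the complete ordered list of worker return values, which is why extend
# (rather than append) is the method passed, and why the None entries for artifacts
# that do exist are filtered out afterwards. A small self-contained sketch of the
# same pattern with a hypothetical check() worker:
from multiprocessing import Pool

def check(artifact):
    # hypothetical stand-in for remote_has_artifact(): return the artifact if missing
    return artifact if artifact % 2 else None

if __name__ == "__main__":
    artifacts = list(range(10))
    missing = []
    pool = Pool(4)
    # the callback receives the full ordered result list in one call
    pool.map_async(check, artifacts, callback=missing.extend).wait(999999)
    pool.close()
    pool.join()
    missing = [m for m in missing if m is not None]
    print(missing)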
class Predictor(object): """Load a pretrained DocReader model and predict inputs on the fly.""" def __init__(self, model=None, tokenizer=None, normalize=True, embedding_file=None, num_workers=None): """ Args: model: path to saved model file. tokenizer: option string to select tokenizer class. normalize: squash output score to 0-1 probabilities with a softmax. embedding_file: if provided, will expand dictionary to use all available pretrained vectors in this file. num_workers: number of CPU processes to use to preprocess batches. """ logger.info('Initializing model...') self.model = Model.load(model or DEFAULTS['model'], normalize=normalize) if embedding_file: logger.info('Expanding dictionary...') words = utils.index_embedding_words(embedding_file) added = self.model.expand_dictionary(words) self.model.load_embeddings(added, embedding_file) logger.info('Initializing tokenizer...') annotators = tokenizers.get_annotators_for_model(self.model) if not tokenizer: tokenizer_class = DEFAULTS['tokenizer'] else: tokenizer_class = tokenizers.get_class(tokenizer) if num_workers is None or num_workers > 0: self.workers = ProcessPool( num_workers, initializer=init, initargs=(tokenizer_class, annotators), ) else: self.workers = None self.tokenizer = tokenizer_class(annotators=annotators) def predict(self, document, question, candidates=None, top_n=1): """Predict a single document - question pair.""" results = self.predict_batch([( document, question, candidates, )], top_n) return results[0] def predict_batch(self, batch, top_n=1): """Predict a batch of document - question pairs.""" documents, questions, candidates = [], [], [] for b in batch: documents.append(b[0]) questions.append(b[1]) candidates.append(b[2] if len(b) == 3 else None) candidates = candidates if any(candidates) else None # Tokenize the inputs, perhaps multi-processed. if self.workers: q_tokens = self.workers.map_async(tokenize, questions) d_tokens = self.workers.map_async(tokenize, documents) q_tokens = list(q_tokens.get()) d_tokens = list(d_tokens.get()) else: q_tokens = list(map(self.tokenizer.tokenize, questions)) d_tokens = list(map(self.tokenizer.tokenize, documents)) examples = [] for i in range(len(questions)): examples.append({ 'id': i, 'question': q_tokens[i].words(), 'qlemma': q_tokens[i].lemmas(), 'document': d_tokens[i].words(), 'lemma': d_tokens[i].lemmas(), 'pos': d_tokens[i].pos(), 'ner': d_tokens[i].entities(), }) # Stick document tokens in candidates for decoding if candidates: candidates = [{ 'input': d_tokens[i], 'cands': candidates[i] } for i in range(len(candidates))] # Build the batch and run it through the model batch_exs = batchify([vectorize(e, self.model) for e in examples]) s, e, score = self.model.predict(batch_exs, candidates, top_n) # Retrieve the predicted spans results = [] for i in range(len(s)): predictions = [] for j in range(len(s[i])): span = d_tokens[i].slice(s[i][j], e[i][j] + 1).untokenize() predictions.append((span, score[i][j])) results.append(predictions) return results def cuda(self): self.model.cuda() def cpu(self): self.model.cpu()
def main(): progname = os.path.basename(sys.argv[0]) usage="""prog --model model1.hdf,model2.hdf --oldpath refine_01 Perform a 3d classification like e2refine_multi using the orientation of each particle in an e2refine_easy""" parser = EMArgumentParser(usage=usage,version=EMANVERSION) parser.add_argument("--newpath", type=str,help="Path to the classified results. Default = multinoali_XX", default=None) parser.add_argument("--oldpath", type=str,help="Path to the original refinement", default=None,guitype='filebox', filecheck=False,browser="EMBrowserWidget(withmodal=True,multiselect=False)", row=2, col=0, rowspan=1, colspan=3) parser.add_argument("--models","--model", dest="model", type=str,help="Comma separated list of reference maps used for classification. If a single map is provided, data will be split into two groups based on similarity to the single map.", default=None,guitype='filebox', browser='EMModelsTable(withmodal=True,multiselect=True)', filecheck=False, row=7, col=0, rowspan=1, colspan=3) parser.add_argument("--simcmp",type=str,help="The name of a 'cmp' to be used in comparing the aligned images. eg- frc:minres=80:maxres=20. Default=ccc", default="ccc", guitype='strbox', row=10, col=0, rowspan=1, colspan=3) parser.add_argument("--threads", type=int,help="Number of threads.", default=4, guitype='intbox', row=12, col=0, rowspan=1, colspan=1) parser.add_argument("--iter", type=int,help="Number of iterations.", default=1, guitype='intbox', row=12, col=1, rowspan=1, colspan=1) parser.add_header(name="optheader", help='Optional parameters:', title="Optional:", row=14, col=0, rowspan=1, colspan=3) parser.add_argument("--mask",type=str,help="Name of an optional mask file. The mask is applied to the input models to focus the classification on a particular region of the map. Consider e2classifyligand.py instead.", default=None,guitype='filebox', browser='EMModelsTable(withmodal=True,multiselect=False)', filecheck=False, row=15, col=0, rowspan=1, colspan=3) parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1) (options, args) = parser.parse_args() logid=E2init(sys.argv) if not options.model: print "No model input. Exit." exit() inputmodel=options.model.split(',') modelstack=0 if len(inputmodel)==1: num=EMUtil.get_image_count(inputmodel[0]) if num>1: modelstack=num print "3D stack input. Perform multi-model refinement using existing alignment..." multimodel=True inputmodel=inputmodel*num else: multimodel=False print "One input model. Split the data by half accroding to the similarity to the input model..." else: multimodel=True print "Multiple input models. Perform multi-model refinement using existing alignment..." ### make new folder if options.newpath == None: fls=[int(i[-2:]) for i in os.listdir(".") if i[:11]=="multinoali_" and len(i)==13 and str.isdigit(i[-2:])] if len(fls)==0 : fls=[0] options.newpath = "multinoali_{:02d}".format(max(fls)+1) print "Working directory: {}".format(options.newpath) try: os.mkdir(options.newpath) except: print "New path {} exist. Overwrite...".format(options.newpath) pass ### read json file and parse some parameters with open(options.oldpath+"/0_refine_parms.json") as json_file: db = json.load(json_file) db=parse_json(db.copy()) options.simcmp=parsemodopt(options.simcmp) sym=db["sym"] if db["breaksym"]: sym="c1" ### copy the model to the new folder print "Preprocessing the input models..." 
if options.mask: options.mask="--multfile {}".format(options.mask) else: options.mask="" db_apix=db["apix"] if db_apix==0: e=EMData(inputmodel[0],0,True) db_apix=e["apix_x"] if multimodel: if modelstack>1: models=range(modelstack) for m in range(modelstack): outfile="{path}/model_input_{k}.hdf".format(path=options.newpath, k=m) run("e2proc3d.py {model} {out} --process=filter.lowpass.randomphase:cutoff_freq={freq} --apix={apix} {mask} --first {mi} --last {mi}".format(model=inputmodel[m],out=outfile,freq=1.0/(db["targetres"]*2),apix=db_apix,mask=options.mask, mi=m)) inputmodel[m]=outfile else: models=range(len(inputmodel)) for m in models: outfile="{path}/model_input_{k}.hdf".format(path=options.newpath, k=m) run("e2proc3d.py {model} {out} --process=filter.lowpass.randomphase:cutoff_freq={freq} --apix={apix} {mask}".format(model=inputmodel[m],out=outfile,freq=1.0/(db["targetres"]*2),apix=db_apix,mask=options.mask)) inputmodel[m]=outfile else: models=[0,1] outfile="{path}/model_input.hdf".format(path=options.newpath) run("e2proc3d.py {model} {out} --process=filter.lowpass.randomphase:cutoff_freq={freq} --apix={apix} {mask}".format(model=inputmodel[0],out=outfile,freq=1.0/(db["targetres"]*2),apix=db_apix,mask=options.mask)) inputmodel[0]=outfile output_3d=[] output_cls=[] input_eo_order={0:"even",1:"odd"} for it in range(options.iter): print "Starting iteration {} ...".format(it) print "Making projections..." if it==0: #### first iteration. do one projection for even/odd if multimodel: projfile=[] for m in models: projfile.append("{path}/projections_{it:02d}_{k}.hdf".format(path=options.newpath, k=m, it=it)) run("e2project3d.py {model} --outfile {proj} -f --orientgen {orient} --sym {sym} --parallel thread:{threads}".format( model=inputmodel[m],proj=projfile[-1],orient=db["orientgen"],sym=db["sym"],threads=options.threads)) else: projfile=["{path}/projections_{it:02d}.hdf".format(path=options.newpath, it=it)] run("e2project3d.py {model} --outfile {proj} -f --orientgen {orient} --sym {sym} --parallel thread:{threads}".format( model=inputmodel[0],proj=projfile[0],orient=db["orientgen"],sym=db["sym"],threads=options.threads)) output_3d.append({}) output_cls.append({}) ### even/odd loop for eoid,eo in input_eo_order.items(): if it>0: inputmodel=[output_3d[-2][eo][m] for m in models] print inputmodel multimodel=True #### make projections for even/odd projfile=["{path}/projections_{it:02d}_{k}_{eo}.hdf".format(path=options.newpath, k=m, it=it,eo=eo) for m in models] for m in models: run("e2project3d.py {model} --outfile {proj} -f --orientgen {orient} --sym {sym} --parallel thread:{threads}".format( model=inputmodel[m],proj=projfile[m],orient=db["orientgen"],sym=db["sym"],threads=options.threads)) oldmapfile=str(db["last_{}".format(eo)]) ptclfile=str(db["input"][eoid]) clsmx=oldmapfile.replace("threed","classmx") ### old projection file is used for classaverage alignment oldprojfile=oldmapfile.replace("threed","projections") ncls=EMUtil.get_image_count(projfile[0]) npt=EMUtil.get_image_count(ptclfile) newclsmx=["{path}/classmx_{it:02d}_{n}_{eo}.hdf".format(path=options.newpath,n=i,eo=eo,it=it) for i in models] classout=["{path}/classes_{it:02d}_{n}_{eo}.hdf".format(path=options.newpath,n=i,eo=eo,it=it) for i in models] threedout=["{path}/threed_{it:02d}_{n}_{eo}.hdf".format(path=options.newpath,n=i,eo=eo,it=it) for i in models] output_3d[-1][eo]=threedout output_cls[-1][eo]=classout ### get alignment from classmx file and calculate similarity print "Calculating similarity matrix..." 
cmxcls=EMData(clsmx,0) cmxtx=EMData(clsmx,2) cmxty=EMData(clsmx,3) cmxalpha=EMData(clsmx,4) cmxmirror=EMData(clsmx,5) projs=[] for pj in projfile: projs.append(EMData.read_images(pj)) xforms=[] for i in range(npt): c=int(cmxcls[0,i]) tr=Transform({"type":"2d","alpha":cmxalpha[0,i],"mirror":int(cmxmirror[0,i]),"tx":cmxtx[0,i],"ty":cmxty[0,i]}) pjs=[projs[k][c] for k in range(len(projfile))] xforms.append({"ptclfile":ptclfile,"proj":pjs,"idx":i,"xform":tr,"cmp":options.simcmp}) pool = Pool() corr=pool.map_async(do_compare, xforms) pool.close() while (True): if (corr.ready()): break remaining = corr._number_left print "Waiting for", remaining, "tasks to complete..." time.sleep(2) corr=corr.get() np.savetxt("{path}/simmx_{it:02d}_{eo}.txt".format(path=options.newpath,eo=eo, it=it),corr) #corr=np.loadtxt("{path}/simmx_00_{eo}.txt".format(path=options.newpath,eo=eo)) ### classification print "Classifying particles..." cmxtmp=cmxcls.copy() cmxtmp.to_zero() cmxtmp.sub(1) cmxout=[cmxtmp.copy() for s in models] if multimodel: ### simply classify cls=np.argmin(corr,1) print eo,[float(sum(cls==k))/float(npt) for k in models] for i in range(npt): v=cmxcls[0,i] for s in models: if s==cls[i]: cmxout[s][0,i]=v else: cmxout[s][0,i]=-1 else: ### one model input, split the data to two halves for c in range(ncls): ss=[] ns=0 for i in range(npt): v=cmxcls[0,i] if v==c: ss.append(corr[i]) ns+=1 else: ss.append([10]*len(corr[i])) ### split the data by halv spt=int(ns*.5) for s in models: if s==0: toavg=np.argsort(ss)[:spt] else: toavg=np.argsort(ss)[spt:ns] for i in toavg: cmxout[s][0,i]=c ### write classmx for s in models: cmxout[s].write_image(newclsmx[s]) ns=EMUtil.get_image_count(clsmx) for i in range(1,ns): e=EMData(clsmx,i) e.write_image(newclsmx[s],i) print "Making class average and 3d map..." for s in models: ### class average run("e2classaverage.py --input {inputfile} --classmx {clsmx} --decayedge --storebad --output {clsout} --ref {proj} --iter {classiter} -f --normproc {normproc} --averager {averager} {classrefsf} {classautomask} --keep {classkeep} {classkeepsig} --cmp {classcmp} --align {classalign} --aligncmp {classaligncmp} {classralign} {prefilt} --parallel thread:{thrd}".format( inputfile=ptclfile, clsmx=newclsmx[s], clsout=classout[s], proj=projfile[s], classiter=db["classiter"], normproc=db["classnormproc"], averager=db["classaverager"], classrefsf=db["classrefsf"], classautomask=db["classautomask"],classkeep=db["classkeep"], classkeepsig=db["classkeepsig"], classcmp=db["classcmp"], classalign=db["classalign"], classaligncmp=db["classaligncmp"], classralign=db["classralign"], prefilt=db["prefilt"], thrd=options.threads)) ### make 3d run("e2make3dpar.py --input {clsout} --sym {sym} --output {threed} {preprocess} --keep {m3dkeep} {keepsig} --apix {apix} --pad {m3dpad} --mode gauss_5 --threads {threads} ".format( clsout=classout[s],threed=threedout[s], sym=sym, recon=db["recon"], preprocess=db["m3dpreprocess"], m3dkeep=db["m3dkeep"], keepsig=db["m3dkeepsig"], m3dpad=db["pad"],threads=options.threads, apix=db_apix)) ### post process print "Post processing..." 
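# The progress loop above reads corr._number_left, a private attribute of
# AsyncResult that is not part of the documented API. A minimal sketch of the same
# wait-and-poll structure using only the public ready()/get() methods, with a
# hypothetical compare() worker in place of do_compare:
import time
from multiprocessing import Pool

def compare(task):
    # hypothetical stand-in for do_compare()
    time.sleep(0.01)
    return task * 2

if __name__ == "__main__":
    tasks = list(range(200))
    with Pool() as pool:
        async_res = pool.map_async(compare, tasks, chunksize=10)
        # poll the public ready() flag instead of the private _number_left counter
        while not async_res.ready():
            print("Waiting for comparisons to finish...")
            time.sleep(2)
        corr = async_res.get()
    print(len(corr), "comparisons done")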
if os.path.exists("strucfac.txt") : m3dsetsf="--setsf strucfac.txt" else: m3dsetsf="" for s in models: final3d="{path}/threed_{it:02d}_{n}.hdf".format(path=options.newpath,n=s, it=it) run("e2refine_postprocess.py --even {even3d} --odd {odd3d} --output {final3d} --automaskexpand {amaskxp} --align --mass {mass} --iter 0 {amask3d} {amask3d2} {m3dpostproc} {setsf} --sym={sym} --restarget={restarget} --underfilter".format(even3d=output_3d[-1]["even"][s], odd3d=output_3d[-1]["odd"][s], final3d=final3d, mass=db["mass"], amask3d=db["automask3d"], sym=sym, amask3d2=db["automask3d2"], m3dpostproc=db["m3dpostprocess"], setsf=m3dsetsf,restarget=db["targetres"], amaskxp=db.setdefault("automaskexpand","0"))) ### copy the fsc files.. fscs=["fsc_unmasked_{:02d}.txt".format(it),"fsc_masked_{:02d}.txt".format(it),"fsc_maskedtight_{:02d}.txt".format(it)] for fsc in fscs: fm=os.path.join(options.newpath,fsc) fmnew=os.path.join(options.newpath,fsc[:-4]+"_model_{:02d}.txt".format(s)) try: copyfile(fm,fmnew) os.remove(fm) except: pass if it==options.iter-1: ### make lists tmpcls=["tmpcls_even.lst","tmpcls_odd.lst"] tmpcls_m=[l.replace('.','_m1.') for l in tmpcls] run("e2classextract.py {clsfile} --refinemulti --setname {tmpcls}".format(clsfile=output_cls[-1]["even"][s],tmpcls=tmpcls[0])) run("e2classextract.py {clsfile} --refinemulti --setname {tmpcls}".format(clsfile=output_cls[-1]["odd"][s],tmpcls=tmpcls[1])) lstout="sets/{}_{}.lst".format(options.newpath,s) run("e2proclst.py {lst1} {lst2} --mergesort {lstout}".format(lst1=tmpcls_m[0], lst2=tmpcls_m[1], lstout=lstout)) for l in tmpcls_m: try: os.remove(l) except: pass E2end(logid)
if cal:
    t1 = time.time()
    p = Pool(processes=process)
    result = []
    paras = read_para(input_)
    print(paras)
    kwds = [dict(paras=para,
                 code=code,
                 output=output,
                 start_time=start_time,
                 end_time=end_time,
                 symbol=symbol,
                 time_frame=time_frame,
                 log=log,
                 ) for para in paras]
    task = p.map_async(run_backest, kwds)
    # for para in paras:
    #     result.append(
    #         p.apply_async(func=run_backest,
    #                       kwds=dict(paras=para,
    #                                 code=code,
    #                                 output=output,
    #                                 start_time=start_time,
    #                                 end_time=end_time,
    #                                 )
    #                       )
    #     )
    t2 = time.time()
    print('Process pool created, size: <%s>, total time: %s seconds' % (process, t2 - t1))  # prints "[42, None, 'hello']"
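# map_async passes each element of the iterable to the worker as a single
# positional argument, so run_backest above receives one parameter dict per call
# rather than keyword arguments (the commented-out apply_async variant would have
# expanded them via kwds=). A minimal sketch with a hypothetical run_backtest
# worker that unpacks the dict itself:
from multiprocessing import Pool

def run_backtest(kwargs):
    # hypothetical worker: the whole parameter dict arrives as one argument
    return kwargs["para"] * 2

if __name__ == "__main__":
    jobs = [dict(para=p, code="EURUSD") for p in range(5)]
    with Pool(processes=2) as p:
        results = p.map_async(run_backtest, jobs).get()
    print(results)  # [0, 2, 4, 6, 8]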
# Reference D04 Clipped Fishnet Output d04_output = os.path.join(derived_dir, D04, 'FISHNET', D04_FINAL) # Reference LASD target_lasd = os.path.join(derived_dir, project_id + '.lasd') # Reference Data Domain For Filtering Fishnet data_domain = os.path.join(derived_dir, D01, 'RESULTS', D01_DATA_DOMAIN) # Create Filtered Fishnet & Return Extent For Processing extent_dict = filter_fishnet(data_domain, base_dir, d04_output) # Create Path for Output Rasters raster_path = os.path.join(base_dir, 'RASTER') os.mkdir(raster_path) # Use Multiprocessing Pool for Raster Generation pool = Pool(processes=cpu_count() - 2) result = pool.map_async( partial(generate_raster, target_lasd, raster_path), extent_dict.items()) pool.close() pool.join() except Exception as e: print('Exception', e) finally: print('Program Ran: {0}'.format(time.time() - start))
############################################################################################################### store = pd.HDFStore("/mnt/DataGuillaume/corr_pop_no_hd/" + session) store.put('rip_corr', rip_corr) store.put('allrip_corr', allrip_corr) store.put('wak_corr', wak_corr) store.put('allwak_corr', allwak_corr) store.put('rem_corr', rem_corr) store.put('allrem_corr', allrem_corr) store.close() print(time.clock() - start_time, "seconds") return time.clock() - start_time dview = Pool(8) a = dview.map_async(compute_population_correlation, sessions).get() # a = compute_population_correlation(datasets[0]) # ############################################################################################################### # # PLOT # ############################################################################################################### # last = np.max([np.max(allrip_corr[:,0]),np.max(alltheta_corr[:,0])]) # bins = np.arange(0.0, last, 0.2) # # average rip corr # index_rip = np.digitize(allrip_corr[:,0], bins) # mean_ripcorr = np.array([np.mean(allrip_corr[index_rip == i,1]) for i in np.unique(index_rip)[0:30]]) # # average theta corr # index_theta = np.digitize(alltheta_corr[:,0], bins) # mean_thetacorr = np.array([np.mean(alltheta_corr[index_theta == i,1]) for i in np.unique(index_theta)[0:30]]) # xt = list(bins[0:30][::-1]*-1.0)+list(bins[0:30])
def compress_zso(fname_in, fname_out, level): fin, fout = open_input_output(fname_in, fname_out) fin.seek(0, os.SEEK_END) total_bytes = fin.tell() fin.seek(0) magic, header_size, block_size, ver, align = ZISO_MAGIC, 0x18, 0x800, 1, DEFAULT_ALIGN # We have to use alignment on any ZSO files which > 2GB, for MSB bit of index as the plain indicator # If we don't then the index can be larger than 2GB, which its plain indicator was improperly set align = total_bytes // 2**31 header = generate_zso_header(magic, header_size, total_bytes, block_size, ver, align) fout.write(header) total_block = total_bytes // block_size index_buf = [0 for i in range(total_block + 1)] fout.write(b"\x00\x00\x00\x00" * len(index_buf)) show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level) write_pos = fout.tell() percent_period = total_block / 100 percent_cnt = 0 if MP: pool = Pool() block = 0 while block < total_block: if MP: percent_cnt += min(total_block - block, MP_NR) else: percent_cnt += 1 if percent_cnt >= percent_period and percent_period != 0: percent_cnt = 0 if block == 0: print("compress %3d%% avarage rate %3d%%\r" % (block / percent_period, 0), file=sys.stderr, end='\r') else: print("compress %3d%% avarage rate %3d%%\r" % (block / percent_period, 100 * write_pos / (block * 0x800)), file=sys.stderr, end='\r') if MP: iso_data = [(fin.read(block_size), level) for i in range(min(total_block - block, MP_NR))] zso_data_all = pool.map_async(lz4_compress_mp, iso_data).get(9999999) for i, zso_data in enumerate(zso_data_all): write_pos = set_align(fout, write_pos, align) index_buf[block] = write_pos >> align if 100 * len(zso_data) / len(iso_data[i][0]) >= min( COMPRESS_THREHOLD, 100): zso_data = iso_data[i][0] index_buf[block] |= 0x80000000 # Mark as plain elif index_buf[block] & 0x80000000: print( "Align error, you have to increase align by 1 or OPL won't be able to read offset above 2 ** 31 bytes" ) sys.exit(1) fout.write(zso_data) write_pos += len(zso_data) block += 1 else: iso_data = fin.read(block_size) try: zso_data = lz4_compress(iso_data, level) except Exception as e: print("%d block: %s" % (block, e)) sys.exit(-1) write_pos = set_align(fout, write_pos, align) index_buf[block] = write_pos >> align if 100 * len(zso_data) / len(iso_data) >= COMPRESS_THREHOLD: zso_data = iso_data index_buf[block] |= 0x80000000 # Mark as plain elif index_buf[block] & 0x80000000: print( "Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes" ) sys.exit(1) fout.write(zso_data) write_pos += len(zso_data) block += 1 # Last position (total size) index_buf[block] = write_pos >> align # Update index block fout.seek(len(header)) for i in index_buf: idx = pack('I', i) fout.write(idx) print("ziso compress completed , total size = %8d bytes , rate %d%%" % (write_pos, (write_pos * 100 / total_bytes))) fin.close() fout.close()
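# In the multiprocessing branch above, blocks are read and compressed MP_NR at a
# time, and each batch is collected with get() before the next batch is read,
# which keeps memory bounded. A small self-contained sketch of that chunked
# dispatch, using zlib in place of the LZ4 helper:
import zlib
from multiprocessing import Pool

def compress_block(args):
    data, level = args
    return zlib.compress(data, level)

if __name__ == "__main__":
    blocks = [bytes([i]) * 0x800 for i in range(16)]  # fake 2 KiB blocks
    batch_size = 4                                    # plays the role of MP_NR
    compressed = []
    with Pool() as pool:
        for start in range(0, len(blocks), batch_size):
            batch = [(b, 6) for b in blocks[start:start + batch_size]]
            # each batch is dispatched and collected before the next is read,
            # bounding memory to batch_size blocks at a time
            compressed.extend(pool.map_async(compress_block, batch).get(9999999))
    print(sum(len(c) for c in compressed), "compressed bytes")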
def download(cls, url): form = 'https://www.x23qb.com{}' html = get(url).content soup = BS(html, 'html.parser') title = soup.find("div", {"class": 'd_title'}).text title = title.strip().split()[0] print(title) li = soup.find(id='chapterList').findAll('li') a = [i.find('a') for i in li] chapters = [i.text.split(' ', 1) for i in a] j = 0 before = '' for i in chapters: if i[0] != before: before = i[0] j += 1 i[0] = itoa(j) + i[0] novel = {} now = '' j = 1 for i in range(len(a)): href = form.format(a[i].get('href')) volume, chapter = chapters[i] if volume != now: now = volume if now in novel: chapter = chapter.split(' ', 1) if len(chapter) == 1: pass else: volume += chapter[0] chapter = chapter[1] novel[volume] = novel.get(volume, []) + [(chapter, href)] path = f'./download/{title}/' if not os.path.isdir(path): os.mkdir(path) for volume, a in novel.items(): pool = Pool(20) content = pool.map_async(cls.get_content, [i[1].rstrip('.html') for i in a]) pool.close() pool.join() content.wait() content = content.get() out = '' for i in range(len(a)): chapter = a[i][0] print(chapter) out += f'\n第{i+1}章 {chapter}\n' out += '\n\n'.join(content[i]) out += '\n' with open(path + f'{volume}.txt', 'w') as f: f.write(out)
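# The downloader above constructs a fresh Pool(20) for every volume; a single pool
# can be reused across volumes, which avoids repeatedly spawning and tearing down
# worker processes. A minimal sketch with a hypothetical fetch_chapter worker
# standing in for cls.get_content:
from multiprocessing import Pool

def fetch_chapter(url):
    # hypothetical stand-in for cls.get_content(): return the text of one chapter
    return ["paragraph from " + url]

if __name__ == "__main__":
    novel = {
        "Volume 1": [("Chapter 1", "https://example.com/c1.html")],
        "Volume 2": [("Chapter 2", "https://example.com/c2.html")],
    }
    # one pool reused for every volume, instead of a fresh Pool(20) per volume
    with Pool(processes=4) as pool:
        for volume, chapters in novel.items():
            urls = [href for _, href in chapters]
            texts = pool.map_async(fetch_chapter, urls).get()
            print(volume, len(texts), "chapters")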
def main(): """ Runner """ args = parse_arguments() pool = Pool(args.processes) if args.tags: cmd = 'behave -d --no-junit --f json --no-summary --no-skipped -t {}'.format( ' -t '.join(args.tags)) else: cmd = 'behave -d --no-junit --f json --no-summary --no-skipped' p = Popen(cmd, stdout=PIPE, shell=True) out, err = p.communicate() try: j = json.loads(out.decode()) except ValueError: j = [] # Identify all the feature files that have the tags features = [e['location'][:-2] for e in j] features_scenarios = [] for scenario_elements in j: # Check if a feature file is reported with no matching scenario, if so skip it if 'elements' in scenario_elements: for i in scenario_elements['elements']: if i['keyword'].upper() in [ "scenario".upper(), "scenario outline".upper() ]: # Build a list of filepaths for valid scenarios features_scenarios.append( scenario_elements['location'][:-2] + delimiter + i['name']) logger.info("Found {} features".format(len(features))) logger.info("Found {} scenarios".format(len(features_scenarios))) if args.processes > len(features): logger.info( "You have defined {} and will execute only necessary {} parallel process " .format(args.processes, len(features))) else: logger.info("Will execute {} parallel process".format(args.processes)) run_feature = partial(_run_feature, tags=args.tags, userdata=args.define) logger.info( "--------------------------------------------------------------------------" ) output = 0 failed_tests = passed_tests = [] # https://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-p for feature, scenario, status in pool.map_async( run_feature, features_scenarios).get(9999): if status == 'PASSED': passed_tests.append((feature, scenario, status)) else: failed_tests.append((feature, scenario, status)) # failed_tests_including_rerun = list(set(failed_tests) - set(passed_tests)) # if output == 0: # failed_tests.append((feature, scenario, status)) # if status == "FAILED": # output = 1 # else: # output = 2 logger.info( "--------------------------------------------------------------------------" ) end_time = datetime.now() if failed_tests_including_rerun: output = 1 for failed_test in failed_tests_including_rerun: logger.info("{0:50}: {1} --> {2}".format(failed_test[0], failed_test[1], failed_test[2])) # if failed_tests: # for failed_test in failed_tests: # logger.info( # "{0:50}: {1} --> {2}".format(failed_test[0], failed_test[1], failed_test[2])) logger.info("Duration: {}".format(format(end_time - start_time))) logger.info("Test Status: {0}".format(str(output))) sys.exit(output)
def main(): global opts try: # TODO: load fofn with open(opts.fofn[0], 'r') as f: bamlist = [x.rstrip() for x in f] N = len(bamlist) M = opts.cores bamlist = np.array_split(bamlist, M) if opts.verbose: logging.info('%d bam files in total, and split to %d parts' % (N, M)) # TODO: build temporary directory tmp_dir = tempfile.mkdtemp() if opts.verbose: logging.info('work in temporary dir %s' % tmp_dir) # TODO: run samtools depth on local bamlist params = [] for i, local_bamlist in enumerate(bamlist): params.extend([(tmp_dir, i, list(local_bamlist))]) pool = Pool(opts.cores) bedlist = [] r = pool.map_async(batch_depth, params, callback=bedlist.extend) r.wait() pool.close() pool.join() # TODO: run bedtools on bedlist if opts.verbose: logging.info('merge the results') # TODO: join local results if opts.verbose: Z = 0 for b in bedlist: Z += int(os.path.getsize(b)) logging.info('join {} depth-files in total size {} Mb'.format( M, round(Z / 1024. / 1024., 5))) bed = os.path.join(tmp_dir, 'all.bed') if len(bedlist) == 1: subprocess.call(['mv', bedlist[0], bed]) else: p = pipes.Template() p.append(r'join -t $%s\t%s -e 0 -a 1 -a 2 -j 1 -o 0,1.2,2.2 %s %s 2>/dev/null'%\ ("'","'",bedlist[0],bedlist[1]),\ '--') for i in xrange(2, len(bedlist)): p.append(r'join -t $%s\t%s -e 0 -a 1 -a 2 -j 1 -o 0,%s,2.2 - %s 2>/dev/null'%\ ("'","'",','.join(['1.%d' % (x+2) for x in xrange(i)]),bedlist[i]),\ '--') f = p.open(bed, 'w') f.close() # TODO: split into parts wc = subprocess.Popen(['wc', '-l', str(bed)], stdout=subprocess.PIPE) wc_res, wc_err = wc.communicate() LN = int(wc_res.strip().split()[0]) if opts.verbose: logging.info('split %d nucleotide positions into %d parts of each with %d positions' %\ (LN,opts.cores,int(np.ceil(LN/float(opts.cores))))) subprocess.call(['split','-a','3','-d','-l',str(int(np.ceil(LN/float(opts.cores)))),str(bed),\ str(os.path.join(tmp_dir,'all_bed_split'))]) bedsplit = [ os.path.join(tmp_dir, 'all_bed_split%03d' % x) for x in xrange(opts.cores) ] params = [] for i, bs in enumerate(bedsplit): params.extend([(tmp_dir, i, M, bs)]) pool2 = Pool(opts.cores) bedsplit = [] r2 = pool2.map_async(nucleotide_depth, params, callback=bedsplit.extend) r2.wait() pool2.close() pool2.join() # TODO: report if opts.verbose: logging.info('merge %d parts, then sort' % opts.cores) p = pipes.Template() p.append('cat %s' % " ".join([str(x) for x in bedsplit]), '--') p.append('sort -k1,1n -k2,2n', '--') f = p.open(bed, 'w') f.close() proc_sum_bed = subprocess.Popen([ 'bedtools', 'merge', '-c', '4', '-o', 'mean', '-d', '10', '-i', str(bed) ]) proc_sum_bed.wait() if opts.verbose: logging.info('rm %s' % tmp_dir) #shutil.rmtree(tmp_dir) if opts.verbose: logging.info('complete') except Exception as e: logging.exception('rm %s' % tmp_dir) shutil.rmtree(tmp_dir) raise e
n_workers = int(raw_input("How many workers should I use? ")) print n_workers #-------------------------------------------------------------------------------------- # Make a pool to execute the commands #-------------------------------------------------------------------------------------- pool = Pool(n_workers) #-------------------------------------------------------------------------------------- # Try to get the pool of commands to run. # If the user wants to quit, save the remaining copies to a file. #-------------------------------------------------------------------------------------- try: pool.map_async(printAndExecute, command_list).get(99999999) except KeyboardInterrupt: #-------------------------------------------------------------------------------------- # First, terminate the pool #-------------------------------------------------------------------------------------- pool.terminate() #-------------------------------------------------------------------------------------- # Now write the remaining commands to a .txt file #-------------------------------------------------------------------------------------- print "\n" print "\n"
POST autograder results (in parallel, load testing) """ def post_grade(data): h = {'content-type': 'application/json'} addr = 'http://localhost:%d' % (server.server_address[1]) res = requests.post(addr, data=data, headers=h) if not res.ok: logger.error('Failed request! Status code: {0}\n{1}'.format( res.status_code, res.content)) logger.warn('Starting load test thread pool...') pool = Pool(num_threads) pool.map_async(post_grade, data_packets) """ Handle results in work queue """ time.sleep(5) logger.warn('Starting rq worker...') util.start_rq_worker(work_queue) """ Check for successful entry """ logger.warn('Checking entries...') for cq in check_queries: grade = connection.Grade.find_one(cq[0]) if not grade: logger.error('Missing grade!\n{0}'.format(jprint.pformat(cq[1])))
def fragments(bam, fragment_path, min_mapq=30, nproc=1, cellbarcode="CB", chromosomes="(?i)^chr", readname_barcode=None, cells=None, max_distance=5000, min_distance=10, chunksize=500000, shifts=[4, -5], collapse_within=False): """Create ATAC fragment file from BAM file Iterate over reads in BAM file, extract fragment coordinates and cell barcodes. Collapse sequencing duplicates. Parameters ---------- bam : str Path to BAM file fragment_path : str Path for output fragment file min_mapq : int Minimum MAPQ to retain fragment nproc : int, optional Number of processors to use. Default is 1. cellbarcode : str Tag used for cell barcode. Default is CB (used by cellranger) chromosomes : str, optional Regular expression used to match chromosome names to include in the output file. Default is "(?i)^chr" (starts with "chr", case-insensitive). If None, use all chromosomes in the BAM file. readname_barcode : str, optional Regular expression used to match cell barocde stored in read name. If None (default), use read tags instead. Use "[^:]*" to match all characters before the first colon (":"). cells : str File containing list of cell barcodes to retain. If None (default), use all cell barcodes found in the BAM file. max_distance : int, optional Maximum distance between integration sites for the fragment to be retained. Allows filtering of implausible fragments that likely result from incorrect mapping positions. Default is 5000 bp. min_distance : int, optional Minimum distance between integration sites for the fragment to be retained. Allows filtering implausible fragments that likely result from incorrect mapping positions. Default is 10 bp. chunksize : int Number of BAM entries to read through before collapsing and writing fragments to disk. Higher chunksize will use more memory but will be faster. shifts : list Fragment position shifts to apply. First element defines shift for + strand, second element defines shift for - strand. collapse_within : bool Only collapse fragments containing the same start and end coordinate within the same cell barcode. Setting to True will only collapse duplicates if the cell barcode is the same (allows same fragment coordinates with different cell barcode) """ nproc = int(nproc) chrom = utils.get_chromosomes(bam, keep_contigs=chromosomes) cells = utils.read_cells(cells) p = Pool(nproc) frag_lists = [ p.map_async( functools.partial(getFragments, bam=bam, min_mapq=int(min_mapq), cellbarcode=cellbarcode, readname_barcode=readname_barcode, cells=cells, max_distance=max_distance, min_distance=min_distance, chunksize=chunksize, shifts=shifts, collapse_within=collapse_within), list(chrom.items()), ) ] filenames = [res.get() for res in frag_lists] # cat files and write to output with open(fragment_path, "w") as outfile: for i in filenames: for j in i: with open(j, "r") as infile: for line in infile: outfile.write(line) os.remove(j)
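# functools.partial freezes the keyword arguments here so that map_async only has
# to deliver the per-chromosome (name, length) tuples to the workers; the call is
# also wrapped in a one-element list, so res.get() runs once per wrapper rather
# than once per chromosome. A self-contained sketch of the partial + map_async
# dispatch with a hypothetical get_fragments worker:
import functools
from multiprocessing import Pool

def get_fragments(interval, bam, min_mapq):
    # hypothetical stand-in for getFragments(): one temp filename per chromosome
    chrom_name, length = interval
    return "{}_{}.tmp".format(chrom_name, min_mapq)

if __name__ == "__main__":
    chrom = {"chr1": 248956422, "chr2": 242193529}
    with Pool(processes=2) as p:
        # partial() freezes the keyword arguments, so map_async only delivers
        # the (name, length) tuples to each worker
        worker = functools.partial(get_fragments, bam="input.bam", min_mapq=30)
        filenames = p.map_async(worker, list(chrom.items())).get()
    print(filenames)  # ['chr1_30.tmp', 'chr2_30.tmp']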
############################################################################################################### store = pd.HDFStore("/mnt/DataGuillaume/corr_pop/"+session.split("/")[1]+".h5") store.put('rip_corr', rip_corr) store.put('allrip_corr', allrip_corr) store.put('wak_corr', wak_corr) store.put('allwak_corr', allwak_corr) store.put('rem_corr', rem_corr) store.put('allrem_corr', allrem_corr) store.close() print(time.clock() - start_time, "seconds") return time.clock() - start_time dview = Pool(8) a = dview.map_async(compute_population_correlation, datasets) # a = compute_population_correlation(datasets[0]) # ############################################################################################################### # # PLOT # ############################################################################################################### # last = np.max([np.max(allrip_corr[:,0]),np.max(alltheta_corr[:,0])]) # bins = np.arange(0.0, last, 0.2) # # average rip corr # index_rip = np.digitize(allrip_corr[:,0], bins) # mean_ripcorr = np.array([np.mean(allrip_corr[index_rip == i,1]) for i in np.unique(index_rip)[0:30]]) # # average theta corr # index_theta = np.digitize(alltheta_corr[:,0], bins) # mean_thetacorr = np.array([np.mean(alltheta_corr[index_theta == i,1]) for i in np.unique(index_theta)[0:30]])