def compress_stuff(self):
    """Parse the URL list from the UI and compress every site asynchronously.

    Skips comment lines (leading '#'), strips trailing slashes/newlines and
    inline comments, validates each entry against ``url_regex`` and hands the
    surviving URLs to a thread pool running ``process_url``.  Progress is
    driven by a Kivy Clock interval calling ``update_status``; the pool
    callback ``sites_compressed`` fires when all work is done.
    """
    if not self.is_compressinating:
        self.is_compressinating = True
        pool = ThreadPool(8)
        to_process = self.root.ids.unprocessed.text
        parsed_list = []
        for line in to_process.splitlines():
            # BUG FIX: `line[0] == '#'` raised IndexError on blank lines;
            # startswith() is safe and equivalent for non-empty input.
            if not line.startswith('#'):
                parsed = re.sub(r'[\n\/]+$', '', line)  # trailing newlines/slashes
                parsed = re.sub(r'#.*$', '', parsed)    # inline comments
                if parsed:
                    if re.match(url_regex, parsed) is not None:
                        parsed_list.append(parsed)
                    else:
                        printlog(
                            'It looks like you entered a malformed URL. I am gonna ignore it '
                            + parsed)
        self.compressed_so_far = 0
        self.total_to_compress = len(parsed_list)
        self.show_popup()
        self.clock_interval = Clock.schedule_interval(
            self.update_status, 1 / 15)
        try:
            pool.starmap_async(
                process_url,
                zip(parsed_list, itertools.repeat(updatedConfig)),
                callback=self.sites_compressed)
        except Exception as err:
            printlog('URL processing error: ' + repr(err))
        finally:
            # Previously leaked: close() lets the queued work finish while
            # refusing any further submissions.
            pool.close()
class ParallelSim(object):
    """Multiprocess map helper.

    Usage:
        pl = ParallelSim()
        pl.add(your_func, your_iter)   # your_iter yields argument tuples
        pl.run()
        data = list(pl.get_results())
    """

    def __init__(self, processes=cpu_count()):
        self.pool = Pool(processes=processes)
        self.total_processes = 0      # chunks submitted so far
        self.completed_processes = 0  # chunks whose callback has fired
        self.results = []
        self.data = None              # most recent AsyncResult
        self.cores = processes        # number of worker processes

    def add(self, func, iter):
        """Submit *func* over an iterable of argument tuples and wait.

        A list input (with more than one worker) is split into one slice per
        core so ``complete`` can report per-chunk progress; anything else is
        submitted as a single starmap job.
        """
        # NOTE: parameter name `iter` shadows the builtin; kept for backward
        # compatibility with callers that pass it by keyword.
        if isinstance(iter, list) and self.cores > 1:
            # Hoisted out of the loop: the chunk length is loop-invariant.
            chunk = int(len(iter) / self.cores) + 1
            pending = []
            for i in range(self.cores):
                self.data = self.pool.starmap_async(
                    func, iter[i * chunk:(i + 1) * chunk],
                    callback=self.complete)
                pending.append(self.data)
                self.total_processes += 1
            # BUG FIX: previously only the *last* chunk was waited on
            # (`self.data.get()`); wait on every chunk so all results are in
            # (and worker errors surface) before add() returns.
            for async_result in pending:
                async_result.get()
        else:
            self.data = self.pool.starmap_async(func=func, iterable=iter,
                                                callback=self.complete)
            self.total_processes += 1
            self.data.get()

    def complete(self, result):
        """Pool callback: collect one chunk's results and report progress."""
        self.results.extend(result)
        self.completed_processes += 1
        print('Progress: {:.2f}%'.format(
            (self.completed_processes / self.total_processes) * 100))

    def run(self):
        """Stop accepting work and wait for the pool to drain."""
        self.pool.close()
        self.pool.join()

    def get_results(self):
        """Return the accumulated results (chunk order not guaranteed)."""
        return self.results
def CheckDirs(dir, clientname):
    """Run CheckCallSite over every callsite_* subdirectory of *dir* in parallel.

    Polls the global ``early_stop`` flag while the thread pool works.
    Returns True when checking ran to completion, False when ``early_stop``
    was raised meanwhile.
    """
    global early_stop
    # NOTE(review): assert is stripped under `python -O`; kept for
    # compatibility with existing callers expecting AssertionError.
    assert (os.path.isdir(dir))
    verified = set()  # scratch set shared by all worker threads
    callsites = [
        os.path.abspath(os.path.join(dir, o))
        for o in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, o)) and o.startswith("callsite_")
    ]
    innput_array = [[callsite, verified, clientname] for callsite in callsites]
    # ROBUSTNESS FIX: int(cpu_count() / 2) is 0 on a single-core host, which
    # makes ThreadPool raise; always keep at least one worker.
    pool = ThreadPool(max(1, cpu_count() // 2))
    results = pool.starmap_async(CheckCallSite, innput_array)
    # BUG FIX: the old `while not ready(): continue` spin-loop pinned a full
    # CPU core; a short blocking wait still polls early_stop promptly.
    while not results.ready() and not early_stop:
        results.wait(0.1)
    pool.close()  # previously leaked; lets worker threads wind down
    return not early_stop
# NOTE(review): this chunk opens mid-function — the `if` matching the `elif`
# below (and the enclosing `def get_pwd`) are outside the visible source, so
# the leading indentation is a best-effort reconstruction.
        pwd = sys.argv[2].encode('gbk')
    elif sys.argv[2] == "rm":
        if len(sys.argv) == 4:
            pwd = sys.argv[3].encode('gbk')
    return pwd


def rm_file(files):
    """Delete the source archive files (only when "rm" was passed on argv)."""
    if len(sys.argv) > 2:
        if sys.argv[2] == "rm":
            for file in files:
                os.remove(file)


if __name__ == '__main__':
    path = sys.argv[1]
    pwd = get_pwd()
    # with open(r'UnRAR.exe','rb') as f:
    #     with open(os.path.join(path,'UnRAR.exe'),'wb') as other:
    #         other.write(f.read())
    file_names = point_file_name(path)
    while file_names:
        pool = Pool()
        # Unpack every archive in parallel, all with the same password.
        pool.starmap_async(un_rar, zip(file_names, [pwd] * len(file_names)))
        pool.close()
        pool.join()
        rm_file(file_names)
        # NOTE(review): each iteration overwrites file_names, so only the
        # listing for the *last* name survives into the next while pass —
        # nested archives from earlier names are skipped; verify intent.
        for name in file_names:
            file_names = point_file_name(name.split(".")[0])
class ImageHandler:
    """Collects (src, dst) image path pairs and resizes them on a thread pool.

    Configuration arrives via **kwargs injected into the instance dict —
    presumably src_images, workers, size and quality; TODO confirm against
    the callers, nothing in this chunk sets them explicitly.
    """

    dir_postfix = 'imgc'       # suffix for mirrored output directories
    dir_files = '_files-imgc'  # output subdir for single-file inputs
    imgs_done = 0              # class-level defaults, shadowed per instance
    imgs_total = 0

    def __init__(self, queue=None, **kwargs):
        # All configuration is injected straight into the instance dict.
        self.__dict__.update(kwargs)
        self.images = []  # list of (src, dst) tuples
        # threading.Thread.__init__(self)
        # self.queue = queue
        # self._stop = threading.Event()
        self.finished = False
        self.generate_image_paths()

    def generate_image_paths(self):
        """Build self.images from self.src_images (directories walked recursively)."""
        for arg in self.src_images:
            # process directory path
            if os.path.isdir(arg):
                src = os.path.abspath(arg)
                dst = os.path.abspath("{}-{}".format(src, self.dir_postfix))
                if not os.path.exists(dst):
                    os.makedirs(dst)  # target path must always exist
                for root, dirs, files in os.walk(src):
                    for d in dirs:
                        src_dir = os.path.join(root, d)
                        # NOTE(review): dst_dir is computed but never used —
                        # directories are actually created below via parent_dir.
                        dst_dir = src_dir.replace(src, dst)
                    for f in files:
                        if extension(f) not in IMAGE_EXTS:
                            continue
                        # if os.path.splitext(f)[1][1:].strip().lower() not in IMAGE_EXTS: continue
                        src_file = os.path.join(root, f)
                        dst_file = src_file.replace(src, dst)
                        parent_dir = os.path.dirname(dst_file)
                        print(parent_dir)
                        if not os.path.exists(parent_dir):
                            os.makedirs(parent_dir)
                        print(dst_file)
                        self.append((src_file, dst_file))
            # process file path
            elif os.path.isfile(arg):
                src = os.path.abspath(arg)
                dst_dir = os.path.join(os.path.dirname(src), self.dir_files)
                if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
                dst = os.path.join(dst_dir, os.path.basename(arg))
                self.append((src, dst))
        # print (self.images)
        self.imgs_total = len(self.images)

    # def stop(self):
    #     self._stop.set()

    # def stopped(self):
    #     return self._stop.isSet()

    def terminate_pool(self):
        """Abort outstanding work unless already finished; returns the finished flag."""
        if not self.finished:
            self.pool.close()
            self.pool.terminate()
        return self.finished

    def on_finish(self, x):
        # starmap_async success callback.
        print("{}: finished successfully!".format(self.__class__.__name__))
        self.finished = True
        self.error = False

    def on_error(self, x):
        # starmap_async error callback; re-raises inside the pool's result
        # handler thread. NOTE(review): the exception will not reach the
        # caller of run() — confirm this is intended.
        raise x
        # print("{}: error - {}".format(self.__class__.__name__, str(x)))
        # self.finished = True
        # self.error = True

    def run(self):
        """Kick off asynchronous resizing of all collected images (non-blocking)."""
        if not self.images:
            print("No images found!")
            sys.exit()
        # print("Images found: {}".format(self.imgs_total))
        print("Images found: {}".format(len(self.images)))
        # print(self.images)
        self.pool = ThreadPool(self.workers)
        time_start = time.time()
        # self.pool.starmap(self.resize_image, self.images)
        self.pool.starmap_async(
            self.resize_image, self.images, 1, self.on_finish, self.on_error)
        # pool.close()
        # pool.join()
        # time_end = time.time()
        # # window = tkinter.Tk()
        # # window.wm_withdraw()
        # # tkinter.messagebox.showinfo('imgc - work finished', 'Images compressed: {}'.format(self.imgs_done))
        # print("Time elapsed: {}".format(time_end - time_start))

    def append(self, path_tuple):
        # path_tuple is a (src, dst) pair.
        self.images.append(path_tuple)

    def print_status(self, dst):
        """Print a one-line done/total progress entry for *dst*."""
        print("[{}/{}] processed image {}".format(
            self.imgs_done, self.imgs_total, os.path.split(dst)[1]))

    # FIXME: add watermark feature
    # def watermark():
    #     try:
    #         watermark = Image.open(self.wmfile)
    #         # if extension(self.wmfile) in "png":
    #         #     watermark.load()
    #         wm_oldsize = watermark.size
    #         wm_newdim = min(wnew, hnew) * 0.2
    #         wm_oldindex, wm_olddim = min(enumerate(wm_oldsize))
    #         print(wm_oldindex, wm_olddim)
    #         wm_ratio = wm_newdim/wm_olddim
    #         wm_newsize = (int(wm_oldsize[0] * wm_ratio),
    #                       int(wm_oldsize[1] * wm_ratio))
    #         print(wm_newsize)
    #         watermark = watermark.resize(wm_newsize)
    #         # mask = watermark.convert("L").point(lambda x: min(x, 50))
    #         # .point(lambda x: 240)
    #         # mask.show()
    #         # watermark.putalpha(mask)
    #         im.paste(watermark, (0, 0), watermark)
    #     except AttributeError:
    #         pass

    def resize_image(self, src, dst):
        """Worker: resize one image and save it to *dst* (JPEG gets `quality`)."""
        print("resizing")
        pattern = self.size
        quality = self.quality
        im = Image.open(src)
        new_size = ImageSize.parse(pattern, image=im)
        print('new_size:', new_size)
        im = im.resize(new_size, Image.BICUBIC)
        if os.path.splitext(dst)[1][1:].strip().lower() not in IMAGE_JPG:
            im.save(dst)
            print("saving non-jpeg %s" % dst)
        else:
            # quality supported by jpegs only
            im.save(dst, 'JPEG', quality=quality)
            print("saving jpeg %s" % dst)
        # NOTE(review): += on shared state from multiple pool threads is not
        # atomic; the count may drift under load.
        self.imgs_done += 1
        # self.print_status(dst)
        print("processed")
        try:
            # Optional progress hook; falls back to plain printing when the
            # instance was built without one.
            self.on_image_processed(self.imgs_done, self.imgs_total)
            print("method called")
        except AttributeError:
            self.print_status(dst)
class ThreadPool:
    """multiprocessing.dummy.Pool wrapper that passes exc_info to error_callback."""

    def __init__(self, name=None, processes=None):
        self._processes = processes
        self._pool = NamedPool(self._processes, name=name)
        self._lock = Lock()       # guards pool replacement (renew)
        self._cblock = Lock()     # serializes callback invocations
        self._errcblock = Lock()  # serializes error_callback invocations
        self._closed = False
        self.name = name

    # --- plain delegation to the underlying pool --------------------------
    def apply(self, *args, **kwargs):
        return self._pool.apply(*args, **kwargs)

    def map(self, *args, **kwargs):
        return self._pool.map(*args, **kwargs)

    def map_async(self, *args, **kwargs):
        return self._pool.map_async(*args, **kwargs)

    def imap(self, *args, **kwargs):
        return self._pool.imap(*args, **kwargs)

    def imap_unordered(self, *args, **kwargs):
        return self._pool.imap_unordered(*args, **kwargs)

    def starmap(self, *args, **kwargs):
        return self._pool.starmap(*args, **kwargs)

    def starmap_async(self, *args, **kwargs):
        return self._pool.starmap_async(*args, **kwargs)

    def join(self):
        return self._pool.join()

    # --- helpers ----------------------------------------------------------
    def _uiter(self, iterable):
        """Lazily yield items of *iterable* with duplicates removed.

        Uses a list (not a set) so unhashable items keep working; the
        membership test stays O(n) per item, as before.
        """
        seen = []
        for item in iterable:
            if item in seen:
                continue
            yield item
            seen.append(item)

    def _trycall(self, func, args=(), kwargs=None, lock=None):
        """Best-effort call of *func* under *lock*; swallows any error.

        NOTE(review): callers must pass a real lock — `lock=None` would fail
        at `with lock:`; every internal call site does pass one.
        """
        if not callable(func):
            return
        kwargs = {} if kwargs is None else kwargs  # avoid mutable default
        with lock:
            try:
                return func(*args, **kwargs)
            except BaseException:  # was a bare `except:`; made explicit
                pass

    def _caller(self, func, args, kwargs, callback, error_callback, exc_raise):
        """Run *func*; feed error_callback with exc_info on failure, callback on success."""
        try:
            result = func(*args, **kwargs)
        except BaseException:
            etype, value, tb = sys.exc_info()
            self._trycall(error_callback,
                          args=(self.name, etype, value, tb),
                          lock=self._errcblock)
            if exc_raise:
                # BUG FIX: was `raise etype(value)`, which wrapped the original
                # exception inside a new one and discarded the traceback;
                # re-raise the active exception unchanged instead.
                raise
        else:
            self._trycall(callback, args=(result,), lock=self._cblock)
            return result

    def apply_async(self, func, args=(), kwargs=None,
                    callback=None, error_callback=None):
        # Runs error_callback with (ThreadPool.name, *exc_info()) if func
        # failed; callback and error_callback will *not* run concurrently.
        # Other arguments are the same as Pool.apply_async.
        kwargs = {} if kwargs is None else kwargs  # avoid mutable default
        return self._pool.apply_async(
            self._caller, (func, args, kwargs, None, error_callback, True),
            callback=callback)

    def cbmap(self, func, iterable, callback=None, error_callback=None):
        """apply_async(func, (item,)) for every item; always returns None."""
        for item in iterable:
            self.apply_async(func, args=(item,),
                             callback=callback, error_callback=error_callback)

    def ucbmap(self, func, iterable, callback=None, error_callback=None):
        """Unique version of ThreadPool.cbmap."""
        return self.cbmap(func, self._uiter(iterable), callback, error_callback)

    def umap(self, func, iterable, chunksize=None):
        """Unique version of ThreadPool.map."""
        return self.map(func, self._uiter(iterable), chunksize=chunksize)

    def umap_async(self, func, iterable, chunksize=None,
                   callback=None, error_callback=None):
        """Unique version of ThreadPool.map_async."""
        return self.map_async(func, self._uiter(iterable), chunksize,
                              callback, error_callback)

    def uimap(self, func, iterable, chunksize=None):
        """Unique version of ThreadPool.imap."""
        return self.imap(func, self._uiter(iterable), chunksize)

    def uimap_unordered(self, func, iterable, chunksize=None):
        """Unique version of ThreadPool.imap_unordered."""
        return self.imap_unordered(func, self._uiter(iterable), chunksize)

    def ustarmap(self, func, iterable, chunksize=None):
        """Unique version of ThreadPool.starmap."""
        return self.starmap(func, self._uiter(iterable), chunksize)

    def ustarmap_async(self, func, iterable, chunksize=None,
                       callback=None, error_callback=None):
        """Unique version of ThreadPool.starmap_async."""
        return self.starmap_async(func, self._uiter(iterable), chunksize,
                                  callback, error_callback)

    def close(self):
        """Same as Pool.close."""
        self._closed = True
        return self._pool.close()

    def terminate(self):
        """Same as Pool.terminate."""
        self._closed = True
        return self._pool.terminate()

    def renew(self):
        """Terminate all workers and start a fresh, clean pool."""
        with self._lock:
            self.terminate()
            # CONSISTENCY FIX: __init__ builds a NamedPool carrying self.name;
            # renew() used a plain Pool and silently dropped the name.
            self._pool = NamedPool(self._processes, name=self.name)
            self._closed = False

    @property
    def closed(self):
        """True once close() or terminate() has been called."""
        return self._closed

    def __enter__(self):
        return self

    def __exit__(self, etype, value, tb):
        self.terminate()
# NOTE(review): this chunk opens mid-function — the enclosing def, the `if`
# matching the `else:` below, the `try:` and the loop producing `item` are
# all outside the visible source; indentation here is a best-effort
# reconstruction.
            # The 'time' field carries milliseconds; dropping the last three
            # digits yields a Unix timestamp in seconds.
            timeNow = int(str(item['time'])[0:-3])
            timeLocal = time.localtime(timeNow)
            comments['时间'] = time.strftime("%Y-%m-%d %H:%M:%S", timeLocal)
            write_to_csv(comments)
            print(item['content'])
        except Exception as error:
            print(error)
    else:
        # Non-200 response: just report the status code.
        print(req.status_code)


if __name__ == '__main__':
    _id = sys.argv[1]
    create_csv()
    pool = Pool(20)
    count = 0
    url = "http://music.163.com/weapi/v1/resource/comments/R_SO_4_{}/?csrf_token="
    musicURL = url.format(_id)
    total = fetchNum(musicURL, _id)
    # One request per 100 comments; +2 covers the remainder page.
    nums = int(total / 100) + 2
    args = []
    for num in range(1, nums):
        args.append((musicURL, _id, str((num - 1) * 100)))
    pool.starmap_async(fetch, args)
    pool.close()
    pool.join()
    # NOTE(review): `count` is never incremented in this chunk — workers run
    # in other processes — so this prints 0; verify intent.
    print(count)
    end = time.time()
    print(end - start)
def extractAllAndCompare(self):
    """Scrape all ten news sources in parallel, then show the comparison graph.

    Runs extractorRunner.runScrapper for each source on a 4-thread pool while
    keeping the Qt UI alive, writes the collected value-file names to today's
    allValueFiles.txt, launches ui.comparingAll as a subprocess and blocks
    until the best-source file appears.
    """
    self.customScoreLabel.hide()
    names = [
        "times of india", "the hindu", "guardian", "new york times",
        "google news", "cnn", "reddit news", "reddit world news",
        "telegraph", "bbc"
    ]
    today = str(datetime.date.today())
    directory = "./data/allFiles/" + today
    if not os.path.exists(directory):
        os.makedirs(directory)
    storageFile = directory + "/allValueFiles.txt"
    if not os.path.exists(storageFile):
        # Passing it since argument is required, nothing to sync
        e = multiprocessing.Event()
        # To get score file from threaded process
        queue = multiprocessing.Queue()
        pool = ThreadPool(4)
        results = pool.starmap_async(extractorRunner.runScrapper,
                                     zip(names, repeat(e), repeat(queue)),
                                     chunksize=1)
        while not results.ready():
            # NOTE(review): results._number_left is a private AsyncResult
            # attribute; this may break on a Python upgrade.
            self.extractingAllLabel.setText(
                _fromUtf8(
                    "<html><head/><body><p align=\"center\"><span style=\" font-size:14pt;font-family:'Lucida Calligraphy';\
font-weight:600; color:black;\">\
Extracting And Analyzing All Sources: "
                    + str(10 - results._number_left) +
                    "/10<u></u></span></p></body></html>"))
            self.extractingAllLabel.show()
            QApplication.processEvents()  # keep the Qt UI responsive
        pool.close()
        pool.join()  # Wait for all threads to return
        outputfiles = ""
        for i in range(10):
            # One file name per source; queue.get() blocks until available.
            outputfiles += " " + queue.get()
        with open(storageFile, "w") as temp:
            temp.write(outputfiles)
        self.extractingAllLabel.hide()
        QApplication.processEvents()
    # Show comparision graph
    outputProcess = subprocess.Popen("python -m ui.comparingAll " +
                                     storageFile)
    # Check if best source file exists
    today = str(datetime.date.today())
    directory = "./data/BestSource/" + today
    if not os.path.exists(directory):
        os.makedirs(directory)
    best_source_file = directory + "/source.txt"
    # Poll until the subprocess writes the best-source file.
    while not os.path.exists(best_source_file):
        time.sleep(0.5)
    with open("./data/done.txt", "w") as file:  # For server service
        file.write("Done")
    outputProcess.wait()
    QApplication.processEvents()
# NOTE(review): this chunk opens mid-__init__ of a Zip wrapper class — the
# class header, the `def __init__` line and the assignment of
# self.file_name are outside the visible source; indentation is a
# best-effort reconstruction.
        self.path = file_name.split('.')[0]  # extraction target directory
        self.lock = RLock()
        self.pwd = pwd
        self.zip = zipfile.ZipFile(self.file_name)
        self.zip.setpassword(pwd=self.pwd)
        self.files = self.zip.namelist()

    # def get_file(self):
    #     for self.file in self.files:
    #         yield self.file

    def un_zip(self, file):
        """Extract a single archive member into self.path."""
        # with self.lock:
        self.zip.extract(file, self.path)

    def extr_all(self):
        """Extract every member at once using the stored password."""
        self.zip.extractall(path=self.path, pwd=self.pwd)


if __name__ == "__main__":
    file_name = r"/Users/lihailong/Desktop/未命名文件夹/用户画像.zip"
    orpwd = "cnd2018知识库"
    pwd = orpwd.encode("gbk")
    unzip = Zip(file_name, pwd)
    po = Pool(10)
    files = unzip.files
    # unzip.extr_all()
    # zip(files) wraps each member name in a 1-tuple for starmap_async.
    # NOTE(review): a multiprocessing Pool must pickle `unzip.un_zip`, and
    # ZipFile holds an open file handle — presumably this relies on fork();
    # it will fail under the spawn start method. Verify on the target OS.
    po.starmap_async(unzip.un_zip, zip(files))
    po.close()
    po.join()
class ImageHandler:
    """Collects (src, dst) image path pairs and resizes them on a thread pool.

    Configuration arrives via **kwargs injected into the instance dict —
    presumably src_images, workers, size and quality; TODO confirm against
    the callers, nothing in this chunk sets them explicitly.
    """

    dir_postfix = 'imgc'       # suffix for mirrored output directories
    dir_files = '_files-imgc'  # output subdir for single-file inputs
    imgs_done = 0              # class-level defaults, shadowed per instance
    imgs_total = 0

    def __init__(self, queue=None, **kwargs):
        # All configuration is injected straight into the instance dict.
        self.__dict__.update(kwargs)
        self.images = []  # list of (src, dst) tuples
        # threading.Thread.__init__(self)
        # self.queue = queue
        # self._stop = threading.Event()
        self.finished = False
        self.generate_image_paths()

    def generate_image_paths(self):
        """Build self.images from self.src_images (directories walked recursively)."""
        for arg in self.src_images:
            # process directory path
            if os.path.isdir(arg):
                src = os.path.abspath(arg)
                dst = os.path.abspath("{}-{}".format(src, self.dir_postfix))
                if not os.path.exists(dst):
                    os.makedirs(dst)  # target path must always exist
                for root, dirs, files in os.walk(src):
                    for d in dirs:
                        src_dir = os.path.join(root, d)
                        # NOTE(review): dst_dir is computed but never used —
                        # directories are actually created below via parent_dir.
                        dst_dir = src_dir.replace(src, dst)
                    for f in files:
                        if extension(f) not in IMAGE_EXTS:
                            continue
                        # if os.path.splitext(f)[1][1:].strip().lower() not in IMAGE_EXTS: continue
                        src_file = os.path.join(root, f)
                        dst_file = src_file.replace(src, dst)
                        parent_dir = os.path.dirname(dst_file)
                        print(parent_dir)
                        if not os.path.exists(parent_dir):
                            os.makedirs(parent_dir)
                        print(dst_file)
                        self.append((src_file, dst_file))
            # process file path
            elif os.path.isfile(arg):
                src = os.path.abspath(arg)
                dst_dir = os.path.join(os.path.dirname(src), self.dir_files)
                if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
                dst = os.path.join(dst_dir, os.path.basename(arg))
                self.append((src, dst))
        # print (self.images)
        self.imgs_total = len(self.images)

    # def stop(self):
    #     self._stop.set()

    # def stopped(self):
    #     return self._stop.isSet()

    def terminate_pool(self):
        """Abort outstanding work unless already finished; returns the finished flag."""
        if not self.finished:
            self.pool.close()
            self.pool.terminate()
        return self.finished

    def on_finish(self, x):
        # starmap_async success callback.
        print("{}: finished successfully!".format(self.__class__.__name__))
        self.finished = True
        self.error = False

    def on_error(self, x):
        # starmap_async error callback; re-raises inside the pool's result
        # handler thread. NOTE(review): the exception will not reach the
        # caller of run() — confirm this is intended.
        raise x
        # print("{}: error - {}".format(self.__class__.__name__, str(x)))
        # self.finished = True
        # self.error = True

    def run(self):
        """Kick off asynchronous resizing of all collected images (non-blocking)."""
        if not self.images:
            print("No images found!")
            sys.exit()
        # print("Images found: {}".format(self.imgs_total))
        print("Images found: {}".format(len(self.images)))
        # print(self.images)
        self.pool = ThreadPool(self.workers)
        time_start = time.time()
        # self.pool.starmap(self.resize_image, self.images)
        self.pool.starmap_async(self.resize_image, self.images, 1,
                                self.on_finish, self.on_error)
        # pool.close()
        # pool.join()
        # time_end = time.time()
        # # window = tkinter.Tk()
        # # window.wm_withdraw()
        # # tkinter.messagebox.showinfo('imgc - work finished', 'Images compressed: {}'.format(self.imgs_done))
        # print("Time elapsed: {}".format(time_end - time_start))

    def append(self, path_tuple):
        # path_tuple is a (src, dst) pair.
        self.images.append(path_tuple)

    def print_status(self, dst):
        """Print a one-line done/total progress entry for *dst*."""
        print("[{}/{}] processed image {}".format(self.imgs_done,
                                                  self.imgs_total,
                                                  os.path.split(dst)[1]))

    # FIXME: add watermark feature
    # def watermark():
    #     try:
    #         watermark = Image.open(self.wmfile)
    #         # if extension(self.wmfile) in "png":
    #         #     watermark.load()
    #         wm_oldsize = watermark.size
    #         wm_newdim = min(wnew, hnew) * 0.2
    #         wm_oldindex, wm_olddim = min(enumerate(wm_oldsize))
    #         print(wm_oldindex, wm_olddim)
    #         wm_ratio = wm_newdim/wm_olddim
    #         wm_newsize = (int(wm_oldsize[0] * wm_ratio),
    #                       int(wm_oldsize[1] * wm_ratio))
    #         print(wm_newsize)
    #         watermark = watermark.resize(wm_newsize)
    #         # mask = watermark.convert("L").point(lambda x: min(x, 50))
    #         # .point(lambda x: 240)
    #         # mask.show()
    #         # watermark.putalpha(mask)
    #         im.paste(watermark, (0, 0), watermark)
    #     except AttributeError:
    #         pass

    def resize_image(self, src, dst):
        """Worker: resize one image and save it; I/O errors are logged, not raised."""
        # print("resizing")
        pattern = self.size
        quality = self.quality
        try:
            im = Image.open(src)
            new_size = ImageSize.parse(pattern, image=im)
            # print('new_size:', new_size)
            im = im.resize(new_size, Image.BICUBIC)
            if os.path.splitext(dst)[1][1:].strip().lower() not in IMAGE_JPG:
                im.save(dst)
            else:
                # quality supported by jpegs only
                im.save(dst, 'JPEG', quality=quality)
            print("saved: %s" % dst)
            # NOTE(review): += on shared state from multiple pool threads is
            # not atomic; the count may drift under load.
            self.imgs_done += 1
        except OSError as err:
            # e.g. file is corrupt and cannot be open
            print('ERROR')
            print(err)
        try:
            # Optional progress hook; falls back to plain printing when the
            # instance was built without one.
            self.on_image_processed(self.imgs_done, self.imgs_total)
            print("method called")
        except AttributeError:
            self.print_status(dst)