def compute_coherent_state_multi(n, m, n0, q, c, t_final, t_step, a_range, mag_range):
    """For a coherent state, we assume that the magnetization and seed are what is
    conserved, NOT n0. The probability is the product of the probabilities for m and n."""
    # Compute all the elements that need to be computed
    n_ele = np.arange(n - a_range, n + a_range + 1, 1)
    m_ele = np.arange(m - mag_range, m + mag_range + 1, 1)
    nm = [[nn, mm] for nn in n_ele for mm in m_ele]
    pairs = n - n0
    # Loop through and compute details, save to multidimensional arrays
    mean = np.zeros((len(nm), int(t_final / t_step)))
    std = np.zeros((len(nm), int(t_final / t_step)))
    total_prob = 0
    # Set up multiprocessing
    queue = Queue(5)
    procs = {}
    for i, state in enumerate(nm):
        prob = calc_prob(n, m, state[0], state[1])
        total_prob += prob
        procs[i] = Process(target=calc_state_with_prob,
                           args=(queue, int(state[0]), int(state[1]),
                                 int(state[0] - pairs - state[1]),
                                 q, c, t_final, t_step, prob))
        procs[i].start()
    # Get answers
    for i in range(len(nm)):
        t, mean[i], std[i] = queue.get()
    return t, np.sum(mean, axis=0) / total_prob, np.sum(std, axis=0) / total_prob
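
# The calc_state_with_prob worker is not shown above. A minimal sketch of the
# contract it would have to satisfy, assuming a hypothetical compute_state()
# helper that returns (t, mean, std) for one basis state: each worker weights
# its result by the state's probability, so the parent can simply sum the rows
# and divide by total_prob.
def calc_state_with_prob(queue, n, m, k, q, c, t_final, t_step, prob):
    t, mean, std = compute_state(n, m, k, q, c, t_final, t_step)  # placeholder helper
    queue.put((t, prob * mean, prob * std))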
def start_services(services, codegen, importcode, tornadoapp, forker, boot_function,
                   template_path="."):
    # Loop through all services:
    #  - create hosts file
    #  - update kill-file for them (processes)
    #  - create all files (websocket server, and js/python client files)
    host_file = {}
    kill_file = {}
    queue = Queue()
    for service in services:
        websocket_server_code = codegen(
            service,
            "websocket_server_template.tpl",
            loader=template.Loader(template_path)
        )
        websocket_server_module = importcode(websocket_server_code)
        websocket_server_class_name = "%s_websocket" % (service["servicename"])
        websocket_server_application = tornadoapp(
            [(r"/", getattr(websocket_server_module, websocket_server_class_name))]
        )
        websocket_server_process = forker(
            0, queue, boot_function, websocket_server_application,
            service["servicename"], 0, service
        )
        print websocket_server_process
        print "WSS, ", queue.qsize()
    return queue
class Manager(Process):
    def __init__(self, wnum=3):
        Process.__init__(self)
        self.s2m = Queue()  # messages the Manager receives from workers and the server
        self.m2w = Queue()  # messages sent to workers
        self.works = [0] * wnum
        for i in range(wnum):
            self.works[i] = Worker(self.s2m, self.m2w)
            self.works[i].start()

    def stop(self):
        for w in self.works:
            self.m2w.put(None)
        # FIXME: should this call worker.terminate()?

    """
    Video Site: bilibili.com
    Title:      【BD‧1080P】【高分剧情】鸟人-飞鸟侠 2014【中文字幕】
    Type:       Flash video (video/x-flv)
    Size:       3410.85 MiB (3576536465 Bytes)

    Downloading 【BD‧1080P】【高分剧情】鸟人-飞鸟侠 2014【中文字幕】.flv ...
    0.7% ( 22.2/3410.9MB) [#
    """

    def run(self):
        # Reset DB flags
        kuos = get_by_flag(WORK)
        for uo in kuos:
            set_flag(uo.mid, STOP)
        tuos = get_by_flag(WAIT)
        for uo in tuos:
            set_flag(uo.mid, STOP)

        while True:
            msg = self.s2m.get()
            # print("pid=%s, self.s2m.get=%s" % (os.getpid(), repr(msg)))
            who = msg.get("who")
            if who == "worker":
                self.handle_mid(msg["mid"], msg["dat"])
            elif who == "svr":
                # self.m2w.put(msg['mid'])
                self.m2w.put(pick_url(msg["mid"]))
            elif who == "error":
                sys.stderr.write(msg["dat"])  # FIXME
                sys.stderr.write("\n")
            else:
                sys.stderr.write("Unknown msg:\n")
                sys.stderr.write(repr(msg))
                sys.stderr.write("\n")

    def handle_mid(self, mid, dat):
        print(dat)
        if dat.startswith("Process "):
            dd = dat.split()
            act = dd[2].lower()
            print("mid=%s, act=%s" % (mid, act))
            set_flag(mid, act)
        elif dat.startswith("Downloading "):
            print("mid=[%s]" % mid)
            update_filename(mid, dat[12:-5])
def recoverPRNGState(cookie, timeMillisEstimate, PRNGMillisEstimate, IPAddr, serverPort,
                     numWorkers, chunkSize):
    global PRNGMillisDelta
    global initalSeek

    q = Queue(0)
    i = 0

    # Enqueue the tail chunk if the search range is not a multiple of chunkSize.
    if PRNGMillisDelta % chunkSize > 0:
        q.put((PRNGMillisEstimate + PRNGMillisDelta - PRNGMillisDelta % chunkSize,
               PRNGMillisEstimate + PRNGMillisDelta, initalSeek))

    for i in range(PRNGMillisEstimate,
                   PRNGMillisEstimate + PRNGMillisDelta - PRNGMillisDelta % chunkSize,
                   chunkSize):
        q.put((i, i + chunkSize, initalSeek))

    desc = []
    seedValue = Value('d', 0)

    # Start worker processes and assign work.
    for i in range(numWorkers):
        p = Process(target=recoverPRNGStateWorker,
                    args=(cookie, timeMillisEstimate, q, IPAddr, serverPort, seedValue))
        p.start()
        desc.append(p)

    # Wait for worker processes to finish.
    for p in desc:
        p.join()

    return long(seedValue.value)
def main():
    arg = parse_args()
    folder = arg.fold
    core = arg.core
    output = arg.out
    start = arg.start
    if start:
        start = start.replace('-', '') + '000000'

    task_queue = Queue()
    result_queue = Queue()
    task_count = create_task(folder, task_queue, start)
    print task_count

    for i in range(core):
        Process(target=worker, args=(task_queue, result_queue)).start()

    # Send stop signal
    for i in range(core):
        task_queue.put('STOP')

    # Print result
    out_files = {}
    for i in range(task_count):
        actions = result_queue.get()
        user = actions["user"]
        for day in actions["actions"]:
            if day not in out_files:
                out_files[day] = open(os.path.join(output, day), "w")
            out_files[day].write(json.dumps({"user": user,
                                             "actions": actions["actions"][day]}) + "\n")

    for day in out_files:
        out_files[day].flush()
        out_files[day].close()
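
# The worker target is defined elsewhere. A minimal sketch of the contract
# main() relies on: consume tasks until the 'STOP' sentinel and push one
# {"user": ..., "actions": {day: [...]}} dict per task. process_user_file is
# a placeholder name, not part of the original code.
def worker(task_queue, result_queue):
    for task in iter(task_queue.get, 'STOP'):
        result_queue.put(process_user_file(task))  # placeholder helper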
def processFiles(patch_dir): root = os.getcwd() glbl.data_dirs = {} if root != patch_dir: working_path = root+"/"+patch_dir else: working_path = root for path, dirs, files in os.walk(working_path): if len(dirs) == 0: glbl.data_dirs[path] = '' # Multiprocessing Section ######################################### Qids = glbl.data_dirs.keys() manager = Manager() # creates shared memory manager object results = manager.dict() # Add dictionary to manager, so it can be accessed across processes nextid = Queue() # Create Queue object to serve as shared id generator across processes for qid in Qids: nextid.put(qid) # Load the ids to be tested into the Queue for x in range(0,multiprocessing.cpu_count()): # Create one process per logical CPU p = Process(target=processData, args=(nextid,results)) # Assign process to processCBR function, passing in the Queue and shared dictionary glbl.jobs.append(p) # Add the process to a list of running processes p.start() # Start process running for j in glbl.jobs: j.join() # For each process, join them back to main, blocking on each one until finished # write out results c = 1 sets = results.keys() sets.sort() for x in sets: if results[x] != 'None': FINAL = open('result'+str(c)+'.txt','w') n = "\n************************************************************************************************\n" FINAL.write(n+"* "+x+' *\n'+n+results[x]+"\n") FINAL.close() c += 1
def main(world_folder, replacement_file_name): global replacements world = nbt.world.WorldFolder(world_folder) logger = configure_logging() logger.info("Starting processing of %s", world_folder) if not isinstance(world, nbt.world.AnvilWorldFolder): logger.error("%s is not an Anvil world" % (world_folder)) return 65 # EX_DATAERR if replacement_file_name != None: logger.info("Using Replacements file: %s", replacement_file_name) with open(replacement_file_name, 'r') as replacement_file: replacements = json.load(replacement_file) # get list of region files, going to pass this into function to process region region_files = world.get_regionfiles(); # Parallel q = Queue() lp = threading.Thread(target=logger_thread, args=[q]) lp.start() p = Pool(initializer=process_init, initargs=[q,replacements], maxtasksperchild=1) region_data = p.map(process_region, region_files) # Map has finished up, lets close the logging QUEUE q.put(None) lp.join() # Not Parallel # region_data = map(process_region, region_files) # Write output data write_block_data(region_data,"output.txt") return 0
def test():
    NUMBER_OF_PROCESSES = 4
    TASKS1 = [(mul, (i, 7)) for i in range(20)]
    TASKS2 = [(plus, (i, 8)) for i in range(10)]

    # Create queues
    task_queue = Queue()
    done_queue = Queue()

    # Submit tasks
    for task in TASKS1:
        task_queue.put(task)

    # Start worker processes
    for i in range(NUMBER_OF_PROCESSES):
        Process(target=worker, args=(task_queue, done_queue)).start()

    # Get and print results
    print 'Unordered results:'
    for i in range(len(TASKS1)):
        print '\t', done_queue.get()

    # Add more tasks using `put()`
    for task in TASKS2:
        task_queue.put(task)

    # Get and print some more results
    for i in range(len(TASKS2)):
        print '\t', done_queue.get()

    # Tell child processes to stop
    for i in range(NUMBER_OF_PROCESSES):
        task_queue.put('STOP')
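
# mul, plus and worker are referenced but not shown above. A minimal sketch
# consistent with how test() uses them: the worker pulls (function, args)
# tuples until it sees the 'STOP' sentinel and pushes each result.
def mul(a, b):
    return a * b

def plus(a, b):
    return a + b

def worker(input_queue, output_queue):
    for func, args in iter(input_queue.get, 'STOP'):
        output_queue.put((func.__name__, args, func(*args)))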
def send_probe_requests(interface=None, ssid=None):

    # initialize shared memory
    results = Queue()

    # start sniffer before sending out probe requests
    p = Process(target=sniffer, args=(interface, results,))
    p.start()

    # give sniffer a chance to initialize so that we don't miss
    # probe responses
    time.sleep(3)

    # send out probe requests... sniffer will catch any responses
    ProbeReq(ssid=ssid, interface=interface)

    # make sure to get results from shared memory before allowing
    # sniffer to join with parent process
    probe_responses = results.get()

    # join sniffer with its parent process
    p.join()

    # return results
    return probe_responses
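
# The sniffer helper is assumed to come from elsewhere in the project. As a
# rough sketch of the contract send_probe_requests() expects -- capture for a
# fixed window, then put a single list of probe responses on the queue and
# exit -- here is one possible shape using scapy (an assumption, not
# necessarily what the original uses).
from scapy.all import sniff
from scapy.layers.dot11 import Dot11ProbeResp

def sniffer(interface, results, timeout=10):
    packets = sniff(iface=interface, timeout=timeout,
                    lfilter=lambda pkt: pkt.haslayer(Dot11ProbeResp))
    # The parent calls results.get() exactly once, so put one list.
    results.put(list(packets))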
def main():
    # Number of worker processes; don't change this, because each worker
    # calculates keys for exactly 100 games.
    # (You can change this if you know how; I'm too euphoric now to add more flexibility.)
    start = time()
    threads = 10
    for line in sys.stdin:
        # Parse stdin
        encryptedMessage, encryptedGames = line.strip().split(':')
        encryptedGames = encryptedGames.split('~')

        # Queue with keys
        q = Queue()

        # Worker processes
        for i in range(threads):
            p = Process(target=keysFinder, args=(encryptedGames[i * 100:(i + 1) * 100], q))
            p.start()

        # Number of workers already finished
        finished = 0
        keys = []
        while finished < threads:
            keys += q.get()
            finished += 1

        # From all keys, try which one decrypts a valid message
        em = binascii.unhexlify(encryptedMessage)
        found = False
        for key in keys:
            x = AES.new(key)
            dec = x.decrypt(em)
            if isCorrect(dec):
                found = True
                # Remove the padding and print. Voila!
                print removePadding(dec.strip())

    if sys.argv[1] == 'benchmark':
        print "Time elapsed: ", time() - start
def genPairs(PNGMaps, compareMaps):
    pairA = []
    pairB = []
    # Maximum possible fitness
    totalFitness = len(compareMaps)
    threadsA = []
    threadsB = []
    # Process-safe way to get parent PNGMaps
    queueA = Queue()
    queueB = Queue()
    # Create a list of processes, each producing a PNGMap
    for listMap in PNGMaps:
        threadA = Process(target=randPair, args=(PNGMaps, compareMaps, totalFitness, queueA))
        threadB = Process(target=randPair, args=(PNGMaps, compareMaps, totalFitness, queueB))
        threadA.start()
        threadB.start()
        threadsA.append(threadA)
        threadsB.append(threadB)
    # Get the parents from the queues
    while not len(pairA) == len(PNGMaps):
        pairA.append(queueA.get())
    while not len(pairB) == len(PNGMaps):
        pairB.append(queueB.get())
    # Join the processes with the current one
    for thread in threadsA:
        thread.join()
    for thread in threadsB:
        thread.join()
    # Return the pair of PNGMaps
    return pairA, pairB
def test_same_report_filtering(self, fromConfig, fromOptions, getLogger): def fake_virts(logger, config): new_fake_virt = Mock() new_fake_virt.config.name = config.name return new_fake_virt fromConfig.side_effect = fake_virts options = Mock() options.interval = 0 options.oneshot = True options.print_ = False options.log_dir = '' options.log_file = '' virtwho = VirtWho(self.logger, options, config_dir="/nonexistant") queue = Queue() # Create another report with same hash report2 = HostGuestAssociationReport(self.config, self.fake_report.association) self.assertEqual(self.fake_report.hash, report2.hash) def send(report): report.state = AbstractVirtReport.STATE_FINISHED # Put second report when the first is done queue.put(report2) return True virtwho.send = Mock(side_effect=send) virtwho.queue = queue virtwho.retry_after = 1 virtwho.configManager.addConfig(self.config) queue.put(self.fake_report) virtwho.run() self.assertEquals(virtwho.send.call_count, 1)
class UpDown:
    def __init__(self, down_workers=2, up_workers=2, db=None):
        self.down_workers_num = down_workers
        self.up_workers_num = up_workers
        self.db = db
        self.base_url = "http://eol.jsc.nasa.gov/SearchPhotos/"
        self.down_workers = []
        self.up_workers = []
        self.to_upload = []
        self.q = Queue()

    def down_worker(self, download_url, image_id):
        """
        Download images and update the database after the download has completed.
        """
        down = ImageDownload(self.base_url + download_url)
        down.find_urls()
        if down.dl():
            self.db.update_image_downloaded(image_id, down.file_name)

    def up_worker(self, mission_id):
        """
        Check once a minute for images that are downloaded but not yet uploaded.
        """
        while True:
            self.to_upload = self.db.get_to_upload(mission_id)
            if len(list(self.to_upload)) > 0:
                print "Found a file to upload!\n"
                self.to_upload = list(self.db.get_to_upload(mission_id))
                self.q.put(self.to_upload)
            else:
                print "No files to upload found!\n"
            time.sleep(60)
def run(self): logger.info('starting horizon agent') listen_queue = Queue(maxsize=settings.MAX_QUEUE_SIZE) pid = getpid() #If we're not using oculus, don't bother writing to mini try: skip_mini = True if settings.OCULUS_HOST == '' else False except Exception: skip_mini = True # Start the workers for i in range(settings.WORKER_PROCESSES): if i == 0: Worker(listen_queue, pid, skip_mini, canary=True).start() else: Worker(listen_queue, pid, skip_mini).start() # Start the listeners Listen(settings.PICKLE_PORT, listen_queue, pid, type="pickle").start() Listen(settings.UDP_PORT, listen_queue, pid, type="udp").start() # Start the roomba Roomba(pid, skip_mini).start() # Warn the Mac users try: listen_queue.qsize() except NotImplementedError: logger.info('WARNING: Queue().qsize() not implemented on Unix platforms like Mac OS X. Queue size logging will be unavailable.') # Keep yourself occupied, sucka while 1: time.sleep(100)
def test_report_hash_added_after_send(self, fromConfig, fromOptions, getLogger): # Side effect for fromConfig def fake_virts(logger, config): new_fake_virt = Mock() new_fake_virt.config.name = config.name return new_fake_virt fromConfig.side_effect = fake_virts options = Mock() options.interval = 0 options.oneshot = True options.print_ = False options.log_file = '' options.log_dir = '' virtwho = VirtWho(self.logger, options, config_dir="/nonexistant") def send(report): report.state = AbstractVirtReport.STATE_FINISHED return True virtwho.send = Mock(side_effect=send) queue = Queue() virtwho.queue = queue virtwho.retry_after = 1 virtwho.configManager.addConfig(self.config) virtwho.configManager.addConfig(self.second_config) queue.put(self.fake_report) queue.put(self.fake_domain_list) virtwho.run() self.assertEquals(virtwho.send.call_count, 2) self.assertEqual(virtwho.last_reports_hash[self.config.name], self.fake_report.hash) self.assertEqual(virtwho.last_reports_hash[self.second_config.name], self.fake_domain_list.hash)
def run(self): logger.info('starting horizon agent') listen_queue = Queue(maxsize=settings.MAX_QUEUE_SIZE) pid = getpid() # Start the workers for i in range(settings.WORKER_PROCESSES): Worker(listen_queue, pid).start() # Start the listeners Listen(settings.PICKLE_PORT, listen_queue, pid, type="pickle").start() Listen(settings.UDP_PORT, listen_queue, pid, type="udp").start() # Start the roomba Roomba(pid).start() # Warn the Mac users try: listen_queue.qsize() except NotImplementedError: logger.info('WARNING: Queue().qsize() not implemented on Unix platforms like Mac OS X. Queue size logging will be unavailable.') # Keep yourself occupied, sucka while 1: time.sleep(100)
class BackgroundProcess(object):
    """A background process that reads batches and stores them in a queue.

    The :meth:`main` method needs to be called in order to start reading
    batches into the queue. Note that this process will run indefinitely;
    start it as a :attr:`~multiprocessing.Process.daemon` to make sure it
    will get killed when the main process exits.

    Parameters
    ----------
    data_stream : :class:`.DataStream` or :class:`Transformer`
        The data stream from which to read batches.
    max_batches : int
        The maximum number of batches to store in the queue. If reached,
        the process will block until a batch is popped from the queue.

    """
    def __init__(self, data_stream, max_batches):
        self.data_stream = data_stream
        self.batches = Queue(max_batches)
        self.run_background = True

    def main(self):
        while True:
            iterator = self.data_stream.get_epoch_iterator()
            for batch in iterator:
                self.batches.put(batch)
            self.batches.put(StopIteration)

    def get_next_data(self):
        return self.batches.get()
def main():
    register_openers()
    printers = local_settings.PRINTERS
    statuses = []
    threads = []
    queue = Queue()

    for item in printers:
        p = multiprocessing.Process(target=clientprog, args=(queue, item))
        p.start()
        threads.append(p)

    for item in threads:
        try:
            item.join()
        except:
            pass

    while not queue.empty():
        statuses.append(queue.get())

    if len(statuses) == 0:
        print "Failed to connect to any printer"
        return

    data = {"printers": statuses, "timestamp": time.time()}
    json.dump(data, open("statuses.json", "w"))
    send(settings.UPLOAD_DESTINATION, "statuses.json")
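
# clientprog is defined elsewhere. A rough sketch of what main() expects from
# it: put one status dict per reachable printer on the queue, and put nothing
# on failure. query_printer is a placeholder, not part of the original code.
def clientprog(queue, printer):
    try:
        status = query_printer(printer)  # placeholder helper
    except Exception:
        return  # nothing is queued for unreachable printers
    queue.put(status)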
class TaskQueue:
    N = 4
    symb = string.ascii_letters + string.digits

    def __init__(self):
        self.tasks = Queue()
        self.done = Queue()
        self.results = {}
        self.processes = []
        for i in range(TaskQueue.N):
            self.processes.append(Process(target=self.run_tasks))
            self.processes[-1].start()
        threading.Thread(target=self.collect_results).start()

    def add(self, f, args):
        id = ''.join(random.choice(TaskQueue.symb) for i in range(15))
        self.tasks.put((id, f, args))
        return id

    def get(self, id):
        return self.results.pop(id, '_NotFound_')

    def run_tasks(self):
        for id, func, args in iter(self.tasks.get, 'STOP'):
            result = func(*args)
            self.done.put((id, result))

    def collect_results(self):
        for id, r in iter(self.done.get, 'STOP'):
            self.results[id] = r
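
# A minimal usage sketch (not from the original source), assuming the
# submitted function is defined at module level so the worker processes can
# pickle it. Results arrive asynchronously via the collector thread, so the
# caller polls until the id appears.
import time

def square(x):
    return x * x

tq = TaskQueue()
job_id = tq.add(square, (7,))
result = tq.get(job_id)
while result == '_NotFound_':
    time.sleep(0.1)
    result = tq.get(job_id)
print(result)  # 49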
def ParCalculate(systems,calc,cleanup=True,block=True,prefix="Calc_"): ''' Run calculators in parallel for all systems. Calculators are executed in isolated processes and directories. The resulting objects are returned in the list (one per input system). ''' if type(systems) != type([]) : sysl=[systems] else : sysl=systems if block : iq=Queue(len(sysl)+1) oq=Queue(len(sysl)+1) # Create workers for s in sysl: __PCalcProc(iq, oq, calc, prefix=prefix, cleanup=cleanup).start() # Put jobs into the queue for n,s in enumerate(sysl): iq.put([n,s]) # Protection against too quick insertion time.sleep(0.2) if verbose : print("Workers started:", len(sysl)) # Collect the results res=[] while len(res)<len(sysl) : n,s=oq.get() res.append([n,s]) #print("Got from oq:", n, s.get_volume(), s.get_pressure()) else : # We do not need the multiprocessing complications for non-blocking # workers. We just run all in sequence. basedir=os.getcwd() res=[] for n,s in enumerate(sysl): s.set_calculator(copy.deepcopy(calc)) s.get_calculator().block=block place=tempfile.mkdtemp(prefix=prefix, dir=basedir) os.chdir(place) s.get_calculator().working_dir=place #print("Start at :", place) if hasattr(calc, 'name') and calc.name=='Siesta': s.get_potential_energy() else: s.get_calculator().calculate(s) os.chdir(basedir) #print("Submited", s.get_calculator().calc_finished(), os.getcwd()) # Protection against too quick insertion time.sleep(0.2) res.append([n,s]) if verbose : print("Workers started:", len(sysl)) return [r for ns,s in enumerate(sysl) for nr,r in res if nr==ns]
def getFeatureMultiprocessing(subProcFunc, blwFile, outputFile, funcArgs, keyword=['Vietnamese_by_catalog', 'ppVietnamese_by_catalog']): START_TIME = time.time() # getFreqWordsForFileFromDict(['data/ppVietnamese_by_catalog/Easy/ct24/ct24 (100).txt',12.35,3, 4], 'data/TanSoTu.txt') # getDataNFeatureFromFile('test_data.txt', 'output/test_Vietnamese_output_classifier.csv', 'test') # X3 = getDataNFeatureFromFile('Difficult_data.txt', 'output/vietnamesewn_Difficult_output.csv', 3) # X1 = getDataNFeatureFromFile('Easy_data.txt','output/vietnamesewn_Easy_output.csv', 1) # X2 = getDataNFeatureFromFile('Normal_data.txt','output/vietnamesewn_Normal_output.csv', 2) _tempfile = open(blwFile, 'r') temp = _tempfile.read().splitlines() _tempfile.close() filesQueue = Queue() RESULT_QUEUE = Queue() for i in range(1, len(temp)): temp[i] = temp[i].split(',') temp[i][0] = re.sub(keyword[0], keyword[1], temp[i][0]) if not keyword[0] == '' and (not temp[i][0].find(keyword[-1]) > 0): print('[ERROR] processing ', temp[i][0]) print('sub', keyword[0], keyword[-1], re.sub(keyword[0], keyword[-1], temp[i][0])) return filesQueue.put(temp[i]) PROCESS_LOCK = Lock() myProcess = [] for processID in range(MAX_PROCESS): myProcess.append(Process(target=getDataNFeatureFromFileForAProc, args=(PROCESS_LOCK, RESULT_QUEUE, filesQueue, subProcFunc, funcArgs))) myProcess.append(Process(target=writeOutResult, args=(RESULT_QUEUE, outputFile))) for _process in myProcess: _process.start() for _process in myProcess: _process.join() print('total runtime:', time.time() - START_TIME)
class JobPool(object):
    """
    Pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=4):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_task, (self.message_queue,))
        atexit.register(self.clear)

    def add_analysis(self, analysis):
        """
        Add analysis to the pool.
        """
        analysis.set_started()
        self.message_queue.put(analysis)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()
def test_req_all_open_orders(self): result_queue = Queue() class MockClientSocket(ClientSocket): def __init__(self): ClientSocket.__init__(self) def open_order(self, req_id, contract, order): result_queue.put(req_id) result_queue.put(contract) result_queue.put(order) def open_order_end(self): result_queue.put('open_order_end') def order_status(self, req_id, status, filled, remaining, avg_fill_price, perm_id, parent_id, last_fill_price, client_id, why_held): result_queue.put(req_id) result_queue.put(status) result_queue.put(filled) result_queue.put(remaining) result_queue.put(avg_fill_price) result_queue.put(perm_id) result_queue.put(parent_id) result_queue.put(last_fill_price) result_queue.put(client_id) result_queue.put(why_held) client = MockClientSocket() client.connect() client.req_all_open_orders() while True: result = result_queue.get() self.assertIsNotNone(result) if result == 'open_order_end': break client.disconnect()
class MultiSegmentWriter(IndexWriter): def __init__(self, index, procs=2, **writerargs): self.index = index self.lock = index.storage.lock(index.indexname + "_LOCK") self.tasks = [] self.postingqueue = Queue() #self.resultqueue = Queue() names = [index._next_segment_name() for _ in xrange(procs)] self.tasks = [SegmentWritingTask(index.storage, index.indexname, segname, writerargs, self.postingqueue) for segname in names] for task in self.tasks: task.start() def add_document(self, **args): self.postingqueue.put(args) def cancel(self): for task in self.tasks: task.cancel() self.lock.release() def commit(self): procs = len(self.tasks) for _ in xrange(procs): self.postingqueue.put(None) for task in self.tasks: print "Joining", task task.join() self.index.segments.append(task.get_segment()) self.index.commit() self.lock.release()
def f(idx, q, r):
    path = "data%s" % (idx)
    os.makedirs(path)
    while True:
        item = q.get()
        if item.item_type == ITEM_QUIT:
            break
        count = 0
        localQueue = Queue()
        current = item.data
        while True:
            print current
            fo = urlopen(current)
            data = fo.read()
            name = "%s/%s" % (path, count)
            fw = open(name, "w")
            count = count + 1
            fw.write(data)
            fw.close()
            fo.close()
            p = MyHTMLParser()
            try:
                p.feed(data)
            except:
                pass
            for href in p.hrefs:
                print item.data, ": ", href
            try:
                current = localQueue.get_nowait()
            except:
                break
def likelihood_mp_simple(seqlens,fss,uon,bon,theta,seqnum,K,ufnum,bfnum,regtype,sigma): global _gradient grad = numpy.array(fss,copy=True) # data distribuition likelihood = numpy.dot(fss,theta) que1 = Queue() # for the likihood output que2 = Queue() # for the gradient output np = 0 subprocesses = [] corenum=multiprocessing.cpu_count() #corenum=1 if corenum>1: chunk=seqnum/corenum+1 else: chunk=seqnum starti=0 while starti < (seqnum): endi=starti+chunk if endi>seqnum: endi=seqnum p = Process(target=likelihoodthread_simple, args=(seqlens[starti:endi],uon[starti:endi],bon[starti:endi],theta,K,ufnum,bfnum,que1,que2)) p.start() np+=1 #print 'delegated %s:%s to subprocess %s' % (starti, endi, np) subprocesses.append(p) starti += chunk for i in range(np): likelihood += que1.get() for i in range(np): grad += que2.get() while subprocesses: subprocesses.pop().join() grad -= regularity_deriv(theta,regtype,sigma) _gradient = grad return likelihood - regularity(theta,regtype,sigma)
class Updater(Process): def __init__(self, maxsize=15): Process.__init__(self) #self.queue = Queue(maxsize) self.queue = Queue() self.queue_lock = Lock() self._exit = Event() def run(self): while not self._exit.is_set(): #with self.queue_lock: self.queue.put(self.receive()) #self.queue.put_nowait(self.receive()) #if self.queue.full(): # try: # self.queue.get_nowait() # except: # pass def stop(self): self._exit.set() # This leaves the process hanging on Windows #self.join(STOP_TIMEOUT) if self.is_alive(): #TODO make a nicer warning print 'Terminating updater:', self self.terminate() def receive(self): raise NotImplementedError
def test():
    queue = Queue()
    proc = Process(target=doNothing, args=(queue, ))
    proc.start()
    _logger.info("Started dummy process with PID %d", proc.pid)
    startCodeCheckerServerAttachedToPid(proc.pid)

    time.sleep(3)
    _logger.info("Allowing the dummy process to finish")
    queue.put(1)
    proc.join()

    if utils.isProcessRunning(proc.pid):
        _logger.warning("Dummy process %d was still running", proc.pid)
        proc.terminate()
        time.sleep(1)
        it.assertFalse(utils.isProcessRunning(proc.pid),
                       "Process %d is still running after terminating "
                       "it!" % proc.pid)

    time.sleep(1)
    _logger.info("Server should have died by now")

    with it.assertRaises(requests.ConnectionError):
        requests.post(it._url + '/get_diagnose_info')
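
# doNothing is the dummy target the test keeps alive until it is told to exit.
# A minimal sketch consistent with how the test drives it: it blocks on the
# queue until the parent puts a value, then returns so proc.join() completes.
def doNothing(queue):
    queue.get()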
def likelihood_multithread_O(seqlens,fss,uon,bon,theta,seqnum,K,ufnum,bfnum): # multithread version of likelihood '''conditional log likelihood log p(Y|X)''' likelihood = numpy.dot(fss,theta) thetab=theta[0:bfnum] thetau=theta[bfnum:] que = Queue() np = 0 subprocesses = [] corenum=multiprocessing.cpu_count() #corenum=1 if corenum>1: chunk=seqnum/corenum+1 else: chunk=seqnum starti=0 while starti < (seqnum): endi=starti+chunk if endi>seqnum: endi=seqnum p = Process(target=likelihoodthread, args=(seqlens,uon,bon,thetau,thetab,seqnum,K,ufnum,bfnum,starti,endi,que)) p.start() np+=1 #print 'delegated %s:%s to subprocess %s' % (starti, endi, np) subprocesses.append(p) starti += chunk for i in range(np): likelihood += que.get() while subprocesses: subprocesses.pop().join() return likelihood - regularity(theta)
class YaraJobPool(object): """ Yara pool container. """ pool = None message_queue = None def __init__(self, max_instances=3): self.message_queue = Queue() self.pool = Pool(max_instances, execute_yara_task, (self.message_queue,)) atexit.register(self.clear) def add_yara_task(self, yara_task): """ Adds the yara task. """ self.message_queue.put(yara_task) def clear(self): """ Pool cleanup. """ self.pool.terminate() self.pool.join()
    for i in generate("", "", 0, length):
        queue.put(i)


def worker(queue, counter):
    while True:
        if queue.empty():
            break
        else:
            pattern = queue.get()
            if test_excessive_pattern(pattern) is not None:
                counter.increment(2)
            else:
                counter.increment(1)


if __name__ == '__main__':
    # for leng in range(15, 22):
    leng = 20
    c = Counter()
    queue = Queue(60000)
    writer = Process(target=feeder_thread, args=(queue, leng))
    writer.start()
    processes = [Process(target=worker, args=(queue, c)) for i in range(3)]
    for i in processes:
        i.daemon = True
        i.start()
    for i in processes:
        i.join()
    print((leng + 1, c.value))
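
# The Counter used above is not shown. A minimal process-safe sketch that is
# consistent with how it is used here (increment(n) and .value), built on a
# shared multiprocessing.Value; the real implementation may differ.
from multiprocessing import Value, Lock

class Counter(object):
    def __init__(self):
        self._val = Value('i', 0)
        self._lock = Lock()

    def increment(self, n=1):
        with self._lock:
            self._val.value += n

    @property
    def value(self):
        return self._val.value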
def genQueue(num):
    queries = query.genQueries(num)
    q = Queue()
    for i in queries:
        q.put(i)
    return q
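
# A short usage sketch (not from the original source). run_query is a
# placeholder for the real per-query work; draining until Empty is safe here
# only because genQueue() fills the queue completely before any consumer starts.
from multiprocessing import Process
from queue import Empty

def handle_queries(q):
    while True:
        try:
            item = q.get_nowait()
        except Empty:
            break
        run_query(item)  # placeholder helper

if __name__ == '__main__':
    q = genQueue(100)
    procs = [Process(target=handle_queries, args=(q,)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()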
def make_queue(cls, queue_name):
    if queue_name in cls.queues:
        return
    q = Queue()
    cls.queues[queue_name] = q
if __name__ == '__main__': found_dirs = [file for file in glob('MicrosoftGestureDataset-RC/data/*.tagstream')] print('Processing %d files...' % (len(found_dirs))) data_set = 'MSRC12' h5file = h5.File(data_set+"v1.h5", "w") subjects = set() actions = set() max_frame_count = 0 num_procs = 4 # Create queues task_queue = Queue() done_queue = Queue() # Submit tasks for found_dir in found_dirs: task_queue.put(found_dir) # Start worker processes print('Spawning processes...') for _ in range(num_procs): Process(target=worker, args=(task_queue, done_queue)).start() # Get and print results print('Processed Files:') t = trange(len(found_dirs), dynamic_ncols=True) seq_num = 0
def __init__(self, options):
    super(Combine, self).__init__(options)
    self.input_observer = Combine.InputObserver(self)
    self.notify_counter = Queue()
    self.notify_counter.put(0)
    self.queue = Queue()
class Xgjoin(Join): def __init__(self, vars): self.left_table = dict() self.right_table = dict() self.qresults = Queue() self.vars = vars # Second stage settings self.secondStagesTS = [] self.lastSecondStageTS = float("-inf") self.timeoutSecondStage = 100000000 self.sourcesBlocked = False # Main memory settings self.memorySize = 100000000 # Represents the main memory size (# tuples).OLD:Represents the main memory size (in KB). self.fileDescriptor_left = {} self.fileDescriptor_right = {} self.memory_left = 0 self.memory_right = 0 def instantiate(self, d): newvars = self.vars - set(d.keys()) return Xgjoin(newvars) def instantiateFilter(self, instantiated_vars, filter_str): newvars = self.vars - set(instantiated_vars) return Xgjoin(newvars) def execute(self, left, right, out): # Executes the Xgjoin. #print "gjoin!" self.left = left self.right = right self.qresults = out # Initialize tuples. tuple1 = None tuple2 = None # Create alarm to go to stage 2. signal.signal(signal.SIGALRM, self.stage2) # Get the tuples from the queues. while (not (tuple1 == "EOF") or not (tuple2 == "EOF")): # Try to get and process tuple from left queue. if (not (tuple1 == "EOF")): try: tuple1 = self.left.get(False) #print "tuple1", tuple1 signal.alarm(self.timeoutSecondStage) self.stage1(tuple1, self.left_table, self.right_table) self.memory_right += 1 except Empty: # Empty: in tuple1 = self.left.get(False), when the queue is empty. pass except TypeError: # TypeError: in resource = resource + tuple[var], when the tuple is "EOF". pass except IOError: # IOError: when a tuple is received, but the alarm is fired. self.sourcesBlocked = False pass # Try to get and process tuple from right queue. if (not (tuple2 == "EOF")): try: tuple2 = self.right.get(False) #print "tuple2", tuple2 signal.alarm(self.timeoutSecondStage) self.stage1(tuple2, self.right_table, self.left_table) self.memory_left += 1 except Empty: # Empty: in tuple2 = self.right.get(False), when the queue is empty. pass except TypeError: # TypeError: in resource = resource + tuple[var], when the tuple is "EOF". pass except IOError: # IOError: when a tuple is received, but the alarm is fired. self.sourcesBlocked = False pass if (len(self.left_table) + len(self.right_table) >= self.memorySize): self.flushRJT() #print "Flushed RJT!" # Turn off alarm to stage 2. signal.alarm(0) # Perform the last probes. self.stage3() def stage1(self, tuple, tuple_rjttable, other_rjttable): #print " Stage 1: While one of the sources is sending data." if (tuple != "EOF"): # Get the resource associated to the tuples. resource = '' #print(tuple) for var in self.vars: if var in tuple: resource = resource + str(tuple[var]) # Probe the tuple against its RJT table. probeTS = self.probe(tuple, resource, tuple_rjttable) # Create the records. record = Record(tuple, probeTS, time(), float("inf")) # Insert the record in the other RJT table. if resource in other_rjttable: other_rjttable.get(resource).updateRecords(record) other_rjttable.get(resource).setRJTProbeTS(probeTS) #other_rjttable.get(resource).append(record) else: tail = RJTTail(record, probeTS) other_rjttable[resource] = tail #other_rjttable[resource] = [record] def stage2(self, signum, frame): #print " Stage 2: When both sources become blocked." self.sourcesBlocked = True # Get common resources. resources1 = set(self.left_table.keys()) & set( self.fileDescriptor_right.keys()) resources2 = set(self.right_table.keys()) & set( self.fileDescriptor_left.keys()) # Iterate while there are common resources and both sources are blocked. 
while ((resources1 or resources2) and self.sourcesBlocked): if (resources1): resource = resources1.pop() rjts1 = self.left_table[resource].records for rjt1 in rjts1: probed = self.probeFile(rjt1, self.fileDescriptor_right, resource, 2) if (probed): rjt1.probeTS = time() elif (resources2): resource = resources2.pop() rjts1 = self.right_table[resource].records for rjt1 in rjts1: probed = self.probeFile(rjt1, self.fileDescriptor_left, resource, 2) if (probed): rjt1.probeTS = time() # End of second stage. self.lastSecondStageTS = time() self.secondStagesTS.append(self.lastSecondStageTS) # fd_left = len(set(map(FileDescriptor.getSize, self.fileDescriptor_left.values()))) # fd_right = len(set(map(FileDescriptor.getSize, self.fileDescriptor_right.values()))) # count = 0 # # while ((count < fd_left + fd_right) and self.sourcesBlocked): # # (largestRJTs, table) = self.getLargestRJTs(count) # #print "Largests RJT:", largestRJTs # common_resources = set(largestRJTs.keys()) & set(table.keys()) # print "Common R:", common_resources # for resource in common_resources: # rjts1 = table[resource].records # for rjt1 in rjts1: # self.probeFile(rjt1, largestRJTs, resource, 2) # # count = count + 1 # # self.lastSecondStageTS = time() # self.secondStagesTS.append(self.lastSecondStageTS) # print "----------------END Second Stage!" def stage3(self): #print "Stage 3: When both sources sent all the data." # RJTs in main (left) memory are probed against RJTs in secondary (right) memory. common_resources = set(self.left_table.keys()) & set( self.fileDescriptor_right.keys()) for resource in common_resources: rjts1 = self.left_table[resource].records for rjt1 in rjts1: self.probeFile(rjt1, self.fileDescriptor_right, resource, 3) # RJTs in main (right) memory are probed against RJTs in secondary (left) memory. common_resources = set(self.right_table.keys()) & set( self.fileDescriptor_left.keys()) for resource in common_resources: rjts1 = self.right_table[resource].records for rjt1 in rjts1: self.probeFile(rjt1, self.fileDescriptor_left, resource, 3) # RJTs in secondary memory are probed to produce new results. common_resources = set(self.fileDescriptor_left.keys()) & set( self.fileDescriptor_right.keys()) for resource in common_resources: file1 = open(self.fileDescriptor_right[resource].file.name) rjts1 = file1.readlines() for rjt1 in rjts1: (tuple1, probeTS1, insertTS1, flushTS1) = rjt1.split('|') self.probeFile( Record(eval(tuple1), float(probeTS1), float(insertTS1), float(flushTS1)), self.fileDescriptor_left, resource, 3) file1.close() for resource in common_resources: file1 = open(self.fileDescriptor_left[resource].file.name) rjts1 = file1.readlines() for rjt1 in rjts1: (tuple1, probeTS1, insertTS1, flushTS1) = rjt1.split('|') self.probeFile( Record(eval(tuple1), float(probeTS1), float(insertTS1), float(flushTS1)), self.fileDescriptor_right, resource, 3) file1.close() # Delete files from secondary memory. for resource in self.fileDescriptor_left: remove(self.fileDescriptor_left[resource].file.name) for resource in self.fileDescriptor_right: remove(self.fileDescriptor_right[resource].file.name) # Put EOF in queue and exit. self.qresults.put("EOF") def probe(self, tuple, resource, rjttable): # Probe a tuple against its corresponding table. probeTS = time() # If the resource is in table, produce results. 
if resource in rjttable: rjttable.get(resource).setRJTProbeTS(probeTS) list_records = rjttable[resource].records for record in list_records: res = {} res.update(record.tuple) #res = record.tuple.copy() res.update(tuple) self.qresults.put(res) #print hex(id(self)), "res:", res return probeTS def probeFile(self, rjt1, filedescriptor2, resource, stage): # Probe an RJT against its corresponding partition in secondary memory. file2 = open(filedescriptor2[resource].file.name, 'r') rjts2 = file2.readlines() st = "" probed = False for rjt2 in rjts2: (tuple2, probeTS2, insertTS2, flushTS2) = rjt2.split('|') probedStage1 = False probedStage2 = False #Checking Property 2: Probed in stage 2. for ss in self.secondStagesTS: if (float(flushTS2) < rjt1.insertTS and rjt1.insertTS < ss and ss < rjt1.flushTS): probedStage2 = True break # Checking Property 1: Probed in stage 1. if (rjt1.probeTS < float(flushTS2)): probedStage1 = True # Produce result if it has not been produced. if (not (probedStage1) and not (probedStage2)): res = rjt1.tuple.copy() res.update(eval(tuple2)) self.qresults.put(res) probed = True # Update probeTS of tuple2. stprobeTS = "%.40r" % (time()) st = st + tuple2 + '|' + stprobeTS + '|' + insertTS2 + '|' + flushTS2 file2.close() # Update file2 if in stage 2. if ((stage == 2) and probed): file2 = open(filedescriptor2[resource].file.name, 'w') file2.write(st) file2.close() return probed def flushRJT(self): # Flush an RJT to secondary memory. # Choose a victim from each partition (table). (resource_to_flush1, tail_to_flush1, least_ts1) = self.getVictim(self.left_table) (resource_to_flush2, tail_to_flush2, least_ts2) = self.getVictim(self.right_table) # Flush resource from left table. if (least_ts1 <= least_ts2): file_descriptor = self.fileDescriptor_left table = self.left_table resource_to_flush = resource_to_flush1 tail_to_flush = tail_to_flush1 # Flush resource from right table. if (least_ts2 < least_ts1): file_descriptor = self.fileDescriptor_right table = self.right_table resource_to_flush = resource_to_flush2 tail_to_flush = tail_to_flush2 # Create flush timestamp. flushTS = time() # Update file descriptor if (file_descriptor.has_key(resource_to_flush)): lentail = file_descriptor[resource_to_flush].size file = open(file_descriptor[resource_to_flush].file.name, 'a') file_descriptor.update({ resource_to_flush: FileDescriptor(file, len(tail_to_flush.records) + lentail, flushTS) }) else: file = NamedTemporaryFile(suffix=".rjt", prefix="", delete=False) file_descriptor.update({ resource_to_flush: FileDescriptor(file, len(tail_to_flush.records), flushTS) }) # Flush tail in file. for record in tail_to_flush.records: sttuple = str(record.tuple) stprobeTS = "%.40r" % (record.probeTS) stinsertTS = "%.40r" % (record.insertTS) stflushTS = "%.40r" % (flushTS) file.write(sttuple + '|') file.write(stprobeTS + '|') file.write(stinsertTS + '|') file.write(stflushTS + '\n') file.close() # Delete resource from main memory. del table[resource_to_flush] def getVictim(self, table): # Selects a victim from a partition in main memory to flush. 
resource_to_flush = "" tail_to_flush = RJTTail([], 0) least_ts = float("inf") for resource, tail in table.iteritems(): resource_ts = tail.rjtProbeTS if ((resource_ts < least_ts) or (resource_ts == least_ts and len(tail.records) > len(tail_to_flush.records))): resource_to_flush = resource tail_to_flush = tail least_ts = resource_ts #print "Victim chosen:", resource_to_flush, "TS:", least_ts, "LEN:", len(tail_to_flush.records) return (resource_to_flush, tail_to_flush, least_ts) def getLargestRJTs(self, i): # Selects the i-th largest RJT stored in secondary memory. sizes1 = set( map(FileDescriptor.getSize, self.fileDescriptor_left.values())) sizes2 = set( map(FileDescriptor.getSize, self.fileDescriptor_right.values())) sizes1 = list(sizes1) sizes2 = list(sizes2) sizes1.sort() sizes2.sort() if (sizes1 and sizes2): if (sizes1[len(sizes1) - 1] > sizes2[len(sizes2) - 1]): file_descriptor = self.fileDescriptor_left max_len = sizes1[len(sizes1) - (i + 1)] table = self.right_table else: file_descriptor = self.fileDescriptor_right max_len = sizes2[len(sizes2) - (i + 1)] table = self.left_table elif (sizes1): file_descriptor = self.fileDescriptor_left max_len = sizes1[len(sizes1) - (i + 1)] table = self.right_table else: file_descriptor = self.fileDescriptor_right max_len = sizes2[len(sizes2) - (i + 1)] table = self.left_table largestRJTs = {} for resource, fd in file_descriptor.iteritems(): if (fd.size == max_len): largestRJTs[resource] = fd return (largestRJTs, table)
class FunctionInvoker: """ Module responsible to perform the invocations against the compute backend """ def __init__(self, config, log_level): self.config = config self.log_level = log_level storage_config = extract_storage_config(self.config) self.internal_storage = InternalStorage(storage_config) compute_config = extract_compute_config(self.config) self.remote_invoker = self.config['pywren'].get( 'remote_invoker', False) self.rabbitmq_monitor = self.config['pywren'].get( 'rabbitmq_monitor', False) if self.rabbitmq_monitor: self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url') self.workers = self.config['pywren'].get('workers') logger.debug('Total workers: {}'.format(self.workers)) self.compute_handlers = [] cb = compute_config['backend'] regions = compute_config[cb].get('region') if regions and type(regions) == list: for region in regions: new_compute_config = compute_config.copy() new_compute_config[cb]['region'] = region self.compute_handlers.append(Compute(new_compute_config)) else: self.compute_handlers.append(Compute(compute_config)) self.token_bucket_q = Queue() self.pending_calls_q = Queue() self.job_monitor = JobMonitor(self.config, self.internal_storage, self.token_bucket_q) def _invoke(self, job, call_id): """ Method used to perform the actual invocation against the Compute Backend """ payload = { 'config': self.config, 'log_level': self.log_level, 'func_key': job.func_key, 'data_key': job.data_key, 'extra_env': job.extra_env, 'execution_timeout': job.execution_timeout, 'data_byte_range': job.data_ranges[int(call_id)], 'executor_id': job.executor_id, 'job_id': job.job_id, 'call_id': call_id, 'host_submit_time': time.time(), 'pywren_version': __version__, 'runtime_name': job.runtime_name, 'runtime_memory': job.runtime_memory } # do the invocation start = time.time() compute_handler = random.choice(self.compute_handlers) activation_id = compute_handler.invoke(job.runtime_name, job.runtime_memory, payload) roundtrip = time.time() - start resp_time = format(round(roundtrip, 3), '.3f') if not activation_id: self.pending_calls_q.put((job, call_id)) return logger.info( 'ExecutorID {} | JobID {} - Function invocation {} done! ({}s) - Activation' ' ID: {}'.format(job.executor_id, job.job_id, call_id, resp_time, activation_id)) return call_id def run(self, job_description): """ Run a job described in job_description """ job = SimpleNamespace(**job_description) log_msg = ( 'ExecutorID {} | JobID {} - Starting function invocation: {}() - Total: {} ' 'activations'.format(job.executor_id, job.job_id, job.function_name, job.total_calls)) logger.info(log_msg) self.total_calls = job.total_calls for i in range(self.workers): self.token_bucket_q.put('#') for i in range(job.total_calls): call_id = "{:05d}".format(i) self.pending_calls_q.put((job, call_id)) self.job_monitor.start_job_monitoring(job) invokers = [] for inv_id in range(4): p = Process(target=self._run_process, args=(inv_id, )) invokers.append(p) p.daemon = True p.start() for p in invokers: p.join() def _run_process(self, inv_id): """ Run process that implements token bucket scheduling approach """ logger.info('Invoker process {} started'.format(inv_id)) call_futures = [] with ThreadPoolExecutor(max_workers=250) as executor: # TODO: Change pending_calls_q check while self.pending_calls_q.qsize() > 0: self.token_bucket_q.get() job, call_id = self.pending_calls_q.get() future = executor.submit(self._invoke, job, call_id) call_futures.append(future) logger.info('Invoker process {} finished'.format(inv_id))
from multiprocessing import Process, Queue
import time

# Create the message queue
q = Queue()


def fun1():
    for i in range(10):
        time.sleep(1)
        q.put((1, 2))


def fun2():
    for i in range(10):
        time.sleep(1.5)
        a, b = q.get()
        print("sum = ", a + b)


p1 = Process(target=fun1)
p2 = Process(target=fun2)
p1.start()
p2.start()
p1.join()
p2.join()
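
# The queue above is shared implicitly through a module-level global, which
# relies on the fork start method. A slightly more portable sketch of the same
# producer/consumer pair passes the queue explicitly, so it also works where
# processes are spawned rather than forked (imports reused from above).
def producer(q):
    for i in range(10):
        q.put((i, i + 1))


def consumer(q):
    for i in range(10):
        a, b = q.get()
        print("sum =", a + b)


if __name__ == '__main__':
    mq = Queue()
    p1 = Process(target=producer, args=(mq,))
    p2 = Process(target=consumer, args=(mq,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()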
class DependentOperator(object): ''' Implements an operator that must be resolved with an instance. It receives as input the url of the server to be contacted, the filename that contains the query, the header size of the response message, the buffer size (length of the string) of the messages. The execute() method performs a semantic check. If the instance can be derreferenced from the source, it will contact the source. ''' def __init__(self, server, query, vs, buffersize): #headersize ??? self.server = server #self.filename = filename self.query = query #self.headersize = headersize self.buffersize = buffersize self.q = None self.q = Queue() self.atts = vs self.prefs = [] #query.prefs #self.atts = self.getQueryAttributes() #self.catalog = Catalog("/home/gabriela/Anapsid/src/Catalog/endpoints.desc") def execute(self, variables, instances, outputqueue): self.query = open(self.filename).read() # ? signal.signal(12, onSignal) # Replace in the query, the instance that is derreferenced. for i in range(len(variables)): self.query = string.replace(self.query, "?" + variables[i], "", 1) self.query = string.replace(self.query, "?" + variables[i], "<" + instances[i] + ">") # If the instance has no ?query. Example: DESCRIBE --- if (instances[0].find("sparql?query") == -1): pos = instances[0].find("/resource") pre = instances[0][0:pos] # Semantic check!. for server in self.server: prefixes = self.catalog.data[server] try: # Contact the source. pos = prefixes.index(pre) self.p = Process(target=contactSource, args=(server, self.query, self.headersize, self.buffersize, self.q,)) self.p.start() # first_tuple = True while True: # Get the next item in queue. res = self.q.get() # #Get the variables from the answer # if (first_tuple): # vars = res.keys() # outputqueue.put(vars) # first_tuple = False # Put the result into the output queue. outputqueue.put(res) # Check if there's no more data. if (res == "EOF"): break except ValueError: # The source shouldn't be contacted. outputqueue.put(self.atts) outputqueue.put("EOF") def getQueryAttributes(self): # Read the query from file and apply lower case. query = open(self.filename).read() query2 = string.lower(query) # Extract the variables, separated by commas. # TODO: it supposes that there's no from clause. begin = string.find(query2, "select") begin = begin + len("select") end = string.find(query2, "where") listatts = query[begin:end] listatts = string.split(listatts, " ") # Iterate over the list of attributes, and delete "?". outlist = [] for att in listatts: if ((len(att) > 0) and (att[0] == '?')): if ((att[len(att)-1] == ',') or (att[len(att)-1] == '\n')): outlist = outlist + [att[1:len(att)-1]] else: outlist = outlist + [att[1:len(att)]] return outlist
else: results_dict[name] = "(N: %d ntray: %d ttray: %d)" % (value[0], value[1], value[2]) tf = t2 - t1 with open("tiempos_cpu.txt", mode='a') as file: file.write('Time %s: %s. Started: %s. NODE %s, CORE %d\n' % (name, tf, t1, sNode, nCore)) except: logger.info("%s could not access to the queue at this moment: %s" % (name, sys.exc_info()) ) tb = time() tf = tb - ta with open("final_cpu.txt", mode='a') as file: file.write('Tiempo de %s: %s. \n' % (name, tf)) if __name__ == '__main__': data_index = Queue() results = Queue() semaphore = Semaphore(1) manager = Manager() results_dict = manager.dict() #This example executes the result of the GA for a case study of 7 sizes: 216, 512, 1000, 2197, 4096, 8000 and 15625 #In this example, the cluster has 4 machines, "bullxual01" to "bullxual04", each one with 16 cores and 2 GPU's #Take into account that two CPU-cores are needed to handle the tow GPUs in each machine, so only 14 CPU-cores are available #Number of trajectories, time steps and times for sizes 1 to 7 for GPU and CPU ntray = 500 ttray = 500 tiemposGPU = (167, 172, 182, 193, 226, 336, 540) tiemposCPU = (430, 700, 1500, 2900, 5172, 9558, 22253)
#!/usr/bin/env python3
from multiprocessing import Process, Queue
import sys
import csv

# (taxable-income threshold, tax rate, quick deduction)
INCOME_TAX_LOOKUP_TABLE = [
    (80000, 0.45, 13505),
    (55000, 0.35, 5505),
    (35000, 0.30, 2755),
    (9000, 0.25, 1005),
    (4500, 0.2, 555),
    (1500, 0.1, 105),
    (0, 0.03, 0),
]

queue1 = Queue()
queue2 = Queue()


class Args(object):
    def __init__(self):
        self.args = sys.argv[1:]

    def file_after_option(self, option):
        try:
            index = self.args.index(option)
            return self.args[index + 1]
        except (ValueError, IndexError):
            print("Parameter Error")
            exit()

    @property
    def config_path(self):
        return self.file_after_option('-c')

    @property
    def userdata_path(self):
class Boundary(Thread): def __init__(self, parent_pid): """ Initialize the Boundary """ super(Boundary, self).__init__() self.redis_conn = StrictRedis(unix_socket_path=REDIS_SOCKET) self.daemon = True self.parent_pid = parent_pid self.current_pid = getpid() self.boundary_metrics = Manager().list() self.anomalous_metrics = Manager().list() self.exceptions_q = Queue() self.anomaly_breakdown_q = Queue() def check_if_parent_is_alive(self): """ Self explanatory """ try: kill(self.current_pid, 0) kill(self.parent_pid, 0) except: exit(0) def send_graphite_metric(self, name, value): if settings.GRAPHITE_HOST != '': sock = socket.socket() try: sock.connect((settings.GRAPHITE_HOST, settings.CARBON_PORT)) except socket.error: endpoint = '%s:%d' % (settings.GRAPHITE_HOST, settings.CARBON_PORT) logger.error('Cannot connect to Graphite at %s' % endpoint) return False sock.sendall('%s %s %i\n' % (name, value, time())) sock.close() return True return False def unique_noHash(self, seq): seen = set() return [x for x in seq if str(x) not in seen and not seen.add(str(x))] # This is to make a dump directory in /tmp if ENABLE_BOUNDARY_DEBUG is True # for dumping the metric timeseries data into for debugging purposes def mkdir_p(self, path): try: os.makedirs(path) return True except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise def spin_process(self, i, boundary_metrics): """ Assign a bunch of metrics for a process to analyze. """ # Determine assigned metrics bp = settings.BOUNDARY_PROCESSES bm_range = len(boundary_metrics) keys_per_processor = int(ceil(float(bm_range) / float(bp))) if i == settings.BOUNDARY_PROCESSES: assigned_max = len(boundary_metrics) else: # This is a skyine bug, the original skyline code uses 1 as the # beginning position of the index, python indices begin with 0 # assigned_max = len(boundary_metrics) # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014 # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment assigned_max = min(len(boundary_metrics), i * keys_per_processor) assigned_min = (i - 1) * keys_per_processor assigned_keys = range(assigned_min, assigned_max) # Compile assigned metrics assigned_metrics_and_algos = [ boundary_metrics[index] for index in assigned_keys ] if ENABLE_BOUNDARY_DEBUG: logger.info('debug - printing assigned_metrics_and_algos') for assigned_metric_and_algo in assigned_metrics_and_algos: logger.info('debug - assigned_metric_and_algo - %s' % str(assigned_metric_and_algo)) # Compile assigned metrics assigned_metrics = [] for i in assigned_metrics_and_algos: assigned_metrics.append(i[0]) # unique unhashed things def unique_noHash(seq): seen = set() return [ x for x in seq if str(x) not in seen and not seen.add(str(x)) ] unique_assigned_metrics = unique_noHash(assigned_metrics) if ENABLE_BOUNDARY_DEBUG: logger.info('debug - unique_assigned_metrics - %s' % str(unique_assigned_metrics)) logger.info('debug - printing unique_assigned_metrics:') for unique_assigned_metric in unique_assigned_metrics: logger.info('debug - unique_assigned_metric - %s' % str(unique_assigned_metric)) # Check if this process is unnecessary if len(unique_assigned_metrics) == 0: return # Multi get series try: raw_assigned = self.redis_conn.mget(unique_assigned_metrics) except: logger.error("failed to mget assigned_metrics from redis") return # Make process-specific dicts exceptions = defaultdict(int) anomaly_breakdown = defaultdict(int) # Reset boundary_algortims all_boundary_algorithms = 
[] for metric in BOUNDARY_METRICS: all_boundary_algorithms.append(metric[1]) # The unique algorithms that are being used boundary_algorithms = unique_noHash(all_boundary_algorithms) if ENABLE_BOUNDARY_DEBUG: logger.info('debug - boundary_algorithms - %s' % str(boundary_algorithms)) discover_run_metrics = [] # Distill metrics into a run list for i, metric_name, in enumerate(unique_assigned_metrics): self.check_if_parent_is_alive() try: if ENABLE_BOUNDARY_DEBUG: logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(i))) raw_series = raw_assigned[i] unpacker = Unpacker(use_list=False) unpacker.feed(raw_series) timeseries = list(unpacker) except Exception as e: exceptions['Other'] += 1 logger.error("redis data error: " + traceback.format_exc()) logger.error("error: %e" % e) base_name = metric_name.replace(FULL_NAMESPACE, '', 1) # Determine the metrics BOUNDARY_METRICS metric tuple settings for metrick in BOUNDARY_METRICS: CHECK_MATCH_PATTERN = metrick[0] check_match_pattern = re.compile(CHECK_MATCH_PATTERN) pattern_match = check_match_pattern.match(base_name) metric_pattern_matched = False if pattern_match: metric_pattern_matched = True algo_pattern_matched = False for algo in boundary_algorithms: for metric in BOUNDARY_METRICS: CHECK_MATCH_PATTERN = metric[0] check_match_pattern = re.compile( CHECK_MATCH_PATTERN) pattern_match = check_match_pattern.match( base_name) if pattern_match: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - metric and algo pattern MATCHED - " + metric[0] + " | " + base_name + " | " + str(metric[1])) metric_expiration_time = False metric_min_average = False metric_min_average_seconds = False metric_trigger = False algorithm = False algo_pattern_matched = True algorithm = metric[1] try: if metric[2]: metric_expiration_time = metric[2] except: metric_expiration_time = False try: if metric[3]: metric_min_average = metric[3] except: metric_min_average = False try: if metric[4]: metric_min_average_seconds = metric[4] except: metric_min_average_seconds = 1200 try: if metric[5]: metric_trigger = metric[5] except: metric_trigger = False try: if metric[6]: alert_threshold = metric[6] except: alert_threshold = False try: if metric[7]: metric_alerters = metric[7] except: metric_alerters = False if metric_pattern_matched and algo_pattern_matched: if ENABLE_BOUNDARY_DEBUG: logger.info( 'debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s' % (str(i), metric_name, str(metric_expiration_time), str(metric_min_average), str(metric_min_average_seconds), str(metric_trigger), str(alert_threshold), metric_alerters, algorithm)) discover_run_metrics.append([ i, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm ]) if ENABLE_BOUNDARY_DEBUG: logger.info('debug - printing discover_run_metrics') for discover_run_metric in discover_run_metrics: logger.info('debug - discover_run_metrics - %s' % str(discover_run_metric)) logger.info('debug - build unique boundary metrics to analyze') # Determine the unique set of metrics to run run_metrics = unique_noHash(discover_run_metrics) if ENABLE_BOUNDARY_DEBUG: logger.info('debug - printing run_metrics') for run_metric in run_metrics: logger.info('debug - run_metrics - %s' % str(run_metric)) # Distill timeseries strings and submit to run_selected_algorithm for metric_and_algo in run_metrics: self.check_if_parent_is_alive() try: raw_assigned_id = metric_and_algo[0] metric_name = metric_and_algo[1] base_name = 
metric_name.replace(FULL_NAMESPACE, '', 1) metric_expiration_time = metric_and_algo[2] metric_min_average = metric_and_algo[3] metric_min_average_seconds = metric_and_algo[4] metric_trigger = metric_and_algo[5] alert_threshold = metric_and_algo[6] metric_alerters = metric_and_algo[7] algorithm = metric_and_algo[8] if ENABLE_BOUNDARY_DEBUG: logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(raw_assigned_id))) raw_series = raw_assigned[metric_and_algo[0]] unpacker = Unpacker(use_list=False) unpacker.feed(raw_series) timeseries = list(unpacker) if ENABLE_BOUNDARY_DEBUG: logger.info('debug - unpacked OK - %s - %s' % (metric_name, str(raw_assigned_id))) autoaggregate = False autoaggregate_value = 0 # Determine if the namespace is to be aggregated if BOUNDARY_AUTOAGGRERATION: for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS: autoaggregate = False autoaggregate_value = 0 CHECK_MATCH_PATTERN = autoaggregate_metric[0] base_name = metric_name.replace(FULL_NAMESPACE, '', 1) check_match_pattern = re.compile(CHECK_MATCH_PATTERN) pattern_match = check_match_pattern.match(base_name) if pattern_match: autoaggregate = True autoaggregate_value = autoaggregate_metric[1] if ENABLE_BOUNDARY_DEBUG: logger.info( 'debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' % (metric_name, str(autoaggregate))) if ENABLE_BOUNDARY_DEBUG: logger.info( 'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s' % (metric_name, str(metric_expiration_time), str(metric_min_average), str(metric_min_average_seconds), str(metric_trigger), str(alert_threshold), metric_alerters, autoaggregate, autoaggregate_value, algorithm)) # Dump the the timeseries data to a file timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm self.mkdir_p(timeseries_dump_dir) timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json" with open(timeseries_dump_file, 'w+') as f: f.write(str(timeseries)) f.close() # Check if a metric has its own unique BOUNDARY_METRICS alert # tuple, this allows us to paint an entire metric namespace with # the same brush AND paint a unique metric or namespace with a # different brush or scapel has_unique_tuple = False run_tupple = False boundary_metric_tuple = (base_name, algorithm, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters) wildcard_namespace = True for metric_tuple in BOUNDARY_METRICS: if not has_unique_tuple: CHECK_MATCH_PATTERN = metric_tuple[0] check_match_pattern = re.compile(CHECK_MATCH_PATTERN) pattern_match = check_match_pattern.match(base_name) if pattern_match: if metric_tuple[0] == base_name: wildcard_namespace = False if not has_unique_tuple: if boundary_metric_tuple == metric_tuple: has_unique_tuple = True run_tupple = True if ENABLE_BOUNDARY_DEBUG: logger.info('unique_tuple:') logger.info( 'boundary_metric_tuple: %s' % str(boundary_metric_tuple)) logger.info('metric_tuple: %s' % str(metric_tuple)) if not has_unique_tuple: if wildcard_namespace: if ENABLE_BOUNDARY_DEBUG: logger.info('wildcard_namespace:') logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple)) run_tupple = True else: if ENABLE_BOUNDARY_DEBUG: logger.info( 'wildcard_namespace: BUT WOULD NOT RUN') logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple)) if ENABLE_BOUNDARY_DEBUG: logger.info('WOULD RUN run_selected_algorithm = %s' % run_tupple) if run_tupple: # Submit the timeseries and settings to run_selected_algorithm anomalous, ensemble, datapoint, metric_name, metric_expiration_time, 
metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm( timeseries, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, autoaggregate, autoaggregate_value, algorithm) if ENABLE_BOUNDARY_DEBUG: logger.info('debug - analysed - %s' % (metric_name)) else: anomalous = False if ENABLE_BOUNDARY_DEBUG: logger.info( 'debug - more unique metric tuple not analysed - %s' % (metric_name)) # If it's anomalous, add it to list if anomalous: anomalous_metric = [ datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm ] self.anomalous_metrics.append(anomalous_metric) # Get the anomaly breakdown - who returned True? for index, value in enumerate(ensemble): if value: anomaly_breakdown[algorithm] += 1 # It could have been deleted by the Roomba except TypeError: exceptions['DeletedByRoomba'] += 1 except TooShort: exceptions['TooShort'] += 1 except Stale: exceptions['Stale'] += 1 except Boring: exceptions['Boring'] += 1 except: exceptions['Other'] += 1 logger.info("exceptions['Other'] traceback follows:") logger.info(traceback.format_exc()) # Add values to the queue so the parent process can collate for key, value in anomaly_breakdown.items(): self.anomaly_breakdown_q.put((key, value)) for key, value in exceptions.items(): self.exceptions_q.put((key, value)) def run(self): """ Called when the process intializes. """ while 1: now = time() # Make sure Redis is up try: self.redis_conn.ping() except: logger.error( 'skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH) sleep(10) self.redis_conn = StrictRedis( unix_socket_path=settings.REDIS_SOCKET_PATH) continue # Discover unique metrics unique_metrics = list( self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics')) if len(unique_metrics) == 0: logger.info( 'no metrics in redis. try adding some - see README') sleep(10) continue # Reset boundary_metrics boundary_metrics = [] # Build boundary metrics for metric_name in unique_metrics: for metric in BOUNDARY_METRICS: CHECK_MATCH_PATTERN = metric[0] check_match_pattern = re.compile(CHECK_MATCH_PATTERN) base_name = metric_name.replace(FULL_NAMESPACE, '', 1) pattern_match = check_match_pattern.match(base_name) if pattern_match: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - boundary metric - pattern MATCHED - " + metric[0] + " | " + base_name) boundary_metrics.append([metric_name, metric[1]]) if ENABLE_BOUNDARY_DEBUG: logger.info("debug - boundary metrics - " + str(boundary_metrics)) if len(boundary_metrics) == 0: logger.info( 'no metrics in redis. try adding some - see README') sleep(10) continue # Spawn processes pids = [] for i in range(1, settings.BOUNDARY_PROCESSES + 1): if i > len(boundary_metrics): logger.info( 'WARNING: skyline boundary is set for more cores than needed.' 
) break p = Process(target=self.spin_process, args=(i, boundary_metrics)) pids.append(p) p.start() # Send wait signal to zombie processes for p in pids: p.join() # Grab data from the queue and populate dictionaries exceptions = dict() anomaly_breakdown = dict() while 1: try: key, value = self.anomaly_breakdown_q.get_nowait() if key not in anomaly_breakdown.keys(): anomaly_breakdown[key] = value else: anomaly_breakdown[key] += value except Empty: break while 1: try: key, value = self.exceptions_q.get_nowait() if key not in exceptions.keys(): exceptions[key] = value else: exceptions[key] += value except Empty: break # Send alerts if settings.BOUNDARY_ENABLE_ALERTS: for anomalous_metric in self.anomalous_metrics: datapoint = str(anomalous_metric[0]) metric_name = anomalous_metric[1] base_name = metric_name.replace(FULL_NAMESPACE, '', 1) expiration_time = str(anomalous_metric[2]) metric_trigger = str(anomalous_metric[5]) alert_threshold = int(anomalous_metric[6]) metric_alerters = anomalous_metric[7] algorithm = anomalous_metric[8] if ENABLE_BOUNDARY_DEBUG: logger.info("debug - anomalous_metric - " + str(anomalous_metric)) # Determine how many times has the anomaly been seen if the # ALERT_THRESHOLD is set to > 1 and create a cache key in # redis to keep count so that alert_threshold can be honored if alert_threshold == 0: times_seen = 1 if ENABLE_BOUNDARY_DEBUG: logger.info("debug - alert_threshold - " + str(alert_threshold)) if alert_threshold == 1: times_seen = 1 if ENABLE_BOUNDARY_DEBUG: logger.info("debug - alert_threshold - " + str(alert_threshold)) if alert_threshold > 1: if ENABLE_BOUNDARY_DEBUG: logger.info("debug - alert_threshold - " + str(alert_threshold)) anomaly_cache_key_count_set = False anomaly_cache_key_expiration_time = ( int(alert_threshold) + 1) * 60 anomaly_cache_key = 'anomaly_seen.%s.%s' % (algorithm, base_name) try: anomaly_cache_key_count = self.redis_conn.get( anomaly_cache_key) if not anomaly_cache_key_count: try: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - redis no anomaly_cache_key - " + str(anomaly_cache_key)) times_seen = 1 if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - redis setex anomaly_cache_key - " + str(anomaly_cache_key)) self.redis_conn.setex( anomaly_cache_key, anomaly_cache_key_expiration_time, packb(int(times_seen))) logger.info( 'set anomaly seen key :: %s seen %s' % (anomaly_cache_key, str(times_seen))) except Exception as e: logger.error('redis setex failed :: %s' % str(anomaly_cache_key)) logger.error("couldn't set key: %s" % e) else: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - redis anomaly_cache_key retrieved OK - " + str(anomaly_cache_key)) anomaly_cache_key_count_set = True except: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - redis failed - anomaly_cache_key retrieval failed - " + str(anomaly_cache_key)) anomaly_cache_key_count_set = False if anomaly_cache_key_count_set: unpacker = Unpacker(use_list=False) unpacker.feed(anomaly_cache_key_count) raw_times_seen = list(unpacker) times_seen = int(raw_times_seen[0]) + 1 try: self.redis_conn.setex( anomaly_cache_key, anomaly_cache_key_expiration_time, packb(int(times_seen))) logger.info( 'set anomaly seen key :: %s seen %s' % (anomaly_cache_key, str(times_seen))) except: times_seen = 1 logger.error( 'set anomaly seen key failed :: %s seen %s' % (anomaly_cache_key, str(times_seen))) # Alert the alerters if times_seen > alert_threshold if times_seen >= alert_threshold: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - times_seen %s is greater than or equal to alert_threshold %s" % 
(str(times_seen), str(alert_threshold))) for alerter in metric_alerters.split("|"): # Determine alerter limits send_alert = False alerts_sent = 0 if ENABLE_BOUNDARY_DEBUG: logger.info("debug - checking alerter - %s" % alerter) try: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - determining alerter_expiration_time for settings" ) alerter_expiration_time_setting = settings.BOUNDARY_ALERTER_OPTS[ 'alerter_expiration_time'][alerter] alerter_expiration_time = int( alerter_expiration_time_setting) if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - determined alerter_expiration_time from settings - %s" % str(alerter_expiration_time)) except: # Set an arbitrary expiry time if not set alerter_expiration_time = 160 if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - could not determine alerter_expiration_time from settings" ) try: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - determining alerter_limit from settings" ) alerter_limit_setting = settings.BOUNDARY_ALERTER_OPTS[ 'alerter_limit'][alerter] alerter_limit = int(alerter_limit_setting) alerter_limit_set = True if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - determined alerter_limit from settings - %s" % str(alerter_limit)) except: alerter_limit_set = False send_alert = True if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - could not determine alerter_limit from settings" ) # If the alerter_limit is set determine how many # alerts the alerter has sent if alerter_limit_set: alerter_sent_count_key = 'alerts_sent.%s' % ( alerter) try: alerter_sent_count_key_data = self.redis_conn.get( alerter_sent_count_key) if not alerter_sent_count_key_data: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - redis no alerter key, no alerts sent for - " + str(alerter_sent_count_key)) alerts_sent = 0 send_alert = True if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - alerts_sent set to %s" % str(alerts_sent)) logger.info( "debug - send_alert set to %s" % str(sent_alert)) else: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - redis alerter key retrieved, unpacking" + str(alerter_sent_count_key)) unpacker = Unpacker(use_list=False) unpacker.feed( alerter_sent_count_key_data) raw_alerts_sent = list(unpacker) alerts_sent = int(raw_alerts_sent[0]) if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - alerter %s alerts sent %s " % (str(alerter), str(alerts_sent))) except: logger.info("No key set - %s" % alerter_sent_count_key) alerts_sent = 0 send_alert = True if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - alerts_sent set to %s" % str(alerts_sent)) logger.info( "debug - send_alert set to %s" % str(send_alert)) if alerts_sent < alerter_limit: send_alert = True if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - alerts_sent %s is less than alerter_limit %s" % (str(alerts_sent), str(alerter_limit))) logger.info( "debug - send_alert set to %s" % str(send_alert)) # Send alert alerter_alert_sent = False if send_alert: cache_key = 'last_alert.boundary.%s.%s.%s' % ( alerter, base_name, algorithm) if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - checking cache_key - %s" % cache_key) try: last_alert = self.redis_conn.get(cache_key) if not last_alert: try: self.redis_conn.setex( cache_key, int(anomalous_metric[2]), packb(int( anomalous_metric[0]))) if ENABLE_BOUNDARY_DEBUG: logger.info( 'debug - key setex OK - %s' % (cache_key)) trigger_alert( alerter, datapoint, base_name, expiration_time, metric_trigger, algorithm) logger.info( 'alert sent :: %s - %s - via %s - %s' % (base_name, datapoint, alerter, algorithm)) trigger_alert( "syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm) 
logger.info( 'alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm)) alerter_alert_sent = True except Exception as e: logger.error( 'alert failed :: %s - %s - via %s - %s' % (base_name, datapoint, alerter, algorithm)) logger.error( "couldn't send alert: %s" % str(e)) trigger_alert( "syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm) else: if ENABLE_BOUNDARY_DEBUG: logger.info( "debug - cache_key exists not alerting via %s for %s is less than alerter_limit %s" % (alerter, cache_key)) trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm) logger.info( 'alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm)) except: trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm) logger.info( 'alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm)) else: trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm) logger.info( 'alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm)) # Update the alerts sent for the alerter cache key, # to allow for alert limiting if alerter_alert_sent and alerter_limit_set: try: alerter_sent_count_key = 'alerts_sent.%s' % ( alerter) new_alerts_sent = int(alerts_sent) + 1 self.redis_conn.setex( alerter_sent_count_key, alerter_expiration_time, packb(int(new_alerts_sent))) logger.info('set %s - %s' % (alerter_sent_count_key, str(new_alerts_sent))) except: logger.error('failed to set %s - %s' % (alerter_sent_count_key, str(new_alerts_sent))) else: # Always alert to syslog, even if alert_threshold is not # breached or if send_alert is not True trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm) logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm)) # Write anomalous_metrics to static webapp directory if len(self.anomalous_metrics) > 0: filename = path.abspath( path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP)) with open(filename, 'w') as fh: # Make it JSONP with a handle_data() function anomalous_metrics = list(self.anomalous_metrics) anomalous_metrics.sort(key=operator.itemgetter(1)) fh.write('handle_data(%s)' % anomalous_metrics) # Log progress logger.info('seconds to run :: %.2f' % (time() - now)) logger.info('total metrics :: %d' % len(boundary_metrics)) logger.info('total analyzed :: %d' % (len(boundary_metrics) - sum(exceptions.values()))) logger.info('total anomalies :: %d' % len(self.anomalous_metrics)) logger.info('exception stats :: %s' % exceptions) logger.info('anomaly breakdown :: %s' % anomaly_breakdown) # Log to Graphite self.send_graphite_metric( 'skyline.boundary.' + SERVER_METRIC_PATH + 'run_time', '%.2f' % (time() - now)) self.send_graphite_metric( 'skyline.boundary.' + SERVER_METRIC_PATH + 'total_analyzed', '%.2f' % (len(boundary_metrics) - sum(exceptions.values()))) self.send_graphite_metric( 'skyline.boundary.' + SERVER_METRIC_PATH + 'total_anomalies', '%d' % len(self.anomalous_metrics)) self.send_graphite_metric( 'skyline.boundary.' + SERVER_METRIC_PATH + 'total_metrics', '%d' % len(boundary_metrics)) for key, value in exceptions.items(): send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'exceptions.%s' % key self.send_graphite_metric(send_metric, '%d' % value) for key, value in anomaly_breakdown.items(): send_metric = 'skyline.boundary.' 
+ SERVER_METRIC_PATH + 'anomaly_breakdown.%s' % key self.send_graphite_metric(send_metric, '%d' % value) # Check canary metric raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC) if raw_series is not None: unpacker = Unpacker(use_list=False) unpacker.feed(raw_series) timeseries = list(unpacker) time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600 projected = 24 * (time() - now) / time_human logger.info('canary duration :: %.2f' % time_human) self.send_graphite_metric( 'skyline.boundary.' + SERVER_METRIC_PATH + 'duration', '%.2f' % time_human) self.send_graphite_metric( 'skyline.boundary.' + SERVER_METRIC_PATH + 'projected', '%.2f' % projected) # Reset counters self.anomalous_metrics[:] = [] # Only run once per minute seconds_to_run = int((time() - now)) if seconds_to_run < 60: sleep_for_seconds = 60 - seconds_to_run else: sleep_for_seconds = 0 if sleep_for_seconds > 0: logger.info('sleeping for %s seconds' % sleep_for_seconds) sleep(sleep_for_seconds)
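The alert throttling in run() keeps a per-metric "times seen" counter in Redis as a msgpack-packed integer under an anomaly_seen.<algorithm>.<base_name> key, expiring after (alert_threshold + 1) minutes. Below is a minimal sketch of that counter, assuming an existing redis-py connection and msgpack; the helper name is illustrative, not part of the project.

# Minimal sketch of the "times seen" counter used by run() above
# (assumes a redis-py connection and msgpack; helper name is illustrative).
from msgpack import Unpacker, packb

def increment_times_seen(redis_conn, algorithm, base_name, alert_threshold):
    key = 'anomaly_seen.%s.%s' % (algorithm, base_name)
    ttl = (int(alert_threshold) + 1) * 60
    raw = redis_conn.get(key)
    if raw is None:
        times_seen = 1
    else:
        unpacker = Unpacker(use_list=False)
        unpacker.feed(raw)
        times_seen = int(list(unpacker)[0]) + 1
    redis_conn.setex(key, ttl, packb(int(times_seen)))
    return times_seen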
def trim_file(infile, adapter, outfile, threads=1, phred=33): read_queue = Queue() result_queue = Queue() trimmed_queue = Queue() workers = [] def start_workers(): for i in xrange(threads): worker = Worker(queue=read_queue, results=result_queue, phred64=phred==64, adapter=adapter) workers.append(worker) worker.start() writer = Writer(queue=result_queue, trimmed=trimmed_queue, outfile=outfile) writer.start() batch = [] for index, read in enumerate(FastqReader(infile)): batch.append(read) if index < 1000 and phred == 33: if any([i for i in read.qualities if ord(i) > 74]): phred = 64 if index % 10000 == 0: if not workers: start_workers() read_queue.put(batch) batch = [] if not workers: start_workers() read_queue.put(batch) processed = index+1 # poison pill to stop workers for i in xrange(threads): read_queue.put(None) for i in workers: i.join() # poison pill for writers result_queue.put(None) # wait for writing to finish writer.join() #print "Output done" trimmed_queue.put(None) kept_reads = sum([i for i in iter(trimmed_queue.get, None)]) return (phred, processed, kept_reads) #with logfile as o: # o.write('Starting reads: {0}\n'.format(processed)) # o.write('Processed reads: {0}\n'.format(kept_reads)) #print ('{0}\n'.format(phred))
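trim_file returns a (phred, processed, kept_reads) tuple, so a caller can log the detected quality offset and read counts directly. A hypothetical invocation follows; the file names and adapter sequence are placeholders and depend on the actual library prep.

# Hypothetical use of trim_file (file names and adapter are placeholders).
phred, processed, kept_reads = trim_file(
    "reads.fastq", "AGATCGGAAGAGC", "trimmed.fastq", threads=4, phred=33)
print("phred+{0}: {1} reads in, {2} kept".format(phred, processed, kept_reads))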
class IndependentOperator(object): ''' Implements an operator that can be resolved independently. It receives as input the url of the server to be contacted, the filename that contains the query, the header size of the of the messages. The execute() method reads tuples from the input queue and response message and the buffer size (length of the string) place them in the output queue. ''' def __init__(self, query, tree, c, buffersize=16384): (e, sq, vs) = tree.getInfoIO(query) self.contact = c self.server = e self.query = query self.tree = tree self.query_str = sq self.vars = vs self.buffersize = buffersize self.cardinality = None self.joinCardinality = [] def instantiate(self, d): new_tree = self.tree.instantiate(d) return IndependentOperator(self.query, new_tree, self.contact, self.buffersize) def getCardinality(self): if self.cardinality == None: self.cardinality = askCount(self.query, self.tree, set(), self.contact) return self.cardinality def getJoinCardinality(self, vars): c = None for (v, c2) in self.joinCardinality: if v == vars: c = c2 break if c == None: if len(vars) == 0: c = self.getCardinality() else: c = askCount(self.query, self.tree, vars, self.contact) self.joinCardinality.append((vars, c)) return c def allTriplesLowSelectivity(self): return self.tree.service.allTriplesLowSelectivity() def places(self): return self.tree.places() def constantNumber(self): return self.tree.constantNumber() def constantPercentage(self): return self.constantNumber()/self.places() def aux(self, n): return self.tree.aux(n) def execute(self, outputqueue): # Evaluate the independent operator. self.q = None self.q = Queue() self.p = Process(target=self.contact, args=(self.server, self.query_str, self.q, self.buffersize,)) self.p.start() while True: # Get the next item in queue. res = self.q.get(True) # Put the result into the output queue. #print res outputqueue.put(res) # Check if there's no more data. if (res == "EOF"): break def __repr__(self): return str(self.tree)
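execute() streams result tuples into the output queue and terminates the stream with the string "EOF", so callers usually run it in its own Process and drain the queue up to that sentinel. A sketch of that usage, assuming `op` is an already-constructed IndependentOperator; handle() is a placeholder for per-tuple processing.

# Sketch of consuming an IndependentOperator (assumes `op` is already built
# with a valid query/tree/contact; handle() is a placeholder).
from multiprocessing import Process, Queue

out = Queue()
p = Process(target=op.execute, args=(out,))
p.start()
while True:
    res = out.get(True)
    if res == "EOF":          # end-of-stream sentinel used by execute()
        break
    handle(res)               # placeholder per-tuple processing
p.join()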
import sshtunnel
import pysftp
import utils
import uuid
import json
import tqdm
import multiprocessing
from multiprocessing import Queue, Lock
import logging

# init tensorflow
from keras.backend.tensorflow_backend import set_session
from keras import backend as K
import tensorflow as tf

# init global lock
mutex = Lock()
mutex1 = Queue(1)
mutex2 = Queue(1)
mutex_data = None
# end init global lock


class task():
    """
    mainthread:
        True  : need to maintain the run() in the main thread to provide service
        False : auto create process to provide service
    handler_type:
import ROOT as ROOT
from multiprocessing import Process, Queue
import time, sys, os


def input_thread(q, stdin):
    while True:
        print 'ROOT: '
        cmd = stdin.readline()
        q.put(cmd)


def root(char):
    assert isinstance(char, str), "Argument must be string!"
    ROOT.gROOT.ProcessLine(char)


if __name__ == '__main__':
    ___queue___ = Queue()
    ___newstdin___ = os.fdopen(os.dup(sys.stdin.fileno()))
    ___input_p___ = Process(target=input_thread, args=(___queue___, ___newstdin___))
    ___input_p___.daemon = True
    ___input_p___.start()
    ___g___ = ROOT.gSystem.ProcessEvents
    try:
        while 1:
            if not ___queue___.empty():
                ___cmd___ = ___queue___.get()
                try:
                    exec(___cmd___, globals())
                except:
                    print sys.exc_info()
            time.sleep(0.01)
    except KeyboardInterrupt:  # close the dangling try: exit cleanly on Ctrl-C
        pass
# coding:utf-8
from multiprocessing import Queue

try:
    q = Queue()
    q.get(timeout=5)
except BaseException as e:
    print '--' + str(e)
#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME   : 2019/3/20 18:32
# @Author : 洪松
# @File   : 进程队列queue.py

from multiprocessing import Process, Queue


def f(q):
    q.put([42, 2, 'hello'])
    print('subprocess q id:', id(q))


if __name__ == '__main__':
    q = Queue()  # create the process Queue object
    p_list = []
    print('main q id:', id(q))
    for i in range(3):
        p = Process(target=f, args=(q, ))
        p_list.append(p)
        p.start()
    print(q.get())
    print(q.get())
    print(q.get())
    for i in p_list:
        i.join()
class Scheduler(object): """ Sucuri scheduler. """ TASK_TAG = 0 TERMINATE_TAG = 1 def __init__(self, graph, n_workers=1, mpi_enabled=True): """ :param graph: DFGraph The dataflow graph. :param n_workers: int Number of workers used. :param mpi_enabled: Indicates if uses MPI or not. """ # self.taskq = Queue() #queue where the ready tasks are inserted self.operq = Queue() self.graph = graph self.tasks = [] worker_conns = [] self.conn = [] self.waiting = [] # queue containing idle workers self.n_workers = n_workers # number of workers # keeps track of the number of tasks sent to each worker without a request from the worker (due to affinity) self.pending_tasks = [0] * n_workers for i in range(n_workers): sched_conn, worker_conn = Pipe() worker_conns += [worker_conn] self.conn += [sched_conn] self.workers = [ Worker(self.graph, self.operq, worker_conns[i], i) for i in range(n_workers) ] if mpi_enabled: self.mpi_handle() else: self.mpi_rank = None def mpi_handle(self): """ MPI implementation for the dataflow. """ from mpi4py import MPI comm = MPI.COMM_WORLD rank = comm.Get_rank() self.mpi_size = comm.Get_size() self.mpi_rank = rank self.n_slaves = self.mpi_size - 1 self.keep_working = True if rank == 0: print "I am the master. There are %s mpi processes. (hostname = %s)" % ( self.mpi_size, MPI.Get_processor_name()) self.pending_tasks = [0] * self.n_workers * self.mpi_size self.outqueue = Queue() def mpi_input(inqueue): while self.keep_working: msg = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG) # print "MPI Received opermsg from slave." inqueue.put(msg) def mpi_output(outqueue): while self.keep_working: task = outqueue.get() if task is not None: # task == None means termination # print "MPI Sending task to slave node." dest = task.workerid / self.n_workers # destination mpi process comm.send(task, dest=dest, tag=Scheduler.TASK_TAG) else: self.keep_working = False mpi_terminate() def mpi_terminate(): print "MPI TERMINATING" for i in xrange(0, self.mpi_size): comm.send(None, dest=i, tag=Scheduler.TERMINATE_TAG) t_in = threading.Thread(target=mpi_input, args=(self.operq, )) t_out = threading.Thread(target=mpi_output, args=(self.outqueue, )) else: print "I am a slave. (hostname = %s)" % MPI.Get_processor_name() # slave self.inqueue = Queue() for worker in self.workers: worker.wid += rank * self.n_workers status = MPI.Status() def mpi_input(inqueue): while self.keep_working: task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status) if status.Get_tag() == Scheduler.TERMINATE_TAG: self.keep_working = False print "MPI received termination." self.terminate_workers(self.workers) else: # print "MPI Sending task to worker in slave." workerid = task.workerid connid = workerid % self.n_workers self.conn[connid].send(task) self.operq.put(None) def mpi_output(outqueue): while self.keep_working: msg = outqueue.get() if msg is not None: # print "MPI send opermsg to master." 
comm.send(msg, dest=0, tag=0) t_in = threading.Thread(target=mpi_input, args=(self.inqueue, )) t_out = threading.Thread(target=mpi_output, args=(self.operq, )) threads = [t_in, t_out] self.threads = threads for t in threads: t.start() def propagate_op(self, oper): dst = self.graph.nodes[oper.dstid] dst.inport[oper.dstport] += [oper] args = dst.match() if args is not None: self.issue(dst, args) def check_affinity(self, task): node = self.graph.nodes[task.nodeid] if node.affinity is None: return None affinity = node.affinity[0] if len(node.affinity) > 1: node.affinity = node.affinity[1:] + [node.affinity[0]] return affinity def issue(self, node, args): # print "Args %s " %args task = Task(node.f, node.id, args) self.tasks += [task] def all_idle(self, workers): # print [(w.idle, w.name) for w in workers] # print "All idle? %s" %reduce(lambda a, b: a and b, [w.idle for w in workers]) if self.mpi_rank == 0: return len(self.waiting) == self.n_workers * self.mpi_size else: return len(self.waiting) == self.n_workers def terminate_workers(self, workers): print "Terminating workers %s %d %d" % (self.all_idle( self.workers), self.operq.qsize(), len(self.tasks)) if self.mpi_rank == 0: self.outqueue.put(None) for t in self.threads: t.join() for worker in workers: worker.terminate() def start(self): """ Starts the processing dataflow environment. """ operq = self.operq print "Roots %s" % [r for r in self.graph.nodes if len(r.inport) == 0] for root in [r for r in self.graph.nodes if len(r.inport) == 0]: task = Task(root.f, root.id) self.tasks += [task] for worker in self.workers: print "Starting %s" % worker.wid worker.start() if self.mpi_rank == 0 or self.mpi_rank is None: # it this is the leader process or if mpi is not being used print "Main loop" self.main_loop() def main_loop(self): tasks = self.tasks operq = self.operq workers = self.workers while len(tasks) > 0 or not self.all_idle( self.workers) or operq.qsize() > 0: opersmsg = operq.get() for oper in opersmsg: if oper.val is not None: self.propagate_op(oper) wid = opersmsg[0].wid if wid not in self.waiting and opersmsg[0].request_task: if self.pending_tasks[wid] > 0: self.pending_tasks[wid] -= 1 else: self.waiting += [ wid ] # indicate that the worker is idle, waiting for a task while len(tasks) > 0 and len(self.waiting) > 0: task = tasks.pop(0) wid = self.check_affinity(task) if wid is not None: if wid in self.waiting: self.waiting.remove(wid) else: self.pending_tasks[wid] += 1 else: wid = self.waiting.pop(0) # print "Got opermsg from worker %d" %wid if wid < self.n_workers: # local worker worker = workers[wid] self.conn[worker.wid].send(task) else: task.workerid = wid self.outqueue.put(task) print "Waiting %s" % self.waiting self.terminate_workers(self.workers)
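In the MPI path above, the master and the slaves distinguish work from shutdown purely by the message tag (TASK_TAG vs TERMINATE_TAG). The following is a stripped-down sketch of that tag dispatch with mpi4py; the payloads are illustrative, not Sucuri Task objects.

# Stripped-down sketch of the tag-based MPI dispatch used by mpi_handle()
# (payloads are illustrative, not Sucuri Task objects).
from mpi4py import MPI

TASK_TAG = 0
TERMINATE_TAG = 1

comm = MPI.COMM_WORLD
if comm.Get_rank() == 0:
    # master: send one piece of work to each slave, then tell it to stop
    for dest in range(1, comm.Get_size()):
        comm.send({"work": dest}, dest=dest, tag=TASK_TAG)
        comm.send(None, dest=dest, tag=TERMINATE_TAG)
else:
    # slave: dispatch on the tag of each incoming message
    status = MPI.Status()
    while True:
        msg = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        if status.Get_tag() == TERMINATE_TAG:
            break
        print("got task: %r" % (msg,))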
from multiprocessing import Process, Queue
import threading
# import queue


# def f(q):
#     q.put([42, None, 'hello'])
def f(qq):
    qq.put([42, None, 'hello'])


if __name__ == '__main__':
    q = Queue()
    # p = threading.Thread(target=f,)
    p = Process(target=f, args=(q,))
    p.start()
    print(q.get())    # prints "[42, None, 'hello']"
    p.join()
def calc_features(self,num_threads=1,block_size=3000): """ Calculate the features for every observation using the appended Features instances. num_threads: number of threads to run on block_size: number of seqeuences to put on a single process """ # Create a list of feature functions... feature_names = [] self._feature_functions = [] num_features = 0 for e in self._features_engines: num_features += e.num_features feature_names.append(e.features) self._feature_functions.append(e.score) # Create a compiled list of feature names self._feature_names = np.concatenate(feature_names) # If enough threads are specified that only a few threads would start, # make the block size smaller if len(self._sequences)//num_threads < block_size: block_size = len(self._sequences)//num_threads + 20 # Split squences in to blocks of block_size block_edges = [] for i in range(0,len(self._sequences),block_size): block_edges.append(i) block_edges.append(len(self._sequences) - 1) # Start a process for each thread proc_list = [] queue_list = [] out = [] # Go through each sequence for i in range(len(block_edges)-1): first_seq = block_edges[i] last_seq = block_edges[i+1] queue_list.append(Queue()) proc_list.append(Process(target=self._calc_features_on_thread, args=(first_seq,last_seq,queue_list[-1]))) proc_list[-1].start() # If we've capped our number of threads, wait until one of the # processes finishes to move on if (len(queue_list) == num_threads) or (i == len(block_edges) - 2): waiting = True while waiting: # Go through queues for j, q in enumerate(queue_list): # Try to get output on queue. If output is there, get # the output and then remove the associated process and # queue try: out.append(q.get(block=True,timeout=0.1)) p = proc_list.pop(j) queue_list.pop(j) waiting = False break except queue_module.Empty: pass # If we're on the last block, wait until the queue is # completely empty before proceeding if len(queue_list) != 0 and i == (len(block_edges) - 2): waiting = True # Load results into self._features self._features = np.zeros((len(self._sequences),num_features),dtype=float) for o in out: self._features[o[0]:o[1],:] = o[2]
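calc_features splits the sequences into blocks, hands each block to a Process with its own Queue, and stitches the results back together by index range. A minimal sketch of that fan-out / collect-by-slice pattern follows; the worker and data are placeholders, not the Features engines from the class.

# Minimal sketch of the block fan-out / collect-by-slice pattern used by
# calc_features (the worker and data here are placeholders).
from multiprocessing import Process, Queue

def _score_block(first, last, data, q):
    # stand-in "scoring": double each value in the assigned slice
    q.put((first, last, [2 * x for x in data[first:last]]))

if __name__ == '__main__':
    data = list(range(10))
    blocks = [(0, 5), (5, 10)]          # block edges, as in calc_features
    procs, queues = [], []
    for first, last in blocks:
        q = Queue()
        p = Process(target=_score_block, args=(first, last, data, q))
        p.start()
        procs.append(p)
        queues.append(q)
    results = [None] * len(data)
    for q in queues:
        first, last, scores = q.get()   # drain before joining to avoid blocking
        results[first:last] = scores
    for p in procs:
        p.join()
    print(results)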
def tail_logs(self, service, levels, components, clusters, raw_mode=False): """Sergeant function for spawning off all the right log tailing functions. NOTE: This function spawns concurrent processes and doesn't necessarily worry about cleaning them up! That's because we expect to just exit the main process when this function returns (as main() does). Someone calling this function directly with something like "while True: tail_paasta_logs()" may be very sad. NOTE: We try pretty hard to supress KeyboardInterrupts to prevent big useless stack traces, but it turns out to be non-trivial and we fail ~10% of the time. We decided we could live with it and we're shipping this to see how it fares in real world testing. Here are some things we read about this problem: * http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool * http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/ * http://bryceboe.com/2010/08/26/python-multiprocessing-and-keyboardinterrupt/ We could also try harder to terminate processes from more places. We could use process.join() to ensure things have a chance to die. We punted these things. It's possible this whole multiprocessing strategy is wrong-headed. If you are reading this code to curse whoever wrote it, see discussion in PAASTA-214 and https://reviewboard.yelpcorp.com/r/87320/ and feel free to implement one of the other options. """ queue = Queue() spawned_processes = [] def callback(component, stream_info, scribe_env, cluster): kw = { 'scribe_env': scribe_env, 'service': service, 'levels': levels, 'components': components, 'clusters': clusters, 'queue': queue, 'filter_fn': stream_info.filter_fn, } if stream_info.per_cluster: kw['stream_name'] = stream_info.stream_name_fn( service, cluster) kw['clusters'] = [cluster] else: kw['stream_name'] = stream_info.stream_name_fn(service) process = Process(target=self.scribe_tail, kwargs=kw) spawned_processes.append(process) process.start() self.run_code_over_scribe_envs(clusters=clusters, components=components, callback=callback) # Pull things off the queue and output them. If any thread dies we are no # longer presenting the user with the full picture so we quit. # # This is convenient for testing, where a fake scribe_tail() can emit a # fake log and exit. Without the thread aliveness check, we would just sit # here forever even though the threads doing the tailing are all gone. # # NOTE: A noisy tailer in one scribe_env (such that the queue never gets # empty) will prevent us from ever noticing that another tailer has died. while True: try: # This is a blocking call with a timeout for a couple reasons: # # * If the queue is empty and we get_nowait(), we loop very tightly # and accomplish nothing. # # * Testing revealed a race condition where print_log() is called # and even prints its message, but this action isn't recorded on # the patched-in print_log(). This resulted in test flakes. A short # timeout seems to soothe this behavior: running this test 10 times # with a timeout of 0.0 resulted in 2 failures; running it with a # timeout of 0.1 resulted in 0 failures. # # * There's a race where thread1 emits its log line and exits # before thread2 has a chance to do anything, causing us to bail # out via the Queue Empty and thread aliveness check. # # We've decided to live with this for now and see if it's really a # problem. 
The threads in test code exit pretty much immediately # and a short timeout has been enough to ensure correct behavior # there, so IRL with longer start-up times for each thread this # will surely be fine. # # UPDATE: Actually this is leading to a test failure rate of about # 1/10 even with timeout of 1s. I'm adding a sleep to the threads # in test code to smooth this out, then pulling the trigger on # moving that test to integration land where it belongs. line = queue.get(True, 0.1) print_log(line, levels, raw_mode) except Empty: try: # If there's nothing in the queue, take this opportunity to make # sure all the tailers are still running. running_processes = [ tt.is_alive() for tt in spawned_processes ] if not running_processes or not all(running_processes): log.warn( 'Quitting because I expected %d log tailers to be alive but only %d are alive.' % ( len(spawned_processes), running_processes.count(True), )) for process in spawned_processes: if process.is_alive(): process.terminate() break except KeyboardInterrupt: # Die peacefully rather than printing N threads worth of stack # traces. # # This extra nested catch is because it's pretty easy to be in # the above try block when the user hits Ctrl-C which otherwise # dumps a stack trace. log.warn('Terminating.') break except KeyboardInterrupt: # Die peacefully rather than printing N threads worth of stack # traces. log.warn('Terminating.') break
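The drain loop above blocks on queue.get with a short timeout and, when the queue is empty, checks whether the tailer processes are still alive before bailing out. A reduced sketch of that pattern follows; fake_tailer stands in for scribe_tail and is not the real API.

# Reduced sketch of the drain-with-timeout-then-check-liveness loop used by
# tail_logs (fake_tailer stands in for scribe_tail).
from multiprocessing import Process, Queue
try:
    from Queue import Empty      # Python 2
except ImportError:
    from queue import Empty      # Python 3

def fake_tailer(q):
    q.put("a log line")

if __name__ == '__main__':
    queue = Queue()
    tailers = [Process(target=fake_tailer, args=(queue,)) for _ in range(2)]
    for p in tailers:
        p.start()
    while True:
        try:
            line = queue.get(True, 0.1)          # short timeout, see notes above
            print(line)
        except Empty:
            if not any(p.is_alive() for p in tailers):
                break                            # all tailers gone; stop draining
    for p in tailers:
        p.join()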
class ConcurrentTestRunner(TestRunner): """TestRunner that uses the multiprocessing package to execute tests concurrently. """ def __init__(self, options, subproc_queue): super(ConcurrentTestRunner, self).__init__(options, subproc_queue) self.num_procs = options.num_procs # only do concurrent stuff if num_procs > 1 if self.num_procs > 1: self.get_iter = self.run_concurrent_tests # Create queues self.task_queue = Queue() self.done_queue = Queue() self.procs = [] # Start worker processes for i in range(self.num_procs): worker_id = "%d_%d" % (os.getpid(), i) self.procs.append( Process(target=worker, args=(self.task_queue, self.done_queue, subproc_queue, worker_id))) for proc in self.procs: proc.start() def run_concurrent_tests(self, input_iter): """Run tests concurrently.""" it = iter(input_iter) numtests = 0 try: for proc in self.procs: self.task_queue.put(advance_iterator(it)) numtests += 1 except StopIteration: pass else: try: while numtests: stop = False results = self.done_queue.get() for result in results: yield result if self.stop: if (result.status == 'FAIL' and not result.expected_fail) or ( result.status == 'OK' and result.expected_fail): stop = True break if stop: break numtests -= 1 self.task_queue.put(advance_iterator(it)) numtests += 1 except StopIteration: pass for proc in self.procs: self.task_queue.put('STOP') for i in range(numtests): results = self.done_queue.get() for result in results: yield result for proc in self.procs: proc.join()
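The runner keeps num_procs workers fed from task_queue, collects their results from done_queue, and shuts down by pushing one 'STOP' sentinel per worker. A minimal sketch of that worker-pool protocol follows; the worker body is a placeholder, not the runner's real worker.

# Minimal sketch of the task_queue / done_queue / 'STOP' protocol used by
# ConcurrentTestRunner (the worker body is a placeholder).
from multiprocessing import Process, Queue

def worker(task_queue, done_queue):
    for task in iter(task_queue.get, 'STOP'):
        done_queue.put('ran %s' % task)

if __name__ == '__main__':
    task_queue, done_queue = Queue(), Queue()
    procs = [Process(target=worker, args=(task_queue, done_queue))
             for _ in range(2)]
    for p in procs:
        p.start()
    for task in ['test_a', 'test_b', 'test_c']:
        task_queue.put(task)
    for _ in range(3):
        print(done_queue.get())
    for p in procs:
        task_queue.put('STOP')       # one sentinel per worker
    for p in procs:
        p.join()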
# class labels the SSD model was trained to detect, then generate a set of
# bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
# net = cv2.dnn.readNetFromTensorflow('ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb', 'ssd_mobilenet_v2_coco_2018_03_29/pipeline.config')
# net = cv2.dnn.readNetFromTensorflow('models/frozen_inference_graph.pb', 'models/ssd_mobilenet_v2_coco_2018_03_29.pbtxt')

# initialize the input queue (frames), output queue (detections),
# and the list of actual detections returned by the child process
inputQueue = Queue(maxsize=1)
outputQueue = Queue(maxsize=1)
detections = None

# construct a child process *independent* from our main process of
# execution
print("[INFO] starting process...")
p = Process(target=classify_frame, args=(net, inputQueue, outputQueue,))
p.daemon = True
p.start()

engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[2].id)
engine.setProperty('rate', 100)
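With both queues capped at maxsize=1, the usual companion loop only offers the newest frame when inputQueue is empty and polls outputQueue without blocking, so slow detections never stall capture. A hedged sketch of that loop follows; grab_frame and draw_detections are placeholders, not functions from this script.

# Sketch of the non-blocking hand-off loop that typically pairs with the
# maxsize=1 queues above (grab_frame/draw_detections are placeholders).
while True:
    frame = grab_frame()                    # placeholder frame source
    if inputQueue.empty():
        inputQueue.put(frame)               # offer only the newest frame
    if not outputQueue.empty():
        detections = outputQueue.get()      # pick up the latest detections
    if detections is not None:
        draw_detections(frame, detections)  # placeholder renderer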
"""获取总页数""" for url in self.comment_url: html=self.download.download(url) comments_page=self.parse.parses(html) yield comments_page['result']['maxPage'], url @property def gen_comment_urls(self): """生成要爬取的url 即每个skuid评论的所有链接""" for pages, url in self.get_comment_page: url = re.sub('page=(\d+)?&', 'page={}&', url) for page in range(1, pages): yield url.format(page) task_queue = Queue() # 任务队列 result_queue = Queue() # 结果队列 def return_task(): """返回任务队列""" return task_queue def return_result(): """返回结果队列""" return result_queue def get_result(result): """获取结果"""
def execute(self, left_queue, right_operator, out, processqueue=Queue()):
    self.left_queue = left_queue
    self.right_operator = right_operator
    self.qresults = out
    # print "right_operator", right_operator
    tuple1 = None
    tuple2 = None
    right_queues = dict()
    while (not(tuple1 == "EOF") or (len(right_queues) > 0)):
        # Try to get and process tuple from left queue
        if not(tuple1 == "EOF"):
            try:
                tuple1 = self.left_queue.get(False)
                # print "tuple1: "+str(tuple1)
                instance = self.probeAndInsert1(tuple1, self.right_table,
                                                self.left_table, time())
                if instance:
                    # the join variables have not been used to
                    # instantiate the right_operator
                    new_right_operator = self.makeInstantiation(tuple1, self.right_operator)
                    # print "new op: "+str(new_right_operator)
                    resource = self.getResource(tuple1)
                    queue = Queue()
                    right_queues[resource] = queue
                    # print "new_right_operator.__class__", new_right_operator.__class__
                    # print "new_right_operator.left.__class__", new_right_operator.left.__class__
                    new_right_operator.execute(queue)
                    # p2 = Process(target=new_right_operator.execute, args=(queue,))
                    # p2.start()
            except Empty:
                pass
            except TypeError:
                # TypeError: in resource = resource + tuple[var], when the tuple is "EOF".
                pass
            except Exception as e:
                # print "Unexpected error:", sys.exc_info()[0]
                print(e)
                pass

        toRemove = []  # stores the queues that have already received all their tuples
        for r in right_queues:
            try:
                q = right_queues[r]
                tuple2 = q.get(False)
                # print "tuple2", tuple2
                if tuple2 == "EOF":
                    toRemove.append(r)
                else:
                    self.probeAndInsert2(r, tuple2, self.left_table,
                                         self.right_table, time())
            except Exception:
                # This catches:
                #   Empty: in tuple2 = q.get(False), when the queue is empty.
                #   TypeError: in att = att + tuple[var], when the tuple is "EOF".
                # print "Unexpected error:", sys.exc_info()[0]
                pass

        for r in toRemove:
            del right_queues[r]

    # Put EOF in queue and exit.
    self.qresults.put("EOF")
    return
def multiprocessing(self, in_data, nproc=8, batch_size_chars=1000000, only_cui=False, addl_info=[]): r''' Run multiprocessing NOT FOR TRAINING in_data: an iterator or array with format: [(id, text), (id, text), ...] nproc: number of processors batch_size_chars: size of a batch in number of characters return: an list of tuples: [(id, doc_json), (id, doc_json), ...] ''' if self._meta_annotations: # Hack for torch using multithreading, which is not good here import torch torch.set_num_threads(1) # Create the input output for MP in_q = Queue(maxsize=4*nproc) manager = Manager() out_dict = manager.dict() out_dict['processed'] = [] # Create processes procs = [] for i in range(nproc): p = Process(target=self._mp_cons, kwargs={'in_q': in_q, 'out_dict': out_dict, 'pid': i, 'only_cui': only_cui, 'addl_info': addl_info}) p.start() procs.append(p) data = [] nchars = 0 for id, text in in_data: data.append((id, str(text))) nchars += len(str(text)) if nchars >= batch_size_chars: in_q.put(data) data = [] nchars = 0 # Put the last batch if it exists if len(data) > 0: in_q.put(data) for _ in range(nproc): # tell workers we're done in_q.put(None) for p in procs: p.join() # Close the queue as it can cause memory leaks in_q.close() out = [] for key in out_dict.keys(): if 'pid' in key: data = out_dict[key] out.extend(data) # Sometimes necessary to free memory out_dict.clear() del out_dict return out
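The method expects an iterable of (id, text) pairs and returns a list of (id, doc_json) tuples; batches are cut by total character count before being placed on the bounded input queue. A brief, hypothetical call follows; the documents and the `cat` object are placeholders.

# Hypothetical call of multiprocessing() (`cat` stands for whatever object
# defines the method above; the documents are placeholders).
docs = [(1, "First document text."), (2, "Second document text.")]
results = cat.multiprocessing(docs, nproc=2, batch_size_chars=500000)
# `results` is a list of (id, doc_json) tuples, per the docstring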
def __init__(self, vars):
    self.left_table = dict()
    self.right_table = dict()
    self.qresults = Queue()
    self.vars = vars
class TestNameSpace(object):
    async_task_was_done = Queue()
    async_task_was_run = False
    ready_to_proceed_with_second_cycle = Queue()
    second_cycle_complete = Queue()