Python Queue示例，multiprocessing.Queue Python示例

示例#1

0

显示文件

文件： FockStateDiag.py 项目： ZachGlassman/SpinorBECSimulation

def compute_coherent_state_multi(n,m,n0,q,c,t_final,t_step,a_range,mag_range):
    """for a coherent state, we will asume that the magnetization and seed are whats conserved, NOT n0
    the probability is the multiplication of the probability for m and n"""
    #compute all the elements that need to be computed
    n_ele = np.arange(n-a_range, n + a_range+1,1)
    m_ele = np.arange(m-mag_range, m + mag_range+1,1)
    nm = [[nn,mm] for nn in n_ele for mm in m_ele]
    pairs = n-n0
    #loop through and compute details, save to multidimensional arrays
    mean = np.zeros((len(nm),int(t_final/t_step)))
    std =  np.zeros((len(nm),int(t_final/t_step)))
    total_prob = 0
    #set up multiprocessing
    queue = Queue(5)
    procs = {}
    for i, state in enumerate(nm):
        prob = calc_prob(n,m,state[0],state[1])
        total_prob += prob
        procs[i] = Process(target = calc_state_with_prob,
          args = (queue, int(state[0]),int(state[1]),int(state[0]-pairs-state[1]),q,c,t_final,t_step,prob))
        procs[i].start()
    #get answers
    for i in range(len(nm)):
        t, mean[i], std[i] = queue.get()

    return t, np.sum(mean,axis=0)/total_prob, np.sum(std,axis=0)/total_prob

示例#2

0

显示文件

文件： tracerbull.py 项目： atbrox/tracerbull

    def start_services(services, codegen, importcode, tornadoapp, forker, boot_function, template_path="."):

        # loop through all services
        # create hosts file
        # update kill-file for them (processes)
        # create all files (websocket server, and js/python client files)
        host_file = {}
        kill_file = {}
        queue = Queue()
        for service in services:
            websocket_server_code = codegen(
                service, "websocket_server_template.tpl", loader=template.Loader(template_path)
            )
            websocket_server_module = importcode(websocket_server_code)
            websocket_server_class_name = "%s_websocket" % (service["servicename"])
            websocket_server_application = tornadoapp(
                [(r"/", getattr(websocket_server_module, websocket_server_class_name))]
            )
            websocket_server_process = forker(
                0, queue, boot_function, websocket_server_application, service["servicename"], 0, service
            )
            print websocket_server_process
            print "WSS, ", queue.qsize()

        return queue

示例#3

0

显示文件

文件： dwn.py 项目： pastebt/you-get-wui

class Manager(Process):
    def __init__(self, wnum=3):
        Process.__init__(self)
        self.s2m = Queue()  # message Manager receive from worker and svr
        self.m2w = Queue()  # message send to works
        self.works = [0] * wnum
        for i in range(wnum):
            self.works[i] = Worker(self.s2m, self.m2w)
            self.works[i].start()

    def stop(self):
        for w in self.works:
            self.m2w.put(None)  # FIXME should call worker.Terminal?

    """
Video Site: bilibili.com
Title:      【BD‧1080P】【高分剧情】鸟人-飞鸟侠 2014【中文字幕】
Type:       Flash video (video/x-flv)
Size:       3410.85 MiB (3576536465 Bytes)

Downloading 【BD‧1080P】【高分剧情】鸟人-飞鸟侠 2014【中文字幕】.flv ...
  0.7% ( 22.2/3410.9MB) [#
    """

    def run(self):
        # reset DB flags
        kuos = get_by_flag(WORK)
        for uo in kuos:
            set_flag(uo.mid, STOP)
        tuos = get_by_flag(WAIT)
        for uo in tuos:
            set_flag(uo.mid, STOP)

        while True:
            msg = self.s2m.get()
            # print("pid=%s, self.s2m.get=%s" % (os.getpid(), repr(msg)))
            who = msg.get("who")
            if who == "worker":
                self.handle_mid(msg["mid"], msg["dat"])
            elif who == "svr":
                # self.m2w.put(msg['mid'])
                self.m2w.put(pick_url(msg["mid"]))
            elif who == "error":
                sys.stderr.write(msg["dat"])  # FIXME
                sys.stderr.write("\n")
            else:
                sys.stderr.write("Unknow msg:\n")
                sys.stderr.write(msg)
                sys.stderr.write("\n")

    def handle_mid(self, mid, dat):
        print(dat)
        if dat.startswith("Process "):
            dd = dat.split()
            act = dd[2].lower()
            print("mid=%s, act=%s" % (mid, act))
            set_flag(mid, act)
        elif dat.startswith("Downloading "):
            print("mid=[%s]" % mid)
            update_filename(mid, dat[12:-5])

示例#4

0

显示文件

文件： WinstoneSessionCatcher.py 项目： votadlos/JavaCG

def recoverPRNGState(cookie,timeMillisEstimate,PRNGMillisEstimate,IPAddr,serverPort,numWorkers,chunkSize):
    global PRNGMillisDelta
    global initalSeek
    
    q = Queue(0)
    i = 0
    
    if PRNGMillisDelta%chunkSize > 0:
        q.put((PRNGMillisEstimate+PRNGMillisDelta-PRNGMillisDelta%chunkSize,PRNGMillisEstimate+PRNGMillisDelta,initalSeek))
    
    for i in range(PRNGMillisEstimate,PRNGMillisEstimate+PRNGMillisDelta-PRNGMillisDelta%chunkSize,chunkSize):
        q.put((i,i+chunkSize,initalSeek))
        
    desc = []

    seedValue = Value('d', 0)

    # Start worker processes and assign work.                     
    for i in range(numWorkers):
        p = Process(target=recoverPRNGStateWorker, args=(cookie,timeMillisEstimate,q,IPAddr,serverPort,seedValue))
        p.start()
        desc.append(p)
        
    # Wait for worker processes finish.       
    for p in desc:
        p.join()

    return long(seedValue.value)

示例#5

0

显示文件

文件： extractDailyInteraction.py 项目： Tskatom/company_market

def main():
    arg = parse_args()
    folder = arg.fold
    core = arg.core
    output = arg.out
    start = arg.start
    if start:
        start = start.replace('-', '') + '000000'

    task_queue = Queue()
    result_queue = Queue()

    task_count = create_task(folder, task_queue, start)
    print task_count
    for i in range(core):
        Process(target=worker, args=(task_queue, result_queue)).start()

    #send stop signal
    for i in range(core):
        task_queue.put('STOP')

    #print result
    out_files = {}
    for i in range(task_count):
        actions = result_queue.get()
        user = actions["user"]
        for day in actions["actions"]:
            if day not in out_files:
                out_files[day] = open(os.path.join(output, day), "w")
            out_files[day].write(json.dumps({"user": user, "actions": actions["actions"][day]}) + "\n")
    for day in out_files:
        out_files[day].flush()
        out_files[day].close()

示例#6

0

显示文件

文件： processPatchesv4_Rpy1.py 项目： talonsensei/Bfx_scripts

def processFiles(patch_dir):
    root = os.getcwd()
    glbl.data_dirs = {}
    if root != patch_dir: working_path = root+"/"+patch_dir
    else: working_path = root

    for path, dirs, files in os.walk(working_path):
        if len(dirs) == 0: glbl.data_dirs[path] = ''
    

    # Multiprocessing Section
    #########################################
    Qids = glbl.data_dirs.keys()
    manager = Manager()                                      # creates shared memory manager object
    results = manager.dict()                                 # Add dictionary to manager, so it can be accessed across processes
    nextid = Queue()                                         # Create Queue object to serve as shared id generator across processes
    for qid in Qids: nextid.put(qid)                         # Load the ids to be tested into the Queue
    for x in range(0,multiprocessing.cpu_count()):           # Create one process per logical CPU
        p = Process(target=processData, args=(nextid,results)) # Assign process to processCBR function, passing in the Queue and shared dictionary
        glbl.jobs.append(p)                                   # Add the process to a list of running processes
        p.start()                                             # Start process running
    for j in glbl.jobs:
        j.join()                                              # For each process, join them back to main, blocking on each one until finished
    
    # write out results
    c = 1
    sets = results.keys()
    sets.sort()
    for x in sets:
        if results[x] != 'None':
            FINAL = open('result'+str(c)+'.txt','w')
            n = "\n************************************************************************************************\n"
            FINAL.write(n+"* "+x+'    *\n'+n+results[x]+"\n")
            FINAL.close()     
            c += 1

示例#7

0

显示文件

文件： WorldAnalysis.py 项目： bot190/MinecraftAnalysis

def main(world_folder, replacement_file_name):
    global replacements
    world = nbt.world.WorldFolder(world_folder)
    logger = configure_logging()
    logger.info("Starting processing of %s", world_folder)
    if not isinstance(world, nbt.world.AnvilWorldFolder):
        logger.error("%s is not an Anvil world" % (world_folder))
        return 65 # EX_DATAERR
    if replacement_file_name != None:
        logger.info("Using Replacements file: %s", replacement_file_name)
        with open(replacement_file_name, 'r') as replacement_file:
            replacements = json.load(replacement_file)
    # get list of region files, going to pass this into function to process region
    region_files = world.get_regionfiles();
    
    # Parallel
    q = Queue()
    lp = threading.Thread(target=logger_thread, args=[q])
    lp.start()
    p = Pool(initializer=process_init, initargs=[q,replacements], maxtasksperchild=1)
    region_data = p.map(process_region, region_files)
    # Map has finished up, lets close the logging QUEUE
    q.put(None)
    lp.join()
    
    # Not Parallel
#     region_data = map(process_region, region_files)
    
    # Write output data
    write_block_data(region_data,"output.txt")
    return 0

示例#8

0

显示文件

文件： mp_workers.py 项目： CRYPTOlab/python-resin

def test():
    NUMBER_OF_PROCESSES = 4
    TASKS1 = [(mul, (i, 7)) for i in range(20)]
    TASKS2 = [(plus, (i, 8)) for i in range(10)]

    # Create queues
    task_queue = Queue()
    done_queue = Queue()

    # Submit tasks
    for task in TASKS1:
        task_queue.put(task)

    # Start worker processes
    for i in range(NUMBER_OF_PROCESSES):
        Process(target=worker, args=(task_queue, done_queue)).start()

    # Get and print results
    print 'Unordered results:'
    for i in range(len(TASKS1)):
        print '\t', done_queue.get()

    # Add more tasks using `put()`
    for task in TASKS2:
        task_queue.put(task)

    # Get and print some more results
    for i in range(len(TASKS2)):
        print '\t', done_queue.get()

    # Tell child processes to stop
    for i in range(NUMBER_OF_PROCESSES):
        task_queue.put('STOP')

示例#9

0

显示文件

文件： sniffer.py 项目： BwRy/sentrygun

def send_probe_requests(interface=None, ssid=None):

    # initialize shared memory
    results = Queue()

    # start sniffer before sending out probe requests
    p = Process(target=sniffer, args=(interface, results,))
    p.start()

    # give sniffer a chance to initialize so that we don't miss
    # probe responses
    time.sleep(3)

    # send out probe requests... sniffer will catch any responses
    ProbeReq(ssid=ssid, interface='wlp3s0')

    # make sure to get results from shared memory before allowing 
    # sniffer to join with parent process 
    probe_responses = results.get()

    # join sniffer with its parent process
    p.join()

    # return results
    return probe_responses

示例#10

0

显示文件

文件： challenge20.py 项目： colega/tuentichallenge2012

def main():
	# Threads we will use, don't change this because each thread calculates keys for 100 games exactly
	# (You can change this if you know how, I'm too euphoric now to do more flexibility)
	start = time();
	threads = 10;
	for line in sys.stdin:
		# Parsing the stdin
		encryptedMessage,encryptedGames = line.strip().split(':');
		encryptedGames = encryptedGames.split('~');
		# Queue with keys
		q = Queue();
		# Threads
		for i in range(10):
			p = Process(target=keysFinder, args=(encryptedGames[i*100:(i+1)*100],q));
			p.start();
		# Number of threads already finished
		finished = 0;
		keys = [];
		while finished < threads:
			keys += q.get();
			finished+=1;

		# From all keys, try which one decrypts a valid message
		em = binascii.unhexlify(encryptedMessage);
		found = False;
		for key in keys:
			x = AES.new(key);
			dec = x.decrypt(em);
			if (isCorrect(dec)):
				found = True;
				# Make unpadding and print. Voila!
				print removePadding(dec.strip());
	if (sys.argv[1] == 'benchmark'):
		print "Time elapsed: ",time()-start;

示例#11

0

显示文件

def genPairs(PNGMaps, compareMaps):
    pairA = []
    pairB = []
    # Maximum possible fitness
    totalFitness = len(compareMaps)
    threadsA = []
    threadsB = []
    # Thread safe way to get parent PNGMaps
    queueA = Queue()
    queueB = Queue()
    # Create a list of threads to get a PNGMap
    for listMap in PNGMaps:
        threadA = Process(target=randPair , args=(PNGMaps, compareMaps, totalFitness, queueA))
        threadB = Process(target=randPair , args=(PNGMaps, compareMaps, totalFitness, queueB))
        threadA.start()
        threadB.start()
        threadsA.append(threadA)
        threadsB.append(threadB)
    # Get the parents from the queues
    while not len(pairA) == len(PNGMaps):
        pairA.append(queueA.get())
    while not len(pairB) == len(PNGMaps):
        pairB.append(queueB.get())
    # Join the threads with the current one
    for thread in threadsA:
        thread.join()
    for thread in threadsB:
        thread.join()
    # Return the pair of PNGMaps
    return pairA, pairB

示例#12

0

显示文件

文件： test_virtwho.py 项目： mtulio/virt-who

    def test_same_report_filtering(self, fromConfig, fromOptions, getLogger):
        def fake_virts(logger, config):
            new_fake_virt = Mock()
            new_fake_virt.config.name = config.name
            return new_fake_virt

        fromConfig.side_effect = fake_virts
        options = Mock()
        options.interval = 0
        options.oneshot = True
        options.print_ = False
        options.log_dir = ''
        options.log_file = ''
        virtwho = VirtWho(self.logger, options, config_dir="/nonexistant")

        queue = Queue()
        # Create another report with same hash
        report2 = HostGuestAssociationReport(self.config, self.fake_report.association)
        self.assertEqual(self.fake_report.hash, report2.hash)

        def send(report):
            report.state = AbstractVirtReport.STATE_FINISHED
            # Put second report when the first is done
            queue.put(report2)
            return True
        virtwho.send = Mock(side_effect=send)
        virtwho.queue = queue
        virtwho.retry_after = 1
        virtwho.configManager.addConfig(self.config)
        queue.put(self.fake_report)
        virtwho.run()

        self.assertEquals(virtwho.send.call_count, 1)

示例#13

0

显示文件

文件： UpDown.py 项目： PlanetHunt/nasaeol

class UpDown:

    def __init__(self, down_workers=2, up_workers=2, db=None):
        self.down_workers_num = down_workers
        self.up_workers_num = up_workers
        self.db = db
        self.base_url = "http://eol.jsc.nasa.gov/SearchPhotos/"
        self.down_workers = []
        self.up_workers = []
        self.to_upload = []
        self.q = Queue()

    def down_worker(self, download_url, image_id):
        """
        Download images and set the database after the download was complete.
        """
        down = ImageDownload(self.base_url + download_url)
        down.find_urls()
        if(down.dl()):
            self.db.update_image_downloaded(image_id, down.file_name)

    def up_worker(self, mission_id):
        """
        Check for images that are downloaded but not uploaded every minute.
        """
        while True:
            self.to_upload = self.db.get_to_upload(mission_id)
            print "No files to upload found!\n"
            if(len(list(self.to_upload)) > 0):
                print "Found a file to upload!\n"
                self.to_upload = list(self.db.get_to_upload(mission_id))
                self.q.put(self.to_upload)
            time.sleep(60)

示例#14

0

显示文件

文件： horizon-agent.py 项目： B-Rich/skyline

    def run(self):
        logger.info('starting horizon agent')
        listen_queue = Queue(maxsize=settings.MAX_QUEUE_SIZE)
        pid = getpid()

        #If we're not using oculus, don't bother writing to mini
        try:
            skip_mini = True if settings.OCULUS_HOST == '' else False
        except Exception:
            skip_mini = True

        # Start the workers
        for i in range(settings.WORKER_PROCESSES):
            if i == 0:
                Worker(listen_queue, pid, skip_mini, canary=True).start()
            else:
                Worker(listen_queue, pid, skip_mini).start()

        # Start the listeners
        Listen(settings.PICKLE_PORT, listen_queue, pid, type="pickle").start()
        Listen(settings.UDP_PORT, listen_queue, pid, type="udp").start()

        # Start the roomba
        Roomba(pid, skip_mini).start()

        # Warn the Mac users
        try:
            listen_queue.qsize()
        except NotImplementedError:
            logger.info('WARNING: Queue().qsize() not implemented on Unix platforms like Mac OS X. Queue size logging will be unavailable.')

        # Keep yourself occupied, sucka
        while 1:
            time.sleep(100)

示例#15

0

显示文件

文件： test_virtwho.py 项目： mtulio/virt-who

    def test_report_hash_added_after_send(self, fromConfig, fromOptions, getLogger):
        # Side effect for fromConfig
        def fake_virts(logger, config):
            new_fake_virt = Mock()
            new_fake_virt.config.name = config.name
            return new_fake_virt

        fromConfig.side_effect = fake_virts
        options = Mock()
        options.interval = 0
        options.oneshot = True
        options.print_ = False
        options.log_file = ''
        options.log_dir = ''
        virtwho = VirtWho(self.logger, options, config_dir="/nonexistant")

        def send(report):
            report.state = AbstractVirtReport.STATE_FINISHED
            return True
        virtwho.send = Mock(side_effect=send)
        queue = Queue()
        virtwho.queue = queue
        virtwho.retry_after = 1
        virtwho.configManager.addConfig(self.config)
        virtwho.configManager.addConfig(self.second_config)
        queue.put(self.fake_report)
        queue.put(self.fake_domain_list)
        virtwho.run()

        self.assertEquals(virtwho.send.call_count, 2)
        self.assertEqual(virtwho.last_reports_hash[self.config.name], self.fake_report.hash)
        self.assertEqual(virtwho.last_reports_hash[self.second_config.name], self.fake_domain_list.hash)

示例#16

0

显示文件

文件： horizon-agent.py 项目： ftdysa/skyline

    def run(self):
        logger.info('starting horizon agent')
        listen_queue = Queue(maxsize=settings.MAX_QUEUE_SIZE)
        pid = getpid()

        # Start the workers
        for i in range(settings.WORKER_PROCESSES):
            Worker(listen_queue, pid).start()

        # Start the listeners
        Listen(settings.PICKLE_PORT, listen_queue, pid, type="pickle").start()
        Listen(settings.UDP_PORT, listen_queue, pid, type="udp").start()

        # Start the roomba
        Roomba(pid).start()

        # Warn the Mac users
        try:
            listen_queue.qsize()
        except NotImplementedError:
            logger.info('WARNING: Queue().qsize() not implemented on Unix platforms like Mac OS X. Queue size logging will be unavailable.')

        # Keep yourself occupied, sucka
        while 1:
            time.sleep(100)

示例#17

0

显示文件

文件： __init__.py 项目： yk/fuel

class BackgroundProcess(object):
    """A background process that reads batches and stores them in a queue.

    The :meth:`main` method needs to be called in order to start reading
    batches into the queue. Note that this process will run infinitely;
    start it as a :attr:`~multiprocessing.Process.daemon` to make sure it
    will get killed when the main process exits.

    Parameters
    ----------
    data_stream : :class:`.DataStream` or :class:`Transformer`
        The data stream from which to read batches.
    max_batches : int
        The maximum number of batches to store in the queue. If reached,
        the process wil block until a batch is popped from the queue.

    """
    def __init__(self, data_stream, max_batches):
        self.data_stream = data_stream
        self.batches = Queue(max_batches)
        self.run_background = True

    def main(self):
        while True:
            iterator = self.data_stream.get_epoch_iterator()
            for batch in iterator:
                self.batches.put(batch)
            self.batches.put(StopIteration)

    def get_next_data(self):
        return self.batches.get()

示例#18

0

显示文件

文件： printer_status.py 项目： 4711/status.futurice.com

def main():
    register_openers()

    printers = local_settings.PRINTERS
    statuses = []
    threads = []
    queue = Queue()
    for item in printers:
        p = multiprocessing.Process(target=clientprog, args=(queue, item))
        p.start()
        threads.append(p)
    for item in threads:
        try:
            item.join()
        except:
            pass
    while not queue.empty():
        statuses.append(queue.get())

    if len(statuses) == 0:
        print "Failed to connect to any printer"
        return
    data = {"printers": statuses, "timestamp": time.time()}
    json.dump(data, open("statuses.json", "w"))
    send(settings.UPLOAD_DESTINATION, "statuses.json")

示例#19

0

显示文件

文件： neo.py 项目： vtphan/neo

class TaskQueue:
    N = 4
    symb = string.ascii_letters + string.digits
    
    def __init__(self):
        self.tasks = Queue()
        self.done = Queue()
        self.results = {}
        self.processes = []
        for i in range(TaskQueue.N):
            self.processes.append(Process(target=self.run_tasks))
            self.processes[-1].start()
        threading.Thread(target=self.collect_results).start()

    def add(self, f, args):
        id = ''.join(random.choice(TaskQueue.symb) for i in range(15))
        self.tasks.put((id, f,args))
        return id

    def get(self, id):
        return self.results.pop(id, '_NotFound_')
            
    def run_tasks(self):
        for id, func, args in iter(self.tasks.get, 'STOP'):
            result = func(*args)
            self.done.put((id,result))

    def collect_results(self):
        for id, r in iter(self.done.get, 'STOP'):
            self.results[id] = r

示例#20

0

显示文件

文件： parcalc.py 项目： digideskio/Elastic

def ParCalculate(systems,calc,cleanup=True,block=True,prefix="Calc_"):
    '''
    Run calculators in parallel for all systems. 
    Calculators are executed in isolated processes and directories.
    The resulting objects are returned in the list (one per input system).
    '''

    if type(systems) != type([]) :
        sysl=[systems]
    else :
        sysl=systems

    if block :
        iq=Queue(len(sysl)+1)
        oq=Queue(len(sysl)+1)
            
        # Create workers    
        for s in sysl:
            __PCalcProc(iq, oq, calc, prefix=prefix, cleanup=cleanup).start()

        # Put jobs into the queue
        for n,s in enumerate(sysl):
            iq.put([n,s])
            # Protection against too quick insertion
            time.sleep(0.2)
        
        if verbose : 
            print("Workers started:", len(sysl))
        
       # Collect the results
        res=[]
        while len(res)<len(sysl) :
            n,s=oq.get()
            res.append([n,s])
            #print("Got from oq:", n, s.get_volume(), s.get_pressure())
    else :
        # We do not need the multiprocessing complications for non-blocking 
        # workers. We just run all in sequence.
        basedir=os.getcwd()
        res=[]
        for n,s in enumerate(sysl):
            s.set_calculator(copy.deepcopy(calc))
            s.get_calculator().block=block
            place=tempfile.mkdtemp(prefix=prefix, dir=basedir)
            os.chdir(place)
            s.get_calculator().working_dir=place
            #print("Start at :", place)
            if hasattr(calc, 'name') and calc.name=='Siesta':
                s.get_potential_energy()
            else:
                s.get_calculator().calculate(s)
            os.chdir(basedir)
            #print("Submited", s.get_calculator().calc_finished(), os.getcwd())
            # Protection against too quick insertion
            time.sleep(0.2)
            res.append([n,s])
        if verbose : 
            print("Workers started:", len(sysl))
            
    return [r for ns,s in enumerate(sysl) for nr,r in res if nr==ns]

示例#21

0

显示文件

文件： classifier_get_feature.py 项目： hungminhtran/caculate-readability-by-wordnet

def getFeatureMultiprocessing(subProcFunc, blwFile, outputFile, funcArgs, keyword=['Vietnamese_by_catalog', 'ppVietnamese_by_catalog']):
    START_TIME = time.time()
    # getFreqWordsForFileFromDict(['data/ppVietnamese_by_catalog/Easy/ct24/ct24 (100).txt',12.35,3, 4], 'data/TanSoTu.txt')
    # getDataNFeatureFromFile('test_data.txt', 'output/test_Vietnamese_output_classifier.csv', 'test')
    # X3 = getDataNFeatureFromFile('Difficult_data.txt', 'output/vietnamesewn_Difficult_output.csv', 3)
    # X1 = getDataNFeatureFromFile('Easy_data.txt','output/vietnamesewn_Easy_output.csv', 1)
    # X2 = getDataNFeatureFromFile('Normal_data.txt','output/vietnamesewn_Normal_output.csv', 2)
    _tempfile = open(blwFile, 'r')
    temp = _tempfile.read().splitlines()
    _tempfile.close()
    filesQueue = Queue()
    RESULT_QUEUE = Queue()
    for i in range(1, len(temp)):
            temp[i] = temp[i].split(',')
            temp[i][0] = re.sub(keyword[0], keyword[1], temp[i][0])
            if not keyword[0] == '' and (not temp[i][0].find(keyword[-1]) > 0):
                print('[ERROR] processing ', temp[i][0])
                print('sub', keyword[0], keyword[-1], re.sub(keyword[0], keyword[-1], temp[i][0]))
                return
            filesQueue.put(temp[i])
    PROCESS_LOCK = Lock()
    myProcess = []
    for processID in range(MAX_PROCESS):
        myProcess.append(Process(target=getDataNFeatureFromFileForAProc, args=(PROCESS_LOCK, RESULT_QUEUE, filesQueue, subProcFunc, funcArgs)))
    myProcess.append(Process(target=writeOutResult, args=(RESULT_QUEUE, outputFile)))

    for _process in myProcess:
        _process.start()
    for _process in myProcess:
        _process.join()
    print('total runtime:', time.time() - START_TIME)

示例#22

0

显示文件

文件： jobpool.py 项目： ANSSI-FR/polichombr

class JobPool(object):

    """
    Pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=4):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_task, (self.message_queue,))
        atexit.register(self.clear)

    def add_analysis(self, analysis):
        """
        Add analysis to the pool.
        """
        analysis.set_started()
        self.message_queue.put(analysis)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()

示例#23

0

显示文件

文件： client_socket_tests.py 项目： schalekamp/ibapipy

 def test_req_all_open_orders(self):
     result_queue = Queue()
     class MockClientSocket(ClientSocket):
         def __init__(self):
             ClientSocket.__init__(self)
         def open_order(self, req_id, contract, order):
             result_queue.put(req_id)
             result_queue.put(contract)
             result_queue.put(order)
         def open_order_end(self):
             result_queue.put('open_order_end')
         def order_status(self, req_id, status, filled, remaining,
                          avg_fill_price, perm_id, parent_id,
                          last_fill_price, client_id, why_held):
             result_queue.put(req_id)
             result_queue.put(status)
             result_queue.put(filled)
             result_queue.put(remaining)
             result_queue.put(avg_fill_price)
             result_queue.put(perm_id)
             result_queue.put(parent_id)
             result_queue.put(last_fill_price)
             result_queue.put(client_id)
             result_queue.put(why_held)
     client = MockClientSocket()
     client.connect()
     client.req_all_open_orders()
     while True:
         result = result_queue.get()
         self.assertIsNotNone(result)
         if result == 'open_order_end':
             break
     client.disconnect()

示例#24

0

显示文件

文件： multiproc.py 项目： KeNJiKunG/E-Tipitaka-for-PC

class MultiSegmentWriter(IndexWriter):
    def __init__(self, index, procs=2, **writerargs):
        self.index = index
        self.lock = index.storage.lock(index.indexname + "_LOCK")
        self.tasks = []
        self.postingqueue = Queue()
        #self.resultqueue = Queue()
        
        names = [index._next_segment_name() for _ in xrange(procs)]
        
        self.tasks = [SegmentWritingTask(index.storage, index.indexname,
                                         segname, writerargs, self.postingqueue)
                      for segname in names]
        for task in self.tasks:
            task.start()
        
    def add_document(self, **args):
        self.postingqueue.put(args)
        
    def cancel(self):
        for task in self.tasks:
            task.cancel()
        self.lock.release()
        
    def commit(self):
        procs = len(self.tasks)
        for _ in xrange(procs):
            self.postingqueue.put(None)
        for task in self.tasks:
            print "Joining", task
            task.join()
            self.index.segments.append(task.get_segment())
        self.index.commit()
        self.lock.release()

示例#25

0

显示文件

文件： main.py 项目： charsyam/pythoncrawl

def f(idx, q,r):
    path = "data%s"%(idx)
    os.makedirs(path)
    while True:
        item = q.get()
        if( item.item_type == ITEM_QUIT ):
            break;

        count = 0
        localQueue = Queue()
        current = item.data
        while True:
            print current
            fo = urlopen(current)
            data = fo.read()
            name = "%s/%s"%(path,count)
            fw = open( name, "w" )
            count = count + 1
            fw.write(data)
            fw.close()
            fo.close()
            p = MyHTMLParser()
            try:
                p.feed(data)
            except:
                pass

            for href in p.hrefs:
                print item.data, ": ", href

            try:
                current = localQueue.get_nowait()
            except:
                break;

示例#26

0

显示文件

文件： LinearCRF2.py 项目： liuminglu19870419/python_crf

def likelihood_mp_simple(seqlens,fss,uon,bon,theta,seqnum,K,ufnum,bfnum,regtype,sigma):
    global _gradient
    grad = numpy.array(fss,copy=True)  # data distribuition
    likelihood = numpy.dot(fss,theta)
    que1 = Queue() # for the likihood output
    que2 = Queue() # for the gradient output
    np = 0
    subprocesses = []
    corenum=multiprocessing.cpu_count()
    #corenum=1
    if corenum>1:
        chunk=seqnum/corenum+1
    else:
        chunk=seqnum
    starti=0
    while starti < (seqnum):
        endi=starti+chunk
        if endi>seqnum:
            endi=seqnum    
        p = Process(target=likelihoodthread_simple, 
            args=(seqlens[starti:endi],uon[starti:endi],bon[starti:endi],theta,K,ufnum,bfnum,que1,que2))
        p.start()
        np+=1
        #print 'delegated %s:%s to subprocess %s' % (starti, endi, np)
        subprocesses.append(p)
        starti += chunk
    for i in range(np):
        likelihood += que1.get()
    for i in range(np):
        grad += que2.get()
    while subprocesses:
        subprocesses.pop().join()
    grad -= regularity_deriv(theta,regtype,sigma)
    _gradient = grad
    return likelihood - regularity(theta,regtype,sigma)

示例#27

0

显示文件

文件： updater.py 项目： KN2C/pyroboime

class Updater(Process):

    def __init__(self, maxsize=15):
        Process.__init__(self)
        #self.queue = Queue(maxsize)
        self.queue = Queue()
        self.queue_lock = Lock()
        self._exit = Event()

    def run(self):
        while not self._exit.is_set():
            #with self.queue_lock:
            self.queue.put(self.receive())
            #self.queue.put_nowait(self.receive())
            #if self.queue.full():
            #    try:
            #        self.queue.get_nowait()
            #    except:
            #        pass

    def stop(self):
        self._exit.set()
        # This leaves the process hanging on Windows
        #self.join(STOP_TIMEOUT)
        if self.is_alive():
            #TODO make a nicer warning
            print 'Terminating updater:', self
            self.terminate()

    def receive(self):
        raise NotImplementedError

示例#28

0

显示文件

文件： test_hdlcc_server.py 项目： suoto/hdlcc

    def test():

        queue = Queue()

        proc = Process(target=doNothing, args=(queue, ))
        proc.start()

        _logger.info("Started dummy process with PID %d", proc.pid)
        startCodeCheckerServerAttachedToPid(proc.pid)
        time.sleep(3)
        _logger.info("Allowing the dummy process to finish")
        queue.put(1)
        proc.join()

        if utils.isProcessRunning(proc.pid):
            _logger.warning("Dummy process %d was still running", proc.pid)
            proc.terminate()
            time.sleep(1)
            it.assertFalse(utils.isProcessRunning(proc.pid),
                           "Process %d is still running after terminating "
                           "it!" % proc.pid)

        time.sleep(1)
        _logger.info("Server should have died by now")

        with it.assertRaises(requests.ConnectionError):
            requests.post(it._url + '/get_diagnose_info')

示例#29

0

显示文件

文件： LinearCRF2.py 项目： liuminglu19870419/python_crf

def likelihood_multithread_O(seqlens,fss,uon,bon,theta,seqnum,K,ufnum,bfnum):   # multithread version of likelihood
    '''conditional log likelihood log p(Y|X)'''
    likelihood = numpy.dot(fss,theta)
    thetab=theta[0:bfnum]
    thetau=theta[bfnum:]
    que = Queue()
    np = 0
    subprocesses = []
    corenum=multiprocessing.cpu_count()
    #corenum=1
    if corenum>1:
        chunk=seqnum/corenum+1
    else:
        chunk=seqnum
    starti=0
    while starti < (seqnum):
        endi=starti+chunk
        if endi>seqnum:
            endi=seqnum    
        p = Process(target=likelihoodthread, 
           args=(seqlens,uon,bon,thetau,thetab,seqnum,K,ufnum,bfnum,starti,endi,que))
        p.start()
        np+=1
        #print 'delegated %s:%s to subprocess %s' % (starti, endi, np)
        subprocesses.append(p)
        starti += chunk
    for i in range(np):
        likelihood += que.get()
    while subprocesses:
        subprocesses.pop().join()
    return likelihood - regularity(theta)

示例#30

0

显示文件

文件： jobpool.py 项目： ANSSI-FR/polichombr

class YaraJobPool(object):

    """
    Yara pool container.
    """
    pool = None
    message_queue = None

    def __init__(self, max_instances=3):
        self.message_queue = Queue()
        self.pool = Pool(max_instances, execute_yara_task,
                         (self.message_queue,))
        atexit.register(self.clear)

    def add_yara_task(self, yara_task):
        """
        Adds the yara task.
        """
        self.message_queue.put(yara_task)

    def clear(self):
        """
        Pool cleanup.
        """
        self.pool.terminate()
        self.pool.join()

示例#31

0

显示文件

文件： Problem494_multithread.py 项目： jiamingkong/ProjectEuler

    for i in generate("", "", 0, length):
        queue.put(i)


def worker(queue, counter):
    while True:
        if queue.empty() == True:
            break
        else:
            pattern = queue.get()
            if test_excessive_pattern(pattern) != None:
                counter.increment(2)
            else:
                counter.increment(1)


if __name__ == '__main__':
    # for leng in range(15,22):
    leng = 20
    c = Counter()
    queue = Queue(60000)
    writer = Process(target=feeder_thread, args=(queue, leng))
    writer.start()
    processes = [Process(target=worker, args=(queue, c)) for i in range(3)]
    for i in processes:
        i.daemon = True
        i.start()
    for i in processes:
        i.join()
    print((leng+1, c.value))

示例#32

0

显示文件

def genQueue(num):
    queries = query.genQueries(num)
    q = Queue()
    for i in queries:
        q.put(i)
    return q

示例#33

0

显示文件

文件： EventManager.py 项目： ptwz/mmm

    def make_queue(cls, queue_name):
        if queue_name in cls.queues:
            return

        q = Queue()
        cls.queues[queue_name] = q

示例#34

0

显示文件

文件： msrc12_skels_to_h5.py 项目： tobytoy/MotionGAN

if __name__ == '__main__':

    found_dirs = [file for file in glob('MicrosoftGestureDataset-RC/data/*.tagstream')]
    print('Processing %d files...' % (len(found_dirs)))

    data_set = 'MSRC12'
    h5file = h5.File(data_set+"v1.h5", "w")

    subjects = set()
    actions = set()
    max_frame_count = 0

    num_procs = 4

    # Create queues
    task_queue = Queue()
    done_queue = Queue()

    # Submit tasks
    for found_dir in found_dirs:
        task_queue.put(found_dir)

    # Start worker processes
    print('Spawning processes...')
    for _ in range(num_procs):
        Process(target=worker, args=(task_queue, done_queue)).start()

    # Get and print results
    print('Processed Files:')
    t = trange(len(found_dirs), dynamic_ncols=True)
    seq_num = 0

示例#35

0

显示文件

 def __init__(self, options):
     super(Combine, self).__init__(options)
     self.input_observer = Combine.InputObserver(self)
     self.notify_counter = Queue()
     self.notify_counter.put(0)
     self.queue = Queue()

示例#36

0

显示文件

文件： Xgjoin.py 项目： SDM-TIB/anapsid

class Xgjoin(Join):
    def __init__(self, vars):
        self.left_table = dict()
        self.right_table = dict()
        self.qresults = Queue()
        self.vars = vars

        # Second stage settings
        self.secondStagesTS = []
        self.lastSecondStageTS = float("-inf")
        self.timeoutSecondStage = 100000000
        self.sourcesBlocked = False

        # Main memory settings
        self.memorySize = 100000000  # Represents the main memory size (# tuples).OLD:Represents the main memory size (in KB).
        self.fileDescriptor_left = {}
        self.fileDescriptor_right = {}
        self.memory_left = 0
        self.memory_right = 0

    def instantiate(self, d):
        newvars = self.vars - set(d.keys())
        return Xgjoin(newvars)

    def instantiateFilter(self, instantiated_vars, filter_str):
        newvars = self.vars - set(instantiated_vars)
        return Xgjoin(newvars)

    def execute(self, left, right, out):
        # Executes the Xgjoin.
        #print "gjoin!"
        self.left = left
        self.right = right
        self.qresults = out

        # Initialize tuples.
        tuple1 = None
        tuple2 = None

        # Create alarm to go to stage 2.
        signal.signal(signal.SIGALRM, self.stage2)

        # Get the tuples from the queues.
        while (not (tuple1 == "EOF") or not (tuple2 == "EOF")):

            # Try to get and process tuple from left queue.
            if (not (tuple1 == "EOF")):
                try:
                    tuple1 = self.left.get(False)
                    #print "tuple1", tuple1
                    signal.alarm(self.timeoutSecondStage)
                    self.stage1(tuple1, self.left_table, self.right_table)
                    self.memory_right += 1
                except Empty:
                    # Empty: in tuple1 = self.left.get(False), when the queue is empty.
                    pass
                except TypeError:
                    # TypeError: in resource = resource + tuple[var], when the tuple is "EOF".
                    pass
                except IOError:
                    # IOError: when a tuple is received, but the alarm is fired.
                    self.sourcesBlocked = False
                    pass

            # Try to get and process tuple from right queue.
            if (not (tuple2 == "EOF")):
                try:
                    tuple2 = self.right.get(False)
                    #print "tuple2", tuple2
                    signal.alarm(self.timeoutSecondStage)
                    self.stage1(tuple2, self.right_table, self.left_table)
                    self.memory_left += 1
                except Empty:
                    # Empty: in tuple2 = self.right.get(False), when the queue is empty.
                    pass
                except TypeError:
                    # TypeError: in resource = resource + tuple[var], when the tuple is "EOF".
                    pass
                except IOError:
                    # IOError: when a tuple is received, but the alarm is fired.
                    self.sourcesBlocked = False
                    pass

            if (len(self.left_table) + len(self.right_table) >=
                    self.memorySize):
                self.flushRJT()
                #print "Flushed RJT!"

        # Turn off alarm to stage 2.
        signal.alarm(0)
        # Perform the last probes.
        self.stage3()

    def stage1(self, tuple, tuple_rjttable, other_rjttable):
        #print " Stage 1: While one of the sources is sending data."
        if (tuple != "EOF"):
            # Get the resource associated to the tuples.
            resource = ''
            #print(tuple)
            for var in self.vars:
                if var in tuple:
                    resource = resource + str(tuple[var])

            # Probe the tuple against its RJT table.
            probeTS = self.probe(tuple, resource, tuple_rjttable)

            # Create the records.
            record = Record(tuple, probeTS, time(), float("inf"))

            # Insert the record in the other RJT table.
            if resource in other_rjttable:
                other_rjttable.get(resource).updateRecords(record)
                other_rjttable.get(resource).setRJTProbeTS(probeTS)
                #other_rjttable.get(resource).append(record)
            else:
                tail = RJTTail(record, probeTS)
                other_rjttable[resource] = tail
                #other_rjttable[resource] = [record]

    def stage2(self, signum, frame):
        #print " Stage 2: When both sources become blocked."
        self.sourcesBlocked = True

        # Get common resources.
        resources1 = set(self.left_table.keys()) & set(
            self.fileDescriptor_right.keys())
        resources2 = set(self.right_table.keys()) & set(
            self.fileDescriptor_left.keys())

        # Iterate while there are common resources and both sources are blocked.
        while ((resources1 or resources2) and self.sourcesBlocked):

            if (resources1):
                resource = resources1.pop()
                rjts1 = self.left_table[resource].records
                for rjt1 in rjts1:
                    probed = self.probeFile(rjt1, self.fileDescriptor_right,
                                            resource, 2)
                    if (probed):
                        rjt1.probeTS = time()

            elif (resources2):
                resource = resources2.pop()
                rjts1 = self.right_table[resource].records
                for rjt1 in rjts1:
                    probed = self.probeFile(rjt1, self.fileDescriptor_left,
                                            resource, 2)
                    if (probed):
                        rjt1.probeTS = time()

        # End of second stage.
        self.lastSecondStageTS = time()
        self.secondStagesTS.append(self.lastSecondStageTS)

#        fd_left  = len(set(map(FileDescriptor.getSize, self.fileDescriptor_left.values())))
#        fd_right = len(set(map(FileDescriptor.getSize, self.fileDescriptor_right.values())))
#        count = 0
#
#        while ((count < fd_left + fd_right) and self.sourcesBlocked):
#
#            (largestRJTs, table) = self.getLargestRJTs(count)
#            #print "Largests RJT:", largestRJTs
#            common_resources = set(largestRJTs.keys()) & set(table.keys())
#            print "Common R:", common_resources
#            for resource in common_resources:
#                rjts1 = table[resource].records
#                for rjt1 in rjts1:
#                    self.probeFile(rjt1, largestRJTs, resource, 2)
#
#            count = count + 1
#
#        self.lastSecondStageTS = time()
#        self.secondStagesTS.append(self.lastSecondStageTS)
#        print "----------------END Second Stage!"

    def stage3(self):
        #print "Stage 3: When both sources sent all the data."

        # RJTs in main (left) memory are probed against RJTs in secondary (right) memory.
        common_resources = set(self.left_table.keys()) & set(
            self.fileDescriptor_right.keys())
        for resource in common_resources:
            rjts1 = self.left_table[resource].records
            for rjt1 in rjts1:
                self.probeFile(rjt1, self.fileDescriptor_right, resource, 3)

        # RJTs in main (right) memory are probed against RJTs in secondary (left) memory.
        common_resources = set(self.right_table.keys()) & set(
            self.fileDescriptor_left.keys())
        for resource in common_resources:
            rjts1 = self.right_table[resource].records
            for rjt1 in rjts1:
                self.probeFile(rjt1, self.fileDescriptor_left, resource, 3)

        # RJTs in secondary memory are probed to produce new results.
        common_resources = set(self.fileDescriptor_left.keys()) & set(
            self.fileDescriptor_right.keys())
        for resource in common_resources:
            file1 = open(self.fileDescriptor_right[resource].file.name)
            rjts1 = file1.readlines()
            for rjt1 in rjts1:
                (tuple1, probeTS1, insertTS1, flushTS1) = rjt1.split('|')
                self.probeFile(
                    Record(eval(tuple1), float(probeTS1), float(insertTS1),
                           float(flushTS1)), self.fileDescriptor_left,
                    resource, 3)
            file1.close()

        for resource in common_resources:
            file1 = open(self.fileDescriptor_left[resource].file.name)
            rjts1 = file1.readlines()
            for rjt1 in rjts1:
                (tuple1, probeTS1, insertTS1, flushTS1) = rjt1.split('|')
                self.probeFile(
                    Record(eval(tuple1), float(probeTS1), float(insertTS1),
                           float(flushTS1)), self.fileDescriptor_right,
                    resource, 3)
            file1.close()

        # Delete files from secondary memory.
        for resource in self.fileDescriptor_left:
            remove(self.fileDescriptor_left[resource].file.name)

        for resource in self.fileDescriptor_right:
            remove(self.fileDescriptor_right[resource].file.name)

        # Put EOF in queue and exit.
        self.qresults.put("EOF")

    def probe(self, tuple, resource, rjttable):
        # Probe a tuple against its corresponding table.

        probeTS = time()
        # If the resource is in table, produce results.
        if resource in rjttable:
            rjttable.get(resource).setRJTProbeTS(probeTS)
            list_records = rjttable[resource].records

            for record in list_records:
                res = {}
                res.update(record.tuple)
                #res = record.tuple.copy()
                res.update(tuple)
                self.qresults.put(res)
                #print hex(id(self)), "res:", res

        return probeTS

    def probeFile(self, rjt1, filedescriptor2, resource, stage):
        # Probe an RJT against its corresponding partition in secondary memory.

        file2 = open(filedescriptor2[resource].file.name, 'r')
        rjts2 = file2.readlines()
        st = ""
        probed = False

        for rjt2 in rjts2:
            (tuple2, probeTS2, insertTS2, flushTS2) = rjt2.split('|')
            probedStage1 = False
            probedStage2 = False

            #Checking Property 2: Probed in stage 2.
            for ss in self.secondStagesTS:
                if (float(flushTS2) < rjt1.insertTS and rjt1.insertTS < ss
                        and ss < rjt1.flushTS):
                    probedStage2 = True
                    break

            # Checking Property 1: Probed in stage 1.
            if (rjt1.probeTS < float(flushTS2)):
                probedStage1 = True

            # Produce result if it has not been produced.
            if (not (probedStage1) and not (probedStage2)):
                res = rjt1.tuple.copy()
                res.update(eval(tuple2))
                self.qresults.put(res)
                probed = True

            # Update probeTS of tuple2.
            stprobeTS = "%.40r" % (time())
            st = st + tuple2 + '|' + stprobeTS + '|' + insertTS2 + '|' + flushTS2

        file2.close()

        # Update file2 if in stage 2.
        if ((stage == 2) and probed):
            file2 = open(filedescriptor2[resource].file.name, 'w')
            file2.write(st)
            file2.close()

        return probed

    def flushRJT(self):
        # Flush an RJT to secondary memory.

        # Choose a victim from each partition (table).
        (resource_to_flush1, tail_to_flush1,
         least_ts1) = self.getVictim(self.left_table)
        (resource_to_flush2, tail_to_flush2,
         least_ts2) = self.getVictim(self.right_table)

        # Flush resource from left table.
        if (least_ts1 <= least_ts2):
            file_descriptor = self.fileDescriptor_left
            table = self.left_table
            resource_to_flush = resource_to_flush1
            tail_to_flush = tail_to_flush1

        # Flush resource from right table.
        if (least_ts2 < least_ts1):
            file_descriptor = self.fileDescriptor_right
            table = self.right_table
            resource_to_flush = resource_to_flush2
            tail_to_flush = tail_to_flush2

        # Create flush timestamp.
        flushTS = time()

        # Update file descriptor
        if (file_descriptor.has_key(resource_to_flush)):
            lentail = file_descriptor[resource_to_flush].size
            file = open(file_descriptor[resource_to_flush].file.name, 'a')
            file_descriptor.update({
                resource_to_flush:
                FileDescriptor(file,
                               len(tail_to_flush.records) + lentail, flushTS)
            })
        else:
            file = NamedTemporaryFile(suffix=".rjt", prefix="", delete=False)
            file_descriptor.update({
                resource_to_flush:
                FileDescriptor(file, len(tail_to_flush.records), flushTS)
            })

        # Flush tail in file.
        for record in tail_to_flush.records:
            sttuple = str(record.tuple)
            stprobeTS = "%.40r" % (record.probeTS)
            stinsertTS = "%.40r" % (record.insertTS)
            stflushTS = "%.40r" % (flushTS)

            file.write(sttuple + '|')
            file.write(stprobeTS + '|')
            file.write(stinsertTS + '|')
            file.write(stflushTS + '\n')
        file.close()

        # Delete resource from main memory.
        del table[resource_to_flush]

    def getVictim(self, table):
        # Selects a victim from a partition in main memory to flush.

        resource_to_flush = ""
        tail_to_flush = RJTTail([], 0)
        least_ts = float("inf")

        for resource, tail in table.iteritems():
            resource_ts = tail.rjtProbeTS
            if ((resource_ts < least_ts)
                    or (resource_ts == least_ts
                        and len(tail.records) > len(tail_to_flush.records))):
                resource_to_flush = resource
                tail_to_flush = tail
                least_ts = resource_ts

        #print "Victim chosen:", resource_to_flush, "TS:", least_ts, "LEN:", len(tail_to_flush.records)
        return (resource_to_flush, tail_to_flush, least_ts)

    def getLargestRJTs(self, i):
        # Selects the i-th largest RJT stored in secondary memory.

        sizes1 = set(
            map(FileDescriptor.getSize, self.fileDescriptor_left.values()))
        sizes2 = set(
            map(FileDescriptor.getSize, self.fileDescriptor_right.values()))

        sizes1 = list(sizes1)
        sizes2 = list(sizes2)

        sizes1.sort()
        sizes2.sort()

        if (sizes1 and sizes2):
            if (sizes1[len(sizes1) - 1] > sizes2[len(sizes2) - 1]):
                file_descriptor = self.fileDescriptor_left
                max_len = sizes1[len(sizes1) - (i + 1)]
                table = self.right_table
            else:
                file_descriptor = self.fileDescriptor_right
                max_len = sizes2[len(sizes2) - (i + 1)]
                table = self.left_table
        elif (sizes1):
            file_descriptor = self.fileDescriptor_left
            max_len = sizes1[len(sizes1) - (i + 1)]
            table = self.right_table
        else:
            file_descriptor = self.fileDescriptor_right
            max_len = sizes2[len(sizes2) - (i + 1)]
            table = self.left_table

        largestRJTs = {}

        for resource, fd in file_descriptor.iteritems():
            if (fd.size == max_len):
                largestRJTs[resource] = fd

        return (largestRJTs, table)

示例#37

0

显示文件

class FunctionInvoker:
    """
    Module responsible to perform the invocations against the compute backend
    """
    def __init__(self, config, log_level):
        self.config = config
        self.log_level = log_level
        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        compute_config = extract_compute_config(self.config)

        self.remote_invoker = self.config['pywren'].get(
            'remote_invoker', False)
        self.rabbitmq_monitor = self.config['pywren'].get(
            'rabbitmq_monitor', False)
        if self.rabbitmq_monitor:
            self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')

        self.workers = self.config['pywren'].get('workers')
        logger.debug('Total workers: {}'.format(self.workers))

        self.compute_handlers = []
        cb = compute_config['backend']
        regions = compute_config[cb].get('region')
        if regions and type(regions) == list:
            for region in regions:
                new_compute_config = compute_config.copy()
                new_compute_config[cb]['region'] = region
                self.compute_handlers.append(Compute(new_compute_config))
        else:
            self.compute_handlers.append(Compute(compute_config))

        self.token_bucket_q = Queue()
        self.pending_calls_q = Queue()

        self.job_monitor = JobMonitor(self.config, self.internal_storage,
                                      self.token_bucket_q)

    def _invoke(self, job, call_id):
        """
        Method used to perform the actual invocation against the Compute Backend
        """
        payload = {
            'config': self.config,
            'log_level': self.log_level,
            'func_key': job.func_key,
            'data_key': job.data_key,
            'extra_env': job.extra_env,
            'execution_timeout': job.execution_timeout,
            'data_byte_range': job.data_ranges[int(call_id)],
            'executor_id': job.executor_id,
            'job_id': job.job_id,
            'call_id': call_id,
            'host_submit_time': time.time(),
            'pywren_version': __version__,
            'runtime_name': job.runtime_name,
            'runtime_memory': job.runtime_memory
        }

        # do the invocation
        start = time.time()
        compute_handler = random.choice(self.compute_handlers)
        activation_id = compute_handler.invoke(job.runtime_name,
                                               job.runtime_memory, payload)
        roundtrip = time.time() - start
        resp_time = format(round(roundtrip, 3), '.3f')

        if not activation_id:
            self.pending_calls_q.put((job, call_id))
            return

        logger.info(
            'ExecutorID {} | JobID {} - Function invocation {} done! ({}s) - Activation'
            ' ID: {}'.format(job.executor_id, job.job_id, call_id, resp_time,
                             activation_id))

        return call_id

    def run(self, job_description):
        """
        Run a job described in job_description
        """
        job = SimpleNamespace(**job_description)

        log_msg = (
            'ExecutorID {} | JobID {} - Starting function invocation: {}()  - Total: {} '
            'activations'.format(job.executor_id, job.job_id,
                                 job.function_name, job.total_calls))
        logger.info(log_msg)

        self.total_calls = job.total_calls

        for i in range(self.workers):
            self.token_bucket_q.put('#')

        for i in range(job.total_calls):
            call_id = "{:05d}".format(i)
            self.pending_calls_q.put((job, call_id))

        self.job_monitor.start_job_monitoring(job)

        invokers = []
        for inv_id in range(4):
            p = Process(target=self._run_process, args=(inv_id, ))
            invokers.append(p)
            p.daemon = True
            p.start()

        for p in invokers:
            p.join()

    def _run_process(self, inv_id):
        """
        Run process that implements token bucket scheduling approach
        """
        logger.info('Invoker process {} started'.format(inv_id))
        call_futures = []
        with ThreadPoolExecutor(max_workers=250) as executor:
            # TODO: Change pending_calls_q check
            while self.pending_calls_q.qsize() > 0:
                self.token_bucket_q.get()
                job, call_id = self.pending_calls_q.get()
                future = executor.submit(self._invoke, job, call_id)
                call_futures.append(future)

        logger.info('Invoker process {} finished'.format(inv_id))

示例#38

0

显示文件

from multiprocessing import Process, Queue
import time

#创建消息队列
q = Queue()


def fun1():
    for i in range(10):
        time.sleep(1)
        q.put((1, 2))


def fun2():
    for i in range(10):
        time.sleep(1.5)
        a, b = q.get()
        print("sum = ", a + b)


p1 = Process(target=fun1)
p2 = Process(target=fun2)

p1.start()
p2.start()
p1.join()
p2.join()

示例#39

0

显示文件

文件： PlanOLD.py 项目： MagicCream/MagicCream

class DependentOperator(object):
    '''
    Implements an operator that must be resolved with an instance.

    It receives as input the url of the server to be contacted,
    the filename that contains the query, the header size of the
    response message, the buffer size (length of the string) of the
    messages.

    The execute() method performs a semantic check. If the instance
    can be derreferenced from the source, it will contact the source.
    '''

    def __init__(self, server, query, vs, buffersize): #headersize ???
        self.server = server
        #self.filename = filename
        self.query = query
        #self.headersize = headersize
        self.buffersize = buffersize
        self.q = None
        self.q = Queue()
        self.atts = vs
        self.prefs = [] #query.prefs
        #self.atts = self.getQueryAttributes()
        #self.catalog = Catalog("/home/gabriela/Anapsid/src/Catalog/endpoints.desc")


    def execute(self, variables, instances, outputqueue):

        self.query = open(self.filename).read()
        # ? signal.signal(12, onSignal)
        # Replace in the query, the instance that is derreferenced.
        for i in range(len(variables)):
            self.query = string.replace(self.query, "?" + variables[i], "", 1)
            self.query = string.replace(self.query, "?" + variables[i], "<" + instances[i] + ">")

        # If the instance has no ?query. Example: DESCRIBE ---
        if (instances[0].find("sparql?query") == -1):
            pos = instances[0].find("/resource")
            pre = instances[0][0:pos]

            # Semantic check!.
            for server in self.server:
                prefixes = self.catalog.data[server]

                try:
                    # Contact the source.
                    pos = prefixes.index(pre)
                    self.p = Process(target=contactSource,
                              args=(server, self.query, self.headersize, self.buffersize, self.q,))
                    self.p.start()

#                    first_tuple = True

                    while True:
                        # Get the next item in queue.
                        res = self.q.get()

#                        #Get the variables from the answer
#                        if (first_tuple):
#                            vars = res.keys()
#                            outputqueue.put(vars)
#                            first_tuple = False

                        # Put the result into the output queue.
                        outputqueue.put(res)

                        # Check if there's no more data.
                        if (res == "EOF"):
                            break

                except ValueError:
                    # The source shouldn't be contacted.
                    outputqueue.put(self.atts)
                    outputqueue.put("EOF")


    def getQueryAttributes(self):
        # Read the query from file and apply lower case.
        query = open(self.filename).read()
        query2 = string.lower(query)

        # Extract the variables, separated by commas.
        # TODO: it supposes that there's no from clause.
        begin = string.find(query2, "select")
        begin = begin + len("select")
        end = string.find(query2, "where")
        listatts = query[begin:end]
        listatts = string.split(listatts, " ")

        # Iterate over the list of attributes, and delete "?".
        outlist = []
        for att in listatts:
            if ((len(att) > 0) and (att[0] == '?')):
                if ((att[len(att)-1] == ',') or (att[len(att)-1] == '\n')):
                    outlist = outlist + [att[1:len(att)-1]]
                else:
                    outlist = outlist + [att[1:len(att)]]

        return outlist

示例#40

0

显示文件

文件： Launcher.py 项目： 2forts/GENS

			else:
					results_dict[name] = "(N: %d ntray: %d ttray: %d)" % (value[0], value[1], value[2])

			tf = t2 - t1
			with open("tiempos_cpu.txt", mode='a') as file:
					file.write('Time %s:  %s. Started: %s. NODE %s, CORE %d\n' % (name, tf, t1, sNode, nCore))
		except:
            logger.info("%s could not access to the queue at this moment: %s" % (name, sys.exc_info()) )
			
	tb = time()
	tf = tb - ta
	with open("final_cpu.txt", mode='a') as file:
		file.write('Tiempo de %s:  %s. \n' % (name, tf))

if __name__ == '__main__':
	data_index = Queue()
	results = Queue()
	semaphore = Semaphore(1)

	manager = Manager() 
	results_dict = manager.dict()

	#This example executes the result of the GA for a case study of 7 sizes: 216, 512, 1000, 2197, 4096, 8000 and 15625
	#In this example, the cluster has 4 machines, "bullxual01" to "bullxual04", each one with 16 cores and 2 GPU's
	#Take into account that two CPU-cores are needed to handle the tow GPUs in each machine, so only 14 CPU-cores are available
	
	#Number of trajectories, time steps and times for sizes 1 to 7 for GPU and CPU
	ntray = 500
	ttray = 500
	tiemposGPU = (167, 172, 182, 193, 226, 336, 540)
	tiemposCPU = (430, 700, 1500, 2900, 5172, 9558, 22253)

示例#41

0

显示文件

文件： hw4.py 项目： MingjunWu/louPlus

#!/usr/bin/env python3
from multiprocessing import Process, Queue
import sys
import csv

INCOME_TAX_LOOKUP_TABLE = [(80000, 0.45, 13505), (55000, 0.35, 5505),
                           (35000, 0.30, 2755), (9000, 0.25, 1005),
                           (4500, 0.2, 555), (1500, 0.1, 105), (0, 0.03, 0)]

queue1 = Queue()
queue2 = Queue()


class Args(object):
    def __init__(self):
        self.args = sys.argv[1:]

    def file_after_option(self, option):
        try:
            index = self.args.index(option)
            return self.args[index + 1]
        except (ValueError, IndexError):
            print("Parameter Error")
            exit()

    @property
    def config_path(self):
        return self.file_after_option('-c')

    @property
    def userdata_path(self):

示例#42

0

显示文件

文件： boundary.py 项目： gaos1/skyline

class Boundary(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Boundary
        """
        super(Boundary, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path=REDIS_SOCKET)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.boundary_metrics = Manager().list()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def send_graphite_metric(self, name, value):
        if settings.GRAPHITE_HOST != '':
            sock = socket.socket()

            try:
                sock.connect((settings.GRAPHITE_HOST, settings.CARBON_PORT))
            except socket.error:
                endpoint = '%s:%d' % (settings.GRAPHITE_HOST,
                                      settings.CARBON_PORT)
                logger.error('Cannot connect to Graphite at %s' % endpoint)
                return False

            sock.sendall('%s %s %i\n' % (name, value, time()))
            sock.close()
            return True

        return False

    def unique_noHash(self, seq):
        seen = set()
        return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

    # This is to make a dump directory in /tmp if ENABLE_BOUNDARY_DEBUG is True
    # for dumping the metric timeseries data into for debugging purposes
    def mkdir_p(self, path):
        try:
            os.makedirs(path)
            return True
        except OSError as exc:
            # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def spin_process(self, i, boundary_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Determine assigned metrics
        bp = settings.BOUNDARY_PROCESSES
        bm_range = len(boundary_metrics)
        keys_per_processor = int(ceil(float(bm_range) / float(bp)))
        if i == settings.BOUNDARY_PROCESSES:
            assigned_max = len(boundary_metrics)
        else:
            # This is a skyine bug, the original skyline code uses 1 as the
            # beginning position of the index, python indices begin with 0
            # assigned_max = len(boundary_metrics)
            # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
            # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
            assigned_max = min(len(boundary_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics_and_algos = [
            boundary_metrics[index] for index in assigned_keys
        ]
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing assigned_metrics_and_algos')
            for assigned_metric_and_algo in assigned_metrics_and_algos:
                logger.info('debug - assigned_metric_and_algo - %s' %
                            str(assigned_metric_and_algo))

        # Compile assigned metrics
        assigned_metrics = []
        for i in assigned_metrics_and_algos:
            assigned_metrics.append(i[0])

        # unique unhashed things
        def unique_noHash(seq):
            seen = set()
            return [
                x for x in seq if str(x) not in seen and not seen.add(str(x))
            ]

        unique_assigned_metrics = unique_noHash(assigned_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - unique_assigned_metrics - %s' %
                        str(unique_assigned_metrics))
            logger.info('debug - printing unique_assigned_metrics:')
            for unique_assigned_metric in unique_assigned_metrics:
                logger.info('debug - unique_assigned_metric - %s' %
                            str(unique_assigned_metric))

        # Check if this process is unnecessary
        if len(unique_assigned_metrics) == 0:
            return

        # Multi get series
        try:
            raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
        except:
            logger.error("failed to mget assigned_metrics from redis")
            return

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Reset boundary_algortims
        all_boundary_algorithms = []
        for metric in BOUNDARY_METRICS:
            all_boundary_algorithms.append(metric[1])

        # The unique algorithms that are being used
        boundary_algorithms = unique_noHash(all_boundary_algorithms)
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - boundary_algorithms - %s' %
                        str(boundary_algorithms))

        discover_run_metrics = []

        # Distill metrics into a run list
        for i, metric_name, in enumerate(unique_assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' %
                                (metric_name, str(i)))
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except Exception as e:
                exceptions['Other'] += 1
                logger.error("redis data error: " + traceback.format_exc())
                logger.error("error: %e" % e)

            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

            # Determine the metrics BOUNDARY_METRICS metric tuple settings
            for metrick in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metrick[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                metric_pattern_matched = False
                if pattern_match:
                    metric_pattern_matched = True
                    algo_pattern_matched = False
                    for algo in boundary_algorithms:
                        for metric in BOUNDARY_METRICS:
                            CHECK_MATCH_PATTERN = metric[0]
                            check_match_pattern = re.compile(
                                CHECK_MATCH_PATTERN)
                            pattern_match = check_match_pattern.match(
                                base_name)
                            if pattern_match:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - metric and algo pattern MATCHED - "
                                        + metric[0] + " | " + base_name +
                                        " | " + str(metric[1]))
                                metric_expiration_time = False
                                metric_min_average = False
                                metric_min_average_seconds = False
                                metric_trigger = False
                                algorithm = False
                                algo_pattern_matched = True
                                algorithm = metric[1]
                                try:
                                    if metric[2]:
                                        metric_expiration_time = metric[2]
                                except:
                                    metric_expiration_time = False
                                try:
                                    if metric[3]:
                                        metric_min_average = metric[3]
                                except:
                                    metric_min_average = False
                                try:
                                    if metric[4]:
                                        metric_min_average_seconds = metric[4]
                                except:
                                    metric_min_average_seconds = 1200
                                try:
                                    if metric[5]:
                                        metric_trigger = metric[5]
                                except:
                                    metric_trigger = False
                                try:
                                    if metric[6]:
                                        alert_threshold = metric[6]
                                except:
                                    alert_threshold = False
                                try:
                                    if metric[7]:
                                        metric_alerters = metric[7]
                                except:
                                    metric_alerters = False
                            if metric_pattern_matched and algo_pattern_matched:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        'debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s'
                                        % (str(i), metric_name,
                                           str(metric_expiration_time),
                                           str(metric_min_average),
                                           str(metric_min_average_seconds),
                                           str(metric_trigger),
                                           str(alert_threshold),
                                           metric_alerters, algorithm))
                                discover_run_metrics.append([
                                    i, metric_name, metric_expiration_time,
                                    metric_min_average,
                                    metric_min_average_seconds, metric_trigger,
                                    alert_threshold, metric_alerters, algorithm
                                ])

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing discover_run_metrics')
            for discover_run_metric in discover_run_metrics:
                logger.info('debug - discover_run_metrics - %s' %
                            str(discover_run_metric))
            logger.info('debug - build unique boundary metrics to analyze')

        # Determine the unique set of metrics to run
        run_metrics = unique_noHash(discover_run_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing run_metrics')
            for run_metric in run_metrics:
                logger.info('debug - run_metrics - %s' % str(run_metric))

        # Distill timeseries strings and submit to run_selected_algorithm
        for metric_and_algo in run_metrics:
            self.check_if_parent_is_alive()

            try:
                raw_assigned_id = metric_and_algo[0]
                metric_name = metric_and_algo[1]
                base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                metric_expiration_time = metric_and_algo[2]
                metric_min_average = metric_and_algo[3]
                metric_min_average_seconds = metric_and_algo[4]
                metric_trigger = metric_and_algo[5]
                alert_threshold = metric_and_algo[6]
                metric_alerters = metric_and_algo[7]
                algorithm = metric_and_algo[8]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' %
                                (metric_name, str(raw_assigned_id)))

                raw_series = raw_assigned[metric_and_algo[0]]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacked OK - %s - %s' %
                                (metric_name, str(raw_assigned_id)))

                autoaggregate = False
                autoaggregate_value = 0

                # Determine if the namespace is to be aggregated
                if BOUNDARY_AUTOAGGRERATION:
                    for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                        autoaggregate = False
                        autoaggregate_value = 0
                        CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            autoaggregate = True
                            autoaggregate_value = autoaggregate_metric[1]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' %
                        (metric_name, str(autoaggregate)))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s'
                        %
                        (metric_name, str(metric_expiration_time),
                         str(metric_min_average),
                         str(metric_min_average_seconds), str(metric_trigger),
                         str(alert_threshold), metric_alerters, autoaggregate,
                         autoaggregate_value, algorithm))
                    # Dump the the timeseries data to a file
                    timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
                    self.mkdir_p(timeseries_dump_dir)
                    timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
                    with open(timeseries_dump_file, 'w+') as f:
                        f.write(str(timeseries))
                        f.close()

                # Check if a metric has its own unique BOUNDARY_METRICS alert
                # tuple, this allows us to paint an entire metric namespace with
                # the same brush AND paint a unique metric or namespace with a
                # different brush or scapel
                has_unique_tuple = False
                run_tupple = False
                boundary_metric_tuple = (base_name, algorithm,
                                         metric_expiration_time,
                                         metric_min_average,
                                         metric_min_average_seconds,
                                         metric_trigger, alert_threshold,
                                         metric_alerters)
                wildcard_namespace = True
                for metric_tuple in BOUNDARY_METRICS:
                    if not has_unique_tuple:
                        CHECK_MATCH_PATTERN = metric_tuple[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if metric_tuple[0] == base_name:
                                wildcard_namespace = False
                            if not has_unique_tuple:
                                if boundary_metric_tuple == metric_tuple:
                                    has_unique_tuple = True
                                    run_tupple = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('unique_tuple:')
                                        logger.info(
                                            'boundary_metric_tuple: %s' %
                                            str(boundary_metric_tuple))
                                        logger.info('metric_tuple: %s' %
                                                    str(metric_tuple))

                if not has_unique_tuple:
                    if wildcard_namespace:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace:')
                            logger.info('boundary_metric_tuple: %s' %
                                        str(boundary_metric_tuple))
                        run_tupple = True
                    else:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                'wildcard_namespace: BUT WOULD NOT RUN')
                            logger.info('boundary_metric_tuple: %s' %
                                        str(boundary_metric_tuple))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('WOULD RUN run_selected_algorithm = %s' %
                                run_tupple)

                if run_tupple:
                    # Submit the timeseries and settings to run_selected_algorithm
                    anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                        timeseries, metric_name, metric_expiration_time,
                        metric_min_average, metric_min_average_seconds,
                        metric_trigger, alert_threshold, metric_alerters,
                        autoaggregate, autoaggregate_value, algorithm)
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - analysed - %s' % (metric_name))
                else:
                    anomalous = False
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info(
                            'debug - more unique metric tuple not analysed - %s'
                            % (metric_name))

                # If it's anomalous, add it to list
                if anomalous:
                    anomalous_metric = [
                        datapoint, metric_name, metric_expiration_time,
                        metric_min_average, metric_min_average_seconds,
                        metric_trigger, alert_threshold, metric_alerters,
                        algorithm
                    ]
                    self.anomalous_metrics.append(anomalous_metric)
                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info("exceptions['Other'] traceback follows:")
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

    def run(self):
        """
        Called when the process intializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error(
                    'skyline can\'t connect to redis at socket path %s' %
                    settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(
                    unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(
                self.redis_conn.smembers(settings.FULL_NAMESPACE +
                                         'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info(
                    'no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Reset boundary_metrics
            boundary_metrics = []

            # Build boundary metrics
            for metric_name in unique_metrics:
                for metric in BOUNDARY_METRICS:
                    CHECK_MATCH_PATTERN = metric[0]
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                "debug - boundary metric - pattern MATCHED - "
                                + metric[0] + " | " + base_name)
                        boundary_metrics.append([metric_name, metric[1]])

            if ENABLE_BOUNDARY_DEBUG:
                logger.info("debug - boundary metrics - " +
                            str(boundary_metrics))

            if len(boundary_metrics) == 0:
                logger.info(
                    'no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.BOUNDARY_PROCESSES + 1):
                if i > len(boundary_metrics):
                    logger.info(
                        'WARNING: skyline boundary is set for more cores than needed.'
                    )
                    break

                p = Process(target=self.spin_process,
                            args=(i, boundary_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.BOUNDARY_ENABLE_ALERTS:
                for anomalous_metric in self.anomalous_metrics:
                    datapoint = str(anomalous_metric[0])
                    metric_name = anomalous_metric[1]
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    expiration_time = str(anomalous_metric[2])
                    metric_trigger = str(anomalous_metric[5])
                    alert_threshold = int(anomalous_metric[6])
                    metric_alerters = anomalous_metric[7]
                    algorithm = anomalous_metric[8]
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info("debug - anomalous_metric - " +
                                    str(anomalous_metric))

                    # Determine how many times has the anomaly been seen if the
                    # ALERT_THRESHOLD is set to > 1 and create a cache key in
                    # redis to keep count so that alert_threshold can be honored
                    if alert_threshold == 0:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " +
                                        str(alert_threshold))

                    if alert_threshold == 1:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " +
                                        str(alert_threshold))

                    if alert_threshold > 1:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " +
                                        str(alert_threshold))
                        anomaly_cache_key_count_set = False
                        anomaly_cache_key_expiration_time = (
                            int(alert_threshold) + 1) * 60
                        anomaly_cache_key = 'anomaly_seen.%s.%s' % (algorithm,
                                                                    base_name)
                        try:
                            anomaly_cache_key_count = self.redis_conn.get(
                                anomaly_cache_key)
                            if not anomaly_cache_key_count:
                                try:
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - redis no anomaly_cache_key - "
                                            + str(anomaly_cache_key))
                                    times_seen = 1
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - redis setex anomaly_cache_key - "
                                            + str(anomaly_cache_key))
                                    self.redis_conn.setex(
                                        anomaly_cache_key,
                                        anomaly_cache_key_expiration_time,
                                        packb(int(times_seen)))
                                    logger.info(
                                        'set anomaly seen key :: %s seen %s' %
                                        (anomaly_cache_key, str(times_seen)))
                                except Exception as e:
                                    logger.error('redis setex failed :: %s' %
                                                 str(anomaly_cache_key))
                                    logger.error("couldn't set key: %s" % e)
                            else:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - redis anomaly_cache_key retrieved OK - "
                                        + str(anomaly_cache_key))
                                anomaly_cache_key_count_set = True
                        except:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info(
                                    "debug - redis failed - anomaly_cache_key retrieval failed - "
                                    + str(anomaly_cache_key))
                            anomaly_cache_key_count_set = False

                        if anomaly_cache_key_count_set:
                            unpacker = Unpacker(use_list=False)
                            unpacker.feed(anomaly_cache_key_count)
                            raw_times_seen = list(unpacker)
                            times_seen = int(raw_times_seen[0]) + 1
                            try:
                                self.redis_conn.setex(
                                    anomaly_cache_key,
                                    anomaly_cache_key_expiration_time,
                                    packb(int(times_seen)))
                                logger.info(
                                    'set anomaly seen key :: %s seen %s' %
                                    (anomaly_cache_key, str(times_seen)))
                            except:
                                times_seen = 1
                                logger.error(
                                    'set anomaly seen key failed :: %s seen %s'
                                    % (anomaly_cache_key, str(times_seen)))

                    # Alert the alerters if times_seen > alert_threshold
                    if times_seen >= alert_threshold:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info(
                                "debug - times_seen %s is greater than or equal to alert_threshold %s"
                                % (str(times_seen), str(alert_threshold)))
                        for alerter in metric_alerters.split("|"):
                            # Determine alerter limits
                            send_alert = False
                            alerts_sent = 0
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info("debug - checking alerter - %s" %
                                            alerter)
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determining alerter_expiration_time for settings"
                                    )
                                alerter_expiration_time_setting = settings.BOUNDARY_ALERTER_OPTS[
                                    'alerter_expiration_time'][alerter]
                                alerter_expiration_time = int(
                                    alerter_expiration_time_setting)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determined alerter_expiration_time from settings - %s"
                                        % str(alerter_expiration_time))
                            except:
                                # Set an arbitrary expiry time if not set
                                alerter_expiration_time = 160
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - could not determine alerter_expiration_time from settings"
                                    )
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determining alerter_limit from settings"
                                    )
                                alerter_limit_setting = settings.BOUNDARY_ALERTER_OPTS[
                                    'alerter_limit'][alerter]
                                alerter_limit = int(alerter_limit_setting)
                                alerter_limit_set = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - determined alerter_limit from settings - %s"
                                        % str(alerter_limit))
                            except:
                                alerter_limit_set = False
                                send_alert = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - could not determine alerter_limit from settings"
                                    )

                            # If the alerter_limit is set determine how many
                            # alerts the alerter has sent
                            if alerter_limit_set:
                                alerter_sent_count_key = 'alerts_sent.%s' % (
                                    alerter)
                                try:
                                    alerter_sent_count_key_data = self.redis_conn.get(
                                        alerter_sent_count_key)
                                    if not alerter_sent_count_key_data:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - redis no alerter key, no alerts sent for - "
                                                + str(alerter_sent_count_key))
                                        alerts_sent = 0
                                        send_alert = True
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - alerts_sent set to %s"
                                                % str(alerts_sent))
                                            logger.info(
                                                "debug - send_alert set to %s"
                                                % str(sent_alert))
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - redis alerter key retrieved, unpacking"
                                                + str(alerter_sent_count_key))
                                        unpacker = Unpacker(use_list=False)
                                        unpacker.feed(
                                            alerter_sent_count_key_data)
                                        raw_alerts_sent = list(unpacker)
                                        alerts_sent = int(raw_alerts_sent[0])
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - alerter %s alerts sent %s "
                                                % (str(alerter),
                                                   str(alerts_sent)))
                                except:
                                    logger.info("No key set - %s" %
                                                alerter_sent_count_key)
                                    alerts_sent = 0
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - alerts_sent set to %s" %
                                            str(alerts_sent))
                                        logger.info(
                                            "debug - send_alert set to %s" %
                                            str(send_alert))

                                if alerts_sent < alerter_limit:
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info(
                                            "debug - alerts_sent %s is less than alerter_limit %s"
                                            % (str(alerts_sent),
                                               str(alerter_limit)))
                                        logger.info(
                                            "debug - send_alert set to %s" %
                                            str(send_alert))

                            # Send alert
                            alerter_alert_sent = False
                            if send_alert:
                                cache_key = 'last_alert.boundary.%s.%s.%s' % (
                                    alerter, base_name, algorithm)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info(
                                        "debug - checking cache_key - %s" %
                                        cache_key)
                                try:
                                    last_alert = self.redis_conn.get(cache_key)
                                    if not last_alert:
                                        try:
                                            self.redis_conn.setex(
                                                cache_key,
                                                int(anomalous_metric[2]),
                                                packb(int(
                                                    anomalous_metric[0])))
                                            if ENABLE_BOUNDARY_DEBUG:
                                                logger.info(
                                                    'debug - key setex OK - %s'
                                                    % (cache_key))
                                            trigger_alert(
                                                alerter, datapoint, base_name,
                                                expiration_time,
                                                metric_trigger, algorithm)
                                            logger.info(
                                                'alert sent :: %s - %s - via %s - %s'
                                                % (base_name, datapoint,
                                                   alerter, algorithm))
                                            trigger_alert(
                                                "syslog", datapoint, base_name,
                                                expiration_time,
                                                metric_trigger, algorithm)
                                            logger.info(
                                                'alert sent :: %s - %s - via syslog - %s'
                                                % (base_name, datapoint,
                                                   algorithm))
                                            alerter_alert_sent = True
                                        except Exception as e:
                                            logger.error(
                                                'alert failed :: %s - %s - via %s - %s'
                                                % (base_name, datapoint,
                                                   alerter, algorithm))
                                            logger.error(
                                                "couldn't send alert: %s" %
                                                str(e))
                                            trigger_alert(
                                                "syslog", datapoint, base_name,
                                                expiration_time,
                                                metric_trigger, algorithm)
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info(
                                                "debug - cache_key exists not alerting via %s for %s is less than alerter_limit %s"
                                                % (alerter, cache_key))
                                        trigger_alert("syslog", datapoint,
                                                      base_name,
                                                      expiration_time,
                                                      metric_trigger,
                                                      algorithm)
                                        logger.info(
                                            'alert sent :: %s - %s - via syslog - %s'
                                            %
                                            (base_name, datapoint, algorithm))
                                except:
                                    trigger_alert("syslog", datapoint,
                                                  base_name, expiration_time,
                                                  metric_trigger, algorithm)
                                    logger.info(
                                        'alert sent :: %s - %s - via syslog - %s'
                                        % (base_name, datapoint, algorithm))
                            else:
                                trigger_alert("syslog", datapoint, base_name,
                                              expiration_time, metric_trigger,
                                              algorithm)
                                logger.info(
                                    'alert sent :: %s - %s - via syslog - %s' %
                                    (base_name, datapoint, algorithm))

                            # Update the alerts sent for the alerter cache key,
                            # to allow for alert limiting
                            if alerter_alert_sent and alerter_limit_set:
                                try:
                                    alerter_sent_count_key = 'alerts_sent.%s' % (
                                        alerter)
                                    new_alerts_sent = int(alerts_sent) + 1
                                    self.redis_conn.setex(
                                        alerter_sent_count_key,
                                        alerter_expiration_time,
                                        packb(int(new_alerts_sent)))
                                    logger.info('set %s - %s' %
                                                (alerter_sent_count_key,
                                                 str(new_alerts_sent)))
                                except:
                                    logger.error('failed to set %s - %s' %
                                                 (alerter_sent_count_key,
                                                  str(new_alerts_sent)))

                    else:
                        # Always alert to syslog, even if alert_threshold is not
                        # breached or if send_alert is not True
                        trigger_alert("syslog", datapoint, base_name,
                                      expiration_time, metric_trigger,
                                      algorithm)
                        logger.info('alert sent :: %s - %s - via syslog - %s' %
                                    (base_name, datapoint, algorithm))

            # Write anomalous_metrics to static webapp directory
            if len(self.anomalous_metrics) > 0:
                filename = path.abspath(
                    path.join(path.dirname(__file__), '..',
                              settings.ANOMALY_DUMP))
                with open(filename, 'w') as fh:
                    # Make it JSONP with a handle_data() function
                    anomalous_metrics = list(self.anomalous_metrics)
                    anomalous_metrics.sort(key=operator.itemgetter(1))
                    fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(boundary_metrics))
            logger.info('total analyzed    :: %d' %
                        (len(boundary_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' %
                        len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'run_time',
                '%.2f' % (time() - now))
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'total_analyzed',
                '%.2f' % (len(boundary_metrics) - sum(exceptions.values())))
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'total_anomalies',
                '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric(
                'skyline.boundary.' + SERVER_METRIC_PATH + 'total_metrics',
                '%d' % len(boundary_metrics))
            for key, value in exceptions.items():
                send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE +
                                             settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric(
                    'skyline.boundary.' + SERVER_METRIC_PATH + 'duration',
                    '%.2f' % time_human)
                self.send_graphite_metric(
                    'skyline.boundary.' + SERVER_METRIC_PATH + 'projected',
                    '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Only run once per minute
            seconds_to_run = int((time() - now))
            if seconds_to_run < 60:
                sleep_for_seconds = 60 - seconds_to_run
            else:
                sleep_for_seconds = 0
            if sleep_for_seconds > 0:
                logger.info('sleeping for %s seconds' % sleep_for_seconds)
                sleep(sleep_for_seconds)

示例#43

0

显示文件

文件： trim_file.py 项目： xjyx/miRge

def trim_file(infile, adapter, outfile, threads=1, phred=33):
	read_queue = Queue()
	result_queue = Queue()
	trimmed_queue = Queue()
	workers = []
	def start_workers():
		for i in xrange(threads):
			worker = Worker(queue=read_queue, results=result_queue, phred64=phred==64, adapter=adapter)
			workers.append(worker)
			worker.start()
	writer = Writer(queue=result_queue, trimmed=trimmed_queue, outfile=outfile)
	writer.start()
	batch = []
	for index, read in enumerate(FastqReader(infile)):
		batch.append(read)
		if index < 1000 and phred == 33:
			if any([i for i in read.qualities if ord(i) > 74]):
				phred = 64
		if index % 10000 == 0:
			if not workers:
				start_workers()
			read_queue.put(batch)
			batch = []
	if not workers:
		start_workers()
	read_queue.put(batch)
	processed = index+1
	# poison pill to stop workers
	for i in xrange(threads):
		read_queue.put(None)

	for i in workers:
		i.join()

	# poison pill for writers
	result_queue.put(None)

	# wait for writing to finish
	writer.join()
	#print "Output done"

	trimmed_queue.put(None)

	kept_reads = sum([i for i in iter(trimmed_queue.get, None)])
	
	return (phred, processed, kept_reads)
	#with logfile as o:
	#	o.write('Starting reads: {0}\n'.format(processed))
	#	o.write('Processed reads: {0}\n'.format(kept_reads))
	#print ('{0}\n'.format(phred))

示例#44

0

显示文件

文件： PlanOLD.py 项目： MagicCream/MagicCream

class IndependentOperator(object):
    '''
    Implements an operator that can be resolved independently.

    It receives as input the url of the server to be contacted,
    the filename that contains the query, the header size of the
    of the messages.

    The execute() method reads tuples from the input queue and
    response message and the buffer size (length of the string)
    place them in the output queue.
    '''
    def __init__(self, query, tree, c, buffersize=16384):

        (e, sq, vs) = tree.getInfoIO(query)
        self.contact = c
        self.server = e
        self.query = query
        self.tree = tree
        self.query_str = sq
        self.vars = vs
        self.buffersize = buffersize
        self.cardinality = None
        self.joinCardinality = []

    def instantiate(self, d):
        new_tree = self.tree.instantiate(d)
        return IndependentOperator(self.query, new_tree, self.contact,
                                   self.buffersize)

    def getCardinality(self):
        if self.cardinality == None:
            self.cardinality = askCount(self.query, self.tree, set(), self.contact)
        return self.cardinality

    def getJoinCardinality(self, vars):
        c = None
        for (v, c2) in self.joinCardinality:
            if v == vars:
                c = c2
                break
        if c == None:
            if len(vars) == 0:
                c = self.getCardinality()
            else:
                c = askCount(self.query, self.tree, vars, self.contact)
            self.joinCardinality.append((vars, c))
        return c

    def allTriplesLowSelectivity(self):
        return self.tree.service.allTriplesLowSelectivity()

    def places(self):
        return self.tree.places()

    def constantNumber(self):
        return self.tree.constantNumber()

    def constantPercentage(self):
        return self.constantNumber()/self.places()

    def aux(self, n):
        return self.tree.aux(n)

    def execute(self, outputqueue):
        # Evaluate the independent operator.
        self.q = None
        self.q = Queue()
        self.p = Process(target=self.contact,
                         args=(self.server, self.query_str,
                               self.q, self.buffersize,))
        self.p.start()

        while True:
            # Get the next item in queue.
            res = self.q.get(True)
            # Put the result into the output queue.
            #print res
            outputqueue.put(res)

            # Check if there's no more data.
            if (res == "EOF"):
                break

    def __repr__(self):
        return str(self.tree)

示例#45

0

显示文件

文件： pspnet_serving.py 项目： neuroph12/python-socketio-cluster

import sshtunnel
import pysftp
import utils
import uuid
import json
import tqdm
import multiprocessing
from multiprocessing import Queue, Lock
import logging
# init tensorflow
from keras.backend.tensorflow_backend import set_session
from keras import backend as K
import tensorflow as tf
# init global lock
mutex = Lock()
mutex1 = Queue(1)
mutex2 = Queue(1)
mutex_data = None

# end init global lock


class task():
    """
    mainthread:

        True    : need to maintain the run() in the main thread to provide service
        False   : auto create process to provide service

    handler_type:

示例#46

0

显示文件

    import ROOT as ROOT
    from multiprocessing import Process, Queue
    import time, sys, os

    def input_thread(q, stdin):
        while True:
            print 'ROOT: '
            cmd = stdin.readline()
            q.put(cmd)

    def root(char):
        assert isinstance(char, str), "Argument must be string!"
        ROOT.gROOT.ProcessLine(char)

    if __name__ == '__main__':
        ___queue___ = Queue()
        ___newstdin___ = os.fdopen(os.dup(sys.stdin.fileno()))
        ___input_p___ = Process(target=input_thread,
                                args=(___queue___, ___newstdin___))
        ___input_p___.daemon = True
        ___input_p___.start()
        ___g___ = ROOT.gSystem.ProcessEvents
        try:
            while 1:
                if not ___queue___.empty():
                    ___cmd___ = ___queue___.get()
                    try:
                        exec(___cmd___, globals())
                    except:
                        print sys.exc_info()
                time.sleep(0.01)

示例#47

0

显示文件

文件： testqueue.py 项目： AmmRage/IPProxyPool

# coding:utf-8
from multiprocessing import Queue

try:
    q = Queue()
    q.get(timeout=5)
except BaseException as e:
    print
    '--' + str(e)

示例#48

0

显示文件

文件：进程队列queue.py 项目： hsyy673150343/PythonLearning

#！/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME     :2019/3/20 18:32
# @Author   : 洪松
# @File     : 进程队列queue.py

from multiprocessing import Process, Queue


def f(q):
    q.put([42, 2, 'hello'])
    print('subprocess a id:', id(q))


if __name__ == '__main__':
    q = Queue()  #创建进程队列对象
    p_list = []
    print('main a id:', id(q))

    for i in range(3):
        p = Process(target=f, args=(q, ))
        p_list.append(p)
        p.start()

    print(q.get())
    print(q.get())
    print(q.get())

    for i in p_list:
        i.join()

示例#49

0

显示文件

文件： pydf.py 项目： LabCad/Sucuri-mog

class Scheduler(object):
    """
	Sucuri scheduler.
	"""
    TASK_TAG = 0
    TERMINATE_TAG = 1

    def __init__(self, graph, n_workers=1, mpi_enabled=True):
        """
		:param graph: DFGraph
			The dataflow graph.
		:param n_workers: int
			Number of workers used.
		:param mpi_enabled:
			Indicates if uses MPI or not.
		"""
        # self.taskq = Queue()  #queue where the ready tasks are inserted
        self.operq = Queue()

        self.graph = graph
        self.tasks = []
        worker_conns = []
        self.conn = []
        self.waiting = []  # queue containing idle workers
        self.n_workers = n_workers  # number of workers
        # keeps track of the number of tasks sent to each worker without a request from the worker (due to affinity)
        self.pending_tasks = [0] * n_workers
        for i in range(n_workers):
            sched_conn, worker_conn = Pipe()
            worker_conns += [worker_conn]
            self.conn += [sched_conn]
        self.workers = [
            Worker(self.graph, self.operq, worker_conns[i], i)
            for i in range(n_workers)
        ]

        if mpi_enabled:
            self.mpi_handle()
        else:
            self.mpi_rank = None

    def mpi_handle(self):
        """
		MPI implementation for the dataflow.
		"""
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        self.mpi_size = comm.Get_size()
        self.mpi_rank = rank
        self.n_slaves = self.mpi_size - 1
        self.keep_working = True

        if rank == 0:
            print "I am the master. There are %s mpi processes. (hostname = %s)" % (
                self.mpi_size, MPI.Get_processor_name())
            self.pending_tasks = [0] * self.n_workers * self.mpi_size
            self.outqueue = Queue()

            def mpi_input(inqueue):
                while self.keep_working:
                    msg = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG)
                    # print "MPI Received opermsg from slave."
                    inqueue.put(msg)

            def mpi_output(outqueue):
                while self.keep_working:
                    task = outqueue.get()
                    if task is not None:  # task == None means termination
                        # print "MPI Sending task to slave node."
                        dest = task.workerid / self.n_workers  # destination mpi process
                        comm.send(task, dest=dest, tag=Scheduler.TASK_TAG)
                    else:
                        self.keep_working = False
                        mpi_terminate()

            def mpi_terminate():
                print "MPI TERMINATING"
                for i in xrange(0, self.mpi_size):
                    comm.send(None, dest=i, tag=Scheduler.TERMINATE_TAG)

            t_in = threading.Thread(target=mpi_input, args=(self.operq, ))
            t_out = threading.Thread(target=mpi_output, args=(self.outqueue, ))
        else:
            print "I am a slave. (hostname = %s)" % MPI.Get_processor_name()
            # slave
            self.inqueue = Queue()
            for worker in self.workers:
                worker.wid += rank * self.n_workers

            status = MPI.Status()

            def mpi_input(inqueue):
                while self.keep_working:
                    task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
                    if status.Get_tag() == Scheduler.TERMINATE_TAG:
                        self.keep_working = False
                        print "MPI received termination."
                        self.terminate_workers(self.workers)
                    else:
                        # print "MPI Sending task to worker in slave."
                        workerid = task.workerid
                        connid = workerid % self.n_workers
                        self.conn[connid].send(task)
                self.operq.put(None)

            def mpi_output(outqueue):
                while self.keep_working:
                    msg = outqueue.get()
                    if msg is not None:
                        # print "MPI send opermsg to master."
                        comm.send(msg, dest=0, tag=0)

            t_in = threading.Thread(target=mpi_input, args=(self.inqueue, ))
            t_out = threading.Thread(target=mpi_output, args=(self.operq, ))

        threads = [t_in, t_out]
        self.threads = threads
        for t in threads:
            t.start()

    def propagate_op(self, oper):
        dst = self.graph.nodes[oper.dstid]

        dst.inport[oper.dstport] += [oper]
        args = dst.match()
        if args is not None:
            self.issue(dst, args)

    def check_affinity(self, task):
        node = self.graph.nodes[task.nodeid]
        if node.affinity is None:
            return None

        affinity = node.affinity[0]
        if len(node.affinity) > 1:
            node.affinity = node.affinity[1:] + [node.affinity[0]]
        return affinity

    def issue(self, node, args):
        #	print "Args %s " %args
        task = Task(node.f, node.id, args)
        self.tasks += [task]

    def all_idle(self, workers):
        # print [(w.idle, w.name) for w in workers]
        # print "All idle? %s" %reduce(lambda a, b: a and b, [w.idle for w in workers])
        if self.mpi_rank == 0:
            return len(self.waiting) == self.n_workers * self.mpi_size
        else:
            return len(self.waiting) == self.n_workers

    def terminate_workers(self, workers):
        print "Terminating workers %s %d %d" % (self.all_idle(
            self.workers), self.operq.qsize(), len(self.tasks))
        if self.mpi_rank == 0:
            self.outqueue.put(None)
            for t in self.threads:
                t.join()
        for worker in workers:
            worker.terminate()

    def start(self):
        """
		Starts the processing dataflow environment.
		"""
        operq = self.operq

        print "Roots %s" % [r for r in self.graph.nodes if len(r.inport) == 0]
        for root in [r for r in self.graph.nodes if len(r.inport) == 0]:
            task = Task(root.f, root.id)
            self.tasks += [task]

        for worker in self.workers:
            print "Starting %s" % worker.wid
            worker.start()

        if self.mpi_rank == 0 or self.mpi_rank is None:
            # it this is the leader process or if mpi is not being used
            print "Main loop"
            self.main_loop()

    def main_loop(self):
        tasks = self.tasks
        operq = self.operq
        workers = self.workers
        while len(tasks) > 0 or not self.all_idle(
                self.workers) or operq.qsize() > 0:
            opersmsg = operq.get()
            for oper in opersmsg:
                if oper.val is not None:
                    self.propagate_op(oper)

            wid = opersmsg[0].wid
            if wid not in self.waiting and opersmsg[0].request_task:
                if self.pending_tasks[wid] > 0:
                    self.pending_tasks[wid] -= 1
                else:
                    self.waiting += [
                        wid
                    ]  # indicate that the worker is idle, waiting for a task

            while len(tasks) > 0 and len(self.waiting) > 0:
                task = tasks.pop(0)
                wid = self.check_affinity(task)
                if wid is not None:
                    if wid in self.waiting:
                        self.waiting.remove(wid)
                    else:
                        self.pending_tasks[wid] += 1
                else:
                    wid = self.waiting.pop(0)
                # print "Got opermsg from worker %d" %wid
                if wid < self.n_workers:  # local worker
                    worker = workers[wid]

                    self.conn[worker.wid].send(task)
                else:
                    task.workerid = wid
                    self.outqueue.put(task)

        print "Waiting %s" % self.waiting
        self.terminate_workers(self.workers)

示例#50

0

显示文件

文件：进程queue.py 项目： taizilinger123/day9

from multiprocessing import Process,Queue
import threading
#import queue

# def f(q):
#     q.put([42, None, 'hello'])

def f(qq):
    qq.put([42, None, 'hello'])

if __name__ == '__main__':
    q = Queue()
    #p = threading.Thread(target=f,)
    p = Process(target=f, args=(q,))
    p.start()
    print(q.get())  # prints "[42, None, 'hello']"
    p.join()

示例#51

0

显示文件

    def calc_features(self,num_threads=1,block_size=3000):
        """
        Calculate the features for every observation using the appended Features
        instances. 

        num_threads: number of threads to run on
        block_size: number of seqeuences to put on a single process
        """
    
        # Create a list of feature functions... 
        feature_names = []
        self._feature_functions = [] 
        num_features = 0
        for e in self._features_engines:
            num_features += e.num_features
            feature_names.append(e.features)
            self._feature_functions.append(e.score)

        # Create a compiled list of feature names
        self._feature_names = np.concatenate(feature_names)
       
        # If enough threads are specified that only a few threads would start, 
        # make the block size smaller 
        if len(self._sequences)//num_threads < block_size:
            block_size = len(self._sequences)//num_threads + 20
        
        # Split squences in to blocks of block_size
        block_edges = []
        for i in range(0,len(self._sequences),block_size):
            block_edges.append(i)
        block_edges.append(len(self._sequences) - 1)

        # Start a process for each thread
        proc_list = []
        queue_list = []
        out = []

        # Go through each sequence
        for i in range(len(block_edges)-1):
            
            first_seq = block_edges[i]
            last_seq =  block_edges[i+1]

            queue_list.append(Queue())
            proc_list.append(Process(target=self._calc_features_on_thread,
                                     args=(first_seq,last_seq,queue_list[-1])))
            proc_list[-1].start()
           
            # If we've capped our number of threads, wait until one of the
            # processes finishes to move on 
            if (len(queue_list) == num_threads) or (i == len(block_edges) - 2):

                waiting = True
                while waiting:

                    # Go through queues
                    for j, q in enumerate(queue_list):
                      
                        # Try to get output on queue.  If output is there, get 
                        # the output and then remove the associated process and
                        # queue 
                        try:
                            out.append(q.get(block=True,timeout=0.1))
                            p = proc_list.pop(j)
                            queue_list.pop(j)

                            waiting = False
                            break

                        except queue_module.Empty:
                            pass

                    # If we're on the last block, wait until the queue is
                    # completely empty before proceeding
                    if len(queue_list) != 0 and i == (len(block_edges) - 2):
                        waiting = True
 
        # Load results into self._features
        self._features = np.zeros((len(self._sequences),num_features),dtype=float)
        for o in out:
            self._features[o[0]:o[1],:] = o[2]

示例#52

0

显示文件

文件： pydf.py 项目： LabCad/Sucuri-mog

    def mpi_handle(self):
        """
		MPI implementation for the dataflow.
		"""
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        self.mpi_size = comm.Get_size()
        self.mpi_rank = rank
        self.n_slaves = self.mpi_size - 1
        self.keep_working = True

        if rank == 0:
            print "I am the master. There are %s mpi processes. (hostname = %s)" % (
                self.mpi_size, MPI.Get_processor_name())
            self.pending_tasks = [0] * self.n_workers * self.mpi_size
            self.outqueue = Queue()

            def mpi_input(inqueue):
                while self.keep_working:
                    msg = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG)
                    # print "MPI Received opermsg from slave."
                    inqueue.put(msg)

            def mpi_output(outqueue):
                while self.keep_working:
                    task = outqueue.get()
                    if task is not None:  # task == None means termination
                        # print "MPI Sending task to slave node."
                        dest = task.workerid / self.n_workers  # destination mpi process
                        comm.send(task, dest=dest, tag=Scheduler.TASK_TAG)
                    else:
                        self.keep_working = False
                        mpi_terminate()

            def mpi_terminate():
                print "MPI TERMINATING"
                for i in xrange(0, self.mpi_size):
                    comm.send(None, dest=i, tag=Scheduler.TERMINATE_TAG)

            t_in = threading.Thread(target=mpi_input, args=(self.operq, ))
            t_out = threading.Thread(target=mpi_output, args=(self.outqueue, ))
        else:
            print "I am a slave. (hostname = %s)" % MPI.Get_processor_name()
            # slave
            self.inqueue = Queue()
            for worker in self.workers:
                worker.wid += rank * self.n_workers

            status = MPI.Status()

            def mpi_input(inqueue):
                while self.keep_working:
                    task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
                    if status.Get_tag() == Scheduler.TERMINATE_TAG:
                        self.keep_working = False
                        print "MPI received termination."
                        self.terminate_workers(self.workers)
                    else:
                        # print "MPI Sending task to worker in slave."
                        workerid = task.workerid
                        connid = workerid % self.n_workers
                        self.conn[connid].send(task)
                self.operq.put(None)

            def mpi_output(outqueue):
                while self.keep_working:
                    msg = outqueue.get()
                    if msg is not None:
                        # print "MPI send opermsg to master."
                        comm.send(msg, dest=0, tag=0)

            t_in = threading.Thread(target=mpi_input, args=(self.inqueue, ))
            t_out = threading.Thread(target=mpi_output, args=(self.operq, ))

        threads = [t_in, t_out]
        self.threads = threads
        for t in threads:
            t.start()

示例#53

0

显示文件

    def tail_logs(self, service, levels, components, clusters, raw_mode=False):
        """Sergeant function for spawning off all the right log tailing functions.

        NOTE: This function spawns concurrent processes and doesn't necessarily
        worry about cleaning them up! That's because we expect to just exit the
        main process when this function returns (as main() does). Someone calling
        this function directly with something like "while True: tail_paasta_logs()"
        may be very sad.

        NOTE: We try pretty hard to supress KeyboardInterrupts to prevent big
        useless stack traces, but it turns out to be non-trivial and we fail ~10%
        of the time. We decided we could live with it and we're shipping this to
        see how it fares in real world testing.

        Here are some things we read about this problem:
        * http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
        * http://jtushman.github.io/blog/2014/01/14/python-%7C-multiprocessing-and-interrupts/
        * http://bryceboe.com/2010/08/26/python-multiprocessing-and-keyboardinterrupt/

        We could also try harder to terminate processes from more places. We could
        use process.join() to ensure things have a chance to die. We punted these
        things.

        It's possible this whole multiprocessing strategy is wrong-headed. If you
        are reading this code to curse whoever wrote it, see discussion in
        PAASTA-214 and https://reviewboard.yelpcorp.com/r/87320/ and feel free to
        implement one of the other options.
        """
        queue = Queue()
        spawned_processes = []

        def callback(component, stream_info, scribe_env, cluster):
            kw = {
                'scribe_env': scribe_env,
                'service': service,
                'levels': levels,
                'components': components,
                'clusters': clusters,
                'queue': queue,
                'filter_fn': stream_info.filter_fn,
            }

            if stream_info.per_cluster:
                kw['stream_name'] = stream_info.stream_name_fn(
                    service, cluster)
                kw['clusters'] = [cluster]
            else:
                kw['stream_name'] = stream_info.stream_name_fn(service)

            process = Process(target=self.scribe_tail, kwargs=kw)
            spawned_processes.append(process)
            process.start()

        self.run_code_over_scribe_envs(clusters=clusters,
                                       components=components,
                                       callback=callback)

        # Pull things off the queue and output them. If any thread dies we are no
        # longer presenting the user with the full picture so we quit.
        #
        # This is convenient for testing, where a fake scribe_tail() can emit a
        # fake log and exit. Without the thread aliveness check, we would just sit
        # here forever even though the threads doing the tailing are all gone.
        #
        # NOTE: A noisy tailer in one scribe_env (such that the queue never gets
        # empty) will prevent us from ever noticing that another tailer has died.
        while True:
            try:
                # This is a blocking call with a timeout for a couple reasons:
                #
                # * If the queue is empty and we get_nowait(), we loop very tightly
                # and accomplish nothing.
                #
                # * Testing revealed a race condition where print_log() is called
                # and even prints its message, but this action isn't recorded on
                # the patched-in print_log(). This resulted in test flakes. A short
                # timeout seems to soothe this behavior: running this test 10 times
                # with a timeout of 0.0 resulted in 2 failures; running it with a
                # timeout of 0.1 resulted in 0 failures.
                #
                # * There's a race where thread1 emits its log line and exits
                # before thread2 has a chance to do anything, causing us to bail
                # out via the Queue Empty and thread aliveness check.
                #
                # We've decided to live with this for now and see if it's really a
                # problem. The threads in test code exit pretty much immediately
                # and a short timeout has been enough to ensure correct behavior
                # there, so IRL with longer start-up times for each thread this
                # will surely be fine.
                #
                # UPDATE: Actually this is leading to a test failure rate of about
                # 1/10 even with timeout of 1s. I'm adding a sleep to the threads
                # in test code to smooth this out, then pulling the trigger on
                # moving that test to integration land where it belongs.
                line = queue.get(True, 0.1)
                print_log(line, levels, raw_mode)
            except Empty:
                try:
                    # If there's nothing in the queue, take this opportunity to make
                    # sure all the tailers are still running.
                    running_processes = [
                        tt.is_alive() for tt in spawned_processes
                    ]
                    if not running_processes or not all(running_processes):
                        log.warn(
                            'Quitting because I expected %d log tailers to be alive but only %d are alive.'
                            % (
                                len(spawned_processes),
                                running_processes.count(True),
                            ))
                        for process in spawned_processes:
                            if process.is_alive():
                                process.terminate()
                        break
                except KeyboardInterrupt:
                    # Die peacefully rather than printing N threads worth of stack
                    # traces.
                    #
                    # This extra nested catch is because it's pretty easy to be in
                    # the above try block when the user hits Ctrl-C which otherwise
                    # dumps a stack trace.
                    log.warn('Terminating.')
                    break
            except KeyboardInterrupt:
                # Die peacefully rather than printing N threads worth of stack
                # traces.
                log.warn('Terminating.')
                break

示例#54

0

显示文件

文件： runner.py 项目： toddrme2178/testflo

class ConcurrentTestRunner(TestRunner):
    """TestRunner that uses the multiprocessing package
    to execute tests concurrently.
    """
    def __init__(self, options, subproc_queue):
        super(ConcurrentTestRunner, self).__init__(options, subproc_queue)
        self.num_procs = options.num_procs

        # only do concurrent stuff if num_procs > 1
        if self.num_procs > 1:
            self.get_iter = self.run_concurrent_tests

            # Create queues
            self.task_queue = Queue()
            self.done_queue = Queue()

            self.procs = []

            # Start worker processes
            for i in range(self.num_procs):
                worker_id = "%d_%d" % (os.getpid(), i)
                self.procs.append(
                    Process(target=worker,
                            args=(self.task_queue, self.done_queue,
                                  subproc_queue, worker_id)))

            for proc in self.procs:
                proc.start()

    def run_concurrent_tests(self, input_iter):
        """Run tests concurrently."""

        it = iter(input_iter)
        numtests = 0
        try:
            for proc in self.procs:
                self.task_queue.put(advance_iterator(it))
                numtests += 1
        except StopIteration:
            pass
        else:
            try:
                while numtests:
                    stop = False
                    results = self.done_queue.get()
                    for result in results:
                        yield result
                        if self.stop:
                            if (result.status == 'FAIL'
                                    and not result.expected_fail) or (
                                        result.status == 'OK'
                                        and result.expected_fail):
                                stop = True
                                break
                    if stop:
                        break
                    numtests -= 1
                    self.task_queue.put(advance_iterator(it))
                    numtests += 1
            except StopIteration:
                pass

        for proc in self.procs:
            self.task_queue.put('STOP')

        for i in range(numtests):
            results = self.done_queue.get()
            for result in results:
                yield result

        for proc in self.procs:
            proc.join()

示例#55

0

显示文件

文件： evision_FYP.py 项目： Ranausama420/evision-FYP

# detect, then generate a set of bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
	"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
	"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
	"sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
#net = cv2.dnn.readNetFromTensorflow('ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb', 'ssd_mobilenet_v2_coco_2018_03_29/pipeline.config')
#net = cv2.dnn.readNetFromTensorflow('models/frozen_inference_graph.pb','models/ssd_mobilenet_v2_coco_2018_03_29.pbtxt')

# initialize the input queue (frames), output queue (detections),
# and the list of actual detections returned by the child process
inputQueue = Queue(maxsize=1)
outputQueue = Queue(maxsize=1)
detections = None

# construct a child process *indepedent* from our main process of
# execution
print("[INFO] starting process...")
p = Process(target=classify_frame, args=(net, inputQueue,
	outputQueue,))
p.daemon = True
p.start()

engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[2].id)
engine.setProperty('rate', 100)

示例#56

0

显示文件

文件： Server.py 项目： abreezes/abreezes-mycrawlers

        """获取总页数"""
        for url in self.comment_url:
            html=self.download.download(url)
            comments_page=self.parse.parses(html)
            yield comments_page['result']['maxPage'], url

    @property
    def gen_comment_urls(self):
        """生成要爬取的url 即每个skuid评论的所有链接"""
        for pages, url in self.get_comment_page:
            url = re.sub('page=(\d+)?&', 'page={}&', url)
            for page in range(1, pages):
                yield url.format(page)


task_queue = Queue()  # 任务队列
result_queue = Queue()  # 结果队列


def return_task():
    """返回任务队列"""
    return task_queue


def return_result():
    """返回结果队列"""
    return result_queue


def get_result(result):
    """获取结果"""

示例#57

0

显示文件

文件： NestedHashJoin.py 项目： keme686/knowledge-engine

    def execute(self, left_queue, right_operator, out, processqueue=Queue()):

        self.left_queue = left_queue
        self.right_operator = right_operator
        self.qresults = out
        #print "right_operator", right_operator
        tuple1 = None
        tuple2 = None
        right_queues = dict()

        while (not(tuple1 == "EOF") or (len(right_queues) > 0)):

            # Try to get and process tuple from left queue
            if not(tuple1 == "EOF"):
                try:
                    tuple1 = self.left_queue.get(False)
                    #print "tuple1: "+str(tuple1)
                    instance = self.probeAndInsert1(tuple1, self.right_table,
                                                    self.left_table, time())
                    if instance: # the join variables have not been used to
                                 # instanciate the right_operator

                        new_right_operator = self.makeInstantiation(tuple1,
                                                                    self.right_operator)
                        #print "new op: "+str(new_right_operator)
                        resource = self.getResource(tuple1)
                        queue = Queue()
                        right_queues[resource] = queue
                        #print "new_right_operator.__class__", new_right_operator.__class__
                        #print "new_right_operator.left.__class__", new_right_operator.left.__class__
                        new_right_operator.execute(queue)
                        #p2 = Process(target=new_right_operator.execute, args=(queue,))
                        #p2.start()
                except Empty:
                    pass
                except TypeError:
                    # TypeError: in resource = resource + tuple[var], when the tuple is "EOF".
                    pass
                except Exception as e:
                    #print "Unexpected error:", sys.exc_info()[0]
                    print(e)
                    pass

            toRemove = [] # stores the queues that have already received all its tuples

            for r in right_queues:
                try:
                    q = right_queues[r]
                    tuple2 = q.get(False)
                    #print "tuple2", tuple2
                    if tuple2 == "EOF":
                        toRemove.append(r)
                    else:
                        self.probeAndInsert2(r, tuple2, self.left_table,
                                             self.right_table, time())
                except Exception:
                    # This catch:
                    # Empty: in tuple2 = self.right.get(False), when the queue is empty.
                    # TypeError: in att = att + tuple[var], when the tuple is "EOF".
                    #print "Unexpected error:", sys.exc_info()[0]
                    pass

            for r in toRemove:
                del right_queues[r]
        # Put EOF in queue and exit.
        self.qresults.put("EOF")
        return

示例#58

0

显示文件

文件： cat.py 项目： tomolopolis/MedCAT-1

    def multiprocessing(self, in_data, nproc=8, batch_size_chars=1000000, only_cui=False, addl_info=[]):
        r''' Run multiprocessing NOT FOR TRAINING

        in_data:  an iterator or array with format: [(id, text), (id, text), ...]
        nproc:  number of processors
        batch_size_chars: size of a batch in number of characters

        return:  an list of tuples: [(id, doc_json), (id, doc_json), ...]
        '''
        if self._meta_annotations:
            # Hack for torch using multithreading, which is not good here
            import torch
            torch.set_num_threads(1)

        # Create the input output for MP
        in_q = Queue(maxsize=4*nproc)
        manager = Manager()
        out_dict = manager.dict()
        out_dict['processed'] = []

        # Create processes
        procs = []
        for i in range(nproc):
            p = Process(target=self._mp_cons, kwargs={'in_q': in_q, 'out_dict': out_dict, 'pid': i, 'only_cui': only_cui,
                'addl_info': addl_info})
            p.start()
            procs.append(p)

        data = []
        nchars = 0
        for id, text in in_data:
            data.append((id, str(text)))
            nchars += len(str(text))
            if  nchars >= batch_size_chars:
                in_q.put(data)
                data = []
                nchars = 0
        # Put the last batch if it exists
        if len(data) > 0:
            in_q.put(data)

        for _ in range(nproc):  # tell workers we're done
            in_q.put(None)

        for p in procs:
            p.join()

        # Close the queue as it can cause memory leaks
        in_q.close()

        out = []
        for key in out_dict.keys():
            if 'pid' in key:
                data = out_dict[key]
                out.extend(data)

        # Sometimes necessary to free memory
        out_dict.clear()
        del out_dict

        return out

示例#59

0

显示文件

文件： NestedHashJoin.py 项目： keme686/knowledge-engine

 def __init__(self, vars):
     self.left_table = dict()
     self.right_table = dict()
     self.qresults = Queue()
     self.vars = vars

示例#60

0

显示文件

文件： resources.py 项目： leanhd/hendrix

class TestNameSpace(object):

    async_task_was_done = Queue()
    async_task_was_run = False
    ready_to_proceed_with_second_cycle = Queue()
    second_cycle_complete = Queue()