def __init__(self, kvs, context):
    super(EngineBlock, self).__init__(name='EngineBlock')
    self.daemon = True
    self.context = context
    cylinders = context.cylinders[context.nodeId]
    env = os.environ
    envres = {}
    for v in context.envres:
        e = env.get(v)
        if e:
            l = e.split(',')
            if len(l) < cylinders:
                logger.error('Requested envres variable "%s" has too few values, decreasing cylinders to match: %s', v, e)
                # This may not be safe: the driver is still feeding tasks based on the original count.
                cylinders = len(l)
            elif len(l) > cylinders:
                logger.warning('Requested envres variable "%s" has too many values, so some resources will not be used: %s', v, e)
            envres[v] = l
        else:
            logger.warning('Requested envres variable "%s" not found', v)
    self.parent = kvs
    self.kvs = kvs.clone()
    env.update(self.kvs.view('.common env'))
    # Note we are using the Queue construct from the multiprocessing
    # module---we need to coordinate between independent processes.
    self.ciq, self.coq, self.throttle = mpQueue(), mpQueue(), BoundedSemaphore(cylinders)
    self.Injector(kvs, ".node." + context.node, None, self.coq)
    self.Injector(kvs, ".task", self.throttle, self.coq)
    self.cylinders = [self.Cylinder(context, env, envres, self.ciq, self.coq, x)
                      for x in range(cylinders)]
    self.start()
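# A minimal, hypothetical sketch of the throttle idea used above: a
# BoundedSemaphore sized to the number of cylinders caps how many tasks can
# be in flight at once; the feeder acquires a slot before queueing a task
# and the worker releases it when the task finishes. The names below are
# illustrative and not taken from the original Injector/Cylinder code.
from multiprocessing import Process, Queue as mpQueue, BoundedSemaphore

def cylinder(inq, throttle):
    while True:
        task = inq.get()
        if task is None:              # stop sentinel
            break
        print("running", task)
        throttle.release()            # free a slot for the next task

if __name__ == '__main__':
    cylinders = 2
    inq, throttle = mpQueue(), BoundedSemaphore(cylinders)
    workers = [Process(target=cylinder, args=(inq, throttle)) for _ in range(cylinders)]
    for w in workers:
        w.start()
    for task in range(6):
        throttle.acquire()            # blocks while `cylinders` tasks are in flight
        inq.put(task)
    for _ in workers:
        inq.put(None)
    for w in workers:
        w.join()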
def __init__(self):
    self._dataQ = mpQueue()
    self._animDataQ = mpQueue()
    self._resultQ = mpQueue()
    self._running = mpEvent()
    self._readArraySize = 100
    self._signal = 1
    # self._dict = libs.dictionary.UIsettings()
    # self._animation = libs.Animation.Animation(self._running, self._animDataQ, self._signal)
    self._counter = libs.Counter.Counter(self._running, self._dataQ, self._readArraySize, 1)
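# A minimal, self-contained sketch of the coordination pattern used above
# (a multiprocessing Queue plus Event shared between independent processes);
# the names `produce` and `consume` are illustrative, not from the original code.
from multiprocessing import Process, Queue as mpQueue, Event as mpEvent

def produce(running, dataQ):
    # Push readings while the run flag is set, then send a stop sentinel.
    i = 0
    while running.is_set() and i < 5:
        dataQ.put(i)
        i += 1
    dataQ.put(None)

def consume(dataQ):
    # Drain the queue until the sentinel arrives.
    while True:
        item = dataQ.get()
        if item is None:
            break
        print("got", item)

if __name__ == '__main__':
    running, dataQ = mpEvent(), mpQueue()
    running.set()
    workers = [Process(target=produce, args=(running, dataQ)),
               Process(target=consume, args=(dataQ,))]
    for w in workers:
        w.start()
    for w in workers:
        w.join()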
def multifactor(n, methods=factoring_methods.values(), verbose=False):
    from multiprocessing import Process, Queue as mpQueue
    factors = mpQueue()
    procs = [Process(target=_multifactor_proc, args=(m, n, factors)) for m in methods]
    for p in procs:
        p.start()
    (f, g) = factors.get()
    for p in procs:
        p.terminate()
    if verbose:
        names = {"pollardRho_brent": "prb", "pollard_pm1": "p-1", "williams_pp1": "p+1"}
        if g in names:
            name = names[g]
        else:
            name = g
        print("\033[1;31m" + name + "\033[;m", end=' ')
        stdout.flush()
    return f
def generateHDFbyGroup(testManager, njobs):
    """This function manages the processes for generating the temp HDF5 files
    with variants grouped by gene name.
    """
    HDFfileNames = glob.glob("tmp*_genotypes.h5")
    groupGenerators = []
    fileQueue = mpQueue()
    taskQueue = queue.Queue()
    start = time.time()
    groupGenerators = [None] * min(njobs, len(HDFfileNames))
    if len(testManager.sample_IDs) > 0:
        HDFfileNames = []
        cur = testManager.db.cursor()
        sampleIDstring = ",".join([str(ID) for ID in testManager.sample_IDs])
        sql = "select distinct HDF5 from sample where sample_id in ({0});".format(sampleIDstring)
        cur.execute(sql)
        res = cur.fetchall()
        for filename in res:
            HDFfileNames.append(filename[0])
    for HDFfileName in HDFfileNames:
        fileQueue.put(HDFfileName)
    for i in range(len(HDFfileNames)):
        taskQueue.put(GroupHDFGenerator(fileQueue, testManager.proj, testManager.group_names, i))
    while taskQueue.qsize() > 0:
        for i in range(njobs):
            if groupGenerators[i] is None or not groupGenerators[i].is_alive():
                task = taskQueue.get()
                groupGenerators[i] = task
                task.start()
                fileQueue.put(None)
                break
    for groupHDFGenerator in groupGenerators:
        groupHDFGenerator.join()
def generateHDFbyGroup_update(testManager, njobs):
    HDFfileNames = glob.glob("tmp*_genotypes.h5")
    fileQueue = mpQueue()
    taskQueue = queue.Queue()
    start = time.time()
    groupGenerators = [None] * min(njobs, len(HDFfileNames))
    geneDict, geneSet = getGroupDict(testManager)
    for HDFfileName in HDFfileNames:
        fileQueue.put(HDFfileName)
    for i in range(len(HDFfileNames)):
        # taskQueue.put(GroupHDFGenerator_memory(geneDict, geneSet, fileQueue, testManager.proj, testManager.group_names, i))
        taskQueue.put(GroupHDFGenerator_append(geneDict, geneSet, fileQueue, testManager.proj, testManager.group_names, i))
    while taskQueue.qsize() > 0:
        for i in range(njobs):
            if groupGenerators[i] is None or not groupGenerators[i].is_alive():
                task = taskQueue.get()
                groupGenerators[i] = task
                fileQueue.put(None)
                task.start()
                break
    for groupHDFGenerator in groupGenerators:
        groupHDFGenerator.join()
    print(("group time: ", time.time() - start))
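# A minimal, self-contained sketch (hypothetical names) of the scheduling
# pattern used by the two generateHDFbyGroup variants above: at most `njobs`
# worker processes run at once, each worker pulls file names from a shared
# multiprocessing Queue and stops at a None sentinel that the scheduler adds
# for every worker it starts.
import glob
from multiprocessing import Process, Queue as mpQueue

def worker(fileQueue):
    while True:
        name = fileQueue.get()
        if name is None:              # sentinel: no more files for this worker
            break
        print("processing", name)

def run_pool(fileNames, njobs):
    fileQueue = mpQueue()
    for name in fileNames:
        fileQueue.put(name)
    procs = [Process(target=worker, args=(fileQueue,)) for _ in range(njobs)]
    for p in procs:
        fileQueue.put(None)           # one sentinel per worker
        p.start()
    for p in procs:
        p.join()

if __name__ == '__main__':
    run_pool(glob.glob("tmp*_genotypes.h5"), njobs=2)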
def multifactor(n,
                methods=(_primefac.pollardRho_brent, _primefac.pollard_pm1, _primefac.williams_pp1,
                         _primefac.ecm, _primefac.mpqs, _primefac.fermat, _primefac.factordb),
                verbose=False):
    from multiprocessing import Process, Queue as mpQueue
    from six.moves import xrange, reduce
    import six

    def factory(method, n, output):
        g = method(n)
        if g is not None:
            output.put((g, str(method).split()[1]))

    factors = mpQueue()
    procs = [Process(target=factory, args=(m, n, factors)) for m in methods]
    for p in procs:
        p.start()
    (f, g) = factors.get()
    for p in procs:
        p.terminate()
    if verbose:
        names = {"pollardRho_brent": "prb", "pollard_pm1": "p-1", "williams_pp1": "p+1"}
        if g in names:
            name = names[g]
        else:
            name = g
        print("\033[1;31m" + name + "\033[;m", end=' ')
        stdout.flush()
    return f
def multifactor(n, methods):
    def factory(method, n, output):
        output.put(method(n))

    factors = mpQueue()
    procs = [Process(target=factory, args=(m, n, factors)) for m in methods]
    for p in procs:
        p.start()
    f = factors.get()
    for p in procs:
        p.terminate()
    return f
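# A self-contained usage sketch of the "race several methods" pattern above:
# each candidate runs in its own process, the first result placed on the
# shared Queue wins, and the remaining processes are terminated. The two toy
# methods below are illustrative only.
import time
from multiprocessing import Process, Queue as mpQueue

def fast_method(n):
    return 3 if n % 3 == 0 else None    # toy: answers immediately

def slow_method(n):
    time.sleep(10)                      # toy: would eventually answer
    return n

def race(n, methods):
    def factory(method, n, output):
        output.put(method(n))
    results = mpQueue()
    procs = [Process(target=factory, args=(m, n, results)) for m in methods]
    for p in procs:
        p.start()
    winner = results.get()              # blocks until the first result arrives
    for p in procs:
        p.terminate()
    return winner

if __name__ == '__main__':
    print(race(2019, (fast_method, slow_method)))   # 2019 = 3 * 673, so this prints 3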
def multifactor(n, methods=(pollardRho_brent, pollard_pm1, williams_pp1, ecm, mpqs), verbose=False):
    # Note that the multiprocessing incurs relatively significant overhead.
    # Only call this if n is proving difficult to factor.
    def factory(method, n, output):
        output.put((method(n), str(method).split()[1]))

    factors = mpQueue()
    procs = [Process(target=factory, args=(m, n, factors)) for m in methods]
    for p in procs:
        p.start()
    (f, g) = factors.get()
    for p in procs:
        p.terminate()
    if verbose:
        names = {"pollardRho_brent": "prb", "pollard_pm1": "p-1", "williams_pp1": "p+1"}
        print "\033[1;31m" + (names[g] if g in names else g) + "\033[;m",
        stdout.flush()
    return f
def __init__(self, filePath, update_signal, finish_signal):
    super(objrThread, self).__init__()
    self.isWorking = False
    self.update_signal = update_signal
    # self.finish_signal = finish_signal
    classes_path = os.path.expanduser('model_data/newclothclass.txt')
    with open(classes_path) as f:
        class_names = f.readlines()
    self.class_names = [c.strip() for c in class_names]
    hsv_tuples = [(x / len(self.class_names), 1., 1.) for x in range(len(self.class_names))]
    self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.
    self.newImg_event = Event()
    self.subpro_finish_event = Event()
    self.lastepochFlag = False
    if filePath == "":
        self.filePath = "E:/YJF/hotelvideo/video17.MP4"
    else:
        self.filePath = filePath
    self.rlt_queue = mpQueue()
    self.areaClass = ['toilet', 'sink', 'desktop']
    self.errdict = {}
    self.corrList = []
    self.corrDict = {}
    self.testTotalError = []
    self.testTotalCorr = []
    self.testDelError = []
    self.testDelCorr = []
    self.f = open('test.txt')
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    self.out = cv2.VideoWriter('rltout.avi', fourcc, 30.0, (1920, 1080), True)
def multifactor(
    n,
    methods=(
        _primefac.pollardRho_brent,
        _primefac.pollard_pm1,
        _primefac.williams_pp1,
        _primefac.ecm,
        _primefac.mpqs,
        _primefac.fermat,
        _primefac.factordb,
    ),
    verbose=False,
    timeout=59,
):
    """Multifactor implementation"""
    from multiprocessing import Process, Queue as mpQueue
    from six.moves import xrange, reduce
    import six

    def factory(method, n, output):
        """Simple factory"""
        try:
            g = method(n)
        except OverflowError:
            return None
        if g is not None:
            output.put((g, str(method).split()[1]))

    factors = mpQueue()
    procs = [Process(target=factory, args=(m, n, factors)) for m in methods]
    timer = Timer(timeout, kill_procs, [procs])
    try:
        timer.start()
        for p in procs:
            p.start()
        (f, g) = factors.get()
        for p in procs:
            try:
                p.terminate()
            except:
                pass
    finally:
        timer.cancel()
    return f
def multifactor(n,
                methods=(_primefac.pollardRho_brent, _primefac.pollard_pm1, _primefac.williams_pp1,
                         _primefac.ecm, _primefac.mpqs, _primefac.fermat, _primefac.factordb),
                verbose=False, timeout=59):
    from multiprocessing import Process, Queue as mpQueue

    def factory(method, n, output):
        try:
            g = method(n)
        except OverflowError:
            return None
        if g is not None:
            output.put((g, str(method).split()[1]))

    factors = mpQueue()
    procs = [Process(target=factory, args=(m, n, factors)) for m in methods]
    timer = Timer(timeout, kill_procs, [procs])
    try:
        timer.start()
        for p in procs:
            p.start()
        (f, g) = factors.get()
        for p in procs:
            try:
                p.terminate()
            except:
                pass
    finally:
        timer.cancel()
    if verbose:
        names = {"pollardRho_brent": "prb", "pollard_pm1": "p-1", "williams_pp1": "p+1"}
        if g in names:
            name = names[g]
        else:
            name = g
        print("\033[1;31m" + name + "\033[;m", end=' ')
        stdout.flush()
    return f
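# `kill_procs` is referenced by the Timer watchdog above but not shown in
# these snippets; a plausible minimal implementation (an assumption, not the
# original code) simply terminates every process the timer was given:
def kill_procs(procs):
    for p in procs:
        try:
            p.terminate()
        except Exception:
            pass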
def generateHDFbyGroup(testManager, njobs):
    """This function manages the processes for generating the temp HDF5 files
    with variants grouped by gene name.
    """
    HDFfileNames = glob.glob("tmp*_genotypes.h5")
    groupGenerators = []
    fileQueue = mpQueue()
    taskQueue = queue.Queue()
    groupGenerators = [None] * min(njobs, len(HDFfileNames))
    if len(testManager.sample_IDs) > 0:
        HDFfileNames = []
        cur = testManager.db.cursor()
        sampleIDstring = ",".join([str(ID) for ID in testManager.sample_IDs])
        sql = "select distinct HDF5 from sample where sample_id in ({0});".format(sampleIDstring)
        cur.execute(sql)
        res = cur.fetchall()
        for filename in res:
            HDFfileNames.append(filename[0])
    for HDFfileName in HDFfileNames:
        fileQueue.put(HDFfileName)
    for i in range(len(HDFfileNames)):
        taskQueue.put(GroupHDFGenerator(fileQueue, testManager.proj, testManager.group_names, i))
    while taskQueue.qsize() > 0:
        for i in range(njobs):
            if groupGenerators[i] is None or not groupGenerators[i].is_alive():
                task = taskQueue.get()
                groupGenerators[i] = task
                task.start()
                fileQueue.put(None)
                break
    for groupHDFGenerator in groupGenerators:
        groupHDFGenerator.join()
def restart(self):
    self.queue = mpQueue(maxsize=self._maxsize)
def __init__(self, name='', maxsize=128):
    self.name = name
    self._maxsize = maxsize
    self.queue = mpQueue(maxsize=self._maxsize)
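# A self-contained sketch of how the two methods above fit together in a
# small wrapper class; the class name `BoundedQueue` is hypothetical, and
# `restart` simply swaps in a fresh Queue rather than draining the old one.
import queue
from multiprocessing import Queue as mpQueue

class BoundedQueue(object):
    def __init__(self, name='', maxsize=128):
        self.name = name
        self._maxsize = maxsize
        self.queue = mpQueue(maxsize=self._maxsize)

    def restart(self):
        self.queue = mpQueue(maxsize=self._maxsize)

if __name__ == '__main__':
    q = BoundedQueue('demo', maxsize=2)
    q.queue.put('a')
    q.queue.put('b')
    try:
        q.queue.put_nowait('c')          # queue is full at maxsize=2
    except queue.Full:
        print('full, restarting')
        q.restart()                      # old queue (and its contents) is dropped
    q.queue.put('c')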
def fromSampleStat(self, stat, genotypes, samples): '''Add a field using expression calculated from sample variant table''' IDs = self.proj.selectSampleByPhenotype(samples) if not IDs: raise ValueError( 'No sample is selected using condition "{}"'.format(samples)) # # at least one, at most number of IDs nJobs = max(min(self.jobs, len(IDs)), 1) # start all workers idQueue = queue.Queue() idmpQueue = mpQueue() status = GenotypeStatStatus() mpStatus = Manager().dict() if self.proj.store == "sqlite": for j in range(nJobs): GenotypeStatCalculator('{}_genotype.DB'.format(self.proj.name), stat, idQueue, status, genotypes).start() for ID in IDs: idQueue.put(ID) count = 0 prog = ProgressBar('Calculating phenotype', len(IDs)) while True: if status.count() > count: count = status.count() prog.update(count) # if everything is done if status.count() == len(IDs): # stop all threads for j in range(nJobs): idQueue.put(None) break # wait 1 sec to check status again time.sleep(1) prog.done() # submit all results, these should be quick so no progress bar is used count = [0, 0, 0] cur_fields = self.db.getHeaders('sample')[3:] new_field = {} for field in [x[0] for x in stat]: if field.lower() not in [x.lower() for x in cur_fields]: if field.upper() in SQL_KEYWORDS: raise ValueError( "Phenotype name '{}' is not allowed because it is a reserved word." .format(field)) new_field[field] = True else: new_field[field] = False count[2] += 1 # updated cur = self.db.cursor() for ID in IDs: res = status.get(ID) for idx, (field, expr) in enumerate(stat): if new_field[field]: fldtype = typeOfValues( [str(status.get(x)[idx]) for x in IDs]) # determine the type of value self.db.execute( 'ALTER TABLE sample ADD {} {} NULL;'.format( field, fldtype)) env.logger.debug( 'Adding phenotype {} of type {}'.format( field, fldtype)) new_field[field] = False count[1] += 1 # new cur.execute( 'UPDATE sample SET {0}={1} WHERE sample_id = {1}' .format(field, self.db.PH), [res[idx], ID]) count[0] += 1 elif self.proj.store == "hdf5": for j in range(nJobs): GenotypeStatCalculator_HDF5(self.proj, stat, idmpQueue, mpStatus, genotypes).start() for ID in IDs: idmpQueue.put(ID) count = 0 prog = ProgressBar('Calculating phenotype', len(IDs)) while True: if len(mpStatus) > count: count = len(mpStatus) prog.update(count) # if everything is done if len(mpStatus) == len(IDs): # stop all threads for j in range(nJobs): idmpQueue.put(None) break # wait 1 sec to check status again time.sleep(1) prog.done() # submit all results, these should be quick so no progress bar is used count = [0, 0, 0] cur_fields = self.db.getHeaders('sample')[3:] new_field = {} for field in [x[0] for x in stat]: if field.lower() not in [x.lower() for x in cur_fields]: if field.upper() in SQL_KEYWORDS: raise ValueError( "Phenotype name '{}' is not allowed because it is a reserved word." 
.format(field)) new_field[field] = True else: new_field[field] = False count[2] += 1 # updated cur = self.db.cursor() for ID in IDs: res = mpStatus[ID] for idx, (field, expr) in enumerate(stat): if new_field[field]: fldtype = typeOfValues( [str(mpStatus[x][idx]) for x in IDs]) # determine the type of value self.db.execute( 'ALTER TABLE sample ADD {} {} NULL;'.format( field, fldtype)) env.logger.debug( 'Adding phenotype {} of type {}'.format( field, fldtype)) new_field[field] = False count[1] += 1 # new cur.execute( 'UPDATE sample SET {0}={1} WHERE sample_id = {1}' .format(field, self.db.PH), [res[idx], ID]) count[0] += 1 # report result env.logger.info( '{} values of {} phenotypes ({} new, {} existing) of {} samples are updated.' .format(count[0], count[1] + count[2], count[1], count[2], len(IDs))) self.db.commit()
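# A minimal, self-contained sketch (hypothetical worker) of the progress
# pattern used in the HDF5 branch of fromSampleStat above: worker processes
# pull sample IDs from a multiprocessing Queue, write results into a
# Manager().dict(), and the parent polls the dict's length to drive a
# progress display.
import time
from multiprocessing import Process, Queue as mpQueue, Manager

def stat_worker(idQueue, status):
    while True:
        ID = idQueue.get()
        if ID is None:            # stop sentinel
            break
        status[ID] = ID * ID      # stand-in for the real per-sample statistic

if __name__ == '__main__':
    IDs = list(range(10))
    idQueue, status = mpQueue(), Manager().dict()
    workers = [Process(target=stat_worker, args=(idQueue, status)) for _ in range(2)]
    for w in workers:
        w.start()
    for ID in IDs:
        idQueue.put(ID)
    while len(status) < len(IDs):     # poll until every ID has a result
        time.sleep(0.1)
    for _ in workers:
        idQueue.put(None)
    for w in workers:
        w.join()
    print(dict(status))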
try:
    api_client = apc.api_client_factory(config.cfg)
    logger.info("Setting up monitor")
    response = None
    while response is None:
        response = api_client.setup_media_monitor()
        time.sleep(5)
    storage_directory = apc.encode_to(response["stor"], 'utf-8')
    logger.info("Storage Directory is: %s", storage_directory)
    config.storage_directory = os.path.normpath(storage_directory)
    config.imported_directory = os.path.normpath(storage_directory + '/imported')
    config.organize_directory = os.path.normpath(storage_directory + '/organize')
    config.recorded_directory = os.path.normpath(storage_directory + '/recorded')
    multi_queue = mpQueue()
    logger.info("Initializing event processor")
except Exception, e:
    logger.error('Exception: %s', e)

try:
    wm = WatchManager()
    mmc = MediaMonitorCommon(config)
    pe = AirtimeProcessEvent(queue=multi_queue, airtime_config=config, wm=wm, mmc=mmc)
    bootstrap = AirtimeMediaMonitorBootstrap(logger, pe, api_client, mmc)
    bootstrap.scan()
    notifier = AirtimeNotifier(wm, pe, read_freq=0.1, timeout=0, airtime_config=config,
                               api_client=api_client, bootstrap=bootstrap, mmc=mmc)
if exc.errno != errno.EEXIST:
    raise

FORMAT = '%(asctime)s %(processName)s %(message)s'
logging.basicConfig(format=FORMAT, filename=args.output + 'log_%s.log' % args.starttime,
                    level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S')
logging.info("Started: %s" % sys.argv)
logging.info("Arguments: %s" % args)

# Initialisation
ribQueue = None
countQueue = Queue.Queue(10)
hegemonyQueue = Queue.Queue(60000)
saverQueue = mpQueue(10000)
announceQueue = None
nbGM = args.N / 2
pipeGM = []
for i in range(nbGM):
    pipeGM.append(mpPipe(False))

# Analysis Modules
ag = None
if args.asGraph:
    ribQueue = Queue.Queue(5000)
    ag = asGraph.asGraph(ribQueue)
gm = []
pm = None
if nbGM:
def __init__(self, addresses="127.0.0.1", address_probe_timeout=30, task_port="50097", result_port="50098", cores_per_worker=1, mem_per_worker=None, max_workers=float('inf'), prefetch_capacity=0, uid=None, block_id=None, heartbeat_threshold=120, heartbeat_period=30, poll_period=10, cpu_affinity=False): """ Parameters ---------- addresses : str comma separated list of addresses for the interchange address_probe_timeout : int Timeout in seconds for the address probe to detect viable addresses to the interchange. Default : 30s uid : str string unique identifier block_id : str Block identifier that maps managers to the provider blocks they belong to. cores_per_worker : float cores to be assigned to each worker. Oversubscription is possible by setting cores_per_worker < 1.0. Default=1 mem_per_worker : float GB of memory required per worker. If this option is specified, the node manager will check the available memory at startup and limit the number of workers such that the there's sufficient memory for each worker. If set to None, memory on node is not considered in the determination of workers to be launched on node by the manager. Default: None max_workers : int caps the maximum number of workers that can be launched. default: infinity prefetch_capacity : int Number of tasks that could be prefetched over available worker capacity. When there are a few tasks (<100) or when tasks are long running, this option should be set to 0 for better load balancing. Default is 0. heartbeat_threshold : int Seconds since the last message from the interchange after which the interchange is assumed to be un-available, and the manager initiates shutdown. Default:120s Number of seconds since the last message from the interchange after which the worker assumes that the interchange is lost and the manager shuts down. Default:120 heartbeat_period : int Number of seconds after which a heartbeat message is sent to the interchange poll_period : int Timeout period used by the manager in milliseconds. Default: 10ms cpu_affinity : str Whether each worker should force its affinity to different CPUs """ logger.info("Manager started") try: ix_address = probe_addresses(addresses.split(','), task_port, timeout=address_probe_timeout) if not ix_address: raise Exception("No viable address found") else: logger.info( "Connection to Interchange successful on {}".format( ix_address)) task_q_url = "tcp://{}:{}".format(ix_address, task_port) result_q_url = "tcp://{}:{}".format(ix_address, result_port) logger.info("Task url : {}".format(task_q_url)) logger.info("Result url : {}".format(result_q_url)) except Exception: logger.exception( "Caught exception while trying to determine viable address to interchange" ) print( "Failed to find a viable address to connect to interchange. 
Exiting" ) exit(5) self.context = zmq.Context() self.task_incoming = self.context.socket(zmq.DEALER) self.task_incoming.setsockopt(zmq.IDENTITY, uid.encode('utf-8')) # Linger is set to 0, so that the manager can exit even when there might be # messages in the pipe self.task_incoming.setsockopt(zmq.LINGER, 0) self.task_incoming.connect(task_q_url) self.result_outgoing = self.context.socket(zmq.DEALER) self.result_outgoing.setsockopt(zmq.IDENTITY, uid.encode('utf-8')) self.result_outgoing.setsockopt(zmq.LINGER, 0) self.result_outgoing.connect(result_q_url) logger.info("Manager connected") self.uid = uid self.block_id = block_id if os.environ.get('PARSL_CORES'): cores_on_node = int(os.environ['PARSL_CORES']) else: cores_on_node = multiprocessing.cpu_count() if os.environ.get('PARSL_MEMORY_GB'): available_mem_on_node = float(os.environ['PARSL_MEMORY_GB']) else: available_mem_on_node = round( psutil.virtual_memory().available / (2**30), 1) self.max_workers = max_workers self.prefetch_capacity = prefetch_capacity mem_slots = max_workers # Avoid a divide by 0 error. if mem_per_worker and mem_per_worker > 0: mem_slots = math.floor(available_mem_on_node / mem_per_worker) self.worker_count = min(max_workers, mem_slots, math.floor(cores_on_node / cores_per_worker)) logger.info("Manager will spawn {} workers".format(self.worker_count)) self.pending_task_queue = mpQueue() self.pending_result_queue = mpQueue() self.ready_worker_queue = mpQueue() self.max_queue_size = self.prefetch_capacity + self.worker_count self.tasks_per_round = 1 self.heartbeat_period = heartbeat_period self.heartbeat_threshold = heartbeat_threshold self.poll_period = poll_period self.cpu_affinity = cpu_affinity
def main(): global STATS global VERSION_KEY global CHANGEDCT global shutdown_event global finished_event def signal_handler(signum, frame): signal.signal(signal.SIGINT, SIGINT_ORIG) sys.stdout.write("\rCleaning Up ... Please Wait ...\n") shutdown_event.set() #Let the current workload finish sys.stdout.write("\tStopping Workers\n") for t in threads: t.join(1) stats_queue.put('finished') insert_queue.put("finished") stats_worker_thread.join(5) #Give the Elasticsearch process 5 seconds to exit es_worker_thread.join(5) #If it's still alive, terminate it if es_worker_thread.is_alive(): try: es_worker_thread.terminate() except: pass #Attempt to update the stats #XXX try: es.update(index=meta_index_name, id=options.identifier, body = { 'doc': { 'total' : STATS['total'], 'new' : STATS['new'], 'updated' : STATS['updated'], 'unchanged' : STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT } }) except: pass if reader_thread.is_alive(): try: #Flush the queue so reader can see the shutdown_event while not work_queue.empty(): work_queue.get_nowait() except: pass reader_thread.join() try: work_queue.close() insert_queue.close() stats_queue.close() except: pass sys.stdout.write("... Done\n") sys.exit(0) optparser = OptionParser(usage='usage: %prog [options]') optparser.add_option("-f", "--file", action="store", dest="file", default=None, help="Input CSV file") optparser.add_option("-d", "--directory", action="store", dest="directory", default=None, help="Directory to recursively search for CSV files - prioritized over 'file'") optparser.add_option("-e", "--extension", action="store", dest="extension", default='csv', help="When scanning for CSV files only parse files with given extension (default: 'csv')") optparser.add_option("-i", "--identifier", action="store", dest="identifier", type="int", default=None, help="Numerical identifier to use in update to signify version (e.g., '8' or '20140120')") optparser.add_option("-t", "--threads", action="store", dest="threads", type="int", default=2, help="Number of workers, defaults to 2. 
Note that each worker will increase the load on your ES cluster") optparser.add_option("-B", "--bulk-size", action="store", dest="bulk_size", type="int", default=1000, help="Size of Bulk Insert Requests") optparser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Be verbose") optparser.add_option("--vverbose", action="store_true", dest="vverbose", default=False, help="Be very verbose (Prints status of every domain parsed, very noisy)") optparser.add_option("-s", "--stats", action="store_true", dest="stats", default=False, help="Print out Stats after running") optparser.add_option("-x", "--exclude", action="store", dest="exclude", default="", help="Comma separated list of keys to exclude if updating entry") optparser.add_option("-n", "--include", action="store", dest="include", default="", help="Comma separated list of keys to include if updating entry (mutually exclusive to -x)") optparser.add_option("-o", "--comment", action="store", dest="comment", default="", help="Comment to store with metadata") optparser.add_option("-r", "--redo", action="store_true", dest="redo", default=False, help="Attempt to re-import a failed import or import more data, uses stored metatdata from previous import (-o and -x not required and will be ignored!!)") #ES Specific Options optparser.add_option("-u", "--es-uri", action="store", dest="es_uri", default='localhost:9200', help="Location of ElasticSearch Server (e.g., foo.server.com:9200)") optparser.add_option("-p", "--index-prefix", action="store", dest="index_prefix", default='whois', help="Index prefix to use in ElasticSearch (default: whois)") if (len(sys.argv) < 2): optparser.parse_args(['-h']) (options, args) = optparser.parse_args() if options.vverbose: options.verbose = True threads = [] work_queue = mpQueue(maxsize=options.bulk_size) insert_queue = mpQueue(maxsize=options.bulk_size) stats_queue = mpQueue() meta_index_name = '@' + options.index_prefix + "_meta" data_template = None template_path = os.path.dirname(os.path.realpath(__file__)) with open("%s/es_templates/data.template" % template_path, 'r') as dtemplate: data_template = json.loads(dtemplate.read()) if options.identifier is None and options.redo is False: print "Identifier required\n" optparser.parse_args(['-h']) elif options.identifier is not None and options.redo is True: print "Redo requested and Identifier Specified. 
Please choose one or the other\n" optparser.parse_args(['-h']) elif options.exclude != "" and options.include != "": print "Options include and exclude are mutually exclusive, choose only one\n" optparser.parse_args(['-h']) es = connectElastic(options.es_uri) metadata = None #Create the metadata index if it doesn't exist if not es.indices.exists(meta_index_name): if options.redo: print "Cannot redo when no initial data exists" sys.exit(1) if data_template is not None: data_template["template"] = "%s-*" % options.index_prefix es.indices.put_template(name='%s-template' % options.index_prefix, body = data_template) #Create the metadata index with only 1 shard, even with thousands of imports #This index shouldn't warrant multiple shards #Also use the keyword analyzer since string analsysis is not important es.indices.create(index=meta_index_name, body = {"settings" : { "index" : { "number_of_shards" : 1, "analysis" : { "analyzer" : { "default" : { "type" : "keyword" } } } } } }) #Create the 0th metadata entry metadata = { "metadata": 0, "firstVersion": options.identifier, "lastVersion": options.identifier } es.create(index=meta_index_name, doc_type='meta', id = 0, body = metadata) #Specially create the first index to have 2x the shards than normal #since future indices should be diffs of the first index (ideally) es.indices.create(index='%s-%s' % (options.index_prefix, options.identifier), body = {"settings": { "index": { "number_of_shards": int(data_template["settings"]["number_of_shards"]) * 2 } } }) else: try: result = es.get(index=meta_index_name, id=0) if result['found']: metadata = result['_source'] else: raise Exception("Not Found") except: print "Error fetching metadata from index" sys.exit(1) if options.redo is False: #Identifier is auto-pulled from db, no need to check if options.identifier < 1: print "Identifier must be greater than 0" sys.exit(1) if metadata['lastVersion'] >= options.identifier: print "Identifier must be 'greater than' previous identifier" sys.exit(1) if options.redo is False: if options.exclude != "": options.exclude = options.exclude.split(',') else: options.exclude = None if options.include != "": options.include = options.include.split(',') else: options.include = None #Start worker threads if options.verbose: print "Starting %i worker threads" % options.threads for i in range(options.threads): t = Process(target=process_worker, args=(work_queue, insert_queue, stats_queue, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) #Upate the lastVersion in the metadata es.update(index=meta_index_name, id=0, doc_type='meta', body = {'doc': {'lastVersion': options.identifier}} ) #Create the entry for this import meta_struct = { 'metadata': options.identifier, 'comment' : options.comment, 'total' : 0, 'new' : 0, 'updated' : 0, 'unchanged' : 0, 'duplicates': 0, 'changed_stats': {} } if options.exclude != None: meta_struct['excluded_keys'] = options.exclude elif options.include != None: meta_struct['included_keys'] = options.include es.create(index=meta_index_name, id=options.identifier, doc_type='meta', body = meta_struct) else: #redo is True #Get the record for the attempted import options.identifier = int(metadata['lastVersion']) try: redo_record = es.get(index=meta_index_name, id=options.identifier)['_source'] except: print "Unable to retrieve information for last import" sys.exit(1) if 'excluded_keys' in redo_record: options.exclude = redo_record['excluded_keys'] else: options.exclude = None if 'included_keys' in redo_record: options.include = 
redo_record['included_keys'] else: options.include = None options.comment = redo_record['comment'] STATS['total'] = int(redo_record['total']) STATS['new'] = int(redo_record['new']) STATS['updated'] = int(redo_record['updated']) STATS['unchanged'] = int(redo_record['unchanged']) STATS['duplicates'] = int(redo_record['duplicates']) CHANGEDCT = redo_record['changed_stats'] if options.verbose: print "Re-importing for: \n\tIdentifier: %s\n\tComment: %s" % (options.identifier, options.comment) for ch in CHANGEDCT.keys(): CHANGEDCT[ch] = int(CHANGEDCT[ch]) #Start the reworker threads if options.verbose: print "Starting %i reworker threads" % options.threads for i in range(options.threads): t = Process(target=process_reworker, args=(work_queue, insert_queue, stats_queue, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) #No need to update lastVersion or create metadata entry #Start up the Elasticsearch Bulk Processor es_worker_thread = Process(target=es_worker, args=(insert_queue, options)) es_worker_thread.daemon = True es_worker_thread.start() stats_worker_thread = Thread(target=stats_worker, args=(stats_queue,), name = 'Stats') stats_worker_thread.daemon = True stats_worker_thread.start() #Set up signal handler before we go into the real work SIGINT_ORIG = signal.getsignal(signal.SIGINT) signal.signal(signal.SIGINT, signal_handler) #Start up Reader Thread reader_thread = Thread(target=reader_worker, args=(work_queue, options), name='Reader') reader_thread.daemon = True reader_thread.start() while True: reader_thread.join(.1) if not reader_thread.is_alive(): break time.sleep(.1) while not work_queue.empty(): time.sleep(.01) finished_event.set() for t in threads: t.join() insert_queue.put("finished") es_worker_thread.join() stats_queue.put('finished') stats_worker_thread.join() #Update the stats es.update(index=meta_index_name, id=options.identifier, doc_type='meta', body = { 'doc': { 'total' : STATS['total'], 'new' : STATS['new'], 'updated' : STATS['updated'], 'unchanged' : STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT }} ); if options.stats: print "Stats: " print "Total Entries:\t\t %d" % STATS['total'] print "New Entries:\t\t %d" % STATS['new'] print "Updated Entries:\t %d" % STATS['updated'] print "Duplicate Entries\t %d" % STATS['duplicates'] print "Unchanged Entries:\t %d" % STATS['unchanged']
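# A minimal, self-contained sketch (hypothetical names) of the two-stage
# pipeline used by main() above: worker processes read raw records from a
# work queue and push parsed documents to an insert queue, a single shipper
# process consumes the insert queue, and shutdown is signalled with a
# "finished" message, mirroring the original's sentinel string.
from multiprocessing import Process, Queue as mpQueue

def process_worker(work_queue, insert_queue):
    while True:
        record = work_queue.get()
        if record == "finished":
            insert_queue.put("finished")
            break
        insert_queue.put({"doc": record.upper()})   # stand-in for real parsing

def shipper_worker(insert_queue):
    while True:
        doc = insert_queue.get()
        if doc == "finished":
            break
        print("indexing", doc)

if __name__ == '__main__':
    work_queue, insert_queue = mpQueue(maxsize=1000), mpQueue(maxsize=1000)
    worker = Process(target=process_worker, args=(work_queue, insert_queue))
    shipper = Process(target=shipper_worker, args=(insert_queue,))
    worker.start()
    shipper.start()
    for record in ("example.com", "example.net"):
        work_queue.put(record)
    work_queue.put("finished")
    worker.join()
    shipper.join()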
def main(): global STATS global VERSION_KEY global CHANGEDCT global shutdown_event global finished_event global bulkError_event parser = argparse.ArgumentParser() dataSource = parser.add_mutually_exclusive_group(required=True) dataSource.add_argument("-f", "--file", action="store", dest="file", default=None, help="Input CSV file") dataSource.add_argument("-d", "--directory", action="store", dest="directory", default=None, help="Directory to recursively search for CSV files -- mutually exclusive to '-f' option") parser.add_argument("-e", "--extension", action="store", dest="extension", default='csv', help="When scanning for CSV files only parse files with given extension (default: 'csv')") parser.add_argument("-r", "--redo", action="store_true", dest="redo", default=False, help="Attempt to re-import a failed import or import more data, uses stored metatdata from previous import (-o, -n, and -x not required and will be ignored!!)") parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Be verbose") parser.add_argument("--vverbose", action="store_true", dest="vverbose", default=False, help="Be very verbose (Prints status of every domain parsed, very noisy)") parser.add_argument("-s", "--stats", action="store_true", dest="stats", default=False, help="Print out Stats after running") updateMethod = parser.add_mutually_exclusive_group() updateMethod.add_argument("-x", "--exclude", action="store", dest="exclude", default="", help="Comma separated list of keys to exclude if updating entry") updateMethod.add_argument("-n", "--include", action="store", dest="include", default="", help="Comma separated list of keys to include if updating entry (mutually exclusive to -x)") parser.add_argument("-o", "--comment", action="store", dest="comment", default="", help="Comment to store with metadata") parser.add_argument("-u", "--es-uri", nargs="*", dest="es_uri", default=['localhost:9200'], help="Location(s) of ElasticSearch Server (e.g., foo.server.com:9200) Can take multiple endpoints") parser.add_argument("-p", "--index-prefix", action="store", dest="index_prefix", default='whois', help="Index prefix to use in ElasticSearch (default: whois)") parser.add_argument("-i", "--identifier", action="store", dest="identifier", type=int, default=None, help="Numerical identifier to use in update to signify version (e.g., '8' or '20140120')") parser.add_argument("-B", "--bulk-size", action="store", dest="bulk_size", type=int, default=5000, help="Size of Bulk Elasticsearch Requests") parser.add_argument("--optimize-import", action="store_true", dest="optimize_import", default=False, help="If enabled, will change ES index settings to speed up bulk imports, but if the cluster has a failure, data might be lost permanently!") parser.add_argument("-t", "--threads", action="store", dest="threads", type=int, default=2, help="Number of workers, defaults to 2. Note that each worker will increase the load on your ES cluster since it will try to lookup whatever record it is working on in ES") parser.add_argument("--bulk-serializers", action="store", dest="bulk_serializers", type=int, default=1, help="How many threads to spawn to combine messages from workers. Only increase this if you're are running a lot of workers and one cpu is unable to keep up with the load") parser.add_argument("--bulk-threads", action="store", dest="bulk_threads", type=int, default=1, help="How many threads to spawn to send bulk ES messages. 
The larger your cluster, the more you can increase this") parser.add_argument("--enable-delta-indexes", action="store_true", dest="enable_delta_indexes", default=False, help="If enabled, will put changed entries in a separate index. These indexes can be safely deleted if space is an issue, also provides some other improvements") options = parser.parse_args() if options.vverbose: options.verbose = True options.firstImport = False threads = [] work_queue = jmpQueue(maxsize=options.bulk_size * options.threads) insert_queue = jmpQueue(maxsize=options.bulk_size * options.bulk_threads) bulk_request_queue = jmpQueue(maxsize = 2 * options.bulk_threads) stats_queue = mpQueue() meta_index_name = '@' + options.index_prefix + "_meta" data_template = None template_path = os.path.dirname(os.path.realpath(__file__)) with open("%s/es_templates/data.template" % template_path, 'r') as dtemplate: data_template = json.loads(dtemplate.read()) if options.identifier is None and options.redo is False: print("Identifier required\n") argparse.parse_args(['-h']) elif options.identifier is not None and options.redo is True: print("Redo requested and Identifier Specified. Please choose one or the other\n") argparse.parse_args(['-h']) es = connectElastic(options.es_uri) metadata = None previousVersion = 0 #Create the metadata index if it doesn't exist if not es.indices.exists(meta_index_name): if options.redo: print("Cannot redo when no initial data exists") sys.exit(1) if data_template is not None: data_template["template"] = "%s-*" % options.index_prefix es.indices.put_template(name='%s-template' % options.index_prefix, body = data_template) #Create the metadata index with only 1 shard, even with thousands of imports #This index shouldn't warrant multiple shards #Also use the keyword analyzer since string analsysis is not important es.indices.create(index=meta_index_name, body = {"settings" : { "index" : { "number_of_shards" : 1, "analysis" : { "analyzer" : { "default" : { "type" : "keyword" } } } } } }) #Create the 0th metadata entry metadata = { "metadata": 0, "firstVersion": options.identifier, "lastVersion": options.identifier, "deltaIndexes": options.enable_delta_indexes, } es.create(index=meta_index_name, doc_type='meta', id = 0, body = metadata) #Specially create the first index to have 2x the shards than normal #since future indices should be diffs of the first index (ideally) index_name = "%s-%s" % (options.index_prefix, options.identifier) if options.enable_delta_indexes: index_name += "-o" es.indices.create(index=index_name, body = {"settings": { "index": { "number_of_shards": int(data_template["settings"]["number_of_shards"]) * 2 } } }) options.firstImport = True else: try: result = es.get(index=meta_index_name, id=0) if result['found']: metadata = result['_source'] else: raise Exception("Not Found") except: print("Error fetching metadata from index") sys.exit(1) if options.redo is False: #Identifier is auto-pulled from db, no need to check # Pre-emptively create index index_name = "%s-%s" % (options.index_prefix, options.identifier) if options.enable_delta_indexes: index_name += "-o" es.indices.create(index=index_name) if options.identifier < 1: print("Identifier must be greater than 0") sys.exit(1) if metadata['lastVersion'] >= options.identifier: print("Identifier must be 'greater than' previous identifier") sys.exit(1) previousVersion = metadata['lastVersion'] # Pre-emptively create delta index if options.enable_delta_indexes and previousVersion > 0: index_name = "%s-%s-d" % (options.index_prefix, 
previousVersion) es.indices.create(index=index_name) else: result = es.search(index=meta_index_name, body = { "query": { "match_all": {} }, "sort":[ {"metadata": {"order": "asc"}} ] }) if result['hits']['total'] == 0: print("Unable to fetch entries from metadata index") sys.exit(1) previousVersion = result['hits']['hits'][-2]['_id'] options.previousVersion = previousVersion # Change Index settings to better suit bulk indexing optimizeIndexes(es, options) index_list = es.search(index=meta_index_name, body = { "query": { "match_all": {} }, "_source": "metadata", "sort":[ {"metadata": {"order": "desc"}} ] }) index_list = [entry['_source']['metadata'] for entry in index_list['hits']['hits'][:-1]] options.INDEX_LIST = [] for index_name in index_list: if options.enable_delta_indexes: options.INDEX_LIST.append('%s-%s-o' % (options.index_prefix, index_name)) else: options.INDEX_LIST.append('%s-%s' % (options.index_prefix, index_name)) if options.redo is False: if options.exclude != "": options.exclude = options.exclude.split(',') else: options.exclude = None if options.include != "": options.include = options.include.split(',') else: options.include = None #Start worker threads if options.verbose: print("Starting %i worker threads" % options.threads) for i in range(options.threads): t = Process(target=process_worker, args=(work_queue, insert_queue, stats_queue, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) #Upate the lastVersion in the metadata es.update(index=meta_index_name, id=0, doc_type='meta', body = {'doc': {'lastVersion': options.identifier}} ) #Create the entry for this import meta_struct = { 'metadata': options.identifier, 'comment' : options.comment, 'total' : 0, 'new' : 0, 'updated' : 0, 'unchanged' : 0, 'duplicates': 0, 'changed_stats': {} } if options.exclude != None: meta_struct['excluded_keys'] = options.exclude elif options.include != None: meta_struct['included_keys'] = options.include es.create(index=meta_index_name, id=options.identifier, doc_type='meta', body = meta_struct) else: #redo is True #Get the record for the attempted import options.identifier = int(metadata['lastVersion']) try: redo_record = es.get(index=meta_index_name, id=options.identifier)['_source'] except: print("Unable to retrieve information for last import") sys.exit(1) if 'excluded_keys' in redo_record: options.exclude = redo_record['excluded_keys'] else: options.exclude = None if 'included_keys' in redo_record: options.include = redo_record['included_keys'] else: options.include = None options.comment = redo_record['comment'] STATS['total'] = int(redo_record['total']) STATS['new'] = int(redo_record['new']) STATS['updated'] = int(redo_record['updated']) STATS['unchanged'] = int(redo_record['unchanged']) STATS['duplicates'] = int(redo_record['duplicates']) CHANGEDCT = redo_record['changed_stats'] if options.verbose: print("Re-importing for: \n\tIdentifier: %s\n\tComment: %s" % (options.identifier, options.comment)) for ch in CHANGEDCT.keys(): CHANGEDCT[ch] = int(CHANGEDCT[ch]) #Start the reworker threads if options.verbose: print("Starting %i reworker threads" % options.threads) for i in range(options.threads): t = Process(target=process_reworker, args=(work_queue, insert_queue, stats_queue, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) #No need to update lastVersion or create metadata entry # Start up the Elasticsearch Bulk Serializers # Its job is just to combine work into bulk-sized chunks to be sent to the bulk API # One serializer should be 
enough for a lot of workers, but anyone with a super large cluster might # be able to run a lot of workers which can subsequently overwhelm a single serializer es_serializers = [] for i in range(options.bulk_serializers): es_serializer = Process(target=es_serializer_proc, args=(insert_queue, bulk_request_queue, options)) es_serializer.start() es_serializers.append(es_serializer) # Start up ES Bulk Shippers, each in their own process # As far as I can tell there's an issue (bug? feature?) that causes every request made to ES to hinder the entire process even if it's in a separate python thread # not sure if this is GIL related or not, but instead of debugging how the elasticsearch library or urllib does things # its easier to just spawn a separate process for every connection being made to ES for i in range(options.bulk_threads): es_bulk_shipper = Process(target=es_bulk_shipper_proc, args=(bulk_request_queue, i, options)) es_bulk_shipper.daemon = True es_bulk_shipper.start() stats_worker_thread = Thread(target=stats_worker, args=(stats_queue,), name = 'Stats') stats_worker_thread.daemon = True stats_worker_thread.start() #Start up Reader Thread reader_thread = Thread(target=reader_worker, args=(work_queue, options), name='Reader') reader_thread.daemon = True reader_thread.start() try: while True: reader_thread.join(.1) if not reader_thread.is_alive(): break # If bulkError occurs stop reading from the files if bulkError_event.is_set(): sys.stdout.write("Bulk API error -- forcing program shutdown \n") raise KeyboardInterrupt("Error response from ES worker, stopping processing") if options.verbose: sys.stdout.write("All files ingested ... please wait for processing to complete ... \n") sys.stdout.flush() while not work_queue.empty(): # If bulkError occurs stop processing if bulkError_event.is_set(): sys.stdout.write("Bulk API error -- forcing program shutdown \n") raise KeyboardInterrupt("Error response from ES worker, stopping processing") work_queue.join() try: # Since this is the shutdown section, ignore Keyboard Interrupts # especially since the interrupt code (below) does effectively the same thing insert_queue.join() finished_event.set() for t in threads: t.join() # Wait for the es serializer(s) to package up all of the bulk requests for es_serializer in es_serializers: es_serializer.join() # Wait for shippers to send all bulk requests bulk_request_queue.join() # Change settings back unOptimizeIndexes(es, data_template, options) stats_queue.put('finished') stats_worker_thread.join() #Update the stats try: es.update(index=meta_index_name, id=options.identifier, doc_type='meta', body = { 'doc': { 'total' : STATS['total'], 'new' : STATS['new'], 'updated' : STATS['updated'], 'unchanged' : STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT }} ); except Exception as e: sys.stdout.write("Error attempting to update stats: %s\n" % str(e)) except KeyboardInterrupt: pass if options.verbose: sys.stdout.write("Done ...\n\n") sys.stdout.flush() if options.stats: print("Stats: ") print("Total Entries:\t\t %d" % STATS['total']) print("New Entries:\t\t %d" % STATS['new']) print("Updated Entries:\t %d" % STATS['updated']) print("Duplicate Entries\t %d" % STATS['duplicates']) print("Unchanged Entries:\t %d" % STATS['unchanged']) except KeyboardInterrupt as e: sys.stdout.write("\rCleaning Up ... Please Wait ...\nWarning!! 
Forcefully killing this might leave Elasticsearch in an inconsistent state!\n") shutdown_event.set() # Flush the queue if the reader is alive so it can see the shutdown_event # in case it's blocked on a put sys.stdout.write("\tShutting down input reader threads ...\n") while reader_thread.is_alive(): try: work_queue.get_nowait() work_queue.task_done() except queue.Empty: break reader_thread.join() # Don't join on the work queue, we don't care if the work has been finished # The worker threads will exit on their own after getting the shutdown_event # Joining on the insert queue is important to ensure ES isn't left in an inconsistent state if delta indexes are being used # since it 'moves' documents from one index to another which involves an insert and a delete insert_queue.join() # All of the workers should have seen the shutdown event and exited after finishing whatever they were last working on sys.stdout.write("\tStopping workers ... \n") for t in threads: t.join() # Send the finished message to the stats queue to shut it down stats_queue.put('finished') stats_worker_thread.join() sys.stdout.write("\tWaiting for ElasticSearch bulk uploads to finish ... \n") # The ES serializer does not recognize the shutdown event only the graceful finished_event # so set the event so it can gracefully shutdown finished_event.set() # Wait for es serializer(s) to package up all bulk requests for es_serializer in es_serializers: es_serializer.join() # Wait for shippers to send all bulk requests, otherwise ES might be left in an inconsistent state bulk_request_queue.join() #Attempt to update the stats #XXX try: sys.stdout.write("\tFinalizing metadata\n") es.update(index=meta_index_name, id=options.identifier, body = { 'doc': { 'total' : STATS['total'], 'new' : STATS['new'], 'updated' : STATS['updated'], 'unchanged' : STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT } }) except: pass sys.stdout.write("\tFinalizing settings\n") # Make sure to de-optimize the indexes for import unOptimizeIndexes(es, data_template, options) try: work_queue.close() insert_queue.close() stats_queue.close() except: pass sys.stdout.write("... Done\n") sys.exit(0)
def stopBtn(self):
    """
    Try to stop the animation and join all the processes/threads.
    The extensive checking below could be shortened.
    """
    if self._running.is_set():
        self._running.clear()
        self._anim.anim._stop()
        self.progress.setRange(0, 1)
        while not self._semaphore.get_value() == 3:
            pass

        # joining
        self.statusBar.showMessage("Stopped! Please wait while data is being processed.")
        self._laser.join(timeout=3.0)
        self._u.join(timeout=3.0)
        self._counter1.join(timeout=5.0)
        self._counter2.join(timeout=5.0)
        self._dataProcesser2.join(timeout=5.0)
        self._dataProcesser1.join(timeout=5.0)

        # extensive checking for joining
        check = 0
        if self._dataProcesser1.is_alive():
            print("DataProcesser 1 did not join.")
            del self._dataProcesser1
            check += 1
        if self._dataProcesser2.is_alive():
            print("DataProcesser 2 did not join.")
            del self._dataProcesser2
            check += 1
        if self._counter1.is_alive():
            print("Counter 1 did not join.")
            del self._counter1
            check += 1
        if self._counter2.is_alive():
            print("Counter 2 did not join.")
            del self._counter2
            check += 1
        if self._u.is_alive():
            print("Waiter thread did not join.")
            del self._u
            check += 1
        if self._laser.is_alive():
            print("Laser did not join.")
            del self._laser
            check += 1
        if not check:
            print("All workers have joined.")

        # Print the timetrace of the data; it is also saved as a *.png in the data folder.
        if not self._new_folder == 'Cont':
            self._timetrace.doThings(self._new_folder)
            self._files.convertToPhotonHDF5(self._new_folder, self.signal)

        # Unfortunately there is no other way than replacing the queues.
        # Otherwise the leftover data would be shown in the next measurement
        # instead of the new data.
        del self._animDataQ1, self._animDataQ2
        self._animDataQ1 = mpQueue()
        self._animDataQ2 = mpQueue()
        self.statusBar.showMessage("Stopped and idle!")
    else:
        self.statusBar.showMessage("Already stopped or not running at all.")
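# The method above discards leftover data by replacing the queue objects; a
# small alternative sketch (an assumption, not the original approach) drains
# the existing Queue in place with get_nowait until it reports empty. Note
# that a multiprocessing Queue has a background feeder thread, so a freshly
# put item may need a moment before it becomes visible to get_nowait.
import queue
import time
from multiprocessing import Queue as mpQueue

def drain(q):
    # Discard everything currently readable from a multiprocessing Queue.
    while True:
        try:
            q.get_nowait()
        except queue.Empty:
            return

if __name__ == '__main__':
    q = mpQueue()
    for i in range(5):
        q.put(i)
    time.sleep(0.1)   # give the feeder thread time to flush the puts
    drain(q)
    print(q.empty())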
def main(): global STATS global VERSION_KEY global CHANGEDCT global shutdown_event def signal_handler(signum, frame): signal.signal(signal.SIGINT, SIGINT_ORIG) sys.stdout.write("\rCleaning Up ... Please Wait ...\n") shutdown_event.set() # Let the current workload finish sys.stdout.write("\tStopping Workers\n") for t in threads: t.join(1) insert_queue.put("finished") # Give the Mongo process 5 seconds to exit mongo_worker_thread.join(5) # If it's still alive, terminate it if mongo_worker_thread.is_alive(): try: mongo_worker_thread.terminate() except: pass # Attempt to update the stats try: meta.update({'metadata': options.identifier}, {'$set': { 'total': STATS['total'], 'new': STATS['new'], 'updated': STATS['updated'], 'unchanged': STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT } }); except: pass sys.stdout.write("... Done\n") sys.exit(0) optparser = OptionParser(usage='usage: %prog [options]') optparser.add_option("-f", "--file", action="store", dest="file", default=None, help="Input CSV file") optparser.add_option("-d", "--directory", action="store", dest="directory", default=None, help="Directory to recursively search for CSV files - prioritized over 'file'") optparser.add_option("-e", "--extension", action="store", dest="extension", default='csv', help="When scanning for CSV files only parse files with given extension (default: 'csv')") optparser.add_option("-i", "--identifier", action="store", dest="identifier", type="int", default=None, help="Numerical identifier to use in update to signify version (e.g., '8' or '20140120')") optparser.add_option("-m", "--mongo-host", action="store", dest="mongo_host", default='localhost', help="Location of mongo db/cluster") optparser.add_option("-p", "--mongo-port", action="store", dest="mongo_port", type="int", default=27017, help="Location of mongo db/cluster") optparser.add_option("-b", "--database", action="store", dest="database", default='whois', help="Name of database to use (default: 'whois')") optparser.add_option("-c", "--collection", action="store", dest="collection", default='whois', help="Name of collection to use (default: 'whois')") optparser.add_option("-t", "--threads", action="store", dest="threads", type="int", default=multiprocessing.cpu_count(), help="Number of worker threads") optparser.add_option("-B", "--bulk-size", action="store", dest="bulk_size", type="int", default=1000, help="Size of Bulk Insert Requests") optparser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Be verbose") optparser.add_option("--vverbose", action="store_true", dest="vverbose", default=False, help="Be very verbose (Prints status of every domain parsed, very noisy)") optparser.add_option("-s", "--stats", action="store_true", dest="stats", default=False, help="Print out Stats after running") optparser.add_option("-x", "--exclude", action="store", dest="exclude", default="", help="Comma separated list of keys to exclude if updating entry") optparser.add_option("-n", "--include", action="store", dest="include", default="", help="Comma separated list of keys to include if updating entry (mutually exclusive to -x)") optparser.add_option("-o", "--comment", action="store", dest="comment", default="", help="Comment to store with metadata") optparser.add_option("-r", "--redo", action="store_true", dest="redo", default=False, help="Attempt to re-import a failed import or import more data, uses stored metatdata for previous import (-o and -x not required and will be ignored!!)") (options, 
args) = optparser.parse_args() if options.vverbose: options.verbose = True threads = [] work_queue = Queue.Queue(maxsize=options.bulk_size) insert_queue = mpQueue(maxsize=options.bulk_size) client = MongoClient(host=options.mongo_host, port=options.mongo_port) whodb = client[options.database] collection = whodb[options.collection] meta = whodb[options.collection + '_meta'] if options.identifier is None and options.redo is False: print "Identifier required" sys.exit(1) elif options.identifier is not None and options.redo is True: print "Redo requested and Identifier Specified. Please choose one or the other" sys.exit(1) elif options.exclude != "" and options.include != "": print "Options include and exclude are mutually exclusive, choose only one" sys.exit(1) metadata = meta.find_one({'metadata': 0}) meta_id = None if metadata is None: # Doesn't exist if options.redo is False: md = { 'metadata': 0, 'firstVersion': options.identifier, 'lastVersion': options.identifier, } meta_id = meta.insert(md) metadata = meta.find_one({'_id': meta_id}) # Setup indexes collection.ensure_index(UNIQUE_KEY, background=True, unique=True) collection.ensure_index(VERSION_KEY, background=True) collection.ensure_index('domainName', background=True) collection.ensure_index([('domainName', pymongo.ASCENDING), (VERSION_KEY, pymongo.ASCENDING)], background=True) collection.ensure_index('details.contactEmail', background=True) collection.ensure_index('details.registrant_name', background=True) collection.ensure_index('details.registrant_telephone', background=True) else: print "Cannot redo when no initial import exists" sys.exit(1) else: if options.redo is False: # Identifier is auto-pulled from db, no need to check if options.identifier < 1: print "Identifier must be greater than 0" sys.exit(1) if metadata['lastVersion'] >= options.identifier: print "Identifier must be 'greater than' previous identifier" sys.exit(1) meta_id = metadata['_id'] if options.redo is False: if options.exclude != "": options.exclude = options.exclude.split(',') else: options.exclude = None if options.include != "": options.include = options.include.split(',') else: options.include = None # Start worker threads if options.verbose: print "Starting %i worker threads" % options.threads for i in range(options.threads): t = Thread(target=process_worker, args=(work_queue, insert_queue, collection, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) # Upate the lastVersion in the metadata meta.update({'_id': meta_id}, {'$set': {'lastVersion': options.identifier}}) # Create the entry for this import meta_struct = { 'metadata': options.identifier, 'comment': options.comment, 'total': 0, 'new': 0, 'updated': 0, 'unchanged': 0, 'duplicates': 0, 'changed_stats': {} } if options.exclude != None: meta_struct['excluded_keys'] = options.exclude elif options.include != None: meta_struct['included_keys'] = options.include meta.insert(meta_struct) else: # redo is True # Get the record for the attempted import options.identifier = int(metadata['lastVersion']) redo_record = meta.find_one({'metadata': options.identifier}) if 'excluded_keys' in redo_record: options.exclude = redo_record['excluded_keys'] else: options.exclude = None if 'included_keys' in redo_record: options.include = redo_record['included_keys'] else: options.include = None options.comment = redo_record['comment'] STATS['total'] = int(redo_record['total']) STATS['new'] = int(redo_record['new'], ) STATS['updated'] = int(redo_record['updated']) STATS['unchanged'] = 
int(redo_record['unchanged']) STATS['duplicates'] = int(redo_record['duplicates']) CHANGEDCT = redo_record['changed_stats'] if options.verbose: print "Re-importing for: \n\tIdentifier: %s\n\tComment: %s" % (options.identifier, options.comment) for ch in CHANGEDCT.keys(): CHANGEDCT[ch] = int(CHANGEDCT[ch]) # Start the reworker threads if options.verbose: print "Starting %i reworker threads" % options.threads for i in range(options.threads): t = Thread(target=process_reworker, args=(work_queue, insert_queue, collection, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) # No need to update lastVersion or create metadata entry # Start up the Mongo Bulk Processor mongo_worker_thread = Process(target=mongo_worker, args=(insert_queue, options)) mongo_worker_thread.daemon = True mongo_worker_thread.start() # Set up signal handler before we go into the real work SIGINT_ORIG = signal.getsignal(signal.SIGINT) signal.signal(signal.SIGINT, signal_handler) # Start up Reader Thread reader_thread = Thread(target=reader_worker, args=(work_queue, collection, options), name='Reader') reader_thread.daemon = True reader_thread.start() while True: reader_thread.join(.1) if not reader_thread.isAlive(): break time.sleep(.1) while not work_queue.empty(): time.sleep(.01) work_queue.join() insert_queue.put("finished") mongo_worker_thread.join() # Update the stats meta.update({'metadata': options.identifier}, {'$set': { 'total': STATS['total'], 'new': STATS['new'], 'updated': STATS['updated'], 'unchanged': STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT } }); if options.stats: print "Stats: " print "Total Entries:\t\t %d" % STATS['total'] print "New Entries:\t\t %d" % STATS['new'] print "Updated Entries:\t %d" % STATS['updated'] print "Duplicate Entries\t %d" % STATS['duplicates'] print "Unchanged Entries:\t %d" % STATS['unchanged']
def __init__(self): super(MainWindow, self).__init__() """ Init hosts all the variables and UI related functionality. The following classes can not be initializes in here, because they inherit from multiprocessing.Process: libs.Counter, libs.Laser and libs.dataProcesser. Arguments have to be passed by inheritance, later there's no possibility, due to their separation from the main loop. libs.Animation also gets instanciated later, due to the window launching functionality in its init method. """ self._dict = libs.UIsettings.UIsettings() self._files = libs.SaveFiles.SaveFiles() self._changeRA = libs.ChangeReadArray.ChangeReadArray() self._timetrace = libs.Timetraces.Timetrace() self._r = libs.Refresh.Refresh() self._readArraySize = int(1e3) # Queues, Semaphores and Events all derived from the multiprocessing library self._dataQ1 = mpQueue() self._dataQ2 = mpQueue() self._animDataQ1 = mpQueue() self._animDataQ2 = mpQueue() self._running = mpEvent() self._semaphore = Semaphore(3) # pyqtSignals self.signal = Communicate() self.signal.stopMeasurement.connect(self.stopBtn) self.signal.measurementProgress.connect(lambda x: self.setProgressBar(x)) self.signal.warning.connect(self.warnPopUp) self.signal.displayRates.connect(lambda x: self.displayRatesOnLCD(x)) self.signal.convertDone.connect(lambda: self.statusBar.showMessage('Conversion done!')) self.signal.alreadyConverted.connect(lambda: self.statusBar.showMessage('Already converted!')) self.signal.noData.connect(lambda: self.statusBar.showMessage('No data files in this directory!')) # ################## # # Window and widgets # # ################## # self.setGeometry(500, 300, 500, 200) # x, y, width, height self.setWindowTitle('ALEX') # ## Statusbar self.statusBar = QStatusBar() self.setStatusBar(self.statusBar) self.statusBar.showMessage("idle") # ## file menue # load app, loads laser settings from file self.loadApp = QAction('Load settings', self) self.loadApp.setShortcut('Ctrl+l') self.loadApp.triggered.connect(self.loadDict) # save app, saves measurement settings to file self.saveApp = QAction('Save settings', self) self.saveApp.setShortcut('Ctrl+s') self.saveApp.triggered.connect(lambda: self._files.saveSetsDict(self._dict._a, self.getDirectory(), 'Measurement_settings')) # convert data to photon-hdf5 self.convertData = QAction('Convert raw data to photon-hdf5', self) self.convertData.setShortcut('Ctrl+c') self.convertData.triggered.connect(lambda: self._files.convertToPhotonHDF5(self.getDirectory(), self.signal)) # change ReadArray size self.setArraySize = QAction('Read arrays size', self) self.setArraySize.setShortcut('Ctrl+a') self.setArraySize.triggered.connect(self.changeArraySize) # close the app self.closeApp = QAction('Close', self) self.closeApp.setShortcut('Ctrl+q') self.closeApp.triggered.connect(self.closeApplication) self.menueLayout() # ## GroupBox Directory: # Group contains: # - Location display QLineEdit # - Browse directories QPushButton filesGroup = QGroupBox('Directory') hbox12 = QHBoxLayout() filesGroup.setLayout(hbox12) self._location = QLineEdit() self._location.setToolTip("Please select a directory. 
If you do not, the data will not be saved!") self._location.setMaxLength(100) self._location.setReadOnly(True) hbox12.addWidget(self._location) hbox12.setSpacing(10) self._browseButton = QPushButton('Browse', self) self._browseButton.clicked.connect(self.getFileLocation) hbox12.addWidget(self._browseButton) # ## label for different widgets self.label1 = QLabel("Laserpower green") self.label2 = QLabel("Laserpower red") self.label3 = QLabel("Ratio of illumination\ngreen/red") self.label4 = QLabel("ALEX\nfrequency [Hz]") self.label5 = QLabel("Measurement\nduration [s]") self.label6 = QLabel("Measurement mode") self.label7 = QLabel("Counts in green channel") self.label8 = QLabel("Counts in red channel") self.label9 = QLabel("% Green") self.label10 = QLabel("% Red") # ## GroupBox Laser: # Group contains: # - Laser power red slider # - Laser power red spinbox # - Laser power green slider # - Laser power green spinbox # - Laser period percentage slider # - Laser period percentage red spinbox # - Laser period percentage green spinbox laserGroup = QGroupBox("Laser settings") hbox1 = QHBoxLayout() hbox1.setSpacing(30) hbox2 = QHBoxLayout() hbox2.setSpacing(30) hbox9 = QHBoxLayout() hbox11 = QHBoxLayout() hbox11.setSpacing(30) hbox10 = QHBoxLayout() hbox10.setSpacing(30) vbox1 = QVBoxLayout() vbox1.addWidget(self.label2) vbox1.addLayout(hbox1) vbox1.addStretch(1) vbox1.addWidget(self.label1) vbox1.addLayout(hbox2) vbox1.addStretch(1) vbox1.addWidget(self.label3) vbox1.addLayout(hbox9) vbox1.addStretch(1) vbox1.addLayout(hbox11) vbox1.addStretch(1) vbox1.addLayout(hbox10) vbox1.addStretch(1) laserGroup.setLayout(vbox1) hbox11.addWidget(self.label9) hbox11.addWidget(self.label10) # Laserpower slider red self.sld_red = QSlider(Qt.Horizontal, self) self.sld_red.setFocusPolicy(Qt.NoFocus) self.sld_red.setGeometry(80, 20, 50, 10) self.sld_red.setMinimum(0) self.sld_red.setMaximum(100) self.sld_red.setValue(50) self.sld_red.setTickPosition(QSlider.TicksBelow) self.sld_red.setTickInterval(20) self.sld_red.valueChanged.connect(lambda: self.refreshUI(0, 'sld_red', self.sld_red.value())) hbox1.addWidget(self.sld_red) # Laserpower QSpinBox red self.sb_red = QSpinBox(self) self.sb_red.setMinimum(0) self.sb_red.setMaximum(100) self.sb_red.setValue(50) self.sb_red.valueChanged.connect(lambda: self.refreshUI(1, 'sb_red', self.sb_red.value())) hbox1.addWidget(self.sb_red) # Laserpower slider green self.sld_green = QSlider(Qt.Horizontal, self) self.sld_green.setFocusPolicy(Qt.NoFocus) self.sld_green.setGeometry(160, 40, 100, 30) self.sld_green.setMinimum(0) self.sld_green.setMaximum(100) self.sld_green.setValue(50) self.sld_green.setTickPosition(QSlider.TicksBelow) self.sld_green.setTickInterval(20) self.sld_green.valueChanged.connect(lambda: self.refreshUI(0, 'sld_green', self.sld_green.value())) hbox2.addWidget(self.sld_green) # Laserpower QSpinBox green self.sb_green = QSpinBox(self) self.sb_green.setMinimum(0) self.sb_green.setMaximum(100) self.sb_green.setValue(50) self.sb_green.valueChanged.connect(lambda: self.refreshUI(1, 'sb_green', self.sb_green.value())) hbox2.addWidget(self.sb_green) # Illumination percentage slider self.sld_percentage = QSlider(Qt.Horizontal, self) self.sld_percentage.setFocusPolicy(Qt.NoFocus) self.sld_percentage.setGeometry(160, 40, 100, 30) self.sld_percentage.setMinimum(0) self.sld_percentage.setMaximum(100) self.sld_percentage.setTickPosition(QSlider.TicksBelow) self.sld_percentage.setTickInterval(10) self.sld_percentage.setValue(50) self.sld_percentage.valueChanged.connect(lambda: 
self.refreshUI(0, 'sld_percentage', self.sld_percentage.value())) hbox9.addWidget(self.sld_percentage) # Illumination percentage QSpinBox green self.sb_percentG = QSpinBox(self) self.sb_percentG.setMinimum(0) self.sb_percentG.setMaximum(100) self.sb_percentG.setValue(50) self.sb_percentG.valueChanged.connect(lambda: self.refreshUI(1, 'sb_percentG', self.sb_percentG.value())) hbox10.addWidget(self.sb_percentG) # Illumination QSpinBox red self.sb_percentR = QSpinBox(self) self.sb_percentR.setMinimum(0) self.sb_percentR.setMaximum(100) self.sb_percentR.setValue(50) self.sb_percentR.valueChanged.connect(lambda: self.refreshUI(1, 'sb_percentR', self.sb_percentR.value())) hbox10.addWidget(self.sb_percentR) # ## APD GroupBox # Group contains: # - Laser alternation frequency spinbox # - Measurement mode continuous radiobutton # - Measurement mode finite radiobutton # - Measurement duration spinbox apdGroup = QGroupBox("Measurement") hbox3 = QHBoxLayout() hbox3.setSpacing(30) hbox4 = QHBoxLayout() hbox4.setSpacing(30) hbox5 = QHBoxLayout() hbox5.setSpacing(30) vbox2 = QVBoxLayout() vbox2.addLayout(hbox3) vbox2.addStretch(1) vbox2.addLayout(hbox4) vbox2.addStretch(1) vbox2.addWidget(self.label6) vbox2.addLayout(hbox5) apdGroup.setLayout(vbox2) hbox3.addWidget(self.label4) hbox3.addWidget(self.label5) # Sample frequence QSpinBox self.sb_sampFreq = QSpinBox(self) self.sb_sampFreq.setMinimum(100) self.sb_sampFreq.setMaximum(100000) self.sb_sampFreq.setValue(10000) self.sb_sampFreq.valueChanged.connect(lambda: self.refreshUI(1, 'sb_sampFreq', self.sb_sampFreq.value())) hbox4.addWidget(self.sb_sampFreq) # Radiobutton Continuous Measurement self.rb_cont = QRadioButton("Continuous") self.rb_cont.setChecked(True) self.rb_cont.toggled.connect(lambda: self.refreshUI(2, self.rb_cont, self.rb_finite)) hbox5.addWidget(self.rb_cont) # Radiobutton Finite Measurement self.rb_finite = QRadioButton("Finite") self.rb_finite.toggled.connect(lambda: self.refreshUI(2, self.rb_finite, self.rb_cont)) hbox5.addWidget(self.rb_finite) # Measurement duration QSpinBox self.duration = QSpinBox(self) self.duration.setMinimum(0) self.duration.setMaximum(600) self.duration.setValue(300.0) self.duration.valueChanged.connect(lambda: self.refreshUI(1, 'duration', self.duration.value())) hbox4.addWidget(self.duration) # ## Button GroupBox: # Group contains: # - Start button # - Stop button # - ProgressBar buttonGroup = QGroupBox("Control") hbox6 = QHBoxLayout() hbox11 = QHBoxLayout() vbox3 = QVBoxLayout() vbox3.addLayout(hbox6) vbox3.addStretch(1) vbox3.addLayout(hbox11) vbox3.addStretch(1) buttonGroup.setLayout(vbox3) # Start button self.startButton = QPushButton("Start", self) self.startButton.clicked.connect(self.startBtn) hbox6.addWidget(self.startButton) # Stop button self.stopButton = QPushButton("Stop", self) self.stopButton.clicked.connect(self.stopBtn) hbox6.addWidget(self.stopButton) # Progress Bar self.progress = QProgressBar(self) self.progress.setAlignment(Qt.AlignHCenter) self.progress.setRange(0, 100) hbox11.addWidget(self.progress) # ## LCD display group: # Group contains widgets: # - LCD display green # - LCD display red lcdGroup = QGroupBox("Count rates") hbox7 = QHBoxLayout() hbox8 = QHBoxLayout() vbox4 = QVBoxLayout() vbox4.addLayout(hbox7) vbox4.addLayout(hbox8) lcdGroup.setLayout(vbox4) hbox7.addWidget(self.label7) hbox7.addWidget(self.label8) # Green channel count rate self.green_lcd = QLCDNumber(self) self.green_lcd.setNumDigits(12) hbox8.addWidget(self.green_lcd) # Red channel count rate self.red_lcd = 
QLCDNumber(self) self.red_lcd.setNumDigits(12) hbox8.addWidget(self.red_lcd) # ## General Layout settings: self.centralBox = QGroupBox("Settings") self.setCentralWidget(self.centralBox) # self.centralWidget.setStyleSheet("QMainWindow {background: 'yellow';}"); # Arrange groups in grid: grid = QGridLayout() grid.addWidget(filesGroup, 0, 0, 1, 3) grid.addWidget(laserGroup, 1, 0, 2, 2) grid.addWidget(apdGroup, 1, 2, 1, 1) grid.addWidget(buttonGroup, 2, 2, 1, 1) grid.addWidget(lcdGroup, 3, 0, 1, 3) self.centralBox.setLayout(grid)
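A minimal sketch of the construction-time hand-off the __init__ docstring above describes: because classes such as libs.Counter inherit from multiprocessing.Process, every shared object (queue, event, semaphore) has to be passed in before start(). The AcquisitionWorker name, its arguments, and the chunk size are hypothetical stand-ins, not the actual ALEX classes.

from multiprocessing import Process, Queue as mpQueue, Event as mpEvent
import time


class AcquisitionWorker(Process):
    """Receives every shared object in __init__; after start() the child
    process can no longer see attributes set on the GUI side."""

    def __init__(self, running, data_queue, chunk_size):
        super(AcquisitionWorker, self).__init__()
        self._running = running      # multiprocessing.Event shared with the GUI
        self._dataQ = data_queue     # multiprocessing.Queue shared with the GUI
        self._chunk = chunk_size

    def run(self):
        sample = 0
        while self._running.is_set():
            time.sleep(0.01)         # stand-in for waiting on the hardware buffer
            self._dataQ.put(list(range(sample, sample + self._chunk)))
            sample += self._chunk


if __name__ == '__main__':
    running = mpEvent()
    dataQ = mpQueue()
    worker = AcquisitionWorker(running, dataQ, chunk_size=100)
    running.set()
    worker.start()                   # queue and event were handed over before start()
    print(len(dataQ.get()))          # first chunk produced by the child process
    running.clear()
    worker.join()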
def fromSampleStat(self, stat, genotypes, samples): '''Add a field using expression calculated from sample variant table''' IDs = self.proj.selectSampleByPhenotype(samples) if not IDs: raise ValueError('No sample is selected using condition "{}"'.format(samples)) # # at least one, at most number of IDs nJobs = max(min(self.jobs, len(IDs)), 1) # start all workers idQueue = queue.Queue() idmpQueue=mpQueue() status = GenotypeStatStatus() mpStatus=Manager().dict() if self.proj.store=="sqlite": for j in range(nJobs): GenotypeStatCalculator('{}_genotype.DB'.format(self.proj.name), stat, idQueue, status, genotypes).start() for ID in IDs: idQueue.put(ID) count = 0 prog = ProgressBar('Calculating phenotype', len(IDs)) while True: if status.count() > count: count = status.count() prog.update(count) # if everything is done if status.count() == len(IDs): # stop all threads for j in range(nJobs): idQueue.put(None) break # wait 1 sec to check status again time.sleep(1) prog.done() # submit all results, these should be quick so no progress bar is used count = [0, 0, 0] cur_fields = self.db.getHeaders('sample')[3:] new_field = {} for field in [x[0] for x in stat]: if field.lower() not in [x.lower() for x in cur_fields]: if field.upper() in SQL_KEYWORDS: raise ValueError("Phenotype name '{}' is not allowed because it is a reserved word.".format(field)) new_field[field] = True else: new_field[field] = False count[2] += 1 # updated cur = self.db.cursor() for ID in IDs: res = status.get(ID) for idx, (field, expr) in enumerate(stat): if new_field[field]: fldtype = typeOfValues([str(status.get(x)[idx]) for x in IDs]) # determine the type of value self.db.execute('ALTER TABLE sample ADD {} {} NULL;' .format(field, fldtype)) env.logger.debug('Adding phenotype {} of type {}' .format(field, fldtype)) new_field[field] = False count[1] += 1 # new cur.execute('UPDATE sample SET {0}={1} WHERE sample_id = {1}'.format(field, self.db.PH), [res[idx], ID]) count[0] += 1 elif self.proj.store=="hdf5": for j in range(nJobs): GenotypeStatCalculator_HDF5(self.proj, stat, idmpQueue, mpStatus, genotypes).start() for ID in IDs: idmpQueue.put(ID) count = 0 prog = ProgressBar('Calculating phenotype', len(IDs)) while True: if len(mpStatus) > count: count = len(mpStatus) prog.update(count) # if everything is done if len(mpStatus) == len(IDs): # stop all threads for j in range(nJobs): idmpQueue.put(None) break # wait 1 sec to check status again time.sleep(1) prog.done() # submit all results, these should be quick so no progress bar is used count = [0, 0, 0] cur_fields = self.db.getHeaders('sample')[3:] new_field = {} for field in [x[0] for x in stat]: if field.lower() not in [x.lower() for x in cur_fields]: if field.upper() in SQL_KEYWORDS: raise ValueError("Phenotype name '{}' is not allowed because it is a reserved word.".format(field)) new_field[field] = True else: new_field[field] = False count[2] += 1 # updated cur = self.db.cursor() for ID in IDs: res = mpStatus[ID] for idx, (field, expr) in enumerate(stat): if new_field[field]: fldtype = typeOfValues([str(mpStatus[x][idx]) for x in IDs]) # determine the type of value self.db.execute('ALTER TABLE sample ADD {} {} NULL;' .format(field, fldtype)) env.logger.debug('Adding phenotype {} of type {}' .format(field, fldtype)) new_field[field] = False count[1] += 1 # new cur.execute('UPDATE sample SET {0}={1} WHERE sample_id = {1}'.format(field, self.db.PH), [res[idx], ID]) count[0] += 1 # report result env.logger.info('{} values of {} phenotypes ({} new, {} existing) of {} samples are 
updated.'.format( count[0], count[1]+count[2], count[1], count[2], len(IDs))) self.db.commit()
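The HDF5 branch above starts nJobs GenotypeStatCalculator_HDF5 processes, feeds sample IDs through idmpQueue, tracks progress in a Manager().dict(), and stops the workers by putting one None per job. Below is a sketch of the worker loop that driver presumes; StatWorker and its per-sample computation are hypothetical stand-ins, not the real calculator class.

from multiprocessing import Process, Queue as mpQueue, Manager


class StatWorker(Process):
    def __init__(self, id_queue, status):
        super(StatWorker, self).__init__()
        self.id_queue = id_queue
        self.status = status

    def run(self):
        while True:
            sample_id = self.id_queue.get()
            if sample_id is None:                      # sentinel: the driver puts one None per worker
                break
            self.status[sample_id] = [sample_id * 2]   # stand-in for the real statistic


if __name__ == '__main__':
    ids = list(range(1, 11))
    n_jobs = 3
    id_queue = mpQueue()
    status = Manager().dict()
    workers = [StatWorker(id_queue, status) for _ in range(n_jobs)]
    for w in workers:
        w.start()
    for sample_id in ids:
        id_queue.put(sample_id)
    for _ in range(n_jobs):                            # one sentinel per worker, as in the driver
        id_queue.put(None)
    for w in workers:
        w.join()
    print(len(status), 'samples processed')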
# Company: Goethe University of Frankfurt
# Institute: Institute of Physical and Theoretical Chemistry
# Department: Single Molecule Biophysics
# License: GPL3
#####################################################################
import numpy as np
import anim
from multiprocessing import Queue as mpQueue
import time

duration = 10
signal = None


def gen_data(duration):
    for i in np.arange(duration):
        time.sleep(0.1)
        a = np.arange(1000)
        q1.put(a)
        q2.put(a)


q1 = mpQueue()
q2 = mpQueue()
# Bind the instance to a name that does not shadow the imported anim module
animation = anim.Animation(q1, q2, duration, 1000)
animation.run()
gen_data(duration)
animation.animate()
animation.anim._stop()
def main(): global STATS global VERSION_KEY global CHANGEDCT global shutdown_event def signal_handler(signum, frame): signal.signal(signal.SIGINT, SIGINT_ORIG) sys.stdout.write("\rCleaning Up ... Please Wait ...\n") shutdown_event.set() #Let the current workload finish sys.stdout.write("\tStopping Workers\n") for t in threads: t.join(1) insert_queue.put("finished") #Give the Mongo process 5 seconds to exit mongo_worker_thread.join(5) #If it's still alive, terminate it if mongo_worker_thread.is_alive(): try: mongo_worker_thread.terminate() except: pass #Attempt to update the stats try: meta.update({'metadata': options.identifier}, {'$set' : { 'total' : STATS['total'], 'new' : STATS['new'], 'updated' : STATS['updated'], 'unchanged' : STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT } }); except: pass sys.stdout.write("... Done\n") sys.exit(0) optparser = OptionParser(usage='usage: %prog [options]') optparser.add_option("-f", "--file", action="store", dest="file", default=None, help="Input CSV file") optparser.add_option("-d", "--directory", action="store", dest="directory", default=None, help="Directory to recursively search for CSV files - prioritized over 'file'") optparser.add_option("-e", "--extension", action="store", dest="extension", default='csv', help="When scanning for CSV files only parse files with given extension (default: 'csv')") optparser.add_option("-i", "--identifier", action="store", dest="identifier", type="int", default=None, help="Numerical identifier to use in update to signify version (e.g., '8' or '20140120')") optparser.add_option("-m", "--mongo-host", action="store", dest="mongo_host", default='localhost', help="Location of mongo db/cluster") optparser.add_option("-p", "--mongo-port", action="store", dest="mongo_port", type="int", default=27017, help="Location of mongo db/cluster") optparser.add_option("-b", "--database", action="store", dest="database", default='whois', help="Name of database to use (default: 'whois')") optparser.add_option("-c", "--collection", action="store", dest="collection", default='whois', help="Name of collection to use (default: 'whois')") optparser.add_option("-t", "--threads", action="store", dest="threads", type="int", default=multiprocessing.cpu_count(), help="Number of worker threads") optparser.add_option("-B", "--bulk-size", action="store", dest="bulk_size", type="int", default=1000, help="Size of Bulk Insert Requests") optparser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Be verbose") optparser.add_option("--vverbose", action="store_true", dest="vverbose", default=False, help="Be very verbose (Prints status of every domain parsed, very noisy)") optparser.add_option("-s", "--stats", action="store_true", dest="stats", default=False, help="Print out Stats after running") optparser.add_option("-x", "--exclude", action="store", dest="exclude", default="", help="Comma separated list of keys to exclude if updating entry") optparser.add_option("-n", "--include", action="store", dest="include", default="", help="Comma separated list of keys to include if updating entry (mutually exclusive to -x)") optparser.add_option("-o", "--comment", action="store", dest="comment", default="", help="Comment to store with metadata") optparser.add_option("-r", "--redo", action="store_true", dest="redo", default=False, help="Attempt to re-import a failed import or import more data, uses stored metatdata for previous import (-o and -x not required and will be ignored!!)") (options, 
args) = optparser.parse_args() if options.vverbose: options.verbose = True threads = [] work_queue = Queue.Queue(maxsize=options.bulk_size) insert_queue = mpQueue(maxsize=options.bulk_size) client = MongoClient(host=options.mongo_host, port=options.mongo_port) whodb = client[options.database] collection = whodb[options.collection] meta = whodb[options.collection + '_meta'] if options.identifier is None and options.redo is False: print "Identifier required" sys.exit(1) elif options.identifier is not None and options.redo is True: print "Redo requested and Identifier Specified. Please choose one or the other" sys.exit(1) elif options.exclude != "" and options.include != "": print "Options include and exclude are mutually exclusive, choose only one" sys.exit(1) metadata = meta.find_one({'metadata':0}) meta_id = None if metadata is None: #Doesn't exist if options.redo is False: md = { 'metadata': 0, 'firstVersion': options.identifier, 'lastVersion' : options.identifier, } meta_id = meta.insert(md) metadata = meta.find_one({'_id': meta_id}) # Setup indexes collection.ensure_index(UNIQUE_KEY, background=True, unique=True) collection.ensure_index(VERSION_KEY, background=True) collection.ensure_index('domainName', background=True) collection.ensure_index([('domainName', pymongo.ASCENDING), (VERSION_KEY, pymongo.ASCENDING)], background=True) collection.ensure_index('details.contactEmail', background=True) collection.ensure_index('details.registrant_name', background=True) collection.ensure_index('details.registrant_telephone', background=True) else: print "Cannot redo when no initial import exists" sys.exit(1) else: if options.redo is False: #Identifier is auto-pulled from db, no need to check if options.identifier < 1: print "Identifier must be greater than 0" sys.exit(1) if metadata['lastVersion'] >= options.identifier: print "Identifier must be 'greater than' previous identifier" sys.exit(1) meta_id = metadata['_id'] if options.redo is False: if options.exclude != "": options.exclude = options.exclude.split(',') else: options.exclude = None if options.include != "": options.include = options.include.split(',') else: options.include = None #Start worker threads if options.verbose: print "Starting %i worker threads" % options.threads for i in range(options.threads): t = Thread(target=process_worker, args=(work_queue, insert_queue, collection, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) #Upate the lastVersion in the metadata meta.update({'_id': meta_id}, {'$set' : {'lastVersion': options.identifier}}) #Create the entry for this import meta_struct = { 'metadata': options.identifier, 'comment' : options.comment, 'total' : 0, 'new' : 0, 'updated' : 0, 'unchanged' : 0, 'duplicates': 0, 'changed_stats': {} } if options.exclude != None: meta_struct['excluded_keys'] = options.exclude elif options.include != None: meta_struct['included_keys'] = options.include meta.insert(meta_struct) else: #redo is True #Get the record for the attempted import options.identifier = int(metadata['lastVersion']) redo_record = meta.find_one({'metadata': options.identifier}) if 'excluded_keys' in redo_record: options.exclude = redo_record['excluded_keys'] else: options.exclude = None if 'included_keys' in redo_record: options.include = redo_record['included_keys'] else: options.include = None options.comment = redo_record['comment'] STATS['total'] = int(redo_record['total']) STATS['new'] = int(redo_record['new'],) STATS['updated'] = int(redo_record['updated']) STATS['unchanged'] = 
int(redo_record['unchanged']) STATS['duplicates'] = int(redo_record['duplicates']) CHANGEDCT = redo_record['changed_stats'] if options.verbose: print "Re-importing for: \n\tIdentifier: %s\n\tComment: %s" % (options.identifier, options.comment) for ch in CHANGEDCT.keys(): CHANGEDCT[ch] = int(CHANGEDCT[ch]) #Start the reworker threads if options.verbose: print "Starting %i reworker threads" % options.threads for i in range(options.threads): t = Thread(target=process_reworker, args=(work_queue, insert_queue, collection, options), name='Worker %i' % i) t.daemon = True t.start() threads.append(t) #No need to update lastVersion or create metadata entry #Start up the Mongo Bulk Processor mongo_worker_thread = Process(target=mongo_worker, args=(insert_queue, options)) mongo_worker_thread.daemon = True mongo_worker_thread.start() #Set up signal handler before we go into the real work SIGINT_ORIG = signal.getsignal(signal.SIGINT) signal.signal(signal.SIGINT, signal_handler) #Start up Reader Thread reader_thread = Thread(target=reader_worker, args=(work_queue, collection, options), name='Reader') reader_thread.daemon = True reader_thread.start() while True: reader_thread.join(.1) if not reader_thread.isAlive(): break time.sleep(.1) while not work_queue.empty(): time.sleep(.01) work_queue.join() insert_queue.put("finished") mongo_worker_thread.join() #Update the stats meta.update({'metadata': options.identifier}, {'$set' : { 'total' : STATS['total'], 'new' : STATS['new'], 'updated' : STATS['updated'], 'unchanged' : STATS['unchanged'], 'duplicates': STATS['duplicates'], 'changed_stats': CHANGEDCT } }); if options.stats: print "Stats: " print "Total Entries:\t\t %d" % STATS['total'] print "New Entries:\t\t %d" % STATS['new'] print "Updated Entries:\t %d" % STATS['updated'] print "Duplicate Entries\t %d" % STATS['duplicates'] print "Unchanged Entries:\t %d" % STATS['unchanged']
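The pipeline above runs reader thread -> work_queue (thread Queue) -> worker threads -> insert_queue (multiprocessing Queue) -> mongo_worker process, and is shut down with the "finished" string. The body of mongo_worker is not shown, so the following is only a sketch of the consumer shape main() implies, using the same legacy pymongo calls as the surrounding code; it is not the actual implementation.

def mongo_worker_sketch(insert_queue, options):
    from pymongo import MongoClient
    client = MongoClient(host=options.mongo_host, port=options.mongo_port)
    collection = client[options.database][options.collection]
    batch = []
    while True:
        entry = insert_queue.get()
        if entry == "finished":              # sentinel put by main() once the readers drain
            break
        batch.append(entry)
        if len(batch) >= options.bulk_size:
            collection.insert(batch)         # legacy bulk insert, matching the pymongo era above
            batch = []
    if batch:                                # flush whatever is left before exiting
        collection.insert(batch)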
# Get data from the sqlite db conn = apsw.Connection(slf) cursor = conn.cursor() cursor.execute("SELECT scope, ts, asn, hege FROM hegemony ORDER BY scope") for scope, ts, asn, hege in cursor.fetchall(): data[scope][ts][asn] = hege # Push data to PostgreSQL dt = slf.partition("_")[2] dt = dt.partition(" ")[0] ye, mo, da = dt.split("-") starttime = datetime(int(ye), int(mo), int(da)) saverQueue = mpQueue(1000) ss = Process(target=saverPostgresql, args=(starttime, af, saverQueue), name="saverPostgresql") ss.start() saverQueue.put("BEGIN TRANSACTION;") for scope, allts in data.iteritems(): for ts, hege in allts.iteritems(): saverQueue.put(("hegemony", (ts, scope, hege)) ) saverQueue.put("COMMIT;") logging.debug("Finished") saverQueue.join() ss.terminate()
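The producer above pushes "BEGIN TRANSACTION;", ("hegemony", (ts, scope, hege)) tuples and "COMMIT;" onto a bounded queue, calls saverQueue.join(), and then terminates the saver process. join()/task_done() only exist on multiprocessing.JoinableQueue, so the sketch below assumes that type; the psycopg2 DSN and column names are hypothetical, not the real saverPostgresql.

def saver_sketch(saver_queue):
    """Consumer for the saver queue; it never returns on its own, the producer
    terminates it after saver_queue.join() comes back."""
    import psycopg2
    conn = psycopg2.connect("dbname=ihr")    # hypothetical DSN
    conn.autocommit = True                   # let the explicit BEGIN/COMMIT strings drive transactions
    cursor = conn.cursor()
    while True:
        item = saver_queue.get()
        try:
            if isinstance(item, str):
                cursor.execute(item)         # "BEGIN TRANSACTION;" or "COMMIT;"
            else:
                table, (ts, scope, hege) = item
                cursor.execute(
                    "INSERT INTO {} (ts, scope, hege) VALUES (%s, %s, %s)".format(table),
                    (ts, scope, hege))
        finally:
            saver_queue.task_done()          # this is what lets saverQueue.join() return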
storage_directory = response["stor"] watched_dirs = response["watched_dirs"] logger.info("Storage Directory is: %s", storage_directory) config.storage_directory = os.path.normpath(storage_directory) config.imported_directory = os.path.normpath(os.path.join(storage_directory, 'imported')) config.organize_directory = os.path.normpath(os.path.join(storage_directory, 'organize')) config.recorded_directory = os.path.normpath(os.path.join(storage_directory, 'recorded')) config.problem_directory = os.path.normpath(os.path.join(storage_directory, 'problem_files')) dirs = [config.imported_directory, config.organize_directory, config.recorded_directory, config.problem_directory] for d in dirs: if not os.path.exists(d): os.makedirs(d, 02775) multi_queue = mpQueue() logger.info("Initializing event processor") wm = WatchManager() mmc = MediaMonitorCommon(config, wm=wm) pe = AirtimeProcessEvent(queue=multi_queue, airtime_config=config, wm=wm, mmc=mmc, api_client=api_client) bootstrap = AirtimeMediaMonitorBootstrap(logger, pe, api_client, mmc, wm, config) bootstrap.scan() notifier = AirtimeNotifier(wm, pe, read_freq=0, timeout=0, airtime_config=config, api_client=api_client, bootstrap=bootstrap, mmc=mmc) notifier.coalesce_events() #create 5 worker threads wp = MediaMonitorWorkerProcess(config, mmc) for i in range(5):
anchors = create_anchors(sizes=[(16,16)], ratios=[1], scales=[0.3, 0.375,0.45]) detect_thresh = 0.3 nms_thresh = 0.4 result_boxes = {} result_regression = {} import multiprocessing from multiprocessing import Queue as mpQueue from queue import Queue import queue import time jobQueue=mpQueue() outputQueue=mpQueue() def getPatchesFromQueue(slide_container, jobQueue, outputQueue): x,y=0,0 try: while (True): if (outputQueue.qsize()<100): x,y = jobQueue.get(timeout=1) outputQueue.put((x,y,slide_container.get_patch(x, y) / 255.)) else: time.sleep(0.1) except queue.Empty: print('One worker died.') pass # Timeout happened, exit
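getPatchesFromQueue pulls (x, y) coordinates from jobQueue, throttles itself once outputQueue holds about 100 patches, and exits on a one-second Empty timeout. Below is a sketch of how such workers might be launched and fed; the worker count and coordinate list are arbitrary choices, and it assumes slide_container can be handed to a child process.

from multiprocessing import Process

def run_patch_workers(slide_container, coords, n_workers=4):
    for x, y in coords:                      # enqueue every patch coordinate up front
        jobQueue.put((x, y))
    workers = [Process(target=getPatchesFromQueue,
                       args=(slide_container, jobQueue, outputQueue))
               for _ in range(n_workers)]
    for w in workers:
        w.start()
    results = [outputQueue.get() for _ in coords]   # one (x, y, patch) tuple per coordinate
    for w in workers:                        # workers exit once jobQueue stays empty for 1 s
        w.join()
    return results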
def __init__(self, filePath, update_signal, finish_signal):
    super(objrThread, self).__init__()
    self.isWorking = False
    self.update_signal = update_signal
    self.finish_signal = finish_signal
    self.model_path = 'model_data/trained_weights_final.h5'  # model path or trained weights path
    self.classes_path = 'model_data/newclothclass.txt'
    self.anchors_path = 'model_data/yolo_anchors.txt'
    classes_path = os.path.expanduser(self.classes_path)
    with open(classes_path) as f:
        class_names = f.readlines()
    self.class_names = [c.strip() for c in class_names]
    # hsv_tuples = [(x / len(self.class_names), 1., 1.)
    #               for x in range(len(self.class_names))]
    # self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    # self.colors = list(
    #     map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
    #         self.colors))
    # np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    # np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
    # np.random.seed(None)  # Reset seed to default
    self.colors = [(0, 0, 255), (0, 255, 127), (173, 222, 255), (18, 153, 255),
                   (214, 112, 218), (255, 153, 18), (255, 0, 0)]
    self.lastepochFlag = False
    self.isWorking = True
    if filePath == "":
        self.filePath = Manager().Value(c_char_p, "E:/YJF/hotelvideo/video17.MP4")  # shared string variable
    else:
        self.filePath = Manager().Value(c_char_p, filePath)  # shared string variable
    self.stopMainProcSignal = Event()
    self.input_queue = mpQueue()
    self.camereStartSignal = Event()
    self.camereNotPauseSignal = Event()
    self.camereStopSignal = Event()
    self.camereNewinputSignal = Event()
    self.camereInputendSignal = Event()
    self.detDoneSignal = Event()
    self.newRltSignal = Event()
    self.cameraProc = CameraProc(
        self.filePath, self.input_queue, self.camereStartSignal,
        self.camereNotPauseSignal, self.camereStopSignal,
        self.camereNewinputSignal, self.camereInputendSignal,
        self.detDoneSignal, self.newRltSignal)
    self.rlt_queue = mpQueue()
    self.detproc = DetectorProc(self.input_queue, self.rlt_queue,
                                self.camereNewinputSignal, self.newRltSignal,
                                self.detDoneSignal, self.model_path,
                                self.classes_path, self.anchors_path)
    self.areaClass = ['toilet', 'sink', 'desktop']
    self.errdict = {}
    self.corrList = []
    self.corrDict = {}
    self.testTotalError = []
    self.testTotalCorr = []
    self.testDelError = []
    self.testDelCorr = []
    self.f = open('test.txt', 'a')
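objrThread wires CameraProc and DetectorProc together through input_queue, rlt_queue and a set of Events that act as handshakes. The miniature below shows that Event-plus-Queue handshake in isolation, in lock-step the way detDoneSignal suggests; every name and the frame payload are hypothetical stand-ins, not the real camera/detector protocol.

from multiprocessing import Process, Queue as mpQueue, Event


def producer(frames, in_q, new_input, det_done, input_end):
    for frame in frames:
        in_q.put(frame)
        new_input.set()              # a new frame is waiting
        det_done.wait()              # wait until the detector reports it is done
        det_done.clear()
    input_end.set()                  # no more frames will arrive


def consumer(in_q, out_q, new_input, new_result, det_done, input_end):
    while not (input_end.is_set() and in_q.empty()):
        if not new_input.wait(timeout=0.1):
            continue
        new_input.clear()
        frame = in_q.get()
        out_q.put(frame * 2)         # stand-in for detection
        new_result.set()             # the GUI-side thread would wait on this
        det_done.set()               # releases the producer for the next frame


if __name__ == '__main__':
    in_q, out_q = mpQueue(), mpQueue()
    new_input, new_result, det_done, input_end = Event(), Event(), Event(), Event()
    p = Process(target=producer, args=([1, 2, 3], in_q, new_input, det_done, input_end))
    c = Process(target=consumer, args=(in_q, out_q, new_input, new_result, det_done, input_end))
    c.start()
    p.start()
    p.join()
    print([out_q.get() for _ in range(3)])
    c.join()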
def main():
    thq = Queue.Queue()
    mpq = mpQueue()
    channels.events_queue(thq).start()
    threading.Thread(target=EventWork, args=(mpq, thq)).start()
    Process(target=esl_work, args=(mpq,)).start()
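channels.events_queue fills the thread-side queue, and EventWork apparently forwards those events to the multiprocessing queue that esl_work consumes in a separate process. Its body is not shown, so the bridge below is only an assumed shape with a hypothetical None shutdown sentinel, not the actual EventWork.

def event_bridge(mpq, thq):
    while True:
        event = thq.get()            # blocks on the thread-side Queue.Queue
        mpq.put(event)               # hand the event across the process boundary
        if event is None:            # hypothetical shutdown sentinel
            break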