class Calculator:
    '''Run the calculation over a number of replicates.

    The fixed part of the input is obtained once from PowerData; every
    replicate is then computed in a worker process by calculate() and the
    per-replicate output is accumulated into self.result by run().
    '''

    def __init__(self, args, unknown_args, extern_data):
        '''Prepare the fixed data and the empty result accumulators.

        args, unknown_args: forwarded unchanged to PowerData.
        extern_data: a dictionary or a list of dictionaries. When it is a
            list, the number of replicates is set to its length and each
            element is used as the external data of one replicate.
        '''
        self.powerdata = PowerData(args, unknown_args, extern_data)
        # initialize data, only get the fixed part of data for now
        self.data = self.powerdata.dump_fixed()
        # isinstance (rather than an exact type test) also accepts list
        # subclasses
        if isinstance(extern_data, list):
            self.data['replicates'] = len(extern_data)
            self.data['name'] = self.data['title']
            self.extern_data = extern_data
        else:
            self.extern_data = None
        self.sample = Sample()
        # keys of the input data; they are stripped from every replicate's
        # output in calculate()
        self.original_keys = sorted(self.data)
        self.result = {}
        # NOTE(review): incremented inside worker processes and read in the
        # parent by run(); assumes Counter shares its state across
        # processes -- confirm
        self.failure_count = Counter()

    def calculate(self, workQueue, resQueue):
        '''Compute replicates taken from workQueue until a None is received.

        workQueue: provides 0-based replicate indices; None ends the loop.
        resQueue: receives exactly one dictionary per replicate. The
            dictionary is empty when the replicate raised
            NullResultException.
        '''
        while True:
            replicate = workQueue.get()
            if replicate is None:
                break
            if self.extern_data is not None:
                extern = self.extern_data[replicate]
                self.data = self.powerdata.dump_updated(extern)
                self.data['name'] = extern['name']
            # work on private copies so replicates do not affect each other
            data = copy.deepcopy(self.data)
            sample = self.sample.clone()
            # replicate IDs are 1-based
            data['replicate_id'] = replicate + 1
            # reset seed
            if data['seed'] == 0:
                # no fixed seed requested: seed the sampler with (0, pid) and
                # draw the rng seed from it
                sample.seed(0, os.getpid())
                rng.seed(sample.runif())
            else:
                # fixed seed requested: offset it by the replicate index
                sample.seed(data['seed'] + replicate)
                rng.seed(data['seed'] + replicate)
            # get the random part of the data
            data.update(self.powerdata.dump_random())
            # initialize empty genotype object
            pop = Population()
            pop.set("variant", data['pos'])
            # apply algorithm to data
            try:
                for item in getAlgorithm(data):
                    item.apply(data, sample, pop)
            except NullResultException:
                self.failure_count.increment()
                data = {}
            else:
                # reclaim memory: keep only what the algorithms added.
                # pop() tolerates a key that an algorithm already removed; a
                # KeyError here would kill the worker and leave run()
                # waiting forever for this replicate's result.
                for k in self.original_keys + ['replicate_id']:
                    data.pop(k, None)
            resQueue.put(data)

    def run(self):
        '''Run all replicates in worker processes and collect the results.

        Returns an empty dictionary when nothing was collected, otherwise
        the input data merged with the collected results (a result entry
        replaces an input entry with the same key).

        Raises ValueError when interrupted from the keyboard while results
        are being collected.
        '''
        n_replicates = self.data['replicates']
        if self.data['verbosity'] <= 1:
            iterations = range(n_replicates)
        else:
            widgets = ['{0} : '.format(self.data['name']), Percentage(),
                       ' ', Bar('='), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=n_replicates,
                               term_width=get_terminal_size()[0] - 5)
            iterations = pbar((i for i in range(n_replicates)))
        # at least one worker, and never more workers than replicates
        nJobs = max(min(self.data['jobs'], n_replicates), 1)
        workQueue = Queue()
        resQueue = Queue()
        # put all replicates + stop signals (one per worker) in queue
        for replicate in range(n_replicates):
            workQueue.put(replicate)
        for _ in range(nJobs):
            workQueue.put(None)
        # spawn workers
        procs = [Process(target=self.calculate, args=(workQueue, resQueue))
                 for _ in range(nJobs)]
        for p in procs:
            p.start()
        # collect the results off the queue: one item per replicate
        for _ in iterations:
            try:
                self.__save(resQueue.get())
            except KeyboardInterrupt as e:
                # keep the original interrupt as the cause of the error
                raise ValueError("calculator terminated!") from e
        for p in procs:
            p.join()
        n_failed = self.failure_count.value()
        if n_failed:
            env.logger.info("{} invalid replicate(s)".format(n_failed))
            # report only the replicates that produced a result
            self.data['replicates'] = self.data['replicates'] - n_failed
        if not self.result:
            return {}
        merged = dict(self.data)
        merged.update(self.result)
        return merged

    def __save(self, data):
        '''Fold the output of one replicate into self.result.

        data: dictionary produced by calculate(). List values replace the
            stored entry; any other value is added to the L.RunningStat
            kept for its key, which is created the first time the key is
            seen. Values that are not equal to themselves (NaN) are skipped.
        '''
        for k, value in data.items():
            is_list = isinstance(value, list)
            # a newly added value to be collected
            if k not in self.result and not is_list:
                half = int(self.data['replicates'] / 2)
                self.result[k] = L.RunningStat(half, half)
            if value == value:
                # a valid value
                if is_list:
                    self.result[k] = value
                else:
                    self.result[k].add(value)
class Calculator:
    '''Run the calculation over a number of replicates.

    The fixed part of the input is obtained once from PowerData; every
    replicate is then computed in a worker process by calculate() and the
    per-replicate output is accumulated into self.result by run().
    '''

    def __init__(self, args, unknown_args, extern_data):
        '''Prepare the fixed data and the empty result accumulators.

        args, unknown_args: forwarded unchanged to PowerData.
        extern_data: a dictionary or a list of dictionaries. When it is a
            list, the number of replicates is set to its length and each
            element is used as the external data of one replicate.
        '''
        self.powerdata = PowerData(args, unknown_args, extern_data)
        # initialize data, only get the fixed part of data for now
        self.data = self.powerdata.dump_fixed()
        # isinstance (rather than an exact type test) also accepts list
        # subclasses
        if isinstance(extern_data, list):
            self.data['replicates'] = len(extern_data)
            self.data['name'] = self.data['title']
            self.extern_data = extern_data
        else:
            self.extern_data = None
        self.sample = Sample()
        # keys of the input data; they are stripped from every replicate's
        # output in calculate()
        self.original_keys = sorted(self.data)
        self.result = {}
        # NOTE(review): incremented inside worker processes and read in the
        # parent by run(); assumes Counter shares its state across
        # processes -- confirm
        self.failure_count = Counter()

    def calculate(self, workQueue, resQueue):
        '''Compute replicates taken from workQueue until a None is received.

        workQueue: provides 0-based replicate indices; None ends the loop.
        resQueue: receives exactly one dictionary per replicate. The
            dictionary is empty when the replicate raised
            NullResultException.
        '''
        while True:
            replicate = workQueue.get()
            if replicate is None:
                break
            if self.extern_data is not None:
                extern = self.extern_data[replicate]
                self.data = self.powerdata.dump_updated(extern)
                self.data['name'] = extern['name']
            # work on private copies so replicates do not affect each other
            data = copy.deepcopy(self.data)
            sample = self.sample.clone()
            # replicate IDs are 1-based
            data['replicate_id'] = replicate + 1
            # reset seed
            if data['seed'] == 0:
                # no fixed seed requested: seed the sampler with (0, pid) and
                # draw the rng seed from it
                sample.seed(0, os.getpid())
                rng.seed(sample.runif())
            else:
                # fixed seed requested: offset it by the replicate index
                sample.seed(data['seed'] + replicate)
                rng.seed(data['seed'] + replicate)
            # get the random part of the data
            data.update(self.powerdata.dump_random())
            # initialize empty genotype object
            pop = Population()
            pop.set("variant", data['pos'])
            # apply algorithm to data
            try:
                for item in getAlgorithm(data):
                    item.apply(data, sample, pop)
            except NullResultException:
                self.failure_count.increment()
                data = {}
            else:
                # reclaim memory: keep only what the algorithms added.
                # pop() tolerates a key that an algorithm already removed; a
                # KeyError here would kill the worker and leave run()
                # waiting forever for this replicate's result.
                for k in self.original_keys + ['replicate_id']:
                    data.pop(k, None)
            resQueue.put(data)

    def run(self):
        '''Run all replicates in worker processes and collect the results.

        Returns an empty dictionary when nothing was collected, otherwise
        the input data merged with the collected results (a result entry
        replaces an input entry with the same key).

        Raises ValueError when interrupted from the keyboard while results
        are being collected.
        '''
        n_replicates = self.data['replicates']
        if self.data['verbosity'] <= 1:
            iterations = range(n_replicates)
        else:
            widgets = ['{0} : '.format(self.data['name']), Percentage(),
                       ' ', Bar('='), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=n_replicates,
                               term_width=get_terminal_size()[0] - 5)
            iterations = pbar((i for i in range(n_replicates)))
        # at least one worker, and never more workers than replicates
        nJobs = max(min(self.data['jobs'], n_replicates), 1)
        workQueue = Queue()
        resQueue = Queue()
        # put all replicates + stop signals (one per worker) in queue
        for replicate in range(n_replicates):
            workQueue.put(replicate)
        for _ in range(nJobs):
            workQueue.put(None)
        # spawn workers
        procs = [Process(target=self.calculate, args=(workQueue, resQueue))
                 for _ in range(nJobs)]
        for p in procs:
            p.start()
        # collect the results off the queue: one item per replicate
        for _ in iterations:
            try:
                self.__save(resQueue.get())
            except KeyboardInterrupt as e:
                # keep the original interrupt as the cause of the error
                raise ValueError("calculator terminated!") from e
        for p in procs:
            p.join()
        n_failed = self.failure_count.value()
        if n_failed:
            env.logger.info("{} invalid replicate(s)".format(n_failed))
            # report only the replicates that produced a result
            self.data['replicates'] = self.data['replicates'] - n_failed
        if not self.result:
            return {}
        merged = dict(self.data)
        merged.update(self.result)
        return merged

    def __save(self, data):
        '''Fold the output of one replicate into self.result.

        data: dictionary produced by calculate(). List values replace the
            stored entry; any other value is added to the L.RunningStat
            kept for its key, which is created the first time the key is
            seen. Values that are not equal to themselves (NaN) are skipped.
        '''
        for k, value in data.items():
            is_list = isinstance(value, list)
            # a newly added value to be collected
            if k not in self.result and not is_list:
                half = int(self.data['replicates'] / 2)
                self.result[k] = L.RunningStat(half, half)
            if value == value:
                # a valid value
                if is_list:
                    self.result[k] = value
                else:
                    self.result[k].add(value)
from spower.utils import getLogger
from spower.simulator.sampler import Sample

if __name__ == "__main__":
    # Build a sampler and seed it, then clone it and seed the clone with the
    # same arguments.
    sampler = Sample(getLogger(1))
    sampler.seed(0, 1)
    sampler_copy = sampler.clone()
    sampler_copy.seed(0, 1)