示例#1
0
class Calculator:
    '''Run a calculation over many replicates using worker processes.

    The fixed part of the input is read once from ``PowerData``. Each
    replicate works on its own deep copy of that data with its own clone
    of the sampler, and the parent process pools what the workers return.
    '''
    def __init__(self, args, unknown_args, extern_data):
        '''Set up the shared input data and the result accumulators.

        args, unknown_args: forwarded unchanged to ``PowerData``.
        extern_data: a dictionary or a list of dicts; when it is a list,
            the number of replicates is set to the length of the list.
        '''
        self.powerdata = PowerData(args, unknown_args, extern_data)
        # initialize data, only get the fixed part of data for now
        self.data = self.powerdata.dump_fixed()
        # isinstance also accepts list subclasses, unlike an exact type test
        if isinstance(extern_data, list):
            self.data['replicates'] = len(extern_data)
            self.data['name'] = self.data['title']
            self.extern_data = extern_data
        else:
            self.extern_data = None
        self.sample = Sample()
        # keys known before any replicate runs; removed from each result
        self.original_keys = sorted(self.data)
        self.result = {}
        self.failure_count = Counter()

    def calculate(self, workQueue, resQueue):
        '''Worker loop: process replicates from workQueue until None arrives.

        For every replicate index taken from ``workQueue`` exactly one
        dictionary is put on ``resQueue``: the replicate's data without the
        original keys and ``replicate_id`` on success, or an empty
        dictionary when ``NullResultException`` was raised.
        '''
        while True:
            replicate = workQueue.get()
            if replicate is None:
                # stop signal
                break
            if self.extern_data is not None:
                extern = self.extern_data[replicate]
                self.data = self.powerdata.dump_updated(extern)
                self.data['name'] = extern['name']
            # reset data
            data = copy.deepcopy(self.data)
            sample = self.sample.clone()
            # set replicate ID (1-based)
            data['replicate_id'] = replicate + 1
            # reset seed
            seed = data['seed']
            if seed == 0:
                # seed of 0: seed the sampler with 0 and this process' pid,
                # then draw the rng seed from the sampler
                sample.seed(0, os.getpid())
                rng.seed(sample.runif())
            else:
                # given seed, offset by the replicate index
                sample.seed(seed + replicate)
                rng.seed(seed + replicate)
            # get the random part of the data
            data.update(self.powerdata.dump_random())
            # initialize empty genotype object
            pop = Population()
            pop.set("variant", data['pos'])
            # apply algorithm to data
            try:
                for item in getAlgorithm(data):
                    item.apply(data, sample, pop)
            except NullResultException:
                self.failure_count.increment()
                data = {}
            else:
                # reclaim memory; pop() instead of del so that a key that is
                # already absent cannot kill the worker with a KeyError and
                # leave the parent waiting on resQueue forever
                for k in self.original_keys + ['replicate_id']:
                    data.pop(k, None)
            resQueue.put(data)

    def run(self):
        '''Run all replicates and return the input data merged with results.

        Returns an empty dictionary when no result was collected, otherwise
        a new dictionary holding ``self.data`` updated with ``self.result``.

        Raises ValueError when interrupted from the keyboard while results
        are being collected; the worker processes are terminated first.
        '''
        n_replicates = self.data['replicates']
        if self.data['verbosity'] <= 1:
            iterations = range(n_replicates)
        else:
            widgets = ['{0} : '.format(self.data['name']), Percentage(),
                       ' ', Bar('='), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=n_replicates,
                               term_width=get_terminal_size()[0] - 5)
            iterations = pbar((i for i in range(n_replicates)))
        # at least one worker, at most one per replicate
        nJobs = max(min(self.data['jobs'], n_replicates), 1)
        workQueue = Queue()
        resQueue = Queue()
        # put all replicates + one stop signal per worker in queue
        for replicate in range(n_replicates):
            workQueue.put(replicate)
        for _ in range(nJobs):
            workQueue.put(None)
        # spawn workers
        procs = [Process(target=self.calculate,
                         args=(workQueue, resQueue)) for _ in range(nJobs)]
        for p in procs:
            p.start()
        # collect the results off the queue, one per replicate
        try:
            for _ in iterations:
                self.__save(resQueue.get())
        except KeyboardInterrupt:
            # do not leave worker processes running behind the error
            for p in procs:
                if p.is_alive():
                    p.terminate()
            for p in procs:
                p.join()
            raise ValueError("calculator terminated!")
        for p in procs:
            p.join()
        failures = self.failure_count.value()
        if failures:
            env.logger.info("{} invalid replicate(s)".format(failures))
            self.data['replicates'] = self.data['replicates'] - failures
        if not self.result:
            return {}
        merged = dict(self.data)
        merged.update(self.result)
        return merged

    def __save(self, data):
        '''Fold the output of one replicate into ``self.result``.

        Non-list values are added to an ``L.RunningStat`` that is created
        the first time their key is seen; list values replace whatever is
        stored under the key. Values that do not compare equal to
        themselves (such as NaN) are skipped.
        '''
        for k, value in data.items():
            is_list = isinstance(value, list)
            # a newly added value to be collected
            if k not in self.result and not is_list:
                half = int(self.data['replicates'] / 2)
                self.result[k] = L.RunningStat(half, half)
            if value == value:
                # a valid value
                if is_list:
                    self.result[k] = value
                else:
                    self.result[k].add(value)
示例#2
0
class Calculator:
    '''Run a calculation over many replicates using worker processes.

    The fixed part of the input is read once from ``PowerData``. Each
    replicate works on its own deep copy of that data with its own clone
    of the sampler, and the parent process pools what the workers return.
    '''
    def __init__(self, args, unknown_args, extern_data):
        '''Set up the shared input data and the result accumulators.

        args, unknown_args: forwarded unchanged to ``PowerData``.
        extern_data: a dictionary or a list of dicts; when it is a list,
            the number of replicates is set to the length of the list.
        '''
        self.powerdata = PowerData(args, unknown_args, extern_data)
        # initialize data, only get the fixed part of data for now
        self.data = self.powerdata.dump_fixed()
        # isinstance also accepts list subclasses, unlike an exact type test
        if isinstance(extern_data, list):
            self.data['replicates'] = len(extern_data)
            self.data['name'] = self.data['title']
            self.extern_data = extern_data
        else:
            self.extern_data = None
        self.sample = Sample()
        # keys known before any replicate runs; removed from each result
        self.original_keys = sorted(self.data)
        self.result = {}
        self.failure_count = Counter()

    def calculate(self, workQueue, resQueue):
        '''Worker loop: process replicates from workQueue until None arrives.

        For every replicate index taken from ``workQueue`` exactly one
        dictionary is put on ``resQueue``: the replicate's data without the
        original keys and ``replicate_id`` on success, or an empty
        dictionary when ``NullResultException`` was raised.
        '''
        while True:
            replicate = workQueue.get()
            if replicate is None:
                # stop signal
                break
            if self.extern_data is not None:
                extern = self.extern_data[replicate]
                self.data = self.powerdata.dump_updated(extern)
                self.data['name'] = extern['name']
            # reset data
            data = copy.deepcopy(self.data)
            sample = self.sample.clone()
            # set replicate ID (1-based)
            data['replicate_id'] = replicate + 1
            # reset seed
            seed = data['seed']
            if seed == 0:
                # seed of 0: seed the sampler with 0 and this process' pid,
                # then draw the rng seed from the sampler
                sample.seed(0, os.getpid())
                rng.seed(sample.runif())
            else:
                # given seed, offset by the replicate index
                sample.seed(seed + replicate)
                rng.seed(seed + replicate)
            # get the random part of the data
            data.update(self.powerdata.dump_random())
            # initialize empty genotype object
            pop = Population()
            pop.set("variant", data['pos'])
            # apply algorithm to data
            try:
                for item in getAlgorithm(data):
                    item.apply(data, sample, pop)
            except NullResultException:
                self.failure_count.increment()
                data = {}
            else:
                # reclaim memory; pop() instead of del so that a key that is
                # already absent cannot kill the worker with a KeyError and
                # leave the parent waiting on resQueue forever
                for k in self.original_keys + ['replicate_id']:
                    data.pop(k, None)
            resQueue.put(data)

    def run(self):
        '''Run all replicates and return the input data merged with results.

        Returns an empty dictionary when no result was collected, otherwise
        a new dictionary holding ``self.data`` updated with ``self.result``.

        Raises ValueError when interrupted from the keyboard while results
        are being collected; the worker processes are terminated first.
        '''
        n_replicates = self.data['replicates']
        if self.data['verbosity'] <= 1:
            iterations = range(n_replicates)
        else:
            widgets = [
                '{0} : '.format(self.data['name']),
                Percentage(), ' ',
                Bar('='), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets,
                               maxval=n_replicates,
                               term_width=get_terminal_size()[0] - 5)
            iterations = pbar((i for i in range(n_replicates)))
        # at least one worker, at most one per replicate
        nJobs = max(min(self.data['jobs'], n_replicates), 1)
        workQueue = Queue()
        resQueue = Queue()
        # put all replicates + one stop signal per worker in queue
        for replicate in range(n_replicates):
            workQueue.put(replicate)
        for _ in range(nJobs):
            workQueue.put(None)
        # spawn workers
        procs = [
            Process(target=self.calculate, args=(workQueue, resQueue))
            for _ in range(nJobs)
        ]
        for p in procs:
            p.start()
        # collect the results off the queue, one per replicate
        try:
            for _ in iterations:
                self.__save(resQueue.get())
        except KeyboardInterrupt:
            # do not leave worker processes running behind the error
            for p in procs:
                if p.is_alive():
                    p.terminate()
            for p in procs:
                p.join()
            raise ValueError("calculator terminated!")
        for p in procs:
            p.join()
        failures = self.failure_count.value()
        if failures:
            env.logger.info("{} invalid replicate(s)".format(failures))
            self.data['replicates'] = self.data['replicates'] - failures
        if not self.result:
            return {}
        merged = dict(self.data)
        merged.update(self.result)
        return merged

    def __save(self, data):
        '''Fold the output of one replicate into ``self.result``.

        Non-list values are added to an ``L.RunningStat`` that is created
        the first time their key is seen; list values replace whatever is
        stored under the key. Values that do not compare equal to
        themselves (such as NaN) are skipped.
        '''
        for k, value in data.items():
            is_list = isinstance(value, list)
            # a newly added value to be collected
            if k not in self.result and not is_list:
                half = int(self.data['replicates'] / 2)
                self.result[k] = L.RunningStat(half, half)
            if value == value:
                # a valid value
                if is_list:
                    self.result[k] = value
                else:
                    self.result[k].add(value)
示例#3
0
from spower.utils import getLogger
from spower.simulator.sampler import Sample

if __name__ == "__main__":
    # Seed a sampler, clone it, then seed the clone with the same arguments.
    seed_args = (0, 1)
    sampler = Sample(getLogger(1))
    sampler.seed(*seed_args)
    sampler_copy = sampler.clone()
    sampler_copy.seed(*seed_args)
示例#4
0
from spower.utils import getLogger
from spower.simulator.sampler import Sample
if __name__ == "__main__":
    # Build a sampler and a clone of it; each one is seeded with (0, 1).
    log = getLogger(1)
    first = Sample(log)
    first.seed(0, 1)
    second = first.clone()
    second.seed(0, 1)