示例#1
0
 def __init__(self, args, unknown_args, extern_data):
     '''
     Build the PowerData wrapper and the per-run bookkeeping state.

     args, unknown_args - forwarded unchanged to PowerData
     extern_data - a dictionary or a list of dicts; when it is a list,
                   the number of replicates is set to its length and the
                   list is kept for per-replicate use
     '''
     # extern data can be a dictionary or a list of dicts
     self.powerdata = PowerData(args, unknown_args, extern_data)
     # initialize data, only get the fixed part of data for now
     self.data = self.powerdata.dump_fixed()
     # isinstance (rather than an exact type comparison) so that list
     # subclasses are also handled as a list of replicates
     if isinstance(extern_data, list):
         # one replicate per externally supplied entry
         self.data['replicates'] = len(extern_data)
         self.data['name'] = self.data['title']
         self.extern_data = extern_data
     else:
         self.extern_data = None
     self.sample = Sample()
     # snapshot of the keys present before any calculation adds output
     self.original_keys = sorted(self.data.keys())
     self.result = {}
     self.failure_count = Counter()
示例#2
0
def initSampleObj(powerDataObj):
    '''
    Create a spower.Sample() object, seed it, and load it with the
    contents of a pre-processed 'PowerData' object.

    powerDataObj - object whose 'data' attribute is expanded into
                   keyword arguments for Sample.set()
    Returns the configured Sample object.
    '''
    sample = Sample()
    # seed arguments: 0 plus the current process id
    pid = os.getpid()
    sample.seed(0, pid)
    settings = powerDataObj.data
    sample.set(**settings)
    return sample
示例#3
0
 def __init__(self, args, unknown_args, extern_data):
     '''
     Build the PowerData wrapper and the per-run bookkeeping state.

     args, unknown_args - forwarded unchanged to PowerData
     extern_data - a dictionary or a list of dicts; when it is a list,
                   the number of replicates is set to its length and the
                   list is kept for per-replicate use
     '''
     # extern data can be a dictionary or a list of dicts
     self.powerdata = PowerData(args, unknown_args, extern_data)
     # initialize data, only get the fixed part of data for now
     self.data = self.powerdata.dump_fixed()
     # isinstance (rather than an exact type comparison) so that list
     # subclasses are also handled as a list of replicates
     if isinstance(extern_data, list):
         # one replicate per externally supplied entry
         self.data['replicates'] = len(extern_data)
         self.data['name'] = self.data['title']
         self.extern_data = extern_data
     else:
         self.extern_data = None
     self.sample = Sample()
     # snapshot of the keys present before any calculation adds output
     self.original_keys = sorted(self.data.keys())
     self.result = {}
     self.failure_count = Counter()
示例#4
0
def initSampleAndPowerdata(configFile, srvResFile, srvGeneRep=None):
    '''
    Create a PowerData object from the given files and a spower.Sample()
    object loaded with its data.

    configFile, srvResFile - passed to spowerArgsNData()
    srvGeneRep - gene replicate # to be used in powerdata, randomly
                 sample a gene replicate if None
    Returns the tuple (Sample object, PowerData object).
    '''
    args, srvData = spowerArgsNData(configFile, srvResFile, srvGeneRep)
    # no unknown arguments are supplied here, hence the empty list
    powerdata = PowerData(args, [], srvData)
    # refresh the fixed part first, then the random part
    powerdata.update_fixed()
    powerdata.update_random()
    sample = Sample()
    # seed arguments: 0 plus the current process id
    pid = os.getpid()
    sample.seed(0, pid)
    sample.set(**powerdata.data)
    pair = (sample, powerdata)
    return pair
示例#5
0
class Calculator:
    '''Run the calculation: replicates are evaluated in worker processes
    and their outputs are pooled into self.result.'''
    def __init__(self, args, unknown_args, extern_data):
        '''
        Build the PowerData wrapper and the per-run bookkeeping state.

        args, unknown_args - forwarded unchanged to PowerData
        extern_data - a dictionary or a list of dicts; when it is a list,
                      the number of replicates is set to its length and
                      the list is kept for per-replicate use
        '''
        # extern data can be a dictionary or a list of dicts
        self.powerdata = PowerData(args, unknown_args, extern_data)
        # initialize data, only get the fixed part of data for now
        self.data = self.powerdata.dump_fixed()
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are also handled as a list of replicates
        if isinstance(extern_data, list):
            self.data['replicates'] = len(extern_data)
            self.data['name'] = self.data['title']
            self.extern_data = extern_data
        else:
            self.extern_data = None
        self.sample = Sample()
        # keys present before any calculation; stripped from each
        # replicate's output in calculate()
        self.original_keys = sorted(self.data.keys())
        self.result = {}
        self.failure_count = Counter()

    def calculate(self, workQueue, resQueue):
        '''
        Calculation of each replicate (used as the Process target in run()).

        workQueue - source of replicate indices; a None entry ends the loop
        resQueue - receives one dict per replicate: the computed output, or
                   an empty dict when NullResultException was raised
        '''
        # NOTE(review): any exception other than NullResultException leaves
        # this loop without posting a result, while run() still waits for
        # one result per replicate.
        for replicate in iter(workQueue.get, None):
            if self.extern_data is not None:
                extern = self.extern_data[replicate]
                self.data = self.powerdata.dump_updated(extern)
                self.data['name'] = extern['name']
            # reset data
            data = copy.deepcopy(self.data)
            sample = self.sample.clone()
            # set replicate ID
            data['replicate_id'] = replicate + 1
            # reset seed
            if data['seed'] == 0:
                # use a seed based on current time
                sample.seed(0, os.getpid())
                rng.seed(sample.runif())
            else:
                # use a seed based on given seed
                sample.seed(data['seed'] + replicate)
                rng.seed(data['seed'] + replicate)
            # get the random part of the data
            data.update(self.powerdata.dump_random())
            # initialize empty genotype object
            pop = Population()
            pop.set("variant", data['pos'])
            # apply algorithm to data
            try:
                for item in getAlgorithm(data):
                    item.apply(data, sample, pop)
            except NullResultException:
                # NOTE(review): incremented inside the worker but read in
                # run(); presumably Counter is shared across processes -
                # confirm.
                self.failure_count.increment()
                data = {}
            else:
                # reclaim memory: only newly produced entries are sent back
                for k in self.original_keys + ['replicate_id']:
                    del data[k]
            resQueue.put(data)

    def run(self):
        '''
        Run multiple replicates.

        Starts the worker processes, feeds them every replicate index,
        collects one result per replicate and returns self.data merged
        with the collected results (results win on key clashes), or an
        empty dict when nothing was collected.
        Raises ValueError if interrupted from the keyboard while collecting.
        '''
        n_replicates = self.data['replicates']
        if self.data['verbosity'] <= 1:
            iterations = range(n_replicates)
        else:
            widgets = ['{0} : '.format(self.data['name']), Percentage(),
                       ' ', Bar('='), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=n_replicates,
                               term_width=get_terminal_size()[0] - 5)
            iterations = pbar((i for i in range(n_replicates)))
        # at least one worker, never more workers than replicates
        n_jobs = max(min(self.data['jobs'], n_replicates), 1)
        workQueue = Queue()
        resQueue = Queue()
        # put all replicates + stop signals in queue
        for replicate in range(n_replicates):
            workQueue.put(replicate)
        for _ in range(n_jobs):
            workQueue.put(None)
        # spawn workers
        procs = [Process(target=self.calculate, args=(workQueue, resQueue))
                 for _ in range(n_jobs)]
        for p in procs:
            p.start()
        # collect the results off the queue
        for _ in iterations:
            try:
                self.__save(resQueue.get())
            except KeyboardInterrupt:
                raise ValueError("calculator terminated!")
        for p in procs:
            p.join()
        failures = self.failure_count.value()
        if failures:
            env.logger.info("{} invalid replicate(s)".format(failures))
            self.data['replicates'] = self.data['replicates'] - failures
        if not self.result:
            return {}
        merged = dict(self.data)
        merged.update(self.result)
        return merged

    def __save(self, data):
        '''
        Save result of one replicate into self.result.

        Non-list values are fed into a per-key L.RunningStat (created on
        first sight of the key); list values replace the stored entry.
        Values that do not compare equal to themselves are ignored.
        '''
        for k, value in data.items():
            is_list = isinstance(value, list)
            # a newly added value to be collected
            if k not in self.result and not is_list:
                half = int(self.data['replicates'] / 2)
                self.result[k] = L.RunningStat(half, half)
            if value == value:
                # a valid value (the self-comparison fails for NaN)
                if is_list:
                    self.result[k] = value
                else:
                    self.result[k].add(value)
示例#6
0
from spower.utils import getLogger
from spower.simulator.sampler import Sample

if __name__ == "__main__":
    # Seed a Sample built on a logger, clone it, then apply the same
    # seed arguments to the clone.
    logger = getLogger(1)
    source_sample = Sample(logger)
    source_sample.seed(0, 1)
    cloned_sample = source_sample.clone()
    cloned_sample.seed(0, 1)
示例#7
0
    #
    ##### generate disease by PAR
    #s = Sample()
    #s.seed(0)
    #s.set(**powerdata.data)
    #L.GenotypeGenerator().apply(s.data)
    #L.PARModel(powerdata.data['par'], powerdata.data['PAR_variable']).apply(s.data)#L.ORModel(powerdata.data['odds_ratio']).apply(s.data)
    #L.DiseaseEffectGenerator(powerdata.data['baseline_effect']).apply(s.data)
    #L.DiseaseStatusGenerator().apply(s.data)
    #print powerdata.data['maf']
    #print s.get('haplotype1'), s.get('haplotype2')
    #print s.get('effect'),
    #print s.get('phenotype')

    #### generate qt
    # (qt presumably stands for quantitative trait - confirm)
    # Start from a fresh Sample seeded with 0.
    s = Sample()
    s.seed(0)

    # Debug aid: show which attributes the Sample object exposes.
    print dir(s)

    # Load every entry of powerdata.data into the sample.
    s.set(**powerdata.data)
    # The genotype generator is disabled; two fixed haplotypes are set
    # by hand below instead.
    #L.GenotypeGenerator().apply(s.data)
    s.set(haplotype1=[0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0],
          haplotype2=[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1])
    # Apply, in order: the mean-shift model, the Qt effect generator and
    # the Qt value generator, all operating on s.data.
    L.MeanShiftModel(powerdata.data['mean_shift']).apply(s.data)
    L.QtEffectGenerator().apply(s.data)
    L.QtValueGenerator().apply(s.data)
    # Print the haplotypes followed by the 'effect' and 'phenotype' entries.
    print s.get('haplotype1'), s.get('haplotype2')
    print s.get('effect')
    print s.get('phenotype')
示例#8
0
class Calculator:
    '''Run the calculation: replicates are evaluated in worker processes
    and their outputs are pooled into self.result.'''
    def __init__(self, args, unknown_args, extern_data):
        '''
        Build the PowerData wrapper and the per-run bookkeeping state.

        args, unknown_args - forwarded unchanged to PowerData
        extern_data - a dictionary or a list of dicts; when it is a list,
                      the number of replicates is set to its length and
                      the list is kept for per-replicate use
        '''
        # extern data can be a dictionary or a list of dicts
        self.powerdata = PowerData(args, unknown_args, extern_data)
        # initialize data, only get the fixed part of data for now
        self.data = self.powerdata.dump_fixed()
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are also handled as a list of replicates
        if isinstance(extern_data, list):
            self.data['replicates'] = len(extern_data)
            self.data['name'] = self.data['title']
            self.extern_data = extern_data
        else:
            self.extern_data = None
        self.sample = Sample()
        # keys present before any calculation; stripped from each
        # replicate's output in calculate()
        self.original_keys = sorted(self.data.keys())
        self.result = {}
        self.failure_count = Counter()

    def calculate(self, workQueue, resQueue):
        '''
        Calculation of each replicate (used as the Process target in run()).

        workQueue - source of replicate indices; a None entry ends the loop
        resQueue - receives one dict per replicate: the computed output, or
                   an empty dict when NullResultException was raised
        '''
        # NOTE(review): any exception other than NullResultException leaves
        # this loop without posting a result, while run() still waits for
        # one result per replicate.
        for replicate in iter(workQueue.get, None):
            if self.extern_data is not None:
                extern = self.extern_data[replicate]
                self.data = self.powerdata.dump_updated(extern)
                self.data['name'] = extern['name']
            # reset data
            data = copy.deepcopy(self.data)
            sample = self.sample.clone()
            # set replicate ID
            data['replicate_id'] = replicate + 1
            # reset seed
            if data['seed'] == 0:
                # use a seed based on current time
                sample.seed(0, os.getpid())
                rng.seed(sample.runif())
            else:
                # use a seed based on given seed
                sample.seed(data['seed'] + replicate)
                rng.seed(data['seed'] + replicate)
            # get the random part of the data
            data.update(self.powerdata.dump_random())
            # initialize empty genotype object
            pop = Population()
            pop.set("variant", data['pos'])
            # apply algorithm to data
            try:
                for item in getAlgorithm(data):
                    item.apply(data, sample, pop)
            except NullResultException:
                # NOTE(review): incremented inside the worker but read in
                # run(); presumably Counter is shared across processes -
                # confirm.
                self.failure_count.increment()
                data = {}
            else:
                # reclaim memory: only newly produced entries are sent back
                for k in self.original_keys + ['replicate_id']:
                    del data[k]
            resQueue.put(data)

    def run(self):
        '''
        Run multiple replicates.

        Starts the worker processes, feeds them every replicate index,
        collects one result per replicate and returns self.data merged
        with the collected results (results win on key clashes), or an
        empty dict when nothing was collected.
        Raises ValueError if interrupted from the keyboard while collecting.
        '''
        n_replicates = self.data['replicates']
        if self.data['verbosity'] <= 1:
            iterations = range(n_replicates)
        else:
            widgets = [
                '{0} : '.format(self.data['name']),
                Percentage(), ' ',
                Bar('='), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets,
                               maxval=n_replicates,
                               term_width=get_terminal_size()[0] - 5)
            iterations = pbar((i for i in range(n_replicates)))
        # at least one worker, never more workers than replicates
        n_jobs = max(min(self.data['jobs'], n_replicates), 1)
        workQueue = Queue()
        resQueue = Queue()
        # put all replicates + stop signals in queue
        for replicate in range(n_replicates):
            workQueue.put(replicate)
        for _ in range(n_jobs):
            workQueue.put(None)
        # spawn workers
        procs = [
            Process(target=self.calculate, args=(workQueue, resQueue))
            for _ in range(n_jobs)
        ]
        for p in procs:
            p.start()
        # collect the results off the queue
        for _ in iterations:
            try:
                self.__save(resQueue.get())
            except KeyboardInterrupt:
                raise ValueError("calculator terminated!")
        for p in procs:
            p.join()
        failures = self.failure_count.value()
        if failures:
            env.logger.info("{} invalid replicate(s)".format(failures))
            self.data['replicates'] = self.data['replicates'] - failures
        if not self.result:
            return {}
        merged = dict(self.data)
        merged.update(self.result)
        return merged

    def __save(self, data):
        '''
        Save result of one replicate into self.result.

        Non-list values are fed into a per-key L.RunningStat (created on
        first sight of the key); list values replace the stored entry.
        Values that do not compare equal to themselves are ignored.
        '''
        for k, value in data.items():
            is_list = isinstance(value, list)
            # a newly added value to be collected
            if k not in self.result and not is_list:
                half = int(self.data['replicates'] / 2)
                self.result[k] = L.RunningStat(half, half)
            if value == value:
                # a valid value (the self-comparison fails for NaN)
                if is_list:
                    self.result[k] = value
                else:
                    self.result[k].add(value)
示例#9
0
from spower.utils import getLogger
from spower.simulator.sampler import Sample
if __name__ == "__main__":
    # Seed a Sample built on a logger, clone it, then apply the same
    # seed arguments to the clone.
    logger = getLogger(1)
    source_sample = Sample(logger)
    source_sample.seed(0, 1)
    cloned_sample = source_sample.clone()
    cloned_sample.seed(0, 1)