class ped_writer(unittest.TestCase):
    """Smoke test that exports a study through PedWriter.

    The test talks to a live KB (OMERO driver on localhost, credentials
    hard-coded in ``setUp``) and looks up the study 'TEST01' and the
    markers set 'FakeTaqSet01'.
    """

    def setUp(self):
        self.kb = KB(driver='omero')('localhost', 'root', 'romeo')

    def tearDown(self):
        pass

    def test_base(self):
        """Write the map file and one family section for the whole study."""

        def extract_data_sample(group, mset, dsample_name):
            """Return ``{individual.id: data_sample}`` for ``group``.

            For every individual exactly one data sample of kind
            ``dsample_name`` must reference ``mset``; anything else fails
            the test.
            """
            by_individual = {}
            for i in self.kb.get_individuals(group):
                # Build a real list: len() and indexing must not depend on
                # filter() returning a list rather than an iterator.
                gds = [x for x in self.kb.get_data_samples(i, dsample_name)
                       if x.snpMarkersSet == mset]
                self.assertEqual(len(gds), 1)
                by_individual[i.id] = gds[0]
            return by_individual

        study = self.kb.get_study('TEST01')
        family = self.kb.get_individuals(study)
        mset = self.kb.get_snp_markers_set(label='FakeTaqSet01')
        gds_by_individual = extract_data_sample(study, mset,
                                                'GenotypeDataSample')
        pw = PedWriter(mset, base_path="./foo")
        try:
            pw.write_map()
            pw.write_family(study.id, family, gds_by_individual)
        finally:
            # Close the writer even when one of the write steps raises.
            pw.close()
class ped_writer(unittest.TestCase):
    """Smoke test that exports a study through PedWriter.

    The test talks to a live KB (OMERO driver on localhost, credentials
    hard-coded in ``setUp``) and looks up the study "TEST01" and the
    markers set "FakeTaqSet01".
    """

    def setUp(self):
        self.kb = KB(driver="omero")("localhost", "root", "romeo")

    def tearDown(self):
        pass

    def test_base(self):
        """Write the map file and one family section for the whole study."""

        def extract_data_sample(group, mset, dsample_name):
            """Return ``{individual.id: data_sample}`` for ``group``.

            For every individual exactly one data sample of kind
            ``dsample_name`` must reference ``mset``; anything else fails
            the test.
            """
            by_individual = {}
            for i in self.kb.get_individuals(group):
                # Build a real list: len() and indexing must not depend on
                # filter() returning a list rather than an iterator.
                gds = [x for x in self.kb.get_data_samples(i, dsample_name)
                       if x.snpMarkersSet == mset]
                self.assertEqual(len(gds), 1)
                by_individual[i.id] = gds[0]
            return by_individual

        study = self.kb.get_study("TEST01")
        family = self.kb.get_individuals(study)
        mset = self.kb.get_snp_markers_set(label="FakeTaqSet01")
        gds_by_individual = extract_data_sample(study, mset,
                                                "GenotypeDataSample")
        pw = PedWriter(mset, base_path="./foo")
        try:
            pw.write_map()
            pw.write_family(study.id, family, gds_by_individual)
        finally:
            # Close the writer even when one of the write steps raises.
            pw.close()
class App(object):
    """Time the genotype statistics computed by ``algo`` on a markers set."""

    def __init__(self, host, user, passwd):
        """Open a KB connection (OMERO driver) with the given credentials."""
        self.kb = KB(driver="omero")(host, user, passwd)
        self.logger = logging.getLogger()

    def compute(self, maker, model, release):
        """Run and time homozygote counts, MAF and HWE for one markers set.

        The markers set is looked up by ``maker``, ``model`` and ``release``.
        For each of the three computations one line with the elapsed time
        is printed to stdout.

        Raises:
            ValueError: if the KB returns no such markers set.
        """
        mset = self.kb.get_snp_markers_set(maker, model, release)
        if not mset:
            raise ValueError("SNPMarkersSet[%s,%s,%s] has not been defined."
                             % (maker, model, release))
        # projector = (np.arange(0, 100), np.array([101, 109]), np.arange(110,N))
        # selector = kb.build_selector(
        # s = self.kb.get_gdo_iterator(mset, selector, projector)
        s = self.kb.get_gdo_iterator(mset)

        # time.clock() was removed in Python 3.8; use perf_counter() where it
        # exists and fall back to clock() on interpreters that lack it.
        clock = getattr(time, "perf_counter", None) or time.clock

        start = clock()
        counts = algo.count_homozygotes(s)
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        print("counts on %d: %s" % (counts[0], clock() - start))

        # Only the timings are reported, so the results are not kept.
        start = clock()
        algo.maf(None, counts)
        print("mafs on %d: %s" % (counts[0], clock() - start))

        start = clock()
        algo.hwe(None, counts)
        print("hwe on %d: %s" % (counts[0], clock() - start))
class App(object):
    """Time the genotype statistics computed by ``algo`` on a markers set."""

    def __init__(self, host, user, passwd):
        """Open a KB connection (OMERO driver) with the given credentials."""
        self.kb = KB(driver='omero')(host, user, passwd)
        self.logger = logging.getLogger()

    def compute(self, maker, model, release):
        """Run and time homozygote counts, MAF and HWE for one markers set.

        The markers set is looked up by ``maker``, ``model`` and ``release``.
        For each of the three computations one line with the elapsed time
        is printed to stdout.

        Raises:
            ValueError: if the KB returns no such markers set.
        """
        mset = self.kb.get_snp_markers_set(maker, model, release)
        if not mset:
            raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                             % (maker, model, release))
        # projector = (np.arange(0, 100), np.array([101, 109]), np.arange(110,N))
        # selector = kb.build_selector(
        # s = self.kb.get_gdo_iterator(mset, selector, projector)
        s = self.kb.get_gdo_iterator(mset)

        # time.clock() was removed in Python 3.8; use perf_counter() where it
        # exists and fall back to clock() on interpreters that lack it.
        clock = getattr(time, 'perf_counter', None) or time.clock

        start = clock()
        counts = algo.count_homozygotes(s)
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        print('counts on %d: %s' % (counts[0], clock() - start))

        # Only the timings are reported, so the results are not kept.
        start = clock()
        algo.maf(None, counts)
        print('mafs on %d: %s' % (counts[0], clock() - start))

        start = clock()
        algo.hwe(None, counts)
        print('hwe on %d: %s' % (counts[0], clock() - start))
class App(object):
    """Load genotype data read by PedReader into the KB.

    For every record yielded by the reader an ActionOnVessel and a
    GenotypeDataSample are created, and the record's probs/confs are
    attached through ``kb.add_gdo_data_object``.
    """

    def __init__(self, host, user, passwd, study_label, maker, model, release):
        """Connect to the KB and resolve the objects shared by all loads.

        Raises:
            ValueError: if the markers set (maker, model, release) or the
                study ``study_label`` is not returned by the KB.
        """
        self.kb = KB(driver='omero')(host, user, passwd)
        self.logger = logging.getLogger()
        self.mset = self.kb.get_snp_markers_set(maker, model, release)
        if not self.mset:
            raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                             % (maker, model, release))
        # Resolve the study before anything is saved, so that an unknown
        # study fails here instead of later, after an ActionSetup and
        # possibly a Device have already been persisted.
        self.study = self.kb.get_study(study_label)
        if not self.study:
            raise ValueError('Study %s has not been defined.' % study_label)
        # The timestamp in the label gives every run its own ActionSetup.
        alabel = 'load_genotypes-setup-%s' % time.time()
        self.asetup = self.kb.factory.create(
            self.kb.ActionSetup, {'label': alabel, 'conf': ''}).save()
        # Reuse the device describing this loader if present, else create it.
        dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
        dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
        device = self.kb.get_device(dlabel)
        if not device:
            device = self.kb.factory.create(
                self.kb.Device,
                {'label': dlabel, 'maker': dmaker,
                 'model': dmodel, 'release': drelease}).save()
        self.device = device

    def check_snp_markers_set(self, marker_types, marker_names):
        """Verify that every marker of type 'M' belongs to the markers set.

        Raises:
            ValueError: on the first 'M' marker whose name is not among the
                set's ``rs_label`` values.
        """
        self.logger.info('start checking snp_markers_set')
        mdefs, _ = self.kb.get_snp_markers_set_content(self.mset)
        # NOTE(review): if rs_label is a plain sequence/array each lookup
        # below is linear; consider a set once its element type is confirmed.
        rs_labels = mdefs['rs_label']
        for t, n in it.izip(marker_types, marker_names):
            if t == 'M' and n not in rs_labels:
                msg = 'marker %s is not in the specified SNPMarkersSet' % n
                self.logger.critical(msg)
                raise ValueError(msg)
        self.logger.info('done checking snp_markers_set')

    def create_action(self, target):
        """Create and save a MEASUREMENT ActionOnVessel acting on ``target``."""
        conf = {
            'setup': self.asetup,
            'device': self.device,
            'actionCategory': self.kb.ActionCategory.MEASUREMENT,
            'operator': 'Alfred E. Neumann',
            'context': self.study,
            'target': target,
        }
        action = self.kb.factory.create(self.kb.ActionOnVessel, conf).save()
        return action

    def create_data_sample(self, action, label):
        """Create and save a USABLE GenotypeDataSample produced by ``action``."""
        conf = {
            'snpMarkersSet': self.mset,
            'label': label,
            'status': self.kb.DataSampleStatus.USABLE,
            'action': action,
        }
        return self.kb.factory.create(self.kb.GenotypeDataSample, conf).save()

    def load(self, pedfile, datfile, conf_value=1.0):
        """Load every record of ``pedfile``/``datfile`` into the KB.

        Records whose 'sample_label' matches no vessel are logged as errors
        and skipped.

        Raises:
            ValueError: if the files reference a marker that is not in the
                markers set (see ``check_snp_markers_set``).
        """
        pr = PedReader(pedfile, datfile, conf_value)
        self.check_snp_markers_set(pr.marker_types, pr.marker_names)
        self.logger.info('start loading from pedfile %s', pedfile.name)
        for x in pr:
            sample = self.kb.get_vessel(x['sample_label'])
            if not sample:
                self.logger.error('No sample with label %s in VL',
                                  x['sample_label'])
                continue
            action = self.create_action(sample)
            # Grab the id before unload(); it is used after the unload.
            avid = action.id
            action.unload()
            data_sample = self.create_data_sample(action, x['label'])
            self.kb.add_gdo_data_object(avid, data_sample,
                                        x['probs'], x['confs'])
            self.logger.info('-- loaded %s', x['label'])
        self.logger.info('done loading from pedfile %s', pedfile.name)
# Connection settings are read from the environment; user and password
# default to 'test'.
OME_USER = os.getenv('OME_USER', 'test')
OME_PASSWD = os.getenv('OME_PASSWD', 'test')
# NOTE(review): OME_HOST is not assigned in this excerpt -- presumably it is
# defined just before this point; confirm.
kb = KB(driver="omero")(OME_HOST, OME_USER, OME_PASSWD)

""" .. The first thing we will do is to select a markers set. See FIXME:XXX for its definition. We will first obtain an handle to it, and then invoke a '.load_markers()' that will bring in memory the actual definition data. """

mset_name = 'FakeTaqSet01'
# Look the markers set up by label, then load its marker definitions.
mset0 = kb.get_snp_markers_set(label=mset_name)
mset0.load_markers()

""" .. For the time being, we can think the SNPMarkerSet mset0 as analogous to an array of markers. The following is a list of expressions that are expected to be legal. """

# The expressions below are evaluated only to show that they are accepted;
# their results are discarded.
len(mset0)
mset0[0::10]
mset0[11]
mset0[1].label
mset0[1].rs_label
# Connection settings are read from the environment; the host defaults to
# 'localhost', user and password default to 'test'.
OME_HOST = os.getenv('OME_HOST', 'localhost')
OME_USER = os.getenv('OME_USER', 'test')
OME_PASSWD = os.getenv('OME_PASSWD', 'test')
kb = KB(driver="omero")(OME_HOST, OME_USER, OME_PASSWD)

""" .. The first thing we will do is to select a markers set. See FIXME:XXX for its definition. We will first obtain an handle to it, and then invoke a '.load_markers()' that will bring in memory the actual definition data. """

mset_name = 'FakeTaqSet01'
# Look the markers set up by label, then load its marker definitions.
mset0 = kb.get_snp_markers_set(label=mset_name)
mset0.load_markers()

""" .. For the time being, we can think the SNPMarkerSet mset0 as analogous to an array of markers. The following is a list of expressions that are expected to be legal. """

# The expressions below are evaluated only to show that they are accepted;
# their results are discarded.
len(mset0)
mset0[0::10]
mset0[11]
mset0[1].label
mset0[1].rs_label

# NOTE(review): the string literal opened below is cut off in this excerpt;
# its remainder and closing quotes are not visible here.
""" ..
# Log to a file only when one was requested; otherwise basicConfig keeps
# whatever destination the previously built kwargs imply.
if args.logfile:
    kwargs['filename'] = args.logfile
logging.basicConfig(**kwargs)
logger = logging.getLogger()

# Command-line values take precedence; the vlu helpers are consulted only
# for values that were not given. A ValueError from them aborts the run.
try:
    host = args.host or vlu.ome_host()
    user = args.user or vlu.ome_user()
    passwd = args.passwd or vlu.ome_passwd()
except ValueError as ve:
    logger.critical(ve)
    sys.exit(ve)

kb = KB(driver="omero")(host, user, passwd)
logger.info("getting data samples")
ms = kb.get_snp_markers_set(label=args.marker_set)
if ms is None:
    msg = "marker set %s not present in kb, bailing out" % args.marker_set
    logger.critical(msg)
    sys.exit(msg)

# Fetch every GenotypeDataSample that references the selected markers set.
query = "from GenotypeDataSample g where g.snpMarkersSet.id = :id"
params = {"id": ms.omero_id}
gds = kb.find_all_by_query(query, params)
logger.info("found %d data samples for marker set %s",
            len(gds), args.marker_set)

logger.info("updating dep tree")
kb.update_dependency_tree()
# Pair each data sample with the individual returned by get_individual and
# index the samples by individual id (a later sample for the same individual
# replaces an earlier one).
individuals = [get_individual(kb, ds) for ds in gds]
ds_by_ind_id = dict((i.id, ds) for i, ds in zip(individuals, gds))

logger.info("getting families")
families = get_all_families(kb)
class App(object):
    """Load genotype data read by PedReader into the KB.

    For every record yielded by the reader an ActionOnVessel and a
    GenotypeDataSample are created, and the record's probs/confs are
    attached through ``kb.add_gdo_data_object``.
    """

    def __init__(self, host, user, passwd, study_label, maker, model, release):
        """Connect to the KB and resolve the objects shared by all loads.

        Raises:
            ValueError: if the markers set (maker, model, release) or the
                study ``study_label`` is not returned by the KB.
        """
        self.kb = KB(driver='omero')(host, user, passwd)
        self.logger = logging.getLogger()
        self.mset = self.kb.get_snp_markers_set(maker, model, release)
        if not self.mset:
            raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                             % (maker, model, release))
        # Resolve the study before anything is saved, so that an unknown
        # study fails here instead of later, after an ActionSetup and
        # possibly a Device have already been persisted.
        self.study = self.kb.get_study(study_label)
        if not self.study:
            raise ValueError('Study %s has not been defined.' % study_label)
        # The timestamp in the label gives every run its own ActionSetup.
        alabel = 'load_genotypes-setup-%s' % time.time()
        self.asetup = self.kb.factory.create(
            self.kb.ActionSetup, {'label': alabel, 'conf': ''}).save()
        # Reuse the device describing this loader if present, else create it.
        dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
        dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
        device = self.kb.get_device(dlabel)
        if not device:
            device = self.kb.factory.create(
                self.kb.Device,
                {'label': dlabel, 'maker': dmaker,
                 'model': dmodel, 'release': drelease}).save()
        self.device = device

    def check_snp_markers_set(self, marker_types, marker_names):
        """Verify that every marker of type 'M' belongs to the markers set.

        Raises:
            ValueError: on the first 'M' marker whose name is not among the
                set's ``rs_label`` values.
        """
        self.logger.info('start checking snp_markers_set')
        mdefs, _ = self.kb.get_snp_markers_set_content(self.mset)
        # NOTE(review): if rs_label is a plain sequence/array each lookup
        # below is linear; consider a set once its element type is confirmed.
        rs_labels = mdefs['rs_label']
        for t, n in it.izip(marker_types, marker_names):
            if t == 'M' and n not in rs_labels:
                msg = 'marker %s is not in the specified SNPMarkersSet' % n
                self.logger.critical(msg)
                raise ValueError(msg)
        self.logger.info('done checking snp_markers_set')

    def create_action(self, target):
        """Create and save a MEASUREMENT ActionOnVessel acting on ``target``."""
        conf = {
            'setup': self.asetup,
            'device': self.device,
            'actionCategory': self.kb.ActionCategory.MEASUREMENT,
            'operator': 'Alfred E. Neumann',
            'context': self.study,
            'target': target,
        }
        action = self.kb.factory.create(self.kb.ActionOnVessel, conf).save()
        return action

    def create_data_sample(self, action, label):
        """Create and save a USABLE GenotypeDataSample produced by ``action``."""
        conf = {
            'snpMarkersSet': self.mset,
            'label': label,
            'status': self.kb.DataSampleStatus.USABLE,
            'action': action,
        }
        return self.kb.factory.create(self.kb.GenotypeDataSample, conf).save()

    def load(self, pedfile, datfile, conf_value=1.0):
        """Load every record of ``pedfile``/``datfile`` into the KB.

        Records whose 'sample_label' matches no vessel are logged as errors
        and skipped.

        Raises:
            ValueError: if the files reference a marker that is not in the
                markers set (see ``check_snp_markers_set``).
        """
        pr = PedReader(pedfile, datfile, conf_value)
        self.check_snp_markers_set(pr.marker_types, pr.marker_names)
        self.logger.info('start loading from pedfile %s', pedfile.name)
        for x in pr:
            sample = self.kb.get_vessel(x['sample_label'])
            if not sample:
                self.logger.error('No sample with label %s in VL',
                                  x['sample_label'])
                continue
            action = self.create_action(sample)
            # Grab the id before unload(); it is used after the unload.
            avid = action.id
            action.unload()
            data_sample = self.create_data_sample(action, x['label'])
            self.kb.add_gdo_data_object(avid, data_sample,
                                        x['probs'], x['confs'])
            self.logger.info('-- loaded %s', x['label'])
        self.logger.info('done loading from pedfile %s', pedfile.name)