def test_base(self):
    """Drive PedWriter over study "TEST01" with marker set "FakeTaqSet01".

    For every individual returned by ``self.kb.get_individuals(study)`` the
    single "GenotypeDataSample" defined on the marker set is looked up, then
    ``write_map`` and ``write_family`` are called on a ``PedWriter`` rooted
    at ``./foo``.
    """

    def extract_data_sample(group, mset, dsample_name):
        """Map individual id -> its one data sample defined on *mset*."""
        by_individual = {}
        for individual in self.kb.get_individuals(group):
            # A list comprehension (rather than filter()) keeps len() and
            # indexing valid on Python 3, where filter() is a lazy iterator.
            gds = [
                sample
                for sample in self.kb.get_data_samples(individual, dsample_name)
                if sample.snpMarkersSet == mset
            ]
            assert len(gds) == 1, (
                "expected exactly one %s per individual, found %d"
                % (dsample_name, len(gds))
            )
            by_individual[individual.id] = gds[0]
        return by_individual

    study = self.kb.get_study("TEST01")
    family = self.kb.get_individuals(study)
    mset = self.kb.get_snp_markers_set(label="FakeTaqSet01")
    gds_by_individual = extract_data_sample(study, mset, "GenotypeDataSample")
    pw = PedWriter(mset, base_path="./foo")
    try:
        pw.write_map()
        pw.write_family(study.id, family, gds_by_individual)
    finally:
        # Release the writer even when one of the write calls raises.
        pw.close()
def test_base(self):
    """Write the map and one family through PedWriter for study 'TEST01'.

    Collects, per individual of the study, the unique 'GenotypeDataSample'
    whose ``snpMarkersSet`` equals the 'FakeTaqSet01' marker set, and hands
    the resulting mapping to ``PedWriter.write_family``.
    """

    def extract_data_sample(group, mset, dsample_name):
        """Return ``{individual.id: data_sample}`` for samples on *mset*."""
        by_individual = {}
        for member in self.kb.get_individuals(group):
            candidates = self.kb.get_data_samples(member, dsample_name)
            # Materialise the matches as a list: filter() returns a lazy
            # iterator on Python 3, which supports neither len() nor [0].
            gds = [ds for ds in candidates if ds.snpMarkersSet == mset]
            assert len(gds) == 1, (
                "expected exactly one %s per individual, found %d"
                % (dsample_name, len(gds))
            )
            by_individual[member.id] = gds[0]
        return by_individual

    study = self.kb.get_study('TEST01')
    family = self.kb.get_individuals(study)
    mset = self.kb.get_snp_markers_set(label='FakeTaqSet01')
    gds_by_individual = extract_data_sample(study, mset, 'GenotypeDataSample')
    pw = PedWriter(mset, base_path="./foo")
    try:
        pw.write_map()
        pw.write_family(study.id, family, gds_by_individual)
    finally:
        # Always close the writer, including when a write call fails.
        pw.close()
do = kb.add_gdo_data_object(action, data_sample, probs, conf) data_sample_by_id[ind.id] = data_sample """ .. Note how we first create a DataSample object (GenotypeDataSample) which basically keeps track of the fact that there exists a genotyping data set defined on a given snp markers set, and then we provide an actual data object that describes the physical object that contains the real data. The idea is that there could be many instances, data equivalent, that link to the same DataSample, e.g., on different file systems, in different formats and so on. """ """ .. As an example, we will now write out the information we have just saved as a plink pedfile. """ from bl.vl.genotype.io import PedWriter pw = PedWriter(mset, base_path="/tmp/foo") pw.write_map() family_label = study.label pw.write_family(family_label, family, data_sample_by_id) pw.close()
ms = kb.get_snp_markers_set(label=args.marker_set) if ms is None: msg = "marker set %s not present in kb, bailing out" % args.marker_set logger.critical(msg) sys.exit(msg) query = "from GenotypeDataSample g where g.snpMarkersSet.id = :id" params = {"id": ms.omero_id} gds = kb.find_all_by_query(query, params) logger.info("found %d data samples for marker set %s" % (len(gds), args.marker_set)) logger.info("updating dep tree") kb.update_dependency_tree() individuals = [get_individual(kb, ds) for ds in gds] ds_by_ind_id = dict((i.id, ds) for i, ds in zip(individuals, gds)) logger.info("getting families") families = get_all_families(kb) ped_writer = PedWriter(ms, base_path=args.prefix) logger.info("writing map file") ped_writer.write_map() logger.info("writing ped file") for i, fam in enumerate(families): if set(ds_by_ind_id.get(i.id) for i in fam) != set([None]): fam_label = "FAM_%d" % (i+1) logger.info("writing family %s" % fam_label) ped_writer.write_family(fam_label, fam, ds_by_ind_id) logger.info("all finished") if __name__ == "__main__": main(sys.argv[1:])
data_sample = kb.factory.create(kb.GenotypeDataSample, conf).save() probs, conf = make_fake_data(mset) do = kb.add_gdo_data_object(action, data_sample, probs, conf) data_sample_by_id[ind.id] = data_sample """ .. Note how we first create a DataSample object (GenotypeDataSample) which basically keeps track of the fact that there exists a genotyping data set defined on a given snp markers set, and then we provide an actual data object that describes the physical object that contains the real data. The idea is that there could be many instances, data equivalent, that link to the same DataSample, e.g., on different file systems, in different formats and so on. """ """ .. As an example, we will now write out the information we have just saved as a plink pedfile. """ from bl.vl.genotype.io import PedWriter pw = PedWriter(mset, base_path="/tmp/foo") pw.write_map() family_label = study.label pw.write_family(family_label, family, data_sample_by_id) pw.close()