def run_dataset(self, case, algorithm):
    """Process the dataset for `case` with `algorithm` and require zero diffs.

    ``test/<case>/ds.spats`` is copied to a scratch file next to it, the
    copy is processed into the "test" result set, and the database is then
    asked for results that differ between "test" and "test_validation".
    Any differing result fails the test. The scratch file is removed on
    the way out regardless of the outcome.

    Returns early (without failing) when `algorithm` is "native" and the
    loaded run is not cotrans, or when the selected processor reports that
    it does not exist.
    """
    case_dir = "test/{}/".format(case)
    test_file = case_dir + "test.spats.tmp"
    try:
        shutil.copyfile(case_dir + "ds.spats", test_file)
        db = PairDB(test_file)
        spats = Spats()
        db.load_run(spats.run)
        if not spats.run.cotrans and algorithm == "native":
            return

        spats.run.writeback_results = True
        spats.run.result_set_name = "test"
        spats.run.algorithm = algorithm
        spats.run.quiet = True
        spats.loadTargets(db)
        if not spats._processor.exists():
            # just ignore the native test if it's not available
            self.assertEqual("native", algorithm)
            return

        # small batch_size just to exercise multiprocessing code
        spats.process_pair_db(db, batch_size=1024)

        # Walk every differing result; `count` ends up as the total and
        # `msg` as the rendering of the last one seen (None when there are none).
        count = 0
        msg = None
        differing = db.differing_results("test", "test_validation")
        for count, res in enumerate(differing, 1):
            msg = str([str(field) for field in res])
        self.assertEqual(
            0,
            count,
            "{} differing results: {} / {} \n{}".format(count, case, algorithm, msg),
        )
    finally:
        if os.path.exists(test_file):
            os.remove(test_file)
def tags(base_path="/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/",
         reparse=True, sample_size=100000):
    """Run the tag processor over the cotrans pair DB and print the tag counts.

    Args:
        base_path: Directory holding ``db/pairs.db``, ``cotrans_single.fa``
            and the ``data/`` FASTQ files. Defaults to the path that was
            previously hard-coded in the body.
        reparse: When true (the default, matching the former ``if True:``),
            wipe the pair DB and re-parse the targets and FASTQ files into it
            before processing.
        sample_size: Passed through as ``sample_size`` to ``PairDB.parse``
            when re-parsing.

    NOTE(review): this source contains a second ``tags()`` definition with
    the same body; if both live in one module the later one takes effect.
    """
    import os

    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    def in_base(relative):
        # join() tolerates a base_path given with or without a trailing slash.
        return os.path.join(base_path, relative)

    pair_db = PairDB(in_base("db/pairs.db"))
    if reparse:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Parsing to db...")
        pair_db.wipe()
        pair_db.add_targets_table(in_base("cotrans_single.fa"))
        pair_db.parse(in_base("data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"),
                      in_base("data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"),
                      sample_size=sample_size)

    s = Spats()
    s.run._processor_class = TagProcessor
    s.run.writeback_results = True
    s.run.result_set_name = "tags"
    s.run.num_workers = 1
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.loadTargets(pair_db)
    s.run.allow_indeterminate = True
    s.run.allowed_target_errors = 2
    s.run.allowed_adapter_errors = 2

    # Register each target and its reverse complement, then the adapters
    # and (for cotrans runs) the linker, as tag targets on the processor.
    p = s._processor
    for target in pair_db.targets():
        p.addTagTarget(target[0], target[1])
        p.addTagTarget(target[0] + "_rc", reverse_complement(target[1]))
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)
    if s.run.cotrans:
        p.addTagTarget("linker_cotrans", s.run.cotrans_linker)
        p.addTagTarget("linker_cotrans_rc", reverse_complement(s.run.cotrans_linker))

    s.process_pair_db(pair_db)
    rsid = pair_db.result_set_id_for_name(s.run.result_set_name)
    pair_db.count_tags(rsid)
    print(pair_db.tag_counts(rsid))
def tags(base_path="/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/",
         reparse=True, sample_size=100000):
    """Run the tag processor over the cotrans pair DB and print the tag counts.

    Args:
        base_path: Directory holding ``db/pairs.db``, ``cotrans_single.fa``
            and the ``data/`` FASTQ files. Defaults to the path that was
            previously hard-coded in the body.
        reparse: When true (the default, matching the former ``if True:``),
            wipe the pair DB and re-parse the targets and FASTQ files into it
            before processing.
        sample_size: Passed through as ``sample_size`` to ``PairDB.parse``
            when re-parsing.

    NOTE(review): this source contains an earlier ``tags()`` definition with
    the same body; if both live in one module this later one takes effect.
    """
    import os

    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement

    def in_base(relative):
        # join() tolerates a base_path given with or without a trailing slash.
        return os.path.join(base_path, relative)

    pair_db = PairDB(in_base("db/pairs.db"))
    if reparse:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Parsing to db...")
        pair_db.wipe()
        pair_db.add_targets_table(in_base("cotrans_single.fa"))
        pair_db.parse(in_base("data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"),
                      in_base("data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"),
                      sample_size=sample_size)

    s = Spats()
    s.run._processor_class = TagProcessor
    s.run.writeback_results = True
    s.run.result_set_name = "tags"
    s.run.num_workers = 1
    s.run.cotrans = True
    s.run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.loadTargets(pair_db)
    s.run.allow_indeterminate = True
    s.run.allowed_target_errors = 2
    s.run.allowed_adapter_errors = 2

    # Register each target and its reverse complement, then the adapters
    # and (for cotrans runs) the linker, as tag targets on the processor.
    p = s._processor
    for target in pair_db.targets():
        p.addTagTarget(target[0], target[1])
        p.addTagTarget(target[0] + "_rc", reverse_complement(target[1]))
    p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    p.addTagTarget("adapter_b", s.run.adapter_b)
    if s.run.cotrans:
        p.addTagTarget("linker_cotrans", s.run.cotrans_linker)
        p.addTagTarget("linker_cotrans_rc", reverse_complement(s.run.cotrans_linker))

    s.process_pair_db(pair_db)
    rsid = pair_db.result_set_id_for_name(s.run.result_set_name)
    pair_db.count_tags(rsid)
    print(pair_db.tag_counts(rsid))
def _loadDBAndModel(self):
    """Build a Spats instance from ``self._db`` and store it on ``self._spats``.

    Sets, as side effects:
      * ``self.result_set_id`` -- id of the first result set name reported
        by the DB, or -1 when the DB reports none.
      * ``self.has_tags`` -- true only when a result set exists and its
        looked-up id is truthy.
      * ``self.has_counters`` -- whatever ``self._db.has_counters()`` reports.
      * ``self._spats`` -- the configured ``Spats`` object.

    When tags are available, the processor is given tag targets for every
    target (plus reverse complement), the adapters, the cotrans linker (for
    cotrans runs) and any extra tags configured on the run.
    """
    rsnames = self._db.result_sets()
    if rsnames:
        self.result_set_id = self._db.result_set_id_for_name(rsnames[0])
        self._db.index_results()
        self.has_tags = bool(self.result_set_id)
    else:
        self.result_set_id = -1
        self.has_tags = False
    self.has_counters = self._db.has_counters()

    s = Spats()
    self._db.load_run(s.run)
    s.run._p_use_tag_processor = True
    s.loadTargets(self._db)
    if self.has_counters:
        self._db.load_counters("spats", s.counters)

    if self.has_tags:
        p = s._processor
        for t in s._targets.targets:
            p.addTagTarget(t.name, t.seq)
            p.addTagTarget(t.name + "_rc", reverse_complement(t.seq))
            # Give every target name the shared "target" color.
            self.colors._colors[t.name.lower()] = self.colors.color("target")
        p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
        p.addTagTarget("adapter_b", s.run.adapter_b)
        if s.run.cotrans:
            p.addTagTarget("linker_cotrans", s.run.cotrans_linker)
            p.addTagTarget("linker_cotrans_rc", reverse_complement(s.run.cotrans_linker))
        if s.run._p_extra_tags:
            # items() rather than iteritems(): the latter is gone on Python 3.
            for tag, seq in s.run._p_extra_tags.items():
                p.addTagTarget(tag, seq)
        if not self.has_counters:
            # No stored counters: load them from the result set's counter data.
            p.counters.load_from_db_data(self._db.counter_data_for_results(self.result_set_id))

    self._spats = s