Пример #1
0
def show_failure_types():
    """Print one summary line per read pair of the filtered 5s data set.

    The R1 and R2 FASTQ files are read in lockstep; each pair is processed
    by Spats and reported as ``<identifier> :: <site or failure>``, followed
    by any match/adapter error details recorded on either read.
    """
    from spats_clean import Spats, Pair, FastqRecord
    spats = Spats("test/5s/5s.fa", "test/5s")
    spats.setup()
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    with open(bp + "t11/x/filtered_R1.fq", 'rb') as r1_in, \
            open(bp + "t11/x/filtered_R2.fq", 'rb') as r2_in:
        # The record and pair objects are reused for every iteration.
        r1_record = FastqRecord()
        r2_record = FastqRecord()
        pair = Pair()
        while True:
            r1_record.read(r1_in)
            if not r1_record.identifier:
                # No identifier after a read: treat as end of input.
                break
            r2_record.read(r2_in)
            pair.set_from_records(r1_record, r2_record)

            spats.process_pair(pair)

            outcome = pair.site if pair.has_site else pair.failure
            parts = ["{} :: {}".format(pair.identifier, outcome)]
            r1, r2 = pair.r1, pair.r2
            if r1.match_errors:
                parts.append(" R1!: {}".format(r1.match_errors))
            if r1.adapter_errors:
                parts.append(" R1A!: {}, adapter_len={}".format(
                    r1.adapter_errors, r1._rtrim))
            if r2.match_errors:
                parts.append(" R2!: {}".format(r2.match_errors))
            if r2.adapter_errors:
                # R2's trim length is reported minus 4, unlike R1's.
                parts.append(" R2A!: {}, adapter_len={}".format(
                    r2.adapter_errors, r2._rtrim - 4))
            print("".join(parts))
Пример #2
0
 def setUp(self):
     """Create a cotrans-enabled Spats with left-prefix collapsing and load the single cotrans target."""
     from spats_shape_seq import Spats
     self.spats = Spats()
     run = self.spats.run
     run.cotrans = True
     run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
     run.collapse_left_prefixes = True
     self.spats.addTargets("test/cotrans/cotrans_single.fa")
Пример #3
0
def d5s_run():
    """Process the 5S FASTQ pair without a database and write reactivities.

    Targets come from ``5s/5S.fa``; output is written to ``dev_out/rx2.out``
    under the hard-coded base path.
    """
    from spats_shape_seq import Spats
    # Imported but unused: only a disabled processor override referenced it.
    from spats_shape_seq.partial import PartialFindProcessor

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    r1_path = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_path = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    runner = Spats()
    runner.run.skip_database = True
    runner.addTargets(bp + "5s/5S.fa")
    runner.process_pair_data(r1_path, r2_path)
    runner.compute_profiles()
    runner.write_reactivities(bp + "dev_out/rx2.out")
Пример #4
0
 def setUp(self):
     """Create a fresh Spats and let the test set configure it.

     Any exception raised during setup is printed together with the test
     set's name and then propagated to the caller unchanged.
     """
     try:
         self.spats = Spats()
         self.test_set.spats_setUp(self.spats)
     except Exception as e:
         print("exception caught on testset '{}' setup : {}".format(
             self.test_set.name, e))
         # Bare `raise` re-raises the active exception with its original
         # traceback; `raise e` would restart the traceback here on Python 2.
         raise
Пример #5
0
 def setUp(self):
     """Configure a Spats for mutation counting, then call setup_processor()."""
     self.spats = Spats()
     settings = (
         ("count_mutations", True),
         # ord('.') - ord('!') evaluates to 13.
         ("mutations_require_quality_score", ord('.') - ord('!')),
         ("allowed_target_errors", 1),
         ("ignore_stops_with_mismatched_overlap", True),
         ("adapter_b", "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"),
     )
     for option, value in settings:
         setattr(self.spats.run, option, value)
     self.setup_processor()
Пример #6
0
 def run_algorithm(self, alg):
     """Run the pair cases against the SRP target using algorithm `alg`.

     A new Spats is created with mutation counting enabled, one allowed
     target error, and stops with mismatched overlap ignored; then
     ``self.run_pairs()`` is invoked.
     """
     from spats_shape_seq import Spats
     self.spats = Spats()
     run = self.spats.run
     run.algorithm = alg
     run.count_mutations = True
     # An error count, not a flag: use the integer 1 rather than True
     # (numerically equal, but an int is what the option is given elsewhere).
     run.allowed_target_errors = 1
     run.ignore_stops_with_mismatched_overlap = True
     self.spats.addTargets("test/SRP/SRP.fa")
     self.run_pairs()
Пример #7
0
def cotrans_debug():
    """Debug helper: process one hard-coded cotrans read pair and print the outcome.

    Prints the pair's identifier and original sequences, then either
    ``<target>: <site> / <end>`` or ``FAIL: <failure>``.
    """
    from spats_shape_seq import Spats
    s = Spats()
    s.run.cotrans = True
    s.run.minimum_target_match_length = 10
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    # The pair is first populated from /tmp/spats_test.json ...
    import cjb.util
    d = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(d['id']), str(d['r1']), str(d['r2']))

    # ... and then overwritten with the hard-coded case below. Edit this
    # literal to debug a different pair. The two trailing numbers are not
    # used here (presumably expected site/end values -- TODO confirm).
    case = [
        'jjbn', 'TTTGGTCCTTGGTGCCCGAGTCAGTAAAAAAATAGA',
        'TCTATTTTTTTACTGACTCGGGCACCAAGGACCAAA', 83, 71
    ]
    pair.set_from_data(case[0], case[1], case[2])
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                               pair.r2.original_seq))
    s.process_pair(pair)
    if not pair.has_site:
        print("FAIL: {}".format(pair.failure))
    else:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.end))
0
def spats(target, r1, r2, out, show_sites=True):
    """Process the read files `r1`/`r2` against the targets in `target`.

    When `show_sites` is truthy, ``spats_config.show_id_to_site`` is turned
    on before processing and nothing is written. Otherwise profiles are
    computed afterwards and reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config
    runner = Spats()
    runner.addTargets(target)
    runner.addMasks("RRRY", "YYYR")
    if show_sites:
        spats_config.show_id_to_site = True
        runner.process_pair_data(r1, r2)
        return
    runner.process_pair_data(r1, r2)
    runner.compute_profiles()
    runner.write_reactivities(out + "/rx.out")
Пример #9
0
def dbrun():
    """Process an existing pair database, writing results back under a named result set.

    The database path is taken from ``sys.argv[2]`` and the result set name
    from ``sys.argv[3]``.
    """
    db_path, run_name = sys.argv[2], sys.argv[3]
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    pair_db = PairDB(db_path)
    runner = Spats()
    run = runner.run
    run.writeback_results = True
    run.result_set_name = run_name
    runner.process_pair_db(pair_db)
Пример #10
0
    def test_minimum_length(self):
        """Check that minimum_target_match_length controls whether a short match is found.

        With a minimum of 11 ("find_partial") the case is expected to yield
        no site; with a minimum of 8 ("lookup") it is expected at site 135.
        """
        from spats_shape_seq import Spats
        self.spats = Spats()
        self.spats.run.algorithm = "find_partial"
        self.spats.run.minimum_target_match_length = 11
        self.spats.addTargets("test/5s/5s.fa")
        self.assertEqual(11, self.spats._targets.minimum_match_length)
        case = [
            '1109:22737:14675', 'TCCAGTCCTTGGAGATCGGAAGAGCACACGTCTGA',
            'CCAAGGACTGGAAGATCGGAAGAGCGTCGTGTAGG', None
        ]
        self.run_case(case)

        # this case only matches if the minimum length is set to 8
        # The setting must be applied to the NEW instance; assigning it before
        # re-creating Spats would configure an object that is then discarded.
        self.spats = Spats()
        self.spats.run.algorithm = "lookup"
        self.spats.run.minimum_target_match_length = 8
        self.spats.addTargets("test/5s/5s.fa")
        case[3] = 135
        self.run_case(case)
Пример #11
0
def tmut_case():
    """Debug helper: process one hard-coded read pair against mut_single.fa and print its diagram.

    After the diagram, prints ``<target>: <site> / <end> <mutations>`` when
    a site was found, otherwise ``FAIL: <failure>``.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    spats = Spats(cotrans=False)
    run = spats.run
    run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    run.count_mutations = True
    run.algorithm = "find_partial"
    run.allowed_target_errors = 1
    run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
    run._process_all_pairs = True
    run.writeback_results = True
    run.num_workers = 1
    run.result_set_name = "mut"
    spats.addTargets(bp + "mut_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()

    # (R1, R2) sequences of the pair under investigation; edit to debug another pair.
    reads = [
        'GAACCAACAAGAATTGGGACAACTCCAGTGAAAGGTTCTTCTCCTTTGCTCATCATTAACCTCCTGAAGATCGGA',
        'TCAGGAGGTTAATGATGAGCAAAGGAGAAGAACCTTTCACTGGAGTTGTCCCAATTCTTGTTGGTTCAGATCGGA'
    ]
    pair.set_from_data('x', reads[0], reads[1])
    spats.process_pair(pair)
    print(diagram(pair, spats.run))
    if not pair.has_site:
        print("FAIL: {}".format(pair.failure))
    else:
        print("{}: {} / {} {}".format(pair.target.name, pair.site, pair.end,
                                      pair.mutations))
Пример #12
0
def ligation_run():
    """Process the KEW1 ligation FASTQ pair against panel_RNAs_complete.fa, skipping the database."""
    from spats_shape_seq import Spats
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/Shape_Seq_ligation/"
    r1_path = bp + "data/KEW1_S1_L001_R1_001.fastq"
    r2_path = bp + "data/KEW1_S1_L001_R2_001.fastq"

    runner = Spats()
    runner.run.skip_database = True
    runner.addTargets(bp + "panel_RNAs_complete.fa")
    runner.process_pair_data(r1_path, r2_path)
Пример #13
0
def cotrans_test():
    """Process one hard-coded read pair against F_wt.fa and print ``<target name>: <site>``."""
    from spats_shape_seq import Spats
    # Imported but unused: only a disabled processor override referenced it.
    from spats_shape_seq.partial import PartialFindProcessor
    from spats_shape_seq.pair import Pair

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    runner = Spats()
    runner.addTargets(bp + "F_wt.fa")

    pair = Pair()
    r1_seq = 'GAGCGTCCTTGGTGCCCGAGTCAGAAATAGACTCCT'
    r2_seq = 'TATCACTACTGGTAGGAGTCTATTTCTGACTCGGGC'
    pair.set_from_data('x', r1_seq, r2_seq)
    runner.process_pair(pair)
    print("{}: {}".format(pair.target.name, pair.site))
Пример #14
0
def d5s_writeback_run():
    """Process the 5S pair database and write results back as result set "pure_python".

    Opens ``dev_out/pairs.db`` under the hard-coded base path, adds the
    targets table from ``5s/5S.fa``, and processes the stored pairs.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"

    from spats_shape_seq.db import PairDB
    pair_db = PairDB(bp + "dev_out/pairs.db")
    pair_db.add_targets_table(bp + "5s/5S.fa")

    from spats_shape_seq import Spats
    s = Spats()
    s.addTargets(bp + "5s/5S.fa")
    # These are run options, so they belong on `s.run`; assigning them on the
    # Spats object itself merely creates unrelated attributes.
    s.run.writeback_results = True
    s.run.result_set_name = "pure_python"
    s.process_pair_db(pair_db)
Пример #15
0
 def __init__(self, reads_data, cotrans=False):
     """Store the reads data and prepare a Spats configured for tag processing.

     :param reads_data: object exposing a ``pair_db`` attribute.
     :param cotrans: value assigned to the run's ``cotrans`` option.
     """
     self._reads_data = reads_data
     self._pair_db = reads_data.pair_db
     spats = Spats()
     run_options = (
         ("_p_use_tag_processor", True),
         ("cotrans", cotrans),
         ("writeback_results", True),
         ("result_set_name", "tags"),
         ("allow_indeterminate", True),
         ("allowed_target_errors", 2),
         ("allowed_adapter_errors", 2),
         ("num_workers", 8),
     )
     for option, value in run_options:
         setattr(spats.run, option, value)
     self._spats = spats
     self._extra_tag_targets = []
     self._plugins = {}
Пример #16
0
def cotrans_run():
    """Process the EJS_6 cotrans FASTQ pair against F_wt.fa and write reactivities.

    The database is skipped; output goes to ``dev_out/rx.out`` under the
    hard-coded cotrans data set directory.
    """
    from spats_shape_seq import Spats
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    r1_path = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
    r2_path = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"

    runner = Spats()
    runner.run.skip_database = True
    runner.addTargets(bp + "F_wt.fa")
    runner.process_pair_data(r1_path, r2_path)
    runner.compute_profiles()
    runner.write_reactivities(bp + "dev_out/rx.out")
Пример #17
0
def make_test_dataset():
    """Build the ``ds.spats`` data set with a "test_validation" result set.

    Parses the ``med`` FASTQ pair into a new pair database, processes all
    pairs with the "find_partial" algorithm on a single worker, and stores
    the run settings and the counters in the database.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/data/"
    pair_db = PairDB(bp + "ds.spats")
    pair_db.add_targets_table(bp + "../cotrans_single.fa")
    pair_db.parse(bp + "med_R1.fq", bp + "med_R2.fq")

    spats = Spats(cotrans=True)
    run = spats.run
    run.num_workers = 1
    run.writeback_results = True
    run._process_all_pairs = True
    run.algorithm = "find_partial"
    run.result_set_name = "test_validation"
    spats.process_pair_db(pair_db)

    pair_db.store_run(spats.run)
    pair_db.store_counters('spats', spats.counters)
Пример #18
0
def prof_run():
    """Process the 2k PDC FASTQ pair with the "lookup" algorithm, then exit with status 0.

    Runs on a single worker with the database skipped, all pairs processed,
    and mutation counting enabled.
    """
    from spats_shape_seq import Spats
    spats = Spats()
    for option, value in (
            ("_process_all_pairs", True),
            ("skip_database", True),
            ("algorithm", "lookup"),
            ("count_mutations", True),
            ("num_workers", 1),
    ):
        setattr(spats.run, option, value)

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/pdc_muts/PDC_tweaked/PDC_09_001_6/"
    spats.addTargets(bp + "target.fa")
    spats.process_pair_data(bp + "2k_R1.fastq", bp + "2k_R2.fastq")
    exit(0)
Пример #19
0
def indels_run():
    """Process the hairpin match-failure FASTQ pair with indel and mutation counting, then exit.

    Targets come from ``test/hairpin/hairpinA_circ.fa`` under the hard-coded
    code directory; the process exits with status 0 when done.
    """
    from spats_shape_seq import Spats
    s = Spats()
    # NOTE(review): other users of this API assign an algorithm *name*
    # (e.g. "find_partial", "lookup") here; True looks unintended, but the
    # intended algorithm cannot be determined from this function -- confirm.
    s.run.algorithm = True
    s.run.count_indels = True
    s.run.count_mutations = True
    s.run.allowed_target_errors = 8
    # Run options live on `s.run`; setting them on the Spats object itself
    # only creates unrelated attributes.
    s.run.collapse_left_prefixes = True
    s.run.ignore_stops_with_mismatched_overlap = True
    s.run.allow_negative_values = True
    s.run.mutations_require_quality_score = 30
    bp = "/Users/steve/mos/tasks/oughxX/code"
    s.addTargets(bp + "/test/hairpin/hairpinA_circ.fa")
    rp = bp + "/TESTING/cmp_muts_favored/steve_test"
    s.process_pair_data(rp + "/R1_match_failures.fastq",
                        rp + "/R2_match_failures.fastq")
    exit(0)
Пример #20
0
def test_refactor():
    """Re-run the 5S data set into ``t3/`` and diff its rx.out against ``t2/rx.out``.

    ``diff`` is run through subprocess.check_call, so a non-zero exit status
    raises CalledProcessError; "Diff OK" is printed otherwise.
    """
    import subprocess
    from spats_clean import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    out = bp + "t3/"
    r1_path = bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_path = bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    runner = Spats(bp + "5S.fa", out)
    runner.setup()
    runner.process_pair_data(r1_path, r2_path)
    runner.compute_profiles()
    runner.write_reactivities()

    reference = bp + "t2/rx.out"
    produced = out + "/rx.out"
    subprocess.check_call(["diff", reference, produced])
    print("Diff OK")
Пример #21
0
def tags():
    """Rebuild the cotrans pair database, tag-process it and print the tag counts.

    The database is wiped and re-parsed from the EJS_6 FASTQ pair with
    ``sample_size=100000``. Tag targets are registered for every target and
    its reverse complement, the adapters, and (when cotrans is on) the
    cotrans linker in both orientations.
    """
    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"

    from spats_shape_seq.db import PairDB
    pair_db = PairDB(bp + "db/pairs.db")
    print("Parsing to db...")
    pair_db.wipe()
    pair_db.add_targets_table(bp + "cotrans_single.fa")
    pair_db.parse(bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq",
                  bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq",
                  sample_size=100000)

    from spats_shape_seq import Spats
    from spats_shape_seq.tag import TagProcessor
    from spats_shape_seq.util import reverse_complement
    s = Spats()
    run = s.run
    run._processor_class = TagProcessor
    run.writeback_results = True
    run.result_set_name = "tags"
    run.num_workers = 1
    run.cotrans = True
    run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    s.loadTargets(pair_db)

    run.allow_indeterminate = True
    run.allowed_target_errors = 2
    run.allowed_adapter_errors = 2

    processor = s._processor
    for target in pair_db.targets():
        name, seq = target[0], target[1]
        processor.addTagTarget(name, seq)
        processor.addTagTarget(name + "_rc", reverse_complement(seq))
    processor.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
    processor.addTagTarget("adapter_b", s.run.adapter_b)
    if s.run.cotrans:
        linker = s.run.cotrans_linker
        processor.addTagTarget("linker_cotrans", linker)
        processor.addTagTarget("linker_cotrans_rc",
                               reverse_complement(s.run.cotrans_linker))

    s.process_pair_db(pair_db)
    rsid = pair_db.result_set_id_for_name(s.run.result_set_name)
    pair_db.count_tags(rsid)
    print(pair_db.tag_counts(rsid))
Пример #22
0
class TestDatasets(unittest.TestCase):
    """Re-process stored data sets and compare the results with the recorded ones."""

    def test_datasets(self):
        """Run every case with every algorithm.

        Raises nose.SkipTest as soon as the 'native' algorithm comes up
        while the SKIP_SLOW_TESTS environment variable is set.
        """
        for case in cases:
            for alg in algorithms:
                if alg == 'native' and os.environ.get('SKIP_SLOW_TESTS'):
                    raise nose.SkipTest('skipping slow tests')
                self.run_dataset(case, alg)
        print("Ran {} datasets.".format(len(cases)))

    def run_dataset(self, case, algorithm):
        """Process a scratch copy of ``test/<case>/ds.spats`` with `algorithm`.

        Results are written to result set "test" and must not differ from
        the "test_validation" result set. The scratch copy is always removed.
        """
        dataset_dir = "test/{}/".format(case)
        scratch_db = dataset_dir + "test.spats.tmp"
        try:
            shutil.copyfile(dataset_dir + "ds.spats", scratch_db)
            db = PairDB(scratch_db)
            spats = Spats()
            run = spats.run
            db.load_run(run)
            if algorithm == "native" and not run.cotrans:
                return
            run.writeback_results = True
            run.result_set_name = "test"
            run.algorithm = algorithm
            run.quiet = True
            spats.loadTargets(db)
            if not spats._processor.exists():
                # just ignore the native test if it's not available
                self.assertEqual("native", algorithm)
                return
            # small batch_size just to exercise multiprocessing code
            spats.process_pair_db(db, batch_size=1024)
            differing = [
                str([str(x) for x in res])
                for res in db.differing_results("test", "test_validation")
            ]
            count = len(differing)
            # Only the last differing result is included in the message.
            msg = differing[-1] if differing else None
            self.assertEqual(
                0, count, "{} differing results: {} / {} \n{}".format(
                    count, case, algorithm, msg))
        finally:
            if os.path.exists(scratch_db):
                os.remove(scratch_db)
Пример #23
0
def tag_test():
    """Process the read pair described in /tmp/spats_test.json and print the outcome.

    The JSON must provide ``id``, ``r1`` and ``r2``. Prints the identifier
    and original sequences, then ``<target>: <site> / <right>`` or
    ``FAIL: <failure>``.
    """
    from spats_shape_seq import Spats
    s = Spats()
    run = s.run
    run.cotrans = True
    run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
    run.algorithm = "find_partial"

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    s.addTargets(bp + "cotrans_single.fa")

    from spats_shape_seq.pair import Pair
    pair = Pair()
    import cjb.util
    spec = cjb.util.jsonAtPath("/tmp/spats_test.json")
    pair.set_from_data(str(spec['id']), str(spec['r1']), str(spec['r2']))
    print("{}\n{} / {}".format(pair.identifier, pair.r1.original_seq,
                               pair.r2.original_seq))
    s.process_pair(pair)
    if not pair.has_site:
        print("FAIL: {}".format(pair.failure))
    else:
        print("{}: {} / {}".format(pair.target.name, pair.site, pair.right))
Пример #24
0
    def _loadDBAndModel(self):
        """Load result-set state from the database and build the Spats model.

        Sets ``result_set_id``, ``has_tags`` and ``has_counters``, creates a
        Spats whose run settings and targets are loaded from the database,
        registers the tag targets when tags are available, and stores the
        instance in ``self._spats``.
        """
        db = self._db

        rsnames = db.result_sets()
        if not rsnames:
            self.result_set_id = -1
            self.has_tags = False
        else:
            # Only the first result set name is used.
            self.result_set_id = db.result_set_id_for_name(rsnames[0])
            db.index_results()
            self.has_tags = bool(self.result_set_id)

        self.has_counters = db.has_counters()

        s = Spats()
        db.load_run(s.run)
        s.run._p_use_tag_processor = True
        s.loadTargets(db)
        if self.has_counters:
            db.load_counters("spats", s.counters)

        if self.has_tags:
            p = s._processor
            for t in s._targets.targets:
                p.addTagTarget(t.name, t.seq)
                p.addTagTarget(t.name + "_rc", reverse_complement(t.seq))
                # Each target's lower-cased name gets the "target" color.
                self.colors._colors[t.name.lower()] = self.colors.color("target")
            p.addTagTarget("adapter_t_rc", reverse_complement(s.run.adapter_t))
            p.addTagTarget("adapter_b", s.run.adapter_b)
            if s.run.cotrans:
                p.addTagTarget("linker_cotrans", s.run.cotrans_linker)
                p.addTagTarget("linker_cotrans_rc", reverse_complement(s.run.cotrans_linker))
            extra_tags = s.run._p_extra_tags
            if extra_tags:
                for tag, seq in extra_tags.iteritems():
                    p.addTagTarget(tag, seq)
            if not self.has_counters:
                # No stored counters: load them from the result data instead.
                counter_data = db.counter_data_for_results(self.result_set_id)
                p.counters.load_from_db_data(counter_data)

        self._spats = s
Пример #25
0
def tmut():
    """Process the mut2 data set with "find_partial" and "lookup" and diff the two result sets.

    The ``ds_cmp.spats`` database is wiped and re-parsed first. Each
    algorithm writes to result set ``mut_<algorithm>`` and stores its run
    and counters; ``rdiff_func`` then compares the two result sets.
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.db import PairDB
    from spats_shape_seq.diagram import diagram

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/mutsl/"

    pair_db = PairDB(bp + "ds_cmp.spats")
    print("Parsing to db...")
    pair_db.wipe()
    pair_db.add_targets_table(bp + "mut_single.fa")
    fq_name = "mut2"
    pair_db.parse(bp + fq_name + "_R1.fastq", bp + fq_name + "_R2.fastq")

    spatss = []
    for alg in ["find_partial", "lookup"]:
        spats = Spats(cotrans=False)
        run = spats.run
        run.cotrans_linker = 'CTGACTCGGGCACCAAGGAC'
        run.count_mutations = True
        run.algorithm = alg
        run.allowed_target_errors = 1
        run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
        run._process_all_pairs = True
        run.writeback_results = True
        run.num_workers = 1
        run.result_set_name = "mut_" + alg

        spats.process_pair_db(pair_db)
        pair_db.store_run(spats.run)
        pair_db.store_counters(spats.run.result_set_name, spats.counters)
        spatss.append(spats)

    # The "find_partial" instance (first in the list) is passed as diag_spats.
    rdiff_func(bp + "ds_cmp.spats",
               "mut_find_partial",
               "mut_lookup",
               diag_spats=spatss[0])
Пример #26
0
def diag_case():
    """Run the starred test_mut cases, print their diagrams and flag site mismatches.

    Only cases whose identifier starts with "*" are run. Raises
    Exception("Case failed") if any processed pair's site differs from the
    case's expected site (``case[3]``).
    """
    from spats_shape_seq import Spats
    from spats_shape_seq.pair import Pair
    from spats_shape_seq.tests.test_mut import cases
    from spats_shape_seq.diagram import diagram

    spats = Spats()
    spats.addTargets("test/mut/mut_single.fa")
    run = spats.run
    run.debug = True
    run.algorithm = "find_partial"
    run.count_mutations = True
    run.allowed_target_errors = 1
    run.ignore_stops_with_mismatched_overlap = True
    run.adapter_b = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"

    # Failure flag kept on the spats object so the nested helper can set it.
    spats._case_errors = False

    def run_case(case):
        """Process one case, print its diagram and record a site mismatch."""
        pair = Pair()
        pair.set_from_data(case[0], case[1], case[2])
        spats.process_pair(pair)
        print(diagram(pair, spats.run))
        expected_site = case[3]
        if expected_site != pair.site:
            spats._case_errors = True
            print("******* mismatch: {} != {}".format(case[3], pair.site))

    for case in cases:
        if not case[0].startswith("*"):
            continue
        run_case(case)
    spats.run.debug = False
    if spats._case_errors:
        raise Exception("Case failed")
Пример #27
0
 def setUp(self):
     """Create a Spats with left-prefix collapsing enabled and load the 5s target."""
     from spats_shape_seq import Spats
     self.spats = spats = Spats()
     spats.run.collapse_left_prefixes = True
     spats.addTargets("test/5s/5s.fa")
Пример #28
0
 def setUp(self):
     """Create a Spats with a minimum target match length of 10 and load the panel RNA targets."""
     from spats_shape_seq import Spats
     self.spats = spats = Spats()
     spats.run.minimum_target_match_length = 10
     spats.addTargets("test/panel_RNAs/panel_RNAs_complete.fa")
Пример #29
0
 def setUp(self):
     """Create a default Spats and load the 5su target file."""
     from spats_shape_seq import Spats
     self.spats = spats = Spats()
     spats.addTargets("test/5s/5su.fa")
Пример #30
0
 def run_algorithm(self, alg):
     """Run the pair cases against the 5SrRNA target using algorithm `alg`."""
     from spats_shape_seq import Spats
     self.spats = spats = Spats()
     spats.run.algorithm = alg
     spats.addTargets("test/5SrRNA/5SrRNA.fa")
     self.run_pairs()