def test_outdir(self):
        """Check the files produced when an output prefix is handed over.

        A fresh temporary directory path is passed to ``mask_sequence`` as
        third positional argument.  The test then expects to find the files
        ``<outdir>.match`` and ``<outdir>.non_match`` (i.e. next to the
        temporary directory, not inside it) and compares their lines with
        the expected FASTA content.

        All files and the temporary directory are removed even if the call
        or one of the reads fails, so a broken run leaves nothing behind.
        """
        outdir = tempfile.mkdtemp(prefix='splitseq_')
        result_files = {type_: outdir + '.' + type_
                        for type_ in ('match', 'non_match')}

        exp_filecontents = {
            'non_match':
            [('>NZ_GG666849.1_2_251-330 # 798 # 2885 # -1 # ID=1_2;parti'
              'al=00;start_type=TTG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp'
              ';gc_cont=0.499\n'),
             ('IGIQGDTYSEDEDYPELPRTANGRLSSYILVNHKEQVHVYNQIATKLGLQKESGEVVMLPSQ'
              'FINRFSLRNEHGRGIPDQ\n')],
            'match': []
        }

        obs_filecontents = dict()
        try:
            mask_sequence(self.file_missed,
                          self.file_query,
                          outdir,
                          max_evalue=0.1,
                          min_fragment_length=40)

            for type_, filename in result_files.items():
                # a missing file still raises here and fails the test
                with open(filename, 'r') as f:
                    obs_filecontents[type_] = f.readlines()
        finally:
            # clean up unconditionally; only delete what actually exists so
            # that an earlier error is not hidden by a failing os.remove
            for filename in result_files.values():
                if os.path.exists(filename):
                    os.remove(filename)
            shutil.rmtree(outdir)

        self.assertDictEqual(obs_filecontents, exp_filecontents)
    def test_outdir(self):
        """Check the files produced when an output prefix is handed over.

        A fresh temporary directory path is passed to ``mask_sequence`` as
        third positional argument.  The test then expects to find the files
        ``<outdir>.match`` and ``<outdir>.non_match`` (i.e. next to the
        temporary directory, not inside it) and compares their lines with
        the expected FASTA content.

        All files and the temporary directory are removed even if the call
        or one of the reads fails, so a broken run leaves nothing behind.
        """
        outdir = tempfile.mkdtemp(prefix='splitseq_')
        result_files = {type_: outdir + '.' + type_
                        for type_ in ('match', 'non_match')}

        exp_filecontents = {
            'non_match':
            [('>NZ_GG666849.1_2_251-330 # 798 # 2885 # -1 # ID=1_2;parti'
              'al=00;start_type=TTG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp'
              ';gc_cont=0.499\n'),
             ('IGIQGDTYSEDEDYPELPRTANGRLSSYILVNHKEQVHVYNQIATKLGLQKESGEVVMLPSQ'
              'FINRFSLRNEHGRGIPDQ\n')],
            'match': []}

        obs_filecontents = dict()
        try:
            mask_sequence(self.file_missed,
                          self.file_query,
                          outdir,
                          max_evalue=0.1,
                          min_fragment_length=40)

            for type_, filename in result_files.items():
                # a missing file still raises here and fails the test
                with open(filename, 'r') as f:
                    obs_filecontents[type_] = f.readlines()
        finally:
            # clean up unconditionally; only delete what actually exists so
            # that an earlier error is not hidden by a failing os.remove
            for filename in result_files.values():
                if os.path.exists(filename):
                    os.remove(filename)
            shutil.rmtree(outdir)

        self.assertDictEqual(obs_filecontents, exp_filecontents)
    def test_split_search_parseerror_2(self):
        """Smoke test: masking and parsing the second hhsearch file.

        There are no assertions; the test passes as long as neither
        ``mask_sequence`` nor ``parse_pdb_match`` raises.
        """
        hhsearch_fp = self.file_hhsearch2
        fasta_fp = self.file_fasta2

        mask_sequence(hhsearch_fp, fasta_fp,
                      min_prob=95.0, min_fragment_length=40)

        parse_pdb_match(hhsearch_fp)
    def test_split_search_parseerror_2(self):
        """Smoke test: masking and parsing the second hhsearch file.

        There are no assertions; the test passes as long as neither
        ``mask_sequence`` nor ``parse_pdb_match`` raises.
        """
        thresholds = {'min_prob': 95.0, 'min_fragment_length': 40}
        mask_sequence(self.file_hhsearch2, self.file_fasta2, **thresholds)

        parse_pdb_match(self.file_hhsearch2)
    def test_mask_sequence(self):
        """The result of ``mask_sequence`` must equal ``self.exp_hits``."""
        thresholds = {'max_evalue': 0.1, 'min_fragment_length': 40}

        # None goes into the third positional slot
        # (presumably the output location -- confirm against mask_sequence)
        hits = mask_sequence(self.file_missed, self.file_query, None,
                             **thresholds)

        # hit selection has to reproduce the stored expectation
        self.assertEqual(hits, self.exp_hits)
    def test_mask_sequence(self):
        """The result of ``mask_sequence`` must equal ``self.exp_hits``."""
        search_fp, query_fp = self.file_missed, self.file_query

        # None goes into the third positional slot
        # (presumably the output location -- confirm against mask_sequence)
        selected = mask_sequence(search_fp, query_fp, None,
                                 max_evalue=0.1, min_fragment_length=40)

        # hit selection has to reproduce the stored expectation
        self.assertEqual(selected, self.exp_hits)
    def test_level2(self):
        """Compare fragment coordinates parsed from headers to ``self.pos2``.

        For every result type (in sorted order) and every entry (in sorted
        order) the ``start-stop`` part is taken from the end of the first
        ``' # '``-separated header field, after its last underscore.
        """
        masked = mask_sequence(self.file_out2, self.file_fasta2,
                               max_evalue=0.1, min_fragment_length=40)

        positions = []
        for type_ in sorted(masked.keys()):
            for entry in sorted(masked[type_]):
                identifier = entry[0].split(' # ')[0]
                coordinates = identifier.split('_')[-1]
                # unpacking fails unless there is exactly one '-'
                start, stop = coordinates.split('-')
                positions.append((type_, start, stop))

        self.assertEqual(positions, self.pos2)
Пример #8
0
 def test_pretty_output(self):
     """Text printed by ``pretty_output`` must equal the reference file."""
     reference_fp = get_data_path('test_split_search/NC_000913.3_2.pretty')
     with open(reference_fp, 'r') as reference_file:
         expected_text = reference_file.read()

     masked = mask_sequence(self.file_a, self.file_query,
                            min_fragment_length=5)

     # collect everything pretty_output writes to the captured stdout
     with captured_output() as (out, err):
         pretty_output(masked)
     printed_text = out.getvalue()

     # no truncation of the diff if the texts differ
     self.maxDiff = None
     self.assertEqual(printed_text, expected_text)
Пример #9
0
    def test_mask_sequence_information(self):
        """Check header and sequence of the reported match fragment.

        The fragment is verified twice: in the value returned by
        ``mask_sequence`` and in the ``.match`` / ``.non_match`` files the
        test expects to find when an output prefix is given.  Finally, an
        output prefix of ``'/dev'`` is expected to raise ``IOError``.

        Output files go into a private temporary directory (instead of a
        fixed path below ``/tmp``) so that parallel test runs cannot clash,
        and the directory is removed even when an assertion fails.
        """
        # local imports keep this test self-contained
        import shutil
        import tempfile

        seq = ('MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALP'
               'NISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALIC'
               'RGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVL'
               'MAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKS'
               'MSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGIS'
               'NLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAER'
               'AMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGS'
               'SERSISVVVNNDDATTGVRVTHQMLFN')
        header = ('gi|556503834|ref|NC_000913.3|_2_1-461 # 2j0w_A # 337 # 2799'
                  ' # 1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG'
                  ';rbs_spacer=5-10bp;gc_cont=0.531')

        # 1) returned data structure
        exp = (header, seq)
        obs = mask_sequence(self.file_a,
                            self.file_query,
                            min_fragment_length=450)
        self.assertEqual(obs['match'][0], exp)

        # 2) files written for a given output prefix
        workdir = tempfile.mkdtemp(prefix='splitseq_')
        try:
            filename = os.path.join(workdir, 'test.mfa')
            mask_sequence(self.file_a,
                          self.file_query,
                          filename,
                          min_fragment_length=450)

            with open(filename + '.match', 'r') as f:
                obs = f.readlines()
            self.assertIn(seq + "\n", obs)
            self.assertIn(">" + header + "\n", obs)

            # nothing is expected in the non-match file
            with open(filename + '.non_match', 'r') as f:
                obs = f.readlines()
            self.assertFalse(obs)
        finally:
            # removes both output files together with the directory
            shutil.rmtree(workdir, ignore_errors=True)

        # 3) an unusable output prefix has to be reported as IOError
        with self.assertRaises(IOError):
            mask_sequence(self.file_a, self.file_query, '/dev')
    def test_mask_sequence(self):
        """Match headers with default settings and with ``min_identity``."""
        def match_headers(result):
            # first element of every entry in the 'match' list
            return [entry[0] for entry in result['match']]

        match_header_2 = (
            'gi|556503834|ref|NC_000913.3|_2_464-815 # 1ebf_A # 337 # 2799 # '
            '1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spa'
            'cer=5-10bp;gc_cont=0.531')
        first_header_default = (
            'gi|556503834|ref|NC_000913.3|_2_1-461 # 2j0w_A # 337 # 2799 # 1 '
            '# ID=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_space'
            'r=5-10bp;gc_cont=0.531')
        first_header_identity = (
            'gi|556503834|ref|NC_000913.3|_2_1-462 # 3c1m_A # 337 # 2799 # 1 '
            '# ID=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_space'
            'r=5-10bp;gc_cont=0.531')

        # default behaviour
        default_result = mask_sequence(self.fp_out, self.fp_seqs)
        self.assertEqual(match_headers(default_result),
                         [first_header_default, match_header_2])

        # require at least 33% sequence identity
        # --> the first match differs from the default case above
        identity_result = mask_sequence(self.fp_out, self.fp_seqs,
                                        min_identity=0.33)
        self.assertEqual(match_headers(identity_result),
                         [first_header_identity, match_header_2])
Пример #11
0
    def test_level2(self):
        """Compare fragment coordinates parsed from headers to ``self.pos2``.

        For every result type (in sorted order) and every entry (in sorted
        order) the ``start-stop`` part is taken from the end of the first
        ``' # '``-separated header field, after its last underscore.
        """
        thresholds = {'max_evalue': 0.1, 'min_fragment_length': 40}
        result = mask_sequence(self.file_out2, self.file_fasta2, **thresholds)

        positions = []
        for type_ in sorted(result.keys()):
            for record in sorted(result[type_]):
                first_field = record[0].split(' # ')[0]
                # unpacking fails unless there is exactly one '-'
                start, stop = first_field.split('_')[-1].split('-')
                positions.append((type_, start, stop))

        self.assertEqual(positions, self.pos2)
    def test_mask_sequence(self):
        """Check the T0831 match sequence and the parsed first hit.

        Three things are verified against the same expectation ``exp_0``:

        * the sequence of the first match returned by ``mask_sequence``,
        * the first element returned by ``parse_pdb_match``,
        * the result of ``_parse_hit_block`` for the stored block file.

        Only keys present in the observed hit are compared.

        The original type test ``type(x) == dict()`` compared a type object
        with an empty dict and therefore was never true; dict values were
        always compared with ``assertEqual``.  The check below uses
        ``isinstance`` and ``assertDictEqual``, which keeps that strict
        comparison while making the intent explicit.
        """
        # NOTE(review): subsequences_fp='kurt_' presumably makes
        # mask_sequence write output files with that prefix, which are not
        # removed here -- confirm and clean up if so.
        obs = mask_sequence(self.fp_out,
                            self.fp_seqs,
                            subsequences_fp='kurt_',
                            min_prob=95.0,
                            max_evalue=0.1,
                            min_fragment_length=40)

        self.assertEqual(
            obs['match'][0][1],
            ('TMEELLTSLQKKCGTECEEAHRQLVCALNGLAGIHIIKGEYALAAELYREVLRSSEEHKGKLK'
             'TDSLQRLHATHNLMELLIARHPGIPPTLRDGRLEEEAKQLREHYMSKCNTEVAEAQQALYPVQ'
             'QTIHELQRKIHSNSPWWLNVIHRAIEFTIDEELVQRVRNEITSNYKQQTGKLSMSEKFRDCRG'
             'LQFLLTTQMEELNKCQKLVREAVKNLEGPPSRNVIESATVCHLRPARLPLNCCVFCKADELFT'
             'EYESKLFSNTVKGQTAIFEEMIEDEEGLVDDRAPTTTRGLWAISETERSMKAILSFAKSHRFD'
             'VEFVDEGSTSMDLFEAWKKEYKLLHEYWMALRNRVSAVDELAMATERLRVRDPREPKPNPPVL'
             'HIIEPHEVEQNRIKLLNDKAVATSQLQKKLGQLLYLTNLEK'))

        exp_0 = {
            'Probab':
            100.0,
            'Template_Neff':
            8.5,
            'P-value':
            2.8e-85,
            'Similarity':
            1.445,
            'Sum_probs':
            363.5,
            'Score':
            555.49,
            'Cols':
            419,
            'No':
            1,
            'Identities':
            1.0,
            'SS':
            0.0,
            'alignment': {
                'Q T0831': {
                    'start':
                    1,
                    'end':
                    419,
                    'sequence':
                    ('TMEELLTSLQKKCGTECEEAHRQLVCALNGLAGIHIIKGEYALAAELYREVLRSSE'
                     'EHKGKLKTDSLQRLHATHNLMELLIARHPGIPPTLRDGRLEEEAKQLREHYMSKCN'
                     'TEVAEAQQALYPVQQTIHELQRKIHSNSPWWLNVIHRAIEFTIDEELVQRVRNEIT'
                     'SNYKQQTGKLSMSEKFRDCRGLQFLLTTQMEELNKCQKLVREAVKNLEGPPSRNVI'
                     'ESATVCHLRPARLPLNCCVFCKADELFTEYESKLFSNTVKGQTAIFEEMIEDEEGL'
                     'VDDRAPTTTRGLWAISETERSMKAILSFAKSHRFDVEFVDEGSTSMDLFEAWKKEY'
                     'KLLHEYWMALRNRVSAVDELAMATERLRVRDPREPKPNPPVLHIIEPHEVEQNRIK'
                     'LLNDKAVATSQLQKKLGQLLYLTNLEK'),
                    'totallen':
                    419
                },
                'Q Consensus': {
                    'start':
                    1,
                    'end':
                    419,
                    'sequence':
                    ('tmeelltslqkkcgteceeahrqlvcalnglagihiikgeyalaaelyrevlrsse'
                     'ehkgklktdslqrlhathnlmelliarhpgipptlrdgrleeeakqlrehymskcn'
                     'tevaeaqqalypvqqtihelqrkihsnspwwlnvihraieftideelvqrvrneit'
                     'snykqqtgklsmsekfrdcrglqfllttqmeelnkcqklvreavknlegppsrnvi'
                     'esatvchlrparlplnccvfckadelfteyesklfsntvkgqtaifeemiedeegl'
                     'vddraptttrglwaisetersmkailsfakshrfdvefvdegstsmdlfeawkkey'
                     'kllheywmalrnrvsavdelamaterlrvrdprepkpnppvlhiiepheveqnrik'
                     'llndkavatsqlqkklgqllyltnlek'),
                    'totallen':
                    419
                },
                'column score': {
                    'sequence':
                    ('||+|++..|-++|-+|||+++|++|.++|||||||||+|+|..|+++||+||+..+'
                     '++++++++|+||++|+.|||.+++...+||+||+++|..+.+++.+++..|++++.'
                     '..+..|++.+.++.+.+++++.+.++.++||+.+++.+++..++..++++|+++++'
                     '.+|.+..|..++..+|++.+||.+.+++.+++|.++.+-+.+++++|++||..+++'
                     '+++..||++|.+-+...|.+|++++.|..||+.||+.+.+|.+..+++++++++|.'
                     '.++.....++|.|+.|+.|+.+|.|++|++++.|+.+++.+|..-++++++|||||'
                     '+.++.+|++.+..++|.|||.|++-|+|.++|.++.|+||..++|.|+++++.+.+'
                     '+.+++.++...|++++|||.||.||.|')
                },
                'T Consensus': {
                    'start':
                    2,
                    'end':
                    420,
                    'sequence':
                    ('tmeell~~Li~k~~~eceea~R~~v~~~NgLAgl~~l~~~~~~A~~~YrevL~~~~'
                     '~~~~~~~~D~Lq~iH~l~NL~~~l~~~~~~~~~~~~~~~l~~~~~~l~~~Yl~~~~'
                     '~~~~~a~~~~~~~~~~~~~~~~~~~~~~~Ww~~~l~~~~~~~~~~~l~~~i~~~l~'
                     '~~~~~~~~~~~~~~~~~s~~gL~~~l~~~l~~L~~~R~~l~~~l~~L~~~~~~~~v'
                     '~~~~~Ch~~~~~~~~~~C~~C~~~~~~~~yE~~Lf~~~~~~~~~~~~~~~~~~~~~'
                     '~~~~~~~~~~g~~~~S~~e~~lk~i~~~~r~~~~~~~~~~~~~~hl~~le~~rkEf'
                     '~~~r~lw~~~~~~l~a~DEL~ma~~Rlrl~~~~e~~~~~~~~~~i~~~ev~~~~~~'
                     '~~~e~~~a~~~l~r~~gqLrYL~nL~k'),
                    'totallen':
                    420
                },
                'T 4QN1_A': {
                    'start':
                    2,
                    'end':
                    420,
                    'sequence':
                    ('TMEELLTSLQKKCGTECEEAHRQLVCALNGLAGIHIIKGEYALAAELYREVLRSSE'
                     'EHKGKLKTDSLQRLHATHNLMELLIARHPGIPPTLRDGRLEEEAKQLREHYMSKCN'
                     'TEVAEAQQALYPVQQTIHELQRKIHSNSPWWLNVIHRAIEFTIDEELVQRVRNEIT'
                     'SNYKQQTGKLSMSEKFRDCRGLQFLLTTQMEELNKCQKLVREAVKNLEGPPSRNVI'
                     'ESATVCHLRPARLPLNCCVFCKADELFTEYESKLFSNTVKGQTAIFEEMIEDEEGL'
                     'VDDRAPTTTRGLWAISETERSMKAILSFAKSHRFDVEFVDEGSTSMDLFEAWKKEY'
                     'KLLHEYWMALRNRVSAVDELAMATERLRVRDPREPKPNPPVLHIIEPHEVEQNRIK'
                     'LLNDKAVATSQLQKKLGQLLYLTNLEK'),
                    'totallen':
                    420
                },
                'T ss_dssp': {
                    'sequence':
                    ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTCHHHHHHHHHHHHHHHH'
                     'HTTTTCCCCHHHHHHHHHHHHHCCCCCTTSSCCCTTTTTHHHHHHHHHHHHHHHHH'
                     'HHHHHHHHTTHHHHHHHHHHHHSSCSSSCHHHHHHHHHHHTTCHHHHHHHHHHHHC'
                     'CC----------GGGCSSHHHHHHHHHHHHHHHHHHHHHHHHHHHTTCSSCCHHHH'
                     'HHHCCCCCSCSSSCCCCSHHHHHHHHHHHHHHHHBCCC------------------'
                     '-----------CCSBCHHHHHHHHHHHHHHHTTCCHHHHHHHHHHHHHHHHHHHHH'
                     'HHHHHHHHHHHHHHHHHHHHHHHHCCCEECCC---------CCEECTTCHHHHHHH'
                     'HHHHHHHHHHHHHHHHHHHHHHHTTCC')
                },
                'T ss_pred': {
                    'sequence':
                    ('CHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHhCCHHHHHHHHHHHHHHHH'
                     'HhhcCCccchHHHHHHHhhHHHHHHhcCCCCCCCcchhHHHHHHHHHHHHHHHHHH'
                     'HHHHHHHHHHHHHHHHHHHHHHhhccCCcHHHHHHHHHHHCCCcHHHHHHHHHHHH'
                     'hhcccccCCcccccccccHHHHHHHHHHHHHHHHHHHHHHHHHHHhhcCCCcHHHH'
                     'HHhhcCCCCCCCCCCCCCCccccHHHHHHHHHHHhhcccCCCccchHhhhhccccc'
                     'cccCCCcccCCcccccHHHHHHHHHHHHHHhcCCCHHHHHHHHHHHHHHHHHHHHH'
                     'HHHHHHHHHHHHHHHHHHHHHHchhhheeCCCCCCCCCCCcccccCHHHHHHHHHH'
                     'HHHHHHHHHHHHHHHHHHHHHHhcccC')
                },
                'Confidence': {
                    'sequence':
                    ('79999999999999999999999999999999999999999999999999999999'
                     '99999999999999999999999999999999999999999999999999999999'
                     '99999999999999999999988888999999999998877899999999999999'
                     '99987777778999999999999999999999999999999999999999999999'
                     '99999999999877789999999999999999999999999999999999999999'
                     '99888889999999999999999999999999999999999999999999999999'
                     '99999999999999999999999999999999999999999999999999999999'
                     '999999999999999999999999976')
                },
            },
            'Aligned_cols':
            419,
            'E-value':
            1.6e-80,
            'Hit': ('4QN1_A E3 ubiquitin-protein ligase SHPRH; SHPRH, E3 ligas'
                    'e, RING, Ubiquitin; 2.48A {H**o sapiens}')
        }

        def assert_matches_exp_0(hit):
            # compare every observed field with its expected counterpart
            for k in hit.keys():
                if isinstance(hit[k], dict):
                    self.assertDictEqual(hit[k], exp_0[k])
                else:
                    self.assertEqual(hit[k], exp_0[k])

        # first hit of the complete hhsearch output
        obs = parse_pdb_match(self.fp_out)
        assert_matches_exp_0(obs[0])

        # the same hit, parsed from its isolated text block
        with open(get_data_path('test_split_search/T0831_block0.out'),
                  'r') as f:
            block = f.read()
        obs = _parse_hit_block(block)
        assert_matches_exp_0(obs)
Пример #13
0
    def test_mask_sequence_filtering(self):
        """Run ``mask_sequence`` with several thresholds and compare results.

        Each expected fragment is a ``(header, sequence)`` tuple; the
        expected result is a dict with the lists ``'match'`` and
        ``'non_match'``.
        """
        frag_1_461 = (
            ('gi|556503834|ref|NC_000913.3|_2_1-461 # 2j0w_A # 337 # 2799 # '
             '1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_s'
             'pacer=5-10bp;gc_cont=0.531'),
            ('MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALP'
             'NISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALIC'
             'RGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVL'
             'MAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKS'
             'MSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGIS'
             'NLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAER'
             'AMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGS'
             'SERSISVVVNNDDATTGVRVTHQMLFN'))
        frag_462_463 = (
            ('gi|556503834|ref|NC_000913.3|_2_462-463 # 337 # 2799 # 1 # ID='
             '1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer='
             '5-10bp;gc_cont=0.531'),
            'TD')
        frag_464_815 = (
            ('gi|556503834|ref|NC_000913.3|_2_464-815 # 1ebf_A # 337 # 2799 '
             '# 1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs'
             '_spacer=5-10bp;gc_cont=0.531'),
            ('QVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEEL'
             'AQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMD'
             'YYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE'
             'GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNA'
             'EGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKN'
             'GENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS'))
        frag_816_820 = (
            ('gi|556503834|ref|NC_000913.3|_2_816-820 # 337 # 2799 # 1 # ID='
             '1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer='
             '5-10bp;gc_cont=0.531'),
            'WKLGV')
        frag_462_820 = (
            ('gi|556503834|ref|NC_000913.3|_2_462-820 # 337 # 2799 # 1 # ID'
             '=1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer='
             '5-10bp;gc_cont=0.531'),
            ('TDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQE'
             'ELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSS'
             'MDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKL'
             'DEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEF'
             'NAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKV'
             'KNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV'))
        frag_1_820 = (
            ('gi|556503834|ref|NC_000913.3|_2_1-820 # 337 # 2799 # 1 # ID='
             '1_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer='
             '5-10bp;gc_cont=0.531'),
            self.query)

        def check(match, non_match, *args, **kwargs):
            # one mask_sequence call plus the comparison of its full result
            observed = mask_sequence(self.file_a, self.file_query,
                                     *args, **kwargs)
            self.assertEqual(observed,
                             {'match': match, 'non_match': non_match})

        # no thresholds; None in the third positional slot
        check([frag_1_461, frag_464_815], [frag_462_463, frag_816_820], None)

        # single thresholds
        check([frag_1_461, frag_464_815], [frag_462_463, frag_816_820],
              min_prob=100.0)
        check([frag_1_461], [frag_462_820], max_evalue=4.1e-58)
        check([frag_1_461], [frag_462_820], max_pvalue=1e-58)
        check([], [frag_1_820], min_fragment_length=500)

        # combined thresholds, differing only in the minimal fragment length
        check([frag_1_461, frag_464_815], [],
              min_prob=99.0,
              max_evalue=4.90e-41,
              max_pvalue=0.00011,
              min_fragment_length=200)
        check([frag_1_461, frag_464_815], [frag_816_820],
              min_prob=99.0,
              max_evalue=4.90e-41,
              max_pvalue=0.00011,
              min_fragment_length=4)