def test__03_prediction_06(self):
    """Check that ``_prediction`` reports 06B for the 06B sample.

    The object is seeded with ``best_serotype = '06A'`` and a cluster table
    in which ``'cluster'`` holds 06A-06E; after ``_prediction`` runs,
    ``s.sero`` must equal ``'06B'``.

    The assembly is read from ``<data_dir>/06B_1/assemblies.fa``. This test
    does not create that directory itself, but it removes it when done.
    """
    print('test pred 6')
    expected = '06B'
    refs_dir = os.path.join(data_dir, 'serotype_object')
    fw_read = os.path.join(data_dir, '06B_1.fq.gz')
    bw_read = os.path.join(data_dir, '06B_2.fq.gz')
    prefix = os.path.join(data_dir, '06B_1')
    cluster = 'cluster'
    assemblie_file = os.path.join(prefix, 'assemblies.fa')
    try:
        # NOTE(review): '--noclean' is passed as the fifth positional
        # argument; if that slot is a boolean flag, a non-empty string is
        # truthy - confirm against the Serotyping constructor.
        s = serotyping.Serotyping(
            refs_dir, fw_read, bw_read, prefix, '--noclean')
        s.cluster_serotype_dict = {
            'cluster': ['06A', '06B', '06C', '06D', '06E'],
            'NT': ['NT'],
            'cluster_1': ['07A', '07F'],
        }
        s.best_serotype = '06A'
        s.cluster_count = {
            'cluster': 5,
            'NT': 1,
            'cluster_1': 2,
        }
        s._prediction(assemblie_file, cluster)
        self.assertEqual(expected, s.sero)
    finally:
        # Remove the working directory even when the prediction or the
        # assertion fails, so a failing run does not leave it behind.
        # Best-effort: a cleanup problem must not mask the real outcome.
        shutil.rmtree(prefix, ignore_errors=True)
def test_06F(self):
    """Check that ``_prediction`` reports 06F for the 06F assembly.

    The read paths point at the 09V fixture files, while the assembly is
    taken from ``<data_dir>/06F/assemblies.fa``. The object is seeded with
    ``best_serotype = '06A'`` and ``s.sero`` must end up as ``'06F'``.
    """
    expected = '06F'
    refs_dir = os.path.join(data_dir, 'serotype_object')
    fw_read = os.path.join(data_dir, '09V', '09V_1.fq.gz')
    bw_read = os.path.join(data_dir, '09V', '09V_2.fq.gz')
    prefix = os.path.join(data_dir, '06F')
    cluster = 'cluster'
    assemblie_file = os.path.join(data_dir, '06F', 'assemblies.fa')
    # NOTE(review): '--noclean' is passed as the fifth positional argument;
    # if that slot is a boolean flag, a non-empty string is truthy - confirm
    # against the Serotyping constructor.
    s = serotyping.Serotyping(
        refs_dir, fw_read, bw_read, prefix, '--noclean')
    s.cluster_serotype_dict = {
        'cluster': ['06A', '06B', '06C', '06D', '06E', '06F', '06G'],
        'NT': ['NT'],
        'cluster_1': ['07A', '07F'],
        'cluster_2': ['11A', '11B', '11C', '11D', '11F'],
    }
    s.best_serotype = '06A'
    # NOTE(review): 'cluster_2' lists five serotypes above but is counted as
    # 3 here, unlike the other entries whose counts equal their list
    # lengths. Left unchanged; verify whether this is intentional.
    s.cluster_count = {
        'cluster': 7,
        'NT': 1,
        'cluster_1': 2,
        'cluster_2': 3,
    }
    s._prediction(assemblie_file, cluster)
    self.assertEqual(expected, s.sero)
def test_07C(self):
    """Check that ``_prediction`` reports 07C for the ERR1439287 assembly.

    The read paths point at the 09V fixture files, while the assembly is
    taken from ``<data_dir>/ERR1439287/assemblies.fa``. The cluster under
    test is ``'cluster_1'`` (07B, 07C, 40) with ``best_serotype`` seeded
    as ``'07C'``; ``s.sero`` must end up as ``'07C'``.
    """
    expected = '07C'
    refs_dir = os.path.join(data_dir, 'serotype_object')
    fw_read = os.path.join(data_dir, '09V', '09V_1.fq.gz')
    bw_read = os.path.join(data_dir, '09V', '09V_2.fq.gz')
    prefix = os.path.join(data_dir, 'ERR1439287')
    cluster = 'cluster_1'
    assemblie_file = os.path.join(data_dir, 'ERR1439287', 'assemblies.fa')
    # NOTE(review): '--noclean' is passed as the fifth positional argument;
    # if that slot is a boolean flag, a non-empty string is truthy - confirm
    # against the Serotyping constructor.
    s = serotyping.Serotyping(
        refs_dir, fw_read, bw_read, prefix, '--noclean')
    s.cluster_serotype_dict = {
        'cluster': ['06A', '06B', '06C', '06D', '06E'],
        'NT': ['NT'],
        'cluster_1': ['07B', '07C', '40'],
        'cluster_2': ['33A', '33F', '37'],
    }
    s.best_serotype = '07C'
    s.cluster_count = {
        'cluster': 5,
        'NT': 1,
        'cluster_1': 3,
        'cluster_2': 3,
    }
    s._prediction(assemblie_file, cluster)
    self.assertEqual(expected, s.sero)
def test_mixed_sample_15B_C(self):
    """Check that ``_prediction`` reports the combined call '15B/15C'.

    Reads and assembly both come from ``<data_dir>/15B_C``. The cluster
    under test is ``'cluster_3'`` (15A, 15B, 15C, 15F) with
    ``best_serotype`` seeded as ``'15C'``; ``s.sero`` must end up as
    ``'15B/15C'``.
    """
    expected = '15B/15C'
    refs_dir = os.path.join(data_dir, 'serotype_object')
    fw_read = os.path.join(data_dir, '15B_C', '15B_C_1.fq')
    bw_read = os.path.join(data_dir, '15B_C', '15B_C_2.fq')
    prefix = os.path.join(data_dir, '15B_C')
    cluster = 'cluster_3'
    assemblie_file = os.path.join(data_dir, '15B_C', 'assemblies.fa')
    # NOTE(review): '--noclean' is passed as the fifth positional argument;
    # if that slot is a boolean flag, a non-empty string is truthy - confirm
    # against the Serotyping constructor.
    s = serotyping.Serotyping(
        refs_dir, fw_read, bw_read, prefix, '--noclean')
    s.cluster_serotype_dict = {
        'cluster': ['06A', '06B', '06C', '06D', '06E'],
        'NT': ['NT'],
        'cluster_1': ['07A', '07F'],
        'cluster_2': ['10A', '10B'],
        'cluster_3': ['15A', '15B', '15C', '15F'],
    }
    s.best_serotype = '15C'
    s.cluster_count = {
        'cluster': 5,
        'NT': 1,
        'cluster_1': 2,
        'cluster_2': 2,
        'cluster_3': 4,
    }
    s._prediction(assemblie_file, cluster)
    self.assertEqual(expected, s.sero)
def full_run_09V(self):
    """Run the complete ``Serotyping.run()`` pipeline on the 09V reads.

    Asserts that ``sero`` equals ``'09V'`` afterwards. The output prefix is
    the relative name ``'09V'``.

    NOTE(review): the method name has no ``test`` prefix, so the default
    unittest loader would not collect it - presumably deliberate; confirm.
    """
    sample_dir = os.path.join(data_dir, '09V')
    forward_reads = os.path.join(sample_dir, '09V_1.fq.gz')
    reverse_reads = os.path.join(sample_dir, '09V_2.fq.gz')
    database_dir = os.path.join(data_dir, 'serotype_object')
    output_prefix = '09V'

    # NOTE(review): '--noclean' is passed as the fifth positional argument;
    # if that slot is a boolean flag, a non-empty string is truthy - confirm
    # against the Serotyping constructor.
    typer = serotyping.Serotyping(
        database_dir, forward_reads, reverse_reads, output_prefix,
        '--noclean')
    typer.run()

    self.assertEqual('09V', typer.sero)
def test_run_kmc(self):
    """Check that ``_run_kmc`` sets ``best_serotype`` to '06A'.

    Uses the 06B fixture reads and the relative output prefix ``'06B_1'``.
    """
    expected = '06A'
    refs_dir = os.path.join(data_dir, 'serotype_object')
    fw_read = os.path.join(data_dir, '06B_1.fq.gz')
    bw_read = os.path.join(data_dir, '06B_2.fq.gz')
    prefix = '06B_1'
    # NOTE(review): '--noclean' is passed as the fifth positional argument;
    # if that slot is a boolean flag, a non-empty string is truthy - confirm
    # against the Serotyping constructor.
    s = serotyping.Serotyping(
        refs_dir, fw_read, bw_read, prefix, '--noclean')
    s._run_kmc()
    self.assertEqual(expected, s.best_serotype)
def test_01_run_ariba_on_cluster(self):
    """Smoke-test ``_run_ariba_on_cluster`` for the serogroup 6 cluster.

    There is no assertion: the test passes as long as the call does not
    raise. The output prefix is ``<data_dir>/06B_1``.
    """
    print('test ariba')
    refs_dir = os.path.join(data_dir, 'serotype_object')
    fw_read = os.path.join(data_dir, '06B_1.fq.gz')
    bw_read = os.path.join(data_dir, '06B_2.fq.gz')
    prefix = os.path.join(data_dir, '06B_1')
    cluster = 'cluster'
    # NOTE(review): '--noclean' is passed as the fifth positional argument;
    # if that slot is a boolean flag, a non-empty string is truthy - confirm
    # against the Serotyping constructor.
    s = serotyping.Serotyping(
        refs_dir, fw_read, bw_read, prefix, '--noclean')
    s.cluster_serotype_dict = {
        'cluster': ['06A', '06B', '06C', '06D', '06E'],
        'NT': ['NT'],
        'cluster_1': ['07A', '07F'],
    }
    s._run_ariba_on_cluster(cluster)
def run(options):
    """Validate the paired read arguments, then run serotyping.

    Args:
        options: object exposing ``read1``, ``read2``, ``databases``,
            ``prefix``, ``noclean`` and ``coverage`` attributes.

    Exits with status 1 (after printing a message to stderr) when both
    reads are the same file, or when the parts of the two read names before
    their last underscore differ. Otherwise builds a ``Serotyping`` object
    with ``clean`` set to the inverse of ``options.noclean`` and calls its
    ``run()`` method.
    """
    def _abort(message):
        # Report the problem on stderr and stop with a non-zero status.
        print(message, file=sys.stderr)
        sys.exit(1)

    forward, reverse = options.read1, options.read2
    if forward == reverse:
        _abort(
            'Same file provided for forwards and reverse reads. Cannot continue')

    # Mates are expected to share everything before the final underscore.
    forward_stem = forward.rsplit('_', 1)[0]
    reverse_stem = reverse.rsplit('_', 1)[0]
    if forward_stem != reverse_stem:
        _abort(
            'Names for forwards and reverse reads does not match. Cannot continue')

    typer = serotyping.Serotyping(
        options.databases,
        forward,
        reverse,
        options.prefix,
        clean=(not options.noclean),
    )
    # NOTE(review): the coverage option is read here but never handed to the
    # Serotyping object - confirm whether it should be forwarded.
    requested_coverage = options.coverage
    typer.run()