def test_MrAIC(self):
    """Check the models reported when gene trees are built with --mraic.

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--gene-trees' and '--mraic', then
    verifies that both 'GTR' and 'HKY' appear among the parsed model
    names and that exactly three output values were produced.
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--gene-trees',
        '--mraic',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # The input file stays open for the whole run because the sandboxed job
    # reads it as stdin, and is closed even if the job raises.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; only the
            # value of each (key, value) pair is needed.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    # Model name: second space-separated token of each value, with single
    # quotes stripped, keeping the text after the last '='.
    models = [
        item.split(" ")[1].strip("'").split('=')[-1]
        for item in results
    ]

    # Expected models look like: ['GTR', 'HKY', 'HKY']
    self.assertIn('GTR', models)
    self.assertIn('HKY', models)
    self.assertEqual(len(results), 3)
def test_Bootstrapping(self):
    """Check that bootstrapping produces the correct number of replicates.

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--full-analysis' and '--bootreps=5',
    then verifies that exactly 15 output values were produced
    (presumably 3 alignments x 5 replicates -- confirm against the job).
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--full-analysis',
        '--bootreps=5',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # The input file stays open for the whole run because the sandboxed job
    # reads it as stdin, and is closed even if the job raises.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; only the
            # value of each (key, value) pair is needed.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    self.assertEqual(len(results), 15)
def test_MrAIC(self):
    """Check the models reported when gene trees are built with --mraic.

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--gene-trees' and '--mraic', then
    verifies that both 'GTR' and 'HKY' appear among the parsed model
    names and that exactly three output values were produced.
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--gene-trees',
        '--mraic',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # Keep the input open while the sandboxed job consumes it as stdin; the
    # context manager closes it afterwards, including on failure.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; the key
            # of each pair is not used by this test.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    # Model name: second space-separated token of each value, with single
    # quotes stripped, keeping the text after the last '='.
    models = [
        item.split(" ")[1].strip("'").split('=')[-1]
        for item in results
    ]

    # Expected models look like: ['GTR', 'HKY', 'HKY']
    self.assertIn('GTR', models)
    self.assertIn('HKY', models)
    self.assertEqual(len(results), 3)
def test_GeneTrees(self):
    """Check that building gene trees yields the expected number of trees.

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--gene-trees', then verifies that
    exactly three output values were produced.
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--gene-trees',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # The input file stays open for the whole run because the sandboxed job
    # reads it as stdin, and is closed even if the job raises.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; only the
            # value of each (key, value) pair is needed.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    self.assertEqual(len(results), 3)
def test_Bootstrapping(self):
    """Check that bootstrapping produces the correct number of replicates.

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--full-analysis' and '--bootreps=5',
    then verifies that exactly 15 output values were produced
    (presumably 3 alignments x 5 replicates -- confirm against the job).
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--full-analysis',
        '--bootreps=5',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # Keep the input open while the sandboxed job consumes it as stdin; the
    # context manager closes it afterwards, including on failure.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; the key
            # of each pair is not used by this test.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    self.assertEqual(len(results), 15)
def test_GeneTrees(self):
    """Check that building gene trees yields the expected number of trees.

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--gene-trees', then verifies that
    exactly three output values were produced.
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--gene-trees',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # Keep the input open while the sandboxed job consumes it as stdin; the
    # context manager closes it afterwards, including on failure.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; the key
            # of each pair is not used by this test.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    self.assertEqual(len(results), 3)
def test_get_genetrees(self):
    """[MrJob] Generate genetrees

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--gene-trees', then verifies that
    exactly three output values were produced.
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--gene-trees',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        "-",
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # The input file stays open for the whole run because the sandboxed job
    # reads it as stdin, and is closed even if the job raises.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; only the
            # value of each (key, value) pair is needed.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    # TODO: better checking of tree structure here - do distance checking
    self.assertEqual(len(results), 3)
def test_MrAIC(self):
    """[MrJob] Select model and generate genetrees

    Runs ProcessPhyloData with the local runner on
    'alignments/3.oneliners' using '--gene-trees' and '--mraic', then
    verifies that the models extracted by
    ``self.get_models_from_mrjob_out`` equal ['HKY', 'GTR', 'HKY'],
    in that order.
    """
    job_args = [
        '-r', 'local',
        '--setup-cmd', 'mkdir -p tmp',
        '--gene-trees',
        '--mraic',
        '--archive=../gzips/osx.phylo.tar.gz#bin',
        '-',
    ]
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 (removed in
    # 3.11); kept unchanged here -- confirm the target interpreter.
    # The input file stays open for the whole run because the sandboxed job
    # reads it as stdin, and is closed even if the job raises.
    with open('alignments/3.oneliners', 'rU') as test_data:
        mr_job = ProcessPhyloData(job_args)
        mr_job.sandbox(stdin=test_data)
        with mr_job.make_runner() as runner:
            runner.run()
            # Use the job's specified protocol to read the output; only the
            # value of each (key, value) pair is needed.
            results = [
                mr_job.parse_output_line(line)[1]
                for line in runner.stream_output()
            ]

    # check models
    expected_models = ['HKY', 'GTR', 'HKY']
    observed_models = self.get_models_from_mrjob_out(results)
    # assertEqual (rather than a bare assert) still runs under ``python -O``
    # and reports both lists on failure.
    self.assertEqual(observed_models, expected_models)