def setUp(self):
    # Temporary input file
    self.tmp_otu_filepath = get_tmp_filename(
        prefix='R_test_otu_table_',
        suffix='.txt')
    seq_file = open(self.tmp_otu_filepath, 'w')
    seq_file.write(test_otu_table)
    seq_file.close()

    self.tmp_map_filepath = get_tmp_filename(
        prefix='R_test_map_',
        suffix='.txt')
    seq_file = open(self.tmp_map_filepath, 'w')
    seq_file.write(test_map)
    seq_file.close()

    self.files_to_remove = \
        [self.tmp_otu_filepath, self.tmp_map_filepath]

    # Prep input files in R format
    output_dir = mkdtemp()
    self.dirs_to_remove = [output_dir]

    # get random forests results
    mkdir(join(output_dir, 'random_forest'))
    self.results = run_supervised_learning(
        self.tmp_otu_filepath, self.tmp_map_filepath, 'Individual',
        ntree=100, errortype='oob', output_dir=output_dir)
def setUp(self):
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = get_tmp_filename(tmp_dir=tmp_dir,
                                     prefix='qiime_parallel_tests_',
                                     suffix='',
                                     result_constructor=str)
    self.dirs_to_remove.append(self.test_out)
    create_dir(self.test_out)

    # Create example input file
    self.inseqs1_fp = get_tmp_filename(tmp_dir=self.test_out,
                                       prefix='qiime_inseqs',
                                       suffix='.fasta')
    inseqs1_f = open(self.inseqs1_fp, 'w')
    inseqs1_f.write(inseqs1)
    inseqs1_f.close()
    self.files_to_remove.append(self.inseqs1_fp)

    # Define number of seconds a test can run for before timing out
    # and failing
    initiate_timeout(60)
def setUp(self):
    # Temporary input file
    self.tmp_pc_fp = get_tmp_filename(
        prefix='R_test_pcoa',
        suffix='.txt')
    seq_file = open(self.tmp_pc_fp, 'w')
    seq_file.write(test_pc)
    seq_file.close()

    self.tmp_map_fp = get_tmp_filename(
        prefix='R_test_map_',
        suffix='.txt')
    map_file = open(self.tmp_map_fp, 'w')
    map_file.write(test_map)
    map_file.close()

    self.files_to_remove = \
        [self.tmp_pc_fp, self.tmp_map_fp]

    # Prep input files in R format
    self.output_dir = mkdtemp()
    self.dirs_to_remove = [self.output_dir]
def test_get_common_OTUs(self):
    """get_common_OTUs works"""
    # create the temporary OTU tables
    otu_table1 = '\n'.join(['#Full OTU Counts',
        '#OTU ID\tsample1\tsample2\tsample3\tConsensus Lineage',
        '0\t0\t2\t0\tlineage0',
        '1\t1\t0\t0\tlineage1',
        '2\t1\t1\t1\tlineage2'])
    otu_table2 = '\n'.join(['#Full OTU Counts',
        '#OTU ID\tsample1\tsample2\tsample3\tConsensus Lineage',
        '0\t0\t2\t0\tlineage0',
        '1\t1\t0\t0\tlineage1',
        '2\t0\t1\t1\tlineage2'])
    otu_table3 = '\n'.join(['#Full OTU Counts',
        '#OTU ID\tsample1\tsample2\tsample3\tConsensus Lineage',
        '0\t0\t2\t0\tlineage0',
        '2\t1\t1\t1\tlineage2'])
    category_info = {'sample1': '0.1',
                     'sample2': '0.2',
                     'sample3': '0.3'}
    OTU_list = ['1', '0', '2']

    fp1 = get_tmp_filename()
    fp2 = get_tmp_filename()
    fp3 = get_tmp_filename()
    try:
        f1 = open(fp1, 'w')
        f2 = open(fp2, 'w')
        f3 = open(fp3, 'w')
    except IOError, e:
        # report the paths, not the file objects, which may be unbound
        # if open failed
        raise IOError, "Could not create temporary files: %s, %s, %s" \
            % (fp1, fp2, fp3)
def setUp(self):
    self._files_to_remove = []

    self.fasta_file_path = get_tmp_filename(prefix='fastq_',
                                            suffix='.fastq')
    fastq_file = open(self.fasta_file_path, 'w')
    fastq_file.write(fastq_test_string)
    fastq_file.close()

    # Error testing files
    false_fasta_file = '/'
    false_qual_file = '/'

    self.read_only_output_dir = get_tmp_filename(prefix='read_only_',
                                                 suffix='/')
    create_dir(self.read_only_output_dir)
    chmod(self.read_only_output_dir, 0577)

    self.output_dir = get_tmp_filename(prefix='convert_fastaqual_fastq_',
                                       suffix='/')
    self.output_dir += sep
    create_dir(self.output_dir)

    self._files_to_remove.append(self.fasta_file_path)
def setUp(self): # create the temporary input files self.tmp_seq_filepath = get_tmp_filename(prefix="ReferenceRepSetPickerTest_", suffix=".fasta") seq_file = open(self.tmp_seq_filepath, "w") seq_file.write(dna_seqs) seq_file.close() self.ref_seq_filepath = get_tmp_filename(prefix="ReferenceRepSetPickerTest_", suffix=".fasta") seq_file = open(self.ref_seq_filepath, "w") seq_file.write(reference_seqs) seq_file.close() self.tmp_otu_filepath = get_tmp_filename(prefix="ReferenceRepSetPickerTest_", suffix=".otu") otu_file = open(self.tmp_otu_filepath, "w") otu_file.write(otus_w_ref) otu_file.close() self.result_filepath = get_tmp_filename(prefix="ReferenceRepSetPickerTest_", suffix=".fasta") otu_file = open(self.result_filepath, "w") otu_file.write(otus_w_ref) otu_file.close() self.files_to_remove = [ self.tmp_seq_filepath, self.tmp_otu_filepath, self.ref_seq_filepath, self.result_filepath, ] self.params = {"Algorithm": "first", "ChoiceF": first_id}
def setUp(self): # Temporary input file self.tmp_otu_filepath = get_tmp_filename(prefix="R_test_otu_table_", suffix=".txt") seq_file = open(self.tmp_otu_filepath, "w") seq_file.write(test_otu_table) seq_file.close() self.tmp_map_filepath = get_tmp_filename(prefix="R_test_map_", suffix=".txt") seq_file = open(self.tmp_map_filepath, "w") seq_file.write(test_map) seq_file.close() self.files_to_remove = [self.tmp_otu_filepath, self.tmp_map_filepath] # Prep input files in R format output_dir = mkdtemp() self.dirs_to_remove = [output_dir] # get random forests results mkdir(join(output_dir, "random_forest")) self.results = run_supervised_learning( self.tmp_otu_filepath, self.tmp_map_filepath, "Individual", ntree=100, errortype="oob", output_dir=output_dir, )
def setUp(self):
    self.infernal_test1_input_fp = get_tmp_filename(
        prefix='InfernalAlignerTests_', suffix='.fasta')
    open(self.infernal_test1_input_fp, 'w').write(
        infernal_test1_input_fasta)

    self.infernal_test1_template_fp = get_tmp_filename(
        prefix='InfernalAlignerTests_', suffix='template.sto')
    open(self.infernal_test1_template_fp, 'w').\
        write(infernal_test1_template_stockholm)

    # create temp file names (and touch them so we can reliably
    # clean them up)
    self.result_fp = get_tmp_filename(
        prefix='InfernalAlignerTests_', suffix='.fasta')
    open(self.result_fp, 'w').close()

    self.log_fp = get_tmp_filename(
        prefix='InfernalAlignerTests_', suffix='.log')
    open(self.log_fp, 'w').close()

    self._paths_to_clean_up = [
        self.infernal_test1_input_fp,
        self.result_fp,
        self.log_fp,
        self.infernal_test1_template_fp,
    ]

    self.infernal_test1_aligner = InfernalAligner({
        'template_filepath': self.infernal_test1_template_fp,
    })
    self.infernal_test1_expected_aln = \
        LoadSeqs(data=infernal_test1_expected_alignment,
                 aligned=Alignment, moltype=DNA)
def null_from_data(data, tpk, Rseed=None):
    """generates null from dirichlet model of data based on row sums

    Inputs:
     data - 2d array of otu counts; rows are OTUs, cols are samples.
     tpk - int, total prior knowledge to allow the rdirichlet code. higher
     tpk will result in the simulated table more closely matching the
     initial data.
     Rseed - int/None, whether or not to seed R at a given value.
    """
    prior_vals = data.sum(1)
    pvs_str = 'c(%s)' % ','.join(map(str, prior_vals))
    sams_str = data.shape[1]  # num cols
    seq_depth = data.sum(0).mean()
    out_fp = get_tmp_filename()
    command_str = COMMAND_STR % (pvs_str, tpk, sams_str, seq_depth, out_fp)
    if Rseed is not None:
        # splice a set.seed call into the R command string at a fixed
        # offset (23 characters in)
        command_str = command_str[:23] + 'set.seed(' + str(Rseed) + ')\n' + \
            command_str[23:]
    command_file = get_tmp_filename()
    open(command_file, 'w').write(command_str)
    cmd_status, cmd_output = getstatusoutput('R --slave < ' + command_file)
    if cmd_status == 32512:
        raise ValueError, 'Most likely you do not have R installed, ' +\
            'which is a dependency for QIIME'
    elif cmd_status == 256:
        raise ValueError, 'Most likely you do not have the gtools ' +\
            'library installed in R, which is a dependency for QIIME'
    lines = map(str.rstrip, open(out_fp).readlines())
    return array([map(float, line.split(',')) for line in lines])
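# A minimal usage sketch for null_from_data (the data values below are
# hypothetical). It assumes R and its gtools package are installed, and
# that COMMAND_STR and get_tmp_filename are in scope as above. Rows are
# OTUs, columns are samples; the null table preserves the relative OTU
# weights given by the row sums of `data`.
#
#     from numpy import array
#     data = array([[10., 0., 3.],
#                   [5., 8., 2.],
#                   [0., 1., 9.]])
#     null = null_from_data(data, tpk=100, Rseed=42)  # seeded for reproducibility
#     assert null.shape[1] == data.shape[1]  # same number of samples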
def _create_directory_structure(self, correct=True):
    """Creates a directory structure for check_exist_filepaths function

    Returns:
        base_dir: the base directory path
        mapping_fps: a list with the relative paths from base_dir to
            the created mapping files

    If correct=False, it adds one mapping file to the mapping_fps list
    that does not exist.
    """
    base_dir = get_tmp_filename(tmp_dir=self.tmp_dir, suffix='')
    mkdir(base_dir)
    self._dirs_to_clean_up.append(base_dir)
    mapping_fps = []
    for i in range(5):
        mapping_fp = get_tmp_filename(tmp_dir='', suffix='.txt')
        mapping_fps.append(mapping_fp)
        path_to_create = join(base_dir, mapping_fp)
        f = open(path_to_create, 'w')
        f.close()
        self._paths_to_clean_up.append(path_to_create)
    if not correct:
        mapping_fps.append(get_tmp_filename(tmp_dir='', suffix='.txt'))
    return base_dir, mapping_fps
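# Hedged sketch of how a test might consume this helper (names follow this
# class; check_exist_filepaths itself is defined elsewhere):
#
#     base_dir, mapping_fps = self._create_directory_structure(correct=False)
#     # the last entry of mapping_fps names a file that was never created,
#     # so check_exist_filepaths should flag it as missing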
def test_test_wrapper_multiple(self):
    """test_wrapper_multiple works"""
    # create the temporary OTU tables
    otu_table1 = '\n'.join(['#Full OTU Counts',
        '#OTU ID\tsample1\tsample2\tsample3\tsample4\tConsensus Lineage',
        '0\t1\t2\t0\t1\tlineage0',
        '1\t1\t0\t0\t1\tlineage1',
        '2\t1\t1\t1\t1\tlineage2'])
    otu_table2 = '\n'.join(['#Full OTU Counts',
        '#OTU ID\tsample1\tsample2\tsample3\tsample4\tConsensus Lineage',
        '0\t0\t2\t0\t1\tlineage0',
        '1\t1\t0\t0\t1\tlineage1',
        '2\t0\t1\t1\t1\tlineage2'])
    otu_table3 = '\n'.join(['#Full OTU Counts',
        '#OTU ID\tsample1\tsample2\tsample3\tConsensus Lineage',
        '0\t0\t2\t0\t1\tlineage0',
        '2\t1\t1\t1\t1\tlineage2'])
    category_mapping = ['#SampleID\tcat1\tcat2',
                        'sample1\tA\t0',
                        'sample2\tA\t8.0',
                        'sample3\tB\t1.0',
                        'sample4\tB\t1.0']
    OTU_list = ['1', '0']

    fp1 = get_tmp_filename()
    fp2 = get_tmp_filename()
    fp3 = get_tmp_filename()
    try:
        f1 = open(fp1, 'w')
        f2 = open(fp2, 'w')
        f3 = open(fp3, 'w')
    except IOError, e:
        # report the paths, not the file objects, which may be unbound
        # if open failed
        raise IOError, "Could not create temporary files: %s, %s, %s" \
            % (fp1, fp2, fp3)
def test_plot_rank_abundance_graphs_dense(self):
    """plot_rank_abundance_graphs works with any number of samples
    (DenseOTUTable)"""
    self.otu_table = parse_biom_table_str(otu_table_dense)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)
    tmp_fname = get_tmp_filename(tmp_dir=self.dir)

    # test empty sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs,
                      tmp_fname, '', self.otu_table)
    # test invalid sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs,
                      tmp_fname, 'Invalid_sample_name', self.otu_table)

    # test with two samples
    file_type = "pdf"
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_2." + file_type)
    plot_rank_abundance_graphs(tmp_file, 'S3,S5', self.otu_table,
                               file_type=file_type)
    self.assertTrue(exists(tmp_file))
    self.files_to_remove.append(tmp_file)

    # test with all samples
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_1_2." + file_type)
    plot_rank_abundance_graphs(tmp_file, '*', self.otu_table,
                               file_type=file_type)
    self.files_to_remove.append(tmp_file)
    self.assertTrue(exists(tmp_file))
def setUp(self): """ """ self.files_to_remove = [] self.dirs_to_remove = [] tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_parallel_tests_', suffix='', result_constructor=str) self.dirs_to_remove.append(self.test_out) create_dir(self.test_out) self.template_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_template', suffix='.fasta') template_f = open(self.template_fp,'w') template_f.write(pynast_test1_template_fasta) template_f.close() self.files_to_remove.append(self.template_fp) self.inseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs1_f = open(self.inseqs1_fp,'w') inseqs1_f.write(inseqs1) inseqs1_f.close() self.files_to_remove.append(self.inseqs1_fp) initiate_timeout(60)
def test_call_log_file(self):
    """GenericRepSetPicker.__call__ writes log when expected
    """
    tmp_log_filepath = get_tmp_filename(
        prefix='GenericRepSetPickerTest.test_call_output_to_file_l_',
        suffix='.txt')
    tmp_result_filepath = get_tmp_filename(
        prefix='GenericRepSetPickerTest.test_call_output_to_file_r_',
        suffix='.txt')

    app = GenericRepSetPicker(params=self.params)
    obs = app(self.tmp_seq_filepath, self.tmp_otu_filepath,
              result_path=tmp_result_filepath, log_path=tmp_log_filepath)

    log_file = open(tmp_log_filepath)
    log_file_str = log_file.read()
    log_file.close()
    # remove the temp files before running the test, so in
    # case it fails the temp file is still cleaned up
    remove(tmp_log_filepath)
    remove(tmp_result_filepath)

    log_file_exp = ["GenericRepSetPicker parameters:",
                    'Algorithm:first',
                    "Application:None",
                    'ChoiceF:first',
                    'ChoiceFRequiresSeqs:False',
                    "Result path: %s" % tmp_result_filepath,
                    ]
    # compare data in log file to fake expected log file
    for i, j in zip(log_file_str.splitlines(), log_file_exp):
        if not i.startswith('ChoiceF:'):  # can't test, different each time
            self.assertEqual(i, j)
def model2_table(otu_sums, samples, seq_depth, tpk):
    """Return OTU table drawn from dirichlet distribution with given params.

    Inputs:
     otu_sums - array of floats, weights you want to give to each otu in
     terms of its probability mass for sampling. basically how large you
     want the sum of that otu to be compared to the others.
     samples - int, number of samples for each otu.
     seq_depth - the sum of each col of the data table, the number of
     observations.
     tpk - total prior knowledge. controls how spiky the distribution will
     be. higher total prior knowledge will allow it to be much spikier
     which means less deviation away from otu_sums.
    """
    prior_vals = otu_sums
    pvs_str = 'c(%s)' % ','.join(map(str, prior_vals))
    out_fp = get_tmp_filename()
    command_str = COMMAND_STR % (pvs_str, tpk, samples, seq_depth, out_fp)
    command_file = get_tmp_filename()
    o = open(command_file, 'w')
    o.write(command_str)
    o.close()
    os.system('R --slave < ' + command_file)
    o = open(out_fp)
    lines = map(str.rstrip, o.readlines())
    o.close()
    return array([map(float, line.split(',')) for line in lines])
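# Illustrative call for model2_table (the numbers are hypothetical; assumes
# the same R/gtools setup required by COMMAND_STR above):
#
#     from numpy import array
#     otu_sums = array([50., 30., 20.])  # desired relative OTU weights
#     tbl = model2_table(otu_sums, samples=5, seq_depth=1000, tpk=100)
#     # tbl has one row per OTU and 5 columns; a higher tpk keeps the
#     # simulated OTU sums closer to otu_sums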
def setUp(self): """ """ self.files_to_remove = [] self.dirs_to_remove = [] tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_parallel_taxonomy_assigner_tests_', suffix='', result_constructor=str) self.dirs_to_remove.append(self.test_out) create_dir(self.test_out) self.tmp_seq_filepath = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_parallel_taxonomy_assigner_tests_input', suffix='.fasta') seq_file = open(self.tmp_seq_filepath, 'w') seq_file.write(blast_test_seqs.toFasta()) seq_file.close() self.files_to_remove.append(self.tmp_seq_filepath) self.id_to_taxonomy_file = NamedTemporaryFile( prefix='qiime_parallel_taxonomy_assigner_tests_id_to_taxonomy', suffix='.txt',dir=tmp_dir) self.id_to_taxonomy_file.write(blast_id_to_taxonomy) self.id_to_taxonomy_file.seek(0) self.reference_seqs_file = NamedTemporaryFile( prefix='qiime_parallel_taxonomy_assigner_tests_ref_seqs', suffix='.fasta',dir=tmp_dir) self.reference_seqs_file.write(blast_reference_seqs.toFasta()) self.reference_seqs_file.seek(0) initiate_timeout(60)
def test_process_illumina_single_end_read_file_bc_in_seq(self):
    """process_illumina_single_end_read_file: functions as expected with
    bc in seq
    """
    output_seqs_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.fasta')
    output_qual_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    read_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    open(read_fp, 'w').write('\n'.join(self.illumina_read3_bc_in_seq))
    self.files_to_remove.append(read_fp)

    actual = process_illumina_single_end_read_file(
        read_fp, output_seqs_fp, output_qual_fp,
        barcode_to_sample_id=self.barcode_to_sample_id4,
        barcode_length=12, store_unassigned=True, max_bad_run_length=0,
        quality_threshold=1e-5, min_per_read_length=70, rev_comp=False,
        rev_comp_barcode=False, barcode_in_seq=True,
        seq_max_N=0, start_seq_id=0)
    self.files_to_remove.append(output_seqs_fp)
    self.files_to_remove.append(output_qual_fp)

    # next_seq_id is returned correctly
    self.assertEqual(actual, 2)
    # correct seq file is returned
    self.assertEqual([l.strip() for l in list(open(output_seqs_fp))],
                     self.expected_seq_file_bc_in_seq1)
    # correct qual file is returned
    self.assertEqual([l.strip() for l in list(open(output_qual_fp))],
                     self.expected_qual_file_bc_in_seq1)
def test_process_illumina_paired_end_read_files3(self):
    """process_illumina_paired_end_read_files: functions as expected with
    seq_max_N
    """
    output_seqs_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.fasta')
    output_qual_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    read1_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    read2_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    open(read1_fp, 'w').write('\n'.join(self.illumina_read1_seq_N))
    self.files_to_remove.append(read1_fp)
    open(read2_fp, 'w').write('\n'.join(self.illumina_read2_seq_N))
    self.files_to_remove.append(read2_fp)

    # seq_max_N=2 allows both sequences
    actual = process_illumina_paired_end_read_files(
        read1_fp, read2_fp, output_seqs_fp, output_qual_fp,
        barcode_to_sample_id=self.barcode_to_sample_id3,
        barcode_length=6, rev_comp_barcode=True,
        store_unassigned=True, max_bad_run_length=0,
        quality_threshold=1e-5, min_per_read_length=70,
        start_seq_id=0, seq_max_N=2)
    self.files_to_remove.append(output_seqs_fp)
    self.files_to_remove.append(output_qual_fp)

    # next_seq_id is returned correctly
    self.assertEqual(actual, 2)
    # correct seq file is returned
    self.assertEqual([l.strip() for l in list(open(output_seqs_fp))],
                     self.expected_seqs_file3)
    # correct qual file is returned
    self.assertEqual([l.strip() for l in list(open(output_qual_fp))],
                     self.expected_qual_file3)

    # Lower seq_max_N returns no results
    actual = process_illumina_paired_end_read_files(
        read1_fp, read2_fp, output_seqs_fp, output_qual_fp,
        barcode_to_sample_id=self.barcode_to_sample_id3,
        barcode_length=6, rev_comp_barcode=True,
        store_unassigned=True, max_bad_run_length=0,
        quality_threshold=1e-5, min_per_read_length=70,
        start_seq_id=0, seq_max_N=0)

    # next_seq_id is returned correctly
    self.assertEqual(actual, 0)
    # correct seq file is returned
    self.assertEqual([l.strip() for l in list(open(output_seqs_fp))], [])
    # correct qual file is returned
    self.assertEqual([l.strip() for l in list(open(output_qual_fp))], [])
def setUp(self): """Define some test data.""" self.qiime_config = load_qiime_config() self.dirs_to_remove = [] self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/' if not exists(self.tmp_dir): makedirs(self.tmp_dir) # if test creates the temp dir, also remove it self.dirs_to_remove.append(self.tmp_dir) self.otu_table1 = table_factory(data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T, sample_ids=list('XYZ'), observation_ids=list('abcd'), constructor=DenseOTUTable) self.otu_table1_fp = get_tmp_filename(tmp_dir=self.tmp_dir, prefix='alpha_diversity_tests', suffix='.biom', result_constructor=str) open(self.otu_table1_fp, 'w').write( format_biom_table(self.otu_table1)) self.otu_table2 = table_factory(data=array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]]).T, sample_ids=list('XYZ'), observation_ids=['a', 'b', 'c', 'd_'], constructor=DenseOTUTable) self.otu_table2_fp = get_tmp_filename(tmp_dir=self.tmp_dir, prefix='alpha_diversity_tests', suffix='.biom', result_constructor=str) open(self.otu_table2_fp, 'w').write( format_biom_table(self.otu_table2)) self.single_sample_otu_table = table_factory( data=array([[2, 0, 0, 1]]).T, sample_ids=list('X'), observation_ids=list( 'abcd'), constructor=DenseOTUTable) self.single_sample_otu_table_fp = get_tmp_filename( tmp_dir=self.tmp_dir, prefix='alpha_diversity_tests', suffix='.biom', result_constructor=str) open(self.single_sample_otu_table_fp, 'w').write( format_biom_table(self.single_sample_otu_table)) self.tree1 = parse_newick('((a:2,b:3):2,(c:1,d:2):7);') self.tree2 = parse_newick("((a:2,'b':3):2,(c:1,'d_':2):7);") self.files_to_remove = [self.otu_table1_fp, self.otu_table2_fp, self.single_sample_otu_table_fp]
def setUp(self):
    self.qiime_config = load_qiime_config()
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

    self.l19_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0]
    ])
    self.l19_sample_names = [
        'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6',
        'sam7', 'sam8', 'sam9', 'sam_middle', 'sam11', 'sam12',
        'sam13', 'sam14', 'sam15', 'sam16', 'sam17', 'sam18', 'sam19']
    self.l19_taxon_names = ['tax1', 'tax2', 'tax3', 'tax4',
                            'endbigtaxon', 'tax6', 'tax7', 'tax8', 'tax9']
    self.l19_taxon_names_w_underscore = ['ta_x1', 'tax2', 'tax3', 'tax4',
                                         'endbigtaxon', 'tax6', 'tax7',
                                         'tax8', 'tax9']

    l19_str = format_biom_table(DenseOTUTable(self.l19_data.T,
                                              self.l19_sample_names,
                                              self.l19_taxon_names))
    self.l19_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
                                   prefix='test_bdiv_otu_table',
                                   suffix='.biom')
    open(self.l19_fp, 'w').write(l19_str)

    l19_str_w_underscore = format_biom_table(
        DenseOTUTable(self.l19_data.T,
                      self.l19_sample_names,
                      self.l19_taxon_names_w_underscore))
    self.l19_str_w_underscore_fp = get_tmp_filename(
        tmp_dir=self.tmp_dir,
        prefix='test_bdiv_otu_table',
        suffix='.biom')
    open(self.l19_str_w_underscore_fp, 'w').write(l19_str_w_underscore)

    self.l19_tree_str = ('((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4,'
                         ' ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2,'
                         ' (tax9:0.3, endbigtaxon:.08));')
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

    self.files_to_remove = [self.l19_fp, self.l19_str_w_underscore_fp]
    self.folders_to_remove = []
def setUp(self): self.home = environ["HOME"] self.queue = "friendlyq" self.tmp_result_file = get_tmp_filename(tmp_dir=self.home, prefix="/test_hello_", suffix=".txt") self.command = "echo hello > %s\n" % self.tmp_result_file self.tmp_name = get_tmp_filename(tmp_dir="/tmp", prefix="make_cluster_jobs_test_", suffix=".txt") fh = open(self.tmp_name, "w") fh.write(self.command) fh.close()
def test_make_html_file(self): """The HTML file is stored in the correct location""" # Generate the PCoA output directory pcoa_dir = get_tmp_filename(tmp_dir=self.tmp_dir, suffix='') self._create_pcoa_output_structure(pcoa_dir) # Add the PCoA output to the cleaning paths self._dirs_to_clean_up = [pcoa_dir] # Perform the test html_fp = get_tmp_filename(tmp_dir=self.tmp_dir, suffix='.html') make_html_file(pcoa_dir, html_fp) self.assertTrue(exists(html_fp))
def setUp(self): """ """ tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_parallel_tests_', suffix='', result_constructor=str) create_dir(self.test_out) self.dirs_to_remove = [self.test_out] self.output_fp = join(self.test_out, 'fmap.txt') self.failure_fp = join(self.test_out, 'fail.txt') self.usearch_fp = join(self.test_out, 'out.uc') self.bl6_fp = join(self.test_out, 'out.bl6') self.log_fp = join(self.test_out, 'fmap.log') self.files_to_remove = [ self.output_fp, self.failure_fp, self.usearch_fp, self.log_fp, self.bl6_fp ] self.refseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_refseqs', suffix='.fasta') refseqs1_f = open(self.refseqs1_fp, 'w') refseqs1_f.write(refseqs1) refseqs1_f.close() self.files_to_remove.append(self.refseqs1_fp) self.refseqs2_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_refseqs', suffix='.fasta') refseqs2_f = open(self.refseqs2_fp, 'w') refseqs2_f.write(refseqs2) refseqs2_f.close() self.files_to_remove.append(self.refseqs2_fp) self.inseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs1_f = open(self.inseqs1_fp, 'w') inseqs1_f.write(inseqs1) inseqs1_f.close() self.files_to_remove.append(self.inseqs1_fp) self.inseqs2_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs2_f = open(self.inseqs2_fp, 'w') inseqs2_f.write(inseqs2) inseqs2_f.close() self.files_to_remove.append(self.inseqs2_fp) initiate_timeout(60)
def setUp(self): self.home = environ["HOME"] self.server_socket = None self.tmp_result_file = get_tmp_filename(tmp_dir=self.home, prefix="/test_hello_", suffix=".txt") self.tmp_dir = get_tmp_filename(tmp_dir=self.home, prefix="test_cluster_util", suffix="/") self.files_to_remove = [self.tmp_result_file] self.command = "echo hello > %s\n" % self.tmp_result_file signal.signal(signal.SIGALRM, timeout) # set the 'alarm' to go off in allowed_seconds seconds signal.alarm(allowed_seconds_per_test)
def setUp(self): self.otu_f = get_tmp_filename(prefix="OtuTaxonomyTest_", suffix=".txt") self.otu_match_f = get_tmp_filename(prefix="OtuMatchTest", suffix="fasta") self._paths_to_clean_up = [self.otu_f, self.otu_match_f] otu_file = open(self.otu_f, "w") otu_match = open(self.otu_match_f, "w") otu_file.write(otus_taxonomy_strings) otu_match.write(otu_strings) otu_file.close() otu_match.close()
def test_process_illumina_single_end_read_file2(self):
    """process_illumina_single_end_read_file: alt seq max N
    """
    output_seqs_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.fasta')
    output_qual_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    read_fp = get_tmp_filename(
        prefix='ParseIlluminaTests', suffix='.txt')
    open(read_fp, 'w').write('\n'.join(self.illumina_read1_seq_N))
    self.files_to_remove.append(read_fp)

    # seq_max_N=1 allows both sequences
    actual = process_illumina_single_end_read_file(
        read_fp, output_seqs_fp, output_qual_fp,
        barcode_to_sample_id=self.barcode_to_sample_id1,
        barcode_length=6, store_unassigned=True, max_bad_run_length=0,
        quality_threshold=1e-5, min_per_read_length=70, rev_comp=False,
        rev_comp_barcode=True, barcode_in_seq=False, seq_max_N=1,
        start_seq_id=0)
    self.files_to_remove.append(output_seqs_fp)
    self.files_to_remove.append(output_qual_fp)

    # next_seq_id is returned correctly
    self.assertEqual(actual, 2)
    # correct seq file is returned
    self.assertEqual([l.strip() for l in list(open(output_seqs_fp))],
                     self.expected_5prime_seqs_file2)
    # correct qual file is returned
    self.assertEqual([l.strip() for l in list(open(output_qual_fp))],
                     self.expected_5prime_qual_file2)

    # Lower seq_max_N yields no results
    actual = process_illumina_single_end_read_file(
        read_fp, output_seqs_fp, output_qual_fp,
        barcode_to_sample_id=self.barcode_to_sample_id1,
        barcode_length=6, store_unassigned=True, max_bad_run_length=0,
        quality_threshold=1e-5, min_per_read_length=70, rev_comp=False,
        rev_comp_barcode=True, barcode_in_seq=False, seq_max_N=0,
        start_seq_id=0)

    # next_seq_id is returned correctly
    self.assertEqual(actual, 0)
    # correct seq file is returned
    self.assertEqual([l.strip() for l in list(open(output_seqs_fp))], [])
    # correct qual file is returned
    self.assertEqual([l.strip() for l in list(open(output_qual_fp))], [])
def setUp(self):
    self.otu_map1 = [('0', ['seq1', 'seq2', 'seq5']),
                     ('1', ['seq3', 'seq4']),
                     ('2', ['seq6', 'seq7', 'seq8'])]
    self.tmp_fp1 = get_tmp_filename(prefix='FormatTests_', suffix='.txt')
    self.tmp_fp2 = get_tmp_filename(prefix='FormatTests_', suffix='.txt')
    self.files_to_remove = []

    self.add_taxa_summary = {'s1': [1, 2], 's2': [3, 4]}
    self.add_taxa_header = ['sample_id', 'foo', 'bar']
    self.add_taxa_order = ['a;b;c', 'd;e;f']
    self.add_taxa_mapping = [['s1', 'something1', 'something2'],
                             ['s2', 'something3', 'something4'],
                             ['s3', 'something5', 'something6']]

    self.biom1 = parse_biom_table(biom1.split('\n'))

    self.expected_formatted_html_no_errors_warnings = \
        expected_formatted_html_no_errors_warnings
    self.expected_formatted_html_errors = \
        expected_formatted_html_errors
    self.expected_formatted_html_warnings = \
        expected_formatted_html_warnings
    self.expected_formatted_html_data_nonloc_error = \
        expected_formatted_html_data_nonloc_error

    # For testing formatting of correlation vectors.
    self.corr_vec1 = [('S1', 'T1', 0.7777777777, 0, 0, 0, 0, (0.5, 1.0))]
    self.corr_vec2 = [('S1', 'T1', 0.7777777777, 0, 0, 0, 0, (0.5, 1.0)),
                      ('S2', 'T2', 0.1, 0.05, 0.15, 0.04, 0.12,
                       (-0.1, 0.2)),
                      ('S3', 'T3', 100.68, 0.9, 1, 1, 1, (-0.4, -0.2))]
    self.corr_vec3 = [('S1', 'T1', 0.7777777777, 0, 0, 0, 0,
                       (None, None))]

    # For testing statistical method formatters.
    self.overview_dm = DistanceMatrix.parseDistanceMatrix(
        overview_dm_lines)
    self.overview_map = MetadataMap.parseMetadataMap(overview_map_lines)
    self.soils_dm = DistanceMatrix.parseDistanceMatrix(soils_dm_lines)
    self.soils_map = MetadataMap.parseMetadataMap(soils_map_lines)
    self.anosim_overview = Anosim(self.overview_map, self.overview_dm,
                                  'Treatment')
    self.permanova_overview = Permanova(self.overview_map,
                                        self.overview_dm, 'Treatment')
    self.best_overview = Best(self.overview_dm, self.overview_map,
                              ['DOB'])
    self.best_88_soils = Best(self.soils_dm, self.soils_map,
                              ['TOT_ORG_CARB', 'SILT_CLAY', 'ELEVATION',
                               'SOIL_MOISTURE_DEFICIT',
                               'CARB_NITRO_RATIO', 'ANNUAL_SEASON_TEMP',
                               'ANNUAL_SEASON_PRECPT', 'PH', 'CMIN_RATE',
                               'LONGITUDE', 'LATITUDE'])
def make_jobs(commands, job_prefix, queue, jobs_dir="jobs/", walltime="06:00:00", nodes=1, ncpus=16, mem=16, keep_output="oe"): """prepare qsub text files. command: list of commands job_prefix: a short, descriptive name for the job. queue: name of the queue to submit to jobs_dir: path to directory where job submision scripts are written walltime: the maximal walltime ncpus: number of cpus nodes: number of nodes keep_output: keep standard error, standard out, both, or neither o=std out, e=std err, oe=both, n=neither """ filenames=[] create_dir(jobs_dir) job_list_name = get_tmp_filename(tmp_dir=jobs_dir, prefix=job_prefix+"_joblist_", suffix = ".txt") job_log_name = get_tmp_filename(tmp_dir=jobs_dir, prefix=job_prefix+"_prallel_job_log", suffix = ".txt") out_fh_list = open(job_list_name,"w") for command in commands[0:len(commands)-1]: out_fh_list.write(command+"\n") out_fh_list.close() job_name = get_tmp_filename(tmp_dir=jobs_dir, prefix=job_prefix+"_", suffix = ".txt") out_fh = open(job_name,"w") #num_nodes = int(math.ceil((len(commands)-1)/8.0)) # If you use the lab queue, then change the num_nodes and ncpus as: num_nodes = 1 ncpus = len(commands) - 1 out_fh.write(QSUB_TEXT % (walltime, num_nodes, ncpus, mem, queue, job_prefix, keep_output, job_list_name, len(commands)-1, job_log_name, commands[-1])) out_fh.close() filenames.append(job_name) return filenames
def setUp(self): """ """ tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_parallel_tests_', suffix='', result_constructor=str) create_dir(self.test_out) self.dirs_to_remove = [self.test_out] self.output_fp = join(self.test_out,'fmap.txt') self.failure_fp = join(self.test_out,'fail.txt') self.usearch_fp = join(self.test_out,'out.uc') self.bl6_fp = join(self.test_out,'out.bl6') self.log_fp = join(self.test_out,'fmap.log') self.files_to_remove = [self.output_fp, self.failure_fp, self.usearch_fp, self.log_fp, self.bl6_fp] self.refseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_refseqs', suffix='.fasta') refseqs1_f = open(self.refseqs1_fp,'w') refseqs1_f.write(refseqs1) refseqs1_f.close() self.files_to_remove.append(self.refseqs1_fp) self.refseqs2_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_refseqs', suffix='.fasta') refseqs2_f = open(self.refseqs2_fp,'w') refseqs2_f.write(refseqs2) refseqs2_f.close() self.files_to_remove.append(self.refseqs2_fp) self.inseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs1_f = open(self.inseqs1_fp,'w') inseqs1_f.write(inseqs1) inseqs1_f.close() self.files_to_remove.append(self.inseqs1_fp) self.inseqs2_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs2_f = open(self.inseqs2_fp,'w') inseqs2_f.write(inseqs2) inseqs2_f.close() self.files_to_remove.append(self.inseqs2_fp) initiate_timeout(60)
def setUp(self):
    self.otu_f = get_tmp_filename(
        prefix='OtuTaxonomyTest_', suffix='.txt')
    self.otu_match_f = get_tmp_filename(
        prefix='OtuMatchTest', suffix='fasta')
    self._paths_to_clean_up = \
        [self.otu_f, self.otu_match_f]

    otu_file = open(self.otu_f, 'w')
    otu_match = open(self.otu_match_f, 'w')
    otu_file.write(otus_taxonomy_strings)
    otu_match.write(otu_strings)
    otu_file.close()
    otu_match.close()
def setUp(self):
    self.pynast_test1_input_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='.fasta')
    open(self.pynast_test1_input_fp, 'w').write(pynast_test1_input_fasta)

    self.pynast_test1_template_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='template.fasta')
    open(self.pynast_test1_template_fp, 'w').\
        write(pynast_test1_template_fasta)

    self.pynast_test_template_w_dots_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='template.fasta')
    open(self.pynast_test_template_w_dots_fp, 'w').\
        write(pynast_test1_template_fasta.replace('-', '.'))

    self.pynast_test_template_w_u_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='template.fasta')
    open(self.pynast_test_template_w_u_fp, 'w').\
        write(pynast_test1_template_fasta.replace('T', 'U'))

    self.pynast_test_template_w_lower_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='template.fasta')
    open(self.pynast_test_template_w_lower_fp, 'w').\
        write(pynast_test1_template_fasta.lower())

    # create temp file names (and touch them so we can reliably
    # clean them up)
    self.result_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='.fasta')
    open(self.result_fp, 'w').close()

    self.failure_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='.fasta')
    open(self.failure_fp, 'w').close()

    self.log_fp = get_tmp_filename(
        prefix='PyNastAlignerTests_', suffix='.log')
    open(self.log_fp, 'w').close()

    self._paths_to_clean_up = [
        self.pynast_test1_input_fp,
        self.result_fp,
        self.failure_fp,
        self.log_fp,
        self.pynast_test1_template_fp,
        self.pynast_test_template_w_dots_fp,
        self.pynast_test_template_w_u_fp,
        self.pynast_test_template_w_lower_fp,
    ]

    self.pynast_test1_aligner = PyNastAligner({
        'template_filepath': self.pynast_test1_template_fp,
        'min_len': 15,
    })

    self.pynast_test1_expected_aln = \
        LoadSeqs(data=pynast_test1_expected_alignment,
                 aligned=DenseAlignment)
    self.pynast_test1_expected_fail = \
        LoadSeqs(data=pynast_test1_expected_failure, aligned=False)
def test_single_file_nj(self): """ single_file_nj should throw no errors""" titles = ["hi", "ho", "yo"] distdata = numpy.array([[0, 0.5, 0.3], [0.5, 0.0, 0.9], [0.3, 0.9, 0.0]]) fname = get_tmp_filename(prefix="nj_", suffix=".txt") f = open(fname, "w") self._paths_to_clean_up.append(fname) f.write(format_distance_matrix(titles, distdata)) f.close() fname2 = get_tmp_filename(prefix="nj_", suffix=".txt", result_constructor=str) self._paths_to_clean_up.append(fname2) single_file_nj(fname, fname2) assert os.path.exists(fname2)
def setUp(self):
    self.home = environ['HOME']
    self.queue = "friendlyq"

    self.tmp_result_file = get_tmp_filename(tmp_dir=self.home,
                                            prefix="/test_hello_",
                                            suffix=".txt")
    self.command = "echo hello > %s\n" % self.tmp_result_file

    self.tmp_name = get_tmp_filename(tmp_dir="/tmp",
                                     prefix="make_cluster_jobs_test_",
                                     suffix=".txt")
    fh = open(self.tmp_name, "w")
    fh.write(self.command)
    fh.close()
def setUp(self):
    self.qiime_config = load_qiime_config()
    self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'

    self.otu_table_data = numpy.array([[2, 1, 0],
                                       [0, 5, 0],
                                       [0, 3, 0],
                                       [1, 2, 0]])
    self.sample_names = list('YXZ')
    self.taxon_names = list('bacd')
    self.otu_metadata = [{'domain': 'Archaea'},
                         {'domain': 'Bacteria'},
                         {'domain': 'Bacteria'},
                         {'domain': 'Bacteria'}]

    self.otu_table = table_factory(self.otu_table_data,
                                   self.sample_names,
                                   self.taxon_names)
    self.otu_table_meta = table_factory(
        self.otu_table_data, self.sample_names, self.taxon_names,
        observation_metadata=self.otu_metadata)

    self.otu_table_str = format_biom_table(self.otu_table)
    self.otu_table_meta_str = format_biom_table(self.otu_table_meta)

    self.otu_table_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
                                         prefix='test_rarefaction',
                                         suffix='.biom')
    self.otu_table_meta_fp = get_tmp_filename(tmp_dir=self.tmp_dir,
                                              prefix='test_rarefaction',
                                              suffix='.biom')

    self.rare_dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                     prefix='test_rarefaction_dir',
                                     suffix='',
                                     result_constructor=str)
    os.mkdir(self.rare_dir)

    open(self.otu_table_fp, 'w').write(self.otu_table_str)
    open(self.otu_table_meta_fp, 'w').write(self.otu_table_meta_str)

    self._paths_to_clean_up = [self.otu_table_fp, self.otu_table_meta_fp]
    self._dirs_to_clean_up = [self.rare_dir]
def test_make_html_file(self): """The HTML file is created in the right location""" html_fp = get_tmp_filename(tmp_dir=self.tmp_dir) make_html_file(self.mapping_file_lines, html_fp) self.assertTrue( path.exists(html_fp), 'The html file was not created in the appropiate location')
def _create_pcoa_output_structure(self, output_dir):
    """Creates the directory structure of the PCoA analysis"""
    # Create base dir
    mkdir(output_dir)
    # Create 2d plots structure for continuous and discrete coloring
    self._create_2d_directory(output_dir,
                              'weighted_unifrac_2d_continuous')
    self._create_2d_directory(output_dir,
                              'weighted_unifrac_2d_discrete')
    # Create the log file
    f = open(get_tmp_filename(tmp_dir=output_dir, prefix='log_',
                              suffix='.txt'), 'w')
    f.close()
    # Create the prefs.txt file
    f = open(join(output_dir, 'prefs.txt'), 'w')
    f.close()
    # Create the distance matrix file
    f = open(join(output_dir, 'weighted_unifrac_dm.txt'), 'w')
    f.close()
    # Create the principal coordinate file
    f = open(join(output_dir, 'weighted_unifrac_pc.txt'), 'w')
    f.close()
    # Create the index.html file from Emperor
    f = open(join(output_dir, 'index.html'), 'w')
    f.close()
    # Create the 'emperor_required_resources' folder
    mkdir(join(output_dir, 'emperor_required_resources'))
def test_write_sff_header(self):
    """write_sff_header writes a correct sff header"""
    expected = """Common Header:
Magic Number:\t0x2E736666
Version:\t0001
Index Offset:\t7773224
Index Length:\t93365
# of Reads:\t114
Header Length:\t440
Key Length:\t4
# of Flows:\t400
Flowgram Code:\t1
Flow Chars:\tTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
Key Sequence:\tTCAG
""".split('\n')

    header = {'Version': "0001",
              'Magic Number': '0x2E736666',
              'Index Offset': '7773224',
              'Index Length': '93365',
              '# of Reads': '114',
              'Header Length': '440',
              'Key Length': '4',
              '# of Flows': '400',
              'Flowgram Code': '1',
              'Flow Chars': 'TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG',
              'Key Sequence': 'TCAG'}

    tmp_name = get_tmp_filename(prefix="test_write_sff_header")
    fh = open(tmp_name, "w")
    write_sff_header(header, fh, num=400)
    fh.close()

    fh = open(tmp_name, "U")
    lines = list(fh)
    remove(tmp_name)
    self.assertEqualItems(lines, map(lambda a: a + "\n", expected))
def test_combine_mappings(self):
    """combine_mappings works as expected"""
    self.tmp_dir = get_tmp_filename(tmp_dir="./", suffix="/")
    mkdir(self.tmp_dir)

    combine_mappings(fasta, denoiser_mapping, denoised_seqs,
                     otu_picker_map, self.tmp_dir)

    observed_otu_map = "".join(
        list(open(self.tmp_dir + "/denoised_otu_map.txt")))

    expected_otu_map = """1:\tS1_1\tS1_2\tS2_4\tS2_5
2:\tS2_3\tS1_6
"""
    self.assertEqual(observed_otu_map, expected_otu_map)

    observed_fasta = "".join(
        list(open(self.tmp_dir + "/denoised_all.fasta")))
    expected_fasta = """>S1_1 Read1
AAA
>S1_2 Read2
TTT
>S2_3 Read3
GGG
"""
    self.assertEqual(observed_fasta, expected_fasta)
def cleanup_sff(flowgrams, header, outhandle=None, outdir="/tmp",
                min_length=150, max_length=400):
    """Clean an sff file and return the name of the clean file and the
    number of clean flowgrams.

    flowgrams: a list of flowgrams

    header: the header for the flowgrams

    outhandle: handle flowgrams will be written to if set, can be stdout

    outdir: if outhandle is not set, a random file will be created in
            outdir

    min_length, max_length: hard sequence length constraints; the defaults
            are set for GS FLX. Increase for Titanium, decrease for GS20.

    NOTE: It is strongly recommended to use proper quality filtering as in
    QIIME's split_libraries.py. This function is intended as a last resort
    and should hardly ever be used.
    """
    clean_filename = ""
    if not outhandle:
        clean_filename = get_tmp_filename(tmp_dir=outdir,
                                          prefix="cleanup_sff",
                                          suffix=".sff.txt")
        outhandle = open(clean_filename, "w")

    l = filter_sff_file(flowgrams, header,
                        [lambda f: within_length(f, min_length,
                                                 max_length),
                         lambda f: f.hasProperKey()],
                        outhandle)
    return (clean_filename, l)
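# Sketch of typical use (the input path is hypothetical; cat_sff_files is
# the lazy sff.txt parser used elsewhere in this codebase):
#
#     flowgrams, header = cat_sff_files([open('reads.sff.txt')])
#     clean_fp, num_clean = cleanup_sff(flowgrams, header, outdir='/tmp',
#                                       min_length=150, max_length=400)
#     # num_clean flowgrams passed the length and key checks; since no
#     # outhandle was given, they were written to the new file clean_fp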
def test_split_fasta_diff_num_seqs_per_file(self):
    """split_fasta funcs as expected when diff num seqs go to each file
    """
    filename_prefix = get_tmp_filename(tmp_dir=get_qiime_temp_dir(),
                                       prefix='split_fasta_tests',
                                       suffix='',
                                       result_constructor=str)
    infile = ['>seq1', 'AACCTTAA', '>seq2', 'TTAACC', 'AATTAA',
              '>seq3', 'CCTT--AA']
    actual = split_fasta(infile, 2, filename_prefix)

    actual_seqs = []
    for fp in actual:
        actual_seqs += list(open(fp))
    remove_files(actual)

    expected = ['%s.%d.fasta' % (filename_prefix, i) for i in range(2)]
    # list of file paths is as expected
    self.assertEqual(actual, expected)
    # building seq collections from infile and the split files result in
    # equivalent seq collections
    self.assertEqual(
        LoadSeqs(data=infile, aligned=False),
        LoadSeqs(data=actual_seqs, aligned=False))
def setUp(self):
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = get_tmp_filename(tmp_dir=tmp_dir,
                                     prefix='core_qiime_analyses_test_',
                                     suffix='',
                                     result_constructor=str)
    self.dirs_to_remove.append(self.test_out)
    create_dir(self.test_out)

    # Get input data
    self.test_data = get_test_data_fps()

    self.qiime_config = load_qiime_config()
    self.qiime_config['jobs_to_start'] = 2
    self.qiime_config['seconds_to_sleep'] = 1

    # suppress stderr during tests (one of the system calls in the
    # workflow prints a warning, and we can't suppress that warning with
    # warnings.filterwarnings here because it comes from within the code
    # executed through the system call). Found this trick here:
    # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
    self.saved_stderr = sys.stderr
    sys.stderr = StringIO()

    # Define number of seconds a test can run for before timing out
    # and failing
    initiate_timeout(420)
def setUp(self): """Set up some test variables""" self.newick = "((s1:0.2,s2:0.2):0.6,s3:0.8);" self.tree = parse_newick(self.newick, PhyloNode) self.newick_scaled = "((s1:25,s2:25):75,s3:100);" self.tree_scaled = parse_newick(self.newick_scaled, PhyloNode) self.tree_scaled.scaleBranchLengths(max_length=100, ultrametric=True) self.num_trees_considered = 10 self.trans_values = {(None, None) : ("#FFFFFF", ""), (None, 0.5): ("#dddddd", "< 50%"), (0.5, 0.7): ("#99CCFF", "50-70%"), (0.7, 0.9): ("#82FF8B", "70-90%"), (0.9, 0.999): ("#F8FE83", "90-99.9%"), (0.999, None): ("#FF8582", "> 99.9%")} self.jack_newick = "((s1:0.2,s2:0.2)0.8:0.6,s3:0.8)1.0;" self.jack_tree = parse_newick(self.jack_newick, PhyloNode) self.jack_newick_scaled = "((s1:25,s2:25)0.8:75,s3:100)1.0;" self.jack_tree_scaled = parse_newick(self.jack_newick_scaled, PhyloNode) self.jack_tree_scaled.scaleBranchLengths(max_length=100, ultrametric=True) self.support = { 'trees_considered': 10, 'support_dict': {"node0":1.0, "node1":0.8}} self.qiime_config = load_qiime_config() self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/' self.output_file = get_tmp_filename(tmp_dir = self.tmp_dir) dict_mapping_data = {} dict_mapping_data["s1"] = { 'Description':'s1 test description', 'NumIndividuals':'100', 'BarcodeSequence':'AAAAAAAAAACT', 'LinkerPrimerSequence':'AAAAAAAAAAAAAAAAAAAAA', 'ExampleHeader1':'Value1', 'ExampleHeader2':'Val2'} dict_mapping_data["s2"] = { 'Description':'s2 test description', 'NumIndividuals':'200', 'BarcodeSequence':'CAAAAAAAAACT', 'LinkerPrimerSequence':'AAAAAAAAAAAAAAAAAAAAA', 'ExampleHeader1':'Value2', 'ExampleHeader2':'Val1'} dict_mapping_data["s3"] = { 'Description':'s3 test description', 'NumIndividuals':'300', 'BarcodeSequence':'GAAAAAAAAACT', 'LinkerPrimerSequence':'AAAAAAAAAAAAAAAAAAAAA', 'ExampleHeader1':'Value2', 'ExampleHeader2':'Val3'} self.mapping_data = [dict_mapping_data, "Example comment string for test"] self._paths_to_clean_up = []
def test_call_invalid_id(self):
    """ReferenceRepSetPicker.__call__ raises KeyError on invalid seq id"""
    app = ReferenceRepSetPicker(params={'Algorithm': 'first',
                                        'ChoiceF': first_id})

    tmp_otu_filepath = get_tmp_filename(
        prefix='ReferenceRepSetPickerTest_', suffix='.otu')
    otu_file = open(tmp_otu_filepath, 'w')
    # replace a valid sequence identifier with an invalid
    # sequence identifier (i.e., one that we don't have a sequence for)
    otu_file.write(otus_w_ref.replace('R27DLI_4812',
                                      'bad_seq_identifier'))
    otu_file.close()
    self.files_to_remove.append(tmp_otu_filepath)

    # returning in dict
    self.assertRaises(KeyError, app, self.tmp_seq_filepath,
                      tmp_otu_filepath, self.ref_seq_filepath)
    # writing to file
    self.assertRaises(KeyError, app, self.tmp_seq_filepath,
                      tmp_otu_filepath, self.ref_seq_filepath,
                      result_path=self.result_filepath)
def test_call_output_to_file_sorted(self):
    """GenericRepSetPicker.__call__ output to file sorts when requested
    """
    tmp_result_filepath = get_tmp_filename(
        prefix='GenericRepSetPickerTest.test_call_output_to_file_',
        suffix='.txt')

    app = GenericRepSetPicker(params=self.params)
    obs = app(self.tmp_seq_filepath, self.tmp_otu_filepath,
              result_path=tmp_result_filepath, sort_by='seq_id')

    result_file = open(tmp_result_filepath)
    result_file_str = result_file.read()
    result_file.close()
    # remove the result file before running the test, so in
    # case it fails the temp file is still cleaned up
    remove(tmp_result_filepath)

    # compare data in result file to fake expected file
    self.assertEqual(result_file_str, rep_seqs_result_file_sorted_exp)
    # confirm that nothing is returned when result_path is specified
    self.assertEqual(obs, None)
def main(commandline_args=None):
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not '
                     'specified')
    elif not files_exist(opts.sff_fp):
        parser.error(
            'Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
            % opts.sff_fp)

    if opts.checkpoint_fp:
        bp_fp = opts.checkpoint_fp
        if not exists(bp_fp):
            parser.error('Specified checkpoint file does not exist: %s'
                         % bp_fp)

    # peek into sff.txt files to make sure they are parseable;
    # cat_sff_files is lazy and only reads the header
    flowgrams, header = cat_sff_files(map(open, opts.sff_fp.split(',')))

    if opts.split and opts.preprocess_fp:
        parser.error('Options --split and --preprocess_fp are exclusive')

    if opts.preprocess_fp:
        pp_fp = opts.preprocess_fp
        if not exists(opts.preprocess_fp):
            parser.error('Specified preprocess directory does not '
                         'exist: %s' % opts.preprocess_fp)
        if not files_exist(
                '%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta'
                % (pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain '
                         'expected files: prefix_mapping.txt and '
                         'prefix_dereplicated.fasta')

    if opts.titanium:
        opts.error_profile = DENOISER_DATA_DIR + \
            'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist'
                     % opts.error_profile)

    if opts.output_dir:
        # make sure it always ends on /
        tmpoutdir = opts.output_dir + "/"
    else:
        # make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_",
                                     suffix="/")

    create_dir(tmpoutdir, not opts.force)

    log_fp = 'denoiser.log'

    if opts.split:
        denoise_per_sample(
            opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.cluster,
            opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail,
            opts.primer, opts.low_cutoff, opts.high_cutoff, log_fp,
            opts.low_memory, opts.verbose, opts.error_profile,
            opts.max_num_iter, opts.titanium)
    else:
        denoise_seqs(
            opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.preprocess_fp,
            opts.cluster, opts.num_cpus, opts.squeeze, opts.percent_id,
            opts.bail, opts.primer, opts.low_cutoff, opts.high_cutoff,
            log_fp, opts.low_memory, opts.verbose, opts.error_profile,
            opts.max_num_iter, opts.titanium, opts.checkpoint_fp)
def test_store_cluster(self):
    """store_clusters stores the centroid seqs for each cluster."""
    self.tmpdir = get_tmp_filename(tmp_dir="./",
                                   suffix="_store_clusters/")
    create_dir(self.tmpdir)

    self.files_to_remove.append(self.tmpdir + "singletons.fasta")
    self.files_to_remove.append(self.tmpdir + "centroids.fasta")

    # empty map results in empty files
    store_clusters({}, self.tiny_test, self.tmpdir)
    actual_centroids = list(
        MinimalFastaParser(open(self.tmpdir + "centroids.fasta")))
    self.assertEqual(actual_centroids, [])
    actual_singletons = list(
        MinimalFastaParser(open(self.tmpdir + "singletons.fasta")))
    self.assertEqual(actual_singletons, [])

    # non-empty map creates non-empty files, centroids sorted by size
    mapping = {'FZTHQMS01B8T1H': [],
               'FZTHQMS01DE1KN': ['FZTHQMS01EHAJG'],
               'FZTHQMS01EHAJG': [1, 2, 3]}  # content doesn't really matter

    centroids = [('FZTHQMS01EHAJG | cluster size: 4',
                  'CATGCTGCCTCCCGTAGGAGTTTGGACCGTGTCTCAGTTCCAATGTGGGGGACCTTCCTCTCAGAACCCCTATCCATCGAAGGTTTGGTGAGCCGTTACCTCACCAACTGCCTAATGGAACGCATCCCCATCGATAACCGAAATTCTTTAATAACAAGACCATGCGGTCTGATTATACCATCGGGTATTAATCTTTCTTTCGAAAGGCTATCCCCGAGTTATCGGCAGGTTGGATACGTGTTACTCACCCGTGCGCCGGTCGCCA'),
                 ('FZTHQMS01DE1KN | cluster size: 2',
                  'CATGCTGCCTCCCGTAGGAGTTTGGACCGTGTCTCAGTTCCAATGTGGGGGACCTTCCTCTCAGAACCCCTATCCATCGAAGGTTTGGTGAGCCGTTACCTCACCAACTGCCTAATGGAACGCATCCCCATCGATAACCGAAATTCTTTAATAACAAGACCATGCGGTCTGATTATACCATCGGGTATTAATCTTTCTTTCGAAAGGCTATCCCCGAGTTATCGGCAGGTTGGATACGTGTTACTCACCCGTGCGCCGGTCGCCA')]

    singletons = [('FZTHQMS01B8T1H',
                   'CATGCTGCCTCCCGTAGGAGTTTGGACCGTGTCTCAGTTCCAATGTGGGGGACCTTCCTCTCAGAACCCCTATCCATCGAAGGTTTGGTGAGCCGTTACCTCACCAACTGCCTAATGGAACGCATCCCCATCGATAACCGAAATTCTTTAATAATTAAACCATGCGGTTTTATTATACCATCGGGTATTAATCTTTCTTTCGAAAGGCTATCCCCGAGTTATCGGCAGGTTGGATACGTGTTACTCACCCGTGCGCCGGTCGCCATCACTTA')]

    store_clusters(mapping, self.tiny_test, self.tmpdir)
    actual_centroids = list(
        MinimalFastaParser(open(self.tmpdir + "centroids.fasta")))
    self.assertEqual(actual_centroids, centroids)
    actual_singletons = list(
        MinimalFastaParser(open(self.tmpdir + "singletons.fasta")))
    self.assertEqual(actual_singletons, singletons)
def setUp(self): """ """ self.test_data = get_test_data_fps() self.files_to_remove = [] self.dirs_to_remove = [] # Create example output directory tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='core_qiime_analyses_test_', suffix='', result_constructor=str) self.dirs_to_remove.append(self.test_out) create_dir(self.test_out) self.qiime_config = load_qiime_config() self.params = parse_qiime_parameters(params_f1) # suppress stderr during tests (one of the systems calls in the # workflow prints a warning, and we can't suppress that warning with # warnings.filterwarnings) here because it comes from within the code # executed through the system call. Found this trick here: # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python self.saved_stderr = sys.stderr sys.stderr = StringIO() initiate_timeout(180)
def make_jobs(commands, job_prefix, queue, jobs_dir="jobs/",
              walltime="72:00:00", ncpus=1, nodes=1, keep_output="oe"):
    """prepare qsub text files.

    commands: list of commands
    job_prefix: a short, descriptive name for the job.
    queue: name of the queue to submit to
    jobs_dir: path to directory where job submission scripts are written
    walltime: the maximal walltime
    ncpus: number of cpus
    nodes: number of nodes
    keep_output: keep standard error, standard out, both, or neither
                 o=std out, e=std err, oe=both, n=neither
    """
    filenames = []
    create_dir(jobs_dir)
    for command in commands:
        job_name = get_tmp_filename(tmp_dir=jobs_dir,
                                    prefix=job_prefix + "_",
                                    suffix=".txt")
        out_fh = open(job_name, "w")
        out_fh.write(QSUB_TEXT % (walltime, ncpus, nodes, queue,
                                  job_prefix, keep_output, command))
        out_fh.close()
        filenames.append(job_name)
    return filenames
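# Minimal sketch (commands and queue name are hypothetical; QSUB_TEXT and
# create_dir are assumed defined as above). Unlike the job-list variant of
# make_jobs elsewhere in this code, this version writes one qsub script per
# command:
#
#     commands = ['echo hello > /tmp/out_%d.txt' % i for i in range(3)]
#     script_fps = make_jobs(commands, 'hello', 'friendlyq',
#                            jobs_dir='jobs/')
#     # script_fps now lists three job files, each wrapping one command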
def setUp(self):
    '''setup the files for testing pplacer'''
    # create a list of files to cleanup
    self._paths_to_clean_up = []
    self._dirs_to_clean_up = []

    # get a tmp filename to use
    self.basename = splitext(get_tmp_filename())[0]

    self.align_map = {'seq0000005': 'Species005',
                      'seq0000004': 'Species004',
                      'seq0000007': 'Species007',
                      'seq0000006': 'Species006',
                      'seq0000001': 'Species001',
                      'seq0000003': 'Species003',
                      'seq0000002': 'Species002'}

    # create and write out the starting tree file
    self.tmp_tree_fname = self.basename + '.tre'
    tree_out = open(self.tmp_tree_fname, 'w')
    tree_out.write(STARTING_TREE)
    tree_out.close()
    self._paths_to_clean_up.append(self.tmp_tree_fname)
def setUp(self): """Set up some test variables""" self.qiime_config = load_qiime_config() self.tmp_dir = self.qiime_config['temp_dir'] or '/tmp/' self.input_file = get_tmp_filename(tmp_dir=self.tmp_dir) self.support_lines = support_lines.splitlines() self._paths_to_clean_up = []
def test_split_fasta_diff_num_seqs_per_file_alt(self):
    """split_fasta funcs always catches all seqs
    """
    # start with 59 seqs (b/c it's prime, so should make more
    # confusing splits)
    in_seqs = LoadSeqs(data=[('seq%s' % k, 'AACCTTAA')
                             for k in range(59)])
    infile = in_seqs.toFasta().split('\n')

    # test seqs_per_file from 1 to 1000
    for i in range(1, 1000):
        filename_prefix = get_tmp_filename(tmp_dir=get_qiime_temp_dir(),
                                           prefix='split_fasta_tests',
                                           suffix='',
                                           result_constructor=str)
        actual = split_fasta(infile, i, filename_prefix)

        actual_seqs = []
        for fp in actual:
            actual_seqs += list(open(fp))
        # remove the files now, so if the test fails they still get
        # cleaned up
        remove_files(actual)

        # building seq collections from infile and the split files result
        # in equivalent seq collections
        self.assertEqual(
            LoadSeqs(data=infile, aligned=False),
            LoadSeqs(data=actual_seqs, aligned=False))
def test_call_ref_only(self):
    """ReferenceRepSetPicker.__call__ functions with no non-refseqs"""
    tmp_otu_filepath = get_tmp_filename(
        prefix='ReferenceRepSetPickerTest_', suffix='.otu')
    otu_file = open(tmp_otu_filepath, 'w')
    otu_file.write(otus_all_ref)
    otu_file.close()
    self.files_to_remove.append(tmp_otu_filepath)

    exp = {'ref1': ('ref1', 'GGGGGGGAAAAAAAAAAAAA'),
           'ref0': ('ref0', 'CCCAAAAAAATTTTTT')}

    # passing only reference (not input seqs)
    app = ReferenceRepSetPicker(params={'Algorithm': 'first',
                                        'ChoiceF': first_id})
    obs = app(None, tmp_otu_filepath, self.ref_seq_filepath)
    self.assertEqual(obs, exp)

    # passing reference and input seqs
    app = ReferenceRepSetPicker(params={'Algorithm': 'first',
                                        'ChoiceF': first_id})
    obs = app(self.tmp_seq_filepath, tmp_otu_filepath,
              self.ref_seq_filepath)
    self.assertEqual(obs, exp)
def test_single_file_nj(self): """ single_file_nj should throw no errors""" titles = ['hi','ho','yo'] distdata = numpy.array([[0,.5,.3],[.5,0.,.9],[.3,.9,0.]]) fname = get_tmp_filename(prefix='nj_',suffix='.txt') f = open(fname,'w') self._paths_to_clean_up.append(fname) f.write(format_distance_matrix(titles, distdata)) f.close() fname2 = get_tmp_filename(prefix='nj_',suffix='.txt', result_constructor=str) self._paths_to_clean_up.append(fname2) single_file_nj(fname,fname2) assert(os.path.exists(fname2))
def setUp(self):
    self.input_fp = get_tmp_filename(
        prefix='CogentAlignerTests_', suffix='.fasta')
    open(self.input_fp, 'w').write(seqs_for_muscle)

    self._paths_to_clean_up = \
        [self.input_fp]

    self.muscle_module = alignment_module_names['muscle']
def setUp(self): """ """ self.files_to_remove = [] self.dirs_to_remove = [] tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename(tmp_dir=tmp_dir, prefix='qiime_parallel_tests_', suffix='', result_constructor=str) self.dirs_to_remove.append(self.test_out) create_dir(self.test_out) self.refseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_refseqs', suffix='.fasta') refseqs1_f = open(self.refseqs1_fp, 'w') refseqs1_f.write(refseqs1) refseqs1_f.close() self.files_to_remove.append(self.refseqs1_fp) self.refseqs2_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_refseqs', suffix='.fasta') refseqs2_f = open(self.refseqs2_fp, 'w') refseqs2_f.write(refseqs2) refseqs2_f.close() self.files_to_remove.append(self.refseqs2_fp) self.inseqs1_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs1_f = open(self.inseqs1_fp, 'w') inseqs1_f.write(inseqs1) inseqs1_f.close() self.files_to_remove.append(self.inseqs1_fp) self.inseqs2_fp = get_tmp_filename(tmp_dir=self.test_out, prefix='qiime_inseqs', suffix='.fasta') inseqs2_f = open(self.inseqs2_fp, 'w') inseqs2_f.write(inseqs2) inseqs2_f.close() self.files_to_remove.append(self.inseqs2_fp) initiate_timeout(60)
def setUp(self): """ """ self.files_to_remove = [] self.dirs_to_remove = [] tmp_dir = get_qiime_temp_dir() self.test_out = get_tmp_filename( tmp_dir=tmp_dir, prefix='qiime_parallel_taxonomy_assigner_tests_', suffix='', result_constructor=str) self.dirs_to_remove.append(self.test_out) create_dir(self.test_out) # Temporary input file self.tmp_seq_filepath = get_tmp_filename( tmp_dir=self.test_out, prefix='qiime_parallel_taxonomy_assigner_tests_input', suffix='.fasta') seq_file = open(self.tmp_seq_filepath, 'w') seq_file.write(rdp_test_seqs) seq_file.close() self.files_to_remove.append(self.tmp_seq_filepath) self.id_to_taxonomy_file = NamedTemporaryFile( prefix='qiime_parallel_taxonomy_assigner_tests_id_to_taxonomy', suffix='.txt', dir=tmp_dir) self.id_to_taxonomy_file.write(rdp_id_to_taxonomy) self.id_to_taxonomy_file.seek(0) self.reference_seqs_file = NamedTemporaryFile( prefix='qiime_parallel_taxonomy_assigner_tests_ref_seqs', suffix='.fasta', dir=tmp_dir) self.reference_seqs_file.write(rdp_reference_seqs) self.reference_seqs_file.seek(0) jar_fp = getenv('RDP_JAR_PATH') jar_basename = basename(jar_fp) if '2.2' not in jar_basename: raise ApplicationError( "RDP_JAR_PATH does not point to version 2.2 of the " "RDP Classifier.") initiate_timeout(60)
def setUp(self):
    self.home = environ['HOME']
    self.server_socket = None

    self.tmp_result_file = get_tmp_filename(tmp_dir=self.home,
                                            prefix="/test_hello_",
                                            suffix=".txt")
    self.tmp_dir = get_tmp_filename(tmp_dir=self.home,
                                    prefix="test_cluster_util",
                                    suffix="/")
    self.files_to_remove = [self.tmp_result_file]
    self.command = "echo hello > %s\n" % self.tmp_result_file

    signal.signal(signal.SIGALRM, timeout)
    # set the 'alarm' to go off in allowed_seconds seconds
    signal.alarm(allowed_seconds_per_test)
def setUp(self):
    # create the temporary input files
    self.tmp_seq_filepath = get_tmp_filename(
        prefix='GenericRepSetPickerTest_', suffix='.fasta')
    seq_file = open(self.tmp_seq_filepath, 'w')
    seq_file.write(dna_seqs)
    seq_file.close()

    self.tmp_otu_filepath = get_tmp_filename(
        prefix='GenericRepSetPickerTest_', suffix='.otu')
    otu_file = open(self.tmp_otu_filepath, 'w')
    otu_file.write(otus)
    otu_file.close()

    self.files_to_remove = [self.tmp_seq_filepath,
                            self.tmp_otu_filepath]

    self.params = {'Algorithm': 'first', 'ChoiceF': first_id}
def setUp(self): """ """ self.files_to_remove = [] tmp_dir = get_qiime_temp_dir() self.test_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='bufWriterTest', suffix='.txt') self.files_to_remove.append(self.test_fp)