def test_run_process_fasta_through_split_lib(self):
        """run_process_fasta_through_split_lib runs without error

        Runs the workflow on the FASTA fixtures with the serial command
        handler, then checks lines 1 and 3 of the head of
        split_libraries/seqs.fna (presumably the sequence lines of the first
        two records) against exp_fasta_split_lib_seqs_only, and that a
        non-empty log file was written to the output directory.
        """

        # register the generated mapping file for cleanup
        self.files_to_remove.append(join(self.wf_out, "fasta_mapping_file.txt"))

        # process the sequence data
        run_process_fasta_through_split_lib(
            0,
            "Fasting_subset",
            input_fp=",".join(self.fasta_fps),
            mapping_fp=self.fasta_map_fp,
            output_dir=self.wf_out,
            command_handler=call_commands_serially,
            params=self.params,
            qiime_config=self.qiime_config,
            write_to_all_fasta=False,
            status_update_callback=no_status_updates,
        )

        # get the split-library sequence fpath
        split_lib_seqs_fp = join(self.wf_out, "split_libraries", "seqs.fna")

        # get the head of the file; the context manager closes the handle
        # even when reading fails ("U" is universal-newlines mode)
        with open(split_lib_seqs_fp, "U") as split_lib_seqs_f:
            split_lib_head = get_top_fastq_two_lines(split_lib_seqs_f)

        split_lib_seqs_only = [split_lib_head[1], split_lib_head[3]]

        # check results
        self.assertEqual("".join(split_lib_seqs_only), exp_fasta_split_lib_seqs_only)

        # Check that the log file is created and has size > 0; assert on the
        # glob result first so a missing log is reported as a test failure
        # instead of an IndexError
        log_fps = glob(join(self.wf_out, "log*.txt"))
        self.assertTrue(log_fps, "no log*.txt file found in %s" % self.wf_out)
        self.assertTrue(getsize(log_fps[0]) > 0)
    def test_submit_processed_data_to_db_fasta(self):
        """submit_processed_data_to_db_fasta runs without error

        Runs the FASTA split-library workflow and chained OTU picking, loads
        the results into the database with submit_fasta_and_split_lib and
        load_otu_mapping, then queries the SEQ_RUN, split-library, OTU and
        OTU-failure data for the new analysis and compares them with the
        expected values. The test analysis is deleted from the database at
        the end, whether or not the checks pass.
        """

        # process the sequence data
        run_process_fasta_through_split_lib(
            0,
            'Fasting_subset',
            input_fp=','.join(self.fasta_fps),
            mapping_fp=self.fasta_map_fp,
            output_dir=self.wf_out,
            command_handler=call_commands_serially,
            params=self.params,
            qiime_config=self.qiime_config,
            write_to_all_fasta=False,
            status_update_callback=no_status_updates)

        split_lib_seqs_fp = join(self.wf_out, 'split_libraries', 'seqs.fna')

        run_chain_pick_otus(split_lib_seqs_fp,
                            output_dir=self.gg_out,
                            command_handler=call_commands_serially,
                            params=self.params,
                            qiime_config=self.qiime_config, parallel=True,
                            status_update_callback=no_status_updates)

        analysis_id = submit_fasta_and_split_lib(data_access,
                                                 ','.join(self.fasta_fps),
                                                 0, self.wf_out)

        # Everything after the analysis exists runs under try/finally so the
        # test rows are removed even when a check or a DB call fails.
        try:
            load_otu_mapping(data_access, self.wf_out, analysis_id)

            print('Analysis ID is: %s' % str(analysis_id))

            con = data_access.getSFFDatabaseConnection()
            cur = con.cursor()

            def last_row(select_sql, sample_name):
                """Run select_sql for this analysis/sample; return last row.

                Fails the test when the query returns no rows, so no check
                can run against unbound or stale values.
                """
                # analysis_id and sample_name are test-controlled values,
                # not external input, so the statement is built by
                # formatting as before.
                query = select_sql + \
                    " where j.analysis_id=%s and slrm.sample_name='%s'" % (
                        str(analysis_id), sample_name)
                rows = list(cur.execute(query))
                self.assertTrue(rows, 'query returned no rows for sample %s'
                                % sample_name)
                return rows[-1]

            print('Testing the SEQ_RUN loading!')
            exp_sff_md5 = ['412eee0be168a285415d9e4db3dbbf2f']
            exp_num_seqs = 22
            exp_instr_code = 'FASTA'
            exp_sff_fname = ['test_split_lib_seqs']

            seq_run_info = """select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum,
                 h.instrument_code
                 from analysis j
                 inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id
                 inner join sff_file f on f.sff_file_id=s.sff_file_id
                 inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
                 inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""

            (_seq_run_id, obs_sff_filename, obs_num_of_reads, obs_sff_md5,
             obs_instrument_code) = last_row(seq_run_info,
                                             'test.PCx354.281526')

            print('After getTestSeqRunData...')

            self.assertTrue(obs_sff_filename in exp_sff_fname)
            self.assertEqual(obs_num_of_reads, exp_num_seqs)
            self.assertTrue(obs_sff_md5 in exp_sff_md5)
            self.assertEqual(obs_instrument_code, exp_instr_code)

            print('Done testing SEQ_RUN!')

            print('Testing Split-Library Data')
            exp_split_lib_seq = 'TTGGGCCGTGTCTCAGTCCCAATGTGGCCGATCAGTCTCTTAACTCGGCTATGCATCATTGCCTTGGTAAGCCGTTACCTTACCAACTAGCTAATGCACCGCAGGTCCATCCAAGAGTGATAGCAGAACCATCTTTCAAACTCTAGACATGCGTCTAGTGTTGTTATCCGGTATTAGCATCTGTTTCCAGGTGTTATCCCAGTCTCTTGGG'
            exp_split_lib_md5 = '412eee0be168a285415d9e4db3dbbf2f'
            exp_split_lib_seq_md5 = '59843d3394983f2caa26f583014a3389'

            split_lib_info = """select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum,
                 s.sequence_string,s.md5_checksum
                 from analysis j
                 inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
                 inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id
                 inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""

            (_seq_run_id, _ssu_seq_id, _split_lib_cmd, obs_split_lib_md5,
             obs_split_lib_seq, obs_split_lib_seq_md5) = last_row(
                split_lib_info, 'test.PCx354.281526')

            self.assertEqual(obs_split_lib_md5, exp_split_lib_md5)
            self.assertEqual(obs_split_lib_seq, exp_split_lib_seq)
            self.assertEqual(obs_split_lib_seq_md5, exp_split_lib_seq_md5)

            print('Testing OTU Data!')

            # NOTE: the reference-id (prokmsa) comparison was already
            # disabled in this test; only the md5 and threshold are checked.
            exp_otu_md5 = 'cec9b6c184ffdb12d9de4450034ab775'
            exp_threshold = 97

            otu_info = """select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id,
               ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file,
               p.threshold
               from analysis j
               inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
               inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id
               inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id
               inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""

            (_seq_run_id, _ssu_seq_id, _otu_id, _otu_ssu_id, _prokmsa,
             _otu_picking_run_id, _pick_otu_cmd, obs_otu_md5,
             obs_threshold) = last_row(otu_info, 'test.PCx354.281526')

            self.assertEqual(obs_otu_md5, exp_otu_md5)
            self.assertEqual(obs_threshold, exp_threshold)

            otu_fail_info = """select distinct j.seq_run_id,f.ssu_sequence_id
                 from analysis j
                 inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
                 inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""

            # This query filters on a different sample than the ones above.
            obs_seq_run_id, obs_ssu_id = last_row(otu_fail_info,
                                                  'test.PCx635.281531')

            self.assertNotEqual(obs_seq_run_id, 0)
            self.assertNotEqual(obs_ssu_id, 0)
        finally:
            valid = data_access.deleteTestAnalysis(True, analysis_id)
            if not valid:
                print("Error: Could not delete data from DB!")