# Build the index
#
# Scratch pipeline: build an index for one reference genome, align the sample
# reads against it twice (once per filter value), and convert each resulting
# SAM file to FASTQ.
#
# NOTE(review): `MH` is not imported anywhere in this section; it is presumably
# the MinHash module (`import MinHash as MH`) imported earlier in the file --
# confirm before running this section on its own.
import subprocess
import os

# Write handle on the null device. It is not used in this section.
# NOTE(review): presumably passed as stdout/stderr to subprocess calls
# elsewhere in the file -- confirm before removing or closing it.
FNULL = open(os.devnull, 'w')

# Index into CEs, only referenced by the commented-out lookup below.
ind = 1392

index_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/SNAP"
out_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/"
# Intermediate SAM file; it is overwritten by every align_reads call below.
sam_out_file = os.path.join(out_dir, "out.sam")
sample_file = "/nfs1/Koslicki_Lab/koslickd/MinHash/Data/SRR172902.fastq"
# reference_file = CEs[ind].input_file_name
# Until I get snap on math0, or scipy on math1
reference_file = "/nfs1/Koslicki_Lab/koslickd/CommonKmers/TrainingOnRepoPhlAnPython/TrainDataIn/Genomes/G000008565.fna"
# Output prefix: <out_dir>/<sample file name>_<reference file name>
fastq_out_basename = os.path.join(
    out_dir,
    os.path.basename(sample_file) + "_" + os.path.basename(reference_file))

MH.build_reference(reference_file, index_dir)
try:
    # One alignment pass per filter value; each pass writes
    # <fastq_out_basename>_aligned.fastq or <fastq_out_basename>_unaligned.fastq.
    for filt in ("aligned", "unaligned"):
        MH.align_reads(index_dir, sample_file, sam_out_file, filt=filt)
        MH.sam2fastq(sam_out_file, fastq_out_basename + "_" + filt + ".fastq")
finally:
    # Remove the intermediate SAM file even when a step above fails; the
    # existence check keeps the cleanup from masking the original error.
    if os.path.exists(sam_out_file):
        os.remove(sam_out_file)

# More efficient thing to do is figure out how to get the output SAM to split
# over aligned and unaligned, rather than reading it twice...
# Since snap-aligner also accepts streaming inputs, if I didn't care to save
# the aligned reads, I could just chain a bunch of calls to it together with
# different references!

###################################
# Test packaged top down approach
import sys
sys.path.append(
    '/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import os
# Build the index
#
# Scratch pipeline: build an index for one reference genome, align the sample
# reads against it twice (once per filter value), and convert each resulting
# SAM file to FASTQ.
#
# NOTE(review): `MH` is used below but `import MinHash as MH` only appears at
# the end of this section (under "Test packaged top down approach"). This
# section presumably relies on an earlier import in the file -- confirm, or
# run the import section first.
import subprocess
import os

# Write handle on the null device. It is not used in this section.
# NOTE(review): presumably passed as stdout/stderr to subprocess calls
# elsewhere in the file -- confirm before removing or closing it.
FNULL = open(os.devnull, 'w')

# Index into CEs, only referenced by the commented-out lookup below.
ind = 1392

index_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/SNAP"
out_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/"
# Intermediate SAM file; it is overwritten by every align_reads call below.
sam_out_file = os.path.join(out_dir, "out.sam")
sample_file = "/nfs1/Koslicki_Lab/koslickd/MinHash/Data/SRR172902.fastq"
# reference_file = CEs[ind].input_file_name
# Until I get snap on math0, or scipy on math1
reference_file = "/nfs1/Koslicki_Lab/koslickd/CommonKmers/TrainingOnRepoPhlAnPython/TrainDataIn/Genomes/G000008565.fna"
# Output prefix: <out_dir>/<sample file name>_<reference file name>
fastq_out_basename = os.path.join(
    out_dir,
    os.path.basename(sample_file) + "_" + os.path.basename(reference_file))

MH.build_reference(reference_file, index_dir)
try:
    # One alignment pass per filter value; each pass writes
    # <fastq_out_basename>_aligned.fastq or <fastq_out_basename>_unaligned.fastq.
    for filt in ("aligned", "unaligned"):
        MH.align_reads(index_dir, sample_file, sam_out_file, filt=filt)
        MH.sam2fastq(sam_out_file, fastq_out_basename + "_" + filt + ".fastq")
finally:
    # Remove the intermediate SAM file even when a step above fails; the
    # existence check keeps the cleanup from masking the original error.
    if os.path.exists(sam_out_file):
        os.remove(sam_out_file)

# More efficient thing to do is figure out how to get the output SAM to split
# over aligned and unaligned, rather than reading it twice...
# Since snap-aligner also accepts streaming inputs, if I didn't care to save
# the aligned reads, I could just chain a bunch of calls to it together with
# different references!

###################################
# Test packaged top down approach
import sys
# Make the MinHashMetagenomics sources importable before importing MinHash.
sys.path.append('/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import os
import MinHash as MH