Python MinHash.build_reference示例

# Build the index
import subprocess
import os
# Handle on the null device, opened at module level; nothing in this section
# writes to it.
FNULL = open(os.devnull, 'w')
# Only referenced by the commented-out reference_file lookup below.
ind = 1392

# Inputs: the FASTQ sample to align and the reference genome to align against.
sample_file = "/nfs1/Koslicki_Lab/koslickd/MinHash/Data/SRR172902.fastq"
# reference_file = CEs[ind].input_file_name # Until I get snap on math0, or scipy on math1
reference_file = "/nfs1/Koslicki_Lab/koslickd/CommonKmers/TrainingOnRepoPhlAnPython/TrainDataIn/Genomes/G000008565.fna"

# Working locations handed to the MH helpers below.
out_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/"
index_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/SNAP"
# Scratch SAM file: written by each align_reads call, read back by sam2fastq,
# and deleted at the end of this section.
sam_out_file = os.path.join(out_dir, "out.sam")
# Output FASTQ names are "<sample basename>_<reference basename>" + a suffix.
fastq_out_basename = os.path.join(
    out_dir,
    "_".join((os.path.basename(sample_file),
              os.path.basename(reference_file))))

MH.build_reference(reference_file, index_dir)

# One pass per filter. The same SAM file is reused, so each pass must convert
# its SAM output to FASTQ before the next pass overwrites it.
for _filt, _suffix in (("aligned", "_aligned.fastq"),
                       ("unaligned", "_unaligned.fastq")):
    MH.align_reads(index_dir, sample_file, sam_out_file, filt=_filt)
    MH.sam2fastq(sam_out_file, fastq_out_basename + _suffix)

os.remove(sam_out_file)

# A more efficient approach: figure out how to get the output SAM split into aligned and unaligned reads, rather than reading it twice.
# Since snap-aligner also accepts streaming input, if I didn't need to save the aligned reads I could simply chain several calls to it together, each with a different reference.

###################################
# Test packaged top down approach
import sys
# Add the MinHashMetagenomics source directory to the module search path.
sys.path.append(
    '/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import os

示例#2

显示文件

文件： MockCommunity.py 项目： dkoslicki/MinHashMetagenomics

# Build the index
import subprocess
import os
# Handle on the null device, opened at module level; nothing in this section
# writes to it.
FNULL = open(os.devnull, 'w')
# Only referenced by the commented-out reference_file lookup below.
ind = 1392

# Inputs: the FASTQ sample to align and the reference genome to align against.
sample_file = "/nfs1/Koslicki_Lab/koslickd/MinHash/Data/SRR172902.fastq"
# reference_file = CEs[ind].input_file_name # Until I get snap on math0, or scipy on math1
reference_file = "/nfs1/Koslicki_Lab/koslickd/CommonKmers/TrainingOnRepoPhlAnPython/TrainDataIn/Genomes/G000008565.fna"

# Working locations handed to the MH helpers below.
out_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/"
index_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/Temp/SNAP"
# Scratch SAM file: written by each align_reads call, read back by sam2fastq,
# and deleted at the end of this section.
sam_out_file = os.path.join(out_dir, "out.sam")
# Output FASTQ names are "<sample basename>_<reference basename>" + a suffix.
fastq_out_basename = os.path.join(
    out_dir,
    "_".join((os.path.basename(sample_file),
              os.path.basename(reference_file))))

MH.build_reference(reference_file, index_dir)

# One pass per filter. The same SAM file is reused, so each pass must convert
# its SAM output to FASTQ before the next pass overwrites it.
for _filt, _suffix in (("aligned", "_aligned.fastq"),
                       ("unaligned", "_unaligned.fastq")):
    MH.align_reads(index_dir, sample_file, sam_out_file, filt=_filt)
    MH.sam2fastq(sam_out_file, fastq_out_basename + _suffix)

os.remove(sam_out_file)

# A more efficient approach: figure out how to get the output SAM split into aligned and unaligned reads, rather than reading it twice.
# Since snap-aligner also accepts streaming input, if I didn't need to save the aligned reads I could simply chain several calls to it together, each with a different reference.

###################################
# Test packaged top down approach
import sys
# Add the MinHashMetagenomics source directory to the module search path;
# presumably this is where the MinHash module imported below lives (verify).
sys.path.append('/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import os
import MinHash as MH