def align_reference_to_contigs(locus, reference, contig_file):
    """Run blasr for one locus and return the name of the alignment file.

    ``reference`` and ``contig_file`` are forwarded to ``run_blasr`` as its
    first and second arguments.  The output is named ``HLA-<locus>.m1``; the
    name carries no directory component, so it is relative to the current
    working directory.  The path is handed to ``check_output_file`` before
    being returned.
    """
    alignment_path = 'HLA-%s.m1' % locus
    options = dict(nproc=8, out=alignment_path, bestn=1, noSplitSubreads=True)
    run_blasr(reference, contig_file, options)
    check_output_file(alignment_path)
    return alignment_path
def align_amplicons(filetype, sequence_5p, sequence_3p):
    """Align a 5' amplicon input against a 3' amplicon input with blasr.

    Args:
        filetype: ``'fastq'`` or ``'fasta'``.  For ``'fastq'`` both inputs
            are first passed through ``write_temp_fasta`` and the ``.name``
            of each result is aligned.  For ``'fasta'`` the inputs are
            aligned as given, after asserting ``fasta_size(...) == 2`` for
            each of them.
        sequence_5p: passed (directly or via its temporary fasta) as the
            first argument of ``run_blasr``.
        sequence_3p: passed (directly or via its temporary fasta) as the
            second argument of ``run_blasr``.

    Returns:
        Whatever ``run_blasr`` returns.

    Raises:
        ValueError: if ``filetype`` is neither ``'fastq'`` nor ``'fasta'``.
        AssertionError: in the ``'fasta'`` branch when either input does not
            have a ``fasta_size`` of 2.
    """
    # NOTE: the output path is fixed, so every call writes to 'test.m5'.
    blasr_args = {'bestn': 1, 'out': 'test.m5', 'm': 5, 'noSplitSubreads': True}

    if filetype == 'fastq':
        temp_5p = write_temp_fasta(sequence_5p)
        temp_3p = write_temp_fasta(sequence_3p)
        return run_blasr(temp_5p.name, temp_3p.name, blasr_args, verbose=True)
    elif filetype == 'fasta':
        assert fasta_size(sequence_5p) == 2
        assert fasta_size(sequence_3p) == 2
        return run_blasr(sequence_5p, sequence_3p, blasr_args)
    else:
        # Name the offending value so the caller can see what was rejected.
        raise ValueError(
            "Unsupported filetype %r; expected 'fastq' or 'fasta'" % (filetype,)
        )
def align_subreads(self, white_list, reference_file):
    """Align the subreads in a Whitelist to the created reference.

    Args:
        white_list: forwarded to ``run_blasr`` as its first argument.
        reference_file: path of the reference fasta; forwarded to
            ``run_blasr`` as its second argument.

    Returns:
        The alignment path: ``reference_file`` with its final extension
        replaced by ``.m1``.  The path is handed to ``check_output_file``
        before being returned.
    """
    # Local import so the method does not rely on a module-level import.
    import os

    # splitext only strips an extension from the last path component.  The
    # previous split('.')-based code produced '.m1' for extension-less paths
    # and cut the path short at a dot inside a directory name.
    basename = os.path.splitext(reference_file)[0]
    alignment_file = '%s.m1' % basename

    reference_count = fasta_size(reference_file)
    blasr_args = {
        'nproc': self._nproc,
        'out': alignment_file,
        'bestn': 1,
        'nCandidates': reference_count,
        'noSplitSubreads': True,
    }
    run_blasr(white_list, reference_file, blasr_args)
    check_output_file(alignment_file)
    return alignment_file
def _align_fasta(query, reference, format):
    """Align ``query`` against ``reference`` and return the parsed alignments.

    Args:
        query: forwarded to ``run_blasr`` as its first argument.
        reference: forwarded to ``run_blasr`` as its second argument; its
            ``fasta_size`` is used for both ``bestn`` and ``nCandidates``.
        format: value used for the ``m`` option and for the ``.m<format>``
            suffix of the temporary output file.

    Returns:
        A list built from ``BlasrReader`` over the temporary output file.
        The temporary file is removed before returning, including when an
        error occurs.
    """
    suffix = '.m%s' % format
    temp_align = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    # Only the reserved path is needed (it is passed on by name), so release
    # our own handle immediately instead of leaking it.
    temp_align.close()
    try:
        reference_count = fasta_size(reference)
        blasr_args = {
            'nproc': NPROC,
            'out': temp_align.name,
            'bestn': reference_count,
            'nCandidates': reference_count,
            'm': format,
            'noSplitSubreads': True,
        }
        run_blasr(query, reference, blasr_args)
        # Materialise the records now; the file is gone once we leave.
        return list(BlasrReader(temp_align.name))
    finally:
        # Clean up even when alignment or parsing fails.
        os.unlink(temp_align.name)
def align_amplicons(sequence_file, reference):
    """Align ``sequence_file`` against ``reference`` with blasr.

    Args:
        sequence_file: path forwarded to ``run_blasr`` as its first argument.
        reference: forwarded to ``run_blasr`` as its second argument.

    Returns:
        Whatever ``run_blasr`` returns.  The ``out`` option is set to
        ``sequence_file`` with its final extension replaced by ``.m5``.
    """
    # Local import so the function does not rely on a module-level import.
    import os

    # splitext only strips an extension from the last path component.  The
    # previous split('.')-based code produced '.m5' for extension-less paths
    # and cut the path short at a dot inside a directory name.
    sequence_root = os.path.splitext(sequence_file)[0]
    output_file = sequence_root + '.m5'
    blasr_args = {
        'bestn': 1,
        'out': output_file,
        'm': 5,
        'noSplitSubreads': True,
    }
    return run_blasr(sequence_file, reference, blasr_args, verbose=True)
def _align_subreads(subread_fasta, reference_fasta, locus):
    """Align locus-specific subreads against their references.

    The alignment is always written to ``temp.m1`` in the directory that
    contains ``subread_fasta``, so calls for inputs in the same directory
    share one output path.  That path is handed to ``check_output_file``
    and then returned.  ``locus`` is used only in the log message.
    """
    n_subreads = fasta_size(subread_fasta)
    n_references = fasta_size(reference_fasta)
    out_path = os.path.join(os.path.dirname(subread_fasta), 'temp.m1')

    options = dict(
        nproc=8,
        out=out_path,
        bestn=1,
        nCandidates=n_references,
        noSplitSubreads=True,
    )
    message = "Aligning %s reads against %s references for %s" % (
        n_subreads, n_references, locus)
    log.info(message)

    run_blasr(subread_fasta, reference_fasta, options)
    check_output_file(out_path)
    return out_path
def _align_fasta(query, reference, format):
    """Align ``query`` against ``reference`` and return the parsed alignments.

    Args:
        query: forwarded to ``run_blasr`` as its first argument.
        reference: forwarded to ``run_blasr`` as its second argument; its
            ``fasta_size`` is used for both ``bestn`` and ``nCandidates``.
        format: value used for the ``m`` option and for the ``.m<format>``
            suffix of the temporary output file.

    Returns:
        A list built from ``BlasrReader`` over the temporary output file.
        The temporary file is removed before returning, including when an
        error occurs.
    """
    suffix = '.m%s' % format
    temp_align = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    # The output is addressed by path only, so close our handle straight
    # away rather than keeping it open for the life of the process.
    temp_align.close()
    try:
        reference_count = fasta_size(reference)
        blasr_args = {
            'nproc': NPROC,
            'out': temp_align.name,
            'bestn': reference_count,
            'nCandidates': reference_count,
            'm': format,
            'noSplitSubreads': True,
        }
        run_blasr(query, reference, blasr_args)
        # Read everything into memory before the file is deleted below.
        alignments = list(BlasrReader(temp_align.name))
    finally:
        # Remove the temp file on success and on failure alike.
        os.unlink(temp_align.name)
    return alignments
def _align_subreads(subread_fasta, reference_fasta, locus):
    """Run blasr on one locus' subreads and return the alignment path.

    Output goes to a file named ``temp.m1`` next to ``subread_fasta``; the
    name is fixed, so inputs living in the same directory write to the same
    path.  The path is passed to ``check_output_file`` before it is
    returned.  ``locus`` only appears in the log message.
    """
    target_dir = os.path.dirname(subread_fasta)
    result_path = os.path.join(target_dir, 'temp.m1')

    read_total = fasta_size(subread_fasta)
    ref_total = fasta_size(reference_fasta)
    log.info("Aligning %s reads against %s references for %s"
             % (read_total, ref_total, locus))

    settings = {}
    settings['nproc'] = 8
    settings['out'] = result_path
    settings['bestn'] = 1
    settings['nCandidates'] = ref_total
    settings['noSplitSubreads'] = True

    run_blasr(subread_fasta, reference_fasta, settings)
    check_output_file(result_path)
    return result_path
def align_best_reference(query, reference, output=None):
    """Align ``query`` to ``reference`` and return the output path, or None.

    The output path comes from ``_get_output_file(query, output, 'm1')``.
    When ``reference_has_index(reference)`` is true, ``<reference>.sa`` is
    supplied as the ``sa`` option.  The path is returned only if
    ``valid_file`` accepts it; otherwise ``None`` is returned.
    """
    output = _get_output_file(query, output, 'm1')

    n_refs = fasta_size(reference)
    # Note: the first placeholder receives ``query`` itself, not a count.
    log.info("Aligning %s sequences to %s references" % (query, n_refs))

    options = dict(
        nproc=nproc,
        out=output,
        bestn=1,
        nCandidates=n_refs,
        noSplitSubreads=True,
    )
    if reference_has_index(reference):
        options['sa'] = reference + '.sa'
    run_blasr(query, reference, options)

    return output if valid_file(output) else None
def align_amplicons(filetype, sequence_5p, sequence_3p):
    """Align a 5' amplicon input against a 3' amplicon input with blasr.

    Args:
        filetype: ``'fastq'`` or ``'fasta'``.  With ``'fastq'`` each input
            goes through ``write_temp_fasta`` and the ``.name`` of the
            result is what gets aligned.  With ``'fasta'`` the inputs are
            used directly, after asserting ``fasta_size(...) == 2`` on both.
        sequence_5p: first input; becomes the first ``run_blasr`` argument.
        sequence_3p: second input; becomes the second ``run_blasr`` argument.

    Returns:
        Whatever ``run_blasr`` returns.

    Raises:
        ValueError: if ``filetype`` is neither ``'fastq'`` nor ``'fasta'``.
        AssertionError: in the ``'fasta'`` branch when either input does not
            have a ``fasta_size`` of 2.
    """
    # NOTE: 'out' is a fixed name, so each call targets the same 'test.m5'.
    blasr_args = {
        'bestn': 1,
        'out': 'test.m5',
        'm': 5,
        'noSplitSubreads': True,
    }

    if filetype == 'fastq':
        temp_5p = write_temp_fasta(sequence_5p)
        temp_3p = write_temp_fasta(sequence_3p)
        align_left = run_blasr(temp_5p.name, temp_3p.name, blasr_args,
                               verbose=True)
    elif filetype == 'fasta':
        assert fasta_size(sequence_5p) == 2
        assert fasta_size(sequence_3p) == 2
        align_left = run_blasr(sequence_5p, sequence_3p, blasr_args)
    else:
        # Report the rejected value instead of raising an empty ValueError.
        raise ValueError(
            "Unsupported filetype %r; expected 'fastq' or 'fasta'" % (filetype,)
        )
    return align_left
def full_align_best_reference(query, reference, output=None):
    """Align ``query`` to ``reference`` with ``m=5`` and return the output.

    The output path comes from ``_get_output_file(query, output, 'm5')``.
    When ``reference_has_index(reference)`` is true, ``<reference>.sa`` is
    supplied as the ``sa`` option.  The path is handed to
    ``check_output_file`` and then returned.
    """
    output = _get_output_file(query, output, 'm5')

    n_refs = fasta_size(reference)
    # Note: the first placeholder receives ``query`` itself, not a count.
    log.info("Aligning %s sequences to %s references" % (query, n_refs))

    options = dict(nproc=nproc, out=output, m=5, bestn=1,
                   nCandidates=n_refs, noSplitSubreads=True)
    if reference_has_index(reference):
        options.update(sa=reference + '.sa')

    run_blasr(query, reference, options)
    check_output_file(output)
    return output
def _align_exons(query, exon_fasta, directory):
    """Align the sequences in ``exon_fasta`` to ``query``.

    The exon label is the single character seven positions from the end of
    ``exon_fasta``.  Output goes to ``exon<label>.m1`` inside ``directory``.

    Returns the alignment path when ``count_hits`` reports more than zero
    hits for it, otherwise ``None``.
    """
    # Presumably the name ends in '<digit>.fasta' -- verify against callers.
    exon_label = exon_fasta[-7]
    hits_path = os.path.join(directory, 'exon%s.m1' % exon_label)

    options = dict(
        nproc=NPROC,
        out=hits_path,
        minSubreadLength=20,
        minReadLength=20,
        maxScore=0,
        bestn=1,
        noSplitSubreads=True,
    )
    run_blasr(exon_fasta, query, options)

    hit_total = count_hits(hits_path)
    if not hit_total > 0:
        log.info('No hits found for Exon #%s' % exon_label)
        return None
    log.info('Found %s hits for Exon #%s' % (hit_total, exon_label))
    return hits_path
def _align_exons(query, exon_fasta, directory):
    """Run blasr with ``exon_fasta`` as first input and ``query`` as second.

    The character at index ``-7`` of ``exon_fasta`` is used as the exon
    label, and the alignment is written to ``exon<label>.m1`` under
    ``directory``.

    Returns that path if ``count_hits`` gives a value above zero for it;
    returns ``None`` otherwise.
    """
    # Presumably the name ends in '<digit>.fasta' -- verify against callers.
    label = exon_fasta[-7]
    out_name = 'exon%s.m1' % label
    out_path = os.path.join(directory, out_name)

    settings = {}
    settings['nproc'] = NPROC
    settings['out'] = out_path
    settings['minSubreadLength'] = 20
    settings['minReadLength'] = 20
    settings['maxScore'] = 0
    settings['bestn'] = 1
    settings['noSplitSubreads'] = True
    run_blasr(exon_fasta, query, settings)

    n_hits = count_hits(out_path)
    found = n_hits > 0
    if found:
        log.info('Found %s hits for Exon #%s' % (n_hits, label))
    else:
        log.info('No hits found for Exon #%s' % label)
    return out_path if found else None
def align_amplicons(sequence_file, reference):
    """Align ``sequence_file`` against ``reference`` with blasr.

    Args:
        sequence_file: path forwarded to ``run_blasr`` as its first argument.
        reference: forwarded to ``run_blasr`` as its second argument.

    Returns:
        Whatever ``run_blasr`` returns.  The ``out`` option is set to
        ``sequence_file`` with its final extension replaced by ``.m5``.
    """
    # Local import so the function does not rely on a module-level import.
    import os

    # Strip only a real extension on the last path component; splitting on
    # every "." gave ".m5" for extension-less names and truncated paths
    # whose directories contain a dot.
    sequence_root, _extension = os.path.splitext(sequence_file)
    output_file = sequence_root + ".m5"
    blasr_args = {"bestn": 1, "out": output_file, "m": 5, "noSplitSubreads": True}
    return run_blasr(sequence_file, reference, blasr_args, verbose=True)