示例#1
0
def align_reference_to_contigs( locus, reference, contig_file ):
    """
    Run Blasr on the reference and the contig file for one locus

    The reference and contig file are handed to run_blasr in that order,
    with output directed to 'HLA-<locus>.m1' in the working directory.
    That filename is passed to check_output_file and then returned.
    """
    m1_name = 'HLA-%s.m1' % locus
    options = {'nproc': 8, 'out': m1_name}
    # Report only the single best hit and do not split subreads
    options['bestn'] = 1
    options['noSplitSubreads'] = True
    run_blasr( reference, contig_file, options )
    check_output_file( m1_name )
    return m1_name
示例#2
0
def align_amplicons( filetype, sequence_5p, sequence_3p ):
    """
    Align the 5' sequence input against the 3' sequence input with Blasr

    For 'fastq' input both arguments are first passed through
    write_temp_fasta and the '.name' of each result is given to run_blasr
    (verbosely).  For 'fasta' input the two arguments are given to
    run_blasr directly, after asserting that fasta_size reports 2 for each.
    Output is always directed to 'test.m5' in M5 format.

    Returns whatever run_blasr returns.
    Raises ValueError if filetype is neither 'fastq' nor 'fasta'.
    """
    blasr_args = {'bestn': 1,
                  'out': 'test.m5',
                  'm': 5,
                  'noSplitSubreads': True}
    if filetype == 'fastq':
        temp_5p = write_temp_fasta( sequence_5p )
        temp_3p = write_temp_fasta( sequence_3p )
        align_left = run_blasr( temp_5p.name, temp_3p.name, blasr_args, verbose=True )
    elif filetype == 'fasta':
        # Keep these as assertions (callers may rely on AssertionError) but
        # say which input was wrong and what was actually found
        count_5p = fasta_size( sequence_5p )
        assert count_5p == 2, \
            'Expected 2 sequences in %s, found %s' % (sequence_5p, count_5p)
        count_3p = fasta_size( sequence_3p )
        assert count_3p == 2, \
            'Expected 2 sequences in %s, found %s' % (sequence_3p, count_3p)
        align_left = run_blasr( sequence_5p, sequence_3p, blasr_args )
    else:
        # Name the offending value so the failure is diagnosable
        raise ValueError( "Unsupported filetype %r (expected 'fastq' or 'fasta')"
                          % (filetype,) )
    return align_left
 def align_subreads( self, white_list, reference_file ):
     """
     Align the subreads in a Whitelist to the created reference

     The alignment is written to the reference path with its final
     extension replaced by '.m1'.  That path is passed to
     check_output_file and then returned.
     """
     # Imported locally so the method does not rely on a module-level import
     from os.path import splitext
     # splitext only strips an extension from the last path component, so
     # extension-less names and dotted directory names keep their full stem
     # (splitting on '.' gave '' or a truncated directory for those)
     basename = splitext( reference_file )[0]
     alignment_file = '%s.m1' % basename
     reference_count = fasta_size( reference_file )
     blasr_args = { 'nproc': self._nproc,
                    'out': alignment_file,
                    'bestn': 1,
                    'nCandidates': reference_count,
                    'noSplitSubreads': True }
     run_blasr( white_list,
                reference_file,
                blasr_args )
     check_output_file( alignment_file )
     return alignment_file
示例#4
0
def _align_fasta( query, reference, format ):
    """
    Align a single query sequence to all valid references

    Blasr output goes to a temporary '.m<format>' file, with both 'bestn'
    and 'nCandidates' set to fasta_size(reference).  The file is read back
    through BlasrReader, removed, and the records are returned as a list.
    The temporary file is removed even if the alignment or parsing fails.
    """
    suffix = '.m%s' % format
    temp_align = tempfile.NamedTemporaryFile( suffix=suffix, delete=False )
    # Only the path is needed; close our own handle so the descriptor is
    # not leaked while Blasr writes to the file
    temp_align.close()
    try:
        reference_count = fasta_size( reference )
        blasr_args = {'nproc': NPROC,
                      'out': temp_align.name,
                      'bestn': reference_count,
                      'nCandidates': reference_count,
                      'm': format,
                      'noSplitSubreads': True}
        run_blasr( query, reference, blasr_args )
        # Parse the output fully before the file is deleted
        alignments = list( BlasrReader( temp_align.name ))
    finally:
        # Clean up on failure as well; the existence check keeps a missing
        # file from masking the original error
        if os.path.exists( temp_align.name ):
            os.unlink( temp_align.name )
    return alignments
示例#5
0
def align_amplicons(sequence_file, reference):
    """
    Align a sequence file to a reference with Blasr, writing M5 output

    The output path is the sequence file path with its final extension
    replaced by '.m5'.  Returns whatever run_blasr returns.
    """
    # Imported locally so the function does not rely on a module-level import
    from os.path import splitext
    # splitext looks only at the last path component, so paths without an
    # extension (or with a '.' in a directory name) keep their full stem;
    # splitting on '.' collapsed those to '' or cut them short
    sequence_root = splitext(sequence_file)[0]
    output_file = sequence_root + '.m5'
    blasr_args = {
        'bestn': 1,
        'out': output_file,
        'm': 5,
        'noSplitSubreads': True
    }
    return run_blasr(sequence_file, reference, blasr_args, verbose=True)
示例#6
0
def _align_subreads( subread_fasta, reference_fasta, locus ):
    """
    Align the subreads for one locus against its reference sequences

    Output goes to 'temp.m1' in the directory holding the subread file.
    That path is passed to check_output_file and then returned.  The locus
    is used only in the log message.
    """
    out_path = os.path.join( os.path.dirname( subread_fasta ), 'temp.m1' )
    n_subreads = fasta_size( subread_fasta )
    n_references = fasta_size( reference_fasta )
    options = {'nproc': 8,
               'out': out_path,
               'bestn': 1,
               'nCandidates': n_references,
               'noSplitSubreads': True}
    summary = "Aligning %s reads against %s references for %s"
    log.info( summary % (n_subreads, n_references, locus) )
    run_blasr( subread_fasta, reference_fasta, options )
    check_output_file( out_path )
    return out_path
示例#7
0
def _align_fasta(query, reference, format):
    """
    Align a single query sequence to all valid references

    Blasr writes to a temporary '.m<format>' file, with 'bestn' and
    'nCandidates' both set to fasta_size(reference).  The file is parsed
    with BlasrReader into a list, which is returned.  The temporary file is
    always deleted, including when the alignment or parsing raises.
    """
    suffix = '.m%s' % format
    temp_align = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    # We only need a unique path for Blasr to write to, so release the open
    # handle immediately instead of leaking it
    temp_align.close()
    try:
        reference_count = fasta_size(reference)
        blasr_args = {
            'nproc': NPROC,
            'out': temp_align.name,
            'bestn': reference_count,
            'nCandidates': reference_count,
            'm': format,
            'noSplitSubreads': True
        }
        run_blasr(query, reference, blasr_args)
        # Materialise the records before the file is removed
        return list(BlasrReader(temp_align.name))
    finally:
        # Runs on success and on failure; skip the unlink if the file is
        # already gone so the original error is not masked
        if os.path.exists(temp_align.name):
            os.unlink(temp_align.name)
示例#8
0
def _align_subreads(subread_fasta, reference_fasta, locus):
    """
    Run Blasr on one locus' subreads against that locus' references

    The alignment is written to 'temp.m1' beside the subread file.  The
    path is passed to check_output_file and then returned.  The locus only
    appears in the log message.
    """
    workdir = os.path.dirname(subread_fasta)
    m1_path = os.path.join(workdir, 'temp.m1')
    read_total = fasta_size(subread_fasta)
    ref_total = fasta_size(reference_fasta)
    options = {'nproc': 8, 'out': m1_path, 'bestn': 1}
    options['nCandidates'] = ref_total
    options['noSplitSubreads'] = True
    counts = (read_total, ref_total, locus)
    log.info("Aligning %s reads against %s references for %s" % counts)
    run_blasr(subread_fasta, reference_fasta, options)
    check_output_file(m1_path)
    return m1_path
示例#9
0
def align_best_reference(query, reference, output=None):
    """
    Align a query to the references, keeping one best hit per read

    The output path comes from _get_output_file(query, output, 'm1').  If
    reference_has_index reports an index, '<reference>.sa' is supplied to
    Blasr as the suffix array.  Returns the output path when valid_file
    accepts it, otherwise None.
    """
    m1_path = _get_output_file(query, output, 'm1')
    n_refs = fasta_size(reference)
    # NOTE(review): the first placeholder receives the query argument
    # itself, not a sequence count
    log.info("Aligning %s sequences to %s references" % (query, n_refs))
    options = {'nproc': nproc,
               'out': m1_path,
               'bestn': 1,
               'nCandidates': n_refs,
               'noSplitSubreads': True}
    if reference_has_index( reference ):
        options['sa'] = '%s.sa' % reference if False else reference + '.sa'
    run_blasr(query, reference, options)
    # Hand back the path only if the output passes validation
    return m1_path if valid_file( m1_path ) else None
示例#10
0
def align_amplicons(filetype, sequence_5p, sequence_3p):
    """
    Run Blasr on a 5' / 3' sequence pair, writing M5 output to 'test.m5'

    'fastq': each argument is passed through write_temp_fasta and the
    '.name' of each result is aligned, with verbose output.
    'fasta': the arguments are aligned as given, after asserting that
    fasta_size reports 2 for each of them.

    Returns whatever run_blasr returns.
    Raises ValueError for any other filetype.
    """
    blasr_args = {
        'bestn': 1,
        'out': 'test.m5',
        'm': 5,
        'noSplitSubreads': True
    }
    if filetype == 'fastq':
        temp_5p = write_temp_fasta(sequence_5p)
        temp_3p = write_temp_fasta(sequence_3p)
        align_left = run_blasr(temp_5p.name,
                               temp_3p.name,
                               blasr_args,
                               verbose=True)
    elif filetype == 'fasta':
        # Still AssertionError, as before, but now reporting the input and
        # the count that was actually found
        size_5p = fasta_size(sequence_5p)
        assert size_5p == 2, (
            'Expected 2 sequences in %s, found %s' % (sequence_5p, size_5p))
        size_3p = fasta_size(sequence_3p)
        assert size_3p == 2, (
            'Expected 2 sequences in %s, found %s' % (sequence_3p, size_3p))
        align_left = run_blasr(sequence_5p, sequence_3p, blasr_args)
    else:
        # Include the rejected value so the caller can see what went wrong
        raise ValueError(
            "Unsupported filetype %r (expected 'fastq' or 'fasta')"
            % (filetype,))
    return align_left
示例#11
0
def full_align_best_reference(query, reference, output=None):
    """
    Align a query to the references in M5 format, one best hit per read

    The output path comes from _get_output_file(query, output, 'm5').  If
    reference_has_index reports an index, '<reference>.sa' is supplied to
    Blasr as the suffix array.  The output path is passed to
    check_output_file and then returned.
    """
    # Resolve where the alignment will be written
    # NOTE(review): presumably _get_output_file also clears a stale file;
    # confirm against its definition
    m5_path = _get_output_file(query, output, 'm5')
    n_refs = fasta_size(reference)
    # NOTE(review): the first placeholder receives the query argument
    # itself, not a sequence count
    log.info("Aligning %s sequences to %s references" % (query, n_refs))
    options = {'nproc': nproc, 'out': m5_path, 'm': 5}
    options['bestn'] = 1
    options['nCandidates'] = n_refs
    options['noSplitSubreads'] = True
    if reference_has_index( reference ):
        options['sa'] = reference + '.sa'
    run_blasr(query, reference, options)
    check_output_file(m5_path)
    return m5_path
示例#12
0
def _align_exons( query, exon_fasta, directory ):
    """
    Align the sequences in an exon Fasta to a "Query" Fasta

    Output is written to 'exon<N>.m1' inside directory.  Returns that path
    when count_hits reports at least one hit, otherwise None.
    """
    # N is the 7th character from the end of the exon path
    # NOTE(review): presumably paths end in '<digit>.fasta' - confirm
    exon_id = exon_fasta[-7]
    out_path = os.path.join( directory, 'exon%s.m1' % exon_id )
    options = {'nproc': NPROC,
               'out': out_path,
               'minSubreadLength': 20,
               'minReadLength': 20,
               'maxScore': 0,
               'bestn': 1,
               'noSplitSubreads': True}
    run_blasr( exon_fasta, query, options )
    hit_total = count_hits( out_path )
    if not hit_total > 0:
        log.info('No hits found for Exon #%s' % exon_id)
        return None
    log.info('Found %s hits for Exon #%s' % (hit_total, exon_id))
    return out_path
示例#13
0
def _align_exons(query, exon_fasta, directory):
    """
    Run Blasr with the exon Fasta as input against a "Query" Fasta

    The result file is 'exon<N>.m1' under directory.  Its path is returned
    if count_hits finds more than zero hits, and None is returned otherwise.
    """
    # N is read from a fixed offset: the 7th character from the end
    # NOTE(review): looks like this expects names such as 'exon3.fasta';
    # verify against the caller
    label = exon_fasta[-7]
    m1_name = 'exon%s.m1' % label
    m1_path = os.path.join(directory, m1_name)
    options = dict([
        ('nproc', NPROC),
        ('out', m1_path),
        ('minSubreadLength', 20),
        ('minReadLength', 20),
        ('maxScore', 0),
        ('bestn', 1),
        ('noSplitSubreads', True),
    ])
    run_blasr(exon_fasta, query, options)
    n_hits = count_hits(m1_path)
    found = n_hits > 0
    if found:
        log.info('Found %s hits for Exon #%s' % (n_hits, label))
    else:
        log.info('No hits found for Exon #%s' % label)
    return m1_path if found else None
示例#14
0
def align_amplicons(sequence_file, reference):
    """
    Align a sequence file to a reference with Blasr and write M5 output.

    The output is written to the sequence file path with its final
    extension swapped for ".m5". Returns whatever run_blasr returns.
    """
    # Imported locally so the function does not rely on a module-level import.
    from os.path import splitext

    # splitext strips an extension from the last path component only.
    # Splitting the whole path on "." turned "reads" into "" and
    # "./run/reads" into "", sending the output to a bare ".m5" file.
    sequence_root, _extension = splitext(sequence_file)
    output_file = sequence_root + ".m5"
    blasr_args = {"bestn": 1, "out": output_file, "m": 5, "noSplitSubreads": True}
    return run_blasr(sequence_file, reference, blasr_args, verbose=True)