Exemplo n.º 1
0
Arquivo: bwa.py Projeto: jrherr/pyBWA
def compile_refs( refs ):
    '''
        Compile all given refs into a single file to be indexed

        When refs is a directory, every entry directly inside it whose
        extension is exactly one of .fa, .fasta, .fna or .fas (the comparison
        is case-sensitive) is handed to seqio.concat_files, which is asked to
        write them to the relative path 'reference.fa'.
        When refs is not a directory it is returned untouched.

        If the concatenation raises OSError, IOError or ValueError the error
        is logged and the whole process exits with status 1.

        @TODO -- Write tests

        @param refs - Directory/file of fasta formatted files
        @return 'reference.fa' if refs is a directory, otherwise refs itself
    '''
    # Anything that is not a directory is passed straight through
    if not os.path.isdir( refs ):
        return refs

    ref_extensions = ('.fa', '.fasta', '.fna', '.fas')

    logger.info( "Compiling and concatting refs inside of {}".format(refs) )
    files = glob.glob( os.path.join( refs, '*' ) )
    # Directory first, then its contents, to match the message wording
    logger.debug( "All files inside of {}: {}".format( refs, files ) )
    logger.debug( "Filtering files down to only files with extensions in {}".format(ref_extensions) )
    ref_files = [f for f in files if os.path.splitext(f)[1] in ref_extensions]
    logger.debug( "Filtered files to concat: {}".format( ref_files ) )
    try:
        seqio.concat_files( ref_files, 'reference.fa' )
    except (OSError,IOError,ValueError) as e:
        logger.error( "There was an error with the references in {}".format(refs) )
        logger.error( str( e ) )
        sys.exit(1)
    return 'reference.fa'
Exemplo n.º 2
0
Arquivo: bwa.py Projeto: jrherr/pyBWA
def compile_reads( reads, outputfile='reads.fastq' ):
    '''
        Compile all given reads from directory of reads or just return reads if it is fastq
        If reads is sff file then convert to fastq

        Behaviour by input:
          - directory: the read files are looked up with seqio.get_reads
            (presumably a list of file paths -- confirm against seqio), the
            *.sff ones are converted into a temporary 'sff.<outputfile name>'
            fastq next to outputfile, and that file plus every *.fastq file
            is concatenated into outputfile. The temporary file is removed
            afterwards, even if the concatenation fails.
          - single .sff file: converted straight into outputfile
          - any other single file: returned as is (assumed to be fastq, the
            extension is not checked)

        @param reads - Directory/file of .fastq or .sff
        @param outputfile - File path of single fastq file output
        @return outputfile once a directory of reads has been concatenated,
            an empty list if the directory yielded no reads, the result of
            seqio.sffs_to_fastq for a single .sff file, or reads itself for
            any other single file
    '''
    if os.path.isdir( reads ):
        reads = seqio.get_reads( reads )
    elif isinstance( reads, str ):
        # Single read file given
        if os.path.splitext( reads )[1] == '.sff':
            # Just convert the single reads
            return seqio.sffs_to_fastq( [reads], outputfile )
        # Already fastq so nothing to do
        #  This is a bad assumption
        return reads

    # Empty read list
    if len( reads ) == 0:
        return []

    # Get only sff files to convert
    sffs = fnmatch.filter( reads, '*.sff' )
    tmpsfffastq = None
    sfffastq = []
    if sffs:
        # Temporary fastq lives next to outputfile and is removed below
        tmpsfffastq = os.path.join(
            os.path.dirname( outputfile ),
            'sff.' + os.path.basename( outputfile )
        )
        logger.info( "Concatting and Converting {} to fastq".format(sffs) )
        sfffastq = [seqio.sffs_to_fastq( sffs, tmpsfffastq )]

    fastqs = fnmatch.filter( reads, '*.fastq' )
    # Concat fastq files and sff converted fastq files into
    #  outputfile
    converts = fastqs + sfffastq
    logger.info( "Concatting {} to {}".format(
            converts, outputfile
        )
    )
    try:
        seqio.concat_files( converts, outputfile )
    finally:
        # Always drop the intermediate fastq so a failed concat
        #  does not leave it behind
        if tmpsfffastq is not None:
            os.unlink( tmpsfffastq )
    return outputfile