def compile_refs( refs ):
    '''
        Compile all given refs into a single file to be indexed

        If refs is a directory, every file directly inside it whose extension
        is one of .fa, .fasta, .fna or .fas is concatenated (via
        seqio.concat_files) into 'reference.fa' in the current working
        directory. Anything that is not a directory is returned untouched.

        @TODO -- Write tests

        @param refs - Directory/file of fasta formatted files

        @return 'reference.fa' if refs was a directory, otherwise refs itself

        @raises SystemExit - exit status 1 if concatenating the references
            raises OSError, IOError or ValueError
    '''
    # Not a directory: nothing to compile, hand the path straight back
    if not os.path.isdir( refs ):
        return refs

    ref_extensions = ('.fa', '.fasta', '.fna', '.fas')
    logger.info( "Compiling and concatting refs inside of {}".format(refs) )
    # Only direct children of the directory are considered (no recursion)
    files = glob.glob( os.path.join( refs, '*' ) )
    # Directory first, then its contents, to match the message wording
    logger.debug( "All files inside of {}: {}".format( refs, files ) )
    ref_files = [f for f in files if os.path.splitext(f)[1] in ref_extensions]
    logger.debug( "Filtering files down to only files with extensions in {}".format(ref_extensions) )
    logger.debug( "Filtered files to concat: {}".format( ref_files ) )
    try:
        seqio.concat_files( ref_files, 'reference.fa' )
    except (OSError, IOError, ValueError) as e:
        # Log both the offending location and the underlying error before
        # terminating the process
        logger.error( "There was an error with the references in {}".format(refs) )
        logger.error( str( e ) )
        sys.exit(1)
    return 'reference.fa'
def compile_reads( reads, outputfile='reads.fastq' ):
    '''
        Compile all given reads from directory of reads or just return reads if it is fastq
        If reads is sff file then convert to fastq

        When several read files are involved, the .sff files are first
        converted into a temporary fastq named 'sff.<basename of outputfile>'
        next to outputfile, then concatenated together with the .fastq files
        into outputfile. The temporary file is removed afterwards, even if the
        concatenation fails.

        @param reads - Directory/file of .fastq or .sff. A list/tuple of read
            file paths is also accepted.
        @param outputfile - File path of single fastq file output

        @return - the result of seqio.sffs_to_fastq for a single .sff file
                - reads itself for any other single file path
                - an empty list if there are no reads to compile
                - outputfile otherwise
    '''
    if isinstance( reads, (list, tuple) ):
        # Already a collection of read paths; os.path.isdir would raise
        # TypeError on it, so skip the path checks entirely
        pass
    elif os.path.isdir( reads ):
        reads = seqio.get_reads( reads )
    elif isinstance( reads, str ):
        # Single read file given
        if os.path.splitext( reads )[1] == '.sff':
            # Just convert the single reads
            return seqio.sffs_to_fastq( [reads], outputfile )
        # Already fastq so nothing to do
        # This is a bad assumption
        return reads

    # Empty read list
    if len( reads ) == 0:
        return []

    # Split the reads by type; anything that is neither .sff nor .fastq
    # is ignored
    sffs = fnmatch.filter( reads, '*.sff' )
    fastqs = fnmatch.filter( reads, '*.fastq' )

    tmpsfffastq = None
    try:
        if sffs:
            # Intermediate fastq holding all converted sff reads
            tmpsfffastq = os.path.join(
                os.path.dirname( outputfile ),
                'sff.' + os.path.basename( outputfile )
            )
            logger.info( "Concatting and Converting {} to fastq".format(sffs) )
            sfffastq = [seqio.sffs_to_fastq( sffs, tmpsfffastq )]
        else:
            sfffastq = []

        # Concat fastq files and sff converted fastq files into
        # outputfile
        converts = fastqs + sfffastq
        logger.info( "Concatting {} to {}".format( converts, outputfile ) )
        seqio.concat_files( converts, outputfile )
    finally:
        # Always remove the intermediate file so a failed conversion or
        # concat does not leave it behind; the existence check keeps a
        # missing temp file from masking the original error
        if tmpsfffastq is not None and os.path.exists( tmpsfffastq ):
            os.unlink( tmpsfffastq )

    return outputfile