def separate_amplicons( input_data, reference_fofn, target_loci, output=None ):
    """
    Public interface for _separate_amplicons, handling several loci at once.

    Every input subread file whose locus is in target_loci, and which is
    not already amplicon-specific, has its locus split by amplicon.  The
    resulting file list is written to the output list file.

    Args:
        input_data: subread input; turned into a file list by _parse_input.
        reference_fofn: reference listing; parse_reference_dict turns it
            into a mapping that is indexed by locus.
        target_loci: collection of locus names (strings) to separate.
        output: list file to write; when falsy, the value of
            _get_output_file(input_data) is used instead.

    Raises:
        Whatever the lookup references[locus] raises when a target locus
        has no reference entry (KeyError if the mapping is a plain dict).
    """
    # Check and set the input and output, as needed
    log.info("Separating amplicons for these loci[%s]" % ','.join(target_loci))
    file_list = list( _parse_input( input_data ) )
    output = output or _get_output_file( input_data )
    references = parse_reference_dict( reference_fofn )

    # Iterate over the input subread files, splitting as needed.
    # NOTE(review): assumes _separate_amplicons returns the complete updated
    # file list (it is written out unmodified elsewhere in this module), so
    # the result of one locus is fed into the next -- confirm.
    new_file_list = list( file_list )
    finished_loci = set()
    for filepath in file_list:
        locus = get_file_locus( filepath )
        if locus not in target_loci or locus in finished_loci:
            continue
        if is_amplicon_specific( filepath ):
            log.info("Subreads for Locus %s already split, skipping..." % locus)
            continue
        # Otherwise, separate the sequences for this locus
        log.info("Separating subreads by amplicon for Locus %s" % locus)
        reference_fasta = references[locus]
        new_file_list = _separate_amplicons( new_file_list, reference_fasta, locus)
        finished_loci.add( locus )

    # Write the results once, even when nothing needed splitting
    write_list_file( new_file_list, output )
def separate_amplicons(input_data, reference_fofn, target_loci, output=None):
    """
    Public interface for _separate_amplicons, handling several loci at once.

    Every input subread file whose locus is in target_loci, and which is
    not already amplicon-specific, has its locus split by amplicon.  The
    resulting file list is written to the output list file.

    Args:
        input_data: subread input; turned into a file list by _parse_input.
        reference_fofn: reference listing; parse_reference_dict turns it
            into a mapping that is indexed by locus.
        target_loci: collection of locus names (strings) to separate.
        output: list file to write; when falsy, the value of
            _get_output_file(input_data) is used instead.

    Raises:
        Whatever the lookup references[locus] raises when a target locus
        has no reference entry (KeyError if the mapping is a plain dict).
    """
    # Check and set the input and output, as needed
    log.info("Separating amplicons for these loci[%s]" % ','.join(target_loci))
    file_list = list(_parse_input(input_data))
    output = output or _get_output_file(input_data)
    references = parse_reference_dict(reference_fofn)

    # Iterate over the input subread files, splitting as needed.
    # NOTE(review): assumes _separate_amplicons returns the complete updated
    # file list (it is written out unmodified elsewhere in this module), so
    # the result of one locus is fed into the next -- confirm.
    new_file_list = list(file_list)
    finished_loci = set()
    for filepath in file_list:
        locus = get_file_locus(filepath)
        if locus not in target_loci or locus in finished_loci:
            continue
        if is_amplicon_specific(filepath):
            log.info("Subreads for Locus %s already split, skipping..." % locus)
            continue
        # Otherwise, separate the sequences for this locus
        log.info("Separating subreads by amplicon for Locus %s" % locus)
        reference_fasta = references[locus]
        new_file_list = _separate_amplicons(new_file_list, reference_fasta, locus)
        finished_loci.add(locus)

    # Write the results once, even when nothing needed splitting
    write_list_file(new_file_list, output)
def summarize_amp_analysis( input_dir, output_dir ):
    """
    Combine the AmpliconAnalysis results found under input_dir into
    output_dir and return the path of the list file naming the outputs.

    The output directory is passed to create_directory first.  The list
    file is always "AmpliconAssembly_Results.fofn" inside output_dir.
    """
    create_directory( output_dir )
    template = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info( template.format(input_dir, output_dir) )

    # Each stage is fully materialised before the next one begins
    found_results = list( find_amp_assem_results(input_dir) )
    written_files = list( output_amp_assem_results(found_results, output_dir) )

    fofn_path = os.path.join( output_dir, "AmpliconAssembly_Results.fofn" )
    write_list_file( written_files, fofn_path )
    return fofn_path
def summarize_amp_analysis(input_dir, output_dir):
    """
    Combine the AmpliconAnalysis results found under input_dir into
    output_dir and return the path of the list file naming the outputs.

    The output directory is passed to create_directory first.  The list
    file is always "AmpliconAssembly_Results.fofn" inside output_dir.
    """
    create_directory(output_dir)
    template = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info(template.format(input_dir, output_dir))

    # Each stage is fully materialised before the next one begins
    found_results = list(find_amp_assem_results(input_dir))
    written_files = list(output_amp_assem_results(found_results, output_dir))

    fofn_path = os.path.join(output_dir, "AmpliconAssembly_Results.fofn")
    write_list_file(written_files, fofn_path)
    return fofn_path
def combine_clusense_output(input_dir, output_dir):
    """
    Combine the clusense output located via input_dir into output_dir.

    Two list files are written inside output_dir: one named by CNS_FOFN
    and one named by READ_FOFN, holding the two file lists returned by
    output_clusters.

    Returns:
        A (cns_fofn_path, read_fofn_path) tuple.
    """
    create_directory( output_dir )
    template = 'Combining clusense output from "{0}" in "{1}"'
    log.info( template.format(input_dir, output_dir) )

    # Locate the clusters and copy their contents to the output directory
    clusters = find_clusense_clusters( find_clusense_dirs( input_dir ) )
    consensus_files, subread_files = output_clusters( clusters, output_dir )

    # Consensus list file first, then the read list file
    cns_fofn = os.path.join( output_dir, CNS_FOFN )
    write_list_file( consensus_files, cns_fofn )
    read_fofn = os.path.join( output_dir, READ_FOFN )
    write_list_file( subread_files, read_fofn )
    return cns_fofn, read_fofn
def combine_clusense_output(input_dir, output_dir):
    """
    Combine the clusense output located via input_dir into output_dir.

    Two list files are written inside output_dir: one named by CNS_FOFN
    and one named by READ_FOFN, holding the two file lists returned by
    output_clusters.

    Returns:
        A (cns_fofn_path, read_fofn_path) tuple.
    """
    create_directory(output_dir)
    template = 'Combining clusense output from "{0}" in "{1}"'
    log.info(template.format(input_dir, output_dir))

    # Locate the clusters and copy their contents to the output directory
    clusters = find_clusense_clusters(find_clusense_dirs(input_dir))
    consensus_files, subread_files = output_clusters(clusters, output_dir)

    # Consensus list file first, then the read list file
    cns_fofn = os.path.join(output_dir, CNS_FOFN)
    write_list_file(consensus_files, cns_fofn)
    read_fofn = os.path.join(output_dir, READ_FOFN)
    write_list_file(subread_files, read_fofn)
    return cns_fofn, read_fofn
def create_baxh5_fofn( input_file, output_file ):
    """
    Write a FOFN of the BaxH5 files that correspond to input_file.

    The input is dispatched on its filename suffix:
        .fofn        -> the files returned by _parse_fofn
        .bas.h5      -> the files returned by _parse_bash5
        .bax.h5      -> the input file itself
        .fa / .fasta -> no BaxH5 files (an empty list is written)

    Args:
        input_file: path of the input; only its suffix is inspected here.
        output_file: path of the list file to write.

    Returns:
        output_file, unchanged.

    Raises:
        TypeError: if input_file has none of the recognised suffixes.
    """
    log.info("Converting %s into a FOFN of BaxH5 files" % input_file)
    if input_file.endswith('.fofn'):
        baxh5_files = _parse_fofn( input_file )
    elif input_file.endswith('.bas.h5'):
        baxh5_files = _parse_bash5( input_file )
    elif input_file.endswith('.bax.h5'):
        baxh5_files = [input_file]
    elif input_file.endswith(('.fa', '.fasta')):
        # Fasta input carries no BaxH5 data, so the FOFN is left empty
        baxh5_files = []
    else:
        msg = 'Invalid input filetype "%s"' % input_file
        # Failures are reported at error level, as in separate_amplicons
        log.error( msg )
        raise TypeError( msg )
    log.info("Writing a total of %s BaxH5 files to BaxH5 Fofn" % len(baxh5_files))
    write_list_file( baxh5_files, output_file )
    return output_file
def separate_amplicons( subread_input, reference_fofn, locus, output=None ):
    """
    Public interface for _separate_amplicons

    Args:
        subread_input: either the path (str) of a list file, read with
            read_list_file, or a list of files used as-is.
        reference_fofn: reference listing; _parse_reference_fofn picks the
            reference for the given locus from it.
        locus: the locus whose subreads are separated by amplicon.
        output: list file to write.  Defaults to subread_input when that
            is a path; required when subread_input is a list.

    Returns:
        None.  Nothing is written when _split_exists reports that the
        files for this locus have already been split.

    Raises:
        ValueError: if subread_input is a list and no output is given.
        TypeError: if subread_input is neither a str nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file( subread_input )
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error( msg )
            raise ValueError( msg )
    else:
        # Fail clearly here instead of hitting an unbound file_list below
        msg = 'Subread input must be a list-file path or a list of files, not "%s"' % type(subread_input).__name__
        log.error( msg )
        raise TypeError( msg )

    # If the inputs are valid, check that the files haven't already been split
    if _split_exists( file_list, locus ):
        log.info("Subreads for Locus %s already split, skipping..." % locus)
        return

    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn( reference_fofn, locus )
    new_file_list = _separate_amplicons( file_list, reference_fasta, locus)
    write_list_file( new_file_list, output )
def separate_amplicons(subread_input, reference_fofn, locus, output=None):
    """
    Public interface for _separate_amplicons

    Args:
        subread_input: either the path (str) of a list file, read with
            read_list_file, or a list of files used as-is.
        reference_fofn: reference listing; _parse_reference_fofn picks the
            reference for the given locus from it.
        locus: the locus whose subreads are separated by amplicon.
        output: list file to write.  Defaults to subread_input when that
            is a path; required when subread_input is a list.

    Returns:
        None.  Nothing is written when _split_exists reports that the
        files for this locus have already been split.

    Raises:
        ValueError: if subread_input is a list and no output is given.
        TypeError: if subread_input is neither a str nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file(subread_input)
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error(msg)
            raise ValueError(msg)
    else:
        # Fail clearly here instead of hitting an unbound file_list below
        msg = 'Subread input must be a list-file path or a list of files, not "%s"' % type(subread_input).__name__
        log.error(msg)
        raise TypeError(msg)

    # If the inputs are valid, check that the files haven't already been split
    if _split_exists(file_list, locus):
        log.info("Subreads for Locus %s already split, skipping..." % locus)
        return

    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn(reference_fofn, locus)
    new_file_list = _separate_amplicons(file_list, reference_fasta, locus)
    write_list_file(new_file_list, output)