def main():
    """Concatenate FASTA files from a directory, plus optional extras, into one file.

    Command-line arguments:
        --input_dir: directory searched for files matching ``*fasta``.
        --extra_fasta_files: additional FASTA paths, read after the files
            found in ``--input_dir``.
        --output: path of the combined FASTA file to write.
    """
    parser = argparse.ArgumentParser(
        description="concat_fasta",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_dir', type=dir_path, required=True,
                        help="dir with fasta")
    parser.add_argument('--extra_fasta_files', nargs='+', default=[],
                        help="extra fasta file list to include in the "
                             "sequence")
    parser.add_argument('--output', type=str, required=True,
                        help="output fasta")
    args = parser.parse_args()

    in_dir = args.input_dir
    out_file = args.output
    in_extra = args.extra_fasta_files

    # glob returns matches in arbitrary (filesystem-dependent) order; sort
    # them so the files are always read in the same order for a given
    # directory.
    in_fasta = sorted(glob.glob("{}/*fasta".format(in_dir)))

    sequences = read_sequences(*in_fasta, *in_extra)

    # NOTE(review): some versions of read_sequences appear to return a dict
    # of records keyed by id instead of a list. Iterating a dict would hand
    # the ids (not the records) to SeqIO.write, so unwrap the values first.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())

    Bio.SeqIO.write(sequences, out_file, 'fasta')
import argparse

from augur.align import read_sequences
# SeqIO is used below to write the output; import it explicitly so this
# script does not fail with a NameError at the write step.
from Bio import SeqIO


if __name__ == '__main__':
    # Command-line interface: one or more input FASTAs, one output FASTA.
    parser = argparse.ArgumentParser(
        description="Combine and dedup FASTAs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input', type=str, nargs="+", metavar="FASTA",
                        required=True, help="input FASTAs")
    parser.add_argument('--output', type=str, metavar="FASTA",
                        required=True, help="output FASTA")
    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.input)

    # Convert dictionary of sequences by id to a list, for compatibility with
    # augur versions <9.0.0.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())

    SeqIO.write(sequences, args.output, 'fasta')
def test_read_seq_compare(self):
    """Expect ``align.AlignmentError`` when reading the ``*_dup.fasta`` fixture."""
    fasta_path = str(
        pathlib.Path("tests/data/align/aa-seq_h3n2_ha_2y_2HA1_dup.fasta")
    )
    # The read itself is expected to raise before the assert is evaluated.
    with pytest.raises(align.AlignmentError):
        assert align.read_sequences(fasta_path)
def test_read_sequences(self):
    """Reading the aligned-sequences fixture should yield four entries."""
    fasta_path = str(
        pathlib.Path('tests/data/align/test_aligned_sequences.fasta')
    )
    loaded = align.read_sequences(fasta_path)
    assert len(loaded) == 4
import argparse from augur.align import read_sequences from Bio import SeqIO from pathlib import Path if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--sequences", required=True, nargs="+", help="FASTA file of sequences to partition into smaller chunks") parser.add_argument("--sequences-per-group", required=True, type=int, help="number of sequences to include in each group") parser.add_argument("--output-dir", required=True, help="directory to write out partitioned sequences") args = parser.parse_args() # Read sequences with augur to benefit from additional checks for duplicates. sequences = list(read_sequences(*args.sequences).values()) # Create the requested output directory. output_dir = Path(args.output_dir) output_dir.mkdir(exist_ok=True) # Determine partition indices. indices = list(range(0, len(sequences), args.sequences_per_group)) # Add a final index to represent the end of the last partition. if indices[-1] != len(sequences): indices.append(len(sequences)) # Partition sequences into groups of no more than the requested number. for i in range(len(indices) - 1): # Save partitioned sequences to a new FASTA file named after the partition number.
"--sequences", required=True, nargs="+", help="FASTA file of sequences to partition into smaller chunks") parser.add_argument("--sequences-per-group", required=True, type=int, help="number of sequences to include in each group") parser.add_argument("--output-dir", required=True, help="directory to write out partitioned sequences") args = parser.parse_args() # Read sequences with augur to benefit from additional checks for duplicates. sequences = read_sequences(*args.sequences) # Convert dictionary of sequences by id to a list, for compatibility with # augur versions <9.0.0. if isinstance(sequences, dict): sequences = list(sequences.values()) # Create the requested output directory. output_dir = Path(args.output_dir) output_dir.mkdir(exist_ok=True) # Determine partition indices. indices = list(range(0, len(sequences), args.sequences_per_group)) # Add a final index to represent the end of the last partition. if indices[-1] != len(sequences):
import argparse

from augur.align import read_sequences
from Bio import SeqIO


if __name__ == '__main__':
    # Command-line interface: one or more input FASTAs, one output FASTA.
    parser = argparse.ArgumentParser(
        description="Combine and dedup FASTAs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input', type=str, nargs="+", metavar="FASTA",
                        required=True, help="input FASTAs")
    parser.add_argument('--output', type=str, metavar="FASTA",
                        required=True, help="output FASTA")
    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.input)

    # read_sequences does not always return a dict: only call .values() when
    # it does, so a list return value no longer raises AttributeError.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())
    else:
        sequences = list(sequences)

    SeqIO.write(sequences, args.output, 'fasta')
import argparse

from augur.align import read_sequences
from Bio import SeqIO


if __name__ == '__main__':
    # Command-line interface: one or more input FASTAs, one output FASTA.
    parser = argparse.ArgumentParser(
        description="Combine and dedup FASTAs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--input', type=str, nargs="+", metavar="FASTA",
                        required=True, help="input FASTAs")
    parser.add_argument('--output', type=str, metavar="FASTA",
                        required=True, help="output FASTA")
    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.input)

    # NOTE(review): read_sequences may return a dict keyed by sequence id
    # (reportedly augur <9.0.0). list() on a dict yields the ids rather than
    # the records, which SeqIO.write cannot serialise, so take the values.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())
    else:
        sequences = list(sequences)

    SeqIO.write(sequences, args.output, 'fasta')
"--sequences", required=True, nargs="+", help="FASTA file of sequences to partition into smaller chunks") parser.add_argument("--sequences-per-group", required=True, type=int, help="number of sequences to include in each group") parser.add_argument("--output-dir", required=True, help="directory to write out partitioned sequences") args = parser.parse_args() # Read sequences with augur to benefit from additional checks for duplicates. sequences = list(read_sequences(*args.sequences)) # Create the requested output directory. output_dir = Path(args.output_dir) output_dir.mkdir(exist_ok=True) # Determine partition indices. indices = list(range(0, len(sequences), args.sequences_per_group)) # Add a final index to represent the end of the last partition. if indices[-1] != len(sequences): indices.append(len(sequences)) # Partition sequences into groups of no more than the requested number. for i in range(len(indices) - 1): # Save partitioned sequences to a new FASTA file named after the partition number.