示例#1
0
def main():
    """Concatenate FASTA files from a directory, plus optional extras, into one FASTA.

    Command-line arguments:
        --input_dir: directory searched for paths matching ``*fasta``; the raw
            value is converted by the ``dir_path`` callable.
        --extra_fasta_files: one or more additional FASTA paths (default: none).
        --output: path of the FASTA file to write.

    All inputs are loaded with ``read_sequences`` and the result is written
    with ``Bio.SeqIO.write`` in ``fasta`` format.
    """
    parser = argparse.ArgumentParser(
        description="concat_fasta",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--input_dir',
                        type=dir_path,
                        required=True,
                        help="dir with fasta")
    # Adjacent string literals are joined implicitly inside the parentheses,
    # so no backslash continuation is needed.
    parser.add_argument('--extra_fasta_files',
                        nargs='+',
                        default=[],
                        help="extra fasta file list to include in the "
                             "sequence")
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        help="output fasta")

    args = parser.parse_args()

    in_dir = args.input_dir
    out_file = args.output
    in_extra = args.extra_fasta_files

    in_fasta = glob.glob("{}/*fasta".format(in_dir))

    sequences = read_sequences(*in_fasta, *in_extra)

    # NOTE(review): read_sequences is presumably augur.align.read_sequences,
    # which returns a dict of records keyed by id in some augur versions and a
    # list in others. Iterating a dict would hand SeqIO.write the keys rather
    # than the records, so normalize to a list of records first.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())

    Bio.SeqIO.write(sequences, out_file, 'fasta')
示例#2
0
import argparse
from augur.align import read_sequences

if __name__ == '__main__':
    # Combine one or more input FASTAs into a single output FASTA.
    #
    # SeqIO is used for the final write below but is not among this script's
    # visible imports; import it here so the script does not stop with a
    # NameError after all the input has already been read.
    from Bio import SeqIO

    parser = argparse.ArgumentParser(
        description="Combine and dedup FASTAs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--input',
                        type=str,
                        nargs="+",
                        metavar="FASTA",
                        required=True,
                        help="input FASTAs")
    parser.add_argument('--output',
                        type=str,
                        metavar="FASTA",
                        required=True,
                        help="output FASTA")
    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.input)

    # Convert dictionary of sequences by id to a list, for compatibility with
    # augur versions <9.0.0.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())

    SeqIO.write(sequences, args.output, 'fasta')
示例#3
0
 def test_read_seq_compare(self):
     """Expect ``align.read_sequences`` to raise ``AlignmentError`` for this fixture.

     NOTE(review): the ``_dup`` suffix of the fixture name suggests it holds
     duplicated entries -- confirm against the test data.
     """
     fixture_path = str(pathlib.Path("tests/data/align/aa-seq_h3n2_ha_2y_2HA1_dup.fasta"))
     with pytest.raises(align.AlignmentError):
         assert align.read_sequences(fixture_path)
示例#4
0
 def test_read_sequences(self):
     """Reading the aligned-sequences fixture should yield four entries."""
     fixture_path = str(pathlib.Path('tests/data/align/test_aligned_sequences.fasta'))
     loaded = align.read_sequences(fixture_path)
     expected_count = 4
     assert len(loaded) == expected_count
import argparse
from augur.align import read_sequences
from Bio import SeqIO
from pathlib import Path


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--sequences", required=True, nargs="+", help="FASTA file of sequences to partition into smaller chunks")
    parser.add_argument("--sequences-per-group", required=True, type=int, help="number of sequences to include in each group")
    parser.add_argument("--output-dir", required=True, help="directory to write out partitioned sequences")

    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = list(read_sequences(*args.sequences).values())

    # Create the requested output directory.
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True)

    # Determine partition indices.
    indices = list(range(0, len(sequences), args.sequences_per_group))

    # Add a final index to represent the end of the last partition.
    if indices[-1] != len(sequences):
        indices.append(len(sequences))

    # Partition sequences into groups of no more than the requested number.
    for i in range(len(indices) - 1):
        # Save partitioned sequences to a new FASTA file named after the partition number.
示例#6
0
        "--sequences",
        required=True,
        nargs="+",
        help="FASTA file of sequences to partition into smaller chunks")
    parser.add_argument("--sequences-per-group",
                        required=True,
                        type=int,
                        help="number of sequences to include in each group")
    parser.add_argument("--output-dir",
                        required=True,
                        help="directory to write out partitioned sequences")

    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.sequences)

    # Convert dictionary of sequences by id to a list, for compatibility with
    # augur versions <9.0.0.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())

    # Create the requested output directory.
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True)

    # Determine partition indices.
    indices = list(range(0, len(sequences), args.sequences_per_group))

    # Add a final index to represent the end of the last partition.
    if indices[-1] != len(sequences):
示例#7
0
from Bio import SeqIO
import argparse
from augur.align import read_sequences

if __name__ == '__main__':
    # Combine one or more input FASTAs into a single output FASTA.
    parser = argparse.ArgumentParser(
        description="Combine and dedup FASTAs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--input',
                        type=str,
                        nargs="+",
                        metavar="FASTA",
                        required=True,
                        help="input FASTAs")
    parser.add_argument('--output',
                        type=str,
                        metavar="FASTA",
                        required=True,
                        help="output FASTA")
    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.input)

    # Depending on the augur version, read_sequences returns either a dict of
    # records keyed by id or a plain list of records. Calling .values()
    # unconditionally fails on the list form, so only unwrap when it is a dict.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())
    else:
        sequences = list(sequences)

    SeqIO.write(sequences, args.output, 'fasta')
示例#8
0
from Bio import SeqIO
import argparse
from augur.align import read_sequences

if __name__ == '__main__':
    # Combine one or more input FASTAs into a single output FASTA.
    parser = argparse.ArgumentParser(
        description="Combine and dedup FASTAs",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('--input', type=str,  nargs="+", metavar="FASTA", required=True, help="input FASTAs")
    parser.add_argument('--output', type=str, metavar="FASTA", required=True, help="output FASTA")
    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = read_sequences(*args.input)

    # Depending on the augur version, read_sequences returns either a dict of
    # records keyed by id or a plain list of records. list() over a dict would
    # keep only the ids, so take the dict's values when a dict comes back.
    if isinstance(sequences, dict):
        sequences = list(sequences.values())
    else:
        sequences = list(sequences)

    SeqIO.write(sequences, args.output, 'fasta')
示例#9
0
        "--sequences",
        required=True,
        nargs="+",
        help="FASTA file of sequences to partition into smaller chunks")
    parser.add_argument("--sequences-per-group",
                        required=True,
                        type=int,
                        help="number of sequences to include in each group")
    parser.add_argument("--output-dir",
                        required=True,
                        help="directory to write out partitioned sequences")

    args = parser.parse_args()

    # Read sequences with augur to benefit from additional checks for duplicates.
    sequences = list(read_sequences(*args.sequences))

    # Create the requested output directory.
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True)

    # Determine partition indices.
    indices = list(range(0, len(sequences), args.sequences_per_group))

    # Add a final index to represent the end of the last partition.
    if indices[-1] != len(sequences):
        indices.append(len(sequences))

    # Partition sequences into groups of no more than the requested number.
    for i in range(len(indices) - 1):
        # Save partitioned sequences to a new FASTA file named after the partition number.