Пример #1
0
from wub.util import seq as seq_util

# Command line interface. Both positional arguments are optional (nargs='?')
# and fall back to the standard streams when omitted.
parser = argparse.ArgumentParser(
    description='Reverse (but not complement!) sequences and qualities in fastq file.',
)
parser.add_argument(
    'input_fastq',
    nargs='?',
    type=argparse.FileType('r'),
    default=sys.stdin,
    help='Input fastq (default: stdin).',
)
parser.add_argument(
    'output_fastq',
    nargs='?',
    type=argparse.FileType('w'),
    default=sys.stdout,
    help='Output fastq (default: stdout)',
)


def reverse_seq_records(input_iterator):
    """Lazily reverse every record of the input.

    Each item is reversed by slicing it with a step of -1, so anything that
    supports extended slicing can be passed through.

    :param input_iterator: Iterator of SeqRecord objects.
    :returns: Generator of reversed SeqRecord objects.
    :rtype: generator
    """
    # Equivalent to the ``[::-1]`` slice literal.
    backwards = slice(None, None, -1)
    for entry in input_iterator:
        yield entry[backwards]


if __name__ == '__main__':
    args = parser.parse_args()

    # Pipeline: read fastq records -> reverse each one -> write fastq.
    records = seq_util.read_seq_records(args.input_fastq, format='fastq')
    reversed_records = reverse_seq_records(records)
    seq_util.write_seq_records(
        reversed_records, args.output_fastq, format='fastq')
Пример #2
0
    # Read in chromosomes of the input genome (materialised as a list because
    # they are used both for the SAM header and for the simulation):
    chromosomes = list(seq_util.read_seq_records(args.input_fasta))

    # Process error weights:
    error_weights = np.array(parse_util.separated_list_to_floats(args.w))
    # Normalise error weights to probabilities:
    error_weights = parse_util.normalise_array(error_weights)
    error_weights = dict(
        zip(['substitution', 'insertion', 'deletion'], error_weights))

    # The SAM writer is optional; it is only created when args.s is given.
    sw = None
    if args.s is not None:
        sw = sam_writer.SamWriter(args.s, build_sam_header(chromosomes))

    # Make sure the SAM writer is closed even if the simulation or one of the
    # writes below raises.
    try:
        simulation_iterator = simulate_sequencing(chromosomes, args.m, args.a,
                                                  args.l, args.u, args.e,
                                                  error_weights, args.b, args.q,
                                                  args.n, sw)

        # Wrap in a progress bar unless args.Q is set or the reads are
        # written to stdout.
        if not args.Q and args.output_fastq != sys.stdout:
            simulation_iterator = tqdm.tqdm(simulation_iterator, total=args.n)

        for simmed, sam in simulation_iterator:
            seq_util.write_seq_records(simmed, args.output_fastq, format='fastq')
            if sw is not None:
                sw.write(sam)
    finally:
        if sw is not None:
            sw.close()
Пример #3
0
                    type=argparse.FileType('w'),
                    default=sys.stdout)


def record_filter(input_iter, in_format, read_names):
    """ Filter SeqRecord objects by read name.

    :param input_iter: Iterator of SeqRecord objects.
    :param in_format: Input format (not used by this function).
    :param read_names: Iterable of hashable read identifiers to keep; each
        record's ``id`` attribute is matched against these.
    :returns: The records whose ``id`` is one of ``read_names``, in input order.
    :rtype: generator
    """
    # Build the lookup set once: membership tests become constant time and a
    # one-shot iterator of names is not exhausted by the first comparison.
    wanted = frozenset(read_names)
    for record in input_iter:
        if record.id in wanted:
            yield record


if __name__ == '__main__':
    args = parser.parse_args()

    records = seq_util.read_seq_records(args.input_fastx, format=args.i)

    # The read names to keep are given as one comma separated string.
    wanted_names = args.n.split(',')

    kept_records = record_filter(records, args.i, wanted_names)
    seq_util.write_seq_records(
        kept_records, args.output_fastx, format=args.o)
Пример #4
0
    :param error_weights: Relative frequency of substitutions,insertions,deletions.
    :returns: Generator of SeqRecord objects.
    :rtype: generator
    """
    for record in input_iter:
        mutated_seq = sim_seq.simulate_sequencing_errors(record.seq, error_rate, error_weights).seq
        record.seq = Seq(mutated_seq)
        yield record


if __name__ == '__main__':
    args = parser.parse_args()

    # Seed numpy's random generator only when a seed was supplied:
    if args.z is not None:
        np.random.seed(args.z)

    # Parse the error weights, normalise them to probabilities and key them
    # by error type (order: substitution, insertion, deletion):
    error_weights = parse_util.normalise_array(
        np.array(parse_util.separated_list_to_floats(args.w)))
    error_weights = dict(
        zip(['substitution', 'insertion', 'deletion'], error_weights))

    # Pipeline: read fasta records -> introduce errors -> write fasta.
    records = seq_util.read_seq_records(args.input_fasta, format='fasta')
    mutated_records = simulate_errors(records, args.e, error_weights)
    seq_util.write_seq_records(
        mutated_records, args.output_fasta, format='fasta')
Пример #5
0

if __name__ == '__main__':
    args = parser.parse_args()

    input_iterator = seq_util.read_seq_records(args.input_fastx, format=args.i)

    # Base name of the input file; it does not change between records, so it
    # is computed once up front.
    bn = path.basename(args.input_fastx.name)

    if args.b is None:
        # Splitting one record per file:
        # The output extension is whatever follows the last dot of the input
        # base name (the whole base name if it contains no dot).
        ext = bn.rsplit('.', 1)[-1]
        for record in input_iterator:
            out_path = path.join(
                args.output_dir, "{}.{}".format(record.id, ext))
            # The context manager flushes and closes the file even if
            # writing the record raises.
            with open(out_path, 'w') as fh:
                seq_util.write_seq_records([record], fh, format=args.o)
    else:
        # Split into batches:
        input_iterator = batch_iterator(input_iterator, args.b)
        for i, records in enumerate(input_iterator):
            out_path = path.join(
                args.output_dir, "batch_{}_{}".format(i, bn))
            with open(out_path, 'w') as fh:
                seq_util.write_seq_records(records, fh, format=args.o)