Пример #1
0
def strip_bases(args):
    """
    Strip the 1st and last 'N' bases from mapping consensuses

    Uses: 
        * args.cons
        * args.seqs_of_interest
        * arg.strip

    To avoid the effects of lead in and lead out coverage resulting in 
    uncalled bases
    
    :param args: the argparse args containing args.strip value

    :type args: argparse args

    :rtype: the updated args to reflect the args.cons & 
            args.seqs_of_interest location
    """
    # Get in the fasta files in the consensus directory
    fasta_in = util.get_fasta_files(args.cons)
    # Build a stripped directory
    new_cons_dir = os.path.join(args.cons, 'stripped')
    try:
        os.mkdir(new_cons_dir)
    except OSError:
        sys.stderr.write("A stripped directory exists. Overwriting\n")
    # Update the args.cons to the stripped directory
    args.cons = new_cons_dir
    args.strip = int(args.strip)
    # Strip the start and end
    for fa in fasta_in:
        tmp = os.path.basename(fa)
        out = os.path.join(args.cons, tmp)
        with open(fa, "rU") as fin, open(out, 'w') as fout:
            records = SeqIO.parse(fin, "fasta")
            for rec in records:
                rec.seq = rec.seq[args.strip:-args.strip]
                SeqIO.write(rec, fout, "fasta")
    # Trim the db as well
    tmp = args.seqs_of_interest.split('.')
    stripped_db = '.'.join(tmp[:-1])+'_trimmed.'+tmp[-1]
    with open(args.seqs_of_interest, "rU") as fin, open(stripped_db, 'w') as fout:
        records = SeqIO.parse(fin, "fasta")
        for rec in records:
            rec.seq = rec.seq[args.strip:-args.strip]
            SeqIO.write(rec, fout, "fasta")
    #Update the args.seqs_of_interest
    args.seqs_of_interest = stripped_db
    return args
Пример #2
0
def do_run(args, data_path, match_score, vfs_list):
    """
    Perform a SeqFindR run
    """
    matrix, y_label = [], []
    in_files = util.get_fasta_files(data_path)
    # Reorder if requested 
    if args.index_file != None:
        in_files = util.order_inputs(args.index_file, in_files)
    for subject in in_files:
        strain_id = blast.make_BLAST_database(subject)
        y_label.append(strain_id)
        database      = os.path.basename(subject)
        blast_xml     = blast.run_BLAST(args.seqs_of_interest, os.path.join(os.getcwd(), "DBs/"+database), args)
        accepted_hits = blast.parse_BLAST(blast_xml, float(args.tol))
        row = build_matrix_row(vfs_list, accepted_hits, match_score)
        row.insert(0,strain_id)
        matrix.append(row)
    return matrix, y_label