Пример #1
0
def _sam_to_bam(bam_fn):
    """Convert ``bam_fn`` to BAM with ``samtools view`` when needed.

    A path whose name already ends in "bam" is returned unchanged and no
    command is run. Otherwise the extension is replaced by ".bam", the
    conversion is executed through ``do.run`` and the new path is returned.
    """
    # Nothing to convert: the caller already handed us a BAM file.
    if bam_fn.endswith("bam"):
        return bam_fn
    stem = os.path.splitext(bam_fn)[0]
    bam_out = "%s.bam" % stem
    command = "samtools view -Sbh {bam_fn} -o {bam_out}".format(
        bam_fn=bam_fn, bam_out=bam_out)
    do.run(command)
    return bam_out
Пример #2
0
def _sam_to_bam(bam_fn):
    """Return a BAM path for ``bam_fn``, converting with samtools if required.

    When the name already ends in "bam" the input path is handed back as is.
    In every other case ``samtools view -Sbh`` is run via ``do.run`` to write
    ``<input without extension>.bam`` and that path is returned.
    """
    needs_conversion = not bam_fn.endswith("bam")
    if needs_conversion:
        base, _ = os.path.splitext(bam_fn)
        bam_out = "%s.bam" % base
        template = "samtools view -Sbh {bam_fn} -o {bam_out}"
        do.run(template.format(bam_fn=bam_fn, bam_out=bam_out))
        return bam_out
    return bam_fn
Пример #3
0
def _cmd_miraligner(fn, out_file, species, hairpin):
    """
    Run miraligner for miRNA annotation.

    The database directory passed to ``-db`` is the directory containing
    ``hairpin``. If ``out_file`` already exists nothing is executed;
    otherwise the command is run and the ``<out_file>.mirna`` file is moved
    to ``out_file``. Returns ``out_file`` in both cases.
    """
    tool = _get_miraligner()
    path_db = op.dirname(op.abspath(hairpin))
    # NOTE(review): opts is never interpolated into the command below;
    # confirm whether these JVM flags were meant to be passed to the tool.
    opts = "-Xms750m -Xmx4g"
    # Skip the run entirely when a previous result is already in place.
    if file_exists(out_file):
        return out_file
    command = "{tool} -freq -i {fn} -o {out_file} -s {species} -db {path_db} -sub 1 -trim 3 -add 3".format(
        tool=tool, fn=fn, out_file=out_file, species=species,
        path_db=path_db)
    do.run(command, "miraligner with %s" % fn)
    # The result is expected under a ".mirna" suffix; rename it to the
    # requested output path.
    shutil.move(out_file + ".mirna", out_file)
    return out_file
Пример #4
0
def _download_mirbase(args, version="CURRENT"):
    """
    Download hairpin and miRNA structure files from miRBase when needed.

    If both ``args.hairpin`` and ``args.mirna`` are set they are returned
    untouched and nothing is downloaded. Otherwise ``hairpin.fa.gz`` and
    ``miRNA.str.gz`` for ``version`` are fetched into ``args.out`` and
    decompressed there.

    Returns a ``(hairpin, mirna)`` tuple of file paths in both cases.
    """
    if args.hairpin and args.mirna:
        return args.hairpin, args.mirna

    logger.info("Working with version %s" % version)
    out_dir = op.abspath(args.out)
    hairpin_gz = op.join(out_dir, "hairpin.fa.gz")
    mirna_gz = op.join(out_dir, "miRNA.str.gz")
    # gunzip replaces "<name>.gz" with "<name>", so the decompressed path is
    # the one that remains on disk and the one worth checking and returning.
    hairpin_fn = op.splitext(hairpin_gz)[0]
    mirna_fn = op.splitext(mirna_gz)[0]
    if not file_exists(hairpin_fn):
        # The archive path is spelled out for gunzip: the "!$" history
        # shortcut is only expanded by interactive shells.
        cmd_h = "wget ftp://mirbase.org/pub/mirbase/%s/hairpin.fa.gz -O %s && gunzip -f %s" % (
            version, hairpin_gz, hairpin_gz)
        do.run(cmd_h, "download hairpin")
    if not file_exists(mirna_fn):
        cmd_m = "wget ftp://mirbase.org/pub/mirbase/%s/miRNA.str.gz -O %s && gunzip -f %s" % (
            version, mirna_gz, mirna_gz)
        do.run(cmd_m, "download mirna")
    return hairpin_fn, mirna_fn
Пример #5
0
def _bam_sort(bam_fn):
    """Sort ``bam_fn`` with ``samtools sort -n`` and return the sorted path.

    The output is written next to the input as ``<name>_sort.bam``. When that
    file already exists the sort is skipped and the existing path returned.
    """
    prefix = op.splitext(bam_fn)[0]
    bam_sort_by_n = prefix + "_sort.bam"
    # Reuse a previous result instead of sorting again.
    if file_exists(bam_sort_by_n):
        return bam_sort_by_n
    command = "samtools sort -n -o {bam_sort_by_n} {bam_fn}".format(
        bam_sort_by_n=bam_sort_by_n, bam_fn=bam_fn)
    do.run(command)
    return bam_sort_by_n
Пример #6
0
def _bam_sort(bam_fn):
    """Sort ``bam_fn`` with ``samtools sort -n`` and return the output path.

    The sorted file is written as ``<input without extension>_sort.bam``; the
    command is always executed through ``runner.run``.
    """
    stem, _ = os.path.splitext(bam_fn)
    bam_sort_by_n = stem + "_sort.bam"
    command = "samtools sort -n -o {bam_sort_by_n} {bam_fn}".format(
        bam_sort_by_n=bam_sort_by_n, bam_fn=bam_fn)
    runner.run(command)
    return bam_sort_by_n
Пример #7
0
def _sam_to_bam(bam_fn):
    """Convert a SAM file to BAM and return the path of the BAM file.

    The output path is the input path with its extension replaced by
    ".bam". A path that already ends in ".bam" is returned unchanged without
    running anything, because the conversion would otherwise read from and
    write to the same file.
    """
    if bam_fn.endswith(".bam"):
        return bam_fn
    bam_out = "%s.bam" % os.path.splitext(bam_fn)[0]
    cmd = "samtools view -Sbh {bam_fn} -o {bam_out}"
    runner.run(cmd.format(bam_fn=bam_fn, bam_out=bam_out))
    # Hand back the converted file rather than the SAM input so callers
    # continue with the BAM that was just written.
    return bam_out
Пример #8
0
    # read sequences and score hits (ignore same sequence)
    handle = pysam.Samfile(sam, "rb")
    for line in handle:
        reference = handle.getrname(line.reference_id)
        name = line.query_name
        # sequence = line.query_sequence if not line.is_reverse else reverse_complement(line.query_sequence)
        if reference == name:
            continue
        # print([reference, name, line.get_tag("NM")])
        distance = line.get_tag("NM")
        uniques[name].append(distance)
        uniques[reference].append(distance)
    # read parsed data and set to None every sequence with a hit closer than 5 edits
    for name in uniques:
        if min(uniques[name]) < 5:
            if name in source:
                source[name] = None
    return source


# Align the input FASTA against itself with razers3 (all vs all)
source = _read_fasta(args.fa)
sam = os.path.join(os.path.dirname(args.out), "modified.bam")
_razers_cmd = "razers3 -dr 5 -i 75 -rr 80 -f -so 1 -o {output} {target} {query}".format(
    output=sam, target=args.fa, query=args.fa)
runner.run(_razers_cmd)
# Filter the loaded sequences using the hits that were just written
uniques = _parse_hits(sam, source)

# Write the remaining sequences to the output FASTA
_write_fasta(uniques, args.out, args.max_size)
Пример #9
0
def _bam_sort(bam_fn):
    """Return the path of a ``samtools sort -n`` sorted copy of ``bam_fn``.

    The sorted file lives beside the input and is named
    ``<input without extension>_sort.bam``. The sort only runs when that
    file is not already present.
    """
    bam_sort_by_n = "".join([op.splitext(bam_fn)[0], "_sort.bam"])
    already_sorted = file_exists(bam_sort_by_n)
    if not already_sorted:
        template = "samtools sort -n -o {bam_sort_by_n} {bam_fn}"
        do.run(template.format(bam_sort_by_n=bam_sort_by_n, bam_fn=bam_fn))
    return bam_sort_by_n
Пример #10
0
parser.add_argument(
    "--universe",
    default=None,
    help="Set up universe sequences to avoid duplication.")

args = parser.parse_args()
random.seed(args.seed)

# The logger is initialised with the directory that holds the output file.
_log_dir = os.path.dirname(os.path.abspath(args.out))
mylog.initialize_logger(_log_dir)
logger = mylog.getLogger(__name__)

# Read the FASTA with a length threshold of size - 2
# (presumably keeps only sequences longer than that; see _read_fasta).
size = args.size - 2
source = _read_fasta(args.fa, size)
logger.info("%s was read: %s sequences were loaded" % (args.fa, len(source)))
source = _update_ends(source)
logger.info("source updated with extended nts: %s" % source)

# Map all vs all with razers3: the modified sequences against themselves
_out_dir = os.path.dirname(args.out)
modified = _write_fasta(source, os.path.join(_out_dir, "modified.fa"))
sam = os.path.join(_out_dir, "modified.bam")
_self_cmd = "razers3 -i 75 -rr 80 -f -so 1 -o {output} {target} {query}".format(
    output=sam, target=modified, query=modified)
runner.run(_self_cmd)
uniques = _parse_hits(sam, source)
print(uniques)
if args.universe:
    # Second pass: map the modified sequences against the universe file
    sam = os.path.join(_out_dir, "modified_vs_universe.sam")
    _universe_cmd = "razers3 -i 75 -rr 80 -f -o {output} {target} {query}".format(
        output=sam, target=args.universe, query=modified)
    runner.run(_universe_cmd)
    uniques = _parse_hits(sam, uniques)
print(uniques)

# Write uniques to fasta
_write_fasta(uniques, args.out)
Пример #11
0
def get_fasta(bed_file, ref, out_fa):
    """Run ``bedtools getfasta`` to extract the regions of a BED file.

    ``ref`` is passed as the FASTA input (``-fi``), ``bed_file`` as the
    regions (``-bed``) and ``out_fa`` as the output file (``-fo``); the
    ``-s`` flag is always set. Nothing is returned.
    """
    command = "bedtools getfasta -s -fi {ref} -bed {bed_file} -fo {out_fa}".format(
        ref=ref, bed_file=bed_file, out_fa=out_fa)
    run(command)