示例#1
0
def run_cluster(*data):
    """
    Run seqcluster cluster to detect smallRNA clusters.

    All paths are derived from the work directory of the first sample
    (``data[0][0]``). That sample receives the report under ``"report"``
    and the cluster output directory under ``"seqcluster"``. When the
    counting helper returns something truthy, its first two items are stored
    through the miRNA / isomiR setters (known and novel). The updated sample
    is written back to ``data[0][0]`` before ``data`` is returned.
    """
    first = data[0][0]
    project_dir = dd.get_work_dir(first)
    cluster_out = op.abspath(safe_makedir(op.join(project_dir, "seqcluster", "cluster")))
    prepared = op.join(project_dir, "seqcluster", "prepare")
    merged_bam = op.join(project_dir, "align", "seqs.bam")
    reference = dd.get_ref_file(first)
    annotation = dd.get_srna_gtf_file(first)
    # The value returned by the clustering step is not used in this function.
    _cluster(merged_bam, prepared, cluster_out, reference, annotation)
    first["report"] = _report(first, dd.get_ref_file(first))
    first["seqcluster"] = cluster_out

    known = _make_isomir_counts(data, out_dir=op.join(project_dir, "mirbase"))
    if known:
        first = dd.set_mirna_counts(first, known[0])
        first = dd.set_isomir_counts(first, known[1])

    novel = _make_isomir_counts(data, "seqbuster_novel", op.join(project_dir, "mirdeep2"), "_novel")
    # mirdeep runs regardless of whether novel counts were produced.
    mirdeep.run(data)
    if novel:
        first = dd.set_novel_mirna_counts(first, novel[0])
        first = dd.set_novel_isomir_counts(first, novel[1])

    # The setters may hand back a new object, so store the final one explicitly.
    data[0][0] = first
    return data
示例#2
0
def run_cluster(*data):
    """
    Run seqcluster cluster to detect smallRNA clusters.

    The work directory, reference file and sRNA GTF file are all read from
    the first sample (``data[0][0]``).

    :param data: sample groups; ``item[0]`` of each group is the sample
        mapping that is updated in place.
    :returns: the same ``data``, with ``"seqcluster"`` set on every sample
        and ``"mirna_counts"`` / ``"isomir_counts"`` set on the first one.
    """
    first = data[0][0]
    work_dir = dd.get_work_dir(first)
    # Use the ``op`` alias throughout, matching the other path joins below.
    out_dir = op.abspath(safe_makedir(op.join(work_dir, "seqcluster", "cluster")))
    prepare_dir = op.join(work_dir, "seqcluster", "prepare")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    # The return values of the cluster and report steps are not needed here.
    _cluster(bam_file, prepare_dir, out_dir, dd.get_ref_file(first), dd.get_srna_gtf_file(first))
    _report(first, dd.get_ref_file(first))
    for sample in data:
        sample[0]["seqcluster"] = out_dir
    out_mirna, out_isomir = _make_isomir_counts(data)
    # Counts are recorded on the first sample only.
    data[0][0]["mirna_counts"] = out_mirna
    data[0][0]["isomir_counts"] = out_isomir
    mirdeep.run(data)
    return data
示例#3
0
def run_cluster(*data):
    """
    Run seqcluster cluster to detect smallRNA clusters.

    Inputs (work directory, reference file, sRNA GTF file) come from the
    first sample, ``data[0][0]``.

    :param data: sample groups; the sample mapping of each group is reached
        through ``group[0]`` and modified in place.
    :returns: ``data`` itself. Every sample gets a ``"seqcluster"`` entry
        pointing at the cluster output directory; the first sample also gets
        ``"mirna_counts"`` and ``"isomir_counts"``.
    """
    lead = data[0][0]
    work_dir = dd.get_work_dir(lead)
    # One path alias (``op``) for the whole function instead of mixing it
    # with ``os.path``.
    cluster_dir = op.join(work_dir, "seqcluster", "cluster")
    out_dir = op.abspath(safe_makedir(cluster_dir))
    prepare_dir = op.join(work_dir, "seqcluster", "prepare")
    bam_file = op.join(work_dir, "align", "seqs.bam")
    # Results of the cluster, report and mirdeep steps are not consumed here,
    # so they are called without binding their return values.
    _cluster(bam_file, prepare_dir, out_dir, dd.get_ref_file(lead), dd.get_srna_gtf_file(lead))
    _report(lead, dd.get_ref_file(lead))
    for group in data:
        group[0]["seqcluster"] = out_dir
    out_mirna, out_isomir = _make_isomir_counts(data)
    data[0][0]["mirna_counts"] = out_mirna
    data[0][0]["isomir_counts"] = out_isomir
    mirdeep.run(data)
    return data
示例#4
0
def run_align(*data):
    """
    Prepare data to run alignment step, only once for each project.

    The alignment is produced from ``seqcluster/prepare/seqs.fastq`` using the
    first sample (``data[0][0]``) and is skipped when ``align/seqs.bam``
    already exists in the work directory. Every sample then gets its
    ``"align_bam"`` entry set to its own ``"clean_fastq"`` value, and mirdeep
    is run when ``"mirdeep2"`` is among the expression callers.
    """
    first = data[0][0]
    project_dir = dd.get_work_dir(first)
    fastq = op.join(op.join(project_dir, "seqcluster", "prepare"), "seqs.fastq")
    align_dir = op.join(project_dir, "align")
    project_bam = op.join(align_dir, "seqs.bam")
    callers = dd.get_expression_caller(first)

    if not file_exists(project_bam):
        aligned = process_alignment(first, [fastq, None])[0][0]
        sample_bam = dd.get_work_bam(aligned)
        # Move the BAM and its index to the project-level location, then
        # remove the per-sample alignment directory.
        shutil.move(sample_bam, project_bam)
        shutil.move(sample_bam + ".bai", project_bam + ".bai")
        shutil.rmtree(op.join(align_dir, aligned["rgnames"]['sample']))

    for group in data:
        info = group[0]
        info["align_bam"] = info["clean_fastq"]

    if "mirdeep2" in callers:
        mirdeep.run(data)
    return data
示例#5
0
def run_align(*data):
    """
    Prepare data to run alignment step, only once for each project.

    Aligns ``seqcluster/prepare/seqs.fastq`` through the first sample
    (``data[0][0]``) unless ``align/seqs.bam`` is already present, then
    records that BAM path under ``"cluster_bam"`` on every sample. mirdeep
    is run afterwards when ``"mirdeep2"`` is listed in the expression callers.
    """
    lead = data[0][0]
    work_dir = dd.get_work_dir(lead)
    prepare_dir = op.join(work_dir, "seqcluster", "prepare")
    merged_fastq = op.join(prepare_dir, "seqs.fastq")
    bam_dir = op.join(work_dir, "align")
    shared_bam = op.join(bam_dir, "seqs.bam")
    tools = dd.get_expression_caller(lead)

    needs_alignment = not file_exists(shared_bam)
    if needs_alignment:
        result = process_alignment(lead, [merged_fastq, None])
        aligned_sample = result[0][0]
        work_bam = dd.get_work_bam(aligned_sample)
        # Relocate the BAM together with its ".bai" index.
        shutil.move(work_bam, shared_bam)
        shutil.move(work_bam + ".bai", shared_bam + ".bai")
        # Remove the directory named after the aligned sample.
        leftover_dir = op.join(bam_dir, aligned_sample["rgnames"]['sample'])
        shutil.rmtree(leftover_dir)

    for entry in data:
        entry[0]["cluster_bam"] = shared_bam

    if "mirdeep2" in tools:
        mirdeep.run(data)
    return data