Пример #1
0
def main(**job_inputs):
    """Concatenate VCFs, normalise them and call a consensus FASTA.

    Args:
        **job_inputs: Applet inputs. Reads ``input_vcfs`` (iterable of file
            references), ``ref_fasta`` (file reference) and the optional
            ``output_prefix`` string (defaults to an empty string).

    Returns:
        dict: ``consensus_fasta`` and ``consensus_vcf`` mapped to the values
        produced by uploading the consensus FASTA and the filtered VCF.
    """
    input_vcfs = [
        dx_utils.download_and_gunzip_file(f, skip_decompress=True)
        for f in job_inputs['input_vcfs']
    ]
    input_ref = dx_utils.download_and_gunzip_file(job_inputs['ref_fasta'])

    # create index files for inputs
    dx_utils.run_cmd(['samtools', 'faidx', input_ref])
    # Explicit loop instead of map(): map() is lazy on Python 3, so using it
    # purely for side effects would silently skip the tabix indexing there.
    for vcf in input_vcfs:
        dx_utils.run_cmd(['tabix', vcf])
    with open(VCF_FOFN, 'w') as fh:
        fh.write('\n'.join(input_vcfs))

    # get the bcftools version and help doc
    dx_utils.run_cmd(['bcftools', '--help'])

    output_prefix = job_inputs.get('output_prefix', '')
    output_bcf = output_prefix + 'concat' + '.bcf'
    threads_opt = '--threads={0}'.format(multiprocessing.cpu_count())

    # concatenate the bcf/vcf files, drop reference-only records, normalise
    concat_cmd = ['bcftools', 'concat', '-f', VCF_FOFN]
    view_cmd = ['bcftools', 'view', '-Ou', '-e\'type="ref"\'']
    norm_cmd = [
        'bcftools', 'norm', '-Ob', '-f', input_ref, '-o', output_bcf,
        threads_opt
    ]
    # run the commands
    dx_utils.run_pipe(concat_cmd, view_cmd, norm_cmd)

    # index the concatenated bcf file
    dx_utils.run_cmd(['bcftools', 'index', output_bcf])

    # call consensus
    output_fasta = output_prefix + 'consensus.fasta'
    consensus_filter = 'QUAL>1 && (GT="AA" || GT="Aa")'
    consensus_cmd = [
        'bcftools', 'consensus', '-i', consensus_filter, '-Hla', '-f',
        input_ref, output_bcf
    ]
    dx_utils.run_pipe(consensus_cmd, outputFile=output_fasta)

    # save the changes to vcf, using the same filter as the consensus call
    output_vcf = output_prefix + 'changes.vcf.gz'
    vcf_cmd = [
        'bcftools', 'view', '-i', consensus_filter, '-Oz', threads_opt,
        output_bcf
    ]
    dx_utils.run_pipe(vcf_cmd, outputFile=output_vcf)

    consensus_link = dx_utils.gzip_and_upload(output_fasta)
    print(consensus_link)
    output = {
        'consensus_fasta': consensus_link,
        'consensus_vcf': dxpy.dxlink(dxpy.upload_local_file(output_vcf)),
    }

    return output
Пример #2
0
def main(input_assembly,
         hic_alignments,
         restriction_enzyme_bases,
         filter_alignments,
         input_assembly_graph=None):
    """Scaffold an assembly with SALSA from Hi-C alignments.

    Args:
        input_assembly: File reference for the assembly FASTA; it is
            downloaded and the local name is used from then on.
        hic_alignments: Iterable of dicts carrying a ``$dnanexus_link`` key,
            each pointing at a BAM file (optionally gzipped).
        restriction_enzyme_bases: Iterable of strings joined with ``,`` and
            passed to SALSA's ``-e`` option.
        filter_alignments: When truthy, restrict the BED records to the
            contigs listed in the assembly's ``.fai`` index.
        input_assembly_graph: Optional file reference for an assembly graph,
            passed to SALSA's ``-g`` option.

    Returns:
        dict: Uploaded outputs keyed by ``final_scaffold_fasta``,
        ``final_scaffold_agp``, ``alignment_iteration_1``,
        ``scaffold_length_iteration_1`` and ``scaffold``.
    """
    # make sure we can run salsa
    dx_utils.run_cmd("python /opt/SALSA/run_pipeline.py -h")

    input_assembly = dx_utils.download_and_gunzip_file(input_assembly)
    alignment_prefix = input_assembly.split(".fasta")[0]

    # process inputs and convert to bed
    for index, bam_file in enumerate(hic_alignments):
        file_id = bam_file['$dnanexus_link']
        fn = dxpy.describe(file_id)['name']
        cmd = 'dx cat {0}'.format(file_id)
        if os.path.splitext(fn)[1] == '.gz':
            cmd += '| gunzip '
        cmd += '| bedtools bamtobed -i stdin'
        # the first file creates the bed, later ones are appended to it
        redirect = '>' if index == 0 else '>>'
        cmd += ' {0} {1}.bed'.format(redirect, alignment_prefix)
        dx_utils.run_cmd(cmd)

    # index the ref
    cmd = 'samtools faidx {0} '.format(input_assembly)
    dx_utils.run_cmd(cmd)

    # if we were asked to filter by contig names, make a bed file and subset the input bed
    if filter_alignments:
        with open('%s.fai' % input_assembly) as fai, \
                open("%s.contigs.bed" % alignment_prefix, 'w') as contigs_bed:
            for line in fai:
                fields = line.strip().split()
                contigs_bed.write("%s\t1\t%s\n" % (fields[0], fields[1]))

        cmd = 'bedtools intersect -wa -a {0}.bed -b {0}.contigs.bed > {0}.filtered.bed'.format(
            alignment_prefix)
        dx_utils.run_cmd(cmd)
    else:
        cmd = 'ln -s {0}.bed {0}.filtered.bed'.format(alignment_prefix)
        dx_utils.run_cmd(cmd)

    # now sort the bed file
    cmd = "sort -T . -k4 {0}.filtered.bed > {0}.sorted.bed".format(
        alignment_prefix)
    dx_utils.run_cmd(cmd)

    cmd = 'python /opt/SALSA/run_pipeline.py -a {0} -b {1}.sorted.bed -l {0}.fai -o {2} -e {3} -m yes -p yes '
    cmd = cmd.format(input_assembly, alignment_prefix, './',
                     ','.join(restriction_enzyme_bases))
    if input_assembly_graph is not None:
        # The graph has to be fetched and its local path interpolated;
        # previously the literal text "input_assembly_graph" was passed.
        # NOTE(review): assumes the graph is a file reference handled the
        # same way as input_assembly -- confirm against the applet spec.
        graph_file = dx_utils.download_and_gunzip_file(input_assembly_graph)
        cmd = "%s -g %s" % (cmd, graph_file)

    dx_utils.run_cmd(cmd)

    output = {}

    # final scaffold
    final_fasta = glob.glob('scaffold*FINAL.fasta')[0]
    output['final_scaffold_fasta'] = dx_utils.gzip_and_upload(final_fasta)

    # final agp
    final_agp = glob.glob('scaffold*FINAL.agp')[0]
    output['final_scaffold_agp'] = dx_utils.gzip_and_upload(final_agp)

    # alignment_iteration_1 bed
    alignment_iteration_1 = glob.glob('alignment_iteration_1.bed')[0]
    output['alignment_iteration_1'] = dx_utils.gzip_and_upload(
        alignment_iteration_1)

    # scaffold_length_iteration_1
    scaffold_length_iteration_1 = glob.glob('scaffold_length_iteration_1')[0]
    output['scaffold_length_iteration_1'] = dx_utils.gzip_and_upload(
        scaffold_length_iteration_1)

    # all others: every intermediate scaffold except the FINAL pair
    files = glob.glob('scaffold*fasta')
    files.extend(glob.glob('scaffold*agp'))
    if final_fasta in files:
        files.remove(final_fasta)
    if final_agp in files:
        files.remove(final_agp)
    print(files)

    output['scaffold'] = dx_utils.tar_files_and_upload(files, alignment_prefix)

    return output
Пример #3
0
def main(**job_inputs):
    """Run the two-enzyme hybrid scaffold (runTGH.R) and upload its results.

    Args:
        **job_inputs: Applet inputs. Reads ``bng_enzyme1``, ``bng_enzyme2``,
            ``ngs_fasta_or_cmap``, ``enzyme1_name``, ``enzyme2_name`` and the
            optional ``args_xml``, ``cuts1_file`` and ``cuts2_file``.

    Returns:
        dict: Links keyed by ``scaffold_fasta``, ``scaffold_output``,
        ``ncbi_scaffold_final``, ``unscaffolded_final`` and
        ``scaffold_targz``.

    Raises:
        dxpy.AppError: If no hybrid scaffold files, or no final NCBI /
            not-scaffolded FASTA files, are found after the run.
    """
    bionano_cmap_1_link = job_inputs['bng_enzyme1']
    bionano_cmap_2_link = job_inputs['bng_enzyme2']
    ngs_fasta_link = job_inputs['ngs_fasta_or_cmap']
    args_xml_link = job_inputs.get('args_xml')

    # Download all the inputs
    bionano_cmap_1_filename = os.path.join(
        '/home/dnanexus', download_and_gunzip_file(bionano_cmap_1_link))
    bionano_cmap_2_filename = os.path.join(
        '/home/dnanexus', download_and_gunzip_file(bionano_cmap_2_link))
    ngs_fasta_filename = os.path.join('/home/dnanexus',
                                      download_and_gunzip_file(ngs_fasta_link))

    if args_xml_link:
        args_xml_filename = download_and_gunzip_file(args_xml_link)
    else:
        args_xml_filename = os.path.join(HYBRID_DIR, 'TGH',
                                         'hybridScaffold_two_enzymes.xml')
    output_dir = "hybrid_scaffold_output"

    run_cmd('mkdir {0}'.format(output_dir))
    results_tar = output_dir + '_results.tar'

    cmd = "Rscript {dir}/runTGH.R --help".format(dir=HYBRID_DIR)
    run_cmd(cmd)

    scaffold_cmd = (
        "Rscript {dir}/runTGH.R -N {ngs_fasta} "
        "-b1 {bng1} -b2 {bng2} -O {outdir} -R {refaligner} -t {results} ".
        format(dir=HYBRID_DIR,
               ngs_fasta=ngs_fasta_filename,
               bng1=bionano_cmap_1_filename,
               bng2=bionano_cmap_2_filename,
               outdir=output_dir,
               refaligner=os.path.join(TOOLS_DIR, 'RefAligner'),
               results=results_tar))
    scaffold_cmd += '-e1 {enzyme1} -e2 {enzyme2} '.format(
        enzyme1=job_inputs['enzyme1_name'], enzyme2=job_inputs['enzyme2_name'])

    # manual cut files are only used when both are supplied
    if job_inputs.get("cuts1_file") and job_inputs.get("cuts2_file"):
        cuts1_file = download_and_gunzip_file(job_inputs["cuts1_file"])
        cuts2_file = download_and_gunzip_file(job_inputs["cuts2_file"])
        scaffold_cmd += '-m1 {cuts1} -m2 {cuts2} '.format(cuts1=cuts1_file,
                                                          cuts2=cuts2_file)

    scaffold_cmd += ' {args_xml}'.format(args_xml=args_xml_filename)
    run_cmd(scaffold_cmd)

    scaffold_final = glob.glob(
        os.path.join(output_dir, 'TGH_M1', 'AGPExport', '*HYBRID*'))

    if not scaffold_final:
        # Show the end of the tool log, then stop: continuing would only
        # fail later with an uninformative IndexError.
        hybrid_scaffold_log = os.path.join(output_dir, 'TGH.log')
        run_cmd('tail -n 50 {0}'.format(hybrid_scaffold_log))
        raise dxpy.AppError("ERROR: No hybrid scaffolds produced.")

    # NOTE(review): these globs look under 'hybrid_scaffolds*' rather than
    # the 'TGH_M1/AGPExport' directory searched above -- confirm the layout.
    scaffold_final_ncbi = glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*',
                     '*_HYBRID_SCAFFOLD_NCBI.fasta'))
    unscaffolded_final = glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*',
                     '*_HYBRID_SCAFFOLD_NOT_SCAFFOLDED.fasta'))
    if not scaffold_final_ncbi or not unscaffolded_final:
        raise dxpy.AppError(
            "ERROR: Final NCBI / not-scaffolded FASTA files were not found.")

    output = {
        "scaffold_fasta": [
            dxpy.dxlink(dxpy.upload_local_file(f)) for f in scaffold_final
            if f.endswith(".fasta")
        ],
        "scaffold_output":
        [dxpy.dxlink(dxpy.upload_local_file(f)) for f in scaffold_final],
        "ncbi_scaffold_final":
        dx_utils.gzip_and_upload(scaffold_final_ncbi[0]),
        "unscaffolded_final":
        dx_utils.gzip_and_upload(unscaffolded_final[0])
    }

    # archive the whole output directory as well
    tar_name = "hybrid_scaffold_output.tar.gz"
    tar_cmd = "tar czvf {tar_name} {outdir}".format(tar_name=tar_name,
                                                    outdir=output_dir)
    run_cmd(tar_cmd)
    output_id = dxpy.upload_local_file(tar_name)

    output["scaffold_targz"] = dxpy.dxlink(output_id)

    return output
Пример #4
0
def main(**job_inputs):
    """Run the single-enzyme hybrid scaffold (hybridScaffold.pl) and upload results.

    Args:
        **job_inputs: Applet inputs. Reads ``refinefinal_merged_cmap``,
            ``ngs_fasta_or_cmap``, ``generate_molecules`` and
            ``generate_chimeric``; ``b_conflict_filter`` and
            ``n_conflict_filter`` when no ``conflict_resolution_file`` is
            given; and the optional ``args_xml``, ``molecules_bnx_file``,
            ``optargs_xml`` and ``err_files``.

    Returns:
        dict: Links keyed by ``scaffold_final``, ``scaffold_output``,
        ``cut_and_conflict``, ``ncbi_scaffold_final``,
        ``unscaffolded_final`` and ``scaffold_targz``.

    Raises:
        dxpy.AppError: If a molecules BNX file or OptArgs XML file is
            required by the selected flags but missing from the inputs.
    """
    bionano_cmap_link = job_inputs['refinefinal_merged_cmap']
    ngs_fasta_link = job_inputs['ngs_fasta_or_cmap']
    args_xml_link = job_inputs.get('args_xml')

    # Download all the inputs
    bionano_cmap_filename = download_and_gunzip_file(bionano_cmap_link)
    ngs_fasta_filename = download_and_gunzip_file(ngs_fasta_link)
    if args_xml_link:
        args_xml_filename = download_and_gunzip_file(args_xml_link)
    else:
        args_xml_filename = os.path.join(HYBRID_DIR, 'hybridScaffold_config.xml')

    output_dir = "hybrid_scaffold_output"

    scaffold_cmd = (
        "perl {dir}/hybridScaffold.pl -n {ngs_fasta} -b {cmap} "
        "-o {outdir} -c {args_xml} "
        .format(dir=HYBRID_DIR, ngs_fasta=ngs_fasta_filename, cmap=bionano_cmap_filename,
                args_xml=args_xml_filename, outdir=output_dir))
    scaffold_cmd += '-r {refaligner} '.format(refaligner=os.path.join(TOOLS_DIR, 'RefAligner'))

    if "conflict_resolution_file" in job_inputs:
        conflict_resolution_file = download_and_gunzip_file(job_inputs["conflict_resolution_file"])
        # the placeholder is named, so the value must be passed by keyword
        scaffold_cmd += '-M {cr_file} '.format(cr_file=conflict_resolution_file)
    else:
        scaffold_cmd += '-B {b_level} -N {n_level} '.format(
            b_level=job_inputs["b_conflict_filter"], n_level=job_inputs["n_conflict_filter"])

    # Bound up front so the generate_chimeric branch can test it even when
    # generate_molecules is off.
    molecules_bnx_file = None
    if job_inputs["generate_molecules"] is True:
        scaffold_cmd += '-x '
        # trailing space keeps the path separate from the next option
        scaffold_cmd += '-p {0} '.format(SCRIPTS_DIR)

        try:
            molecules_bnx_file = download_and_gunzip_file(job_inputs["molecules_bnx_file"])
            scaffold_cmd += '-m {0} '.format(molecules_bnx_file)

        except KeyError:
            raise dxpy.AppError("Molecules BNX file required for Align Molecules flag (-x)")

        try:
            optargs_xml = download_and_gunzip_file(job_inputs["optargs_xml"])
            scaffold_cmd += '-q {0} '.format(optargs_xml)

        except KeyError:
            raise dxpy.AppError("OptArgs XML file required for Align Molecules flag (-x)")

    if job_inputs["generate_chimeric"] is True:
        scaffold_cmd += '-y '

        if molecules_bnx_file:
            scaffold_cmd += '-m {0} '.format(molecules_bnx_file)

        else:
            try:
                molecules_bnx_file = download_and_gunzip_file(job_inputs["molecules_bnx_file"])
                scaffold_cmd += '-m {0} '.format(molecules_bnx_file)

            except KeyError:
                raise dxpy.AppError("Molecules BNX file required for Generate Molecules flag")

        if "err_files" in job_inputs:
            err_files = [download_and_gunzip_file(err_file) for err_file in job_inputs["err_files"]]
            err_cmd = ' '.join('-e {0}'.format(err) for err in err_files)
            scaffold_cmd += err_cmd
    run_cmd(scaffold_cmd)

    # list the produced files in the job log
    run_cmd('tree {0}'.format(output_dir))

    scaffold_final_ncbi = glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*', '*_HYBRID_SCAFFOLD_NCBI.fasta'))
    unscaffolded_final = glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*', '*_HYBRID_SCAFFOLD_NOT_SCAFFOLDED.fasta'))
    scaffold_final = glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*', '*_HYBRID_SCAFFOLD.fasta'))
    scaffold_final.extend(glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*', '*_HYBRID_SCAFFOLD.cmap')))
    scaffold_final.extend(glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*', '*_HYBRID_SCAFFOLD.agp')))
    scaffold_output = glob.glob(
        os.path.join(output_dir, 'hybrid_scaffolds*', '*_HYBRID_SCAFFOLD*'))
    cut_and_conflict = glob.glob(os.path.join(output_dir, 'hybrid_scaffolds*', 'conflicts*.txt'))
    cut_and_conflict.extend(glob.glob(os.path.join(output_dir, 'hybrid_scaffolds*', '*_annotations.bed')))
    output = {"scaffold_final": [dx_utils.gzip_and_upload(f) for f in scaffold_final],
            "scaffold_output": [dx_utils.gzip_and_upload(f) for f in scaffold_output if f not in scaffold_final],
            "cut_and_conflict": [dxpy.dxlink(dxpy.upload_local_file(f)) for f in cut_and_conflict],
            "ncbi_scaffold_final": dx_utils.gzip_and_upload(scaffold_final_ncbi[0]),
            "unscaffolded_final": dx_utils.gzip_and_upload(unscaffolded_final[0])}

    # archive the whole output directory as well
    tar_name = "hybrid_scaffold_output.tar.gz"
    tar_cmd = "tar czvf {tar_name} {outdir}".format(
        tar_name=tar_name,
        outdir=output_dir)
    dx_utils.run_cmd(tar_cmd)
    output_id = dxpy.upload_local_file(tar_name)

    output["scaffold_targz"] = dxpy.dxlink(output_id)
    return output
def main(**job_inputs):
    """Run hybridScaffold.pl on one Bionano cmap plus an NGS assembly.

    Args:
        **job_inputs: Applet inputs. Reads ``refinefinal_merged_cmap``,
            ``ngs_fasta_or_cmap``, ``generate_molecules`` and
            ``generate_chimeric``; ``b_conflict_filter`` and
            ``n_conflict_filter`` when no ``conflict_resolution_file`` is
            given; and the optional ``args_xml``, ``molecules_bnx_file``,
            ``optargs_xml`` and ``err_files``.

    Returns:
        dict: Links keyed by ``scaffold_final``, ``scaffold_output``,
        ``cut_and_conflict``, ``ncbi_scaffold_final``,
        ``unscaffolded_final`` and ``scaffold_targz``.

    Raises:
        dxpy.AppError: If a molecules BNX file or OptArgs XML file is
            required by the selected flags but missing from the inputs.
    """
    fetch = dx_utils.download_and_gunzip_file

    cmap_link = job_inputs['refinefinal_merged_cmap']
    fasta_link = job_inputs['ngs_fasta_or_cmap']
    xml_link = job_inputs.get('args_xml')

    # Download all the inputs; fall back to the bundled config when no
    # args XML was supplied.
    cmap_path = fetch(cmap_link)
    fasta_path = fetch(fasta_link)
    if xml_link:
        xml_path = fetch(xml_link)
    else:
        xml_path = os.path.join(HYBRID_DIR, 'hybridScaffold_config.xml')

    output_dir = "hybrid_scaffold_output"

    scaffold_cmd = [
        "perl", os.path.join(HYBRID_DIR, "hybridScaffold.pl"),
        "-n", fasta_path,
        "-b", cmap_path,
        "-o", output_dir,
        "-c", xml_path,
        "-r", os.path.join(TOOLS_DIR, 'RefAligner'),
    ]

    # either an explicit conflict-resolution file or the two filter levels
    if "conflict_resolution_file" in job_inputs:
        resolution_path = fetch(job_inputs["conflict_resolution_file"])
        scaffold_cmd.extend(["-M", resolution_path])
    else:
        scaffold_cmd.extend([
            "-B", str(job_inputs["b_conflict_filter"]),
            "-N", str(job_inputs["n_conflict_filter"]),
        ])

    bnx_path = None
    if job_inputs["generate_molecules"] is True:
        scaffold_cmd.extend(["-x", "-p", SCRIPTS_DIR])

        try:
            bnx_path = fetch(job_inputs["molecules_bnx_file"])
        except KeyError:
            raise dxpy.AppError("Molecules BNX file required for Align Molecules flag (-x)")
        scaffold_cmd.extend(["-m", bnx_path])

        try:
            optargs_path = fetch(job_inputs["optargs_xml"])
        except KeyError:
            raise dxpy.AppError("OptArgs XML file required for Align Molecules flag (-x)")
        scaffold_cmd.extend(["-q", optargs_path])

    if job_inputs["generate_chimeric"] is True:
        scaffold_cmd.append("-y")

        # reuse the BNX file fetched above when there is one
        if not bnx_path:
            try:
                bnx_path = fetch(job_inputs["molecules_bnx_file"])
            except KeyError:
                raise dxpy.AppError("Molecules BNX file required for Generate Molecules flag")
        scaffold_cmd.extend(["-m", bnx_path])

        if "err_files" in job_inputs:
            err_paths = [fetch(link) for link in job_inputs["err_files"]]
            for err_path in err_paths:
                scaffold_cmd.extend(["-e", err_path])

    dx_utils.run_cmd(scaffold_cmd)

    # list the produced files in the job log
    dx_utils.run_cmd(["tree", output_dir])

    def _find(subdir, pattern):
        # glob for `pattern` inside `subdir` of the output directory
        return glob.glob(os.path.join(output_dir, subdir, pattern))

    results_dir = 'hybrid_scaffolds'
    ncbi_fasta = _find(results_dir, '*_HYBRID_SCAFFOLD_NCBI.fasta')[0]
    unscaffolded_fasta = _find(results_dir, '*_HYBRID_SCAFFOLD_NOT_SCAFFOLDED.fasta')[0]

    final_files = []
    for pattern in ('*_HYBRID_SCAFFOLD.fasta',
                    '*_HYBRID_SCAFFOLD.cmap',
                    '*_HYBRID_SCAFFOLD.agp'):
        final_files.extend(_find(results_dir, pattern))

    extra_files = []
    for pattern in ('*_HYBRID_SCAFFOLD.xmap',
                    '*_HYBRID_SCAFFOLD_q.cmap',
                    '*_HYBRID_SCAFFOLD_r.cmap'):
        extra_files.extend(_find(results_dir, pattern))
    extra_files = [path for path in extra_files if path not in final_files]

    conflict_files = _find('hybrid_scaffolds*', 'conflicts*.txt')
    conflict_files.extend(_find('hybrid_scaffolds*', '*_annotations.bed'))

    # make sure output files don't have colons
    for fasta in (ncbi_fasta, unscaffolded_fasta):
        dx_utils.run_cmd(["sed", "-i.bak", "s/:/_/g", fasta])

    # upload outputs
    output = {}
    output["scaffold_final"] = [dx_utils.gzip_and_upload(path) for path in final_files]
    output["scaffold_output"] = [dx_utils.gzip_and_upload(path) for path in extra_files]
    output["cut_and_conflict"] = [
        dxpy.dxlink(dxpy.upload_local_file(path)) for path in conflict_files
    ]
    output["ncbi_scaffold_final"] = dx_utils.gzip_and_upload(ncbi_fasta)
    output["unscaffolded_final"] = dx_utils.gzip_and_upload(unscaffolded_fasta)

    # archive the whole output directory as well
    tar_name = "hybrid_scaffold_output.tar.gz"
    dx_utils.run_cmd("tar czvf {tar_name} {outdir}".format(
        tar_name=tar_name, outdir=output_dir))
    archive_id = dxpy.upload_local_file(tar_name)

    output["scaffold_targz"] = dxpy.dxlink(archive_id)
    return output
Пример #6
0
def main(**job_inputs):
    """Run the two-enzyme hybrid scaffold (runTGH.R) and upload its results.

    Args:
        **job_inputs: Applet inputs. Reads ``bng_enzyme1``, ``bng_enzyme2``,
            ``ngs_fasta_or_cmap``, ``enzyme1_name``, ``enzyme2_name`` and the
            optional ``args_xml``, ``cuts1_file`` and ``cuts2_file``.

    Returns:
        dict: Links keyed by ``scaffold_fasta``, ``scaffold_output``,
        ``ncbi_scaffold_final``, ``unscaffolded_final`` and
        ``scaffold_targz``.

    Raises:
        dxpy.AppError: If none of the candidate output directories contains
            an exported hybrid scaffold FASTA.
    """
    fetch = dx_utils.download_and_gunzip_file
    home_dir = '/home/dnanexus'

    cmap1_link = job_inputs['bng_enzyme1']
    cmap2_link = job_inputs['bng_enzyme2']
    fasta_link = job_inputs['ngs_fasta_or_cmap']
    xml_link = job_inputs.get('args_xml')

    # Download all the inputs
    cmap1_path = os.path.join(home_dir, fetch(cmap1_link))
    cmap2_path = os.path.join(home_dir, fetch(cmap2_link))
    fasta_path = os.path.join(home_dir, fetch(fasta_link))

    if xml_link:
        xml_path = fetch(xml_link)
    else:
        xml_path = os.path.join(HYBRID_DIR, 'TGH', 'hybridScaffold_two_enzymes.xml')

    output_dir = "hybrid_scaffold_output"
    dx_utils.run_cmd(['mkdir', output_dir])
    results_tar = output_dir + '_results.tar'

    # print the tool's help text before the real run
    tgh_script = os.path.join(HYBRID_DIR, "runTGH.R")
    dx_utils.run_cmd(["Rscript", tgh_script, "--help"])

    scaffold_cmd = [
        "Rscript", os.path.join(HYBRID_DIR, "runTGH.R"),
        "-N", fasta_path,
        "-b1", cmap1_path,
        "-b2", cmap2_path,
        "-O", output_dir,
        "-R", os.path.join(TOOLS_DIR, 'RefAligner'),
        "-t", results_tar,
        "-e1", job_inputs['enzyme1_name'],
        "-e2", job_inputs['enzyme2_name'],
    ]

    # manual cut files are only used when both are supplied
    if job_inputs.get("cuts1_file") and job_inputs.get("cuts2_file"):
        cuts1_path = fetch(job_inputs["cuts1_file"])
        cuts2_path = fetch(job_inputs["cuts2_file"])
        scaffold_cmd.extend(["-m1", cuts1_path, "-m2", cuts2_path])

    scaffold_cmd.append(xml_path)
    dx_utils.run_cmd(scaffold_cmd)

    # try locating the outputs; the first candidate directory holding an
    # exported scaffold FASTA wins
    candidates = ("TGH_M2", "TGH_M1",
                  "two_enzyme_hybrid_scaffold_M2", "two_enzyme_hybrid_scaffold_M1")
    for candidate in candidates:
        candidate_dir = os.path.join(output_dir, candidate)
        export_dir = os.path.join(candidate_dir, 'AGPExport')

        scaffold_final = glob.glob(os.path.join(export_dir, '*HYBRID_Export.fasta'))
        if not scaffold_final:
            continue

        ncbi_fasta = glob.glob(
            os.path.join(export_dir, '*HYBRID_Export_NCBI.fasta'))[0]
        unscaffolded_fasta = glob.glob(
            os.path.join(export_dir, '*HYBRID_Export_NOT_SCAFFOLDED.fasta'))[0]

        scaffold_output = []
        for pattern in ('*_HYBRID_Export.agp',
                        '*_HYBRID_Export.xmap',
                        '*_HYBRID_Export_q.cmap',
                        '*_HYBRID_Export_r.cmap'):
            scaffold_output.extend(glob.glob(os.path.join(candidate_dir, pattern)))
        scaffold_output = [path for path in scaffold_output if path not in scaffold_final]
        break
    else:
        # if still not found, something went wrong
        log_path = os.path.join(output_dir, 'TGH.log')
        dx_utils.run_cmd(["tail", "-n", "50", log_path])
        raise dxpy.AppError("ERROR: No hybrid scaffolds produced.")

    # make sure output files don't have colons
    for fasta in (ncbi_fasta, unscaffolded_fasta):
        dx_utils.run_cmd(["sed", "-i.bak", "s/:/_/g", fasta])

    output = {}
    output["scaffold_fasta"] = [
        dxpy.dxlink(dxpy.upload_local_file(path))
        for path in scaffold_final if path.endswith(".fasta")
    ]
    output["scaffold_output"] = [
        dxpy.dxlink(dxpy.upload_local_file(path)) for path in scaffold_output
    ]
    output["ncbi_scaffold_final"] = dx_utils.gzip_and_upload(ncbi_fasta)
    output["unscaffolded_final"] = dx_utils.gzip_and_upload(unscaffolded_fasta)

    # archive the whole output directory as well
    tar_name = "hybrid_scaffold_output.tar.gz"
    dx_utils.run_cmd("tar czvf {tar_name} {outdir}".format(
        tar_name=tar_name, outdir=output_dir))
    archive_id = dxpy.upload_local_file(tar_name)

    output["scaffold_targz"] = dxpy.dxlink(archive_id)

    return output