def run(self):
    '''Run pacbio_smrtanalysis RS_Resequencing from inside the working directory.

    Changes into ``self.working_directory`` and builds a command of the form

        pacbio_smrtanalysis --memory 6 --reference /path/to/reference.fa RS_Resequencing Outputdir *.bax.h5

    If ``self.input_file`` has a non-zero size the command is run through
    ``fastaqutils.syscall`` and then passed to ``self._produce_summary``;
    otherwise only a "File empty" message is passed to
    ``self._produce_summary``.

    The working directory that was current on entry is always restored,
    including when the size check, the system call or the summary raises.
    '''
    original_dir = os.getcwd()
    os.chdir(self.working_directory)
    try:
        no_bsub_option = "--no_bsub" if self.no_bsub else ""
        command = " ".join([
            self.pacbio_exec, "--memory 6", no_bsub_option, "--reference",
            self.input_file, "RS_Resequencing", self.output_directory,
            self.read_data + "/*.bax.h5"
        ])

        if os.path.getsize(self.input_file):
            fastaqutils.syscall(command)
            self._produce_summary(command)
        else:
            self._produce_summary("File empty: " + self.input_file)

        # NOTE: there is deliberately no waiting for the bsub quiver job and
        # no clean-up of the output directory here. That effort was abandoned:
        # in the pipeline it can be carried out as a separate task, and on the
        # command line the user just gets the quiver output (consensus.fasta).
    finally:
        # Undo the chdir even on failure so the caller's cwd is never left
        # pointing at the working directory.
        os.chdir(original_dir)
Exemplo n.º 2
0
    def test_system_call(self):
        '''Check syscall and syscall_get_stdout on good commands and on a bogus one'''
        reference = os.path.join(data_dir, 'utils_test_system_call.txt')
        scratch = 'utils_test_syscall.tmp'
        bogus = 'thisisveryunlikelytoebarealcommandandshouldthrowerror'

        # A copy made through the shell must compare equal to the reference file
        utils.syscall('cat ' + reference + ' > ' + scratch)
        copy_matches = filecmp.cmp(scratch, reference)
        self.assertTrue(copy_matches)
        os.unlink(scratch)

        # A command that cannot be run must raise utils.Error
        self.assertRaises(utils.Error, utils.syscall, bogus)

        # Different content written through the shell must not compare equal
        utils.syscall('echo "this is not the right string" > ' + scratch)
        other_matches = filecmp.cmp(scratch, reference)
        self.assertFalse(other_matches)
        os.unlink(scratch)

        # Captured stdout comes back as a list holding the echoed word
        captured = utils.syscall_get_stdout('echo bingo')
        self.assertListEqual(["bingo"], captured)
        self.assertRaises(utils.Error, utils.syscall_get_stdout, bogus)
Exemplo n.º 3
0
    def test_system_call(self):
        '''Exercise the syscall helpers: success paths first, then failures'''
        expected_path = os.path.join(data_dir, 'utils_test_system_call.txt')
        out_path = 'utils_test_syscall.tmp'
        missing_cmd = 'thisisveryunlikelytoebarealcommandandshouldthrowerror'

        # 1. cat the expected file into a temporary file; contents must agree
        copy_cmd = 'cat ' + expected_path + ' > ' + out_path
        utils.syscall(copy_cmd)
        self.assertTrue(filecmp.cmp(out_path, expected_path))
        os.unlink(out_path)

        # 2. running a non-existent command raises utils.Error
        with self.assertRaises(utils.Error):
            utils.syscall(missing_cmd)

        # 3. unrelated text in the temporary file; contents must disagree
        echo_cmd = 'echo "this is not the right string" > ' + out_path
        utils.syscall(echo_cmd)
        self.assertFalse(filecmp.cmp(out_path, expected_path))
        os.unlink(out_path)

        # 4. stdout capture: one-element list on success, utils.Error on failure
        stdout_lines = utils.syscall_get_stdout('echo bingo')
        self.assertListEqual(["bingo"], stdout_lines)
        with self.assertRaises(utils.Error):
            utils.syscall_get_stdout(missing_cmd)
                    help='Name of reference fasta file',
                    metavar='reference.fa')
parser.add_argument('outprefix', help='Prefix of output files')
options = parser.parse_args()

# ref_seqs is handed to tasks.file_to_dict together with the reference fasta
# (presumably filled in place -- confirm against tasks.file_to_dict)
ref_seqs = dict()
tasks.file_to_dict(options.ref_fa, ref_seqs)

# Every intermediate file name hangs off the user-supplied output prefix
nucmer_out_prefix = options.outprefix + '.nucmer'
nucmer_out_delta = nucmer_out_prefix + '.delta'
nucmer_out_filter = nucmer_out_prefix + '.delta-filter'
nucmer_out_coords = nucmer_out_filter + '.coords'

# nucmer of contigs vs ref, then delta-filter, then show-coords; each step is
# a shell command line and the steps run strictly in this order
nucmer_commands = [
    ['nucmer', options.nucmer_options, '-p', nucmer_out_prefix,
     options.ref_fa, options.contigs_fa],
    ['delta-filter', '-i 98 -l 180 -q', nucmer_out_delta, '>',
     nucmer_out_filter],
    ['show-coords', '-dTlro', nucmer_out_filter, '>', nucmer_out_coords],
]
for nucmer_command in nucmer_commands:
    utils.syscall(' '.join(nucmer_command))

# load hits into hash. key=ref_name, value=another hash with key=qry_name,
# value=list of hit positions in that ref seq
nucmer_hits = dict()
contigs_to_print = dict()

nucmer_reader = nucmer_file_reader(nucmer_out_coords)

for hit in nucmer_reader:
    if hit.ref_name not in nucmer_hits:
Exemplo n.º 5
0
            # The is_reverse flags of the two records decide which way the
            # link is recorded. An entry b stored under nodes[a] is written
            # out further down as the graphviz edge "a->b".
            if (not sam1.is_reverse) and (not sam2.is_reverse):
                # neither record is flagged reverse: sam2's name -> sam1's name
                nodes[sam2.qname].add(sam1.qname)
            elif sam1.is_reverse and sam2.is_reverse:
                # both records are flagged reverse: sam1's name -> sam2's name
                nodes[sam1.qname].add(sam2.qname)
            else:
                # exactly one record is flagged reverse: link in both directions
                nodes[sam1.qname].add(sam2.qname)
                nodes[sam2.qname].add(sam1.qname)

# Render the graph as a pdf with graphviz: assemble an `echo ... | dot` shell
# pipeline, save it as a script next to the other outputs and run it with bash.
# One ';'-separated chunk per node, in sorted node order: either all of its
# "node->successor" edges, or the bare node name when it has no successors.
dot_statements = [
    ';'.join([node + '->' + x for x in successors]) if len(successors) else node
    for node, successors in sorted(nodes.items())
]

cmd = ''.join([
    'echo "digraph G {',
    ';'.join(dot_statements),
    '}"  | dot -Tpdf > ' + options.outprefix + '.pdf',
])

make_graph = options.outprefix + '.make_graph.sh'
script_handle = utils.open_file_write(make_graph)
print(cmd, file=script_handle)
utils.close(script_handle)
utils.syscall('bash ' + make_graph)
parser.add_argument(
    "--nucmer_options",
    help="Options when running nucmer [%(default)s]",
    default="",
)
parser.add_argument(
    "contigs_fa",
    help="Name of contigs fasta file",
    metavar="contigs.fa",
)
parser.add_argument(
    "ref_fa",
    help="Name of reference fasta file",
    metavar="reference.fa",
)
parser.add_argument("outprefix", help="Prefix of output files")
options = parser.parse_args()

# ref_seqs is passed to tasks.file_to_dict along with the reference fasta name
# (presumably populated in place -- confirm against tasks.file_to_dict)
ref_seqs = {}
tasks.file_to_dict(options.ref_fa, ref_seqs)

# Names of the files produced by the three alignment steps below
nucmer_out_prefix = options.outprefix + ".nucmer"
nucmer_out_delta = nucmer_out_prefix + ".delta"
nucmer_out_filter = nucmer_out_prefix + ".delta-filter"
nucmer_out_coords = nucmer_out_filter + ".coords"

# run nucmer of contigs vs ref
nucmer_step = (
    "nucmer",
    options.nucmer_options,
    "-p",
    nucmer_out_prefix,
    options.ref_fa,
    options.contigs_fa,
)
utils.syscall(" ".join(nucmer_step))

# filter the delta file, redirecting the result into nucmer_out_filter
filter_step = ("delta-filter", "-i 98 -l 180 -q", nucmer_out_delta, ">", nucmer_out_filter)
utils.syscall(" ".join(filter_step))

# turn the filtered delta into a coords file
coords_step = ("show-coords", "-dTlro", nucmer_out_filter, ">", nucmer_out_coords)
utils.syscall(" ".join(coords_step))

# load hits into hash. key=ref_name, value=another hash with key=qry_name,
# value=list of hit positions in that ref seq
nucmer_hits = {}
contigs_to_print = {}

nucmer_reader = nucmer_file_reader(nucmer_out_coords)

# For every hit yielded by nucmer_reader, make sure the nested entry
# nucmer_hits[ref_name][qry_name] exists; a missing one starts as an empty list.
for hit in nucmer_reader:
    # first hit on this reference sequence: create its inner dict
    if hit.ref_name not in nucmer_hits:
        nucmer_hits[hit.ref_name] = {}

    # first hit of this query against this reference: create its list
    if hit.qry_name not in nucmer_hits[hit.ref_name]:
        nucmer_hits[hit.ref_name][hit.qry_name] = []
            # Which way the link is stored depends on the is_reverse flags of
            # the two records. A name b held in nodes[a] is emitted below as
            # the graphviz edge "a->b".
            if (not sam1.is_reverse) and (not sam2.is_reverse):
                # both flags unset: store sam1's name under sam2's name
                nodes[sam2.qname].add(sam1.qname)
            elif sam1.is_reverse and sam2.is_reverse:
                # both flags set: store sam2's name under sam1's name
                nodes[sam1.qname].add(sam2.qname)
            else:
                # flags differ: store each name under the other
                nodes[sam1.qname].add(sam2.qname)
                nodes[sam2.qname].add(sam1.qname)

# make pdf of the graph using graphviz: the digraph body is a ';'-separated
# list of statements, collected node by node in sorted order
graph_statements = []
for node, targets in sorted(nodes.items()):
    if len(targets):
        # one "node->target" edge statement per recorded target
        graph_statements.extend([node + "->" + x for x in targets])
    else:
        # a node without targets is still listed so that it gets drawn
        graph_statements.append(node)

cmd = 'echo "digraph G {' + ";".join(graph_statements)
cmd += '}"  | dot -Tpdf > ' + options.outprefix + ".pdf"

# The command is saved to a script first and then executed with bash
make_graph = options.outprefix + ".make_graph.sh"
script_file = utils.open_file_write(make_graph)
print(cmd, file=script_file)
utils.close(script_file)
utils.syscall("bash " + make_graph)