Пример #1
0
	def _build_profile(self, msa: str, model_construction: str='fast') -> str:
		"""Run the configured binary on an MSA and return the produced HMM text.

		The MSA is written to a temporary file, the binary at
		``self.binary_path`` is invoked on it, and the file it writes is read
		back and returned.

		Args:
			msa: Alignment text, written verbatim to the temporary input file.
			model_construction: Either ``'fast'`` or ``'hand'``. Only ``'hand'``
				adds a command-line flag (``--hand``).

		Returns:
			The contents of the generated HMM file.

		Raises:
			ValueError: If ``model_construction`` is not ``'hand'`` or ``'fast'``.
			RuntimeError: If the subprocess exits with a non-zero return code.
		"""
		if model_construction not in {'hand', 'fast'}:
			raise ValueError(f'HMMBuild: invalid model construction {model_construction}')
		with utils.tmpdir_manager() as query_tmp_dir:
			input_query = query_tmp_dir / Path('query.msa')
			output_hmm_path = query_tmp_dir / Path('output.hmm')
			# Written as UTF-8 to match the encoding used to read the result back.
			with open(input_query, 'w', encoding='utf-8') as f:
				f.write(msa)

			cmd = [self.binary_path.as_posix()]
			if model_construction == 'hand':
				cmd.append(f'--{model_construction}')
			# The original read a bare, undefined name `singlemx` here (NameError).
			# NOTE(review): presumably an instance option set in __init__ - confirm;
			# it defaults to off when the attribute is absent.
			if getattr(self, 'singlemx', False):
				cmd.append('--singlemx')
			cmd += ['--amino', output_hmm_path.as_posix(), input_query.as_posix()]

			# Space-separated so the logged command line is readable.
			print(f"Launching subprocess {' '.join(cmd)}")
			process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			with utils.timing('HMMBuild query'):
				stdout, stderr = process.communicate()
				retcode = process.wait()
			if retcode:
				# errors='replace' keeps undecodable tool output from masking the failure.
				out_text = stdout.decode('utf-8', errors='replace')
				err_text = stderr.decode('utf-8', errors='replace')
				raise RuntimeError(f"HMMBuild failed:\nstdout:\n{out_text}\nstderr:\n{err_text}")
			with open(output_hmm_path, encoding='utf-8') as f:
				hmm = f.read()
		return hmm
Пример #2
0
	def query(self, a3m: str) -> str:
		"""Run the configured search binary on an A3M string and return its report.

		The A3M text is written to a temporary file, the binary at
		``self.binary_path`` is run against every path in ``self.databases``
		(each passed with ``-d``), and the ``-o`` output file is read back.

		Args:
			a3m: Query alignment text, written verbatim to the temporary input.

		Returns:
			The contents of the output file produced by the subprocess.

		Raises:
			RuntimeError: If the subprocess exits with a non-zero return code.
		"""
		with utils.tmpdir_manager() as query_tmp_dir:
			input_path = query_tmp_dir / Path('query.a3m')
			hhr_path = query_tmp_dir / Path('output.hhr')
			with open(input_path, 'w') as f:
				f.write(a3m)

			db_cmd = []
			for db_path in self.databases:
				db_cmd += ['-d', db_path.as_posix()]
			cmd = [
				self.binary_path.as_posix(),
				'-i', input_path.as_posix(),
				'-o', hhr_path.as_posix(),
				'-maxseq', str(self.maxseq)
			] + db_cmd

			# Space-separated so the logged command line is readable.
			print(f'Launching subprocess {" ".join(cmd)}')
			process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			with utils.timing('HHSearch query'):
				stdout, stderr = process.communicate()
				retcode = process.wait()
			if retcode:
				# stderr is truncated to 100000 bytes; the cut can land inside a
				# multi-byte character, so decode leniently instead of raising
				# UnicodeDecodeError in place of the real error.
				out_text = stdout.decode('utf-8', errors='replace')
				err_text = stderr[:100000].decode('utf-8', errors='replace')
				raise RuntimeError(f'HHSearch failed:\nstdout:\n{out_text}\nstderr:\n{err_text}')

			with open(hhr_path) as f:
				hhr = f.read()

		return hhr
Пример #3
0
	def _query_chunk(self, input_fasta_path:Path, database_path:Path) -> Mapping[str, Any]:
		"""Run the configured binary for one FASTA query against one database.

		Args:
			input_fasta_path: Path to the query FASTA file.
			database_path: Path to the sequence database to search.

		Returns:
			A dict with keys ``sto`` (contents of the ``-A`` alignment file),
			``tbl`` (contents of the tabular output file, or ``''`` when
			``self.get_tblout`` is false), ``stderr`` (raw bytes captured from
			the subprocess), ``n_iter`` and ``e_value`` (copied from ``self``).

		Raises:
			RuntimeError: If the subprocess exits with a non-zero return code.
		"""
		with utils.tmpdir_manager() as query_tmp_dir:
			sto_path = query_tmp_dir / Path('output.sto')
			cmd_flags = [
				'-o', '/dev/null',
				'-A', sto_path.as_posix(),
				'--noali',
				'--F1', str(self.filter_f1),
				'--F2', str(self.filter_f2),
				'--F3', str(self.filter_f3),
				'--incE', str(self.e_value),
				'-E', str(self.e_value),
				'--cpu', str(self.n_cpu),
				'-N', str(self.n_iter)
			]

			# The optional flags below previously extended `cmdflags`, an
			# undefined name, so enabling any of them raised NameError.
			if self.get_tblout:
				tblout_path = query_tmp_dir / Path('tblout.txt')
				# HMMER spells this as a long option: `--tblout`, not `-tblout`.
				cmd_flags.extend(['--tblout', tblout_path.as_posix()])

			if self.z_value is not None:
				cmd_flags.extend(['-Z', str(self.z_value)])

			if self.dom_e is not None:
				cmd_flags.extend(['--domE', str(self.dom_e)])

			if self.incdom_e is not None:
				cmd_flags.extend(['--incdomE', str(self.incdom_e)])

			cmd = [self.binary_path.as_posix()] + cmd_flags + [input_fasta_path.as_posix(), database_path.as_posix()]
			# Space-separated so the logged command line is readable.
			print(f"Launching subprocess {' '.join(cmd)}")
			process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			with utils.timing(f'Jackhammer {database_path.name} query'):
				_, stderr = process.communicate()
				retcode = process.wait()
			if retcode:
				# errors='replace' keeps undecodable tool output from masking the failure.
				raise RuntimeError(f"Jackhammer failed: {stderr.decode('utf-8', errors='replace')}")

			tbl = ''
			if self.get_tblout:
				with open(tblout_path) as f:
					tbl = f.read()

			with open(sto_path) as f:
				sto = f.read()

			raw_output = dict(
				sto = sto,
				tbl = tbl,
				stderr = stderr,
				n_iter = self.n_iter,
				e_value = self.e_value
			)
			return raw_output
Пример #4
0
    def query(self, input_fasta_path: Path) -> Mapping[str, Any]:
        """Run the configured binary on a FASTA file and collect its A3M output.

        Args:
            input_fasta_path: Path to the query FASTA file.

        Returns:
            A dict with keys ``a3m`` (contents of the ``-oa3m`` file),
            ``output`` and ``stderr`` (raw bytes captured from the subprocess),
            ``n_iter`` and ``e_value`` (copied from ``self``).

        Raises:
            RuntimeError: If the subprocess exits with a non-zero return code.
        """
        with utils.tmpdir_manager() as query_tmp_dir:
            a3m_path = query_tmp_dir / Path('output.a3m')

            db_cmd = []
            for db_path in self.databases:
                db_cmd += ['-d', db_path.as_posix()]

            cmd = [
                # str() so argv is all strings whether binary_path is a str or
                # a Path; a raw Path here broke the join used for logging below.
                str(self.binary_path),
                '-i', input_fasta_path.as_posix(),
                '-cpu', str(self.n_cpu),
                '-oa3m', a3m_path.as_posix(),
                '-o', '/dev/null',
                '-n', str(self.n_iter),
                '-e', str(self.e_value),
                '-maxseq', str(self.maxseq),
                '-realign_max', str(self.realign_max),
                '-maxfilt', str(self.maxfilt),
                '-min_prefilter_hits', str(self.min_prefilter_hits),
            ]
            if self.all_seqs:
                cmd += ['-all']
            if self.alt:
                cmd += ['-alt', str(self.alt)]
            # -P / -Z are only passed when they differ from the class defaults.
            if self.p != HHBlits._DEFAULT_P:
                cmd += ['-P', str(self.p)]
            if self.z != HHBlits._DEFAULT_Z:
                cmd += ['-Z', str(self.z)]
            cmd += db_cmd

            # Space-separated so the logged command line is readable.
            print(f'Launching subprocess {" ".join(cmd)}')
            process = subprocess.Popen(cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            with utils.timing('HHBlits query'):
                stdout, stderr = process.communicate()
                retcode = process.wait()
            if retcode:
                # stderr is truncated to 500000 bytes; the cut can land inside
                # a multi-byte character, so decode leniently rather than
                # raising UnicodeDecodeError in place of the real error.
                out_text = stdout.decode('utf-8', errors='replace')
                err_text = stderr[:500000].decode('utf-8', errors='replace')
                raise RuntimeError(
                    f"HHBlits failed:\nstdout:\n{out_text}\nstderr:\n{err_text}"
                )

            with open(a3m_path) as f:
                a3m = f.read()

        raw_output = dict(a3m=a3m,
                          output=stdout,
                          stderr=stderr,
                          n_iter=self.n_iter,
                          e_value=self.e_value)
        return raw_output
Пример #5
0
    def align(self, sequences: Sequence[str]) -> str:
        """Align the given sequences with the configured binary.

        The sequences are written to a temporary FASTA-style file under the
        names ``sequence 1``, ``sequence 2``, ..., the binary at
        ``self.binary_path`` is run on it, and its output file is returned.

        Args:
            sequences: Sequences to align; each must be at least 6 characters.

        Returns:
            The contents of the output file written by the subprocess.

        Raises:
            ValueError: If any sequence is shorter than 6 characters.
            RuntimeError: If the subprocess exits with a non-zero return code.
        """
        def _to_a3m(sequences: Sequence[str]) -> str:
            """Render sequences as '>sequence N' header / sequence line pairs."""
            return ''.join(
                f'>sequence {i}\n{sequence}\n'
                for i, sequence in enumerate(sequences, start=1)
            )

        print(f'Aligning {len(sequences)} sequences')

        for seq in sequences:
            if len(seq) < 6:
                raise ValueError(
                    f'Kalign: sequences should be at least 6 res long, got {seq}, {len(seq)}'
                )
        with utils.tmpdir_manager() as query_tmp_dir:
            # Paths must be built from the directory yielded by the context
            # manager; the original used the undefined name `tmpdir_manager`.
            input_fasta_path = query_tmp_dir / Path('input.fasta')
            output_a3m_path = query_tmp_dir / Path('output.a3m')

            with open(input_fasta_path, 'w') as f:
                f.write(_to_a3m(sequences))

            cmd = [
                self.binary_path.as_posix(),
                '-i', input_fasta_path.as_posix(),
                '-o', output_a3m_path.as_posix(),
                '-format', 'fasta',
            ]

            # Space-separated so the logged command line is readable.
            print(f"Launching subprocess {' '.join(cmd)}")
            process = subprocess.Popen(cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            with utils.timing('Kalign query'):
                stdout, stderr = process.communicate()
                retcode = process.wait()
            if retcode:
                # errors='replace' keeps undecodable tool output from masking the failure.
                out_text = stdout.decode('utf-8', errors='replace')
                err_text = stderr.decode('utf-8', errors='replace')
                raise RuntimeError(
                    f"Kalign failed:\nstdout:\n{out_text}\nstderr:\n{err_text}"
                )

            with open(output_a3m_path) as f:
                a3m = f.read()

        return a3m
Пример #6
0
    def query(self, hmm: str) -> str:
        """Search ``self.database_path`` with an HMM and return the ``-A`` output.

        The HMM text is written to a temporary file, the binary at
        ``self.binary_path`` is run with ``--noali``, ``--cpu`` and any extra
        ``self.flags``, and the alignment file it writes is read back.

        Args:
            hmm: HMM profile text, written verbatim to the temporary input.

        Returns:
            The contents of the alignment output file.

        Raises:
            RuntimeError: If the subprocess exits with a non-zero return code.
        """
        with utils.tmpdir_manager() as query_tmp_dir:
            hmm_input_path = query_tmp_dir / Path('query.hmm')
            output_a3m_path = query_tmp_dir / Path('output.a3m')
            with open(hmm_input_path, 'w') as f:
                f.write(hmm)

            cmd = [
                self.binary_path.as_posix(), '--noali', '--cpu',
                str(self.n_cpu)
            ]

            if self.flags:
                # list() copies the flags so a non-list sequence also works.
                # NOTE(review): assumes flags is a sequence of strings, not a
                # single string (which would be split per character) - confirm.
                cmd += list(self.flags)

            cmd += [
                '-A',
                output_a3m_path.as_posix(),
                hmm_input_path.as_posix(),
                # str() so argv is all strings whether database_path is a str
                # or a Path; a raw Path broke the join used for logging below.
                str(self.database_path),
            ]

            # Space-separated so the logged command line is readable.
            print(f"Launching subprocess {' '.join(cmd)}")
            process = subprocess.Popen(cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            with utils.timing('HMMSearch query'):
                stdout, stderr = process.communicate()
                retcode = process.wait()
            if retcode:
                # errors='replace' keeps undecodable tool output from masking the failure.
                out_text = stdout.decode('utf-8', errors='replace')
                err_text = stderr.decode('utf-8', errors='replace')
                raise RuntimeError(
                    f"HMMSearch failed:\nstdout:\n{out_text}\nstderr:\n{err_text}"
                )

            with open(output_a3m_path) as f:
                a3m = f.read()

        return a3m