def align(self, input_):
    """Align the given sequences with ClustalW and return the alignment.

    The input is passed through ``self._fix_input``, written as FASTA,
    handed to the ClustalW executable and the resulting FASTA output is
    passed through ``self._fix_output`` and wrapped in an ``Alignment``.

    :param input_: sequences to align, in the form accepted by
        ``self._fix_input`` / ``write_fasta`` (presumably a mapping of
        identifier to sequence -- confirm against callers).
    :return: ``Alignment`` built from the parsed ClustalW output.
    :raises InitError: if ``self.clustalw_exe`` is not set.
    :raises RuntimeError: if ClustalW exits with a non-zero status; the
        message carries the captured stderr and the (fixed) input.
    """
    import shutil

    if self.clustalw_exe is None:
        raise InitError("clustalw executable is not set")

    input_ = self._fix_input(input_)

    # A private directory replaces the deprecated, race-prone
    # tempfile.mktemp(): nobody else can pre-create our file names, and
    # one rmtree removes everything that was written there.
    work_dir = tempfile.mkdtemp()
    try:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output.fasta')

        write_fasta(input_path, input_)

        cmd = [self.clustalw_exe, '-TYPE=PROTEIN', '-OUTPUT=FASTA',
               '-PWMATRIX=BLOSUM',
               '-OUTFILE=%s' % output_path,
               '-INFILE=%s' % input_path]

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting. A bare wait()
        # can deadlock once the child fills a pipe buffer.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            # 'replace' keeps a non-ASCII byte in stderr from masking the
            # actual failure with a UnicodeDecodeError.
            raise RuntimeError("%s for %s" % (
                stderr_data.decode('ascii', 'replace'), str(input_)))

        return Alignment(self._fix_output(parse_fasta(output_path)))
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
def align(self, input_):
    """Align the given sequences with ClustalW and return the alignment.

    The input is written as FASTA, handed to the ClustalW executable and
    the resulting FASTA output is parsed and wrapped in an ``Alignment``.

    :param input_: sequences to align, in the form accepted by
        ``write_fasta`` (presumably a mapping of identifier to sequence
        -- confirm against callers).
    :return: ``Alignment`` built from the parsed ClustalW output.
    :raises InitError: if ``self.clustalw_exe`` is not set.
    :raises RuntimeError: if ClustalW exits with a non-zero status; the
        message carries the captured stderr and the input.
    """
    import shutil

    if self.clustalw_exe is None:
        raise InitError("clustalw executable is not set")

    # A private directory replaces the deprecated, race-prone
    # tempfile.mktemp(): nobody else can pre-create our file names, and
    # one rmtree removes everything that was written there.
    work_dir = tempfile.mkdtemp()
    try:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output.fasta')

        write_fasta(input_path, input_)

        cmd = [self.clustalw_exe, '-TYPE=PROTEIN', '-OUTPUT=FASTA',
               '-PWMATRIX=BLOSUM',
               '-OUTFILE=%s' % output_path,
               '-INFILE=%s' % input_path]

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting. A bare wait()
        # can deadlock once the child fills a pipe buffer.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            # 'replace' keeps a non-ASCII byte in stderr from masking the
            # actual failure with a UnicodeDecodeError.
            raise RuntimeError("%s for %s" % (
                stderr_data.decode('ascii', 'replace'), str(input_)))

        return Alignment(parse_fasta(output_path))
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
def align(self, template_sequence, template_secstr, target_sequence,
          gap_open=-13.0, gap_extend=-0.4, modifier=3.0):
    """Align a target sequence to a template sequence using kmad.

    :param template_sequence: template sequence; must not be empty.
    :param template_secstr: secondary structure string for the template;
        must have the same length as ``template_sequence``.
    :param target_sequence: sequence to align against the template.
    :param gap_open: forwarded unchanged to ``self._run_kmad``.
    :param gap_extend: forwarded unchanged to ``self._run_kmad``.
    :param modifier: forwarded unchanged to ``self._run_kmad``.
    :return: ``TargetTemplateAlignment`` built from the 'target' and
        'template' entries of the kmad output.
    :raises ValueError: if the template sequence is empty or its length
        differs from that of the secondary structure string.
    """
    import shutil

    _log.debug("kmad align\n{}\n{}\n{}".format(template_sequence,
                                               template_secstr,
                                               target_sequence))

    # Prevent kmad from adding insertions in bulges, replace those.
    template_secstr = self._remove_bulges(template_secstr, 'H', 3)
    template_secstr = self._remove_bulges(template_secstr, 'E', 3)

    if len(template_sequence) <= 0:
        raise ValueError("empty template sequence")

    if len(template_sequence) != len(template_secstr):
        raise ValueError(
            "template sequence ({}) has different length than secondary structure ({})"
            .format(len(template_sequence), len(template_secstr)))

    kmad_template_sequence = self._to_kmad_sequence(template_sequence,
                                                    template_secstr)
    kmad_target_sequence = self._to_kmad_sequence(target_sequence)

    # A private directory replaces the deprecated, race-prone
    # tempfile.mktemp(). Removing the whole directory afterwards also
    # cleans up every file written there, whichever step failed.
    work_dir = tempfile.mkdtemp()
    try:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output')

        write_fasta(input_path, {'target': kmad_target_sequence,
                                 'template': kmad_template_sequence})

        self._run_kmad(input_path, output_path,
                       gap_open, gap_extend, modifier)

        # The alignment is read from the output path with an '_al'
        # suffix, not from the path handed to kmad.
        aligned = parse_fasta(output_path + '_al')

        _log.debug("kmad aligned\n{}\n{}".format(aligned['target'],
                                                 aligned['template']))
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)

    return TargetTemplateAlignment(aligned['target'], aligned['template'])
def align(self, template_sequence, template_secstr, target_sequence,
          gap_open=-13.0, gap_extend=-0.4, modifier=3.0):
    """Align a target sequence to a template sequence using kmad.

    :param template_sequence: template sequence; must not be empty.
    :param template_secstr: secondary structure string for the template;
        must have the same length as ``template_sequence``.
    :param target_sequence: sequence to align against the template.
    :param gap_open: forwarded unchanged to ``self._run_kmad``.
    :param gap_extend: forwarded unchanged to ``self._run_kmad``.
    :param modifier: forwarded unchanged to ``self._run_kmad``.
    :return: ``TargetTemplateAlignment`` built from the 'target' and
        'template' entries of the kmad output.
    :raises ValueError: if the template sequence is empty or its length
        differs from that of the secondary structure string.
    """
    import shutil

    _log.debug("kmad align\n{}\n{}\n{}".format(template_sequence,
                                               template_secstr,
                                               target_sequence))

    # Prevent kmad from adding insertions in bulges, replace those.
    template_secstr = self._remove_bulges(template_secstr, 'H', 3)
    template_secstr = self._remove_bulges(template_secstr, 'E', 3)

    if len(template_sequence) <= 0:
        raise ValueError("empty template sequence")

    if len(template_sequence) != len(template_secstr):
        raise ValueError(
            "template sequence ({}) has different length than secondary structure ({})"
            .format(len(template_sequence), len(template_secstr)))

    kmad_template_sequence = self._to_kmad_sequence(template_sequence,
                                                    template_secstr)
    kmad_target_sequence = self._to_kmad_sequence(target_sequence)

    # A private directory replaces the deprecated, race-prone
    # tempfile.mktemp(). Removing the whole directory afterwards also
    # cleans up every file written there, whichever step failed.
    work_dir = tempfile.mkdtemp()
    try:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output')

        write_fasta(input_path, {'target': kmad_target_sequence,
                                 'template': kmad_template_sequence})

        self._run_kmad(input_path, output_path,
                       gap_open, gap_extend, modifier)

        # The alignment is read from the output path with an '_al'
        # suffix, not from the path handed to kmad.
        aligned = parse_fasta(output_path + '_al')

        _log.debug("kmad aligned\n{}\n{}".format(aligned['target'],
                                                 aligned['template']))
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)

    return TargetTemplateAlignment(aligned['target'], aligned['template'])
args = arg_parser.parse_args() tmp_dir = tempfile.mkdtemp() model_storage.model_dir = tmp_dir final_output_dir = settings.MODEL_DIR if args.output_dir: final_output_dir = args.output_dir if not os.path.isdir(final_output_dir): raise ValueError("Not a directory: {}".format(final_output_dir)) try: sequence = list(parse_fasta(args.fasta).values())[0] species_id = args.species.upper() if args.template: pdbid, chain_id = args.template.split('_') template_id = TemplateID(pdbid, chain_id) else: template_id = None domain_alignments = domain_aligner.get_domain_alignments(sequence, args.position, template_id) _log.info("{} domain alignments".format(len(domain_alignments))) ts = [ModelThread(sequence, species_id, ali, final_output_dir) for ali in domain_alignments] for t in ts: t.start()
def pick_random_sequences(n):
    """Return ``n`` randomly chosen entries parsed from ``SPROT_FASTA``.

    :param n: number of distinct entries to pick.
    :return: dict holding the ``n`` sampled keys of the parsed FASTA
        mapping together with their values.
    :raises ValueError: if ``n`` is negative or larger than the number of
        parsed entries (raised by ``random.sample``).
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # random.sample() requires a real sequence: a dict key view is
    # rejected with TypeError on Python 3.11+ (deprecated since 3.9).
    keys = random.sample(list(sprot_sequences), n)

    return {key: sprot_sequences[key] for key in keys}
args = arg_parser.parse_args() tmp_dir = tempfile.mkdtemp() model_storage.model_dir = tmp_dir final_output_dir = settings.MODEL_DIR if args.output_dir: final_output_dir = args.output_dir if not os.path.isdir(final_output_dir): raise ValueError("Not a directory: {}".format(final_output_dir)) try: sequence = parse_fasta(args.fasta).values()[0] species_id = args.species.upper() if args.template: pdbid, chain_id = args.template.split('_') template_id = TemplateID(pdbid, chain_id) else: template_id = None domain_alignments = domain_aligner.get_domain_alignments(sequence, args.position, template_id) _log.info("{} domain alignments".format(len(domain_alignments))) ts = [ModelThread(sequence, species_id, ali, final_output_dir) for ali in domain_alignments] for t in ts: t.start()
'--template', help="underscore separated template pdbid and chain") args = arg_parser.parse_args() tmp_dir = tempfile.mkdtemp() model_storage.model_dir = tmp_dir final_output_dir = settings.MODEL_DIR if args.output_dir: final_output_dir = args.output_dir if not os.path.isdir(final_output_dir): raise ValueError("Not a directory: {}".format(final_output_dir)) try: sequence = list(parse_fasta(args.fasta).values())[0] species_id = args.species.upper() if args.template: pdbid, chain_id = args.template.split('_') template_id = TemplateID(pdbid, chain_id) else: template_id = None domain_alignments = domain_aligner.get_domain_alignments( sequence, args.position, template_id) _log.info("{} domain alignments".format(len(domain_alignments))) ts = [ ModelThread(sequence, species_id, ali, final_output_dir)
def pick_random_sequences(n):
    """Return ``n`` randomly chosen entries parsed from ``SPROT_FASTA``.

    :param n: number of distinct entries to pick.
    :return: dict holding the ``n`` sampled keys of the parsed FASTA
        mapping together with their values.
    :raises ValueError: if ``n`` is negative or larger than the number of
        parsed entries (raised by ``random.sample``).
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # random.sample() requires a real sequence: a dict key view is
    # rejected with TypeError on Python 3.11+ (deprecated since 3.9).
    keys = random.sample(list(sprot_sequences), n)

    return {key: sprot_sequences[key] for key in keys}