def __init__(self, base_dir, *, ff_tmpl='__SFAM__-ff-__FF_NUM__.sto'):
    """Store the search directory and the FunFam filename template.

    Args:
        base_dir: directory under which alignment files are searched.
        ff_tmpl: filename template. Only ASCII letters, '_', '.', '-'
            and '*' are accepted.

    Raises:
        err.InvalidInputError: if ``ff_tmpl`` contains any other character.
    """
    self.base_dir = base_dir

    # The template is validated against a strict whitelist of characters;
    # anything outside of it is rejected outright.
    illegal_char = re.search(r'[^a-zA-Z_.\-*]', ff_tmpl)
    if illegal_char is not None:
        raise err.InvalidInputError(
            "ff_tmpl contains unexpected characters")

    self.ff_tmpl = ff_tmpl
def search_by_domain_id(self, domain_id):
    """Return the filename of the FunFam alignment containing the domain id.

    Runs a recursive ``grep -l`` under ``self.base_dir`` (restricted to
    files matching the filename template) for lines starting with
    ``domain_id`` and expects exactly one file to match.

    Args:
        domain_id: domain id to look for; must pass ``is_valid_domain_id``.

    Returns:
        The path (as printed by grep) of the single matching file.

    Raises:
        err.InvalidInputError: ``domain_id`` is not a valid domain id.
        err.NoMatchesError: grep exited with status 1 (no matches).
        subprocess.CalledProcessError: grep exited with any other
            non-zero status.
        FileNotFoundError: grep could not be run, its output could not be
            decoded, or it produced no filenames.
        err.GeneralError: more than one file contains the domain id.
    """
    if not is_valid_domain_id(domain_id):
        raise err.InvalidInputError('{} is not a valid domain id'.format(
            repr(domain_id)))

    # replace template placeholders with '*'
    glob_path = re.sub(r'__([A-Z_]+)__', '*', self.ff_tmpl)

    # Coerce every argument to str so that path-like values (e.g. a
    # pathlib.Path base_dir) can be joined for logging below.
    grep_args = tuple(
        str(arg) for arg in (self.grep_path, '--include', glob_path, '-l',
                             '^' + domain_id, '-R', self.base_dir))
    grep_cmd = " ".join(grep_args)
    LOG.debug("search_by_domain_id: sys: %s", grep_cmd)

    try:
        # note: this returns bytes (not strings)
        grep_out = subprocess.check_output(grep_args).decode('ascii')
    except subprocess.CalledProcessError as e:
        if e.returncode == 1:
            # grep telling us it didn't find any matches
            raise err.NoMatchesError(
                'failed to find domain id {} with cmd {}'.format(
                    domain_id, str(e.cmd))) from e
        LOG.error(
            'CMD: %s\nCODE: %s\nOUTPUT: %s\nSTDERR: "%s"\nSTDOUT: "%s"\n',
            e.cmd, e.returncode, e.output, e.stderr, e.stdout)
        raise
    except Exception as exc:
        # Deliberately `Exception`, not a bare `except:`, so that
        # KeyboardInterrupt / SystemExit are not disguised as a missing
        # file. The original error is kept as the cause.
        raise FileNotFoundError(
            "Encountered error trying to find domain_id '{}' (grep: `{}`)".
            format(domain_id, grep_cmd)) from exc

    ff_files = grep_out.splitlines()

    if not ff_files:
        raise FileNotFoundError(
            "Failed to find FunFam alignment for domain_id '{}' (grep: `{}`)"
            .format(domain_id, grep_cmd))
    if len(ff_files) > 1:
        raise err.GeneralError(
            "Found more than one FunFam file ({}) containing the domain id '{}' (grep: `{}`):\n{}\n"
            .format(
                len(ff_files),
                domain_id,
                grep_cmd,
                "\n".join(ff_files),
            ))

    LOG.debug("search_by_domain_id: found funfam alignment %r", ff_files[0])
    return ff_files[0]
def get_by_id(cls, aa_str):
    """Return the AminoAcid object for a one- or three-character aa code.

    The code is converted to ``str`` and upper-cased before it is looked up
    in ``cls._aa_by_one`` (one character) or ``cls._aa_by_three`` (three
    characters).

    Raises:
        err.InvalidInputError: the code is neither 1 nor 3 characters long.
        KeyError: the code has a valid length but is not in the lookup table.
    """
    code = str(aa_str)
    code_len = len(code)

    # Guard clause: anything that is not a 1- or 3-letter code is rejected.
    if code_len not in (1, 3):
        raise err.InvalidInputError(
            "expected either 1- or 3-character amino acid id (not: '{}')".
            format(code))

    lookup = cls._aa_by_one if code_len == 1 else cls._aa_by_three
    return lookup[code.upper()]
def run_alignment(self, alignment, *, column_gap=None, group_gap=None,
                  mclachlan=False):
    """Run `groupsim` against a given alignment and parse its output.

    A copy of the alignment is written to a temporary FASTA file with each
    sequence uid suffixed by ``|<cluster_id>``, groupsim is executed on that
    file, and its stdout is parsed into a ``GroupsimResult``. The temporary
    file is removed before returning.

    Args:
        alignment: alignment whose sequences all have a ``cluster_id`` set.
        column_gap: value passed to groupsim's ``-c`` option; a falsy value
            (``None``/``0``) falls back to ``self.column_gap``.
        group_gap: value passed to groupsim's ``-g`` option; a falsy value
            (``None``/``0``) falls back to ``self.group_gap``.
        mclachlan: if true, pass ``-m self.mclachlan_path`` to groupsim and
            parse with a maximum score of 5 instead of 1.

    Returns:
        The ``GroupsimResult`` built from groupsim's stdout.

    Raises:
        err.InvalidInputError: a gap threshold is not strictly between 0
            and 1, or a sequence has no ``cluster_id``.
        FileNotFoundError: the groupsim process could not be started/run.
        CalledProcessError: groupsim exited with a non-zero status.
    """
    import os  # local import: only needed to clean up the temp file

    # mclachlan max score is 5: normalise to 0-1 before storing
    maxscore = 5 if mclachlan else 1

    # Falsy overrides fall back to the instance defaults.
    if not column_gap:
        column_gap = self.column_gap
    if not group_gap:
        group_gap = self.group_gap
    column_gap = float(column_gap)
    group_gap = float(group_gap)

    # Explicit validation instead of `assert`, which is stripped under -O.
    for label, value in (('column_gap', column_gap),
                         ('group_gap', group_gap)):
        if not 0 < value < 1:
            raise err.InvalidInputError(
                "{} must be greater than 0 and less than 1 (got: {})".format(
                    label, value))

    # write out the alignment with funfam numbers appended to sequence ids
    # >1ebgB02/127-436|7431
    aln_copy = alignment.copy()
    for seq in aln_copy.seqs:
        if not seq.cluster_id:
            raise err.InvalidInputError((
                "need to set_cluster_id() on alignment sequences before running groupsim: {}"
            ).format(seq.__dict__))
        seq.set_uid('{}|{}'.format(seq.uid, str(seq.cluster_id)))

    # Sorted so the generated command line is deterministic between runs.
    source_ids = sorted({s.cluster_id for s in aln_copy.seqs}, key=str)

    # lower-case aa -> gaps
    # '.' -> '-'
    for s in aln_copy.seqs:
        s.set_all_gap_chars(gap_char='-')
        s.set_lower_case_to_gap(gap_char='-')

    # Only the file *name* is needed: close the handle straight away and
    # remove the file ourselves once groupsim has finished.
    fasta_tmp = tempfile.NamedTemporaryFile(mode='w+', delete=False,
                                            suffix=".fa")
    fasta_tmp_filename = fasta_tmp.name
    fasta_tmp.close()

    try:
        aln_copy.write_fasta(fasta_tmp_filename)

        # Use the resolved (possibly overridden) thresholds, not the
        # instance defaults.
        groupsim_args = [
            self.python2path, self.groupsim_path, '-c',
            str(column_gap), '-g',
            str(group_gap)
        ]
        if mclachlan:
            groupsim_args.extend(['-m', self.mclachlan_path])
        groupsim_args.append(fasta_tmp_filename)
        groupsim_args.extend(source_ids)
        groupsim_args = [str(a) for a in groupsim_args]

        LOG.debug("running groupsim: sys: %s", " ".join(groupsim_args))

        try:
            proc = Popen(groupsim_args, stdout=PIPE, stderr=PIPE,
                         universal_newlines=True)
            groupsim_out, groupsim_err = proc.communicate()
        except Exception as exc:
            # `Exception` rather than a bare `except:` so that
            # KeyboardInterrupt / SystemExit propagate untouched.
            raise FileNotFoundError(
                "Encountered unexpected error running GroupSim: `{}`".format(
                    " ".join(groupsim_args))) from exc
    finally:
        try:
            os.remove(fasta_tmp_filename)
        except OSError:
            LOG.debug("failed to remove temporary file %s",
                      fasta_tmp_filename)

    # Popen.communicate() never raises CalledProcessError, so a failed run
    # has to be detected from the exit status.
    if proc.returncode != 0:
        LOG.error(
            'CMD: %s\nCODE: %s\nOUTPUT: %s\nSTDERR: "%s"\nSTDOUT: "%s"\n',
            groupsim_args, proc.returncode, groupsim_out, groupsim_err,
            groupsim_out)
        raise CalledProcessError(proc.returncode, groupsim_args,
                                 output=groupsim_out, stderr=groupsim_err)

    gs_io = io.StringIO(groupsim_out)
    res = GroupsimResult.from_io(gs_io, maxscore=maxscore)
    return res