def test_makeheader_1(self):
    """Check that makeheader assembles a full header from all of its fields."""
    fields = {
        'mainid': 'MAINID',
        'seqid': '1',
        'chains': {'A', 'B', 'C'},
        'source': 'MADEUP',
        'extrainfo': 'A made up protein information',
    }
    expected_header = ('>crops|MAINID_1|Chains A,B,C|Source: MADEUP|'
                       'A made up protein information')

    new_header = cit.makeheader(**fields)

    self.assertEqual(new_header, expected_header)
def update_cropsheader(self):
    """Update `crops_header`. Useful after updating any information from the sequence.

    The header is rebuilt with `makeheader` from the oligomer ID ('NoID' if
    unset), the sequence name, the chains, the source and `infostring`.
    When the sequence has been cropped (`ncrops()` is non-zero or a
    'cropseq' entry exists in `seqs`), the output of `cropinfo()` is
    appended to the extra information, separated by a '|'.
    """
    mainid = 'NoID' if self.oligomer_id is None else self.oligomer_id

    extrainfo = self.infostring
    if self.ncrops() != 0 or 'cropseq' in self.seqs:
        # `infostring` can be empty (or None); indexing its last character
        # would raise, so only add the separator after non-empty text.
        extrainfo = extrainfo or ''
        if extrainfo and not extrainfo.endswith('|'):
            extrainfo += '|'
        extrainfo += self.cropinfo()

    self.crops_header = makeheader(mainid=mainid,
                                   seqid=self.name,
                                   chains=self.chains,
                                   source=self.source,
                                   extrainfo=extrainfo)
def __repr__(self):
    """Return a one-line summary: kind, short ID, sequence preview, type and length.

    Sequences longer than 20 characters are previewed as their first and
    last 10 characters joined by '[...]'.

    :raises `ValueError`: If `seqs` has no 'mainseq' entry.
    """
    if self.biotype is None:
        type_label = 'Undefined'
    else:
        type_label = self.biotype

    if 'mainseq' not in self.seqs:
        logging.critical("'mainseq' sequence not found.")
        raise ValueError

    mainseq = self.seqs['mainseq']
    seqlen = len(mainseq)
    if seqlen > 20:
        preview = mainseq[:10] + '[...]' + mainseq[seqlen - 10:]
    else:
        preview = mainseq

    if self.oligomer_id is None:
        oligomer_label = 'NOID'
    else:
        oligomer_label = self.oligomer_id
    shortid = makeheader(mainid=oligomer_label,
                         seqid=self.name,
                         chains=self.chains,
                         short=True)

    pieces = [
        self._kind, " object ", shortid,
        " (seq=", str(preview),
        ", type=", type_label,
        ", length=", str(seqlen), ")",
    ]
    return "".join(pieces)
def dumpmap(self, out, split=False):
    """Write header and cropmap to a file.

    If file exists, output is appended. Each map entry is written as
    'key value' on its own line; entries whose value is None are written
    with a value of 0.

    :param out: An output filepath (str), 'string', or an open file.
    :type out: str, file
    :param split: If True, identical maps are dumped for every chain, defaults to False.
    :type split: bool, optional
    :raises `TypeError`: If `out` is neither a string nor an open file.
    :raises `ValueError`: If `cropmap` is None.
    :raises `KeyError`: If object contains no chains.
    :return: A string containing the output if and only if out=='string' (case-insensitive).
    :rtype: str
    """
    if not isinstance(out, (str, io.IOBase)):
        logging.critical("Argument 'out' should be a string or a file.")
        raise TypeError
    if self.cropmap is None:
        stringerr = "Cropmap not found in sequence."
        logging.critical(stringerr)
        raise ValueError
    if (self.chains is None or
            (isinstance(self.chains, set) and len(self.chains) == 0)):
        logging.critical('No chains defined in sequence.')
        raise KeyError

    # One header per chain when splitting, otherwise one for the whole set.
    chset = [{ch} for ch in self.chains] if split else [self.chains]

    tag1 = 'NoID' if self.oligomer_id is None else self.oligomer_id
    tag2 = self.infostring
    if self.ncrops() != 0 or 'cropseq' in self.seqs:
        # `infostring` can be empty (or None); indexing its last character
        # would raise, so only add the separator after non-empty text.
        tag2 = tag2 or ''
        if tag2 and not tag2.endswith('|'):
            tag2 += '|'
        tag2 += self.cropinfo()

    outheader = [makeheader(mainid=tag1,
                            seqid=self.name,
                            chains=ch,
                            source=self.source,
                            extrainfo=tag2)
                 for ch in chset]

    # The map body is identical under every header, so build it once.
    maplines = [str(key) + ' ' + (str(value) if value is not None else '0')
                for key, value in self.cropmap.items()]

    to_open_file = isinstance(out, io.IOBase)
    # Open files get '\n'; the string output keeps the platform line separator.
    eol = '\n' if to_open_file else os.linesep
    output = ''.join(line + eol
                     for header in outheader
                     for line in [header] + maplines)

    if to_open_file:
        out.write(output)
        return

    if out.lower() == 'string':
        return output

    # Mode 'a' creates the file when missing and appends otherwise.
    # newline='' stops text mode from translating the '\n' inside
    # os.linesep a second time (which yields '\r\r\n' on Windows).
    with open(out, 'a', newline='') as outfile:
        outfile.write(output)
    return
def dump(self, out, split=False, oneline=False):
    """Write header and main sequence to a file.

    If the file exists, output is appended.

    :param out: An output filepath (str), 'string', or an open file.
    :type out: str, file
    :param split: If True, identical sequences are dumped for every chain, defaults to False.
    :type split: bool, optional
    :param oneline: If True, sequences are not split in 80 residue-lines, defaults to False.
    :type oneline: bool, optional
    :raises `TypeError`: If `out` is neither a string nor an open file.
    :raises `KeyError`: If object contains no chains.
    :return: A string containing the output if and only if out=='string' (case-insensitive).
    :rtype: str
    """
    if not isinstance(out, (str, io.IOBase)):
        logging.critical("Argument 'out' should be a string or a file.")
        raise TypeError
    if (self.chains is None or
            (isinstance(self.chains, set) and len(self.chains) == 0)):
        logging.critical('No chains defined in sequence.')
        raise KeyError

    # One header per chain when splitting, otherwise one for the whole set.
    chset = [{ch} for ch in self.chains] if split else [self.chains]

    tag1 = 'NoID' if self.oligomer_id is None else self.oligomer_id
    tag2 = self.infostring
    if self.ncrops() != 0 or 'cropseq' in self.seqs:
        # `infostring` can be empty (or None); indexing its last character
        # would raise, so only add the separator after non-empty text.
        tag2 = tag2 or ''
        if tag2 and not tag2.endswith('|'):
            tag2 += '|'
        tag2 += self.cropinfo()

    outheader = [makeheader(mainid=tag1,
                            seqid=self.name,
                            chains=ch,
                            source=self.source,
                            extrainfo=tag2)
                 for ch in chset]

    mainseq = self.seqs['mainseq']
    if oneline:
        seqlines = [mainseq]
    else:
        # 80-residue lines; an empty sequence still yields one blank line.
        seqlines = [mainseq[start:start + 80]
                    for start in range(0, len(mainseq), 80)] or ['']

    to_open_file = isinstance(out, io.IOBase)
    # Open files get '\n'; the string output keeps the platform line separator.
    eol = '\n' if to_open_file else os.linesep
    output = ''.join(line + eol
                     for header in outheader
                     for line in [header] + seqlines)

    if to_open_file:
        out.write(output)
        return

    if out.lower() == 'string':
        return output

    # Mode 'a' creates the file when missing and appends otherwise.
    # newline='' stops text mode from translating the '\n' inside
    # os.linesep a second time (which yields '\r\r\n' on Windows).
    with open(out, 'a', newline='') as outfile:
        outfile.write(output)
    return
def __init__(self, seqid=None, oligomer=None, seq=None, chains=None,
             source=None, header=None, biotype=None, extrainfo=None):
    """Initialise the sequence from explicit arguments and/or a header.

    Explicit arguments take precedence. For `seqid`, `oligomer`, `chains`,
    `source` and `extrainfo`, a missing argument is filled from the
    corresponding entry ('seqid', 'mainid', 'chains', 'source', 'comments')
    of the information extracted from `header` by `retrieve_id`, when
    available. If the header cannot be parsed, a warning is logged and no
    information is extracted from it.

    :param seqid: Sequence identifier, defaults to '1' if not otherwise available.
    :type seqid: str, int, optional
    :param oligomer: Oligomer identifier.
    :type oligomer: str, optional
    :param seq: Main sequence, defaults to an empty string.
    :type seq: str, optional
    :param chains: Chain identifiers.
    :type chains: set [str], optional
    :param source: Source of the sequence.
    :type source: str, optional
    :param header: Original header, stored in `source_headers`.
    :type header: str, optional
    :param biotype: Type of molecule; if 'guess' (case-insensitive), it is
        obtained from `guess_type(seq)`.
    :type biotype: str, optional
    :param extrainfo: Extra information for the header, defaults to an
        empty string if not otherwise available.
    :type extrainfo: str, optional
    :raises `TypeError`: If `header`, `seqid`, `seq`, `oligomer`, `chains`,
        `source` or `extrainfo` have an unexpected type.
    """
    self.oligomer_id = None
    self.name = None
    self.chains = set()
    self.source = None
    self.source_headers = []
    self.crops_header = None
    self.seqs = {}
    self.biotype = None
    self.infostring = None
    self.cropmap = None
    self.cropbackmap = None
    self.msa = None
    self.cropmsa = None
    self.intervals = None

    header_info = None
    if header is not None:
        if not isinstance(header, str):
            logging.critical("Argument 'header' should be a string.")
            raise TypeError
        self.source_headers.append(header)
        try:
            header_info = retrieve_id(header)
        except Exception:
            # Deliberate best effort: an unrecognised header is not fatal.
            logging.warning(
                'Header format not recognised. Information not extracted.'
            )
            header_info = None
    # Fallback values parsed from the header; empty when there are none.
    parsed = header_info if header_info is not None else {}

    if seqid is not None:
        if isinstance(seqid, str):
            self.name = seqid
        elif isinstance(seqid, int):
            self.name = str(seqid)
        else:
            logging.critical("Sequence ID 'seqid' should be a string.")
            raise TypeError
    elif 'seqid' in parsed:
        self.name = parsed['seqid']
    if self.name is None:
        # A header needs a sequence ID, so never leave the name unset.
        self.name = '1'

    if seq is not None:
        if isinstance(seq, str):
            self.seqs['mainseq'] = seq
        else:
            logging.critical("Chain sequence 'seq' should be a string.")
            raise TypeError
    else:
        self.seqs['mainseq'] = ''

    if oligomer is not None:
        if isinstance(oligomer, str):
            self.oligomer_id = oligomer
        else:
            logging.critical("Oligomer ID 'oligomer' should be a string.")
            raise TypeError
    elif 'mainid' in parsed:
        self.oligomer_id = parsed['mainid']

    if chains is not None:
        if not isinstance(chains, set):
            logging.critical(
                "Argument 'chains' should be a set of strings.")
            raise TypeError
        for ch in chains:
            if isinstance(ch, str):
                self.chains.add(ch)
            else:
                logging.critical(
                    "Chain IDs in 'chains' set should be strings.")
                raise TypeError
    elif 'chains' in parsed:
        self.chains = parsed['chains']

    if source is not None:
        if isinstance(source, str):
            self.source = source
        else:
            logging.critical("Argument 'source' should be a string.")
            raise TypeError
    elif 'source' in parsed:
        self.source = parsed['source']

    if biotype is not None:
        if biotype.lower() == 'guess':
            self.biotype = guess_type(seq)
        else:
            self.biotype = biotype
    else:
        self.biotype = None

    if extrainfo is not None:
        if isinstance(extrainfo, str):
            self.infostring = extrainfo
        else:
            logging.critical("Argument 'extrainfo' should be a string.")
            raise TypeError
    elif 'comments' in parsed:
        self.infostring = parsed['comments']
    if self.infostring is None:
        # Other methods index and append to `infostring`; keep it a string.
        self.infostring = ""

    # Use the resolved ID (argument or header), not just the argument, so
    # the header agrees with `oligomer_id`.
    mainid = 'NOID' if self.oligomer_id is None else self.oligomer_id
    self.crops_header = makeheader(mainid=mainid,
                                   seqid=self.name,
                                   chains=self.chains,
                                   source=self.source,
                                   extrainfo=self.infostring)