示例#1
0
    def test_makeheader_1(self):
        """Check that `makeheader` builds the full header when all five fields are given."""
        main_id = 'MAINID'
        seq_id = '1'
        chain_ids = {'A', 'B', 'C'}
        origin = 'MADEUP'
        comment = 'A made up protein information'

        expected_header = ('>crops|MAINID_1|Chains A,B,C|Source: MADEUP|'
                           'A made up protein information')

        new_header = cit.makeheader(mainid=main_id, seqid=seq_id,
                                    chains=chain_ids, source=origin,
                                    extrainfo=comment)

        self.assertEqual(new_header, expected_header)
示例#2
0
    def update_cropsheader(self):
        """Update `crops_header`. Useful after updating any information from the sequence.

        The header is rebuilt with `makeheader` from the oligomer ID ('NoID' when
        it is None), the sequence name, the chains, the source and the info
        string. When the sequence has crops (``ncrops() != 0``) or holds a
        'cropseq' entry, the text returned by `cropinfo()` is appended to the
        info string, separated from any preceding text by a single '|'.
        """
        tag1 = 'NoID' if self.oligomer_id is None else self.oligomer_id
        tag2 = self.infostring
        if self.ncrops() != 0 or 'cropseq' in self.seqs:
            # An empty or missing info string has no last character to inspect;
            # indexing it with [-1] used to raise IndexError/TypeError here.
            if tag2 is None:
                tag2 = ''
            # A separator is only needed when there is preceding text that
            # does not already end with one.
            if tag2 and not tag2.endswith('|'):
                tag2 += '|'
            tag2 += self.cropinfo()

        self.crops_header = makeheader(mainid=tag1,
                                       seqid=self.name,
                                       chains=self.chains,
                                       source=self.source,
                                       extrainfo=tag2)
示例#3
0
 def __repr__(self):
     """Return a one-line summary of the object.

     The summary contains the object kind, a short ID built by `makeheader`,
     the main sequence (only its first and last 10 characters joined by
     '[...]' when it is longer than 20), the biotype and the sequence length.

     :raises `ValueError`: If the object holds no 'mainseq' sequence.
     """
     if self.biotype is None:
         chtype = 'Undefined'
     else:
         chtype = self.biotype

     if 'mainseq' not in self.seqs:
         logging.critical("'mainseq' sequence not found.")
         raise ValueError

     mainseq = self.seqs['mainseq']
     seqlen = len(mainseq)
     if seqlen > 20:
         # Long sequences are abbreviated to head + '[...]' + tail.
         showseq = mainseq[:10] + '[...]' + mainseq[seqlen - 10:]
     else:
         showseq = mainseq

     if self.oligomer_id is None:
         tempolig = 'NOID'
     else:
         tempolig = self.oligomer_id
     shortid = makeheader(mainid=tempolig,
                          seqid=self.name,
                          chains=self.chains,
                          short=True)

     pieces = [
         self._kind, " object ", shortid,
         " (seq=", str(showseq),
         ", type=", chtype,
         ", length=", str(seqlen), ")",
     ]
     return "".join(pieces)
示例#4
0
    def dumpmap(self, out, split=False):
        """Write header and cropmap to a file. If file exists, output is appended.

        Each record is a header built by `makeheader` followed by one
        ``key  value`` line per entry of `cropmap`; entries whose value is
        None are written with the value ``0``.

        :param out: An output filepath (str), 'string', or an open file.
        :type out: str, file
        :param split: If True, identical maps are dumped for every chain, defaults to False.
        :type split: bool, optional

        :raises `TypeError`: If `out` is neither a string nor an open file.
        :raises `ValueError`: If `cropmap` is None.
        :raises `KeyError`: If object contains no chains.

        :return: A string containing the output if and only if `out` is 'string'
            (case-insensitive); None otherwise.
        :rtype: str

        """
        to_stream = isinstance(out, io.IOBase)
        if not to_stream and not isinstance(out, str):
            stringerr = "Argument 'out' should be a string or a file."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if self.cropmap is None:
            stringerr = "Cropmap not found in sequence."
            logging.critical(stringerr)
            raise ValueError(stringerr)

        if (self.chains is None
                or (isinstance(self.chains, set) and len(self.chains) == 0)):
            stringerr = 'No chains defined in sequence.'
            logging.critical(stringerr)
            raise KeyError(stringerr)

        # One header per chain when splitting, otherwise one for all chains.
        chset = [{ch} for ch in self.chains] if split else [self.chains]

        tag1 = 'NoID' if self.oligomer_id is None else self.oligomer_id
        tag2 = self.infostring
        if self.ncrops() != 0 or 'cropseq' in self.seqs:
            # An empty or missing info string has no last character to inspect;
            # indexing it with [-1] used to raise IndexError/TypeError here.
            if tag2 is None:
                tag2 = ''
            if tag2 and not tag2.endswith('|'):
                tag2 += '|'
            tag2 += self.cropinfo()

        # Open files receive '\n'; the returned/saved string uses os.linesep.
        eol = '\n' if to_stream else os.linesep

        # The map lines are identical for every header, so build them once.
        maplines = [
            str(key) + '  ' + ('0' if value is None else str(value))
            for key, value in self.cropmap.items()
        ]

        records = []
        for ch in chset:
            header = makeheader(mainid=tag1,
                                seqid=self.name,
                                chains=ch,
                                source=self.source,
                                extrainfo=tag2)
            records.append(eol.join([header] + maplines) + eol)
        output = ''.join(records)

        if to_stream:
            out.write(output)
            return None

        if out.lower() == 'string':
            return output

        # Mode 'a' appends to an existing file and creates a missing one, so no
        # prior existence check is needed. newline='' writes the os.linesep
        # terminators verbatim instead of translating their '\n' a second time
        # (which would yield '\r\r\n' on Windows).
        with open(out, 'a', newline='') as outfile:
            outfile.write(output)
        return None
示例#5
0
    def dump(self, out, split=False, oneline=False):
        """Write header and main sequence to a file. If the file exists, output is appended.

        Each record is a header built by `makeheader` followed by the
        'mainseq' sequence, either on a single line or wrapped every 80
        characters.

        :param out: An output filepath (str), 'string', or an open file.
        :type out: str, file
        :param split: If True, identical sequences are dumped for every chain, defaults to False.
        :type split: bool, optional
        :param oneline: If True, sequences are not split in 80 residue-lines, defaults to False.
        :type oneline: bool, optional

        :raises `TypeError`: If `out` is neither a string nor an open file.
        :raises `KeyError`: If object contains no chains.

        :return: A string containing the output if and only if `out` is 'string'
            (case-insensitive); None otherwise.
        :rtype: str

        """
        to_stream = isinstance(out, io.IOBase)
        if not to_stream and not isinstance(out, str):
            stringerr = "Argument 'out' should be a string or a file."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if (self.chains is None
                or (isinstance(self.chains, set) and len(self.chains) == 0)):
            stringerr = 'No chains defined in sequence.'
            logging.critical(stringerr)
            raise KeyError(stringerr)

        # One header per chain when splitting, otherwise one for all chains.
        chset = [{ch} for ch in self.chains] if split else [self.chains]

        tag1 = 'NoID' if self.oligomer_id is None else self.oligomer_id
        tag2 = self.infostring
        if self.ncrops() != 0 or 'cropseq' in self.seqs:
            # An empty or missing info string has no last character to inspect;
            # indexing it with [-1] used to raise IndexError/TypeError here.
            if tag2 is None:
                tag2 = ''
            if tag2 and not tag2.endswith('|'):
                tag2 += '|'
            tag2 += self.cropinfo()

        outheader = [
            makeheader(mainid=tag1,
                       seqid=self.name,
                       chains=ch,
                       source=self.source,
                       extrainfo=tag2) for ch in chset
        ]

        mainseq = self.seqs['mainseq']
        if oneline:
            seqlines = [mainseq]
        else:
            # Wrap at 80 characters. max(..., 1) keeps the single empty line
            # that an empty sequence has always produced.
            seqlines = [
                mainseq[start:start + 80]
                for start in range(0, max(len(mainseq), 1), 80)
            ]

        # Open files receive '\n'; the returned/saved string uses os.linesep.
        eol = '\n' if to_stream else os.linesep
        output = ''.join(
            eol.join([header] + seqlines) + eol for header in outheader)

        if to_stream:
            out.write(output)
            return None

        if out.lower() == 'string':
            return output

        # Mode 'a' appends to an existing file and creates a missing one, so no
        # prior existence check is needed. newline='' writes the os.linesep
        # terminators verbatim instead of translating their '\n' a second time
        # (which would yield '\r\r\n' on Windows).
        with open(out, 'a', newline='') as outfile:
            outfile.write(output)
        return None
示例#6
0
    def __init__(self,
                 seqid=None,
                 oligomer=None,
                 seq=None,
                 chains=None,
                 source=None,
                 header=None,
                 biotype=None,
                 extrainfo=None):
        """Build the object from explicit arguments and/or a source header.

        Explicit arguments take precedence; a value that is not given is taken
        from the information `retrieve_id` extracts from `header`, when
        available.

        :param seqid: Sequence identifier; an int is converted to str. Defaults to
            the header's 'seqid', or '1' when no header information is available.
        :type seqid: str or int, optional
        :param oligomer: Oligomer identifier, defaults to the header's 'mainid'.
        :type oligomer: str, optional
        :param seq: Main sequence, stored as ``seqs['mainseq']``; defaults to ''.
        :type seq: str, optional
        :param chains: Chain identifiers, defaults to the header's 'chains'.
        :type chains: set [str], optional
        :param source: Source of the sequence, defaults to the header's 'source'.
        :type source: str, optional
        :param header: Original header; kept in `source_headers` and parsed with
            `retrieve_id`. A header that cannot be parsed only triggers a warning.
        :type header: str, optional
        :param biotype: Type of molecule; 'guess' (case-insensitive) delegates to
            `guess_type`. Defaults to None.
        :type biotype: str, optional
        :param extrainfo: Additional information, defaults to the header's
            'comments', or '' when no header information is available.
        :type extrainfo: str, optional

        :raises `TypeError`: If `header`, `seqid`, `seq`, `oligomer`, `chains`,
            `source` or `extrainfo` is of the wrong type.
        """
        self.oligomer_id = None
        self.name = None
        self.chains = set()
        self.source = None
        self.source_headers = []
        self.crops_header = None
        self.seqs = {}
        self.biotype = None
        self.infostring = None
        self.cropmap = None
        self.cropbackmap = None
        self.msa = None
        self.cropmsa = None
        self.intervals = None

        header_info = None
        if header is not None:
            if not isinstance(header, str):
                stringerr = "Argument 'header' should be a string."
                logging.critical(stringerr)
                raise TypeError(stringerr)
            self.source_headers.append(header)
            try:
                header_info = retrieve_id(header)
            except Exception:
                # Deliberate best effort: an unparsable header is kept in
                # source_headers but contributes no information.
                logging.warning(
                    'Header format not recognised. Information not extracted.'
                )
                header_info = None

        if seqid is None:
            if header_info is None:
                self.name = '1'
            elif 'seqid' in header_info:
                self.name = header_info['seqid']
            # NOTE(review): header information without 'seqid' leaves `name`
            # as None - confirm this is intended.
        elif isinstance(seqid, str):
            self.name = seqid
        elif isinstance(seqid, int):
            self.name = str(seqid)
        else:
            stringerr = "Sequence ID 'seqid' should be a string."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if seq is None:
            self.seqs['mainseq'] = ''
        elif isinstance(seq, str):
            self.seqs['mainseq'] = seq
        else:
            stringerr = "Chain sequence 'seq' should be a string."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if oligomer is None:
            if header_info is not None and 'mainid' in header_info:
                self.oligomer_id = header_info['mainid']
        elif isinstance(oligomer, str):
            self.oligomer_id = oligomer
        else:
            stringerr = "Oligomer ID 'oligomer' should be a string."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if chains is None:
            if header_info is not None and 'chains' in header_info:
                self.chains = header_info['chains']
        elif isinstance(chains, set):
            for ch in chains:
                if not isinstance(ch, str):
                    stringerr = "Chain IDs in 'chains' set should be strings."
                    logging.critical(stringerr)
                    raise TypeError(stringerr)
                self.chains.add(ch)
        else:
            stringerr = "Argument 'chains' should be a set of strings."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if source is None:
            if header_info is not None and 'source' in header_info:
                self.source = header_info['source']
        elif isinstance(source, str):
            self.source = source
        else:
            stringerr = "Argument 'source' should be a string."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        if biotype is None:
            self.biotype = None
        elif biotype.lower() == 'guess':
            self.biotype = guess_type(seq)
        else:
            self.biotype = biotype

        if extrainfo is None:
            if header_info is None:
                self.infostring = ""
            elif 'comments' in header_info:
                self.infostring = header_info['comments']
            # NOTE(review): header information without 'comments' leaves
            # `infostring` as None - confirm this is intended.
        elif isinstance(extrainfo, str):
            self.infostring = extrainfo
        else:
            stringerr = "Argument 'extrainfo' should be a string."
            logging.critical(stringerr)
            raise TypeError(stringerr)

        # Decide on the stored ID, not on the `oligomer` argument: an ID that
        # was extracted from the header must appear in the header as well.
        mainid = 'NOID' if self.oligomer_id is None else self.oligomer_id
        self.crops_header = makeheader(mainid=mainid,
                                       seqid=self.name,
                                       chains=self.chains,
                                       source=self.source,
                                       extrainfo=self.infostring)