Exemplo n.º 1
0
 def _process(self):
     """Condense the PDBTM XML entries into the file at ``self._pdbtmfile``.

     Every ``*.xml`` file listed under ``<self._local>/pdbtm/database/`` is
     scanned line by line.  Lines are recognised by their exact leading
     indentation and tag prefix (this is prefix matching, not XML parsing).
     For each file a ``TM`` object, named after the upper-cased file stem,
     collects:

     * the raw ``<TMRES>``, ``<TMTYPE`` and ``<PDBKWRES`` lines,
     * the ``Side1`` value of the ``<SIDEDEFINITION`` line,
     * every ``<CHAIN `` whose id has not previously been seen as a
       ``NEW_CHAINID`` of an ``<APPLY_TO_CHAIN`` line, together with the
       ``<REGION `` lines found before the closing ``</CHAIN>``.

     Entries that end up with at least one chain are written as
     ``str(xmldata)``, one per line.  A recognised line whose attributes
     do not match the expected pattern is ignored, as was already the case
     for the chain lines.
     """
     # Compiled once: the same patterns are applied to every line of every file.
     side_re = re.compile(r'Side1="(\S+)"')
     new_chain_re = re.compile(r'NEW_CHAINID="(\S{1})"')
     chain_re = re.compile(r'CHAINID="(\S{1})" NUM_TM="(\d{1})" TYPE="(\S+)"')
     region_re = re.compile(r'pdb_beg="(\-*\d+\w*)"[\s\S]+'
                            r'pdb_end="(\-*\d+\w*)"\s+type="(\w{1})"')

     tmoFile = File(self._pdbtmfile, 'w', True)
     try:
         xmldir = os.path.join(self._local, 'pdbtm/database/')
         for xmlfile in Path.list_files(xmldir, '*.xml'):
             pdb_id = os.path.splitext(os.path.basename(xmlfile))[0].upper()
             xmldata = TM(pdb=pdb_id)
             skip_chains = set()
             cdata = None   # key of the chain currently being read
             read = False   # True only between an accepted <CHAIN> and </CHAIN>
             # The context manager closes the XML file even if parsing fails.
             with open(xmlfile) as fdxml:
                 for line in fdxml:
                     if line.startswith('    <TMRES>'):
                         xmldata.tmres = line
                     elif line.startswith('    <TMTYPE'):
                         xmldata.tmtype = line
                     elif line.startswith('    <PDBKWRES'):
                         xmldata.kwres = line
                     elif line.startswith('  <SIDEDEFINITION'):
                         m = side_re.search(line)
                         if m:
                             xmldata.side = m.group(1)
                     elif line.startswith('      <APPLY_TO_CHAIN'):
                         m = new_chain_re.search(line)
                         if m:
                             skip_chains.add(m.group(1))
                     elif line.startswith('  <CHAIN '):
                         m = chain_re.search(line)
                         if m:
                             chain, num, tmtype = m.group(1), m.group(2), m.group(3)
                             if chain not in skip_chains:
                                 cdata = (chain, num, tmtype)
                                 xmldata.set_chain(cdata)
                                 read = True
                     elif line.startswith('    <REGION ') and read:
                         m = region_re.search(line)
                         if m:
                             ini, end, tmtype = m.group(1), m.group(2), m.group(3)
                             xmldata.set_chain(cdata, (ini, end, tmtype))
                     elif line.startswith('  </CHAIN>'):
                         read = False
             if len(xmldata.chains) > 0:
                 tmoFile.write(str(xmldata) + "\n")
     finally:
         tmoFile.close()
Exemplo n.º 2
0
 def format2file(self, filename, extension='pdb', center=False):
     """Write the structure to ``<filename>.<extension>``.

     :param filename: output path without the extension.
     :param extension: ``'pdb'`` (content from ``self.pdb_format``) or
         ``'js'`` (content from ``self.js_format``).
     :param center: forwarded unchanged to the selected formatter.
     :raise AttributeError: if ``extension`` is neither ``'pdb'`` nor ``'js'``.
     """
     if extension != 'pdb' and extension != 'js':
         raise AttributeError('Not accepted extension')

     structure = File('.'.join((filename, extension)), 'w')
     # Only the two validated extensions can reach this point.
     if extension == 'js':
         content = self.js_format(center=center)
     else:
         content = self.pdb_format(center=center)
     structure.write(content)
     structure.close()
Exemplo n.º 3
0
    def _process(self):
        """Write the sorted enzyme records to ``self._enzfile``.

        The records returned by ``self._parse_enzclass()`` and
        ``self._parse_enzymedat()`` are concatenated and sorted, and the
        ``repr()`` of each one is written on its own line.
        """
        records = self._parse_enzclass()
        records = records + self._parse_enzymedat()
        records.sort()

        # NOTE(review): the third File argument is passed as True; its meaning
        # is defined by the File class (presumably an overwrite flag - confirm).
        output = File(self._enzfile, 'w', True)
        for record in records:
            line = repr(record)
            output.write(line + "\n")
        output.close()
Exemplo n.º 4
0
    def _process(self):
        """Write the processed drug records to ``self._drugfile``.

        Targets are obtained from ``self._process_targets()`` and handed to
        ``self._process_drugs()``; the ``repr()`` of every item it returns is
        written on its own line.
        """
        drugs = self._process_drugs(self._process_targets())

        # NOTE(review): the third File argument is passed as True; its meaning
        # is defined by the File class (presumably an overwrite flag - confirm).
        output = File(self._drugfile, 'w', True)
        for drug in drugs:
            line = repr(drug)
            output.write(line + "\n")
        output.close()
Exemplo n.º 5
0
    def get_FASTA_IDX_by_names_to_file(self, names, outfile):
        """Export the selected sequences and their index lines.

        Two files are produced:

        * ``outfile``: the sequences that ``Fasta(self.PDBseq).retrieve``
          returns for ``names``, each formatted with ``format('FASTA')``.
        * ``outfile + '.idx'``: the lines of ``self.PDBseq + '.idx'`` whose
          first whitespace-separated token, minus its first character,
          is contained in ``names``.

        :param names: identifiers to select; a deep copy is handed to
            ``retrieve`` so the caller's object is not the one passed in.
        :param outfile: path of the FASTA output file.
        """
        source = Fasta(self.PDBseq)
        picked = source.retrieve(copy.deepcopy(names))

        fasta_out = File(outfile, 'w')
        for entry in picked:
            fasta_out.write(entry.format('FASTA') + "\n")
        fasta_out.close()

        idx_source = self.PDBseq + '.idx'
        idx_out = File(outfile + '.idx', 'w')
        idx_in = File(idx_source, 'r')
        for row in idx_in.descriptor:
            # Drop the leading character of the first token (presumably the
            # FASTA '>' marker - confirm against the index format).
            token = row.split()[0]
            if token[1:] in names:
                idx_out.write(row)
        idx_in.close()
        idx_out.close()
Exemplo n.º 6
0
    def _process(self):
        """Parse the GO term file and write one line per term to ``self._gofile``.

        The file at ``os.path.join(self.local, self._gfile)`` is read stanza
        by stanza.  Each ``id:`` line starts a new ``GOterm``; the following
        ``name:``, ``namespace:``, ``alt_id:``, ``is_obsolete:``, ``is_a:``
        and ``relationship:`` lines fill it.  Parsing stops at the first
        ``[Typedef]`` line.

        After parsing, the parents of every term are replaced by the result
        of ``self._search_parents``, the term's own id is added to its
        parents, and ``str(term)`` is written, one term per line.

        The term being built when the input ends is always stored, so a file
        with no ``[Typedef]`` section no longer loses its last term, and a
        file with no terms at all no longer fails on ``None``.
        """
        go_dic = {}
        go = None
        parseFile = File(os.path.join(self.local, self._gfile), 'r')
        try:
            for line in parseFile.descriptor:
                # Backslash-escape single quotes in the stored values
                # (presumably for later use inside quoted statements - confirm).
                line = line.replace("'", "\\'")
                if line.startswith('[Typedef]'):
                    # Everything after this point is not a term definition.
                    break
                if line.startswith('[Term]'):
                    # A new stanza begins: keep the term finished just before.
                    if go is not None:
                        go_dic[go.id] = go
                    continue

                fields = line.split()
                if line.startswith('id:'):
                    go = GOterm(id=fields[1])
                elif line.startswith('name:'):
                    go.name = " ".join(fields[1:])
                elif line.startswith('namespace:'):
                    go.namespace = fields[1]
                elif line.startswith('alt_id:'):
                    go.alt_id.append(fields[1])
                elif line.startswith('is_obsolete:'):
                    go.obsolete = True
                elif line.startswith('is_a:'):
                    go.parents.add(fields[1])
                elif line.startswith('relationship:'):
                    go.relations.append((fields[1], fields[2]))
        finally:
            parseFile.close()

        # Store the term that was open when parsing stopped, whether at
        # [Typedef] or at end of file.
        if go is not None:
            go_dic[go.id] = go

        # Resolve parents for all terms first; the self-reference is only
        # added afterwards, in the writing pass below.
        for go_id in go_dic:
            go_dic[go_id].parents = self._search_parents(go_dic, go_id)

        goFile = File(self._gofile, 'w', True)
        try:
            for go_id in go_dic:
                go_dic[go_id].parents.add(go_id)
                goFile.write(str(go_dic[go_id]) + "\n")
        finally:
            goFile.close()
Exemplo n.º 7
0
    def _process(self):
        """Build the taxonomy table and write it to ``self._taxid``.

        Four ``|``-separated input files are read in this order:

        * ``self._nodes``: field 0 is the identifier, field 1 its parent and
          field 2 its rank; one ``TaxID`` is created per line.
        * ``self._names``: when field 3 is ``'scientific name'``, field 1
          becomes the name of the entry identified by field 0.
        * ``self._delet``: field 0 gets a fresh ``TaxID`` flagged ``old``.
        * ``self._merged``: field 0 gets a fresh ``TaxID`` flagged ``old``
          whose ``new`` attribute is field 1.

        Entries from the last two files replace any entry already stored
        under the same identifier.  Finally ``str()`` of every entry is
        written, one per line.
        """
        inh = {}

        nodefile = File(file_name=self._nodes, action='r')
        for line in nodefile.descriptor:
            fields = re.sub('\'', '\\\'', line).split('|')
            key = fields[0].strip()
            node = TaxID(key)
            node.parent = fields[1].strip()
            node.rank = fields[2].strip()
            inh[key] = node
        nodefile.close()

        namefile = File(file_name=self._names, action='r')
        for line in namefile.descriptor:
            fields = re.sub('\'', '\\\'', line).split('|')
            if fields[3].strip() != 'scientific name':
                continue
            inh[fields[0].strip()].name = fields[1].strip()
        namefile.close()

        delefile = File(file_name=self._delet, action='r')
        for line in delefile.descriptor:
            key = line.split('|')[0].strip()
            removed = TaxID(key)
            removed.old = True
            inh[key] = removed
        delefile.close()

        mrgefile = File(file_name=self._merged, action='r')
        for line in mrgefile.descriptor:
            fields = line.split('|')
            key = fields[0].strip()
            merged = TaxID(key)
            merged.old = True
            merged.new = fields[1].strip()
            inh[key] = merged
        mrgefile.close()

        taxFile = File(self._taxid, 'w', True)
        for entry in inh.values():
            taxFile.write(str(entry) + "\n")
        taxFile.close()
Exemplo n.º 8
0
 def _parse_uniprot_file(self, source, destination, fasta, code):
     """Convert a UniProt flat file into a record file and a FASTA file.

     ``source`` is read line by line.  An ``ID`` line starts a new
     ``Uniprot`` object; ``AC``, ``OX``, ``OH`` and ``DR`` lines and the
     lines starting with two spaces assign its ``accession``, ``taxid``,
     ``hosts``, ``databases`` and ``sequence`` attributes.  On every ``//``
     line ``str(protein)`` is written to ``destination`` and
     ``repr(protein)`` to ``fasta``.

     All three files are closed when the method ends, including ``fasta``
     and including the case where parsing raises.

     :param source: path of the UniProt flat file to read.
     :param destination: path of the record output file.
     :param fasta: path of the FASTA output file.
     :param code: passed as second argument to every ``Uniprot`` created.
     """
     sourceFile = File(source, 'r')
     destinFile = File(destination, 'w', True)
     fastaFile = File(fasta, 'w', True)
     try:
         protein = None
         for line in sourceFile.descriptor:
             # The prefixes are mutually exclusive, so at most one branch
             # applies to a given line.
             # NOTE(review): each attribute is assigned once per matching
             # line; accumulation over several lines (e.g. the sequence)
             # depends on how Uniprot implements these attributes - confirm.
             if line.startswith('ID'):
                 protein = Uniprot(line.split()[1].strip(), code)
             elif line.startswith('AC'):
                 protein.accession = line.split()[1:]
             elif line.startswith('OX'):
                 protein.taxid = line.split()[1]
             elif line.startswith('OH'):
                 protein.hosts = line.split()[1]
             elif line.startswith('DR'):
                 protein.databases = line.split()[1:3]
             elif line.startswith('  '):
                 protein.sequence = line.strip().replace(' ', '')
             elif line.startswith('//'):
                 destinFile.write(str(protein) + "\n")
                 fastaFile.write(repr(protein) + "\n")
     finally:
         sourceFile.close()
         destinFile.close()
         fastaFile.close()