示例#1
0
def sortarchs(inputdir, outputdir):
    """Sort ``*.archObj`` files by arch type and loop length into ArchDB files.

    For each arch type one global ``ArchDB.<type>.db`` file is written, plus
    one ``ArchDB.<type>.<ini>-<end>.db`` file per length range; every arch's
    ``archtype_format()`` line goes to the global file and to each range file
    whose bounds it satisfies.
    """
    archs_dir = outputdir
    Path.mkdir(archs_dir)
    db_template = os.path.join(archs_dir, 'ArchDB.{0}.db')
    split_template = os.path.join(archs_dir, 'ArchDB.{0}.{1:02d}-{2:02d}.db')
    # (start, end) length ranges; an end of 0 means "no upper bound".
    length_ranges = [(0, 4), (4, 6), (7, 13), (14, 20), (21, 0)]

    # Group input files as {archtype: {length: [file, ...]}}.
    grouped = {}
    for arch_path in Path.list_files(root=inputdir, pattern='*.archObj'):
        name_parts = os.path.basename(arch_path).split('_')
        loop_length = int(name_parts[0])
        arch_type = name_parts[1]
        grouped.setdefault(arch_type, {}).setdefault(loop_length, [])
        grouped[arch_type][loop_length].append(arch_path)

    for arch_type in grouped:
        SBIglobals.alert('verbose', None, "ARCHS: " + arch_type + "\n")
        main_db = File(db_template.format(arch_type), 'w')
        split_dbs = [File(split_template.format(arch_type, ini, end), 'w')
                     for ini, end in length_ranges]
        for loop_length in sorted(grouped[arch_type]):
            SBIglobals.alert('verbose', None, '\t{0}'.format(loop_length))
            for arch_path in grouped[arch_type][loop_length]:
                SBIglobals.alert('verbose', None, '\t\t{0}\n'.format(arch_path))
                arch_obj = Arch.load(arch_path)
                main_db.descriptor.write(arch_obj.archtype_format() + "\n")
                for split_db, (ini, end) in zip(split_dbs, length_ranges):
                    if loop_length >= ini and (end == 0 or loop_length <= end):
                        split_db.descriptor.write(arch_obj.archtype_format() + "\n")
        main_db.close()
        for split_db in split_dbs:
            split_db.close()
示例#2
0
def Arch2SQL(database, sqlfile, verbose):
    """Dump the arch object database into per-PDB gzipped SQL files.

    Writes a header file ``00/0000.sql.gz`` describing the data source, then
    one ``<pdb>.sql.gz`` per PDB entry (sharded by the middle two characters
    of the PDB id), each wrapped in a transaction.

    :param database: root directory holding ``archobj``/``superobj`` subdirs.
    :param sqlfile:  output directory for the SQL shards.
    :param verbose:  when True, progress is reported on stderr.
    """
    if verbose:
        sys.stderr.write("Retrieving data from {0} ...\n".format(database))
    newsource = Source(name='PDBarch', source="http://www-pdb.org/")
    # Header shard with the data-source description only.
    # (Fixed: redundant nested os.path.join call.)
    outdir = os.path.join(os.path.abspath(sqlfile), '00')
    Path.mkdir(outdir)
    sql_fd = gzip.open(os.path.join(outdir, '0000.sql.gz'), 'wb')
    sql_fd.write(start_transaction())
    sql_fd.write(newsource.toSQL())
    sql_fd.write(end_transaction())
    sql_fd.close()

    # Index every *.archObj file by the identifier tuple embedded in its
    # file name (tokens from the third underscore-separated field onward).
    files_list_by_pdb = {}
    subdirs = ['archobj', 'superobj']
    for subdir in subdirs:
        for archobjfile in Path.list_files(os.path.join(database, subdir)):
            if archobjfile.endswith('.archObj'):
                data = tuple(
                    os.path.splitext(
                        os.path.split(archobjfile)[-1])[0].split('_')[2:])
                files_list_by_pdb[data] = archobjfile

    old_pdb = None
    newArchSet = None
    for dofdata in sorted(files_list_by_pdb):
        pdb = dofdata[0] + '_' + dofdata[1]
        if pdb != old_pdb:
            # Flush the previous PDB's shard before opening the next one.
            if old_pdb is not None:
                sql_fd.write(newArchSet.toSQL())
                sql_fd.write(end_transaction())
                sql_fd.close()
            outdir = os.path.join(os.path.abspath(sqlfile),
                                  dofdata[0][1:3].lower())
            Path.mkdir(outdir)
            if verbose:
                sys.stderr.write("Retrieving loops from {0} ...\n".format(pdb))
            sql_fd = gzip.open(os.path.join(outdir, pdb + '.sql.gz'), 'wb')
            sql_fd.write(start_transaction())
            if verbose:
                sys.stderr.write("Printing data from {0} ...\n".format(pdb))
            old_pdb = pdb
            newArchSet = Arch(pdb)
        newArchSet.archs = SSpair.load(files_list_by_pdb[dofdata])

    # BUGFIX: guard the final flush. With no input files newArchSet is None
    # (AttributeError) and sql_fd still refers to the already-closed header
    # shard, so the unconditional write/close crashed.
    if newArchSet is not None:
        sql_fd.write(newArchSet.toSQL())
        sql_fd.write(end_transaction())
        sql_fd.close()
    if verbose:
        sys.stderr.write("End execution.\n")
    def get_PDB(self, pdbID):
        """Return a path to the gzipped PDB file for *pdbID*.

        The local mirror (if any) is searched first; on a miss the file is
        downloaded from the PDB FTP site into the working directory.

        :param pdbID: 4-character PDB identifier (case-insensitive).
        :return: path to the file, or ``False`` when the download fails.
        """
        if self.has_local:
            # The local mirror shards entries by the middle two id characters.
            rootdir = os.path.join(self.local, pdbID.lower()[1:3])
            for pdb_file in Path.list_files(root=rootdir, pattern='*.ent.gz'):
                newfile = File(file_name=pdb_file, action='r')
                if newfile.prefix.lstrip('pdb').upper() == pdbID.upper():
                    return pdb_file

        # If we do not find it in local (or we do not have a local) we search
        # it on the FTP.
        pdb_file = 'pdb' + pdbID.lower() + '.ent.gz'
        source = 'ftp://' + PDBftp['address'] + os.path.join(
            PDBftp['structures'], pdbID[1:3].lower(), pdb_file)
        try:
            urllib.urlretrieve(source, pdb_file)
        except (IOError, OSError):
            # BUGFIX: was a bare ``except:``, which also swallowed
            # SystemExit/KeyboardInterrupt; catch only the retrieval errors
            # urlretrieve actually raises (ContentTooShortError is an IOError).
            return False
        return os.path.abspath(pdb_file)
 def _process(self):
     """Parse every PDBTM XML file under the local mirror and append one
     summary line per entry (with at least one accepted chain) to
     ``self._pdbtmfile``.
     """
     # NOTE(review): File(..., 'w', True) presumably opens for (over)write —
     # confirm against the project's File wrapper.
     tmoFile = File(self._pdbtmfile, 'w', True)
     for xmlfile in Path.list_files(
             os.path.join(self._local, 'pdbtm/database/'), '*.xml'):
         # The PDB identifier is the file name stem, upper-cased.
         xmldata = TM(
             pdb=os.path.splitext(os.path.split(xmlfile)[1])[0].upper())
         skip_chains = set()
         read = False
         fdxml = open(xmlfile)
         for line in fdxml:
             # Dispatch on exact line prefixes: relies on PDBTM's fixed
             # indentation in the XML dump.
             if line.startswith('    <TMRES>'): xmldata.tmres = line
             elif line.startswith('    <TMTYPE'): xmldata.tmtype = line
             elif line.startswith('    <PDBKWRES'): xmldata.kwres = line
             elif line.startswith('  <SIDEDEFINITION'):
                 m = re.search('Side1="(\S+)"', line)
                 xmldata.side = m.group(1)
             elif line.startswith('      <APPLY_TO_CHAIN'):
                 # Chains created by copying another chain are excluded below.
                 m = re.search('NEW_CHAINID=\"(\S{1})\"', line)
                 if m: skip_chains.add(m.group(1))
             elif line.startswith('  <CHAIN '):
                 m = re.search(
                     'CHAINID=\"(\S{1})\" NUM_TM=\"(\d{1})\" TYPE=\"(\S+)\"',
                     line)
                 if m:
                     chain, num, tmtype = m.group(1), m.group(2), m.group(3)
                     if not chain in skip_chains:
                         cdata = tuple([chain, num, tmtype])
                         xmldata.set_chain(cdata)
                         # REGION lines that follow belong to this chain.
                         read = True
             elif line.startswith('    <REGION ') and read:
                 m = re.search(
                     'pdb_beg=\"(\-*\d+\w*)\"[\s\S]+pdb_end=\"(\-*\d+\w*)\"\s+type=\"(\w{1})\"',
                     line)
                 # NOTE(review): assumes every REGION line matches the pattern;
                 # ``m`` is not checked for None before use.
                 ini, end, tmtype = m.group(1), m.group(2), m.group(3)
                 # ``cdata`` carries over from the last accepted <CHAIN> line.
                 xmldata.set_chain(cdata, tuple([ini, end, tmtype]))
             elif line.startswith('  </CHAIN>'):
                 read = False
         fdxml.close()
         # Only entries that produced at least one chain are written out.
         if len(xmldata.chains) > 0:
             tmoFile.write(str(xmldata) + "\n")
     tmoFile.close()
示例#5
0
def DS2SQL(database, looplist, sqlfile, verbose):
    """Convert DS classification report files into gzipped SQL dumps.

    One ``<subclass>.sql.gz`` file (wrapped in a transaction) is written to
    *sqlfile* for each report found in *database*; *looplist* is the source
    passed to ``readlist`` to fetch the loops of each subclass; *verbose*
    enables progress messages on stderr.
    """

    Path.mkdir(sqlfile)
    for dsfile in Path.list_files(database):
        # The subclass identifier is the second dot-separated token of the
        # report file name.
        subclasstype = os.path.split(dsfile)[-1].split('.')[1]
        classification = Cclass(subclasstype)
        if verbose:
            sys.stderr.write(
                "Retrieving data for subclass {0} ...\n".format(subclasstype))
        loops = readlist(looplist, subclasstype)
        sql_fd = gzip.open(os.path.join(sqlfile, subclasstype + '.sql.gz'),
                           'wb')
        sql_fd.write(start_transaction())
        sql_in = open(dsfile)
        # ``read`` is True only between a subclass header and its
        # "GLOBAL STATISTICS" footer; the dispatch below relies on the
        # report's exact fixed-width section headers.
        read = False
        for line in sql_in:
            dataline = line.rstrip('\n')
            #SKIP LINES
            if line.startswith('==') or line.startswith('***') or len(
                    line.strip()) == 0 or line.startswith(
                        '---- P R O T E I N    C O D E  ----'):
                continue
            # A new subclass block starts here.
            if line.startswith('CONSENSUS & MULTIPLE ALIGNEMENT IN THE'):
                data = line.split(':')[-1].strip().split()
                classification.subclasses = Subclass(
                    tuple([data[0].strip(), data[3].strip()]), data[4])
                workscls = classification.lastsubclass
                read = True
                continue
            if line.startswith('GLOBAL STATISTICS'):
                read = False
                continue
            if read:
                # Section headers switch the parse mode for subsequent data
                # lines: P=sequence, E=surface, R=ramachandran, S=secondary.
                if line.startswith(
                        '        SEQUENCE   ALIGNEMENT                           :'
                ):
                    parse_mode, counter = 'P', 0
                elif line.startswith(
                        '       ACCESSIBLE SURFACE ALIGNEMENT                    :'
                ):
                    parse_mode, counter = 'E', 0
                elif line.startswith(
                        '           RAMACHANDRAN                                 :'
                ):
                    parse_mode, counter = 'R', 0
                elif line.startswith(
                        '        SECONDARY STRUCTURE                             :'
                ):
                    parse_mode, counter = 'S', 0
                elif line.startswith('--------- CONSENSUS THORNTON       :'):
                    workscls.add_consensus(dataline, 'DS', loops)
                elif line.startswith('--------- CONSENSUS TOPOLOGY'):
                    workscls.add_topology(dataline, 'DS')
                elif line.startswith('CENTROIDE POLAR COORD.   :'):
                    workscls.add_coordinates(dataline)
                elif line.startswith('--------- RAMACHANDRAN PATTERN     :'):
                    # Patterns drop the literal "(X)" wildcard markers.
                    workscls.ram_pat = re.sub(
                        '\(X\)', '',
                        dataline.split(':')[1].strip().strip('.'))
                elif line.startswith('--------- SEQUENCE  PATTERN        :'):
                    workscls.seq_pat = re.sub(
                        '\(X\)', '',
                        dataline.split(':')[1].strip().strip('.'))
                elif line.startswith('--------- BURIAL    PATTERN        :'):
                    workscls.exp_pat = re.sub(
                        '\(X\)', '',
                        dataline.split(':')[1].strip().strip('.'))

                # Deeply indented short lines are per-loop data rows for the
                # current parse mode; ``counter`` walks the subclass loops in
                # the order they were added under mode 'P'.
                elif line.startswith('                             '
                                     ) and len(dataline) < 400:
                    if parse_mode == 'P': workscls.loops = Loop(info=dataline)
                    if parse_mode == 'E':
                        workscls.loops[counter].add_surface(info=dataline)
                        counter += 1
                    if parse_mode == 'R':
                        workscls.loops[counter].add_ramachandran(info=dataline)
                        counter += 1
                    if parse_mode == 'S':
                        workscls.loops[counter].add_secondary_str(
                            info=dataline)
                        counter += 1

        sql_fd.write(classification.toSQL('DS'))

        sql_in.close()
        sql_fd.write(end_transaction())
        sql_fd.close()

    if verbose: sys.stderr.write("End execution.\n")
 def localPDBeChems(self):
     """Yield the path of every chemical-component CIF file in the local mirror."""
     cif_paths = Path.list_files(root=self.local, pattern='*.cif')
     for cif_path in cif_paths:
         yield cif_path
 def localPDBs(self):
     """Yield the path of every gzipped PDB entry file in the local mirror."""
     entry_paths = Path.list_files(root=self.local, pattern='*.ent.gz')
     for entry_path in entry_paths:
         yield entry_path