Exemplo n.º 1
0
def builtArchDB(pdblist, pdbdir, outdir):
    '''
    Run archer.build_archs on every requested PDB chain and dump the results.

    For each arch an '.archObj' dump is written under outdir[archkey] and a
    'pdb' and a 'js' file are written under outdir['STRUC'], all inside a
    subdirectory named after characters 1-2 of the PDB identifier.

    @param:    pdblist
    @pdef:     iterable of (pdb, chain) pairs to process.
    @ptype:    {List}

    @param:    pdbdir
    @pdef:     local directory handed to {PDBlink}.
    @ptype:    {String}

    @param:    outdir
    @pdef:     output directories indexed by arch key, plus a 'STRUC' entry.
    @ptype:    {Dictionary}
    '''
    connection = PDBlink(local=pdbdir)
    for pdb, chain in pdblist:
        subdir = pdb[1:3].lower()
        pdbfile = connection.get_PDB(pdb)
        SBIglobals.alert('verbose', None,
                         'Processing file: {0}'.format(pdbfile))
        archs = archer.build_archs(sourcepdb=pdbfile,
                                   chain=chain,
                                   limit_distance=25)
        for archkey in archs:
            members = archs[archkey]
            # Nothing to store for this key: no directory is created either.
            if len(members) == 0:
                continue
            object_dir = os.path.join(outdir[archkey], subdir)
            Path.mkdir(object_dir)
            struc_dir = os.path.join(outdir['STRUC'], subdir)
            Path.mkdir(struc_dir)
            for arch in members:
                name_fields = [str(arch.aminoacid_distance), arch.type,
                               arch.identifier]
                object_name = "_".join(name_fields) + '.archObj'
                arch.dump(os.path.join(object_dir, object_name))
                # Same base name, one file per export format.
                for extension in ('pdb', 'js'):
                    arch.format2file(
                        filename=os.path.join(struc_dir, arch.identifier),
                        extension=extension,
                        center=True)
Exemplo n.º 2
0
def Enzyme2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local Enzyme database as a gzipped SQL transaction.

    @param:    database
    @pdef:     local directory handed to {Enzymelink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the database is not downloaded and no {Source}
               statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    link = Enzymelink(local=database)
    newsource = None
    if skip_download:
        report("Using previously downloaded database.\n")
    else:
        report("Downloading Enzyme database to {0} ...\n", database)
        link.download()
        newsource = Source(name='enzyme', source=link.source)
        report("Download Finished.\n")

    report("Parsing Enzyme.\n")
    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    if newsource is not None:
        sql_fd.write(newsource.toSQL())

    # Transfer statements are collected and written after all the enzymes.
    pending_transfers = []
    for enz_line in link.localEnzymes:
        enzyme = Enzyme(inline=enz_line)
        sql_fd.write(enzyme.toSQL())
        if enzyme.has_transfers:
            pending_transfers.append(enzyme.transfered2SQL())

    sql_fd.write("".join(pending_transfers))
    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 3
0
def SCOP2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local SCOP database as a gzipped SQL transaction.

    @param:    database
    @pdef:     local directory handed to {SCOPlink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the database is not downloaded and no {Source}
               statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    scop_connect = SCOPlink(local = database)
    newsource    = None
    if not skip_download:
        if verbose: sys.stderr.write("Downloading SCOP database to {0} ...\n".format(database))
        scop_connect.download()
        # NOTE(review): the source is registered under the name 'enzyme',
        # the same name Enzyme2SQL uses - looks like a copy-paste; confirm
        # the intended source name before changing the emitted SQL.
        newsource = Source(name = 'enzyme', source = scop_connect.source)
        if verbose: sys.stderr.write("Download Finished.\n")
    else:
        if verbose: sys.stderr.write("Using previously downloaded database.\n")

    if verbose: sys.stderr.write("Parsing SCOP.\n")
    if verbose: sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])
    sql_fd = gzip.open(sqlfile, 'wb')
    try:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        # Descriptions are loaded before relations, then dumped in one go.
        scop_obj = SCOP()
        for line in scop_connect.descriptions:
            scop_obj.add_description(line.strip())
        for line in scop_connect.relations:
            scop_obj.add_relation(line.strip())

        sql_fd.write(SCOP.prepdbdeleted())
        sql_fd.write(scop_obj.toSQL())
        sql_fd.write(SCOP.afterpdbdeleted())

        sql_fd.write(end_transaction())
    finally:
        # Release the gzip handle even when parsing or writing fails.
        sql_fd.close()
    if verbose: sys.stderr.write("End execution.\n")
Exemplo n.º 4
0
def sortarchs(inputdir, outputdir):
    '''
    Group the '*.archObj' files of a directory by arch type and length and
    write their archtype_format() lines into per-type text files.

    For every arch type one 'ArchDB.<type>.db' file receives all the archs
    and one 'ArchDB.<type>.<ini>-<end>.db' file per length section receives
    the archs whose length falls inside that section.

    @param:    inputdir
    @pdef:     directory searched for '*.archObj' files. File names are
               expected to start with '<length>_<archtype>_'.
    @ptype:    {String}

    @param:    outputdir
    @pdef:     directory in which the '.db' files are written.
    @ptype:    {String}
    '''
    archsdir = outputdir
    Path.mkdir(archsdir)
    whole_template = os.path.join(archsdir, 'ArchDB.{0}.db')
    split_template = os.path.join(archsdir, 'ArchDB.{0}.{1:02d}-{2:02d}.db')
    # (first, last) length of each section; a last value of 0 means no
    # upper bound.
    sections = [(0, 4), (4, 6), (7, 13), (14, 20), (21, 0)]

    sorted_archs = {}
    for archfile in Path.list_files(root = inputdir, pattern = '*.archObj'):
        fields   = os.path.basename(archfile).split('_')
        length   = int(fields[0])
        archtype = fields[1]
        by_length = sorted_archs.setdefault(archtype, {})
        by_length.setdefault(length, []).append(archfile)

    for archtype in sorted_archs:
        SBIglobals.alert('verbose', None, "ARCHS: " + archtype + "\n")
        whole_file  = File(whole_template.format(archtype), 'w')
        split_files = [File(split_template.format(archtype, first, last), 'w')
                       for first, last in sections]
        by_length = sorted_archs[archtype]
        for length in sorted(by_length):
            SBIglobals.alert('verbose', None, '\t{0}'.format(length))
            for archfile in by_length[length]:
                SBIglobals.alert('verbose', None, '\t\t{0}\n'.format(archfile))
                nsp = Arch.load(archfile)
                whole_file.descriptor.write(nsp.archtype_format() + "\n")
                for (first, last), split_file in zip(sections, split_files):
                    if length < first:
                        continue
                    if last == 0 or length <= last:
                        split_file.descriptor.write(nsp.archtype_format() + "\n")
        whole_file.close()
        for split_file in split_files:
            split_file.close()
Exemplo n.º 5
0
def DrugBank2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local DrugBank database as a gzipped SQL transaction.

    @param:    database
    @pdef:     local directory handed to {DrugBanklink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True no {Source} statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    link      = DrugBanklink(local = database)
    newsource = None
    if skip_download:
        report("Using previously downloaded database.\n")
    else:
        report("Downloading drugBank database to {0} ...\n", database)
        # NOTE: the actual link.download() call is disabled here; only the
        # source entry is registered.
        newsource = Source(name = 'DrugBank', source = link.source)
        report("Download Finished.\n")

    report("Parsing drugBank.\n")
    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    sql_fd.write(Drug.preuniprotdeleted())
    if newsource is not None:
        sql_fd.write(newsource.toSQL())

    for drg_line in link.localDrugs:
        sql_fd.write(Drug(inline = drg_line).toSQL())

    sql_fd.write(Drug.afteruniprotdeleted())
    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 6
0
def TaxID2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local TaxID database as a gzipped SQL transaction.

    Entries whose has_new flag is set are written after all the others.

    @param:    database
    @pdef:     local directory handed to {TaxIDlink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the database is not downloaded and no {Source}
               statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    link = TaxIDlink(local=database)
    newsource = None
    if skip_download:
        report("Using previously downloaded database.\n")
    else:
        report("Downloading TaxID database to {0} ...\n", database)
        link.download()
        newsource = Source(name='taxid', source=link.source)
        report("Download Finished.\n")

    deferred = []
    report("Parsing TaxID.\n")
    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    if newsource is not None:
        sql_fd.write(newsource.toSQL())

    for tax_line in link.localTaxIDs:
        taxon = TaxID(inline=tax_line)
        if taxon.has_new:
            deferred.append(taxon.toSQL())
        else:
            sql_fd.write(taxon.toSQL() + "\n")

    sql_fd.write("\n".join(deferred) + "\n")
    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 7
0
def PDBTM2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local PDBTM database as a gzipped SQL transaction.

    @param:    database
    @pdef:     local directory handed to {PDBTMlink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True no {Source} statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    link = PDBTMlink(local=database)
    newsource = None
    if skip_download:
        report("Using previously downloaded database.\n")
    else:
        report("Downloading PDBTM database to {0} ...\n", database)
        # NOTE: the actual link.download() call is disabled here.
        # NOTE(review): the source is registered under the name 'enzyme' -
        # looks like a copy-paste from Enzyme2SQL; confirm the intended name.
        newsource = Source(name='enzyme', source=link.source)
        report("Download Finished.\n")

    report("Parsing PDBTM.\n")
    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    sql_fd.write(TM.prepdbdeleted())
    if newsource is not None:
        sql_fd.write(newsource.toSQL())

    sql_fd.write(TM.regions2SQL())
    for tm_line in link.localTM:
        sql_fd.write(TM(inline=tm_line).toSQL())

    sql_fd.write(TM.afterpdbdeleted())
    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 8
0
def PDBeChem2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the elements and the local PDBeChem compounds as a gzipped SQL
    transaction.

    Compounds without a parent are written before compounds with one.

    @param:    database
    @pdef:     local directory handed to {PDBeChemlink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the database is not downloaded and no {Source}
               statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    link      = PDBeChemlink(local = database)
    newsource = None
    if skip_download:
        report("Using previously downloaded database.\n")
    else:
        report("Downloading PDBeChem database to {0} ...\n", database)
        link.download()
        newsource = Source(name   = 'PDBeChem', source = link.source)
        report("Download Finished.\n")

    orphans  = []
    children = []
    report("Parsing PDBeChem.\n")
    for chem_file in link.localPDBeChems:
        report("\tReading {0} ....\n", chem_file)
        compound = PDBeChem(chem_file)
        bucket = orphans if compound.parent is None else children
        bucket.append(compound.toSQL())

    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    if newsource is not None:
        sql_fd.write(newsource.toSQL())
    for entry in element_dic.values():
        sql_fd.write(Element(entry.number, entry.symbol, entry.name).toSQL() + "\n")
    sql_fd.write("\n".join(orphans) + "\n")
    sql_fd.write("\n".join(children) + "\n")
    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 9
0
 def __init__(self, local):
     '''
     Store the local database directory and hand it to Path.mkdir.

     @param:    local
     @pdef:     local directory to store the database. Created if missing.
     @ptype:    {String}
     '''
     self._local = local
     Path.mkdir(local)
Exemplo n.º 10
0
    def download(self):
        '''
        Retrieve the SCOP description and relation files into the local
        database.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} True once both files have been retrieved.
        '''
        if self.has_local:
            Path.mkdir(self.local)
            # The FTP keys 'desc'/'rel' map onto the _desc/_rel attributes.
            for key in ('desc', 'rel'):
                urllib.urlretrieve(SCOPftp[key], getattr(self, '_' + key))
            return True

        raise NameError('A local SCOP database directory must be defined.')
Exemplo n.º 11
0
    def execute_query(self, query_file, blast_output_file = None,
                      work_directory = None):
        '''
        Execute BLAST given a query fasta.

        @param:    query_file
        @pdef:     Fasta file with the query sequence.
        @ptype:    {String} or {File} or {Fasta}

        @param:    blast_output_file
        @pdef:     name of the temporary BLAST output file.
        @pdefault: query_file.prefix + job.pid + .blast.xml.out
        @ptype:    {String}

        @param:    work_directory
        @pdef:     Directory to which the temporary files will be created.
        @pdefault: Current working directory, resolved when the method is
                   called.
        @ptype:    {String}

        @raises: {TypeError} if query_file is none of the accepted types.
        @raises: {AttributeError} if query_file is multi-fasta.
        @raises: {BlastError} in BLAST execution or output parsing errors.

        @returns: {BlastResult}
        '''
        # A default of os.getcwd() in the signature would be evaluated only
        # once, when the function is defined; resolve it per call instead.
        if work_directory is None:
            work_directory = os.getcwd()

        if isinstance(query_file, (basestring, File)):
            newFasta = Fasta(fasta_file = query_file)
        elif isinstance(query_file, Fasta):
            newFasta = query_file
        else:
            # Previously this fell through and failed later with an unbound
            # local variable.
            msg = 'query_file must be a file name, a File or a Fasta.'
            raise TypeError(msg)

        if newFasta.is_multifasta:
            msg = 'Blasts can only be executed one at a time due to XML output restrictions.'
            raise AttributeError(msg)

        # All the sequence is unknown, it will crash blast
        newFasta.load()
        query_sequence = newFasta.sequence
        if len(re.sub(r'[Xx]', '', query_sequence.sequence)) == 0:
            SBIg.warn(self, 'Created an empty BlastResult.')
            return BlastResult(query_name     = query_sequence.id,
                               query_sequence = query_sequence.sequence)

        Path.mkdir(work_directory)
        if blast_output_file is None:
            file_prefixes = ".".join([newFasta.file.prefix, str(os.getpid())])
            tmp_output    = os.path.join(work_directory, file_prefixes) + ".blast.xml.out"
        else:
            tmp_output    = os.path.join(work_directory, blast_output_file)

        self._execute(input_file = newFasta, output_file = tmp_output)

        blast_result = self._parse_blast(newFasta.sequence.sequence, tmp_output)

        self._clean([tmp_output, ])

        return blast_result
Exemplo n.º 12
0
    def download(self):
        '''
        Retrieve the GO source file into the local database and process it.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} True once the file is retrieved and processed.
        '''
        if self.has_local:
            Path.mkdir(self.local)
            target = os.path.join(self.local, self._gfile)
            urllib.urlretrieve(GOftp['source'], target)
            self._process()
            return True

        raise NameError('A local GO database directory must be defined.')
Exemplo n.º 13
0
    def download(self):
        '''
        Retrieve the drugBank target and main files into the local database
        and process them.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} True once the files are retrieved and processed.
        '''
        if self.has_local:
            Path.mkdir(self.local)
            # Targets are fetched first, then the main file.
            for key, target in (('targets', self._target),
                                ('main', self._main)):
                urllib.urlretrieve(drugBankftp[key], target)
            self._process()
            return True

        raise NameError(
            'A local drugBank database directory must be defined.')
Exemplo n.º 14
0
    def download(self):
        '''
        Retrieve the Enzyme 'dat' and 'cls' files into the local database
        and process them.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} True once the files are retrieved and processed.
        '''
        if self.has_local:
            Path.mkdir(self.local)
            # The 'dat' file is fetched first, then the 'cls' file.
            for key, target in (('dat', self._dfile), ('cls', self._cfile)):
                urllib.urlretrieve(Enzymeftp[key], target)
            self._process()
            return True

        raise NameError(
            'A local Enzyme database directory must be defined.')
Exemplo n.º 15
0
    def download(self):
        '''
        Export the PDBTM svn repository into the local database directory
        and process it.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} True once the export and processing are done.
        '''
        if not self.has_local:
            # The message used to name drugBank (copy-paste).
            raise NameError(
                'A local PDBTM database directory must be defined.')

        Path.mkdir(self.local)
        # 'svn export' writes into the current directory, so run it from
        # the local database directory and always switch back afterwards;
        # the previous directory used to be saved but never restored.
        here = os.getcwd()
        os.chdir(self.local)
        try:
            os.system("svn export {0}".format(PDBTMftp['svn']))
        finally:
            os.chdir(here)

        # NOTE(review): _process now runs from the original working
        # directory; this assumes it builds its paths from the local
        # directory rather than from the cwd - confirm.
        self._process()

        return True
Exemplo n.º 16
0
def Arch2SQL(database, sqlfile, verbose):
    '''
    Write one gzipped SQL file per PDB chain with the archs stored under
    the 'archobj' and 'superobj' subdirectories of the database.

    A '00/0000.sql.gz' file with the {Source} statement is always written.
    Every other file goes to '<sqlfile>/<xx>/<pdb>_<chain>.sql.gz', where
    <xx> are characters 1-2 of the PDB identifier.

    @param:    database
    @pdef:     directory containing the 'archobj' and 'superobj' folders
               with the '.archObj' files.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     root directory for the SQL files.
    @ptype:    {String}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    if verbose:
        sys.stderr.write("Retrieving data from {0} ...\n".format(database))
    newsource = Source(name='PDBarch', source="http://www-pdb.org/")
    sqlroot = os.path.abspath(sqlfile)
    outdir = os.path.join(sqlroot, '00')
    Path.mkdir(outdir)
    sql_fd = gzip.open(os.path.join(outdir, '0000.sql.gz'), 'wb')
    try:
        sql_fd.write(start_transaction())
        sql_fd.write(newsource.toSQL())
        sql_fd.write(end_transaction())
    finally:
        sql_fd.close()

    # Key: the name fields after the first two ('_'-separated, extension
    # dropped); the first two key items are used as PDB id and chain below.
    files_list_by_pdb = {}
    for subdir in ('archobj', 'superobj'):
        for archobjfile in Path.list_files(os.path.join(database, subdir)):
            if archobjfile.endswith('.archObj'):
                basename = os.path.splitext(os.path.split(archobjfile)[-1])[0]
                files_list_by_pdb[tuple(basename.split('_')[2:])] = archobjfile

    old_pdb = None
    newArchSet = None
    # None while no per-chain file is open; this also covers the case in
    # which no '.archObj' file was found at all, which used to crash on
    # the final flush.
    sql_fd = None
    try:
        for dofdata in sorted(files_list_by_pdb):
            pdb = dofdata[0] + '_' + dofdata[1]
            if pdb != old_pdb:
                if sql_fd is not None:
                    # Flush the previous chain before starting a new file.
                    sql_fd.write(newArchSet.toSQL())
                    sql_fd.write(end_transaction())
                    sql_fd.close()
                    sql_fd = None
                outdir = os.path.join(sqlroot, dofdata[0][1:3].lower())
                Path.mkdir(outdir)
                if verbose:
                    sys.stderr.write(
                        "Retrieving loops from {0} ...\n".format(pdb))
                sql_fd = gzip.open(os.path.join(outdir, pdb + '.sql.gz'), 'wb')
                sql_fd.write(start_transaction())
                if verbose:
                    sys.stderr.write(
                        "Printing data from {0} ...\n".format(pdb))
                old_pdb = pdb
                newArchSet = Arch(pdb)
            # NOTE(review): assigned once per file of the same chain;
            # presumably Arch.archs is a setter that accumulates - verify.
            newArchSet.archs = SSpair.load(files_list_by_pdb[dofdata])

        if sql_fd is not None:
            sql_fd.write(newArchSet.toSQL())
            sql_fd.write(end_transaction())
    finally:
        if sql_fd is not None:
            sql_fd.close()
    if verbose:
        sys.stderr.write("End execution.\n")
Exemplo n.º 17
0
    def make_PDBseq(self, log_file, resolution_threshold=None):
        '''
        Write the protein sequences of every local PDB file to 'PDBseq.fa'
        together with the 'PDBseq.fa.idx' index file.

        Sequences that are empty once 'X'/'x' are removed are skipped.

        @param:    log_file
        @pdef:     name of the log file to write.
        @ptype:    {String}

        @param:    resolution_threshold
        @pdef:     when given, sequences of PDBs found in the result of
                   get_resolutions and that are not all-CA are also written
                   to the filtered fasta file.
        @pdefault: _None_
        @ptype:    {Float}

        @raises: {NameError} if no local PDB database is defined.
        '''
        if not self.has_local:
            raise NameError(
                'A local PDB database must be defined to do create a PDBseq database.'
            )
        outdir = self.PDBseq if self.PDBseq is not None else os.curdir

        # Create the directory that is actually written to; self.PDBseq may
        # be None, in which case the current directory is used.
        Path.mkdir(outdir)
        fasta_file = File(file_name=os.path.join(outdir, 'PDBseq.fa'),
                          action='w',
                          overwrite=True)
        fasta_fd = fasta_file.descriptor
        idx_file = File(file_name=os.path.join(outdir, 'PDBseq.fa.idx'),
                        action='w',
                        overwrite=True)
        idx_fd = idx_file.descriptor

        # NOTE(review): this setup was commented out while the loop below
        # still used 'resolutions' and 'filtered_fd', so any call with a
        # threshold failed on undefined names. It is restored as it was
        # written; confirm get_PDBseq_filtered / get_resolutions are still
        # the intended helpers.
        filtered_file = None
        filtered_fd = None
        resolutions = None
        if resolution_threshold is not None:
            filtered_file_name = self.get_PDBseq_filtered(resolution_threshold)
            filtered_file = File(file_name=filtered_file_name,
                                 action='w',
                                 overwrite=True)
            filtered_fd = filtered_file.descriptor
            resolutions = self.get_resolutions(
                resolution_threshold=resolution_threshold)

        log_file = File(file_name=log_file, action='w', overwrite=True)
        log_idx = log_file.descriptor

        for pdb_file in self.localPDBs:
            log_idx.write("Reading File: {0}\n".format(pdb_file))
            newPDB = PDB(pdb_file=pdb_file, dehydrate=True)
            fasta_idx = newPDB.FASTA_IDX(nucleotide=False)
            if len(fasta_idx['FASTA']) != len(fasta_idx['IDX']):
                log_idx.write(
                    'ERROR!!!!! Number of fastas and indexes are different for pdb {0}!!\n'
                    .format(newPDB.id))
            if len(fasta_idx['FASTA']) > 0:
                log_idx.write('\tPrinting FASTA and IDX...\n')
            else:
                log_idx.write('\tProblably just a nucleotide PDB...\n')
            for c in range(len(fasta_idx['FASTA'])):
                # Second line of the fasta entry is the sequence itself.
                sequence = fasta_idx['FASTA'][c].split('\n')[1]
                sequence = sequence.replace('X', '').replace('x', '')
                if len(sequence) > 0:
                    fasta_fd.write(fasta_idx['FASTA'][c] + "\n")
                    if filtered_fd is not None and newPDB.id in resolutions and not newPDB.is_all_ca:
                        filtered_fd.write(fasta_idx['FASTA'][c] + "\n")
                    idx_fd.write(fasta_idx['IDX'][c] + "\n")
            del newPDB

        #CLOSE & END
        fasta_file.close()
        idx_file.close()
        if filtered_file is not None:
            filtered_file.close()
        # The log file used to be left open.
        log_file.close()
Exemplo n.º 18
0
    def download(self):
        '''
        Retrieve 'taxdmp.zip' into the local database, unzip it there and
        process the result.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} True once the archive is retrieved and processed.
        '''
        if self.has_local:
            Path.mkdir(self.local)
            archive = os.path.join(self.local, 'taxdmp.zip')
            urllib.urlretrieve(taxIDftp['global'], archive)
            # The unzip output is captured and discarded; its exit status
            # is not checked.
            unzip = subprocess.Popen(['unzip', '-o', archive, '-d', self.local],
                                     stdout = subprocess.PIPE,
                                     stderr = subprocess.PIPE)
            unzip.communicate()

            self._process()
            return True

        raise NameError('A local TaxID database directory must be defined.')
    def download(self):
        '''
        Retrieve 'mmcif.tar.gz' into the local database and untar it there.

        @raises: {NameError} if no local directory is defined.

        @returns: {Boolean} False if the archive cannot be retrieved, True
                  otherwise.
        '''
        if not self.has_local:
            raise NameError('A local PDBeChem database directory must be defined.')

        Path.mkdir(self.local)
        destination = os.path.join(self.local, 'mmcif.tar.gz')
        try:
            urllib.urlretrieve(PDBeChemftp['global'], destination)
        except Exception:
            # Best effort: a failed retrieval is reported through the return
            # value. KeyboardInterrupt/SystemExit are no longer swallowed.
            return False
        # The tar output is captured and discarded; its exit status is not
        # checked.
        command = ['tar', 'zxvf', destination, '-C', self.local]
        p = subprocess.Popen(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
        p.communicate()

        return True
Exemplo n.º 20
0
def CDhit2SQL(database, sqlfile, verbose):
    '''
    Write the content of a {CDhit} object as a gzipped SQL transaction.

    @param:    database
    @pdef:     argument handed to the {CDhit} constructor.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    report("Retrieving data from {0} ...\n", database)
    clusters = CDhit(database)

    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    sql_fd.write(clusters.toSQL())
    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 21
0
def Uniprot2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local Uniprot database as a series of gzipped SQL files.

    Every '_' in sqlfile is replaced by a zero-padded, 3-digit file counter
    starting at 1. A new file is started once more than 500000 entries have
    been written to the current one.

    @param:    database
    @pdef:     local directory handed to {Uniprotlink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     name pattern of the gzipped SQL files.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the database is not downloaded and no {Source}
               statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    uniprot_connect = Uniprotlink(local=database)
    newsource = None

    # Use the parameter; this used to read an undefined global 'options'.
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading Uniprot database to {0} ...\n".format(database))
        uniprot_connect.download()
        newsource = Source(name='uniprot', source=uniprot_connect.source)
        if verbose: sys.stderr.write("Download Finished.\n")
    else:
        if verbose: sys.stderr.write("Using previously downloaded database.\n")

    file_counter = 1
    file_sequence = 0
    file_sql_name = sqlfile.replace('_', '{0:03}')

    if verbose: sys.stderr.write("Parsing Uniprot.\n")
    if verbose:
        sys.stderr.write("Writing {0} ....\n".format(
            file_sql_name.format(file_counter)))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])
    sql_fd = gzip.open(file_sql_name.format(file_counter), 'wb')
    try:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        for uni_line in uniprot_connect.localUniprots:
            newuni = Uniprot(inline=uni_line)
            if file_sequence > 500000:
                # Current file is full: finish it and roll over to the next.
                sql_fd.write(end_transaction())
                sql_fd.close()
                file_sequence = 0
                file_counter += 1
                if verbose:
                    sys.stderr.write("Writing {0} ....\n".format(
                        file_sql_name.format(file_counter)))
                sql_fd = gzip.open(file_sql_name.format(file_counter), 'wb')
                sql_fd.write(start_transaction())

            sql_fd.write(newuni.toSQL())
            file_sequence += 1

        # The last file used to be left without its closing transaction
        # statement and was never closed.
        sql_fd.write(end_transaction())
    finally:
        sql_fd.close()
    if verbose: sys.stderr.write("End execution.\n")
Exemplo n.º 22
0
def GO2SQL(database, sqlfile, skip_download, verbose):
    '''
    Write the local GO database as a gzipped SQL transaction.

    All the terms are written first, followed by the relation statements
    and finally the parent statements.

    @param:    database
    @pdef:     local directory handed to {GOlink}.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     gzipped SQL file to write.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the database is not downloaded and no {Source}
               statement is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    def report(template, *args):
        # Messages are only formatted and emitted in verbose mode.
        if verbose:
            sys.stderr.write(template.format(*args))

    link = GOlink(local=database)
    newsource = None
    if skip_download:
        report("Using previously downloaded database.\n")
    else:
        report("Downloading GO database to {0} ...\n", database)
        link.download()
        newsource = Source(name='GO', source=link.source)
        report("Download Finished.\n")

    related_terms = []
    child_terms = []

    report("Parsing GO.\n")
    report("Writing {0} ....\n", sqlfile)
    Path.mkdir(os.path.dirname(os.path.abspath(sqlfile)))
    sql_fd = gzip.open(sqlfile, 'wb')
    sql_fd.write(start_transaction())
    if newsource is not None:
        sql_fd.write(newsource.toSQL())

    for go_line in link.localGOs:
        term = GOterm(inline=go_line)
        sql_fd.write(term.toSQL() + "\n")
        if len(term.relations) > 0:
            related_terms.append(term)
        if len(term.parents) > 0:
            child_terms.append(term)

    for term in related_terms:
        sql_fd.write(term.relations2SQL() + "\n")
    for term in child_terms:
        sql_fd.write(term.parents2SQL() + "\n")

    sql_fd.write(end_transaction())
    sql_fd.close()
    report("End execution.\n")
Exemplo n.º 23
0
    def sync_PDB(self, log_file=None):
        '''
        Synchronize the local PDB database with the remote one through rsync.

        @param:    log_file
        @pdef:     file to which the rsync standard output is written.
        @pdefault: _None_; the output is captured and discarded.
        @ptype:    {String}

        @raises: {NameError} if no local PDB database is defined.
        @raises: {SystemError} if rsync writes anything to stderr.
        '''
        if not self.has_local:
            raise NameError(
                'A local PDB database must be defined to sync with.')

        Path.mkdir(self.local)

        command = [
            'rsync', '-rlpt', '-v', '-z', '--port=' + PDBrsync['port'],
            PDBrsync['address'], self.local
        ]

        # Keep a handle on the log file so that it can be closed once rsync
        # is done; it used to be opened inline and never closed.
        log_fd = open(log_file, 'w') if log_file is not None else None
        try:
            p = subprocess.Popen(
                command,
                stdout=log_fd if log_fd is not None else subprocess.PIPE,
                stderr=subprocess.PIPE)

            SBIglobals.alert('verbose', self,
                             'Executing: {0}'.format(" ".join(command)))

            out, err = p.communicate()
        finally:
            if log_fd is not None:
                log_fd.close()

        # Any non-blank stderr output is treated as a failure.
        if err.strip():
            raise SystemError('{0}'.format(err))
Exemplo n.º 24
0
    def get_PDB(self, pdbID):
        '''
        Locate the gzipped PDB entry file of a PDB identifier.

        The local database (if any) is searched first; when the file is not
        found there it is retrieved from the PDB FTP into the current
        working directory.

        @param:    pdbID
        @pdef:     PDB identifier.
        @ptype:    {String}

        @returns: {String} path of the file, or False if the retrieval fails.
        '''
        if self.has_local:
            rootdir = os.path.join(self.local, pdbID.lower()[1:3])
            for pdb_file in Path.list_files(root=rootdir, pattern='*.ent.gz'):
                newfile = File(file_name=pdb_file, action='r')
                # lstrip removes any leading 'p', 'd' or 'b' characters, not
                # the literal 'pdb' prefix.
                if newfile.prefix.lstrip('pdb').upper() == pdbID.upper():
                    return pdb_file

        #If we do not find it in local (or we do not have a local) we search it on the FTP
        pdb_file = 'pdb' + pdbID.lower() + '.ent.gz'
        source = 'ftp://' + PDBftp['address'] + os.path.join(
            PDBftp['structures'], pdbID[1:3].lower(), pdb_file)
        try:
            urllib.urlretrieve(source, pdb_file)
        except Exception:
            # Best effort: a failed retrieval is reported through the return
            # value. KeyboardInterrupt/SystemExit are no longer swallowed.
            return False
        return os.path.abspath(pdb_file)
Exemplo n.º 25
0
 def _process(self):
     '''
     Parse every '*.xml' file under '<local>/pdbtm/database/' and write one
     line per entry that has at least one chain to the file named by
     self._pdbtmfile.
     '''
     # Raw-string patterns: the escapes (\S, \d, \w, \s) are meant for the
     # regex engine, not for the Python string parser.
     side_re = re.compile(r'Side1="(\S+)"')
     skip_re = re.compile(r'NEW_CHAINID="(\S{1})"')
     # NOTE(review): NUM_TM is matched as a single digit, so a CHAIN line
     # with a two-digit NUM_TM does not match and that chain is skipped -
     # confirm this is intended.
     chain_re = re.compile(r'CHAINID="(\S{1})" NUM_TM="(\d{1})" TYPE="(\S+)"')
     region_re = re.compile(
         r'pdb_beg="(\-*\d+\w*)"[\s\S]+pdb_end="(\-*\d+\w*)"\s+type="(\w{1})"')

     tmoFile = File(self._pdbtmfile, 'w', True)
     try:
         for xmlfile in Path.list_files(
                 os.path.join(self._local, 'pdbtm/database/'), '*.xml'):
             xmldata = TM(
                 pdb=os.path.splitext(os.path.split(xmlfile)[1])[0].upper())
             skip_chains = set()
             # True while inside a <CHAIN> element that was accepted.
             read = False
             fdxml = open(xmlfile)
             try:
                 for line in fdxml:
                     if line.startswith('    <TMRES>'):
                         xmldata.tmres = line
                     elif line.startswith('    <TMTYPE'):
                         xmldata.tmtype = line
                     elif line.startswith('    <PDBKWRES'):
                         xmldata.kwres = line
                     elif line.startswith('  <SIDEDEFINITION'):
                         m = side_re.search(line)
                         xmldata.side = m.group(1)
                     elif line.startswith('      <APPLY_TO_CHAIN'):
                         # Chains named by NEW_CHAINID are ignored below.
                         m = skip_re.search(line)
                         if m: skip_chains.add(m.group(1))
                     elif line.startswith('  <CHAIN '):
                         m = chain_re.search(line)
                         if m:
                             chain, num, tmtype = m.group(1), m.group(2), m.group(3)
                             if not chain in skip_chains:
                                 cdata = tuple([chain, num, tmtype])
                                 xmldata.set_chain(cdata)
                                 read = True
                     elif line.startswith('    <REGION ') and read:
                         m = region_re.search(line)
                         ini, end, tmtype = m.group(1), m.group(2), m.group(3)
                         xmldata.set_chain(cdata, tuple([ini, end, tmtype]))
                     elif line.startswith('  </CHAIN>'):
                         read = False
             finally:
                 # Release the XML handle even if a line fails to parse.
                 fdxml.close()
             if len(xmldata.chains) > 0:
                 tmoFile.write(str(xmldata) + "\n")
     finally:
         tmoFile.close()
Exemplo n.º 26
0
 def localPDBs(self):
     '''
     Iterate over the '*.ent.gz' files found under the local database.

     @yields: {String} one file at a time, as given by Path.list_files.
     '''
     entries = Path.list_files(root=self.local, pattern='*.ent.gz')
     for entry in entries:
         yield entry
Exemplo n.º 27
0
def PDB2SQL(database, seqdatabase, listfiles, sqlfile, skip_download, verbose):
    '''
    Write one gzipped SQL file per local PDB file.

    Every file goes to '<sqlfile>/<xx>/<pdbid>.sql.gz', where <xx> are
    characters 1-2 of the PDB identifier. A failure on one PDB file is
    logged with its traceback and does not stop the others.

    @param:    database
    @pdef:     local directory of the PDB database.
    @ptype:    {String}

    @param:    seqdatabase
    @pdef:     directory of the PDBseq database.
    @ptype:    {String}

    @param:    listfiles
    @pdef:     file with one PDB file name per line; when given only those
               files are processed and errors are logged to
               '<listfiles>.log' instead of 'PDB2SQL.log'.
    @ptype:    {String}

    @param:    sqlfile
    @pdef:     root directory for the SQL files.
    @ptype:    {String}

    @param:    skip_download
    @pdef:     when True the PDB is not synchronized, PDBseq is not rebuilt
               and no {Source} file is written.
    @ptype:    {Boolean}

    @param:    verbose
    @pdef:     when True progress messages are written to stderr.
    @ptype:    {Boolean}
    '''
    import traceback

    pdb_connect = PDBlink(local=database, PDBseq=seqdatabase)
    newsource = None

    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Syncronizing PDB database to {0} ...\n".format(database))
        pdb_connect.sync_PDB(log_file=os.path.join(database, 'PDB.sync.log'))
        newsource = Source(name='PDB', source=pdb_connect.source)
        if verbose:
            sys.stderr.write(
                "Creating PDBseq in {0} ...\n".format(seqdatabase))
        pdb_connect.make_PDBseq(
            log_file=os.path.join(seqdatabase, 'PDB.seq.log'))
        if verbose: sys.stderr.write("Download Finished.\n")
        outdir = os.path.abspath(os.path.join(sqlfile, '00'))
        Path.mkdir(outdir)
        sql_fd = gzip.open(os.path.join(outdir, '0000.sql.gz'), 'wb')
        try:
            sql_fd.write(start_transaction())
            sql_fd.write(newsource.toSQL())
            sql_fd.write(end_transaction())
        finally:
            sql_fd.close()
    else:
        if verbose: sys.stderr.write("Using previously downloaded database.\n")

    files2check = set()
    if listfiles is not None:
        fd = open(listfiles)
        try:
            for line in fd:
                files2check.add(line.strip())
        finally:
            fd.close()
        logname = listfiles + ".log"
    else:
        logname = "PDB2SQL.log"

    logfd = open(logname, "w")
    try:
        for pdbfile in pdb_connect.localPDBs:
            try:
                if listfiles is not None and pdbfile not in files2check:
                    # With an empty list nothing can ever match: stop here.
                    if len(files2check) == 0: break
                    continue
                if verbose:
                    sys.stderr.write("Working file {0}\n".format(pdbfile))
                newPDB = PDB(pdb_file=pdbfile)
                outsqldir = os.path.join(sqlfile, newPDB.id[1:3].lower())
                Path.mkdir(outsqldir)
                outsqlfile = os.path.join(outsqldir, newPDB.id + '.sql.gz')
                if verbose:
                    sys.stderr.write(
                        "\tOutput SQL file is {0}.\n".format(outsqlfile))
                sql_fd = gzip.open(outsqlfile, 'wb')
                try:
                    sql_fd.write(start_transaction())
                    sql_fd.write(PDB.preuniprotdeleted())
                    sql_fd.write(newPDB.toSQL())
                    sql_fd.write(PDB.afteruniprotdeleted())
                    sql_fd.write(end_transaction())
                finally:
                    # Do not leak the gzip handle when a PDB fails.
                    sql_fd.close()
            except Exception:
                # Per-file best effort: log the traceback and carry on.
                # KeyboardInterrupt/SystemExit are not Exception subclasses
                # and still propagate.
                if verbose:
                    sys.stderr.write("\tAn error occurred. Check log file\n")
                SBIglobals.alert(
                    'error', None,
                    '\tAn error occurred for {0} . Check log file'.format(pdbfile))
                logfd.write("FILE {0}\n".format(pdbfile))
                logfd.write(traceback.format_exc())
                logfd.write("\n")
    finally:
        # The error log used to be left open.
        logfd.close()
 def localPDBeChems(self):
     '''
     Iterate over the '*.cif' files found under the local database.

     @yields: {String} one file at a time, as given by Path.list_files.
     '''
     entries = Path.list_files(root = self.local, pattern = '*.cif')
     for entry in entries:
         yield entry
Exemplo n.º 29
0
    def execute_query_seq(self, sequenceID = None, sequence          = None,
                          blast_input_file = None, blast_output_file = None,
                          work_directory   = None):
        '''
        Execute BLAST given a query sequence.

        @param:    sequenceID
        @pdef:     name of the query sequence.
        @pdefault: 'QuerySequence'
        @pclash:   If sequence is not provided, it assumes that the sequenceID
                   belongs to a protein in the database and, thus, it searches
                   for it. Either sequenceID or sequence needs to be provided.
        @ptype:    {String}

        @param:    sequence
        @pdef:     query sequence.
        @pdefault: _None_
        @pclash:   Either sequenceID or sequence needs to be provided.
        @ptype:    {String}

        @param:    blast_input_file
        @pdef:     name of the temporary fasta file to use as BLAST input.
        @pdefault: job.pid + clock + .tmp.fa
        @ptype:    {String}

        @param:    blast_output_file
        @pdef:     name of the temporary BLAST output file.
        @pdefault: job.pid + clock + .blast.xml.out
        @ptype:    {String}

        @param:    work_directory
        @pdef:     Directory to which the temporary files will be created.
        @pdefault: Current working directory, resolved when the method is
                   called.
        @ptype:    {String}

        @raises: {AttributeError} if neither sequenceID nor sequence are
                  provided or if sequenceID is a list of sequence names.
        @raises: {BlastError} in BLAST execution or output parsing errors.

        @returns: {BlastResult}
        '''
        if sequenceID is None and sequence is None:
            msg = 'Either a sequence or sequenceID is needed to perform the blast.'
            raise AttributeError(msg)

        if isinstance(sequenceID, (list, set, tuple)):
            msg = 'Blasts can only be executed one at a time due to XML output restrictions.'
            raise AttributeError(msg)

        # A default of os.getcwd() in the signature would be evaluated only
        # once, when the function is defined; resolve it per call instead.
        if work_directory is None:
            work_directory = os.getcwd()

        sequenceID = 'QuerySequence' if sequenceID is None else sequenceID

        # Given only a code implies that the protein of interest is in the
        # database itself
        if sequence is None:
            grabbedSequence = self._database.retrieve(sequenceID)
            sequenceID      = grabbedSequence[0].id
            sequence        = grabbedSequence[0].sequence

        # All the sequence is unknown, it will crash blast
        if len(re.sub(r'[Xx]', '', sequence)) == 0:
            SBIg.warn(self, 'Created an empty BlastResult.')
            return BlastResult(query_name     = sequenceID,
                               query_sequence = sequence)

        Path.mkdir(work_directory)
        # NOTE: time.clock() only exists up to Python 3.7.
        file_prefixes = ".".join([str(os.getpid()), str(int(time.clock()*100000))])
        file_prefixes = os.path.join(work_directory, file_prefixes)
        tmp_input     = file_prefixes + ".tmp.fa"
        tmp_output    = file_prefixes + ".blast.xml.out"

        # Explicit file names take precedence over the generated ones.
        if blast_input_file is not None:
            tmp_input  = os.path.join(work_directory, blast_input_file)
        if blast_output_file is not None:
            tmp_output = os.path.join(work_directory, blast_output_file)

        QueryFasta = Fasta.build(file_name = tmp_input, sequence_id = sequenceID,
                                 sequence  = sequence,  force       = True)

        self._execute(input_file = QueryFasta, output_file = tmp_output)

        blast_result = self._parse_blast(sequence, tmp_output)

        self._clean([tmp_input, tmp_output])

        return blast_result
Exemplo n.º 30
0
# Line prefixes that carry no data in a DS report and are skipped outright.
_DS_SKIPPED_PREFIXES = ('==', '***', '---- P R O T E I N    C O D E  ----')

# Section headers of a subclass block, paired with the parse mode they select.
_DS_SECTION_MODES = (
    ('        SEQUENCE   ALIGNEMENT                           :', 'P'),
    ('       ACCESSIBLE SURFACE ALIGNEMENT                    :', 'E'),
    ('           RAMACHANDRAN                                 :', 'R'),
    ('        SECONDARY STRUCTURE                             :', 'S'),
)

# Indentation that introduces a per-loop data line inside a section.
_DS_DATA_INDENT = '                             '


def _ds_pattern(dataline):
    """Extract the pattern stored in a ``label : pattern`` report line.

    Takes the second ``:``-separated field, trims surrounding whitespace and
    leading/trailing dots, and removes every literal ``(X)`` token.
    """
    return re.sub(r'\(X\)', '', dataline.split(':')[1].strip().strip('.'))


def _load_ds_report(report, classification, loops):
    """Feed every line of an open DS report into ``classification``.

    @param report: iterable of text lines (an open report file).
    @param classification: {Cclass} object that receives the subclasses.
    @param loops: value returned by ``readlist``; it is only forwarded to
                  ``add_consensus``.
    """
    workscls = None
    # Reset per report so that a data line appearing before any section
    # header is ignored instead of raising UnboundLocalError (first file) or
    # silently reusing the mode left over from the previous file.
    parse_mode, counter = None, 0
    read = False

    for line in report:
        dataline = line.rstrip('\n')

        # Separators, blank lines and the protein-code banner hold no data.
        if line.startswith(_DS_SKIPPED_PREFIXES) or not line.strip():
            continue

        # Start of a subclass block: register it and start reading.
        if line.startswith('CONSENSUS & MULTIPLE ALIGNEMENT IN THE'):
            data = line.split(':')[-1].strip().split()
            classification.subclasses = Subclass(
                (data[0].strip(), data[3].strip()), data[4])
            workscls = classification.lastsubclass
            read = True
            continue

        # End of the subclass block: ignore lines until the next one starts.
        if line.startswith('GLOBAL STATISTICS'):
            read = False
            continue

        if not read:
            continue

        section_mode = next(
            (mode for prefix, mode in _DS_SECTION_MODES
             if line.startswith(prefix)), None)

        if section_mode is not None:
            # A new section restarts the per-loop counter.
            parse_mode, counter = section_mode, 0
        elif line.startswith('--------- CONSENSUS THORNTON       :'):
            workscls.add_consensus(dataline, 'DS', loops)
        elif line.startswith('--------- CONSENSUS TOPOLOGY'):
            workscls.add_topology(dataline, 'DS')
        elif line.startswith('CENTROIDE POLAR COORD.   :'):
            workscls.add_coordinates(dataline)
        elif line.startswith('--------- RAMACHANDRAN PATTERN     :'):
            workscls.ram_pat = _ds_pattern(dataline)
        elif line.startswith('--------- SEQUENCE  PATTERN        :'):
            workscls.seq_pat = _ds_pattern(dataline)
        elif line.startswith('--------- BURIAL    PATTERN        :'):
            workscls.exp_pat = _ds_pattern(dataline)
        elif line.startswith(_DS_DATA_INDENT) and len(dataline) < 400:
            if parse_mode == 'P':
                # The sequence section creates the loops ...
                workscls.loops = Loop(info=dataline)
            elif parse_mode == 'E':
                # ... and the later sections annotate them by position.
                workscls.loops[counter].add_surface(info=dataline)
                counter += 1
            elif parse_mode == 'R':
                workscls.loops[counter].add_ramachandran(info=dataline)
                counter += 1
            elif parse_mode == 'S':
                workscls.loops[counter].add_secondary_str(info=dataline)
                counter += 1


def DS2SQL(database, looplist, sqlfile, verbose):
    """Convert every DS report found in ``database`` into a gzipped SQL file.

    For each file listed in ``database`` the subclass type is taken from the
    second dot-separated field of the file name, the report is parsed into a
    {Cclass} object, and ``<subclasstype>.sql.gz`` is written inside
    ``sqlfile`` containing ``start_transaction()``, the ``toSQL('DS')`` dump
    of the classification and ``end_transaction()``.

    @param database: directory holding the report files to parse.
    @param looplist: passed to ``readlist`` together with the subclass type.
    @param sqlfile: output directory (created through ``Path.mkdir``).
    @param verbose: when true, progress messages are written to stderr.
    """
    Path.mkdir(sqlfile)
    for dsfile in Path.list_files(database):
        subclasstype = os.path.split(dsfile)[-1].split('.')[1]
        classification = Cclass(subclasstype)
        if verbose:
            sys.stderr.write(
                "Retrieving data for subclass {0} ...\n".format(subclasstype))
        loops = readlist(looplist, subclasstype)

        sql_path = os.path.join(sqlfile, subclasstype + '.sql.gz')
        # Context managers guarantee both handles are closed (and the gzip
        # stream flushed) even when parsing or SQL generation fails.
        # NOTE(review): the archive is opened in binary mode, so the SQL
        # helpers must return byte strings under Python 3 -- confirm.
        with gzip.open(sql_path, 'wb') as sql_fd:
            sql_fd.write(start_transaction())
            with open(dsfile) as sql_in:
                _load_ds_report(sql_in, classification, loops)
            sql_fd.write(classification.toSQL('DS'))
            sql_fd.write(end_transaction())

    if verbose:
        sys.stderr.write("End execution.\n")