from Bio.PDB.PDBParser import PDBParser
from mongoengine.errors import DoesNotExist

from SNDG import init_log
from SNDG.BioMongo.Model import BioProperties
from SNDG.BioMongo.Model import Cluster
from SNDG.BioMongo.Model.Structure import ExperimentalStructure, ResidueSet
from SNDG.BioMongo.Process.BioMongoDB import BioMongoDB
from SNDG.Structure.CompoundTypes import get_compound_type
from SNDG.Structure.FPocket import FPocket
from SNDG.Structure.PDBs import PDBs


# from Bia.Programs.Cluster.CDHit import CDHit

init_log()
_log = logging.getLogger(__name__)

from SNDG.Structure.CompoundTypes import compound_type


def update_clusters():
    """Walk the CD-HIT clusters of the PDB segment sequences.

    For every ``(cluster_name, seqs)`` pair yielded by
    ``CDHit().clustered_seq_iterator`` over the fixed fasta path below, a
    ``Cluster`` document of type ``"PDB_Segments_95"`` is instantiated and the
    PDB code of each member sequence is collected into a list.

    NOTE(review): the ``CDHit`` import is commented out near the top of this
    file, so the name must come from elsewhere for this call to work.
    NOTE(review): ``cluster`` and ``cristals`` are not used any further in the
    visible part of this function; the listing appears to be cut short.
    """
    for cluster_name, seqs in CDHit().clustered_seq_iterator("/data/databases/pdb/processed/seqs_from_pdb95.fasta"):
        _log.debug(cluster_name)

        # PDB codes of the members of the current cluster.
        cristals = []
        cluster = Cluster(name=cluster_name, type="PDB_Segments_95")
        for seq in seqs:
            # Each member is a 5-tuple; only the sequence id is used below.
            seq_id, seq_start, seq_end, clust_start, clust_end = seq
            # The sequence id has four "_"-separated fields; the first one is
            # kept as the PDB code.
            pdb, chain, start, end = seq_id.split("_")
            cristals.append(pdb)
示例#2
0
#!/usr/bin/env python

import os
import time

from SNDG import mkdir, execute, execute_from, init_log
from SNDG.WebServices import download_file
from SNDG.Structure.PDBs import PDBs

init_log("/tmp/createdb.log")


def old_or_inexistent(filepath, period=30):
    """Return True when *filepath* is missing or has not been accessed recently.

    The age of the file is measured from its last access time
    (``os.path.getatime``), not from its modification time.

    :param filepath: path of the file to check.
    :param period: maximum allowed age, in days, before the file counts as old.
    :return: True if the path does not exist or its last access is more than
        *period* days in the past; False otherwise.
    """
    if not os.path.exists(filepath):
        return True
    seconds_since_access = time.time() - os.path.getatime(filepath)
    # seconds -> minutes -> hours -> days
    days_since_access = seconds_since_access / 60 / 60 / 24
    return days_since_access > period


#os.environ["http_proxy"] = "http://proxy.fcen.uba.ar:8080"
#os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"

# Prepare the local PDB mirror directory and fetch the wwPDB entries index.
mkdir("/data/pdb/")
# NOTE(review): the keyword is spelled `ovewrite`; presumably this matches the
# parameter name in download_file's signature - confirm before renaming.
download_file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx",
              "/data/pdb/entries.idx",
              ovewrite=True)

# Drive the PDBs helper over the mirror directory. The exact effect of each
# call lives in SNDG.Structure.PDBs and is not visible from this script.
pdbs = PDBs("/data/pdb/")
pdbs.download_pdb_seq_ses()
pdbs.update_pdb_dir()
# The processed/ directory must exist before the next step; the makeblastdb
# command below reads its input fasta from there.
mkdir("/data/pdb/processed/")
pdbs.pdbs_seq_for_modelling()
# Build a protein BLAST database from the fasta file under processed/.
execute("makeblastdb -dbtype prot -in /data/pdb/processed/seqs_from_pdb.fasta")
示例#3
0
        protein_fasta = tmp_dir + "/proteins.fasta"
        if not os.path.exists(protein_fasta) or (
                not os.path.getsize(protein_fasta)):
            with open(protein_fasta, "w") as h:
                for p in Protein.objects(organism=seq_col_name).no_cache():
                    bpio.write(SeqRecord(id=p.gene[0], seq=Seq(p.seq)), h,
                               "fasta")

        genome = SeqCollection.objects(name=seq_col_name).get()
        genome.ncbi_assembly = seq_col_name
        if not genome.statistics:

            self.mdb.index_seq_collection(seq_col_name, pathways=False)
            self.mdb.build_statistics(seq_col_name)

            _log.info("Sequence collection %s created correctly " %
                      seq_col_name)


if __name__ == '__main__':
    # Script entry point: configure logging, then run a Deployer for the
    # "saureus" collection.
    init_log("/tmp/sndg2.log")
    # Only let peewee report errors, to keep its output out of the log.
    logger = logging.getLogger('peewee')
    logger.setLevel(logging.ERROR)
    dep = Deployer()
    connect_to_db(password="******")
    # NOTE(review): mysql.connector is imported but not referenced by name in
    # this block; presumably imported for its availability/side effects - confirm.
    import mysql.connector
    dep.mdb = "saureus"
    #dep.annotation_tax = "158879"
    dep.init()
    # NOTE(review): database credentials are hard-coded here; consider reading
    # them from configuration or the environment instead.
    tax_db.initialize(MySQLDatabase('bioseq', user='******', passwd="mito"))
示例#4
0
@author: eze
'''

import logging
from argparse import ArgumentParser, RawDescriptionHelpFormatter

import math
from Bio.PDB.PDBParser import PDBParser

from tqdm import tqdm

from SNDG import init_log
from SNDG.Structure.PDBdb import *
from SNDG.Structure.PDBs import PDBs

init_log(rootloglevel=logging.INFO)
_log = logging.getLogger(__name__)

if __name__ == "__main__":

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)

    parser.add_argument("-p", "--dbpass", required=True)
    parser.add_argument("-i", "--pdb_dir", default="/data/databases/pdb/")
    parser.add_argument("-db", "--dbname", default="pdbdb")
    parser.add_argument("-u", "--dbuser", default="root")

    args = parser.parse_args()
    from peewee import MySQLDatabase

    mysql_db = MySQLDatabase(args.dbname,
示例#5
0
import logging
import pymongo
from mongoengine.connection import connect
from pymongo.mongo_client import MongoClient

from SNDG import init_log
from SNDG.BioMongo.Model.SeqCollection import Genome, SeqColDruggabilityParam
from SNDG.BioMongo.Process.BioCyc2Mongo import BioCyc
from SNDG.BioMongo.Process.StructureAnotator import StructureAnotator
from SNDG.BioMongo.Process.StructureIndexer import StructuromeIndexer
from SNDG.BioMongo.Process.PathwaysAnnotator import PathwaysAnnotator
from SNDG.BioMongo.Process.BioMongoDB import BioMongoDB
from pprint import pprint

# Module-level setup: log to /tmp/validate.log and open the "tdr" database on
# a MongoClient created with default connection settings.
init_log("/tmp/validate.log")
_log = logging.getLogger(__name__)
db = MongoClient().tdr

import re

# Patterns matching strings that start with the "go:" / "ec:" prefix
# (case-sensitive; the IGNORECASE flag was left commented out).
regx_go = re.compile("^go:")  # , re.IGNORECASE)
regx_ec = re.compile("^ec:")


def validate_pathways_protein(g):
    for dp in BioCyc.pathways_search_params:
        if not g.has_druggability_param(dp[0]):
            print "%s no tiene el atributo %s" % (g.name, dp[0])

    count = db.proteins.count({
示例#6
0
from SNDG import init_log
from SNDG.Structure.FPocket import FPocket

if __name__ == '__main__':
    # Command-line entry point: run FPocket on every "*.pdb" file found in the
    # given models directory and store each result next to its model as
    # "<model>.pdb.json". Models that already have a result file are skipped,
    # so the script can be re-run after an interruption.
    parser = argparse.ArgumentParser()

    parser.add_argument("-d", "--models_directory", required=True)
    parser.add_argument("-l", "--log_path", default=None)

    args = parser.parse_args()

    # Validate explicitly instead of using `assert`, which is stripped when
    # Python runs with -O and would let a bad path slip through silently.
    if not os.path.exists(args.models_directory):
        parser.error("%s does not exists" % args.models_directory)

    # Default log file lives inside the models directory.
    if not args.log_path:
        args.log_path = args.models_directory + "/pocketome.log"
    init_log(args.log_path, logging.INFO)
    _log = logging.getLogger("pocketome")

    with tqdm(glob.glob(args.models_directory + "/*.pdb")) as pbar:
        for pdb in pbar:
            pbar.set_description(pdb)
            try:
                pocket_data = pdb + ".json"
                if not os.path.exists(pocket_data):
                    fpo = FPocket(pdb)
                    result = fpo.hunt_pockets()
                    result.save(pocket_data)
                    result.delete_dir()
            except Exception as e:
                # Best effort: a failure on one model must not stop the batch.
                # Logger.warn is a deprecated alias; use Logger.warning.
                _log.warning(e)
示例#7
0
                                                ["uniprot", "db", "value"])
                                        })
                                except IntegrityError:
                                    pass


if __name__ == '__main__':
    # Ad-hoc driver: annotate the proteins of the "Lepto-CLM-U50" genome using
    # the "unipmap" MySQL database and the "tdr" BioMongoDB.
    from SNDG import init_log
    import logging

    from SNDG.BioMongo.Process.BioMongoDB import BioMongoDB
    mysqldb = ProteinAnnotator.connect_to_db(database="unipmap",
                                             user="******",
                                             password="******")

    # NOTE(review): `pa` is only referenced by the commented-out calls below.
    pa = ProteinAnnotator()
    # pa.connect_to_db(password="******")
    # pa.create_db()
    # pa.populate_sql("/data/uniprot/idmapping_filtered.dat",
    #                "/data/uniprot/goa/goa_uniprot_all.gpa")
    tmpdir = "/tmp/lepto/Lepto-CLM-U50"
    # Silence peewee below WARN level before the log file is configured.
    logging.getLogger("peewee").setLevel(logging.WARN)
    init_log(log_file_path=tmpdir + "/ann.log")
    # NOTE(review): `mdb` is not used afterwards in this block; presumably the
    # constructor is called for its connection side effect - confirm.
    mdb = BioMongoDB("tdr")
    tax = 1958811

    # The query result is discarded; presumably a quick check that the Mapping
    # table is reachable - confirm.
    list(Mapping.select().where(Mapping.uniprot == "12"))
    n = "Lepto-CLM-U50"

    from SNDG.BioMongo.Process.Importer import update_proteins
    # The genome fasta is expected inside the working directory.
    update_proteins(tmpdir, tmpdir + "/genome.fasta", n, tax, db_init=mysqldb)