Пример #1
0
class Pair:
    """Cross product of the ids held by a receptor store and a ligand store.

    The constructor wraps ``liganddb`` in a ``Ligand`` and ``receptordb`` in a
    ``Receptor``; every other method delegates to those two objects.
    """

    def __init__(self, receptordb, liganddb):
        """Create the ligand store first, then the receptor store."""
        self.ligand = Ligand(liganddb)
        self.receptor = Receptor(receptordb)

    def foreachPair(self, pid, sink):
        """Call ``sink(pid, cid)`` for every id ``cid`` the ligand store yields."""
        self.ligand.foreachId(lambda cid: sink(pid, cid))

    def foreach(self, sink, criteria=''):
        """Call ``sink(pid, cid)`` for every receptor id / ligand id pair.

        ``criteria`` is accepted for interface compatibility but is not
        forwarded to either store.
        """
        self.receptor.foreachId(lambda pid: self.foreachPair(pid, sink))

    def size(self, rcriteria='1=1', lcriteria='1=1'):
        """Return the number of pairs: ligand count times receptor count.

        ``lcriteria`` and ``rcriteria`` are passed unchanged to the respective
        store's ``size`` method (presumably SQL-style filters -- TODO confirm).
        """
        ligands = self.ligand.size(lcriteria)
        receptors = self.receptor.size(rcriteria)
        return ligands * receptors

    def close(self):
        """Close both stores.

        The receptor store is closed even when closing the ligand store
        raises, so a failure in one does not leak the other.
        """
        try:
            self.ligand.close()
        finally:
            self.receptor.close()
Пример #2
0
def main():
    """Parse the options, seed numpy's RNG, build the GNN and run the GA search."""
    options = parse_args()

    # Silence the libraries the Ligand class relies on.
    Ligand(None).quiet()

    # Seed numpy's global random generator from the command line.
    numpy.random.seed(options.seed)

    # Build the GNN first; the GA search below takes it as an argument.
    oracle = init_gnn(options)
    best_found = ga_search(options, oracle)
Пример #3
0
def run():
    """Save the uploaded ligand and render ``results.html``.

    Reads the ``protein`` form field, picks the last entry of
    ``request.files`` (or ``None`` when nothing was uploaded), hands it to
    ``Ligand.save`` together with the app's instance path, and renders the
    results template with the value returned by ``Ligand.save``.
    """
    protein = request.form['protein']
    data = dict(request.form)  # copied but not used further in this handler

    # Walk every uploaded field; the last one wins.
    ligand = None
    uploads = request.files
    for field_name in uploads:
        ligand = uploads.get(field_name)

    saved = Ligand.save(ligand, app.instance_path)
    return render_template("results.html", ligand=saved, protein=protein)
Пример #4
0
def init_gnn(args):
    """Build the GNN graph ops and restore the trained weights.

    Loads the protein named by ``args.protein``, builds the ops from a sample
    (protein, ligand, target) triple, opens a TensorFlow session and restores
    the checkpoint at ``args.model`` into it.

    Returns:
        Tuple ``(sess, ops)``.
    """
    protein = load_protein_file(args.protein)
    ligand = Ligand("CCC").get_dict()

    # Target graph with no nodes or edges; only its globals carry data.
    target_globals = numpy.array([0.0, 1.0], dtype=numpy.float32)
    target = dict(
        n_node=0,
        n_edge=0,
        nodes=None,
        edges=None,
        senders=None,
        receivers=None,
        globals=target_globals,
    )
    ops = build_gnn(args, (protein, ligand, target))

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
    )
    session_config.gpu_options.allow_growth = False
    sess = tf.Session(config=session_config)

    # Initialise every variable, then overwrite them from the checkpoint.
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, args.model)
    return (sess, ops)
Пример #5
0
def postLigand(ligand):
    """Pass ``ligand`` and the app's instance path to ``Ligand.save``; return its result."""
    return Ligand.save(ligand, app.instance_path)
Пример #6
0
def pdf():
    """Build a PDF from hard-coded sample data and serve it as a static file.

    The value returned by ``Ligand.makePDF`` is passed straight to
    ``app.send_static_file`` (presumably a file name inside the static
    folder -- TODO confirm).
    """
    sample_ligands = {"EGFR": 1, "EGFE": 2}
    sample_models = {"Support Vector Machines": [0.5, 0.5], "KNN": [0.4, 0.6]}
    report_name = Ligand.makePDF(sample_ligands, sample_models)
    return app.send_static_file(report_name)
Пример #7
0
def Loadpdb(pdb=None, hetatm= True, verbose=False):
    """Parse PDB coordinate records into per-molecule objects.

    Iterates over the lines of ``pdb`` and groups ATOM (and, optionally,
    HETATM) records into Protein / Lipid / Ligand objects, chosen by looking
    the residue name up in ``Mol_types``. Each finished molecule is
    deep-copied into the result dictionary under the current value of
    ``Molecule.molid`` (presumably advanced whenever a molecule object is
    created -- TODO confirm against the Molecule class).

    Args:
        pdb: Iterable of PDB text lines, e.g. an open file handle.
        hetatm: When true, HETATM records are loaded too; every change of
            residue number or chain in that section starts a new Ligand.
        verbose: When true, print the molecule count and each molecule's type.

    Returns:
        Dictionary keyed by ``Molecule.molid``. For every entry ``resids`` and
        ``nor`` are filled in, and ``chain_break`` is set to True on
        non-ligand molecules whose residue numbering has gaps.

    The function terminates the process through ``sys.exit`` when ``pdb`` is
    None or when a residue name is not listed in ``Mol_types``.

    NOTE(review): ``mol`` is bound only after the first ATOM/HETATM record has
    been handled, so input that starts with HETATM records (with ``hetatm``
    true) or contains no coordinate records at all reaches ``deepcopy(mol)``
    with ``mol`` unbound.
    """
    try:
        # NOTE(review): assert statements are removed under ``python -O``, in
        # which case this check never fires.
        assert(pdb != None) #Check if filehandle to PDB file is passed
    except AssertionError:
        sys.exit("**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb.")

    AtomNumber=0 #Keeps track of atom indices (assigned in the order atoms are listed in the input file)
    mol_data={} #Key: molid; Value: Molecule_Type Object; Keep track of different molecules (different chains or molecule type) in input structure
    first_res =True  #True while the next record must start a new molecule object (its type is chosen from the residue name)
    Prev_res=0 # to keep track of residue change
    Prev_chain='aa' # to keep track of chain change in HETATM section
    atmTohet = True #To determine transition from ATOM to HETATM record
    frame_tag = '' # To keep track of multi-frame entry (multiple entry for same molecule type with same chain id)
    '''Load the PDB structure file'''
    for line in pdb:
        # ATOM records; atoms named OXT are skipped entirely (not even counted).
        if line[0:4]=="ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber+=1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)

            #Check for unrecognized residue and new molecule
            if not first_res:
                if ResName.lower() not in Mol_types['protein'] + Mol_types['lipid'] + Mol_types['ligand']:
                    # NOTE(review): the message says the residue was added as a
                    # Ligand, but the next statement exits the process.
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\nAdded %s as Ligand." % ResName
                    sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types")
                # A residue change (or, for ligands, a chain change) may close the current molecule.
                if ResNo != Prev_res or (Prev_chain != Chain and mol.molecule_type().lower()=='ligand'):
                    if mol.molecule_type().lower()=='ligand':
                        # Ligands are stored one residue per molecule.
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                    elif Prev_chain != Chain or ResName.lower() not in Mol_types[mol.molecule_type().lower()]: #Either Chain is different or New residue doesn't belong to current molecule type
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                    elif frame_tag.lower() in ['endmdl', 'ter', 'end'] and Prev_chain == Chain: #Different molecule (of same molecule type) with same chain id; as in trajectory frames
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True


            if first_res: #Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['lipid']:
                    mol = Lipid()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Molecule object."
                    sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types")
                first_res = False
                frame_tag = ''  # a new molecule has started; forget the last separator record

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res= ResNo
            Prev_chain=Chain
        elif line[0:6]=="HETATM" and hetatm == True:
            if atmTohet:
                # First HETATM record: store the molecule built from the ATOM section.
                # NOTE(review): mol is unbound here if no ATOM record came first.
                mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                first_res = True
                atmTohet = False
            AtomNumber+=1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)

            #Check for new ligand molecule
            if not first_res and (ResNo != Prev_res or Prev_chain != Chain):
                mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                first_res = True

            #Initialize mol for new chain or molecule
            if first_res:
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Ligand object."
                    sys.exit("In file configstruc.py: Add missing residue name ("+ ResName+ ") to ligand molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res= ResNo
            Prev_chain=Chain
        elif line[0:3].lower() in ["ter", "end"] or line[0:6].lower() == "endmdl":
            # Only the first three characters are kept, so an ENDMDL record is
            # remembered as "END".
            frame_tag = line[0:3]
    #append the last mol object to mol_data
    # NOTE(review): mol is unbound here when no coordinate record was read.
    mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary

    if verbose:
        print "Number of molecules in input file: ", len(mol_data), "\n"
    #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules
    for key in sorted(mol_data):
        if verbose:
            print "Molid:", key,"Molecule_Type:",mol_data[key].molecule_type()
        mol_data[key].resids = sorted(mol_data[key].residue)
        mol_data[key].nor = len(mol_data[key].resids)
        if mol_data[key].molecule_type().lower() != 'ligand':
            #Check for chain breaks in non-ligand molecule
            # The differences sum to (last id - first id); numbering is gap-free
            # exactly when that span plus one equals the residue count.
            resids_diff=numpy.array(mol_data[key].resids[1:]) - numpy.array(mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff)+1):
                break_indices = numpy.where(resids_diff > 1)
                print "Chain break encountered in molecule",key, "at residue positions: "
                for res in break_indices[0]:
                    print mol_data[key].resids[res],
                print "\n"
                mol_data[key].chain_break = True
    return mol_data
Пример #8
0
import os
from parameter import Parameter
from ligand import Ligand
from receptor import Receptor
from path import Path


def print_pairs(pid, ligand):
    """Print one ``pid<TAB>cid`` line for every id that ``ligand.foreachId`` yields."""

    def emit(cid):
        print(pid, cid, sep='\t')

    ligand.foreachId(emit)


p = Parameter()
pt = Path(p)

ligand = Ligand(pt.data + p._('project.wizard.ligand.db'))
receptor = Receptor(pt.data + p._('project.wizard.receptor.db'))

# Print every receptor id / ligand id pair. Both stores are closed even when
# the iteration or the printing fails part-way (e.g. a closed output pipe).
try:
    receptor.foreachId(lambda pid: print_pairs(pid, ligand))
finally:
    try:
        ligand.close()
    finally:
        receptor.close()
Пример #9
0
import os
from pdbqtdaemon import PDBQTDaemon
from parameter import Parameter
from ligand import Ligand
from path import Path

# Process name that PrepareLigand passes to PDBQTDaemon.__init__.
process_name = 'prepare_ligand'

# Project configuration and the path helper built from it.
p = Parameter()
pt = Path(p)
# Settings read through the Parameter accessors `_` and `i`
# (presumably a string lookup and an integer lookup -- TODO confirm).
user = p._('user')
payload = p.i('daemon.sbatch.payload')
# Ligand store constructed at import time; PrepareLigand passes it to its base class.
db = Ligand(pt.liganddb)


class PrepareLigand(PDBQTDaemon):
    """PDBQTDaemon specialisation that writes one ``.sbatch`` file per ligand.

    ``self.path``, ``self.proc_name`` and ``self.template`` are not assigned
    in this class; they are presumably set by ``PDBQTDaemon.__init__`` from
    the arguments passed below -- TODO confirm.
    """

    def __init__(self):
        """Initialise the base daemon with the module-level configuration."""
        PDBQTDaemon.__init__(self, user, db, pt, payload, process_name)

    def molecule_done(self, id):
        """Return True when the file named by ``pt.ligandpdbqt(id)`` exists."""
        return os.path.isfile(pt.ligandpdbqt(id))

    def prepare(self, id, templ):
        """Write the batch script for ligand ``id`` into the docking directory.

        The working directory is changed to ``self.path.docking_ligand`` and
        stays changed after the call. The ``templ`` argument is ignored: the
        script text is always rendered from ``self.template``.
        """
        os.chdir(self.path.docking_ligand)

        log_name = '{}/{}_{}.out'.format(self.path.log, self.proc_name, id)
        templ = self.template.format(log_name, self.path.ligand(id), id)
        target = '{}_{}_{}.sbatch'.format(self.path.project_name,
                                          self.proc_name, id)
        # Context manager so the script is flushed and closed before returning.
        with open(target, 'w') as script:
            script.write(templ)
Пример #10
0
from parameter import Parameter
from ligand import Ligand
from receptor import Receptor


def line(id, ligand):
    """Return ``id`` followed by every ligand id, joined with tab characters."""
    fields = [str(id)]

    def collect(cid):
        fields.append(str(cid))

    ligand.foreachId(collect)
    return '\t'.join(fields)


p = Parameter()

ligand = Ligand(p._('project.ligand.db'))
receptor = Receptor(p._('project.receptor.db'))

# Print one tab-separated row per receptor id. Both stores are closed even
# when the iteration or the printing fails part-way.
try:
    receptor.foreachId(lambda rid: print(line(rid, ligand)))
finally:
    try:
        ligand.close()
    finally:
        receptor.close()
Пример #11
0
def Loadpdb(pdb=None, hetatm=True, verbose=False):
    """Parse PDB coordinate records into per-molecule objects.

    ATOM records are accumulated into one Protein or Ligand object (chosen
    from the first residue name via ``Mol_types``) until a TER record stores
    it. HETATM records, when enabled, are grouped into one Ligand per residue
    number / chain. Stored molecules are deep copies keyed by the current
    value of ``Molecule.molid`` (presumably advanced whenever a molecule
    object is created -- TODO confirm against the Molecule class).

    Args:
        pdb: Iterable of PDB text lines, e.g. an open file handle.
        hetatm: When true, HETATM records are loaded as Ligand objects.
        verbose: When true, print the molecule count and each molecule's type.

    Returns:
        Dictionary keyed by ``Molecule.molid``. ``resids`` and ``nor`` are
        filled in for non-ligand molecules only.

    The function terminates the process through ``sys.exit`` when ``pdb`` is
    None or when a molecule starts with a residue name that is not listed in
    ``Mol_types``.

    NOTE(review): ``mol`` is bound only after the first coordinate record, so
    a TER record (or the final store) reached before any ATOM/HETATM record
    hits ``deepcopy(mol)`` with ``mol`` unbound.
    """
    try:
        # NOTE(review): assert statements are removed under ``python -O``, in
        # which case this check never fires.
        assert (pdb != None)  #Check if filehandle to PDB file is passed
    except AssertionError:
        sys.exit(
            "**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb. "
        )

    AtomNumber = 0  #Keeps track of atom indices (assigned in the order atoms are listed in the input file)
    mol_data = {
    }  #Key: molid; Keep track of different molecules (different chains or molecule type) in input structure
    check_het = False  #To keep track of new Hetero residue
    first_res = True  #True while the next record must start a new molecule object (its type is chosen from the residue name)
    Prev_res = 0  # to keep track of residue change in HETATM section; a new Molecule object is assigned for every residue.
    Prev_chain = 'a'  # to keep track of chain change in HETATM section
    '''Load the PDB structure file'''
    for line in pdb:
        # ATOM records; atoms named OXT are skipped entirely (not even counted).
        if line[0:4] == "ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(
                line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX,
                       CordY, CordZ)

            if first_res:  #Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Molecule object."
                    sys.exit(
                        "In file configstruc.py: Add missing residue name(" +
                        ResName + ") to appropriate molecule in Mol_types")
                first_res = False

            # Within the ATOM section molecules are split only at TER records;
            # residue and chain changes are not inspected here.
            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
        elif line[0:3] == "TER":
            mol_data[Molecule.molid] = deepcopy(
                mol)  #copy mol object into dictionary
            first_res = True  # mol object will be initialized to molecule type of next molecule
        elif line[0:6] == "HETATM" and hetatm == True:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(
                line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX,
                       CordY, CordZ)

            #Check for new molecule
            if (
                    ResNo != Prev_res or Prev_chain != Chain
            ) and check_het == True:  #For first HETATM check_het is always False
                mol_data[Molecule.molid] = deepcopy(
                    mol)  #copy mol object into dictionary
                first_res = True

            #Initialize mol for new chain or molecule
            if first_res:
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Ligand object."
                    sys.exit(
                        "In file configstruc.py: Add missing residue name (" +
                        ResName + ") to ligand molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            # Prev_res starts at 0, so this arms the new-molecule check once
            # the first HETATM record has been processed.
            if Prev_res == 0:
                check_het = True
            Prev_res = ResNo
            Prev_chain = Chain
    # NOTE(review): this final store depends only on the hetatm flag, not on
    # whether a HETATM record was actually read; with hetatm false, a molecule
    # that is not followed by a TER record is never stored.
    if hetatm:  #If HETATM record was added; append the last hetero residue object to mol_data
        mol_data[Molecule.molid] = deepcopy(
            mol)  #copy mol object into dictionary

    if verbose:
        print "Number of molecules in input file: ", len(mol_data), "\n"
    #Update mol_data[molid].nor and mol_data[molid].resids for non-ligand molecules, and check them for chain breaks
    for key in sorted(mol_data):
        if verbose:
            print "Molid:", key, "Molecule_Type:", mol_data[key].molecule_type(
            )
        if mol_data[key].molecule_type().lower() != 'ligand':
            mol_data[key].resids = sorted(mol_data[key].residue)
            mol_data[key].nor = len(mol_data[key].resids)
            #Check for chain breaks in protein
            # The differences sum to (last id - first id); numbering is gap-free
            # exactly when that span plus one equals the residue count.
            resids_diff = numpy.array(mol_data[key].resids[1:]) - numpy.array(
                mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff) + 1):
                break_indices = numpy.where(resids_diff > 1)
                print "Chain break encountered in molecule", key, "at residue positions: "
                for res in break_indices[0]:
                    print mol_data[key].resids[res],
                print "\n"
    return mol_data
Пример #12
0
import os
from parameter import Parameter
from ligand import Ligand

p = Parameter()

dir = p._('project.dir') + '/'
project_dir = dir + p._('project.wizard.create.name') + '/'
data_dir = project_dir + 'data/'

molecule = Ligand(data_dir + p._('project.wizard.ligand.db'))

# Print one tab-separated row per record. The store is closed even when the
# lookup of the column list or the printing fails.
try:
    columns = p._('project.wizard.ligand_report.columns')
    molecule.foreachRecord(
        columns, lambda id, fields: print(id, fields, sep='\t'))
finally:
    molecule.close()
Пример #13
0
import os
from parameter import Parameter
from ligand import Ligand

p = Parameter()

#locate tree
dir = p._('project.dir')
name = p._('project.wizard.create.name')
project_dir = dir + '/' + name
data_dir = project_dir + '/data'

filename = '{}/stage/{}'.format(project_dir, p._('project.wizard.load.ligand'))

#import data
molecule = Ligand(data_dir + '/' + p._('project.wizard.ligand.db'))
molecule.clear()

# The staged file holds one id per line; the context manager closes it as
# soon as the loop ends instead of leaving that to the garbage collector.
with open(filename) as id_file:
    for id in id_file:
        id = id.strip('\n')
        molecule.add(id)

molecule.commit()


# download ligand structures
def save(id, s, format, dir):
    """Write ``s`` to ``<dir>/<id>.<format>`` and print ``id`` afterwards."""
    target = '{0}/{1}.{2}'.format(dir, id, format)
    with open(target, 'w') as handle:
        handle.write(s)
    print(id)
Пример #14
0
 def __init__(self, receptordb, liganddb):
     """Wrap ``liganddb`` in a Ligand and ``receptordb`` in a Receptor.

     The ligand object is created first even though the receptor argument
     comes first in the signature.
     """
     self.ligand = Ligand(liganddb)
     self.receptor = Receptor(receptordb)
Пример #15
0
def getSampleScores(protein, active):
    """Return the result of ``Ligand.getSample(protein, active)`` as a JSON response."""
    sample = Ligand.getSample(protein, active)
    payload = jsonify(sample)
    return make_response(payload)
Пример #16
0
def ga_search(args, gnn):
    """Run the genetic-algorithm search and return the best individual found.

    Args:
        args: Parsed options. Reads ``protein``, ``initpop`` (comma-separated
            SMILES strings), ``popsz``, ``mu``, ``gens``, ``sigma``, ``img``
            and ``out``.
        gnn: Fitness oracle, passed unchanged to ``eval_inds``.

    Returns:
        The individual with the lowest fitness seen in any generation, or the
        first individual of the initial population when no generation
        produced a fitness below ``WORST_ERROR``.
    """
    # Create initial population.
    protein = load_protein_file(args.protein)
    population = [Ligand(smiles) for smiles in args.initpop.split(',')]
    # Pad up to popsz with clones of the first individual.
    population += [
        population[0].clone() for _ in range(args.popsz - len(population))
    ]
    # The first individual is kept unmutated.
    for ind in population[1:]:
        ind.mutate(args.mu)
    # Generation loop.
    print("Start genetic algorithm search (%d):" % args.gens)
    best = population[0]
    best_fit = WORST_ERROR
    images = []
    for gen in range(args.gens):
        # Stdout print.
        print("  Generation %d:" % gen)
        print("    PopSize:   %d" % len(population))
        # Compute fitness.
        tstart = time.time()
        fits = eval_inds(population, gnn, protein)
        tfend = time.time()
        viable = sum(fit != WORST_ERROR for fit in fits)
        # Viability selection.
        # Compare by value: an identity test against the literal 0 depends on
        # interpreter-level integer caching and is never true for non-int
        # counts (e.g. a numpy integer).
        if viable == 0:
            print("    Population went extinct!")
            break
        print("    nViable:   %d" % viable)
        # Sort individuals and fitness values together, lowest fitness first.
        indexes = list(range(len(fits)))
        indexes.sort(key=fits.__getitem__)
        fits = list(map(fits.__getitem__, indexes))
        population = list(map(population.__getitem__, indexes))
        # Truncation selection: keep the best sigma fraction of popsz.
        population = population[0:int(args.popsz * args.sigma)]
        # Collect up to three distinct SMILES strings for the report below.
        elite, efits = ([], [])
        for ndx, ind in enumerate(population):
            if ind.smiles not in elite:
                elite.append(ind.smiles)
                efits.append(fits[ndx])
                if len(elite) == 3:
                    break
        # Drop survivors that are not viable.
        population = [
            ind for i, ind in enumerate(population) if fits[i] != WORST_ERROR
        ]
        fits = fits[0:len(population)]
        # Some initial stats.
        if fits[0] < best_fit:
            best_fit = fits[0]
            best = population[0]
            if args.img:
                images.append("%s/best_%d.png" % (args.out, gen))
                best.write_image(images[-1])
                write_gif(images, "%s/best.gif" % (args.out))
        print("    Fitness:   %f" % (sum(fits) / len(fits)))
        print("    BestFit:   %f" % (best_fit))
        print("    Best:      %s" % (best.smiles))
        print("    Elite:")
        for ndx, ind in enumerate(elite):
            print("      (%.4f) %s" % (efits[ndx], ind))
        # Reproduction.
        opop = len(population)
        # Carry a clone of the current best into the next generation.
        population.append(population[0].clone())
        while len(population) < args.popsz:
            # Crossover: parents are drawn only from the survivors.
            indi = population[numpy.random.randint(0, opop)]
            indj = population[numpy.random.randint(0, opop)]
            inds = indi.mate(indj)
            # Mutation.
            for ind in inds:
                ind.mutate(args.mu)
                population.append(ind)
        # End of generation timing and print.
        tend = time.time()
        print("    Time:      %.2f s (fit %.3f, ga %.3f)" %
              (tend - tstart, tfend - tstart, tend - tfend))
    # Return best found.
    return best
Пример #17
0
from path import Path
from ligand import Ligand

p = Parameter()
pt = Path(p)

#locate tree
dir = p._('project.dir')
name = p._('project.wizard.create.name')
project_dir = dir + '/' + name
data_dir = project_dir + '/data'

filename = '{}/stage/{}'.format(project_dir, p._('project.wizard.load.ligand'))

#import data
molecule = Ligand(pt.liganddb)
molecule.clear()

# The staged file holds one id per line; the context manager closes it as
# soon as the loop ends instead of leaving that to the garbage collector.
with open(filename) as id_file:
    for id in id_file:
        id = id.strip('\n')
        molecule.add(id)

molecule.commit()


# download ligand structures
def instage(id, pt):
    """Return True when ``pt.filename`` finds ``<id>.pdb`` for the stage location.

    ``pt.filename`` is called with the file name, ``pt.stage`` and the
    pattern ``'*.pdb'``; any result other than ``None`` counts as found.
    """
    # Identity test: None is a singleton, and ``is not`` cannot be overridden
    # by a custom ``__ne__`` on the returned object.
    return pt.filename('{}.pdb'.format(id), pt.stage, '*.pdb') is not None


def copy(id, dir, pt, pids):