Exemplo n.º 1
0
    def __init__(self, folderName):
        """Set up the working paths for one job and create its folders.

        Everything lives under ``FLAGS.workplace/<folderName>``: a ``temp``
        subfolder and a ``dock`` subfolder. The thread and process counts are
        copied from ``FLAGS``.

        Args:
            folderName: Name of the job folder inside ``FLAGS.workplace``.
        """
        job_root = os.path.join(FLAGS.workplace, folderName)
        self.basePath = job_root
        self.tempFolderPath = os.path.join(job_root, 'temp')
        self.dockingFolderPath = os.path.join(job_root, 'dock')

        # Parallelism settings come straight from the command-line flags.
        self.thread_num = FLAGS.thread_num
        self.process_num = FLAGS.process_num

        for folder in (self.tempFolderPath, self.dockingFolderPath):
            try_create_chain_folder(folder)
Exemplo n.º 2
0
    def __init__(self):
        """Parse the configuration and prepare the blast working folders.

        A fresh temporary directory is obtained from ``tempfile.mkdtemp()``;
        all other paths are placed under ``<self.workplace>/blast``.
        """
        self.parse()

        scratch_dir = tempfile.mkdtemp()
        self.tempFolderPath = scratch_dir

        # NOTE(review): self.workplace is not assigned in this method;
        # presumably parse() sets it -- confirm.
        blast_root = os.path.join(self.workplace, 'blast')
        self.basePath = blast_root
        self.formsPath = os.path.join(blast_root, 'forms')
        self.err_log_file = os.path.join(blast_root, 'err.log')
        self.mergedPath = os.path.join(blast_root, 'merged_forms')

        for folder in (scratch_dir, self.formsPath):
            try_create_chain_folder(folder)
Exemplo n.º 3
0
    def downloads(self,item):
        """Download one PDB entry and split it into receptor and ligand files.

        The entry is fetched with ``wget`` into ``FLAGS.rowdata_folder`` unless
        ``<item>.pdb`` is already there, in which case nothing else is done.
        After parsing, every hetero residue with more than 10 atoms is written
        to ``FLAGS.splited_ligand_folder/<pdbname>/<pdbname>_<resindex>_ligand.pdb``
        and, if at least one such ligand was written, the protein/nucleic part
        is written to ``FLAGS.splited_receptor_folder/<pdbname>.pdb``.
        Failures (unparsable file, no receptor, no ligand, no ligand above the
        atom threshold) are reported through ``self.error_log``.

        Args:
            item: PDB identifier; used as given for the raw file name and
                lower-cased for the output names.

        Returns:
            None in every case.
        """
        # Local import so this method does not rely on a module-level import.
        import subprocess

        download_address = self.get_address(item)
        raw_pdb_path = os.path.join(FLAGS.rowdata_folder, item + '.pdb')
        if os.path.exists(raw_pdb_path):
            print('{}  exists'.format(item))
            return None
        print('download  {}'.format(item))
        # Pass an argument list instead of a shell string so that characters
        # such as '&' or spaces in the address or folder cannot be
        # interpreted by the shell.
        subprocess.call(['wget', '-P', str(FLAGS.rowdata_folder),
                         str(download_address)])

        pdbname = item.lower()
        ligand_folder = os.path.join(FLAGS.splited_ligand_folder, pdbname)
        try_create_chain_folder(ligand_folder)

        try:
            parsed = prody.parsePDB(raw_pdb_path)
        except Exception:
            # Best effort: a broken or missing download is logged, not fatal.
            self.error_log('can not parse {}.\n'.format(item))
            return None

        # Nucleotides are listed explicitly in addition to the generic
        # "hetero and not water" selection.
        hetero = parsed.select('(hetero and not water) or resname ATP or resname ADP or resname AMP or resname GTP or resname GDP or resname GMP')
        receptor = parsed.select('protein or nucleic')
        if receptor is None:
            self.error_log("{} doesn't have receptor.\n".format(item))
            return None
        if hetero is None:
            self.error_log("{} doesn't have ligand.\n".format(item))
            return None

        ligand_written = False
        for residue in prody.HierView(hetero).iterResidues():
            # Small hetero groups (10 atoms or fewer) are not kept as ligands.
            if residue.numAtoms() <= 10:
                continue
            ligand_written = True
            res_index = residue.getResindex()
            ligand_path = os.path.join(
                ligand_folder,
                "{}_{}_ligand.pdb".format(pdbname, res_index))
            try_create_chain_parent_folder(ligand_path)
            prody.writePDB(ligand_path, residue)

        if ligand_written:
            receptor_path = os.path.join(FLAGS.splited_receptor_folder,
                                         pdbname + '.pdb')
            # Same precaution as for the ligand files above, so the write
            # does not depend on the receptor folder already existing.
            try_create_chain_parent_folder(receptor_path)
            prody.writePDB(receptor_path, receptor)
        else:
            self.error_log("{} doesn't convert, not ligand have more than 10 atoms.\n".format(item))
Exemplo n.º 4
0
    def __init__(self):
        """Parse the configuration, then make sure the forms folder exists."""
        self.parse()

        # NOTE(review): formsPath is not assigned in this method; presumably
        # parse() sets it -- confirm.
        forms_dir = self.formsPath
        try_create_chain_folder(forms_dir)
Exemplo n.º 5
0
def _save_av4(filepath, labels, elements, multiframe_coords):
    """Write one molecule to ``filepath + ".av4"`` as a raw binary record.

    The record is the concatenation, in this order, of: the number of labels
    (one int32), the labels (int32), the element tags (int32) and the
    coordinates (float32), each dumped with ``tobytes()``.

    Args:
        filepath: Output path without the ``.av4`` extension.
        labels: One integer label per coordinate frame.
        elements: One integer tag per atom.
        multiframe_coords: Coordinates indexed ``[atom, axis]`` for a single
            frame, or ``[atom, axis, frame]`` for several frames.

    Raises:
        Exception: If the number of elements differs from the number of atoms
            in the coordinates, or the number of labels differs from the
            number of frames.
    """
    labels = np.asarray(labels, dtype=np.int32)
    elements = np.asarray(elements, dtype=np.int32)
    multiframe_coords = np.asarray(multiframe_coords, dtype=np.float32)

    if multiframe_coords.shape[0] != len(elements):
        raise Exception(
            'Number of atom elements is not equal to the number of coordinates'
        )

    # A 2-D array holds exactly one frame; otherwise frames run along axis 2.
    if multiframe_coords.ndim == 2:
        frame_count = 1
    else:
        frame_count = multiframe_coords.shape[2]
    if len(labels) != frame_count:
        raise Exception(
            'Number labels is not equal to the number of coordinate frames'
        )

    number_of_examples = np.array([len(labels)], dtype=np.int32)
    av4_record = b"".join((
        number_of_examples.tobytes(),
        labels.tobytes(),
        elements.tobytes(),
        multiframe_coords.tobytes(),
    ))
    # Binary mode: the record is raw bytes and must not go through any
    # text-mode newline or encoding translation.
    with open(filepath + ".av4", 'wb') as f:
        f.write(av4_record)


def convert_database_to_av4(database_path,
                            positives_folder=None,
                            decoys_folder=None,
                            receptors_folder=None):
    """Convert the PDB files of a database into ``.av4`` binary records.

    Input layout read by this function:
        ``<database_path>/actives/<receptor>/<ligand_name>/*.pdb``
        ``<database_path>/receptors/<receptor>.pdb``

    For every ligand folder, all PDB files in it are stacked into one
    multi-frame ligand (they must list the same elements in the same order)
    and labelled 1 (active). The receptor and the ligand are then written to
    ``<database_path>_av4/<receptor>/<receptor>.av4`` and
    ``<database_path>_av4/<receptor>/<ligand_name>.av4``. Ligand folders whose
    ``.av4`` output already exists are skipped; empty ligand folders are
    appended to ``<database_path>/empty.txt``. Parse failures are printed and
    counted in ``stats`` and do not stop the run.

    When ``FLAGS.orchestra_arrayjob`` is set, only the ligand folders whose
    running index matches ``FLAGS.orchestra_jobid`` modulo
    ``FLAGS.orchestra_jobsize`` are processed.

    Args:
        database_path: Root folder of the database.
        positives_folder: Unused; kept for interface compatibility.
        decoys_folder: Unused; kept for interface compatibility.
        receptors_folder: Unused; kept for interface compatibility.
    """

    # make a directory where the av4 form of the output will be written
    output_path = str(database_path + '_av4')
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    def atoms_to_numbers(atomnames):
        # convert element names to the integer tags of atom_dictionary.ATM
        return [atom_dictionary.ATM[name.lower()] for name in atomnames]

    count = 0
    database_ligand_path = os.path.join(database_path, 'actives')
    database_receptor_path = os.path.join(database_path, 'receptors')
    for receptor in os.listdir(database_ligand_path):
        receptor_ligands_path = os.path.join(database_ligand_path, receptor)
        # all binaries for this receptor and its ligands share one subfolder
        path_to_pdb_subfolder = os.path.join(output_path, receptor)
        path_to_receptor = os.path.join(database_receptor_path,
                                        receptor + '.pdb')

        for ligand_name in os.listdir(receptor_ligands_path):
            count += 1

            ligand_output_path = os.path.join(path_to_pdb_subfolder,
                                              ligand_name)
            if os.path.exists(ligand_output_path + ".av4"):
                continue
            if FLAGS.orchestra_arrayjob and FLAGS.orchestra_jobid % FLAGS.orchestra_jobsize != count % FLAGS.orchestra_jobsize:
                continue
            ligand_folder = os.path.join(receptor_ligands_path, ligand_name)

            splited_ligands = os.listdir(ligand_folder)

            if len(splited_ligands) == 0:
                with open(os.path.join(database_path, 'empty.txt'),
                          'a') as fout:
                    fout.write(ligand_folder + '\n')
                continue

            try:
                prody_receptor = prody.parsePDB(path_to_receptor)
                prody_first_ligand = prody.parsePDB(
                    os.path.join(ligand_folder, splited_ligands[0]))
                first_elements = np.asarray(prody_first_ligand.getElements())

                # if there is more than one ligand file, they are written as
                # one multiframe ligand; collect the frames and stack once
                frames = [prody_first_ligand.getCoords()]
                for rest_ligand in splited_ligands[1:]:
                    prody_rest = prody.parsePDB(
                        os.path.join(ligand_folder, rest_ligand))

                    # every frame must list the same atoms in the same order;
                    # array_equal also copes with differing atom counts
                    if not np.array_equal(
                            np.asarray(prody_rest.getElements()),
                            first_elements):
                        raise Exception(
                            'attempting to add ligand with different order of atoms'
                        )
                    frames.append(prody_rest.getCoords())

                if len(frames) == 1:
                    multiframe_ligand_coords = frames[0]
                else:
                    multiframe_ligand_coords = np.dstack(frames)
                # actives: every frame is labelled 1 (decoys would use 0)
                labels = np.ones(len(frames), dtype=np.int32)

            except Exception as e:
                print(e)
                stats.ligands_failed += 1
                print("ligands parsed: {} ligands failed: {}".format(
                    stats.ligands_parsed, stats.ligands_failed))
                continue

            stats.ligands_parsed += 1
            print("ligands parsed: {} ligands failed: {}".format(
                stats.ligands_parsed, stats.ligands_failed))

            # create an output path to write binaries for protein and ligands
            try_create_chain_folder(path_to_pdb_subfolder)

            print(prody_receptor.getElements())

            # convert atomnames to tags and write the data to disk
            receptor_elements = atoms_to_numbers(prody_receptor.getElements())
            ligand_elements = atoms_to_numbers(first_elements)

            receptor_output_path = os.path.join(path_to_pdb_subfolder,
                                                receptor)
            _save_av4(receptor_output_path, [0], receptor_elements,
                      prody_receptor.getCoords())
            _save_av4(ligand_output_path, labels, ligand_elements,
                      multiframe_ligand_coords)