Exemplo n.º 1
0
    def run(self):
        """Reconcile proasis structure ids between the database and proasis.

        For ``self.protein`` this:

        1. collects every non-empty strucid stored in ``proasis_hits`` (for the
           protein's crystals) and in ``proasis_leads``;
        2. blanks the strucid of any database row whose strucid is not present
           in the proasis project;
        3. deletes from proasis any project structure the database does not
           know about;
        4. for each crystal reported by ``find_proasis_repeats`` whose repeats
           all share a single bound conf, keeps the first strucid and removes
           the others from both ``proasis_hits`` and proasis.

        The repeats table is finally written to ``test.csv`` in the current
        working directory.
        """
        project_strucids = paf.get_strucids_from_project(self.protein)

        conn, c = dbf.connectDB()
        try:
            # crystal names recorded for this protein (de-duplicated)
            c.execute('select crystal_name from proasis_hits where protein=%s', (self.protein,))
            crystals = set(str(row[0]) for row in c.fetchall())

            # every strucid the database currently holds for this protein
            db_strucids = set()
            for crystal in crystals:
                # NOTE(review): LIKE treats '_' and '%' in the crystal name as
                # wildcards - confirm an exact match ('=') is not what is wanted
                c.execute(
                    "select strucid from proasis_hits "
                    "where crystal_name like %s and strucid NOT LIKE ''",
                    (crystal,))
                db_strucids.update(str(row[0]) for row in c.fetchall())

            # lead strucids do not depend on the crystal: query them once, and
            # do so even when the protein has no hits, otherwise every lead
            # structure would look like it is missing from the database below
            c.execute("select strucid from proasis_leads where protein=%s and strucid!=''", (self.protein,))
            db_strucids.update(str(row[0]) for row in c.fetchall())

            # clear up mismatching entries
            in_common = db_strucids & set(project_strucids)

            for strucid in sorted(db_strucids - in_common):
                print(self.protein + ': ' + strucid + ' found in database but not in proasis')
                print('removing entry from db...')
                c.execute("UPDATE proasis_hits set strucid='' where strucid=%s", (strucid,))
                conn.commit()
                c.execute("UPDATE proasis_leads set strucid='' where strucid=%s", (strucid,))
                conn.commit()
                print('\n')

            for strucid in project_strucids:
                if strucid in in_common:
                    continue
                print(self.protein + ': ' + strucid + ' found in proasis but not in db')
                print('removing entry from proasis...')
                paf.delete_structure(strucid)
                print('\n')

            # clean up repeats
            repeats = find_proasis_repeats(self.protein)
            for i, crystal in enumerate(repeats['crystal']):
                bound_list = repeats['bound_confs'][i]
                strucids = repeats['strucids'][i]

                # only act when every strucid has a bound conf and all of them
                # are the same file, i.e. the uploads are true duplicates
                if len(bound_list) != len(strucids) or len(set(bound_list)) != 1:
                    continue

                print(str('identical uploaded structures: ' + str(strucids)) + ' (' + crystal + ')')
                print('removing repeat structures from proasis, and updating database...')

                # keep the first upload, drop the rest
                for strucid, bound_conf in zip(strucids[1:], bound_list[1:]):
                    c.execute('DELETE FROM proasis_hits WHERE strucid=%s and bound_conf=%s',
                              (strucid, bound_conf))
                    conn.commit()
                    paf.delete_structure(strucid)
        finally:
            # release the cursor and connection even if a query or proasis call fails
            c.close()
            conn.close()

        pd.DataFrame.from_dict(repeats).to_csv('test.csv')
Exemplo n.º 2
0
    def run(self):
        """Create or refresh ProasisHits / ProasisLeads rows for refined crystals.

        Every ``Refinement`` with ``outcome >= 3`` is inspected. An entry is
        skipped (and counted in ``fail_count``) when no usable pdb file is
        found or when any of the mtz / 2fofc / fofc file checks fails. It is
        skipped without counting when the pdb contains no ``LIG`` records or
        cannot be read. Otherwise the matching ``ProasisHits`` row(s) are
        updated or created - one per alternate conformation when the same
        ligand appears with more than one alt-conf letter - and the
        ``ProasisLeads`` row for the crystal's dimple reference is created or
        removed depending on whether the reference pdb exists on disk.

        Prints the number of failed entries and writes an empty output file.
        """
        fail_count = 0

        # select anything 'in refinement' (3) or above
        for obj in Refinement.objects.filter(outcome__gte=3):
            bound_conf = self._select_bound_pdb(obj)
            if not bound_conf:
                # no pdb = no proasis upload
                fail_count += 1
                continue

            mtz = db_functions.check_file_status('refine.mtz', bound_conf)
            two_fofc = db_functions.check_file_status('2fofc.map', bound_conf)
            fofc = db_functions.check_file_status('fofc.map', bound_conf)

            if not mtz[0] or not two_fofc[0] or not fofc[0]:
                fail_count += 1
                continue

            # no ligands (or unreadable pdb) = no upload to proasis
            ligand_list = self._read_ligand_strings(bound_conf)
            if not ligand_list:
                continue

            # sorted so the stored ligand list is deterministic between runs
            unique_ligands = sorted(set(ligand_list))
            confs = self._alt_conf_ligands(unique_ligands)

            # get the date the pdb file was modified
            mod_date = misc_functions.get_mod_date(bound_conf)
            if not mod_date:
                continue

            entry_exists = ProasisHits.objects.filter(
                refinement=obj, crystal_name=obj.crystal_name).exists()

            if entry_exists:
                if not confs:
                    entry = ProasisHits.objects.get(
                        refinement=obj, crystal_name=obj.crystal_name)
                    self._refresh_hit_entry(entry, bound_conf, mod_date, mtz,
                                            two_fofc, fofc, unique_ligands)
                else:
                    for conf in confs:
                        # NOTE(review): .get() raises DoesNotExist when a row
                        # exists for this crystal but not for this alt conf -
                        # confirm whether such rows should be created instead
                        entry = ProasisHits.objects.get(
                            refinement=obj,
                            crystal_name=obj.crystal_name,
                            altconf=conf)
                        self._refresh_hit_entry(entry, bound_conf, mod_date, mtz,
                                                two_fofc, fofc, unique_ligands,
                                                altconf=conf)
            else:
                fields = dict(
                    refinement=obj,
                    crystal_name=obj.crystal_name,
                    pdb_file=bound_conf,
                    modification_date=mod_date,
                    mtz=mtz[1],
                    two_fofc=two_fofc[1],
                    fofc=fofc[1],
                    ligand_list=unique_ligands)
                if not confs:
                    # create entry without an altconf
                    ProasisHits.objects.get_or_create(**fields)
                else:
                    # create an entry for each altconf
                    # TODO: The pdb file will need to be edited later to pull out other altconfs of the same lig
                    for conf in confs:
                        ProasisHits.objects.get_or_create(altconf=conf, **fields)

            dimple = Dimple.objects.filter(crystal_name=obj.crystal_name)
            if dimple.count() != 1:
                continue

            # evaluate the queryset row once instead of on every attribute access
            reference = dimple[0].reference
            if not (reference and reference.reference_pdb):
                continue

            if os.path.isfile(reference.reference_pdb):
                ProasisLeads.objects.get_or_create(reference_pdb=reference)
            elif ProasisLeads.objects.filter(reference_pdb=reference).exists():
                # reference pdb has disappeared from disk: drop the lead
                print('removing...')
                ProasisLeads.objects.get(reference_pdb=reference).delete()

        print(fail_count)

        with self.output().open('w') as f:
            f.write('')

    def _select_bound_pdb(self, obj):
        """Return the pdb path to upload for *obj*, or '' when none is usable."""
        if obj.bound_conf:
            # a pdb named in the bound_conf field is used, but only if it exists
            return obj.bound_conf if os.path.isfile(obj.bound_conf) else ''

        if obj.pdb_latest and os.path.isfile(obj.pdb_latest):
            if 'Refine' not in obj.pdb_latest:
                return obj.pdb_latest
            # refinement folder: use the bound-state pdb rather than the
            # ensemble, and only when exactly one candidate matches
            search_path = '/'.join(obj.pdb_latest.split('/')[:-1])
            matches = glob.glob(str(search_path + '/refine*bound*.pdb'))
            if len(matches) == 1:
                return matches[0]

        return ''

    def _read_ligand_strings(self, pdb_path):
        """Return the ligand strings found in *pdb_path*, or None if it cannot be read."""
        ligands = []
        try:
            with open(pdb_path, 'r') as pdb_file:
                for line in pdb_file:
                    if 'LIG' not in line:
                        continue
                    # ligands identified by 'LIG', with preceding '.' for alt conf letter;
                    # the whole string is kept (not split) so alt confs can be told apart
                    # TODO: This has changed from a list of ['LIG','RES','ID'] to string. Check usage
                    match = re.search(r".LIG.......", line)
                    if match:
                        ligands.append(match.group())
        except (IOError, OSError, UnicodeError):
            return None
        return ligands

    def _alt_conf_ligands(self, unique_ligands):
        """Return, without duplicates, the ligand strings that are alt confs of one ligand."""
        # drop the first character (alt conf letter) from each ligand string
        stripped = [lig[1:] for lig in unique_ligands]
        # a stripped string seen more than once means the ligand has alt confs
        repeated = set(s for s in stripped if stripped.count(s) > 1)
        return [lig for lig in unique_ligands
                if any(s in lig for s in repeated)]

    def _remove_hit_files(self, entry):
        """Delete the entry's files that live under ``self.hit_directory``, if present."""
        for path in (entry.pdb_file, entry.mtz, entry.two_fofc, entry.fofc):
            # isfile guard: a file may already be gone, e.g. removed for a
            # sibling alt-conf entry that pointed at the same path
            if path and self.hit_directory in path and os.path.isfile(path):
                os.remove(path)

    def _refresh_hit_entry(self, entry, bound_conf, mod_date, mtz, two_fofc,
                           fofc, ligands, altconf=None):
        """Update *entry* when its pdb is older than *mod_date* or it has no strucid.

        An existing strucid is deleted from proasis and the entry's files under
        the hit directory are removed before the fields are overwritten.
        ``entry.altconf`` is only set when *altconf* is given.
        """
        if not (entry.modification_date < mod_date or not entry.strucid):
            return

        if entry.strucid:
            proasis_api_funcs.delete_structure(entry.strucid)
            entry.strucid = None
            entry.save()
            self._remove_hit_files(entry)

        entry.pdb_file = bound_conf
        entry.modification_date = mod_date
        entry.mtz = mtz[1]
        entry.two_fofc = two_fofc[1]
        entry.fofc = fofc[1]
        entry.ligand_list = ligands
        if altconf is not None:
            entry.altconf = altconf
        entry.save()