def create_empty_ligand(self, ligand_name):
    # gtoplig webresource
    lp = self.build_ligand_properties()
    ligand = Ligand()
    ligand.properities = lp
    ligand.name = ligand_name
    ligand.canonical = True
    ligand.ambigious_alias = False
    ligand.pdbe = None
    try:
        ligand.save()
    except IntegrityError:
        self.logger.info("empty ligand found")
        return Ligand.objects.get(name=ligand_name, canonical=True)
    return ligand
def chose_reference_from_assays(self, assays):
    references = list()
    final_assay = list()
    reference_ligand = Ligand()
    for i in reversed(assays):
        if (i['quantitive_activity'] and i['quantitive_activity'] is not None
                and i['quantitive_efficacy'] and i['quantitive_efficacy'] is not None
                and i['ligand'] is not None):
            reference_ligand = i['ligand']
    reference_return = assays.copy()
    assay_return = assays.copy()
    references = self.filter_reference_assay(reference_return, reference_ligand)
    final_assay = self.filter_assay_reference(assay_return, reference_ligand)
    self.logger.info('return reference assay')
    return references, final_assay
def get_or_make_ligand(ligand_id, type_id, name=None): if type_id == 'PubChem CID' or type_id == 'SMILES': if type_id == 'PubChem CID': pubchem_lookup_value = 'cid' elif type_id == 'SMILES': pubchem_lookup_value = 'smiles' try: web_resource = WebResource.objects.get(slug='pubchem') except: # abort if pdb resource is not found raise Exception('PubChem resource not found, aborting!') if name: ligand_name = name else: ligand_name = False try: # if this name is canonical and it has a ligand record already if (ligand_name == False): l = None ls = Ligand.objects.filter( canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id) for ligand in ls: l = ligand #print (l) break if l == None: l = Ligand.objects.get( canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id) else: l = Ligand.objects.get( name=ligand_name, canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id) #l = Ligand.objects.get(name=ligand_name, canonical=True, # properities__web_links__web_resource=web_resource, # properities__web_links__index=ligand_id) # except Ligand.DoesNotExist: try: # if exists under different name l_canonical = Ligand.objects.get( properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id, canonical=True) #print (created) try: l, created = Ligand.objects.get_or_create( properities=l_canonical.properities, name=ligand_name, canonical=False) except IntegrityError: l = Ligand.objects.get(properities=l_canonical.properities, name=ligand_name, canonical=False) except Ligand.DoesNotExist: # fetch ligand from pubchem default_ligand_type = 'Small molecule' lt, created = LigandType.objects.get_or_create( slug=slugify(default_ligand_type), defaults={'name': default_ligand_type}) l = Ligand() #print (ligand_name) l = l.load_from_pubchem(pubchem_lookup_value, ligand_id, lt, ligand_name) #print (l) if l == None and type_id == 'SMILES': #insert manually if smiles and unfound in pubchem try: l = Ligand.objects.get(name=ligand_name, canonical=True, properities__smiles=ligand_id) except Ligand.DoesNotExist: try: l = Ligand.objects.get( name__startswith=ligand_name, canonical=True, properities__smiles=ligand_id ) #if no properities exist except Ligand.DoesNotExist: try: l = Ligand.objects.get( name=ligand_name, canonical=True, properities__smiles=None ) #if no properities exist l.properities.smiles = ligand_id l.properities.save() l.save() except Ligand.DoesNotExist: ## now insert a new ligand, but first make sure name is unique if Ligand.objects.filter( name=ligand_name).exists(): ls = Ligand.objects.filter( name__startswith=ligand_name, canonical=True).order_by("pk") for l_temp in ls: last = l_temp.name.split("_")[-1] if last == ligand_name: #no addition yet ligand_name = ligand_name + "_1" else: ligand_name = ligand_name + "_" + str( int(last) + 1) l = Ligand() l.name = ligand_name lp = LigandProperities() lp.smiles = ligand_id lp.ligand_type = lt lp.save() l.properities = lp l.canonical = True #maybe false, but that would break stuff. 
l.ambigious_alias = False try: l.save() except IntegrityError: l = Ligand.objects.get(name=ligand_name, canonical=True) elif name: # if this name is canonical and it has a ligand record already if Ligand.objects.filter(name=name, canonical=True).exists(): l = Ligand.objects.get(name=name, canonical=True) # if this matches an alias that only has "one" parent canonical name - eg distinct elif Ligand.objects.filter(name=name, canonical=False, ambigious_alias=False).exists(): l = Ligand.objects.get(name=name, canonical=False, ambigious_alias=False) # if this matches an alias that only has several canonical parents, must investigate, start # with empty. elif Ligand.objects.filter(name=name, canonical=False, ambigious_alias=True).exists(): lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = name l.canonical = False l.ambigious_alias = True l.save() l.load_by_name(name) # if neither a canonical or alias exists, create the records. Remember to check for # canonical / alias status. else: lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = str(name) l.canonical = True l.ambigious_alias = False try: l.save() l.load_by_name(str(name)) except IntegrityError: l = Ligand.objects.get(name=str(name), canonical=True) else: l = None return l
def main_func(self, positions, iteration): # filenames if not positions[1]: rows = self.data[positions[0]:] else: rows = self.data[positions[0]:positions[1]] missing_proteins = {} mutants_for_proteins = {} c = 0 skipped = 0 inserted = 0 bulk_m = [] bulk_r = [] current_sheet = time.time() for r in rows: # print(source_file,c) # PRINT IF ERRORS OCCUR # self.logger.info('File '+str(r['source_file'])+' number '+str(c)) current = time.time() c += 1 # if c%100==0: # self.logger.info('Parsed '+str(c)+' mutant data entries') # publication try: #fix if it thinks it's float. float(r['reference']) r['reference'] = str(int(r['reference'])) float(r['review']) r['review'] = str(int(r['review'])) except ValueError: pass if r['reference'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' if r['reference'] not in self.publication_cache: try: pub = Publication.objects.get( web_link__index=r['reference'], web_link__web_resource__slug=pub_type) except Publication.DoesNotExist: pub = Publication() try: pub.web_link = WebLink.objects.get( index=r['reference'], web_resource__slug=pub_type) except WebLink.DoesNotExist: wl = WebLink.objects.create( index=r['reference'], web_resource=WebResource.objects.get( slug=pub_type)) pub.web_link = wl if pub_type == 'doi': pub.update_from_doi(doi=r['reference']) elif pub_type == 'pubmed': pub.update_from_pubmed_data(index=r['reference']) try: pub.save() except: self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type) continue #if something off with publication, skip. self.publication_cache[r['reference']] = pub else: pub = self.publication_cache[r['reference']] # print(r['review'],r['reference']) if r['review'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' # print(r['review'],pub_type) if r['review']: if r['review'] not in self.publication_cache: try: pub_review = Publication.objects.get( web_link__index=r['review'], web_link__web_resource__slug=pub_type) except Publication.DoesNotExist: pub_review = Publication() try: pub_review.web_link = WebLink.objects.get( index=r['review'], web_resource__slug=pub_type) except WebLink.DoesNotExist: wl = WebLink.objects.create( index=r['review'], web_resource=WebResource.objects.get( slug=pub_type)) pub_review.web_link = wl if pub_type == 'doi': pub_review.update_from_doi(doi=r['review']) elif pub_type == 'pubmed': pub_review.update_from_pubmed_data( index=r['review']) try: pub_review.save() except: self.logger.error('error with review ' + str(r['review']) + ' ' + pub_type) continue #if something off with publication, skip. 
self.publication_cache[r['review']] = pub_review else: pub_review = self.publication_cache[r['review']] else: pub_review = None l = None if str(r['ligand_name']) in self.ligand_cache: if r['ligand_id'] in self.ligand_cache[str(r['ligand_name'])]: l = self.ligand_cache[str( r['ligand_name'])][r['ligand_id']] else: self.ligand_cache[str(r['ligand_name'])] = {} if not l: l = get_or_make_ligand(r['ligand_id'], r['ligand_type'], str(r['ligand_name'])) self.ligand_cache[str(r['ligand_name'])][r['ligand_id']] = l l_ref = None if str(r['exp_mu_ligand_ref']) in self.ref_ligand_cache: l_ref = self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] else: if Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=True ).exists( ): #if this name is canonical and it has a ligand record already l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) elif Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False ).exists( ): #if this matches an alias that only has "one" parent canonical name - eg distinct l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False) elif Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True ).exists( ): #if this matches an alias that only has several canonical parents, must investigate, start with empty. lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = False l_ref.ambigious_alias = True l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False).exists( ): #amigious_alias not specified l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) l_ref.ambigious_alias = False l_ref.save() elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status. 
lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = True l_ref.ambigious_alias = False l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) try: l_ref.save() except IntegrityError: l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) print("error failing ligand, duplicate?") # logger.error("FAILED SAVING LIGAND, duplicate?") else: l_ref = None self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] = l_ref protein_id = 0 residue_id = 0 protein = Protein.objects.filter(entry_name=r['protein']) if protein.exists(): protein = protein.get() if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 else: skipped += 1 if r['protein'] in missing_proteins: missing_proteins[r['protein']] += 1 else: missing_proteins[r['protein']] = 1 self.logger.error('Skipped due to no protein ' + r['protein']) continue res = Residue.objects.filter( protein_conformation__protein=protein, amino_acid=r['mutation_from'], sequence_number=r['mutation_pos']) #FIXME MAKE AA CHECK if res.exists(): res = res.get() else: self.logger.error('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:' + str(r['mutation_pos']) + ' AA:' + r['mutation_from']) skipped += 1 continue if r['ligand_class']: l_role, created = LigandRole.objects.get_or_create( name=r['ligand_class'], defaults={'slug': slugify(r['ligand_class'])[:50] }) # FIXME this should not be needed else: l_role = None if r['exp_type']: exp_type_id, created = MutationExperimentalType.objects.get_or_create( type=r['exp_type']) else: exp_type_id = None if r['exp_func']: exp_func_id, created = MutationFunc.objects.get_or_create( func=r['exp_func']) else: exp_func_id = None if r['exp_mu_effect_ligand_prop'] or r['exp_mu_effect_qual']: exp_qual_id, created = MutationQual.objects.get_or_create( qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop']) else: exp_qual_id = None if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r[ 'opt_sign'] or r['opt_percentage'] or r['opt_qual'] or r[ 'opt_agonist']: exp_opt_id, created = MutationOptional.objects.get_or_create( type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist']) else: exp_opt_id = None try: mutation, created = Mutation.objects.get_or_create( amino_acid=r['mutation_to'], protein=protein, residue=res) except IntegrityError: mutation = Mutation.objects.get(amino_acid=r['mutation_to'], protein=protein, residue=res) logtypes = ['pEC50', 'pIC50', 'pK'] foldchange = 0 typefold = '' if r['exp_wt_value'] != 0 and r[ 'exp_mu_value_raw'] != 0: #fix for new format if re.match("(" + ")|(".join(logtypes) + ")", r['exp_type']): #-log values! 
foldchange = round( math.pow(10, -r['exp_mu_value_raw']) / pow(10, -r['exp_wt_value']), 3) typefold = r['exp_type'] + "_log" else: foldchange = round( r['exp_mu_value_raw'] / r['exp_wt_value'], 3) typefold = r['exp_type'] + "_not_log" if foldchange < 1 and foldchange != 0: foldchange = -round((1 / foldchange), 3) elif r['fold_effect'] != 0: foldchange = round(r['fold_effect'], 3) if foldchange < 1: foldchange = -round((1 / foldchange), 3) raw_experiment = self.insert_raw(r) bulk = MutationExperiment( refs=pub, review=pub_review, protein=protein, residue=res, ligand=l, ligand_role=l_role, ligand_ref=l_ref, #raw = raw_experiment, #raw_experiment, OR None optional=exp_opt_id, exp_type=exp_type_id, exp_func=exp_func_id, exp_qual=exp_qual_id, mutation=mutation, wt_value=r['exp_wt_value'], # wt_unit=r['exp_wt_unit'], mu_value=r['exp_mu_value_raw'], mu_sign=r['exp_mu_effect_sign'], foldchange=foldchange) # mut_id = obj.id bulk_r.append(raw_experiment) bulk_m.append(bulk) inserted += 1 end = time.time() diff = round(end - current, 2) #print(diff) self.logger.info('Parsed ' + str(c) + ' mutant data entries. Skipped ' + str(skipped)) current = time.time() raws = MutationRaw.objects.bulk_create(bulk_r) for i, me in enumerate(bulk_m): me.raw = raws[i] MutationExperiment.objects.bulk_create(bulk_m) end = time.time() diff = round(end - current, 2) current_sheet diff_2 = round(end - current_sheet, 2) print("overall", diff_2, "bulk", diff, len(bulk_m), "skipped", str(skipped))
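The fold-change bookkeeping above converts p-scale measurements (pEC50, pIC50, pK) back to linear values before taking the mutant/wild-type ratio, and reports ratios below 1 as negative fold changes. A minimal standalone restatement of that rule follows; the helper name and the example numbers are illustrative, not part of the loader.

import math

def fold_change(wt_value, mu_value, exp_type, logtypes=('pEC50', 'pIC50', 'pK')):
    """Illustrative restatement of the fold-change rule used above."""
    if any(exp_type.startswith(t) for t in logtypes):
        # p-scale values are -log10 of a concentration, so undo the log first
        fold = round(math.pow(10, -mu_value) / math.pow(10, -wt_value), 3)
    else:
        fold = round(mu_value / wt_value, 3)
    # ratios below 1 are flipped to negative fold changes (e.g. 0.01 -> -100.0)
    if 0 < fold < 1:
        fold = -round(1 / fold, 3)
    return fold

# fold_change(8, 6, 'pEC50') -> 100.0
# fold_change(6, 8, 'pEC50') -> -100.0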
def main_func(self, positions, iteration, count, lock): # filenames # if not positions[1]: # rows = self.data[positions[0]:] # else: # rows = self.data[positions[0]:positions[1]] missing_proteins = {} mutants_for_proteins = {} wrong_uniport_ids = {} c = 0 skipped = 0 inserted = 0 bulk_m = [] bulk_r = [] current_sheet = time.time() rows = self.data_all while count.value < len(rows): with lock: r = rows[count.value] count.value += 1 # for r in rows: # print(r['source_file'],c) # PRINT IF ERRORS OCCUR #self.logger.info('File '+str(r['source_file'])+' number '+str(c)) current = time.time() c += 1 # if c%100==0: # self.logger.info('Parsed '+str(c)+' mutant data entries') # publication try: #fix if it thinks it's float. float(r['reference']) r['reference'] = str(int(r['reference'])) float(r['review']) r['review'] = str(int(r['review'])) except ValueError: pass if r['reference'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' if r['reference'] not in self.publication_cache: try: wl = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type) except WebLink.DoesNotExist: try: wl = WebLink.objects.create( index=r['reference'], web_resource=WebResource.objects.get( slug=pub_type)) except IntegrityError: wl = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type) try: pub = Publication.objects.get(web_link=wl) except Publication.DoesNotExist: pub = Publication() try: pub.web_link = wl pub.save() except IntegrityError: pub = Publication.objects.get(web_link=wl) if pub_type == 'doi': pub.update_from_doi(doi=r['reference']) elif pub_type == 'pubmed': pub.update_from_pubmed_data(index=r['reference']) try: pub.save() except: self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type) continue #if something off with publication, skip. self.publication_cache[r['reference']] = pub else: pub = self.publication_cache[r['reference']] # print(r['review'],r['reference']) if r['review'].isdigit(): #assume pubmed pub_type = 'pubmed' elif r['review'].startswith('http'): pub_type = 'raw_link' else: #assume doi pub_type = 'doi' # print(r['review'],pub_type) if r['review']: if r['review'] not in self.publication_cache: try: wl = WebLink.objects.get(index=r['review'], web_resource__slug=pub_type) except WebLink.DoesNotExist: try: wl = WebLink.objects.create( index=r['review'], web_resource=WebResource.objects.get( slug=pub_type)) except IntegrityError: wl = WebLink.objects.get( index=r['review'], web_resource__slug=pub_type) try: pub_review = Publication.objects.get(web_link=wl) except Publication.DoesNotExist: pub_review = Publication() try: pub_review.web_link = wl pub_review.save() except IntegrityError: pub_review = Publication.objects.get(web_link=wl) if pub_type == 'doi': pub_review.update_from_doi(doi=r['review']) elif pub_type == 'pubmed': pub_review.update_from_pubmed_data( index=r['review']) try: pub_review.save() except: self.logger.error('error with review ' + str(r['review']) + ' ' + pub_type) continue #if something off with publication, skip. 
self.publication_cache[r['review']] = pub_review else: pub_review = self.publication_cache[r['review']] else: pub_review = None l = None if str(r['ligand_name']) in self.ligand_cache: if r['ligand_id'] in self.ligand_cache[str(r['ligand_name'])]: l = self.ligand_cache[str( r['ligand_name'])][r['ligand_id']] else: self.ligand_cache[str(r['ligand_name'])] = {} if not l: try: l = get_or_make_ligand(r['ligand_id'], r['ligand_type'], str(r['ligand_name'])) except Exception as msg: print( 'Something errored with ligand, aborting entry of mutation', r['ligand_name'], r['ligand_type'], r['ligand_id'], r['source_file']) print(msg) traceback.print_exc() continue self.ligand_cache[str(r['ligand_name'])][r['ligand_id']] = l l_ref = None if str(r['exp_mu_ligand_ref']) in self.ref_ligand_cache: l_ref = self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] else: if Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=True ).exists( ): #if this name is canonical and it has a ligand record already l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) elif Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False ).exists( ): #if this matches an alias that only has "one" parent canonical name - eg distinct l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False) elif Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True ).exists( ): #if this matches an alias that only has several canonical parents, must investigate, start with empty. lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = False l_ref.ambigious_alias = True l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False).exists( ): #amigious_alias not specified l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) l_ref.ambigious_alias = False l_ref.save() elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status. 
lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = True l_ref.ambigious_alias = False try: l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) except IntegrityError: if Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=True).exists(): l_ref = Ligand.objects.get( name=r['exp_mu_ligand_ref'], canonical=True) else: l_ref = Ligand.objects.get( name=r['exp_mu_ligand_ref'], canonical=False) # print("error failing ligand, duplicate?") try: l_ref.save() except IntegrityError: l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) # print("error failing ligand, duplicate?") # logger.error("FAILED SAVING LIGAND, duplicate?") else: l_ref = None self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] = l_ref protein_id = 0 residue_id = 0 protein = Protein.objects.filter(entry_name=r['protein']) if protein.exists(): protein = protein.get() if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 elif r['protein'] not in missing_proteins: try: r['protein'] = wrong_uniport_ids[r['protein']] real_uniprot = wrong_uniport_ids[r['protein']] protein = Protein.objects.get(entry_name=r['protein']) # print('fetched with lookup table',r['protein']) except: # look for it as uniprot protein = Protein.objects.filter( web_links__web_resource__slug='uniprot', web_links__index=r['protein'].upper()) if protein.exists(): protein = protein.get() real_uniprot = protein.entry_name if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 else: # Try to lookup in uniprot to catch typing errors / variants in entry_name url = 'http://www.uniprot.org/uniprot/$index.xml' cache_dir = ['uniprot', 'id'] uniprot_protein = fetch_from_web_api(url, r['protein'], cache_dir, xml=True) try: real_uniprot = uniprot_protein.find( './/{http://uniprot.org/uniprot}name' ).text.lower() protein = Protein.objects.get( entry_name=real_uniprot) except: skipped += 1 if r['protein'] in missing_proteins: missing_proteins[r['protein']] += 1 else: missing_proteins[r['protein']] = 1 # print('Skipped due to no protein '+ r['protein']) self.logger.error( 'Skipped due to no protein ' + r['protein']) continue wrong_uniport_ids[r['protein']] = protein.entry_name r['protein'] = real_uniprot else: missing_proteins[r['protein']] += 1 continue res = Residue.objects.filter( protein_conformation__protein=protein, amino_acid=r['mutation_from'], sequence_number=r['mutation_pos']) #FIXME MAKE AA CHECK if res.exists(): res = res.get() else: self.logger.error('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:' + str(r['mutation_pos']) + ' AA:' + r['mutation_from']) # print('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:'+str(r['mutation_pos']) + ' AA:'+r['mutation_from'],r['source_file']) skipped += 1 continue if r['ligand_class']: try: l_role, created = LigandRole.objects.get_or_create( name=r['ligand_class'], defaults={'slug': slugify(r['ligand_class'])[:50] }) # FIXME this should not be needed except Exception as e: if LigandRole.objects.filter( slug=slugify(r['ligand_class'])[:50]).exists(): l_role = LigandRole.objects.get( slug=slugify(r['ligand_class'])[:50]) if l_role.name == slugify(r['ligand_class'])[:50]: #if name of role is same as slug, then it was created by constructs script, replace it l_role.name = r['ligand_class'] l_role.save() else: print(e) print("Error with", 
r['ligand_class'], slugify(r['ligand_class'])[:50]) l_role, created = LigandRole.objects.get_or_create( slug=slugify(r['ligand_class']) [:50]) # FIXME this should not be needed else: l_role = None if r['exp_type']: exp_type_id, created = MutationExperimentalType.objects.get_or_create( type=r['exp_type']) else: exp_type_id = None if r['exp_func']: exp_func_id, created = MutationFunc.objects.get_or_create( func=r['exp_func']) else: exp_func_id = None if r['exp_mu_effect_ligand_prop'] or r['exp_mu_effect_qual']: exp_qual_id, created = MutationQual.objects.get_or_create( qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop']) else: exp_qual_id = None # if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r['opt_sign'] or r['opt_percentage'] or r['opt_qual'] or r['opt_agonist']: # exp_opt_id, created = MutationOptional.objects.get_or_create(type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist']) # else: # exp_opt_id = None try: mutation, created = Mutation.objects.get_or_create( amino_acid=r['mutation_to'], protein=protein, residue=res) except IntegrityError: mutation = Mutation.objects.get(amino_acid=r['mutation_to'], protein=protein, residue=res) logtypes = ['pEC50', 'pIC50', 'pK'] foldchange = 0 typefold = '' if r['exp_wt_value'] != 0 and r[ 'exp_mu_value_raw'] != 0: #fix for new format if re.match("(" + ")|(".join(logtypes) + ")", r['exp_type']): #-log values! try: foldchange = round( math.pow(10, -r['exp_mu_value_raw']) / pow(10, -r['exp_wt_value']), 3) except: print(r) typefold = r['exp_type'] + "_log" elif "%" == r['exp_wt_unit']: # if % then it's a difference case, then lower value is bad. Otherwise it's conc and lower is better foldchange = round( r['exp_wt_value'] / r['exp_mu_value_raw'], 3) else: foldchange = round( r['exp_mu_value_raw'] / r['exp_wt_value'], 3) typefold = r['exp_type'] + "_not_log" if foldchange > 0 and foldchange < 1 and foldchange != 0: foldchange = -round((1 / foldchange), 3) elif r['fold_effect'] != 0: foldchange = round(r['fold_effect'], 3) if foldchange < 1: foldchange = -round((1 / foldchange), 3) r['fold_effect'] = foldchange raw_experiment = self.insert_raw(r) # raw_experiment.save() bulk = MutationExperiment( refs=pub, review=pub_review, submitting_group=r['submitting_group'], data_container=r['data_container'], data_container_number=r['data_container_number'], protein=protein, residue=res, ligand=l, ligand_role=l_role, ligand_ref=l_ref, # raw = raw_experiment, #raw_experiment, OR None # optional = exp_opt_id, exp_type=exp_type_id, exp_func=exp_func_id, exp_qual=exp_qual_id, mutation=mutation, wt_value=r['exp_wt_value'], # wt_unit=r['exp_wt_unit'], mu_value=r['exp_mu_value_raw'], mu_sign=r['exp_mu_effect_sign'], foldchange=foldchange, opt_receptor_expression=r['opt_receptor_expression'], opt_basal_activity=r['opt_basal_activity'], opt_gain_of_activity=r['opt_gain_of_activity'], opt_ligand_emax=r['opt_ligand_emax'], opt_agonist=r['opt_agonist'], ) # for line,val in r.items(): # val = str(val) # if len(val)>100: # print(line,"too long",val) # mut_id = obj.id bulk_r.append(raw_experiment) bulk_m.append(bulk) # try: # bulk.save() # except Exception as e: # print(e) # print(r) # break #print('saved ',r['source_file']) inserted += 1 end = time.time() diff = round(end - current, 2) #print(diff) self.logger.info('Parsed ' + str(c) + ' mutant data entries. 
Skipped ' + str(skipped)) current = time.time() raws = MutationRaw.objects.bulk_create(bulk_r) for i, me in enumerate(bulk_m): me.raw = raws[i] MutationExperiment.objects.bulk_create(bulk_m) end = time.time() diff = round(end - current, 2) # current_sheet diff_2 = round(end - current_sheet, 2) print("overall", diff_2, "bulk", diff, len(bulk_m), "skipped", str(skipped)) sorted_missing_proteins = sorted(missing_proteins.items(), key=operator.itemgetter(1), reverse=True)
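This second main_func hands out rows to parallel workers through a shared counter (count) guarded by a lock, rather than pre-slicing self.data_all. A sketch of how such a worker loop could be driven is below; the run_workers helper, the process count and the positions argument are assumptions for illustration, and only the Value/Lock hand-off mirrors the loop above.

import multiprocessing

def run_workers(command, n_proc=4):
    # Shared row index and the lock that guards it while a worker claims a row
    count = multiprocessing.Value('i', 0)
    lock = multiprocessing.Lock()
    procs = []
    for i in range(n_proc):
        p = multiprocessing.Process(target=command.main_func,
                                    args=([0, None], i + 1, count, lock))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()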
def create_mutant_data(self, filenames): self.logger.info('CREATING MUTANT DATA') # what files should be parsed? if not filenames: filenames = os.listdir(self.structure_data_dir) missing_proteins = {} mutants_for_proteins = {} for source_file in filenames: source_file_path = os.sep.join([self.structure_data_dir, source_file]) if os.path.isfile(source_file_path) and source_file[0] != '.': self.logger.info('Reading file {}'.format(source_file_path)) # read the yaml file if source_file[-4:]=='xlsx' or source_file[-3:]=='xls': rows = self.loaddatafromexcel(source_file_path) rows = self.analyse_rows(rows) elif source_file[-4:]=='yaml': rows = yaml.load(open(source_file_path, 'r')) temp = [] for r in rows: d = {} d['reference'] = r['pubmed'] d['protein'] = r['entry_name'].replace("__","_").lower() d['mutation_pos'] = r['seq'] d['mutation_from'] = r['from_res'] d['mutation_to'] = r['to_res'] d['ligand_name'] = '' d['ligand_type'] = '' d['ligand_id'] = '' d['ligand_class'] = '' d['exp_type'] = '' d['exp_func'] = '' d['exp_wt_value'] = 0 d['exp_wt_unit'] = '' d['exp_mu_effect_sign'] = '' d['exp_mu_value_raw'] = 0 d['fold_effect'] = 0 d['exp_mu_effect_qual'] = '' d['exp_mu_effect_ligand_prop'] = '' d['exp_mu_ligand_ref'] = '' d['opt_type'] = '' d['opt_wt'] = 0 d['opt_mu'] = 0 d['opt_sign'] = '' d['opt_percentage'] = 0 d['opt_qual'] = '' d['opt_agonist'] = '' if len(d['mutation_to'])>1 or len(d['mutation_from'])>1: #if something is off with amino acid continue temp.append(d) rows = temp else: self.logger.info('unknown format'.source_file) continue c = 0 skipped = 0 inserted = 0 for r in rows: c += 1 if c%1000==0: self.logger.info('Parsed '+str(c)+' mutant data entries') # publication try: #fix if it thinks it's float. float(r['reference']) r['reference'] = str(int(r['reference'])) except ValueError: pass if r['reference'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' try: pub = Publication.objects.get(web_link__index=r['reference'], web_link__web_resource__slug=pub_type) except Publication.DoesNotExist: pub = Publication() try: pub.web_link = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type) except WebLink.DoesNotExist: wl = WebLink.objects.create(index=r['reference'], web_resource = WebResource.objects.get(slug=pub_type)) pub.web_link = wl if pub_type == 'doi': pub.update_from_doi(doi=r['reference']) elif pub_type == 'pubmed': pub.update_from_pubmed_data(index=r['reference']) try: pub.save() except: self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type) continue #if something off with publication, skip. 
if r['ligand_type']=='PubChem CID' or r['ligand_type']=='SMILES': if r['ligand_type']=='PubChem CID': pubchem_lookup_value = 'cid' elif r['ligand_type']=='SMILES': pubchem_lookup_value = 'smiles' try: web_resource = WebResource.objects.get(slug='pubchem') except: # abort if pdb resource is not found raise Exception('PubChem resource not found, aborting!') if 'ligand_name' in r and r['ligand_name']: ligand_name = str(r['ligand_name']) else: ligand_name = False try: # if this name is canonical and it has a ligand record already l = Ligand.objects.get(name=ligand_name, canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=r['ligand_id']) except Ligand.DoesNotExist: try: # if exists under different name l_canonical = Ligand.objects.get(properities__web_links__web_resource=web_resource, properities__web_links__index=r['ligand_id'], canonical=True) l, created = Ligand.objects.get_or_create(properities = l_canonical.properities, name = ligand_name, canonical = False) if created: self.logger.info('Created ligand {}'.format(l.name)) except Ligand.DoesNotExist: # fetch ligand from pubchem default_ligand_type = 'Small molecule' lt, created = LigandType.objects.get_or_create(slug=slugify(default_ligand_type), defaults={'name': default_ligand_type}) l = Ligand() l = l.load_from_pubchem(pubchem_lookup_value, r['ligand_id'], lt, ligand_name) elif r['ligand_name']: # if this name is canonical and it has a ligand record already if Ligand.objects.filter(name=r['ligand_name'], canonical=True).exists(): l = Ligand.objects.get(name=r['ligand_name'], canonical=True) # if this matches an alias that only has "one" parent canonical name - eg distinct elif Ligand.objects.filter(name=r['ligand_name'], canonical=False, ambigious_alias=False).exists(): l = Ligand.objects.get(name=r['ligand_name'], canonical=False, ambigious_alias=False) # if this matches an alias that only has several canonical parents, must investigate, start # with empty. elif Ligand.objects.filter(name=r['ligand_name'], canonical=False, ambigious_alias=True).exists(): lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = r['ligand_name'] l.canonical = False l.ambigious_alias = True l.save() l.load_by_name(r['ligand_name']) # if neither a canonical or alias exists, create the records. Remember to check for # canonical / alias status. else: lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = str(r['ligand_name']) l.canonical = True l.ambigious_alias = False l.save() l.load_by_name(str(r['ligand_name'])) else: l = None if Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=True).exists(): #if this name is canonical and it has a ligand record already l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False).exists(): #if this matches an alias that only has "one" parent canonical name - eg distinct l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False) elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True).exists(): #if this matches an alias that only has several canonical parents, must investigate, start with empty. 
lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = False l_ref.ambigious_alias = True l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status. lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = True l_ref.ambigious_alias = False l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() else: l_ref = None protein_id = 0 residue_id = 0 protein=Protein.objects.filter(entry_name=r['protein']) if protein.exists(): protein=protein.get() if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 else: skipped += 1 if r['protein'] in missing_proteins: missing_proteins[r['protein']] += 1 else: missing_proteins[r['protein']] = 1 self.logger.error('Skipped due to no protein '+ r['protein']) continue res=Residue.objects.filter(protein_conformation__protein=protein,sequence_number=r['mutation_pos']) if res.exists(): res=res.get() else: self.logger.error('Skipped due to no residue ' + r['protein'] + ' pos:'+str(r['mutation_pos'])) skipped += 1 continue if r['ligand_class']: l_role, created = LigandRole.objects.get_or_create(name=r['ligand_class'], defaults={'slug': slugify(r['ligand_class'])[:50]}) # FIXME this should not be needed else: l_role = None if r['exp_type']: exp_type_id, created = MutationExperimentalType.objects.get_or_create(type=r['exp_type']) else: exp_type_id = None if r['exp_func']: exp_func_id, created = MutationFunc.objects.get_or_create(func=r['exp_func']) else: exp_func_id = None if r['exp_mu_effect_ligand_prop'] or r['exp_mu_effect_qual']: exp_qual_id, created = MutationQual.objects.get_or_create(qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop']) else: exp_qual_id = None if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r['opt_sign'] or r['opt_percentage'] or r['opt_qual'] or r['opt_agonist']: exp_opt_id, created = MutationOptional.objects.get_or_create(type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist']) else: exp_opt_id = None mutation, created = Mutation.objects.get_or_create(amino_acid=r['mutation_to'],protein=protein, residue=res) logtypes = ['pEC50','pIC50','pK'] foldchange = 0 typefold = '' if r['exp_wt_value']!=0 and r['exp_mu_value_raw']!=0: #fix for new format if re.match("(" + ")|(".join(logtypes) + ")", r['exp_type']): #-log values! 
foldchange = round(math.pow(10,-r['exp_mu_value_raw'])/pow(10,-r['exp_wt_value']),3); typefold = r['exp_type']+"_log" else: foldchange = round(r['exp_mu_value_raw']/r['exp_wt_value'],3); typefold = r['exp_type']+"_not_log" if foldchange<1 and foldchange!=0: foldchange = -round((1/foldchange),3) elif r['fold_effect']!=0: foldchange = round(r['fold_effect'],3); if foldchange<1: foldchange = -round((1/foldchange),3); raw_experiment = self.insert_raw(r) obj, created = MutationExperiment.objects.get_or_create( refs=pub, protein=protein, residue=res, ligand=l, ligand_role=l_role, ligand_ref = l_ref, raw = raw_experiment, optional = exp_opt_id, exp_type=exp_type_id, exp_func=exp_func_id, exp_qual = exp_qual_id, mutation=mutation, wt_value=r['exp_wt_value'], # wt_unit=r['exp_wt_unit'], mu_value = r['exp_mu_value_raw'], mu_sign = r['exp_mu_effect_sign'], foldchange = foldchange ) mut_id = obj.id inserted += 1 self.logger.info('Parsed '+str(c)+' mutant data entries. Skipped '+str(skipped)) sorted_missing_proteins = sorted(missing_proteins.items(), key=operator.itemgetter(1),reverse=True) sorted_mutants_for_proteins = sorted(mutants_for_proteins.items(), key=operator.itemgetter(1),reverse=True) self.logger.info('COMPLETED CREATING MUTANTS')
def get_or_make_ligand(ligand_id,type_id, name = None): if type_id=='PubChem CID' or type_id=='SMILES': if type_id=='PubChem CID': pubchem_lookup_value = 'cid' elif type_id=='SMILES': pubchem_lookup_value = 'smiles' try: web_resource = WebResource.objects.get(slug='pubchem') except: # abort if pdb resource is not found raise Exception('PubChem resource not found, aborting!') if name: ligand_name = name else: ligand_name = False try: # if this name is canonical and it has a ligand record already if (ligand_name==False): l = None ls = Ligand.objects.filter(canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id) for ligand in ls: l = ligand #print (l) break if l == None: l = Ligand.objects.get(canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id) else: l = Ligand.objects.get(name=ligand_name, canonical=True, properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id) #l = Ligand.objects.get(name=ligand_name, canonical=True, # properities__web_links__web_resource=web_resource, # properities__web_links__index=ligand_id) # except Ligand.DoesNotExist: try: # if exists under different name l_canonical = Ligand.objects.get(properities__web_links__web_resource=web_resource, properities__web_links__index=ligand_id, canonical=True) #print (created) try: l, created = Ligand.objects.get_or_create(properities = l_canonical.properities, name = ligand_name, canonical = False) except IntegrityError: l = Ligand.objects.get(properities = l_canonical.properities, name = ligand_name, canonical = False) except Ligand.DoesNotExist: # fetch ligand from pubchem default_ligand_type = 'Small molecule' lt, created = LigandType.objects.get_or_create(slug=slugify(default_ligand_type), defaults={'name': default_ligand_type}) l = Ligand() #print (ligand_name) l = l.load_from_pubchem(pubchem_lookup_value, ligand_id, lt, ligand_name) #print (l) if l == None and type_id=='SMILES': #insert manually if smiles and unfound in pubchem try: l = Ligand.objects.get(name=ligand_name, canonical=True, properities__smiles=ligand_id) except Ligand.DoesNotExist: try: l = Ligand.objects.get(name__startswith=ligand_name, canonical=True,properities__smiles=ligand_id) #if no properities exist except Ligand.DoesNotExist: try: l = Ligand.objects.get(name=ligand_name, canonical=True,properities__smiles=None) #if no properities exist l.properities.smiles = ligand_id l.properities.save() l.save() except Ligand.DoesNotExist: ## now insert a new ligand, but first make sure name is unique if Ligand.objects.filter(name=ligand_name).exists(): ls = Ligand.objects.filter(name__startswith=ligand_name, canonical=True).order_by("pk") for l_temp in ls: last = l_temp.name.split("_")[-1] if last==ligand_name: #no addition yet ligand_name = ligand_name +"_1" else: ligand_name = ligand_name +"_"+str(int(last)+1) l = Ligand() l.name = ligand_name lp = LigandProperities() lp.smiles = ligand_id lp.ligand_type = lt lp.save() l.properities = lp l.canonical = True #maybe false, but that would break stuff. 
l.ambigious_alias = False try: l.save() except IntegrityError: l = Ligand.objects.get(name=ligand_name, canonical=True) elif name: # if this name is canonical and it has a ligand record already if Ligand.objects.filter(name=name, canonical=True).exists(): l = Ligand.objects.get(name=name, canonical=True) # if this matches an alias that only has "one" parent canonical name - eg distinct elif Ligand.objects.filter(name=name, canonical=False, ambigious_alias=False).exists(): l = Ligand.objects.get(name=name, canonical=False, ambigious_alias=False) # if this matches an alias that only has several canonical parents, must investigate, start # with empty. elif Ligand.objects.filter(name=name, canonical=False, ambigious_alias=True).exists(): lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = name l.canonical = False l.ambigious_alias = True l.save() l.load_by_name(name) # if neither a canonical or alias exists, create the records. Remember to check for # canonical / alias status. else: lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = str(name) l.canonical = True l.ambigious_alias = False try: l.save() l.load_by_name(str(name)) except IntegrityError: l = Ligand.objects.get(name=str(name), canonical=True) else: l = None return l
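Both copies of get_or_make_ligand resolve a ligand either through PubChem (by CID or SMILES) or purely by name, creating canonical and alias records as needed; the mutation loaders call it with the ligand id, type and name taken from each row. A usage sketch follows, with placeholder literal values.

# Placeholders for illustration; the loaders pass r['ligand_id'], r['ligand_type'], str(r['ligand_name']).
lig = get_or_make_ligand('2244', 'PubChem CID', 'aspirin')              # resolve or create via PubChem CID
lig = get_or_make_ligand('CC(=O)Oc1ccccc1C(=O)O', 'SMILES', 'aspirin')  # resolve or create via SMILES
lig = get_or_make_ligand(None, '', 'aspirin')                           # name-only lookup / creation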
def main_func(self, positions, iteration, count, lock):
    # print(positions,iteration,count,lock)
    ligands = self.ligand_dump
    while count.value < len(ligands):
        with lock:
            l = ligands[count.value]
            count.value += 1
            if count.value % 10000 == 0:
                print('{} Status {} out of {}'.format(
                    datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                    count.value, len(ligands)))

        if 'logp' not in l:
            # temp skip to only use "full" annotated ligands
            continue

        lp = LigandProperities.objects.filter(inchikey=l['inchikey']).first()
        ligand = None
        if lp:
            # Check if inchikey is there
            ligand = Ligand.objects.filter(
                name=l['name'], properities=lp).prefetch_related(
                    'properities__ligand_type',
                    'properities__web_links',
                    'properities__vendors').first()
            # The name with corresponding inchikey is there, assume all is good and skip.
            # Will add links to make sure they're there.

        if not ligand:
            if lp:
                print(l['name'], 'is there! (but not by name, only inchi')
                ligand = Ligand()
                ligand.properities = lp
                ligand.name = l['name']
                ligand.canonical = l['canonical']
                ligand.ambigious_alias = l['ambigious_alias']
                ligand.save()
            else:
                # No ligand seems to match by inchikey -- start creating it.
                # Make LigandProperities first
                lt, created = LigandType.objects.get_or_create(
                    slug=l['ligand_type__slug'],
                    defaults={'name': l['ligand_type__name']})
                lp = LigandProperities()
                lp.inchikey = l['inchikey']
                lp.smiles = l['smiles']
                lp.mw = l['mw']
                lp.logp = l['logp']
                lp.rotatable_bonds = l['rotatable_bonds']
                lp.hacc = l['hacc']
                lp.hdon = l['hdon']
                lp.ligand_type = lt
                lp.save()
                ligand = Ligand()
                ligand.properities = lp
                ligand.name = l['name']
                ligand.canonical = l['canonical']
                ligand.ambigious_alias = l['ambigious_alias']
                ligand.save()

        # create links - impossible to make duplicates so no need to check if there already
        if ligand.properities.web_links.count() < len(l['web_links']):
            for link in l['web_links']:
                wr = WebResource.objects.get(slug=link['web_resource'])
                wl, created = WebLink.objects.get_or_create(index=link['index'], web_resource=wr)
                ligand.properities.web_links.add(wl)

        # create vendors - impossible to make duplicates so no need to check if there already
        if ligand.properities.vendors.count() < len(l['vendors']):
            for link in l['vendors']:
                lv = LigandVendors.objects.get(slug=link['vendor_slug'])
                check = LigandVendorLink.objects.filter(sid=link['sid']).exists()
                if not check:
                    lvl = LigandVendorLink()
                    lvl.sid = link['sid']
                    lvl.vendor = lv
                    lvl.lp = ligand.properities
                    lvl.vendor_external_id = link['vendor_external_id']
                    lvl.url = link['url']
                    lvl.save()
def main_func(self, positions, iteration): # filenames if not positions[1]: filenames = self.filenames[positions[0]:] else: filenames = self.filenames[positions[0]:positions[1]] for source_file in filenames: source_file_path = os.sep.join([self.structure_data_dir, source_file]) if os.path.isfile(source_file_path) and source_file[0] != '.': self.logger.info('Reading file {}'.format(source_file_path)) # read the yaml file with open(source_file_path, 'r') as f: sd = yaml.load(f) # is this a representative structure (will be used to guide structure-based alignments)? representative = False if 'representative' in sd and sd['representative']: representative = True # only process representative structures on first iteration if not representative and iteration == 1: continue # skip representative structures on second iteration if representative and iteration == 2: continue # is there a construct? if 'construct' not in sd: self.logger.error('No construct specified, skipping!') continue # does the construct exists? try: con = Protein.objects.get(entry_name=sd['construct']) except Protein.DoesNotExist: self.logger.error('Construct {} does not exists, skipping!'.format(sd['construct'])) continue # create a structure record try: s = Structure.objects.get(protein_conformation__protein=con) except Structure.DoesNotExist: s = Structure() s.representative = representative # protein state if 'state' not in sd: self.logger.warning('State not defined, using default state {}'.format( settings.DEFAULT_PROTEIN_STATE)) state = settings.DEFAULT_STATE.title() else: state = sd['state'] state_slug = slugify(state) try: ps, created = ProteinState.objects.get_or_create(slug=state_slug, defaults={'name': state}) if created: self.logger.info('Created protein state {}'.format(ps.name)) except IntegrityError: ps = ProteinState.objects.get(slug=state_slug) s.state = ps # protein conformation try: s.protein_conformation = ProteinConformation.objects.get(protein=con) except ProteinConformation.DoesNotExist: self.logger.error('Protein conformation for construct {} does not exists'.format(con)) continue if s.protein_conformation.state is not state: ProteinConformation.objects.filter(protein=con).update(state=ps) # get the PDB file and save to DB sd['pdb'] = sd['pdb'].upper() if not os.path.exists(self.pdb_data_dir): os.makedirs(self.pdb_data_dir) pdb_path = os.sep.join([self.pdb_data_dir, sd['pdb'] + '.pdb']) if not os.path.isfile(pdb_path): self.logger.info('Fetching PDB file {}'.format(sd['pdb'])) url = 'http://www.rcsb.org/pdb/files/%s.pdb' % sd['pdb'] pdbdata_raw = urlopen(url).read().decode('utf-8') with open(pdb_path, 'w') as f: f.write(pdbdata_raw) else: with open(pdb_path, 'r') as pdb_file: pdbdata_raw = pdb_file.read() pdbdata, created = PdbData.objects.get_or_create(pdb=pdbdata_raw) s.pdb_data = pdbdata # UPDATE HETSYN with its PDB reference instead + GRAB PUB DATE, PMID, DOI AND RESOLUTION hetsyn = {} hetsyn_reverse = {} for line in pdbdata_raw.splitlines(): if line.startswith('HETSYN'): m = re.match("HETSYN[\s]+([\w]{3})[\s]+(.+)",line) ### need to fix bad PDB formatting where col4 and col5 are put together for some reason -- usually seen when the id is +1000 if (m): hetsyn[m.group(2).strip()] = m.group(1).upper() hetsyn_reverse[m.group(1)] = m.group(2).strip().upper() if line.startswith('HETNAM'): m = re.match("HETNAM[\s]+([\w]{3})[\s]+(.+)",line) ### need to fix bad PDB formatting where col4 and col5 are put together for some reason -- usually seen when the id is +1000 if (m): hetsyn[m.group(2).strip()] = 
m.group(1).upper() hetsyn_reverse[m.group(1)] = m.group(2).strip().upper() if line.startswith('REVDAT 1'): sd['publication_date'] = line[13:22] if line.startswith('JRNL PMID'): sd['pubmed_id'] = line[19:].strip() if line.startswith('JRNL DOI'): sd['doi_id'] = line[19:].strip() if len(hetsyn) == 0: self.logger.info("PDB file contained NO hetsyn") with open(pdb_path,'r') as header: header_dict = parse_pdb_header(header) sd['publication_date'] = header_dict['release_date'] sd['resolution'] = str(header_dict['resolution']).strip() sd['structure_method'] = header_dict['structure_method'] # structure type if 'structure_method' in sd and sd['structure_method']: structure_type = sd['structure_method'].capitalize() structure_type_slug = slugify(sd['structure_method']) try: st, created = StructureType.objects.get_or_create(slug=structure_type_slug, defaults={'name': structure_type}) if created: self.logger.info('Created structure type {}'.format(st)) except IntegrityError: st = StructureType.objects.get(slug=structure_type_slug) s.structure_type = st else: self.logger.warning('No structure type specified in PDB file {}'.format(sd['pdb'])) matched = 0 if 'ligand' in sd and sd['ligand']: if isinstance(sd['ligand'], list): ligands = sd['ligand'] else: ligands = [sd['ligand']] for ligand in ligands: if 'name' in ligand: if ligand['name'].upper() in hetsyn: self.logger.info('Ligand {} matched to PDB records'.format(ligand['name'])) matched = 1 ligand['name'] = hetsyn[ligand['name'].upper()] elif ligand['name'].upper() in hetsyn_reverse: matched = 1 if matched==0 and len(hetsyn)>0: self.logger.info('No ligand names found in HET in structure {}'.format(sd['pdb'])) # REMOVE? can be used to dump structure files with updated ligands # yaml.dump(sd, open(source_file_path, 'w'), indent=4) # pdb code if 'pdb' in sd: try: web_resource = WebResource.objects.get(slug='pdb') except: # abort if pdb resource is not found raise Exception('PDB resource not found, aborting!') s.pdb_code, created = WebLink.objects.get_or_create(index=sd['pdb'], web_resource=web_resource) else: self.logger.error('PDB code not specified for structure {}, skipping!'.format(sd['pdb'])) continue # insert into plain text fields if 'preferred_chain' in sd: s.preferred_chain = sd['preferred_chain'] else: self.logger.warning('Preferred chain not specified for structure {}'.format(sd['pdb'])) if 'resolution' in sd: s.resolution = float(sd['resolution']) else: self.logger.warning('Resolution not specified for structure {}'.format(sd['pdb'])) if 'publication_date' in sd: s.publication_date = sd['publication_date'] else: self.logger.warning('Publication date not specified for structure {}'.format(sd['pdb'])) # publication try: if 'doi_id' in sd: try: s.publication = Publication.objects.get(web_link__index=sd['doi_id']) except Publication.DoesNotExist as e: p = Publication() try: p.web_link = WebLink.objects.get(index=sd['doi_id'], web_resource__slug='doi') except WebLink.DoesNotExist: wl = WebLink.objects.create(index=sd['doi_id'], web_resource = WebResource.objects.get(slug='doi')) p.web_link = wl p.update_from_doi(doi=sd['doi_id']) p.save() s.publication = p elif 'pubmed_id' in sd: try: s.publication = Publication.objects.get(web_link__index=sd['pubmed_id']) except Publication.DoesNotExist as e: p = Publication() try: p.web_link = WebLink.objects.get(index=sd['pubmed_id'], web_resource__slug='pubmed') except WebLink.DoesNotExist: wl = WebLink.objects.create(index=sd['pubmed_id'], web_resource = WebResource.objects.get(slug='pubmed')) p.web_link = wl 
p.update_from_pubmed_data(index=sd['pubmed_id']) p.save() s.publication = p except: self.logger.error('Error saving publication'.format(ps.name)) # save structure before adding M2M relations s.save() #Delete previous interaction data to prevent errors. ResidueFragmentInteraction.objects.filter(structure_ligand_pair__structure=s).delete() StructureLigandInteraction.objects.filter(structure=s).delete() #Remove previous Rotamers/Residues to prepare repopulate Fragment.objects.filter(structure=s).delete() Rotamer.objects.filter(structure=s).all().delete() Residue.objects.filter(protein_conformation=s.protein_conformation).all().delete() # endogenous ligand(s) default_ligand_type = 'Small molecule' if representative and 'endogenous_ligand' in sd and sd['endogenous_ligand']: if isinstance(sd['endogenous_ligand'], list): endogenous_ligands = sd['endogenous_ligand'] else: endogenous_ligands = [sd['endogenous_ligand']] for endogenous_ligand in endogenous_ligands: if endogenous_ligand['type']: lt, created = LigandType.objects.get_or_create(slug=slugify(endogenous_ligand['type']), defaults={'name': endogenous_ligand['type']}) else: lt, created = LigandType.objects.get_or_create(slug=slugify(default_ligand_type), defaults={'name': default_ligand_type}) ligand = Ligand() if 'iupharId' not in endogenous_ligand: endogenous_ligand['iupharId'] = 0 ligand = ligand.load_by_gtop_id(endogenous_ligand['name'], endogenous_ligand['iupharId'], lt) try: s.protein_conformation.protein.parent.endogenous_ligands.add(ligand) except IntegrityError: self.logger.info('Endogenous ligand for protein {}, already added. Skipping.'.format( s.protein_conformation.protein.parent)) # ligands if 'ligand' in sd and sd['ligand']: if isinstance(sd['ligand'], list): ligands = sd['ligand'] else: ligands = [sd['ligand']] for ligand in ligands: l = False peptide_chain = "" if 'chain' in ligand: peptide_chain = ligand['chain'] ligand['name'] = 'pep' if ligand['name'] and ligand['name'] != 'None': # some inserted as none. 
# use annoted ligand type or default type if ligand['type']: lt, created = LigandType.objects.get_or_create(slug=slugify(ligand['type']), defaults={'name': ligand['type']}) else: lt, created = LigandType.objects.get_or_create( slug=slugify(default_ligand_type), defaults={'name': default_ligand_type}) # set pdb reference for structure-ligand interaction pdb_reference = ligand['name'] # use pubchem_id if 'pubchemId' in ligand and ligand['pubchemId'] and ligand['pubchemId'] != 'None': # create ligand l = Ligand() # update ligand by pubchem id ligand_title = False if 'title' in ligand and ligand['title']: ligand_title = ligand['title'] l = l.load_from_pubchem('cid', ligand['pubchemId'], lt, ligand_title) # if no pubchem id is specified, use name else: # use ligand title, if specified if 'title' in ligand and ligand['title']: ligand['name'] = ligand['title'] # create empty properties lp = LigandProperities.objects.create() # create the ligand try: l, created = Ligand.objects.get_or_create(name=ligand['name'], canonical=True, defaults={'properities': lp, 'ambigious_alias': False}) if created: self.logger.info('Created ligand {}'.format(ligand['name'])) else: pass except IntegrityError: l = Ligand.objects.get(name=ligand['name'], canonical=True) # save ligand l.save() else: continue # structure-ligand interaction if l and ligand['role']: role_slug = slugify(ligand['role']) try: lr, created = LigandRole.objects.get_or_create(slug=role_slug, defaults={'name': ligand['role']}) if created: self.logger.info('Created ligand role {}'.format(ligand['role'])) except IntegrityError: lr = LigandRole.objects.get(slug=role_slug) i, created = StructureLigandInteraction.objects.get_or_create(structure=s, ligand=l, ligand_role=lr, annotated=True, defaults={'pdb_reference': pdb_reference}) if i.pdb_reference != pdb_reference: i.pdb_reference = pdb_reference i.save() # structure segments if 'segments' in sd and sd['segments']: for segment, positions in sd['segments'].items(): # fetch (create if needed) sequence segment try: protein_segment = ProteinSegment.objects.get(slug=segment) except ProteinSegment.DoesNotExist: self.logger.error('Segment {} not found'.format(segment)) continue struct_seg, created = StructureSegment.objects.update_or_create(structure=s, protein_segment=protein_segment, defaults={'start': positions[0], 'end': positions[1]}) # all representive structures should have defined segments elif representative: self.logger.warning('Segments not defined for representative structure {}'.format(sd['pdb'])) # structure segments for modeling if 'segments_in_structure' in sd and sd['segments_in_structure']: for segment, positions in sd['segments_in_structure'].items(): # fetch (create if needed) sequence segment try: protein_segment = ProteinSegment.objects.get(slug=segment) except ProteinSegment.DoesNotExist: self.logger.error('Segment {} not found'.format(segment)) continue struct_seg_mod, created = StructureSegmentModeling.objects.update_or_create(structure=s, protein_segment=protein_segment, defaults={'start': positions[0], 'end': positions[1]}) # structure coordinates if 'coordinates' in sd and sd['coordinates']: for segment, coordinates in sd['coordinates'].items(): # fetch (create if needed) sequence segment try: protein_segment = ProteinSegment.objects.get(slug=segment) except ProteinSegment.DoesNotExist: self.logger.error('Segment {} not found'.format(segment)) continue # fetch (create if needed) coordinates description try: description, created = 
StructureCoordinatesDescription.objects.get_or_create( text=coordinates) if created: self.logger.info('Created structure coordinate description {}'.format(coordinates)) except IntegrityError: description = StructureCoordinatesDescription.objects.get(text=coordinates) sc = StructureCoordinates() sc.structure = s sc.protein_segment = protein_segment sc.description = description sc.save() # structure engineering if 'engineering' in sd and sd['engineering']: for segment, engineering in sd['engineering'].items(): # fetch (create if needed) sequence segment try: protein_segment = ProteinSegment.objects.get(slug=segment) except ProteinSegment.DoesNotExist: self.logger.error('Segment {} not found'.format(segment)) continue # fetch (create if needed) engineering description try: description, created = StructureEngineeringDescription.objects.get_or_create( text=engineering) if created: self.logger.info('Created structure coordinate description {}'.format(engineering)) except IntegrityError: description = StructureEngineeringDescription.objects.get(text=engineering) se = StructureEngineering() se.structure = s se.protein_segment = protein_segment se.description = description se.save() # protein anomalies scheme = s.protein_conformation.protein.residue_numbering_scheme if 'bulges' in sd and sd['bulges']: pa_slug = 'bulge' try: pab, created = ProteinAnomalyType.objects.get_or_create(slug=pa_slug, defaults={ 'name': 'Bulge'}) if created: self.logger.info('Created protein anomaly type {}'.format(pab)) except IntegrityError: pab = ProteinAnomalyType.objects.get(slug=pa_slug) for segment, bulges in sd['bulges'].items(): for bulge in bulges: try: gn, created = ResidueGenericNumber.objects.get_or_create(label=bulge, scheme=scheme, defaults={'protein_segment': ProteinSegment.objects.get( slug=segment)}) if created: self.logger.info('Created generic number {}'.format(gn)) except IntegrityError: gn = ResidueGenericNumber.objects.get(label=bulge, scheme=scheme) try: pa, created = ProteinAnomaly.objects.get_or_create(anomaly_type=pab, generic_number=gn) if created: self.logger.info('Created protein anomaly {}'.format(pa)) except IntegrityError: pa, created = ProteinAnomaly.objects.get(anomaly_type=pab, generic_number=gn) s.protein_anomalies.add(pa) if 'constrictions' in sd and sd['constrictions']: pa_slug = 'constriction' try: pac, created = ProteinAnomalyType.objects.get_or_create(slug=pa_slug, defaults={ 'name': 'Constriction'}) if created: self.logger.info('Created protein anomaly type {}'.format(pac)) except IntegrityError: pac = ProteinAnomalyType.objects.get(slug=pa_slug) for segment, constrictions in sd['constrictions'].items(): for constriction in constrictions: try: gn, created = ResidueGenericNumber.objects.get_or_create(label=constriction, scheme=scheme, defaults={'protein_segment': ProteinSegment.objects.get( slug=segment)}) if created: self.logger.info('Created generic number {}'.format(gn)) except IntegrityError: gn = ResidueGenericNumber.objects.get(label=constriction, scheme=scheme) try: pa, created = ProteinAnomaly.objects.get_or_create(anomaly_type=pac, generic_number=gn) if created: self.logger.info('Created protein anomaly {}'.format(pa)) except IntegrityError: pa, created = ProteinAnomaly.objects.get(anomaly_type=pac, generic_number=gn) s.protein_anomalies.add(pa) # stabilizing agents, FIXME - redesign this! # fusion proteins moved to constructs, use this for G-proteins and other agents? 
aux_proteins = [] if 'signaling_protein' in sd and sd['signaling_protein'] and sd['signaling_protein'] != 'None': aux_proteins.append('signaling_protein') if 'auxiliary_protein' in sd and sd['auxiliary_protein'] and sd['auxiliary_protein'] != 'None': aux_proteins.append('auxiliary_protein') for index in aux_proteins: if isinstance(sd[index], list): aps = sd[index] else: aps = [sd[index]] for aux_protein in aps: aux_protein_slug = slugify(aux_protein)[:50] try: sa, created = StructureStabilizingAgent.objects.get_or_create( slug=aux_protein_slug, defaults={'name': aux_protein}) except IntegrityError: sa = StructureStabilizingAgent.objects.get(slug=aux_protein_slug) s.stabilizing_agents.add(sa) # save structure s.save() self.logger.info('Calculate rotamers / residues') self.create_rotamers(s,pdb_path) self.logger.info('Calculate interactions') #Should not error anymore. If it does, fix. runcalculation(sd['pdb'],peptide_chain) parsecalculation(sd['pdb'],False)
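# --- Illustrative sketch (standalone, not part of the loader class) ---------------------
# The loader above wraps most get_or_create calls in an IntegrityError fallback so that a
# second build worker hitting the same unique slug fetches the existing row instead of
# crashing. A minimal sketch of that pattern, assuming a configured Django project; the
# helper name is hypothetical and the model import path is an assumption for this project.
from django.db import IntegrityError
from django.utils.text import slugify
from structure.models import StructureStabilizingAgent  # assumed app path

def get_or_create_stabilizing_agent(name):
    # slugs are truncated to 50 characters, matching the loader above
    slug = slugify(name)[:50]
    try:
        agent, _ = StructureStabilizingAgent.objects.get_or_create(slug=slug, defaults={'name': name})
    except IntegrityError:
        # another worker inserted the row between the lookup and the insert
        agent = StructureStabilizingAgent.objects.get(slug=slug)
    return agent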
def create_mutant_data(self, filenames): self.logger.info('CREATING MUTANT DATA') # what files should be parsed? if not filenames: filenames = os.listdir(self.structure_data_dir) missing_proteins = {} mutants_for_proteins = {} for source_file in filenames: source_file_path = os.sep.join( [self.structure_data_dir, source_file]) if os.path.isfile(source_file_path) and source_file[0] != '.': self.logger.info('Reading file {}'.format(source_file_path)) # read the yaml file if source_file[-4:] == 'xlsx' or source_file[-3:] == 'xls': rows = self.loaddatafromexcel(source_file_path) rows = self.analyse_rows(rows) elif source_file[-4:] == 'yaml': rows = yaml.load(open(source_file_path, 'r')) temp = [] for r in rows: d = {} d['reference'] = r['pubmed'] d['protein'] = r['entry_name'].replace("__", "_").lower() d['mutation_pos'] = r['seq'] d['mutation_from'] = r['from_res'] d['mutation_to'] = r['to_res'] d['ligand_name'] = '' d['ligand_type'] = '' d['ligand_id'] = '' d['ligand_class'] = '' d['exp_type'] = '' d['exp_func'] = '' d['exp_wt_value'] = 0 d['exp_wt_unit'] = '' d['exp_mu_effect_sign'] = '' d['exp_mu_value_raw'] = 0 d['fold_effect'] = 0 d['exp_mu_effect_qual'] = '' d['exp_mu_effect_ligand_prop'] = '' d['exp_mu_ligand_ref'] = '' d['opt_type'] = '' d['opt_wt'] = 0 d['opt_mu'] = 0 d['opt_sign'] = '' d['opt_percentage'] = 0 d['opt_qual'] = '' d['opt_agonist'] = '' if len(d['mutation_to']) > 1 or len(d['mutation_from']) > 1: #if something is off with amino acid continue temp.append(d) rows = temp else: self.logger.info('unknown format {}'.format(source_file)) continue c = 0 skipped = 0 inserted = 0 for r in rows: c += 1 if c % 1000 == 0: self.logger.info('Parsed ' + str(c) + ' mutant data entries') # publication try: #fix if it thinks it's float. float(r['reference']) r['reference'] = str(int(r['reference'])) except ValueError: pass if r['reference'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' try: pub = Publication.objects.get( web_link__index=r['reference'], web_link__web_resource__slug=pub_type) except Publication.DoesNotExist: pub = Publication() try: pub.web_link = WebLink.objects.get( index=r['reference'], web_resource__slug=pub_type) except WebLink.DoesNotExist: wl = WebLink.objects.create( index=r['reference'], web_resource=WebResource.objects.get( slug=pub_type)) pub.web_link = wl if pub_type == 'doi': pub.update_from_doi(doi=r['reference']) elif pub_type == 'pubmed': pub.update_from_pubmed_data(index=r['reference']) try: pub.save() except: self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type) continue #if something off with publication, skip. 
if r['ligand_type'] == 'PubChem CID' or r[ 'ligand_type'] == 'SMILES': if r['ligand_type'] == 'PubChem CID': pubchem_lookup_value = 'cid' elif r['ligand_type'] == 'SMILES': pubchem_lookup_value = 'smiles' try: web_resource = WebResource.objects.get( slug='pubchem') except: # abort if pdb resource is not found raise Exception( 'PubChem resource not found, aborting!') if 'ligand_name' in r and r['ligand_name']: ligand_name = str(r['ligand_name']) else: ligand_name = False try: # if this name is canonical and it has a ligand record already l = Ligand.objects.get( name=ligand_name, canonical=True, properities__web_links__web_resource= web_resource, properities__web_links__index=r['ligand_id']) except Ligand.DoesNotExist: try: # if exists under different name l_canonical = Ligand.objects.get( properities__web_links__web_resource= web_resource, properities__web_links__index=r[ 'ligand_id'], canonical=True) l, created = Ligand.objects.get_or_create( properities=l_canonical.properities, name=ligand_name, canonical=False) if created: self.logger.info( 'Created ligand {}'.format(l.name)) except Ligand.DoesNotExist: # fetch ligand from pubchem default_ligand_type = 'Small molecule' lt, created = LigandType.objects.get_or_create( slug=slugify(default_ligand_type), defaults={'name': default_ligand_type}) l = Ligand() l = l.load_from_pubchem( pubchem_lookup_value, r['ligand_id'], lt, ligand_name) elif r['ligand_name']: # if this name is canonical and it has a ligand record already if Ligand.objects.filter(name=r['ligand_name'], canonical=True).exists(): l = Ligand.objects.get(name=r['ligand_name'], canonical=True) # if this matches an alias that only has "one" parent canonical name - eg distinct elif Ligand.objects.filter( name=r['ligand_name'], canonical=False, ambigious_alias=False).exists(): l = Ligand.objects.get(name=r['ligand_name'], canonical=False, ambigious_alias=False) # if this matches an alias that only has several canonical parents, must investigate, start # with empty. elif Ligand.objects.filter( name=r['ligand_name'], canonical=False, ambigious_alias=True).exists(): lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = r['ligand_name'] l.canonical = False l.ambigious_alias = True l.save() l.load_by_name(r['ligand_name']) # if neither a canonical or alias exists, create the records. Remember to check for # canonical / alias status. else: lp = LigandProperities() lp.save() l = Ligand() l.properities = lp l.name = str(r['ligand_name']) l.canonical = True l.ambigious_alias = False l.save() l.load_by_name(str(r['ligand_name'])) else: l = None if Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=True ).exists( ): #if this name is canonical and it has a ligand record already l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) elif Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False ).exists( ): #if this matches an alias that only has "one" parent canonical name - eg distinct l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False) elif Ligand.objects.filter( name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True ).exists( ): #if this matches an alias that only has several canonical parents, must investigate, start with empty. 
lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = False l_ref.ambigious_alias = True l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status. lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = True l_ref.ambigious_alias = False l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() else: l_ref = None protein_id = 0 residue_id = 0 protein = Protein.objects.filter(entry_name=r['protein']) if protein.exists(): protein = protein.get() if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 else: skipped += 1 if r['protein'] in missing_proteins: missing_proteins[r['protein']] += 1 else: missing_proteins[r['protein']] = 1 self.logger.error('Skipped due to no protein ' + r['protein']) continue res = Residue.objects.filter( protein_conformation__protein=protein, sequence_number=r['mutation_pos']) if res.exists(): res = res.get() else: self.logger.error('Skipped due to no residue ' + r['protein'] + ' pos:' + str(r['mutation_pos'])) skipped += 1 continue if r['ligand_class']: l_role, created = LigandRole.objects.get_or_create( name=r['ligand_class'], defaults={'slug': slugify(r['ligand_class'])[:50] }) # FIXME this should not be needed else: l_role = None if r['exp_type']: exp_type_id, created = MutationExperimentalType.objects.get_or_create( type=r['exp_type']) else: exp_type_id = None if r['exp_func']: exp_func_id, created = MutationFunc.objects.get_or_create( func=r['exp_func']) else: exp_func_id = None if r['exp_mu_effect_ligand_prop'] or r[ 'exp_mu_effect_qual']: exp_qual_id, created = MutationQual.objects.get_or_create( qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop']) else: exp_qual_id = None if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r[ 'opt_sign'] or r['opt_percentage'] or r[ 'opt_qual'] or r['opt_agonist']: exp_opt_id, created = MutationOptional.objects.get_or_create( type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist']) else: exp_opt_id = None mutation, created = Mutation.objects.get_or_create( amino_acid=r['mutation_to'], protein=protein, residue=res) logtypes = ['pEC50', 'pIC50', 'pK'] foldchange = 0 typefold = '' if r['exp_wt_value'] != 0 and r[ 'exp_mu_value_raw'] != 0: #fix for new format if re.match("(" + ")|(".join(logtypes) + ")", r['exp_type']): #-log values! 
foldchange = round( math.pow(10, -r['exp_mu_value_raw']) / pow(10, -r['exp_wt_value']), 3) typefold = r['exp_type'] + "_log" else: foldchange = round( r['exp_mu_value_raw'] / r['exp_wt_value'], 3) typefold = r['exp_type'] + "_not_log" if foldchange < 1 and foldchange != 0: foldchange = -round((1 / foldchange), 3) elif r['fold_effect'] != 0: foldchange = round(r['fold_effect'], 3) if foldchange < 1: foldchange = -round((1 / foldchange), 3) raw_experiment = self.insert_raw(r) obj, created = MutationExperiment.objects.get_or_create( refs=pub, protein=protein, residue=res, ligand=l, ligand_role=l_role, ligand_ref=l_ref, raw=raw_experiment, optional=exp_opt_id, exp_type=exp_type_id, exp_func=exp_func_id, exp_qual=exp_qual_id, mutation=mutation, wt_value=r['exp_wt_value'], # wt_unit=r['exp_wt_unit'], mu_value=r['exp_mu_value_raw'], mu_sign=r['exp_mu_effect_sign'], foldchange=foldchange) mut_id = obj.id inserted += 1 self.logger.info('Parsed ' + str(c) + ' mutant data entries. Skipped ' + str(skipped)) sorted_missing_proteins = sorted(missing_proteins.items(), key=operator.itemgetter(1), reverse=True) sorted_mutants_for_proteins = sorted(mutants_for_proteins.items(), key=operator.itemgetter(1), reverse=True) self.logger.info('COMPLETED CREATING MUTANTS')
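# --- Illustrative sketch (standalone, not part of the loader class) ---------------------
# The fold-change logic above converts -log assay values (pEC50, pIC50, pK*) back to
# linear space before comparing mutant to wild type, and flips ratios below 1 to a
# negative fold so losses and gains are reported symmetrically. A self-contained sketch of
# that arithmetic; the function name is illustrative (the loader does this inline) and the
# separate fold_effect fallback branch is left out.
import math
import re

LOG_TYPES = ['pEC50', 'pIC50', 'pK']

def fold_change(exp_type, wt_value, mu_value):
    if wt_value == 0 or mu_value == 0:
        return 0
    if re.match("(" + ")|(".join(LOG_TYPES) + ")", exp_type):
        # -log values: convert back to linear concentrations first
        fold = round(math.pow(10, -mu_value) / math.pow(10, -wt_value), 3)
    else:
        fold = round(mu_value / wt_value, 3)
    if 0 < fold < 1:
        fold = -round(1 / fold, 3)
    return fold

# a two-unit pEC50 shift is a 100-fold change, signed by its direction
assert fold_change('pEC50', 8.0, 6.0) == 100.0
assert fold_change('pEC50', 6.0, 8.0) == -100.0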
def main_func(self, positions, iteration,count,lock): # print(positions,iteration,count,lock) ligands = self.ligand_dump while count.value<len(ligands): with lock: l = ligands[count.value] count.value +=1 if count.value % 10000 == 0: print('{} Status {} out of {}'.format( datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'), count.value, len(ligands))) if 'logp' not in l: # temp skip to only use "full" annotated ligands continue lp = LigandProperities.objects.filter(inchikey=l['inchikey']).first() ligand = None if lp: # Check if inchikey is there ligand = Ligand.objects.filter(name=l['name'], properities=lp).prefetch_related('properities__ligand_type','properities__web_links','properities__vendors').first() # The name with corresponding inchikey is there, assume all is good and skip. # Will add links to make sure they're there. if not ligand: if lp: print(l['name'],'is there! (but not by name, only inchi') ligand = Ligand() ligand.properities = lp ligand.name = l['name'] ligand.canonical = l['canonical'] ligand.ambigious_alias = l['ambigious_alias'] ligand.save() else: # No ligand seems to match by inchikey -- start creating it. # Make LigandProperities first lt, created = LigandType.objects.get_or_create(slug=l['ligand_type__slug'],defaults = {'name':l['ligand_type__name']}) lp = LigandProperities() lp.inchikey = l['inchikey'] lp.smiles = l['smiles'] lp.mw = l['mw'] lp.logp = l['logp'] lp.rotatable_bonds = l['rotatable_bonds'] lp.hacc = l['hacc'] lp.hdon = l['hdon'] lp.ligand_type = lt lp.save() ligand = Ligand() ligand.properities = lp ligand.name = l['name'] ligand.canonical = l['canonical'] ligand.ambigious_alias = l['ambigious_alias'] ligand.save() # create links - impossible to make duplicates so no need to check if there already if ligand.properities.web_links.count()<len(l['web_links']): for link in l['web_links']: wr = WebResource.objects.get(slug=link['web_resource']) wl, created = WebLink.objects.get_or_create(index=link['index'], web_resource=wr) ligand.properities.web_links.add(wl) # create vendors - impossible to make duplicates so no need to check if there already if ligand.properities.vendors.count()<len(l['vendors']): for link in l['vendors']: lv = LigandVendors.objects.get(slug = link['vendor_slug']) check = LigandVendorLink.objects.filter(sid=link['sid']).exists() if not check: lvl = LigandVendorLink() lvl.sid = link['sid'] lvl.vendor = lv lvl.lp = ligand.properities lvl.vendor_external_id = link['vendor_external_id'] lvl.url = link['url'] lvl.save()
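# --- Illustrative sketch (standalone, not part of the loader class) ---------------------
# main_func above is driven by a shared counter and a lock so that several worker
# processes can claim items from one list without overlapping. A minimal, self-contained
# sketch of that claim-next-item pattern; the squaring "work" is a placeholder, and the
# re-check inside the lock is added here to close the small race at the end of the list.
import multiprocessing

def worker(items, count, lock, results):
    while count.value < len(items):
        with lock:
            if count.value >= len(items):
                break  # another process claimed the last item after our outer check
            item = items[count.value]
            count.value += 1
        results.append(item * item)  # stand-in for the per-ligand work

if __name__ == '__main__':
    items = list(range(100))
    count = multiprocessing.Value('i', 0)
    lock = multiprocessing.Lock()
    manager = multiprocessing.Manager()
    results = manager.list()
    procs = [multiprocessing.Process(target=worker, args=(items, count, lock, results))
             for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    assert sorted(results) == sorted(i * i for i in items)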
def main_func(self, positions, iteration): # filenames if not positions[1]: rows = self.data[positions[0]:] else: rows = self.data[positions[0]:positions[1]] missing_proteins = {} mutants_for_proteins = {} c = 0 skipped = 0 inserted = 0 bulk_m = [] bulk_r = [] current_sheet = time.time() for r in rows: # print(source_file,c) # PRINT IF ERRORS OCCUR # self.logger.info('File '+str(r['source_file'])+' number '+str(c)) current = time.time() c += 1 # if c%100==0: # self.logger.info('Parsed '+str(c)+' mutant data entries') # publication try: #fix if it thinks it's float. float(r['reference']) r['reference'] = str(int(r['reference'])) float(r['review']) r['review'] = str(int(r['review'])) except ValueError: pass if r['reference'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' if r['reference'] not in self.publication_cache: try: pub = Publication.objects.get(web_link__index=r['reference'], web_link__web_resource__slug=pub_type) except Publication.DoesNotExist: pub = Publication() try: pub.web_link = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type) except WebLink.DoesNotExist: wl = WebLink.objects.create(index=r['reference'], web_resource = WebResource.objects.get(slug=pub_type)) pub.web_link = wl if pub_type == 'doi': pub.update_from_doi(doi=r['reference']) elif pub_type == 'pubmed': pub.update_from_pubmed_data(index=r['reference']) try: pub.save() except: self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type) continue #if something off with publication, skip. self.publication_cache[r['reference']] = pub else: pub = self.publication_cache[r['reference']] # print(r['review'],r['reference']) if r['review'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' # print(r['review'],pub_type) if r['review']: if r['review'] not in self.publication_cache: try: pub_review = Publication.objects.get(web_link__index=r['review'], web_link__web_resource__slug=pub_type) except Publication.DoesNotExist: pub_review = Publication() try: pub_review.web_link = WebLink.objects.get(index=r['review'], web_resource__slug=pub_type) except WebLink.DoesNotExist: wl = WebLink.objects.create(index=r['review'], web_resource = WebResource.objects.get(slug=pub_type)) pub_review.web_link = wl if pub_type == 'doi': pub_review.update_from_doi(doi=r['review']) elif pub_type == 'pubmed': pub_review.update_from_pubmed_data(index=r['review']) try: pub_review.save() except: self.logger.error('error with review ' + str(r['review']) + ' ' + pub_type) continue #if something off with publication, skip. 
self.publication_cache[r['review']] = pub_review else: pub_review = self.publication_cache[r['review']] else: pub_review = None l = None if str(r['ligand_name']) in self.ligand_cache: if r['ligand_id'] in self.ligand_cache[str(r['ligand_name'])]: l = self.ligand_cache[str(r['ligand_name'])][r['ligand_id']] else: self.ligand_cache[str(r['ligand_name'])] = {} if not l: l = get_or_make_ligand(r['ligand_id'],r['ligand_type'],str(r['ligand_name'])) self.ligand_cache[str(r['ligand_name'])][r['ligand_id']] = l l_ref = None if str(r['exp_mu_ligand_ref']) in self.ref_ligand_cache: l_ref = self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] else: if Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=True).exists(): #if this name is canonical and it has a ligand record already l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False).exists(): #if this matches an alias that only has "one" parent canonical name - eg distinct l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False) elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True).exists(): #if this matches an alias that only has several canonical parents, must investigate, start with empty. lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = False l_ref.ambigious_alias = True l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False).exists(): #amigious_alias not specified l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) l_ref.ambigious_alias = False l_ref.save() elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status. 
lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = True l_ref.ambigious_alias = False l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) try: l_ref.save() except IntegrityError: l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) print("error failing ligand, duplicate?") # logger.error("FAILED SAVING LIGAND, duplicate?") else: l_ref = None self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] = l_ref protein_id = 0 residue_id = 0 protein=Protein.objects.filter(entry_name=r['protein']) if protein.exists(): protein=protein.get() if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 else: skipped += 1 if r['protein'] in missing_proteins: missing_proteins[r['protein']] += 1 else: missing_proteins[r['protein']] = 1 self.logger.error('Skipped due to no protein '+ r['protein']) continue res=Residue.objects.filter(protein_conformation__protein=protein,amino_acid=r['mutation_from'],sequence_number=r['mutation_pos']) #FIXME MAKE AA CHECK if res.exists(): res=res.get() else: self.logger.error('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:'+str(r['mutation_pos']) + ' AA:'+r['mutation_from']) skipped += 1 continue if r['ligand_class']: l_role, created = LigandRole.objects.get_or_create(name=r['ligand_class'], defaults={'slug': slugify(r['ligand_class'])[:50]}) # FIXME this should not be needed else: l_role = None if r['exp_type']: exp_type_id, created = MutationExperimentalType.objects.get_or_create(type=r['exp_type']) else: exp_type_id = None if r['exp_func']: exp_func_id, created = MutationFunc.objects.get_or_create(func=r['exp_func']) else: exp_func_id = None if r['exp_mu_effect_ligand_prop'] or r['exp_mu_effect_qual']: exp_qual_id, created = MutationQual.objects.get_or_create(qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop']) else: exp_qual_id = None if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r['opt_sign'] or r['opt_percentage'] or r['opt_qual'] or r['opt_agonist']: exp_opt_id, created = MutationOptional.objects.get_or_create(type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist']) else: exp_opt_id = None try: mutation, created = Mutation.objects.get_or_create(amino_acid=r['mutation_to'],protein=protein, residue=res) except IntegrityError: mutation = Mutation.objects.get(amino_acid=r['mutation_to'],protein=protein, residue=res) logtypes = ['pEC50','pIC50','pK'] foldchange = 0 typefold = '' if r['exp_wt_value']!=0 and r['exp_mu_value_raw']!=0: #fix for new format if re.match("(" + ")|(".join(logtypes) + ")", r['exp_type']): #-log values! 
foldchange = round(math.pow(10,-r['exp_mu_value_raw'])/pow(10,-r['exp_wt_value']),3); typefold = r['exp_type']+"_log" else: foldchange = round(r['exp_mu_value_raw']/r['exp_wt_value'],3); typefold = r['exp_type']+"_not_log" if foldchange<1 and foldchange!=0: foldchange = -round((1/foldchange),3) elif r['fold_effect']!=0: foldchange = round(r['fold_effect'],3); if foldchange<1: foldchange = -round((1/foldchange),3); raw_experiment = self.insert_raw(r) bulk = MutationExperiment( refs=pub, review=pub_review, protein=protein, residue=res, ligand=l, ligand_role=l_role, ligand_ref = l_ref, #raw = raw_experiment, #raw_experiment, OR None optional = exp_opt_id, exp_type=exp_type_id, exp_func=exp_func_id, exp_qual = exp_qual_id, mutation=mutation, wt_value=r['exp_wt_value'], # wt_unit=r['exp_wt_unit'], mu_value = r['exp_mu_value_raw'], mu_sign = r['exp_mu_effect_sign'], foldchange = foldchange ) # mut_id = obj.id bulk_r.append(raw_experiment) bulk_m.append(bulk) inserted += 1 end = time.time() diff = round(end - current,2) #print(diff) self.logger.info('Parsed '+str(c)+' mutant data entries. Skipped '+str(skipped)) current = time.time() raws = MutationRaw.objects.bulk_create(bulk_r) for i,me in enumerate(bulk_m): me.raw = raws[i] MutationExperiment.objects.bulk_create(bulk_m) end = time.time() diff = round(end - current,2) diff_2 = round(end - current_sheet,2) print("overall",diff_2,"bulk",diff,len(bulk_m),"skipped",str(skipped))
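# --- Illustrative sketch (standalone, not part of the loader class) ---------------------
# Both main_func variants first normalise reference identifiers that Excel has turned into
# floats (e.g. 12345678.0) and then guess the resource type: all-digit strings are treated
# as PubMed IDs and anything else as a DOI; the variant that follows additionally routes
# http(s) review links to a 'raw_link' resource. A standalone sketch of that normalisation;
# the function name and the example DOI string are illustrative only.
def classify_reference(ref):
    ref = str(ref)
    try:
        # Excel sometimes hands back "12345678.0" for a PubMed ID
        ref = str(int(float(ref)))
    except ValueError:
        pass
    if ref.isdigit():
        return ref, 'pubmed'
    if ref.startswith('http'):
        return ref, 'raw_link'  # only used for the review column in the loader below
    return ref, 'doi'

assert classify_reference(25533411.0) == ('25533411', 'pubmed')
assert classify_reference('10.1000/example.doi') == ('10.1000/example.doi', 'doi')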
def main_func(self, positions, iteration,count,lock): # filenames # if not positions[1]: # rows = self.data[positions[0]:] # else: # rows = self.data[positions[0]:positions[1]] missing_proteins = {} mutants_for_proteins = {} wrong_uniport_ids = {} c = 0 skipped = 0 inserted = 0 bulk_m = [] bulk_r = [] current_sheet = time.time() rows = self.data_all while count.value<len(rows): with lock: r = rows[count.value] count.value +=1 # for r in rows: # print(r['source_file'],c) # PRINT IF ERRORS OCCUR #self.logger.info('File '+str(r['source_file'])+' number '+str(c)) current = time.time() c += 1 # if c%100==0: # self.logger.info('Parsed '+str(c)+' mutant data entries') # publication try: #fix if it thinks it's float. float(r['reference']) r['reference'] = str(int(r['reference'])) float(r['review']) r['review'] = str(int(r['review'])) except ValueError: pass if r['reference'].isdigit(): #assume pubmed pub_type = 'pubmed' else: #assume doi pub_type = 'doi' if r['reference'] not in self.publication_cache: try: wl = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type) except WebLink.DoesNotExist: try: wl = WebLink.objects.create(index=r['reference'], web_resource = WebResource.objects.get(slug=pub_type)) except IntegrityError: wl = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type) try: pub = Publication.objects.get(web_link=wl) except Publication.DoesNotExist: pub = Publication() try: pub.web_link = wl pub.save() except IntegrityError: pub = Publication.objects.get(web_link=wl) if pub_type == 'doi': pub.update_from_doi(doi=r['reference']) elif pub_type == 'pubmed': pub.update_from_pubmed_data(index=r['reference']) try: pub.save() except: self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type) continue #if something off with publication, skip. self.publication_cache[r['reference']] = pub else: pub = self.publication_cache[r['reference']] # print(r['review'],r['reference']) if r['review'].isdigit(): #assume pubmed pub_type = 'pubmed' elif r['review'].startswith('http'): pub_type = 'raw_link' else: #assume doi pub_type = 'doi' # print(r['review'],pub_type) if r['review']: if r['review'] not in self.publication_cache: try: wl = WebLink.objects.get(index=r['review'], web_resource__slug=pub_type) except WebLink.DoesNotExist: try: wl = WebLink.objects.create(index=r['review'], web_resource = WebResource.objects.get(slug=pub_type)) except IntegrityError: wl = WebLink.objects.get(index=r['review'], web_resource__slug=pub_type) try: pub_review = Publication.objects.get(web_link=wl) except Publication.DoesNotExist: pub_review = Publication() try: pub_review.web_link = wl pub_review.save() except IntegrityError: pub_review = Publication.objects.get(web_link=wl) if pub_type == 'doi': pub_review.update_from_doi(doi=r['review']) elif pub_type == 'pubmed': pub_review.update_from_pubmed_data(index=r['review']) try: pub_review.save() except: self.logger.error('error with review ' + str(r['review']) + ' ' + pub_type) continue #if something off with publication, skip. 
self.publication_cache[r['review']] = pub_review else: pub_review = self.publication_cache[r['review']] else: pub_review = None l = None if str(r['ligand_name']) in self.ligand_cache: if r['ligand_id'] in self.ligand_cache[str(r['ligand_name'])]: l = self.ligand_cache[str(r['ligand_name'])][r['ligand_id']] else: self.ligand_cache[str(r['ligand_name'])] = {} if not l: try: l = get_or_make_ligand(r['ligand_id'],r['ligand_type'],str(r['ligand_name'])) except Exception as msg: print('Something errored with ligand, aborting entry of mutation',r['ligand_name'],r['ligand_type'],r['ligand_id'],r['source_file']) print(msg) traceback.print_exc() continue self.ligand_cache[str(r['ligand_name'])][r['ligand_id']] = l l_ref = None if str(r['exp_mu_ligand_ref']) in self.ref_ligand_cache: l_ref = self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] else: if Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=True).exists(): #if this name is canonical and it has a ligand record already l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False).exists(): #if this matches an alias that only has "one" parent canonical name - eg distinct l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False) elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True).exists(): #if this matches an alias that only has several canonical parents, must investigate, start with empty. lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = False l_ref.ambigious_alias = True l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) l_ref.save() elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False).exists(): #amigious_alias not specified l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) l_ref.ambigious_alias = False l_ref.save() elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status. 
lp = LigandProperities() lp.save() l_ref = Ligand() l_ref.properities = lp l_ref.name = r['exp_mu_ligand_ref'] l_ref.canonical = True l_ref.ambigious_alias = False try: l_ref.save() l_ref.load_by_name(r['exp_mu_ligand_ref']) except IntegrityError: if Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=True).exists(): l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True) else: l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) # print("error failing ligand, duplicate?") try: l_ref.save() except IntegrityError: l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False) # print("error failing ligand, duplicate?") # logger.error("FAILED SAVING LIGAND, duplicate?") else: l_ref = None self.ref_ligand_cache[str(r['exp_mu_ligand_ref'])] = l_ref protein_id = 0 residue_id = 0 protein=Protein.objects.filter(entry_name=r['protein']) if protein.exists(): protein=protein.get() if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 elif r['protein'] not in missing_proteins: try: r['protein'] = wrong_uniport_ids[r['protein']] real_uniprot = wrong_uniport_ids[r['protein']] protein=Protein.objects.get(entry_name=r['protein']) # print('fetched with lookup table',r['protein']) except: # look for it as uniprot protein=Protein.objects.filter(web_links__web_resource__slug='uniprot', web_links__index=r['protein'].upper()) if protein.exists(): protein=protein.get() real_uniprot = protein.entry_name if r['protein'] in mutants_for_proteins: mutants_for_proteins[r['protein']] += 1 else: mutants_for_proteins[r['protein']] = 1 else: # Try to lookup in uniprot to catch typing errors / variants in entry_name url = 'http://www.uniprot.org/uniprot/$index.xml' cache_dir = ['uniprot', 'id'] uniprot_protein = fetch_from_web_api(url, r['protein'], cache_dir, xml = True) try: real_uniprot = uniprot_protein.find('.//{http://uniprot.org/uniprot}name').text.lower() protein=Protein.objects.get(entry_name=real_uniprot) except: skipped += 1 if r['protein'] in missing_proteins: missing_proteins[r['protein']] += 1 else: missing_proteins[r['protein']] = 1 # print('Skipped due to no protein '+ r['protein']) self.logger.error('Skipped due to no protein '+ r['protein']) continue wrong_uniport_ids[r['protein']] = protein.entry_name r['protein'] = real_uniprot else: missing_proteins[r['protein']] += 1 continue res=Residue.objects.filter(protein_conformation__protein=protein,amino_acid=r['mutation_from'],sequence_number=r['mutation_pos']) #FIXME MAKE AA CHECK if res.exists(): res=res.get() else: self.logger.error('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:'+str(r['mutation_pos']) + ' AA:'+r['mutation_from']) # print('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:'+str(r['mutation_pos']) + ' AA:'+r['mutation_from'],r['source_file']) skipped += 1 continue if r['ligand_class']: try: l_role, created = LigandRole.objects.get_or_create(name=r['ligand_class'], defaults={'slug': slugify(r['ligand_class'])[:50]}) # FIXME this should not be needed except Exception as e: if LigandRole.objects.filter(slug=slugify(r['ligand_class'])[:50]).exists(): l_role = LigandRole.objects.get(slug=slugify(r['ligand_class'])[:50]) if l_role.name == slugify(r['ligand_class'])[:50]: #if name of role is same as slug, then it was created by constructs script, replace it l_role.name = r['ligand_class'] l_role.save() else: print(e) print("Error with",r['ligand_class'],slugify(r['ligand_class'])[:50] 
) l_role, created = LigandRole.objects.get_or_create(slug=slugify(r['ligand_class'])[:50]) # FIXME this should not be needed else: l_role = None if r['exp_type']: exp_type_id, created = MutationExperimentalType.objects.get_or_create(type=r['exp_type']) else: exp_type_id = None if r['exp_func']: exp_func_id, created = MutationFunc.objects.get_or_create(func=r['exp_func']) else: exp_func_id = None if r['exp_mu_effect_ligand_prop'] or r['exp_mu_effect_qual']: exp_qual_id, created = MutationQual.objects.get_or_create(qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop']) else: exp_qual_id = None # if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r['opt_sign'] or r['opt_percentage'] or r['opt_qual'] or r['opt_agonist']: # exp_opt_id, created = MutationOptional.objects.get_or_create(type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist']) # else: # exp_opt_id = None try: mutation, created = Mutation.objects.get_or_create(amino_acid=r['mutation_to'],protein=protein, residue=res) except IntegrityError: mutation = Mutation.objects.get(amino_acid=r['mutation_to'],protein=protein, residue=res) logtypes = ['pEC50','pIC50','pK'] foldchange = 0 typefold = '' if r['exp_wt_value']!=0 and r['exp_mu_value_raw']!=0: #fix for new format if re.match("(" + ")|(".join(logtypes) + ")", r['exp_type']): #-log values! try: foldchange = round(math.pow(10,-r['exp_mu_value_raw'])/pow(10,-r['exp_wt_value']),3); except: print(r) typefold = r['exp_type']+"_log" elif "%"==r['exp_wt_unit']: # if % then it's a difference case, then lower value is bad. Otherwise it's conc and lower is better foldchange = round(r['exp_wt_value']/r['exp_mu_value_raw'],3); else: foldchange = round(r['exp_mu_value_raw']/r['exp_wt_value'],3); typefold = r['exp_type']+"_not_log" if foldchange>0 and foldchange<1 and foldchange!=0: foldchange = -round((1/foldchange),3) elif r['fold_effect']!=0: foldchange = round(r['fold_effect'],3); if foldchange<1: foldchange = -round((1/foldchange),3); r['fold_effect'] = foldchange raw_experiment = self.insert_raw(r) # raw_experiment.save() bulk = MutationExperiment( refs=pub, review=pub_review, submitting_group = r['submitting_group'], data_container = r['data_container'], data_container_number = r['data_container_number'], protein=protein, residue=res, ligand=l, ligand_role=l_role, ligand_ref = l_ref, # raw = raw_experiment, #raw_experiment, OR None # optional = exp_opt_id, exp_type=exp_type_id, exp_func=exp_func_id, exp_qual = exp_qual_id, mutation=mutation, wt_value=r['exp_wt_value'], # wt_unit=r['exp_wt_unit'], mu_value = r['exp_mu_value_raw'], mu_sign = r['exp_mu_effect_sign'], foldchange = foldchange, opt_receptor_expression = r['opt_receptor_expression'], opt_basal_activity = r['opt_basal_activity'], opt_gain_of_activity = r['opt_gain_of_activity'], opt_ligand_emax = r['opt_ligand_emax'], opt_agonist = r['opt_agonist'], ) # for line,val in r.items(): # val = str(val) # if len(val)>100: # print(line,"too long",val) # mut_id = obj.id bulk_r.append(raw_experiment) bulk_m.append(bulk) # try: # bulk.save() # except Exception as e: # print(e) # print(r) # break #print('saved ',r['source_file']) inserted += 1 end = time.time() diff = round(end - current,2) #print(diff) self.logger.info('Parsed '+str(c)+' mutant data entries. 
Skipped '+str(skipped)) current = time.time() raws = MutationRaw.objects.bulk_create(bulk_r) for i,me in enumerate(bulk_m): me.raw = raws[i] MutationExperiment.objects.bulk_create(bulk_m) end = time.time() diff = round(end - current,2) diff_2 = round(end - current_sheet,2) print("overall",diff_2,"bulk",diff,len(bulk_m),"skipped",str(skipped)) sorted_missing_proteins = sorted(missing_proteins.items(), key=operator.itemgetter(1),reverse=True)
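# --- Illustrative sketch (standalone, not part of the loader class) ---------------------
# The tail of main_func avoids one INSERT per row by collecting MutationRaw and
# MutationExperiment instances and writing each set with a single bulk_create, pairing the
# two lists up by position afterwards. A hedged sketch of that pattern, assuming a
# configured Django project, an assumed model import path, and a backend such as
# PostgreSQL where bulk_create returns objects with their primary keys assigned.
from mutation.models import MutationRaw, MutationExperiment  # assumed app path

def flush_mutation_batch(bulk_r, bulk_m):
    """bulk_r[i] is the raw record behind bulk_m[i]; the two lists are built in step."""
    raws = MutationRaw.objects.bulk_create(bulk_r)
    for raw, experiment in zip(raws, bulk_m):
        experiment.raw = raw  # positional pairing, as in the loop above
    MutationExperiment.objects.bulk_create(bulk_m)
    return len(bulk_m)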