Пример #1
0
 def build_ligand_properties(self):
     """Create, save and return a blank ``LigandProperities`` record.

     The record is linked to the ``LigandType`` named 'small molecule' and
     every descriptor field is explicitly set to ``None`` before saving.
     An info message is logged once the record has been stored.
     """
     properties = LigandProperities()
     properties.ligand_type = LigandType.objects.get(name='small molecule')

     # Blank out every descriptor so the stored record carries no data.
     blank_fields = ('smiles', 'inchikey', 'sequence', 'mw',
                     'rotatable_bonds', 'hacc', 'hdon', 'logp')
     for field_name in blank_fields:
         setattr(properties, field_name, None)

     properties.save()
     self.logger.info("Could not create ligand, empty is returned")
     return properties
Пример #2
0
def _next_ligand_name(base_name, existing_names):
    """Return ``base_name`` followed by the next unused ``_<n>`` suffix.

    Only names of the exact form ``<base_name>_<digits>`` are taken into
    account; anything else that merely starts with ``base_name`` is ignored.
    With no numbered name present the result is ``<base_name>_1``.
    """
    prefix = base_name + "_"
    highest = 0
    for existing in existing_names:
        if not existing.startswith(prefix):
            continue
        suffix = existing[len(prefix):]
        # isdecimal() is False for '' and for anything int() could reject.
        if suffix.isdecimal():
            highest = max(highest, int(suffix))
    return prefix + str(highest + 1)


def _get_or_create_smiles_ligand(ligand_name, smiles, ligand_type):
    """Find or create the canonical ligand for a SMILES string.

    Lookups are tried in this order, the first hit wins:
      1. canonical ligand with this exact name and this SMILES;
      2. canonical ligand whose name starts with ``ligand_name`` and has
         this SMILES;
      3. canonical ligand with this exact name and no SMILES stored - the
         SMILES is then written to its properties.
    If none match, a new canonical ligand with new properties is saved,
    using a numbered variant of the name when the name is already taken.
    """
    try:
        return Ligand.objects.get(name=ligand_name,
                                  canonical=True,
                                  properities__smiles=smiles)
    except Ligand.DoesNotExist:
        pass

    try:
        return Ligand.objects.get(name__startswith=ligand_name,
                                  canonical=True,
                                  properities__smiles=smiles)
    except Ligand.DoesNotExist:
        pass

    try:
        ligand = Ligand.objects.get(name=ligand_name,
                                    canonical=True,
                                    properities__smiles=None)
    except Ligand.DoesNotExist:
        pass
    else:
        # Same name but no SMILES stored yet: fill it in.
        ligand.properities.smiles = smiles
        ligand.properities.save()
        ligand.save()
        return ligand

    # Insert a new ligand, but first make sure the name is unique.
    if Ligand.objects.filter(name=ligand_name).exists():
        taken_names = [
            other.name for other in Ligand.objects.filter(
                name__startswith=ligand_name, canonical=True)
        ]
        ligand_name = _next_ligand_name(ligand_name, taken_names)

    properties = LigandProperities()
    properties.smiles = smiles
    properties.ligand_type = ligand_type
    properties.save()

    ligand = Ligand()
    ligand.name = ligand_name
    ligand.properities = properties
    ligand.canonical = True  # maybe false, but that would break stuff.
    ligand.ambigious_alias = False
    try:
        ligand.save()
    except IntegrityError:
        # Saving was rejected; fall back to the stored canonical record.
        ligand = Ligand.objects.get(name=ligand_name, canonical=True)
    return ligand


def get_or_make_ligand(ligand_id, type_id, name=None):
    """Return the ``Ligand`` for an identifier or name, creating it if needed.

    Args:
        ligand_id: Identifier of the ligand. It is matched against the index
            of the ligand's 'pubchem' web link; when ``type_id`` is
            'SMILES' it is also used as the SMILES string itself.
        type_id: 'PubChem CID' or 'SMILES' selects the identifier based
            lookup. Any other value falls back to a lookup by ``name``.
        name: Optional ligand name.

    Returns:
        The matching or newly created ligand. ``None`` is returned when
        ``type_id`` is not a supported identifier type and no name is given,
        and also when ``load_from_pubchem`` yields ``None`` for a
        'PubChem CID'.

    Raises:
        Exception: if no ``WebResource`` with slug 'pubchem' exists.
    """
    if type_id == 'PubChem CID' or type_id == 'SMILES':
        pubchem_lookup_value = 'cid' if type_id == 'PubChem CID' else 'smiles'

        try:
            web_resource = WebResource.objects.get(slug='pubchem')
        except WebResource.DoesNotExist:
            # abort if the pubchem resource is not found
            raise Exception('PubChem resource not found, aborting!')

        ligand_name = name if name else False

        try:
            # if this name is canonical and it has a ligand record already
            if ligand_name is False:
                # No name to match on: take the first canonical ligand that
                # carries this web link, if any.
                ligand = None
                matches = Ligand.objects.filter(
                    canonical=True,
                    properities__web_links__web_resource=web_resource,
                    properities__web_links__index=ligand_id)
                for candidate in matches:
                    ligand = candidate
                    break
                if ligand is None:
                    raise Ligand.DoesNotExist(
                        'No canonical ligand for this web link')
            else:
                ligand = Ligand.objects.get(
                    name=ligand_name,
                    canonical=True,
                    properities__web_links__web_resource=web_resource,
                    properities__web_links__index=ligand_id)
        except Ligand.DoesNotExist:
            try:
                # if exists under different name
                l_canonical = Ligand.objects.get(
                    properities__web_links__web_resource=web_resource,
                    properities__web_links__index=ligand_id,
                    canonical=True)
                try:
                    ligand, created = Ligand.objects.get_or_create(
                        properities=l_canonical.properities,
                        name=ligand_name,
                        canonical=False)
                except IntegrityError:
                    ligand = Ligand.objects.get(
                        properities=l_canonical.properities,
                        name=ligand_name,
                        canonical=False)
            except Ligand.DoesNotExist:
                # fetch ligand from pubchem
                default_ligand_type = 'Small molecule'
                lt, created = LigandType.objects.get_or_create(
                    slug=slugify(default_ligand_type),
                    defaults={'name': default_ligand_type})
                ligand = Ligand().load_from_pubchem(
                    pubchem_lookup_value, ligand_id, lt, ligand_name)
                # insert manually if smiles and unfound in pubchem
                if ligand is None and type_id == 'SMILES':
                    ligand = _get_or_create_smiles_ligand(
                        ligand_name, ligand_id, lt)

    elif name:

        # if this name is canonical and it has a ligand record already
        if Ligand.objects.filter(name=name, canonical=True).exists():
            ligand = Ligand.objects.get(name=name, canonical=True)

        # if this matches an alias that only has "one" parent canonical name - eg distinct
        elif Ligand.objects.filter(name=name,
                                   canonical=False,
                                   ambigious_alias=False).exists():
            ligand = Ligand.objects.get(name=name,
                                        canonical=False,
                                        ambigious_alias=False)

        # if this matches an alias that has several canonical parents, must investigate, start
        # with empty.
        elif Ligand.objects.filter(name=name,
                                   canonical=False,
                                   ambigious_alias=True).exists():
            lp = LigandProperities()
            lp.save()
            ligand = Ligand()
            ligand.properities = lp
            ligand.name = name
            ligand.canonical = False
            ligand.ambigious_alias = True
            ligand.save()
            ligand.load_by_name(name)

        # if neither a canonical or alias exists, create the records. Remember to check for
        # canonical / alias status.
        else:
            lp = LigandProperities()
            lp.save()
            ligand = Ligand()
            ligand.properities = lp
            ligand.name = str(name)
            ligand.canonical = True
            ligand.ambigious_alias = False
            try:
                ligand.save()
                ligand.load_by_name(str(name))
            except IntegrityError:
                ligand = Ligand.objects.get(name=str(name), canonical=True)
    else:
        ligand = None

    return ligand
Пример #3
0
    def create_mutant_data(self, filenames):
        """Parse mutant data files and store their rows as mutation experiments.

        Each file is read from ``self.structure_data_dir``. Excel files
        (``xls``/``xlsx``) are loaded with ``self.loaddatafromexcel`` and
        ``self.analyse_rows``; ``yaml`` files are mapped onto the same row
        layout. Files with any other extension are logged and skipped.

        For every row the publication, ligand, reference ligand, protein,
        residue and the auxiliary lookup records are fetched or created, the
        fold change is computed, and a ``MutationExperiment`` is stored.
        Rows whose protein or residue cannot be found, or whose publication
        cannot be saved, are skipped.

        Args:
            filenames: Names of the files to parse, relative to
                ``self.structure_data_dir``. When empty or ``None`` every
                entry of that directory is considered.

        Raises:
            Exception: if a row needs a PubChem lookup and no ``WebResource``
                with slug 'pubchem' exists.
        """
        self.logger.info('CREATING MUTANT DATA')

        # what files should be parsed?
        if not filenames:
            filenames = os.listdir(self.structure_data_dir)

        # Proteins already reported as missing, so each is logged only once.
        missing_proteins = {}

        # Experiment types whose values are -log values; compiled once here
        # because the pattern does not change between rows.
        logtypes = ['pEC50','pIC50','pK']
        log_type_pattern = re.compile("(" + ")|(".join(logtypes) + ")")

        for source_file in filenames:
            source_file_path = os.sep.join([self.structure_data_dir, source_file])
            # skip anything that is not a regular file, and hidden files
            if not os.path.isfile(source_file_path) or source_file[0] == '.':
                continue

            self.logger.info('Reading file {}'.format(source_file_path))

            if source_file[-4:]=='xlsx' or source_file[-3:]=='xls':
                rows = self.loaddatafromexcel(source_file_path)
                rows = self.analyse_rows(rows)
            elif source_file[-4:]=='yaml':
                # NOTE(review): yaml.load can construct arbitrary Python
                # objects; consider yaml.safe_load if these files may come
                # from an untrusted source.
                with open(source_file_path, 'r') as yaml_file:
                    rows = yaml.load(yaml_file)
                temp = []
                for r in rows:
                    # map the yaml entry onto the row layout used below
                    d = {
                        'reference': r['pubmed'],
                        'protein': r['entry_name'].replace("__","_").lower(),
                        'mutation_pos': r['seq'],
                        'mutation_from': r['from_res'],
                        'mutation_to': r['to_res'],
                        'ligand_name': '',
                        'ligand_type': '',
                        'ligand_id': '',
                        'ligand_class': '',
                        'exp_type': '',
                        'exp_func': '',
                        'exp_wt_value': 0,
                        'exp_wt_unit': '',
                        'exp_mu_effect_sign': '',
                        'exp_mu_value_raw': 0,
                        'fold_effect': 0,
                        'exp_mu_effect_qual': '',
                        'exp_mu_effect_ligand_prop': '',
                        'exp_mu_ligand_ref': '',
                        'opt_type': '',
                        'opt_wt': 0,
                        'opt_mu': 0,
                        'opt_sign': '',
                        'opt_percentage': 0,
                        'opt_qual': '',
                        'opt_agonist': '',
                    }
                    if len(d['mutation_to'])>1 or len(d['mutation_from'])>1: #if something is off with amino acid
                        continue
                    temp.append(d)
                rows = temp
            else:
                self.logger.info('unknown format {}'.format(source_file))
                continue

            c = 0
            skipped = 0
            for r in rows:
                c += 1
                if c%1000==0:
                    self.logger.info('Parsed '+str(c)+' mutant data entries')

                # publication
                try: #fix if it thinks it's float.
                    float(r['reference'])
                    r['reference'] = str(int(r['reference']))
                except ValueError:
                    pass

                if r['reference'].isdigit(): #assume pubmed
                    pub_type = 'pubmed'
                else: #assume doi
                    pub_type = 'doi'

                try:
                    pub = Publication.objects.get(web_link__index=r['reference'], web_link__web_resource__slug=pub_type)
                except Publication.DoesNotExist:
                    pub = Publication()
                    try:
                        pub.web_link = WebLink.objects.get(index=r['reference'], web_resource__slug=pub_type)
                    except WebLink.DoesNotExist:
                        wl = WebLink.objects.create(index=r['reference'],
                            web_resource = WebResource.objects.get(slug=pub_type))
                        pub.web_link = wl

                    if pub_type == 'doi':
                        pub.update_from_doi(doi=r['reference'])
                    elif pub_type == 'pubmed':
                        pub.update_from_pubmed_data(index=r['reference'])
                    try:
                        pub.save()
                    except Exception:
                        # best effort: a broken publication only costs this row
                        self.logger.error('error with reference ' + str(r['reference']) + ' ' + pub_type)
                        continue #if something off with publication, skip.

                # ligand
                if r['ligand_type']=='PubChem CID' or r['ligand_type']=='SMILES':
                    if r['ligand_type']=='PubChem CID':
                        pubchem_lookup_value = 'cid'
                    elif r['ligand_type']=='SMILES':
                        pubchem_lookup_value = 'smiles'

                    try:
                        web_resource = WebResource.objects.get(slug='pubchem')
                    except WebResource.DoesNotExist:
                        # abort if the pubchem resource is not found
                        raise Exception('PubChem resource not found, aborting!')

                    if 'ligand_name' in r and r['ligand_name']:
                        ligand_name = str(r['ligand_name'])
                    else:
                        ligand_name = False

                    try:
                        # if this name is canonical and it has a ligand record already
                        l = Ligand.objects.get(name=ligand_name, canonical=True,
                            properities__web_links__web_resource=web_resource,
                            properities__web_links__index=r['ligand_id'])
                    except Ligand.DoesNotExist:
                        try:
                            # if exists under different name
                            l_canonical = Ligand.objects.get(properities__web_links__web_resource=web_resource,
                                properities__web_links__index=r['ligand_id'], canonical=True)
                            l, created = Ligand.objects.get_or_create(properities = l_canonical.properities,
                                name = ligand_name, canonical = False)
                            if created:
                                self.logger.info('Created ligand {}'.format(l.name))
                        except Ligand.DoesNotExist:
                            # fetch ligand from pubchem
                            default_ligand_type = 'Small molecule'
                            lt, created = LigandType.objects.get_or_create(slug=slugify(default_ligand_type),
                                defaults={'name': default_ligand_type})
                            l = Ligand()
                            l = l.load_from_pubchem(pubchem_lookup_value, r['ligand_id'], lt, ligand_name)
                            if l is None and r['ligand_type']=='SMILES': #insert manually if smiles and unfound in pubchem
                                try:
                                    l = Ligand.objects.get(name=ligand_name, canonical=True,
                                                            properities__smiles=r['ligand_id'])
                                except Ligand.DoesNotExist:
                                    l = Ligand()
                                    l.name = ligand_name
                                    lp = LigandProperities()
                                    lp.smiles = r['ligand_id']
                                    lp.ligand_type = lt
                                    lp.save()
                                    l.properities = lp
                                    l.canonical = True #maybe false, but that would break stuff.
                                    l.ambigious_alias = False
                                    l.save()
                                    self.logger.info('Created Ligand {} manually'.format(l.name))

                elif r['ligand_name']:

                    # if this name is canonical and it has a ligand record already
                    if Ligand.objects.filter(name=r['ligand_name'], canonical=True).exists():
                        l = Ligand.objects.get(name=r['ligand_name'], canonical=True)

                    # if this matches an alias that only has "one" parent canonical name - eg distinct
                    elif Ligand.objects.filter(name=r['ligand_name'], canonical=False,
                        ambigious_alias=False).exists():
                        l = Ligand.objects.get(name=r['ligand_name'], canonical=False, ambigious_alias=False)

                    # if this matches an alias that has several canonical parents, must investigate, start
                    # with empty.
                    elif Ligand.objects.filter(name=r['ligand_name'], canonical=False,
                        ambigious_alias=True).exists():
                        lp = LigandProperities()
                        lp.save()
                        l = Ligand()
                        l.properities = lp
                        l.name = r['ligand_name']
                        l.canonical = False
                        l.ambigious_alias = True
                        l.save()
                        l.load_by_name(r['ligand_name'])

                    # if neither a canonical or alias exists, create the records. Remember to check for
                    # canonical / alias status.
                    else:
                        lp = LigandProperities()
                        lp.save()
                        l = Ligand()
                        l.properities = lp
                        l.name = str(r['ligand_name'])
                        l.canonical = True
                        l.ambigious_alias = False
                        l.save()
                        l.load_by_name(str(r['ligand_name']))
                else:
                    l = None

                # reference ligand
                if Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=True).exists(): #if this name is canonical and it has a ligand record already
                    l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=True)
                elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False).exists(): #if this matches an alias that only has "one" parent canonical name - eg distinct
                    l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=False)
                elif Ligand.objects.filter(name=r['exp_mu_ligand_ref'], canonical=False, ambigious_alias=True).exists(): #if this matches an alias that has several canonical parents, must investigate, start with empty.
                    lp = LigandProperities()
                    lp.save()
                    l_ref = Ligand()
                    l_ref.properities = lp
                    l_ref.name = r['exp_mu_ligand_ref']
                    l_ref.canonical = False
                    l_ref.ambigious_alias = True
                    l_ref.save()
                    l_ref.load_by_name(r['exp_mu_ligand_ref'])
                    l_ref.save()
                elif r['exp_mu_ligand_ref']: #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status.
                    lp = LigandProperities()
                    lp.save()
                    l_ref = Ligand()
                    l_ref.properities = lp
                    l_ref.name = r['exp_mu_ligand_ref']
                    l_ref.canonical = True
                    l_ref.ambigious_alias = False
                    l_ref.save()
                    l_ref.load_by_name(r['exp_mu_ligand_ref'])
                    l_ref.save()
                else:
                    l_ref = None

                # protein
                protein=Protein.objects.filter(entry_name=r['protein'])
                if protein.exists():
                    protein=protein.get()
                else:
                    skipped += 1
                    if r['protein'] in missing_proteins:
                        missing_proteins[r['protein']] += 1
                    else:
                        missing_proteins[r['protein']] = 1
                        self.logger.error('Skipped due to no protein '+ r['protein'])
                    continue

                # residue
                res=Residue.objects.filter(protein_conformation__protein=protein,amino_acid=r['mutation_from'],sequence_number=r['mutation_pos']) #FIXME MAKE AA CHECK
                if res.exists():
                    res=res.get()
                else:
                    self.logger.error('Skipped due to no residue or mismatch AA ' + r['protein'] + ' pos:'+str(r['mutation_pos']) + ' AA:'+r['mutation_from'])
                    skipped += 1
                    continue

                if r['ligand_class']:
                    l_role, created = LigandRole.objects.get_or_create(name=r['ligand_class'],
                        defaults={'slug': slugify(r['ligand_class'])[:50]}) # FIXME this should not be needed
                else:
                    l_role = None

                if r['exp_type']:
                    exp_type_id, created = MutationExperimentalType.objects.get_or_create(type=r['exp_type'])
                else:
                    exp_type_id = None

                if r['exp_func']:
                    exp_func_id, created = MutationFunc.objects.get_or_create(func=r['exp_func'])
                else:
                    exp_func_id = None

                if r['exp_mu_effect_ligand_prop'] or r['exp_mu_effect_qual']:
                    exp_qual_id, created = MutationQual.objects.get_or_create(qual=r['exp_mu_effect_qual'], prop=r['exp_mu_effect_ligand_prop'])
                else:
                    exp_qual_id = None

                if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r['opt_sign'] or r['opt_percentage'] or r['opt_qual'] or r['opt_agonist']:
                    exp_opt_id, created =  MutationOptional.objects.get_or_create(type=r['opt_type'], wt=r['opt_wt'], mu=r['opt_mu'], sign=r['opt_sign'], percentage=r['opt_percentage'], qual=r['opt_qual'], agonist=r['opt_agonist'])
                else:
                    exp_opt_id = None

                mutation, created =  Mutation.objects.get_or_create(amino_acid=r['mutation_to'],protein=protein, residue=res)

                # fold change: mutant relative to wild type; ratios below 1
                # are stored as the negated inverse
                foldchange = 0
                if r['exp_wt_value']!=0 and r['exp_mu_value_raw']!=0: #fix for new format
                    if log_type_pattern.match(r['exp_type']):  #-log values!
                        foldchange = round(math.pow(10,-r['exp_mu_value_raw'])/pow(10,-r['exp_wt_value']),3)
                    else:
                        foldchange = round(r['exp_mu_value_raw']/r['exp_wt_value'],3)

                    if foldchange<1 and foldchange!=0:
                        foldchange = -round((1/foldchange),3)
                elif r['fold_effect']!=0:
                    foldchange = round(r['fold_effect'],3)
                    # a tiny fold_effect can round to 0; do not divide by it
                    if foldchange<1 and foldchange!=0:
                        foldchange = -round((1/foldchange),3)

                raw_experiment = self.insert_raw(r)
                MutationExperiment.objects.get_or_create(
                    refs=pub,
                    protein=protein,
                    residue=res,
                    ligand=l,
                    ligand_role=l_role,
                    ligand_ref = l_ref,
                    raw = raw_experiment,
                    optional = exp_opt_id,
                    exp_type=exp_type_id,
                    exp_func=exp_func_id,
                    exp_qual = exp_qual_id,

                    mutation=mutation,
                    wt_value=r['exp_wt_value'], #
                    wt_unit=r['exp_wt_unit'],

                    mu_value = r['exp_mu_value_raw'],
                    mu_sign = r['exp_mu_effect_sign'],
                    foldchange = foldchange
                )

            self.logger.info('Parsed '+str(c)+' mutant data entries. Skipped '+str(skipped))

        self.logger.info('COMPLETED CREATING MUTANTS')
Пример #4
0
def _next_unique_ligand_name(base_name, existing_names):
    """Return ``base_name`` with the next unused numeric ``_N`` suffix.

    Only names of the exact form ``<base_name>_<digits>`` count when looking
    for the highest suffix already in use. Every other name (including
    ``base_name`` itself, or longer names that merely share the prefix) is
    ignored, so the result is ``<base_name>_1`` when no numbered variant
    exists yet.
    """
    prefix = base_name + "_"
    highest = 0
    for existing in existing_names:
        suffix = existing[len(prefix):] if existing.startswith(prefix) else ""
        if suffix.isdecimal():
            highest = max(highest, int(suffix))
    return prefix + str(highest + 1)


def get_or_make_ligand(ligand_id, type_id, name=None):
    """Look up a ``Ligand`` record, creating it when none matches.

    Args:
        ligand_id: Identifier interpreted according to ``type_id`` (a PubChem
            CID or a SMILES string). Not used when ``type_id`` is anything
            else.
        type_id: ``'PubChem CID'`` or ``'SMILES'`` to resolve through the
            identifier; any other value falls back to a lookup by ``name``.
        name: Optional ligand name.

    Returns:
        The matching or newly created ``Ligand``. ``None`` when ``type_id`` is
        not a supported identifier type and no name was given, or when
        ``load_from_pubchem`` returned ``None`` for a PubChem CID.

    Raises:
        Exception: If no ``WebResource`` with slug ``'pubchem'`` exists.
    """
    if type_id == 'PubChem CID' or type_id == 'SMILES':
        if type_id == 'PubChem CID':
            pubchem_lookup_value = 'cid'
        else:
            pubchem_lookup_value = 'smiles'

        try:
            web_resource = WebResource.objects.get(slug='pubchem')
        except WebResource.DoesNotExist as err:
            # abort if the pubchem resource is not found
            raise Exception('PubChem resource not found, aborting!') from err

        # ``False`` (not ``None``) marks "no name given"; the value is passed
        # on unchanged to the queries and to ``load_from_pubchem`` below.
        ligand_name = name if name else False

        try:
            if ligand_name is False:
                # No name to match on: take the first canonical ligand linked
                # to this PubChem index, if there is one.
                matches = Ligand.objects.filter(
                    canonical=True,
                    properities__web_links__web_resource=web_resource,
                    properities__web_links__index=ligand_id)
                l = next(iter(matches), None)
                if l is None:
                    # Hand over to the creation path in the handler below.
                    raise Ligand.DoesNotExist
            else:
                # The name is canonical and already has a ligand record.
                l = Ligand.objects.get(
                    name=ligand_name, canonical=True,
                    properities__web_links__web_resource=web_resource,
                    properities__web_links__index=ligand_id)
        except Ligand.DoesNotExist:
            try:
                # The compound exists under a different (canonical) name:
                # register this name as a non-canonical alias sharing the
                # same properties.
                l_canonical = Ligand.objects.get(
                    properities__web_links__web_resource=web_resource,
                    properities__web_links__index=ligand_id, canonical=True)
                try:
                    l, created = Ligand.objects.get_or_create(
                        properities=l_canonical.properities,
                        name=ligand_name, canonical=False)
                except IntegrityError:
                    # Presumably lost a race with a concurrent insert; fetch
                    # the row that now exists.
                    l = Ligand.objects.get(
                        properities=l_canonical.properities,
                        name=ligand_name, canonical=False)
            except Ligand.DoesNotExist:
                # Unknown compound: fetch the ligand from PubChem.
                default_ligand_type = 'Small molecule'
                lt, created = LigandType.objects.get_or_create(
                    slug=slugify(default_ligand_type),
                    defaults={'name': default_ligand_type})
                l = Ligand()
                l = l.load_from_pubchem(pubchem_lookup_value, ligand_id, lt,
                                        ligand_name)
                if l is None and type_id == 'SMILES':
                    # Insert manually if given as SMILES and not found in
                    # PubChem. Try progressively looser matches first.
                    try:
                        l = Ligand.objects.get(
                            name=ligand_name, canonical=True,
                            properities__smiles=ligand_id)
                    except Ligand.DoesNotExist:
                        try:
                            # Same SMILES stored under a suffixed name.
                            l = Ligand.objects.get(
                                name__startswith=ligand_name, canonical=True,
                                properities__smiles=ligand_id)
                        except Ligand.DoesNotExist:
                            try:
                                # Same name without a SMILES yet: fill it in.
                                l = Ligand.objects.get(
                                    name=ligand_name, canonical=True,
                                    properities__smiles=None)
                                l.properities.smiles = ligand_id
                                l.properities.save()
                                l.save()
                            except Ligand.DoesNotExist:
                                # Now insert a new ligand, but first make sure
                                # the name is unique. The suffix is derived
                                # from the numbered canonical names already
                                # present, and defaults to ``_1`` when there
                                # are none.
                                if Ligand.objects.filter(name=ligand_name).exists():
                                    taken = Ligand.objects.filter(
                                        name__startswith=ligand_name,
                                        canonical=True,
                                    ).values_list("name", flat=True)
                                    ligand_name = _next_unique_ligand_name(
                                        ligand_name, taken)
                                l = Ligand()
                                l.name = ligand_name
                                lp = LigandProperities()
                                lp.smiles = ligand_id
                                lp.ligand_type = lt
                                lp.save()
                                l.properities = lp
                                l.canonical = True  # maybe false, but that would break stuff.
                                l.ambigious_alias = False
                                try:
                                    l.save()
                                except IntegrityError:
                                    l = Ligand.objects.get(name=ligand_name,
                                                           canonical=True)

    elif name:

        # if this name is canonical and it has a ligand record already
        if Ligand.objects.filter(name=name, canonical=True).exists():
            l = Ligand.objects.get(name=name, canonical=True)

        # if this matches an alias that only has "one" parent canonical name - eg distinct
        elif Ligand.objects.filter(name=name, canonical=False,
                                   ambigious_alias=False).exists():
            l = Ligand.objects.get(name=name, canonical=False,
                                   ambigious_alias=False)

        # if this matches an alias that has several canonical parents, must
        # investigate, start with empty.
        elif Ligand.objects.filter(name=name, canonical=False,
                                   ambigious_alias=True).exists():
            lp = LigandProperities()
            lp.save()
            l = Ligand()
            l.properities = lp
            l.name = name
            l.canonical = False
            l.ambigious_alias = True
            l.save()
            l.load_by_name(name)

        # if neither a canonical or alias exists, create the records. Remember
        # to check for canonical / alias status.
        else:
            lp = LigandProperities()
            lp.save()
            l = Ligand()
            l.properities = lp
            l.name = str(name)
            l.canonical = True
            l.ambigious_alias = False
            try:
                l.save()
                l.load_by_name(str(name))
            except IntegrityError:
                l = Ligand.objects.get(name=str(name), canonical=True)
    else:
        l = None

    return l
    def main_func(self, positions, iteration, count, lock):
        """Import ligands from ``self.ligand_dump`` until the shared counter runs out.

        Each pass claims the entry at index ``count.value`` and advances the
        counter. The bounds check, the read and the increment all happen while
        holding ``lock``, so a worker sharing the same counter cannot advance
        it between the check and the claim and push this worker past the end
        of the list.

        Args:
            positions: Not used by this method.
            iteration: Not used by this method.
            count: Object exposing a mutable integer ``value`` attribute
                (presumably a ``multiprocessing.Value`` shared between
                workers -- confirm against the caller).
            lock: Context manager guarding ``count``.
        """
        ligands = self.ligand_dump

        while True:
            with lock:
                if count.value >= len(ligands):
                    break
                entry = ligands[count.value]
                count.value += 1
                if count.value % 10000 == 0:
                    print('{} Status {} out of {}'.format(
                        datetime.datetime.strftime(datetime.datetime.now(),
                                                   '%Y-%m-%d %H:%M:%S'),
                        count.value, len(ligands)))

            if 'logp' not in entry:
                # temp skip to only use "full" annotated ligands
                continue

            lp = LigandProperities.objects.filter(
                inchikey=entry['inchikey']).first()
            ligand = None

            if lp:
                # The inchikey is known; check whether this name is attached to it.
                ligand = Ligand.objects.filter(
                    name=entry['name'], properities=lp).prefetch_related(
                        'properities__ligand_type', 'properities__web_links',
                        'properities__vendors').first()

            # If the name with the corresponding inchikey is there, assume all
            # is good and only make sure the links below are present.
            if not ligand:
                if lp:
                    print(entry['name'], 'is there! (but not by name, only inchi')
                else:
                    # No ligand seems to match by inchikey -- start creating it.
                    # Make LigandProperities first
                    lt, _ = LigandType.objects.get_or_create(
                        slug=entry['ligand_type__slug'],
                        defaults={'name': entry['ligand_type__name']})
                    lp = LigandProperities()
                    lp.inchikey = entry['inchikey']
                    lp.smiles = entry['smiles']
                    lp.mw = entry['mw']
                    lp.logp = entry['logp']
                    lp.rotatable_bonds = entry['rotatable_bonds']
                    lp.hacc = entry['hacc']
                    lp.hdon = entry['hdon']
                    lp.ligand_type = lt
                    lp.save()

                # Either way a Ligand row for this name is still missing.
                ligand = Ligand()
                ligand.properities = lp
                ligand.name = entry['name']
                ligand.canonical = entry['canonical']
                ligand.ambigious_alias = entry['ambigious_alias']
                ligand.save()

            # create links - impossible to make duplicates so no need to check if there already
            if ligand.properities.web_links.count() < len(entry['web_links']):
                for link in entry['web_links']:
                    wr = WebResource.objects.get(slug=link['web_resource'])
                    wl, _ = WebLink.objects.get_or_create(
                        index=link['index'], web_resource=wr)
                    ligand.properities.web_links.add(wl)

            # create vendors - skipped per entry when a link with that sid already exists
            if ligand.properities.vendors.count() < len(entry['vendors']):
                for vendor in entry['vendors']:
                    lv = LigandVendors.objects.get(slug=vendor['vendor_slug'])
                    if LigandVendorLink.objects.filter(
                            sid=vendor['sid']).exists():
                        continue
                    lvl = LigandVendorLink()
                    lvl.sid = vendor['sid']
                    lvl.vendor = lv
                    lvl.lp = ligand.properities
                    lvl.vendor_external_id = vendor['vendor_external_id']
                    lvl.url = vendor['url']
                    lvl.save()
Пример #6
0
    def main_func(self, positions, iteration, count, lock):
        """Process entries of ``self.ligand_dump`` claimed through a shared counter.

        An entry is claimed by reading ``count.value`` and incrementing it.
        The end-of-list check is made inside the same ``with lock`` section as
        the claim, so the index cannot go stale between the check and the read
        when several workers share the counter.

        Args:
            positions: Not used by this method.
            iteration: Not used by this method.
            count: Object with a mutable integer ``value`` attribute
                (presumably a ``multiprocessing.Value`` -- confirm against the
                caller).
            lock: Context manager protecting ``count``.
        """
        ligands = self.ligand_dump

        while True:
            with lock:
                if count.value >= len(ligands):
                    break
                record = ligands[count.value]
                count.value += 1
                if count.value % 10000 == 0:
                    print('{} Status {} out of {}'.format(
                        datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                        count.value, len(ligands)))

            if 'logp' not in record:
                # temp skip to only use "full" annotated ligands
                continue

            lp = LigandProperities.objects.filter(inchikey=record['inchikey']).first()
            ligand = None
            if lp:
                # Properties with this inchikey exist; look for this name on them.
                ligand = Ligand.objects.filter(
                    name=record['name'], properities=lp,
                ).prefetch_related(
                    'properities__ligand_type',
                    'properities__web_links',
                    'properities__vendors',
                ).first()

            # When the name with the corresponding inchikey is there, assume
            # all is good; links are still added below to make sure they exist.
            if not ligand:
                if lp:
                    print(record['name'],'is there! (but not by name, only inchi')
                else:
                    # No ligand seems to match by inchikey -- start creating it.
                    # Make LigandProperities first
                    lt, _ = LigandType.objects.get_or_create(
                        slug=record['ligand_type__slug'],
                        defaults={'name': record['ligand_type__name']})
                    lp = LigandProperities()
                    lp.inchikey = record['inchikey']
                    lp.smiles = record['smiles']
                    lp.mw = record['mw']
                    lp.logp = record['logp']
                    lp.rotatable_bonds = record['rotatable_bonds']
                    lp.hacc = record['hacc']
                    lp.hdon = record['hdon']
                    lp.ligand_type = lt
                    lp.save()

                # Both branches end by creating the Ligand row for this name.
                ligand = Ligand()
                ligand.properities = lp
                ligand.name = record['name']
                ligand.canonical = record['canonical']
                ligand.ambigious_alias = record['ambigious_alias']
                ligand.save()

            # create links - impossible to make duplicates so no need to check if there already
            if ligand.properities.web_links.count() < len(record['web_links']):
                for link in record['web_links']:
                    wr = WebResource.objects.get(slug=link['web_resource'])
                    wl, _ = WebLink.objects.get_or_create(index=link['index'], web_resource=wr)
                    ligand.properities.web_links.add(wl)

            # create vendors - an entry is skipped when a link with its sid already exists
            if ligand.properities.vendors.count() < len(record['vendors']):
                for vendor in record['vendors']:
                    lv = LigandVendors.objects.get(slug=vendor['vendor_slug'])
                    already_linked = LigandVendorLink.objects.filter(sid=vendor['sid']).exists()
                    if not already_linked:
                        lvl = LigandVendorLink()
                        lvl.sid = vendor['sid']
                        lvl.vendor = lv
                        lvl.lp = ligand.properities
                        lvl.vendor_external_id = vendor['vendor_external_id']
                        lvl.url = vendor['url']
                        lvl.save()
Пример #7
0
    def create_mutant_data(self, filenames):
        self.logger.info('CREATING MUTANT DATA')

        # what files should be parsed?
        if not filenames:
            filenames = os.listdir(self.structure_data_dir)

        missing_proteins = {}
        mutants_for_proteins = {}

        for source_file in filenames:
            source_file_path = os.sep.join(
                [self.structure_data_dir, source_file])
            if os.path.isfile(source_file_path) and source_file[0] != '.':
                self.logger.info('Reading file {}'.format(source_file_path))
                # read the yaml file

                if source_file[-4:] == 'xlsx' or source_file[-3:] == 'xls':
                    rows = self.loaddatafromexcel(source_file_path)
                    rows = self.analyse_rows(rows)
                elif source_file[-4:] == 'yaml':
                    rows = yaml.load(open(source_file_path, 'r'))
                    temp = []
                    for r in rows:
                        d = {}
                        d['reference'] = r['pubmed']
                        d['protein'] = r['entry_name'].replace("__",
                                                               "_").lower()
                        d['mutation_pos'] = r['seq']
                        d['mutation_from'] = r['from_res']
                        d['mutation_to'] = r['to_res']
                        d['ligand_name'] = ''
                        d['ligand_type'] = ''
                        d['ligand_id'] = ''
                        d['ligand_class'] = ''
                        d['exp_type'] = ''
                        d['exp_func'] = ''
                        d['exp_wt_value'] = 0
                        d['exp_wt_unit'] = ''
                        d['exp_mu_effect_sign'] = ''
                        d['exp_mu_value_raw'] = 0
                        d['fold_effect'] = 0
                        d['exp_mu_effect_qual'] = ''
                        d['exp_mu_effect_ligand_prop'] = ''
                        d['exp_mu_ligand_ref'] = ''
                        d['opt_type'] = ''
                        d['opt_wt'] = 0
                        d['opt_mu'] = 0
                        d['opt_sign'] = ''
                        d['opt_percentage'] = 0
                        d['opt_qual'] = ''
                        d['opt_agonist'] = ''
                        if len(d['mutation_to']) > 1 or len(
                                d['mutation_from']
                        ) > 1:  #if something is off with amino acid
                            continue
                        temp.append(d)
                    rows = temp
                else:
                    self.logger.info('unknown format'.source_file)
                    continue

                c = 0
                skipped = 0
                inserted = 0
                for r in rows:
                    c += 1
                    if c % 1000 == 0:
                        self.logger.info('Parsed ' + str(c) +
                                         ' mutant data entries')

                    # publication
                    try:  #fix if it thinks it's float.
                        float(r['reference'])
                        r['reference'] = str(int(r['reference']))
                    except ValueError:
                        pass

                    if r['reference'].isdigit():  #assume pubmed
                        pub_type = 'pubmed'
                    else:  #assume doi
                        pub_type = 'doi'

                    try:
                        pub = Publication.objects.get(
                            web_link__index=r['reference'],
                            web_link__web_resource__slug=pub_type)
                    except Publication.DoesNotExist:
                        pub = Publication()
                        try:
                            pub.web_link = WebLink.objects.get(
                                index=r['reference'],
                                web_resource__slug=pub_type)
                        except WebLink.DoesNotExist:
                            wl = WebLink.objects.create(
                                index=r['reference'],
                                web_resource=WebResource.objects.get(
                                    slug=pub_type))
                            pub.web_link = wl

                        if pub_type == 'doi':
                            pub.update_from_doi(doi=r['reference'])
                        elif pub_type == 'pubmed':
                            pub.update_from_pubmed_data(index=r['reference'])
                        try:
                            pub.save()
                        except:
                            self.logger.error('error with reference ' +
                                              str(r['reference']) + ' ' +
                                              pub_type)
                            continue  #if something off with publication, skip.

                    if r['ligand_type'] == 'PubChem CID' or r[
                            'ligand_type'] == 'SMILES':
                        if r['ligand_type'] == 'PubChem CID':
                            pubchem_lookup_value = 'cid'
                        elif r['ligand_type'] == 'SMILES':
                            pubchem_lookup_value = 'smiles'

                        try:
                            web_resource = WebResource.objects.get(
                                slug='pubchem')
                        except:
                            # abort if pdb resource is not found
                            raise Exception(
                                'PubChem resource not found, aborting!')

                        if 'ligand_name' in r and r['ligand_name']:
                            ligand_name = str(r['ligand_name'])
                        else:
                            ligand_name = False

                        try:
                            # if this name is canonical and it has a ligand record already
                            l = Ligand.objects.get(
                                name=ligand_name,
                                canonical=True,
                                properities__web_links__web_resource=
                                web_resource,
                                properities__web_links__index=r['ligand_id'])
                        except Ligand.DoesNotExist:
                            try:
                                # if exists under different name
                                l_canonical = Ligand.objects.get(
                                    properities__web_links__web_resource=
                                    web_resource,
                                    properities__web_links__index=r[
                                        'ligand_id'],
                                    canonical=True)
                                l, created = Ligand.objects.get_or_create(
                                    properities=l_canonical.properities,
                                    name=ligand_name,
                                    canonical=False)
                                if created:
                                    self.logger.info(
                                        'Created ligand {}'.format(l.name))
                            except Ligand.DoesNotExist:
                                # fetch ligand from pubchem
                                default_ligand_type = 'Small molecule'
                                lt, created = LigandType.objects.get_or_create(
                                    slug=slugify(default_ligand_type),
                                    defaults={'name': default_ligand_type})
                                l = Ligand()
                                l = l.load_from_pubchem(
                                    pubchem_lookup_value, r['ligand_id'], lt,
                                    ligand_name)
                                if l == None and r[
                                        'ligand_type'] == 'SMILES':  #insert manually if smiles and unfound in pubchem
                                    try:
                                        l = Ligand.objects.get(
                                            name=ligand_name,
                                            canonical=True,
                                            properities__smiles=r['ligand_id'])
                                    except Ligand.DoesNotExist:
                                        l = Ligand()
                                        l.name = ligand_name
                                        lp = LigandProperities()
                                        lp.smiles = r['ligand_id']
                                        lp.ligand_type = lt
                                        lp.save()
                                        l.properities = lp
                                        l.canonical = True  #maybe false, but that would break stuff.
                                        l.ambigious_alias = False
                                        l.save()
                                        self.logger.info(
                                            'Created Ligand {} manually'.
                                            format(l.name))

                    elif r['ligand_name']:

                        # if this name is canonical and it has a ligand record already
                        if Ligand.objects.filter(name=r['ligand_name'],
                                                 canonical=True).exists():
                            l = Ligand.objects.get(name=r['ligand_name'],
                                                   canonical=True)

                        # if this matches an alias that only has "one" parent canonical name - eg distinct
                        elif Ligand.objects.filter(
                                name=r['ligand_name'],
                                canonical=False,
                                ambigious_alias=False).exists():
                            l = Ligand.objects.get(name=r['ligand_name'],
                                                   canonical=False,
                                                   ambigious_alias=False)

                        # if this matches an alias that only has several canonical parents, must investigate, start
                        # with empty.
                        elif Ligand.objects.filter(
                                name=r['ligand_name'],
                                canonical=False,
                                ambigious_alias=True).exists():
                            lp = LigandProperities()
                            lp.save()
                            l = Ligand()
                            l.properities = lp
                            l.name = r['ligand_name']
                            l.canonical = False
                            l.ambigious_alias = True
                            l.save()
                            l.load_by_name(r['ligand_name'])

                        # if neither a canonical or alias exists, create the records. Remember to check for
                        # canonical / alias status.
                        else:
                            lp = LigandProperities()
                            lp.save()
                            l = Ligand()
                            l.properities = lp
                            l.name = str(r['ligand_name'])
                            l.canonical = True
                            l.ambigious_alias = False
                            l.save()
                            l.load_by_name(str(r['ligand_name']))
                    else:
                        l = None

                    if Ligand.objects.filter(
                            name=r['exp_mu_ligand_ref'], canonical=True
                    ).exists(
                    ):  #if this name is canonical and it has a ligand record already
                        l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'],
                                                   canonical=True)
                    elif Ligand.objects.filter(
                            name=r['exp_mu_ligand_ref'],
                            canonical=False,
                            ambigious_alias=False
                    ).exists(
                    ):  #if this matches an alias that only has "one" parent canonical name - eg distinct
                        l_ref = Ligand.objects.get(name=r['exp_mu_ligand_ref'],
                                                   canonical=False,
                                                   ambigious_alias=False)
                    elif Ligand.objects.filter(
                            name=r['exp_mu_ligand_ref'],
                            canonical=False,
                            ambigious_alias=True
                    ).exists(
                    ):  #if this matches an alias that only has several canonical parents, must investigate, start with empty.
                        lp = LigandProperities()
                        lp.save()
                        l_ref = Ligand()
                        l_ref.properities = lp
                        l_ref.name = r['exp_mu_ligand_ref']
                        l_ref.canonical = False
                        l_ref.ambigious_alias = True
                        l_ref.save()
                        l_ref.load_by_name(r['exp_mu_ligand_ref'])
                        l_ref.save()
                    elif r['exp_mu_ligand_ref']:  #if neither a canonical or alias exists, create the records. Remember to check for canonical / alias status.
                        lp = LigandProperities()
                        lp.save()
                        l_ref = Ligand()
                        l_ref.properities = lp
                        l_ref.name = r['exp_mu_ligand_ref']
                        l_ref.canonical = True
                        l_ref.ambigious_alias = False
                        l_ref.save()
                        l_ref.load_by_name(r['exp_mu_ligand_ref'])
                        l_ref.save()
                    else:
                        l_ref = None

                    protein_id = 0
                    residue_id = 0

                    protein = Protein.objects.filter(entry_name=r['protein'])
                    if protein.exists():
                        protein = protein.get()
                        if r['protein'] in mutants_for_proteins:
                            mutants_for_proteins[r['protein']] += 1
                        else:
                            mutants_for_proteins[r['protein']] = 1

                    else:
                        skipped += 1
                        if r['protein'] in missing_proteins:
                            missing_proteins[r['protein']] += 1
                        else:
                            missing_proteins[r['protein']] = 1
                            self.logger.error('Skipped due to no protein ' +
                                              r['protein'])
                        continue

                    res = Residue.objects.filter(
                        protein_conformation__protein=protein,
                        amino_acid=r['mutation_from'],
                        sequence_number=r['mutation_pos']
                    )  #FIXME MAKE AA CHECK
                    if res.exists():
                        res = res.get()
                    else:
                        self.logger.error(
                            'Skipped due to no residue or mismatch AA ' +
                            r['protein'] + ' pos:' + str(r['mutation_pos']) +
                            ' AA:' + r['mutation_from'])
                        skipped += 1
                        continue

                    if r['ligand_class']:
                        l_role, created = LigandRole.objects.get_or_create(
                            name=r['ligand_class'],
                            defaults={'slug': slugify(r['ligand_class'])[:50]
                                      })  # FIXME this should not be needed
                    else:
                        l_role = None

                    if r['exp_type']:
                        exp_type_id, created = MutationExperimentalType.objects.get_or_create(
                            type=r['exp_type'])
                    else:
                        exp_type_id = None

                    if r['exp_func']:
                        exp_func_id, created = MutationFunc.objects.get_or_create(
                            func=r['exp_func'])
                    else:
                        exp_func_id = None

                    if r['exp_mu_effect_ligand_prop'] or r[
                            'exp_mu_effect_qual']:
                        exp_qual_id, created = MutationQual.objects.get_or_create(
                            qual=r['exp_mu_effect_qual'],
                            prop=r['exp_mu_effect_ligand_prop'])
                    else:
                        exp_qual_id = None

                    if r['opt_type'] or r['opt_wt'] or r['opt_mu'] or r[
                            'opt_sign'] or r['opt_percentage'] or r[
                                'opt_qual'] or r['opt_agonist']:
                        exp_opt_id, created = MutationOptional.objects.get_or_create(
                            type=r['opt_type'],
                            wt=r['opt_wt'],
                            mu=r['opt_mu'],
                            sign=r['opt_sign'],
                            percentage=r['opt_percentage'],
                            qual=r['opt_qual'],
                            agonist=r['opt_agonist'])
                    else:
                        exp_opt_id = None

                    mutation, created = Mutation.objects.get_or_create(
                        amino_acid=r['mutation_to'],
                        protein=protein,
                        residue=res)

                    logtypes = ['pEC50', 'pIC50', 'pK']

                    foldchange = 0
                    typefold = ''
                    if r['exp_wt_value'] != 0 and r[
                            'exp_mu_value_raw'] != 0:  #fix for new format

                        if re.match("(" + ")|(".join(logtypes) + ")",
                                    r['exp_type']):  #-log values!
                            foldchange = round(
                                math.pow(10, -r['exp_mu_value_raw']) /
                                pow(10, -r['exp_wt_value']), 3)
                            typefold = r['exp_type'] + "_log"
                        else:
                            foldchange = round(
                                r['exp_mu_value_raw'] / r['exp_wt_value'], 3)
                            typefold = r['exp_type'] + "_not_log"

                        if foldchange < 1 and foldchange != 0:
                            foldchange = -round((1 / foldchange), 3)
                    elif r['fold_effect'] != 0:
                        foldchange = round(r['fold_effect'], 3)
                        if foldchange < 1:
                            foldchange = -round((1 / foldchange), 3)

                    raw_experiment = self.insert_raw(r)
                    obj, created = MutationExperiment.objects.get_or_create(
                        refs=pub,
                        protein=protein,
                        residue=res,
                        ligand=l,
                        ligand_role=l_role,
                        ligand_ref=l_ref,
                        raw=raw_experiment,
                        optional=exp_opt_id,
                        exp_type=exp_type_id,
                        exp_func=exp_func_id,
                        exp_qual=exp_qual_id,
                        mutation=mutation,
                        wt_value=r['exp_wt_value'],  #
                        wt_unit=r['exp_wt_unit'],
                        mu_value=r['exp_mu_value_raw'],
                        mu_sign=r['exp_mu_effect_sign'],
                        foldchange=foldchange)
                    mut_id = obj.id
                    inserted += 1

                self.logger.info('Parsed ' + str(c) +
                                 ' mutant data entries. Skipped ' +
                                 str(skipped))

        sorted_missing_proteins = sorted(missing_proteins.items(),
                                         key=operator.itemgetter(1),
                                         reverse=True)
        sorted_mutants_for_proteins = sorted(mutants_for_proteins.items(),
                                             key=operator.itemgetter(1),
                                             reverse=True)

        self.logger.info('COMPLETED CREATING MUTANTS')