示例#1
0
def sa_calc(polymer_pdb, radius):
    """Compute total and per-atom solvent accessible surface area of a polymer.

    The input is read with RDKit, explicit hydrogens (with coordinates) are
    added, and the result is written as ``<out_dir><NAME>_new.pdb`` so that
    FreeSASA can load it.  The total area is printed, and the area divided
    by the atom count of the molecule as read (before hydrogens were added)
    is appended to 'Average surface area.txt' in ``out_dir`` and printed.

    Relies on the module-level names ``out_dir`` and ``NAME``.

    :param polymer_pdb: path handed to ``Chem.MolFromMolFile``.
    :param radius: probe radius handed to FreeSASA.
    :return: None.
    """
    # Per the original author's note the reader drops hydrogens, so they
    # are re-added with coordinates before the PDB file is written.
    parsed_mol = Chem.MolFromMolFile(polymer_pdb)
    hydrogenated_mol = Chem.AddHs(parsed_mol, addCoords=True)
    pdb_path = out_dir + NAME + '_new.pdb'
    Chem.MolToPDBFile(hydrogenated_mol, pdb_path)

    # FreeSASA ignores hydrogens and HETATM records unless told otherwise.
    structure_options = {
        'hetatm': True,
        'hydrogen': True,
        'join-models': False,
        'skip-unknown': False,
        'halt-at-unknown': False,
    }

    # Solvent accessible surface area for the requested probe radius.
    probe_settings = freesasa.Parameters()
    probe_settings.setProbeRadius(radius)
    sasa_structure = freesasa.Structure(pdb_path, options=structure_options)
    sasa_result = freesasa.calc(sasa_structure, probe_settings)

    # Both reported numbers are rounded to 4 decimals.
    total_area = round(sasa_result.totalArea(), 4)
    print(f'Total SASA is {total_area} Å^2 when probe radius is {radius} Å.')
    normalized_sa = round(total_area / parsed_mol.GetNumAtoms(), 4)

    # The log line and the console line share the same tail.
    probe_suffix = ' Å^2 with the probe size of ' + str(radius) + 'Å.\n'
    with open(out_dir + 'Average surface area.txt', 'a+') as report:
        report.write(f'The normalized surface area of {NAME} is ' + str(normalized_sa) + probe_suffix)
    print('Nomalized solvent accessible surface area is ' + str(normalized_sa) + probe_suffix)
示例#2
0
def freesasa_cb(prody_parsed, probe_radius=1.4):
    """Run FreeSASA on the backbone and CB atoms of a parsed structure.

    Selects the non-hydrogen backbone and CB atoms of the protein, looks up
    a radius for each one from its residue and atom name, and passes the
    flattened coordinates and radii to ``freesasa.calcCoord``.

    :param prody_parsed: object exposing ``select``; the selection must
        provide ``getCoords``, ``getResnames`` and ``getNames``.
    :param probe_radius: probe radius forwarded to FreeSASA.
    :return: whatever ``freesasa.calcCoord`` returns.
    """
    cb_sele = prody_parsed.select(
        'protein and (backbone or name CB) and not element H D')
    # calcCoord expects one flat sequence: x1, y1, z1, x2, y2, z2, ...
    coords = [value for xyz in cb_sele.getCoords() for value in xyz]
    # One classifier serves every atom; building a new one per atom was
    # wasted work.
    classifier = freesasa.Classifier()
    radii = [
        classifier.radius(resname, atomname)
        for resname, atomname in zip(cb_sele.getResnames(), cb_sele.getNames())
    ]
    return freesasa.calcCoord(
        coords, radii, freesasa.Parameters({'probe-radius': probe_radius}))
示例#3
0
def test_freesasa_lookup(pdb_seq_object):
    """Compare FreeSASALookup output with stored reference arrays.

    A handful of metrics are checked with default FreeSASA parameters, then
    the 'total' metric is checked once more with a custom Lee-Richards
    configuration.
    """
    shared_kwargs = {
        'pdb_directory': TEST_DATA_DIR,
        'sifts_directory': TEST_DATA_DIR,
        'download_sifts': False,
    }

    def assert_matches_reference(lookup, reference_file):
        actual = lookup(pdb_seq_object)
        # To regenerate a reference after a deliberate change, np.save
        # `actual` to os.path.join(FILE_DIR, <reference_file without .npy>).
        reference = np.load(os.path.join(FILE_DIR, reference_file))
        np.testing.assert_almost_equal(reference, actual)

    # Just test a few of the options
    for metric in ["total", "polar", "relativeTotal", "relativeMainChain"]:
        assert_matches_reference(
            FreeSASALookup(metric=metric, **shared_kwargs),
            'reference_FreeSASA_{}_results.npy'.format(metric))

    # Test with some changed Parameters
    import freesasa
    custom_parameters = freesasa.Parameters({
        'algorithm': freesasa.LeeRichards,
        'n-slices': 100
    })
    assert_matches_reference(
        FreeSASALookup(metric='total',
                       freesasa_parameters=custom_parameters,
                       **shared_kwargs),
        'reference_FreeSASA_custom_params_results.npy')
示例#4
0
def getAtomSASA(structure, classifier=None, probe_radius=1.4, mi=0, **kwargs):
    """Store the solvent accessible surface area of each atom on ``structure``.

    A FreeSASA structure is built from ``structure`` via
    ``getFreeSASAStructureFromModel``, the SASA is computed, and each atom's
    area is written to ``atom.xtra["sasa"]`` on the matching atom of
    ``structure[mi]``.

    :param structure: container indexable as
        ``structure[mi][chain_id][(' ', resnum, icode)][atom_name]``.
    :param classifier: radius classifier; a ``Radius(**kwargs)`` instance is
        created when omitted.
    :param probe_radius: probe radius forwarded to FreeSASA.
    :param mi: index of the model inside ``structure`` to annotate.
    :param kwargs: forwarded to ``Radius`` when no classifier is given.
    :return: None; the structure is annotated in place.
    """
    if classifier is None:
        # initialize new classifier
        classifier = Radius(**kwargs)

    freesasa_structure = getFreeSASAStructureFromModel(structure, classifier=classifier)
    SASA = freesasa.calc(freesasa_structure, freesasa.Parameters({"probe-radius": probe_radius}))

    # Atom count and atom names must come from the FreeSASA structure: it
    # is the object the result indices refer to, and it is the one already
    # used for the residue number and chain label below.
    for i in range(freesasa_structure.nAtoms()):
        sasa = SASA.atomArea(i)
        resi = freesasa_structure.residueNumber(i).strip()
        cid = freesasa_structure.chainLabel(i).strip()
        # A trailing non-digit in the residue number is an insertion code.
        if resi[-1].isdigit():
            ins = " "
        else:
            ins = resi[-1]
            resi = resi[:-1]
        aname = freesasa_structure.atomName(i).strip()
        structure[mi][cid][(' ', int(resi), ins)][aname].xtra["sasa"] = sasa
def sa_conformers(file_1, func_1, file_2, func_2, units, radius):
    """Build an AB co-polymer, embed conformers and report their SASA.

    Steps:
      1. Build a linear 'AB' polymer from the two monomer files with stk
         and write the unoptimized and UFF-optimized structures.
      2. Embed ``N`` conformers with RDKit and write each one as .mol/.pdb
         into a per-polymer directory under the hard-coded ``file_dir``.
      3. Re-export every .mol file through RDKit (hydrogens added) as
         ``*_new.pdb`` so that FreeSASA can read it.
      4. Compute the total SASA of each ``*_new.pdb``, append the per-atom
         average to a text log and write a per-conformer CSV table.

    :param file_1: path of the first monomer .mol file.
    :param func_1: functional groups of the first monomer (passed to stk).
    :param file_2: path of the second monomer .mol file.
    :param func_2: functional groups of the second monomer (passed to stk).
    :param units: number of repeat units, passed to ``stk.Linear`` as ``n``.
    :param radius: probe radius passed to FreeSASA.
    :return: None.
    """
    # turn off cache
    stk.OPTIONS['cache'] = False

    # number of conformers
    N = 10
    # Functional groups noted by the original author:
    #    ['diol'] and ['dibromine']/['difluorene']
    #    or
    #    ['bromine'] and ['bromine']/['iodine']
    name_1 = file_1.replace('.mol', '')
    unit_1 = stk.StructUnit2(file_1, func_1)

    name_2 = file_2.replace('.mol', '')
    unit_2 = stk.StructUnit2(file_2, func_2)

    # make polymer
    NAME = name_1+'_'+name_2+'_AB_poly'
    print(f'Creating polymer: {NAME}')
    polymer = stk.Polymer([unit_1, unit_2], stk.Linear('AB', [0, 0], n=units, ends='h'))
    # write unoptimized structure
    polymer.write(NAME+'.mol')
    mol_polymer = rdkit.MolFromMolFile(NAME + '.mol')
    print(f'Optimizing polymer {NAME} and saving {N} conformers')
    # optimize the polymer with stk's UFF optimizer
    embedder = stk.UFF(use_cache=False)
    embedder.optimize(polymer, conformer=-1)
    # write optimized polymer to json
    polymer.dump(NAME+'_opt.json')
    polymer.write(NAME+'_opt.mol')
    # make N conformers of the polymer molecule
    etkdg = rdkit.ETKDGv2()
    etkdg.randomSeed = 1000
    etkdg.verbose = True
    etkdg.maxIterations = 200000
    cids = rdkit.EmbedMultipleConfs(
        mol=polymer.mol,
        numConfs=N,
        params=etkdg
    )
    print(f'Made {len(cids)} conformers...')
    print(f'Warning! I have not implemented an optimization of the ETKDG cleaned polymers!')

    # iterate over conformers and save structure
    file_dir = '/home/fanyuzhao/Monomers/OH+F/dimer/conformers/'
    new_dir = file_dir+NAME+'_'+str(units)+'_'+str(radius)+'/'
    for cid in cids:
        # build directories
        os.makedirs(new_dir, exist_ok=True)
        # write optimized polymer to mol
        polymer.write(new_dir+NAME+'_'+str(cid)+'_opt.mol', conformer=cid)
        # write optimized polymer to pdb
        polymer.write(new_dir+NAME+'_'+str(cid)+'_opt.pdb', conformer=cid)
        print(f'Done! {N} ETKDG conformers of polymer written to {NAME}_{N}_opt.mol/pdb')

    # pdb file from stk can not be read in freesasa
    # save the new pdb file in rdkit from mol files
    for item in os.listdir(new_dir):
        if item.endswith('.mol'):
            file_pdb = item.replace('.mol', '')
            a = rdkit.MolFromMolFile(os.path.join(new_dir, item))
            # hydrogens are removed when converting the file in rdkit
            b = rdkit.AddHs(a, addCoords = True)
            rdkit.MolToPDBFile(b, new_dir + file_pdb + '_new.pdb')

    # calculate solvent accessible surface area(probe radius = 1.4Å and 3.6Å)
    # hydrogens are removed in the default option
    # hetatm are ignored in the default option
    options_with_Hs =  {    'hetatm' : True,
                            'hydrogen' : True,
                            'join-models' : False,
                            'skip-unknown' : False,
                            'halt-at-unknown' : False    }

    # The probe settings are the same for every conformer, so build them once.
    para = freesasa.Parameters()
    para.setProbeRadius(radius)

    sa_list = []
    pdb_list = []
    # loop all new pdb files
    for pdb in os.listdir(new_dir):
        if pdb.endswith("_new.pdb"):
            # use freesasa to calculate SASA
            free_struct = freesasa.Structure(os.path.join(new_dir, pdb), options = options_with_Hs)
            free_calc = freesasa.calc(free_struct, para)
            # keep 4 decimals
            sa_list.append(round(free_calc.totalArea(), 4))
            pdb_list.append(pdb.replace('.pdb', ''))
    # average SASA over the conformers, then per atom of the polymer as
    # read back from the unoptimized .mol file
    sa_average = round(sum(sa_list) / len(sa_list), 4)
    atom_number = mol_polymer.GetNumAtoms()
    normalized_sa = round(sa_average / atom_number, 4)
    with open (file_dir + 'Average surface area of conformers.txt', 'a+') as Asa:
        Asa.write(f'The normalized surface area of {NAME}_{units} is ' + str(normalized_sa) + ' Å^2 with the probe size of ' + str(radius) + f'Å and chain length of {units}.\n')
    print ('The avarage surface area of the conformers is ' + str(sa_average) + ' Å^2 with the probe size of ' + str(radius) + 'Å.')

    # save data to a csv table
    # save pdb file and surface area to a directory
    dic = dict(zip(pdb_list, sa_list))
    download_dict = new_dir + 'Solvent accessible surface area of ' + NAME +'.csv'
    # The context manager guarantees the table is flushed and closed; the
    # previous bare open() left the handle dangling.
    with open(download_dict, 'w') as table:
        table.write("Polymer_name, SASA\n")
        for polymer_name, sasa_value in dic.items():
            table.write(polymer_name + "," + str(sasa_value) + "\n")
    print ('Nomalized solvent accessible surface area is '+ str(normalized_sa) + ' Å^2 with the probe size of ' + str(radius) + 'Å.')
示例#6
0
def openfile():
    """Interactively load a PDB file and compute windowed sequence descriptors.

    Opens a Tk file dialog, parses the chosen file with ``PDBParser``,
    prints the peptide sequence(s), chains and basic ``ProteinAnalysis``
    statistics, and then fills a number of module-level globals:

    * ``m`` / ``length`` - the sequence string and its length,
    * ``c``, ``flex``, ``flexi`` - flexibility profile (three copies),
    * ``b``, ``hydro``, ``hydroph`` - Kyte-Doolittle scale profile with a
      window of 10 (three copies),
    * ``sec`` - first element of ``secondary_structure_fraction()`` for
      every 10-residue window,
    * ``acc``, ``access``, ``logacc`` - FreeSASA selection areas, every
      second one of them, and their base-10 logarithms.

    Several names in the ``global`` statements (e.g. ``prob``, ``te``,
    ``antigenicity``) are declared but not assigned in this function.

    NOTE(review): the accessibility section always loads "1e6j.pdb" from
    the working directory, not the file chosen in the dialog - confirm
    whether that is intended.

    Returns nothing.
    """
    global prob, probab, te
    global my_seq
    global anti
    global structure, structure_id, filename
    global antigenicity, hydro, flex, sec
    global m, a, c, b, length, j, k
    global hydroph, flexi, access
    anti = []
    sec = []
    probab = []
    from tkinter import filedialog
    # Ask the user for a PDB file; both filetype entries are identical.
    root = Tk()
    root.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select file",
        filetypes=(("pdb files", "*.pdb"), ("pdb files", "*.pdb")))
    filename = root.filename
    print(filename)
    # The structure id is hard-coded regardless of the selected file.
    structure_id = "1e6j"
    structure = PDBParser().get_structure(structure_id, root.filename)
    ppb = PPBuilder()
    # my_seq is overwritten on every pass, so only the last peptide's
    # sequence is used below.
    for pp in ppb.build_peptides(structure):
        my_seq = pp.get_sequence()  # type: Seq
        print(my_seq)
    for model in structure:
        for chain in model:
            print(chain)
    sequence = list(my_seq)
    m = ''.join(sequence)
    print(m)
    length = len(m)  # type: int
    print("Sequence consist of", length, "Amino Acids")
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    analysed_seq = ProteinAnalysis(m)
    print("Molecular weight = ", analysed_seq.molecular_weight())
    print("Amino Acid Count = ", analysed_seq.count_amino_acids())
    print("Secondary structure fraction =",
          analysed_seq.secondary_structure_fraction())
    # Per-residue scale handed to protein_scale below (named kd;
    # presumably the Kyte-Doolittle hydropathy scale - verify).
    kd = {
        'A': 1.8,
        'R': -4.5,
        'N': -3.5,
        'D': -3.5,
        'C': 2.5,
        'Q': -3.5,
        'E': -3.5,
        'G': -0.4,
        'H': -3.2,
        'I': 4.5,
        'L': 3.8,
        'K': -3.9,
        'M': 1.9,
        'F': 2.8,
        'P': -1.6,
        'S': -0.8,
        'T': -0.7,
        'W': -0.9,
        'Y': -1.3,
        'V': 4.2
    }
    # The same two profiles are computed three times each and stored under
    # different global names.
    c = list(analysed_seq.flexibility())
    b = list(analysed_seq.protein_scale(kd, 10, 1.0))
    hydro = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flex = list(analysed_seq.flexibility())
    hydroph = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flexi = list(analysed_seq.flexibility())

    # Slide a 10-residue window (m[j+1:k+1]) along the sequence; the loop
    # runs length - 10 times, starting with residues 0..9.
    i = 1
    j = -1  # type: int
    k = 9
    while i <= (length - 10):
        print("Sequence is = ", m[j + 1:k + 1])
        print("Flexibility value = ", c[j + 1])
        print("Hydrophilicity value = ", b[j + 1])
        ana_seq = ''.join(m[j + 1:k + 1])
        analyze_seq = ProteinAnalysis(ana_seq)
        # For Secondary structure Analysis
        print("Secondary structure fraction =",
              analyze_seq.secondary_structure_fraction())
        a = list(analyze_seq.secondary_structure_fraction())
        # Keep only the first of the returned fractions (presumably the
        # helix fraction - verify against the Biopython documentation).
        a = a[0]
        sec.append(a)
        i += 1
        j += 1
        k += 1
    f = length
    r = 1
    y = 10
    global acc, logacc
    acc = []
    # One pass per residue of the sequence; each pass selects the residue
    # range r-y (a span of 10 residue numbers that advances by one).
    for i in range(0, f):
        str1 = "accessibility, resi "
        str2 = str(r) + "-" + str(y)
        saving = str1 + str2
        print(saving)
        r = r + 1
        y = y + 1
        # NOTE(review): the structure is re-read and the SASA recomputed
        # on every pass although neither depends on the loop variable.
        structure = freesasa.Structure("1e6j.pdb")
        # First calculation (default parameters) is only used for printing.
        resulta = freesasa.calc(structure)
        area_classes = freesasa.classifyResults(resulta, structure)
        print("Total : %.2f A2" % resulta.totalArea())
        for key in area_classes:
            print(key, ": %.2f A2" % area_classes[key])
        # Second calculation (Lee-Richards, 10 slices) feeds the selections.
        resulta = freesasa.calc(
            structure,
            freesasa.Parameters({
                'algorithm': freesasa.LeeRichards,
                'n-slices': 10
            }))
        selections = freesasa.selectArea(('alanine, resn ala', saving),
                                         structure, resulta)
        # Both selection areas are appended to acc on every pass.
        for key in selections:
            print(key, ": %.2f A2" % selections[key])
            a = selections[key]
            acc.append(a)

    # Keep every second entry of acc, starting with the first.
    # NOTE(review): which of the two selections that is depends on the
    # iteration order of the mapping returned by selectArea - verify that
    # the window selection, not the alanine one, is kept.
    l = acc[0::2]
    access = l
    print(acc)
    print(l)
    # NOTE(review): math.log raises ValueError for an area of 0.
    logacc = [math.log(y, 10) for y in l]

    print(logacc)
示例#7
0
def calculate_sasa(pdbfile, chain, multichain=True, relative_type='sidechain'):
    """Compute per-residue SASA values for one chain of a PDB file.

    :param pdbfile: String of PDB file name.
    :param chain: Chain identifier whose residues are reported.
    :param multichain: Boolean. True to split the file into chains and run
        the calculation on the requested chain alone (SASA of the
        unattached monomer); False to calculate on the structure as-is.
    :param relative_type: 'sidechain' to express the ratio from the side
        chain area, anything else to use the total area.  Glycine always
        uses the main chain area, and residues without a reference value
        use the mean of the side chain ratio over all reference values.
    :return: Pandas Dataframe with residue type, residue number, chain,
        total/apolar/backbone/sidechain areas and the relative ratio (in
        percent of the Wilke reference value).
    """
    import freesasa as fs
    dict_max_acc = {
        # Miller max acc: Miller et al. 1987 https://doi.org/10.1016/0022-2836(87)90038-6
        # Wilke: Tien et al. 2013 https://doi.org/10.1371/journal.pone.0080635
        # Sander: Sander & Rost 1994 https://doi.org/10.1002/prot.340200303
        "Miller": {
            "ALA": 113.0,
            "ARG": 241.0,
            "ASN": 158.0,
            "ASP": 151.0,
            "CYS": 140.0,
            "GLN": 189.0,
            "GLU": 183.0,
            "GLY": 85.0,
            "HIS": 194.0,
            "ILE": 182.0,
            "LEU": 180.0,
            "LYS": 211.0,
            "MET": 204.0,
            "PHE": 218.0,
            "PRO": 143.0,
            "SER": 122.0,
            "THR": 146.0,
            "TRP": 259.0,
            "TYR": 229.0,
            "VAL": 160.0,
        },
        "Wilke": {
            "ALA": 129.0,
            "ARG": 274.0,
            "ASN": 195.0,
            "ASP": 193.0,
            "CYS": 167.0,
            "GLN": 225.0,
            "GLU": 223.0,
            "GLY": 104.0,
            "HIS": 224.0,
            "ILE": 197.0,
            "LEU": 201.0,
            "LYS": 236.0,
            "MET": 224.0,
            "PHE": 240.0,
            "PRO": 159.0,
            "SER": 155.0,
            "THR": 172.0,
            "TRP": 285.0,
            "TYR": 263.0,
            "VAL": 174.0,
            "MSE": 224.0,
            "SEC": 167.0,
        },
        "Sander": {
            "ALA": 106.0,
            "ARG": 248.0,
            "ASN": 157.0,
            "ASP": 163.0,
            "CYS": 135.0,
            "GLN": 198.0,
            "GLU": 194.0,
            "GLY": 84.0,
            "HIS": 184.0,
            "ILE": 169.0,
            "LEU": 164.0,
            "LYS": 205.0,
            "MET": 188.0,
            "PHE": 197.0,
            "PRO": 136.0,
            "SER": 130.0,
            "THR": 142.0,
            "TRP": 227.0,
            "TYR": 222.0,
            "VAL": 142.0,
        },
    }
    reference_asa = dict_max_acc["Wilke"]

    if multichain:
        # Split the file by chain and pick the requested one, so that the
        # SASA is that of the isolated chain.
        per_chain = fs.structureArray(pdbfile, options={"separate-chains": True})
        chains = [single.chainLabel(1) for single in per_chain]
        structure = per_chain[chains.index(chain)]
        print("using {} separating chains {}".format(chains.index(chain), chains))
    else:
        # Use the structure exactly as stored in the file.
        structure = fs.Structure(pdbfile)

    print("Number of atoms of {}: {}".format(pdbfile, structure.nAtoms()))
    shrake_rupley = fs.Parameters({'algorithm': fs.ShrakeRupley, 'n-points': 10000})
    result = fs.calc(structure, shrake_rupley)
    chain_areas = result.residueAreas()[chain]

    # One column list per output field, filled in residue order.
    types, numbers, totals, apolars, backbones, sidechains, ratios = (
        [], [], [], [], [], [], [])

    for area in chain_areas.values():
        res_type = area.residueType
        types.append(res_type)
        numbers.append(area.residueNumber)
        totals.append(area.total)
        apolars.append(area.apolar)
        backbones.append(area.mainChain)
        sidechains.append(area.sideChain)

        if res_type == 'GLY':
            relative = 100 * area.mainChain / reference_asa[res_type]
        elif res_type in reference_asa:
            if relative_type == 'sidechain':
                relative = 100 * area.sideChain / reference_asa[res_type]
            else:
                relative = 100 * area.total / reference_asa[res_type]
        else:
            # Unknown residue type: average the side chain ratio over
            # every reference value in the table.
            relative = np.average(
                [100 * area.sideChain / max_area
                 for max_area in reference_asa.values()])
        ratios.append(relative)

    df_sasa = pd.DataFrame({
        'Residue': types,
        'Residue_num': numbers,
        'Chain': chain,
        'Total': totals,
        'Apolar': apolars,
        'Backbone': backbones,
        'Sidechain': sidechains,
        'Ratio': ratios,
    })

    # Console summary of the whole calculation.
    class_totals = fs.classifyResults(result, structure)
    print("Total : %.2f A2" % result.totalArea())
    for label in class_totals:
        print(label, ": %.2f A2" % class_totals[label])

    return df_sasa
import freesasa

# Compute per-atom SASA for 3lau.pdb with the Lee-Richards algorithm, dump
# "(atom name,area )" lines to SASA.txt and print a per-class summary.
structure = freesasa.Structure("3lau.pdb")

result = freesasa.calc(
    structure,
    freesasa.Parameters({
        'algorithm': freesasa.LeeRichards,
        'n-slices': 100
    }))
print(result.nAtoms())

# FreeSASA atom indices are zero-based: valid indices are 0 .. nAtoms()-1.
# The previous range(1, nAtoms() + 1) skipped the first atom and asked for
# an index one past the end.  The context manager closes the output file.
with open('SASA.txt', 'w+') as savedData:
    for i in range(result.nAtoms()):
        details = '(' + structure.atomName(i) + ',' + str(
            result.atomArea(i)) + ' )'
        print(details)
        savedData.write(details + '\n')

area_classes = freesasa.classifyResults(result, structure)
print(area_classes)
print("Total : %.2f A2" % result.totalArea())
for key in area_classes:
    print(key, ": %.2f A2" % area_classes[key])
示例#9
0
def _load_packaged_classifier(config_resource):
	"""Build a FreeSASA classifier from a config file shipped in the 'hydroid' package."""
	# The classifier is constructed from a file path, so the packaged bytes
	# are written to a temporary file first (works for any install mode).
	temp2 = tempfile.NamedTemporaryFile(delete=False)
	try:
		temp2.write(pkgutil.get_data('hydroid', config_resource))
		temp2.close()
		return freesasa.Classifier(temp2.name)
	finally:
		# Always clean up, also when the config cannot be read or parsed.
		temp2.close()
		os.remove(temp2.name)


def _hsasa_selection(resid, ch, names):
	"""Return a FreeSASA selection command named after resid for the given atom names."""
	# Negative residue numbers have to be escaped with a backslash.
	escape = '\\' if resid<0 else ''
	if len(ch)>0:
		return '%d,(chain %s) and (resi %s%d) and (name %s)'%(resid, ch, escape, resid, names)
	return '%d,(resi %s%d) and (name %s)'%(resid, escape, resid, names)


def get_DNA_H_SASA(pdb_file,csvfileout,chain=None,resids=[],seq=None,probe_radius=1.4,slicen=100,vdw_set=None,Hcontrib=[1.0]*7,n_threads=1,verbose=False):
	"""
	Wrapper around the FREESASA library: calculates the Solvent Accessible Surface Area of
	the atoms in pdb_file, then extracts the SASA of the deoxyribose hydrogen atoms and sums
	it up for every nucleotide with coefficients Hcontrib.

	pdb_file - path of the PDB file to analyse.
	csvfileout - path of the CSV file to write.
	chain - name of the DNA chain of interest in pdb_file, if chain has no name leave blank ('');
		None is treated like ''.
	resids - a list of resids to calculate H-SASA values.
	seq - sequence of the DNA strand, string or biopython Seq object.
	Hcontrib - coefficients for individual SASA of deoxyribose hydrogens for summing them up into H-SASA profile,
		order [H1' H2' H2'' H3' H4' H5' H5'']
	Note: chain, resids, seq, Hcontrib - can be also a list of two or more instances,
			to make calculation for several chains, spans of resids or combinations of Hcontrib at once.
			In this case number of elements in chain, resids, seq, Hcontrib should be the same,
			and the algorithm will iterate through all lists simultaneously (i.e. no combination will be tried).
			Chains should be of the same length.
	probe_radius - size of probe to roll.
	slicen - number of slices per atom, controls precision of the calculation.
	vdw_set - selecting the set of VdW radii:
		None - default for FREESASA used
		charmm36-rmin - rmin from charmm36 forcefield
		amber10-rmin - rmin from AMBER10 forcefield
	n_threads - number of threads handed to FREESASA.
	verbose - when False, FREESASA warnings are silenced.

	The list defaults of resids and Hcontrib are never modified here.

	Return
	--------
	Nothing; writes CSV file csvfileout with columns of H-SASA profiles along the sequence.
	"""
	# basestring only exists on Python 2; fall back to str elsewhere.
	try:
		string_types = basestring
	except NameError:
		string_types = str

	# The default chain=None used to crash in list(None); treat it as unnamed.
	if chain is None:
		chain=''
	chain=[chain] if isinstance(chain,string_types) else list(chain)
	if len(chain)>1:
		assert len(chain)==len(resids)
		assert len(chain)==len(seq)
		assert len(chain)==len(Hcontrib)
	else:
		resids=[resids]
		seq=[seq]
		Hcontrib=[Hcontrib]

	if not verbose:
		freesasa.setVerbosity(freesasa.nowarnings)
	# Same order as the Hcontrib coefficients.  The third entry must be H2'';
	# it used to repeat H2', so H2'' was never selected.
	hatoms=["H1'","H2'","H2''","H3'","H4'","H5'","H5''"]

	packaged_configs={'charmm36-rmin':'pkgdata/charmm36_rmin.config','amber10-rmin':'pkgdata/amber10_rmin.config'}
	if vdw_set in packaged_configs:
		classifier = _load_packaged_classifier(packaged_configs[vdw_set])
		structure = freesasa.Structure(pdb_file,classifier, options={'hydrogen' : True,'hetatm' : True})
	else:
		structure = freesasa.Structure(pdb_file, options={'hydrogen' : True,'hetatm' : True})
	print("Launching FreeSASA calculation...")
	result = freesasa.calc(structure,freesasa.Parameters({'algorithm' : freesasa.LeeRichards,'n-slices' : slicen,'probe-radius':probe_radius,'n-threads':n_threads}))
	print("Calculation done")

	print("Extracting SASA values ...")

	res=dict()

	for i,(ch,rids,Hcont) in enumerate(zip(chain,resids,Hcontrib)):
		res[i]=pd.Series(dtype=float)

		if (np.array(Hcont)==1.0).all():
			# Simplified procedure: all coefficients are 1, so every hydrogen
			# of a residue can be summed in a single selection.
			all_names='+'.join(hatoms)
			sels=[_hsasa_selection(resid,ch,all_names) for resid in rids]
			selections = freesasa.selectArea(sels,structure, result)
			res[i]=res[i].add(pd.Series(selections)*1.0,fill_value=0)
		else:
			# Regular procedure: one selection pass per hydrogen, weighted.
			for hat,hcont in zip(hatoms,Hcont):
				if hcont==0:
					# Nothing to add; skip the selection call altogether.
					continue
				sels=[_hsasa_selection(resid,ch,hat) for resid in rids]
				selections = freesasa.selectArea(sels,structure, result)
				res[i]=res[i].add(pd.Series(selections)*float(hcont),fill_value=0)

	# Selection names are the resids as strings; restore numeric order.
	for i in range(len(chain)):
		res[i].index=res[i].index.map(int)
		res[i]=res[i].sort_index()
	if len(chain)==1:
		df=pd.DataFrame({'resid':res[0].index,'Site':['%d%s'%(n,l) for n,l in zip(range(1,1+len(seq[0])),seq[0])],'H-SASA':res[0].values})
	else:
		df=pd.DataFrame()
		for i in range(len(chain)):
			ndf=pd.DataFrame({'resid_%d'%i:res[i].index,'Site_%d'%i:['%d%s'%(n,l) for n,l in zip(range(1,1+len(seq[i])),seq[i])],'H-SASA_%d'%i:res[i].values})
			df=pd.concat([df,ndf],axis=1)
	print("Outputting H-SASA profile to %s"%csvfileout)
	df.to_csv(csvfileout)
示例#10
0
def _get_free_sasa(t, parameters=None, classifier=None, options=None):
    '''Compute the total solvent accessible surface area of one chain.

    Every atom of the structure is copied into a FreeSASA structure, radii
    are assigned with the classifier and the total area is calculated.

    Parameters
    ----------
    t : sequence
        ``t[0]`` is the key reported back, ``t[1]`` the structure (it must
        expose the flat list attributes read below, e.g. ``group_list``,
        ``x_coord_list``).
    parameters : freesasa.Parameters, optional
        Calculation parameters; FreeSASA defaults when omitted.
    classifier : freesasa.Classifier, optional
        Radius classifier; the default classifier when omitted.
    options : dict, optional
        Structure options; ``freesasa.Structure.defaultOptions`` when
        omitted.  Only the skip-unknown / halt-at-unknown flags are used.

    Returns
    -------
    Row
        ``Row([key, total_area])``.

    Raises
    ------
    Exception
        If the structure does not hold exactly one chain, or if an unknown
        atom is met while halt-at-unknown is set.
    '''

    key = t[0]
    structure = t[1]
    if structure.num_chains != 1:
        raise Exception(
            "This method can only be applied to single polyer chain.")

    freesasaStructure = freesasa.Structure()
    if classifier is None:
        classifier = freesasa.Classifier()
    optbitfield = freesasa.Structure._get_structure_options(
        options or freesasa.Structure.defaultOptions)

    # Running positions in the flat per-chain / per-group / per-atom lists.
    # chainIndex was previously read without ever being defined or advanced.
    chainIndex = 0
    groupIndex = 0
    atomIndex = 0

    for modelIndex in range(0, structure.num_models):
        print("model: " + str(modelIndex + 1))

        for _ in range(0, structure.chains_per_model[modelIndex]):
            chainName = structure.chain_name_list[chainIndex]

            for _ in range(0, structure.groups_per_chain[chainIndex]):
                seqIndex = structure.sequence_index_list[groupIndex]
                group = structure.group_list[
                    structure.group_type_list[groupIndex]]
                groupName = group["groupName"]
                atomNames = group["atomNameList"]

                for offset, name in enumerate(atomNames):
                    # Strings are compared by value; `is` only worked when
                    # the interpreter happened to share the object.
                    if classifier.classify(groupName, name) == 'Unknown':
                        if optbitfield & freesasa.FREESASA_SKIP_UNKNOWN:
                            continue
                        if optbitfield & freesasa.FREESASA_HALT_AT_UNKNOWN:
                            raise Exception("Halting at unknown atom")

                    freesasaStructure.addAtom(
                        name, groupName, seqIndex, chainName,
                        structure.x_coord_list[atomIndex + offset],
                        structure.y_coord_list[atomIndex + offset],
                        structure.z_coord_list[atomIndex + offset])

                atomIndex += len(atomNames)
                groupIndex += 1

            chainIndex += 1

    freesasaStructure.setRadiiWithClassifier(classifier)
    freesasaResult = freesasa.calc(freesasaStructure, parameters
                                   or freesasa.Parameters())

    # The total comes from the result object itself; the previous code read
    # `.totalArea` from the classifyResults return value.
    # NOTE(review): confirm that the total area is the intended output.
    return Row([key, freesasaResult.totalArea()])
示例#11
0
    def run_freesasa_custom(self, npoints, verbose=False):
        """Return the SASA of the whole structure and of its first two chains.

        The structure at ``self.pdb_path`` is loaded with the classifier
        built from ``self.database_classifier`` and evaluated with the
        Lee-Richards algorithm (probe radius 1.4).  The same calculation is
        then repeated for each chain of the file taken separately.

        :param npoints: value used for the Lee-Richards 'n-slices' setting.
        :param verbose: accepted but not used.
        :return: ``[area_whole, area_chain_0, area_chain_1]``.
        """
        classifier = fs.Classifier(self.database_classifier)

        def total_area(target):
            # Identical settings are used for the complex and every chain.
            settings = fs.Parameters({
                'algorithm': fs.LeeRichards,
                'probe-radius': 1.4,
                'n-slices': npoints
            })
            return fs.calc(target, settings).totalArea()

        whole_options = {
            # skip HETATM records
            'hetatm': False,
            # include hydrogens
            'hydrogen': True,
            # only use the first MODEL
            'join-models': False,
            # guess the radius of unknown atoms from the element
            'skip-unknown': False,
            # radius 0 instead of an exception when guessing fails
            'halt-at-unknown': False,
        }
        whole_structure = fs.Structure(self.pdb_path, classifier, whole_options)
        area_prot = total_area(whole_structure)

        # Same file again, this time split into one structure per chain.
        chain_structures = fs.structureArray(self.pdb_path, {
            'separate-chains': True,
            'hydrogen': True,
            'separate-models': False
        }, classifier)
        area_list = [total_area(single_chain) for single_chain in chain_structures]

        return [area_prot, area_list[0], area_list[1]]
示例#12
0
def getAtomSASA(structure,
                classifier=None,
                probe_radius=1.4,
                mi=0,
                feature_name="sasa",
                binary=False,
                threshold=1.0,
                bonds=None,
                impute_hydrogens=False,
                include_hydrogens=False,
                **kwargs):
    """Store per-atom solvent accessible surface area in ``atom.xtra``.

    :param structure: structure or model to annotate; when its level is
        "S" the model at index ``mi`` is annotated.
    :param classifier: radius classifier; ``Radius(**kwargs)`` when omitted.
    :param probe_radius: probe radius forwarded to FreeSASA.
    :param mi: model index used for structure-level input.
    :param feature_name: key under which the value is stored in ``xtra``.
    :param binary: store ``int(sasa > threshold)`` instead of the area.
    :param threshold: cut-off used when ``binary`` is set.
    :param bonds: bond table used to find a hydrogen's parent atom;
        ``data.covalent_bond_data`` when omitted.
    :param impute_hydrogens: copy the parent atom's value onto hydrogens.
    :param include_hydrogens: passed to FreeSASA as the 'hydrogen' option.
    :param kwargs: forwarded to ``Radius`` when no classifier is given.
    :return: ``feature_name``.
    """
    if classifier is None:
        # No classifier supplied: build one from the extra keyword arguments.
        classifier = Radius(**kwargs)

    structure_options = {
        'hydrogen': include_hydrogens,
        'hetatm': False,
        'join-models': False,
        'skip-unknown': False,
        'halt-at-unknown': False
    }
    freesasa_structure = getFreeSASAStructureFromModel(structure,
                                                       structure_options,
                                                       classifier=classifier)
    probe_settings = freesasa.Parameters({"probe-radius": probe_radius})
    SASA = freesasa.calc(freesasa_structure, probe_settings)

    # Copy each atom's area onto the matching atom of the input.
    for index in range(freesasa_structure.nAtoms()):
        value = SASA.atomArea(index)
        residue_label = freesasa_structure.residueNumber(index).strip()
        chain_id = freesasa_structure.chainLabel(index).strip()
        # A trailing non-digit in the residue label is an insertion code.
        if residue_label[-1].isdigit():
            number_text, insertion = residue_label, " "
        else:
            number_text, insertion = residue_label[:-1], residue_label[-1]
        atom_name = freesasa_structure.atomName(index).strip()
        if binary:
            value = int(value > threshold)

        # Structure-level input is indexed by model first.
        if structure.get_level() == "S":
            chains = structure[mi]
        else:
            chains = structure
        residue_key = (' ', int(number_text), insertion)
        chains[chain_id][residue_key][atom_name].xtra[feature_name] = value

    # Optionally give every hydrogen the value of the atom it is bonded to.
    if impute_hydrogens:
        if bonds is None:
            # fall back to the default bond data
            bonds = data.covalent_bond_data

        for residue in structure.get_residues():
            residue_name = residue.get_resname().strip()
            for atom in residue:
                if atom.element == 'H':
                    hydrogen_name = atom.get_name().strip()
                    parent = bonds[residue_name][hydrogen_name]['bonded_atoms'][0]
                    if parent in residue and feature_name in residue[parent].xtra:
                        atom.xtra[feature_name] = residue[parent].xtra[feature_name]

    return feature_name