示例#1
0
def main():
    """Ask for a protein FASTA file and report parameters of its sequence.

    The value typed on standard input is handed to ``read_fasta``; the
    resulting sequence is printed together with its molecular weight,
    amino-acid counts and isoelectric point.  The same three values are then
    written to ``Valgu_parameetrid.txt``, one per line.
    """
    fasta = input()
    sequence = read_fasta(fasta)
    print(sequence)
    analysed_seq = ProteinAnalysis(str(sequence))

    # Each value is computed once and reused for both the console and the
    # file, so the two outputs cannot disagree.
    molecular_weight = analysed_seq.molecular_weight()
    print("\n", "Molekulaarmass:", molecular_weight)
    amino_acid_counts = analysed_seq.count_amino_acids()
    print("\n", "Aminohapete arv:", amino_acid_counts)
    isoelectric_point = analysed_seq.isoelectric_point()
    print("\n", "Isoelektriline punkt:", isoelectric_point)

    # The context manager closes the file even if a write fails.
    with open("Valgu_parameetrid.txt", "w") as text_file:
        text_file.write(str(molecular_weight))
        text_file.write("\n")
        text_file.write(str(amino_acid_counts))
        text_file.write("\n")
        text_file.write(str(isoelectric_point))
示例#2
0
def protParam(seq):
    """Print a ProtParam-style report for the protein sequence *seq*.

    Prints the amino-acid counts and percentages, molecular weight, GRAVY,
    isoelectric point, aromaticity and an extinction coefficient computed as
    ``5690 * W + 1280 * Y + 120 * C`` from the residue counts.

    :param seq: protein sequence passed straight to ``ProteinAnalysis``.
    :return: None; the report goes to standard output.
    """
    params = ProteinAnalysis(seq)
    mw = params.molecular_weight()
    c_aa = params.count_amino_acids()
    p_aa = params.get_amino_acids_percent()
    gravy = params.gravy()
    aromaticity = params.aromaticity()
    isoelectric_point = params.isoelectric_point()
    # Per-residue contributions of Trp, Tyr and Cys to the coefficient.
    ext_coeff = c_aa["W"] * 5690 + c_aa["Y"] * 1280 + c_aa["C"] * 120

    print("Amino acid count")
    pprint.pprint(c_aa)
    print("Amino acid percent")
    pprint.pprint(p_aa)
    print("Molecular weight")
    print("%f Da"%mw)
    print("Gravy")
    print(gravy)
    print("Isoelectric point")
    print(isoelectric_point)
    print("Aromaticity")
    print(aromaticity)
    print("Extinction coefficient: %d M-1cm-1 (Assuming reduced)"%ext_coeff)
    print("")
def get_protein_analysis(aa):
    """Return a flat list of ProtParam descriptors for the sequence *aa*.

    The list holds, in order: molecular weight, aromaticity, instability
    index, isoelectric point and GRAVY, followed by every value returned by
    ``secondary_structure_fraction()``.
    """
    analysis = ProteinAnalysis(aa)
    descriptors = [
        analysis.molecular_weight(),
        analysis.aromaticity(),
        analysis.instability_index(),
        analysis.isoelectric_point(),
        analysis.gravy(),
    ]
    descriptors.extend(analysis.secondary_structure_fraction())
    return descriptors
示例#4
0
def draw_sequence(sequence, mode = 'simple', alphabet = None):
    """Render *sequence* as HTML helper objects, 60 positions per line.

    :param sequence: the sequence to draw; it is iterated position by position
        and, in 'protparams' mode, also passed to ``ProteinAnalysis``.
    :param mode: 'simple' returns only the formatted sequence DIV;
        'protparams' returns a DIV holding the formatted sequence, a
        'Protein Parameters' heading and a table with length, MW and pI.
        Any other value falls through both branches and returns None.
    :param alphabet: accepted but never used in this function.
    """
        
    if mode == 'protparams':
        returndiv = DIV()
        from Bio.SeqUtils.ProtParam import ProteinAnalysis
        seq_div=DIV(_style='font-family:monospace',_class='raw-sequence')
        # Width of the position labels: digits of the sequence length plus one.
        spacer=len(str(len(sequence)))+1
        # NOTE(review): the replace() calls below map spaces to their second
        # argument, which renders as whitespace here; presumably it is a
        # non-breaking space so the padding survives HTML whitespace
        # collapsing - confirm before editing these lines.
        for i,pos in enumerate(sequence):
            if i==0:
                # Label the first line with position 1.
                seq_div.append(XML((str(i+1)+' ').rjust(spacer).replace(' ',' ')))
            if i%10==0 and i!=0:
                # Separate every group of ten positions.
                seq_div.append(' ')
            if i%60==0 and i!=0:
                # Close the line with its last position, break, and label the next.
                seq_div.append(XML((str(i)).ljust(spacer).replace(' ',' ')))
                seq_div.append(BR())
                seq_div.append(XML((str(i+1)+' ').rjust(spacer).replace(' ',' ')))
            # One SPAN per position; its title carries the 1-based position.
            seq_div.append(SPAN(pos,_class='seq-position',_title = i+1))
        returndiv.append(seq_div)
        returndiv.append(H3('Protein Parameters'))
        params_table = TABLE(_style= "width:200px;")
        
        protpar=ProteinAnalysis(sequence)
        params_table.append(TR(SPAN('Length:',_class = 'line-header'), '%i aa'%len(sequence)))
        # A KeyError from either calculation silently drops that table row.
        # NOTE(review): presumably raised for residues missing from
        # Biopython's tables - confirm.
        try:
            params_table.append(TR(SPAN('MW:',_class = 'line-header'), '%i KDa'%round(protpar.molecular_weight()/1000,0)))
        except KeyError:
            pass
        try:
            params_table.append(TR(SPAN('pI:',_class = 'line-header'), '%1.2f'%protpar.isoelectric_point()))
        except KeyError:
            pass
        returndiv.append(params_table)
        return returndiv
        
    if mode == 'simple':
        # Same rendering loop as the 'protparams' branch, without the table.
        seq_div=DIV(_style='font-family:monospace',_class='raw-sequence')
        spacer=len(str(len(sequence)))+1
        for i,pos in enumerate(sequence):
            if i==0:
                seq_div.append(XML((str(i+1)+' ').rjust(spacer).replace(' ',' ')))
            if i%10==0 and i!=0:
                seq_div.append(' ')
            if i%60==0 and i!=0:
                seq_div.append(XML((str(i)).ljust(spacer).replace(' ',' ')))
                seq_div.append(BR())
                seq_div.append(XML((str(i+1)+' ').rjust(spacer).replace(' ',' ')))
            seq_div.append(SPAN(pos,_class='seq-position', _title = i+1))
        return seq_div
示例#5
0
def properties(toxin_faa,antitoxin_faa,out):
    """Write a table of protein properties for the gene pairs of each locus.

    Both FASTA files are parsed; every record with a truthy start position
    contributes its isoelectric point, molecular weight and instability index
    to its locus.  Sequences that still contain ``*`` or ``X`` after trailing
    ``*`` are stripped get 0 for all three values as a missing-data flag.

    :param toxin_faa: path to the toxin protein FASTA file.
    :param antitoxin_faa: path to the antitoxin protein FASTA file.
    :param out: output prefix; the table is written to ``<out>.properties.txt``.
    :return: the path of the file that was written.
    """
    from collections import defaultdict
    from Bio import SeqIO

    # Build a dictionary of {locus: [{properties: values}, {properties: values}]}
    loci = defaultdict(list)
    for f in [toxin_faa, antitoxin_faa]:
        # Plain 'r' instead of 'rU': the 'U' flag was removed in Python 3.11
        # and text mode already applies universal newlines on Python 3.
        with open(f, 'r') as handle:
            for record in SeqIO.parse(handle, 'fasta'):
                locus, start = getNameAndPosition(record)
                if not start:
                    continue
                aaseq = str(record.seq).strip("*")
                # Omit sequences with missing positions or premature stops;
                # give them 0 as a flag for missing data instead.
                if "*" not in aaseq and "X" not in aaseq:
                    data = ProteinAnalysis(aaseq)
                    entry = {'start': start,
                             'pI': data.isoelectric_point(),
                             'weight': data.molecular_weight(),
                             'instability': data.instability_index()}
                else:
                    entry = {'start': start,
                             'pI': 0, 'weight': 0,
                             'instability': 0}
                loci[locus].append(entry)

    # Order genes in a locus positionally
    loci = orderPairs(loci)

    # Write to output file
    outfile = ".".join([out, "properties", "txt"])
    with open(outfile, 'w') as o:
        header = "\t".join(["locus",
                            "gene1_pI", "gene2_pI",
                            "gene1_weight", "gene2_weight",
                            "gene1_instability", "gene2_instability"])

        o.write("#" + header.upper() + "\n")
        # items() works on Python 2 and 3; iteritems() exists only on Python 2.
        for locus, gene in loci.items():
            # Only loci with exactly two genes form a reportable pair.
            if len(gene) != 2:
                continue
            line = [str(value) for value in (
                locus, gene[0]['pI'], gene[1]['pI'],
                gene[0]['weight'], gene[1]['weight'],
                gene[0]['instability'], gene[1]['instability'])]
            o.write("\t".join(line) + "\n")
    return outfile
def main():
    """Compute the isoelectric point of every protein in a FASTA file.

    Reads the hard-coded input FASTA, collects each record id with the
    isoelectric point of its sequence, and writes the pairs to the hard-coded
    output path as a tab-separated table without a header row.
    """
    ieps = []
    seqid = []
    inputfile = "/isi/olga/xin/Halophile_project/output/20160421/SS37_aa.faa"
    outputfile = "/isi/olga/xin/Halophile_project/output/20160421/SS37_reads_isp.txt"
    # The context manager closes the input even if parsing fails; plain 'r'
    # replaces 'rU', which was removed in Python 3.11.
    with open(inputfile, 'r') as f:
        for record in SeqIO.parse(f, "fasta"):
            seqid.append(record.id)
            seq = str(record.seq)
            seq_pa = ProteinAnalysis(seq)
            ieps.append(seq_pa.isoelectric_point())
    read_ieps = np.column_stack((seqid, ieps))
    df = pd.DataFrame(read_ieps)
    df.to_csv(outputfile, sep='\t', header=False)
示例#7
0
    def __init__(self, sequence):
        """Compute and store a set of descriptors for the given sequence.

        Descriptors come from three sources: Biopython's ``ProteinAnalysis``,
        module-level ``calculate_*`` helpers, and R code evaluated through
        ``r`` after loading the R package ``Peptides``.

        :param sequence: the sequence to analyse; it is passed to
            ``ProteinAnalysis`` and interpolated into R source as a string.
        """
        self.sequence = sequence
        self.sequence_length = len(sequence)
        analysis = ProteinAnalysis(sequence)

        self.amino_acid_percents = analysis.get_amino_acids_percent()
        self.amino_acids_composition = calculate_amino_acids_composition(sequence)
        self.aromaticity = analysis.aromaticity()
        self.instability = analysis.instability_index()
        self.flexibility = calculate_flexibility(sequence)
        # Scale name -> per-residue dictionary, handed to calculate_protein_scales.
        protein_scale_parameters = [{'name': 'Hydrophilicity', 'dictionary': hw},
                                    {'name': 'Surface accessibility', 'dictionary': em},
                                    {'name': 'Janin Interior to surface transfer energy scale', 'dictionary': ja},
                                    {'name': 'Bulkiness', 'dictionary': bulkiness},
                                    {'name': 'Polarity', 'dictionary': polarity},
                                    {'name': 'Buried residues', 'dictionary': buried_residues},
                                    {'name': 'Average area buried', 'dictionary': average_area_buried},
                                    {'name': 'Retention time', 'dictionary': retention_time}]
        self.protein_scales = calculate_protein_scales(analysis, protein_scale_parameters)
        self.isoelectric_point = analysis.isoelectric_point()
        self.secondary_structure_fraction = calculate_secondary_structure_fraction(analysis)
        self.molecular_weight = analysis.molecular_weight()
        # The attribute is called kyte_plot but holds the single GRAVY value.
        self.kyte_plot = analysis.gravy()
        self.pefing = calculate_pefing(sequence)

        # next parameters are calculated using R.Peptides
        r('require(Peptides)')
        # Defines the R variable `sequence`, which the R calls below read.
        # NOTE(review): the sequence is interpolated into R source unescaped;
        # this assumes it contains no quotes or backslashes - confirm.
        r('sequence = "{0}"'.format(sequence))
        self.aliphatic_index = r('aindex(sequence)')[0]
        self.boman_index = r('boman(sequence)')[0]
        self.charges = calculate_charges(sequence, 1.0, 14.0, 0.5, 'Lehninger')
        # NOTE(review): `seq` is R's generic sequence-generation function;
        # presumably `hydrophobicity(sequence)` from Peptides was intended -
        # confirm against the R package.
        self.hydrophobicity = r('seq(sequence)')[0]
        # Named rotation angles passed to the hydrophobic-moment and
        # peptide-type helpers.
        angles = [{'name': 'Alpha-helix', 'angle': -47},
                  {'name': '3-10-helix', 'angle': -26},
                  {'name': 'Pi-helix', 'angle': -80},
                  {'name': 'Omega', 'angle': 180},
                  {'name': 'Antiparallel beta-sheet', 'angle': 135},
                  {'name': 'Parallel beta-sheet', 'angle': 113}]
        # The polygly-polypro angle is added only when P and G together exceed
        # the 0.3 threshold (assumes the percents are 0-1 fractions - confirm).
        if self.amino_acid_percents['P'] + self.amino_acid_percents['G'] > 0.3:
            angles.append({'name': 'Polygly-polypro helix', 'angle': 153})
        self.hydrophobic_moments = calculate_hydrophobic_moments(sequence, angles)
        self.kidera_factors = calculate_kidera_factors(sequence)
        self.peptide_types = calculate_peptide_types(sequence, angles)
示例#8
0
def protein_analysis():
    """Controller: collect a protein sequence and store its ProtParam results.

    Users without ``session.username`` are redirected to the log-in page.
    When the form is accepted, the cleaned, upper-cased sequence and its
    analysis results are stored in the session under the keys ``sequence``,
    ``aa_count``, ``percent_aa``, ``mw``, ``aromaticity``, ``instability``,
    ``flexibility``, ``pI`` and ``sec_struct``; the request is then redirected
    to ``protein_analysis_output``.

    :return: ``dict(form=form)`` for rendering the input form.
    """
    # Identity comparison is the idiomatic test for None.
    if session.username is None:
        redirect(URL(r=request,f='../account/log_in'))
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    form = FORM(TABLE(
            TR("Amino acid sequence:  ",
               TEXTAREA(_type="text", _name="sequence",
                        requires=IS_NOT_EMPTY())),
            INPUT(_type="submit", _value="SUBMIT")))
    if form.accepts(request.vars,session):
        session['sequence'] = seqClean(form.vars.sequence.upper())
        X = ProteinAnalysis(session['sequence'])
        session['aa_count'] = X.count_amino_acids()
        session['percent_aa'] = X.get_amino_acids_percent()
        session['mw'] = X.molecular_weight()
        session['aromaticity'] = X.aromaticity()
        session['instability'] = X.instability_index()
        session['flexibility'] = X.flexibility()
        session['pI'] = X.isoelectric_point()
        session['sec_struct'] = X.secondary_structure_fraction()
        redirect(URL(r=request, f='protein_analysis_output'))
    return dict(form=form)
示例#9
0
class Peptide(PolyIon):
    """Peptide represents single protein chains in solution.

    Peptides properties are based entirely on analysis of the sequence of the
    peptide.
    """

    _state = {'name': 'Name of the peptide.',
              'sequence': 'Amino acid sequence of the peptide.'
              }

    _sequence = None
    _analysis = None

    # TODO: move h to function or constants. Unify with pitts?
    _h_max = 1
    _h_min = 2./3.
    _h = 5./6.

    def __init__(self, name=None, sequence=None):
        """Store the name and sequence and build the sequence analysis.

        :param name: name of the peptide.
        :param sequence: amino acid sequence of the peptide.
        """
        self._name = name
        self._sequence = sequence
        # NOTE(review): `self.sequence` is not defined in this class;
        # presumably PolyIon exposes the `_state` keys as attributes backed by
        # the underscore-prefixed fields - confirm.
        self._analysis = ProteinAnalysis(str(self.sequence))

    @property
    def molecular_weight(self):
        """Molecular weight of the sequence from ``SeqUtils.molecular_weight``."""
        return SeqUtils.molecular_weight(self.sequence, 'protein')

    def charge(self, pH=None, ionic_strength=None, temperature=None,
               moment=1):
        """Return the time-averaged charge of the peptide.

        The three context arguments are resolved through
        ``_resolve_context``; only the resolved pH enters the calculation.

        :param pH: pH at which the charge is evaluated.
        :param ionic_strength: passed to ``_resolve_context`` only.
        :param temperature: passed to ``_resolve_context`` only.
        :param moment: exponent applied to the computed charge.
        """
        pH, ionic_strength, temperature = \
            self._resolve_context(pH, ionic_strength, temperature)

        amino_acid_count = self._analysis.count_amino_acids()

        # Copies, so the terminal pKs added below never touch the shared tables.
        pos_pKs = dict(positive_pKs)
        neg_pKs = dict(negative_pKs)

        nterm = self.sequence[0]
        cterm = self.sequence[-1]

        # Use residue-specific terminal pKs where the tables provide them.
        if nterm in pKnterminal:
            pos_pKs['Nterm'] = pKnterminal[nterm]
        if cterm in pKcterminal:
            neg_pKs['Cterm'] = pKcterminal[cterm]

        # NOTE: relies on the private `_chargeR` method of IsoelectricPoint.
        charge = IsoelectricPoint(self.sequence,
                                  amino_acid_count)._chargeR(pH,
                                                             pos_pKs,
                                                             neg_pKs)
        return charge**moment

    def isoelectric_point(self, ionic_strength=None, temperature=None):
        """Return the isoelectric point of the peptide.

        Both arguments are currently ignored; the value comes straight from
        the sequence analysis.
        """
        # _, ionic_strength, temperature = \
        #     self._resolve_context(None, ionic_strength, temperature)
        return self._analysis.isoelectric_point()

    def volume(self):
        """Return the approximate volume of the folded peptide in m^3."""
        v = self.molecular_weight / avogadro / self.density() / lpm3 / gpkg
        return v

    def radius(self):
        """Return the approximate radius of the folded peptide in m.

        Radius of the sphere whose volume equals ``volume()``.
        """
        return (self.volume() * 3. / 4. / pi) ** (1. / 3.)

    def density(self):
        """Return the approximate density of the folded peptide in kg/L.

        Uses the molecular-weight-dependent correlation
        ``1.410 + 0.145 * exp(-M / 13)`` with ``M`` in kDa
        (Fischer, Polikarpov & Craievich, Protein Science, 2004).
        """
        # molecular_weight is in Da (g/mol) but the correlation is defined for
        # kDa; without the conversion the exponential term is effectively zero
        # for every peptide.
        molecular_weight_kda = self.molecular_weight / 1000.
        return 1.410 + 0.145 * exp(-molecular_weight_kda / 13.)

    def mobility(self, pH=None, ionic_strength=None, temperature=None):
        """Return the effective mobility of the ion in m^2/V/s.

        Computed as charge times the elementary charge over a Stokes drag
        term ``6 * pi * viscosity * radius``, corrected by
        ``1 + radius / debye_length`` and scaled by the class constant ``_h``.

        :param pH: pH at which the charge is evaluated.
        :param ionic_strength: ionic strength passed to the solvent's
            ``debye`` method.
        :param temperature: temperature passed to the solvent's ``viscosity``
            and ``debye`` methods.
        """
        pH, ionic_strength, temperature = \
            self._resolve_context(pH, ionic_strength, temperature)

        mobility = self.charge(pH) * elementary_charge /\
            (6 * pi * self._solvent.viscosity(temperature) * self.radius() *
             (1 + self.radius() /
              self._solvent.debye(ionic_strength, temperature)
              )
             ) * self._h
        return mobility
示例#10
0
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
import sys
import json

# Input: one JSON object on the command line with a "Sequence" string and an
# "Options" collection naming the values to compute.  Output: one JSON object
# on standard output holding the requested values.
inp = json.loads(sys.argv[1])

seq = inp["Sequence"]
options = inp["Options"]

X = ProteinAnalysis(seq)

data = dict()

if "MW" in options:
    data["MW"] = X.molecular_weight()

if "EC280" in options:
    aa_count = X.count_amino_acids()
    # Tyr and Trp always contribute; Cys contributes only when the caller
    # states that disulfides are present.
    data["EC280"] = 1490 * aa_count["Y"] + 5500 * aa_count["W"]
    if "hasDisulfide" in options:
        data["EC280"] += 62.5 * aa_count["C"]

if "PI" in options:
    data["PI"] = X.isoelectric_point()

if "AACont" in options:
    ratios = X.get_amino_acids_percent()
    # Scale the returned ratios by 100 for the reported content values.
    data["AACont"] = {aa: ratios[aa] * 100. for aa in ratios}

# print() with a single argument behaves the same on Python 2 and Python 3.
print(json.dumps(data))
示例#11
0
#!/usr/bin/env python

import sys
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Header row.  The seventh column holds a.aromaticity(), so it is labelled
# "aromaticity" (it was previously mislabelled "monoisotpoic").
sys.stdout.write(
    "ID\tMW\tIP\tgravy\tlength\tinstability\taromaticity\tSequence\n")

# One tab-separated row per FASTA record read from standard input.
for record in SeqIO.parse(sys.stdin, "fasta"):
    a = ProteinAnalysis(str(record.seq))

    properties = [
        record.id,
        a.molecular_weight(),
        a.isoelectric_point(),
        a.gravy(),
        a.length,
        a.instability_index(),
        a.aromaticity(),
        # always last column to make the output more readable
        a.sequence,
    ]
    sys.stdout.write('\t'.join(map(str, properties)) + "\n")
示例#12
0
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
from Bio import SeqIO
# Print every ProtParam descriptor for each record of the sample FASTA file.
with open('../../samples/pdbaa') as fh:
    for rec in SeqIO.parse(fh, 'fasta'):
        myprot = ProteinAnalysis(str(rec.seq))
        # Argument-free descriptors, printed in this order.
        descriptors = (
            myprot.count_amino_acids,
            myprot.get_amino_acids_percent,
            myprot.molecular_weight,
            myprot.aromaticity,
            myprot.instability_index,
            myprot.flexibility,
            myprot.isoelectric_point,
            myprot.secondary_structure_fraction,
        )
        for descriptor in descriptors:
            print(descriptor())
        # Profile on the ProtParamData.kd scale, called with 9 and .4.
        print(myprot.protein_scale(ProtParamData.kd, 9, .4))
def feature_extractor(result,
                      sequence,
                      simplified_sequence,
                      suffix,
                      bipeptide=False,
                      secondary_struct=True,
                      side_charge=True,
                      peptide=True,
                      aromaticity=True,
                      instability=True,
                      average_h=True,
                      side_charge_ave=True,
                      gravy=False):
    """Add sequence-derived features to *result* and return it.

    Every key ends with ``_<suffix>``.  Total hydrophobicity, isoelectric
    point and molecular weight are always written; the other groups are
    switched by the boolean flags.  Molecular weight, instability index and
    GRAVY are computed from *simplified_sequence*; everything else uses
    *sequence*.

    :param result: mapping that receives the features; mutated in place.
    :param sequence: sequence used for most features.
    :param simplified_sequence: sequence used for weight, instability, GRAVY.
    :param suffix: text appended to every feature key.
    :return: the same *result* object.
    """
    if peptide:
        for residue in AMINO_ACIDS:
            value = aminoacid_composition(sequence, residue)
            result["{}_composition_{}".format(residue, suffix)] = value

    if bipeptide:
        for first in AMINO_ACIDS:
            for second in AMINO_ACIDS:
                value = aminoacid_composition(sequence, first + second)
                result["{}{}_composition_{}".format(first, second,
                                                    suffix)] = value

    if average_h:
        value = average_hydrophobicity(sequence)
        result["hydrophobicity_{}".format(suffix)] = value

    # Total hydrophobicity is recorded regardless of the flags.
    for sign in ("positive", "negative"):
        value = total_hydrophobicity(sequence, sign)
        result["total_{}_hydrophobicity_{}".format(sign, suffix)] = value

    if side_charge:
        if side_charge_ave:
            value = average_side_chain_charge(sequence)
            result["side_chain_charge_{}".format(suffix)] = value
        for sign in ("positive", "negative"):
            value = total_charge(sequence, sign)
            result["total_{}_charge_{}".format(sign, suffix)] = value

    tools = ProteinAnalysis(sequence)
    tools_simplified_sequence = ProteinAnalysis(simplified_sequence)

    if secondary_struct:
        fractions = tools.secondary_structure_fraction()
        helix, turn, sheet = fractions
        result["helix_{}".format(suffix)] = helix
        result["turn_{}".format(suffix)] = turn
        result["sheet_{}".format(suffix)] = sheet

    if aromaticity:
        value = tools.aromaticity()
        result["aromaticity_{}".format(suffix)] = value

    value = tools.isoelectric_point()
    result["isoelectric_point_{}".format(suffix)] = value

    value = tools_simplified_sequence.molecular_weight()
    result["molecular_weight_{}".format(suffix)] = value

    if instability:
        value = tools_simplified_sequence.instability_index()
        result["instability_index_{}".format(suffix)] = value

    if gravy:
        value = tools_simplified_sequence.gravy()
        result["gravy_{}".format(suffix)] = value

    return result
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
import sys
# Print the isoelectric point of every sequence in the FASTA file named on
# the command line.  The context manager closes the file once all records are
# read; plain 'r' replaces 'rU', which was removed in Python 3.11.
with open(sys.argv[1], 'r') as handle:
    records = list(SeqIO.parse(handle, "fasta"))
for record in records:
    prot = ProteinAnalysis(str(record.seq))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(prot.isoelectric_point())
示例#15
0
def feature_extraction(outdir=os.getcwd()):
    """Extract sequence features and write them in SVM, Weka and CSV formats.

    Reads ``<outdir>/data/data.txt`` (columns ``sequence`` and ``class``),
    deletes the ``<outdir>/data`` directory, then appends one row per sequence
    to ``svm_out.txt``, ``weka_output.arff`` and ``tain_DL.csv`` inside
    ``<outdir>/output/``.  The Weka file first receives the attribute lines
    loaded from the packaged ``/config/attrib`` pickle.

    :param outdir: working directory.  NOTE: the default is evaluated once,
        when the function is defined, not on each call.
    :raises IOError: if the input data file does not exist.  This is checked
        before the data directory is deleted.
    """
    path_ = outdir
    data_file = os.path.join(path_, "data/data.txt")  # input sequences

    print('Reading data...')
    # Fail early and clearly; previously a missing file surfaced as a
    # NameError only after the data directory had already been removed.
    if not os.path.exists(data_file):
        raise IOError("Input data file not found: " + data_file)
    data = pd.read_table(data_file)  # read the file as a pandas DataFrame
    print('Clearing existing files...')
    shutil.rmtree(path_ + "/data")

    # get the sequences and classes as lists
    seq_list = data['sequence'].tolist()
    cls_list = data['class'].tolist()

    pth = path_ + '/output/'
    if not os.path.exists(pth):
        os.makedirs(pth)

    # Load the pickle file with the attribute names (for Weka); it ships with
    # the package.  The context manager closes the handle.
    filepath = pkg_resources.resource_filename(__name__, '/config/attrib')
    with open(filepath, "rb") as attr_file:
        attr = pickle.load(attr_file)
    with open(pth + "weka_output.arff", "a+") as wk:
        wk.write("".join('{}\n'.format(x) for x in attr))

    def format_output(aa_count, cnt, aromat, fraction, iso, mol_w, ins):
        """Append one sequence's features to the arff, txt and csv files."""
        values = list(aa_count.values())
        # Class 1 is the positive class; every other value is negative.
        if cnt == 1:
            svm_label, weka_label, csv_label = "+1", " serk", str(cnt)
        else:
            svm_label, weka_label, csv_label = "-1", " loc", "0"

        # All three files live in the output directory.  The negative-class
        # SVM rows used to be written to path_ + "svm_out.txt" by mistake.
        with open(pth + "svm_out.txt", "a+") as s:
            s.write(svm_label + " " +
                    ' '.join("{}:{}".format(k, v)
                             for k, v in enumerate(values)) + "\n")
        with open(pth + "weka_output.arff", "a+") as w:
            w.write(' '.join("{},".format(x) for x in values) +
                    weka_label + "\n")
        with open(pth + "tain_DL.csv", "a+") as DPL:
            DPL.write(''.join("{},".format(x) for x in values) +
                      str(round(aromat, 3)) + "," +
                      str(round(fraction[0], 3)) + "," +
                      str(round(fraction[1], 3)) + "," +
                      str(round(fraction[2], 3)) + "," +
                      str(round(iso, 3)) + "," + str(mol_w) + "," +
                      str(ins) + "," + csv_label + "\n")

    mol_w = ins = None
    for seq, cl in zip(seq_list, cls_list):  # main feature-extraction loop
        analysis = ProteinAnalysis(seq)  # Biopython protein analysis
        aa_count = analysis.count_amino_acids()  # amino acid count
        aromat = analysis.aromaticity()
        fraction = analysis.secondary_structure_fraction()
        iso = analysis.isoelectric_point()
        try:
            new_mol_w = "%0.2f" % analysis.molecular_weight()
            new_ins = "%0.2f" % analysis.instability_index()
        except Exception:
            # Best effort, as before: reuse the previous sequence's values.
            # With no previous values there is nothing to reuse, so the
            # original error is re-raised instead of an unbound-name error.
            if mol_w is None:
                raise
        else:
            mol_w, ins = new_mol_w, new_ins
        format_output(aa_count, cl, aromat, fraction, iso, mol_w, ins)
    print("Feature extraction complete...")
    print("Extracted features are saved in" + outdir +
          "/ directory in .txt, .arff and .csv formats")
示例#16
0
 def get_isoelectric_point(self):
     """Return the isoelectric point Biopython computes for ``self.seq``."""
     from Bio.SeqUtils.ProtParam import ProteinAnalysis
     return ProteinAnalysis(self.seq).isoelectric_point()
    if os.path.isfile(os.path.join(path, entry)):
        if entry.endswith('.pdb'):
            areas = get_area_classes(entry)
            polar_area.append(areas[0])
            apolar_area.append(areas[1])
            total_area.append(areas[2])

for entry in os.listdir(path):
    if os.path.isfile(os.path.join(path, entry)):
        if entry.endswith('.pdb'):
            for record in SeqIO.parse(entry, "pdb-atom"):
                sequence = str(record.seq).replace('X', 'G')
                protein = ProteinAnalysis(sequence)
                p_len.append(len(sequence))
                mol_w.append(protein.molecular_weight())
                iso_p.append(protein.isoelectric_point())
                smell.append(protein.aromaticity())
                taste_factor.append(protein.gravy())
                insta_ind.append(protein.instability_index())
                helter_skeler.append(protein.secondary_structure_fraction()[0])
                turnip.append(protein.secondary_structure_fraction()[1])
                garfield.append(protein.secondary_structure_fraction()[2])
                for x in amino_acids:
                    n = protein.count_amino_acids()[x]
                    for y in d_count.keys():
                        if y[-1] == x:
                            d_count[y].append(n)
                for a in amino_acids:
                    m = protein.get_amino_acids_percent()[a]
                    for b in d_perc.keys():
                        if b[-1] == a:
示例#18
0

#print()
#print("Protein Analysis Started")
# open csv file and read in data row by row
# process each row for pI, MW, etc
# Each CSV row supplies an id (column 0), a sequence (column 1) and an
# integer label (column 2).
with open('xtal_2.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        pid = row[0]
        # Upper-case the sequence and replace every 'X' with 'G' before use.
        pseq = row[1].upper().replace('X', 'G')
        xtal = pcrystal(pid, pseq)
        xtal.label = int(row[2])
        # Attach the isoelectric point and molecular weight to the record.
        analysed_seq = ProteinAnalysis(pseq)
        xtal.pI = analysed_seq.isoelectric_point()
        xtal.MW = analysed_seq.molecular_weight()
        #print()
        #print(pseq)
        #print(xtal.id)
        #print(xtal.seq.upper())
        #print(xtal.label)
        #print ('protein isoelectric point is %.2f' % xtal.pI)
        #print ('protein molecular weight is %.2f' % xtal.MW)
        #print ('protein length is ', xtal.size)
        #print ('percent hydrophobic is %.2f' % xtal.phobic)
        #print ('percent hydrophillic is %.2f' % xtal.phillic)
        #if (xtal.label == 0):
        #print("Crystallization Failed!")
        #else:
        #print("Protein Crystallized!")
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
import sys
# Print the isoelectric point of every sequence in the FASTA file named on
# the command line.  The context manager closes the file once all records are
# read; plain 'r' replaces 'rU', which was removed in Python 3.11.
with open(sys.argv[1], 'r') as handle:
    records = list(SeqIO.parse(handle, "fasta"))
for record in records:
    prot = ProteinAnalysis(str(record.seq))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(prot.isoelectric_point())
示例#20
0
#!/usr/bin/env python

# Calculating the molecular weight and isoelectric point for a FASTA file containing numerous sequences

# To keep the program general, argparse and sys are used so that the input file can be given on the command line.

import argparse
import sys

parser = argparse.ArgumentParser(
    description='Calculate mw and pi for protein sequences.')

# Optional positional argument: the FASTA file to read; standard input is
# used when it is omitted.
parser.add_argument('infile',
                    nargs='?',
                    type=argparse.FileType('r'),
                    default=sys.stdin)
args = parser.parse_args()

# Loop over the FASTA entries with SeqIO, analyse each sequence with
# ProteinAnalysis, and print the sequence name, molecular weight and
# isoelectric point.

from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
for record in SeqIO.parse(args.infile, "fasta"):
    seq = str(record.seq)
    my_prot = ProteinAnalysis(seq)
    # print() with a single argument behaves the same on Python 2 and 3; the
    # former print statement is a syntax error on Python 3.
    print('{}\t {}\t {}'.format(record.id, my_prot.molecular_weight(),
                                my_prot.isoelectric_point()))
def get_biopython_features(X):
    """Return a ``(len(X), 6)`` array of ProtParam features, one row per sequence.

    Columns are: molecular weight, instability index, isoelectric point,
    then the values returned by ``secondary_structure_fraction()``.

    :param X: array whose first axis indexes the sequences.
    """
    features = np.zeros((X.shape[0], 6))
    for row_index, sequence in enumerate(X):
        analysis = ProteinAnalysis(sequence)
        row = [
            analysis.molecular_weight(),
            analysis.instability_index(),
            analysis.isoelectric_point(),
        ]
        row.extend(analysis.secondary_structure_fraction())
        features[row_index] = np.array(row)

    return features
示例#22
0
     handle,
     "fasta",
     alphabet=IUPAC.protein
 ):
     start_time = time.time()
     record_list = record.description.split("|")
     # get meta data
     acc_code = record_list[0]
     organism = record_list[1]
     EC_code = record_list[2].replace("__", " ")
     species = record_list[3].replace("__", " ")
     note = record_list[4]
     # get unmodified pI
     seq = record.seq
     seq_obj = ProteinAnalysis(''.join(seq))
     pi = seq_obj.isoelectric_point()
     count_sequences_done += 1
     modifier = '0'
     if pi < param_dict['cutoff_pi']:
         category = '0'
     else:
         category = '1'
     # output to CSV
     with open(param_dict['out_CSV_pi'], 'a') as f:
         string = file+','
         string += acc_code+','
         string += organism+','
         string += EC_code+','
         string += species+','
         string += note+','
         string += '{0:.2f}'.format(pi)+','
示例#23
0
文件: tools.py 项目: Ravasz/ed
def iso_e(protS):
    """Return the isoelectric point of the protein sequence string protS."""
    from Bio.SeqUtils.ProtParam import ProteinAnalysis

    return ProteinAnalysis(protS).isoelectric_point()
示例#24
0
y_axis = []
x_axis = data_mwt  # same list object: x_axis grows together with data_mwt

# Report ribosomal proteins: those under 20000 in molecular weight are listed
# in the text widget, and every ribosomal protein gets a CSV row.
for record in SeqIO.parse(seq_file, "fasta"):
    temp_seq = str(record.seq)
    analysis_seq = ProteinAnalysis(temp_seq)
    description = record.description
    if not ("ribosomal protein" in description
            or "ribosomal subunit" in description):
        continue

    if analysis_seq.molecular_weight() < 20000:
        mw_text = '%.2f' % (analysis_seq.molecular_weight())
        data_mwt.append(mw_text)
        y_axis.append(1)

        text_out.setTextColor(QColor('blue'))
        pi_text = '%.2f' % (analysis_seq.isoelectric_point())
        text_out.append(",".join(
            [str(len(data_mwt)), description, mw_text, pi_text]))

    # One (molecular weight, isoelectric point) row per ribosomal record,
    # regardless of the weight cut-off above.
    csv_write = csv.writer(output)
    csv_write.writerow(('%.2f' % (analysis_seq.molecular_weight()),
                        '%.2f' % (analysis_seq.isoelectric_point())))
#!/usr/bin/env python

import sys
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Header row.  The seventh column holds a.aromaticity(), so it is labelled
# "aromaticity" (it was previously mislabelled "monoisotpoic").
sys.stdout.write("ID\tMW\tIP\tgravy\tlength\tinstability\taromaticity\tSequence\n")

# One tab-separated row per FASTA record read from standard input.
for record in SeqIO.parse(sys.stdin, "fasta"):
    a = ProteinAnalysis(str(record.seq))

    properties = [
        record.id,
        a.molecular_weight(),
        a.isoelectric_point(),
        a.gravy(),
        a.length,
        a.instability_index(),
        a.aromaticity(),
        # always last column to make the output more readable
        a.sequence,
    ]
    sys.stdout.write('\t'.join(map(str, properties)) + "\n")

示例#26
-1
 def calc_isoelectric_point(self) -> float:
     """
     Compute the isoelectric point of the sequence returned by ``get_seq()``.

     Delegates to Biopython's ``ProteinAnalysis``; see
     http://biopython.org/DIST/docs/api/Bio.SeqUtils.ProtParam-pysrc.html
     :return: the sequence's isoelectric point
     """
     return ProteinAnalysis(self.get_seq()).isoelectric_point()