示例#1
0
def get_aa_count_dict(seq):
    """Return the count of each of the 20 standard amino acids in ``seq``.

    Args:
        seq: Protein sequence handed to ``ProteinAnalysis``.

    Returns:
        dict: One-letter amino-acid code mapped to the count reported by
        ``ProteinAnalysis.count_amino_acids()``, keys in alphabetical order.
    """
    # Count once and reuse the result for all 20 keys.
    counts = ProteinAnalysis(seq).count_amino_acids()
    return {aa: counts[aa] for aa in 'ACDEFGHIKLMNPQRSTVWY'}
示例#2
0
def main():
    """Ask for a protein FASTA file and report parameters for it.

    The file name is read from standard input and loaded with ``read_fasta``.
    Molecular weight, amino-acid counts and isoelectric point are printed and
    then written, one value per line, to ``Valgu_parameetrid.txt``.
    """
    fasta = input()
    sequence = read_fasta(fasta)
    print(sequence)
    analysed_seq = ProteinAnalysis(str(sequence))

    # Compute each value once; it is reused for the printout and the file.
    molecular_weight = analysed_seq.molecular_weight()
    print("\n", "Molekulaarmass:", molecular_weight)
    aa_counts = analysed_seq.count_amino_acids()
    print("\n", "Aminohapete arv:", aa_counts)
    isoelectric_point = analysed_seq.isoelectric_point()
    print("\n", "Isoelektriline punkt:", isoelectric_point)

    # The context manager closes the file even if a write fails.
    with open("Valgu_parameetrid.txt", "w") as text_file:
        text_file.write(str(molecular_weight))
        text_file.write("\n")
        text_file.write(str(aa_counts))
        text_file.write("\n")
        text_file.write(str(isoelectric_point))
示例#3
0
def protparams(aa_seq, vstarts, vstops):
    """Compute a set of parameters for sub-peptides of a polypeptide.

    The parameters help assess the potential of each peptide as a
    crystallization candidate. Every combination of a value from ``vstarts``
    and a value from ``vstops`` with ``start < stop`` selects the slice
    ``aa_seq[start:stop]``.

    Args:
        aa_seq: Amino-acid sequence (string or Seq object).
        vstarts: Iterable of start indices (anything ``int()`` accepts).
        vstops: Iterable of stop indices (anything ``int()`` accepts).

    Returns:
        tuple: Three parallel lists ``(MWs, pIs, epsilons)``: molecular weight
        in kDa rounded to 1 decimal, isoelectric point rounded to 1 decimal,
        and the extinction term divided by the mass in Dalton, rounded to 2
        decimals.
    """
    MWs = []
    pIs = []
    epsilons = []
    for start in vstarts:
        start = int(start)
        for stop in vstops:
            stop = int(stop)
            if start >= stop:
                continue
            params = PA(aa_seq[start:stop])  # works with string or Seq objects
            mass_da = params.molecular_weight()
            # To calculate the epsilon, we use this formula from protparam
            # (web.expasy.org/protparam):
            # Epsilon (Prot) = N(Tyr)*Ext(Tyr) + N(Trp)*Ext(Trp)
            #                  + N(Cystine)*Ext(Cystine) / MW in Dalton
            # NOTE(review): every Cys is weighted with the cystine coefficient
            # here; confirm whether Cys pairs were intended.
            aa_dict = params.count_amino_acids()  # {'aa': count}, one-letter codes
            absorbance = (aa_dict['Y'] * 1490 + aa_dict['W'] * 5500 +
                          aa_dict['C'] * 125)
            # Divide by the unrounded mass: the kDa value reported below is
            # rounded to 0.1 kDa and can even round to 0.0.
            epsilon = round(absorbance / mass_da, 2)
            MWs.append(round(mass_da / 1000, 1))  # in kiloDalton, 1 decimal
            pIs.append(round(params.isoelectric_point(), 1))
            epsilons.append(epsilon)
    return MWs, pIs, epsilons
示例#4
0
def aa_composition(seq):
    """Return the amino-acid counts of ``seq`` as ``'X:\\t<count>'`` strings.

    Args:
        seq: Protein sequence handed to ``ProteinAnalysis``.

    Returns:
        list: One ``'<letter>:\\t<count>'`` entry per amino acid in the fixed
        order A, C, E, D, G, F, I, H, K, M, L, N, Q, P, S, R, T, W, V, Y,
        followed by a single empty string.
    """
    counts = ProteinAnalysis(seq).count_amino_acids()

    report_order = 'ACEDGFIHKMLNQPSRTWVY'
    entries = ['%s:\t%i' % (letter, counts[letter]) for letter in report_order]

    # The result always ends with an empty string (the remainder of splitting
    # a comma-terminated record), so keep that final element.
    entries.append('')
    return entries
示例#5
0
def aa_comp_calc():
    """Write the amino-acid composition of every input record to a TSV file.

    Reads sequences from ``args.input`` (format ``args.in_format``) and writes
    ``<args.output>/aa_comp.tsv`` with one row per record: the record id
    followed by the fraction of each amino acid. Fractions are relative to the
    sequence length after removing ``-``, ``X`` and ``*`` characters. A record
    with no remaining residues gets ``0`` in every column.
    """
    peptides = [
        'A', 'G', 'P', 'S', 'T', 'C', 'F', 'W', 'Y', 'H', 'R', 'K', 'M', 'I',
        'L', 'V', 'N', 'D', 'E', 'Q'
    ]
    # Creates missing parent directories and tolerates an existing directory.
    os.makedirs(args.output, exist_ok=True)
    with open(args.input, 'r') as infile, open(f'{args.output}/aa_comp.tsv',
                                               'w') as outfile:
        outfile.write('Taxon\t' + '\t'.join(peptides) + '\n')

        # Reads in input file
        for record in SeqIO.parse(infile, format=args.in_format):
            sequence = str(record.seq)
            count_dict = ProteinAnalysis(sequence).count_amino_acids()
            length = len(
                sequence.replace("-", "").replace("X", "").replace("*", ""))

            # One column per peptide; missing peptides and empty sequences
            # (length 0 would divide by zero) are reported as 0.
            fractions = []
            for pep in peptides:
                if length and pep in count_dict:
                    fractions.append(str(float(count_dict[pep]) / length))
                else:
                    fractions.append('0')

            outfile.write(f'{record.id}\t' + '\t'.join(fractions) + '\n')
示例#6
0
def protein_properties(seq):
    """Return a ``ProtProp`` record with some protein biochemical properties.

    ``seq`` is a Bio.Seq.Seq or str representing a protein sequence.
    ``instability`` and ``gravy`` are ``None`` when their computation raises
    ``KeyError``.
    """
    analysis = ProteinAnalysis(seq)

    def value_or_none(compute):
        # Evaluate ``compute`` and map a KeyError to None.
        try:
            return compute()
        except KeyError:
            return None

    counts = analysis.count_amino_acids()
    aromaticity = analysis.aromaticity()
    isoelectric_point = analysis.isoelectric_point()
    instability = value_or_none(analysis.instability_index)
    gravy = value_or_none(analysis.gravy)

    return ProtProp(
        aa=str(seq),
        gravy=gravy,
        aromaticity=aromaticity,
        isoelectric_point=isoelectric_point,
        instability=instability,
        aa_counts=counts,
    )
示例#7
0
def protParam(seq):
    """Print a report of basic physico-chemical parameters for ``seq``.

    Printed in order: amino-acid counts, amino-acid percentages, molecular
    weight, GRAVY, isoelectric point, aromaticity and an extinction
    coefficient computed from the W, Y and C counts.

    Args:
        seq: Protein sequence handed to ``ProteinAnalysis``.
    """
    params = ProteinAnalysis(seq)
    mw = params.molecular_weight()
    c_aa = params.count_amino_acids()
    p_aa = params.get_amino_acids_percent()
    gravy = params.gravy()
    aromaticity = params.aromaticity()
    isoelectric_point = params.isoelectric_point()
    # Per-residue coefficients for Trp, Tyr and Cys, as used by the report
    # line below ("Assuming reduced").
    ext_coeff = c_aa["W"] * 5690 + c_aa["Y"] * 1280 + c_aa["C"] * 120

    print("Amino acid count")
    pprint.pprint(c_aa)
    print("Amino acid percent")
    pprint.pprint(p_aa)
    print("Molecular weight")
    print("%f Da" % mw)
    print("Gravy")
    print(gravy)
    print("Isoelectric point")
    print(isoelectric_point)
    print("Aromaticity")
    print(aromaticity)
    print("Extinction coefficient: %d M-1cm-1 (Assuming reduced)" % ext_coeff)
    print("")
示例#8
0
def protParam(seq):
    """Print a report of basic physico-chemical parameters for ``seq``.

    Printed in order: amino-acid counts, amino-acid percentages, molecular
    weight, GRAVY, isoelectric point, aromaticity and an extinction
    coefficient computed from the W, Y and C counts.

    Args:
        seq: Protein sequence handed to ``ProteinAnalysis``.
    """
    params = ProteinAnalysis(seq)
    mw = params.molecular_weight()
    c_aa = params.count_amino_acids()
    p_aa = params.get_amino_acids_percent()
    gravy = params.gravy()
    aromaticity = params.aromaticity()
    isoelectric_point = params.isoelectric_point()
    # Per-residue coefficients for Trp, Tyr and Cys, as used by the report
    # line below ("Assuming reduced").
    ext_coeff = c_aa["W"] * 5690 + c_aa["Y"] * 1280 + c_aa["C"] * 120

    print("Amino acid count")
    pprint.pprint(c_aa)
    print("Amino acid percent")
    pprint.pprint(p_aa)
    print("Molecular weight")
    print("%f Da" % mw)
    print("Gravy")
    print(gravy)
    print("Isoelectric point")
    print(isoelectric_point)
    print("Aromaticity")
    print(aromaticity)
    print("Extinction coefficient: %d M-1cm-1 (Assuming reduced)" % ext_coeff)
    print("")
示例#9
0
def protein_analysis():
    """Show the protein-analysis form and process a submitted sequence.

    Redirects to the ``account/log_in`` action when ``session.username`` is
    ``None``. When the form is accepted, the upper-cased sequence is passed
    through ``seqClean``, analysed with ``ProteinAnalysis`` and the results
    are stored in ``session`` before redirecting to
    ``protein_analysis_output``.

    Returns:
        dict: ``{'form': form}`` for rendering when nothing was submitted.
    """
    if session.username is None:
        redirect(URL(r=request, c='account', f='log_in'))
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    form = FORM(
        TABLE(
            TR(
                "Amino acid sequence:  ",
                TEXTAREA(_type="text",
                         _name="sequence",
                         requires=IS_NOT_EMPTY())),
            INPUT(_type="submit", _value="SUBMIT")))
    if form.accepts(request.vars, session):
        session['sequence'] = seqClean(form.vars.sequence.upper())
        X = ProteinAnalysis(session['sequence'])
        # Results are kept in the session for the output page.
        session['aa_count'] = X.count_amino_acids()
        session['percent_aa'] = X.get_amino_acids_percent()
        session['mw'] = X.molecular_weight()
        session['aromaticity'] = X.aromaticity()
        session['instability'] = X.instability_index()
        session['flexibility'] = X.flexibility()
        session['pI'] = X.isoelectric_point()
        session['sec_struct'] = X.secondary_structure_fraction()
        redirect(URL(r=request, f='protein_analysis_output'))
    return dict(form=form)
示例#10
0
def get_protein_features(seq):
    """Return feature names and values describing a protein sequence.

    ``seq`` is first normalised with ``correct``. Weight, instability index
    and GRAVY come from the module-level helpers ``molecular_weight``,
    ``instability_index`` and ``hydrophobicity``; the remaining values come
    from ``ProteinAnalysis``.

    Returns:
        tuple: ``(names, values)`` where ``names`` is the list of 12 feature
        names and ``values`` holds the corresponding values in the same order.
    """
    seq = correct(seq)
    analysis = ProteinAnalysis(seq)
    weight = molecular_weight(seq)
    isoelectric = analysis.isoelectric_point()
    counts = analysis.count_amino_acids()

    acidic = counts['D'] + counts['E']
    basic = counts['K'] + counts['R']
    # Two extinction coefficients: Tyr + Trp only, and additionally Cys.
    ext_without_cys = counts['Y'] * 1490 + counts['W'] * 5500
    ext_with_cys = counts['Y'] * 1490 + counts['W'] * 5500 + counts['C'] * 125

    instability = instability_index(seq)
    hydropathy = hydrophobicity(seq)
    fractions = list(analysis.secondary_structure_fraction())

    names = [
        'length', 'weight', 'pI', 'neg_charged_residues',
        'pos_charged_residues', 'extinction_coeff1', 'extinction_coeff2',
        'instability_index', 'gravy', 'helix', 'turn', 'sheet'
    ]
    values = [
        len(seq), weight, isoelectric, acidic, basic, ext_without_cys,
        ext_with_cys, instability, hydropathy
    ]
    values.extend(fractions)

    return names, values
示例#11
0
 def aminoacid(self):
     """Translate the sequence from ``self.feature()`` and count amino acids.

     Returns:
         tuple: ``(counts, values)`` where ``counts`` is the dict from
         ``ProteinAnalysis.count_amino_acids()`` and ``values`` is the list
         of its values in dict order.
     """
     peptide = self.feature().translate()
     counts = ProteinAnalysis(str(peptide)).count_amino_acids()
     return counts, list(counts.values())
示例#12
0
def processFile(iterator, output):
    """Write an amino-acid count report for every record in ``iterator``.

    For each record the nucleotide sequence is translated and its amino-acid
    counts (passed through ``formatAminoAcids``) are written to ``output``.
    The same is then done for the translation of the transcribed (RNA)
    sequence, followed by a separator line.

    Args:
        iterator: Iterable of sequence records.
        output: Object with a ``write`` method that receives the report text.
    """

    def write_counts(counts):
        # One "<amino acid>: <count>" line per entry.
        for residue, total in counts.items():
            output.write(residue + ": " + str(total) + "\n")

    for record in iterator:
        nucleotides = record.seq

        # Avoid the Biopython warning about incomplete codons: append "N"
        # (wildcard) until the length is divisible by 3, as the warning
        # message itself recommends.
        remainder = len(nucleotides) % 3
        if remainder != 0:
            nucleotides = Seq(str(nucleotides) + "N" * (3 - remainder),
                              nucleotides.alphabet)

        # Translate to a peptide and wrap it for analysis.
        dna_analysis = ProteinAnalysis(str(nucleotides.translate()))
        output.write("Name: {0}\nDescription: {1}\nAnnotations: {2}".format(
            record.name, record.description, record.annotations))

        dna_counts = formatAminoAcids(dna_analysis.count_amino_acids())
        output.write("\n\nThis is the amino acid count of record {0}:".format(
            record.id) + "\n\n")
        write_counts(dna_counts)

        # RNA differs from the DNA by a single nucleotide, so translating the
        # transcript should give the same amino-acid counts as above.
        transcript = nucleotides.transcribe()
        rna_analysis = ProteinAnalysis(str(transcript.translate()))
        output.write(
            "\n\nThis is the amino acid count of the protein sequence derived from the RNA resulting from "
            "the DNA sequence in the file. It should be the same as the previous amino acid count: \n\n"
        )
        rna_counts = formatAminoAcids(rna_analysis.count_amino_acids())
        write_counts(rna_counts)

        output.write('\n**************************************\n\n')
示例#13
0
def getMF(subSeq):
    """Return the molecular formula of a peptide as a ``CxHxNxOxSx`` string.

    The atom counts of the free amino acids are summed according to the
    residue counts from ``ProteinAnalysis``. One water (2 H, 1 O) is then
    subtracted for every peptide bond, i.e. residues minus one.

    Args:
        subSeq: Peptide sequence handed to ``ProteinAnalysis``.

    Returns:
        str: Formula such as ``"C5H10N2O3S0"``. An empty sequence yields
        ``"C0H0N0O0S0"``.
    """
    # Atom composition of each free amino acid.
    atoms_by_residue = {
        'A': {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'S': 0},
        'R': {'C': 6, 'H': 14, 'N': 4, 'O': 2, 'S': 0},
        'N': {'C': 4, 'H': 8, 'N': 2, 'O': 3, 'S': 0},
        'D': {'C': 4, 'H': 7, 'N': 1, 'O': 4, 'S': 0},
        'C': {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'S': 1},
        'Q': {'C': 5, 'H': 10, 'N': 2, 'O': 3, 'S': 0},
        'E': {'C': 5, 'H': 9, 'N': 1, 'O': 4, 'S': 0},
        'G': {'C': 2, 'H': 5, 'N': 1, 'O': 2, 'S': 0},
        'H': {'C': 6, 'H': 9, 'N': 3, 'O': 2, 'S': 0},
        'I': {'C': 6, 'H': 13, 'N': 1, 'O': 2, 'S': 0},
        'L': {'C': 6, 'H': 13, 'N': 1, 'O': 2, 'S': 0},
        'K': {'C': 6, 'H': 14, 'N': 2, 'O': 2, 'S': 0},
        'M': {'C': 5, 'H': 11, 'N': 1, 'O': 2, 'S': 1},
        'F': {'C': 9, 'H': 11, 'N': 1, 'O': 2, 'S': 0},
        'P': {'C': 5, 'H': 9, 'N': 1, 'O': 2, 'S': 0},
        'S': {'C': 3, 'H': 7, 'N': 1, 'O': 3, 'S': 0},
        'T': {'C': 4, 'H': 9, 'N': 1, 'O': 3, 'S': 0},
        'W': {'C': 11, 'H': 12, 'N': 2, 'O': 2, 'S': 0},
        'Y': {'C': 9, 'H': 11, 'N': 1, 'O': 3, 'S': 0},
        'V': {'C': 5, 'H': 11, 'N': 1, 'O': 2, 'S': 0},
    }

    residue_counts = ProteinAnalysis(subSeq).count_amino_acids()

    totals = {'C': 0, 'H': 0, 'N': 0, 'O': 0, 'S': 0}
    residues = 0
    for residue, n in residue_counts.items():
        atoms = atoms_by_residue.get(residue)
        if atoms is None:
            # Residues without a known composition are ignored.
            continue
        residues += n
        for element in totals:
            totals[element] += n * atoms[element]

    # Correct the totals for the water lost per peptide bond. Clamp at zero
    # so an empty sequence does not gain a water molecule.
    peptide_bonds = max(residues - 1, 0)
    totals['H'] -= peptide_bonds * 2
    totals['O'] -= peptide_bonds

    return ("C" + str(totals['C']) + "H" + str(totals['H']) +
            "N" + str(totals['N']) + "O" + str(totals['O']) +
            "S" + str(totals['S']))
示例#14
0
def net_charge(seq):
    """Get net charge of a peptide sequence.

    Each K or R contributes +1 and each D or E contributes -1; no other
    residues are considered.

    Args:
        seq: Peptide sequence handed to ``ProteinAnalysis``.

    Returns:
        int: Number of K/R residues minus number of D/E residues.
    """
    counts = ProteinAnalysis(seq).count_amino_acids()
    # Plain lookups work on both Python 2 and 3 (``iteritems`` is 2-only).
    basic = sum(counts.get(aa, 0) for aa in ('K', 'R'))
    acidic = sum(counts.get(aa, 0) for aa in ('D', 'E'))
    return basic - acidic
示例#15
0
def convert_to_aac(dataset):
    """Build an amino-acid-composition matrix for a collection of records.

    Args:
        dataset: Sized collection of records exposing ``.seq`` and ``len()``.

    Returns:
        numpy.ndarray: Array of shape ``(len(dataset), 20)``. Row ``r`` holds
        the values of ``count_amino_acids()`` for record ``r``, in dict
        order, each divided by ``len(record)``.
    """
    X_aac = np.zeros((len(dataset), 20))
    for row, record in enumerate(dataset):
        residue_counts = ProteinAnalysis(str(record.seq)).count_amino_acids()
        for col, residue_count in enumerate(residue_counts.values()):
            X_aac[row][col] = residue_count / len(record)
    return X_aac
示例#16
0
def aa_frequency(outfile):
    """Print pooled amino-acid counts and percentages for a FASTA file.

    All sequences in the file are concatenated and analysed as one sequence.

    Args:
        outfile: Path of the FASTA file to read (despite the name, this file
            is only read here).
    """
    # The context manager closes the handle; join avoids repeated
    # string concatenation.
    with open(outfile) as handle:
        all_seq = "".join(
            str(record.seq) for record in SeqIO.parse(handle, 'fasta'))
    y = ProteinAnalysis(all_seq)
    print("all_seq_n", y.count_amino_acids())
    print("all_seq_%", y.get_amino_acids_percent())
示例#17
0
def biopython_protein_analysis(inseq):
    """Utilize Biopython's ProteinAnalysis module to return general sequence properties of an amino acid string.

    For full definitions see: http://biopython.org/DIST/docs/api/Bio.SeqUtils.ProtParam.ProteinAnalysis-class.html

    Args:
        inseq: Amino acid sequence

    Returns:
        dict: Dictionary of sequence properties. Some definitions include:
        instability_index: Any value above 40 means the protein is unstable (has a short half life).
        secondary_structure_fraction: Percentage of protein in helix, turn or sheet

    TODO:
        Finish definitions of dictionary

    """

    inseq = ssbio.protein.sequence.utils.cast_to_str(inseq)

    analysed_seq = ProteinAnalysis(inseq)

    info_dict = {}
    info_dict['amino_acids_content-biop'] = analysed_seq.count_amino_acids()
    info_dict['amino_acids_percent-biop'] = (
        analysed_seq.get_amino_acids_percent())
    info_dict['length-biop'] = analysed_seq.length
    info_dict['monoisotopic-biop'] = analysed_seq.monoisotopic
    info_dict['molecular_weight-biop'] = analysed_seq.molecular_weight()
    info_dict['aromaticity-biop'] = analysed_seq.aromaticity()
    info_dict['instability_index-biop'] = analysed_seq.instability_index()
    # TODO: What is flexibility?
    info_dict['flexibility-biop'] = analysed_seq.flexibility()
    info_dict['isoelectric_point-biop'] = analysed_seq.isoelectric_point()

    # grand average of hydrophobicity
    info_dict['gravy-biop'] = analysed_seq.gravy()

    # Secondary structure fractions are computed once and stored as three
    # separate entries (helix, turn, strand) instead of one tuple.
    fractions = analysed_seq.secondary_structure_fraction()
    info_dict['percent_helix_naive-biop'] = fractions[0]
    info_dict['percent_turn_naive-biop'] = fractions[1]
    info_dict['percent_strand_naive-biop'] = fractions[2]

    return info_dict
示例#18
0
def pep_param(pep):
    """Return a flat parameter list for a peptide.

    Args:
        pep: Peptide sequence handed to ``ProteinAnalysis``.

    Returns:
        list: ``[molecular_weight, isoelectric_point, *amino_acid_counts]``.
        The isoelectric point is the string ``'na'`` for an empty peptide;
        the counts follow the order of ``count_amino_acids()``.
    """
    analysis = ProteinAnalysis(pep)
    mass = analysis.molecular_weight()

    # The isoelectric point is only computed for a non-empty peptide.
    if len(pep) > 0:
        isoelectric = analysis.isoelectric_point()
    else:
        isoelectric = 'na'

    return [mass, isoelectric] + list(analysis.count_amino_acids().values())
示例#19
0
def get_AAfraction(seq, amino_acids=None):
    """Get fraction of given amino acids in a sequence.

    Args:
        seq: Protein sequence handed to ``ProteinAnalysis``.
        amino_acids: Collection of one-letter codes to count. Defaults to the
            nonpolar set A, V, L, F, I, W, P.

    Returns:
        Fraction of ``seq`` made up of the given amino acids, rounded to two
        decimals; the integer ``0`` when none of them occur.
    """
    if amino_acids is None:
        amino_acids = ['A', 'V', 'L', 'F', 'I', 'W', 'P']

    # ``items`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    residue_counts = ProteinAnalysis(seq).count_amino_acids()
    count = sum(n for aa, n in residue_counts.items() if aa in amino_acids)
    if count == 0:
        return 0
    return round(float(count) / len(seq), 2)
示例#20
0
def getMW_mono(subSeq):
    """Return the mass of a peptide from per-residue monoisotopic masses.

    The residue masses are summed according to the counts from
    ``ProteinAnalysis`` and the mass of one water is added for the termini.

    Args:
        subSeq: Peptide sequence handed to ``ProteinAnalysis``.

    Returns:
        float: Sum of residue masses plus ``18.015``.
    """
    # NOTE(review): 18.015 looks like the average mass of water; the
    # monoisotopic value would be about 18.0106. Confirm before changing.
    water = 18.015

    # Monoisotopic residue mass of each amino acid.
    residue_mass = {
        'A': 71.03711,
        'R': 156.10111,
        'N': 114.04293,
        'D': 115.02694,
        'C': 103.00919,
        'Q': 128.05858,
        'E': 129.04259,
        'G': 57.02146,
        'H': 137.05891,
        'I': 113.08406,
        'L': 113.08406,
        'K': 128.09496,
        'M': 131.04049,
        'F': 147.06841,
        'P': 97.05276,
        'S': 87.03203,
        'T': 101.04768,
        'W': 186.07931,
        'Y': 163.06333,
        'V': 99.06841,
    }

    residue_counts = ProteinAnalysis(subSeq).count_amino_acids()

    # Accumulate in dict order with a plain loop so the floating-point
    # result does not depend on how ``sum`` adds floats.
    molecularWeight = 0.0
    for residue, n in residue_counts.items():
        if residue in residue_mass:
            molecularWeight = molecularWeight + (n * residue_mass[residue])

    return molecularWeight + water
示例#21
0
def getMW_average(subSeq):
    """Return the mass of a peptide from per-residue average masses.

    The residue masses are summed according to the counts from
    ``ProteinAnalysis`` and the mass of one water is added for the termini.

    Args:
        subSeq: Peptide sequence handed to ``ProteinAnalysis``.

    Returns:
        float: Sum of residue masses plus ``18.015``.
    """
    water = 18.015

    # Average residue mass of each amino acid.
    residue_mass = {
        'A': 71.0788,
        'R': 156.1875,
        'N': 114.1038,
        'D': 115.0886,
        'C': 103.1388,
        'Q': 128.1307,
        'E': 129.1155,
        'G': 57.0519,
        'H': 137.1411,
        'I': 113.1594,
        'L': 113.1594,
        'K': 128.1741,
        'M': 131.1926,
        'F': 147.1766,
        'P': 97.1167,
        'S': 87.0782,
        'T': 101.1051,
        'W': 186.2132,
        'Y': 163.1760,
        'V': 99.1326,
    }

    residue_counts = ProteinAnalysis(subSeq).count_amino_acids()

    # Accumulate in dict order with a plain loop so the floating-point
    # result does not depend on how ``sum`` adds floats.
    molecularWeight = 0.0
    for residue, n in residue_counts.items():
        if residue in residue_mass:
            molecularWeight = molecularWeight + (n * residue_mass[residue])

    return molecularWeight + water
示例#22
0
    def aa_composition(self, seqs):
        """Return the pooled amino-acid fractions of a set of sequences.

        Each element of ``seqs`` is first passed through ``self.f``; its
        string form is then counted with ``ProteinAnalysis``.

        Returns:
            dict: Amino-acid letter mapped to its pooled count divided by the
            total length of all processed sequences, rounded to 3 decimals.
            When that total length is below 1, every one of the 20 amino
            acids is mapped to ``1`` instead.
        """
        pooled = collections.defaultdict(int)
        total_length = 0

        for rec in seqs:
            text = str(self.f(rec))
            total_length += len(text)
            for aa, n in ProteinAnalysis(text).count_amino_acids().items():
                pooled[aa] += n

        # Nothing to count: fall back to a flat placeholder composition.
        if total_length < 1:
            return dict.fromkeys('ARNDCQEGHILKMFPSTWYV', 1)

        return {aa: round(n / total_length, 3) for aa, n in pooled.items()}
def bio_feat(record):
    """Return a numeric feature vector for a protein record.

    ``X`` is removed from the sequence and ``U``, ``B``, ``Z`` are replaced
    by ``C``, ``N``, ``Q`` before analysis.

    Returns:
        numpy.ndarray: ``[length, molecular weight, isoelectric point,
        instability index]`` followed by the secondary structure fractions,
        the amino-acid counts and the amino-acid percentages.
    """
    text = str(MutableSeq(record.seq))
    # (old, new) substitutions applied in this order.
    for old, new in (("X", ""), ("U", "C"), ("B", "N"), ('Z', 'Q')):
        text = text.replace(old, new)
    clean_seq = MutableSeq(text).toseq()

    ### features
    cleaned = str(clean_seq)
    analysis = ProteinAnalysis(cleaned)
    weight = analysis.molecular_weight()
    percents = analysis.get_amino_acids_percent().values()
    isoelectric = analysis.isoelectric_point()
    counts = analysis.count_amino_acids().values()
    instability = analysis.instability_index()
    structure = analysis.secondary_structure_fraction()

    scalars = [len(cleaned), weight, isoelectric, instability]
    return np.array(scalars + list(structure) + list(counts) + list(percents))
示例#24
0
def protein_analysis():
    """Show the protein-analysis form and process a submitted sequence.

    Redirects to ``../account/log_in`` when ``session.username`` is ``None``.
    When the form is accepted, the upper-cased sequence is passed through
    ``seqClean``, analysed with ``ProteinAnalysis`` and the results are
    stored in ``session`` before redirecting to ``protein_analysis_output``.

    Returns:
        dict: ``{'form': form}`` for rendering when nothing was submitted.
    """
    if session.username is None:
        redirect(URL(r=request,f='../account/log_in'))
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    form = FORM(TABLE(
            TR("Amino acid sequence:  ",
               TEXTAREA(_type="text", _name="sequence",
                        requires=IS_NOT_EMPTY())),
            INPUT(_type="submit", _value="SUBMIT")))
    if form.accepts(request.vars,session):
        session['sequence'] = seqClean(form.vars.sequence.upper())
        X = ProteinAnalysis(session['sequence'])
        # Results are kept in the session for the output page.
        session['aa_count'] = X.count_amino_acids()
        session['percent_aa'] = X.get_amino_acids_percent()
        session['mw'] = X.molecular_weight()
        session['aromaticity'] = X.aromaticity()
        session['instability'] = X.instability_index()
        session['flexibility'] = X.flexibility()
        session['pI'] = X.isoelectric_point()
        session['sec_struct'] = X.secondary_structure_fraction()
        redirect(URL(r=request, f='protein_analysis_output'))
    return dict(form=form)
def get_features(seq):
    """Get global features from a protein sequence.

    Parameters
    ----------
    seq : str
        protein sequence

    Return
    ----------
    dictionary:
        global features of the protein sequence: counts and ratios computed
        on the raw sequence, ``ProteinAnalysis`` properties computed after
        the undefined symbols are removed, and one entry per amino-acid
        count.

    """
    # Symbols treated as undefined; they are counted first and stripped
    # before the ProteinAnalysis step.
    undefined = ['X', 'B', 'Z', "'", 'O', 'U']

    features = {}
    features['undefined_count'] = sum(1 for x in seq if x in undefined)
    features['length'] = len(seq)
    features['perc_undefined_count'] = (
        features['undefined_count'] / features['length'])
    features['entropy'] = entropy(seq)
    features['ideal_entropy'] = entropy_ideal(len(seq))
    features['perc_entropy'] = features['entropy'] / features['ideal_entropy']
    features['hydr_count'] = sum(1 for x in seq if x in hydrophobic_proteins)
    features['polar_count'] = sum(1 for x in seq if x in polar_proteins)
    features['buried'] = sum(
        buried[x] for x in seq if x in hydrophobic_proteins)

    seq = ''.join(x for x in seq if x not in undefined)

    protein = ProteinAnalysis(seq)
    features['gravy'] = protein.gravy()
    features['molecular_weight'] = protein.molecular_weight()
    features['aromaticity'] = protein.aromaticity()
    features['instability_index'] = protein.instability_index()
    features['isoelectric_point'] = protein.isoelectric_point()
    helix, turn, sheet = protein.secondary_structure_fraction()
    features['helix'] = helix
    features['turn'] = turn
    features['sheet'] = sheet

    features.update(protein.count_amino_acids())
    return features
示例#26
0
def GetFeatures(My_seq):
    """Return a dict of ``ProteinAnalysis`` features for ``My_seq``.

    Args:
        My_seq: Protein sequence handed to ``ProteinAnalysis``.

    Returns:
        dict: Single-valued entries (``Molecular_weight``, ``Aromaticity``,
        ``Instability_index``, ``Isoelectric_point``) and multi-valued
        entries (``Flexibility``, ``Second_structure_fraction``,
        ``Count_amino_acids``, ``Amino_acids_percent``).
    """
    Features = {}

    analysed_seq = ProteinAnalysis(My_seq)

    # Single-valued features
    Features["Molecular_weight"] = analysed_seq.molecular_weight()
    Features["Aromaticity"] = analysed_seq.aromaticity()
    Features["Instability_index"] = analysed_seq.instability_index()
    Features["Isoelectric_point"] = analysed_seq.isoelectric_point()

    # Multi-valued features
    Features["Flexibility"] = analysed_seq.flexibility()  # list
    Features["Second_structure_fraction"] = analysed_seq.secondary_structure_fraction()  # 3-tuple
    Features["Count_amino_acids"] = analysed_seq.count_amino_acids()  # dict, 20 entries
    Features["Amino_acids_percent"] = analysed_seq.get_amino_acids_percent()  # dict, 20 entries

    return Features
示例#27
0
    def on_enter(self, *args):
        """Update the screen with the protein weight and its best BLAST hit.

        Reads the sequence from ``no_header_sequence.txt``, shows its
        molecular weight in kDa in ``self.weight``, and shows in
        ``self.protname`` the title of the first BLAST hit from
        ``my_blast.xml``. A message is shown instead when the BLAST file is
        empty or contains no hit.
        """
        # Read the header-less sequence used to calculate Mw in kDa.
        with open("no_header_sequence.txt") as sequence_file:
            noHeader = sequence_file.read()
        print("noHeader: ", noHeader)
        analysed_seq = ProteinAnalysis(noHeader)
        Mw = analysed_seq.molecular_weight()  # Mw g/mol
        Mw_kDa = round(Mw / 1000, 3)  # Mw kDa

        # Dictionary with count for each amino acid
        print(analysed_seq.count_amino_acids())

        self.weight.text = str(Mw_kDa) + " kDa"

        if os.stat('my_blast.xml').st_size == 0:  # no BLAST output produced
            reduced_title = "BLAST search failed.\nCheck your FASTA file and try again."
        else:
            with open("my_blast.xml") as result_handle:
                blast_record = NCBIXML.read(result_handle)

            # Only the first alignment that has at least one HSP is used.
            first_hit = next(
                (hit for hit in blast_record.alignments if hit.hsps), None)
            if first_hit is None:
                reduced_title = "No BLAST hits found."
            else:
                print("hit_def:", first_hit.hit_def)
                # Keep only the text before the first '>' in the hit title.
                reduced_title = first_hit.hit_def.split('>')[0]
                print(reduced_title)

        self.protname.text = reduced_title  # sequence identity on the app screen
# Combine the entries d["0"], d["1"], ... column-wise, replace missing values
# with 0 and transpose the result.
global_counts_feats = pd.concat([d[str(i)] for i in range(len(d))],axis=1)
global_counts_feats = global_counts_feats.fillna(0)
global_counts_feats = global_counts_feats.T


# In[525]:

# Find the best split for local amino count
aminoFirstCount=[]
aminoLastCount=[]
# Cut-off positions tried: 10, 20, 30, 40, 50.
cnt = np.arange(10,60,10)

# For every cut-off j, count the amino acids of each sequence from position j
# onward ([j:]).
# NOTE(review): the "First" list is filled from the [j:] slice and the "Last"
# list from the [:j] slice; the names look swapped. Confirm the intent.
for j in cnt:
    for i in range(len(sequences)):
            X=ProteinAnalysis(str(sequences[i][j:]))
            aminoFirstCount.append(X.count_amino_acids())
# For every cut-off j, count the amino acids of the first j positions ([:j]).
for j in cnt:
    for i in range(len(sequences)):
            X=ProteinAnalysis(str(sequences[i][:j]))
            aminoLastCount.append(X.count_amino_acids())

# Split the flat list back into one chunk of len(sequences) dicts per cut-off,
# in the order of cnt, and build one '_first'-suffixed frame per cut-off.
aminofirstchunk = [aminoFirstCount[i:i+len(sequences)] for i  in range(0, len(aminoFirstCount), len(sequences))]
aminofirst10 = pd.DataFrame(aminofirstchunk[0])
aminofirst10.columns = [str(cols)+'_first' for cols in aminofirst10.columns]
aminofirst20 = pd.DataFrame(aminofirstchunk[1])
aminofirst20.columns = [str(cols)+'_first' for cols in aminofirst20.columns]
aminofirst30 = pd.DataFrame(aminofirstchunk[2])
aminofirst30.columns = [str(cols)+'_first' for cols in aminofirst30.columns]
aminofirst40 = pd.DataFrame(aminofirstchunk[3])
aminofirst40.columns = [str(cols)+'_first' for cols in aminofirst40.columns]
aminofirst50 = pd.DataFrame(aminofirstchunk[4])
                # Replace every 'X' in the record's sequence with 'G' before
                # analysis.
                sequence = str(record.seq).replace('X', 'G')
                protein = ProteinAnalysis(str(sequence))
                # Append one value per record to each feature list:
                # length, molecular weight, isoelectric point,
                # aromaticity (smell), GRAVY (taste_factor), instability
                # index, and the charge at pH 1, 7 and 14.
                p_len.append(len(sequence))
                mol_w.append(protein.molecular_weight())
                iso_p.append(protein.isoelectric_point())
                smell.append(protein.aromaticity())
                taste_factor.append(protein.gravy())
                insta_ind.append(protein.instability_index())
                char_at_acid.append(protein.charge_at_pH(1))
                char_at_neutral.append(protein.charge_at_pH(7))
                char_at_base.append(protein.charge_at_pH(14))
                # The three components of secondary_structure_fraction(),
                # stored in separate lists.
                helter_skeler.append(protein.secondary_structure_fraction()[0])
                turnip.append(protein.secondary_structure_fraction()[1])
                garfield.append(protein.secondary_structure_fraction()[2])
                # Append each amino-acid count to every d_count list whose
                # key ends with that amino-acid letter.
                for x in amino_acids:
                    n = protein.count_amino_acids()[x]
                    for y in d_count.keys():
                        if y[-1] == x:
                            d_count[y].append(n)
                # Same for the percentages, into d_perc.
                for a in amino_acids:
                    m = protein.get_amino_acids_percent()[a]
                    for b in d_perc.keys():
                        if b[-1] == a:
                            d_perc[b].append(m)
            #areas = get_area_classes(test_pdb)
            #polar_area.append(areas[0])
            #apolar_area.append(areas[1])
            #total_area.append(areas[2])
print('done')

for values_count in d_count.values():
# Countthe amino acids for a FASTA file containing numerous sequences

# To keep the program general-purpose, argparse and sys are used so that the input file can be given on the command line (standard input is used when no file is given).

import argparse
import sys

parser = argparse.ArgumentParser(description='Calculate mw and pi for protein sequences.')

parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),default=sys.stdin)
args = parser.parse_args()

# Read the FASTA input with SeqIO, looping over its records, and analyse each sequence with ProteinAnalysis.
# For each record, print the sequence ID and its cysteine ('C') count.


from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio import SeqIO
# Print "<record id>\t <cysteine count>" for every sequence in the FASTA input.
for record in SeqIO.parse(args.infile, "fasta"):
    seq = str(record.seq)
    my_prot = ProteinAnalysis(seq)
    aa_counts = my_prot.count_amino_acids()
    c_counts = aa_counts['C']
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('{}\t {}'.format(record.id, c_counts))

    #print '{}\t {}'.format(record.id, my_c.count("C"))
示例#31
0
from Bio.SeqUtils.ProtParam import ProteinAnalysis
# Interactive report: read a protein sequence, then optionally print its
# molecular weight and GRAVY score before always printing the residue counts.
my_seq = str(input("manual sequence from translate.py :"))
analysed_seq = ProteinAnalysis(my_seq)

# Each optional report is a (prompt, zero-argument callable) pair; the value is
# computed and printed only when the user answers exactly "y".
for prompt, compute in (
    ("detect molecular weight y/n? :", analysed_seq.molecular_weight),
    ("detect gravy y/n? :", analysed_seq.gravy),
):
    if str(input(prompt)) == "y":
        print(compute())

print(analysed_seq.count_amino_acids())
# Wait for a final key press so the output stays visible.
input("enter")
示例#32
0
 def get_sequence_count_aminoacids(self):
     """Return the result of ``ProteinAnalysis.count_amino_acids`` for ``self.sequence``."""
     # how to draw histogram
     return ProteinAnalysis(self.sequence).count_amino_acids()
示例#33
0
class Peptide(PolyIon):
    """Peptide represents single protein chains in solution.

    Peptide properties are based entirely on analysis of the sequence of the
    peptide.
    """

    _state = {'name': 'Name of the peptide.',
              'sequence': 'Amino acid sequence of the peptide.'
              }

    _sequence = None
    _analysis = None

    # TODO: move h to function or constants. Unify with pitts?
    _h_max = 1
    _h_min = 2./3.
    _h = 5./6.

    def __init__(self, name=None, sequence=None):
        """Store the name and sequence and build the sequence analysis.

        :param name: Name of the peptide.
        :param sequence: Amino acid sequence of the peptide.
        """
        self._name = name
        self._sequence = sequence
        self._analysis = ProteinAnalysis(str(self.sequence))

    @property
    def molecular_weight(self):
        """Return the molecular weight computed from the sequence."""
        return SeqUtils.molecular_weight(self.sequence, 'protein')

    def charge(self, pH=None, ionic_strength=None, temperature=None,
               moment=1):
        """Return the time-averaged charge of the peptide.

        :param pH: pH at which to evaluate the charge; resolved through
            ``_resolve_context`` together with the other two arguments.
        :param ionic_strength: passed to ``_resolve_context``; not otherwise
            used by this calculation.
        :param temperature: passed to ``_resolve_context``; not otherwise
            used by this calculation.
        :param moment: exponent applied to the computed charge before it is
            returned (default 1).
        """
        pH, ionic_strength, temperature = \
            self._resolve_context(pH, ionic_strength, temperature)

        amino_acid_count = self._analysis.count_amino_acids()

        # Work on copies so the shared pK tables are never mutated.
        pos_pKs = dict(positive_pKs)
        neg_pKs = dict(negative_pKs)

        nterm = self.sequence[0]
        cterm = self.sequence[-1]

        # Terminal residues may carry their own pK values.
        if nterm in pKnterminal:
            pos_pKs['Nterm'] = pKnterminal[nterm]
        if cterm in pKcterminal:
            neg_pKs['Cterm'] = pKcterminal[cterm]

        charge = IsoelectricPoint(self.sequence,
                                  amino_acid_count)._chargeR(pH,
                                                             pos_pKs,
                                                             neg_pKs)
        return charge**moment

    def isoelectric_point(self, ionic_strength=None, temperature=None):
        """Return the isoelectric point of the peptide.

        ``ionic_strength`` and ``temperature`` are accepted but currently
        ignored; the value comes straight from the sequence analysis.
        """
        # _, ionic_strength, temperature = \
        #     self._resolve_context(None, ionic_strength, temperature)
        return self._analysis.isoelectric_point()

    def volume(self):
        """Return the approximate volume of the folded peptide in m^3."""
        v = self.molecular_weight / avogadro / self.density() / lpm3 / gpkg
        return v

    def radius(self):
        """Return the approximate radius of the folded peptide in m."""
        return (self.volume() * 3. / 4. / pi) ** (1. / 3.)

    def density(self):
        """Return the approximate density of the folded peptide in kg/L.

        Uses the molecular-weight-dependent fit of Fischer et al. (2004),
        ``1.410 + 0.145 * exp(-M / 13)``, where ``M`` is in kDa.
        """
        # molecular_weight is in Da (g/mol); the fit expects kDa. Without the
        # conversion the exponential term is effectively zero for any peptide.
        molecular_weight_kda = self.molecular_weight / 1000.
        return 1.410 + 0.145 * exp(-molecular_weight_kda / 13.)

    def mobility(self, pH=None, ionic_strength=None, temperature=None):
        """Return the effective mobility of the ion in m^2/V/s.

        If a context solution is available, mobility uses the full Onsager-Fuoss
        correction to mobility. Otherwise, the Robinson-Stokes model is used.

        :param pH: resolved through ``_resolve_context``; used for the charge.
        :param ionic_strength: resolved through ``_resolve_context``; used for
            the solvent's ``debye`` value.
        :param temperature: resolved through ``_resolve_context``; used for
            the solvent's ``viscosity`` and ``debye`` values.
        """
        pH, ionic_strength, temperature = \
            self._resolve_context(pH, ionic_strength, temperature)

        # The radius is needed twice below; compute it once.
        radius = self.radius()

        mobility = self.charge(pH) * elementary_charge /\
            (6 * pi * self._solvent.viscosity(temperature) * radius *
             (1 + radius /
              self._solvent.debye(ionic_strength, temperature)
              )
             ) * self._h
        return mobility
Y = {'C':9, 'H':11,'N':1, 'O':3, 'S':0}
V = {'C':5, 'H':11,'N':1, 'O':2, 'S':0}



dictOfAmino = {'A':A,'R':R,'N':N,'D':D,'C':C,'Q':Q, 'E':E, 'G':G,'H':H,'I':I,'L':L,'K':K,'M':M,'F':F,'P':P,'S':S,'T':T,'W':W,'Y':Y,'V':V}

print "Note output file is appended if same file is selected twice molecular formulas \n for both runs will be present in output file"
fileName = raw_input("Protein FASTA file to generate molecular formulas for: ")
outFileName = raw_input("Output file name (include .txt): ")

fasta_file = open(fileName, "rU")
for record in SeqIO.parse(fasta_file, "fasta"):
	myseq = str(record.seq)
	analysis = ProteinAnalysis(myseq)
	listofaminoacids.append(analysis.count_amino_acids())


	
for i in listofaminoacids:
        carbonTotal = 0
        hydrogenTotal = 0
        oxygenTotal = 0
        nitrogenTotal = 0
        sulfurTotal = 0
        peptideBonds = 0
        
        for value in i:
                for amino in dictOfAmino:
                        
                        if value == amino:
                      "," + str(mol_w) + "," + str(ins) + "," + str(cnt) +
                      "\n")
    else:
        with open(path_ + "\\data\\output\\svm_out.txt", "a+") as s:
            s.write("-1 " + ' '.join("{}:{}".format(k, v)
                                     for k, v in a.items()) + "\n")
        with open(pth + "weka_output.arff", "a+") as w:
            w.write(' '.join("{},".format(x)
                             for x in list(aa_count.values())) + " loc\n")
        with open(pth + "tain_DL.csv", "a+") as DPL:
            DPL.write(''.join("{},".format(x)
                              for x in list(aa_count.values())) +
                      str(round(aromat, 3)) + "," +
                      str(round(fraction[0], 3)) + "," +
                      str(round(fraction[1], 3)) + "," +
                      str(round(fraction[2], 3)) + "," + str(round(iso, 3)) +
                      "," + str(mol_w) + "," + str(ins) + "," + "0" + "\n")


# Main feature-extraction loop: for every (sequence, class label) pair, compute
# the ProteinAnalysis descriptors into module-level names and then call
# format_output with the residue counts and the label.
# NOTE(review): format_output is called with only aa_count and cl, yet the
# output code above uses aromat, fraction, iso, mol_w and ins — presumably it
# reads them as globals; confirm before renaming any of these names.
for seq, cl in zip(seq_list, cls_list):  # main loop to extract the features
    _ = ProteinAnalysis(seq)  # Biopython protein analysis package
    aa_count = (_.count_amino_acids())  # amino acid count
    aromat, fraction, iso = _.aromaticity(), _.secondary_structure_fraction(
    ), _.isoelectric_point()
    try:
        # Both values are stored as strings formatted to two decimals.
        mol_w, ins = ("%0.2f" % _.molecular_weight()), ("%0.2f" %
                                                        _.instability_index())
    except Exception:
        # NOTE(review): this self-assignment silently keeps mol_w/ins from the
        # previous iteration; if neither name was assigned before, it raises
        # NameError instead.
        mol_w, ins = mol_w, ins  # features so far: aromaticity, secondary structure fraction, isoelectric point, molecular weight, instability index
    format_output(aa_count, cl)
示例#36
0
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
from Bio import SeqIO
# For every FASTA record in the sample file, print each ProtParam report in a
# fixed order, followed by a protein-scale profile (window 9, edge weight 0.4).
with open('../../samples/pdbaa') as fasta_handle:
    # Zero-argument ProteinAnalysis methods, in the order they are printed.
    report_methods = (
        'count_amino_acids',
        'get_amino_acids_percent',
        'molecular_weight',
        'aromaticity',
        'instability_index',
        'flexibility',
        'isoelectric_point',
        'secondary_structure_fraction',
    )
    for record in SeqIO.parse(fasta_handle, 'fasta'):
        analysis = ProteinAnalysis(str(record.seq))
        # Compute and print one report at a time so output order is preserved
        # even if a later report raises.
        for method_name in report_methods:
            print(getattr(analysis, method_name)())
        print(analysis.protein_scale(ProtParamData.kd, 9, .4))
示例#37
0
def openfile():
    """Ask for a PDB file and compute sequence and accessibility descriptors.

    Opens a Tk file dialog, parses the chosen file, extracts a peptide
    sequence, prints whole-sequence ProteinAnalysis results, then walks
    10-residue windows to collect secondary-structure fractions and runs
    freesasa selections to collect accessibility values.

    Nothing is returned; results are published through module-level globals
    (see the ``global`` statements below) and printed to stdout.
    """
    global prob, probab, te
    global my_seq
    global anti
    global structure, structure_id, filename
    global antigenicity, hydro, flex, sec
    global m, a, c, b, length, j, k
    global hydroph, flexi, access
    anti = []
    sec = []
    probab = []
    from tkinter import filedialog
    root = Tk()
    root.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select file",
        filetypes=(("pdb files", "*.pdb"), ("pdb files", "*.pdb")))
    filename = root.filename
    print(filename)
    # The structure id is fixed regardless of which file was selected.
    structure_id = "1e6j"
    structure = PDBParser().get_structure(structure_id, root.filename)
    ppb = PPBuilder()
    # Each iteration overwrites my_seq, so only the last peptide built from the
    # structure is kept for the analysis below.
    for pp in ppb.build_peptides(structure):
        my_seq = pp.get_sequence()  # type: Seq
        print(my_seq)
    for model in structure:
        for chain in model:
            print(chain)
    # Flatten the sequence object into a plain string.
    sequence = list(my_seq)
    m = ''.join(sequence)
    print(m)
    length = len(m)  # type: int
    print("Sequence consist of", length, "Amino Acids")
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    analysed_seq = ProteinAnalysis(m)
    print("Molecular weight = ", analysed_seq.molecular_weight())
    print("Amino Acid Count = ", analysed_seq.count_amino_acids())
    print("Secondary structure fraction =",
          analysed_seq.secondary_structure_fraction())
    # Per-residue scale passed to protein_scale below.
    # NOTE(review): the values look like the Kyte-Doolittle hydropathy scale,
    # although the output labels it "Hydrophilicity" — confirm intent.
    kd = {
        'A': 1.8,
        'R': -4.5,
        'N': -3.5,
        'D': -3.5,
        'C': 2.5,
        'Q': -3.5,
        'E': -3.5,
        'G': -0.4,
        'H': -3.2,
        'I': 4.5,
        'L': 3.8,
        'K': -3.9,
        'M': 1.9,
        'F': 2.8,
        'P': -1.6,
        'S': -0.8,
        'T': -0.7,
        'W': -0.9,
        'Y': -1.3,
        'V': 4.2
    }
    # c/flex/flexi hold the same flexibility profile and b/hydro/hydroph the
    # same scale profile (window 10, edge weight 1.0); each is a separate list
    # published under a different global name.
    c = list(analysed_seq.flexibility())
    b = list(analysed_seq.protein_scale(kd, 10, 1.0))
    hydro = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flex = list(analysed_seq.flexibility())
    hydroph = list(analysed_seq.protein_scale(kd, 10, 1.0))
    flexi = list(analysed_seq.flexibility())

    # Slide a 10-residue window along the sequence: the window is
    # m[j + 1:k + 1], starting at index 0, and the loop runs length - 10 times.
    i = 1
    j = -1  # type: int
    k = 9
    while i <= (length - 10):
        print("Sequence is = ", m[j + 1:k + 1])
        print("Flexibility value = ", c[j + 1])
        print("Hydrophilicity value = ", b[j + 1])
        ana_seq = ''.join(m[j + 1:k + 1])
        analyze_seq = ProteinAnalysis(ana_seq)
        # For Secondary structure Analysis
        print("Secondary structure fraction =",
              analyze_seq.secondary_structure_fraction())
        a = list(analyze_seq.secondary_structure_fraction())
        # Keep only the first component of the fraction tuple for this window
        # (presumably the helix fraction — confirm against Biopython docs).
        a = a[0]
        sec.append(a)
        i += 1
        j += 1
        k += 1
    # Accessibility pass: one freesasa selection per residue range r-y,
    # starting at "1-10" and shifting by one each iteration.
    # NOTE(review): the loop runs `length` times, so later ranges extend past
    # the end of the sequence — confirm this is intended.
    f = length
    r = 1
    y = 10
    global acc, logacc
    acc = []
    for i in range(0, f):
        str1 = "accessibility, resi "
        str2 = str(r) + "-" + str(y)
        saving = str1 + str2
        print(saving)
        r = r + 1
        y = y + 1
        # NOTE(review): this always loads the hard-coded "1e6j.pdb" rather than
        # the file chosen in the dialog, and it overwrites the global
        # `structure`. The structure and both calc() calls do not depend on the
        # loop variable, so they are recomputed identically every iteration.
        structure = freesasa.Structure("1e6j.pdb")
        resulta = freesasa.calc(structure)
        area_classes = freesasa.classifyResults(resulta, structure)
        print("Total : %.2f A2" % resulta.totalArea())
        for key in area_classes:
            print(key, ": %.2f A2" % area_classes[key])
        # Recompute with explicit Lee-Richards parameters; this result replaces
        # the default one and is the one used for the selections.
        resulta = freesasa.calc(
            structure,
            freesasa.Parameters({
                'algorithm': freesasa.LeeRichards,
                'n-slices': 10
            }))
        selections = freesasa.selectArea(('alanine, resn ala', saving),
                                         structure, resulta)
        # Both selections' areas are appended; `a` (global) is overwritten.
        for key in selections:
            print(key, ": %.2f A2" % selections[key])
            a = selections[key]
            acc.append(a)

    # Two values are appended per iteration, so the even-indexed entries are
    # one of the two selections for each range.
    # NOTE(review): which selection comes first depends on the iteration order
    # of `selections` — verify it is the residue-range one.
    l = acc[0::2]
    access = l
    print(acc)
    print(l)
    # Base-10 logarithm of each kept area; math.log raises ValueError for 0.
    logacc = [math.log(y, 10) for y in l]

    print(logacc)
# Convert the RTF file named on the command line to plain text, then write one
# CSV row (name, MW, EC, EC/MW) for every protein sequence line found in it.
file_name = argv[1].split(".rtf")[0]
# NOTE(review): argv[1] is concatenated into a shell command unquoted, so a
# path containing spaces or shell metacharacters will be misinterpreted.
cmd = "textutil -convert txt " + argv[1]
call(['/bin/zsh', '-i', '-c', cmd])
string = file_name + ".txt"

ofile_str = argv[1] + "_params.csv"
call(["rm", ofile_str])

# Compiled once, outside the loop. A name line starts with "<digits>."; a
# sequence line starts with at least 20 capital letters.
name_pattern = re.compile(r'^[0-9]+\.')
sequence_pattern = re.compile(r'^[A-Z]{20}')

# Context managers close both files even if a line fails to parse.
with open(string, 'r') as infile, open(ofile_str, 'w') as ofile:
    ofile.write("name,MW,EC,EC/MW\n")

    for line in infile:
        if name_pattern.search(line):
            # Everything after the leading "<digits>." is the name.
            name = '.'.join(line.strip().split('.')[1:])
        if sequence_pattern.search(line):
            # Strip surrounding whitespace, then any leading/trailing
            # backslash or asterisk characters.
            my_seq = line.strip().strip(r'\*')
            analysed_seq = ProteinAnalysis(my_seq)
            MW = analysed_seq.molecular_weight()
            # Count residues once instead of once per residue type.
            aa_counts = analysed_seq.count_amino_acids()
            W = aa_counts['W']
            Y = aa_counts['Y']
            C = aa_counts['C']
            # Extinction coefficient from the Tyr, Trp and Cys counts.
            EC = Y*1490 + W*5500 + C*125
            EC_MW = EC / MW
            ofile.write(name + "," + str(MW) + "," + str(EC) + "," + str(EC_MW) + '\n')
            # A single parenthesised argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(name + " " + str(MW) + " " + str(EC) + " " + str(EC_MW))

# Open the finished CSV only after it has been flushed and closed.
call(["open", ofile_str])
示例#39
0
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
import sys
import json

# The request is a JSON object in sys.argv[1] with a "Sequence" string and an
# "Options" collection naming the values to compute. The result is printed to
# stdout as a JSON object keyed by option name.
inp = json.loads(sys.argv[1])

seq = inp["Sequence"]
options = inp["Options"]

X = ProteinAnalysis(seq)

data = {}

if "MW" in options:
    data["MW"] = X.molecular_weight()

if "EC280" in options:
    aa_count = X.count_amino_acids()
    # Tyr and Trp always contribute; Cys contributes 62.5 per residue only
    # when the request includes "hasDisulfide".
    data["EC280"] = 1490 * aa_count["Y"] + 5500 * aa_count["W"]
    if "hasDisulfide" in options:
        data["EC280"] += 62.5 * aa_count["C"]

if "PI" in options:
    data["PI"] = X.isoelectric_point()

if "AACont" in options:
    ratios = X.get_amino_acids_percent()
    # Scale each value returned by get_amino_acids_percent by 100.
    data["AACont"] = {aa: ratios[aa] * 100. for aa in ratios}

# A single parenthesised argument prints identically under the Python 2 print
# statement and the Python 3 print function.
print(json.dumps(data))
示例#40
0
def openfile():
    global my_seq
    global antigenicity
    global m, a, c, b
    from tkinter import filedialog
    root = Tk()
    root.filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select file",
        filetypes=(("pdb files", "*.pdb"), ("pdb files", "*.pdb")))
    print(root.filename)
    structure_id = "1e6j"
    structure = PDBParser().get_structure(structure_id, root.filename)
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure):
        my_seq = pp.get_sequence()  # type: Seq
        print(my_seq)
    for model in structure:
        for chain in model:
            print(chain)
    sequence = list(my_seq)
    m = ''.join(sequence)  # type: str
    print(m)
    length = len(m)  # type: int
    print(length)
    print("Sequence consist of", len(m), "Amino Acids")
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    analysed_seq = ProteinAnalysis(m)
    print("Molecular weight = ", analysed_seq.molecular_weight())
    print("Amino Acid Count = ", analysed_seq.count_amino_acids())
    print("Secondary structure fraction =",
          analysed_seq.secondary_structure_fraction())
    kd = {
        'A': 1.8,
        'R': -4.5,
        'N': -3.5,
        'D': -3.5,
        'C': 2.5,
        'Q': -3.5,
        'E': -3.5,
        'G': -0.4,
        'H': -3.2,
        'I': 4.5,
        'L': 3.8,
        'K': -3.9,
        'M': 1.9,
        'F': 2.8,
        'P': -1.6,
        'S': -0.8,
        'T': -0.7,
        'W': -0.9,
        'Y': -1.3,
        'V': 4.2
    }
    c = list(analysed_seq.flexibility())
    b = list(analysed_seq.protein_scale(kd, 10, 1.0))
    i = 1
    j = -1  # type: int
    k = 9
    while i <= (length - 10):
        print("Sequence is = ", m[j + 1:k + 1])
        print("Flexibility value = ", c[j + 1])
        print("Hydrophilicity value = ", b[j + 1])
        ana_seq = ''.join(m[j + 1:k + 1])
        analyze_seq = ProteinAnalysis(ana_seq)
        # For Secondary structure Analysis
        print("Secondary structure fraction =",
              analyze_seq.secondary_structure_fraction())
        a = list(analyze_seq.secondary_structure_fraction())
        global tupleall
        tupleall = (m[j + 1:k + 1], c[j + 1], b[j + 1], a)
        print(tupleall[0], tupleall[2], tupleall[1], tupleall[3])
        i = i + 1
        if a[0] >= a[1]:
            a[0] = 1
        else:
            a[0] = a[1]
        # For Hydrophilicity
        if b[j + 1] > 0.5:
            b[j + 1] = 2
        elif b[j + 1] < 0.5 or b[j + 1] > 0:
            b[j + 1] = 1
        elif b[j + 1] > 0 or b[j + 1] > -0.4:
            b[j + 1] = -1
        elif b[j + 1] < -0.4:
            b[j + 1] = -2
        else:
            b[j + 1] = 0
        # For Flexibility
        if c[j + 1] > 1.0:
            c[j + 1] = 1
        else:
            c[j + 1] = 0
        # For antigenicity Index
        antigenicity = 0.3 * b[j + 1] + 0.15 * 1 + 0.15 * c[j + 1] + 0.2 * a[0]
        print("antigenicity", antigenicity)
        j += 1
        k += 1