def test_filter_values(self):
    """Filtering on charge ``>= 1`` must leave only the poly-lysine entry.

    The descriptor is filled with ``calculate_charge()`` and then filtered
    with ``filter_values(values=[1.], operator='>=')``; afterwards both the
    sequence list and the descriptor are expected to hold a single entry.
    """
    peptides = ['GLFDIVKKVVGALG', 'LLLLLL', 'KKKKKKKKKK', 'DDDDDDDDDDDD']
    desc = GlobalDescriptor(peptides)
    desc.calculate_charge()

    desc.filter_values(values=[1.], operator='>=')

    expected_sequences = ['KKKKKKKKKK']
    self.assertEqual(desc.sequences, expected_sequences)
    self.assertEqual(len(desc.descriptor), 1)
def test_filter_aa(self):
    """Filtering out amino acid ``D`` must leave the two D-free sequences.

    After ``filter_aa(['D'])`` the test expects only ``'LLLLLL'`` and
    ``'KKKKKKKKKK'`` to remain, and the descriptor to have two entries.
    """
    peptides = ['GLFDIVKKVVGALG', 'LLLLLL', 'KKKKKKKKKK', 'DDDDDDDDDDDD']
    desc = GlobalDescriptor(peptides)
    desc.calculate_charge()

    desc.filter_aa(['D'])

    expected_sequences = ['LLLLLL', 'KKKKKKKKKK']
    self.assertEqual(desc.sequences, expected_sequences)
    self.assertEqual(len(desc.descriptor), 2)
def calc_charge(self, ph=7.0, amide=True):
    """Calculate the total molecular charge at a given pH for every library.

    For each entry along the first axis of :py:attr:`library`, a
    ``GlobalDescriptor`` is built, its charge is calculated, and the first
    descriptor column is appended to the attribute :py:attr:`charge`.

    :param ph: {float} ph at which to calculate the peptide charge.
    :param amide: {boolean} whether the sequences have an amidated
        C-terminus (-> charge += 1).
    :return: nothing is returned; one array of charges per library entry is
        appended to the attribute :py:attr:`charge`.
    """
    n_libraries = self.library.shape[0]
    for idx in range(n_libraries):
        descriptor = GlobalDescriptor(self.library[idx])
        descriptor.calculate_charge(ph=ph, amide=amide)
        # Only the first descriptor column is stored.
        first_column = descriptor.descriptor[:, 0]
        self.charge.append(first_column)
# Per-group containers (dict: group -> values) and their "long" counterparts
# (flat list of [group, value] rows).
charges_long = []
charge_densities, charge_densities_long = coll.defaultdict(list), []
polarities, polarities_long = coll.defaultdict(list), []
gravy, gravy_long = coll.defaultdict(list), []

for gp in peptides:
    # Eisenberg scale values
    eisenbergs[gp] = get_peptide_values(peptides[gp], 'eisenberg')
    eisenbergs_long.extend([[gp, val] for val in eisenbergs[gp]])

    # Charge at pH 7.4, amidated C-terminus; first descriptor column only
    properties = GlobalDescriptor(peptides[gp])
    properties.calculate_charge(ph=7.4, amide=True)
    charges[gp] = [row[0] for row in properties.descriptor]
    charges_long.extend([[gp, val] for val in charges[gp]])

    # Charge density, computed on a fresh descriptor object
    properties = GlobalDescriptor(peptides[gp])
    properties.charge_density(ph=7.4, amide=True)
    charge_densities[gp] = [row[0] for row in properties.descriptor]
    charge_densities_long.extend(
        [[gp, val] for val in charge_densities[gp]])

    # Polarity scale values
    polarities[gp] = get_peptide_values(peptides[gp], 'polarity')
    polarities_long.extend([[gp, val] for val in polarities[gp]])

    # GRAVY scale values
    gravy[gp] = get_peptide_values(peptides[gp], 'gravy')
def predict():
    """Render the predictor form, or classify a submitted peptide sequence.

    For any method other than POST the ``predictor.html`` form is rendered.
    On POST, the ``seq`` form field is written to a FASTA file, peptide and
    global descriptors are calculated from that file, the descriptors are fed
    to the classifier stored in ``ml_model.pkl``, and ``seq.html`` is rendered
    with the predicted activity labels and a base64-encoded PNG pie chart of
    the predicted probabilities.

    :return: the rendered template.
    :raises KeyError: if the POST form carries no ``seq`` field
        (raised by ``request.form['seq']``).
    """
    if request.method != 'POST':
        return render_template('predictor.html')

    seq = request.form['seq']

    # The descriptors below are read from this exact path, so the submission
    # has to be written to the same file. Writing to a cwd-relative
    # "random.fasta" only worked when the process was started from that
    # directory.
    fasta_path = '/home/sanika/proj/random.fasta'
    with open(fasta_path, "w") as fp:
        fp.write(seq)

    pepdesc = PeptideDescriptor(fasta_path, 'eisenberg')  # Eisenberg consensus scale
    globdesc = GlobalDescriptor(fasta_path)

    # --------------- Peptide Descriptor (AA scales) Calculations ---------------
    pepdesc.calculate_global()  # global Eisenberg hydrophobicity
    pepdesc.calculate_moment(append=True)  # Eisenberg hydrophobic moment
    pepdesc.load_scale('gravy')  # GRAVY scale
    pepdesc.calculate_global(append=True)  # global GRAVY hydrophobicity
    pepdesc.calculate_moment(append=True)  # GRAVY hydrophobic moment
    pepdesc.load_scale('z3')  # old Z scale
    pepdesc.calculate_autocorr(1, append=True)  # global Z scale (=window1 autocorrelation)

    # --------------- Global Descriptor Calculations ---------------
    globdesc.length()  # sequence length
    globdesc.boman_index(append=True)  # Boman index
    globdesc.aromaticity(append=True)  # global aromaticity
    globdesc.aliphatic_index(append=True)  # aliphatic index
    globdesc.instability_index(append=True)  # instability index
    globdesc.calculate_charge(ph=7.4, amide=False, append=True)  # net charge
    globdesc.calculate_MW(amide=False, append=True)  # molecular weight

    # Feature order expected by the model: global descriptors first.
    result = np.concatenate((globdesc.descriptor, pepdesc.descriptor), axis=1)

    clf = joblib.load('ml_model.pkl')
    pred = clf.predict(result)
    proba = clf.predict_proba(result).tocoo()
    out = pred.tocoo().col  # column indices of the predicted classes

    # NOTE(review): plt.pie needs len(labels) == len(values), but proba.data
    # only holds the entries stored in the sparse matrix - confirm it always
    # carries exactly three values.
    labels = ['antiviral', 'antibacterial', 'antifungal']
    values = proba.data
    plt.pie(values, labels=labels, autopct='%.0f%%', shadow=True, radius=0.5)
    plt.savefig('/home/sanika/proj/pie_chart.jpg')
    figfile = BytesIO()
    plt.savefig(figfile, format='png')
    figfile.seek(0)
    figdata_png = base64.b64encode(figfile.getvalue()).decode('ascii')
    plt.close()

    # Class 0 and 1 are named explicitly; every other index is "antifungal".
    class_names = {0: "antiviral", 1: "antibacterial"}
    res = [class_names.get(class_idx, "antifungal") for class_idx in out]

    return render_template('seq.html', seq=res, result=figdata_png)
def upload():
    """Handle a FASTA upload and write the predicted activities to a CSV.

    For any method other than POST the ``predictor.html`` form is rendered.
    On POST, the uploaded file is validated with ``allowed_file``; a rejected
    upload renders ``upload.html`` with an error message. An accepted file is
    stored in the configured upload folder, parsed as FASTA, described with
    peptide and global descriptors, and classified one sequence at a time
    with the model in ``ml_model.pkl``. The ids, sequences and ``-``-joined
    activity labels are written to ``result.csv`` and ``up.html`` is rendered.
    The stored upload is removed afterwards, whether or not processing
    succeeded.

    :return: the rendered template.
    """
    if request.method != 'POST':
        # This will be executed on GET request.
        return render_template('predictor.html')

    upfile = request.files['file']
    if not (upfile and allowed_file(upfile.filename)):
        error = "PLEASE CHECK THE FORMAT OF FILE TO UPLOAD"
        return render_template('upload.html', error=error)

    filename = secure_filename(upfile.filename)
    # Every later read uses the path the upload was saved to. Opening the
    # bare file name only worked when the upload folder was the working
    # directory.
    upload_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    upfile.save(upload_path)

    try:
        identifiers = []
        sequence = []
        with open(upload_path) as fasta_file:
            for seq_record in SeqIO.parse(fasta_file, 'fasta'):
                identifiers.append(seq_record.id)
                sequence.append(seq_record.seq)

        pepdesc = PeptideDescriptor(upload_path, 'eisenberg')  # Eisenberg consensus scale
        globdesc = GlobalDescriptor(upload_path)

        # --------------- Peptide Descriptor (AA scales) Calculations ---------------
        pepdesc.calculate_global()  # global Eisenberg hydrophobicity
        pepdesc.calculate_moment(append=True)  # Eisenberg hydrophobic moment
        pepdesc.load_scale('gravy')  # GRAVY scale
        pepdesc.calculate_global(append=True)  # global GRAVY hydrophobicity
        pepdesc.calculate_moment(append=True)  # GRAVY hydrophobic moment
        pepdesc.load_scale('z3')  # old Z scale
        pepdesc.calculate_autocorr(1, append=True)  # global Z scale (=window1 autocorrelation)

        # --------------- Global Descriptor Calculations ---------------
        globdesc.length()  # sequence length
        globdesc.boman_index(append=True)  # Boman index
        globdesc.aromaticity(append=True)  # global aromaticity
        globdesc.aliphatic_index(append=True)  # aliphatic index
        globdesc.instability_index(append=True)  # instability index
        globdesc.calculate_charge(ph=7.4, amide=False, append=True)  # net charge
        globdesc.calculate_MW(amide=False, append=True)  # molecular weight

        # Feature order expected by the model: global descriptors first.
        result = np.concatenate(
            (globdesc.descriptor, pepdesc.descriptor), axis=1)

        # Load the model once instead of once per sequence.
        clf = joblib.load('ml_model.pkl')
        # Class 0 and 1 are named explicitly; any other index is "antifungal".
        class_names = {0: "antiviral", 1: "antibacterial"}

        activities = []
        for features in result:
            prt = np.reshape(features, (-1, 14))  # single-row feature matrix
            predicted = clf.predict(prt).tocoo().col
            activities.append('-'.join(
                class_names.get(class_idx, "antifungal")
                for class_idx in predicted))

        df = pd.DataFrame(data={
            "id": identifiers,
            "sequence": sequence,
            "activity": activities
        }, columns=['id', 'sequence', 'activity'])
        df.to_csv("result.csv", sep=',', index=False)
    finally:
        # Do not leave uploads behind when processing fails half-way.
        os.remove(upload_path)

    return render_template('up.html', mimetype="text/csv")
globdesc = GlobalDescriptor('/path/to/sequences.fasta')

# --------------- Peptide Descriptor (AA scales) Calculations ---------------
# global Eisenberg hydrophobicity, then the Eisenberg hydrophobic moment
pepdesc.calculate_global()
pepdesc.calculate_moment(append=True)

# load other AA scales
# GRAVY scale: global hydrophobicity and hydrophobic moment
pepdesc.load_scale('gravy')
pepdesc.calculate_global(append=True)
pepdesc.calculate_moment(append=True)

# old Z scale: global Z scale (=window1 autocorrelation)
pepdesc.load_scale('z3')
pepdesc.calculate_autocorr(1, append=True)

# save descriptor data to .csv file
col_names1 = 'ID,Sequence,H_Eisenberg,uH_Eisenberg,H_GRAVY,uH_GRAVY,Z3_1,Z3_2,Z3_3'
pepdesc.save_descriptor('/path/to/descriptors1.csv', header=col_names1)

# --------------- Global Descriptor Calculations ---------------
# sequence length starts the descriptor; every later value is appended
globdesc.length()
# Boman index
globdesc.boman_index(append=True)
# global aromaticity
globdesc.aromaticity(append=True)
# aliphatic index
globdesc.aliphatic_index(append=True)
# instability index
globdesc.instability_index(append=True)
# net charge
globdesc.calculate_charge(ph=7.4, amide=False, append=True)
# molecular weight
globdesc.calculate_MW(amide=False, append=True)

# save descriptor data to .csv file
col_names2 = 'ID,Sequence,Length,BomanIndex,Aromaticity,AliphaticIndex,InstabilityIndex,Charge,MW'
globdesc.save_descriptor('/path/to/descriptors2.csv', header=col_names2)
desc = GlobalDescriptor([database['Sequence'][i]]) desc.calculate_MW(amide=True) molecular_weigth_array.append(desc.descriptor) except: molecular_weigth_array.append('') database['molecular_weigth'] = molecular_weigth_array print("Estimate charge") #calculate charge for each sequence charge_array = [] for i in range(len(database)): try: desc = GlobalDescriptor([database['Sequence'][i]]) desc.calculate_charge(ph=7, amide=True) charge_array.append(desc.descriptor[0][0]) except: charge_array.append('') database['charge'] = charge_array print("Estimate charge_density") #calculate charge density for each sequence charge_density_array = [] for i in range(len(database)): try: desc = GlobalDescriptor([database['Sequence'][i]]) desc.charge_density(ph=7, amide=True) charge_density_array.append(desc.descriptor[0][0])
def _descriptor_value(make_descriptor, method_name, rounded=True,
                      **method_kwargs):
    """Return the first value of a freshly calculated descriptor, or "-".

    :param make_descriptor: zero-argument callable building the descriptor
        object.
    :param method_name: name of the calculation method to call on it.
    :param rounded: when true, the value is rounded to four decimals and
        returned as a float; otherwise it is returned unchanged.
    :param method_kwargs: keyword arguments forwarded to the calculation.
    :return: ``descriptor[0][0]`` after the calculation, or ``"-"`` when
        building, calculating or converting fails.
    """
    try:
        desc = make_descriptor()
        getattr(desc, method_name)(**method_kwargs)
        value = desc.descriptor[0][0]
        return float("%.4f" % value) if rounded else value
    except Exception:
        # Best effort: a property that cannot be computed is reported as "-".
        return "-"


def _peptide_properties(record):
    """Build the property dictionary for one parsed FASTA record."""
    sequence = str(record.seq)

    def global_desc():
        return GlobalDescriptor(sequence)

    def eisenberg_desc():
        return PeptideDescriptor(sequence, scalename='Eisenberg')

    return {
        "length": len(record.seq),
        "formula": _descriptor_value(
            global_desc, "formula", rounded=False, amide=True),
        "molecular_weigth": _descriptor_value(
            global_desc, "calculate_MW", amide=True),
        "boman_index": _descriptor_value(global_desc, "boman_index"),
        "charge": _descriptor_value(
            global_desc, "calculate_charge", ph=7, amide=True),
        "charge_density": _descriptor_value(
            global_desc, "charge_density", ph=7, amide=True),
        "isoelectric_point": _descriptor_value(
            global_desc, "isoelectric_point"),
        "instability_index": _descriptor_value(
            global_desc, "instability_index"),
        "aromaticity": _descriptor_value(global_desc, "aromaticity"),
        "aliphatic_index": _descriptor_value(global_desc, "aliphatic_index"),
        "hydrophobic_ratio": _descriptor_value(
            global_desc, "hydrophobic_ratio"),
        "hydrophobicity_profile": _descriptor_value(
            eisenberg_desc, "calculate_profile", prof_type='H'),
        "hydrophobic_profile": _descriptor_value(
            eisenberg_desc, "calculate_profile", prof_type='uH'),
        "calculate_moment": _descriptor_value(
            eisenberg_desc, "calculate_moment"),
    }


def exec(peptide, time_node):
    """Compute physicochemical properties for the sequences in FASTA text.

    The text is written to the service's FASTA file and parsed from there.
    When it holds exactly one record, a profile plot and a helical wheel
    plot are additionally written to the job directory named ``time_node``.

    :param peptide: FASTA-formatted text with one or more records.
    :param time_node: name of the job sub-directory receiving the plots.
    :return: ``"error"`` when no record could be parsed, otherwise a dict
        mapping each record id to its property dict; a property that could
        not be calculated is reported as ``"-"``.
    """
    fasta_path = "../src/public/jobs/service1/service1.fasta"
    with open(fasta_path, "w") as fasta_file:
        fasta_file.write(peptide)

    # Parse once and reuse the records instead of re-reading the file for
    # the emptiness check, the count and the property loop.
    records = list(SeqIO.parse(fasta_path, "fasta"))
    if not records:
        return "error"

    properties = {}
    for record in records:
        properties[str(record.id)] = _peptide_properties(record)

    # Plots are only produced for single-sequence submissions.
    if len(records) == 1:
        sequence = str(records[0].seq)
        job_dir = "../src/public/jobs/service1/" + time_node
        try:
            os.mkdir(job_dir)
        except OSError:
            # Still attempt the plots when the directory cannot be created,
            # for instance because it already exists.
            print("Error")
        # generate plot profile
        plot_profile(sequence, scalename='eisenberg',
                     filename=job_dir + "/profile.png")
        # generate helical wheel
        helical_wheel(sequence, colorcoding='charge', lineweights=False,
                      filename=job_dir + "/helical.png")

    return properties