def return_equal_charge_concentrations(protein_file,
                                       RNA_file,
                                       protein_conc=1000,
                                       count_histidine=0,
                                       count_gfp=1):
    """Return, per RNA record, ``protein_conc * charge / len(rna.seq)``.

    ``charge`` is computed from the protein FASTA file as
    ``get_countPos() - get_countNeg()`` of a ``SequenceParameters`` object,
    optionally increased by a histidine term and by the same difference
    computed for a hard-coded GFP sequence.

    Only the LAST record of ``protein_file`` contributes: the original loop
    overwrote its running value on every iteration, and that result is kept
    here without building the discarded intermediate objects.
    NOTE(review): if the charge was meant to be summed over all protein
    records, this needs to accumulate instead -- confirm with the author.

    Args:
        protein_file: FASTA source passed to ``SeqIO.parse``.
        RNA_file: FASTA source passed to ``SeqIO.parse``.
        protein_conc: Multiplier applied to the charge (default 1000).
        count_histidine: If truthy, add ``0.5 * fraction_of_H * length`` of
            the protein, i.e. half a charge per histidine.
        count_gfp: If truthy, add the GFP sequence's
            ``get_countPos() - get_countNeg()``.

    Returns:
        dict mapping ``rna.id`` truncated at its first ``'('`` to the
        computed value. Records whose truncated ids collide overwrite each
        other; the last one wins.

    Raises:
        ZeroDivisionError: if an RNA record has an empty sequence.
    """
    gfp_sequence = (
        "MVSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLTYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK"
    )

    # Both files are read up front so a bad input fails before any result
    # is produced.
    proteins = list(SeqIO.parse(protein_file, 'fasta'))
    rnas = list(SeqIO.parse(RNA_file, 'fasta'))

    pos_protein = 0
    if proteins:
        protein_obj = SequenceParameters(str(proteins[-1].seq))
        pos_protein = protein_obj.get_countPos() - protein_obj.get_countNeg()
        if count_histidine:
            # fraction * length recovers the histidine count; each one is
            # weighted as half a positive charge.
            pos_protein = pos_protein + (
                0.5 * protein_obj.get_amino_acid_fractions()['H']
                * protein_obj.get_length())
    # With no protein records there is nothing to take histidines from, so
    # the protein contribution stays 0 instead of raising NameError.

    if count_gfp:
        gfp = SequenceParameters(gfp_sequence)
        pos_protein = pos_protein + (gfp.get_countPos() - gfp.get_countNeg())

    return {
        rna.id.split('(')[0]: protein_conc * pos_protein / len(rna.seq)
        for rna in rnas
    }
#can't use SeqParam(seqfile=file) #because all the sequences are appended to each other. output = open(file + "_charge", 'w+') for protein in [x for x in contents if x]: header = protein[0:protein.index('\n')] seq = protein[protein.index('\n'):-1] print(header) if ('X' in seq): print("Warning: unspecified protein encountered.") seq = seq.replace('X', '') seq_param = SequenceParameters(seq) #mean_net_charge is always positive, whereas # net_charge_per_residue is alternating net_charge = seq_param.get_NCPR(pH=7.0) * seq_param.get_length() print(net_charge) output.write(header) output.write(", ") output.write(str(7.0)) output.write(str(", ")) output.write(str(seq_param.get_molecular_weight())) output.write(str(", ")) output.write(str(net_charge)) output.write('\n') output.close()