########################################################
# Sweep over RNA-NP attraction strengths (dg_NP). For every value the genome
# object is rebuilt, its RNA structure is recalculated, and the result of
# get_loci_structure_pattern() for each loci group is accumulated in
# enrichment_dat.
#
# Column list left by the original author (presumably the columns of the
# accumulated table -- TODO confirm against get_loci_structure_pattern):
# ['bound', 'dgnp', 'long_loops', 'loops_all', 'paired', 'structured', 'threshold_freq']
########################################################
#
# parameter to control RNA-NP non-specific attraction ...
# dg_NP = float(sys.argv[1])
#
threshold_freq = 1.0
# accumulator for the per-(dg_NP, loci group) results ...
enrichment_dat = pd.DataFrame({})
#############
dg_list = [-0.18, -0.2, -0.25, -0.3]
dg_list_num = len(dg_list)
###################
for dg_NP in dg_list:
    # single-argument print(...) emits the same text under Python 2 and 3 ...
    print("NP melting energy: %s" % dg_NP)
    ##################################
    # setup flu genome here ...
    h1n1 = flu.influenza(genome_fname, orf_fname)
    # calculate its RNA structure ...
    h1n1.calculate_rna_lMFE_optimized(dg_NP)
    ##################################
    for loci_group_name in loci_group_names:
        # print name ...
        print("loci of interest:  %s" % loci_group_name)
        loci = loci_groups.get_group(loci_group_name)
        loci_seg = loci[['seg', 'pos']].groupby('seg')
        #
        enrichment_tmp = get_loci_structure_pattern(h1n1, loci_seg, "%.3f" % threshold_freq)
        #
        # DataFrame.append returns a NEW frame and leaves the caller untouched,
        # so the result has to be re-bound; otherwise enrichment_dat stays empty.
        # NOTE(review): DataFrame.append was removed in pandas 2.0 -- switch to
        # pd.concat if the pandas version is ever upgraded.
        enrichment_dat = enrichment_dat.append(enrichment_tmp, ignore_index=True)
####################################################
# DEALING WITH THE ENRICHMENT DATA ...
####################################################
# Kyte-Doolittle hydrophobicity scale ...
from Bio.SeqUtils import ProtParamData
KD = ProtParamData.kd
# NOTE(review): KD is the very same dict object as ProtParamData.kd, so the
# extra '*' entry below is added to Biopython's module-level table as well.
KD['*'] = 25.0
###################################################
#
#
# columns = ['name','seg','pos','description']
# seg_num = 8
# name = 'test1'
out_fname = 'test_loci.txt'
#
########################################################
# reference loading ...
ref_fname = "./pH1N1_coding_dat/pH1N1.fa"
orf_fname = "./pH1N1_coding_dat/pH1N1_noPB1F.orf"
ph1n1 = flu.influenza(ref_fname, orf_fname)
#########################################################


def aligned_seg_fname(number):
    """Return the typical alignment file name for segment `number`."""
    return "seg%d.afa" % number


# load coding part alignments for all of the segments,
# keyed as 'seg1' ... 'seg<segnum>' ...
segment_aln = {}
for seg_idx in range(1, ph1n1.segnum + 1):
    seg_key = 'seg%d' % seg_idx
    fname = aligned_seg_fname(seg_idx)
    alignment = AlignIO.read(fname, 'fasta')
    segment_aln[seg_key] = alignment
    # brief alignment info (only consumed by the disabled report below) ...
    aln_size = len(alignment)
    aln_length = alignment.get_alignment_length()
    # print
    # print "segment %d: %d sequences of length %d are aligned (length%%3=%d)"%(seg_idx, aln_size, aln_length, aln_length%3)