def calc_pwm_from_simulations(mo, chem_affinity, n_sims=10000):
    """Estimate a position weight matrix by simulating binding to random DNA.

    Random sequences are scored against the binding model ``mo``, converted
    to occupancies with ``calc_occ``, and every base of the central
    ``mo.motif_len`` positions of each sequence is counted with a weight
    equal to that sequence's relative occupancy.

    Args:
        mo: Binding model. Must expose ``motif_len`` and be accepted by
            ``FixedLengthDNASequences.score_binding_sites``.
        chem_affinity: Chemical affinity passed as the first argument to
            ``calc_occ``.
        n_sims: Number of random sequences to sample.

    Returns:
        A ``(4, mo.motif_len)`` float array whose columns each sum to 1.
    """
    # Each sequence carries 4 extra bases (2 on each side of the motif) to
    # account for the shape features.
    seqs = FixedLengthDNASequences(
        sample_random_seqs(n_sims, 4 + mo.motif_len))
    # NOTE(review): column 2 of the forward-strand scores is negated to get
    # the affinities; confirm the meaning of that column against
    # score_binding_sites.
    affinities = -seqs.score_binding_sites(mo, 'FWD')[:, 2]
    occs = calc_occ(chem_affinity, affinities)
    # Rescale so that the highest-occupancy sequence has weight 1.
    occs /= occs.max()

    # NOTE(review): an earlier comment here promised "a pseudo count of one",
    # but the counts have always started at zero. The zero start is kept so
    # results do not change; confirm which was intended.
    cnts = np.zeros((4, mo.motif_len), dtype=float)
    # The builtin zip behaves the same on Python 2 and 3 (izip is Python 2
    # only).
    for seq, occ in zip(seqs, occs):
        # Skip the 2 flanking bases on each side; only the motif positions
        # contribute to the counts.
        for i, base in enumerate(seq.seq[2:-2]):
            cnts[base_map[base], i] += occ

    # Normalize each position (column) so its base weights sum to 1.
    return cnts / cnts.sum(0)
def calc_bnd_frac(affinities, chem_pot):
    """Return the weighted sum of occupancies at chemical potential ``chem_pot``.

    The occupancies come from ``calc_occ(chem_pot, affinities)`` and are
    multiplied element-wise by ``weights`` before summing.

    NOTE(review): ``weights`` is not a parameter; it is looked up in the
    surrounding scope, which is not visible from this block. When those
    weights are uniform and sum to one, the result is the mean occupancy.
    """
    site_weights = weights
    occupancies = calc_occ(chem_pot, affinities)
    weighted_occupancies = site_weights * occupancies
    return weighted_occupancies.sum()