def dotrueiteration(seqsdata, W, pwm, lambda_):
    """Run one exact EM iteration, i.e. visit every W-mer in the index.

    A ``jis.ZnSumCb`` accumulator is fed by a ``jis.ZnCalcVisitor`` during a
    top-down-history traversal of ``seqsdata.index``; the accumulator is
    returned once the traversal has finished.
    """
    logger.debug('Calculating true Zn sums')
    zn_accumulator = jis.ZnSumCb(W)
    visitor = jis.ZnCalcVisitor(
        W,
        jem.createZncalculatorFn(pwm, lambda_),
        zn_accumulator,
    )
    history = seqsdata.index.topdownhistory()
    seqan.traverse.topdownhistorytraversal(history, visitor)
    return zn_accumulator
def uniformuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance-sample W-mers using uniform importance weights.

    The width is taken from ``args.Ws[Widx]`` and the sampling distribution is
    built from column ``Widx`` of ``seqsdata.childuniquefreqs``.

    Returns a pair ``(seqsdata.numunique[Widx], callback)`` where ``callback``
    is whatever ``jis.importancesample`` returns for the memoising Zn-sum
    adaptor created here (constructed with ``unique=True``).
    """
    logger.debug('Importance sampling using uniform weights')
    zn_fn = jem.createZncalculatorFn(pwm, lambda_)
    width = args.Ws[Widx]
    unique_dist = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])

    total_unique = seqsdata.numunique[Widx]
    proposal = jis.WeightedSamplingDist(
        unique_dist, jis.UniformImportanceWeight())
    adaptor = jis.ISMemoCbAdaptor(
        width, jis.ZnSumCb(width), zn_fn, unique=True)
    sampled = jis.importancesample(
        seqsdata.index, width, proposal, numsamples, adaptor)
    return total_unique, sampled
def pwmoccsmethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance-sample W-mer occurrences using PWM importance weights.

    The sample budget is split between the two orientations: the first pass
    is weighted by ``pwm`` and the second by ``jem.pwmrevcomp(pwm)``. Both
    passes feed the same callback, so its sums cover all ``numsamples`` draws.

    The width is taken from ``args.Ws[Widx]`` and the sampling distribution is
    built from column ``Widx`` of ``seqsdata.childoccfreqs``.

    Returns a pair ``(seqsdata.numoccs[Widx], callback)`` where ``callback``
    is the value returned by the second ``jis.importancesample`` call.
    """
    logger.debug('Importance sampling using PWM importance weights')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    # Sample half in each orientation. Floor division keeps the count an
    # integer even when true division is in effect; for odd numsamples the
    # reverse-complement pass receives the extra sample.
    numpositive = numsamples // 2
    W = args.Ws[Widx]
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])

    # Forward orientation.
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(childoccfreqs, jis.PWMImportanceWeight(pwm)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn))

    # Reverse-complement orientation, accumulating into the same callback.
    return seqsdata.numoccs[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childoccfreqs, jis.PWMImportanceWeight(jem.pwmrevcomp(pwm))),
        numsamples - numpositive,
        cb)
def pwmuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance-sample unique W-mers using PWM importance weights.

    The sample budget is split between the two orientations: the first pass
    is weighted by ``pwm`` and the second by ``jem.pwmrevcomp(pwm)``. Both
    passes feed the same callback (created with ``unique=True``), so its sums
    cover all ``numsamples`` draws.

    The width is taken from ``args.Ws[Widx]``. The sampling distribution is
    built from column ``Widx`` of ``seqsdata.childuniquefreqs``; the matching
    column of ``seqsdata.childoccfreqs`` is handed to
    ``jis.PWMImportanceWeightUnique``.

    Returns a pair ``(seqsdata.numunique[Widx], callback)`` where ``callback``
    is the value returned by the second ``jis.importancesample`` call.
    """
    logger.debug('Importance sampling using PWM weights over unique W-mers')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    W = args.Ws[Widx]
    childuniquefreqs = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    # Sample half in each orientation. Floor division keeps the count an
    # integer even when true division is in effect; for odd numsamples the
    # reverse-complement pass receives the extra sample.
    numpositive = numsamples // 2

    # Forward orientation.
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(pwm, childoccfreqs)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn, unique=True))

    # Reverse-complement orientation, accumulating into the same callback.
    return seqsdata.numunique[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(jem.pwmrevcomp(pwm), childoccfreqs)),
        numsamples - numpositive,
        cb)
# Script section: build a seed PWM from one sampled W-mer, then for several
# iterations compute the exact Zn sum and an importance-sampled estimate.

# Seed `rdm` with a fixed value (presumably so the sampled seed W-mer is
# repeatable between runs -- confirm `rdm` is the generator the sampler uses).
rdm.seed(2)
# Draw a single W-mer with uniform importance weights; the memo callback
# records the sampled W-mers in `Xns`.
memocb = jis.importancesample(
    index, W, childWmerfreqs[:, 0], jis.UniformImportanceWeight(),
    numsamples=1, callback=jis.ISCbMemo())
# Turn the sampled W-mer into the starting PWM and render it under the
# name 'seed'.
pwm = jem.pwmfromWmer(memocb.Xns[0], numseedsites, 1.)
jem.logo(pwm, 'seed')
# Number of draws used for each importance-sampling run below.
numsamples = 3000
# Result accumulators; in the code visible here only `truesums` is appended
# to (one exact Zn sum per iteration).
distsbs = []
distsbg = []
truesums = []
varratios = []
# The loop works with a copy of the seed PWM.
pwmtrue = pwm.copy()
for iteration in xrange(5):
    # Exact pass: traverse the whole index and sum Zn for every W-mer.
    logging.debug('Calculating true Zn sums')
    summer = jis.ZnSumCb(W)
    calculateZn = jem.createZncalculatorFn(pwmtrue, lambda_)
    sumvisitor = jis.ZnCalcVisitor(W, calculateZn, summer)
    seqan.traverse.topdownhistorytraversal(index.topdownhistory(), sumvisitor)
    logging.debug('Sums:\n%s', summer.sums)
    # The reference value is the total of the first entry of the sums.
    trueZnsum = summer.sums[0].sum()
    truesums.append(trueZnsum)
    logging.info('True sum: %s', trueZnsum)
    # Estimated pass: importance sampling weighted by the current PWM.
    # NOTE(review): the functions defined earlier in this file pass W as the
    # first argument to jis.ISMemoCbAdaptor and wrap the frequencies in
    # jis.WeightedSamplingDist; this call does neither -- confirm it matches
    # the current jis API.
    logging.debug('Importance sampling using binding site model')
    cbbs = jis.importancesample(
        index, W, childWmerfreqs[:, 0], jis.PWMImportanceWeight(pwmtrue),
        numsamples, jis.ISMemoCbAdaptor(jis.ZnSumCb(W), calculateZn))
    # Collect the sampled results into a frame labelled with model 'BS'.
    samplebs = makedf(cbbs, calculateZn, model='BS')
    logging.debug('Variances:\n%s', cbbs.cb.variances())