Пример #1
0
def pwmoccsmethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample using PWM importance weights in both orientations.

    The sample budget is split in two: ``numsamples // 2`` draws are weighted
    by ``pwm`` and the remaining draws by ``jem.pwmrevcomp(pwm)``. The value
    returned by the first ``jis.importancesample`` call is passed to the
    second call as its callback argument.

    Args:
        seqsdata: Object providing ``index``, ``childoccfreqs`` and
            ``numoccs`` (the latter two indexed by ``Widx``).
        pwm: Position weight matrix used for the importance weights and,
            together with ``lambda_``, to build the Zn calculator.
        lambda_: Passed to ``jem.createZncalculatorFn`` alongside ``pwm``.
        Widx: Index into ``args.Ws`` selecting the width to sample.
        numsamples: Total number of samples across both orientations.
        args: Object providing the list of widths ``Ws``.

    Returns:
        A tuple ``(seqsdata.numoccs[Widx], result)`` where ``result`` is the
        return value of the second ``jis.importancesample`` call.
    """
    logger.debug('Importance sampling using PWM importance weights')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    # Floor division keeps the sample count an integer even when true
    # division is in effect (Python 3, or ``from __future__ import division``).
    numpositive = numsamples // 2  # Sample half in each orientation
    W = args.Ws[Widx]
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    cb = jis.importancesample(
        seqsdata.index, W,
        jis.WeightedSamplingDist(childoccfreqs, jis.PWMImportanceWeight(pwm)),
        numpositive, jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn))
    # With an odd budget the extra sample goes to the reverse complement.
    return seqsdata.numoccs[Widx], jis.importancesample(
        seqsdata.index, W,
        jis.WeightedSamplingDist(childoccfreqs,
                                 jis.PWMImportanceWeight(jem.pwmrevcomp(pwm))),
        numsamples - numpositive, cb)
Пример #2
0
def uniformuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample with uniform weights over the unique child frequencies.

    Builds a sampling distribution from ``seqsdata.childuniquefreqs[:, Widx]``
    weighted by ``jis.UniformImportanceWeight`` and draws ``numsamples``
    samples, feeding them to a Zn-summing callback created with
    ``unique=True``.

    Returns:
        A tuple ``(seqsdata.numunique[Widx], result)`` where ``result`` is
        whatever ``jis.importancesample`` returns.
    """
    logger.debug('Importance sampling using uniform weights')
    zn_fn = jem.createZncalculatorFn(pwm, lambda_)
    width = args.Ws[Widx]
    unique_dist = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])
    num_unique = seqsdata.numunique[Widx]
    sampling_dist = jis.WeightedSamplingDist(
        unique_dist, jis.UniformImportanceWeight())
    callback = jis.ISMemoCbAdaptor(
        width, jis.ZnSumCb(width), zn_fn, unique=True)
    sampled = jis.importancesample(
        seqsdata.index, width, sampling_dist, numsamples, callback)
    return num_unique, sampled
Пример #3
0
def pwmuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample over unique W-mers using PWM weights.

    The sample budget is split in two: ``numsamples // 2`` draws use
    ``jis.PWMImportanceWeightUnique`` built from ``pwm`` and the remaining
    draws use the same weight built from ``jem.pwmrevcomp(pwm)``. The value
    returned by the first ``jis.importancesample`` call is passed to the
    second call as its callback argument.

    Args:
        seqsdata: Object providing ``index``, ``childuniquefreqs``,
            ``childoccfreqs`` and ``numunique``.
        pwm: Position weight matrix used for the importance weights and,
            together with ``lambda_``, to build the Zn calculator.
        lambda_: Passed to ``jem.createZncalculatorFn`` alongside ``pwm``.
        Widx: Index into ``args.Ws`` selecting the width to sample.
        numsamples: Total number of samples across both orientations.
        args: Object providing the list of widths ``Ws``.

    Returns:
        A tuple ``(seqsdata.numunique[Widx], result)`` where ``result`` is
        the return value of the second ``jis.importancesample`` call.
    """
    logger.debug('Importance sampling using PWM weights over unique W-mers')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    W = args.Ws[Widx]
    childuniquefreqs = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    # Floor division keeps the sample count an integer even when true
    # division is in effect (Python 3, or ``from __future__ import division``).
    numpositive = numsamples // 2  # Sample half in each orientation
    cb = jis.importancesample(
        seqsdata.index, W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(pwm, childoccfreqs)), numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn, unique=True))
    # With an odd budget the extra sample goes to the reverse complement.
    return seqsdata.numunique[Widx], jis.importancesample(
        seqsdata.index, W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(jem.pwmrevcomp(pwm), childoccfreqs)),
        numsamples - numpositive, cb)
Пример #4
0
def uniformuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Draw ``numsamples`` importance samples using uniform weights.

    The sampling distribution comes from ``seqsdata.childuniquefreqs`` for
    the width selected by ``Widx``; sampled values are passed to a
    ``jis.ISMemoCbAdaptor`` wrapping a ``jis.ZnSumCb`` with ``unique=True``.

    Returns:
        ``(seqsdata.numunique[Widx], result)``, ``result`` being the return
        value of ``jis.importancesample``.
    """
    logger.debug('Importance sampling using uniform weights')
    zn_calculator = jem.createZncalculatorFn(pwm, lambda_)
    motif_width = args.Ws[Widx]
    freqs = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])
    unique_count = seqsdata.numunique[Widx]
    weighted = jis.WeightedSamplingDist(freqs, jis.UniformImportanceWeight())
    zn_summer = jis.ZnSumCb(motif_width)
    adaptor = jis.ISMemoCbAdaptor(
        motif_width, zn_summer, zn_calculator, unique=True)
    return unique_count, jis.importancesample(
        seqsdata.index, motif_width, weighted, numsamples, adaptor)
Пример #5
0
def pwmoccsmethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample using PWM importance weights in both orientations.

    Half of the budget (``numsamples // 2``) is drawn with weights from
    ``pwm``; the rest is drawn with weights from ``jem.pwmrevcomp(pwm)``.
    The first ``jis.importancesample`` call's return value is reused as the
    callback argument of the second call.

    Args:
        seqsdata: Object providing ``index``, ``childoccfreqs`` and
            ``numoccs``.
        pwm: Position weight matrix for the importance weights and the Zn
            calculator.
        lambda_: Passed to ``jem.createZncalculatorFn`` alongside ``pwm``.
        Widx: Index into ``args.Ws`` selecting the width to sample.
        numsamples: Total number of samples across both orientations.
        args: Object providing the list of widths ``Ws``.

    Returns:
        A tuple ``(seqsdata.numoccs[Widx], result)`` where ``result`` is the
        return value of the second ``jis.importancesample`` call.
    """
    logger.debug('Importance sampling using PWM importance weights')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    # Use floor division so the count stays integral under true division
    # (Python 3, or ``from __future__ import division``).
    numpositive = numsamples // 2  # Sample half in each orientation
    W = args.Ws[Widx]
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childoccfreqs,
            jis.PWMImportanceWeight(pwm)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn))
    # Any odd leftover sample is assigned to the reverse-complement pass.
    return seqsdata.numoccs[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childoccfreqs,
            jis.PWMImportanceWeight(jem.pwmrevcomp(pwm))),
        numsamples - numpositive,
        cb)
Пример #6
0
def pwmuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample over unique W-mers using PWM weights.

    Half of the budget (``numsamples // 2``) is drawn with
    ``jis.PWMImportanceWeightUnique(pwm, ...)``; the rest is drawn with the
    same weight built from ``jem.pwmrevcomp(pwm)``. The first
    ``jis.importancesample`` call's return value is reused as the callback
    argument of the second call.

    Args:
        seqsdata: Object providing ``index``, ``childuniquefreqs``,
            ``childoccfreqs`` and ``numunique``.
        pwm: Position weight matrix for the importance weights and the Zn
            calculator.
        lambda_: Passed to ``jem.createZncalculatorFn`` alongside ``pwm``.
        Widx: Index into ``args.Ws`` selecting the width to sample.
        numsamples: Total number of samples across both orientations.
        args: Object providing the list of widths ``Ws``.

    Returns:
        A tuple ``(seqsdata.numunique[Widx], result)`` where ``result`` is
        the return value of the second ``jis.importancesample`` call.
    """
    logger.debug('Importance sampling using PWM weights over unique W-mers')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    W = args.Ws[Widx]
    childuniquefreqs = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    # Use floor division so the count stays integral under true division
    # (Python 3, or ``from __future__ import division``).
    numpositive = numsamples // 2  # Sample half in each orientation
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(pwm, childoccfreqs)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn, unique=True))
    # Any odd leftover sample is assigned to the reverse-complement pass.
    return seqsdata.numunique[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(jem.pwmrevcomp(pwm), childoccfreqs)),
        numsamples - numpositive,
        cb)
Пример #7
0
def generateseed(args):
    """Generate seed from possible fasta files and motif widths.

    Picks one entry of ``args.fastas`` and one index into ``args.Ws`` at
    random, loads the sequence data for that file, and draws a single
    importance sample with uniform weights.

    Args:
        args: Object providing ``fastas`` (candidate FASTA files) and ``Ws``
            (candidate motif widths).

    Returns:
        A tuple ``(seqsdata, Widx, seed)`` where ``seed`` is the first ``W``
        elements of the sampled item's ``representative``.
    """
    fasta = rdm.choice(args.fastas)
    # NOTE(review): randint is called with a single bound, so rdm is
    # presumably numpy.random (half-open range [0, n)) - confirm.
    Widx = rdm.randint(len(args.Ws))
    W = args.Ws[Widx]
    logger.info('Generating seed of width %d from %s', W, fasta)
    seqsdata = getseqsdata(fasta, args.Ws)
    logger.info('Importance sampling using background model to find seed')
    childfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    memocb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(childfreqs, jis.UniformImportanceWeight()),
        numsamples=1,
        callback=jis.ISCbMemo())
    # Only one sample was requested, so the seed is the first stored item.
    return seqsdata, Widx, memocb.its[0].representative[:W]
Пример #8
0
def generateseed(args):
    """Generate seed from possible fasta files and motif widths.

    A FASTA file is chosen at random from ``args.fastas`` and a width index
    at random from ``range(len(args.Ws))``. The sequence data for that file
    is loaded and one importance sample is drawn with uniform weights.

    Args:
        args: Object providing ``fastas`` (candidate FASTA files) and ``Ws``
            (candidate motif widths).

    Returns:
        A tuple ``(seqsdata, Widx, seed)`` where ``seed`` is
        ``memocb.its[0].representative`` truncated to ``W`` elements.
    """
    fasta = rdm.choice(args.fastas)
    # NOTE(review): single-argument randint suggests rdm is numpy.random,
    # which samples from the half-open range [0, n) - confirm.
    Widx = rdm.randint(len(args.Ws))
    W = args.Ws[Widx]
    logger.info('Generating seed of width %d from %s', W, fasta)
    seqsdata = getseqsdata(fasta, args.Ws)
    logger.info('Importance sampling using background model to find seed')
    childfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    memocb = jis.importancesample(seqsdata.index,
                                  W,
                                  jis.WeightedSamplingDist(
                                      childfreqs,
                                      jis.UniformImportanceWeight()),
                                  numsamples=1,
                                  callback=jis.ISCbMemo())
    # A single sample was requested, so take the first stored item.
    return seqsdata, Widx, memocb.its[0].representative[:W]
Пример #9
0
# Script excerpt: index the sequences, count W-mers, pick one seed W-mer by
# importance sampling, then start iterating over Zn-sum calculations.
# `seqs`, `W`, `numseedsites` and `lambda_` are defined outside this excerpt.
index = seqan.IndexStringDNASetESA(seqs)

logging.info('Counting W-mers')
# Only a single width is counted here, so every per-width array has one column.
Ws = [W]
# Count array has 2*len(index) rows and one column per width.
Wmercounts = npy.zeros((2*len(index), len(Ws)), dtype=npy.uint)
# countWmersMulti is indexed with [0] to get the count for the only width.
numWmers = wmers.countWmersMulti(index.topdownhistory(), Ws, Wmercounts)[0]
logging.info('Got %d %d-mers', numWmers, W)
# Same leading dimensions as Wmercounts with an extra axis of size jem.SIGMA.
childWmerfreqs = npy.zeros((2*len(index), len(Ws), jem.SIGMA))
# NOTE(review): presumably fills childWmerfreqs in place (its return value
# is discarded) - confirm against wmers.countWmerChildren.
wmers.countWmerChildren(index.topdownhistory(), W, Wmercounts, childWmerfreqs)
childWmerfreqs = jem.normalisearray(childWmerfreqs)
sumestimator = jis.makesumestimator(numWmers)

logging.info('Importance sampling using background model to find one seed')
# Fixed RNG seed so the seed W-mer chosen below is repeatable.
rdm.seed(2)
# Draw a single sample with uniform importance weights; childWmerfreqs[:, 0]
# selects the frequencies for the only width in Ws.
memocb = jis.importancesample(
    index, W, childWmerfreqs[:, 0], jis.UniformImportanceWeight(),
    numsamples=1, callback=jis.ISCbMemo())
# Build the starting PWM from the sampled W-mer and save its logo as 'seed'.
pwm = jem.pwmfromWmer(memocb.Xns[0], numseedsites, 1.)
jem.logo(pwm, 'seed')

numsamples = 3000
# Result accumulators; presumably appended to later in the loop, beyond the
# end of this excerpt.
distsbs = []
distsbg = []
truesums = []
varratios = []
# Work on a copy so the seed PWM itself is not modified through pwmtrue.
pwmtrue = pwm.copy()
# NOTE: the loop body continues past the end of this excerpt.
for iteration in xrange(5):
    logging.debug('Calculating true Zn sums')
    summer = jis.ZnSumCb(W)
    calculateZn = jem.createZncalculatorFn(pwmtrue, lambda_)
    sumvisitor = jis.ZnCalcVisitor(W, calculateZn, summer)