Пример #1
0
def kmersWithPosLists(K, seq):
    """Split the k-mers of ``seq`` into a forward list and a reverse list.

    Each ``(kmer, pos)`` pair produced by ``kmersWithPosList(K, seq, True)``
    is routed by the sign of ``pos``:

    * ``pos > 0``  -- goes to the first list with position ``pos - 1``;
    * otherwise    -- goes to the second list with position
      ``(len(seq) - K + 1) + pos``.

    Returns a ``(fwd, rev)`` tuple of lists of ``(kmer, position)`` pairs,
    each in the order the pairs were produced.
    """
    span = len(seq) - K + 1
    pairs = list(kmersWithPosList(K, seq, True))
    forward = [(kmer, pos - 1) for (kmer, pos) in pairs if pos > 0]
    reverse = [(kmer, span + pos) for (kmer, pos) in pairs if not pos > 0]
    return (forward, reverse)
Пример #2
0
def remapReads(K, L, rds, v):
    ctx = v.context(2*L)
    idx = {}
    for (x,p) in kmersWithPosList(K, ctx[1], False):
        if x not in idx:
            idx[x] = []
        idx[x].append(p)

    res = {}
    for fq in rds:
        for (x,p) in locate(K, idx, fq[1]):
            if p not in res:
                res[p] = {}
            if x not in res[p]:
                res[p][x] = 0
            res[p][x] += 1

    for (p,ys) in sorted(res.items()):
        for (y,c) in sorted(ys.items()):
            print '%d\t%s\t%d' % (p, render(K, y), c)
Пример #3
0
def main(argv):
    opts = docopt.docopt(__doc__, argv)

    random.seed(17)

    K = int(opts['-k'])
    S = 2*(K-3)

    frameAnchors = {}
    knownStops = {}
    sequences = {}
    seqKmers = {}

    if opts['-r']:
        with openFile(opts['-r']) as f:
            for (nm,seq) in readFasta(f):
                sequences[nm] = seq
                # trim polyA tails
                seq = re.sub('AAAAAA*$', '', seq)
                seqKmers[nm] = set([])
                for (x,p1) in kmersWithPosList(K, seq, False):
                    seqKmers[nm].add(x)
                    p = p1 - 1
                    w = p % 3
                    if x not in frameAnchors:
                        frameAnchors[x] = set([])
                    frameAnchors[x].add((nm,p))
                    y = x & 63
                    if w == 0 and y in stops:
                        if x not in knownStops:
                            knownStops[x] = set([])
                        knownStops[x].add(nm)

    rn = 0
    res = {}
    for fn in opts['<input>']:
        with openFile(fn) as f:
            for rd in readFastq(f):
                L = len(rd[1])
                rn += 1
                fwdAndRev = kmersWithPosLists(K, rd[1])
                frames = {}
                possibleStops = {}
                for i in range(2):
                    #print i, sorted([p for (x,p) in fwdAndRev[i]])
                    for (x,p) in fwdAndRev[i]:
                        if x in frameAnchors:
                            for (nm,q) in frameAnchors[x]:
                                o = (q - p)
                                k = (nm, o, i)
                                frames[k] = 1 + frames.get(k, 0)
                if len(frames) == 0:
                    continue
                n = sum(frames.values())
                probs = []
                for ((nm, off, strnd), cnt) in sorted(frames.items()):
                    probs.append((float(cnt)/float(n), cnt, off, strnd, nm))
                v = random.random()
                for (pv, cnt, off, strnd, nm) in probs:
                    if v < pv:
                        #print rd[1]
                        #print proj(strnd, sequences[nm][off:off+len(rd[1])])
                        #print codons(off % 3, rd[1]), off
                        for (x,p) in fwdAndRev[strnd]:
                            if (p + off + K - 3) % 3 == 0 and (x & 63) in stops:
                                if nm not in res:
                                    res[nm] = {}
                                if x not in res[nm]:
                                    res[nm][x] = 0
                                res[nm][x] += 1
                        break
                    v -= pv
    for (nm,stps) in res.iteritems():
        for (x,c) in stps.iteritems():
            (d,y) = nearest3(K, seqKmers[nm], x)
            if x in knownStops:
                k = 'known'
            else:
                k = 'novel'
            print '%s\t%s\t%d\t%d\t%s\t%s' % (k, render(K, x), c, d, render(K, y), nm)
Пример #4
0
def main(argv):
    """Look for duplication evidence in reads that hit a set of bait sequences.

    Options are parsed with docopt from the module docstring:

    * ``-k``          -- k-mer length ``K``; must be even.
    * ``-m``          -- minimum coverage for the three junction k-mers.
    * ``-v``          -- verbose flag, passed to ``reads``.
    * ``-a``, ``-s``, ``-A`` -- only consulted in the reporting section.
    * ``<sequences>`` -- FASTA file of bait sequences.
    * ``<input>``     -- reads, passed to ``reads``.

    Each k-mer ``x`` is split into a high half ``x >> S`` and a low half
    ``x & Mj`` (``J = K // 2``).  Per bait sequence the function builds the
    k-mer bait index, the half-to-half maps ``wtFst``/``wtLst`` and a
    position index of J-mers, and warns on stderr about duplications that
    ``findDup`` already finds in the bait itself.  Reads sharing a k-mer
    with a bait contribute all their k-mers to that bait's counts.

    NOTE(review): in the per-bait loop an unconditional ``continue`` follows
    the gap-tracing output, so everything after it (the ``findDup`` based
    reporting with the tab-separated table) is currently unreachable.
    """
    opts = docopt.docopt(__doc__, argv)

    K = int(opts['-k'])
    if (K & 1) != 0:
        print >> sys.stderr, "K must be even."
        return

    minCov = int(opts['-m'])

    verbose = opts['-v']

    # J is the half-k-mer length; S is the shift that isolates the high
    # half of a k-mer and Mj masks its low 2*J bits (presumably 2 bits per
    # base -- TODO confirm against the k-mer encoding).
    J = K // 2
    S = 2*(K - J)
    Mj = (1 << (2*J)) - 1

    names = []      # bait names, indexed by bait number n
    seqs = {}       # bait name -> sequence
    bait = {}       # k-mer -> set of bait numbers containing it
    wtFst = []      # per bait: high half -> set of low halves
    wtLst = []      # per bait: low half -> set of high halves
    posIdx = []     # per bait: J-mer -> list of positions
    rds = []        # per bait: reads that hit it
    with openFile(opts['<sequences>']) as f:
        for (nm, seq) in readFasta(f):
            n = len(names)
            names.append(nm)
            seqs[nm] = seq
            wf = {}
            wl = {}
            for x in kmersList(K, seq, False):
                if x not in bait:
                    bait[x] = set([])
                bait[x].add(n)

                y0 = x >> S
                y1 = x & Mj
                #print '- %s\t%s\t%s' % (render(K, x), render(J, y0), render(J, y1))

                if y0 not in wf:
                    wf[y0] = set([])
                wf[y0].add(y1)

                if y1 not in wl:
                    wl[y1] = set([])
                wl[y1].add(y0)

            wtFst.append(wf)
            wtLst.append(wl)

            px = {}
            for (x,p) in kmersWithPosList(J, seq, False):
                if x not in px:
                    px[x] = []
                px[x].append(p)
            posIdx.append(px)

            # Run the duplication finder on the bait against itself and warn
            # about anything it reports.
            # NOTE(review): "dumplication" in the message below looks like a
            # typo for "duplication".
            for (a, b, c, d) in findDup(wtFst[n], wtLst[n], wtFst[n], wtLst[n]):
                pps = positions(posIdx[n], J, a, b, c, d)
                if pps is None:
                    continue
                for pp in pps:
                    ab = a << S | b
                    cb = c << S | b
                    cd = c << S | d
                    dd = pp[2] - pp[0]
                    print >> sys.stderr, 'warning: phantom dumplication: %s-%s-%s (%d)' % (render(K, ab), render(K, cb), render(K, cd), dd)

            rds.append([])

    N = len(names)

    # NOTE(review): L is assigned here and in the loop below but never read.
    L = None
    # X[n] maps k-mer -> count over all reads that hit bait n.
    X = [{} for n in range(N)]
    for itm in reads(opts['<input>'], K=K, reads=True, kmers=True, both=True, verbose=verbose):
        rd = itm.reads[0]
        L = len(rd)

        xs = itm.kmers[0]
        # Baits sharing at least one k-mer with this read.
        hits = set([])
        for x in xs:
            if x in bait:
                hits |= bait[x]
        # Every k-mer of the read is credited to every bait it hit.
        for n in hits:
            for x in xs:
                if x not in X[n]:
                    X[n][x] = 0
                X[n][x] += 1
            rds[n].append(rd)

    hdrShown = False
    vn = 0
    for n in range(N):
        # Keep only k-mers seen at least 10 times (hard-coded cutoff,
        # independent of the -m option).
        xs = {}
        for (x,c) in X[n].iteritems():
            if c >= 10:
                xs[x] = c

        seq = seqs[names[n]]

        rngs = []
        st = None
        en = None
        inside = False
        # One character per bait k-mer: '.' if it survived the cutoff,
        # 'X' if it did not.
        xx = []
        for x in kmersList(K, seq, False):
            if x in xs:
                xx.append('.')
            else:
                xx.append('X')
        print ''.join(xx)
        # Collect (last covered k-mer before a gap, first covered k-mer
        # after it) pairs; either end is None when the gap touches the
        # start or end of the bait.
        for x in kmersList(K, seq, False):
            if not inside:
                if x in xs:
                    st = x
                else:
                    inside = True
            else:
                if x in xs:
                    en = x
                    rngs.append((st, en))
                    st = x
                    en = None
                    inside = False
        if inside:
            rngs.append((st, en))

        pthr = Pather(K, xs)

        # For each gap bounded on both sides, print the bounding k-mers and
        # the paths the Pather traces between them.
        for (x,y) in rngs:
            if x is None or y is None:
                continue
            print render(K, x), render(K, y)
            for p in pthr.trace(x, y, 100):
                print renderPath(K, p)
        # NOTE(review): this unconditional continue skips the remainder of
        # the loop body for every bait; everything below is unreachable.
        continue

        # Half-to-half maps built from the observed (filtered) k-mers.
        fst = {}
        lst = {}
        for (x,c) in xs.iteritems():
            #if c < 5:
            #    continue
            y0 = x >> S
            y1 = x & Mj

            if y0 not in fst:
                fst[y0] = []
            fst[y0].append(y1)

            if y1 not in lst:
                lst[y1] = []
            lst[y1].append(y0)

        #for (a, b, c, d) in findDupDeNovo(fst, lst):
        for (a, b, c, d) in findDup(wtFst[n], wtLst[n], fst, lst):
            #continue
            pps = positions(posIdx[n], J, a, b, c, d)
            if pps is None:
                continue
            for pp in pps:
                # The three junction k-mers assembled from the half k-mers.
                ab = a << S | b
                cb = c << S | b
                cd = c << S | d
                #print [(render(J, w), p) for (w,p) in zip([a, b, c, d], pps)]

                dd = pp[2] - pp[0]

                # Unless -a is given, keep only lengths divisible by 3.
                if not opts['-a'] and dd % 3 != 0:
                    continue

                # With -s, both flanking paths must exist and agree once J
                # elements are trimmed from each end.
                if opts['-s']:
                    fstPath = interpolate(K, xs, ab, cb, dd+1)
                    sndPath = interpolate(K, xs, cb, cd, dd+1)

                    if fstPath is None:
                        continue
                    if sndPath is None:
                        continue

                    if fstPath[J:-J] != sndPath[J:-J]:
                        continue

                pa = pp[0]
                pb = pp[1]
                pc = pp[2]
                pd = pp[3]

                cab = xs.get(ab, 0)
                ccb = xs.get(cb, 0)
                ccd = xs.get(cd, 0)

                # All three junction k-mers must reach the -m coverage.
                if cab < minCov:
                    continue
                if ccb < minCov:
                    continue
                if ccd < minCov:
                    continue

                # Reported as 'vaf': middle coverage relative to the mean
                # of the two outer coverages.
                m = (cab + ccd) / 2.0
                # Assume the true std dev is 10% of the mean
                w = ccb / m

                hgvs = '%s:c.%d_%ddup' % (names[n], pb, pd - 1)
                v = Duplication(names[n], pb, pd-1, seqs)
                if opts['-A']:
                    showAnchoredReads(K, {ab:'AB', cb:'CB', cd:'CD'}, rds[n])

                vn += 1

                # Header names, format specs and values are built in
                # parallel so the columns stay aligned.
                hdrs = ['n']
                fmts = ['%d']
                outs = [vn]

                hdrs += ['left', 'leftCov']
                fmts += ['%s','%d']
                outs += [render(K, ab), cab]

                hdrs += ['mid', 'midCov']
                fmts += ['%s','%d']
                outs += [render(K, cb), ccb]

                hdrs += ['right', 'rightCov']
                fmts += ['%s','%d']
                outs += [render(K, cd), ccd]

                hdrs += ['len']
                fmts += ['%d']
                outs += [dd]

                hdrs += ['vaf']
                fmts += ['%g']
                outs += [w]

                hdrs += ['hgvs']
                fmts += ['%s']
                outs += [hgvs]

                # The header line is printed once, before the first row.
                if not hdrShown:
                    hdrShown = True
                    print '\t'.join(hdrs)
                print '\t'.join(fmts) % tuple(outs)
Пример #5
0
def findAnchors(K, seq, mx, isLhs, D):
    """Find well-covered k-mers near the k-mers of ``seq`` to use as anchors.

    K     -- k-mer length.
    seq   -- sequence whose k-mers are enumerated with
             ``kmersWithPosList(K, seq, False)``; positions are shifted
             down by one before use.
    mx    -- dict mapping k-mer -> coverage.
    isLhs -- selects the pruning direction and the final position
             transform (see below).
    D     -- maximum ``ham`` distance between a k-mer of ``seq`` and a
             candidate anchor.

    Candidates are the k-mers of ``mx`` whose coverage is at least
    ``int(c_max / e**1.5)``, where ``c_max`` is the highest coverage of any
    k-mer of ``seq`` present in ``mx``.  For each position whose k-mer is
    itself a candidate, every candidate within distance ``D`` is attached.
    A candidate at a neighbouring position is then dropped when ``debruijn``
    links it to a candidate at the current position (previous position for
    ``isLhs``, next position otherwise).

    Returns a sorted list of ``(kmer, position)`` pairs; for ``isLhs``
    positions are reported as ``len(seq) - K - position``.  An empty list
    is returned when no k-mer of ``seq`` has non-zero coverage in ``mx``
    (the original returned an empty set here, unlike every other path).
    """
    xps = [(x, p - 1) for (x, p) in kmersWithPosList(K, seq, False)]

    # Find the highest coverage k-mer that intersects.
    xc = 0
    for (x, p) in xps:
        if x in mx and mx[x] >= xc:
            xc = mx[x]

    if xc == 0:
        return []

    # Seeds should be in the same order of magnitude
    # as the highest-coverage seed.
    minCov = int(math.exp(math.log(xc) - 1.5))

    ys = [x for (x, c) in mx.iteritems() if c >= minCov]
    xs = set(ys)

    # position -> set of candidate k-mers within distance D of the k-mer
    # at that position (empty when that k-mer is not itself a candidate).
    zs = {}
    for (x, p) in xps:
        near = zs[p] = set([])
        if x not in xs:
            continue
        for y in ys:
            if ham(x, y) <= D:
                near.add(y)

    e = set([])
    res = set([])
    if isLhs:
        # Walk left to right; a candidate at p supersedes any candidate at
        # p - 1 that debruijn() links to it.
        for (x, p) in xps:
            here = zs.get(p, e)
            before = zs.get(p - 1, e)
            for s in here:
                res.add((s, p))
                for u in before:
                    if debruijn(K, u, s):
                        res.discard((u, p - 1))
    else:
        # Walk right to left; a candidate at p supersedes any candidate at
        # p + 1 that debruijn() links from it.
        for (x, p) in xps[::-1]:
            here = zs.get(p, e)
            after = zs.get(p + 1, e)
            for s in here:
                res.add((s, p))
                for u in after:
                    if debruijn(K, s, u):
                        res.discard((u, p + 1))

    if isLhs:
        l = len(seq) - K
        res = [(x, l - p) for (x, p) in res]
    else:
        res = list(res)
    res.sort()

    return res
Пример #6
0
def main(argv):
    opts = docopt.docopt(__doc__, argv)

    verbose = opts['-v']

    K = int(opts['-k'])

    C = int(opts['-C'])

    L = int(opts['-L'])

    raw = opts['-r']

    S = int(opts['-S'])

    V = float(opts['-V'])

    d = "."
    if opts['-g']:
        d = opts['-g']
    sf = SequenceFactory(d)

    with openFile(opts['<regions>']) as f:
        R = readBED(f)

    refTbl = {}
    refIdx = {}
    zoneIdx = {}
    for (acc, zones) in R.items():
        accSeq = sf[acc]
        for (s, e, nm) in zones:
            zoneIdx[nm] = (acc, s, e)
            seq = accSeq[s - 1:e]
            if nm not in refTbl:
                refTbl[nm] = {}
            for (x, p) in kmersWithPosList(K, seq, False):
                p -= 1
                p += s
                refTbl[nm][p] = x
                if x not in refIdx:
                    refIdx[x] = []
                refIdx[x].append((nm, p))

    acc = {}
    for itm in reads(opts['<input>'],
                     K=K,
                     paired=True,
                     reads=True,
                     kmers=False,
                     verbose=verbose):
        rdL = itm.reads[0]
        zL = len(rdL)
        (fwdL, revL) = kmersWithPosLists(K, rdL[1])
        fwdLHits = hits(refIdx, K, fwdL, acc)
        revLHits = hits(refIdx, K, revL, acc)

        rdR = itm.reads[1]
        zR = len(rdR)
        (fwdR, revR) = kmersWithPosLists(K, rdR[1])
        fwdRHits = hits(refIdx, K, fwdR, acc)
        revRHits = hits(refIdx, K, revR, acc)

    killZ = set([])
    for z in acc.keys():
        killP = set([])
        for p in acc[z].keys():
            killX = set([])
            vv = {}
            for x in acc[z][p].keys():
                y = x >> 2
                if y not in vv:
                    vv[y] = []
                vv[y].append((x, acc[z][p][x]))
            for vs in vv.values():
                vt = V * sum([c for (x, c) in vs])
                for (x, c) in vs:
                    if c < vt or c < C:
                        killX.add(x)
            for x in killX:
                del acc[z][p][x]
            if len(acc[z][p]) == 0:
                killP.add(p)
        for p in killP:
            del acc[z][p]
        if len(acc[z]) == 0:
            killZ.add(z)
    for z in killZ:
        del acc[z]

    if raw:
        print '\t'.join(['chrom', 'pos', 'side', 'label', 'anchor', 'insSeq'])
    else:
        print '\t'.join([
            'chrom', 'after', 'before', 'label', 'rhsShift', 'lhsShift',
            'lhsAnc', 'rhsAnc', 'lhsSeq', 'rhsSeq'
        ])

    for z in sorted(acc.keys()):
        (ch, st, en) = zoneIdx[z]

        Z = acc[z]
        ref = refTbl[z]
        aft = dict(forwardSpurs(K, ref, Z))
        bef = dict(reverseSpurs(K, ref, Z))

        scoredAft = {}
        for p in sorted(aft.keys()):
            if p + K - 1 == en:
                continue

            for spur in aft[p]:

                if len(spur) < L:
                    continue

                if raw:
                    (xs, cs) = zip(*spur)
                    seq = renderPath(K, xs)
                    anc = seq[:K]
                    ins = seq[K:]
                    print '%s\t%d\t%s\t%s\t%s\t%s\t%s' % (
                        ch, p + K - 1, 'after', z, anc, ins, ','.join(
                            map(str, cs)))
                    continue

                for (q, xcs, v) in shiftForwardSpur(ref, Z, S, p, spur):
                    q += K - 1
                    if q not in scoredAft:
                        scoredAft[q] = []
                    (xs, cs) = zip(*xcs)
                    seq = renderPath(K, xs)
                    anc = seq[:K]
                    ins = seq[K:]
                    scoredAft[q].append((v, anc, ins, cs))

        scoredBef = {}
        for p in sorted(bef.keys()):
            if p == st:
                continue

            for spur in bef[p]:
                if len(spur) < L:
                    continue

                if raw:
                    (xs, cs) = zip(*spur)
                    seq = renderPath(K, xs)
                    anc = seq[-K:]
                    ins = seq[:-K]
                    print '%s\t%d\t%s\t%s\t%s\t%s\t%s' % (
                        ch, p, 'before', z, anc, ins, ','.join(map(str, cs)))
                    continue

                for (q, xcs, v) in shiftReverseSpur(ref, Z, S, p, spur):
                    if q not in scoredBef:
                        scoredBef[q] = []
                    (xs, cs) = zip(*xcs)
                    seq = renderPath(K, xs)
                    anc = seq[-K:]
                    ins = seq[:-K]
                    scoredBef[q].append((v, anc, ins, cs))

        for p0 in sorted(scoredAft.keys()):
            p1 = p0 + 1
            if p1 not in scoredBef:
                continue
            for (aftV, aftAnc, aftIns, aftCov) in scoredAft[p0]:
                for (befV, befAnc, befIns, befCov) in scoredBef[p1]:
                    if befAnc in aftIns or aftAnc in befIns:
                        continue
                    v = aftV + befV
                    print '%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s' % (
                        ch, p0, p1, z, aftV, befV, aftAnc, befAnc, aftIns,
                        befIns)
Пример #7
0
def main(argv):
    global verbose

    opts = docopt.docopt(__doc__, argv)

    verbose = opts['-v']

    genomeDir = '.'
    if opts['-g']:
        genomeDir = opts['-g']
    sf = SequenceFactory(genomeDir)

    if opts['-P']:
        if opts['-t']:
            prepareBedFileGeneTx(opts['<gene-list>'], opts['<refgene>'],
                                 opts['<bedfile>'])
        else:
            prepareBedFileGene(opts['<gene-list>'], opts['<refgene>'],
                               opts['<bedfile>'])
        return

    if opts['-X']:
        with openFile(opts['<index>'], 'w') as out:
            yaml.safe_dump_all(indexBedFiles(opts['<must-have>'], sf),
                               out,
                               default_flow_style=False)
        return

    K = int(opts['-k'])
    minGeneReads = int(opts['-M'])
    minExonReads = int(opts['-m'])
    minGeneRate = float(opts['-R'])
    minExonRate = float(opts['-r'])
    (minGeneCount, maxGeneCount) = map(int, opts['-Z'].split(':'))
    (minExonCount, maxExonCount) = map(int, opts['-z'].split(':'))

    with openFile(opts['<index>']) as f:
        ref = list(yaml.load_all(f, Loader=yaml.BaseLoader))

    if True:
        # Test the double-layer index
        idx = ExonIndex(K, ref)

        acc = {}
        toc = {}
        rn = 0
        for itm in reads(opts['<input>'],
                         K=K,
                         paired=True,
                         reads=True,
                         kmers=False,
                         both=True,
                         verbose=verbose):
            rn += 1
            (lhsFwd, lhsRev) = kmersLists(K, itm.reads[0][1])
            (rhsFwd, rhsRev) = kmersLists(K, itm.reads[1][1])
            xs0 = lhsFwd + rhsRev
            rh0 = idx.readHash(xs0)
            if rh0 is not None:
                (h0, ys0) = rh0
                if h0 not in acc:
                    acc[h0] = []
                    toc[h0] = ys0
                acc[h0].append((compressRead(itm.reads[0][1]),
                                compressRead(itm.reads[1][1])))

            xs1 = lhsRev + rhsFwd
            rh1 = idx.readHash(xs1)
            if rh1 is not None:
                (h1, ys1) = rh1
                if h1 not in acc:
                    acc[h1] = []
                    toc[h1] = ys1
                acc[h1].append((compressRead(itm.reads[0][1]),
                                compressRead(itm.reads[1][1])))

        nx = 0
        for h in sorted(acc.keys()):
            for (x, c) in sorted(acc[h].items()):
                nx += 1
                if c <= 1:
                    continue
                print '%016x\t%s\t%d' % (h, render(K, x), c)

        print >> sys.stderr, 'nx =', nx
        return

    if False:
        # Position index
        idx = {}
        for i in range(len(ref)):
            itm = ref[i]
            for (x, p) in kmersWithPosList(K, itm['seq'], False):
                p -= 1
                if x not in idx:
                    idx[x] = []
                idx[x].append((i, p))

    if True:
        # Exon tuple index
        idx = {}
        lens = [0 for i in range(len(ref))]
        for i in range(len(ref)):
            itm = ref[i]
            for (x, p) in kmersWithPosList(K, itm['seq'], False):
                if x not in idx:
                    idx[x] = set([])
                idx[x].add(i)
                lens[i] += 1
        for x in idx.iterkeys():
            idx[x] = tuple(sorted(idx[x]))

    if opts['-T']:
        ak = {}
        for x in sorted(idx.iterkeys()):
            if len(idx[x]) == 1:
                continue
            xStr = render(K, x)
            ak[xStr] = []
            for i in idx[x]:
                itm = ref[i]
                k = '%s/%s' % (itm['gene'], itm['exon'])
                ak[xStr].append(k)
            ak[xStr].sort()
        rep = {}
        rep['aliasing-within'] = ak
        chrs = set([])
        for i in range(len(ref)):
            itm = ref[i]
            chrs.add(itm['chr'])
        counts = [0 for i in range(len(ref))]
        for ch in sorted(chrs):
            if verbose:
                print >> sys.stderr, 'processing %s' % (ch, )
            seq = sf[ch]
            for (x, p) in kmersWithPos(K, seq, True):
                if x not in idx:
                    continue
                for i in idx[x]:
                    counts[i] += 1
        gk = {}
        for i in range(len(ref)):
            if lens[i] == counts[i]:
                continue
            itm = ref[i]
            k = '%s/%s' % (itm['gene'], itm['exon'])
            gk[k] = {'indexed': lens[i], 'genomic': counts[i]}
        rep['aliasing-genomic'] = gk
        yaml.safe_dump(rep, sys.stdout, default_flow_style=False)
        return

    acc = {}
    rn = 0
    hitStats = Summary()
    hitHist = [0 for i in range(1000)]
    for itm in reads(opts['<input>'],
                     K=K,
                     paired=True,
                     reads=True,
                     kmers=False,
                     both=True,
                     verbose=verbose):
        rn += 1
        (lhsFwd, lhsRev) = kmersWithPosLists(K, itm.reads[0][1])
        (rhsFwd, rhsRev) = kmersWithPosLists(K, itm.reads[1][1])
        (hits0, hitCount0) = recHits(idx, lhsFwd + rhsRev)
        (hits1, hitCount1) = recHits(idx, lhsRev + rhsFwd)
        if len(hits0) > 0:
            k = tuple(sorted(hits0.keys()))
            v = sum(hits0.values())
            if k not in acc:
                acc[k] = [0, 0]
            acc[k][0] += 1
            acc[k][1] += v
            hitStats.add(hitCount0)
            hitHist[hitCount0] += 1

        if len(hits1) > 0:
            k = tuple(sorted(hits1.keys()))
            v = sum(hits1.values())
            if k not in acc:
                acc[k] = [0, 0]
            acc[k][0] += 1
            acc[k][1] += v
            hitStats.add(hitCount1)
            hitHist[hitCount1] += 1

    if verbose:
        print >> sys.stderr, 'total read hits: %d' % (len(hitStats), )
        print >> sys.stderr, 'total hits per read: %g (%g)' % (hitStats.mean(),
                                                               hitStats.sd())
        print >> sys.stderr, 'total reads: %d' % (rn, )
        for i in range(len(hitHist)):
            if hitHist[i] > 0:
                print >> sys.stderr, '\t%d\t%d' % (i, hitHist[i])

    def gex(s):
        r = []
        for n in s:
            itm = ref[n]
            r.append('%s/%s' % (itm['gene'], itm['exon']))
        return '|'.join(r)

    def fmtKey(k):
        nex = len(k)
        gx = set([])
        kStrParts = []
        for s in k:
            kStrParts.append(gex(s))
            gx |= set([ref[i]['gene'] for i in s])
        kStr = '--'.join(sorted(kStrParts))
        return (nex, gx, kStr)

    gxCounts = {}
    for k in acc.keys():
        gx = set([])
        ex = set([])
        for s in k:
            gx |= set([ref[i]['gene'] for i in s])
            ex |= set(s)
        gx = tuple(sorted(gx))
        if gx not in gxCounts:
            gxCounts[gx] = [0, 0]
        gxCounts[gx][0] += acc[k][0]
        gxCounts[gx][1] += acc[k][1]

    hdr = ['numReads', 'numKmers', 'kmersPerRead']
    hdr += ['ggNumReads', 'ggNumKmers', 'ggKmersPerRead']
    hdr += ['numExons', 'numGenes', 'geneGroup', 'exonGroup']
    print '\t'.join(hdr)
    for k in acc.keys():
        (nex, gx, kStr) = fmtKey(k)
        gx = tuple(sorted(gx))
        if len(gx) < minGeneCount or len(gx) > maxGeneCount:
            continue
        if len(ex) < minExonCount or len(ex) > maxExonCount:
            continue
        if gxCounts[gx][0] < minGeneReads:
            continue
        if acc[k][0] < minExonReads:
            continue
        gxRate = float(gxCounts[gx][1]) / float(gxCounts[gx][0])
        if gxRate < minGeneRate:
            continue
        exRate = float(acc[k][1]) / float(acc[k][0])
        if exRate < minExonRate:
            continue
        gxStr = ':'.join(gx)

        print '%d\t%d\t%g\t%d\t%d\t%g\t%d\t%d\t%s\t%s' % (
            acc[k][0], acc[k][1], exRate, gxCounts[gx][0], gxCounts[gx][1],
            gxRate, nex, len(gx), gxStr, kStr)