#statsShuffles = RunningStats()
statsShuffles = OfflineStats()

recordsCount = 0
warningsCount = 0

rl = RateLimit(30)

total = countSpeciesCDS(taxId)

for protId in SpeciesCDSSource(taxId):
    cds = CDSHelper(taxId, protId)

    statsShuffles.push(
        cds.dropShuffledSeqs(lastItemToKeep=args.keep_first_n_shuffles))

    recordsCount += 1

    if (rl()):
        print("processed %d records (%.2g%%)" %
              (recordsCount, float(recordsCount) / total * 100))

    # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY #
    #if( recordsCount > 20 ):
    #    break
    # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY # DEBUG ONLY #

print(statsShuffles.count())
print("%.3g %.3g +-%.3g %.3g" % (statsShuffles.min(), statsShuffles.mean(), 2 *
                                 statsShuffles.stdev(), statsShuffles.max()))
Exemplo n.º 2
0
          (taxId, getSpeciesName(taxId)))
    print("Nothing left to do...")
    sys.exit(0)

print("Species %d (%s) has %d proteins stored." %
      (taxId, getSpeciesName(taxId), countSpeciesCDS(taxId)))
print("Will delete it in 10 seconds...")
sleep(10)

count = 0

for protId in SpeciesCDSSource(taxId):
    print(protId)
    cds = CDSHelper(taxId, protId)
    try:
        cds.dropShuffledSeqs()
    except Exception as e:
        print(e)

    try:
        cds.dropNativeSeq()
    except Exception as e:
        print(e)

    cds.dropRecord()

    count += 1

    if (rl()):
        print("Done processing %d records" % count)