# Collect the values returned by cds.dropShuffledSeqs() for every CDS record
# of species `taxId`, then print summary statistics once the pass is complete.
# Alternative accumulator, currently disabled: RunningStats()
statsShuffles = OfflineStats()
recordsCount = 0
warningsCount = 0  # never updated within this section
rl = RateLimit(30)  # NOTE(review): presumably throttles the progress output - confirm
total = countSpeciesCDS(taxId)

for protId in SpeciesCDSSource(taxId):
    cds = CDSHelper(taxId, protId)
    shuffleResult = cds.dropShuffledSeqs(
        lastItemToKeep=args.keep_first_n_shuffles)
    statsShuffles.push(shuffleResult)
    recordsCount += 1

    # Progress is reported only when the rate limiter returns a true value.
    if rl():
        percentDone = float(recordsCount) / total * 100
        print("processed %d records (%.2g%%)" % (recordsCount, percentDone))

    # DEBUG ONLY: uncomment to stop after the first records.
    # if recordsCount > 20:
    #     break

# Summary: number of pushed values, then "min mean +-2*stdev max".
print(statsShuffles.count())
summaryFields = (
    statsShuffles.min(),
    statsShuffles.mean(),
    2 * statsShuffles.stdev(),
    statsShuffles.max(),
)
print("%.3g %.3g +-%.3g %.3g" % summaryFields)
# Species deletion pass (flat script section; newlines/indentation are collapsed).
#
# NOTE(review): the section opens with `(taxId, getSpeciesName(taxId)))`, the
# tail of a statement that starts outside the visible source. The
# "Nothing left to do..." message and sys.exit(0) that follow are presumably
# guarded by a condition that is also not visible here - confirm.
#
# Visible flow:
#   1. Print the species id, its name (getSpeciesName) and the number of
#      stored proteins (countSpeciesCDS), announce the deletion, then pause
#      with sleep(10) (presumably to leave time to abort - confirm).
#   2. For every protId yielded by SpeciesCDSSource(taxId): print it, build a
#      CDSHelper, then call dropShuffledSeqs() and dropNativeSeq(). Each call
#      sits in its own try/except that prints the exception and carries on,
#      so a failure in one does not prevent the following steps.
#   3. cds.dropRecord() is NOT wrapped in try/except; an exception raised
#      there propagates out of the loop.
#   4. `count` is incremented per record, and a progress line is printed
#      whenever rl() returns a true value.
#
# NOTE(review): `rl` is not assigned anywhere within this span; it must
# already be bound before this point - verify.
(taxId, getSpeciesName(taxId))) print("Nothing left to do...") sys.exit(0) print("Species %d (%s) has %d proteins stored." % (taxId, getSpeciesName(taxId), countSpeciesCDS(taxId))) print("Will delete it in 10 seconds...") sleep(10) count = 0 for protId in SpeciesCDSSource(taxId): print(protId) cds = CDSHelper(taxId, protId) try: cds.dropShuffledSeqs() except Exception as e: print(e) try: cds.dropNativeSeq() except Exception as e: print(e) cds.dropRecord() count += 1 if (rl()): print("Done processing %d records" % count)