def generate_gram_list(gram_groups, intervals):
    """Compute and persist n-gram counts for every gram group over every (year, season) interval."""

    def log_memory():
        # Report current/peak traced memory (uses tracemalloc's private size formatter).
        current, peak = tracemalloc.get_traced_memory()
        logging.info("Memory: current: %s, peak: %s"
                     % (tracemalloc._format_size(current, False),
                        tracemalloc._format_size(peak, False)))

    if LOG_MEM:
        log_memory()
    policies_db_cache = ioutils.load_clean_policies_db()
    for group in gram_groups:  # alternative considered: [sum(gram_groups, [])]
        label = get_n_str(group)
        logging.info("Starting counts for %s-grams at %s"
                     % (label, datetime.now().strftime("%H:%M:%S")))
        for year, season in intervals:
            grams = generate_grams(group, "%d%s" % (year, season),
                                   policies_db_cache=policies_db_cache)
            # Write changes for this interval to disk.
            write_grams(grams, year, season)
            # (ioutils.close_db(year=year, season=season) was tried and left disabled.)
            grams = None  # drop the reference early to save memory
        logging.info("Done with counts for %s-grams at %s"
                     % (label, datetime.now().strftime("%H:%M:%S")))
        if LOG_MEM:
            gc.collect()  # Collect garbage so we know the memory usage is accurate
            log_memory()
def format_size(self, role, size, diff):
    """Render a byte count for a Qt item role.

    Sort role gets the raw integer; tooltip role gets the exact byte count
    (or None when the value is small enough to need no tooltip); every
    other role gets the human-readable form.
    """
    if role == SORT_ROLE:
        # Sorting needs the raw number, not a display string.
        return size
    if role != Qt.ToolTipRole:
        return tracemalloc._format_size(size, diff)
    # Tooltip: only useful once the human-readable form starts rounding.
    if abs(size) < 10 * 1024:
        return None
    return ("%+i" % size) if diff else str(size)
def set_stats(self, snapshot1, snapshot2, group_by, cumulative):
    """Recompute the model's statistics from one snapshot (or a diff of two)
    and rebuild the column headers, bracketed by Qt layout-change signals."""
    self.emit(QtCore.SIGNAL("layoutAboutToBeChanged()"))

    if snapshot1 is None:
        # No data: reset everything to an empty state.
        self.stats = ()
        self.diff = False
        self.total = 0
        self.total_text = tracemalloc._format_size(0, False)
    else:
        if snapshot2 is None:
            stats = snapshot1.statistics(group_by, cumulative)
        else:
            stats = snapshot2.compare_to(snapshot1, group_by, cumulative)
        self.stats = stats
        # Diff mode is detected from the element type, not from the arguments.
        self.diff = isinstance(stats[0], tracemalloc.StatisticDiff)
        self.total = sum(stat.size for stat in self.stats)
        self.total_text = tracemalloc._format_size(self.total, False)
        if snapshot2 is not None:
            # Append the signed delta between the two snapshots' totals.
            total1 = sum(trace.size for trace in snapshot1.traces)
            self.total_text += ' (%s)' % tracemalloc._format_size(
                self.total - total1, True)

    self.group_by = group_by
    if group_by == 'traceback':
        source = self.tr("Traceback")
    elif group_by == 'lineno':
        source = self.tr("Line")
    else:
        source = self.tr("Filename")

    # Diff columns are interleaved right after their base columns.
    headers = [source, self.tr("Size")]
    if self.diff:
        headers.append(self.tr("Size Diff"))
    headers.append(self.tr("Count"))
    if self.diff:
        headers.append(self.tr("Count Diff"))
    headers.extend([self.tr("Item Size"), self.tr("%Total")])
    self.headers = headers

    self.emit(QtCore.SIGNAL("layoutChanged()"))
def display():
    """Print the five largest allocation sites from a fresh tracemalloc snapshot."""
    # A filtered snapshot was tried and left disabled:
    #   t.take_snapshot().filter_traces((
    #       t.Filter(False, "<frozen importlib._bootstrap>"),
    #       t.Filter(False, "<unknown>"),
    #   ))
    snapshot = t.take_snapshot()
    top = snapshot.statistics("lineno", cumulative=False)
    for stat in top[:5]:
        print("----------------------------------------")
        print(t._format_size(stat.size, False))
        for frame_line in stat.traceback.format():
            print(frame_line)
    print("========================================")
def get_label(self):
    """Return a display label for this snapshot file, computing totals lazily on first use."""
    if self.ntraces is None:
        print("Process snapshot %s..." % self.filename)
        # load() fills ntraces and total; unload right away to keep memory low.
        self.load()
        self.unload()
        print("Process snapshot %s... done" % self.filename)
    name = os.path.basename(self.filename)
    details = ', '.join([
        tracemalloc._format_size(self.total, False),
        fmt(tr("%s traces"), self.ntraces),
        str(self.timestamp),
    ])
    return "%s (%s)" % (name, details)
def loop(*, size, times):
    """Call g(size) `times` times, printing traced-memory usage and the top
    allocation sites (with tooling noise filtered out) after each call."""
    # Exclude frames from the tracing/regex/import machinery itself.
    noise = (
        t.Filter(False, "<frozen importlib._bootstrap>"),
        t.Filter(False, "*tracemalloc*"),
        t.Filter(False, "*linecache*"),
        t.Filter(False, "*sre_*"),
        t.Filter(False, "*re.py"),
        t.Filter(False, "*fnmatch*"),
        t.Filter(False, "*tokenize*"),
        t.Filter(False, "<unknown>"),
    )
    for _ in range(times):
        print(len(L))
        g(size)
        print([t._format_size(x, False) for x in t.get_traced_memory()])
        snapshot = t.take_snapshot().filter_traces(noise)
        for stat in snapshot.statistics("lineno", cumulative=False)[:3]:
            print("----------------------------------------")
            print(t._format_size(stat.size, False))
            for frame_line in stat.traceback.format():
                print(frame_line)
        print("========================================")
import tracemalloc as t

# Traced memory is (0, 0) before tracing starts.
print("*start")
print([t._format_size(x, False) for x in t.get_traced_memory()])

t.start()
# Allocate 100 lists of 10,000 ints so there is something to trace.
L = [[v for v in range(10000)] for i in range(100)]
print("*gen")
print([t._format_size(x, False) for x in t.get_traced_memory()])

# Top 3 allocation sites grouped by full traceback.
snapshot = t.take_snapshot()
for stats in snapshot.statistics("traceback")[:3]:
    print(stats)

print("----------------------------------------")

# Top 3 grouped by line, with sizes accumulated over callers.
snapshot = t.take_snapshot()
for stats in snapshot.statistics("lineno", cumulative=True)[:3]:
    print(stats)

t.stop()
print([t._format_size(x, False) for x in t.get_traced_memory()])
def mem_count():
    """Log current and peak traced memory, but only when MEM_DEBUG is enabled."""
    if not MEM_DEBUG:
        return
    current, peak = tracemalloc.get_traced_memory()
    logging.info("Memory: current: %s, peak: %s"
                 % (tracemalloc._format_size(current, False),
                    tracemalloc._format_size(peak, False)))
import json
from collections import defaultdict, namedtuple

from pympler.asizeof import asizeof
from tracemalloc import _format_size

# Maps a sorted field-name tuple to a short stable id ("0", "1", ...),
# assigned in order of first appearance.
idmap = defaultdict(lambda: str(len(idmap)))

# Cache of generated namedtuple classes, keyed by the idmap id of their
# field set. Module-level instead of a mutable default argument (`named={}`
# in the original) — the shared-cache behavior is now explicit rather than
# a side effect of Python's one-time default evaluation.
_named_cache = {}


def on_pairs(pairs, named=None):
    """json `object_pairs_hook`: fold each JSON object into a namedtuple.

    Objects sharing the same (sorted, leading-underscore-stripped) field
    names share one generated namedtuple class, which is far more
    memory-compact than one dict per object.

    pairs: list of (key, value) tuples as supplied by the json decoder.
    named: optional class cache; defaults to the shared module-level cache,
           preserving the original's cross-call caching behavior.
    """
    if named is None:
        named = _named_cache
    fields = tuple(sorted(p[0].lstrip("_") for p in pairs))
    k = idmap[fields]
    if k not in named:
        named[k] = namedtuple("N" + k, " ".join(fields))
    return named[k](*[p[1] for p in pairs])


filename = "citylots.json"  # size is 181 MiB
with open(filename) as rf:
    d0 = json.load(rf, object_pairs_hook=on_pairs)

print(len(idmap))
print(type(d0), _format_size(asizeof(d0), False))
# 4
# <class '__main__.N3'> 638 MiB
# real 1m45.924s
# user 1m38.756s
# sys 0m5.089
import json
from tracemalloc import _format_size

from pympler.asizeof import asizeof

# Baseline for the namedtuple experiment: load the 181 MiB citylots JSON
# with plain dicts and measure the deep in-memory size of the result.
filename = "citylots.json"
with open(filename) as rf:
    d0 = json.load(rf)

print(type(d0), _format_size(asizeof(d0), False))
# <class 'dict'> 795 MiB
# real 1m31.358s
# user 1m25.421s
# sys 0m4.634s
def loop(*, size, times):
    """Run g(size) `times` times, logging (current, peak) traced memory before each call."""
    for _ in range(times):
        usage = [t._format_size(x, False) for x in t.get_traced_memory()]
        logger.info("memory (current, peak) %s", str(usage))
        g(size)
def main():
    """Entry point: parse CLI arguments, build the gram groups and intervals,
    then run generate_gram_list over them.

    Usage shape: analytics.py [--start MIN] [--stop MAX] intervals... [-s] [-w] [-e]
    Finds n-grams for each n in [MIN .. MAX]; -s adds sentences, -w words,
    -e entities. intervals are 'YYYY', 'YYYYS' (S = season letter), or 'all'.
    """
    global NO_PUNCTUATION
    if LOG_MEM:
        tracemalloc.start()
    logging.info("Starting at %s " % datetime.now().strftime("%H:%M:%S"))

    parser = argparse.ArgumentParser(
        description='Breaks documents into n-grams under a variety of filters')  # fixed typo "fitlers"
    parser.add_argument('--start', dest="MIN", default=3, type=int,
                        help='Analyze n-grams with n>=start')
    parser.add_argument('--stop', dest="MAX", default=9, type=int,
                        help='Analyze n-grams with n<=stop')
    parser.add_argument(dest="intervals", type=str, nargs='+',
                        help='Intervals to collect n-grams over')
    parser.add_argument('-s', dest="sentences", action='store_const', const=True,
                        default=False, help='Examine sentences')
    parser.add_argument('-w', dest="words", action='store_const', const=True,
                        default=False, help='Examine words')
    parser.add_argument('-e', dest="entities", action='store_const', const=True,
                        default=False, help='Examine entities')
    util.add_arguments(parser)
    args = parser.parse_args()

    start = args.MIN
    stop = args.MAX + 1
    yearseasons = args.intervals
    SENTENCES = args.sentences
    WORDS = args.words
    ENTITIES = args.entities
    util.process_arguments(args)
    NO_PUNCTUATION = util.NO_PUNCTUATION
    # NOTE: the original also bound util.MERGE_SIMILAR/USE_CLEAN-derived
    # locals (MERGE_SIMILAR, clean, np) that were never used; removed.

    global stopwords
    stopwords = set(nltk.corpus.stopwords.words('english'))
    # Placeholder tokens inserted by the cleaning pass count as stopwords too.
    stopwords.update(["_organization_", "_number_", "_url_", "_email_"])

    # BUG FIX: the original called os.mkdir("../data/%s/" % yearseason) with
    # `yearseason` undefined at that point and hid the resulting NameError
    # behind a bare `except:`, so the directory was never created. Create one
    # data directory per requested interval instead.
    for ys in yearseasons:
        try:
            os.mkdir("../data/%s/" % ys)
        except OSError:
            pass  # best-effort: directory already exists (or parent missing)

    gram_groups = [[n] for n in range(start, stop)]
    if SENTENCES:
        gram_groups.append(["s"])
    if WORDS:
        gram_groups.append(["w"])
    if ENTITIES:
        gram_groups.append(["e", "m", "u"])

    # Decide how much we're going to iterate.
    if yearseasons[0] == "all":
        logging.info("Removing old data at %s " % datetime.now().strftime("%H:%M:%S"))
        ioutils.remove_grams()
        logging.info("Done removing old data at %s" % datetime.now().strftime("%H:%M:%S"))
        intervals = [t for t in util.iter_year_season()]
    else:
        intervals = []
        for yearseason in yearseasons:
            year = int(yearseason[:4])
            if len(yearseason) == 5:
                # 'YYYYS': a single season of one year.
                season = yearseason[4]
                intervals.append((year, season))
            elif len(yearseason) == 4:
                # 'YYYY': both seasons of that year.
                intervals.append((year, 'A'))
                intervals.append((year, 'B'))
            else:
                logging.error("Error on %s\n" % yearseason)

    generate_gram_list(gram_groups, intervals)

    if LOG_MEM:
        print("Max memory usage:")
        print("Current: %s, Peak: %s" % tuple(
            (tracemalloc._format_size(m, False) for m in tracemalloc.get_traced_memory())))
def handle_traceback(sig, frame):
    """Signal handler: log (current, peak) traced memory, then dump a short stack trace."""
    usage = [t._format_size(x, False) for x in t.get_traced_memory()]
    logger.info("memory (current, peak) %s", str(usage))
    import traceback
    traceback.print_stack(limit=5)
def tick():
    """Log (current, peak) traced memory every 200 ms, forever."""
    while True:
        sizes = [t._format_size(x, False) for x in t.get_traced_memory()]
        logger.info("%s", str(sizes))
        time.sleep(0.2)
def tick():
    """Background ticker: report traced-memory usage five times a second."""
    while True:
        report = str([t._format_size(n, False) for n in t.get_traced_memory()])
        logger.info("%s", report)
        time.sleep(0.2)
def update_event(self, inp=-1):
    """Node update: push the formatted size of input 0 (sign flag from input 1) to output 0."""
    size = self.input(0)
    signed = self.input(1)
    self.set_output_val(0, tracemalloc._format_size(size, signed))
def handle_traceback(sig, frame):
    """Signal handler that logs traced-memory usage and prints the current call stack."""
    current_peak = t.get_traced_memory()
    logger.info("memory (current, peak) %s",
                str([t._format_size(v, False) for v in current_peak]))
    import traceback
    traceback.print_stack(limit=5)