def test_get_traces_intern_traceback(self):
    # dummy wrappers to get more useful and identical frames in the traceback
    def allocate_bytes2(size):
        return allocate_bytes(size)

    def allocate_bytes3(size):
        return allocate_bytes2(size)

    def allocate_bytes4(size):
        return allocate_bytes3(size)

    # Ensure that two identical tracebacks are not duplicated
    tracemalloc.stop()
    tracemalloc.start(4)
    obj_size = 123
    obj1, obj1_traceback = allocate_bytes4(obj_size)
    obj2, obj2_traceback = allocate_bytes4(obj_size)

    traces = tracemalloc._get_traces()
    trace1 = self.find_trace(traces, obj1_traceback)
    trace2 = self.find_trace(traces, obj2_traceback)
    size1, traceback1 = trace1
    size2, traceback2 = trace2
    self.assertEqual(traceback2, traceback1)
    self.assertIs(traceback2, traceback1)
def test_stop_track(self):
    tracemalloc.start()
    tracemalloc.stop()

    with self.assertRaises(RuntimeError):
        self.track()
    self.assertIsNone(self.get_traceback())
def test_get_traced_memory(self):
    # Python allocates some internals objects, so the test must tolerate
    # a small difference between the expected size and the real usage
    max_error = 2048

    # allocate one object
    obj_size = 1024 * 1024
    tracemalloc.clear_traces()
    obj, obj_traceback = allocate_bytes(obj_size)
    size, peak_size = tracemalloc.get_traced_memory()
    self.assertGreaterEqual(size, obj_size)
    self.assertGreaterEqual(peak_size, size)

    self.assertLessEqual(size - obj_size, max_error)
    self.assertLessEqual(peak_size - size, max_error)

    # destroy the object
    obj = None
    size2, peak_size2 = tracemalloc.get_traced_memory()
    self.assertLess(size2, size)
    self.assertGreaterEqual(size - size2, obj_size - max_error)
    self.assertGreaterEqual(peak_size2, peak_size)

    # clear_traces() must reset traced memory counters
    tracemalloc.clear_traces()
    self.assertEqual(tracemalloc.get_traced_memory(), (0, 0))

    # allocate another object
    obj, obj_traceback = allocate_bytes(obj_size)
    size, peak_size = tracemalloc.get_traced_memory()
    self.assertGreaterEqual(size, obj_size)

    # stop() also resets traced memory counters
    tracemalloc.stop()
    self.assertEqual(tracemalloc.get_traced_memory(), (0, 0))
def test_stop_untrack(self):
    tracemalloc.start()
    self.track()

    tracemalloc.stop()
    with self.assertRaises(RuntimeError):
        self.untrack()
def stop():
    """
    Stops application memory profiling
    """
    logging.debug("Stopping memory profiling")
    with _lock:
        if is_running():
            snapshot(_make_snapshot_name)
            tracemalloc.clear_traces()
            tracemalloc.stop()
def stop(self):
    if not self.profiling:
        return
    self.profiling = False
    tracemalloc.stop()
    self.timer.stop()
    self.trace_stream.close()
    self.trace_stream = None
    self.timer = None
def _stop_memory_tracing():
    try:
        import tracemalloc
    except ImportError:
        return

    snapshot = tracemalloc.take_snapshot()
    _log_memory_top(snapshot)
    tracemalloc.stop()
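A plausible sketch of the `_log_memory_top` helper the function above relies on; its real implementation is not shown in this snippet, so the body and the `limit` parameter here are assumptions.

# Hypothetical helper: logs the top allocation sites from a snapshot.
def _log_memory_top(snapshot, limit=10):
    import logging
    for stat in snapshot.statistics('lineno')[:limit]:
        logging.getLogger(__name__).debug("%s", stat)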
def measure_memory_diff(self, func):
    import tracemalloc
    tracemalloc.start()
    try:
        before = tracemalloc.take_snapshot()
        # Keep the result and only delete it after taking a snapshot
        res = func()
        after = tracemalloc.take_snapshot()
        del res
        return after.compare_to(before, 'lineno')
    finally:
        tracemalloc.stop()
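A self-contained usage sketch of the same pattern as the method above; the standalone function and the list workload are illustrative assumptions, not part of the original source.

import tracemalloc

def measure_memory_diff(func):
    # Standalone variant of the method above, for illustration only.
    tracemalloc.start()
    try:
        before = tracemalloc.take_snapshot()
        res = func()
        after = tracemalloc.take_snapshot()
        del res
        return after.compare_to(before, 'lineno')
    finally:
        tracemalloc.stop()

# compare_to() returns StatisticDiff objects sorted by size difference.
for stat in measure_memory_diff(lambda: [0] * 1_000_000)[:3]:
    print(stat)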
def exec_with_profiler(filename, profiler, backend):
    choose_backend(backend)
    if _backend == 'tracemalloc' and has_tracemalloc:
        tracemalloc.start()
    builtins.__dict__['profile'] = profiler
    # shadow the profile decorator defined above
    ns = dict(_CLEAN_GLOBALS, profile=profiler)
    try:
        with open(filename) as f:
            exec(compile(f.read(), filename, 'exec'), ns, ns)
    finally:
        if has_tracemalloc and tracemalloc.is_tracing():
            tracemalloc.stop()
def stop_profiler(self):
    self.agent.log('Deactivating memory allocation profiler.')

    with self.profile_lock:
        if self.overhead_monitor:
            self.overhead_monitor.cancel()
            self.overhead_monitor = None

        if tracemalloc.is_tracing():
            snapshot = tracemalloc.take_snapshot()
            self.agent.log('Allocation profiler memory overhead {0} bytes'.format(
                tracemalloc.get_tracemalloc_memory()))
            tracemalloc.stop()
            self.process_snapshot(snapshot, time.time() - self.start_ts)
def exec_with_profiler(filename, profiler, backend, passed_args=[]):
    from runpy import run_module
    builtins.__dict__['profile'] = profiler
    ns = dict(_CLEAN_GLOBALS, profile=profiler)
    _backend = choose_backend(backend)
    sys.argv = [filename] + passed_args
    try:
        if _backend == 'tracemalloc' and has_tracemalloc:
            tracemalloc.start()
        with open(filename) as f:
            exec(compile(f.read(), filename, 'exec'), ns, ns)
    finally:
        if has_tracemalloc and tracemalloc.is_tracing():
            tracemalloc.stop()
def test_get_traces(self):
    tracemalloc.clear_traces()
    obj_size = 12345
    obj, obj_traceback = allocate_bytes(obj_size)

    traces = tracemalloc._get_traces()
    trace = self.find_trace(traces, obj_traceback)

    self.assertIsInstance(trace, tuple)
    domain, size, traceback = trace
    self.assertEqual(size, obj_size)
    self.assertEqual(traceback, obj_traceback._frames)

    tracemalloc.stop()
    self.assertEqual(tracemalloc._get_traces(), [])
def run_module_with_profiler(module, profiler, backend, passed_args=[]):
    from runpy import run_module
    builtins.__dict__['profile'] = profiler
    ns = dict(_CLEAN_GLOBALS, profile=profiler)
    _backend = choose_backend(backend)
    sys.argv = [module] + passed_args
    if PY2:
        run_module(module, run_name="__main__", init_globals=ns)
    else:
        if _backend == 'tracemalloc' and has_tracemalloc:
            tracemalloc.start()
        try:
            run_module(module, run_name="__main__", init_globals=ns)
        finally:
            if has_tracemalloc and tracemalloc.is_tracing():
                tracemalloc.stop()
def test_does_not_leak_too_much(self):
    tracemalloc.start()
    gc.collect()
    series = []
    snapshot1 = tracemalloc.take_snapshot()
    for i in range(100):
        try:
            execute_script(self.feature, self)
        except Exception:
            pass
        gc.collect()
        snapshot2 = tracemalloc.take_snapshot()
        stats = snapshot2.compare_to(snapshot1, "lineno")
        snapshot1 = snapshot2
        series.append(sum(stat.size / 1024 for stat in stats))
    tracemalloc.stop()
    series = series[1:]  # ignore first run, which creates regex
    cv = statistics.stdev(series) / statistics.mean(series)
    assert cv < 0.1
def test_snapshot(self):
    obj, source = allocate_bytes(123)

    # take a snapshot
    snapshot = tracemalloc.take_snapshot()

    # write on disk
    snapshot.dump(support.TESTFN)
    self.addCleanup(support.unlink, support.TESTFN)

    # load from disk
    snapshot2 = tracemalloc.Snapshot.load(support.TESTFN)
    self.assertEqual(snapshot2.traces, snapshot.traces)

    # tracemalloc must be tracing memory allocations to take a snapshot
    tracemalloc.stop()
    with self.assertRaises(RuntimeError) as cm:
        tracemalloc.take_snapshot()
    self.assertEqual(str(cm.exception),
                     "the tracemalloc module must be tracing memory "
                     "allocations to take a snapshot")
def compute(self):
    args = self.args

    if args.track_memory:
        if MS_WINDOWS:
            from perf._win_memory import get_peak_pagefile_usage
        else:
            from perf._memory import PeakMemoryUsageThread
            mem_thread = PeakMemoryUsageThread()
            mem_thread.start()

    if args.tracemalloc:
        import tracemalloc
        tracemalloc.start()

    WorkerTask.compute(self)

    if args.tracemalloc:
        traced_peak = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

        if not traced_peak:
            raise RuntimeError("tracemalloc didn't trace any Python "
                               "memory allocation")

        # drop timings, replace them with the memory peak
        self._set_memory_value(traced_peak)

    if args.track_memory:
        if MS_WINDOWS:
            mem_peak = get_peak_pagefile_usage()
        else:
            mem_thread.stop()
            mem_peak = mem_thread.peak_usage

        if not mem_peak:
            raise RuntimeError("failed to get the memory peak usage")

        # drop timings, replace them with the memory peak
        self._set_memory_value(mem_peak)
def pyfaidx_fasta(n):
    print('timings for pyfaidx.Fasta')
    ti = []
    tf = []
    for _ in range(n):
        t = time.time()
        f = pyfaidx.Fasta(fa_file.name)
        ti.append(time.time() - t)

        t = time.time()
        read_dict(f, headers)
        tf.append(time.time() - t)
        os.remove(index)
    # profile memory usage and report timings
    tracemalloc.start()
    f = pyfaidx.Fasta(fa_file.name)
    read_dict(f, headers)
    os.remove(index)
    print(tracemalloc.get_traced_memory())
    print(mean(ti))
    print(mean(tf) / nreads / 10 * 1000 * 1000)
    tracemalloc.stop()
def show(filename=None):
    global before_objects
    gc.collect()
    after_objects = gc.get_objects()
    frame = sys._getframe()
    globals_ = globals()
    num_leaks = 0
    before_id_set = set(map(id, before_objects))
    if filename is not None:
        f = open(filename, 'w')
    else:
        f = sys.stderr
    for obj in after_objects:
        if id(obj) not in before_id_set:
            if obj in (before_objects, frame):
                continue
            num_leaks += 1
            print("Leak: id=0x{:x} type={} {}".format(id(obj), type(obj), _get_detail(obj)), file=f)
            tb = tracemalloc.get_object_traceback(obj)
            if tb is None:
                print("Traceback: None", file=f)
            else:
                print("Traceback:", file=f)
                print("\n".join(tb.format(MAX_FRAMES)), file=f)
            print(file=f)
            print("Referrers:", file=f)
            for ref in gc.get_referrers(obj):
                if ref in (after_objects, before_objects, frame, globals_):
                    continue
                print("  id=0x{:x} type={} {}".format(id(ref), type(ref), _get_detail(ref)), file=f)
                print("  traceback: {}".format(tracemalloc.get_object_traceback(ref)), file=f)
            print(file=f)
    print(file=f)
    print("Total leaks: {}".format(num_leaks), file=f)
    if filename is not None:
        f.close()
    tracemalloc.stop()
    gc.enable()
def pyfaidx_bgzf_faidx(n):
    print('timings for pyfaidx.Faidx with bgzf compression')
    ti = []
    tf = []
    for _ in range(n):
        t = time.time()
        f = pyfaidx.Faidx(fa_file.name + '.gz')
        ti.append(time.time() - t)

        t = time.time()
        read_faidx(f, headers)
        tf.append(time.time() - t)
        os.remove(index)
    # profile memory usage and report timings
    tracemalloc.start()
    f = pyfaidx.Faidx(fa_file.name + '.gz')
    read_faidx(f, headers)
    os.remove(index)
    print(tracemalloc.get_traced_memory())
    print(mean(ti))
    print(mean(tf) / nreads / 10 * 1000 * 1000)
    tracemalloc.stop()
def bench(func, trace=True, nframe=1):
    if trace:
        tracemalloc.stop()
        tracemalloc.start(nframe)
    gc.collect()
    best = None
    for run in range(BENCH_RUNS):
        start = time.monotonic()
        func()
        dt = time.monotonic() - start
        if best is not None:
            best = min(best, dt)
        else:
            best = dt
    if trace:
        mem = tracemalloc.get_tracemalloc_memory()
        ntrace = len(tracemalloc.take_snapshot().traces)
        tracemalloc.stop()
    else:
        mem = ntrace = None
    gc.collect()
    return best * 1e3, mem, ntrace
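A hypothetical call to the `bench()` helper above, assuming it is in scope together with the `BENCH_RUNS` constant it references (defined elsewhere in the original); the byte-allocating workload is an arbitrary example.

# Illustrative only: times the workload and reports tracemalloc's own
# bookkeeping overhead plus the number of traced allocation sites.
best_ms, mem, ntrace = bench(lambda: [bytes(1000) for _ in range(1000)],
                             trace=True, nframe=1)
print(f"best: {best_ms:.3f} ms, overhead: {mem} B, traces: {ntrace}")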
def fastahack_fetch(n):
    print('timings for fastahack.FastaHack')
    ti = []
    tf = []
    for _ in range(n):
        t = time.time()
        f = fastahack.FastaHack(fa_file.name)
        ti.append(time.time() - t)

        t = time.time()
        read_fastahack(f, headers)
        tf.append(time.time() - t)
        os.remove(index)
    # profile memory usage and report timings
    tracemalloc.start()
    f = fastahack.FastaHack(fa_file.name)
    read_fastahack(f, headers)
    os.remove(index)
    print(tracemalloc.get_traced_memory())
    print(mean(ti))
    print(mean(tf) / nreads / 10 * 1000 * 1000)
    tracemalloc.stop()
def seqio_read(n):
    print('timings for Bio.SeqIO')
    ti = []
    tf = []
    for _ in range(n):
        t = time.time()
        fh = open(fa_file.name)
        f = SeqIO.to_dict(SeqIO.parse(fh, "fasta"))
        ti.append(time.time() - t)

        t = time.time()
        read_dict(f, headers)
        tf.append(time.time() - t)
        fh.close()
    # profile memory usage and report timings
    tracemalloc.start()
    fh = open(fa_file.name)
    f = SeqIO.to_dict(SeqIO.parse(fh, "fasta"))
    read_dict(f, headers)
    fh.close()
    print(tracemalloc.get_traced_memory())
    print(mean(ti))
    print(mean(tf) / nreads / 100 * 1000 * 1000)
    tracemalloc.stop()
def pyfasta_fseek(n):
    print('timings for pyfasta.Fasta (fseek)')
    ti = []
    tf = []
    for _ in range(n):
        t = time.time()
        f = pyfasta.Fasta(fa_file.name, record_class=pyfasta.FastaRecord)
        ti.append(time.time() - t)

        t = time.time()
        read_dict(f, headers)
        tf.append(time.time() - t)
        os.remove(fa_file.name + '.flat')
        os.remove(fa_file.name + '.gdx')
    # profile memory usage and report timings
    tracemalloc.start()
    f = pyfasta.Fasta(fa_file.name, record_class=pyfasta.FastaRecord)
    read_dict(f, headers)
    os.remove(fa_file.name + '.flat')
    os.remove(fa_file.name + '.gdx')
    print(tracemalloc.get_traced_memory())
    print(mean(ti))
    print(mean(tf) / nreads / 10 * 1000 * 1000)
    tracemalloc.stop()
def test_set_traceback_limit(self):
    obj_size = 10

    tracemalloc.stop()
    self.assertRaises(ValueError, tracemalloc.start, -1)

    tracemalloc.stop()
    tracemalloc.start(10)
    obj2, obj2_traceback = allocate_bytes(obj_size)
    traceback = tracemalloc.get_object_traceback(obj2)
    self.assertEqual(len(traceback), 10)
    self.assertEqual(traceback, obj2_traceback)

    tracemalloc.stop()
    tracemalloc.start(1)
    obj, obj_traceback = allocate_bytes(obj_size)
    traceback = tracemalloc.get_object_traceback(obj)
    self.assertEqual(len(traceback), 1)
    self.assertEqual(traceback, obj_traceback)
def __del__(self):
    tracemalloc.stop()
async def app_after_serving():
    tracemalloc.stop()
def search_summary(selected_search, puzzle, round_times):
    print('')
    print(f'***** {selected_search.__name__} *****')
    print('Search Algorithm in Progress ...\n')
    export_text = f'\n***** {selected_search.__name__} *****\n'

    start_time = time.time()  # start timer
    tracemalloc.start()  # start memory tracking
    initial_mem, _ = tracemalloc.get_traced_memory()
    actions = selected_search(puzzle)  # perform search algorithm
    current_mem, peak_mem = tracemalloc.get_traced_memory()
    tracemalloc.stop()  # stop memory tracking
    end_time = time.time()  # end timer

    time_taken = end_time - start_time
    time_taken = round(time_taken, 2)

    # Convert to MiB
    initial_mem = round(initial_mem / (1024 ** 2), 6)
    current_mem = round(current_mem / (1024 ** 2), 6)
    peak_mem = round(peak_mem / (1024 ** 2), 6)

    print('Done.\n')
    if actions is not None:
        if len(actions) < 30:
            print(f'Path -------------------------------> {actions}')
            export_text += f'Path -------------------------------> {actions}\n'
        else:
            print(f'Path -------------------------------> {actions[0:29]} ...too long, omit rest actions')
            export_text += f'Path -------------------------------> {actions[0:29]} ...too long, omit rest actions\n'
        print(f'Path length --------------------------------> {get_cost_of_actions(actions)} actions')
        export_text += f'Path length --------------------------------> {get_cost_of_actions(actions)} actions\n'
        print(f'Time taken in seconds ----------------------> {time_taken} seconds')
        export_text += f'Time taken in seconds ----------------------> {time_taken} seconds\n'
        print(f'Expanded nodes -----------------------------> {puzzle.expanded_nodes} nodes')
        export_text += f'Expanded nodes -----------------------------> {puzzle.expanded_nodes} nodes\n'
        print(f'Initial memory is --------------------------> {initial_mem} MiB')
        export_text += f'Initial memory is --------------------------> {initial_mem} MiB\n'
        print(f'Current memory is --------------------------> {current_mem} MiB')
        export_text += f'Current memory is --------------------------> {current_mem} MiB\n'
        print(f'Peak memory is -----------------------------> {peak_mem} MiB (=Memory Usage)')
        export_text += f'Peak memory is -----------------------------> {peak_mem} MiB (=Memory Usage)\n'

    print('Preparing Solution for Animation ...\n')
    solution = puzzle.get_solution_as_list_of_states(actions)
    print('Done.\n')

    # Export the collected data to a .txt file for easier reading
    fileName = f'algo_summary_report{round_times}.txt'
    export_file = open('result/' + fileName, 'a')
    export_file.write(export_text)
    export_file.close()
    return solution
def run_evodevo(nfactors=5, Ndown=3, warp=False, save=True, warping_ref="Mouse",
                sample_seed=4891, seed=2020,
                species=["Mouse", "Rabbit", "Rat", "Human", "Opossum"],
                views=["Brain", "Cerebellum", "Heart", "Liver", "Testis"],
                model_groups=True, nm=None, tissue_as_sample=False):

    if tissue_as_sample:
        assert not warp, "Need to adapt warping reference if tissues are treated as groups"

    # specify data directory of normalized gene expression data
    if species == ["Mouse", "Rabbit", "Rat"] and not warp:
        nmtmp = "MRRab"
        datadir = "data/input_data/MRRab_matched/"
    elif warp:
        nmtmp = "warping"
        datadir = "data/input_data/all_unmatched/"
    else:
        print("Matched inputs are only provided for [Mouse, Rabbit, Rat]")
        sys.exit()

    # set filenames for output
    if nm is not None:
        nm = nm
    else:
        nm = nmtmp

    # load data and covariate
    data = []
    times = []
    samples_names = []
    if tissue_as_sample:
        group_names = []
        data_view = []
        for m in views:
            for g in species:
                df = pd.read_csv(datadir + "view_" + m + "_group_" + g + ".csv",
                                 header=0, index_col=0)
                data_view.append(np.asarray(df).transpose())
                times.append(np.asarray(
                    pd.read_csv(datadir + "times_group_" + g + ".csv",
                                header=0, index_col=0)).transpose())
                samples_names.append(df.columns)
                group_names.append(m + "-" + g)
        data = [data_view]
        features_names = [df.index]
    else:
        for m in views:
            data_view = []
            for g in species:
                data_view.append(np.asarray(
                    pd.read_csv(datadir + "view_" + m + "_group_" + g + ".csv",
                                header=0, index_col=0)).transpose())
                if m == "Brain":  # only needed once
                    times.append(np.asarray(
                        pd.read_csv(datadir + "times_group_" + g + ".csv",
                                    header=0, index_col=0)).transpose())
            data.append(data_view)

    # convert warping ref to numeric
    warping_ref = np.where(
        [species[i] == warping_ref for i in range(len(species))])[0][0]

    # mask values at random
    if Ndown > 0:
        np.random.seed(sample_seed)
        if tissue_as_sample:
            for i in range(len(data[0])):
                Ng = data[0][i].shape[0]
                masked_samples = np.random.choice(Ng, Ndown, replace=False)
                data[0][i][masked_samples, :] = np.nan
        else:
            for m in range(len(views)):
                for g in range(len(species)):
                    Ng = data[m][g].shape[0]
                    masked_samples = np.random.choice(Ng, Ndown, replace=False)
                    data[m][g][masked_samples, :] = np.nan

    # check dimension and name views and groups
    if tissue_as_sample:
        assert len(data) == 1, "problem in loading data, wrong number of views"
        assert len(data[0]) == len(species) * len(views), \
            "problem in loading data, wrong number of groups"
        view_names = ["mRNA"]
    else:
        assert len(data) == len(views), \
            "problem in loading data, wrong number of views"
        assert len(data[0]) == len(species), \
            "problem in loading data, wrong number of groups"
        view_names = views
        group_names = species

    # prepare MOFA model with time as covariate
    ent = entry_point()
    ent.set_data_options()
    ent.set_data_matrix(data, groups_names=group_names, views_names=view_names)
    ent.set_model_options(factors=nfactors)
    ent.set_train_options(seed=seed, convergence_mode="medium")
    ent.set_covariates(times, covariates_names="time")
    ent.set_smooth_options(warping=warp, warping_ref=warping_ref,
                           model_groups=model_groups)

    # Build and run the model
    tracemalloc.start()
    ent.build()
    t0 = time.time()
    ent.run()
    t1 = time.time()
    total = t1 - t0
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    # save model
    if save:
        if Ndown == 0:
            if model_groups:
                outfile = "out/evodevo_groups_%s-seed_%s.hdf5" % (nm, seed)
            else:
                outfile = "out/evodevo_%s-seed_%s.hdf5" % (nm, seed)
            # interpolate for missing time points
            ent.predict_factor(new_covariates=ent.model.nodes["Sigma"].covariates)
        else:
            if model_groups:
                outfile = "out/evodevo_groups_%s-N%s-sample_seed_%s.hdf5" % (
                    nm, Ndown, sample_seed)
            else:
                outfile = "out/evodevo_%s-N%s-sample_seed_%s.hdf5" % (
                    nm, Ndown, sample_seed)
        ent.save(outfile)

    # write output to csv
    results = {'time': total, 'mem_usage': peak, 'n_down': Ndown,
               'sample_seed': sample_seed, 'seed': seed}
    df = pd.DataFrame.from_dict(data=results, orient='index').T
    if model_groups:
        stats_file = 'out/evodevo_groups_%s_stats.csv' % nm
    else:
        stats_file = 'out/evodevo_%s_stats.csv' % nm
    if os.path.exists(stats_file):
        df.to_csv(stats_file, mode='a', header=False)
    else:
        df.to_csv(stats_file, header=True)
def end(self):
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    self._result_list.append(peak)
def search_algo(n, maze, start, end):
    tracemalloc.start()
    start_time = time.time()
    current, peak = tracemalloc.get_traced_memory()
    print(f"Current memory usage before search {current / 10**6}MB")
    from queue import PriorityQueue
    pos = start
    delay = 0.0
    grid, rect, screen, wid = make_screen(n)
    queue = PriorityQueue()  # UCS priority queue stores (cost, position)
    queue.put((0, 0))
    row = 0
    col = 0
    maze[row][col] = -1
    total_cost = 0
    moves = []
    parent = [-1] * (n * n)  # stores parent of every node
    search_cost = 0
    costs = [10**5] * (n * n)  # stores current best cost path till that node
    while pos != end:
        curr_elem = queue.get()  # get top element from priority queue, takes O(size) time
        curr_cost = curr_elem[0]
        pos = curr_elem[1]
        costs[pos] = min(curr_cost, costs[pos])
        row = pos // n
        col = pos % n
        maze[row][col] = -1
        expanded = True  # expanding current node
        if (col + 1 < n) and (maze[row][col + 1] not in [1]):
            # adding to priority queue if cost is less than current path cost to that node
            if costs[row * n + col + 1] > curr_cost + 2:
                costs[row * n + col + 1] = curr_cost + 2
                queue.put((curr_cost + 2, row * n + col + 1))
                parent[row * n + col + 1] = pos
                expanded = True
                if row * n + col + 1 == end:
                    pos = end
                    total_cost = curr_cost + 2
        if (row + 1 < n) and (maze[row + 1][col] not in [1]):
            if costs[(row + 1) * n + col] > curr_cost + 3:
                costs[(row + 1) * n + col] = curr_cost + 3
                queue.put((curr_cost + 3, (row + 1) * n + col))
                parent[(row + 1) * n + col] = pos
                expanded = True
                if (row + 1) * n + col == end:
                    pos = end
                    total_cost = curr_cost + 3
        if (col - 1 >= 0) and (maze[row][col - 1] not in [1]):
            if costs[row * n + col - 1] > curr_cost + 2:
                costs[row * n + col - 1] = curr_cost + 2
                queue.put((curr_cost + 2, row * n + col - 1))
                parent[row * n + col - 1] = pos
                expanded = True
                if row * n + col - 1 == end:
                    pos = end
                    total_cost = curr_cost + 2
        if (row - 1 >= 0) and (maze[row - 1][col] not in [1]):
            if costs[(row - 1) * n + col] > curr_cost + 2:
                costs[(row - 1) * n + col] = curr_cost + 2
                queue.put((curr_cost + 2, (row - 1) * n + col))
                parent[(row - 1) * n + col] = pos
                expanded = True
                if (row - 1) * n + col == end:
                    pos = end
                    total_cost = curr_cost + 2
        redraw_maze(grid, rect, screen, n, maze, pos, delay, wid, end)
        if expanded:
            search_cost += 2
            if parent[pos] == pos - n:
                search_cost += 1

    curr_node = end
    # printing the path from start to end
    while parent[curr_node] != -1:
        maze[curr_node // n][curr_node % n] = 2
        if parent[curr_node] == curr_node - 1:
            moves.append("Right")
        elif parent[curr_node] == curr_node + 1:
            moves.append("Left")
        elif parent[curr_node] == curr_node + n:
            moves.append("Up")
        elif parent[curr_node] == curr_node - n:
            moves.append("Down")
        curr_node = parent[curr_node]
    moves = moves[::-1]
    maze[0][0] = 2
    redraw_maze(grid, rect, screen, n, maze, pos, delay, wid, end)
    end_time = time.time()
    current, peak = tracemalloc.get_traced_memory()
    print("Total Search Time : {} seconds".format(end_time - start_time))
    print(f"Peak Memory usage was {peak / 10**6}MB")
    print(f"Total Expanding Search Cost is {search_cost} Units")
    print(f"Best Path Total Cost is {total_cost} Units")
    tracemalloc.stop()
    print(moves)
    popup_win(str(total_cost), "Score", "./final.png", screen)
def trace_stop(self):
    tracemalloc.stop()
    return "<p>tracemalloc stopped</p>"
items = []
for l in read_data:
    i, b, w = l.split(' ')
    items.append(Item(i, b, w))

print("Breadth first search\n")
t_start = time.time()
for i in range(1000):
    tracemalloc.start()
    solution = search(items, 420, "BFS")
    current, peak = tracemalloc.get_traced_memory()
    print(
        f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB"
    )
    tracemalloc.stop()
t_end = time.time()
print("\nHighest benefit solution:")
# solution.print(items)
print("Elapsed time: {}\n".format((t_end - t_start) / 1000))

print("Depth first search:")
t_start = time.time()
for i in range(1000):
    tracemalloc.start()
    solution = search(items, 420, "DFS")
    current, peak = tracemalloc.get_traced_memory()
    print(
        f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB"
    )
    tracemalloc.stop()
def run_grid(nfactors=2, G=1, N=20, Dm=100, M=4, noise_level=1,
             missing=0.1, missing_all=0.1, seed=1234567, method="MEFISTO",
             note="none", lscales=[0.2, 0.1], scales=[1, 0.6],
             n_factors_sim=None, save=False, max_iter=1000):

    nfactors = int(nfactors)
    if n_factors_sim is None:
        n_factors_sim = nfactors
    assert len(lscales) == n_factors_sim
    assert len(scales) == n_factors_sim
    groupsidx = np.repeat(range(G), N)

    # simulate data
    np.random.seed(seed)
    views_nms = [str(m) for m in range(M)]
    sim = simmofa.simulate_data(N=N, seed=seed, views=views_nms, D=[Dm] * M,
                                K=n_factors_sim, G=G, lscales=lscales,
                                noise_level=noise_level, scales=scales,
                                shared=True)

    # keep unmasked data in data_full and center as done in MOFA (per group)
    data_full = copy.deepcopy(sim['data'])
    for g in range(G):
        for m in range(M):
            data_full[m][g] -= np.nanmean(data_full[m][g], axis=0)

    # mask parts of the data
    sim['data'] = simmofa.mask_samples(sim, perc=missing,
                                       perc_all_views=missing_all)
    data_masked = copy.deepcopy(sim['data'])

    # fit models
    tracemalloc.start()
    t0 = time.time()
    if method != "univGPs":
        if method == "MEFISTO":
            GP_factors = True
        else:
            GP_factors = False
        predictions, ent = fit_MOFA(data=sim['data'], times=sim['sample_cov'],
                                    nfactors=nfactors, seed=2020,
                                    GP_factors=GP_factors, warping=False,
                                    warping_ref=0, model_groups=True,
                                    iter=max_iter)
    else:
        predictions, model, likelihood = fit_GP(data=sim['data'],
                                                times=sim['sample_cov'],
                                                iter=max_iter)
    t1 = time.time()
    total_time = t1 - t0
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    # evaluate interpolation MSE
    mse, mse_mean, n_missing = calc_mse(data_masked=data_masked,
                                        data_full=data_full,
                                        predictions=predictions)

    # save results
    outdir = 'out/'
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # save summary statistics if not the model itself is saved
    if not save:
        results = {'time': total_time, 'method': method, 'N': N, 'G': G,
                   'M': M, 'Dm': Dm, 'noise_level': noise_level,
                   'missing': missing, 'missing_all': missing_all,
                   'seed': seed, 'date': date.today(), 'note': note,
                   'mem_usage': peak, 'scales': scales, 'lscales': lscales,
                   'n_factors': nfactors, 'n_factors_sim': n_factors_sim,
                   'mse': mse, 'mse_mean': mse_mean, 'n_missing': n_missing}
        df = pd.DataFrame.from_dict(data=results, orient='index').T
        for nm in ['scales', 'lscales']:  # expand multi-factor columns
            dfsplit = df[nm].apply(pd.Series)
            dfsplit = dfsplit.rename(columns=lambda x: nm + "_" + str(x))
            df = pd.concat([df, dfsplit], axis=1)
            df = df.drop(columns=[nm])
        filenm = 'interpolation_results_%s.csv' % method
        if os.path.exists(outdir + filenm):
            df.to_csv(outdir + filenm, mode='a', header=False)
        else:
            df.to_csv(outdir + filenm, header=True)
    # save model + predictions
    else:
        if method != "univGPs":
            ent.save(outdir + "grid_model.hdf5")
        save_predictions(predictions=predictions, data_masked=data_masked,
                         times=sim['sample_cov'], method=method, outdir=outdir)
        save_predictions(predictions=[np.vstack(data_full[m]) for m in range(M)],
                         data_masked=data_masked, times=sim['sample_cov'],
                         method="ground_truth", outdir=outdir)
def test_reproject_3D_memory():
    pytest.importorskip('reproject')

    tracemalloc.start()

    snap1 = tracemalloc.take_snapshot()

    # create a 64 MB cube
    cube, _ = utilities.generate_gaussian_cube(shape=[200, 200, 200])
    sz = _.dtype.itemsize

    # check that cube is loaded into memory
    snap2 = tracemalloc.take_snapshot()
    diff = snap2.compare_to(snap1, 'lineno')
    diffvals = np.array([dd.size_diff for dd in diff])
    # at this point, the generated cube should still exist in memory
    assert diffvals.max() * u.B >= 200**3 * sz * u.B

    wcs_in = cube.wcs
    wcs_out = wcs_in.deepcopy()
    wcs_out.wcs.ctype = ['GLON-SIN', 'GLAT-SIN', cube.wcs.wcs.ctype[2]]
    wcs_out.wcs.crval = [0.001, 0.001, cube.wcs.wcs.crval[2]]
    wcs_out.wcs.crpix = [2., 2., cube.wcs.wcs.crpix[2]]

    header_out = (wcs_out.to_header())
    header_out['NAXIS'] = 3
    header_out['NAXIS1'] = int(cube.shape[2] / 2)
    header_out['NAXIS2'] = int(cube.shape[1] / 2)
    header_out['NAXIS3'] = cube.shape[0]

    # First the unfilled reprojection test: new memory is allocated for
    # `result`, but nowhere else
    result = cube.reproject(header_out, filled=False)
    snap3 = tracemalloc.take_snapshot()
    diff = snap3.compare_to(snap2, 'lineno')
    diffvals = np.array([dd.size_diff for dd in diff])
    # result should have the same size as the input data, except smaller in two dims
    # make sure that's all that's allocated
    assert diffvals.max() * u.B >= 200 * 100**2 * sz * u.B
    assert diffvals.max() * u.B < 200 * 110**2 * sz * u.B

    # without masking the cube, nothing should change
    result = cube.reproject(header_out, filled=True)
    snap4 = tracemalloc.take_snapshot()
    diff = snap4.compare_to(snap3, 'lineno')
    diffvals = np.array([dd.size_diff for dd in diff])
    assert diffvals.max() * u.B <= 1 * u.MB

    assert result.wcs.wcs.crval[0] == 0.001
    assert result.wcs.wcs.crpix[0] == 2.

    # masking the cube will force the fill to create a new in-memory copy
    mcube = cube.with_mask(cube > 0.1 * cube.unit)
    # `_is_huge` would trigger a use_memmap
    assert not mcube._is_huge
    assert mcube.mask.any()

    # take a new snapshot because we're not testing the mask creation
    snap5 = tracemalloc.take_snapshot()
    tracemalloc.stop()
    tracemalloc.start()  # stop/start so we can check peak mem use from here
    current_b4, peak_b4 = tracemalloc.get_traced_memory()
    result = mcube.reproject(header_out, filled=True)
    current_aftr, peak_aftr = tracemalloc.get_traced_memory()

    snap6 = tracemalloc.take_snapshot()
    diff = snap6.compare_to(snap5, 'lineno')
    diffvals = np.array([dd.size_diff for dd in diff])
    # a duplicate of the cube should have been created by filling masked vals
    # (this should be near-exact since 'result' should occupy exactly the
    # same amount of memory)
    assert diffvals.max() * u.B <= 1 * u.MB  # >= 200**3*sz*u.B
    # the peak memory usage *during* reprojection will have that duplicate,
    # but the memory gets cleaned up afterward
    assert (peak_aftr - peak_b4) * u.B >= (200**3 * sz * u.B + 200 * 100**2 * sz * u.B)

    assert result.wcs.wcs.crval[0] == 0.001
    assert result.wcs.wcs.crpix[0] == 2.
tracemalloc.start()  # Start profiling memory use

three_letter_combos_list = [
    letter_1 + letter_2 + letter_3
    for letter_1 in LETTERS
    for letter_2 in LETTERS
    for letter_3 in LETTERS
]

# TODO: Update this definition
three_letter_combos_generator = None

# DON'T EDIT THE CODE BELOW THIS LINE
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# This code looks at current memory usage, and displays the results
memory_snapshot = tracemalloc.take_snapshot()
display_malloc_snapshot(memory_snapshot)
tracemalloc.stop()  # Stop profiling memory use

print("---------------------------------")
print("The first three letter combos are:")
print(next(three_letter_combos_generator))
print(next(three_letter_combos_generator))
print(next(three_letter_combos_generator))
print("---------------------------------")
print()
print("Generator expression contains", len(list(three_letter_combos_generator)), "items")
print("List comprehension contains", len(three_letter_combos_list), "items")
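One possible completion of the TODO above, kept separate so the exercise scaffold stays intact (this is an assumption, not the original solution): a generator expression mirrors the list comprehension but yields combos lazily, which is why the snapshot attributes far less memory to it.

# A possible answer to the exercise (illustrative assumption):
three_letter_combos_generator = (
    letter_1 + letter_2 + letter_3
    for letter_1 in LETTERS
    for letter_2 in LETTERS
    for letter_3 in LETTERS
)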
def run_grid(nfactors=5, Tmissing=5, GP_factors=True, note="",
             masking_seed=1234, Nviews=1, seed=2020, model_groups=True,
             method="FA", max_iter=1000,
             species=["Mouse", "Rabbit", "Rat", "Human", "Opossum"],
             views=["Brain", "Cerebellum", "Heart", "Liver", "Testis"],
             nm="all", warping=True, frac_features=1, n_genes=1000):

    M = len(views)
    G = len(species)

    # specify data directory of normalized gene expression data
    datadir = "data/input_data/all_unmatched/"

    # set and check number of views to mask a time point in
    if Nviews == "all":
        Nviews = len(views)
    if Nviews > len(views):
        print("Nviews is larger than available number of views, setting to all views.")
    Nviews = min(Nviews, len(views))

    # load data
    data = []
    times = []
    for m in views:
        data_view = []
        for g in species:
            dd_m_g = np.asarray(
                pd.read_csv(datadir + "view_" + m + "_group_" + g + ".csv",
                            header=0, index_col=0)).transpose()
            data_view.append(dd_m_g)
            if m == views[0]:  # only needed once
                times.append(np.asarray(
                    pd.read_csv(datadir + "times_group_" + g + ".csv",
                                header=0, index_col=0)).transpose())
        data.append(data_view)

    if n_genes != "all":
        np.random.seed(masking_seed + 2020)
        # keep a subset of genes in all species and organs
        genes2keep = np.random.choice(range(data[0][0].shape[1]), n_genes,
                                      replace=False)
        for m in range(M):
            for g in range(G):
                data[m][g] = data[m][g][:, genes2keep]

    # check dimension
    assert len(data) == M, "problem in loading data, wrong number of groups"
    assert len(data[0]) == G, "problem in loading data, wrong number of views"

    # keep unmasked data in data_full and center as done in MOFA (per group)
    data_full = copy.deepcopy(data)
    for g in range(len(species)):
        for m in range(len(views)):
            data_full[m][g] -= np.nanmean(data_full[m][g], axis=0)

    # mask values (draw timepoint - species combinations and views at random)
    times_spec = np.vstack([
        np.repeat(range(len(species)),
                  [len(times[g]) for g in range(len(species))]),
        np.concatenate([np.arange(len(times[g])) for g in range(len(species))])
    ])
    np.random.seed(masking_seed)
    times_spec2mask = np.random.choice(range(times_spec.shape[1]), Tmissing,
                                       replace=False)
    if frac_features < 1:
        D = data[0][0].shape[1]
        genes2mask = np.random.choice(range(D), int(frac_features * D),
                                      replace=False)
    for ts in times_spec2mask:
        g2mask = times_spec[0, ts]
        t2mask = times_spec[1, ts]
        views2mask = np.random.choice(range(len(views)), Nviews, replace=False)
        for m in views2mask:
            if frac_features < 1:
                data[m][g2mask][t2mask, genes2mask] = np.nan
            else:
                data[m][g2mask][t2mask, :] = np.nan
    data_masked = copy.deepcopy(data)

    # warping reference as in full training
    warping_ref = "Mouse"
    warping_ref = np.where(
        [species[i] == warping_ref for i in range(len(species))])[0][0]

    tracemalloc.start()
    t0 = time.time()
    if method != "univGPs":
        predictions, ent = fit_MOFA(data=data, times=times, nfactors=nfactors,
                                    seed=seed, GP_factors=GP_factors,
                                    warping=warping, warping_ref=warping_ref,
                                    model_groups=model_groups, iter=max_iter)
    else:
        predictions, model, likelihood = fit_GP(data, times, iter=max_iter)
    t1 = time.time()
    total_time = t1 - t0
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    # evaluate interpolation MSE (on missing values that have a ground truth)
    mse, mse_mean, n_missing = calc_mse(data_masked, data_full, predictions)

    # write output to csv
    results = {'mse': mse, 'mse_mean': mse_mean, 'time': total_time,
               'GP_factors': GP_factors, 'n_missing': n_missing,
               'Tmissing': Tmissing, 'masking_seed': masking_seed,
               'date': date.today(), 'note': note, 'mem_usage': peak,
               'Nviews': Nviews, 'method': method, 'n_genes': n_genes,
               'frac_features': frac_features}
    df = pd.DataFrame.from_dict(data=results, orient='index').T
    filenm = 'out/interpol_results_evodevo_%s_%s.csv' % (nm, method)
    if os.path.exists(filenm):
        df.to_csv(filenm, mode='a', header=False)
    else:
        df.to_csv(filenm, header=True)
def _stop_tracemalloc(self):
    tracemalloc.stop()
    self.running = False
def peak_monitor_stop(self):
    tracemalloc.stop()
    self.peak_monitoring = False
def stop_tracemalloc(self):
    tracemalloc.stop()
@contextlib.contextmanager  # assumed decorator: the try/yield/finally shape is clearly meant as a context manager
def activated_tracemalloc():
    tracemalloc.start()
    try:
        yield
    finally:
        tracemalloc.stop()
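A minimal usage sketch for the context manager above; the workload and the in-block `get_traced_memory()` readout are illustrative assumptions.

import tracemalloc  # assumed imported at module level in the original

with activated_tracemalloc():
    data = [str(i) for i in range(100_000)]
    current, peak = tracemalloc.get_traced_memory()
    print(f"current={current} B, peak={peak} B")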
def loadData(analyzer, contexto, user, sentiment):
    """
    Loads the data from the CSV files into the model
    """
    tracemalloc.start()
    start_time = getTime()
    start_memory = getMemory()

    #sentimen = cf.data_dir + sentiment
    #input_file3 = csv.DictReader(open(sentimen, encoding="utf-8"), delimiter=",")
    contexto = cf.data_dir + contexto
    input_file = csv.DictReader(open(contexto, encoding="utf-8"), delimiter=",")
    #user = cf.data_dir + user
    #input_file2 = csv.DictReader(open(user, encoding="utf-8"), delimiter=",")
    #x = 0
    #lst = lt.newList()
    #sss = lt.newList()
    #for senti in input_file3:
    #    lt.addLast(sss, senti)
    #itet = it.newIterator(sss)
    #x = 0
    #lst = lt.newList()
    #for hashtag in input_file2:
    #    lt.addLast(lst, hashtag)
    #new = it.newIterator(lst)
    #x += 1
    #if x == 5:
    #    break  # Was used to check how much data to look at
    #i = 0
    # Columns: "instrumentalness", "liveness", "speechiness", "danceability",
    # "valence", "loudness", "tempo", "acousticness", "energy", "mode", "key",
    # "artist_id", "tweet_lang", "track_id", "created_at", "lang",
    # "time_zone", "user_id", "id"
    x = 0
    for track in input_file:
        # Comparing str values is not the same as comparing numbers: a string
        # (lexicographic) ordering was used at first (str values get compared
        # in om.values()), so the fields must be held as numeric types
        track['tempo'] = float(track['tempo'])
        track['energy'] = float(track['energy'])
        track['liveness'] = float(track['liveness'])
        track['instrumentalness'] = float(track['instrumentalness'])
        hora = track['created_at'][11:19]
        # fromisoformat() builds a datetime.time from the ISO string, a
        # library format that supports comparisons (its inverse is
        # isoformat(), which turns it back into a str)
        t = datetime.time.fromisoformat(hora)
        track['horas'] = t
        track['created_at'] = str(track['created_at'])
        track['speechiness'] = float(track['speechiness'])
        track['danceability'] = float(track['danceability'])
        track['valence'] = float(track['valence'])
        track['user_id'] = int(track['user_id'])
        track['loudness'] = float(track['loudness'])
        track['acousticness'] = float(track['acousticness'])
        track['energy'] = float(track['energy'])
        track['mode'] = float(track['mode'])
        track['key'] = float(track['key'])
        track['id'] = int(track['id'])
        #track['hashtag'] = ""
        #track['hashtag'] = lt.getElement(lst, x + 1)
        #if track['hashtag'] == None:
        #    track['hashtag'] = ""
        #for x in
        model.addTrack(analyzer, track)
        x += 1
        #if x == 1000:  # Was used to check how much data to look at
        #    break
        #i += 1
    stop_memory = getMemory()
    stop_time = getTime()
    tracemalloc.stop()

    delta_time = stop_time - start_time
    delta_memory = deltaMemory(start_memory, stop_memory)
    return analyzer, delta_time, delta_memory
def stop(self):
    if tracemalloc is not None:
        tracemalloc.stop()
    super(MemoryCollector, self).stop()
import tracemalloc as t

print("*start")
print([t._format_size(x, False) for x in t.get_traced_memory()])

t.start()
L = [[_ for _ in range(10000)] for i in range(100)]

print("*gen")
print([t._format_size(x, False) for x in t.get_traced_memory()])

snapshot = t.take_snapshot()
for stats in snapshot.statistics("traceback")[:3]:
    print(stats)

print("----------------------------------------")

snapshot = t.take_snapshot()
for stats in snapshot.statistics("lineno", cumulative=True)[:3]:
    print(stats)

t.stop()
print([t._format_size(x, False) for x in t.get_traced_memory()])
async def app_after_serving():
    tracemalloc.stop()
    await app.db.disconnect()
def on_stop_test(self) -> None:
    tracemalloc.stop()
def main(args):
    path = None
    if args.path is not None:
        path = args.path
        if not os.path.isdir(path):
            os.mkdir(path)
    file_name = args.file_name
    alignment1, alignment2 = None, None
    if args.alignment1 is not None and args.alignment2 is not None:
        alignment1 = args.alignment1
        alignment2 = args.alignment2
        if os.path.isfile(alignment1):
            alignment = ''
            with open(alignment1, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    alignment += ''.join(c for c in line if c.isalpha())
            alignment1 = alignment
        if os.path.isfile(alignment2):
            alignment = ''
            with open(alignment2, 'r') as f:
                lines = f.readlines()
                for line in lines:
                    alignment += ''.join(c for c in line if c.isalpha())
            alignment2 = alignment
    else:
        print("Missing one or more sequences to align with!")
        exit(0)
    delta = None
    score = None
    keys = None
    if args.delta is not None:
        delta, keys = read_delta(args.delta)
    else:
        if args.match is not None and args.mismatch is not None and args.gap is not None:
            score = {
                'match': args.match,
                'mismatch': args.mismatch,
                'gap': args.gap
            }
            if args.keys is not None:
                keys = args.keys.split(',')
                if '-' not in keys:
                    keys.append('-')
            else:
                print("Symbols to be used by the sequences are missing!")
                exit(0)
    hs = NeedlemanWunsch(score, keys, delta)
    tracemalloc.start()
    start_time = time.time()
    score, alignments = hs.align(alignment1, alignment2)
    current, peak = tracemalloc.get_traced_memory()
    end_time = time.time()
    print(
        f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB"
    )
    tracemalloc.stop()
    elapsed_time = end_time - start_time
    if path is None:
        print("Best Alignment Score:", score)
        print("Sequence 1: ", alignments[0])
        print("Sequence 2: ", alignments[1])
        print("Alignment is done in %.4f seconds!" % elapsed_time)
    else:
        with open(os.path.join(path, file_name), 'w+') as f:
            f.write("Best Alignment Score: %s \n" % str(score))
            f.write("Sequence 1: %s \n" % alignments[0])
            f.write("Sequence 2: %s \n" % alignments[1])
        print("Alignment is done in %.4f seconds!" % elapsed_time)
        print("Result saved at %s" % (path + file_name))
def checkrun(raw_datapath, tform_config_path, classifier_config_path,
             # Optional for Data
             iterator_mode='arrays', plot_format="RGBrgb", print_out=True,
             skiprows=0,
             # Optional for Classifier
             scale=1.0, classifier='tuner', split=0.1, target_size=(80, 80),
             batch_size=32, classes=['class_1', 'class_2'],
             project_name='tuner_run', k_fold=False, k=None,
             color_mode='rgb', seed=None):
    '''
    Check run function that, when executed, runs all the transformations over
    1% of the data and returns the time and memory required for the complete
    data preprocessing step.

    Parameters
    ----------
    raw_datapath : str
        path to the raw .csv files containing the data to classify
    tform_config_path : str
        string containing the path to the yaml file containing the
        transformations to use and which data in the .csv file to
        perform them on
    classifier_config_path : str
        string containing the path to the yaml file representing the
        classifier hyperparameters
    iterator_mode : str
        option to use images from arrays directly or save the .png and
        use a directory iterator mode
    plot_format : str
        option for standard or RGB color gradient
    print_out : bool
        option for printing out feedback on computational time taken to
        initialize the data and generate the images
    num_test_files_class : int or float
        number of files per class to select for the test set
    classifier : str
        option cnn or tuner
    scale : float
        percentage of the image to reduce its size to
    split : float
        the percentage of the learning set to use for the validation step
    target_size : tuple
        image target size, presented as a tuple indicating the number of
        pixels composing the two dimensions of the image (w x h)
    batch_size : int
        the number of files to group up into a batch
    classes : list
        a list containing strings of the classes the data is divided in;
        each class name is the name of the folder the files are contained in
    project_name : str
        name of the folder to be created for storing the results of the tuning
    k_fold : bool
        bool value indicating whether k-fold cross-validation is to be
        performed; not valid for tuner
    k : int
        integer value indicating how many k folds need to be performed
    seed : int
        used in hold_out_test_set to isolate the testing data randomly for
        use in training of the neural network; can be assigned a value to
        repeat the selection
    '''
    # listing down the filenames
    file_names = [
        item for item in os.listdir(raw_datapath) if item.endswith('.csv')
    ]

    # calculations for extracting 1% of data
    range_filename = int(len(file_names) * 0.01)
    # check to pass test or when the dataset is very small
    if range_filename == 0:
        range_filename += 1
    range_classes = len(classes)

    # calculating files per class to pass to hold_out_test_set
    # for getting filenames to be transferred to a temporary folder
    file_per_class = round(range_filename / range_classes)
    # check to pass test or when the dataset is very small
    if file_per_class == 0:
        file_per_class += 1

    # calculating the test_set_filenames to be used for
    # the hold_out_test_set in the checkpoint_datacreation function
    num_test_files_class = round(0.25 * file_per_class)

    # start measuring time here
    time_1 = perf_counter()
    # starting memory tracer
    tracemalloc.start()

    # getting filenames to collect files for checkrun
    file_names_for_test = preprocessing.hold_out_test_set(
        raw_datapath, number_of_files_per_class=file_per_class,
        classes=classes)

    # adding .csv extension to the filenames and storing them in a separate list
    file_names_csv = []
    for item in file_names_for_test:
        file_names_csv.append(item + '.csv')

    # making directory to store the temporary data
    os.mkdir(os.path.join(raw_datapath, 'temp/'))

    # copying the 1% data to a separate folder
    for item in file_names_csv:
        shutil.copy(os.path.join(raw_datapath, item),
                    os.path.join(raw_datapath + 'temp/'))

    # creating new data path to be passed to the checkpoint_datacreation function
    new_data_path = os.path.join(raw_datapath, 'temp/')

    checkpoint_datacreation(
        new_data_path, tform_config_path, classifier_config_path,
        # Optional for Data
        iterator_mode=iterator_mode, plot_format=plot_format,
        print_out=print_out, skiprows=skiprows,
        # Optional for Classifier
        num_test_files_class=num_test_files_class, scale=scale,
        classifier=classifier, split=split, target_size=target_size,
        batch_size=batch_size, classes=classes, project_name=project_name,
        k_fold=k_fold, k=k, color_mode=color_mode, seed=seed)

    # getting feedback from memory tracer
    current, peak = tracemalloc.get_traced_memory()
    # stopping memory tracer
    tracemalloc.stop()
    time_2 = perf_counter()
    time_elapsed = time_2 - time_1

    print("The total time required for data creation will be approx.\
 {} hours".format(round((time_elapsed * 100) / 3600, 3)))
    print("The total memory required for the process will be approx.\
 {} Gigabytes".format(round(peak * 100 / (10**9), 3)))

    # removing the temporary data folder
    shutil.rmtree(new_data_path)
    print("Temporary files created by process are successfully deleted")
def test_is_tracing(self):
    tracemalloc.stop()
    self.assertFalse(tracemalloc.is_tracing())

    tracemalloc.start()
    self.assertTrue(tracemalloc.is_tracing())
async def stop(self, exception: Exception = None) -> None:
    tracemalloc.stop()
def excec_program(list, args, file):
    list_dup = list.copy()
    swaps = 0
    compares = 0
    my_time = 0
    print("-------------------")
    tracemalloc.start()
    if args[0] == "insert":
        if file:
            t1_start = time.process_time()
            insertion.insertion_sort_stat(list, args[1])
            t1_stop = time.process_time()
        else:
            t1_start = time.process_time()
            insertion.insertion_sort(list, args[1])
            t1_stop = time.process_time()
        swaps = insertion.swaps
        compares = insertion.compares
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "merge":
        merge.reset_counters()
        if file:
            t1_start = time.process_time()
            merge.merge_sort_stat(list, args[1])
            t1_stop = time.process_time()
        else:
            t1_start = time.process_time()
            merge.merge_sort(list, args[1])
            t1_stop = time.process_time()
        swaps = merge.swaps
        compares = merge.compares
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "quick":
        quick.reset_counters()
        if file:
            t1_start = time.process_time()
            quick.quick_sort_stat(list, 0, len(list) - 1, args[1])
            t1_stop = time.process_time()
        else:
            t1_start = time.process_time()
            quick.quick_sort(list, 0, len(list) - 1, args[1])
            t1_stop = time.process_time()
        swaps = quick.swaps
        compares = quick.compares
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "dual_pivot":
        dual_pivot.reset_counters()
        if file:
            t1_start = time.process_time()
            dual_pivot.dual_sort_stat(list, 0, len(list) - 1, args[1])
            t1_stop = time.process_time()
        else:
            t1_start = time.process_time()
            dual_pivot.dual_sort(list, 0, len(list) - 1, args[1])
            t1_stop = time.process_time()
        swaps = dual_pivot.swaps
        compares = dual_pivot.compares
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "hybrid":
        hybrid.reset_counters()
        if file:
            t1_start = time.process_time()
            hybrid.hybrid_sort(list, args[1])
            t1_stop = time.process_time()
        else:
            t1_start = time.process_time()
            hybrid.hybrid_sort(list, args[1])
            t1_stop = time.process_time()
        swaps = hybrid.swaps
        compares = hybrid.compares
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "radix":
        radix.reset_counters()
        t1_start = time.process_time()
        radix.radix_sort(list, args[1])
        t1_stop = time.process_time()
        swaps = radix.swaps
        compares = '-'
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "select_dual":
        select_dual.reset_counters()
        t1_start = time.process_time()
        select_dual.dual_sort(list, 0, len(list) - 1, args[1])
        t1_stop = time.process_time()
        swaps = select_dual.swaps
        compares = select_dual.compares
        my_time = round(t1_stop - t1_start, 8)
    elif args[0] == "select_quick":
        select_quick.reset_counters()
        t1_start = time.process_time()
        select_quick.quick_sort(list, 0, len(list) - 1, args[1])
        t1_stop = time.process_time()
        swaps = select_quick.swaps
        compares = select_quick.compares
        my_time = round(t1_stop - t1_start, 8)
    mem = tracemalloc.get_traced_memory()[1]
    tracemalloc.stop()
    if file:
        info = str(len(list)) + ';'
        info += str(swaps) + ';'
        info += str(compares) + ';'
        info += str(my_time) + ';'
        info += str(mem) + ';'
        info += ('\n')
        try:
            file_name = args[2]
            with open(file_name, 'a+') as f:
                f.write(info)
        except FileNotFoundError:
            print("Creating new file ...")
            with open(file_name, 'a+') as f:
                f.write(info)
    else:
        print("-----------------")
        print("Time: %.10f" % (t1_stop - t1_start))
        print("Swaps: ", swaps)
        print("Compares: ", compares)
        print("Memory: ", mem, "B")
        sorted_info(list_dup, list)
    if check_order(list, args[1]) or args[0] == "select_dual":
        print(list)
    else:
        print("Something went wrong :( ")
def tearDown(self):
    tracemalloc.stop()
def read_write(plot=False):
    # mesh = generate_tetrahedral_mesh()
    mesh = generate_triangular_mesh()
    print(mesh)
    mem_size = mesh.points.nbytes + mesh.cells[0].data.nbytes
    mem_size /= 1024.0**2
    print(f"mem_size: {mem_size:.2f} MB")

    formats = {
        "Abaqus": (meshio.abaqus.write, meshio.abaqus.read, ["out.inp"]),
        "Ansys (ASCII)": (
            lambda f, m: meshio.ansys.write(f, m, binary=False),
            meshio.ansys.read,
            ["out.ans"],
        ),
        # "Ansys (binary)": (
        #     lambda f, m: meshio.ansys.write(f, m, binary=True),
        #     meshio.ansys.read,
        #     ["out.ans"],
        # ),
        "AVS-UCD": (meshio.avsucd.write, meshio.avsucd.read, ["out.ucd"]),
        # "CGNS": (meshio.cgns.write, meshio.cgns.read, ["out.cgns"]),
        "Dolfin-XML": (meshio.dolfin.write, meshio.dolfin.read, ["out.xml"]),
        "Exodus": (meshio.exodus.write, meshio.exodus.read, ["out.e"]),
        # "FLAC3D": (meshio.flac3d.write, meshio.flac3d.read, ["out.f3grid"]),
        "Gmsh 4.1 (ASCII)": (
            lambda f, m: meshio.gmsh.write(f, m, binary=False),
            meshio.gmsh.read,
            ["out.msh"],
        ),
        "Gmsh 4.1 (binary)": (
            lambda f, m: meshio.gmsh.write(f, m, binary=True),
            meshio.gmsh.read,
            ["out.msh"],
        ),
        "MDPA": (meshio.mdpa.write, meshio.mdpa.read, ["out.mdpa"]),
        "MED": (meshio.med.write, meshio.med.read, ["out.med"]),
        "Medit": (meshio.medit.write, meshio.medit.read, ["out.mesh"]),
        "MOAB": (meshio.h5m.write, meshio.h5m.read, ["out.h5m"]),
        "Nastran": (meshio.nastran.write, meshio.nastran.read, ["out.bdf"]),
        "OBJ": (meshio.obj.write, meshio.obj.read, ["out.obj"]),
        "OFF": (meshio.off.write, meshio.off.read, ["out.off"]),
        "Permas": (meshio.permas.write, meshio.permas.read, ["out.dato"]),
        "PLY (binary)": (
            lambda f, m: meshio.ply.write(f, m, binary=True),
            meshio.ply.read,
            ["out.ply"],
        ),
        "PLY (ASCII)": (
            lambda f, m: meshio.ply.write(f, m, binary=False),
            meshio.ply.read,
            ["out.ply"],
        ),
        "STL (binary)": (
            lambda f, m: meshio.stl.write(f, m, binary=True),
            meshio.stl.read,
            ["out.stl"],
        ),
        "STL (ASCII)": (
            lambda f, m: meshio.stl.write(f, m, binary=False),
            meshio.stl.read,
            ["out.stl"],
        ),
        # "TetGen": (meshio.tetgen.write, meshio.tetgen.read, ["out.node", "out.ele"],),
        "VTK (binary)": (
            lambda f, m: meshio.vtk.write(f, m, binary=True),
            meshio.vtk.read,
            ["out.vtk"],
        ),
        "VTK (ASCII)": (
            lambda f, m: meshio.vtk.write(f, m, binary=False),
            meshio.vtk.read,
            ["out.vtk"],
        ),
        "VTU (binary, uncompressed)": (
            lambda f, m: meshio.vtu.write(f, m, binary=True, compression=None),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "VTU (binary, zlib)": (
            lambda f, m: meshio.vtu.write(f, m, binary=True, compression="zlib"),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "VTU (binary, LZMA)": (
            lambda f, m: meshio.vtu.write(f, m, binary=True, compression="lzma"),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "VTU (ASCII)": (
            lambda f, m: meshio.vtu.write(f, m, binary=False),
            meshio.vtu.read,
            ["out.vtu"],
        ),
        "Wavefront .obj": (meshio.obj.write, meshio.obj.read, ["out.obj"]),
        # "wkt": ".wkt",
        "XDMF (binary)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="Binary"),
            meshio.xdmf.read,
            ["out.xdmf", "out0.bin", "out1.bin"],
        ),
        "XDMF (HDF, GZIP)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="HDF", compression="gzip"),
            meshio.xdmf.read,
            ["out.xdmf", "out.h5"],
        ),
        "XDMF (HDF, uncompressed)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="HDF", compression=None),
            meshio.xdmf.read,
            ["out.xdmf", "out.h5"],
        ),
        "XDMF (XML)": (
            lambda f, m: meshio.xdmf.write(f, m, data_format="XML"),
            meshio.xdmf.read,
            ["out.xdmf"],
        ),
    }

    # formats = {
    #     # "VTK (ASCII)": formats["VTK (ASCII)"],
    #     # "VTK (binary)": formats["VTK (binary)"],
    #     # "VTU (ASCII)": formats["VTU (ASCII)"],
    #     # "VTU (binary)": formats["VTU (binary)"],
    #     # "Gmsh 4.1 (binary)": formats["Gmsh 4.1 (binary)"],
    #     # "FLAC3D": formats["FLAC3D"],
    #     "MDPA": formats["MDPA"],
    # }

    # max_key_length = max(len(key) for key in formats)

    elapsed_write = []
    elapsed_read = []
    file_sizes = []
    peak_memory_write = []
    peak_memory_read = []

    print()
    print("format " + "write (s) " + "read(s) " + "file size " + "write mem " + "read mem ")
    print()
    with tempfile.TemporaryDirectory() as directory:
        directory = pathlib.Path(directory)
        for name, (writer, reader, filenames) in formats.items():
            filename = directory / filenames[0]

            tracemalloc.start()
            t = time.time()
            writer(filename, mesh)
            # snapshot = tracemalloc.take_snapshot()
            elapsed_write.append(time.time() - t)
            peak_memory_write.append(tracemalloc.get_traced_memory()[1])
            tracemalloc.stop()

            file_sizes.append(sum(os.stat(directory / f).st_size for f in filenames))

            tracemalloc.start()
            t = time.time()
            reader(filename)
            elapsed_read.append(time.time() - t)
            peak_memory_read.append(tracemalloc.get_traced_memory()[1])
            tracemalloc.stop()

            print("{:<26} {:e} {:e} {:e} {:e} {:e}".format(
                name,
                elapsed_write[-1],
                elapsed_read[-1],
                file_sizes[-1] / 1024.0**2,
                peak_memory_write[-1] / 1024.0**2,
                peak_memory_read[-1] / 1024.0**2,
            ))

    names = list(formats.keys())
    # convert to MB
    file_sizes = numpy.array(file_sizes)
    file_sizes = file_sizes / 1024.0**2
    peak_memory_write = numpy.array(peak_memory_write)
    peak_memory_write = peak_memory_write / 1024.0**2
    peak_memory_read = numpy.array(peak_memory_read)
    peak_memory_read = peak_memory_read / 1024.0**2

    if plot:
        plot_speed(names, elapsed_write, elapsed_read)
        plot_file_sizes(names, file_sizes, mem_size)
        plot_memory_usage(names, peak_memory_write, peak_memory_read, mem_size)
def run(args, device, data):
    # Unpack data
    n_classes, train_g, val_g, test_g, train_nfeat, train_labels, \
        val_nfeat, val_labels, test_nfeat, test_labels = data
    in_feats = train_nfeat.shape[1]
    train_nid = th.nonzero(train_g.ndata['train_mask'], as_tuple=True)[0]
    val_nid = th.nonzero(val_g.ndata['val_mask'], as_tuple=True)[0]
    test_nid = th.nonzero(
        ~(test_g.ndata['train_mask'] | test_g.ndata['val_mask']),
        as_tuple=True)[0]
    print("in_feats " + str(in_feats))
    # print("train_g.shape " + str(train_g.shape))
    print("train_labels.shape " + str(train_labels.shape))
    # print("val_g.shape " + str(val_g.shape))

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(',')])
    # see_memory_usage("-----------------------------------------after sampler------------------------")
    dataloader = dgl.dataloading.NodeDataLoader(train_g,
                                                train_nid,
                                                sampler,
                                                batch_size=args.batch_size,
                                                shuffle=True,
                                                drop_last=False,
                                                num_workers=args.num_workers)
    print("args.batch_size " + str(args.batch_size))
    # see_memory_usage("-----------------------------------------after data loader------------------------")

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers,
                 F.relu, args.dropout)
    print(model)
    # see_memory_usage("-----------------------------------------before model to gpu------------------------")
    model = model.to(device)
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # see_memory_usage("-----------------------------------------before start------------------------")

    # Training loop
    avg = 0
    iter_tput = []
    tracemalloc.start()
    CPU_mem("-----------------------------------------before start------------------------")
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        tic_step = time.time()
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            # if step > 1:
            #     break
            # print("*" * 80 + str(step))
            # print("input_nodes.shape " + str(input_nodes.shape))
            # print("output_nodes.shape " + str(seeds.shape))
            # print("blocks.length " + str(len(blocks)))
            # print("blocks.shape " + str(blocks.shape))
            # print(blocks)
            # see_memory_usage("-----------------------------------------step start------------------------")

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                train_nfeat, train_labels, seeds, input_nodes, device)
            CPU_mem("-----------------------------------------before blocks to device")
            # see_memory_usage("-----------------------------------------before blocks to device")
            blocks = [block.int().to(device) for block in blocks]
            CPU_mem("-----------------------------------------after blocks to device")
            # see_memory_usage("-----------------------------------------after blocks to device")
            print("---------------------------------------------------------batch_inputs.shape "
                  + str(batch_inputs.shape))

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            CPU_mem("-----------------------------------------after batch train")
            # see_memory_usage("-----------------------------------------after batch train")
            loss = loss_fcn(batch_pred, batch_labels)
            # see_memory_usage("-----------------------------------------after batch loss")
            CPU_mem("-----------------------------------------after batch loss")
            optimizer.zero_grad()
            loss.backward()
            # see_memory_usage("-----------------------------------------after batch loss backward")
            CPU_mem("-----------------------------------------after batch loss backward")
            optimizer.step()

            # iter_tput.append(len(seeds) / (time.time() - tic_step))
            # if step % args.log_every == 0:
            #     print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format(
            #         epoch, step, loss.item(), 0, np.mean(iter_tput[3:]), 0))
            #     # acc = compute_acc(batch_pred, batch_labels)
            #     # gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0
            #     # print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format(
            #     #     epoch, step, loss.item(), acc.item(), np.mean(iter_tput[3:]), gpu_mem_alloc))
            #     # print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format(
            #     #     epoch, step, loss.item(), 0, np.mean(iter_tput[3:]), gpu_mem_alloc))
            #     # print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB'.format(
            #     #     epoch, step, loss.item(), 0, np.mean(iter_tput[3:]), 0))
            # tic_step = time.time()

        toc = time.time()
        print('Epoch Time(s): {:.4f}'.format(toc - tic))
        if epoch >= 2:
            avg += toc - tic
        # if epoch % args.eval_every == 0 and epoch != 0:
        #     eval_acc = evaluate(model, val_g, val_nfeat, val_labels, val_nid, device)
        #     print('Eval Acc {:.4f}'.format(eval_acc))
        #     test_acc = evaluate(model, test_g, test_nfeat, test_labels, test_nid, device)
        #     print('Test Acc: {:.4f}'.format(test_acc))

    print('Avg epoch time: {}'.format(avg / (epoch - 1)))
    current, peak = tracemalloc.get_traced_memory()
    print(f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB")
    tracemalloc.stop()
def stop(self) -> None:
    """Stop the profiler."""
    self._t0 = self._timefunc()
    tracemalloc.stop()
def wrapper(*args, **kwargs):
    tracemalloc.start()
    result = func(*args, **kwargs)
    print(f"Memory Usage: {tracemalloc.get_traced_memory()}")
    tracemalloc.stop()
    return result
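The wrapper above is the inner function of a decorator; here is a self-contained sketch of a plausible enclosing decorator (the name `profile_memory` and the sample workload are assumptions, not part of the original source).

import functools
import tracemalloc

def profile_memory(func):
    # Hypothetical enclosing decorator for the wrapper shown above.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        tracemalloc.start()
        result = func(*args, **kwargs)
        print(f"Memory Usage: {tracemalloc.get_traced_memory()}")
        tracemalloc.stop()
        return result
    return wrapper

@profile_memory
def build_list(n):
    return list(range(n))

build_list(100_000)  # prints a (current, peak) tuple in bytes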
def on_stop_test(self):
    tracemalloc.stop()