def getMemSize(self):
    """
    Try to guess the size of this job in memory.

    @return: estimated size in bytes, or None when the (optional)
        asizeof module could not be imported.
    """
    # asizeof is presumably an optional module-level import elsewhere in
    # this file — TODO confirm; when it is missing we fall through and
    # implicitly return None.
    if asizeof is None:
        return None
    return asizeof.asizeof(self)
def simsize():
    """
    Return the size of topo.sim as reported by asizeof.asizeof().

    This estimate does not currently include any numpy arrays, and may
    also be missing other important items.  Python 2.6 supports
    getsizeof() and a __sizeof__ attribute that user code can implement,
    which should provide a more accurate estimate.
    """
    # Imported lazily so that merely loading this module does not pull
    # in the profiling dependency.
    import topo
    import asizeof
    sim_bytes = asizeof.asizeof(topo.sim)
    return sim_bytes
def simsize():
    """
    Report the memory footprint of topo.sim via asizeof.asizeof().

    NOTE(review): this estimate skips numpy arrays and may miss other
    important items; Python 2.6's getsizeof()/__sizeof__ protocol would
    give a more accurate number.
    """
    # Deferred imports: keep the asizeof dependency out of module load.
    import asizeof
    import topo
    return asizeof.asizeof(topo.sim)
def _generate_file(distribution, N, outFile, relevanceFile=None): global _sampling_functions genfunc = _sampling_functions[distribution] dataset = [genfunc() for i in range(N)] print "Generating dataset", outFile print "N =", N db = Database() if hasattr(dataset[0], 'featureNames'): db.keys = dataset[0].featureNames() else: for i, f in enumerate(dataset[0].features()): db.keys.append("f" + str(i)) db.keys.append("label") for x in dataset: db.entries.append(x.features() + [x.concept()]) db.writeCSV(outFile) print "Nfeatures =", len(db.entries[0]) - 1 print "Fraction of positive examples =", float( sum(f[-1] for f in db.entries)) / len(db.entries) print "Size of training set (mb)", float(asizeof(db.entries)) / 1024 / 1024 print "Size of training example (bytes)", asizeof(db.entries[0]) if relevanceFile != None: dbr = Database() dbr.keys = [v + "_relevance" for v in db.keys[:-1]] for x in dataset: dbr.entries.append([int(v) for v in x.relevance()]) nrel = 0 ntotal = 0 for e in dbr.entries: nrel += sum(e) ntotal += len(e) print "Fraction of relevant features:", float(nrel) / float(ntotal) dbr.writeCSV(relevanceFile)
import heimdall heimdall = reload(heimdall) import data_flow data_flow = reload(data_flow) import watcher watcher = reload(watcher) import optimizer optimizer = reload(optimizer) from tests import code2 import asizeof, gc heim = heimdall.Heimdall(max_nesting=2, prof_shot_time=4) heim.run('code2.g(5)', globals(), locals()) sizes = [] print "number of objects is ", len(gc.get_objects()) biggest, biggest_size = None, 0 for (i, o) in enumerate(gc.get_objects()): if i % 100 == 0: print i try: size = asizeof.asizeof(o) if size > 1000000: print o, size sizes.append((o, size)) if size > biggest_size: print "new biggest", size, o biggest, biggest_size = o, size except: pass
from pynlpl.statistics import FrequencyList from pynlpl.textprocessors import crude_tokenizer, Classer import sys import codecs import asizeof freqlist = FrequencyList() f = codecs.open(sys.argv[1], 'r','utf-8') for line in f: line = crude_tokenizer(line.strip()) freqlist.append(line) f.close() print "FREQLIST: " ,asizeof.asizeof(freqlist) classer = Classer(freqlist) print "CLASSER: " ,asizeof.asizeof(classer) classer2 = Classer(freqlist, False,True) print "CLASSER (ONLY DECODER): " ,asizeof.asizeof(classer2) freqlist2 = FrequencyList() f = codecs.open(sys.argv[1], 'r','utf-8') for line in f: line = crude_tokenizer(line.strip()) freqlist2.append(classer.encodeseq(line))
data_flow = reload(data_flow) import watcher watcher = reload(watcher) import optimizer optimizer = reload(optimizer) from tests import code2 import asizeof, gc heim = heimdall.Heimdall(max_nesting=2, prof_shot_time=4) heim.run("code2.g(5)", globals(), locals()) sizes = [] print "number of objects is ", len(gc.get_objects()) biggest, biggest_size = None, 0 for (i, o) in enumerate(gc.get_objects()): if i % 100 == 0: print i try: size = asizeof.asizeof(o) if size > 1000000: print o, size sizes.append((o, size)) if size > biggest_size: print "new biggest", size, o biggest, biggest_size = o, size except: pass