def words_generator(n):
    """Generate gibberish words and print timing and size statistics.

    A ``Gibberish`` instance is asked for ``n`` words.  The elapsed wall-clock
    time, the number of words actually returned, the number of distinct words
    and the size reported by ``asizeof.flatsize`` are printed to stdout.

    Args:
        n: Number of words requested from ``Gibberish.generate_words``.

    Returns:
        A ``(words, len_unique)`` tuple: the generated words and the number
        of distinct words among them.
    """
    start = time.time()
    gib = Gibberish()
    words = gib.generate_words(n)
    end = time.time()

    print('Time generating:: {}'.format(end - start))
    print('Real Number of words: {}'.format(len(words)))

    len_unique = len(set(words))
    print('Number of unique words: {}'.format(len_unique))

    # Measure the container once and derive both units from that single
    # reading instead of asking asizeof to size the same object twice.
    size_kb = asizeof.flatsize(words) / 1024
    print('Size of words: {} Mb, {} Kb'.format(size_kb / 1024, size_kb))
    print('________________________________________________________________\n')
    return words, len_unique
def _print_functions(self, obj, name=None, align=8, detail=MAX, code=False,
                     limit=MAX, opt='', **unused):
    '''Print the result of each basic asizeof function applied to ``obj``.

    When ``name`` is given, a header line naming the object (and ``opt``)
    is printed first.  ``align`` is forwarded to ``flatsize()`` and
    ``asized()``; ``detail``, ``code`` and ``limit`` are forwarded to
    ``asized()`` only.  Extra keyword arguments are accepted and ignored.
    '''
    emit = self._printf
    row = '%s(): %s'

    if name:
        emit('%sasizeof functions for %s ... %s', os.linesep, name, opt)

    # Each value is computed and printed before the next one is requested,
    # so the output order matches the call order.
    basic = asizeof.basicsize(obj)
    emit(row, ' basicsize', basic)

    item = asizeof.itemsize(obj)
    emit(row, ' itemsize', item)

    length = asizeof.leng(obj)
    emit('%s(): %r', ' leng', length)  # the only row formatted with %r

    referents = _repr(asizeof.refs(obj))
    emit(row, ' refs', referents)

    flat = asizeof.flatsize(obj, align=align)  # , code=code
    emit(row, ' flatsize', flat)

    sized = asizeof.asized(obj, align=align, detail=detail, code=code,
                           limit=limit)
    emit(row, ' asized', sized)
def _print_functions(self, obj, name=None, align=8, detail=MAX, code=False,
                     limit=MAX, opt='', **unused):
    '''Report what every basic asizeof function returns for ``obj``.

    A header line is printed first if ``name`` is truthy.  ``align`` goes to
    ``flatsize()`` and ``asized()``; ``detail``, ``code`` and ``limit`` go to
    ``asized()`` only.  Any further keyword arguments are ignored.
    '''
    out = self._printf
    fmt = '%s(): %s'

    if name:
        out('%sasizeof functions for %s ... %s', os.linesep, name, opt)

    # Compute-then-print, one function at a time, in the original order.
    basic_size = asizeof.basicsize(obj)
    out(fmt, ' basicsize', basic_size)

    item_size = asizeof.itemsize(obj)
    out(fmt, ' itemsize', item_size)

    n_items = asizeof.leng(obj)
    out('%s(): %r', ' leng', n_items)  # leng() is shown with %r

    refs_text = _repr(asizeof.refs(obj))
    out(fmt, ' refs', refs_text)

    flat_size = asizeof.flatsize(obj, align=align)  # , code=code
    out(fmt, ' flatsize', flat_size)

    asized_obj = asizeof.asized(obj, align=align, detail=detail, code=code,
                                limit=limit)
    out(fmt, ' asized', asized_obj)
def test_flatsize(failf=None, stdf=None):
    '''Compare the results of **flatsize()** without using
    ``sys.getsizeof()`` with the accurate sizes returned by
    ``sys.getsizeof()``.

    Return the total number of tests and number of unexpected failures.

    Expect differences for sequences such as dicts, lists, sets, tuples,
    etc.  While this is no proof for the accuracy of **flatsize()** on
    Python builds without ``sys.getsizeof()``, it does provide some
    evidence that function **flatsize()** produces reasonable and usable
    results.

    *failf* -- optional callable, invoked with a %-style format string and
    its arguments for every unexpected mismatch.

    *stdf* -- optional file object; when given, every mismatch (expected
    or not) is printed to it.

    If ``asizeof._getsizeof`` is not set, nothing is tested and ``(0, 0)``
    is returned.  ``asizeof._getsizeof`` is cleared only for the duration
    of the comparison and is restored even if the comparison raises.
    '''
    t, g, e = [], asizeof._getsizeof, 0
    if g:
        for v in asizeof._values(asizeof._typedefs):
            t.append(v.type)
            try:  # creating one instance
                if v.type.__module__ not in ('io',):  # avoid 3.0 RuntimeWarning
                    t.append(v.type())
            except Exception:  # ignore errors: not every type has a no-arg constructor
                pass
        t.extend(({1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8},
                  [1, 2, 3, 4, 5, 6, 7, 8], ['1', '2', '3'], [0] * 100,
                  '12345678', 'x' * 1001,
                  (1, 2, 3, 4, 5, 6, 7, 8), ('1', '2', '3'), (0,) * 100,
                  asizeof._Slots((1, 2, 3, 4, 5, 6, 7, 8)),
                  asizeof._Slots(('1', '2', '3')),
                  asizeof._Slots((0,) * 100),
                  0, 1 << 8, 1 << 16, 1 << 32, 1 << 64, 1 << 128,
                  complex(0, 1), True, False))

        asizeof._getsizeof = None  # zap _getsizeof for flatsize()
        try:
            for o in t:
                a = asizeof.flatsize(o)
                s = sys.getsizeof(o, 0)  # 0 as default
                if a != s:
                    if isinstance(o, (dict, list, set, frozenset, tuple, bytes)):
                        # flatsize() approximates length of sequences
                        x = ', expected failure'
                    elif (isinstance(o, (type, bool, asizeof.ABCMeta)) and
                          sys.version_info >= (3, 0)):
                        x = ', expected failure'
                    elif isinstance(o, str) and sys.version_info >= (3, 3):
                        x = ', expected failure'
                    elif isinstance(o, deque) and sys.version_info >= (3, 4):
                        x = ', expected failure'
                    else:
                        x = ', %r' % asizeof._typedefof(o)
                        e += 1
                        if failf:  # report failure
                            failf('%s vs %s for %s: %s',
                                  a, s, asizeof._nameof(type(o)), _repr(o))
                    if stdf:
                        asizeof._printf('flatsize() %s vs sys.getsizeof() %s for %s: %s%s',
                                        a, s, asizeof._nameof(type(o)), _repr(o), x, file=stdf)
        finally:
            # Restore unconditionally: an exception raised above must not
            # leave the asizeof module with _getsizeof disabled.
            asizeof._getsizeof = g
    return len(t), e
def test_flatsize(self):
    '''Test asizeof.flatsize()

    For each of list, tuple, set and frozenset the flat size must equal
    the basic size plus length times item size; an alignment of 3 must be
    rejected with ValueError.
    '''
    sample = ["spam", 2, 3, 4, "eggs", 6, 7, 8]

    for container in (list, tuple, set, frozenset):
        data = container(sample)
        basic = asizeof.basicsize(data)
        item = asizeof.itemsize(data)
        length = asizeof.leng(data)
        flat = asizeof.flatsize(data)
        expected = basic + (length * item)
        # The tuple is passed as the failure message to ease debugging.
        self.assertEqual(flat, expected, (flat, basic, length, item))

    with self.assertRaises(ValueError):
        asizeof.flatsize(sample, align=3)
def __init__(self, dict_of_classes_with_training_files, max_elements=2000000,
             error_rate=0.00001):
    """Create and train one Bloom filter per class.

    Args:
        dict_of_classes_with_training_files: Mapping whose keys are class
            (model) names and whose values are iterables of training file
            paths.
        max_elements: Forwarded to ``BloomFilter`` as ``max_elements``.
        error_rate: Forwarded to ``BloomFilter`` as ``error_rate``.

    For every class a fresh filter is stored in ``self.blooms``, its size is
    printed, every word produced by ``self.text_preprocessing`` for each
    training file is added to it, and the total training time is printed.
    """
    self.blooms = {}
    self.classification_result = []
    english_stop_words = stopwords.words("english") + get_stop_words('english')
    self.stop_words = set(english_stop_words)

    for model in dict_of_classes_with_training_files:
        bloom = BloomFilter(max_elements=max_elements, error_rate=error_rate)
        self.blooms[model] = bloom
        size_kb = asizeof.flatsize(bloom) / 1000
        print('Size of initializing Bloom Filter: {} Kb'.format(size_kb))

        # Train the filter for this class on each of its files.
        start_overall = time.time()
        training_files = dict_of_classes_with_training_files[model]
        for file_path in training_files:
            with open(file_path, errors='ignore') as handle:
                # Only consumed by a per-file timing print that is disabled.
                start = time.time()
                text = " ".join(handle.readlines())
                source_name = os.path.basename(file_path)
                for word in self.text_preprocessing(text, source_name):
                    bloom.add(word)

        elapsed = time.time() - start_overall
        print('\nTime for training model {}: {}'.format(model, elapsed))
        print('________________________________________________________________\n')
quit(2) print("------------------------------------------------------------") print("Controller capabilities : {}".format(network.controller.capabilities)) print("Controller node capabilities : {}".format(network.controller.node.capabilities)) print("------------------------------------------------------------") print("Driver statistics : {}".format(network.controller.stats)) print("------------------------------------------------------------") print("Nodes in network : {}".format(network.nodes_count)) print("------------------------------------------------------------") print("Memory use : ") print("------------------------------------------------------------") print("Memory use for network {} : ".format(network.home_id_str)) print(" asizeof : {} bytes".format(asizeof(network))) print(" basicsize : {} bytes".format(basicsize(network))) print(" itemsize : {} bytes".format(itemsize(network))) print(" flatsize : {} bytes".format(flatsize(network))) print("------------------------------------------------------------") manager = network.manager print("Memory use for manager : ") print(" asizeof : {} bytes".format(asizeof(manager))) print(" basicsize : {} bytes".format(basicsize(manager))) print(" itemsize : {} bytes".format(itemsize(manager))) print(" flatsize : {} bytes".format(flatsize(manager))) print("------------------------------------------------------------") print("Memory use for controller : ") print(" asizeof : {} bytes".format(asizeof(network.controller))) print(" basicsize : {} bytes".format(basicsize(network.controller))) print(" itemsize : {} bytes".format(itemsize(network.controller))) print(" flatsize : {} bytes".format(flatsize(network.controller))) print("------------------------------------------------------------") print("Memory use for {} scenes (scenes are generated on call) : ".format(network.scenes_count))
print("------------------------------------------------------------") print("Controller capabilities : {}".format(network.controller.capabilities)) print("Controller node capabilities : {}".format( network.controller.node.capabilities)) print("------------------------------------------------------------") print("Driver statistics : {}".format(network.controller.stats)) print("------------------------------------------------------------") print("Nodes in network : {}".format(network.nodes_count)) print("------------------------------------------------------------") print("Memory use : ") print("------------------------------------------------------------") print("Memory use for network {} : ".format(network.home_id_str)) print(" asizeof : {} bytes".format(asizeof(network))) print(" basicsize : {} bytes".format(basicsize(network))) print(" itemsize : {} bytes".format(itemsize(network))) print(" flatsize : {} bytes".format(flatsize(network))) print("------------------------------------------------------------") manager = network.manager print("Memory use for manager : ") print(" asizeof : {} bytes".format(asizeof(manager))) print(" basicsize : {} bytes".format(basicsize(manager))) print(" itemsize : {} bytes".format(itemsize(manager))) print(" flatsize : {} bytes".format(flatsize(manager))) print("------------------------------------------------------------") print("Memory use for controller : ") print(" asizeof : {} bytes".format(asizeof(network.controller))) print(" basicsize : {} bytes".format(basicsize(network.controller))) print(" itemsize : {} bytes".format(itemsize(network.controller))) print(" flatsize : {} bytes".format(flatsize(network.controller))) print("------------------------------------------------------------") print("Memory use for {} scenes (scenes are generated on call) : ".format(
def to_db(self, server = 'beavis.ph.utexas.edu'):
    """Store this object's attributes as one document in MongoDB.

    Python 2 code (print statements, ``dict.iteritems``).

    Steps, in order:
      1. Build ``sim_blob`` from fixed author/tags/date metadata plus the
         entries of ``self.__dict__``.
      2. Drop the keys listed in ``cutlist``.
      3. Pass the result through ``self.serialize``.
      4. Insert it into the ``alpha_runs`` collection of ``new_database``
         on ``server``, replacing any existing document with the same
         ``path`` or ``md5`` when the insert hits a duplicate key.

    Size diagnostics are printed along the way.

    server -- MongoDB host passed to ``MongoClient(host=...)``.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been collapsed -- confirm against the original file.
    """
    print 'in to_db'
    sim_blob = {'author': 'Dmitry', 'tags': ['fusion', 'in', '50 years'], 'date': datetime.utcnow()}
    for key, value in self.__dict__.iteritems():
        # Best-effort size report; fall back to the bare key if sizing fails.
        try:
            print key, value.__class__, sys.getsizeof(value), asizeof(value)
        except:
            print key
        if type(value) not in ban_types:
            sim_blob[key] = value
        elif type(value) == type(np.array([12])):
            # numpy arrays whose type is listed in ban_types
            print key, value.nbytes
            if value.ndim == 1:
                # 1-D arrays are stored as plain lists
                sim_blob[key] = value.tolist()
            elif value.size == 1:
                sim_blob[key] = value
            else:
                print 'adding to the db obj', key, type(value), value.shape, value.size
                # multi-dimensional arrays are kept only below this element count
                if value.size< 1600000:
                    sim_blob[key] = value
                else:
                    print 'nevermind, ', key, ' is too big'
        # any other banned type falls through and is not stored
    print 'before: ', asizeof(sim_blob), ' ', sys.getsizeof(sim_blob), ' ', flatsize(sim_blob)
    cutlist = ['pos_i','kx','ky','pdf_x','df_x','v']
    #cutlist = ['pos_i','kx','ky','pdf_x','df_x','v']
    #cutlist = ['t_stop','dx']
    # keep_list is referenced only by the disabled code further down
    keep_list = ['nz','nx','alpha_c']
    for k in cutlist:
        # sim_blob[k] raises KeyError for an absent key, which lands in the
        # except branch before pop() is reached
        try:
            print 'k: ',(sim_blob[k])
            sim_blob.pop(k, None)
        except:
            print 'not found'
    #new_blob = {}
    # for k in keep_list:
    #     new_blob.pop(k, None)
    # print 'look for bugs'
    # for k in sim_blob:
    #     print 'trying: ',k#,sim_blob[k]
    #     new_blob[k] = sim_blob[k]
    # ser_dict = self.serialize(new_blob)
    # #print sys.getsizeof(new_dict), asizeof(ser_dict), ' ', asizesof(sim_blob)
    # print BSON.encode(ser_dict).__sizeof__()
    # exit()
    #sim_blob = new_blob
    print 'after: ', asizeof(sim_blob), ' ', sys.getsizeof(sim_blob), ' ', flatsize(sim_blob)
    print 'coarse_x' in sim_blob
    # 'df_x' is in cutlist, so it has been removed by now if it was present
    print 'df_x' in sim_blob
    #print type(sim_blob['coarse_x']), sim_blob['coarse_x'][0].__class__
    #exit()
    ser_dict = self.serialize(sim_blob)
    # for elem in ser_dict:
    #     print elem,': ',type(ser_dict[elem])m
    #print ser_dict
    # NOTE(review): the last value uses asizesof() (plural) on sim_blob, not
    # ser_dict -- confirm this is intended and that asizesof is imported.
    print sys.getsizeof(ser_dict), asizeof(ser_dict), ' ', asizesof(sim_blob)
    print BSON.encode(ser_dict).__sizeof__()
    #exit()
    c = MongoClient(host=server)
    db = c.new_database
    print 'db info'
    print db.command('collstats', 'alpha_runs')
    alpha_runs = db.alpha_runs
    # unique index on 'md5' is what makes a duplicate insert raise below
    alpha_runs.ensure_index('md5', unique=True, dropDups=True)
    #db.things.ensureIndex({'source_references.key' : 1}, {unique : true, dropDups : true})
    try:
        #print ser_dict.keys()
        alpha_runs.insert(ser_dict, db)
    except mongoErr.DuplicateKeyError:
        # Replace the stored run: remove documents matching either the path
        # or the md5 of this run, then insert again.
        alpha_runs.remove({'path': ser_dict['path']})
        alpha_runs.remove({'md5': ser_dict['md5']})
        #alpha_runs.remove({ "$and" :[{'path': ser_dict['path']},{'md5': ser_dict['md5']}]})
        alpha_runs.insert(ser_dict, db)
        print 'Duplicate run not adding to db, but updating'
def size(obj):
    """Return the size of ``obj`` as reported by ``asizeof.flatsize``."""
    flat_size = asizeof.flatsize(obj)
    return flat_size
def test_flatsize(failf=None, stdf=None):
    '''Compare the results of **flatsize()** without using
    ``sys.getsizeof()`` with the accurate sizes returned by
    ``sys.getsizeof()``.

    Return the total number of tests and number of unexpected failures.

    Expect differences for sequences such as dicts, lists, sets, tuples,
    etc.  While this is no proof for the accuracy of **flatsize()** on
    Python builds without ``sys.getsizeof()``, it does provide some
    evidence that function **flatsize()** produces reasonable and usable
    results.

    *failf* -- optional callable, invoked with a %-style format string and
    its arguments for every unexpected mismatch.

    *stdf* -- optional file object; when given, every mismatch (expected
    or not) is printed to it.

    If ``asizeof._getsizeof`` is not set, nothing is tested and ``(0, 0)``
    is returned.  ``asizeof._getsizeof`` is cleared only for the duration
    of the comparison and is restored even if the comparison raises.
    '''
    t, g, e = [], asizeof._getsizeof, 0
    if g:
        # Iterate over a copy of the typedefs mapping, as the original does.
        for v in asizeof._typedefs.copy().values():
            t.append(v.type)
            try:  # creating one instance
                if v.type.__module__ not in ('io', ):  # avoid 3.0 RuntimeWarning
                    t.append(v.type())
            except Exception:  # ignore errors: not every type has a no-arg constructor
                pass
        t.extend(
            ({1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8},
             [1, 2, 3, 4, 5, 6, 7, 8], ['1', '2', '3'], [0] * 100,
             '12345678', 'x' * 1001,
             (1, 2, 3, 4, 5, 6, 7, 8), ('1', '2', '3'), (0, ) * 100,
             asizeof._Slots((1, 2, 3, 4, 5, 6, 7, 8)),
             asizeof._Slots(('1', '2', '3')),
             asizeof._Slots((0, ) * 100),
             0, 1 << 8, 1 << 16, 1 << 32, 1 << 64, 1 << 128,
             complex(0, 1), True, False))

        asizeof._getsizeof = None  # zap _getsizeof for flatsize()
        try:
            for o in t:
                a = asizeof.flatsize(o)
                s = sys.getsizeof(o, 0)  # 0 as default
                if a != s:
                    if isinstance(o, (dict, list, set, frozenset, tuple, bytes)):
                        # flatsize() approximates length of sequences
                        x = ', expected failure'
                    elif (isinstance(o, (type, bool, asizeof.ABCMeta))
                          and sys.version_info >= (3, 0)):
                        x = ', expected failure'
                    elif isinstance(o, str) and sys.version_info >= (3, 3):
                        x = ', expected failure'
                    elif isinstance(o, deque) and sys.version_info >= (3, 4):
                        x = ', expected failure'
                    else:
                        x = ', %r' % asizeof._typedefof(o)
                        e += 1
                        if failf:  # report failure
                            failf('%s vs %s for %s: %s', a, s,
                                  asizeof._nameof(type(o)), _repr(o))
                    if stdf:
                        asizeof._printf(
                            'flatsize() %s vs sys.getsizeof() %s for %s: %s%s',
                            a, s, asizeof._nameof(type(o)), _repr(o), x,
                            file=stdf)
        finally:
            # Restore unconditionally: an exception raised above must not
            # leave the asizeof module with _getsizeof disabled.
            asizeof._getsizeof = g
    return len(t), e
quit(2) print "------------------------------------------------------------" print "Controller capabilities : %s" % network.controller.capabilities print "Controller node capabilities : %s" % network.controller.node.capabilities print "------------------------------------------------------------" print "Driver statistics : %s" % network.controller.stats print "------------------------------------------------------------" print "Nodes in network : %s" % network.nodes_count print "------------------------------------------------------------" print "Memory use : " print "------------------------------------------------------------" print "Memory use for network %s : " % (network.home_id_str) print " asizeof : %s bytes" % (asizeof(network)) print " basicsize : %s bytes" % (basicsize(network)) print " itemsize : %s bytes" % (itemsize(network)) print " flatsize : %s bytes" % (flatsize(network)) print "------------------------------------------------------------" manager = network.manager print "Memory use for manager : " print " asizeof : %s bytes" % (asizeof(manager)) print " basicsize : %s bytes" % (basicsize(manager)) print " itemsize : %s bytes" % (itemsize(manager)) print " flatsize : %s bytes" % (flatsize(manager)) print "------------------------------------------------------------" print "Memory use for controller : " print " asizeof : %s bytes" % (asizeof(network.controller)) print " basicsize : %s bytes" % (basicsize(network.controller)) print " itemsize : %s bytes" % (itemsize(network.controller)) print " flatsize : %s bytes" % (flatsize(network.controller)) print "------------------------------------------------------------" print "Memory use for %s scenes (scenes are generated on call) : " % (
quit(2) print "------------------------------------------------------------" print "Controller capabilities : %s" % network.controller.capabilities print "Controller node capabilities : %s" % network.controller.node.capabilities print "------------------------------------------------------------" print "Driver statistics : %s" % network.controller.stats print "------------------------------------------------------------" print "Nodes in network : %s" % network.nodes_count print "------------------------------------------------------------" print "Memory use : " print "------------------------------------------------------------" print "Memory use for network %s : " %(network.home_id_str) print " asizeof : %s bytes" %(asizeof(network)) print " basicsize : %s bytes" %(basicsize(network)) print " itemsize : %s bytes" %(itemsize(network)) print " flatsize : %s bytes" %(flatsize(network)) print "------------------------------------------------------------" manager = network.manager print "Memory use for manager : " print " asizeof : %s bytes" %(asizeof(manager)) print " basicsize : %s bytes" %(basicsize(manager)) print " itemsize : %s bytes" %(itemsize(manager)) print " flatsize : %s bytes" %(flatsize(manager)) print "------------------------------------------------------------" print "Memory use for controller : " print " asizeof : %s bytes" %(asizeof(network.controller)) print " basicsize : %s bytes" %(basicsize(network.controller)) print " itemsize : %s bytes" %(itemsize(network.controller)) print " flatsize : %s bytes" %(flatsize(network.controller)) print "------------------------------------------------------------" print "Memory use for %s scenes (scenes are generated on call) : " %(network.scenes_count)