def generate_regular(size=100, zipf_distribution=2, rate=1.0, seed=0.0, output_filename=None):
    if output_filename is None:
        output_filename = './workloads/regular_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    # get IPs/IDs from the Ethereum repo
    ip_file = open('./workloads/ips.txt', "r")
    id_file = open('./workloads/ids.txt', "r")
    rand.seed(seed)
    topics = random.zipf(a=zipf_distribution, size=size)  # Zipf-distributed topic ranks
    t_next_req = 0.0  # time of next request
    with open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for i in range(0, size):
            t_next_req += rand.expovariate(rate)
            record = {}
            ip = ip_file.readline().rstrip()
            iD = id_file.readline().rstrip()
            if not ip or not iD:
                print("Not enough IPs/IDs in the files")
                exit(1)
            #record['time'] = int(1000*t_next_req)
            record['time'] = int(10 * i)
            record['id'] = iD
            record['ip'] = ip
            record['topic'] = 't' + str(topics[i])
            record['attack'] = 0
            #print(record)
            dict_writer.writerow(record)
    print("Generated regular workload in", str(output_filename))
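# A minimal usage sketch (an assumption, not part of the original module): run from
# the repo root with ./workloads/ips.txt and ./workloads/ids.txt populated, and with
# `import csv`, `import random as rand`, and `from numpy import random` in scope.
generate_regular(size=500, zipf_distribution=2, rate=1.0, seed=42)
# -> writes ./workloads/regular_size500_dist2.csv, one Zipf-distributed topic request per row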
def __call__(self, options=None):
    options = self._parse_options(options)
    # decode distribution parameter
    alpha = self._decode(options['alpha'])
    val = zipf(alpha) - 1
    return val
def randhouse(house):
    # Zipf sample with some randomness in the skew; keep a between 1 and 2,
    # because a larger exponent makes a single value far too popular
    x = zipf(a=1.01 + random.random() / 2, size=1000)
    # return a number between 1 and 10, shifting popularity between houses
    # so that some houses are more popular than others
    return (
        10
        - (int(random.choice(x[x < 6])) * 2 - random.randint(0, 1))
        - int(house / (len(houses) - 1) * 8)
        - random.randint(-1, 1)
    ) % 10 + 1.0
def get_companysizes(a, n, max=35000):
    """
    Draw the sizes of n companies from a Zipf distribution.

    :param a: float, exponent of the Zipf distribution
    :param n: int, number of companies
    :param max: float, maximum size, for Munich equal to BMW with roughly 35000 employees
    :return: array of n company sizes, capped at max
    """
    # cap the heavy Zipf tail at the given maximum size
    return npr.zipf(a, size=n).clip(max=max)
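# A hedged usage sketch (assumes `import numpy.random as npr`; the exponent below is
# invented): with a heavy-tailed exponent most firms are tiny, and the cap keeps the
# few giants at or below the BMW-sized maximum.
sizes = get_companysizes(a=1.2, n=1000)
print(sizes.min(), sizes.max())  # max() is guaranteed <= 35000 after capping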
def generate_first_descriptions(dictionary):
    # randomly generate a set of first descriptions
    first_descriptions = []
    for i in range(NUM_MUTANTS):
        first_desc = []
        # randomly appending words from the dictionary
        for j in range(DESCRIPTION_LEN):
            zipf_val = rnd.zipf(ZIPF_LAW_CONST)
            while zipf_val > len(dictionary):
                # just in case our zipf value is absurdly big
                zipf_val = rnd.zipf(ZIPF_LAW_CONST)
            first_desc.append(dictionary[int(zipf_val) - 1])
        first_descriptions.append(first_desc)
    return first_descriptions
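# A minimal call sketch (assumption: `rnd` is numpy.random, and the module globals
# look roughly like this; none of these values come from the original):
import numpy.random as rnd
NUM_MUTANTS, DESCRIPTION_LEN, ZIPF_LAW_CONST = 10, 8, 1.5
descs = generate_first_descriptions(["the", "of", "and", "to", "a", "in"])
# low Zipf ranks dominate, so early dictionary entries ("the", "of") appear most often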
def setZipfian(self, para_a, sequence, size, scale, a_change):
    for i in range(0, len(sequence)):
        sequence[i] = int(round(1.0 * sequence[i] * scale))
        # sequence[i] = 1.0 * sequence[i] * scale
    result = []
    count = 0
    # rejection-sample Zipf ranks until `size` of them land inside the sequence
    while count < size:
        one = zipf(para_a)
        if one >= 1 and one <= len(sequence):
            result.append(sequence[one - 1])
            count += 1
    self.data = result
    self.distribution = 'zipfian'
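# A standalone sketch (an addition, not from the original) of the rejection idea used
# above: Zipf ranks falling outside [1, len(sequence)] are simply re-drawn.
from numpy.random import zipf as _zipf
seq = [20, 40, 60, 80]            # an already-scaled sequence
picks = []
while len(picks) < 10:
    rank = _zipf(1.5)
    if 1 <= rank <= len(seq):     # keep only ranks that index into the sequence
        picks.append(seq[rank - 1])
print(picks)                      # dominated by seq[0], since rank 1 is most likely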
def write_schedule_to(TCL):
    TCL.write("# scheduling\n")
    for app in list_of_apps:
        if app[1] == "zipf":
            target_volume = app[4]
            current_volume = 0
            while current_volume < target_volume:
                volume = npr.zipf(1.5) % target_volume
                current_volume = current_volume + volume
                # random value between 0 and 4 minutes
                time = npr.rand() * 240
                TCL.write("$ns at " + str(time) + " \"$app" + app[2] + app[3] + \
                          " send " + str(volume) + "\"\n")
        else:  # app[1] == "onoff"
            # On/Off model: we just need to schedule the start
            time = npr.rand() * 240
            TCL.write("$ns at " + str(time) + " \"$app" + app[2] + app[3] + \
                      " start\"\n")
    TCL.write("$ns at 300.0 \"finish\"\n")
    TCL.write("\n")
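# A hedged usage sketch; the list_of_apps layout is inferred from the indexing above
# (app[1] = traffic model, app[2]/app[3] = node suffixes, app[4] = target volume) and
# all concrete values below are invented:
import numpy.random as npr
list_of_apps = [("app", "zipf", "0", "1", 5000), ("app", "onoff", "2", "3", 0)]
with open("schedule.tcl", "w") as TCL:
    write_schedule_to(TCL)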
def get_random_accesses(amount):
    return zipf(NumDocuments, amount)
def zipf(size, params):
    try:
        return random.zipf(params['a'], size)
    except ValueError as e:
        exit(e)
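# A usage sketch for the wrapper above (assumes `from numpy import random` and the
# builtin `exit`): numpy rejects a <= 1 with a ValueError, which becomes a clean exit.
samples = zipf(1000, {'a': 2.0})  # 1000 Zipf draws with exponent 2.0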
def generate_attack_topic(size=100, zipf_distribution=2, topic_to_attack='t11',
                          attacker_ip_num=3, attacker_id_num=10,
                          rate_normal=1.0, rate_attack=10.0, seed=0.0,
                          output_filename=None):
    if output_filename is None:
        output_filename = './workloads/attack_topic_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    # get IPs/IDs from the Ethereum repo
    ip_file = open('./workloads/ips.txt', "r")
    id_file = open('./workloads/ids.txt', "r")
    topics = random.zipf(a=zipf_distribution, size=size)  # Zipf-distributed topic ranks
    attacker_ips = []
    for i in range(0, attacker_ip_num):
        num = int(255 / attacker_ip_num * i)
        ip = str(num) + "." + str(num) + "." + str(num) + "." + str(num)
        attacker_ips.append(ip)
    attacker_ids = []
    for i in range(0, attacker_id_num):
        attacker_ids.append(''.join([str(i)] * 20))
    print("attacker ips:", attacker_ips)
    print("attacker ids:", attacker_ids)
    rand.seed(seed)
    t_next_normal_req = rand.expovariate(rate_normal)  # time of next normal request
    t_next_attack_req = rand.expovariate(rate_attack)  # time of next attack request
    time = 0.0
    attack = 0
    with open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for i in range(0, size):
            # the earlier of the two pending events decides the request type
            if t_next_normal_req < t_next_attack_req:
                attack = 0
                time = t_next_normal_req
            elif t_next_normal_req > t_next_attack_req:
                attack = 1
                time = t_next_attack_req
            record = {}
            if attack == 0:
                ip = ip_file.readline().rstrip()
                iD = id_file.readline().rstrip()
                if not ip or not iD:
                    print("Not enough IPs/IDs in the files")
                    exit(1)
                topic = 't' + str(topics[i])
            else:  # attack == 1
                ip = attacker_ips[i % attacker_ip_num]
                iD = attacker_ids[i % attacker_id_num]
                topic = topic_to_attack
            record['time'] = int(10 * i)
            record['id'] = iD
            record['ip'] = ip
            record['topic'] = topic
            record['attack'] = attack
            #print(record)
            dict_writer.writerow(record)
            if time == t_next_normal_req:
                t_next_normal_req += rand.expovariate(rate_normal)
            if time == t_next_attack_req:
                t_next_attack_req += rand.expovariate(rate_attack)
    print("Generated attack workload in", str(output_filename))
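# A hedged usage sketch, mirroring generate_regular above (same assumptions about
# ./workloads/ips.txt, ./workloads/ids.txt, and the csv/rand/random imports):
generate_attack_topic(size=500, zipf_distribution=2, topic_to_attack='t11',
                      rate_normal=1.0, rate_attack=10.0, seed=42)
# honest rows draw Zipf-distributed topics; attack rows all target 't11' and rotate
# through the synthetic attacker IPs/IDs, and since rate_attack > rate_normal the
# attack process wins most of the next-event comparisons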
def zipfgen():
    tok_it = _strseq()
    # map each Zipf-distributed rank to a stable token, minting new tokens on demand
    vocab = defaultdict(lambda: next(tok_it))
    while True:
        yield vocab[zipf(2.0)]
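# A usage sketch (assumes `from numpy.random import zipf`, `from collections import
# defaultdict`, and some `_strseq()` generator of fresh token strings, as the
# function above implies):
from itertools import islice
tokens = list(islice(zipfgen(), 20))  # a 20-token sample with Zipf-shaped frequencies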
def np_zipf_distribution():
    # a 2x3 array of Zipf draws with exponent a=2
    x = random.zipf(a=2, size=(2, 3))
    print(x)
    # 1000 draws, plotting only values below 10 for a readable chart
    x = random.zipf(a=2, size=1000)
    sns.distplot(x[x < 10], kde=False)
    plt.show()
def _rvs(self, a):
    return mtrand.zipf(a, size=self._size)
""" Zipf's law: In a collection the nth common term is 1/n times of the most common term. e.g. 5th common word in english has occur nearly 1/5 th times as most of the most used word. """ # It has two parameters. # a - distribution parameter # size - the shape of the returned array # draw out a sample for zipf distribution with distribution parameter 2 with size 2x3 from numpy import random x = random.zipf(a=2, size=(2, 3)) print(x) # visualization of zipf distribution # sample 1000 points but plotting only ones with value < 10 for more meanngful chart. # from numpy import random import matplotlib.pyplot as plt import seaborn as sns x = random.zipf(a=2, size=1000) sns.distplot(x[x < 10], kde=False)
pattern_meas = re.compile(r"^(\d+)\s+(\d+)\s+([-]?\d+)$", re.VERBOSE | re.MULTILINE)
if file_name.split(".")[0] == 'amazon':
    pattern_meas = re.compile(r"^(\d+)\s+(\d+)", re.VERBOSE | re.MULTILINE)
for match in pattern_meas.finditer(text):
    nodes_list.append("%s" % int(match.group(1)))
    nodes_list.append("%s" % int(match.group(2)))
# count how often each node appears (each edge contributes two entries)
for node in nodes_list:
    count = frequency.get(node, 0)
    frequency[node] = count + 1
node_ocurr = []
s = []
for key, value in reversed(sorted(frequency.items(), key=itemgetter(1))):
    node_ocurr.append([key, value // 2])
    s.append(zipf(2., value // 2))
a = 2
s = zipf(a, 10)
# rescale the raw Zipf samples into the range (0, 5]
result = (s / float(max(s))) * 5
for i in range(len(result)):
    print(result[i])
    print(s[i])
    print('------')
print(min(s), max(s))
print(min(result), max(result))
def calculaTiempoSesion(self):
    rho = 2
    a = rand.zipf(rho)
    return a
print " Payoffs: ", pprint.pprint(self.historyPayoffs) print " Demes: ", pprint.pprint(self.historyDemes) print "Born: " + str(self.born) print "Rounds Alive: " + str(self.roundsAlive) print "Times observed: " + str(self.nObserved) print "Current Deme: " + str(self.currentDeme) print "Points Earned: " + str(self.pointsEarned) print "Number of Offspring: " + str(self.nOffspring) # Initialize structures in model fitness = [] # fitness landscape for i in range(3): # tmp = [round(2*random.expovariate(lambd)**2) for x in range(nact)] tmp = [round(2*npr.zipf(alpha)) for x in range(nact)] # tmp = [round(2*random.lognormvariate(lgmu,lgsd)**2) for x in range(nact)] fitness.append(tmp) aliveAgents = [] Agents = [] outputFH.write("generation,strategy,nAgents,nInnovate,nObserve,nExploit,nRefine,totalPayoffs,avgLifespan,stdLifespan,medLifespan,maxLifespan\n") # Initialize stats class statsDict: Name = "statsDict" def __init__(self): self.aliveAgents = 0 self.innovate = 0 self.observe = 0
def zipfTime(self):
    i = random.zipf(1.3)
    return i
from numpy.random import zipf
from math import ceil

privacy_level = []
a = 1.47
s = zipf(a, 50000)
# rescale the raw Zipf samples into the range (0, 5]
zipf_dis = (s / float(max(s))) * 5
privacy_level = [x for x in zipf_dis]
privacy_level.sort()
privacy_level.reverse()
for item in privacy_level:
    print(int(ceil(item)))
print(len(privacy_level))
def create_zipf_firm(self):
    ref = {}
    random.seed()
    x = zipf(a=4, size=self.num_firms)
    ref[self.name] = x
    return ref
# Zipf distributions are used to sample data according to Zipf's law.
# Zipf's law: in a collection, the nth most common term occurs 1/n times as often
# as the most common term. E.g. the 5th most common word in English occurs nearly
# 1/5 as often as the most used word.
# It has two parameters:
# a    - distribution parameter
# size - shape of the returned array
from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns

arr1 = random.zipf(a=2, size=1000)
print(arr1)
sns.distplot(arr1[arr1 < 25], kde=False)  # shows how many of the 1000 values fall below 25
plt.show()
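# Note (an addition): sns.distplot is deprecated since seaborn 0.11; on newer
# versions the equivalent plot of the same data is:
#   sns.histplot(arr1[arr1 < 25], discrete=True)
#   plt.show()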
'''
Some more points related to the Zipf distribution:

1. In probability, Zipf's law is viewed as relating the frequency (f) with which
   an event occurs to its rank (r).
2. The law was proposed by the American linguist George Kingsley Zipf (1902-1950).
3. He formulated it to describe the frequency with which any word occurs in
   English; today it is very popular and very useful in Machine Learning.
4. Likewise, in 1949 Zipf claimed under the same law that the largest city in a
   country is twice the size of the second city, three times the size of the
   third, and so on. But the law only fits well for language; in some other
   cases it does not fit properly.

For more information:
1. https://www.sciencedirect.com/topics/computer-science/zipf-distribution
2. https://www.nngroup.com/articles/zipf-curves-and-website-popularity/
3. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4176592/
4. https://plus.maths.org/content/mystery-zipf
'''
import numpy.random as r
import matplotlib.pyplot as plt
import seaborn as sns

zipf = r.zipf(a=2, size=(1000))
# print('\n', zipf)
# print('\n', zipf[zipf < 10]); exit()
sns.distplot(zipf, hist=False)
plt.xlabel('Rank X')
plt.ylabel('Frequency Y')
plt.title('Zipf Distribution')
# plt.xlim(0, 100)
# plt.ylim(0, 100)
plt.show()
import unittest
import random
import sys

from numpy.random import zipf

sys.path.append("../")
import ARC, CLOCK, LRU, LFU, LRU3, LRU10
from cache import Cache
from common import Entry

CACHESIZE = 500
NUMREQUESTS = 10000
key_alpha = 1.33
keydistribution = zipf(key_alpha, NUMREQUESTS)


class TestAlgorithms(unittest.TestCase):
    def setUp(self):
        pass

    def test_algorithm(self, name=None):
        if name is None:
            return
        self.cache = Cache(name, CACHESIZE)
        self.assertEqual(str(self.cache.cache), name)
        self.assertEqual(self.cache.get("trausti"), None)
        self.assertEqual(self.cache.put("trausti", 100), 1)
        self.assertEqual(self.cache.get("trausti"), 100)
        for j in range(2000):