def generate_regular(size = 100, zipf_distribution = 2, rate = 1.0, seed = 0.0, output_filename = None):
    """Generate a CSV workload of `size` regular topic-registration requests.

    Topics follow a Zipf distribution with exponent `zipf_distribution`
    (drawn from numpy's global RNG, which is NOT seeded here); request
    IPs and IDs are consumed line-by-line from ./workloads/ips.txt and
    ./workloads/ids.txt (dumps from the ethereum repo).

    :param size: number of request rows to emit
    :param zipf_distribution: Zipf exponent controlling topic popularity
    :param rate: rate of the Poisson inter-arrival process (computed but
        not written — see NOTE below)
    :param seed: seed for the stdlib RNG driving inter-arrival times
    :param output_filename: destination CSV; derived from size/dist if None
    :raises SystemExit: when ips.txt/ids.txt run out of entries
    """
    if output_filename is None:
        output_filename = './workloads/regular_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    rand.seed(seed)
    topics = random.zipf(a=zipf_distribution, size=size)  # one Zipf rank per request
    t_next_req = 0.0  # simulated time of next request
    # `with` guarantees all three files are closed even on early SystemExit
    with open('./workloads/ips.txt', 'r') as ip_file, \
         open('./workloads/ids.txt', 'r') as id_file, \
         open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for i in range(size):
            t_next_req += rand.expovariate(rate)
            ip = ip_file.readline().rstrip()
            iD = id_file.readline().rstrip()
            if not ip or not iD:
                print("Not enough IPs/IDs in the files")
                raise SystemExit(1)
            record = {
                # NOTE: rows are stamped on a fixed 10ms grid rather than at
                # the simulated Poisson time t_next_req (historical format)
                'time': int(10 * i),
                'id': iD,
                'ip': ip,
                'topic': 't' + str(topics[i]),
                'attack': 0,  # a regular workload carries no attack rows
            }
            dict_writer.writerow(record)
    print("Generated regular workload in", str(output_filename))
示例#2
0
    def __call__(self, options=None):
        """Draw one Zipf sample, shifted to a zero-based value.

        The distribution exponent comes from the (decoded) 'alpha' option.
        """
        opts = self._parse_options(options)
        # decode the distribution parameter before sampling
        alpha = self._decode(opts['alpha'])
        # numpy's zipf support starts at 1; shift to start at 0
        return zipf(alpha) - 1
示例#3
0
    def __call__(self, options=None):
        """Return a zero-based Zipf variate parameterised by the 'alpha' option."""
        parsed = self._parse_options(options)
        # the stored option must be decoded into a numeric exponent first
        alpha = self._decode(parsed['alpha'])
        sample = zipf(alpha)
        return sample - 1  # map the 1-based Zipf support onto 0, 1, 2, ...
def randhouse(house):
    """Return a popularity score in [1.0, 10.0] for `house`.

    A Zipf sample with a randomised shape parameter (a in (1.01, 1.51), so
    no single value dominates too strongly) provides the base popularity;
    the house's position within `houses` then skews the result so that some
    houses are systematically more popular than others.
    """
    variability = 1.01 + random.random() / 2
    samples = zipf(a=variability, size=1000)
    # pick one of the common (rank < 6) Zipf values and jitter it slightly
    base = int(random.choice(samples[samples < 6])) * 2 - random.randint(0, 1)
    # deterministic skew by house position, plus a +/-1 random wobble
    skew = int(house / (len(houses) - 1) * 8)
    return (10 - base - skew - random.randint(-1, 1)) % 10 + 1.0
示例#5
0
def get_companysizes(a, n, max=35000):
    """
    Draw the size of n companies from a Zipf distribution, capped at ``max``.

    :param a: float, coefficient of zipf distribution
    :param n: int, number of companies
    :param max: float, maximum size, for Munich equal to BMW with roughly 35000 employees

    :return: numpy array of n company sizes in [1, max]
    """
    # ``max`` was documented as a cap but previously never applied, so
    # unbounded Zipf draws could exceed it; clip enforces the contract.
    return npr.zipf(a, size=n).clip(max=max)
def generate_first_descriptions(dictionary):
    """Randomly generate NUM_MUTANTS descriptions of DESCRIPTION_LEN words.

    Word ranks are sampled from a Zipf distribution over `dictionary`, so
    low-rank (common) words appear far more often than high-rank ones.
    """
    descriptions = []
    for _ in range(NUM_MUTANTS):
        description = []
        for _ in range(DESCRIPTION_LEN):
            # resample until the Zipf rank actually fits in the dictionary
            # (Zipf has unbounded support, so absurdly large ranks can occur)
            rank = rnd.zipf(ZIPF_LAW_CONST)
            while rank > len(dictionary):
                rank = rnd.zipf(ZIPF_LAW_CONST)
            description.append(dictionary[int(rank) - 1])
        descriptions.append(description)
    return descriptions
示例#7
0
文件: Item.py 项目: dbiir/MiDBench
 def setZipfian(self, para_a, sequence, size, scale, a_change):
     """Fill self.data with `size` values from `sequence` at Zipf ranks.

     `sequence` is first rescaled in place by `scale` (rounded to ints);
     ranks are drawn from Zipf(para_a) and kept only when they index into
     the sequence (1-based). `a_change` is accepted but unused.
     """
     # scale the candidate values in place
     for idx in range(len(sequence)):
         sequence[idx] = int(round(1.0 * sequence[idx] * scale))
     picked = []
     while len(picked) < size:
         rank = zipf(para_a)
         # rejection sampling: only 1-based ranks inside the sequence count
         if 1 <= rank <= len(sequence):
             picked.append(sequence[rank - 1])
     self.data = picked
     self.distribution = 'zipfian'
def write_schedule_to(TCL):
    """Write ns-2 scheduling commands for every app in list_of_apps to TCL.

    Zipf-type apps get one `send` event per Zipf-sized chunk until their
    target volume is reached; on/off apps get a single `start` event. All
    events land at a random time within the first four minutes, and the
    simulation is told to finish at t=300s.
    """
    TCL.write("# scheduling\n")
    for app in list_of_apps:
        if app[1] == "zipf":
            target_volume = app[4]
            current_volume = 0
            # accumulate Zipf-sized sends until the target volume is covered
            while current_volume < target_volume:
                volume = npr.zipf(1.5) % target_volume
                current_volume += volume
                # random value between 0 and 4 minutes
                time = npr.rand() * 240
                TCL.write("$ns at "+ str(time) + " \"$app" + app[2] + app[3] + " send " + str(volume) + "\"\n")
        else: # app[1] == "onoff"
            # On/Off model : we just need to schedule the start
            time = npr.rand() * 240
            TCL.write("$ns at "+ str(time) + " \"$app" + app[2] + app[3] + " start\"\n")

    TCL.write("$ns at 300.0 \"finish\"\n")
    TCL.write("\n")
示例#9
0
def get_random_accesses(amount):
    """Return `amount` document indices drawn Zipf(NumDocuments)-distributed."""
    return zipf(NumDocuments, amount)
示例#10
0
文件: utils.py 项目: kunlegiwa/MANGO
def zipf(size, params):
    """Return `size` samples from numpy's Zipf distribution.

    `params` must carry the distribution exponent under key 'a'. An invalid
    exponent (numpy raises ValueError) terminates the process, using the
    error itself as the exit message.
    """
    try:
        samples = random.zipf(params['a'], size)
    except ValueError as err:
        exit(err)
    else:
        return samples
def generate_attack_topic(size = 100, zipf_distribution = 2, topic_to_attack = 't11', attacker_ip_num = 3, attacker_id_num=10, rate_normal = 1.0, rate_attack = 10.0, seed = 0.0, output_filename = None):
    """Generate a CSV workload mixing normal requests with a topic attack.

    Two independent Poisson streams compete: normal requests (rate_normal)
    take the next IP/ID from ./workloads/ips.txt and ids.txt and a
    Zipf-distributed topic, while attack requests (rate_attack) cycle
    through synthetic attacker IPs/IDs and always target `topic_to_attack`.
    Whichever stream fires first supplies the next row.

    :param size: total number of rows (normal + attack) to emit
    :param zipf_distribution: Zipf exponent for normal-topic popularity
    :param topic_to_attack: topic name every attack row registers for
    :param attacker_ip_num: number of distinct synthetic attacker IPs
    :param attacker_id_num: number of distinct synthetic attacker IDs
    :param rate_normal: Poisson rate of the normal request stream
    :param rate_attack: Poisson rate of the attack request stream
    :param seed: seed for the stdlib RNG driving both streams
    :param output_filename: destination CSV; derived from size/dist if None
    :raises SystemExit: when ips.txt/ids.txt run out of entries
    """
    if output_filename is None:
        output_filename = './workloads/attack_topic_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    # topics for normal rows (numpy global RNG, not seeded here)
    topics = random.zipf(a=zipf_distribution, size=size)

    # synthetic attacker IPs of the form n.n.n.n, spread over 0..255
    attacker_ips = []
    for i in range(attacker_ip_num):
        num = int(255 / attacker_ip_num * i)
        attacker_ips.append('.'.join([str(num)] * 4))

    # attacker node ids: the digit(s) of i repeated 20 times
    attacker_ids = [''.join([str(i)] * 20) for i in range(attacker_id_num)]
    print("attacker ips:", attacker_ips)
    print("attacker ids:", attacker_ids)

    rand.seed(seed)
    t_next_normal_req = rand.expovariate(rate_normal)  # time of next normal request
    t_next_attack_req = rand.expovariate(rate_attack)  # time of next attack request
    # `with` guarantees all three files are closed even on early SystemExit
    with open('./workloads/ips.txt', 'r') as ip_file, \
         open('./workloads/ids.txt', 'r') as id_file, \
         open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for i in range(size):
            # next event comes from whichever stream fires first; a tie
            # (previously left stale state) now counts as a normal request
            if t_next_normal_req <= t_next_attack_req:
                attack = 0
                time = t_next_normal_req
            else:
                attack = 1
                time = t_next_attack_req

            if attack == 0:
                ip = ip_file.readline().rstrip()
                iD = id_file.readline().rstrip()
                if not ip or not iD:
                    print("Not enough IPs/IDs in the files")
                    raise SystemExit(1)
                topic = 't' + str(topics[i])
            else:  # attack == 1
                ip = attacker_ips[i % attacker_ip_num]
                iD = attacker_ids[i % attacker_id_num]
                topic = topic_to_attack

            record = {
                # NOTE: rows are stamped on a fixed 10ms grid rather than at
                # the simulated event time `time` (historical format)
                'time': int(10 * i),
                'id': iD,
                'ip': ip,
                'topic': topic,
                'attack': attack,
            }
            dict_writer.writerow(record)

            # advance only the stream that produced this event
            if time == t_next_normal_req:
                t_next_normal_req += rand.expovariate(rate_normal)
            if time == t_next_attack_req:
                t_next_attack_req += rand.expovariate(rate_attack)

    print("Generated attack topic workload in", str(output_filename))
示例#12
0
文件: lang.py 项目: afcarl/nerv
def zipfgen():
    """Yield an endless token stream whose frequencies follow Zipf(2).

    Each new Zipf rank lazily receives a fresh token from _strseq(), so the
    most frequent ranks map to the earliest-generated tokens.
    """
    tokens = _strseq()
    vocab = defaultdict(lambda: next(tokens))
    while True:
        yield vocab[zipf(2.0)]
示例#13
0
def np_zipf_distribution():
    """Demo numpy's Zipf sampler: print a 2x3 draw, then plot 1000 samples."""
    sample = random.zipf(a=2, size=(2, 3))
    print(sample)
    # plot only values below 10 — the long Zipf tail would swamp the chart
    sample = random.zipf(a=2, size=1000)
    sns.distplot(sample[sample < 10], kde=False)
    plt.show()
示例#14
0
 def _rvs(self, a):
     """Draw `self._size` Zipf(a) variates from numpy's legacy RandomState."""
     n = self._size
     return mtrand.zipf(a, size=n)
示例#15
0
 def _rvs(self, a):
     """Sample this distribution: `self._size` Zipf variates with exponent `a`."""
     return mtrand.zipf(a, size=self._size)
示例#16
0
"""
Zipf's law: In a collection the nth common term is 1/n times of the most common term. 
e.g. 5th common word in english has occur nearly 1/5 th times as most of the most used word.
"""

# It has two parameters.

# a - distribution parameter

# size - the shape of the returned array

# draw out a sample for zipf distribution with distribution parameter 2 with size 2x3

from numpy import random

x = random.zipf(a=2, size=(2, 3))

print(x)

# visualization of zipf distribution

# sample 1000 points but plotting only ones with value < 10 for more meanngful chart.

# from numpy import random

import matplotlib.pyplot as plt
import seaborn as sns

x = random.zipf(a=2, size=1000)
sns.distplot(x[x < 10], kde=False)
示例#17
0
            pattern_meas = re.compile(r"^(\d+)\s+(\d+)\s+([-]?\d+)$",
                                      re.VERBOSE | re.MULTILINE)
        if file_name.split(".")[0] == 'amazon':
            pattern_meas = re.compile(r"^(\d+)\s+(\d+)",
                                      re.VERBOSE | re.MULTILINE)
        for match in pattern_meas.finditer(text):
            nodes_list.append("%s" % int(match.group(1)))
            nodes_list.append("%s" % int(match.group(2)))

# Count how often each node id appears in the collected edge list.
for node in nodes_list:
    count = frequency.get(node, 0)
    frequency[node] = count + 1
node_ocurr = []
s = []
# Walk nodes most-frequent-first, recording (node, count/2) and one Zipf
# sample per node. NOTE(review): `s` is completely overwritten a few lines
# below, so the samples appended here are discarded — confirm intent.
for key, value in reversed(sorted(frequency.items(), key=itemgetter(1))):
    node_ocurr.append([key, value / 2])
    s.append(zipf(2., value / 2))

a = 2
s = zipf(a, 10)

# Rescale the 10 fresh Zipf(2) samples into the range (0, 5].
result = (s / float(max(s))) * 5

# NOTE(review): `i` iterates the *values* of `result`, yet is used as an
# index into `result` and `s` — this looks like a bug; it presumably should
# be `for i in range(len(result))`. Confirm before relying on this output.
# (Also note the mixed py2/py3 print syntax below.)
for i in result:
    print(result[i])
    print s[i]
    print '------'

print min(s), max(s)
print min(result), max(result)
示例#18
0
 def calculaTiempoSesion(self):
     """Return a session duration sampled from a Zipf distribution (rho=2)."""
     exponent = 2
     return rand.zipf(exponent)
示例#19
0
        print "  Payoffs: ",
        pprint.pprint(self.historyPayoffs)
        print "  Demes: ",
        pprint.pprint(self.historyDemes)
        print "Born: " + str(self.born)
        print "Rounds Alive: " + str(self.roundsAlive)
        print "Times observed: " + str(self.nObserved)
        print "Current Deme: " + str(self.currentDeme)
        print "Points Earned: " + str(self.pointsEarned)
        print "Number of Offspring: " + str(self.nOffspring)

# Initialize structures in model
fitness = []  # fitness landscape: 3 demes, each with nact action payoffs
for _ in range(3):
    # payoff per action: a doubled Zipf(alpha) draw, rounded
    deme_payoffs = [round(2 * npr.zipf(alpha)) for _ in range(nact)]
    fitness.append(deme_payoffs)
aliveAgents = []
Agents = []

# CSV header for the per-generation statistics output
outputFH.write("generation,strategy,nAgents,nInnovate,nObserve,nExploit,nRefine,totalPayoffs,avgLifespan,stdLifespan,medLifespan,maxLifespan\n")

# Initialize stats
class statsDict:
    """Per-generation counters for the simulation statistics."""

    # class identifier used by the surrounding framework
    Name = "statsDict"

    def __init__(self):
        # every counter starts from zero for a fresh generation
        self.aliveAgents = 0
        self.innovate = 0
        self.observe = 0
示例#20
0
 def zipfTime(self):
     """Return an inter-arrival time drawn from a Zipf(1.3) distribution."""
     return random.zipf(1.3)
示例#21
0
from numpy.random import zipf
from math import ceil

# Draw 50000 Zipf(1.47) samples and rescale them into (0, 5] to serve as
# privacy levels, largest first. (Converted from Python 2 print statements
# to Python 3 print calls, matching the rest of the file.)
a = 1.47
s = zipf(a, 50000)
zipf_dis = (s / float(max(s))) * 5

privacy_level = sorted(zipf_dis, reverse=True)
for item in privacy_level:
    # round each level up to an integer 1..5
    print(int(ceil(item)))
print(len(privacy_level))
示例#22
0
文件: lang.py 项目: pdsujnow/nerv
def zipfgen():
    """Infinite generator of tokens with Zipf(2.0)-distributed frequencies."""
    token_source = _strseq()
    # each unseen Zipf rank gets the next fresh token on first access
    rank_to_token = defaultdict(lambda: next(token_source))
    while True:
        yield rank_to_token[zipf(2.0)]
示例#23
0
 def create_zipf_firm(self):
     """Map this market's name to Zipf(4)-distributed sizes for its firms."""
     # reseed from system entropy so repeated calls differ
     random.seed()
     sizes = zipf(a=4, size=self.num_firms)
     return {self.name: sizes}
示例#24
0
# Zipf distributions sample data according to Zipf's law: in a collection,
# the nth most common term occurs 1/n times as often as the most common one
# (e.g. the 5th most common English word occurs about 1/5 as often as the
# most common word).
# Two parameters: a = distribution parameter, size = shape of the result.
from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns

arr1 = random.zipf(a=2, size=1000)
print(arr1)
# histogram restricted to values below 25 (i.e. how many of the 1000
# samples fall under 25) — the unbounded tail would swamp the chart
sns.distplot(arr1[arr1 < 25], kde=False)
plt.show()
示例#25
0
Zipf Distribution se related aur kuch baate:

1. Zipf's law ko probability me kuch ayse dekha jata hai ki, yaha par kise event ki hone ki frequency (f) hoti hai aur uska rank (r) hota hai.
2. Iss law ko American linguist George Kingsly Zipf (1902-1950) ne diya tha.
3. Iss law ko unhone English me kisi bhi word ke aane ki frequency ko janne ke liye diya tha. Joki aaj bahot hi popular aur Machine Learning me bahot jada useful hai.
4. Wise hi Zipf ne 1949 me issi law me ye bhi claim kiya tha ki, desh me maujud sab se bada sahar, dusre sahar se doguna bada hai aur 3rd wala se tiguna bada hai and so on. Lekin ye law kewal wahin par fit batha; iska use language me ya kuch aur case me sahi fit nahi bathta hai.

Jada jankari ke liye:
1. https://www.sciencedirect.com/topics/computer-science/zipf-distribution
2. https://www.nngroup.com/articles/zipf-curves-and-website-popularity/
3. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4176592/
4. https://plus.maths.org/content/mystery-zipf
'''

import numpy.random as r
import matplotlib.pyplot as plt
import seaborn as sns

# 1000 Zipf(2) samples; plotted as a smooth density (no histogram bars)
zipf = r.zipf(a=2, size=(1000))
sns.distplot(zipf, hist=False)

# axis labels and title for the rank/frequency plot
plt.xlabel('Rank X')
plt.ylabel('Frequencey Y')
plt.title('Zipf Distribution')

plt.show()
示例#26
0
import unittest
import random
import sys

from numpy.random import zipf

sys.path.append("../")
import ARC, CLOCK, LRU, LFU, LRU3, LRU10
from cache import Cache
from common import Entry


# Simulated request stream for the cache tests: NUMREQUESTS keys drawn from
# a Zipf(key_alpha) popularity distribution, replayed against a
# CACHESIZE-entry cache.
CACHESIZE = 500
NUMREQUESTS = 10000
key_alpha = 1.33  # Zipf exponent; higher values skew popularity harder
keydistribution = zipf(key_alpha, NUMREQUESTS)

class TestAlgorithms(unittest.TestCase):
    def setUp(self):
        pass

    def test_algorithm(self, name=None):
        if name == None:
            return
        self.cache = Cache(name, CACHESIZE)
        self.assertEqual(str(self.cache.cache), name)
        self.assertEqual(self.cache.get("trausti"), None)
        self.assertEqual(self.cache.put("trausti", 100), 1)
        self.assertEqual(self.cache.get("trausti"), 100)

        for j in xrange(2000):