def generate_regular(size=100, zipf_distribution=2, rate=1.0, seed=0.0, output_filename=None):
    if output_filename is None:
        output_filename = './workloads/regular_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    # get IPs/IDs from the Ethereum repo
    ip_file = open('./workloads/ips.txt', "r")
    id_file = open('./workloads/ids.txt', "r")
    rand.seed(seed)
    topics = random.zipf(a=zipf_distribution, size=size)  # Zipf-distributed topic ranks
    t_next_req = 0.0  # time of next request
    with open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for i in range(0, size):
            t_next_req += rand.expovariate(rate)
            record = {}
            ip = ip_file.readline().rstrip()
            iD = id_file.readline().rstrip()
            if not ip or not iD:
                print("Not enough IPs/IDs in the files")
                exit(1)
            #record['time'] = int(1000*t_next_req)
            record['time'] = int(10 * i)
            record['id'] = iD
            record['ip'] = ip
            record['topic'] = 't' + str(topics[i])
            record['attack'] = 0
            #print(record)
            dict_writer.writerow(record)
    print("Generated regular workload in", str(output_filename))
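# A minimal usage sketch (an assumption, not part of the original module): run from
# the repo root with ./workloads/ips.txt and ./workloads/ids.txt populated, and with
# `import csv`, `import random as rand`, and `from numpy import random` in scope.
generate_regular(size=500, zipf_distribution=2, rate=1.0, seed=42)
# -> writes ./workloads/regular_size500_dist2.csv, one Zipf-distributed topic request per row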
def __call__(self, options=None):
    options = self._parse_options(options)
    # decode distribution parameter
    alpha = self._decode(options['alpha'])
    val = zipf(alpha) - 1
    return val
def randhouse(house):
    # Zipf sample with some randomness in the skew; keep a between 1 and 2,
    # because a larger exponent makes a single value far too popular
    x = zipf(a=1.01 + random.random() / 2, size=1000)
    # return a number between 1 and 10, shifting popularity between houses
    # so that some houses are more popular than others
    return (
        10
        - (int(random.choice(x[x < 6])) * 2 - random.randint(0, 1))
        - int(house / (len(houses) - 1) * 8)
        - random.randint(-1, 1)
    ) % 10 + 1.0
def get_companysizes(a, n, max=35000):
    """
    Draw the sizes of n companies from a Zipf distribution.

    :param a: float, exponent of the Zipf distribution
    :param n: int, number of companies
    :param max: float, maximum size, for Munich equal to BMW with roughly 35000 employees
    :return: array of n company sizes, capped at max
    """
    # cap the heavy Zipf tail at the given maximum size
    return npr.zipf(a, size=n).clip(max=max)
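# A hedged usage sketch (assumes `import numpy.random as npr`; the exponent below is
# invented): with a heavy-tailed exponent most firms are tiny, and the cap keeps the
# few giants at or below the BMW-sized maximum.
sizes = get_companysizes(a=1.2, n=1000)
print(sizes.min(), sizes.max())  # max() is guaranteed <= 35000 after capping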
def generate_first_descriptions(dictionary):
    # randomly generate a set of first descriptions
    first_descriptions = []
    for i in range(NUM_MUTANTS):
        first_desc = []
        # randomly appending words from the dictionary
        for j in range(DESCRIPTION_LEN):
            zipf_val = rnd.zipf(ZIPF_LAW_CONST)
            while zipf_val > len(dictionary):
                # just in case our zipf value is absurdly big
                zipf_val = rnd.zipf(ZIPF_LAW_CONST)
            first_desc.append(dictionary[int(zipf_val) - 1])
        first_descriptions.append(first_desc)
    return first_descriptions
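# A minimal call sketch (assumption: `rnd` is numpy.random, and the module globals
# look roughly like this; none of these values come from the original):
import numpy.random as rnd
NUM_MUTANTS, DESCRIPTION_LEN, ZIPF_LAW_CONST = 10, 8, 1.5
descs = generate_first_descriptions(["the", "of", "and", "to", "a", "in"])
# low Zipf ranks dominate, so early dictionary entries ("the", "of") appear most often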
def setZipfian(self, para_a, sequence, size, scale, a_change):
    for i in range(0, len(sequence)):
        sequence[i] = int(round(1.0 * sequence[i] * scale))
        # sequence[i] = 1.0 * sequence[i] * scale
    result = []
    count = 0
    # rejection-sample Zipf ranks until `size` of them land inside the sequence
    while count < size:
        one = zipf(para_a)
        if one >= 1 and one <= len(sequence):
            result.append(sequence[one - 1])
            count += 1
    self.data = result
    self.distribution = 'zipfian'
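# A standalone sketch (an addition, not from the original) of the rejection idea used
# above: Zipf ranks falling outside [1, len(sequence)] are simply re-drawn.
from numpy.random import zipf as _zipf
seq = [20, 40, 60, 80]            # an already-scaled sequence
picks = []
while len(picks) < 10:
    rank = _zipf(1.5)
    if 1 <= rank <= len(seq):     # keep only ranks that index into the sequence
        picks.append(seq[rank - 1])
print(picks)                      # dominated by seq[0], since rank 1 is most likely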
def write_schedule_to(TCL):
    TCL.write("# scheduling\n")
    for app in list_of_apps:
        if app[1] == "zipf":
            target_volume = app[4]
            current_volume = 0
            while current_volume < target_volume:
                volume = npr.zipf(1.5) % target_volume
                current_volume = current_volume + volume
                # random value between 0 and 4 minutes
                time = npr.rand() * 240
                TCL.write("$ns at " + str(time) + " \"$app" + app[2] + app[3] + \
                          " send " + str(volume) + "\"\n")
        else:  # app[1] == "onoff"
            # On/Off model: we just need to schedule the start
            time = npr.rand() * 240
            TCL.write("$ns at " + str(time) + " \"$app" + app[2] + app[3] + \
                      " start\"\n")
    TCL.write("$ns at 300.0 \"finish\"\n")
    TCL.write("\n")
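# A hedged usage sketch; the list_of_apps layout is inferred from the indexing above
# (app[1] = traffic model, app[2]/app[3] = node suffixes, app[4] = target volume) and
# all concrete values below are invented:
import numpy.random as npr
list_of_apps = [("app", "zipf", "0", "1", 5000), ("app", "onoff", "2", "3", 0)]
with open("schedule.tcl", "w") as TCL:
    write_schedule_to(TCL)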
def get_random_accesses(amount):
    return zipf(NumDocuments, amount)
def zipf(size, params):
    try:
        return random.zipf(params['a'], size)
    except ValueError as e:
        exit(e)
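# A usage sketch for the wrapper above (assumes `from numpy import random` and the
# builtin `exit`): numpy rejects a <= 1 with a ValueError, which becomes a clean exit.
samples = zipf(1000, {'a': 2.0})  # 1000 Zipf draws with exponent 2.0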
def generate_attack_topic(size=100, zipf_distribution=2, topic_to_attack='t11',
                          attacker_ip_num=3, attacker_id_num=10,
                          rate_normal=1.0, rate_attack=10.0, seed=0.0,
                          output_filename=None):
    if output_filename is None:
        output_filename = './workloads/attack_topic_size' + str(size) + '_dist' + str(zipf_distribution) + '.csv'
    # get IPs/IDs from the Ethereum repo
    ip_file = open('./workloads/ips.txt', "r")
    id_file = open('./workloads/ids.txt', "r")
    topics = random.zipf(a=zipf_distribution, size=size)  # Zipf-distributed topic ranks
    attacker_ips = []
    for i in range(0, attacker_ip_num):
        num = int(255 / attacker_ip_num * i)
        ip = str(num) + "." + str(num) + "." + str(num) + "." + str(num)
        attacker_ips.append(ip)
    attacker_ids = []
    for i in range(0, attacker_id_num):
        attacker_ids.append(''.join([str(i)] * 20))
    print("attacker ips:", attacker_ips)
    print("attacker ids:", attacker_ids)
    rand.seed(seed)
    t_next_normal_req = rand.expovariate(rate_normal)  # time of next normal request
    t_next_attack_req = rand.expovariate(rate_attack)  # time of next attack request
    time = 0.0
    attack = 0
    with open(output_filename, 'w') as output_file:
        fieldnames = ['time', 'id', 'ip', 'topic', 'attack']
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        for i in range(0, size):
            # the earlier of the two pending events decides the request type
            if t_next_normal_req < t_next_attack_req:
                attack = 0
                time = t_next_normal_req
            elif t_next_normal_req > t_next_attack_req:
                attack = 1
                time = t_next_attack_req
            record = {}
            if attack == 0:
                ip = ip_file.readline().rstrip()
                iD = id_file.readline().rstrip()
                if not ip or not iD:
                    print("Not enough IPs/IDs in the files")
                    exit(1)
                topic = 't' + str(topics[i])
            else:  # attack == 1
                ip = attacker_ips[i % attacker_ip_num]
                iD = attacker_ids[i % attacker_id_num]
                topic = topic_to_attack
            record['time'] = int(10 * i)
            record['id'] = iD
            record['ip'] = ip
            record['topic'] = topic
            record['attack'] = attack
            #print(record)
            dict_writer.writerow(record)
            if time == t_next_normal_req:
                t_next_normal_req += rand.expovariate(rate_normal)
            if time == t_next_attack_req:
                t_next_attack_req += rand.expovariate(rate_attack)
    print("Generated attack workload in", str(output_filename))
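# A hedged usage sketch, mirroring generate_regular above (same assumptions about
# ./workloads/ips.txt, ./workloads/ids.txt, and the csv/rand/random imports):
generate_attack_topic(size=500, zipf_distribution=2, topic_to_attack='t11',
                      rate_normal=1.0, rate_attack=10.0, seed=42)
# honest rows draw Zipf-distributed topics; attack rows all target 't11' and rotate
# through the synthetic attacker IPs/IDs, and since rate_attack > rate_normal the
# attack process wins most of the next-event comparisons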
def zipfgen():
    tok_it = _strseq()
    # map each Zipf-distributed rank to a stable token, minting new tokens on demand
    vocab = defaultdict(lambda: next(tok_it))
    while True:
        yield vocab[zipf(2.0)]
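# A usage sketch (assumes `from numpy.random import zipf`, `from collections import
# defaultdict`, and some `_strseq()` generator of fresh token strings, as the
# function above implies):
from itertools import islice
tokens = list(islice(zipfgen(), 20))  # a 20-token sample with Zipf-shaped frequencies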
def np_zipf_distribution():
    # a 2x3 array of Zipf draws with exponent a=2
    x = random.zipf(a=2, size=(2, 3))
    print(x)
    # 1000 draws, plotting only values below 10 for a readable chart
    x = random.zipf(a=2, size=1000)
    sns.distplot(x[x < 10], kde=False)
    plt.show()
def _rvs(self, a):
    return mtrand.zipf(a, size=self._size)
""" Zipf's law: In a collection the nth common term is 1/n times of the most common term. e.g. 5th common word in english has occur nearly 1/5 th times as most of the most used word. """ # It has two parameters. # a - distribution parameter # size - the shape of the returned array # draw out a sample for zipf distribution with distribution parameter 2 with size 2x3 from numpy import random x = random.zipf(a=2, size=(2, 3)) print(x) # visualization of zipf distribution # sample 1000 points but plotting only ones with value < 10 for more meanngful chart. # from numpy import random import matplotlib.pyplot as plt import seaborn as sns x = random.zipf(a=2, size=1000) sns.distplot(x[x < 10], kde=False)
pattern_meas = re.compile(r"^(\d+)\s+(\d+)\s+([-]?\d+)$", re.VERBOSE | re.MULTILINE)
if file_name.split(".")[0] == 'amazon':
    pattern_meas = re.compile(r"^(\d+)\s+(\d+)", re.VERBOSE | re.MULTILINE)
for match in pattern_meas.finditer(text):
    nodes_list.append("%s" % int(match.group(1)))
    nodes_list.append("%s" % int(match.group(2)))
# count how often each node appears (each edge contributes two entries)
for node in nodes_list:
    count = frequency.get(node, 0)
    frequency[node] = count + 1
node_ocurr = []
s = []
for key, value in reversed(sorted(frequency.items(), key=itemgetter(1))):
    node_ocurr.append([key, value // 2])
    s.append(zipf(2., value // 2))
a = 2
s = zipf(a, 10)
# rescale the raw Zipf samples into the range (0, 5]
result = (s / float(max(s))) * 5
for i in range(len(result)):
    print(result[i])
    print(s[i])
    print('------')
print(min(s), max(s))
print(min(result), max(result))
def calculaTiempoSesion(self):
    rho = 2
    a = rand.zipf(rho)
    return a
print " Payoffs: ", pprint.pprint(self.historyPayoffs) print " Demes: ", pprint.pprint(self.historyDemes) print "Born: " + str(self.born) print "Rounds Alive: " + str(self.roundsAlive) print "Times observed: " + str(self.nObserved) print "Current Deme: " + str(self.currentDeme) print "Points Earned: " + str(self.pointsEarned) print "Number of Offspring: " + str(self.nOffspring) # Initialize structures in model fitness = [] # fitness landscape for i in range(3): # tmp = [round(2*random.expovariate(lambd)**2) for x in range(nact)] tmp = [round(2*npr.zipf(alpha)) for x in range(nact)] # tmp = [round(2*random.lognormvariate(lgmu,lgsd)**2) for x in range(nact)] fitness.append(tmp) aliveAgents = [] Agents = [] outputFH.write("generation,strategy,nAgents,nInnovate,nObserve,nExploit,nRefine,totalPayoffs,avgLifespan,stdLifespan,medLifespan,maxLifespan\n") # Initialize stats class statsDict: Name = "statsDict" def __init__(self): self.aliveAgents = 0 self.innovate = 0 self.observe = 0
def zipfTime(self):
    i = random.zipf(1.3)
    return i
from numpy.random import zipf
from math import ceil

privacy_level = []
a = 1.47
s = zipf(a, 50000)
# rescale the raw Zipf samples into the range (0, 5]
zipf_dis = (s / float(max(s))) * 5
privacy_level = [x for x in zipf_dis]
privacy_level.sort()
privacy_level.reverse()
for item in privacy_level:
    print(int(ceil(item)))
print(len(privacy_level))
def create_zipf_firm(self):
    ref = {}
    random.seed()
    x = zipf(a=4, size=self.num_firms)
    ref[self.name] = x
    return ref
# Zipf distributions are used to sample data according to Zipf's law.
# Zipf's law: in a collection, the nth most common term occurs 1/n times as often
# as the most common term. E.g. the 5th most common word in English occurs nearly
# 1/5 as often as the most used word.
# It has two parameters:
# a    - distribution parameter
# size - shape of the returned array
from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns

arr1 = random.zipf(a=2, size=1000)
print(arr1)
sns.distplot(arr1[arr1 < 25], kde=False)  # shows how many of the 1000 values fall below 25
plt.show()
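# Note (an addition): sns.distplot is deprecated since seaborn 0.11; on newer
# versions the equivalent plot of the same data is:
#   sns.histplot(arr1[arr1 < 25], discrete=True)
#   plt.show()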
'''
Some more points related to the Zipf distribution:

1. In probability, Zipf's law is viewed as relating the frequency (f) with which
   an event occurs to its rank (r).
2. The law was proposed by the American linguist George Kingsley Zipf (1902-1950).
3. He formulated it to describe the frequency with which any word occurs in
   English; today it is very popular and very useful in Machine Learning.
4. Likewise, in 1949 Zipf claimed under the same law that the largest city in a
   country is twice the size of the second city, three times the size of the
   third, and so on. But the law only fits well for language; in some other
   cases it does not fit properly.

For more information:
1. https://www.sciencedirect.com/topics/computer-science/zipf-distribution
2. https://www.nngroup.com/articles/zipf-curves-and-website-popularity/
3. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4176592/
4. https://plus.maths.org/content/mystery-zipf
'''
import numpy.random as r
import matplotlib.pyplot as plt
import seaborn as sns

zipf = r.zipf(a=2, size=(1000))
# print('\n', zipf)
# print('\n', zipf[zipf < 10]); exit()
sns.distplot(zipf, hist=False)
plt.xlabel('Rank X')
plt.ylabel('Frequency Y')
plt.title('Zipf Distribution')
# plt.xlim(0, 100)
# plt.ylim(0, 100)
plt.show()
import unittest
import random
import sys

from numpy.random import zipf

sys.path.append("../")
import ARC, CLOCK, LRU, LFU, LRU3, LRU10
from cache import Cache
from common import Entry

CACHESIZE = 500
NUMREQUESTS = 10000
key_alpha = 1.33
keydistribution = zipf(key_alpha, NUMREQUESTS)


class TestAlgorithms(unittest.TestCase):
    def setUp(self):
        pass

    def test_algorithm(self, name=None):
        if name is None:
            return
        self.cache = Cache(name, CACHESIZE)
        self.assertEqual(str(self.cache.cache), name)
        self.assertEqual(self.cache.get("trausti"), None)
        self.assertEqual(self.cache.put("trausti", 100), 1)
        self.assertEqual(self.cache.get("trausti"), 100)
        for j in range(2000):