Python zipf 예제들, zipf.zipf Python 예제들

예제 #1

0

파일 보기

파일: cache_sim.py 프로젝트: ibismark/nwSimulator

def makeEvent(tranCount):
	"""Build ``tranCount`` synthetic transaction events.

	Each event is a three-item list ``[transaction_id, content_id, offset]``:

	* ``transaction_id`` -- ``'t'`` followed by the zero-padded loop index.
	* ``content_id`` -- ``'f'`` followed by the next value popped from the
	  front of the sequence returned by ``zipf.zipf(contentNum, tranCount+1)``.
	* ``offset`` -- ``ceil(r * 3600 * 10**6)`` for a uniform draw ``r``
	  (presumably microseconds within one hour -- confirm with the consumer).

	Returns the list of events in generation order.
	"""
	content_ranks = zipf.zipf(contentNum, tranCount + 1)
	events = []

	for index in range(tranCount):
		draw = random.random()
		offset = math.ceil((draw * 3600) * 10**6)
		# The original takes a second draw whose value is never used; it is
		# kept so the random stream seen by later callers stays the same.
		random.random()
		content_id = 'f' + str(content_ranks.pop(0))
		events.append(['t%04d' % index, content_id, offset])

	return events

예제 #2

0

파일 보기

파일: generate.py 프로젝트: iandr413/vagrant-spark

def fake_phoenix_timeseries(scale=1, child=0):
	"""Write one file of fake per-device time-series rows.

	The data set covers 8 weeks with one record per device every 15
	seconds; ``scale`` is the number of devices. The rows are split into
	``scale`` files of ``records_per_file`` consecutive row indices each,
	and this call writes the slice for ``child`` to
	``timeseries.<child, zero-padded to 6 digits>.txt``.

	Each row handed to ``write_rows`` holds, in order: device id,
	timestamp (seconds since the epoch, as a string), heart rate,
	temperature, load average, stock price, a ``faker`` boolean and a
	comma-joined list of tags.

	Both ``random`` and ``faker`` are seeded with the first row index, so
	a given ``(scale, child)`` pair regenerates the same file.

	Returns 1 (after printing a message) when ``child`` is not a valid
	file index for ``scale``; otherwise returns None.
	"""
	# NOTE: the index arithmetic below relies on ``/`` being integer
	# division on ints, consistent with the xrange/.next() usage here.
	num_days = 7 * 8
	record_interval_seconds = 15
	records_per_hour = 3600 / record_interval_seconds
	records_per_file = 24 * records_per_hour * num_days
	num_devices = scale
	total_records = records_per_hour * num_days * 24 * num_devices
	total_files = total_records / records_per_file
	# Rows are laid out hour by hour; within an hour, device by device.
	one_block  = records_per_hour * num_devices
	start_rows =  child    * records_per_file
	end_rows   = (child+1) * records_per_file

	if child >= total_files:
		print("Argument mismatch: child too large for scale")
		return 1

	tsmin = datetime(2016, 1, 1, 0, 0)
	start_timestamp = time.mktime(tsmin.timetuple())

	possible_tags = [ 'AAA', 'BBB', 'CCC', 'DDD', 'EEE' ]

	random.seed(start_rows)
	faker.seed(start_rows)
	num_tags_generator = zipf(4, 5)

	# For random walks.
	walk_values = [ random.random() for i in xrange(start_rows, end_rows) ]
	base_hr = 70
	hr = base_hr
	stock = 100

	# The context manager guarantees the file is flushed and closed, even
	# if row generation raises part-way through.
	with open("timeseries.%06d.txt" % child, "w") as table:
		for i in xrange(start_rows, end_rows):
			record = []
			hours_in = i / one_block
			block_offset = i - hours_in * one_block
			device_id = block_offset / records_per_hour
			device_offset = block_offset - device_id * records_per_hour
			this_time = start_timestamp + ( hours_in * 60 * 60 ) + device_offset * record_interval_seconds

			record.append(device_id)
			record.append(str(this_time))

			# Move the heart rate, but not too far: once it drifts more
			# than 10 away from the base, nudge it one step back.
			hr_delta = int( ((walk_values[i-start_rows] - 0.5) / 0.25) ** 3 )
			distance = base_hr - hr
			if abs(distance) > 10:
				hr_delta += distance / abs(distance)
			hr += hr_delta
			record.append("%d" % hr)

			# Temperature random around 20 plus a component for time of day.
			temp = random.normalvariate(20, 0.1)
			offset = random.normalvariate( math.sin(((this_time % 86400) / 86400) * math.pi) * 5, 0.1 )
			record.append("%0.2f" % (temp + offset))

			# Load average (float, between 0 and 10, gamma distribution)
			record.append("%0.2f" % random.gammavariate(9, 0.3))

			# Stock ticker price (float, random walk > 1, step 0.01)
			stock_delta = int( ((walk_values[i-start_rows] - 0.5) / 0.1) ** 3 )
			stock += 0.01 * stock_delta
			if stock < 1:
				stock = 1
			record.append("%0.2f" % stock)

			record.append(faker.boolean())

			# The generator value minus 2 may be zero or negative, which
			# simply yields an empty tag list.
			num_tags = num_tags_generator.next() - 2
			tags = [ random.choice(possible_tags) for j in range(0, num_tags) ]
			record.append(",".join(tags))
			write_rows(table, [record])

예제 #3

0

파일 보기

파일: generate.py 프로젝트: iandr413/vagrant-spark

def fake_omniture(scale=1, child=0):
	"""Write 500,000 fake web-log records for one child bucket.

	Output goes to ``fake_weblog.<child, zero-padded to 6 digits>.txt``,
	one pipe-delimited record per line with the fields: timestamp (with
	``T`` between date and time), state, category, age, sex, promo name,
	referrer id, zip code, promo flag (``"0"``/``"1"``) and age group.

	Timestamps fall in the 7-day window starting at 2016-03-01 plus
	``7 * child`` days. ``random`` is seeded with ``child``.

	Returns 1 (after printing a message) when ``child >= scale``;
	otherwise returns None.
	"""
	# Imported locally because only ``datetime`` is known to be in scope.
	from datetime import timedelta

	if child >= scale:
		print("Argument mismatch: child too large for scale")
		return 1

	random.seed(child)
	zipf_generator = zipf(15, 2.5)

	# Generate data in the week of 2016-03-01 + 7 days * child.
	# Built with timedelta: adding the offset to the day-of-month argument
	# raised ValueError as soon as the window crossed the end of March.
	date_min = datetime(2016, 3, 1, 0, 0) + timedelta(days=7 * child)
	date_max = date_min + timedelta(days=6, hours=23, minutes=59)

	# Categories with cumulative base weights.
	categories = [
		("accessories", 0.1),
		("automotive", 0.15),
		("books", 0.2),
		("clothing", 0.3),
		("computers", 0.4),
		("electronics", 0.5),
		("games", 0.6),
		("grocery", 0.65),
		("handbags", 0.7),
		("home&garden", 0.75),
		("movies", 0.8),
		("outdoors", 0.85),
		("shoes", 0.95),
		("tools", 1.0)
	]

	# Per-category demographics:
	#   (promo profile or None, base profile, probability of sex "F")
	# where a profile is (gamma beta, constant added to the gamma draw).
	# The promo profile, when present, is used for half of promo hits.
	demographics = {
		"handbags":    ((1, 30), (4, 18), 0.85),
		"accessories": ((1, 30), (5, 18), 0.75),
		"shoes":       ((1, 30), (5, 18), 0.75),
		"grocery":     (None,    (3, 18), 0.5),
		"books":       (None,    (2, 40), 0.8),
		"games":       ((1, 18), (2, 18), 0.3),
		"electronics": ((1, 18), (2, 18), 0.3),
		"computers":   (None,    (2, 18), 0.3),
		"outdoors":    (None,    (2, 18), 0.3),
		"movies":      ((1, 30), (4, 18), 0.5),
		"clothing":    ((1, 30), (4, 18), 0.5),
		"home&garden": (None,    (2, 30), 0.5),
		"automotive":  (None,    (3, 18), 0.10),
		"tools":       (None,    (3, 18), 0.10),
	}

	# Generate 7 random promos for the week: day -> (category, hit rate).
	promotions = dict((x, (random.choice(categories)[0], random.uniform(0.03, 0.10))) for x in range(0, 7))

	# Zip code history runs.
	zip_code_history = {}

	# Age grouping: (lowest age in group, label).
	age_groups = [ (18, "18-25"), (26, "26-35"), (35, "35-50"), (51, "50+") ]

	def pick_referrer(seed_text):
		"""Return a partner id from a shuffle that is fixed per seed_text."""
		# XXX: Need to switch this to a checksum.
		shuffle_seed = (sum([ math.sqrt(ord(x)) for x in seed_text ]) % 99) / 100.0
		ids = range(1, 20)
		# A constant "random" source makes the shuffle deterministic and
		# leaves the global random stream untouched.
		random.shuffle(ids, lambda: shuffle_seed)
		return ids[zipf_generator.next() - 1]

	# The context manager guarantees the file is flushed and closed.
	with open("fake_weblog.%06d.txt" % child, "w") as output:
		for _row in xrange(0, 500000):
			date_time = faker.date_time_between(start_date=date_min, end_date=date_max)
			offset = date_time - date_min

			state = faker.state_abbr()

			# "Daily Deal" check: one promotion per day of the week.
			promo_id = offset.days
			promotion = promotions[promo_id]
			promo_name = ""
			promo_tag = "{0}-{1}".format(promotion[0], promo_id)
			if random.random() < promotion[1]:
				promo_name = promo_tag
			else:
				promotion = None

			# Referrer ID
			referrer_id = "search"

			# Select a category.
			# If there is a promotion, use its category.
			if promotion is not None:
				category = promotion[0]
			else:
				value = random.random()
				fuzz_factor = (random.random() - 0.5) / 30

				# Walk the cumulative weights; only the first threshold
				# is fuzzed. The index is capped at the last category.
				cat_index = 0
				prob = categories[cat_index][1] + fuzz_factor
				while prob <= value and cat_index < len(categories) - 1:
					cat_index += 1
					prob = categories[cat_index][1]
				category = categories[cat_index][0]

			# If from a promo, 75% chance of a referring site.
			# Zipfian distribution of referrers within this child bucket.
			if promotion and random.random() < 0.75:
				# Mix up the offsets a bit based on promo tag.
				referrer_id = "{0}-partnerid-{1}".format(promo_tag, pick_referrer(promo_tag))
			elif random.random() < 0.30:
				# Idea here is offsets are shuffled based on category.
				referrer_id = "partnerid-{0}".format(pick_referrer(category))

			# Zip code. 70% chance of re-using the old zip code within this category.
			if category not in zip_code_history or random.random() > 0.7:
				zip_code_history[category] = faker.postcode()[0:4] + "0"
			zip_code = zip_code_history[category]

			# Age and sex. The promo coin flip is only drawn for categories
			# that have a promo profile, and only on a promo hit.
			promo_profile, base_profile, female_share = demographics[category]
			if promo_profile is not None and promo_name != "" and random.random() > 0.5:
				gamma_beta, age_floor = promo_profile
			else:
				gamma_beta, age_floor = base_profile
			age = int(random.gammavariate(5, gamma_beta) + age_floor)
			sex = "F" if random.random() < female_share else "M"

			is_promo = "0" if promo_name == "" else "1"
			# Last group whose lower bound does not exceed the age.
			age_group = [ x[1] for x in age_groups if x[0] <= age ][-1]

			record = [ str(date_time).replace(" ", "T"), state, category,
			    str(age), sex, promo_name, referrer_id, zip_code, is_promo, age_group ]
			output.write('|'.join(record) + "\n")

예제 #4

0

파일 보기

파일: generate.py 프로젝트: iandr413/vagrant-spark

def fake_accounts(scale=1, child=0):
	"""Write fake customer, account and transaction tables for one child.

	Four files are written: ``customers.dat.<child>``,
	``accounts.dat.<child>``, ``customer_accounts.dat.<child>`` and
	``transactions.dat.<child>``. Child 0 additionally writes the
	``account_types`` and ``transaction_types`` lookup tables parsed from
	``../data`` under ``basedir``.

	With ``scale == 1`` customers 0..99 are generated; with ``scale > 1``
	each child generates its own range of ``1024 * 100`` customer ids.
	``random`` and ``faker`` are reseeded every 100 customers so ranges
	can be generated independently.

	Previously ``scale`` and ``child`` were overwritten with 1 and 0 inside
	the function, so the arguments were ignored and every child produced
	the same ``*.dat.0`` files; the arguments are now honoured.

	Returns 1 (after printing a message) when ``child >= scale``, else 0.
	"""
	global earliest_date

	if child >= scale:
		print("Argument mismatch: child cannot be greater than scale")
		return 1

	account_type = namedtuple('AccountType', ['account_code', 'account_type', 'account_subtype', 'account_subtype2', 'effective_date'])
	transaction_type = namedtuple('TransactionType', ['transaction_code', 'transaction_type'])
	account_types = parse_into(path_join(basedir, "../data/account_types.dat"), account_type)
	transaction_types = parse_into(path_join(basedir, "../data/transaction_types.dat"), transaction_type)

	start_rows = 0
	end_rows = 100
	if scale > 1:
		# the distribution below corresponds to approx 100kb per customer
		start_rows = child*1024*100
		end_rows = (child+1)*1024*100
	max_accounts = 13
	max_txns = 20
	reseed_split = 100
	z1 = None
	z2 = None

	def fname(table):
		"""Return the per-child output file name for a table."""
		return "%s.dat.%d" % (table, child)

	if child == 0:
		with open(fname("account_types"), "w") as tbl_types:
			write_rows(tbl_types, map(list, account_types))
		with open(fname("transaction_types"), "w") as tbl_types:
			write_rows(tbl_types, map(list, transaction_types))

	# Context managers guarantee all four tables are flushed and closed.
	with open(fname("customers"), "w") as tbl_customers, \
			open(fname("accounts"), "w") as tbl_accounts, \
			open(fname("customer_accounts"), "w") as tbl_c_accounts, \
			open(fname("transactions"), "w") as tbl_txns:
		for i in xrange(start_rows, end_rows):
			# reseed every 100 rows so that we can split the gen
			# (start_rows is always a multiple of 100, so the generators
			# are created on the first iteration)
			if i % reseed_split == 0:
				random.seed(i)
				faker.seed(i)
				z1 = zipf(max_accounts, 1)
				z2 = zipf(max_txns, 1.1)
			edate = faker.date_time_between(start_date=earliest_date).strftime("%Y-%m-%d")
			c_s = customer_accounts(i, edate)
			# Draw zv, then create zv - 1 accounts for this customer.
			# NOTE(review): the ``zv == 1`` flag is always False whenever
			# an account is created, because zv == 1 gives an empty range.
			zv = z1.next()
			a_s = [account(max_accounts*i+j, edate, zv == 1) for j in xrange(0, zv-1)]
			# customer_id + first col is account_id, last col is effective_date
			ac_s = [[c_s[0], a[0], a[-1]] for a in a_s]
			# Draw zv again, then create zv - 1 transactions per account
			# (same remark about the ``zv == 1`` flag applies).
			zv = z2.next()
			t_s = [transaction(max_accounts*i+max_txns*j+k, a[1], a[-1], zv == 1) for (j, a) in enumerate(ac_s) for k in xrange(0, zv-1)]
			write_rows(tbl_customers, [c_s])
			write_rows(tbl_accounts, a_s)
			write_rows(tbl_c_accounts, ac_s)
			write_rows(tbl_txns, t_s)
	return 0

예제 #5

0

파일 보기

파일: Center.py 프로젝트: yiwei01/CNS_Final_Project

	parser.add_argument('-p1', type=str, default='kRR', metavar='good_protocol', help='protocol of good guys, \'kRR\' or \'OUE\', default = \'kRR\'')
	parser.add_argument('-m', type=int, default=3, metavar='m', help='number of bad guys, default = 3')
	# parser.add_argument('-p2', type=str, default='MGA', metavar='bad_protocol',help='protocol of bad guys')
	parser.add_argument('-d', type=int, default=10, help='size of domain, default = 10')
	parser.add_argument('--promote', type=str, default="3,4,5", metavar='promoted_items', help='the items attackers want to promoted, separate by \',\' default = \'3,4,5\'')
	parser.add_argument('-r', type=int, default=10, metavar='round', help='number of rounds, default = 10')
	args = parser.parse_args()

	return args

if __name__ == '__main__':
	# Script entry point: parse the options, build the honest and attacking
	# populations, and run the simulation.
	args = parse_arguments()

	# --promote is a comma-separated list of item indices. Sorting puts the
	# largest index last; splitting always yields at least one element, so
	# the list is never empty (a non-integer element raises ValueError).
	promoted_items = sorted(int(item) for item in args.promote.split(','))

	# Any promoted index >= d is rejected.
	if args.d <= promoted_items[-1]:
		logging.error('promoted item\'s index cannot exceed the size of domain!')
		# SystemExit is raised directly: the exit() helper is only
		# installed by the ``site`` module and may be absent.
		raise SystemExit(-1)

	good_protocol = kRR(d=args.d) if args.p1 == 'kRR' else OUE(d=args.d)
	bad_protocol = MGA(d=args.d, attack_protocol=args.p1)

	good_guys = GoodGuys(n=args.n, d=args.d, distribution=zipf(d=args.d), protocol=good_protocol)
	bad_guys = BadGuys(m=args.m, d=args.d, promotedItems=promoted_items, protocol=bad_protocol)

	center = Center(round=args.r, good_guys=good_guys, bad_guys=bad_guys)
	center.run()

예제 #6

0

파일 보기

파일: agent.py 프로젝트: inbalroz/energy-demand

class agent:
    """A bidder with a Zipf-sampled quantity ``q``, a cost ``c`` and a factor ``p``.

    The ``*_bid_on_contract`` methods choose a contract based on ``q`` and
    store the chosen contract's ``l``/``f`` and the resulting bid on the
    instance. ``set_contract`` (or ``set_single_contract`` for the
    single-contract variant) must be called first.
    """

    # Sampler shared by all agents unless one is passed to __init__.
    # TODO check zipf(1,500) np allow only a>1
    zipf_dist = zipf.zipf(1, 500)

    def __init__(self, ID, _zipf_dist = zipf_dist):
        """Draw the agent's random attributes.

        ID         -- identifier stored on the agent.
        _zipf_dist -- object with a ``sample()`` method; defaults to the
                      class-level sampler.
        """
        # Quantity: ten times one sample (presumably kWh -- confirm).
        self.q = _zipf_dist.sample() * 10
        # Cost: between 20% and 100% of q.
        self.c = rd.uniform(0.2, 1)*self.q
        self.p = rd.uniform(0.7, 1)
        self.ID = ID
        # True when the cost per unit of q is at most 0.5.
        self.bid_in_SCE = self.c / self.q <= 0.5

    def introduce_self(self):
        """Print the agent's ID, c, q and p."""
        print('agent ID: ', self.ID, 'c:',self.c,'q:',self.q,'p:',self.p)

    def set_contract(self, contract):
        """Store the sequence of contracts used by the multi-contract bids."""
        self.contract = contract

    def set_single_contract(self, single_contract):
        """Store the contracts used by Fixed_single_cont_bid_on_contract."""
        self.single_contract = single_contract

    def _select_contract(self):
        """Return the first contract with ``l <= q < l + 10``.

        Falls back to the last contract in ``self.contract`` when no
        contract's window contains ``q``.
        """
        for cont in self.contract:
            if cont.l <= self.q < cont.l + 10:
                return cont
        return self.contract[-1]

    def Fixed_cont_bid_on_contract(self):
        """Bid ``(1 - p) * f + c`` on the selected contract.

        Resets ``Fixed_cont_bids`` / ``Fixed_cont_all_q`` to hold just this
        bid and ``q``, and records the selected contract's ``l`` and ``f``.
        """
        self.Fixed_cont_bids = []
        self.Fixed_cont_all_q = []
        cont = self._select_contract()
        self.Fixed_cont_l_of_selected_cont = cont.l
        self.Fixed_cont_f_of_selected_cont = cont.f
        self.Fixed_cont_bids.append((1 - self.p) * cont.f + self.c)
        self.Fixed_cont_all_q.append(self.q)

    def SCE_cont_bid_on_contract(self):
        """Record the ``l`` of the selected contract; no bid is computed."""
        self.SCE_cont_l_of_selected_cont = self._select_contract().l

    def cliff_cont_bid_on_contract(self):
        """Bid ``(1 - p) * f + c`` on the selected contract, capped at ``l / 2``.

        Resets ``Cliff_cont_bids`` / ``Cliff_cont_all_q`` and records the
        selected contract's ``l`` and ``f``. A bid above ``l / 2`` is
        recorded as ``None`` and ``q`` is not recorded.
        """
        self.Cliff_cont_bids = []
        self.Cliff_cont_all_q = []
        cont = self._select_contract()
        self.Cliff_cont_l_of_selected_cont = cont.l
        self.Cliff_cont_f_of_selected_cont = cont.f
        # NOTE: a tiered variant (different formulas for q < alfa * l,
        # alfa * l <= q < l, and q >= l, using cont.alfa / cont.beita) was
        # disabled in the original; only this flat formula is active.
        Cliff_cont_Bid = (1 - self.p) * cont.f + self.c
        if Cliff_cont_Bid <= cont.l / 2:
            self.Cliff_cont_bids.append(Cliff_cont_Bid)
            self.Cliff_cont_all_q.append(self.q)
        else:
            self.Cliff_cont_bids.append(None)

    def Fixed_single_cont_bid_on_contract(self):
        """Bid on each contract in ``self.single_contract`` while ``q >= l``.

        For every contract reached with ``q >= cont.l`` a bid of
        ``(1 - p) * f + c`` and ``q`` are appended, and that contract's
        ``l``/``f`` are recorded (so the last such contract wins). At the
        first contract with ``q < cont.l`` a ``None`` bid is appended and
        the method returns.
        """
        self.Fixed_single_cont_bids = []
        self.Fixed_single_cont_all_q = []
        for cont in self.single_contract:
            if self.q >= cont.l:
                self.Fixed_single_cont_l_of_selected_cont = cont.l
                self.Fixed_single_cont_f_of_selected_cont = cont.f
                Fixed_single_cont_Bid = (1 - self.p) * cont.f + self.c
                # NOTE: an ``l / 2`` cap on this bid was disabled in the original.
                self.Fixed_single_cont_bids.append(Fixed_single_cont_Bid)
                self.Fixed_single_cont_all_q.append(self.q)
            else:
                self.Fixed_single_cont_bids.append(None)
                return