-
Notifications
You must be signed in to change notification settings - Fork 0
/
optimization.py
129 lines (105 loc) · 4.55 KB
/
optimization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
'''
Created on Jan 11, 2016
@author: hanhanwu
Optimization routines: simulated annealing and a genetic algorithm.
Simulated annealing starts from a very high temperature (the willingness to accept a worse solution).
The algorithm is willing to accept a worse solution near the beginning of the process,
but it becomes less and less likely to accept a worse solution as the temperature cools,
until, by the end, it only accepts better solutions.
'''
import random as rd
import math
import heterogeneous_data
import mock_Chinese_stock_price
import cross_validation
import KNN
import numpy as np
# annealing optimization
# domain is the (min, max) value range for each attribute
def annealing_opt(domain, costf, T = 10000.0, cool = 0.95, step = 5):
    """Minimise ``costf`` over an integer vector with simulated annealing.

    Starting from a random vector inside ``domain``, each iteration moves one
    randomly chosen coordinate by a random amount in ``[-step, step]``
    (clamped to that coordinate's range) and evaluates the candidate.  A
    better candidate is always accepted; a worse one is accepted with
    probability ``exp(-(error_new - error_old) / T)``, which shrinks as the
    temperature ``T`` is multiplied by ``cool`` after every iteration.

    Args:
        domain: sequence of ``(low, high)`` integer bounds, one per coordinate.
        costf: callable taking a vector (list) and returning a number to
            minimise.  It is called twice per iteration (current vector and
            candidate).
        T: starting temperature; the loop runs while ``T > 0.1``.
        cool: multiplicative cooling factor applied each iteration.
        step: maximum absolute change applied to a coordinate per iteration.

    Returns:
        The vector held when the temperature drops to 0.1 or below.

    Raises:
        ValueError: if ``cool >= 1`` while ``T > 0.1`` (the temperature would
            never fall and the loop would not terminate).
    """
    # Random starting point inside the domain.
    vec = [rd.randint(bounds[0], bounds[1]) for bounds in domain]
    if not vec:
        # Nothing to optimise; avoids randint(0, -1) below.
        return vec

    T = float(T)  # guarantees true division in the acceptance exponent
    if T > 0.1 and cool >= 1:
        raise ValueError('cool must be < 1 so that the temperature decreases')

    while T > 0.1:
        # Pick a fresh coordinate and direction on every iteration so the
        # search can explore the whole vector.
        idx = rd.randint(0, len(domain) - 1)
        delta = rd.randint(-step, step)
        low, high = domain[idx][0], domain[idx][1]

        # Work on a copy: the current solution must stay intact so that the
        # two costs below really compare "before" and "after".
        vec_new = list(vec)
        vec_new[idx] = min(high, max(low, vec_new[idx] + delta))

        error_old = costf(vec)
        error_new = costf(vec_new)

        if error_new < error_old:
            accept = True
        else:
            # Only evaluated for non-improving moves, so the exponent is
            # never positive and math.exp cannot overflow.
            p = math.exp(-(error_new - error_old) / T)
            accept = rd.random() < p
        if accept:
            vec = vec_new

        T *= cool
        # Single-argument call form prints identically on Python 2 and 3.
        print(T)
    return vec
# genetic optimization
# elite_frac is the fraction of good solutions in the population
def genetic_optimization(domain, costf, generations_num=100, population_size=50, prob_mutation=0.4, elite_frac=0.2, step=1):
    """Minimise ``costf`` over an integer vector with a genetic algorithm.

    A random population is scored and sorted each generation.  The best
    ``elite_frac`` of it (at least one vector) survives unchanged, and the
    rest of the next generation is bred from those elites by mutation (with
    probability ``prob_mutation``) or crossover.

    Args:
        domain: sequence of ``(low, high)`` integer bounds, one per coordinate.
        costf: callable taking a vector (list) and returning a number to
            minimise.
        generations_num: number of generations to run.
        population_size: number of vectors kept in every generation.
        prob_mutation: probability that a new member is produced by mutation
            rather than crossover.
        elite_frac: fraction of the population carried over as elites.
        step: amount by which mutation changes a coordinate.

    Returns:
        The lowest-cost vector found in the last scored generation.

    Raises:
        ValueError: if ``population_size`` is smaller than 1.
    """
    if population_size < 1:
        raise ValueError('population_size must be at least 1')

    def mutation(vec):
        # Return a copy of vec with one coordinate nudged by step, kept
        # inside that coordinate's bounds.
        if not vec:
            return list(vec)
        mutate_idx = rd.randint(0, len(vec) - 1)
        low, high = domain[mutate_idx][0], domain[mutate_idx][1]
        value = vec[mutate_idx]
        if rd.random() < 0.5 and value > low:
            # Room below: move down, never past the lower bound.
            value = max(low, value - step)
        elif value < high:
            # Room above: move up, never past the upper bound.
            value = min(high, value + step)
        return vec[:mutate_idx] + [value] + vec[mutate_idx + 1:]

    def crossover(vec1, vec2):
        # Join the head of vec1 to the tail of vec2 at a random cut point.
        # max(1, ...) keeps randint valid for vectors shorter than 3.
        cross_idx = rd.randint(1, max(1, len(vec1) - 2))
        return vec1[:cross_idx] + vec2[cross_idx:]

    def score(candidates):
        # Sorted (cost, vector) pairs, cheapest first.
        return sorted((costf(v), v) for v in candidates)

    # initialize population
    population = [
        [rd.randint(bounds[0], bounds[1]) for bounds in domain]
        for _member in range(population_size)
    ]

    # Always keep at least the single best vector, never more than everyone.
    num_elites = min(population_size, max(1, int(population_size * elite_frac)))

    scores = []
    for _generation in range(generations_num):
        scores = score(population)
        ranked = [v for (_cost, v) in scores]
        population = ranked[:num_elites]

        # Each generation has the same number of members.
        while len(population) < population_size:
            if rd.random() < prob_mutation:
                # randint is inclusive, hence num_elites - 1: parents are
                # drawn from the elites only.
                v_idx = rd.randint(0, num_elites - 1)
                population.append(mutation(ranked[v_idx]))
            else:
                v1_idx = rd.randint(0, num_elites - 1)
                v2_idx = rd.randint(0, num_elites - 1)
                population.append(crossover(ranked[v1_idx], ranked[v2_idx]))

        # Single-argument print calls behave the same on Python 2 and 3.
        print('current best score and the vector:  %s' % (scores[0],))
        allscores = [c for (c, _vec) in scores]
        current_avg_score = np.average(allscores)
        print('current average score:  %s' % (current_avg_score,))

    if not scores:
        # No generation ran; fall back to ranking the initial population.
        scores = score(population)

    # print final best score
    print('best score and the vector:  %s' % (scores[0][0],))
    return scores[0][1]
# return the cost function
def generatecostf(data, algr, trails):
    """Build a cost function that evaluates a scale vector on ``data``.

    The returned callable takes a ``scale`` vector, rescales ``data`` with it
    via ``rescale`` and returns whatever
    ``cross_validation.cross_validate(rescaled, algr, trails)`` returns.
    """
    def costf(scale):
        # Scale first, then hand the scaled rows to cross-validation.
        return cross_validation.cross_validate(rescale(data, scale), algr, trails)

    return costf
def rescale(data_set, scale):
    """Return a new data set whose inputs are multiplied by ``scale``.

    Each row of ``data_set`` is a dict with an ``'input'`` sequence and a
    ``'price'``.  Only the first ``len(scale)`` inputs are kept, each
    multiplied by the factor at the same position; ``'price'`` is copied
    through unchanged.  The original rows are not modified.
    """
    return [
        {
            'input': [row['input'][pos] * factor for pos, factor in enumerate(scale)],
            'price': row['price'],
        }
        for row in data_set
    ]
def main():
    """Run both optimisers on the mock stock data and print their results.

    Each optimiser searches for one integer scale factor in ``[0, 10]`` per
    input attribute, using a cost function built by ``generatecostf`` with
    ``KNN.get_KNN`` and 10 trials.
    """
    # One (low, high) scale range per input attribute.
    domain = [(0, 10)] * 5
    data = mock_Chinese_stock_price.get_stockset_various()
    costf = generatecostf(data, algr=KNN.get_KNN, trails=10)

    # Single-argument print calls behave the same on Python 2 and 3.
    annealing_optimized_result = annealing_opt(domain, costf)
    print('using annealing optimizaton: [rating, age, duration, investment, employee_number] %s'
          % (annealing_optimized_result,))

    # The result must be bound to the same name that is printed below.
    genetic_optimized_result = genetic_optimization(domain, costf)
    print('using genetic optimizaton: [rating, age, duration, investment, employee_number] %s'
          % (genetic_optimized_result,))
# Run the optimisation demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()