import torch
import torch.nn.functional as F
import torch.optim as optim


def binary_search_x(x_minus, x_plus, y_minus, y_plus):
    # Users must make sure there exists a feasible solution in the rectangular area.
    # The search moves (x, y) along the segment from (x_minus, y_plus) (alpha = 1)
    # toward (0, y_minus) (alpha = 0).
    with torch.no_grad():
        a, b, c = plane(x_minus, y_plus)
        q_loss, valid = qualification_loss(x_minus, x_plus, y_minus, y_plus,
                                           a, b, c, confidence=0)
        if valid.min() < 1:
            idx = valid < 1
            print(x_minus[idx], x_plus[idx], y_minus[idx], y_plus[idx],
                  a[idx], b[idx], c[idx])
            raise Exception('(x_minus, y_plus) is not always feasible')
        alpha_u = torch.ones(x_minus.shape, device=x_minus.device)  # alpha_u is always feasible
        alpha_l = alpha_u * 0
        for i in range(10):
            alpha = (alpha_u + alpha_l) / 2
            x = x_minus * alpha
            y = y_plus * alpha + (1 - alpha) * y_minus
            a, b, c = plane(x, y)
            q_loss, valid = qualification_loss(x_minus, x_plus, y_minus, y_plus,
                                               a, b, c, confidence=0)
            valid = valid.float()
            alpha_l = (1 - valid) * alpha + valid * alpha_l
            alpha_u = valid * alpha + (1 - valid) * alpha_u
        # Return the point for the last feasible alpha (alpha_u), not the last midpoint.
        return x_minus * alpha_u, alpha_u * y_plus + (1 - alpha_u) * y_minus


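# `plane` is not defined in this section. Judging from the comment in train_lower
# below ("the plane z = ax + by + c is the tangent plane of the surface
# tanh(x) sigmoid(y) at point (x,y)"), it presumably computes that tangent plane.
# The sketch below is a reconstruction under that assumption, kept under a
# separate name so it does not shadow the real helper.
def plane_tangent_sketch(x, y):
    t = torch.tanh(x)
    s = torch.sigmoid(y)
    a = (1 - t ** 2) * s       # d/dx of tanh(x) * sigmoid(y)
    b = t * s * (1 - s)        # d/dy of tanh(x) * sigmoid(y)
    c = t * s - a * x - b * y  # the plane touches the surface at (x, y)
    return a, b, c

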
def binary_search_k(x_minus, x_plus, y_minus, y_plus):
    # Flatten the plane around the point (x_minus, y_plus) by searching over alpha:
    # ka = alpha * a0 gives a = a0 - ka = (1 - alpha) * a0, and
    # kb = -alpha * b0 gives b = b0 + kb = (1 - alpha) * b0.
    with torch.no_grad():
        a0, b0, c0 = plane(x_minus, y_plus)
        alpha_u = torch.ones(x_minus.shape, device=x_minus.device)  # alpha_u is always feasible
        alpha_l = alpha_u * 0
        for i in range(10):
            alpha = (alpha_u + alpha_l) / 2
            ka = a0 * alpha
            kb = -b0 * alpha
            a = a0 - ka
            b = b0 + kb
            # Adjust c so the plane still passes through (x_minus, y_plus).
            c = c0 - kb * y_plus + ka * x_minus
            q_loss, valid = qualification_loss(x_minus, x_plus, y_minus, y_plus,
                                               a, b, c, confidence=0)
            valid = valid.float()
            alpha_l = (1 - valid) * alpha + valid * alpha_l
            alpha_u = valid * alpha + (1 - valid) * alpha_u
        return a0 * alpha_u, -b0 * alpha_u


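# Both binary searches above share one elementwise bisection invariant: alpha_u
# always holds a feasible value, alpha_l an infeasible one, and each step moves
# whichever endpoint agrees with the midpoint's verdict. A self-contained toy
# version of the pattern (the feasibility predicate is invented for illustration):
def _demo_elementwise_bisection(threshold, steps=10):
    # Toy predicate: alpha is "feasible" iff alpha >= threshold, so the search
    # converges to the smallest feasible alpha, elementwise.
    alpha_u = torch.ones_like(threshold)   # feasible by construction
    alpha_l = torch.zeros_like(threshold)  # assumed infeasible
    for _ in range(steps):
        alpha = (alpha_u + alpha_l) / 2
        valid = (alpha >= threshold).float()
        alpha_l = (1 - valid) * alpha + valid * alpha_l
        alpha_u = valid * alpha + (1 - valid) * alpha_u
    return alpha_u
# _demo_elementwise_bisection(torch.tensor([0.3, 0.7])) -> approx [0.3, 0.7] from above

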
def find_initial_feasible_solution(x_minus, x_plus, y_minus, y_plus):
    with torch.no_grad():
        device = x_minus.device
        x_best = torch.zeros(x_minus.shape).to(device)
        y_best = torch.zeros(x_minus.shape).to(device)
        ka_best = torch.zeros(x_minus.shape).to(device)
        kb_best = torch.zeros(x_minus.shape).to(device)
        x = x_minus
        y = y_plus
        a, b, c = plane(x, y)
        q_loss, valid = qualification_loss(x_minus, x_plus, y_minus, y_plus,
                                           a, b, c, confidence=0)
        if valid.sum() > 0:
            # Where the tangent plane at (x_minus, y_plus) is already feasible,
            # shrink (x, y) toward the box interior by binary search.
            x_temp, y_temp = binary_search_x(x_minus[valid], x_plus[valid],
                                             y_minus[valid], y_plus[valid])
            x_best[valid] = x_temp
            y_best[valid] = y_temp
            ka_best[valid] = 0
            kb_best[valid] = 0
        valid = ~valid
        if valid.sum() > 0:
            # Otherwise keep (x, y) = (x_minus, y_plus) and flatten the plane
            # by binary search over (ka, kb).
            ka_temp, kb_temp = binary_search_k(x_minus[valid], x_plus[valid],
                                               y_minus[valid], y_plus[valid])
            x_best[valid] = x_minus[valid]
            y_best[valid] = y_plus[valid]
            ka_best[valid] = ka_temp
            kb_best[valid] = kb_temp
        return x_best, y_best, ka_best, kb_best


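# Hypothetical usage sketch of the initialization pipeline above (the box
# bounds are made up; whether binary_search_x succeeds depends on their
# feasibility, so this is illustrative only):
#
#   x_minus = torch.tensor([-2.0, -1.5]); x_plus = torch.tensor([-0.5, 0.0])
#   y_minus = torch.tensor([ 0.0,  0.5]); y_plus = torch.tensor([ 1.0,  2.0])
#   x0, y0, ka0, kb0 = find_initial_feasible_solution(x_minus, x_plus, y_minus, y_plus)
#   # (x0, y0, ka0, kb0) then seeds the gradient-based refinement in train_lower below.

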
def train_lower(x0, y0, x_minus, x_plus, y_minus, y_plus, lr=1e-3, max_iter=100, print_info=True):
    # Search over (x, y) in [x_minus, x_plus] x [y_minus, y_plus].
    # The plane z = a*x + b*y + c is the tangent plane of the surface
    # tanh(x) * sigmoid(y) at the point (x, y).
    x = x0.data.clone()
    x.requires_grad = True
    y = y0.data.clone()
    y.requires_grad = True
    a_best = torch.zeros(x_minus.shape, device=x_minus.device)
    b_best = torch.zeros(x_minus.shape, device=x_minus.device)
    c_best = torch.zeros(x_minus.shape, device=x_minus.device)
    x_best = torch.zeros(x_minus.shape, device=x_minus.device)
    y_best = torch.zeros(x_minus.shape, device=x_minus.device)
    optimizer = optim.Adam([x, y], lr=lr)
    cubic = torch.abs((x_plus - x_minus) * (y_plus - y_minus)
                      * torch.tanh(x_minus) * torch.sigmoid(y_plus))
    cubic = torch.clamp(cubic, min=1e-3)
    v_best = -torch.ones(x_minus.shape, device=x_minus.device) * 1000  # we want to maximize the volume
    for i in range(max_iter):
        q_loss, valid = qualification_loss_lower(x, y, x_minus, x_plus, y_minus, y_plus)
        a, b, c = plane(x, y)
        v_loss = get_volume(a, b, c, x_minus, x_plus, y_minus, y_plus) / cubic
        with torch.no_grad():  # best-so-far bookkeeping, kept out of the autograd graph
            best = (v_loss > v_best) * valid
            a_best[best] = a[best]
            b_best[best] = b[best]
            c_best[best] = c[best]
            x_best[best] = x[best]
            y_best[best] = y[best]
            v_best[best] = v_loss[best]
        if print_info:
            print('2l q loss: %.4f volume: %.4f' % (q_loss.mean().item(), v_loss.mean().item()))
        loss = q_loss - (valid.float() + 0.1) * v_loss
        loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return a_best, b_best, c_best, x_best, y_best


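# The best-so-far bookkeeping in train_lower (record the iterate only where it
# is both better and valid) is easy to get subtly wrong, so here is a
# self-contained toy version; the objective and the validity mask are invented
# purely for illustration.
def _demo_masked_best_tracking(steps=200):
    target = torch.tensor([0.5, 1.0, 2.0, 3.0])
    t = torch.zeros(4, requires_grad=True)
    best_score = -torch.ones(4) * 1000
    best_t = torch.zeros(4)
    opt = optim.Adam([t], lr=0.05)
    for _ in range(steps):
        score = -(t - target) ** 2   # maximized at t == target
        valid = t <= 1.5             # stand-in feasibility constraint
        with torch.no_grad():        # bookkeeping stays out of the autograd graph
            best = (score > best_score) & valid
            best_score[best] = score[best]
            best_t[best] = t[best]
        loss = -score.mean()
        opt.zero_grad()
        loss.backward()
        opt.step()
    return best_t  # approx [0.5, 1.0, 1.5, 1.5]: clamped where the target is infeasible

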
def binary_search_x(x_minus, x_plus, y_minus, y_plus):
    # Users must make sure (x_minus, y_plus) is feasible.
    with torch.no_grad():
        a, b, c = plane(x_minus, y_plus)
        q_loss, valid = qualification_loss_lower(a, b, c, x_minus, x_plus,
                                                 y_minus, y_plus, confidence=0)
        if valid.min() < 1:
            raise Exception('(x_minus, y_plus) is not always feasible')
        alpha_u = torch.ones(x_minus.shape, device=x_minus.device)  # alpha_u is always feasible
        alpha_l = alpha_u * 0
        for i in range(10):
            alpha = (alpha_u + alpha_l) / 2
            x = x_minus * alpha
            y = y_plus * alpha
            a, b, c = plane(x, y)
            q_loss, valid = qualification_loss_lower(a, b, c, x_minus, x_plus,
                                                     y_minus, y_plus, confidence=0)
            valid = valid.float()
            alpha_l = (1 - valid) * alpha + valid * alpha_l
            alpha_u = valid * alpha + (1 - valid) * alpha_u
        return alpha_u * x_minus, alpha_u * y_plus


def adjust_upper_plane(x0, y0, x_minus, x_plus, y_minus, y_plus, lr=1e-2, max_iter=100, print_info=True):
    # This function finds the minimum value of a*x + b*y + c - tanh(x)*sigmoid(y).
    # If it is less than zero, the plane z = a*x + b*y + c needs to be raised a little bit.
    x0 = x0.detach()
    y0 = y0.detach()
    # Start the four candidate points at the midpoints between (x0, y0) and the box corners.
    x1 = ((x0 + x_minus) / 2).data.clone()
    y1 = ((y0 + y_minus) / 2).data.clone()
    x2 = ((x0 + x_minus) / 2).data.clone()
    y2 = ((y0 + y_plus) / 2).data.clone()
    x3 = ((x0 + x_plus) / 2).data.clone()
    y3 = ((y0 + y_plus) / 2).data.clone()
    x4 = ((x0 + x_plus) / 2).data.clone()
    y4 = ((y0 + y_minus) / 2).data.clone()
    x1.requires_grad = True
    y1.requires_grad = True
    x2.requires_grad = True
    y2.requires_grad = True
    x3.requires_grad = True
    y3.requires_grad = True
    x4.requires_grad = True
    y4.requires_grad = True
    a, b, c = plane(x0, y0)
    optimizer = optim.Adam([x1, y1, x2, y2, x3, y3, x4, y4], lr=lr)
    x1_best = torch.zeros(x_minus.shape, device=x_minus.device)
    y1_best = torch.zeros(x_minus.shape, device=x_minus.device)
    loss1_best = torch.ones(x_minus.shape, device=x_minus.device) * 1000
    x2_best = torch.zeros(x_minus.shape, device=x_minus.device)
    y2_best = torch.zeros(x_minus.shape, device=x_minus.device)
    loss2_best = torch.ones(x_minus.shape, device=x_minus.device) * 1000
    x3_best = torch.zeros(x_minus.shape, device=x_minus.device)
    y3_best = torch.zeros(x_minus.shape, device=x_minus.device)
    loss3_best = torch.ones(x_minus.shape, device=x_minus.device) * 1000
    x4_best = torch.zeros(x_minus.shape, device=x_minus.device)
    y4_best = torch.zeros(x_minus.shape, device=x_minus.device)
    loss4_best = torch.ones(x_minus.shape, device=x_minus.device) * 1000
    for i in range(max_iter):
        loss1 = a * x1 + b * y1 + c - torch.tanh(x1) * torch.sigmoid(y1)
        loss2 = a * x2 + b * y2 + c - torch.tanh(x2) * torch.sigmoid(y2)
        loss3 = a * x3 + b * y3 + c - torch.tanh(x3) * torch.sigmoid(y3)
        loss4 = a * x4 + b * y4 + c - torch.tanh(x4) * torch.sigmoid(y4)
        qloss1, valid1 = qualification_loss_upper(x1, y1, x_minus, x_plus, y_minus, y_plus)
        qloss2, valid2 = qualification_loss_upper(x2, y2, x_minus, x_plus, y_minus, y_plus)
        qloss3, valid3 = qualification_loss_upper(x3, y3, x_minus, x_plus, y_minus, y_plus)
        qloss4, valid4 = qualification_loss_upper(x4, y4, x_minus, x_plus, y_minus, y_plus)
        with torch.no_grad():  # best-so-far bookkeeping, kept out of the autograd graph
            best1 = (loss1 < loss1_best) * valid1
            x1_best[best1] = x1[best1]
            y1_best[best1] = y1[best1]
            loss1_best[best1] = loss1[best1]
            best2 = (loss2 < loss2_best) * valid2
            x2_best[best2] = x2[best2]
            y2_best[best2] = y2[best2]
            loss2_best[best2] = loss2[best2]
            best3 = (loss3 < loss3_best) * valid3
            x3_best[best3] = x3[best3]
            y3_best[best3] = y3[best3]
            loss3_best[best3] = loss3[best3]
            best4 = (loss4 < loss4_best) * valid4
            x4_best[best4] = x4[best4]
            y4_best[best4] = y4[best4]
            loss4_best[best4] = loss4[best4]
        loss = loss1 * (valid1.float() + 0.1) + qloss1
        loss = loss + loss2 * (valid2.float() + 0.1) + qloss2
        loss = loss + loss3 * (valid3.float() + 0.1) + qloss3
        loss = loss + loss4 * (valid4.float() + 0.1) + qloss4
        loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if print_info:
            print('1 adjust upper plane loss: %.4f' % loss.item())
    return (x1_best, y1_best, x2_best, y2_best, x3_best, y3_best, x4_best, y4_best,
            loss1_best, loss2_best, loss3_best, loss4_best)


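# adjust_upper_plane only probes four optimized points, so a cheap independent
# sanity check is to evaluate the gap a*x + b*y + c - tanh(x)*sigmoid(y) on a
# dense grid and confirm its minimum is nonnegative. A sketch under the
# assumption of scalar bounds (this helper is not part of the original code):
def _check_upper_plane_on_grid(a, b, c, x_minus, x_plus, y_minus, y_plus, n=101):
    xs = torch.linspace(0, 1, n).view(-1, 1) * (x_plus - x_minus) + x_minus
    ys = torch.linspace(0, 1, n).view(1, -1) * (y_plus - y_minus) + y_minus
    gap = a * xs + b * ys + c - torch.tanh(xs) * torch.sigmoid(ys)
    return gap.min()  # negative => the plane is not yet a valid upper bound

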
def train_lower(u0, v0, ka0, kb0, x_minus, x_plus, y_minus, y_plus,
                lr_x=1e-3, lr_k=1e-2, max_iter=100, print_info=True):
    device = x_minus.device
    x_best = torch.zeros(x_minus.shape).to(device)
    y_best = torch.zeros(x_minus.shape).to(device)
    a_best = torch.zeros(x_minus.shape).to(device)
    b_best = torch.zeros(x_minus.shape).to(device)
    c_best = torch.zeros(x_minus.shape).to(device)
    ka_best = torch.zeros(x_minus.shape).to(device)
    kb_best = torch.zeros(x_minus.shape).to(device)
    cubic = -(x_plus - x_minus) * (y_plus - y_minus) * torch.tanh(x_minus) * torch.sigmoid(y_plus)
    cubic = torch.clamp(cubic, min=1e-3)
    v_best = -torch.ones(x_minus.shape, device=device) * 10000
    u = u0.data.clone()
    v = v0.data.clone()
    ka = ka0.data.clone()
    kb = kb0.data.clone()
    u.requires_grad = True
    v.requires_grad = True
    ka.requires_grad = True
    kb.requires_grad = True
    optimizer_x = optim.Adam([u, v], lr=lr_x)
    optimizer_k = optim.Adam([ka, kb], lr=lr_k)
    for i in range(max_iter):
        # x ranges over [x_minus, 0] with x_minus <= 0; y over [y_minus, y_plus].
        slop = 0.01
        u_minus = -F.leaky_relu(-u, negative_slope=slop)  # makes u_minus grow much slower when u >= 0
        idx_v = (v >= y_minus).float()
        v_plus = v * idx_v + (1 - idx_v) * (slop * (v - y_minus) + y_minus)  # makes v_plus decrease slower when v < y_minus
        idx_x = (u >= x_minus).float()
        x = u_minus * idx_x + (1 - idx_x) * (slop * (u_minus - x_minus) + x_minus)  # makes x decrease slower when u < x_minus
        idx_y = (v <= y_plus).float()
        y = v_plus * idx_y + (1 - idx_y) * (slop * (v_plus - y_plus) + y_plus)  # makes y grow slower when v > y_plus
        a, b, c = plane(x, y)
        idx = (x <= x_minus).float()
        a = a - F.leaky_relu(ka, negative_slope=slop) * idx
        c = c + F.leaky_relu(ka, negative_slope=slop) * x * idx
        # If x <= x_minus, ka keeps its original value; if x > x_minus, it is effectively reset to 0.
        idx = (y >= y_plus).float()
        b = b + F.leaky_relu(kb, negative_slope=slop) * idx
        c = c - F.leaky_relu(kb, negative_slope=slop) * y * idx
        q_loss, valid = qualification_loss(x_minus, x_plus, y_minus, y_plus,
                                           a, b, c, confidence=-1e-3)
        v_loss = get_volume(a, b, c, x_minus, x_plus, y_minus, y_plus)
        v_loss = v_loss / cubic
        if print_info:
            print('12l q loss: %.4f volume: %.4f' % (q_loss.mean().item(), v_loss.mean().item()))
        loss = (q_loss - v_loss * (valid.float() + 0.01)).mean()  # we want to maximize the volume
        with torch.no_grad():  # best-so-far bookkeeping, kept out of the autograd graph
            best = (v_loss > v_best) * valid
            v_best[best] = v_loss[best]
            x_best[best] = x[best]
            y_best[best] = y[best]
            ka_best[best] = ka[best]
            kb_best[best] = kb[best]
            a_best[best] = a[best]
            b_best[best] = b[best]
            c_best[best] = c[best]
        optimizer_x.zero_grad()
        optimizer_k.zero_grad()
        loss.backward()
        idx = (y > y_plus) * (kb > 0)  # if y > y_plus and kb > 0, we don't move v
        v.grad = v.grad * (1 - idx.float())
        idx = (x < x_minus) * (ka > 0)  # if x < x_minus and ka > 0, we don't move u
        u.grad = u.grad * (1 - idx.float())
        optimizer_x.step()
        optimizer_k.step()
    return x_best, y_best, ka_best, kb_best, a_best, b_best, c_best, v_best


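# The leaky_relu reparameterizations above act as soft clamps: outside the
# allowed range the variable still receives a small slope-0.01 gradient instead
# of the zero gradient a hard torch.clamp would give, so the optimizer can pull
# it back into range. A minimal illustration on a toy scalar:
def _demo_soft_vs_hard_clamp():
    u = torch.tensor(1.0, requires_grad=True)
    torch.clamp(u, max=0.0).backward()                   # hard clamp: zero gradient for u > 0
    g_hard = u.grad.clone()
    u.grad = None
    (-F.leaky_relu(-u, negative_slope=0.01)).backward()  # soft clamp used above
    return g_hard.item(), u.grad.item()                  # (0.0, 0.01)

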
def train_lower(u0, v0, ka0, kb0, x_minus, x_plus, y_minus, y_plus,
                lr_x=1e-3, lr_k=1e-2, max_iter=100, print_info=True):
    device = x_minus.device
    x_best = torch.zeros(x_minus.shape).to(device)
    y_best = torch.zeros(x_minus.shape).to(device)
    a_best = torch.zeros(x_minus.shape).to(device)
    b_best = torch.zeros(x_minus.shape).to(device)
    c_best = torch.zeros(x_minus.shape).to(device)
    ka_best = torch.zeros(x_minus.shape).to(device)
    kb_best = torch.zeros(x_minus.shape).to(device)
    cubic = (x_plus - x_minus) * (y_plus - y_minus)
    cubic = torch.clamp(cubic, min=1e-4)
    v_best = -torch.ones(x_minus.shape).to(device)
    u = u0
    v = v0
    ka = ka0
    kb = kb0
    u.requires_grad = True
    v.requires_grad = True
    ka.requires_grad = True
    kb.requires_grad = True
    optimizer_x = optim.Adam([u, v], lr=lr_x)
    optimizer_k = optim.Adam([ka, kb], lr=lr_k)
    for i in range(max_iter):
        # x ranges over [x_minus, 0] with x_minus <= 0; y over [y_minus, y_plus].
        slop = 0.01
        u_minus = -F.leaky_relu(-u, negative_slope=slop)  # soft clamp: u_minus grows much slower when u >= 0
        v_plus = F.leaky_relu(v, negative_slope=slop)
        idx_x = (u >= x_minus).float()
        x = u_minus * idx_x + (1 - idx_x) * (slop * (u_minus - x_minus) + x_minus)  # x decreases slower when u < x_minus
        idx_y = (v <= y_plus).float()
        y = v_plus * idx_y + (1 - idx_y) * (slop * (v_plus - y_plus) + y_plus)  # y grows slower when v > y_plus
        a, b, c = plane(x, y)
        idx = (x <= x_minus).float()
        a = a - F.leaky_relu(ka, negative_slope=slop) * idx
        c = c + F.leaky_relu(ka, negative_slope=slop) * x * idx
        # If x <= x_minus, ka keeps its original value; if x > x_minus, it is effectively reset to 0.
        idx = (y >= y_plus).float()
        b = b + F.leaky_relu(kb, negative_slope=slop) * idx
        c = c - F.leaky_relu(kb, negative_slope=slop) * y * idx
        q_loss, valid = qualification_loss_lower(a, b, c, x_minus, x_plus,
                                                 y_minus, y_plus, confidence=0)
        v_loss = get_volume(a, b, c, x_minus, x_plus, y_minus, y_plus)
        v_loss = v_loss / cubic
        if print_info:
            print('all 4l q_loss %.4f, volume %.4f' % (q_loss.mean().item(), v_loss.mean().item()))
        # Take the mean of the full per-element objective so loss is a scalar
        # (backward() cannot implicitly handle a non-scalar loss).
        loss = (q_loss - v_loss * (valid.float() + 0.1)).mean()
        with torch.no_grad():  # best-so-far bookkeeping, kept out of the autograd graph
            best = (v_loss > v_best) * valid
            v_best[best] = v_loss[best]
            x_best[best] = x[best]
            y_best[best] = y[best]
            ka_best[best] = ka[best]
            kb_best[best] = kb[best]
            a_best[best] = a[best]
            b_best[best] = b[best]
            c_best[best] = c[best]
        optimizer_x.zero_grad()
        optimizer_k.zero_grad()
        loss.backward()
        idx = (y > y_plus) * (kb > 0)  # if y > y_plus and kb > 0, we don't move v
        v.grad = v.grad * (1 - idx.float())
        idx = (x < x_minus) * (ka > 0)  # if x < x_minus and ka > 0, we don't move u
        u.grad = u.grad * (1 - idx.float())
        # Zero out NaN gradients (NaN is the only value not equal to itself).
        u.grad[u.grad != u.grad] = 0
        v.grad[v.grad != v.grad] = 0
        ka.grad[ka.grad != ka.grad] = 0
        kb.grad[kb.grad != kb.grad] = 0
        optimizer_x.step()
        optimizer_k.step()
    return x_best, y_best, ka_best, kb_best, a_best, b_best, c_best, v_best
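

# The g[g != g] = 0 lines above zero out NaN gradients by exploiting the fact
# that NaN is the only value not equal to itself. On PyTorch >= 1.8 the same
# guard can be spelled with torch.nan_to_num; both shown on made-up data:
def _demo_nan_guard():
    g = torch.tensor([0.5, float('nan'), -1.0])
    g[g != g] = 0  # the idiom used above
    g2 = torch.nan_to_num(torch.tensor([0.5, float('nan'), -1.0]), nan=0.0)
    return g, g2   # both: tensor([0.5, 0.0, -1.0])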