def sobel(A, Gx, Gy):
    """Build the HeteroCL stages of a Sobel gradient-magnitude filter.

    The three channels of ``A`` are summed into stage ``B``; ``B`` is
    combined with the 3x3 kernels ``Gx`` and ``Gy`` (stages ``D`` and ``E``)
    and the result is ``sqrt(D^2 + E^2) / 4328 * 255`` per pixel.

    The tensor shape comes from the module-level ``height`` and ``width``.

    Args:
        A: input tensor, read as ``A[x][y][c]`` for ``c`` in 0..2.
        Gx: kernel indexed ``Gx[r, c]`` with ``r, c`` in 0..2.
        Gy: kernel indexed ``Gy[t, g]`` with ``t, g`` in 0..2.

    Returns:
        An unnamed ``(height, width)`` float tensor with the scaled magnitude.
    """
    shape = (height, width)

    def interior(x, y):
        # Pixels on the outermost row/column keep their value from B.
        return hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1))

    B = hcl.compute(
        shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    # NOTE(review): the 3x3 window starts at (x, y) instead of being centred
    # on it (rows x..x+2, columns y..y+2) -- confirm that this is intended.
    dx_row = hcl.reduce_axis(0, 3)
    dx_col = hcl.reduce_axis(0, 3)
    D = hcl.compute(
        shape,
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + dx_row, y + dx_col] * Gx[dx_row, dx_col],
                    axis=[dx_row, dx_col]),
            B[x, y]),
        "D",
        dtype=hcl.Float())

    dy_row = hcl.reduce_axis(0, 3)
    dy_col = hcl.reduce_axis(0, 3)
    E = hcl.compute(
        shape,
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + dy_row, y + dy_col] * Gy[dy_row, dy_col],
                    axis=[dy_row, dy_col]),
            B[x, y]),
        "E",
        dtype=hcl.Float())

    # 4328 and 255 are scaling constants taken as-is from the original code.
    return hcl.compute(
        shape,
        lambda x, y: hcl.sqrt(D[x][y] * D[x][y] + E[x][y] * E[x][y])
        / 4328 * 255,
        dtype=hcl.Float())
def sobelAlgo(A, Fx, Fy):
    """Build the HeteroCL stages of a Sobel gradient-magnitude filter.

    Stage ``B`` is the per-pixel sum of the three channels of ``A``. Stages
    ``Gx`` and ``Gy`` combine ``B`` with the 3x3 kernels ``Fx`` and ``Fy``.
    The returned tensor is ``sqrt(Gx^2 + Gy^2) / 4328 * 255``.

    The tensor shape comes from the module-level ``height`` and ``width``.

    Args:
        A: input tensor, read as ``A[x][y][c]`` for ``c`` in 0..2.
        Fx: kernel indexed ``Fx[r, c]`` with ``r, c`` in 0..2.
        Fy: kernel indexed ``Fy[t, g]`` with ``t, g`` in 0..2.

    Returns:
        An unnamed ``(height, width)`` float tensor.
    """
    shape = (height, width)

    def interior(x, y):
        # Border pixels fall back to the value already in B.
        return hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1))

    B = hcl.compute(
        shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    # Unlike B, the Gx/Gy stages are created without an explicit dtype.
    fx_row = hcl.reduce_axis(0, 3)
    fx_col = hcl.reduce_axis(0, 3)
    Gx = hcl.compute(
        shape,
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + fx_row, y + fx_col] * Fx[fx_row, fx_col],
                    axis=[fx_row, fx_col]),
            B[x, y]),
        "Gx")

    fy_row = hcl.reduce_axis(0, 3)
    fy_col = hcl.reduce_axis(0, 3)
    Gy = hcl.compute(
        shape,
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + fy_row, y + fy_col] * Fy[fy_row, fy_col],
                    axis=[fy_row, fy_col]),
            B[x, y]),
        "Gy")

    return hcl.compute(
        shape,
        lambda x, y: (hcl.sqrt(Gx[x][y] * Gx[x][y] + Gy[x][y] * Gy[x][y]))
        / 4328 * 255,
        dtype=hcl.Float())
def updateVopt(i, j, k, l, m, n, o, iVals, sVals, actions, Vopt, intermeds,
               trans, interpV, gamma, bounds, goal, ptsEachDim, useNN):
    """Emit one value-iteration update of ``Vopt[i, j, k, l, m, n, o]``.

    For every action ``a`` the Q value is accumulated in ``intermeds[a]``:
    the immediate reward from ``UD.reward`` plus, for each row of ``trans``,
    the discounted value of the successor cell (nearest-neighbour lookup,
    only emitted under ``useNN[0] == 1``). ``Vopt`` at the given cell is then
    set to the maximum over ``intermeds``.

    ``iVals``, ``sVals``, ``trans`` and ``intermeds`` are scratch tensors
    that are overwritten. ``interpV`` is accepted but not used here.

    Each row of ``trans`` is read as ``[probability, s0, ..., s6]``.
    """
    num_dims = 7
    cell = (i, j, k, l, m, n, o)
    p = hcl.scalar(0, "p")

    with hcl.for_(0, actions.shape[0], name="a") as a:
        # Load the grid indices and matching state values of this cell.
        updateStateVals(i, j, k, l, m, n, o, iVals, sVals, bounds, ptsEachDim)
        # Fill trans with the outcome(s) of taking action a from that state.
        UD.transition(sVals, actions[a], bounds, trans, goal)
        # Start the Q value from the immediate reward.
        intermeds[a] = UD.reward(sVals, actions[a], bounds, goal, trans)

        # Add the contribution of every possible successor state.
        with hcl.for_(0, trans.shape[0], name="si") as si:
            p[0] = trans[si, 0]
            for dim in range(num_dims):
                sVals[dim] = trans[si, dim + 1]

            # Nearest neighbour
            with hcl.if_(useNN[0] == 1):
                # Convert the successor state values into grid indices.
                stateToIndex(sVals, iVals, bounds, ptsEachDim)
                # Only successors inside the grid contribute.
                with hcl.if_(hcl.and_(
                        *[iVals[d] < Vopt.shape[d] for d in range(0, 3)])):
                    with hcl.if_(hcl.and_(
                            *[iVals[d] < Vopt.shape[d]
                              for d in range(3, num_dims)])):
                        with hcl.if_(hcl.and_(
                                *[iVals[d] >= 0 for d in range(num_dims)])):
                            successor = Vopt[iVals[0], iVals[1], iVals[2],
                                             iVals[3], iVals[4], iVals[5],
                                             iVals[6]]
                            intermeds[a] += (gamma[0] * (p[0] * successor))

    # Maximise over the Q values, starting from a very low sentinel.
    Vopt[cell] = -1000000
    with hcl.for_(0, intermeds.shape[0], name="r") as r:
        with hcl.if_(Vopt[cell] < intermeds[r]):
            Vopt[cell] = intermeds[r]
def updateVopt(obj, i, j, k, iVals, sVals, actions, Vopt, intermeds, trans,
               interpV, gamma, bounds, goal, ptsEachDim, useNN, fillVal):
    """Emit one value-iteration update of ``Vopt[i, j, k]``.

    For every action ``a``, ``intermeds[a]`` is set to ``obj.reward(...)``
    and then increased by the discounted value of each successor listed in
    ``trans``. Successor values come from a nearest-neighbour lookup when
    ``useNN[0] == 1`` and from ``stateToIndexInterpolants`` (result in
    ``interpV[0]``) when ``useNN[0] == 0``. Finally ``Vopt[i, j, k]`` is
    raised to the largest entry of ``intermeds``.

    ``iVals``, ``sVals``, ``trans``, ``intermeds`` and ``interpV`` are
    scratch tensors that are overwritten. Each row of ``trans`` is read as
    ``[probability, s0, s1, s2]``.
    """
    num_dims = 3
    cell = (i, j, k)
    p = hcl.scalar(0, "p")

    with hcl.for_(0, actions.shape[0], name="a") as a:
        # Load the grid indices and matching state values of this cell.
        updateStateVals(i, j, k, iVals, sVals, bounds, ptsEachDim)
        # Fill trans with the outcome(s) of taking action a from that state.
        obj.transition(sVals, actions[a], bounds, trans, goal)
        # Start the action value from the immediate reward.
        intermeds[a] = obj.reward(sVals, actions[a], bounds, goal, trans)
        # NOTE(review): Vopt is overwritten with the bare reward before the
        # successor lookups and before the final max -- confirm intended.
        Vopt[cell] = intermeds[a]

        # Add the contribution of every possible successor state.
        with hcl.for_(0, trans.shape[0], name="si") as si:
            p[0] = trans[si, 0]
            for dim in range(num_dims):
                sVals[dim] = trans[si, dim + 1]

            # Nearest neighbour
            with hcl.if_(useNN[0] == 1):
                # Convert the successor state values into grid indices.
                stateToIndex(sVals, iVals, bounds, ptsEachDim)
                # Only successors inside the grid contribute.
                with hcl.if_(hcl.and_(
                        *[iVals[d] < Vopt.shape[d] for d in range(num_dims)])):
                    with hcl.if_(hcl.and_(
                            *[iVals[d] >= 0 for d in range(num_dims)])):
                        intermeds[a] += (
                            gamma[0]
                            * (p[0] * Vopt[iVals[0], iVals[1], iVals[2]]))

            # Linear interpolation
            with hcl.if_(useNN[0] == 0):
                # Only successors inside the state bounds contribute.
                with hcl.if_(hcl.and_(
                        *[sVals[d] <= bounds[d, 1] for d in range(num_dims)])):
                    with hcl.if_(hcl.and_(
                            *[sVals[d] >= bounds[d, 0]
                              for d in range(num_dims)])):
                        stateToIndexInterpolants(Vopt, sVals, bounds,
                                                 ptsEachDim, interpV, fillVal)
                        intermeds[a] += (gamma[0] * (p[0] * interpV[0]))

    # Keep the best action value.
    with hcl.for_(0, intermeds.shape[0], name="r") as r:
        with hcl.if_(Vopt[cell] < intermeds[r]):
            Vopt[cell] = intermeds[r]
def updateQopt(i, j, k, a, iVals, sVals, Qopt, actions, intermeds, trans,
               interpV, gamma, bounds, goal, ptsEachDim, useNN, fillVal):
    """Emit one Q-value update of ``Qopt[i, j, k, a]``.

    The entry is initialised with the immediate reward and then updated from
    each successor row of ``trans``: with ``useNN[0] == 1`` it becomes the
    maximum of ``r + gamma * p * Qopt[successor, a_]`` over actions ``a_``;
    with ``useNN[0] == 0`` the interpolated value from
    ``stateToIndexInterpolants`` is added instead.

    ``iVals``, ``sVals``, ``trans`` and ``interpV`` are scratch tensors that
    are overwritten. ``intermeds`` is accepted but not used here. Each row of
    ``trans`` is read as ``[probability, s0, s1, s2]``.
    """
    num_dims = 3
    entry = (i, j, k, a)
    r = hcl.scalar(0, "r")
    p = hcl.scalar(0, "p")

    # Load the grid indices and matching state values of cell (i, j, k).
    updateStateVals(i, j, k, iVals, sVals, bounds, ptsEachDim)
    # Fill trans with the outcome(s) of taking action a from that state.
    transition(sVals, actions[a], bounds, trans, goal)
    # Initialise the Q value with the immediate reward.
    r[0] = reward(sVals, actions[a], bounds, goal, trans)
    Qopt[entry] = r[0]

    with hcl.for_(0, trans.shape[0], name="si") as si:
        p[0] = trans[si, 0]
        for dim in range(num_dims):
            sVals[dim] = trans[si, dim + 1]

        # Nearest neighbour
        with hcl.if_(useNN[0] == 1):
            stateToIndex(sVals, iVals, bounds, ptsEachDim)
            # Only successors inside the grid are considered.
            with hcl.if_(hcl.and_(
                    *[iVals[d] < Qopt.shape[d] for d in range(num_dims)])):
                with hcl.if_(hcl.and_(
                        *[iVals[d] >= 0 for d in range(num_dims)])):
                    with hcl.for_(0, actions.shape[0], name="a_") as a_:
                        candidate = r[0] + (gamma[0] * (
                            p[0] * Qopt[iVals[0], iVals[1], iVals[2], a_]))
                        with hcl.if_(candidate > Qopt[entry]):
                            Qopt[entry] = candidate

        # Linear interpolation
        with hcl.if_(useNN[0] == 0):
            with hcl.if_(hcl.and_(
                    *[sVals[d] <= bounds[d, 1] for d in range(num_dims)])):
                with hcl.if_(hcl.and_(
                        *[sVals[d] >= bounds[d, 0] for d in range(num_dims)])):
                    stateToIndexInterpolants(Qopt, sVals, actions, bounds,
                                             ptsEachDim, interpV, fillVal)
                    Qopt[entry] += (gamma[0] * (p[0] * interpV[0]))

        # NOTE(review): nesting reconstructed from flattened source; this is
        # assumed to run once per successor row -- confirm.
        r[0] += Qopt[entry]
def pad(x, y, z):
    """Return ``imgF[x - 1, y - 1, z]``, or 0 on the first row/column.

    Reads the module-level tensor ``imgF``. The value is staged through a
    one-element scalar named ``"out"`` so that it can be assigned from inside
    the HeteroCL conditional.

    Args:
        x: first index of the padded position.
        y: second index of the padded position.
        z: third index, passed through unchanged.

    Returns:
        The expression held by the ``"out"`` scalar.
    """
    result = hcl.scalar(0, "out")
    has_source_pixel = hcl.and_(x > 0, y > 0)
    with hcl.if_(has_source_pixel):
        # Shift by one so index (1, 1) maps onto imgF[0, 0].
        result.v = imgF[x - 1, y - 1, z]
    with hcl.else_():
        result.v = 0
    return result.v
def transition(sVals, action, bounds, trans, goal):
    """Write the outcome of ``action`` from state ``sVals`` into ``trans[0]``.

    ``trans[0, 0]`` is set to 0 when the state is a goal state or lies within
    0.2 of a bound in the first two dimensions; otherwise it is set to 1.0
    and ``trans[0, 1:4]`` receive the successor state. The third component
    is wrapped back into ``[-pi, pi]``.

    Args:
        sVals: current state, read at indices 0..2.
        action: read at indices 0 and 1.
        bounds: ``bounds[d, 0]`` / ``bounds[d, 1]`` are the lower / upper
            bound of dimension ``d``.
        trans: output tensor; only row 0 is written.
        goal: ``goal[0, 0:2]`` is the goal centre, ``goal[1, 0:2]`` the
            accepted range of the third state component.
    """
    margin = 0.2
    step = 0.6
    pi = 3.141592653589793
    two_pi = 6.283185307179586

    dx = hcl.scalar(0, "dx")
    dy = hcl.scalar(0, "dy")
    mag = hcl.scalar(0, "mag")

    # Distance from the goal centre in the first two dimensions.
    dx[0] = sVals[0] - goal[0, 0]
    dy[0] = sVals[1] - goal[0, 1]
    mag[0] = hcl.sqrt((dx[0] * dx[0]) + (dy[0] * dy[0]))

    # Moving from a goal state: no successor.
    with hcl.if_(hcl.and_(mag[0] <= 1.0,
                          sVals[2] <= goal[1, 1],
                          sVals[2] >= goal[1, 0])):
        trans[0, 0] = 0
    # Moving from an obstacle (too close to a bound): no successor.
    with hcl.elif_(hcl.or_(sVals[0] < bounds[0, 0] + margin,
                           sVals[0] > bounds[0, 1] - margin)):
        trans[0, 0] = 0
    with hcl.elif_(hcl.or_(sVals[1] < bounds[1, 0] + margin,
                           sVals[1] > bounds[1, 1] - margin)):
        trans[0, 0] = 0
    # Standard move
    with hcl.else_():
        trans[0, 0] = 1.0
        trans[0, 1] = sVals[0] + (step * action[0] * hcl.cos(sVals[2]))
        trans[0, 2] = sVals[1] + (step * action[0] * hcl.sin(sVals[2]))
        trans[0, 3] = sVals[2] + (step * action[1])
        # Adjust for the periodic dimension.
        # NOTE(review): nesting reconstructed from flattened source; the
        # wrap-around loops are assumed to belong to this branch -- confirm.
        with hcl.while_(trans[0, 3] > pi):
            trans[0, 3] -= two_pi
        with hcl.while_(trans[0, 3] < -pi):
            trans[0, 3] += two_pi
def reward(sVals, action, bounds, goal, trans):
    """Return the immediate reward for acting from state ``sVals``.

    The reward is -400 when the state lies within 0.2 of a bound in the
    first two dimensions, 1000 when it is a goal state, and 0 otherwise.
    ``action`` and ``trans`` are accepted but not read.

    Args:
        sVals: current state, read at indices 0..2.
        bounds: ``bounds[d, 0]`` / ``bounds[d, 1]`` are the lower / upper
            bound of dimension ``d``.
        goal: ``goal[0, 0:2]`` is the goal centre, ``goal[1, 0:2]`` the
            accepted range of the third state component.

    Returns:
        The expression ``rwd[0]`` of the internal ``"rwd"`` scalar.
    """
    margin = 0.2
    collision_penalty = -400
    goal_reward = 1000

    dx = hcl.scalar(0, "dx")
    dy = hcl.scalar(0, "dy")
    mag = hcl.scalar(0, "mag")
    rwd = hcl.scalar(0, "rwd")

    # Collision states are penalised.
    with hcl.if_(hcl.or_(sVals[0] < bounds[0, 0] + margin,
                         sVals[0] > bounds[0, 1] - margin)):
        rwd[0] = collision_penalty
    with hcl.elif_(hcl.or_(sVals[1] < bounds[1, 0] + margin,
                           sVals[1] > bounds[1, 1] - margin)):
        rwd[0] = collision_penalty
    with hcl.else_():
        # Distance from the goal centre in the first two dimensions.
        dx[0] = sVals[0] - goal[0, 0]
        dy[0] = sVals[1] - goal[0, 1]
        mag[0] = hcl.sqrt((dx[0] * dx[0]) + (dy[0] * dy[0]))
        with hcl.if_(hcl.and_(mag[0] <= 1.0,
                              sVals[2] <= goal[1, 1],
                              sVals[2] >= goal[1, 0])):
            rwd[0] = goal_reward
        # Standard move
        with hcl.else_():
            rwd[0] = 0
    return rwd[0]
def sobel(B, G):
    """Combine ``B`` with the 3x3 kernel ``G`` into stage ``"D"``.

    For pixels that are not on the outermost row/column the result is the
    sum of ``B[x + r, y + c] * G[r, c]`` over ``r, c`` in 0..2; border pixels
    copy ``B[x, y]``. The shape comes from the module-level ``height`` and
    ``width``.

    Args:
        B: two-dimensional input tensor.
        G: kernel indexed ``G[r, c]`` with ``r, c`` in 0..2.

    Returns:
        A ``(height, width)`` float tensor named ``"D"``.
    """
    win_row = hcl.reduce_axis(0, 3)
    win_col = hcl.reduce_axis(0, 3)

    def interior(x, y):
        return hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1))

    return hcl.compute(
        (height, width),
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + win_row, y + win_col] * G[win_row, win_col],
                    axis=[win_row, win_col]),
            B[x, y]),
        "D",
        dtype=hcl.Float())
def guassian(A, G):
    """Combine ``A`` with the ``size`` x ``size`` kernel ``G`` (stage "F").

    Inside the guarded region the result is the sum of
    ``A[x + h, y + w] * G[h, w]`` over ``h, w`` in ``0..size-1``; elsewhere
    the pixel is copied from ``A``. Uses the module-level ``height``,
    ``width`` and ``size``.

    Args:
        A: two-dimensional input tensor.
        G: kernel indexed ``G[h, w]``.

    Returns:
        A ``(height, width)`` float tensor named ``"F"``.
    """
    win_row = hcl.reduce_axis(0, size)
    win_col = hcl.reduce_axis(0, size)

    def interior(x, y):
        # Keeps a margin of `size` pixels on every side of the image.
        return hcl.and_(x > (size - 1), x < (height - size),
                        y > (size - 1), y < (width - size))

    return hcl.compute(
        (height, width),
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(A[x + win_row, y + win_col] * G[win_row, win_col],
                    axis=[win_row, win_col]),
            A[x, y]),
        "F",
        dtype=hcl.Float())
def loop_body(x, y):
    """Non-maximum suppression of pixel ``(x, y)``.

    Picks two neighbours of ``image[x, y]`` according to the direction bin
    of ``angle[x][y]`` (0, 45, 90 or 135 degrees) and writes ``image[x][y]``
    to ``Z[x][y]`` when it is at least as large as both, otherwise 0.

    Reads the module-level tensors ``angle`` and ``image`` and writes ``Z``.

    The neighbours are held in HeteroCL scalars. Plain Python names would be
    rebound by every branch at trace time, because the body of ``hcl.if_``
    always executes in Python, so the final comparison would always use the
    neighbours of the last branch.
    """
    # Default 255 applies when the angle falls in none of the bins.
    q = hcl.scalar(255, "q", dtype=image.dtype)
    r = hcl.scalar(255, "r", dtype=image.dtype)

    c1 = hcl.and_((0 <= angle[x][y]), (angle[x][y] < 22.5))
    c2 = hcl.and_((157.5 <= angle[x][y]), (angle[x][y] <= 180))
    c3 = hcl.and_((22.5 <= angle[x][y]), (angle[x][y] < 67.5))
    c4 = hcl.and_((67.5 <= angle[x][y]), (angle[x][y] < 112.5))
    c5 = hcl.and_((112.5 <= angle[x][y]), (angle[x][y] < 157.5))

    # angle 0
    with hcl.if_(hcl.or_(c1, c2)):
        q.v = image[x][y + 1]
        r.v = image[x][y - 1]
    # angle 45
    with hcl.elif_(c3):
        q.v = image[x + 1][y - 1]
        r.v = image[x - 1][y + 1]
    # angle 90
    with hcl.elif_(c4):
        q.v = image[x + 1][y]
        r.v = image[x - 1][y]
    # angle 135
    with hcl.elif_(c5):
        q.v = image[x - 1, y - 1]
        r.v = image[x + 1, y + 1]

    with hcl.if_(hcl.and_((image[x, y] >= q.v), (image[x, y] >= r.v))):
        Z[x][y] = image[x][y]
    with hcl.else_():
        Z[x][y] = 0
def Gaussian_Sobel_filters(A, G, Fx, Fy):
    """Blur ``A`` with ``G`` and return the scaled Sobel gradient magnitude.

    Stage ``B`` combines ``A`` with the ``kernel_size`` x ``kernel_size``
    kernel ``G``. Stages ``Gx`` and ``Gy`` combine ``B`` with the 3x3 kernels
    ``Fx`` and ``Fy``. The result is ``sqrt(Gx^2 + Gy^2) / 4328 * 255``.
    Pixels outside the guarded interior are copied from the stage input.

    Uses the module-level ``height``, ``width``, ``kernel_size`` and ``k``.

    Fixes over the previous version:
    * the fallback of stage ``B`` read ``B`` itself before it was bound
      (NameError when the lambda runs); it now reads ``A[y, x]``;
    * ``y`` indexes the ``height`` axis and ``x`` the ``width`` axis, so the
      upper bounds are now checked against the matching extent.

    Args:
        A: two-dimensional input tensor indexed ``A[y, x]``.
        G: blur kernel indexed ``G[w, h]``.
        Fx: kernel indexed ``Fx[r, c]`` with ``r, c`` in 0..2.
        Fy: kernel indexed ``Fy[t, g]`` with ``t, g`` in 0..2.

    Returns:
        An unnamed ``(height, width)`` float tensor.
    """
    shape = (height, width)

    def interior(y, x):
        # Margin of k pixels on every side; y runs over height, x over width.
        return hcl.and_(y > (k - 1), y < (height - k),
                        x > (k - 1), x < (width - k))

    h = hcl.reduce_axis(0, kernel_size)
    w = hcl.reduce_axis(0, kernel_size)
    B = hcl.compute(
        shape,
        lambda y, x: hcl.select(
            interior(y, x),
            hcl.sum(A[y + w, x + h] * G[w, h], axis=[w, h]),
            A[y, x]),
        "B",
        dtype=hcl.Float())

    # Sobel filters
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    Gx = hcl.compute(
        shape,
        lambda y, x: hcl.select(
            interior(y, x),
            hcl.sum(B[y + r, x + c] * Fx[r, c], axis=[r, c]),
            B[y, x]),
        "Gx",
        dtype=hcl.Float())

    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    Gy = hcl.compute(
        shape,
        lambda y, x: hcl.select(
            interior(y, x),
            hcl.sum(B[y + t, x + g] * Fy[t, g], axis=[t, g]),
            B[y, x]),
        "Gy",
        dtype=hcl.Float())

    # Only the intensity matrix is returned; no edge-direction matrix is
    # produced here.
    return hcl.compute(
        shape,
        lambda y, x: (hcl.sqrt(Gx[y][x] * Gx[y][x] + Gy[y][x] * Gy[y][x]))
        / 4328 * 255,
        dtype=hcl.Float())
def sobel_x(A, Gx):
    """Combine the channel sum of ``A`` with the 3x3 kernel ``Gx``.

    Stage ``B`` is the per-pixel sum of the three channels of ``A``. The
    returned stage ``"X"`` holds the sum of ``B[x + r, y + c] * Gx[r, c]``
    for interior pixels and ``B[x, y]`` on the outermost row/column. The
    shape comes from the module-level ``height`` and ``width``.

    Args:
        A: input tensor, read as ``A[x][y][c]`` for ``c`` in 0..2.
        Gx: kernel indexed ``Gx[r, c]`` with ``r, c`` in 0..2.

    Returns:
        A ``(height, width)`` float tensor named ``"X"``.
    """
    shape = (height, width)

    def interior(x, y):
        return hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1))

    B = hcl.compute(
        shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    win_row = hcl.reduce_axis(0, 3)
    win_col = hcl.reduce_axis(0, 3)
    return hcl.compute(
        shape,
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + win_row, y + win_col] * Gx[win_row, win_col],
                    axis=[win_row, win_col]),
            B[x, y]),
        "X",
        dtype=hcl.Float())
def populate_matrix(i, j):
    """Fill cell ``(i, j)`` of the score matrix and track the best cell.

    For ``i != 0`` and ``j != 0`` four candidate scores are gathered in
    ``trace_back`` (diagonal plus ``similarity_score``, the two neighbours
    plus ``penalty``, and 0). ``find_max`` selects the score and action
    written to ``matrix[i, j]`` and ``action[i, j]``. ``matrix_max``,
    ``i_max`` and ``j_max`` are updated when the new score is larger.

    Reads and writes the enclosing-scope tensors ``matrix``, ``action``,
    ``matrix_max``, ``i_max`` and ``j_max``; reads ``seqA``, ``seqB`` and
    ``penalty``.
    """
    trace_back = hcl.compute((4, ), lambda x: 0, "trace_back")
    not_on_edge = hcl.and_(i != 0, j != 0)

    with hcl.if_(not_on_edge):
        diagonal = matrix[i - 1, j - 1] + similarity_score(seqA[i - 1],
                                                           seqB[j - 1])
        trace_back[0] = diagonal
        trace_back[1] = matrix[i - 1, j] + penalty
        trace_back[2] = matrix[i, j - 1] + penalty
        trace_back[3] = 0

        best_score, best_action = find_max(trace_back, 4)
        matrix[i, j] = best_score
        action[i, j] = best_action

        # Remember where the overall maximum score is located.
        with hcl.if_(matrix[i, j] > matrix_max[0]):
            matrix_max[0] = matrix[i, j]
            i_max[0] = i
            j_max[0] = j
def insertion_sort(A):
    """Emit an in-place insertion sort of the one-dimensional tensor ``A``.

    All statements are placed in a stage named ``"S"``; the outer loop axis
    is named ``"i"``.

    Args:
        A: tensor to sort; it is modified in place.
    """
    length = A.shape[0]
    with hcl.Stage("S"):
        # Equivalent of: for i in range(1, len(A))
        with hcl.for_(1, length, name="i") as i:
            # Element being inserted and the slot currently examined.
            pending = hcl.local(A[i], "key")
            slot = hcl.local(i - 1, "j")
            keep_shifting = hcl.and_(slot >= 0, pending < A[slot])
            # Equivalent of: while j >= 0 and key < A[j]
            with hcl.while_(keep_shifting):
                A[slot + 1] = A[slot]
                slot[0] -= 1
            A[slot + 1] = pending[0]
def sobel_y(A, Gy):
    """Combine the channel sum of ``A`` with the 3x3 kernel ``Gy``.

    Stage ``B`` is the per-pixel sum of the three channels of ``A``. The
    returned stage ``"Y"`` holds the sum of ``B[x + t, y + g] * Gy[t, g]``
    for interior pixels and ``B[x, y]`` on the outermost row/column. The
    shape comes from the module-level ``height`` and ``width``.

    Args:
        A: input tensor, read as ``A[x][y][c]`` for ``c`` in 0..2.
        Gy: kernel indexed ``Gy[t, g]`` with ``t, g`` in 0..2.

    Returns:
        A ``(height, width)`` float tensor named ``"Y"``.
    """
    shape = (height, width)

    def interior(x, y):
        return hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1))

    B = hcl.compute(
        shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    win_row = hcl.reduce_axis(0, 3)
    win_col = hcl.reduce_axis(0, 3)
    return hcl.compute(
        shape,
        lambda x, y: hcl.select(
            interior(x, y),
            hcl.sum(B[x + win_row, y + win_col] * Gy[win_row, win_col],
                    axis=[win_row, win_col]),
            B[x, y]),
        "Y",
        dtype=hcl.Float())
def loop_body(x, y):
    """Non-maximum suppression of pixel ``(x, y)``.

    Negative entries of ``D[x][y]`` are first shifted by +180. Two
    neighbours of ``I[x][y]`` are then picked according to the direction bin
    of ``D[x][y]`` and ``Z[x][y]`` receives ``I[x][y]`` when it is at least
    as large as both, otherwise 0.

    Reads the enclosing-scope tensors ``D`` and ``I``; writes ``D`` and ``Z``.

    The neighbours are held in HeteroCL scalars. Plain Python names would be
    rebound by every branch at trace time, because the body of ``hcl.if_``
    always executes in Python, so the final comparison would always use the
    neighbours of the last branch.
    """
    # Default 255 applies when the direction falls in none of the bins.
    q = hcl.scalar(255, "q", dtype=I.dtype)
    r = hcl.scalar(255, "r", dtype=I.dtype)

    # Fold negative directions into the 0..180 range.
    with hcl.if_(D[x][y] < 0):
        D[x][y] = D[x][y] + 180

    # direction 0
    with hcl.if_(hcl.or_(hcl.and_(D[x][y] >= 0, D[x][y] < 22.5),
                         hcl.and_(D[x][y] >= 157.5, D[x][y] <= 180))):
        q.v = I[x][y + 1]
        r.v = I[x][y - 1]
    # direction 45
    with hcl.elif_(hcl.and_(22.5 <= D[x][y], D[x][y] < 67.5)):
        q.v = I[x + 1][y - 1]
        r.v = I[x - 1][y + 1]
    # direction 90
    with hcl.elif_(hcl.and_(67.5 <= D[x][y], D[x][y] < 112.5)):
        q.v = I[x + 1][y]
        r.v = I[x - 1][y]
    # direction 135
    with hcl.elif_(hcl.and_(112.5 <= D[x][y], D[x][y] < 157.5)):
        q.v = I[x - 1][y - 1]
        r.v = I[x + 1][y + 1]

    with hcl.if_(hcl.and_(I[x][y] >= q.v, I[x][y] >= r.v)):
        Z[x][y] = I[x][y]
    with hcl.else_():
        Z[x][y] = 0
def solve_Vopt(Vopt, actions, intermeds, trans, interpV, gamma, epsilon,
               iVals, sVals, bounds, goal, ptsEachDim, count, maxIters, useNN):
    """Emit the value-iteration loop over a five-dimensional ``Vopt``.

    While ``reSweep[0] == 1`` and ``count[0] < maxIters[0]``, eight sweeps
    (stages ``Sweep_1`` .. ``Sweep_8``) visit every cell. Each visit calls
    ``updateVopt`` and then ``evaluateConvergence`` with the old and new
    value of the cell. The sweeps differ in which of the first three axes
    are traversed in reverse; the last two axes always run forward. Sweeps
    2-8 are only emitted under ``useNN[0] == 1``.

    ``count[0]`` is incremented once per sweep. ``MDP_object`` is taken from
    the enclosing scope and forwarded to ``updateVopt``.
    """
    reSweep = hcl.scalar(1, "reSweep")
    oldV = hcl.scalar(0, "oldV")
    newV = hcl.scalar(0, "newV")

    # (stage name, reverse axis 0, reverse axis 1, reverse axis 2,
    #  guarded by useNN[0] == 1)
    sweeps = (
        ("Sweep_1", False, False, False, False),
        ("Sweep_2", True, True, True, True),
        ("Sweep_3", True, False, False, True),
        ("Sweep_4", False, True, False, True),
        ("Sweep_5", False, False, True, True),
        ("Sweep_6", True, True, False, True),
        ("Sweep_7", True, False, True, True),
        ("Sweep_8", False, True, True, True),
    )

    def loop_bounds(axis, reverse):
        # A reversed axis runs 1..extent and is mirrored to extent-1..0 below.
        extent = Vopt.shape[axis]
        return (1, extent + 1) if reverse else (0, extent)

    def emit_cells(rev_i, rev_j, rev_k):
        with hcl.for_(*loop_bounds(0, rev_i), name="i") as i:
            with hcl.for_(*loop_bounds(1, rev_j), name="j") as j:
                with hcl.for_(*loop_bounds(2, rev_k), name="k") as k:
                    with hcl.for_(0, Vopt.shape[3], name="l") as l:
                        with hcl.for_(0, Vopt.shape[4], name="m") as m:
                            ci = Vopt.shape[0] - i if rev_i else i
                            cj = Vopt.shape[1] - j if rev_j else j
                            ck = Vopt.shape[2] - k if rev_k else k
                            oldV[0] = Vopt[ci, cj, ck, l, m]
                            updateVopt(MDP_object, ci, cj, ck, l, m, iVals,
                                       sVals, actions, Vopt, intermeds, trans,
                                       interpV, gamma, bounds, goal,
                                       ptsEachDim, useNN)
                            newV[0] = Vopt[ci, cj, ck, l, m]
                            evaluateConvergence(newV, oldV, epsilon, reSweep)

    with hcl.while_(hcl.and_(reSweep[0] == 1, count[0] < maxIters[0])):
        reSweep[0] = 0
        for stage_name, rev_i, rev_j, rev_k, nn_only in sweeps:
            # Perform value iteration by sweeping in one direction.
            # NOTE(review): nesting reconstructed from flattened source;
            # count is assumed to be bumped once per sweep, next to the
            # outermost cell loop -- confirm.
            with hcl.Stage(stage_name):
                if nn_only:
                    with hcl.if_(useNN[0] == 1):
                        emit_cells(rev_i, rev_j, rev_k)
                        count[0] += 1
                else:
                    emit_cells(rev_i, rev_j, rev_k)
                    count[0] += 1
def rasterization(frag_cntr, triangle_2d, fragment):
    """Emit the rasterisation of one 2-D triangle into ``fragment``.

    ``triangle_2d[0:6]`` hold the vertices ``(x0, y0)``, ``(x1, y1)``,
    ``(x2, y2)`` and ``triangle_2d[6]`` holds ``z``. The first two vertices
    are swapped when the orientation value ``cw`` is negative. Every pixel
    of the bounding box for which the three edge functions are non-negative
    is appended to ``fragment`` as ``[x, y, z, color]``.

    Args:
        frag_cntr: output; ``frag_cntr[0]`` receives the number of fragments
            written (it is also set to 0 first when ``cw == 0``).
        triangle_2d: input tensor, read at indices 0..6.
        fragment: output tensor, written as ``fragment[n][0..3]``.
    """
    def load(position, label):
        # One-element stage holding triangle_2d[position].
        return hcl.compute((1,), lambda x: triangle_2d[position], label)

    x0 = load(0, "x0")
    y0 = load(1, "y0")
    x1 = load(2, "x1")
    y1 = load(3, "y1")
    x2 = load(4, "x2")
    y2 = load(5, "y2")
    z = load(6, "z")

    # Orientation test of the three vertices (Pineda algorithm):
    #   cw > 0  -> clockwise
    #   cw == 0 -> the three points are collinear
    #   cw < 0  -> counter-clockwise
    cw = hcl.compute(
        (1,),
        lambda x: (x2[0] - x0[0]) * (y1[0] - y0[0])
        - (y2[0] - y0[0]) * (x1[0] - x0[0]),
        "cw")

    # frag_cntr counts the pixels
    with hcl.if_(cw[0] == 0):
        frag_cntr[0] = 0
    with hcl.elif_(cw[0] < 0):
        # Swap vertex 0 and vertex 1.
        saved_x = hcl.scalar(x0[0])
        saved_y = hcl.scalar(y0[0])
        x0[0] = x1[0]
        y0[0] = y1[0]
        x1[0] = saved_x.v
        y1[0] = saved_y.v

    # Bounding box of the triangle.
    min_x = hcl.scalar(0)
    max_x = hcl.scalar(0)
    min_y = hcl.scalar(0)
    max_y = hcl.scalar(0)

    def pick_extreme(target, v0, v1, v2, beats):
        # Store in `target` whichever of v0/v1/v2 wins under `beats`.
        with hcl.if_(beats(v0[0], v1[0])):
            with hcl.if_(beats(v2[0], v0[0])):
                target.v = v2[0]
            with hcl.else_():
                target.v = v0[0]
        with hcl.else_():
            with hcl.if_(beats(v2[0], v1[0])):
                target.v = v2[0]
            with hcl.else_():
                target.v = v1[0]

    def smaller(lhs, rhs):
        return lhs < rhs

    def larger(lhs, rhs):
        return lhs > rhs

    pick_extreme(min_x, x0, x1, x2, smaller)
    pick_extreme(max_x, x0, x1, x2, larger)
    pick_extreme(min_y, y0, y1, y2, smaller)
    pick_extreme(max_y, y0, y1, y2, larger)

    color = hcl.scalar(100, "color")
    # i: number of pixels found inside the triangle so far
    i = hcl.scalar(0, dtype=hcl.Int())

    with hcl.Stage("S1"):
        with hcl.for_(min_y, max_y) as y:
            with hcl.for_(min_x, max_x) as x:
                # Edge functions of the pixel against the three edges.
                pi0 = hcl.compute(
                    (1,),
                    lambda a: (x - x0[0]) * (y1[0] - y0[0])
                    - (y - y0[0]) * (x1[0] - x0[0]))
                pi1 = hcl.compute(
                    (1,),
                    lambda a: (x - x1[0]) * (y2[0] - y1[0])
                    - (y - y1[0]) * (x2[0] - x1[0]))
                pi2 = hcl.compute(
                    (1,),
                    lambda a: (x - x2[0]) * (y0[0] - y2[0])
                    - (y - y2[0]) * (x0[0] - x2[0]))
                # The pixel is in the triangle when none is negative.
                with hcl.if_(hcl.and_(pi0 >= 0, pi1 >= 0, pi2 >= 0)):
                    fragment[i][0] = x
                    fragment[i][1] = y
                    fragment[i][2] = z[0]
                    fragment[i][3] = color.v
                    i.v += 1

    # NOTE(review): nesting reconstructed from flattened source; this store
    # is assumed to sit after the pixel loops, outside stage "S1" -- confirm.
    frag_cntr[0] = i.v
def solve_Qopt(Qopt, actions, intermeds, trans, interpV, gamma, epsilon,
               iVals, sVals, bounds, goal, ptsEachDim, count, maxIters, useNN,
               fillVal):
    """Emit the Q-value iteration loop over ``Qopt[i, j, k, a]``.

    While ``reSweep[0] == 1`` and ``count[0] < maxIters[0]``, eight sweeps
    (stages ``Sweep_1`` .. ``Sweep_8``) visit every state and action. Each
    visit calls ``updateQopt`` and then ``evaluateConvergence`` with the old
    and new entry. The sweeps differ in which of the three state axes are
    traversed in reverse; the action axis always runs forward.

    ``count[0]`` is incremented once per sweep.
    """
    reSweep = hcl.scalar(1, "reSweep")
    # The scalar labels "oldV"/"newV" are kept exactly as in the original.
    oldQ = hcl.scalar(0, "oldV")
    newQ = hcl.scalar(0, "newV")

    # (stage name, reverse axis 0, reverse axis 1, reverse axis 2)
    sweeps = (
        ("Sweep_1", False, False, False),
        ("Sweep_2", True, True, True),
        ("Sweep_3", True, False, False),
        ("Sweep_4", False, True, False),
        ("Sweep_5", False, False, True),
        ("Sweep_6", True, True, False),
        ("Sweep_7", True, False, True),
        ("Sweep_8", False, True, True),
    )

    def loop_bounds(axis, reverse):
        # A reversed axis runs 1..extent and is mirrored to extent-1..0 below.
        extent = Qopt.shape[axis]
        return (1, extent + 1) if reverse else (0, extent)

    def emit_sweep(stage_name, rev_i, rev_j, rev_k):
        with hcl.Stage(stage_name):
            # For all states
            with hcl.for_(*loop_bounds(0, rev_i), name="i") as i:
                with hcl.for_(*loop_bounds(1, rev_j), name="j") as j:
                    with hcl.for_(*loop_bounds(2, rev_k), name="k") as k:
                        ci = Qopt.shape[0] - i if rev_i else i
                        cj = Qopt.shape[1] - j if rev_j else j
                        ck = Qopt.shape[2] - k if rev_k else k
                        # For all actions
                        with hcl.for_(0, Qopt.shape[3], name="a") as a:
                            oldQ[0] = Qopt[ci, cj, ck, a]
                            updateQopt(ci, cj, ck, a, iVals, sVals, Qopt,
                                       actions, intermeds, trans, interpV,
                                       gamma, bounds, goal, ptsEachDim, useNN,
                                       fillVal)
                            newQ[0] = Qopt[ci, cj, ck, a]
                            evaluateConvergence(newQ, oldQ, epsilon, reSweep)
            # NOTE(review): nesting reconstructed from flattened source;
            # count is assumed to be bumped once per sweep inside the stage
            # -- confirm.
            count[0] += 1

    with hcl.while_(hcl.and_(reSweep[0] == 1, count[0] < maxIters[0])):
        reSweep[0] = 0
        for stage_name, rev_i, rev_j, rev_k in sweeps:
            emit_sweep(stage_name, rev_i, rev_j, rev_k)
def stateToIndexInterpolants(Qopt, sVals, actions, bounds, ptsEachDim,
                             interpV, fillVal):
    """Trilinearly interpolate the best Q value at state ``sVals``.

    The state is converted to fractional grid coordinates. For each of the
    eight surrounding grid corners that lie inside ``Qopt``, the largest
    ``Qopt[corner, a_]`` over all actions is taken, but never less than
    ``fillVal[0]``, which is also used for corners outside the grid. The
    eight corner values are blended linearly along the three axes and the
    result is stored in ``interpV[0]``.

    Args:
        Qopt: tensor indexed ``Qopt[i, j, k, a]``.
        sVals: state values, read at indices 0..2.
        actions: only ``actions.shape[0]`` is used (number of actions).
        bounds: ``bounds[d, 0]`` / ``bounds[d, 1]`` are the lower / upper
            bound of dimension ``d``.
        ptsEachDim: number of grid points per dimension.
        interpV: output; ``interpV[0]`` receives the interpolated value.
        fillVal: ``fillVal[0]`` is the initial value of every corner.
    """
    num_dims = 3

    # Scalars are created in the same order as in the original code.
    lower = [hcl.scalar(0, label) for label in ("iMin", "jMin", "kMin")]
    upper = [hcl.scalar(0, label) for label in ("iMax", "jMax", "kMax")]

    # corner[(bi, bj, bk)]: bit 0 selects the lower index, 1 the upper.
    corner = {}
    for bi in (0, 1):
        for bj in (0, 1):
            for bk in (0, 1):
                label = "c%d%d%d" % (bi, bj, bk)
                corner[(bi, bj, bk)] = hcl.scalar(fillVal[0], label)

    c00 = hcl.scalar(0, "c00")
    c01 = hcl.scalar(0, "c01")
    c10 = hcl.scalar(0, "c10")
    c11 = hcl.scalar(0, "c11")
    c0 = hcl.scalar(0, "c0")
    c1 = hcl.scalar(0, "c1")
    coord = [hcl.scalar(0, label) for label in ("ia", "ja", "ka")]
    weight = [hcl.scalar(0, label) for label in ("di", "dj", "dk")]

    # Unrounded index values.
    for d in range(num_dims):
        coord[d][0] = ((sVals[d] - bounds[d, 0])
                       / (bounds[d, 1] - bounds[d, 0])) * (ptsEachDim[d] - 1)

    # Neighbouring state indices in each direction. The integer cast is
    # applied to (value - 1) for negative coordinates and to (value + 1) for
    # the upper neighbour otherwise.
    for d in range(num_dims):
        with hcl.if_(coord[d][0] < 0):
            lower[d][0] = hcl.cast(hcl.Int(), coord[d][0] - 1.0)
            upper[d][0] = hcl.cast(hcl.Int(), coord[d][0])
        with hcl.else_():
            lower[d][0] = hcl.cast(hcl.Int(), coord[d][0])
            upper[d][0] = hcl.cast(hcl.Int(), coord[d][0] + 1.0)

    # Weights in each direction.
    for d in range(num_dims):
        weight[d][0] = coord[d][0] - lower[d][0]

    # Value of each neighbour state, in the order c000, c001, ..., c111.
    for bi in (0, 1):
        for bj in (0, 1):
            for bk in (0, 1):
                best = corner[(bi, bj, bk)]
                ci = upper[0] if bi else lower[0]
                cj = upper[1] if bj else lower[1]
                ck = upper[2] if bk else lower[2]
                with hcl.if_(hcl.and_(ci[0] < Qopt.shape[0],
                                      cj[0] < Qopt.shape[1],
                                      ck[0] < Qopt.shape[2])):
                    with hcl.if_(hcl.and_(ci[0] >= 0, cj[0] >= 0,
                                          ck[0] >= 0)):
                        with hcl.for_(0, actions.shape[0], name="a_") as a_:
                            with hcl.if_(
                                    best[0] < Qopt[ci[0], cj[0], ck[0], a_]):
                                best[0] = Qopt[ci[0], cj[0], ck[0], a_]

    def blend(near, far, w):
        # Linear interpolation between two one-element scalars.
        return (near[0] * (1 - w[0])) + (far[0] * w[0])

    di, dj, dk = weight
    # Perform linear interpolation: along i, then j, then k.
    c00[0] = blend(corner[(0, 0, 0)], corner[(1, 0, 0)], di)
    c01[0] = blend(corner[(0, 0, 1)], corner[(1, 0, 1)], di)
    c10[0] = blend(corner[(0, 1, 0)], corner[(1, 1, 0)], di)
    c11[0] = blend(corner[(0, 1, 1)], corner[(1, 1, 1)], di)
    c0[0] = blend(c00, c10, dj)
    c1[0] = blend(c01, c11, dj)
    interpV[0] = blend(c0, c1, dk)
def _harris_box_sum_3x3(name, src):
    """Emit the stage ``name`` holding a 3x3 box sum of ``src``.

    A 32-bit integer tensor of shape
    ``(final_extent_0 + 2, final_extent_1 + 2)`` is created and then filled
    in two passes inside ``hcl.Stage(name)``: pass ``s0`` writes zero to
    every element, pass ``s1`` adds ``src[x + bx, y + by]`` for ``bx`` and
    ``by`` in ``0..2``.

    Args:
        name: Name used for the tensor, the stage and as the prefix of every
            loop name (``<name>_s0_y``, ``<name>_s1_box__x``, ...).
        src: Tensor read at ``[x + bx, y + by]``; it must therefore cover
            two more elements than the result along each axis.

    Returns:
        The accumulated tensor.
    """
    extent_x = (final_extent_0 + 2)
    extent_y = (final_extent_1 + 2)
    dst = hcl.compute((extent_x, extent_y), lambda x, y: 0,
                      name=name, dtype=hcl.Int(bits=32))
    with hcl.Stage(name):
        # Pass s0: clear the accumulator.
        with hcl.for_(final_min_1, extent_y, name=name + "_s0_y") as y:
            with hcl.for_(final_min_0, extent_x, name=name + "_s0_x") as x:
                dst[x, y] = 0
        # Pass s1: accumulate the 3x3 window anchored at (x, y).
        with hcl.for_(final_min_1, extent_y, name=name + "_s1_y") as y:
            with hcl.for_(final_min_0, extent_x, name=name + "_s1_x") as x:
                with hcl.for_(0, 3, name=name + "_s1_box__y") as box_y:
                    with hcl.for_(0, 3, name=name + "_s1_box__x") as box_x:
                        dst[x, y] = (
                            dst[x, y] +
                            src[(box_x + x), (box_y + y)])
    return dst


def _harris_neighbour_max(cim, x, y):
    """Return an expression for the largest compared neighbour of a pixel.

    The 3x3 window is anchored at ``(x, y)``, so its centre is
    ``(x + 1, y + 1)``.  The running maximum is built as a chain of
    ``hcl.select(candidate > best, candidate, best)`` in the same order and
    with the same operands as the hand-expanded expression this replaces.

    NOTE(review): only seven of the eight neighbours take part -- position
    ``(x + 2, y)`` is never compared.  This mirrors the original expression
    exactly; confirm against the reference algorithm whether it is intended.
    """
    best = cim[(x + 1), y]
    candidates = (
        cim[x, y],
        cim[x, (y + 1)],
        cim[(x + 2), (y + 1)],
        cim[x, (y + 2)],
        cim[(x + 1), (y + 2)],
        cim[(x + 2), (y + 2)],
    )
    for candidate in candidates:
        best = hcl.select(candidate > best, candidate, best)
    return best


def top(input):
    """Build the HeteroCL stages of a Harris-style corner detector.

    Stages are emitted in this order (names are significant to callers that
    look stages up by name): ``padded16``, ``grad_x``, ``grad_xx``,
    ``grad_gx``, ``grad_y``, ``grad_xy``, ``grad_gxy``, ``grad_yy``,
    ``grad_gy``, ``cim``, ``output_final``, ``final``.

    All loop bounds come from the module-level ``final_min_0``,
    ``final_min_1``, ``final_extent_0`` and ``final_extent_1``.

    Args:
        input: Tensor indexed as ``input[x, y]`` for ``x`` up to
            ``final_extent_0 + 6`` and ``y`` up to ``final_extent_1 + 6``.
            (The name shadows the builtin; it is kept because it is part of
            the existing interface.)

    Returns:
        The ``final`` tensor (unsigned 16-bit).  Within the written region a
        pixel is 255 when its response is at least 100 and strictly greater
        than every compared neighbour, otherwise 0.
    """
    # Stage "padded16": 16-bit integer copy of the input.
    padded16 = hcl.compute(((final_extent_0 + 6), (final_extent_1 + 6)),
                           lambda x, y: 0, name="padded16",
                           dtype=hcl.Int(bits=16))
    with hcl.Stage("padded16"):
        with hcl.for_(final_min_1, (final_extent_1 + 6),
                      name="padded16_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 6),
                          name="padded16_s0_x") as x:
                padded16[x, y] = hcl.cast(dtype=hcl.Int(bits=16),
                                          expr=input[x, y])

    # Stage "grad_x": 3x3 Sobel difference along the first axis
    # (column x + 2 weighted 1, 2, 1 minus column x weighted 1, 2, 1).
    grad_x = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                         lambda x, y: 0, name="grad_x",
                         dtype=hcl.Int(bits=16))
    with hcl.Stage("grad_x"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_x_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_x_s0_x") as x:
                two = hcl.cast(dtype=hcl.Int(bits=16), expr=2)
                # Association order is kept exactly as in the original.
                grad_x[x, y] = (
                    padded16[(x + 2), (y + 2)] +
                    (((padded16[(x + 2), (y + 1)] * two) +
                      ((padded16[(x + 2), y] - padded16[x, y]) -
                       (padded16[x, (y + 1)] * two))) -
                     padded16[x, (y + 2)]))

    # Stage "grad_xx": grad_x squared, widened to 32 bits before multiplying.
    grad_xx = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                          lambda x, y: 0, name="grad_xx",
                          dtype=hcl.Int(bits=32))
    with hcl.Stage("grad_xx"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_xx_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_xx_s0_x") as x:
                gx = grad_x[x, y]
                grad_xx[x, y] = (
                    hcl.cast(dtype=hcl.Int(bits=32), expr=gx) *
                    hcl.cast(dtype=hcl.Int(bits=32), expr=gx))

    # Stage "grad_gx": 3x3 box sum of grad_xx.
    grad_gx = _harris_box_sum_3x3("grad_gx", grad_xx)

    # Stage "grad_y": 3x3 Sobel difference along the second axis
    # (row y + 2 weighted 1, 2, 1 minus row y weighted 1, 2, 1).
    grad_y = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                         lambda x, y: 0, name="grad_y",
                         dtype=hcl.Int(bits=16))
    with hcl.Stage("grad_y"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_y_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_y_s0_x") as x:
                two = hcl.cast(dtype=hcl.Int(bits=16), expr=2)
                # Association order is kept exactly as in the original.
                grad_y[x, y] = (
                    (padded16[(x + 2), (y + 2)] +
                     (((padded16[(x + 1), (y + 2)] * two) +
                       (padded16[x, (y + 2)] - padded16[x, y])) -
                      (padded16[(x + 1), y] * two))) -
                    padded16[(x + 2), y])

    # Stage "grad_xy": grad_x * grad_y in 32 bits.
    grad_xy = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                          lambda x, y: 0, name="grad_xy",
                          dtype=hcl.Int(bits=32))
    with hcl.Stage("grad_xy"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_xy_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_xy_s0_x") as x:
                grad_xy[x, y] = (
                    hcl.cast(dtype=hcl.Int(bits=32), expr=grad_x[x, y]) *
                    hcl.cast(dtype=hcl.Int(bits=32), expr=grad_y[x, y]))

    # Stage "grad_gxy": 3x3 box sum of grad_xy.
    grad_gxy = _harris_box_sum_3x3("grad_gxy", grad_xy)

    # Stage "grad_yy": grad_y squared, widened to 32 bits before multiplying.
    grad_yy = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                          lambda x, y: 0, name="grad_yy",
                          dtype=hcl.Int(bits=32))
    with hcl.Stage("grad_yy"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_yy_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_yy_s0_x") as x:
                gy = grad_y[x, y]
                grad_yy[x, y] = (
                    hcl.cast(dtype=hcl.Int(bits=32), expr=gy) *
                    hcl.cast(dtype=hcl.Int(bits=32), expr=gy))

    # Stage "grad_gy": 3x3 box sum of grad_yy.
    grad_gy = _harris_box_sum_3x3("grad_gy", grad_yy)

    # Stage "cim": corner response det - 0.04 * trace^2, where the three
    # box sums are first integer-divided by 144 and then cast to float32.
    cim = hcl.compute(((final_extent_0 + 2), (final_extent_1 + 2)),
                      lambda x, y: 0, name="cim",
                      dtype=hcl.Float(bits=32))
    with hcl.Stage("cim"):
        with hcl.for_(final_min_1, (final_extent_1 + 2),
                      name="cim_s0_y") as y:
            with hcl.for_(final_min_0, (final_extent_0 + 2),
                          name="cim_s0_x") as x:
                sum_xx = grad_gx[x, y]
                sum_yy = grad_gy[x, y]
                sum_xy = grad_gxy[x, y]
                trace = (
                    hcl.cast(dtype=hcl.Float(bits=32), expr=(sum_xx // 144)) +
                    hcl.cast(dtype=hcl.Float(bits=32), expr=(sum_yy // 144)))
                cim[x, y] = (
                    ((hcl.cast(dtype=hcl.Float(bits=32),
                               expr=(sum_xx // 144)) *
                      hcl.cast(dtype=hcl.Float(bits=32),
                               expr=(sum_yy // 144))) -
                     (hcl.cast(dtype=hcl.Float(bits=32),
                               expr=(sum_xy // 144)) *
                      hcl.cast(dtype=hcl.Float(bits=32),
                               expr=(sum_xy // 144)))) -
                    ((trace * trace) *
                     hcl.cast(dtype=hcl.Float(bits=32), expr=0.040000)))

    # Stage "output_final": threshold at 100 and keep only pixels whose
    # response is strictly above the neighbourhood maximum.
    output_final = hcl.compute((final_extent_0, final_extent_1),
                               lambda x, y: 0, name="output_final",
                               dtype=hcl.UInt(bits=16))
    with hcl.Stage("output_final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="output_final_s0_y") as y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="output_final_s0_x") as x:
                centre = cim[(x + 1), (y + 1)]
                output_final[x, y] = hcl.select(
                    hcl.and_(
                        (hcl.cast(dtype=hcl.Float(bits=32),
                                  expr=100.000000) <= centre),
                        (_harris_neighbour_max(cim, x, y) < centre)),
                    hcl.cast(dtype=hcl.UInt(bits=16), expr=255),
                    hcl.cast(dtype=hcl.UInt(bits=16), expr=0))

    # Stage "final": copy of output_final into the returned tensor.
    # NOTE(review): the shape is hard-coded while every other tensor is sized
    # from final_extent_0 / final_extent_1 -- presumably these equal
    # (2442, 3258); confirm before changing either.
    final = hcl.compute((2442, 3258), lambda x, y: 0, name="final",
                        dtype=hcl.UInt(bits=16))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1, name="final_s0_y") as y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as x:
                final[x, y] = output_final[x, y]
    return final