def kernel(A, B):
    """Count, per row, how many of the first 10 entries of ``A`` exceed 5.

    The count for row ``x`` is accumulated into ``B[x]`` (``B`` is updated in
    place, so it is expected to be initialised by the caller).  Only rows
    0..9 and columns 0..9 are visited.
    """
    def foo(x):
        # Walk the columns of row x and bump the row counter on every hit.
        with hcl.for_(0, 10) as col:
            above_threshold = A[x][col] > 5
            with hcl.if_(above_threshold):
                B[x] += 1

    row_domain = (10, )
    hcl.mutate(row_domain, foo)
def non_max_suppression(image, angle, Z):
    """Keep only the pixels that are local maxima along their gradient direction.

    For every position of ``Z`` the value ``angle[x][y]`` is compared against
    degree thresholds between 0 and 180 to pick a pair of opposite neighbours
    of ``image[x][y]``.  The pixel is copied to ``Z[x][y]`` when it is not
    smaller than either neighbour; otherwise ``Z[x][y]`` is set to 0.  If the
    angle falls outside every range, both neighbours default to 255.

    Args:
        image: 2-D tensor holding the values to be thinned.
        angle: 2-D tensor of directions, indexed like ``image``.
        Z: 2-D output tensor, written in place; its shape is the loop domain.

    Note:
        Neighbour accesses at the border are not guarded, exactly as before.
    """
    def loop_body(x, y):
        # The neighbours must be HeteroCL scalars.  A plain Python assignment
        # inside ``hcl.if_`` executes unconditionally while the program is
        # traced, so previously the values of the last branch (135 degrees)
        # were used for every pixel regardless of its angle.
        q = hcl.scalar(255, "q", dtype=image.dtype)
        r = hcl.scalar(255, "r", dtype=image.dtype)

        c1 = hcl.and_((0 <= angle[x][y]), (angle[x][y] < 22.5))
        c2 = hcl.and_((157.5 <= angle[x][y]), (angle[x][y] <= 180))
        c3 = hcl.and_((22.5 <= angle[x][y]), (angle[x][y] < 67.5))
        c4 = hcl.and_((67.5 <= angle[x][y]), (angle[x][y] < 112.5))
        c5 = hcl.and_((112.5 <= angle[x][y]), (angle[x][y] < 157.5))

        # angle 0
        with hcl.if_(hcl.or_(c1, c2)):
            q.v = image[x][y + 1]
            r.v = image[x][y - 1]
        # angle 45
        with hcl.elif_(c3):
            q.v = image[x + 1][y - 1]
            r.v = image[x - 1][y + 1]
        # angle 90
        with hcl.elif_(c4):
            q.v = image[x + 1][y]
            r.v = image[x - 1][y]
        # angle 135
        with hcl.elif_(c5):
            q.v = image[x - 1][y - 1]
            r.v = image[x + 1][y + 1]

        # Suppress the pixel unless it dominates both selected neighbours.
        with hcl.if_(hcl.and_((image[x][y] >= q.v), (image[x][y] >= r.v))):
            Z[x][y] = image[x][y]
        with hcl.else_():
            Z[x][y] = 0

    hcl.mutate(Z.shape, lambda x, y: loop_body(x, y), "M")
def non_max_sup(I, theta, Z):
    """Suppress every pixel of ``I`` that is not a maximum along its gradient.

    ``theta`` is converted to degrees (``theta * 180 / np.pi``) into the
    intermediate tensor ``D``; negative angles are shifted by 180.  The angle
    selects a pair of opposite neighbours of ``I[x][y]``; the pixel is copied
    to ``Z`` when it is not smaller than either neighbour, otherwise
    ``Z[x][y]`` becomes 0.

    Args:
        I: 2-D tensor holding the values to be thinned.
        theta: 2-D tensor of directions; the conversion above treats it as
            radians.
        Z: 2-D output tensor, written in place; its shape is the loop domain.

    Note:
        ``height``, ``width`` and ``np`` come from the enclosing module.
        Neighbour accesses at the border are not guarded, exactly as before.
    """
    D = hcl.compute((height, width), lambda x,y: theta[x][y]*180/np.pi, "D")

    def loop_body(x, y):
        # The neighbours must be HeteroCL scalars.  A plain Python assignment
        # inside ``hcl.if_`` executes unconditionally while the program is
        # traced, so previously the values of the last branch were used for
        # every pixel regardless of its angle.
        q = hcl.scalar(255, "q", dtype=I.dtype)
        r = hcl.scalar(255, "r", dtype=I.dtype)

        # Fold negative angles into the [0, 180] range.
        with hcl.if_(D[x][y] < 0):
            D[x][y] = D[x][y]+180

        with hcl.if_(hcl.or_(hcl.and_(D[x][y]>=0,D[x][y]<22.5),hcl.and_(D[x][y]>=157.5,D[x][y]<=180))):
            q.v = I[x][y+1]
            r.v = I[x][y-1]
        with hcl.elif_(hcl.and_(22.5 <= D[x][y],D[x][y] < 67.5)):
            q.v = I[x+1][y-1]
            r.v = I[x-1][y+1]
        with hcl.elif_(hcl.and_(67.5 <= D[x][y],D[x][y] < 112.5)):
            q.v = I[x+1][y]
            r.v = I[x-1][y]
        with hcl.elif_(hcl.and_(112.5 <= D[x][y],D[x][y] < 157.5)):
            q.v = I[x-1][y-1]
            r.v = I[x+1][y+1]

        # Keep the pixel only when it dominates both selected neighbours.
        with hcl.if_(hcl.and_(I[x][y]>=q.v,I[x][y]>=r.v)):
            Z[x][y] = I[x][y]
        with hcl.else_():
            Z[x][y] = 0

    hcl.mutate(Z.shape, lambda x,y: loop_body(x,y))
def kmeans(points, means):
    """Run ``niter`` k-means iterations and return the per-point labels.

    Each iteration assigns every point to the mean with the smallest squared
    distance and then overwrites ``means`` with the integer-divided
    per-cluster sums.  ``N``, ``K``, ``dim`` and ``niter`` are read from the
    enclosing module.

    Args:
        points: tensor indexed as ``points[n, d]``.
        means: tensor indexed as ``means[k, d]``; updated in place.

    Returns:
        The ``labels`` tensor of length ``N``.
    """
    def loop_kernel(labels):
        # --- assignment step: nearest mean for every point ---
        with hcl.for_(0, N, name="N") as point:
            min_dist = hcl.scalar(100000)
            with hcl.for_(0, K) as cluster:
                dist = hcl.scalar(0)
                with hcl.for_(0, dim) as axis:
                    delta = points[point, axis] - means[cluster, axis]
                    dist.v += delta * delta
                is_closer = dist.v < min_dist.v
                with hcl.if_(is_closer):
                    min_dist.v = dist.v
                    labels[point] = cluster

        # --- update step: recompute each mean from its members ---
        num_k = hcl.compute((K, ), lambda x: 0)
        sum_k = hcl.compute((K, dim), lambda x, y: 0)

        def calc_sum(idx):
            num_k[labels[idx]] += 1
            with hcl.for_(0, dim) as axis:
                sum_k[labels[idx], axis] += points[idx, axis]

        hcl.mutate((N, ), lambda n: calc_sum(n), "calc_sum")
        hcl.update(means,
                   lambda k, d: sum_k[k, d] // num_k[k],
                   "update_mean")

    labels = hcl.compute((N, ), lambda x: 0)
    hcl.mutate((niter, ), lambda _: loop_kernel(labels), "main_loop")
    return labels
def loop_kernel(labels):
    """Perform one k-means iteration: reassign ``labels`` and update ``means``.

    ``points``, ``means``, ``N``, ``K`` and ``dim`` are taken from the
    enclosing scope.  The best label of each point is tracked in a scalar and
    stored into ``labels`` once all clusters have been examined.
    """
    # --- assignment step ---
    with hcl.for_(0, N, name="n") as point:
        min_dist = hcl.scalar(100000)
        new_label = hcl.scalar(labels[point])
        with hcl.for_(0, K) as cluster:
            dist = hcl.scalar(0)
            with hcl.for_(0, dim) as axis:
                dist_ = hcl.scalar(points[point, axis] - means[cluster, axis],
                                   "temp")
                dist.v += dist_.v * dist_.v
            is_closer = dist.v < min_dist.v
            with hcl.if_(is_closer):
                min_dist.v = dist.v
                new_label[0] = cluster
        labels[point] = new_label

    # --- update step ---
    num_k = hcl.compute((K, ), lambda x: 0, "num_k")
    sum_k = hcl.compute((K, dim), lambda x, y: 0, "sum_k")

    def calc_sum(idx):
        num_k[labels[idx]] += 1
        with hcl.for_(0, dim) as axis:
            sum_k[labels[idx], axis] += points[idx, axis]

    hcl.mutate((N, ), lambda n: calc_sum(n), "calc_sum")
    hcl.update(means,
               lambda k, d: sum_k[k, d] // num_k[k],
               "update_mean")
def rendering(triangle_3ds,angle):
    """Render every triangle of ``triangle_3ds`` into a frame buffer.

    Each triangle goes through four stages (projection, rasterization,
    z-culling, colouring) implemented by helpers defined elsewhere.
    ``MAX_X``, ``MAX_Y`` and ``num_3d_triangles`` come from the enclosing
    module.  ``angle`` is accepted but not used in this function.

    Returns:
        The ``frame_buffer`` tensor of shape ``(MAX_X, MAX_Y)``.
    """
    screen_shape = (MAX_X,MAX_Y)
    z_buffer = hcl.compute(screen_shape,lambda x,y:255,"z_buffer")
    frame_buffer = hcl.compute(screen_shape, lambda x,y:0, "frame_buffer")

    def loop_body(m):
        # Per-triangle working storage.
        triangle_3d = hcl.compute((9,),lambda x:triangle_3ds[m][x],"triangle_3d_")
        fragment = hcl.compute((500,4),lambda x,y:0, "fragment")
        pixels = hcl.compute((500,3),lambda x,y:0, "pixels")
        triangle_2d = hcl.compute((7,),lambda x:0,"triangle_2d")
        frag_cntr = hcl.compute((1,),lambda x:0,"frag_cntr")
        size_pixels = hcl.compute((1,),lambda x:0,"size_pixels")

        # Stage 1: projection
        hcl.mutate((7,),
                   lambda x: projection(triangle_3d,triangle_2d,x),
                   "twod_update")
        # Stage 2: rasterization, fills `fragment`
        hcl.mutate((1,),
                   lambda x:rasterization(frag_cntr,triangle_2d,fragment))
        # Stage 3: z-culling, updates `z_buffer` and `pixels`
        hcl.mutate((1,),
                   lambda x: zculling(size_pixels,frag_cntr[0],fragment,z_buffer,pixels))
        # Stage 4: colour the frame buffer
        hcl.mutate((size_pixels[0],),
                   lambda x: coloringFB(x,pixels,frame_buffer))

    hcl.mutate((num_3d_triangles,), lambda m: loop_body(m),"main_body")
    return frame_buffer
def update(l, prototype, prototypeCounter, max):
    """Run one retraining pass that adjusts the prototypes on misclassified samples.

    For each row of ``hdTrainData`` the Hamming distance to every class
    prototype is computed and the closest class becomes the prediction.  When
    the prediction differs from ``trainLabels[i]`` the sample is added to the
    counters of its true class, subtracted from the counters of the predicted
    class, and the affected prototype entries are re-derived.  Finally
    ``test_hdc_accu`` is invoked for the training and the testing set.

    ``hdTrainData``, ``hdTestData``, ``trainLabels``, ``testLabels``,
    ``numClasses`` and ``test_hdc_accu`` come from the enclosing scope.

    Args:
        l: zero-based pass index; only used in the progress message.
        prototype: per-class prototype tensor, updated in place.
        prototypeCounter: per-class, per-dimension counters, updated in place.
        max: per-class sample counts, updated in place (the name shadows the
            ``max`` builtin inside this function).
    """
    hcl.print((l + 1),
              "%d:Use hard examples to update the prototype counters.\n")
    ###data preparation
    distance = hcl.compute((hdTrainData.shape[1], ), lambda x: 0, 'distance')
    hamming_dist = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist")
    m = hcl.reduce_axis(0, hdTrainData.shape[1], "m")
    ###
    with hcl.for_(0, hdTrainData.shape[0]) as i:
        with hcl.for_(0, numClasses) as n:
            #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
            hcl.update(distance, lambda x: hdTrainData[i][x] ^ prototype[n][x])
            #Calculate the hamming distance of the two vectors by adding 1s
            hamming_dist[n] = hcl.sum(distance[m], axis=m)
        #Find the one having the least hamming distance and choose its label as the predicted label
        pred = hcl.scalar(0, 'pred')
        with hcl.for_(0, hamming_dist.shape[0]) as j:
            with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                pred.v = j
        #Adjust the proto vectors by adding the sample vector on its label proto hdv and subtracting it on its predicted proto hdv
        with hcl.if_(pred.v != trainLabels[i]):
            max[trainLabels[i]] += 1
            max[pred] -= 1
            # From here on `m` is the loop index and hides the reduce axis above.
            with hcl.for_(0, hdTrainData.shape[1]) as m:
                prototypeCounter[trainLabels[i]][m] += hdTrainData[i][m]
                prototypeCounter[pred][m] -= hdTrainData[i][m]
                # NOTE(review): each else_ below is paired with the parity
                # check (outer if_) -- confirm against the original layout.
                with hcl.if_(max[trainLabels[i]] % 2 == 0):
                    # Even count and exact tie: keep the current bit.
                    with hcl.if_(prototypeCounter[trainLabels[i]][m] -
                                 max[trainLabels[i]] / 2 == 0):
                        prototype[trainLabels[i]][m] &= 1
                with hcl.else_():
                    prototype[trainLabels[i]][m] = hcl.select(
                        prototypeCounter[trainLabels[i]][m] -
                        max[trainLabels[i]] / 2 > 0, 1, 0)
                with hcl.if_(max[pred] % 2 == 0):
                    with hcl.if_(prototypeCounter[pred][m] - max[pred] / 2 == 0):
                        prototype[pred][m] &= 1
                with hcl.else_():
                    prototype[pred][m] = hcl.select(
                        prototypeCounter[pred][m] - max[pred] / 2 > 0, 1, 0)
    #print the accuracy
    hcl.mutate(
        (1, ),
        lambda x: test_hdc_accu(prototype, hdTrainData, trainLabels, 1),
        'training_update')
    hcl.mutate(
        (1, ),
        lambda x: test_hdc_accu(prototype, hdTestData, testLabels, 2),
        'testing_update')
def find_max_two(A, M):
    """Track the two largest values of ``A`` in ``M``.

    ``M[1]`` holds the running maximum and ``M[0]`` the runner-up; both are
    updated in place while every element of ``A`` is visited once.
    """
    def loop_body(x):
        beats_runner_up = A[x] > M[0]
        with hcl.if_(beats_runner_up):
            with hcl.if_(A[x] > M[1]):
                # New maximum: the old one is demoted to runner-up.
                M[0] = M[1]
                M[1] = A[x]
            with hcl.else_():
                # Only the runner-up is replaced.
                M[0] = A[x]

    hcl.mutate(A.shape, lambda x: loop_body(x))
def update(l, prototype, prototypeCounter, max):
    """Run one retraining pass on bit-packed data.

    Works on packed words: ``in_train[i][m]`` is XOR-ed with
    ``prototype[n][m]``, ``popcount`` is applied to each word, and the sum of
    those counts is the Hamming distance.  On a misprediction every bit of
    every word of the sample updates ``prototypeCounter`` (indexed by
    ``m*in_bw+bit``) and the corresponding prototype bits are re-derived.
    Finally ``test_hdc_accu`` is invoked for the training and testing set.

    ``in_train``, ``in_test``, ``in_bw``, ``trainLabels``, ``testLabels``,
    ``numClasses``, ``popcount`` and ``test_hdc_accu`` come from the
    enclosing scope.

    Args:
        l: zero-based pass index; only used in the progress message.
        prototype: packed per-class prototype tensor, updated in place.
        prototypeCounter: per-class, per-bit counters, updated in place.
        max: per-class sample counts, updated in place (the name shadows the
            ``max`` builtin inside this function).
    """
    hcl.print((l+1),"%d:Use hard examples to update the prototype counters.\n")
    ###data preparation
    distance = hcl.compute((in_train.shape[1],), lambda x: 0, 'distance', dtype=hcl.UInt(in_bw))
    pre_dist = hcl.compute((in_train.shape[1],), lambda x: 0, "pre_dist")
    hamming_dist = hcl.compute((numClasses,), lambda x: 0, "hamming_dist")
    m = hcl.reduce_axis(0, in_train.shape[1], "m")
    ###
    with hcl.for_(0, in_train.shape[0]) as i:
        hcl.print((i),"%d suc\n")
        # pack_proto = hcl.pack(prototype, axis=1, dtype=hcl.UInt(in_bw), name="pack_proto")
        with hcl.for_(0, numClasses) as n:
            #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
            hcl.update(distance, lambda x: in_train[i][x] ^ prototype[n][x])
            #Calculate the hamming distance of the two vectors by adding 1s
            hcl.update(pre_dist, lambda x: popcount(distance[x]))
            hcl.print((),"sum of 1s suc")
            hamming_dist[n] = hcl.sum(pre_dist[m], axis=m)
        #Find the one having the least hamming distance and choose its label as the predicted label
        pred = hcl.scalar(0, 'pred')
        with hcl.for_(0, hamming_dist.shape[0]) as j:
            with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                pred.v = j
        #Adjust the proto vectors by adding the sample vector on its label proto hdv and subtracting it on its predicted proto hdv
        with hcl.if_(pred.v != trainLabels[i]):
            max[trainLabels[i]] += 1
            max[pred] -= 1
            # From here on `m` is the word index and hides the reduce axis above.
            with hcl.for_(0, in_train.shape[1]) as m:
                with hcl.for_(0, in_bw) as bit:
                    # with hcl.if_(in_train[i][m][bit] == 1):
                    #     ###########
                    #     prototypeCounter[trainLabels[i]][m*in_bw+bit] += 1
                    #     prototypeCounter[pred][m*in_bw+bit] -= 1
                    prototypeCounter[trainLabels[i]][m*in_bw+bit] += in_train[i][m][bit]
                    prototypeCounter[pred][m*in_bw+bit] -= in_train[i][m][bit]
                    # NOTE(review): each else_ below is paired with the parity
                    # check (outer if_) -- confirm against the original layout.
                    with hcl.if_(max[trainLabels[i]] % 2 == 0):
                        # Even count and exact tie: keep the current bit.
                        with hcl.if_(prototypeCounter[trainLabels[i]][m*in_bw+bit] - max[trainLabels[i]]/2 == 0):
                            prototype[trainLabels[i]][m][bit] &= 1
                    with hcl.else_():
                        prototype[trainLabels[i]][m][bit] = hcl.select(prototypeCounter[trainLabels[i]][m*in_bw+bit] - max[trainLabels[i]]/2 > 0, 1, 0)
                    with hcl.if_(max[pred] % 2 == 0):
                        with hcl.if_(prototypeCounter[pred][m*in_bw+bit] - max[pred]/2 == 0):
                            prototype[pred][m][bit] &= 1
                    with hcl.else_():
                        prototype[pred][m][bit] = hcl.select(prototypeCounter[pred][m*in_bw+bit] - max[pred]/2 > 0, 1, 0)
    #print the accuracy
    hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_train, trainLabels, 1), 'training_update')
    hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_test, testLabels, 2), 'testing_update')
def algo(A, B):
    """Copy rows of ``B`` into ``A`` using an index decoded from ``B[0][0]``.

    The mutate domain is built from a scalar cast to ``UInt(32)`` rather than
    from Python integers, and the row index is derived from bit slices of
    ``B[0][0]``.
    """
    def f_mutate(i, j):
        # Bit slice [13:11] of B[0][0] is the stride; slice [11:0] the base row.
        factor = hcl.scalar(B[0][0][13:11], name="factor")
        idx = hcl.scalar(B[0][0][11:0], dtype=hcl.UInt(16), name="idx")
        idx += i * hcl.cast(hcl.UInt(16), factor.v)
        A[idx][j] = B[idx][j]
    # Both loop extents come from the same 32-bit scalar with value 5.
    bound = hcl.scalar(5, dtype=hcl.Int(32))
    domain = (hcl.cast(hcl.UInt(32), bound.v), hcl.cast(hcl.UInt(32), bound.v))
    hcl.mutate(domain, f_mutate)
def kernel(A, M):
    """Track the two largest values of ``A`` in ``M`` with an embedded assert.

    Same update rule as a plain top-two search (``M[1]`` maximum, ``M[0]``
    runner-up), but whenever a new maximum is found the kernel asserts that
    the current index equals 2.  A trailing print follows the loop.
    """
    def loop_body(x):
        beats_runner_up = A[x]> M[0]
        with hcl.if_(beats_runner_up):
            with hcl.if_(A[x]> M[1]):
                hcl.assert_(x == 2, "assert error in if--value of x: %d", x)
                # New maximum: demote the previous one.
                M[0] = M[1]
                M[1] = A[x]
            with hcl.else_():
                M[0] = A[x]

    hcl.mutate(A.shape, lambda x : loop_body(x))
    hcl.print(0, "this should not be printed\n")
def kernel(A):
    """Chain four increment stages: ``A -> B -> C1 -> C2 -> D``.

    ``B`` and ``D`` are produced by ``hcl.compute``; ``C1`` and ``C2`` are
    filled together by one ``hcl.mutate`` stage named ``"C"`` that covers
    indices 0..9.

    Returns:
        The tensor ``D`` (``C2 + 1`` element-wise).
    """
    shape = A.shape
    B = hcl.compute(shape, lambda i: A[i] + 1, "B")
    C1 = hcl.compute(shape, lambda i: 0, "C1")
    C2 = hcl.compute(shape, lambda i: 0, "C2")

    def foo(idx):
        # Two dependent writes inside a single stage.
        C1[idx] = B[idx] + 1
        C2[idx] = C1[idx] + 1

    hcl.mutate((10, ), lambda i: foo(i), "C")
    return hcl.compute(shape, lambda i: C2[i] + 1, "D")
def loop_body(m):
    """Push triangle ``m`` through the four rendering stages.

    Reads ``triangle_3ds`` and updates ``z_buffer`` / ``frame_buffer`` from
    the enclosing scope; the stage functions (``projection``,
    ``rasterization``, ``zculling``, ``coloringFB``) are defined elsewhere.
    """
    # Per-triangle working storage.
    triangle_3d = hcl.compute((9, ),
                              lambda x: triangle_3ds[m][x],
                              "triangle_3d")
    fragment = hcl.compute((500, 4), lambda x, y: 0, "fragment")
    pixels = hcl.compute((500, 3), lambda x, y: 0, "pixels")
    triangle_2d = hcl.compute((7, ), lambda x: 0, "triangle_2d")
    frag_cntr = hcl.compute((1, ), lambda x: 0, "frag_cntr")
    size_pixels = hcl.compute((1, ), lambda x: 0, "size_pixels")

    single_step = (1, )

    # Stage 1: projection
    hcl.mutate((7, ),
               lambda x: projection(triangle_3d, triangle_2d, x),
               "twod_update")
    # Stage 2: rasterization, fills `fragment`
    hcl.mutate(single_step,
               lambda x: rasterization(frag_cntr, triangle_2d, fragment),
               "fragment_update")
    # Stage 3: z-culling, updates `z_buffer` and `pixels`
    hcl.mutate(single_step,
               lambda x: zculling(size_pixels, frag_cntr[0], fragment,
                                  z_buffer, pixels),
               "z_update")
    # Stage 4: colour the frame buffer
    hcl.mutate((size_pixels[0], ),
               lambda x: coloringFB(x, pixels, frame_buffer),
               "buffer_update")
def systolic_array(A, B):
    """Multiply ``A`` and ``B`` block by block on an ``h`` x ``w`` grid of PEs.

    One processing element is instantiated per grid cell.  For every block
    ``(b_row, b_col)`` and every reduction step ``k`` the PEs are wired along
    anti-diagonals: a PE takes its ``a`` input from its left neighbour and its
    ``b`` input from the neighbour above (or from the fetched inputs on the
    grid border) and accumulates its product into ``O``.

    ``w``, ``h``, ``m``, ``n``, ``k`` and ``partial`` come from the enclosing
    module.

    Returns:
        The ``(m, n)`` tensor ``O`` named ``"Output"``.
    """
    # define modules with loop
    @hcl.def_([(1,), (1,), ()])
    def pe(a, b, x):
        # x selects the output port: 0 -> forward a, 1 -> forward b,
        # anything else -> the product.
        with hcl.if_(x == 0):
            result = a * b
            hcl.return_(a)
        with hcl.elif_(x == 1):
            hcl.return_(b)
        with hcl.else_():
            hcl.return_(result)

    # PE = {f'pe_{i}' : partial(pe) for i in range(w*h)}
    PE = {}
    for i in range(w * h):
        with hcl.Stage("pe_{}".format(i)):
            PE['pe_{}'.format(i)] = partial(pe)

    # each k calls of update function calculate one block of result matrix
    # b_row: block row index
    # b_col: block col index
    def update(b_row, b_col, k, O):
        # fetch input
        localA = []
        localB = []
        for input_a in range(h):
            localA.append(hcl.compute((1,), lambda x : A[input_a + h * b_row, k], "localA_{}".format(input_a)))
        for input_b in range(w):
            localB.append(hcl.compute((1,), lambda x : B[k, input_b + w * b_col], "localB_{}".format(input_b)))

        # systolic connection
        # One independent row list per PE row, indexed net[row][col].  The
        # previous `[[None] * h] * w` aliased a single inner list w times and
        # had its dimensions swapped, so any grid with h != w raised
        # IndexError.
        net = [[None] * w for _ in range(h)]
        for i in range(h + w - 1):
            for row in range(i + 1):
                col = i - row
                if col < 0 or col > w-1 or row > h-1:
                    continue
                ## instantiate a PE and record partial results
                input_a = localA[row] if col == 0 else hcl.compute((1,), lambda x : net[row][col-1][0], "input_a{}{}".format(row, col))
                input_b = localB[col] if row == 0 else hcl.compute((1,), lambda x : net[row-1][col][1], "input_b{}{}".format(row, col))
                out = hcl.compute((3,), lambda x : PE['pe_%d' % (row * w + col)](
                    input_a, input_b, x), "out_{}{}".format(row, col))
                O[row + h * b_row, col + w * b_col] += out[2]
                net[row][col] = out

    block_rows = int(m / h)
    block_cols = int(n / w)
    O = hcl.compute((m, n), lambda *args : 0, name="Output")
    hcl.mutate((block_rows, block_cols, k), lambda b_row, b_col, k: update(b_row, b_col, k, O), name="update")
    return O
def sobel(A,B,Gx,Gy):
    """Apply a 3x3 Sobel filter pair and return the scaled gradient magnitude.

    ``B`` is first filled in place with the sum of the three channels of
    ``A``.  It is then convolved with ``Gx`` and ``Gy``, and the result is
    ``sqrt(D^2 + E^2) * 0.05891867``.  ``height`` and ``width`` come from the
    enclosing module; the output covers ``(height-2, width-2)`` positions.

    Returns:
        The tensor named ``"Fimg"``.
    """
    def img_mutate(x,y):
        # Collapse the three channels into one intensity value.
        B[x][y] = A[x][y][0]+A[x][y][1]+A[x][y][2]

    hcl.mutate(B.shape, lambda x,y: img_mutate(x,y))

    out_shape = (height-2, width-2)

    # Horizontal response.
    r = hcl.reduce_axis(0,3)
    c = hcl.reduce_axis(0,3)
    D = hcl.compute(out_shape,
                    lambda x,y: hcl.sum(B[x+r, y+c]*Gx[r,c], axis=[r,c]),
                    "xx")

    # Vertical response.
    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    E = hcl.compute(out_shape,
                    lambda x,y: hcl.sum(B[x+t, y+g]*Gy[t,g], axis=[t,g]),
                    "yy")

    return hcl.compute(
        (height-2,width-2),
        lambda x,y:hcl.sqrt(D[x][y]*D[x][y]+E[x][y]*E[x][y])*0.05891867,
        "Fimg")
def kernel(A, B, O):
    """Accumulate ``A x B`` into ``O`` with locally forwarded operands.

    At each step ``localA`` takes its value from the left neighbour (or from
    ``A`` in column 0) and ``localB`` from the neighbour above (or from ``B``
    in row 0); their product is added to ``O[y, x]``, which restarts from 0 on
    the first step.  ``m``, ``n``, ``k``, ``dim_x`` and ``dim_y`` come from
    the enclosing module.
    """
    localA = hcl.compute((m, k - 1), lambda *args: 0, "localA")
    localB = hcl.compute((k - 1, n), lambda *args: 0, "localB")

    def update(k, y, x):
        # Snapshot the running sum (0 on the first step).
        last = hcl.scalar(hcl.select(k == 0, 0, O[y, x]), "last")

        from_left = hcl.select(x > 0, localA[y, x - 1], A[y, k])
        localA[y, x] = from_left
        from_above = hcl.select(y > 0, localB[y - 1, x], B[k, x])
        localB[y, x] = from_above

        O[y, x] = last.v + localA[y, x] * localB[y, x]

    hcl.mutate((m, dim_y, dim_x),
               lambda k, y, x: update(k, y, x),
               name="update")
def kernel(trainData, testData, itemMem, idMem, rdv1, rdv2):
    """Encode the training and testing samples and return them bit-packed.

    Every sample is encoded by XOR-ing ``itemMem[value]`` with ``idMem[x]``
    for each feature position ``x`` and summing over the positions.  The sums
    are then thresholded at half the number of features into 1-bit tensors
    (adding ``rdv1`` / ``rdv2`` when that number is even) and packed along
    axis 1 into ``UInt(bw)`` words.  ``dim`` and ``bw`` come from the
    enclosing module.

    Returns:
        ``(pack_train, pack_test)``.
    """
    def train_encoding(m, preTrainData):
        # XOR item and id entries for every feature of training sample m.
        train_temp = hcl.compute((trainData.shape[1], dim), lambda x, y: itemMem[trainData[m][x]][y] ^ idMem[x][y], name = "train_temp")
        k1 = hcl.reduce_axis(0, trainData.shape[1], 'k1')
        # Sum over the feature axis.
        train_result = hcl.compute((dim,), lambda x: hcl.sum(train_temp[k1, x], axis = k1, dtype=hcl.Int()), name = "train_result")
        with hcl.for_(0, dim) as n:
            preTrainData[m][n] = train_result[n]
        # Progress message every 1000 samples.
        with hcl.if_((m + 1) % 1000 == 0):
            hcl.print((m+1), "Finish encoding %d training data\n")

    def test_encoding(m, preTestData):
        # Same encoding as train_encoding, applied to testing sample m.
        test_temp = hcl.compute((testData.shape[1], dim), lambda x, y: itemMem[testData[m][x]][y]^idMem[x][y], name = "test_temp")
        k2 = hcl.reduce_axis(0, testData.shape[1], 'k2')
        test_result = hcl.compute((dim,), lambda x: hcl.sum(test_temp[k2, x], axis = k2, dtype=hcl.Int()), name = "test_result")
        with hcl.for_(0, dim) as n:
            preTestData[m][n] = test_result[n]
        # Progress message every 100 samples.
        with hcl.if_((m+1)%100 == 0):
            hcl.print((m+1), "Finish encoding %d testing data\n")

    #Encoding
    hcl.print((), "Encoding the training data into HDVs.\n")
    preTrainData = hcl.compute((trainData.shape[0], dim), lambda x, y: 0, "preTrainData")
    hcl.mutate((trainData.shape[0], ), lambda x: train_encoding(x, preTrainData))
    hdTrainData = hcl.compute((trainData.shape[0], dim), lambda x, y: 0, "hdTrainData", dtype=hcl.UInt(1))
    # Binarise: with an even feature count rdv1 is added before thresholding.
    with hcl.Stage("S1"):
        with hcl.if_(trainData.shape[1] % 2 == 0):
            hcl.print((), "Use the random vector\n")
            hcl.update(hdTrainData, lambda x, y: hcl.select(preTrainData[x][y] + rdv1[x][y] - trainData.shape[1]/2 > 0, 1, 0))
        with hcl.else_():
            hcl.update(hdTrainData, lambda x, y: hcl.select(preTrainData[x][y] - trainData.shape[1]/2 > 0, 1, 0))

    hcl.print((),"Encoding the testing data into HDVs.\n")
    preTestData = hcl.compute((testData.shape[0], dim), lambda x, y: 0, "preTestData")
    hcl.mutate((testData.shape[0], ), lambda x: test_encoding(x, preTestData))
    hdTestData = hcl.compute((testData.shape[0], dim), lambda x, y: 0, "hdTestData", dtype=hcl.UInt(1))
    # Same binarisation for the testing data, using rdv2.
    with hcl.Stage("S2"):
        with hcl.if_(testData.shape[1] % 2 == 0):
            hcl.print((), "Use the random vector\n")
            hcl.update(hdTestData, lambda x, y: hcl.select(preTestData[x][y] + rdv2[x][y] - testData.shape[1]/2 > 0, 1, 0))
        with hcl.else_():
            hcl.update(hdTestData, lambda x, y: hcl.select(preTestData[x][y] - testData.shape[1]/2 > 0, 1, 0))

    ###data_packing
    pack_train = hcl.pack(hdTrainData, axis=1, dtype=hcl.UInt(bw), name="pack_train")
    pack_test = hcl.pack(hdTestData, axis=1, dtype=hcl.UInt(bw), name="pack_test")
    return pack_train, pack_test
def systolic(m=16, k=16, n=16, dtype=hcl.Int(), target=None):
    """Build a matrix-multiply kernel laid out over (time, y, x) loops.

    Placeholders ``m_A`` (m x k), ``m_B`` (k x n) and ``m_output`` (m x n) are
    created, the accumulation is expressed with ``hcl.mutate`` and the
    resulting schedule is built for ``target``.

    Returns:
        The function produced by ``hcl.build``.
    """
    hcl.init(dtype)
    dim_x = dim_y = 16

    m_A = hcl.placeholder((m, k), dtype=dtype, name="m_A")
    m_B = hcl.placeholder((k, n), dtype=dtype, name="m_B")
    m_output = hcl.placeholder((m, n), dtype=dtype, name="m_output")

    # k (time) and y/x (spatial) dim
    # NOTE(review): the time loop extent below is `m` and the spatial extents
    # are fixed at 16 -- this only matches the tensor shapes for the defaults.
    def kernel(k, y, x):
        # Restart the accumulation on the first time step.
        last = hcl.scalar(hcl.select(k == 0, 0, m_output[y, x]), "last")
        product = m_A[y, k] * m_B[k, x]
        m_output[y, x] = last.v + product

    hcl.mutate((m, dim_y, dim_x), lambda k, y, x: kernel(k, y, x))

    s = hcl.create_schedule([m_A, m_B, m_output])
    return hcl.build(s, target=target)
def kernel(A, B):
    """Compute ``A x B`` with locally forwarded operands and return the result.

    ``localA`` receives its value from the left neighbour (or from ``A`` in
    column 0) and ``localB`` from the neighbour above (or from ``B`` in row
    0); their product is accumulated into ``output``, which restarts from 0
    on the first step.  ``m``, ``n``, ``k``, ``dim_x`` and ``dim_y`` come from
    the enclosing module.

    Returns:
        The ``(m, n)`` tensor named ``"output"``.
    """
    localA = hcl.compute((m, k - 1), lambda *args: 0, "localA")
    localB = hcl.compute((k - 1, n), lambda *args: 0, "localB")
    output = hcl.compute((m, n), lambda *args: 0, "output")

    def update(k, y, x):
        from_left = hcl.select(x > 0, localA[y, x - 1], A[y, k])
        localA[y, x] = from_left
        from_above = hcl.select(y > 0, localB[y - 1, x], B[k, x])
        localB[y, x] = from_above

        running = hcl.select(k == 0, 0, output[y, x])
        output[y, x] = running + localA[y, x] * localB[y, x]

    hcl.mutate((m, dim_y, dim_x),
               lambda k, y, x: update(k, y, x),
               name="update")
    return output
def knn(test_image, train_images):
    """Find, per row of ``train_images``, the three smallest Hamming distances.

    Each training entry is XOR-ed with ``test_image`` and its set bits are
    counted.  A ``(10, 3)`` candidate table initialised to 50 is then refined
    by replacing the current largest candidate of a row whenever a smaller
    distance is seen.

    Returns:
        The candidate tensor ``knn_mat``.
    """
    def popcount(num):
        # Add up the individual bits of `num` using bit selection.
        out = hcl.scalar(0, "out")
        with hcl.for_(0, train_images.type.bits) as bit:
            out.v += num[bit]
        return out.v

    def update_knn(dist, knn_mat, i, j):
        # Locate the worst (largest) of the three candidates of row i ...
        max_id = hcl.scalar(0, "max_id")
        with hcl.for_(0, 3) as slot:
            with hcl.if_(knn_mat[i][slot] > knn_mat[i][max_id.v]):
                max_id.v = slot
        # ... and replace it when the new distance is smaller.
        with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]):
            knn_mat[i][max_id.v] = dist[i][j]

    # Step 1: bitwise difference against the test image.
    diff = hcl.compute(train_images.shape,
                       lambda x, y: train_images[x][y] ^ test_image,
                       "diff")
    # Step 2: Hamming distance through popcount.
    dist = hcl.compute(diff.shape,
                       lambda x, y: popcount(diff[x][y]),
                       "dist")
    # Step 3: candidate table.
    knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat")
    # Step 4: refine the candidates over every distance.
    hcl.mutate(dist.shape,
               lambda x, y: update_knn(dist, knn_mat, x, y),
               "knn_update")
    # Step 5: hand back the candidates.
    return knn_mat
def sobelAlgo(A, B, Fx, Fy):
    """Apply the 3x3 filters ``Fx`` / ``Fy`` and return the scaled magnitude.

    ``B`` is filled in place with the sum of the three channels of ``A``,
    convolved with both filters over a ``(height, width)`` domain, and the
    result is ``sqrt(Gx^2 + Gy^2) * 0.05891867``.  ``height`` and ``width``
    come from the enclosing module.
    """
    def rgb_sum(x, y):
        # Collapse the three channels into one intensity value.
        B[x][y] = A[x][y][0] + A[x][y][1] + A[x][y][2]

    hcl.mutate(B.shape, lambda x, y: rgb_sum(x, y))

    out_shape = (height, width)

    # Horizontal response.
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    Gx = hcl.compute(
        out_shape,
        lambda y, x: hcl.sum(B[y + r, x + c] * Fx[r, c], axis=[r, c]),
        "Gx")

    # Vertical response.
    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    Gy = hcl.compute(
        out_shape,
        lambda y, x: hcl.sum(B[y + t, x + g] * Fy[t, g], axis=[t, g]),
        "Gy")

    return hcl.compute(
        out_shape,
        lambda y, x:
        (hcl.sqrt(Gx[y][x] * Gx[y][x] + Gy[y][x] * Gy[y][x]) * 0.05891867))
def cordic(X, Y, C, theta, N):
    """Run ``N`` CORDIC rotation steps on ``X[0]`` / ``Y[0]`` in place.

    At each step the accumulated angle ``current[0]`` is compared with
    ``theta[0]``; the vector is rotated one way or the other using shifts by
    ``step`` and the accumulated angle is adjusted by ``C[step]``.

    Args:
        X, Y: one-element tensors holding the vector; updated in place.
        C: tensor of per-step angle constants.
        theta: one-element tensor with the target angle.
        N: number of steps.
    """
    # Scratch value for the new X and the accumulated angle.
    T = hcl.compute((1, ), lambda x: 0, "T", X.dtype)
    current = hcl.compute((1, ), lambda x: 0, "current", X.dtype)

    def step_loop(step):
        undershoot = theta[0] > current[0]
        with hcl.if_(undershoot):
            # Accumulated angle still below the target.
            T[0] = X[0] - (Y[0] >> step)
            Y[0] = Y[0] + (X[0] >> step)
            X[0] = T[0]
            current[0] = current[0] + C[step]
        with hcl.else_():
            # Otherwise rotate back.
            T[0] = X[0] + (Y[0] >> step)
            Y[0] = Y[0] - (X[0] >> step)
            X[0] = T[0]
            current[0] = current[0] - C[step]

    # More steps give better accuracy.
    hcl.mutate((N, ), lambda step: step_loop(step), "calc")
def kernel(inputs):
    """Accumulate ``val`` per ``key`` over ``inputs`` in map/reduce fashion.

    ``inputs`` is split into ``compute_units`` equally sized batches.  Each
    batch accumulates its values into its own ``(class_number,)`` tensor, and
    the per-batch results are finally summed into ``output``.
    ``compute_units``, ``class_number`` and the struct type ``st`` come from
    the enclosing module.

    Returns:
        The ``(class_number,)`` tensor named ``"output"``.
    """
    def split(inputs, number):
        # Integer division keeps both the batch length and the base offsets
        # integral; `/` yields floats under Python 3, which are not valid
        # tensor indices.
        batch_len = inputs.shape[0] // number
        cus = []
        for i in range(number):
            base = i * batch_len
            ret = hcl.compute((batch_len, ),
                              lambda x: inputs[base + x],
                              dtype=st,
                              name="batch_" + str(i))
            cus.append(ret)
        return cus

    # ret is the input slice { (key, value)...}
    # res is the intermediate result
    def count(res, ret, x):
        res[ret[x].key] += ret[x].val

    def reducer(ress, output, x):
        for res in ress:
            output[x] += res[x]

    rets = split(inputs, compute_units)
    ress = []
    # enumerate avoids list.index(), which compares tensors with `==`.
    for batch_id, ret in enumerate(rets):
        name = "map_batch_" + str(batch_id)
        res = hcl.compute((class_number, ), lambda *args: 0, name=name)
        # mapping (accumulate quality scores in each batch)
        # The extent is the batch's own length; the earlier code used a name
        # (`size`) that is only bound inside `split`.
        hcl.mutate((ret.shape[0], ),
                   lambda x: count(res, ret, x),
                   name="mutate_" + name)
        ress.append(res)

    # shuffle and reduce the ress into output
    output = hcl.compute((class_number, ), lambda x: 0, name="output")
    hcl.mutate((class_number, ), lambda x: reducer(ress, output, x), "reducer")
    return output
def concatenate(*data_tup, axis=1, name='concatenate', frontend='keras'):
    """Join the given tensors along ``axis`` into a new placeholder.

    The output shape is the shape of the first tensor with the extent at
    ``axis`` replaced by the sum of all inputs' extents at that axis.  One
    ``hcl.mutate`` stage per input (all named ``name``) copies the data to its
    offset.  ``frontend`` is accepted but not used here.

    Returns:
        The placeholder holding the concatenated data.
    """
    # Running start offset of every input along the concatenation axis;
    # the final entry is the total length.
    idx_start = [0]
    for tensor in data_tup:
        idx_start.append(idx_start[-1] + tensor.shape[axis])
    axis_len = idx_start[-1]

    new_shape = list(data_tup[0].shape)
    new_shape[axis] = axis_len
    C = hcl.placeholder(tuple(new_shape))

    def concat(data, offset, *indices):
        source = tuple(indices[0])
        target = list(source)
        target[axis] = target[axis] + offset
        C[tuple(target)] = data[source]

    for position, tensor in enumerate(data_tup):
        hcl.mutate(tensor.shape,
                   lambda *x: concat(data_tup[position], idx_start[position], x),
                   name=name)
    return C
def loop_kernel(labels):
    """Perform one k-means iteration using ``hcl.local`` accumulators.

    Every point is assigned to the mean with the smallest squared distance,
    then ``means`` is overwritten with the integer-divided per-cluster sums.
    ``points``, ``means``, ``N``, ``K`` and ``dim`` come from the enclosing
    scope.
    """
    # --- assignment step ---
    with hcl.for_(0, N, name="N") as point:
        min_dist = hcl.local(100000)
        with hcl.for_(0, K) as cluster:
            dist = hcl.local(0)
            with hcl.for_(0, dim) as axis:
                delta = points[point, axis] - means[cluster, axis]
                dist[0] += delta * delta
            is_closer = dist[0] < min_dist[0]
            with hcl.if_(is_closer):
                min_dist[0] = dist[0]
                labels[point] = cluster

    # --- update step ---
    num_k = hcl.compute((K, ), lambda x: 0)
    sum_k = hcl.compute((K, dim), lambda x, y: 0)

    def calc_sum(idx):
        num_k[labels[idx]] += 1
        with hcl.for_(0, dim) as axis:
            sum_k[labels[idx], axis] += points[idx, axis]

    hcl.mutate((N, ), lambda n: calc_sum(n), "calc_sum")
    hcl.update(means,
               lambda k, d: sum_k[k, d] // num_k[k],
               "update_mean")
def kernel(A, B):
    """Write ``A[x] + 1`` into ``B[x]`` for every index of ``A``."""
    def foo(x):
        incremented = A[x] + 1
        B[x] = incremented

    domain = A.shape
    hcl.mutate(domain, foo)
def kernel(pack_train, trainLabels, pack_test, testLabels, rdv3, epoch):
    """Learn class prototypes from packed data, then retrain for ``epoch[0]`` passes.

    The packed inputs are unpacked into 1-bit tensors.  ``learn`` builds one
    prototype per class by majority vote over the samples of that class,
    ``test_hdc_accu`` prints the classification accuracy, and ``update``
    adjusts the prototypes on misclassified training samples.
    ``numClasses`` comes from the enclosing module.

    Args:
        pack_train, pack_test: bit-packed sample tensors.
        trainLabels, testLabels: per-sample class labels.
        rdv3: per-class tensor added to the vote when a class has an even
            number of samples.
        epoch: tensor whose first element is the number of retraining passes.
    """
    def learn(k, hdTrainData, prototype, prototypeCounter):
        """Build the prototype of class ``k`` by voting over its samples."""
        #Find samples that have the label k
        match = hcl.compute(
            hdTrainData.shape,
            lambda x, y: hcl.select(trainLabels[x] == k, hdTrainData[x][y], 0),
            "match")
        #Record the number of these samples
        with hcl.for_(0, hdTrainData.shape[0]) as a:
            with hcl.if_(trainLabels[a] == k):
                max[k] += 1
        #Do hdc sum on these samples' hdv
        r = hcl.reduce_axis(0, hdTrainData.shape[0], 'r')
        result = hcl.compute((hdTrainData.shape[1], ),
                             lambda y: hcl.sum(match[r][y], axis=r), "result")
        #Do the binary voting
        sum1 = hcl.compute((hdTrainData.shape[1], ), lambda x: 0, "sum1")
        # With an even sample count rdv3 is added before thresholding.
        with hcl.if_(max[k] % 2 == 0):
            hcl.update(
                sum1, lambda x: hcl.select(
                    result[x] + rdv3[k][x] - max[k] / 2 > 0, 1, 0))
        with hcl.else_():
            hcl.update(sum1,
                       lambda x: hcl.select(result[x] - max[k] / 2 > 0, 1, 0))
        #Push the binary sum to prototype and the original sum to prototypeCounter
        with hcl.for_(0, hdTrainData.shape[1]) as t:
            prototype[k][t] = sum1[t]
            prototypeCounter[k][t] = result[t]

    def test_hdc_accu(proto, hyper_dataset, labels, type):
        """Classify every row of ``hyper_dataset`` and print the accuracy.

        ``type == 1`` selects the "Training" message, anything else the
        "Testing" message.
        """
        ###data preparation
        distance1 = hcl.compute((hyper_dataset.shape[1], ), lambda x: 0,
                                'distance1')
        hamming_dist1 = hcl.compute((numClasses, ), lambda x: 0,
                                    "hamming_dist1")
        m1 = hcl.reduce_axis(0, hyper_dataset.shape[1], "m1")
        correct1 = hcl.scalar(0, 'correct1')
        ###
        with hcl.for_(0, hyper_dataset.shape[0]) as i:
            with hcl.for_(0, numClasses) as n:
                #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
                hcl.update(distance1,
                           lambda x: hyper_dataset[i][x] ^ proto[n][x])
                #Calculate the hamming distance of the two vectors by adding 1s
                hamming_dist1[n] = hcl.sum(distance1[m1], axis=m1)
            #Find the one having the least hamming distance and choose its label as the predicted label
            pred1 = hcl.scalar(0, 'pred1')
            with hcl.for_(0, hamming_dist1.shape[0]) as j:
                with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                    pred1.v = j
            with hcl.if_(pred1.v == labels[i]):
                correct1.v += 1
        #Print the accuracy
        all1 = hcl.scalar(hyper_dataset.shape[0], "all1", dtype=hcl.Float(32))
        accuracy1 = hcl.compute((1, ),
                                lambda x: correct1.v / all1.v * 100,
                                "accuracy1",
                                dtype=hcl.Float(32))
        with hcl.if_(type == 1):
            hcl.print((correct1, hyper_dataset.shape[0], accuracy1[0]),
                      "Training accu: %d/%d (%.2f%%)\n")
        with hcl.else_():
            hcl.print((correct1, hyper_dataset.shape[0], accuracy1[0]),
                      "Testing accu: %d/%d (%.2f%%)\n")

    def update(l, prototype, prototypeCounter, max):
        """Run one retraining pass on the misclassified training samples."""
        hcl.print((l + 1),
                  "%d:Use hard examples to update the prototype counters.\n")
        ###data preparation
        distance = hcl.compute((hdTrainData.shape[1], ), lambda x: 0,
                               'distance')
        hamming_dist = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist")
        m = hcl.reduce_axis(0, hdTrainData.shape[1], "m")
        ###
        with hcl.for_(0, hdTrainData.shape[0]) as i:
            with hcl.for_(0, numClasses) as n:
                #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
                hcl.update(distance,
                           lambda x: hdTrainData[i][x] ^ prototype[n][x])
                #Calculate the hamming distance of the two vectors by adding 1s
                hamming_dist[n] = hcl.sum(distance[m], axis=m)
            #Find the one having the least hamming distance and choose its label as the predicted label
            pred = hcl.scalar(0, 'pred')
            with hcl.for_(0, hamming_dist.shape[0]) as j:
                with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                    pred.v = j
            #Adjust the proto vectors by adding the sample vector on its label proto hdv and subtracting it on its predicted proto hdv
            with hcl.if_(pred.v != trainLabels[i]):
                max[trainLabels[i]] += 1
                max[pred] -= 1
                # From here on `m` is the loop index and hides the reduce axis.
                with hcl.for_(0, hdTrainData.shape[1]) as m:
                    prototypeCounter[trainLabels[i]][m] += hdTrainData[i][m]
                    prototypeCounter[pred][m] -= hdTrainData[i][m]
                    # NOTE(review): each else_ below is paired with the parity
                    # check (outer if_) -- confirm against the original layout.
                    with hcl.if_(max[trainLabels[i]] % 2 == 0):
                        # Even count and exact tie: keep the current bit.
                        with hcl.if_(prototypeCounter[trainLabels[i]][m] -
                                     max[trainLabels[i]] / 2 == 0):
                            prototype[trainLabels[i]][m] &= 1
                    with hcl.else_():
                        prototype[trainLabels[i]][m] = hcl.select(
                            prototypeCounter[trainLabels[i]][m] -
                            max[trainLabels[i]] / 2 > 0, 1, 0)
                    with hcl.if_(max[pred] % 2 == 0):
                        with hcl.if_(prototypeCounter[pred][m] -
                                     max[pred] / 2 == 0):
                            prototype[pred][m] &= 1
                    with hcl.else_():
                        prototype[pred][m] = hcl.select(
                            prototypeCounter[pred][m] - max[pred] / 2 > 0, 1, 0)
        #print the accuracy
        hcl.mutate(
            (1, ),
            lambda x: test_hdc_accu(prototype, hdTrainData, trainLabels, 1),
            'training_update')
        hcl.mutate(
            (1, ),
            lambda x: test_hdc_accu(prototype, hdTestData, testLabels, 2),
            'testing_update')

    ###unpack
    hdTrainData = hcl.unpack(pack_train,
                             axis=1,
                             dtype=hcl.UInt(1),
                             name="hdTrainData")
    hdTestData = hcl.unpack(pack_test,
                            axis=1,
                            dtype=hcl.UInt(1),
                            name="hdTestData")
    ###learn
    hcl.print((), "Learning the prototype HDVs.\n")
    prototype = hcl.compute(
        (numClasses, hdTrainData.shape[1]),
        lambda x, y: 0,
        "prototype",
    )
    prototypeCounter = hcl.compute(
        (numClasses, hdTrainData.shape[1]), lambda x, y: 0,
        "prototypeCounter")
    #Every dimension is the sum of the targeted data
    #max records the number of added vectors, later used for binary voting
    # (this local deliberately reuses the name `max`; `learn` reads it as a
    # closure variable)
    max = hcl.compute((numClasses, ), lambda x: 0)
    hcl.mutate((numClasses, ),
               lambda k: learn(k, hdTrainData, prototype, prototypeCounter),
               "learn")
    #Test the accuracy after learning
    hcl.mutate((1, ),
               lambda x: test_hdc_accu(prototype, hdTrainData, trainLabels, 1),
               "test_train_accu")
    hcl.mutate((1, ),
               lambda x: test_hdc_accu(prototype, hdTestData, testLabels, 2),
               "test_test_accu")
    ###update
    hcl.mutate((epoch[0], ),
               lambda x: update(x, prototype, prototypeCounter, max), "update")
def batch_sw(seqAs, seqBs, outAs, outBs):
    """Run ``smith_waterman`` on each of the ``num`` sequence pairs.

    Pair ``t`` reads ``seqAs[t]`` / ``seqBs[t]`` and writes ``outAs[t]`` /
    ``outBs[t]``.  ``num`` and ``smith_waterman`` come from the enclosing
    module; the stage is named ``"B"``.
    """
    def align_pair(index):
        return smith_waterman(seqAs[index], seqBs[index],
                              outAs[index], outBs[index])

    batch_domain = (num, )
    hcl.mutate(batch_domain, lambda t: align_pair(t), "B")
def mut_example(A, B, C):
    """Store ``A[x] + B[x]`` into ``C[x]`` for ``x`` in 0..9 (stage ``"M"``)."""
    def loop_body(x):
        total = A[x] + B[x]
        C[x] = total

    domain = (10,)
    hcl.mutate(domain, lambda x: loop_body(x), "M")
def smith_waterman(seqA, seqB, consA, consB):
    """Locally align ``seqA`` and ``seqB`` and write the alignment to ``consA`` / ``consB``.

    A ``(lenA + 1, lenB + 1)`` score matrix is filled together with the action
    that produced every cell, while the position of the best score is tracked.
    Stage ``"T"`` then walks back from that position until the action no
    longer moves, emitting one aligned pair per step.  ``lenA``, ``lenB`` and
    ``penalty`` come from the enclosing module.

    Args:
        seqA, seqB: input sequences.
        consA, consB: output tensors, written in place.
    """
    def similarity_score(a, b):
        # 1 for a match, `penalty` otherwise.
        return hcl.select(a == b, 1, penalty)

    def find_max(A, len_):
        # Return (largest value, its index) over the first `len_` entries.
        max_ = hcl.local(A[0], "max")
        act_ = hcl.local(0, "act")
        with hcl.for_(0, len_) as i:
            with hcl.if_(A[i] > max_[0]):
                max_[0] = A[i]
                act_[0] = i
        return max_[0], act_[0]

    # Best score seen so far and where it was found.
    matrix_max = hcl.local(0, "maxtrix_max")
    i_max = hcl.local(0, "i_max")
    j_max = hcl.local(0, "j_max")

    matrix = hcl.compute((lenA + 1, lenB + 1), lambda x, y: 0, "matrix")
    # Action 3 means "stay"; see get_next below.
    action = hcl.compute(matrix.shape, lambda x, y: 3, "action")

    def populate_matrix(i, j):
        # Candidate scores: 0 diagonal, 1 up, 2 left, 3 restart at zero.
        trace_back = hcl.compute((4, ), lambda x: 0, "trace_back")

        # Row 0 and column 0 keep their initial values.
        with hcl.if_(hcl.and_(i != 0, j != 0)):
            trace_back[0] = matrix[i-1, j-1] + \
                            similarity_score(seqA[i-1], seqB[j-1])
            trace_back[1] = matrix[i - 1, j] + penalty
            trace_back[2] = matrix[i, j - 1] + penalty
            trace_back[3] = 0
            matrix[i, j], action[i, j] = find_max(trace_back, 4)

            with hcl.if_(matrix[i, j] > matrix_max[0]):
                matrix_max[0] = matrix[i, j]
                i_max[0] = i
                j_max[0] = j

    P = hcl.mutate((lenA + 1, lenB + 1), lambda i, j: populate_matrix(i, j))

    def align(curr_i, curr_j, next_i, next_j):
        # Emit 0 for a sequence whose index does not move, else its element.
        outA = hcl.local(0, "a")
        outB = hcl.local(0, "b")

        with hcl.if_(next_i[0] == curr_i[0]):
            outA[0] = 0
        with hcl.else_():
            outA[0] = seqA[curr_i[0] - 1]

        with hcl.if_(next_j[0] == curr_j[0]):
            outB[0] = 0
        with hcl.else_():
            outB[0] = seqB[curr_j[0] - 1]
        return outA[0], outB[0]

    def get_next(action, i, j):
        # Decode the stored action into the predecessor cell.
        act_ = hcl.local(action[i][j], "act")
        next_i = hcl.local(0, "next_i")
        next_j = hcl.local(0, "next_j")
        with hcl.if_(act_[0] == 0):
            next_i[0] = i - 1
            next_j[0] = j - 1
        with hcl.elif_(act_[0] == 1):
            next_i[0] = i - 1
            next_j[0] = j
        with hcl.elif_(act_[0] == 2):
            next_i[0] = i
            next_j[0] = j - 1
        with hcl.else_():
            next_i[0] = i
            next_j[0] = j
        return next_i[0], next_j[0]

    with hcl.Stage("T"):
        # Start the trace-back at the best-scoring cell.
        curr_i = hcl.local(i_max[0], "curr_i")
        curr_j = hcl.local(j_max[0], "curr_j")
        next_i = hcl.local(0, "next_i")
        next_j = hcl.local(0, "next_j")
        next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
        tick = hcl.local(0, "tick")

        # Stop once the predecessor equals the current cell.
        with hcl.while_(
                hcl.or_(curr_i[0] != next_i[0], curr_j[0] != next_j[0])):
            consA[tick[0]], consB[tick[0]] = \
                align(curr_i, curr_j, next_i, next_j)
            curr_i[0], curr_j[0] = next_i[0], next_j[0]
            next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
            tick[0] += 1