def flowdronet_predict(self, img_pairs):
    """Run inference on a pair of images.

    Args:
        img_pairs: image pairs in (img_1, img_2) format.
    Returns:
        The predicted flow for the first pair and the DroNet output
        (steering angle and collision probability).
    """
    assert self.dronet_sess is not None, "DroNet session not loaded."

    # Predict the optical flow first
    with self.graph.as_default():
        # Repackage the input image pairs as an np.ndarray
        x = np.array(img_pairs)

        # Make the input samples conform to the network's requirements
        # x: [batch_size, 2, H, W, 3] uint8; x_adapt: [batch_size, 2, H, W, 3] float32
        x_adapt, x_adapt_info = self.adapt_x(x)
        if x_adapt_info is not None:
            y_adapt_info = (x_adapt_info[0], x_adapt_info[2], x_adapt_info[3], 2)
        else:
            y_adapt_info = None

        # Run the adapted samples through the network
        feed_dict = {self.x_tnsr: x_adapt}
        y_hat = self.sess.run(self.y_hat_test_tnsr, feed_dict=feed_dict)
        flow, _ = self.postproc_y_hat_test(y_hat, y_adapt_info)

    if self.opts['dronet_mode'] == 'raw':
        # Rescale the flow to compensate for changing flow magnitudes.
        # These operations could precede the DroNet network as tf ops
        # (see the sketch below this function).
        # Scale the flow so that its mean magnitude is 1. Note: flow is
        # batched [batch_size, H, W, 2], so the per-pixel magnitude lives
        # on the last axis (axis=2 would normalize along the width).
        flow_magnitude = np.linalg.norm(flow, axis=-1)
        x = flow / np.mean(flow_magnitude)
    else:
        # Converting the flow to an RGB image is certainly not the optimal
        # solution. However, using raw flow doesn't work so far, possibly due
        # to flow scaling issues or incompatibilities in the ResNet model
        # architecture (e.g. ReLU units).
        f_img = flow_to_img(flow[0])
        x = np.asarray(f_img, dtype=np.float32) * np.float32(1.0 / 255.0)
        x = x[np.newaxis, ...]

    # Predict the DroNet output from the flow input
    with self.dronet_graph.as_default():
        steer_coll = self.dronet_sess.run(self.dronet_y_tnsr,
                                          feed_dict={self.dronet_x_tnsr: x})

    return flow[0], steer_coll
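The comment in the `'raw'` branch suggests moving the flow rescaling into the graph. A minimal sketch of what that could look like, using the TF1-style API that the surrounding sessions and graphs already rely on; the placeholder name `flow_in` is illustrative, not part of the source:

```python
import tensorflow as tf

# Hedged sketch: normalize a batched flow field so its mean magnitude is 1,
# expressed as graph ops that could be prepended to the DroNet input.
flow_tnsr = tf.placeholder(tf.float32, [None, None, None, 2], name='flow_in')
mag = tf.norm(flow_tnsr, axis=-1)             # per-pixel flow magnitude
mag_mean = tf.reduce_mean(mag)                # mean magnitude over the batch
flow_scaled = tf.divide(flow_tnsr, mag_mean)  # mean magnitude becomes 1
```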
def plot_img_pairs_w_flows(img_pairs,
                           flow_pyrs=None,
                           num_lvls=0,
                           flow_preds=None,
                           flow_gts=None,
                           titles=None,
                           info=None,
                           flow_mag_max=None):
    """Plot the given set of image pairs, optionally with flows and titles.

    Args:
        img_pairs: image pairs in [batch_size, 2, H, W, 3] or list([2, H, W, 3]) format.
        flow_pyrs: optional, predicted optical flow pyramids in [batch_size, H, W, 2]
            or list([H, W, 2]) format.
        num_lvls: number of levels to show per pyramid (flow_pyrs must be set).
        flow_preds: optional, predicted flows in [batch_size, H, W, 2] or
            list([H, W, 2]) format.
        flow_gts: optional, groundtruth flows in [batch_size, H, W, 2] or
            list([H, W, 2]) format.
        titles: optional, list of image and flow IDs to display with each image.
        info: optional, stats to display above the predicted flow.
        flow_mag_max: max flow magnitude to map to 255.
    Returns:
        plt: the plot.
    """
    # Set up the drawing canvas
    fig_height, fig_width = 5, 5
    row_count = len(img_pairs)
    col_count = 2
    if flow_preds is not None:
        col_count += 1
    if flow_gts is not None:
        col_count += 1
    if flow_pyrs is not None:
        row_count += len(img_pairs)
        jump = num_lvls - col_count
        col_count = max(num_lvls, col_count)
    plt.figure(figsize=(fig_width * col_count, fig_height * row_count))

    # Plot the image pairs inside the canvas
    plot = 1
    for row in range(len(img_pairs)):
        # Plot the image pair
        plt.subplot(row_count, col_count, plot)
        if titles is not None:
            plt.title(titles[row][0], fontsize=fig_width * 2)
        plt.axis('off')
        plt.imshow(img_pairs[row][0])
        plt.subplot(row_count, col_count, plot + 1)
        if titles is not None:
            plt.title(titles[row][1], fontsize=fig_width * 2)
        plt.axis('off')
        plt.imshow(img_pairs[row][1])
        plot += 2

        # Plot the predicted flow, if any
        if flow_preds is not None:
            plt.subplot(row_count, col_count, plot)
            title = "predicted flow " + info[row] if info is not None else "predicted flow"
            plt.title(title, fontsize=fig_width * 2)
            plt.axis('off')
            plt.imshow(flow_to_img(flow_preds[row], flow_mag_max=flow_mag_max))
            plot += 1

        # Plot the groundtruth flow, if any
        if flow_gts is not None:
            plt.subplot(row_count, col_count, plot)
            plt.title("groundtruth flow", fontsize=fig_width * 2)
            plt.axis('off')
            plt.imshow(flow_to_img(flow_gts[row], flow_mag_max=flow_mag_max))
            plot += 1

        # Plot the flow pyramid on the next row
        if flow_pyrs is not None:
            if jump > 0:
                plot += jump
            for lvl in range(num_lvls):
                plt.subplot(row_count, col_count, plot)
                plt.title(f"level {len(flow_pyrs[row]) - lvl + 1}", fontsize=fig_width * 2)
                plt.axis('off')
                plt.imshow(flow_to_img(flow_pyrs[row][lvl], flow_mag_max=flow_mag_max))
                plot += 1
            if jump < 0:
                plot -= jump

    plt.tight_layout()
    return plt
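A minimal usage sketch with dummy data in place of real frames and flow; it assumes this runs in the same module, so that `flow_to_img` and matplotlib's `plt` are already in scope:

```python
import numpy as np

# Two dummy 64x64 frames and a random flow field stand in for real data
frame_1 = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
frame_2 = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
flow = np.random.randn(64, 64, 2).astype(np.float32)

# One image pair with its predicted flow in a third column
plt = plot_img_pairs_w_flows([(frame_1, frame_2)],
                             flow_preds=[flow],
                             titles=[('frame 1', 'frame 2')],
                             flow_mag_max=10.0)
plt.show()
```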
# Run caffe
args = [caffe_bin, 'test',
        '-model', 'tmp/deploy.prototxt',
        '-weights', '../trained/' + cnn_model + '.caffemodel',
        '-iterations', str(len(files[:-1])),
        '-gpu', '0']
cmd = ' '.join(args)
print('Executing %s' % cmd)
subprocess.call(args)

# Convert the .flo output files to images
outfiles = sorted(glob.glob(os.path.join(imgoutfol, '*.flo')))
for idx, file in enumerate(outfiles):
    uv = flow_read(file)
    img_out = flow_to_img(uv, normalize=True, flow_mag_max=None)
    outfile = os.path.join(outputfolder, 'images', os.path.split(files[idx])[1])
    img_out = cv2.resize(img_out, outsize, interpolation=cv2.INTER_AREA)
    print("Writing: " + outfile)
    cv2.imwrite(outfile, img_out)

# Copy the labels file, keeping only every nth entry to match the sampled frames.
# The with blocks close the files on exit, so no explicit close() is needed.
infile = os.path.join(input_fol, subdir, 'labels.txt')
if os.path.isfile(infile):
    with open(infile, 'r') as fin:
        dat = fin.read().splitlines(True)
    with open(os.path.join(outputfolder, 'labels.txt'), 'w') as fout:
        fout.writelines(dat[1::nthimage])

print('Done: ' + os.path.join(input_fol, subdir))
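For reference, a minimal reader for the Middlebury `.flo` format that `flow_read` is assumed to implement here; the `202021.25` magic number and the int32 width/height header are part of the published format:

```python
import numpy as np

def read_flo(path):
    """Read a Middlebury .flo file into an [H, W, 2] float32 array."""
    with open(path, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        assert magic == 202021.25, "Invalid .flo magic number"
        w = int(np.fromfile(f, np.int32, count=1)[0])
        h = int(np.fromfile(f, np.int32, count=1)[0])
        data = np.fromfile(f, np.float32, count=2 * w * h)
    # Interleaved u, v components, stored row-major
    return data.reshape(h, w, 2)
```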
def grab_colors():
    """Find the cube in the webcam picture and grab the colors of the facelets."""
    grid_N = 25  # number of grid squares in the vertical direction
    cap = cv2.VideoCapture(0)
    _, bgrcap = cap.read()
    old_bgrcap = bgrcap.copy()
    height, width = bgrcap.shape[:2]
    config_all = []
    detect = True
    last_relation = ''
    k = 0
    problem = []
    relation = None
    text2 = 0
    text3 = 0
    possibility = [0]
    optical_check = False
    last_face = '111111'
    move = []
    candidate = []
    start = (800, 500)
    long = 30
    step = 0
    text = "Rotate your cube"
    old_gray = cv2.cvtColor(bgrcap, cv2.COLOR_BGR2GRAY)
    for i in range(6):
        config_all.append([])

    while 1:
        # Take each frame
        facelet = []
        _, bgrcap = cap.read()
        new_frame = bgrcap.copy()  # keep a copy for the optical-flow pair
        gray_frame = cv2.cvtColor(bgrcap, cv2.COLOR_BGR2GRAY)
        if detect:
            # Set all hue values >160 to 0. This is important since the color
            # red often contains hue values in this range *and* also hue
            # values >0, and otherwise we get a mess when we compute the mean
            # and variance.
            hsv = cv2.cvtColor(bgrcap, cv2.COLOR_BGR2HSV)
            h, s, v = cv2.split(hsv)
            h_mask = cv2.inRange(h, 0, 160)
            h = cv2.bitwise_and(h, h, mask=h_mask)
            hsv = cv2.merge((h, s, v)).astype(float)

            # Define two empty masks for the white-filter and the color-filter
            color_mask = cv2.inRange(bgrcap, np.array([1, 1, 1]), np.array([0, 0, 0]))  # mask for colors
            white_mask = cv2.inRange(bgrcap, np.array([1, 1, 1]), np.array([0, 0, 0]))  # special mask for white

            cent = []  # the centers of the facelet-square candidates are stored here
            draw_face(bgrcap, config_all, start, long)
            cent = find_squares(bgrcap, grid_N, color_mask, white_mask, hsv, height, width)  # find the candidate squares
            m = medoid(cent)
            cf, ef = facelets(cent, m)  # identify the centers of the corner and edge facelets
            acf, aef = mirr_facelet(cf, ef, m, width)
            vision_params.face_hsv, vision_params.face_col, cents = getcolors(cf, ef, acf, aef, m, hsv)
            if len(cents) > 4 and validate(cents, width) < 10:
                for i in cents:
                    display_colorname(bgrcap, i, hsv)
                if optical_check:
                    center = cents[4]
                    edge = np.linalg.norm(cents[5] - cents[4])
                    if vision_params.face_col[4] != last_face[4] and vision_params.face_col in candidate:
                        if len(move) > 0:
                            motion = np.mean(move, axis=0)
                            if abs(motion[0] - motion[1]) >= 1:
                                action = decide(motion)
                                text = action + str(motion)
                                relation = (last_face, vision_params.face_col, action, problem)
                        last_face = vision_params.face_col
                        move = []
                        config_all = []
                        for i in range(6):
                            config_all.append([])
                    if text2 != 0:
                        cv2.putText(bgrcap, text2, (100, 150), cv2.FONT_HERSHEY_PLAIN, 4.0, (0, 0, 255), 2)
                    if text3 != 0:
                        cv2.putText(bgrcap, text3, (100, 250), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), 1)
                    try:
                        # Crop the same region around the face center from the
                        # previous and current frame and predict the flow
                        pred_labels = nn.predict_from_img_pairs(
                            [[old_bgrcap[int(center[0] - edge):int(center[0] + edge),
                                         int(center[1] - edge):int(center[1] + edge), :],
                              new_frame[int(center[0] - edge):int(center[0] + edge),
                                        int(center[1] - edge):int(center[1] + edge), :]]],
                            batch_size=1, verbose=False)
                        flow = pred_labels[0]
                        move.append([np.mean(flow[:, :, 0]), np.mean(flow[:, :, 1])])
                        optical_frame = flow_to_img(pred_labels[0])
                        old_bgrcap = new_frame.copy()
                        rectangle(center, edge, bgrcap)
                        cv2.imshow("optical_frame", optical_frame)
                    except:  # the crop may be empty or out of bounds near the image border
                        pass
                    if (relation is not None) and relation != last_relation:
                        temp = problem
                        problem = choose_from(relation)
                        last_relation = relation
                        if len(problem) != 0:
                            for i in problem:
                                possibility[temp.index(i)] += 1
                        problem = temp
                        text2 = str(len(problem)) + ' candidate solutions'
                        text3 = str(possibility)
                else:
                    if vision_params.face_col[4] == 'U':
                        config_all[0].append(vision_params.face_col)
                    if vision_params.face_col[4] == 'R':
                        config_all[1].append(vision_params.face_col)
                    if vision_params.face_col[4] == 'F':
                        config_all[2].append(vision_params.face_col)
                    if vision_params.face_col[4] == 'D':
                        config_all[3].append(vision_params.face_col)
                    if vision_params.face_col[4] == 'L':
                        config_all[4].append(vision_params.face_col)
                    if vision_params.face_col[4] == 'B':
                        config_all[5].append(vision_params.face_col)
        else:
            draw(bgrcap, problem, start, long)
            draw_sol(bgrcap, start, long, solution, step)
            if k == 110:  # 'n' key: advance to the next move of the solution
                problem = teach(problem, solution, step)
                step += 1
                if step == len(solution.split(' ')):
                    text = " Solved!!!"
                    detect = True
                    cv2.destroyAllWindows()
                    break
        cv2.putText(bgrcap, text, (100, 50), cv2.FONT_HERSHEY_PLAIN, 4.0, (0, 0, 255), 2)
        cv2.imshow('Webcam - type "x" to quit.', bgrcap)
        flag = True
        for i in range(6):
            if len(config_all[i]) == 0:
                flag = False
        if sum(possibility) != 0:
            sorted_pos = sorted(possibility)
            # Need at least two candidates to compare their vote counts
            if len(sorted_pos) > 1 and sorted_pos[-1] - sorted_pos[-2] >= 3:
                pro_index = possibility.index(max(possibility))
                solution = kociemba.solve(problem[pro_index])
                text = "Solution found!"
                config_all = []
                for i in range(6):
                    config_all.append([])
                print('finished')
                print(solution)
                problem = problem[pro_index]
                possibility = [0]
                detect = False
        if flag:
            solution, success, problem = solve(config_all)
            problem = list(set(problem))
            possibility = [0] * len(problem)
            if success > 1:
                text = "Candidate Solution found!"
                text2 = str(len(problem)) + ' candidate solutions'
                candidate = produce_can(problem[-1])
                optical_check = True
                config_all = []
                for i in range(6):
                    config_all.append([])
            elif success > 0:
                problem = problem[0]
                text = "Solution found!"
                config_all = []
                for i in range(6):
                    config_all.append([])
                print('finished')
                print(solution)
                detect = False
        k = cv2.waitKey(5) & 0xFF
        if k == 120:  # type 'x' to exit
            cv2.destroyAllWindows()
            break
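A minimal entry point, assuming `grab_colors` and the vision helpers it calls (`draw_face`, `find_squares`, `medoid`, `solve`, `kociemba.solve`, the flow network `nn`, and so on) live in the same module and a webcam is attached; the `__main__` guard is illustrative:

```python
if __name__ == '__main__':
    # Requires a connected webcam; quit with 'x', advance moves with 'n'.
    grab_colors()
```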