def twd_cb(frame, bboxes): stop_flag = utils.Esc_key_pressed() beep_alarm = False for box in bboxes: box_color = (0, 255, 0) # GREEN if self.is_event_detected(box, rule) == True: beep_alarm = True box_color = (0,0,255) # RED # Always show the Trip-Wire line on each frame utils.draw_box(frame, box, box_color) cv2.line(frame, self.tw_start, self.tw_end, self.tw_color, 3) cv2.imshow(self.cam.name, frame) self.writer.write(frame) # Play a beep Sound if the monitored event is detected if beep_alarm == True: beep_alarm = False if self.beep_on == False: play_sound(True) self.beep_on = True else: if self.beep_on == True: play_sound(False) self.beep_on = False return stop_flag
def anonymize_image(image, objects_info): """Add objects information to input image Parameters ---------- image : np.array Image to put the object info on objects : list of dicts For each detected object, a dict with the class name, probability and box_points info. Box points info is a tuple (left, top, right, bottom) to create the box around the detected objects display_meta_info : bool If True, add info (class name and percentage) about recognition on the image """ annotated_image = image.copy() annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB) for object in objects_info: detection_box = object["box_points"] # add ....# TODO draw_box(annotated_image, detection_box, color=color) return annotated_image
def main(): model_file = 'model/ssd300_vgg16_short.pb' graph = load_graph(model_file) with tf.Session(graph=graph) as sess: image_input = sess.graph.get_tensor_by_name( 'import/define_input/image_input:0') result = sess.graph.get_tensor_by_name("import/result/result:0") image_path = 'demo/test.jpg' img = cv2.imread(image_path) img = np.float32(img) img = cv2.resize(img, (300, 300)) img = np.expand_dims(img, axis=0) print('image_input', image_input) print('img', type(img), img.shape, img[0][1][1]) enc_boxes = sess.run(result, feed_dict={image_input: img}) print('enc_boxes', type(enc_boxes), len(enc_boxes), type(enc_boxes[0]), enc_boxes[0].shape) print('detect_result_[0][0]', enc_boxes[0][0]) lid2name = { 0: 'Aeroplane', 1: 'Bicycle', 2: 'Bird', 3: 'Boat', 4: 'Bottle', 5: 'Bus', 6: 'Car', 7: 'Cat', 8: 'Chair', 9: 'Cow', 10: 'Diningtable', 11: 'Dog', 12: 'Horse', 13: 'Motorbike', 14: 'Person', 15: 'Pottedplant', 16: 'Sheep', 17: 'Sofa', 18: 'Train', 19: 'Tvmonitor' } preset = get_preset_by_name('vgg300') anchors = get_anchors_for_preset(preset) print('anchors', type(anchors)) boxes = decode_boxes(enc_boxes[0], anchors, 0.5, lid2name, None) boxes = suppress_overlaps(boxes)[:200] print('boxes', boxes) img = cv2.imread(image_path) for box in boxes: color = (31, 119, 180) draw_box(img, box[1], color) box_data = '{} {} {} {} {} {}\n'.format( box[1].label, box[1].labelid, box[1].center.x, box[1].center.y, box[1].size.w, box[1].size.h) print('box_data', box_data) cv2.imwrite(image_path + '_out.jpg', img)
def demo(mp4_file): n = TinyYoloNet() load_cnn() load_weights() class_set = load_class_set() fourcc = cv2.VideoWriter_fourcc(*'MP4V') out = cv2.VideoWriter("out.mp4", fourcc, 20.0, (n.width, n.height)) cap = cv2.VideoCapture(mp4_file) if not cap.isOpened(): print("Unable to open the mp4 file.") return while True: res, img = cap.read() if res: resized_img = cv2.resize(img, (n.width, n.height)) bboxes = n.forward(resized_img, 0.5, 0.4) draw_img = draw_box(resized_img, bboxes, None, class_set) out.write(draw_img) cv2.imshow("demo", draw_img) if cv2.waitKey(1) == 27: break else: print("Unable to read image.") break out.release() cap.release() cv2.destroyAllWindows()
def detect_image(image_np): target_dimension = int(model.meta["height"]) processed_img = utils.process_image(image_np, target_dimension) image_dimension = torch.FloatTensor([image_np.shape[1], image_np.shape[0]]) scaling_factor = torch.min(target_dimension / image_dimension) if CUDA: processed_img = processed_img.cuda() image_var = Variable(processed_img) # 416 * 416 * (1/(8*8) + 1/(16*16) + 1/(32*32) )*3 start = time.time() with torch.no_grad(): output = model(image_var, CUDA) end = time.time() print("Total time: {}".format(end - start)) # print("output", output.shape) thresholded_output = utils.object_thresholding(output[0]) # print("Thresholded", thresholded_output.shape) # print(output[0]) true_output = utils.non_max_suppression(thresholded_output) # print("True output", true_output.shape) original_image_np = np.copy(image_np) if true_output.size(0) > 0: # Offset for padded image vertical_offset = (target_dimension - scaling_factor * image_dimension[0].item()) / 2 horizontal_offset = (target_dimension - scaling_factor * image_dimension[1].item()) / 2 for output_box in true_output: rect_coords = utils.center_coord_to_diagonals(output_box[:4]) rect_coords = torch.FloatTensor(rect_coords) # transform box detection w.r.t. boundaries of the padded image rect_coords[[0, 2]] -= vertical_offset rect_coords[[1, 3]] -= horizontal_offset rect_coords /= scaling_factor # Clamp to actual image's boundaries rect_coords[[0, 2]] = torch.clamp(rect_coords[[0, 2]], 0.0, image_dimension[0]) rect_coords[[1, 3]] = torch.clamp(rect_coords[[1, 3]], 0.0, image_dimension[1]) # print(image_np.shape) class_label = coco_classes[output_box[5].int()] print("Output Box:", output_box, "Class Label:", class_label) print("Rect coords:", rect_coords) if constants.PERFORM_FACE_DETECTION and class_label == "person": rc = rect_coords.int() person_img_np = original_image_np[rc[1]:rc[3], rc[0]:rc[2]] # print("person_img_np: ", person_img_np, person_img_np.shape) # cv2.imshow("bounded_box_img", person_img_np) # cv2.waitKey(0) face_label = face_recognition_utils.recognize_face_in_patch( person_img_np) if face_label is not None: class_label = face_label image_np = utils.draw_box(rect_coords, image_np, class_label) return image_np
def visualize_image_car_detection(boxes, imagePath = './test_images/test5.jpg'): image = plt.imread(imagePath) print("boxes results:") print(boxes) # visualize the box on the original image f,(ax1,ax2) = plt.subplots(1,2,figsize=(16,6)) ax1.imshow(image) ax2.imshow(draw_box(boxes,plt.imread(imagePath),[[500,1280],[300,650]])) pylab.show()
def frame_func(self, image): crop = image resized = cv2.resize(crop,(448,448)) batch = np.array([resized[:,:,0],resized[:,:,1],resized[:,:,2]]) batch = 2*(batch/255.) - 1 batch = np.expand_dims(batch, axis=0) out = self.model.predict(batch) boxes = yolo_net_out_to_car_boxes(out[0], threshold = 0.17) return draw_box(boxes,image,[[0, image.shape[1]], [0, image.shape[0]]])
def annotate(data_dir, samples, colors, sample_name): """ Draw the bounding boxes on the sample images :param data_dir: the directory where the dataset's files are stored :param samples: samples to be processed :param colors: a dictionary mapping class name to a BGR color tuple :param sample_name: name of the sample """ result_dir = data_dir+'/annotated/'+sample_name.strip()+'/' if not os.path.exists(result_dir): os.makedirs(result_dir) for sample in tqdm(samples, desc=sample_name, unit='samples'): img = cv2.imread(sample.filename) basefn = os.path.basename(sample.filename) for box in sample.boxes: draw_box(img, box, colors[box.label]) cv2.imwrite(result_dir+basefn, img)
def frame_func(image): crop = image[300:650, 500:, :] resized = cv2.resize(crop, (448, 448)) batch = np.array([resized[:, :, 0], resized[:, :, 1], resized[:, :, 2]]) batch = 2 * (batch / 255.) - 1 batch = np.expand_dims(batch, axis=0) out = model.predict(batch) boxes = yolo_net_out_to_car_boxes(out[0], threshold=0.17) return draw_box(boxes, image, [[500, 1280], [300, 650]])
def main(): if not os.path.exists(out_dir): os.makedirs(out_dir) # path to input video cap = utils.load_video(in_vid) frame_id = 0 # open file for reading ROI location f = open(in_txt, 'r') # get rid of header and get first bbox _ = f.readline() while cap.isOpened(): # read frame and update bounding box _, img = cap.read() line = f.readline() if not _ or not line: break bbox = utils.get_bbox(line) utils.draw_box(img, bbox, (0, 0, 255)) # cv2.imshow('frame', img[bbox[1]+1:bbox[1]+bbox[3], bbox[0]+1:bbox[0]+bbox[2]]) cv2.imshow('frame', img) if cv2.waitKey(1) & 0xff == ord('q'): break # update frame_id and write to file if is_print: print('{}{}.jpg'.format(out_dir, frame_id)) cv2.imwrite( '{}{}.jpg'.format(out_dir, frame_id), img[bbox[1] + 1:bbox[1] + bbox[3], bbox[0] + 1:bbox[0] + bbox[2]]) frame_id += 1 # print('{}, {:1.0f}, {:1.0f}, {:1.0f}, {:1.0f}' # .format(frame_id, bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3])) cv2.waitKey(5) cap.release() cv2.destroyAllWindows() f.close()
def run(files, img_input_tensor, result_tensor, data, sess, batch_size = 32): output_imgs = [] preset = data['preset'] colors = data['colors'] lid2name = data['lid2name'] anchors = get_anchors_for_preset(preset) for i in range(0, len(files), batch_size): batch_names = files[i:i+batch_size] batch_imgs = [] batch = [] for f in batch_names: img = cv2.imread(f) batch_imgs.append(img) img = cv2.resize(img, (300, 300)) batch.append(img) batch = np.array(batch) feed = {img_input_tensor: batch} enc_boxes = sess.run(result_tensor, feed_dict=feed) for i in range(len(batch_names)): boxes = decode_boxes(enc_boxes[i], anchors, 0.5, lid2name, None) boxes = suppress_overlaps(boxes)[:200] name = os.path.basename(batch_names[i]) for box in boxes: draw_box(batch_imgs[i], box[1], colors[box[1].label]) output_imgs.append(batch_imgs[i]) #with open(os.path.join(args.output_dir, name+'.txt'), 'w') as f: # for box in boxes: # draw_box(batch_imgs[i], box[1], colors[box[1].label]) #box_data = '{} {} {} {} {} {}\n'.format(box[1].label, # box[1].labelid, box[1].center.x, box[1].center.y, # box[1].size.w, box[1].size.h) #f.write(box_data) #cv2.imwrite(os.path.join(args.output_dir, name), # batch_imgs[i]) return output_imgs
def plot_objects(image, objects_info, display_meta_info = False): """Add objects information to input image Parameters ---------- image : np.array Image to put the object info on objects : list of dicts For each detected object, a dict with the class name, probability and box_points info. Box points info is a tuple (left, top, right, bottom) to create the box around the detected objects display_meta_info : bool If True, add info (class name and percentage) about recognition on the image """ annotated_image = image.copy() annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB) for object in objects_info: detection_box = object["box_points"] # create a random color color = np.random.rand(3,) # add box draw_box(annotated_image, detection_box, color=color) if display_meta_info: label = "{} {:.2f}".format(object["name"], object["percentage_probability"]) draw_caption(annotated_image, detection_box, label) return annotated_image
def __init__(self, x, y, tile_width, tile_height, option, options, is_circular, text_code): super().__init__(text_code) self.option = int(option) self.options = options self.is_circular = is_circular self.x = x self.y = y self.text_secondary = '' self.spr_box = draw_box(SpriteManager.load_sprite('spr_box.png'), tile_width, tile_height) self.arrow_left = ArrowLeft(self, x, y + self.spr_box.get_height() / 2) self.arrow_right = ArrowRight(self, x + self.spr_box.get_width(), y + self.spr_box.get_height() / 2)
def inference_and_visualize_batch_images_car_detection(model): # more examples images = [plt.imread(file) for file in glob.glob('./test_images/*.jpg')] batch = np.array([ np.transpose(cv2.resize(image[300:650, 500:, :], (448, 448)), (2, 0, 1)) for image in images ]) batch = 2 * (batch / 255.) - 1 out = model.predict(batch) f, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(11, 10)) th = 0.17 for i, ax in zip(range(len(batch)), [ax1, ax2, ax3, ax4, ax5, ax6]): #boxes = yolo_net_out_to_car_boxes(out[i], threshold = 0.17) boxes = yolo_net_out_to_car_boxes(out[i], threshold=th) print("boxes" + str(i)) print(boxes) ax.imshow(draw_box(boxes, images[i], [[500, 1280], [300, 650]])) pylab.show()
def main(): # Parse commandline parser = argparse.ArgumentParser(description='SSD inference') parser.add_argument("files", nargs="*") parser.add_argument('--checkpoint-dir', default='pascal-voc/checkpoints', help='project name') parser.add_argument('--checkpoint', type=int, default=-1, help='checkpoint to restore; -1 is the most recent') parser.add_argument('--data-source', default="pascal-voc", help='Use test files from the data source') parser.add_argument('--data-dir', default='pascal-voc', help='Use test files from the data source') parser.add_argument('--training-data', default='pascal-voc/training-data.pkl', help='Information about parameters used for training') parser.add_argument('--output-dir', default='pascal-voc/annotated/train', help='directory for the resulting images') parser.add_argument('--annotate', type=str2bool, default='False', help="Annotate the data samples") parser.add_argument('--dump-predictions', type=str2bool, default='False', help="Dump raw predictions") parser.add_argument('--summary', type=str2bool, default='True', help='dump the detections in Pascal VOC format') parser.add_argument('--compute-stats', type=str2bool, default='True', help="Compute the mAP stats") parser.add_argument('--sample', default='train', choices=['train', 'valid']) parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--threshold', type=float, default=0.5, help='confidence threshold') args = parser.parse_args() # Print parameters print('[i] Checkpoint directory: ', args.checkpoint_dir) print('[i] Data source: ', args.data_source) print('[i] Data directory: ', args.data_dir) print('[i] Training data: ', args.training_data) print('[i] Output directory: ', args.output_dir) print('[i] Annotate: ', args.annotate) print('[i] Dump predictions: ', args.dump_predictions) print('[i] Summary: ', args.summary) print('[i] Compute state: ', args.compute_stats) print('[i] Sample: ', args.sample) print('[i] Batch size: ', args.batch_size) print('[i] Threshold: ', args.threshold) # Check if we can get the checkpoint state = tf.train.get_checkpoint_state(args.checkpoint_dir) if state is None: print('[!] No network state found in ' + args.checkpoint_dir) return 1 try: checkpoint_file = state.all_model_checkpoint_paths[args.checkpoint] except IndexError: print('[!] Cannot find checkpoint ' + str(args.checkpoint_file)) return 1 metagraph_file = checkpoint_file + '.meta' if not os.path.exists(metagraph_file): print('[!] Cannot find metagraph ' + metagraph_file) return 1 # Load the training data parameters try: with open(args.training_data, 'rb') as f: data = pickle.load(f) preset = data['preset'] colors = data['colors'] lid2name = data['lid2name'] image_size = preset.image_size anchors = get_anchors_for_preset(preset) except (FileNotFoundError, IOError, KeyError) as e: print('[!] Unable to load training data:', str(e)) return 1 # Load the samples according to data source and sample type try: if args.sample == 'train': with open(args.data_dir + '/train-samples.pkl', 'rb') as f: samples = pickle.load(f) else: with open(args.data_dir + '/valid-samples.pkl', 'rb') as f: samples = pickle.load(f) num_samples = len(samples) print('[i] # samples: ', num_samples) except (ImportError, AttributeError, RuntimeError) as e: print('[!] 
Unable to load data source:', str(e)) return 1 # Create a list of files to analyse and make sure that the output directory exists files = [] for sample in samples: files.append(sample.filename) files = list(filter(lambda x: os.path.exists(x), files)) if files: if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) # Print model and dataset stats print('[i] Network checkpoint:', checkpoint_file) print('[i] Metagraph file: ', metagraph_file) print('[i] Image size: ', image_size) print('[i] Number of files: ', len(files)) # Create the network if args.compute_stats: ap_calc = APCalculator() if args.summary: summary = PascalSummary() with tf.Session() as sess: print('[i] Creating the model...') net = SSDVGG(sess, preset) net.build_from_metagraph(metagraph_file, checkpoint_file) # Process the images generator = sample_generator(files, image_size, args.batch_size) n_sample_batches = int(math.ceil(len(files) / args.batch_size)) description = '[i] Processing samples' for x, idxs in tqdm(generator, total=n_sample_batches, desc=description, unit='batches'): feed = {net.image_input: x, net.keep_prob: 1} enc_boxes = sess.run(net.result, feed_dict=feed) # Process the predictions for i in range(enc_boxes.shape[0]): boxes = decode_boxes(enc_boxes[i], anchors, args.threshold, lid2name, None) boxes = suppress_overlaps(boxes)[:200] filename = files[idxs[i]] basename = os.path.basename(filename) # Annotate samples if args.annotate: img = cv2.imread(filename) for box in boxes: draw_box(img, box[1], colors[box[1].label]) fn = args.output_dir + '/images/' + basename cv2.imwrite(fn, img) # Dump the predictions if args.dump_predictions: raw_fn = args.output_dir + '/' + basename + '.npy' np.save(raw_fn, enc_boxes[i]) # Add predictions to the stats calculator and to the summary if args.compute_stats: ap_calc.add_detections(samples[idxs[i]].boxes, boxes) if args.summary: summary.add_detections(filename, boxes) # Compute and print the stats if args.compute_stats: aps = ap_calc.compute_aps() for k, v in aps.items(): print('[i] AP [{0}]: {1:.3f}'.format(k, v)) print('[i] mAP: {0:.3f}'.format(APs2mAP(aps))) # Write the summary files if args.summary: summary.write_summary(args.output_dir + "/summaries") print('[i] All done.') return 0
def main(): # Parse the commandline parser = argparse.ArgumentParser(description='SSD inference') parser.add_argument( '--model', default='./pascal-voc/frozen/e225-SSD300-VGG16-PASCALVOC.pb', help='model file') parser.add_argument('--training-data', default='./pascal-voc/training-data.pkl', help='training data') parser.add_argument("--input-dir", default='./test/in', help='input directory') parser.add_argument('--output-dir', default='./test/out', help='output directory') parser.add_argument('--batch-size', type=int, default=32, help='batch size') args = parser.parse_args() # Print parameters print('[i] Model: ', args.model) print('[i] Training data: ', args.training_data) print('[i] Input dir: ', args.input_dir) print('[i] Output dir: ', args.output_dir) print('[i] Batch size: ', args.batch_size) # Load the graph and the training data graph_def = tf.GraphDef() with open(args.model, 'rb') as f: serialized = f.read() graph_def.ParseFromString(serialized) with open(args.training_data, 'rb') as f: data = pickle.load(f) preset = data['preset'] colors = data['colors'] lid2name = data['lid2name'] anchors = get_anchors_for_preset(preset) # Get the input images images = os.listdir(args.input_dir) images = ["%s/%s" % (args.input_dir, image) for image in images] # Create the output directory if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) # Run the detections in batches with tf.Session() as sess: tf.import_graph_def(graph_def, name='detector') img_input = sess.graph.get_tensor_by_name('detector/image_input:0') result = sess.graph.get_tensor_by_name('detector/result/result:0') for i in tqdm(range(0, len(images), args.batch_size)): batch_names = images[i:i + args.batch_size] batch_imgs = [] batch = [] for f in batch_names: img = cv2.imread(f) batch_imgs.append(img) img = cv2.resize(img, (300, 300)) batch.append(img) batch = np.array(batch) feed = {img_input: batch} enc_boxes = sess.run(result, feed_dict=feed) for i in range(len(batch_names)): boxes = decode_boxes(enc_boxes[i], anchors, 0.5, lid2name, None) boxes = suppress_overlaps(boxes)[:200] name = os.path.basename(batch_names[i]) meta = {} for j, box in enumerate(boxes): draw_box(batch_imgs[i], box[1], colors[box[1].label]) box_data = {} box_data['Label'] = box[1].label, box_data['LabelID'] = str(box[1].labelid) box_data['Center'] = [box[1].center.x, box[1].center.y] box_data['Size'] = [box[1].size.w, box[1].size.h] box_data['Confidence'] = str(box[0]) meta["prediction_%s" % (j + 1)] = box_data with open(os.path.join(args.output_dir, name + '.json'), 'w') as f: json.dump(meta, f, indent=4) cv2.imwrite(os.path.join(args.output_dir, name), batch_imgs[i])
objects = [ obj for obj in objects if utils.object_in_roi(ROI_DICT, obj["centroid"]) ] # Draw object boxes draw = ImageDraw.Draw(pil_image) for obj in objects: name = obj["name"] confidence = obj["confidence"] box = obj["bounding_box"] box_label = f"{name}: {confidence:.1f}%" utils.draw_box( draw, (box["y_min"], box["x_min"], box["y_max"], box["x_max"]), pil_image.width, pil_image.height, text=box_label, color=const.YELLOW, ) # Draw ROI box if ROI_TUPLE != DEFAULT_ROI: utils.draw_box( draw, ROI_TUPLE, pil_image.width, pil_image.height, text="ROI", color=const.GREEN, )
def render(self): read_game = self.env.read_game frame = self.env.frame weapon_names = self.env.weapon_names if not read_game.is_in_game: return # enemy behind indicators enemy_behind = enemy_left = enemy_right = INFINITY if keys["KEY_BOXESP"]: for idx in range(PLAYERMAX): p = read_game.player[idx] if (p.type == ET_PLAYER) and p.valid and p.alive and p != read_game.my_player: # colors already calculated feet, head, size_x, size_y = self.calc_size_xy(p) if feet and head: p.color_esp = self.get_faded_color(p.pos, p.color_esp) if keys["KEY_BOXESP"]: draw_box(frame.line, feet.x - size_x/2, feet.y, size_x, -size_y, COLOR_BOX_OUTER_WIDTH, p.color_esp) if keys["KEY_WEAPON_ESP"]: name_esp_str = "%s [%s]" % (p.name, weapon_names.get_weapon_model(p.weapon_num)) else: name_esp_str = p.name draw_string_center(frame.font, feet.x, feet.y - size_y, COLOR_PLAYER_NAME, name_esp_str) if keys["KEY_BOX_SNAPLINE"] and p.enemy and p.alive & ALIVE_FLAG: draw_line_abs(frame.line, read_game.screen_center_x, read_game.resolution_y, feet.x, feet.y, COLOR_BOX_LINE_WIDTH, p.color_esp) # w/h ratio if keys["KEY_BOXESP"]: self.draw_distance_ESP(p.pos, feet.x, feet.y, COLOR_PLAYER_NAME) if keys["KEY_TRIGGERBOT"] and keys["KEY_TRIGGER_BOT_KEY"]: if p.alive & ALIVE_FLAG and p.enemy and p.pose != 0: if (read_game.screen_center_x > feet.x - (size_x/3+1)) and (read_game.screen_center_x < feet.x + (size_x/3+1)): if (read_game.screen_center_y > feet.y - size_y) and (read_game.screen_center_y < feet.y ): if self.env.ticks - self.last_trigger_tick >= TRIGGER_BOT_FIRE_TICK_DELAY: self.last_trigger_tick = self.env.ticks windll.User32.keybd_event(ord(TRIGGER_BOT_FIRE_KEY), 0x12, 0, 0) windll.User32.keybd_event(ord(TRIGGER_BOT_FIRE_KEY), 0x12, KEYEVENTF_KEYUP, 0) else: # check if we need to show enemy behind indicator transform = read_game.world_to_screen_transform(p.pos) if transform.z < -10 and p.enemy: distance = (p.pos - self.env.read_game.my_player.pos).length() if abs(transform.x / transform.z) < 1: if distance < enemy_behind: enemy_behind = distance elif transform.x > 0: if distance < enemy_left: enemy_left = distance else: if distance < enemy_right: enemy_right = distance if keys["KEY_ENEMY_BEHIND"] and (enemy_behind or enemy_left or enemy_right): sprites = self.env.sprites if enemy_behind < INFINITY: color = self.get_faded_color_dist(ENEMY_BEHIND_COLOR_BLEND, enemy_behind) sprites.draw_sprite("down", read_game.screen_center_x, read_game.screen_center_y + ENEMY_BEHIND_Y, 0, color, ENEMY_BEHIND_SCALING) if enemy_left < INFINITY: color = self.get_faded_color_dist(ENEMY_BEHIND_COLOR_BLEND, enemy_left) sprites.draw_sprite("left-down", read_game.screen_center_x - ENEMY_BEHIND_X, read_game.screen_center_y + ENEMY_BEHIND_Y, 0, color, ENEMY_BEHIND_SCALING) if enemy_right < INFINITY: color = self.get_faded_color_dist(ENEMY_BEHIND_COLOR_BLEND, enemy_right) sprites.draw_sprite("right-down", read_game.screen_center_x + ENEMY_BEHIND_X, read_game.screen_center_y + ENEMY_BEHIND_Y, 0, color, ENEMY_BEHIND_SCALING) for idx in range(ENTITIESMAX): e = read_game.cod7_entity.arr[idx] if e.type == ET_TURRET and e.alive & ALIVE_FLAG and keys["KEY_BOXESP"]: self.env.tracker.track_entity(idx) head_pos = VECTOR(e.pos.x, e.pos.y, e.pos.z + 20) # eyepos of standing player feet = read_game.world_to_screen(e.pos) head = read_game.world_to_screen(head_pos) if feet and head: size_y = feet.y - head.y if size_y < 5: size_y = 5 size_x = size_y / 2.75 draw_box(frame.line, feet.x - size_x/2, feet.y, size_x, -size_y, COLOR_BOX_OUTER_WIDTH, COLOR_SENTRY) # if 
e.owner_turret >= 0 and e.owner_turret < PLAYERMAX: # self.env.tracker.track_entity(idx, e.owner_turret) # if read_game.player[e.owner_turret].enemy: # head_pos = VECTOR(e.pos.x, e.pos.y, e.pos.z + 20) # eyepos of standing player # feet = read_game.world_to_screen(e.pos) # head = read_game.world_to_screen(head_pos) # if feet and head: # size_y = feet.y - head.y # if size_y < 5: size_y = 5 # size_x = size_y / 2.75 # draw_box(frame.line, feet.x - size_x/2, feet.y, size_x, -size_y, COLOR_BOX_OUTER_WIDTH, COLOR_SENTRY) if e.type == ET_EXPLOSIVE and e.alive & ALIVE_FLAG: self.track_explosive(idx) elif e.type == ET_VEHICLE and e.alive & ALIVE_FLAG: if weapon_names.get_weapon_model(e.weapon) == "rc_car_weapon_mp": # RC-XD self.env.tracker.track_rcxd(idx) elif (e.type == ET_HELICOPTER or e.type == ET_PLANE) and e.alive & ALIVE_FLAG and keys["KEY_BOXESP"]: # all planes are shown because we don't know if they are enemies self.env.tracker.track_entity(idx) head_pos = VECTOR(e.pos.x, e.pos.y, e.pos.z + 100) feet = read_game.world_to_screen(e.pos) head = read_game.world_to_screen(head_pos) if feet and head: size_y = feet.y - head.y if size_y < 10: size_y = 10 size_x = size_y draw_box(frame.line, feet.x - size_x/2, feet.y, size_x, -size_y, COLOR_BOX_OUTER_WIDTH, COLOR_PLANE) if keys["KEY_BOX_SNAPLINE"]: draw_line_abs(frame.line, read_game.screen_center_x, read_game.resolution_y, feet.x, feet.y, COLOR_BOX_LINE_WIDTH, COLOR_PLANE) # elif (e.type == ET_HELICOPTER or e.type == ET_PLANE) and e.alive & ALIVE_FLAG and keys["KEY_BOXESP"]: # if e.owner_air >= 0 and e.owner_air < PLAYERMAX: # self.env.tracker.track_entity(idx, e.owner_air) # if e.type == ET_PLANE or read_game.player[e.owner_air].enemy: # # all planes are shown because we don't know if they are enemies # head_pos = VECTOR(e.pos.x, e.pos.y, e.pos.z + 100) # eyepos of standing player # feet = read_game.world_to_screen(e.pos) # head = read_game.world_to_screen(head_pos) # if feet and head: # size_y = feet.y - head.y # if size_y < 10: size_y = 10 # size_x = size_y # draw_box(frame.line, feet.x - size_x/2, feet.y, size_x, -size_y, COLOR_BOX_OUTER_WIDTH, COLOR_PLANE) # if keys["KEY_BOX_SNAPLINE"]: # draw_line_abs(frame.line, read_game.screen_center_x, read_game.resolution_y, # feet.x, feet.y, COLOR_BOX_LINE_WIDTH, COLOR_PLANE) # if keys["KEY_DOGS_ESP"]: for idx in range(DOGSMAX): dog = read_game.cod7_dog.arr[idx] client_num = dog.client_num # the entity number holding the dog if client_num > 0 and client_num < ENTITIESMAX: # let's look the real entity e = read_game.cod7_entity.arr[client_num] if e.alive & ALIVE_FLAG: if DEBUG: print "Found living dog idx=%i, clientnum=%i, owner=%i, team=%i" self.env.inspector.dump_entity(client_num) self.env.tracker.track_dog(client_num) self.loop_tracked_explo()
def main(): # Parse the commandline parser = argparse.ArgumentParser(description='SSD inference') parser.add_argument('--model', default='./pascal-voc/models/e225-SSD300-VGG16-PASCALVOC.tflite', help='model file') parser.add_argument('--training-data', default='./pascal-voc/training-data.pkl', help='training data') parser.add_argument("--input-dir", default='./test/in', help='input directory') parser.add_argument('--output-dir', default='./test/out', help='output directory') parser.add_argument('--batch-size', type=int, default=1, help='batch size') args = parser.parse_args() # Print parameters print('[i] Model: ', args.model) print('[i] Training data: ', args.training_data) print('[i] Input dir: ', args.input_dir) print('[i] Output dir: ', args.output_dir) print('[i] Batch size: ', args.batch_size) # Load the training data with open(args.training_data, 'rb') as f: data = pickle.load(f) preset = data['preset'] colors = data['colors'] lid2name = data['lid2name'] anchors = get_anchors_for_preset(preset) # Get the input images images = os.listdir(args.input_dir) images = ["%s/%s" % (args.input_dir, image) for image in images] # Create the output directory if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) # Load the TFLite model and allocate tensors. interpreter = tf.lite.Interpreter(model_path=args.model) interpreter.allocate_tensors() # Run the detections in batches for i in tqdm(range(0, len(images), args.batch_size)): batch_names = images[i:i+args.batch_size] batch_imgs = [] batch = [] for f in batch_names: img = cv2.imread(f) batch_imgs.append(img) img = cv2.resize(img, (300, 300)) batch.append(img.astype(np.float32)) batch = np.array(batch) # Get input and output tensors. input_details = interpreter.get_input_details() interpreter.set_tensor(input_details[0]['index'], batch) interpreter.invoke() output_details = interpreter.get_output_details() enc_boxes = interpreter.get_tensor(output_details[0]['index']) for i in range(len(batch_names)): boxes = decode_boxes(enc_boxes[i], anchors, 0.5, lid2name, None) boxes = suppress_overlaps(boxes)[:200] name = os.path.basename(batch_names[i]) meta = {} for j, box in enumerate(boxes): draw_box(batch_imgs[i], box[1], colors[box[1].label]) box_data = {} box_data['Label'] = box[1].label, box_data['LabelID'] = str(box[1].labelid) box_data['Center'] = [box[1].center.x, box[1].center.y] box_data['Size'] = [box[1].size.w, box[1].size.h] box_data['Confidence'] = str(box[0]) meta["prediction_%s" % (j+1)] = box_data with open(os.path.join(args.output_dir, name+'.json'), 'w') as f: json.dump(meta, f, indent=4) cv2.imwrite(os.path.join(args.output_dir, name), batch_imgs[i])
def main(args): """ main """ if not os.path.exists(args.image_path): print("{} does not exists".format(args.image_path)) return 1 # export model.pb from session dir. Skip if model.pb already exists model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0", train.MODEL_PATH) graph = model.load(train.MODEL_PATH, args.device) with graph.as_default(): # (?, n, n, NUM_CLASSES) tensor logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0") images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0") # each cell in coords (batch_position, i, j) -> is a probability vector per_region_probabilities = tf.nn.softmax( tf.reshape(logits, [-1, train.NUM_CLASSES])) # [tested positions, train.NUM_CLASSES] # array[0]=values, [1]=indices # get every probabiliy, because we can use localization to do classification top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES) # each with shape [tested_positions, k] original_image = tf.image.convert_image_dtype( image_processing.read_image( tf.constant(args.image_path), 3, args.image_path.split('.')[-1]), dtype=tf.uint8) original_image_dim = tf.shape(original_image) k = 2 eval_image_side = tf.cond( tf.less_equal( tf.minimum(original_image_dim[0], original_image_dim[1]), tf.constant(model.INPUT_SIDE)), lambda: tf.constant(model.INPUT_SIDE), lambda: tf.constant(model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k) ) eval_image = tf.expand_dims( image_processing.zm_mp( image_processing.resize_bl(original_image, eval_image_side)), 0) # roi placehoder roi_ = tf.placeholder(tf.uint8) # rop preprocessing, single image classification roi_preproc = image_processing.zm_mp( image_processing.resize_bl( tf.image.convert_image_dtype(roi_, tf.float32), model.INPUT_SIDE)) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: input_image, input_image_side, image = sess.run( [eval_image, eval_image_side, original_image]) start = time.time() probability_map, top_values, top_indices = sess.run( [logits, top_k[0], top_k[1]], feed_dict={images_: input_image}) # let's think to the net as a big net, with the last layer (before the FC # layers for classification) with a receptive field of # LAST_KERNEL_SIDE x LAST_KERNEL_SIDE. Lets approximate the net with this last kernel: # If the image is scaled down to LAST_KERNEL_SIDE x LAST_KERNEL_SIDE # the output is a single point. # if the image is scaled down to something bigger # (that make the output side of contolution integer) the result is a spacial map # of points. Every point has a depth of num classes. # for every image in the input batch probability_coords = 0 for _ in range(len(input_image)): # scaling factor between original image and resized image full_image_scaling_factors = np.array([ image.shape[1] / input_image_side, image.shape[0] / input_image_side ]) glance = defaultdict(list) # select count(*), avg(prob) from map group by label, order by count, avg. 
group = defaultdict(lambda: defaultdict(float)) for pmap_y in range(probability_map.shape[1]): # calculate position in the downsampled image ds ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE for pmap_x in range(probability_map.shape[2]): ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE if top_indices[probability_coords][ 0] != pascal.BACKGROUND_CLASS_ID: # create coordinates of rect in the downsampled image # convert to numpy array in order to use broadcast ops coord = [ ds_x, ds_y, ds_x + model.LAST_KERNEL_SIDE, ds_y + model.LAST_KERNEL_SIDE ] # if something is found, append rectagle to the # map of rectalges per class rect = utils.upsample_and_shift( coord, model.DOWNSAMPLING_FACTOR, [0, 0], full_image_scaling_factors) prob = top_values[probability_coords][0] label = pascal.CLASSES[top_indices[ probability_coords][0]] rect_prob = [rect, prob] glance[label].append(rect_prob) group[label]["count"] += 1 group[label]["prob"] += prob # update probability coord value probability_coords += 1 classes = group.keys() print('Found {} classes: {}'.format(len(classes), classes)) # merge overlapping rectangles for each class global_rect_prob = utils.group_overlapping_regions( glance, eps=RECT_SIMILARITY) # loop preserving order, because rois are evaluated in order rois = [] rois_count = 0 for label, rect_prob_list in sorted(global_rect_prob.items()): # extract rectangles for each image and classify it. # if the classification gives the same global label as top-1(2,3?) draw it # else skip it. for rect_prob in rect_prob_list: rect = rect_prob[0] y2 = rect[3] y1 = rect[1] x2 = rect[2] x1 = rect[0] roi = image[y1:y2, x1:x2] rois.append( sess.run(roi_preproc, feed_dict={roi_: roi})) rois_count += 1 # evaluate top values for every image in the batch of rois rois_top_values, rois_top_indices = sess.run( [top_k[0], top_k[1]], feed_dict={images_: rois}) roi_id = 0 # localization dictionary. ["label"] => [[rect, prob], ...] localize = defaultdict(list) # classification dictionary. #[(rect)] => [top_values[0..num_cl], top_indices[0..num_cl]] classify = defaultdict(list) for label, rect_prob_list in sorted(global_rect_prob.items()): # loop over rect with the current label for rect_prob in rect_prob_list: # remove background class from avaiable classes # need to use tolist because rois_top_indices[roi_id] is # a ndarray (Tensorflow always returns ndarray, even if # the data is 1-D) bg_pos = rois_top_indices[roi_id].tolist().index( pascal.BACKGROUND_CLASS_ID) roi_top_probs = np.delete(rois_top_values[roi_id], bg_pos) roi_top_indices = np.delete(rois_top_indices[roi_id], bg_pos) roi_label = pascal.CLASSES[roi_top_indices[0]] if label == roi_label: localize[label].append( [rect_prob[0], roi_top_probs[0]]) classify[tuple(rect_prob[0])] = [ roi_top_indices, roi_top_probs ] roi_id += 1 end_time = time.time() - start print("time: {}".format(end_time)) # now I can convert RGB to BGR to display image with OpenCV # I can't do that before, because ROIs gets extracted on RGB image # in order to be processed without errors by Tensorflow image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) for label, rect_prob_list in localize.items(): for rect_prob in rect_prob_list: utils.draw_box( image, rect_prob[0], "{}({:.3})".format(label, rect_prob[1]), utils.LABEL_COLORS[label], thickness=2) cv2.imshow("img", image) cv2.waitKey(0) return 0
def main(): checkpoint_file = 'model/e25.ckpt' metagraph_file = checkpoint_file + '.meta' with tf.Session() as sess: init = tf.global_variables_initializer() sess.run(init) preset = get_preset_by_name('vgg300') anchors = get_anchors_for_preset(preset) net = SSDVGG(sess, preset) net.build_from_metagraph(metagraph_file, checkpoint_file) #for tensor in tf.get_default_graph().as_graph_def().node: print(tensor.name) image_path = 'demo/test.jpg' img = cv2.imread(image_path) img = np.float32(img) img = cv2.resize(img, (300, 300)) img = np.expand_dims(img, axis=0) print('image_input', net.image_input) print('img', type(img), img.shape, img[0][1][1]) #exit() enc_boxes = sess.run(net.result, feed_dict={net.image_input: img}) print('enc_boxes', type(enc_boxes), len(enc_boxes), type(enc_boxes[0]), enc_boxes[0].shape) lid2name = { 0: 'Aeroplane', 1: 'Bicycle', 2: 'Bird', 3: 'Boat', 4: 'Bottle', 5: 'Bus', 6: 'Car', 7: 'Cat', 8: 'Chair', 9: 'Cow', 10: 'Diningtable', 11: 'Dog', 12: 'Horse', 13: 'Motorbike', 14: 'Person', 15: 'Pottedplant', 16: 'Sheep', 17: 'Sofa', 18: 'Train', 19: 'Tvmonitor' } print('anchors', type(anchors)) boxes = decode_boxes(enc_boxes[0], anchors, 0.5, lid2name, None) boxes = suppress_overlaps(boxes)[:200] img = cv2.imread(image_path) for box in boxes: color = (31, 119, 180) draw_box(img, box[1], color) box_data = '{} {} {} {} {} {}\n'.format( box[1].label, box[1].labelid, box[1].center.x, box[1].center.y, box[1].size.w, box[1].size.h) print('box_data', box_data) cv2.imwrite(image_path + '_out.jpg', img)
def predict(): data = {"success": False} if fl.request.method == "POST": req_image = fl.request.get_json()["img"] name = fl.request.get_json()["name"] with open("temp.jpg", "wb") as f: f.write(base64.decodebytes(req_image.encode())) image = imread("temp.jpg") # size = image.shape ik = np.zeros((1, 800, 800, 3), dtype=np.uint8) image = resize(image, (800, 800), mode="constant", preserve_range=True) ik[0] = image image = ik[0] labels_to_names = { 2: 'maple', 1: 'juniper', 0: 'oak', 3: 'spruce', 4: 'thuja', 5: 'birch', 6: 'mistletoe' } boxes, scores, labels = prediction_model.predict( np.expand_dims(image, axis=0)) for box, score, label in zip(boxes[0], scores[0], labels[0]): # scores are sorted so we can break if score < 0.5: break if abs(box[0] - box[2]) < image.shape[0] * 0.1 or abs( box[1] - box[3]) < image.shape[0] * 0.1: continue if abs(box[0] - box[2]) > image.shape[0] * 0.8 or abs( box[1] - box[3]) > image.shape[0] * 0.8: continue if abs(box[0] - box[2]) > abs(box[1] - box[3]): continue color = label_color(label) b = box.astype(int) draw_box(image, b, color=color) caption = "{} {:.3f}".format(labels_to_names[label], score) draw_caption(image, b, caption) imsave("temp.jpg", image) with open(f"data/{name}.png", "rb") as f_img: encoded = base64.b64encode(f_img.read()) data["success"] = True data["image"] = encoded.decode() return fl.jsonify(data)
print("Start Reading...") while True: _, img = video.read() img = cv2.transpose(img) img = cv2.flip(img, 1) height, width, channel = img.shape # matrix = cv2.getRotationMatrix2D((width / 2, height / 2), 270, 1) # frame = cv2.warpAffine(frame, matrix, (width, height)) tic = time.time() resize_img = cv2.resize(img, (0, 0), fx=resize_rate, fy=resize_rate) if resize_img.ndim == 2: resize_img = facenet.to_rgb(resize_img) resize_img = resize_img[:, :, 0:3] bounding_boxes = detection.detect_faces(resize_img, img.shape) if bounding_boxes.shape[0] > 0: match_names, p = recognition.recognize_faces(img, bounding_boxes) else: bounding_boxes = match_names = p = [] toc = time.time() - tic img = utils.mosaic(img, bounding_boxes, match_names, 6) img = utils.draw_box(img, bounding_boxes, match_names, p) cv2.imshow("origin", img) cv2.waitKey(0)
def main(): #--------------------------------------------------------------------------- # Parse the commandline #--------------------------------------------------------------------------- parser = argparse.ArgumentParser(description='SSD inference') parser.add_argument("files", nargs="*") parser.add_argument('--model', default='model300.pb', help='model file') parser.add_argument('--training-data', default='training-data-300.pkl', help='training data') parser.add_argument('--output-dir', default='test-out', help='output directory') parser.add_argument('--batch-size', type=int, default=1, help='batch size') args = parser.parse_args() #--------------------------------------------------------------------------- # Print parameters #--------------------------------------------------------------------------- print('[i] Model: ', args.model) print('[i] Training data: ', args.training_data) print('[i] Output dir: ', args.output_dir) print('[i] Batch size: ', args.batch_size) #--------------------------------------------------------------------------- # Load the graph and the training data #--------------------------------------------------------------------------- graph_def = tf.GraphDef() with open(args.model, 'rb') as f: serialized = f.read() graph_def.ParseFromString(serialized) with open(args.training_data, 'rb') as f: data = pickle.load(f) preset = data['preset'] colors = data['colors'] lid2name = data['lid2name'] anchors = get_anchors_for_preset(preset) #--------------------------------------------------------------------------- # Create the output directory #--------------------------------------------------------------------------- if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) #--------------------------------------------------------------------------- # Run the detections in batches #--------------------------------------------------------------------------- with tf.Session() as sess: tf.import_graph_def(graph_def, name='detector') img_input = sess.graph.get_tensor_by_name('detector/image_input:0') result = sess.graph.get_tensor_by_name('detector/result/result:0') files = sys.argv[1:] for i in tqdm(range(0, len(files), args.batch_size)): batch_names = files[i:i + args.batch_size] batch_imgs = [] batch = [] for f in batch_names: img = cv2.imread(f) batch_imgs.append(img) img = cv2.resize(img, (300, 300)) batch.append(img) batch = np.array(batch) feed = {img_input: batch} enc_boxes = sess.run(result, feed_dict=feed) for i in range(len(batch_names)): boxes = decode_boxes(enc_boxes[i], anchors, 0.5, lid2name, None) boxes = suppress_overlaps(boxes)[:200] name = os.path.basename(batch_names[i]) with open(os.path.join(args.output_dir, name + '.txt'), 'w') as f: for box in boxes: draw_box(batch_imgs[i], box[1], colors[box[1].label]) box_data = '{} {} {} {} {} {}\n'.format( box[1].label, box[1].labelid, box[1].center.x, box[1].center.y, box[1].size.w, box[1].size.h) f.write(box_data) cv2.imwrite(os.path.join(args.output_dir, name), batch_imgs[i])
def draw_on_image(self, boxes, imagePath): f,(ax1,ax2) = plt.subplots(1,2,figsize=(16,6)) ax1.imshow(plt.imread(imagePath)) ax2.imshow(draw_box(boxes,plt.imread(imagePath),[[0, plt.imread(imagePath).shape[1]], [0, plt.imread(imagePath).shape[0]]])) return draw_box(boxes,plt.imread(imagePath),[[0, plt.imread(imagePath).shape[1]], [0, plt.imread(imagePath).shape[0]]])
def train(): """ Introduction ------------ 训练模型 """ train_reader = Reader('train', config.data_dir, config.anchors_path, config.num_classes, input_shape=config.input_shape, max_boxes=config.max_boxes) train_data = train_reader.build_dataset(config.train_batch_size) is_training = tf.placeholder(tf.bool, shape=[]) iterator = train_data.make_one_shot_iterator() images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = iterator.get_next( ) images.set_shape([None, config.input_shape, config.input_shape, 3]) bbox.set_shape([None, config.max_boxes, 5]) grid_shapes = [ config.input_shape // 32, config.input_shape // 16, config.input_shape // 8 ] bbox_true_13.set_shape( [None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes]) bbox_true_26.set_shape( [None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes]) bbox_true_52.set_shape( [None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes]) draw_box(images, bbox) model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path, config.classes_path, config.pre_train) bbox_true = [bbox_true_13, bbox_true_26, bbox_true_52] output = model.yolo_inference(images, config.num_anchors / 3, config.num_classes, is_training) loss = model.yolo_loss(output, bbox_true, model.anchors, config.num_classes, config.ignore_thresh) l2_loss = tf.losses.get_regularization_loss() loss += l2_loss tf.summary.scalar('loss', loss) merged_summary = tf.summary.merge_all() global_step = tf.Variable(0, trainable=False) lr = tf.train.exponential_decay(config.learning_rate, global_step, decay_steps=2000, decay_rate=0.8) optimizer = tf.train.AdamOptimizer(learning_rate=lr) # 如果读取预训练权重,则冻结darknet53网络的变量 update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): if config.pre_train: train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='yolo') train_op = optimizer.minimize(loss=loss, global_step=global_step, var_list=train_var) else: train_op = optimizer.minimize(loss=loss, global_step=global_step) init = tf.global_variables_initializer() saver = tf.train.Saver() with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess: ckpt = tf.train.get_checkpoint_state(config.model_dir) if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): print('restore model', ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) else: sess.run(init) if config.pre_train is True: load_ops = load_weights(tf.global_variables(scope='darknet53'), config.darknet53_weights_path) sess.run(load_ops) summary_writer = tf.summary.FileWriter(config.log_dir, sess.graph) loss_value = 0 for epoch in range(config.Epoch): for step in range(int(config.train_num / config.train_batch_size)): start_time = time.time() train_loss, summary, global_step_value, _ = sess.run( [loss, merged_summary, global_step, train_op], {is_training: True}) loss_value += train_loss duration = time.time() - start_time examples_per_sec = float(duration) / config.train_batch_size format_str = ( 'Epoch {} step {}, train loss = {} ( {} examples/sec; {} ' 'sec/batch)') print( format_str.format(epoch, step, loss_value / global_step_value, examples_per_sec, duration)) summary_writer.add_summary(summary=tf.Summary(value=[ tf.Summary.Value(tag="train loss", simple_value=train_loss) ]), global_step=step) summary_writer.add_summary(summary, step) summary_writer.flush() if epoch == 0 and step == 100: checkpoint_path = os.path.join(config.model_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=global_step) # 
每3个epoch保存一次模型 if epoch % 3 == 0: checkpoint_path = os.path.join(config.model_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=global_step)
# In[12]: batch = np.transpose(resized, (2, 0, 1)) batch = 2 * (batch / 255.) - 1 batch = np.expand_dims(batch, axis=0) out = model.predict(batch) # In[13]: boxes = yolo_net_out_to_car_boxes(out[0], threshold=0.17) # In[14]: f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) ax1.imshow(image) ax2.imshow(draw_box(boxes, plt.imread(imagePath), [[0, 1280], [0, 720]])) # In[15]: images = [plt.imread(file) for file in glob.glob('./test_images/*.jpg')] batch = np.array([ np.transpose(cv2.resize(image, (448, 448)), (2, 0, 1)) for image in images ]) batch = 2 * (batch / 255.) - 1 out = model.predict(batch) f, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(11, 10)) for i, ax in zip(range(len(batch)), [ax1, ax2, ax3, ax4, ax5, ax6]): boxes = yolo_net_out_to_car_boxes(out[i], threshold=0.17) ax.imshow(draw_box(boxes, images[i], [[0, 1280], [0, 720]])) # In[ ]:
model.add(Dense(1470)) model.summary() load_weights(model, 'weights\\yolo-tiny.weights') imagePath = 'test_images\\test1.jpg' image = plt.imread(imagePath, 'rb') image_crop = image[300:650, 500:, :] resized = cv2.resize(image_crop, (448, 448)) batch = np.transpose(resized, (2, 0, 1)) batch = 2 * (batch / 255.) - 1 batch = np.expand_dims(batch, axis=0) # img_width, img_height = 448, 448 img = load_img(imagePath, False, target_size=(img_width, img_height)) x = img_to_array(img) x = np.expand_dims(x, axis=0) out = model.predict(x) print(np.argmax(out, axis=1)) # boxes = yolo_net_out_to_car_boxes(out[0], threshold=0.17) f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) ax1.imshow(image) ax2.imshow(draw_box(boxes, plt.imread(imagePath), [[500, 1280], [300, 650]])) # img = cv2.imread(imagePath) # print(img.shape) # cv2.imshow('Amanda', img) # cv2.waitKey(0)
# single image prediction img_ten, img_pil, origin_size = imload(args.data, args.pretrained, args.imsize) box_ten, cls_ten, score_ten = predictor(img_ten.to(device)) box_lst, cls_lst, score_lst = box_ten[0].tolist(), cls_ten[0].tolist( ), score_ten[0].tolist() # clamp outside image box_lst = [ list(map(lambda x: max(0, min(x, args.imsize)), box)) for box in box_lst ] # draw box, class and score per prediction for i, (box, cls, score) in enumerate(zip(box_lst, cls_lst, score_lst)): img_pil = draw_box(img_pil, box, color=CLASS2COLOR[cls]) if args.fontsize > 0: text = '%s: %1.2f' % (INDEX2CLASS[cls], score) coord = [box[0], box[1] - args.fontsize] img_pil = write_text(img_pil, text, coord, fontsize=args.fontsize) # resize origin scale of image xmin, ymin, xmax, ymax = box xmin = xmin * origin_size[0] / args.imsize ymin = ymin * origin_size[1] / args.imsize xmax = xmax * origin_size[0] / args.imsize ymax = ymax * origin_size[0] / args.imsize print( '%s: Index: %3d, Class: %7s, Score: %1.2f, Box: %4d, %4d, %4d, %4d' %
def main(args): """ main """ if not os.path.exists(args.image_path): print("{} does not exists".format(args.image_path)) return 1 # export model.pb from session dir. Skip if model.pb already exists model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0", train.MODEL_PATH) graph = model.load(train.MODEL_PATH, args.device) with graph.as_default(): # (?, n, n, NUM_CLASSES) tensor logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0") images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0") # each cell in coords (batch_position, i, j) -> is a probability vector per_region_probabilities = tf.nn.softmax( tf.reshape(logits, [-1, train.NUM_CLASSES])) # [tested positions, train.NUM_CLASSES] # array[0]=values, [1]=indices # get every probabiliy, because we can use localization to do classification top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES) # each with shape [tested_positions, k] original_image = tf.image.convert_image_dtype( image_processing.read_image( tf.constant(args.image_path), 3, args.image_path.split('.')[-1]), dtype=tf.uint8) original_image_dim = tf.shape(original_image) k = 2 eval_image_side = tf.cond( tf.less_equal( tf.minimum(original_image_dim[0], original_image_dim[1]), tf.constant(model.INPUT_SIDE)), lambda: tf.constant(model.INPUT_SIDE), lambda: tf.constant(model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k) ) eval_image = tf.expand_dims( image_processing.zm_mp( image_processing.resize_bl(original_image, eval_image_side)), 0) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: input_image, input_image_side, image = sess.run( [eval_image, eval_image_side, original_image]) start = time.time() probability_map, top_values, top_indices = sess.run( [logits, top_k[0], top_k[1]], feed_dict={images_: input_image}) # let's think to the net as a big net, with the last layer (before the FC # layers for classification) with a receptive field of # LAST_KERNEL_SIDE x LAST_KERNEL_SIDE. Lets approximate the net with this last kernel: # If the image is scaled down to LAST_KERNEL_SIDE x LAST_KERNEL_SIDE # the output is a single point. # if the image is scaled down to something bigger # (that make the output side of contolution integer) the result is a spacial map # of points. Every point has a depth of num classes. # for every image in the input batch probability_coords = 0 for _ in range(len(input_image)): # scaling factor between original image and resized image full_image_scaling_factors = np.array([ image.shape[1] / input_image_side, image.shape[0] / input_image_side ]) glance = defaultdict(list) # select count(*), avg(prob) from map group by label, order by count, avg. 
group = defaultdict(lambda: defaultdict(float)) for pmap_y in range(probability_map.shape[1]): # calculate position in the downsampled image ds ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE for pmap_x in range(probability_map.shape[2]): ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE if top_indices[probability_coords][ 0] != pascal.BACKGROUND_CLASS_ID: # create coordinates of rect in the downsampled image # convert to numpy array in order to use broadcast ops coord = [ ds_x, ds_y, ds_x + model.LAST_KERNEL_SIDE, ds_y + model.LAST_KERNEL_SIDE ] # if something is found, append rectagle to the # map of rectalges per class rect = utils.upsample_and_shift( coord, model.DOWNSAMPLING_FACTOR, [0, 0], full_image_scaling_factors) prob = top_values[probability_coords][0] label = pascal.CLASSES[top_indices[ probability_coords][0]] rect_prob = [rect, prob] glance[label].append(rect_prob) group[label]["count"] += 1 group[label]["prob"] += prob # update probability coord value probability_coords += 1 classes = group.keys() print('Found {} classes: {}'.format(len(classes), classes)) # find out the minimum amount of intersection among regions # in the original image, that can be used to trigger a match # or 2, is s square. 0 dim is batch map_side = probability_map.shape[1] map_area = map_side**2 min_intersection = map_side print('min intersection: ', min_intersection) # Save the relative frequency for every class # To trigger a match, at least a fraction of intersection should be present for label in group: group[label]["prob"] /= group[label]["count"] group[label]["rf"] = group[label]["count"] / map_area print(label, group[label]) # merge overlapping rectangles for each class. # return a map of {"label": [rect, prob, count] localize = utils.group_overlapping_regions( glance, eps=RECT_SIMILARITY) end_time = time.time() - start print("time: {}".format(end_time)) # now I can convert RGB to BGR to display image with OpenCV # I can't do that before, because ROIs gets extracted on RGB image # in order to be processed without errors by Tensorflow image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) for label, rect_prob_list in localize.items(): for rect_prob in rect_prob_list: rect = rect_prob[0] prob = rect_prob[1] count = rect_prob[2] freq = group[label]["rf"] if count >= min_intersection and freq > 0.1: utils.draw_box( image, rect, "{}({:.3})".format(label, prob), utils.LABEL_COLORS[label], thickness=2) cv2.imshow("img", image) cv2.waitKey(0) return 0
def train():
    """
    Introduction
    ------------
    Train the model (super-resolution generator + YOLO discriminator).
    """
    # gpu_num = check_available_gpus()
    # for gpu_id in range(int(gpu_num)):
    #     with tf.device(tf.DeviceSpec(device_type="GPU", device_index=gpu_id)):
    #         with tf.variable_scope(tf.get_variable_scope(), reuse=(gpu_id > 0)):

    # ----------------------- train data -------------------------
    train_reader = Reader('train',
                          config.data_dir,
                          config.anchors_path2,
                          config.num_classes,
                          input_shape=config.input_shape,
                          max_boxes=config.max_boxes)
    train_data = train_reader.build_dataset(config.train_batch_size)
    is_training = tf.placeholder(tf.bool, shape=[])
    iterator = train_data.make_one_shot_iterator()
    images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = iterator.get_next()

    # ----------------------- definitions -------------------------
    images.set_shape([None, config.input_shape, config.input_shape, 3])
    bbox.set_shape([None, config.max_boxes, 5])
    grid_shapes = [
        config.input_shape // 32, config.input_shape // 16,
        config.input_shape // 8
    ]
    # low-resolution copies of the input, fed to the generator
    lr_images = tf.image.resize_images(
        images,
        size=[config.input_shape // 4, config.input_shape // 4],
        method=0,
        align_corners=False)
    lr_images.set_shape(
        [None, config.input_shape // 4, config.input_shape // 4, 3])
    bbox_true_13.set_shape(
        [None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes])
    bbox_true_26.set_shape(
        [None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes])
    bbox_true_52.set_shape(
        [None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes])
    bbox_true = [bbox_true_13, bbox_true_26, bbox_true_52]

    # ------------------------ summary + draw ------------------------
    tf.summary.image('input1', images, max_outputs=3)
    draw_box(images, bbox)

    # ------------------------------ model ---------------------------
    model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path2,
                 config.classes_path, config.pre_train)
    # earlier two-stage generator experiment, kept for reference:
    # with tf.variable_scope("train_var"):
    #     g_img1 = model.GAN_g1(lr_images)
    #     g_img2 = model.GAN_g2(g_img1)
    with tf.variable_scope("model_gd"):
        net_g1 = model.GAN_g(lr_images, is_train=True, mask=False)
        net_g = model.GAN_g(lr_images, is_train=True, reuse=True, mask=True)
        d_real = model.yolo_inference(images, config.num_anchors / 3,
                                      config.num_classes, training=True)
        tf.get_variable_scope().reuse_variables()
        d_fake = model.yolo_inference(net_g.outputs, config.num_anchors / 3,
                                      config.num_classes, training=True)

    # --------------------------- d_loss ---------------------------
    d_loss1 = model.yolo_loss(d_real, bbox_true, model.anchors,
                              config.num_classes, 1, config.ignore_thresh)
    d_loss2 = model.yolo_loss(d_fake, bbox_true, model.anchors,
                              config.num_classes, 0, config.ignore_thresh)
    d_loss = d_loss1 + d_loss2
    l2_loss = tf.losses.get_regularization_loss()
    d_loss += l2_loss

    # --------------------------- g_loss ----------------------------
    adv_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(d_fake[3]), logits=d_fake[3])
    # adv_loss = 1e-3 * tf.reduce_sum(adv_loss) / tf.cast(tf.shape(d_fake[3])[0], tf.float32)
    adv_loss = tf.reduce_sum(adv_loss) / tf.cast(
        tf.shape(d_fake[3])[0], tf.float32)
    mse_loss1 = tl.cost.mean_squared_error(net_g1.outputs, images, is_mean=True)
    mse_loss1 = tf.reduce_sum(mse_loss1) / tf.cast(
        tf.shape(net_g1.outputs)[0], tf.float32)
    mse_loss2 = tl.cost.mean_squared_error(net_g.outputs, images, is_mean=True)
    mse_loss2 = tf.reduce_sum(mse_loss2) / tf.cast(
        tf.shape(net_g.outputs)[0], tf.float32)
    mse_loss = mse_loss1 + mse_loss2
    # clc_loss = 2e-6 * d_loss2
    clc_loss = model.yolo_loss(d_fake, bbox_true, model.anchors,
                               config.num_classes, 1, config.ignore_thresh)
    g_loss = mse_loss + adv_loss + clc_loss
    l2_loss = tf.losses.get_regularization_loss()
    g_loss += l2_loss

    # ---------------------- summary losses ---------------------------
    # tf.summary.image('img', images, 3)
    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('g_loss', g_loss)
    merged_summary = tf.summary.merge_all()

    # ---------------------- optimizer ---------------------------
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(config.learning_rate,
                                    global_step,
                                    decay_steps=2000,
                                    decay_rate=0.8)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    # if pre-trained weights are loaded, freeze the darknet53 variables and
    # only optimize the generator / discriminator scopes
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if config.pre_train:
            train_varg1 = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='model_gd/generator/generator1')
            train_varg2 = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='model_gd/generator/generator2')
            train_varg = train_varg1 + train_varg2
            train_vard = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='model_gd/yolo_inference/discriminator')
            train_opg = optimizer.minimize(loss=g_loss,
                                           global_step=global_step,
                                           var_list=train_varg)
            train_opd = optimizer.minimize(loss=d_loss,
                                           global_step=global_step,
                                           var_list=train_vard)
        else:
            train_opd = optimizer.minimize(loss=d_loss, global_step=global_step)
            train_opg = optimizer.minimize(loss=g_loss, global_step=global_step)

    # ------------------------- session -----------------------------
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=True)) as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        ckpt = tf.train.get_checkpoint_state(config.model_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('restore model', ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(init)
            if config.pre_train is True:
                load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                        config.darknet53_weights_path)
                sess.run(load_ops)
        summary_writer = tf.summary.FileWriter(config.log_dir, sess.graph)
        dloss_value = 0
        gloss_value = 0
        for epoch in range(config.Epoch):
            for step in range(int(config.train_num / config.train_batch_size)):
                start_time = time.time()
                train_dloss, summary, global_step_value, _ = sess.run(
                    [d_loss, merged_summary, global_step, train_opd],
                    {is_training: True})
                train_gloss, summary, global_step_value, _ = sess.run(
                    [g_loss, merged_summary, global_step, train_opg],
                    {is_training: True})
                dloss_value += train_dloss
                gloss_value += train_gloss
                duration = time.time() - start_time
                # examples per second = batch size / batch time
                examples_per_sec = config.train_batch_size / float(duration)
                print(global_step_value)

                # ------------------------ progress ------------------------
                format_str1 = (
                    'Epoch {} step {}, train dloss = {} train gloss = {} '
                    '({} examples/sec; {} sec/batch)')
                print(
                    format_str1.format(epoch, step,
                                       dloss_value / global_step_value,
                                       gloss_value / global_step_value,
                                       examples_per_sec, duration))
                # print(format_str1.format(epoch, step, train_dloss, train_gloss,
                #                          examples_per_sec, duration))

                # ---------------------- summary losses ------------------------
                summary_writer.add_summary(
                    summary=tf.Summary(value=[
                        tf.Summary.Value(tag="train dloss",
                                         simple_value=train_dloss)
                    ]),
                    global_step=step)
                summary_writer.add_summary(
                    summary=tf.Summary(value=[
                        tf.Summary.Value(tag="train gloss",
                                         simple_value=train_gloss)
                    ]),
                    global_step=step)
                summary_writer.add_summary(summary, step)
                summary_writer.flush()

            # -------------------------- save model ----------------------------
            # save the model every 3 epochs
            if epoch % 3 == 0:
                checkpoint_path = os.path.join(config.model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=global_step)
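# Hedged side note (not from the original project): the learning-rate schedule
# above follows tf.train.exponential_decay, i.e. lr * decay_rate ** (global_step /
# decay_steps) with the default staircase=False. A tiny standalone sketch, with
# an assumed base learning rate of 1e-3, shows how the rate evolves during training.
def decayed_lr(base_lr, global_step, decay_steps=2000, decay_rate=0.8):
    """Continuous exponential decay, matching the formula used above."""
    return base_lr * decay_rate ** (global_step / decay_steps)

# decayed_lr(1e-3, 0)      -> 0.001
# decayed_lr(1e-3, 2000)   -> 0.0008
# decayed_lr(1e-3, 10000)  -> ~0.00033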
def render(self):
    read_game = self.env.read_game
    frame = self.env.frame
    weapon_names = self.env.weapon_names
    if not read_game.is_in_game:
        return

    if keys["KEY_BOXESP"]:
        for idx in range(PLAYERMAX):
            p = read_game.player[idx]
            if (p.type == ET_PLAYER) and p.valid and p.alive and p != read_game.my_player:
                # colors already calculated
                feet, head, size_x, size_y = self.calc_size_xy(p)
                if feet and head:
                    p.color_esp = self.get_faded_color(p.pos, p.color_esp)
                    if keys["KEY_BOXESP"]:
                        draw_box(frame.line, feet.x - size_x / 2, feet.y,
                                 size_x, -size_y, COLOR_BOX_OUTER_WIDTH,
                                 p.color_esp)
                    if keys["KEY_WEAPON_ESP"]:
                        name_esp_str = "%s [%s]" % (
                            p.name, weapon_names.get_weapon_name(p.weapon_num))
                    else:
                        name_esp_str = p.name
                    draw_string_center(frame.font, feet.x, feet.y - size_y,
                                       COLOR_PLAYER_NAME, name_esp_str)
                    if keys["KEY_BOX_SNAPLINE"] and p.enemy and p.alive & ALIVE_FLAG:
                        draw_line_abs(frame.line, read_game.screen_center_x,
                                      read_game.resolution_y, feet.x, feet.y,
                                      COLOR_BOX_LINE_WIDTH, p.color_esp)
                    if keys["KEY_BOXESP"]:
                        self.draw_distance_ESP(p.pos, feet.x, feet.y,
                                               COLOR_PLAYER_NAME)
                    if keys["KEY_TRIGGERBOT"] and keys["KEY_TRIGGER_BOT_KEY"]:
                        if p.alive & ALIVE_FLAG and p.enemy and p.pose != 0:
                            # fire only when the crosshair is inside the player's box
                            if (read_game.screen_center_x > feet.x - size_x / 2) and \
                               (read_game.screen_center_x < feet.x + size_x / 2):
                                if (read_game.screen_center_y > feet.y - size_y) and \
                                   (read_game.screen_center_y < feet.y):
                                    if self.env.ticks - self.last_trigger_tick > 5:
                                        self.last_trigger_tick = self.env.ticks
                                        windll.User32.keybd_event(TRIGGER_BOT_FIRE_KEY, 0x12, 0, 0)
                                        windll.User32.keybd_event(TRIGGER_BOT_FIRE_KEY, 0x12, KEYEVENTF_KEYUP, 0)

    #=======================================================================
    # pp = cast(pointer(read_game.mw2_entity), POINTER(c_int))
    # for i in range(ENTITIESMAX):
    #     type = pp[0x38 + 0x81 * i]
    #     if type == ET_TURRET or type == ET_EXPLOSIVE or type == ET_HELICOPTER or type == ET_PLANE:
    #=======================================================================
    for idx in range(ENTITIESMAX):
        e = read_game.mw2_entity.arr[idx]
        if e.type == ET_TURRET and e.alive & ALIVE_FLAG and keys["KEY_BOXESP"]:
            if e.owner_turret >= 0 and e.owner_turret < PLAYERMAX:
                self.env.tracker.track_entity(idx, e.owner_turret)
                if read_game.player[e.owner_turret].enemy:
                    head_pos = VECTOR(e.pos.x, e.pos.y, e.pos.z + 20)  # eye position of a standing player
                    feet = read_game.world_to_screen(e.pos)
                    head = read_game.world_to_screen(head_pos)
                    if feet and head:
                        size_y = feet.y - head.y
                        if size_y < 5:
                            size_y = 5
                        size_x = size_y / 2.75  # width/height ratio
                        draw_box(frame.line, feet.x - size_x / 2, feet.y,
                                 size_x, -size_y, COLOR_BOX_OUTER_WIDTH,
                                 COLOR_SENTRY)
        elif e.type == ET_EXPLOSIVE and e.alive & ALIVE_FLAG:
            # self.draw_explosive(e)
            self.track_explosive(idx)
        elif (e.type == ET_HELICOPTER or e.type == ET_PLANE) and e.alive & ALIVE_FLAG and keys["KEY_BOXESP"]:
            if e.owner_air >= 0 and e.owner_air < PLAYERMAX:
                self.env.tracker.track_entity(idx, e.owner_air)
                if e.type == ET_PLANE or read_game.player[e.owner_air].enemy:
                    # all planes are shown because we don't know whether they are enemies
                    head_pos = VECTOR(e.pos.x, e.pos.y, e.pos.z + 100)  # eye position of a standing player
                    feet = read_game.world_to_screen(e.pos)
                    head = read_game.world_to_screen(head_pos)
                    if feet and head:
                        size_y = feet.y - head.y
                        if size_y < 10:
                            size_y = 10
                        size_x = size_y
                        draw_box(frame.line, feet.x - size_x / 2, feet.y,
                                 size_x, -size_y, COLOR_BOX_OUTER_WIDTH,
                                 COLOR_PLANE)
                        if keys["KEY_BOX_SNAPLINE"]:
                            draw_line_abs(frame.line, read_game.screen_center_x,
                                          read_game.resolution_y, feet.x, feet.y,
                                          COLOR_BOX_LINE_WIDTH, COLOR_PLANE)

    self.loop_tracked_explo()
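# Hedged side note (not part of the original code): the box sizing above boils
# down to "project the feet and an assumed head position, use their vertical
# screen distance as the height, clamp it, and derive the width from a fixed
# aspect ratio". A minimal standalone sketch, where world_to_screen is assumed
# to return an (x, y) tuple or None when the point is off-screen:
def entity_box(world_to_screen, pos, head_offset=20.0, min_height=5.0, aspect=1 / 2.75):
    feet = world_to_screen((pos[0], pos[1], pos[2]))
    head = world_to_screen((pos[0], pos[1], pos[2] + head_offset))
    if not feet or not head:
        return None
    size_y = max(feet[1] - head[1], min_height)
    size_x = size_y * aspect
    # the box is drawn upward from the feet, hence the negative height
    return (feet[0] - size_x / 2, feet[1], size_x, -size_y)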
vehicle_model.add(Dense(1470))
vehicle_model.summary()
load_weights(vehicle_model, './yolo-tiny.weights')

# single-image test: crop the road region, resize to the network input,
# convert to channel-first and normalize to [-1, 1]
image = plt.imread(filename)
image_crop = image[300:650, 500:, :]
resized = cv2.resize(image_crop, (448, 448))
batch = np.transpose(resized, (2, 0, 1))
batch = 2 * (batch / 255.) - 1
batch = np.expand_dims(batch, axis=0)
out = vehicle_model.predict(batch)
boxes = yolo_net_out_to_car_boxes(out[0], threshold=0.12)  # threshold: parameter to tune

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
ax1.imshow(image)
ax2.imshow(draw_box(boxes, plt.imread(filename), [[500, 1280], [300, 650]]))
plt.show()


def frame_function(image):
    # same preprocessing as above, applied to a single video frame
    crop = image[300:650, 500:, :]
    resized = cv2.resize(crop, (448, 448))
    batch = np.array([resized[:, :, 0], resized[:, :, 1], resized[:, :, 2]])
    batch = 2 * (batch / 255.) - 1
    batch = np.expand_dims(batch, axis=0)
    out = vehicle_model.predict(batch)
    boxes = yolo_net_out_to_car_boxes(out[0], threshold=0.12)  # threshold: parameter to tune
    return draw_box(boxes, image, [[500, 1280], [300, 650]])
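# Hedged usage sketch (not part of the original code): frame_function is a
# per-frame pipeline, so one plausible way to run it over a video is through
# moviepy's fl_image. The file names below are placeholders.
from moviepy.editor import VideoFileClip

clip = VideoFileClip("project_video.mp4")    # hypothetical input path
annotated = clip.fl_image(frame_function)    # run detection on every frame
annotated.write_videofile("project_video_out.mp4", audio=False)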