def get_detector_image_generator(labels, width, height, augmenter=None,
                                 area_threshold=0.5):
    """Generate augmented (image, lines) tuples from a list
    of (filepath, lines, confidence) tuples. Confidence is
    not used right now but is included for a future release
    that uses semi-supervised data.

    Args:
        labels: A list of (image, lines, confidence) tuples.
        augmenter: An augmenter to apply to the images.
        width: The width to use for output images.
        height: The height to use for output images.
        area_threshold: The area threshold to use to keep
            characters in augmented images.
    """
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        # confidence is unused in this variant, see docstring
        image_filepath, lines, _ = labels[index]
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         augmenter=augmenter)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines, boxes_format='lines', scale=scale)
        bboxes = [line[0] for line in lines]
        words = [line[1] for line in lines]
        words = ''.join(words)
        yield (image[np.newaxis, ...],
               np.array(bboxes)[np.newaxis, ...],
               np.array(words)[np.newaxis, ...],
               np.ones((image.shape[0], image.shape[1]), np.float32)[np.newaxis, ...],
               np.ones(len(words), np.float32)[np.newaxis, ...])
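# Minimal usage sketch for the generator above (hypothetical data: the image
# path and the single (box, word) annotation below are placeholders, and the
# exact `lines` structure depends on how the dataset was loaded upstream).
labels = [
    ('images/sample1.jpg',
     [(np.array([[10, 10], [60, 10], [60, 40], [10, 40]]), 'A')],
     1.0),
]
generator = get_detector_image_generator(labels, width=640, height=640)
images, bboxes, words, mask, word_confidences = next(generator)
print(images.shape)  # e.g. (1, 640, 640, 3) after letterbox fitting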
def main():
    if tf.__version__.split('.')[0] != "1":
        raise Exception("Tensorflow version 1 required")

    if a.seed is None:
        a.seed = random.randint(0, 2**31 - 1)

    tf.set_random_seed(a.seed)
    np.random.seed(a.seed)
    random.seed(a.seed)

    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    #%% test
    if a.mode == "test" or a.mode == "export":
        if a.checkpoint is None:
            raise Exception("checkpoint required for test mode")

        # load some options from the checkpoint
        options = {"which_direction", "ngf", "ndf", "lab_colorization"}
        with open(os.path.join(a.checkpoint, "options.json")) as f:
            for key, val in json.loads(f.read()).items():
                if key in options:
                    print("loaded", key, "=", val)
                    setattr(a, key, val)
        # disable these features in test mode
        a.scale_size = CROP_SIZE
        a.flip = False

    #%%
    for k, v in a._get_kwargs():
        print(k, "=", v)

    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))

    #%% export the meta
    if a.mode == "export":
        # export the generator to a meta graph that can be imported later for standalone generation
        if a.lab_colorization:
            raise Exception("export not supported for lab_colorization")

        input = tf.placeholder(tf.string, shape=[1])
        input_data = tf.decode_base64(input[0])
        input_image = tf.image.decode_png(input_data)

        # remove alpha channel if present
        # if true, execute the former branch, otherwise the latter
        input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 4),
                              lambda: input_image[:, :, :3],
                              lambda: input_image)
        # convert grayscale to RGB
        input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 1),
                              lambda: tf.image.grayscale_to_rgb(input_image),
                              lambda: input_image)

        input_image = tf.image.convert_image_dtype(input_image, dtype=tf.float32)
        input_image.set_shape([CROP_SIZE, CROP_SIZE, 3])
        batch_input = tf.expand_dims(input_image, axis=0)

        with tf.variable_scope("generator"):
            batch_output = tools.deprocess(
                create_generator(tools.preprocess(batch_input), 3))

        output_image = tf.image.convert_image_dtype(batch_output, dtype=tf.uint8)[0]
        if a.output_filetype == "png":
            output_data = tf.image.encode_png(output_image)
        elif a.output_filetype == "jpeg":
            output_data = tf.image.encode_jpeg(output_image, quality=80)
        else:
            raise Exception("invalid filetype")
        output = tf.convert_to_tensor([tf.encode_base64(output_data)])

        key = tf.placeholder(tf.string, shape=[1])
        inputs = {"key": key.name, "input": input.name}
        tf.add_to_collection("inputs", json.dumps(inputs))
        outputs = {
            "key": tf.identity(key).name,
            "output": output.name,
        }
        tf.add_to_collection("outputs", json.dumps(outputs))

        init_op = tf.global_variables_initializer()
        restore_saver = tf.train.Saver()
        export_saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            restore_saver.restore(sess, checkpoint)
            print("exporting model")
            export_saver.export_meta_graph(
                filename=os.path.join(a.output_dir, "export.meta"))
            export_saver.save(sess,
                              os.path.join(a.output_dir, "export"),
                              write_meta_graph=False)

        return

    #%%
    examples = load_examples(a)
    print("examples count = %d" % examples.count)

    # inputs and targets are [batch_size, height, width, channels]
    model = create_model(examples.inputs, examples.targets, a)

    # undo colorization splitting on images that we use for display/output
    if a.lab_colorization:
        if a.which_direction == "AtoB":
            # inputs is brightness, this will be handled fine as a grayscale image
            # need to augment targets and outputs with brightness
            targets = tools.augment(examples.targets, examples.inputs)
            outputs = tools.augment(model.outputs, examples.inputs)
            # inputs can be deprocessed normally and handled as if they are single channel
            # grayscale images
            inputs = tools.deprocess(examples.inputs)
        elif a.which_direction == "BtoA":
            # inputs will be color channels only, get brightness from targets
            inputs = tools.augment(examples.inputs, examples.targets)
            targets = tools.deprocess(examples.targets)
            outputs = tools.deprocess(model.outputs)
        else:
            raise Exception("invalid direction")
    else:
        inputs = tools.deprocess(examples.inputs)
        targets = tools.deprocess(examples.targets)
        outputs = tools.deprocess(model.outputs)

    # reverse any processing on images so they can be written to disk or displayed to user
    with tf.name_scope("convert_inputs"):
        converted_inputs = convert(inputs)

    with tf.name_scope("convert_targets"):
        converted_targets = convert(targets)

    with tf.name_scope("convert_outputs"):
        converted_outputs = convert(outputs)

    with tf.name_scope("encode_images"):
        display_fetches = {
            "paths": examples.paths,
            "inputs": tf.map_fn(tf.image.encode_png, converted_inputs,
                                dtype=tf.string, name="input_pngs"),
            "targets": tf.map_fn(tf.image.encode_png, converted_targets,
                                 dtype=tf.string, name="target_pngs"),
            "outputs": tf.map_fn(tf.image.encode_png, converted_outputs,
                                 dtype=tf.string, name="output_pngs"),
        }

    # summaries
    with tf.name_scope("inputs_summary"):
        tf.summary.image("inputs", converted_inputs)

    with tf.name_scope("targets_summary"):
        tf.summary.image("targets", converted_targets)

    with tf.name_scope("outputs_summary"):
        tf.summary.image("outputs", converted_outputs)

    with tf.name_scope("predict_real_summary"):
        tf.summary.image(
            "predict_real",
            tf.image.convert_image_dtype(model.predict_real, dtype=tf.uint8))

    with tf.name_scope("predict_fake_summary"):
        tf.summary.image(
            "predict_fake",
            tf.image.convert_image_dtype(model.predict_fake, dtype=tf.uint8))

    tf.summary.scalar("discriminator_loss", model.discrim_loss)
    tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN)
    tf.summary.scalar("generator_loss_L1", model.gen_loss_L1)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    saver = tf.train.Saver(max_to_keep=1)

    logdir = a.output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None
    sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None)
    with sv.managed_session() as sess:
        print("parameter_count =", sess.run(parameter_count))

        if a.checkpoint is not None:
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            saver.restore(sess, checkpoint)

        max_steps = 2**32
        if a.max_epochs is not None:
            max_steps = examples.steps_per_epoch * a.max_epochs
        if a.max_steps is not None:
            max_steps = a.max_steps

        if a.mode == "test":
            # testing
            # at most, process the test data once
            max_steps = min(examples.steps_per_epoch, max_steps)
            for step in range(max_steps):
                results = sess.run(display_fetches)
                filesets = save_images(results)
                for i, f in enumerate(filesets):
                    print("evaluated image", f["name"])
                index_path = append_index(filesets)
            print("wrote index at", index_path)
        else:
            # training
            start = time.time()

            for step in range(max_steps):
                def should(freq):
                    return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1)

                options = None
                run_metadata = None
                if should(a.trace_freq):
                    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                fetches = {
                    "train": model.train,
                    "global_step": sv.global_step,
                }

                if should(a.progress_freq):
                    fetches["discrim_loss"] = model.discrim_loss
                    fetches["gen_loss_GAN"] = model.gen_loss_GAN
                    fetches["gen_loss_L1"] = model.gen_loss_L1

                if should(a.summary_freq):
                    fetches["summary"] = sv.summary_op

                if should(a.display_freq):
                    fetches["display"] = display_fetches

                results = sess.run(fetches, options=options, run_metadata=run_metadata)

                if should(a.summary_freq):
                    print("recording summary")
                    sv.summary_writer.add_summary(results["summary"],
                                                  results["global_step"])

                if should(a.display_freq):
                    print("saving display images")
                    filesets = save_images(results["display"],
                                           step=results["global_step"])
                    append_index(filesets, step=True)

                if should(a.trace_freq):
                    print("recording trace")
                    sv.summary_writer.add_run_metadata(
                        run_metadata, "step_%d" % results["global_step"])

                if should(a.progress_freq):
                    # global_step will have the correct step count if we resume from a checkpoint
                    train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch)
                    train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1
                    rate = (step + 1) * a.batch_size / (time.time() - start)
                    remaining = (max_steps - step) * a.batch_size / rate
                    print("progress epoch %d step %d image/sec %0.1f remaining %dm" %
                          (train_epoch, train_step, rate, remaining / 60))
                    print("discrim_loss", results["discrim_loss"])
                    print("gen_loss_GAN", results["gen_loss_GAN"])
                    print("gen_loss_L1", results["gen_loss_L1"])

                if should(a.save_freq):
                    print("saving model")
                    saver.save(sess,
                               os.path.join(a.output_dir, "model"),
                               global_step=sv.global_step)

                if sv.should_stop():
                    break
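# Minimal sketch (separate from the script above) of how the meta graph written
# by the "export" mode could be restored for standalone generation. The paths
# "output_dir/export*", "input.png", and "output.png" are placeholders; it
# assumes TensorFlow 1.x and the "inputs"/"outputs" collections created above.
import base64
import json
import tensorflow as tf

with tf.Session() as sess:
    saver = tf.train.import_meta_graph("output_dir/export.meta")
    saver.restore(sess, "output_dir/export")
    inputs = json.loads(tf.get_collection("inputs")[0])
    outputs = json.loads(tf.get_collection("outputs")[0])
    with open("input.png", "rb") as f:
        # the graph expects web-safe base64 (tf.decode_base64)
        png_b64 = base64.urlsafe_b64encode(f.read())
    result = sess.run(outputs["output"],
                      feed_dict={inputs["input"]: [png_b64],
                                 inputs["key"]: ["example"]})
    # tf.encode_base64 omits padding by default, so restore it before decoding
    decoded = base64.urlsafe_b64decode(result[0] + b"=" * (-len(result[0]) % 4))
    with open("output.png", "wb") as f:
        f.write(decoded)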
visualize(coposition, category_id_to_name)
# print('here')

# PREPARE THE DATASET FOR TRAINING
imageSize = [1920, 1080, 3]
trainingImageSize = [640, 360, 3]
path = "C:/Users/Nucelles 3.0/Documents/BICS/BSP S2/Project/DATA/testing"
category_id_to_name = {1: "License Plate"}

x_test, y_test = prepareDataset(path, 50, trainingImageSize, 500)

i = 1
predFormatted = dataAugmentFormat(x_test[i], y_test[i] * 255)
resizedComposedImage = augment([Resize(p=1, height=144, width=256)])
finalComposedImage = resizedComposedImage(**predFormatted)
showPrediction(finalComposedImage)

model = load_model("model_30-03-2020_01-02-36_PM.h5")

results = model.evaluate(x_test, y_test, batch_size=128)
print('test loss, test acc:', results)
# score = model.evaluate(x_train, y_train)
# print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

toShow = 3
def get_detector_image_generator(labels,
                                 width,
                                 height,
                                 augmenter=None,
                                 area_threshold=0.5,
                                 focused=False,
                                 min_area=None):
    """Generate augmented (image, lines) tuples from a list
    of (filepath, lines, confidence) tuples. Confidence is
    not used right now but is included for a future release
    that uses semi-supervised data.

    Args:
        labels: A list of (image, lines, confidence) tuples.
        augmenter: An augmenter to apply to the images.
        width: The width to use for output images.
        height: The height to use for output images.
        area_threshold: The area threshold to use to keep
            characters in augmented images.
        min_area: The minimum area for a character to be included.
        focused: Whether to pre-crop images to width/height containing
            a region containing text.
    """
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        image_filepath, lines, confidence = labels[index]
        # print(image_filepath)
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         min_area=min_area,
                                         augmenter=augmenter)
        if focused:
            boxes = [tools.combine_line(line)[0] for line in lines]
            if boxes:
                selected = np.array(boxes[np.random.choice(len(boxes))])
                left, top = selected.min(axis=0).clip(0, np.inf).astype('int')
                if left > 0:
                    left -= np.random.randint(0, min(left, width / 2))
                if top > 0:
                    top -= np.random.randint(0, min(top, height / 2))
                image, lines = tools.augment(
                    boxes=lines,
                    augmenter=imgaug.augmenters.Sequential([
                        imgaug.augmenters.Crop(px=(int(top), 0, 0, int(left))),
                        imgaug.augmenters.CropToFixedSize(width=width,
                                                          height=height,
                                                          position='right-bottom')
                    ]),
                    boxes_format='lines',
                    image=image,
                    min_area=min_area,
                    area_threshold=area_threshold)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines, boxes_format='lines', scale=scale)
        yield image, lines, confidence
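# Minimal usage sketch for the generator above (hypothetical `labels`; assumes
# each entry follows the (filepath, lines, confidence) structure from the
# docstring). The imgaug augmenter is illustrative only.
augmenter = imgaug.augmenters.Sequential([
    imgaug.augmenters.Affine(rotate=(-5, 5)),
    imgaug.augmenters.GaussianBlur(sigma=(0, 0.5)),
])
generator = get_detector_image_generator(labels,
                                          width=640,
                                          height=640,
                                          augmenter=augmenter,
                                          focused=True)
image, lines, confidence = next(generator)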
def real_data_generator(labels,
                        width,
                        height,
                        augmenter=None,
                        area_threshold=0.5):
    """Generate (image, lines_label, confidence) tuples from real data by
    using a pretrained detector to infer pseudo character-level boxes for
    each annotated word."""
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        image_filepath, lines = labels[index]
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         augmenter=augmenter)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines, boxes_format='lines', scale=scale)

        confidence_mask = np.zeros((image.shape[0], image.shape[1]), np.float32)
        confidences = []
        # character_bboxes = np.array([]).reshape(0, 4, 2)
        # new_words = []
        lines_label = []
        detector = Detector()
        if len(lines) == 1:
            lines = lines[0]
        for i, line in enumerate(lines):
            word_label = []
            word_bbox, word = line[0], line[1]
            word = word.replace(',', '')
            word_bbox = np.float32(word_bbox)
            if len(word_bbox) > 0:
                for _ in range(len(word_bbox)):
                    # zero out the confidence mask for unreadable ('###') or empty words
                    if word == '###' or len(word.strip()) == 0:
                        cv2.fillPoly(confidence_mask, [np.int32(word_bbox)], (0))
            # infer pseudo character-level boxes for the word using the detector
            pursedo_bboxes, bbox_region_scores, confidence = inference_character_box(
                detector, image, word, word_bbox)
            confidences.append(confidence)
            cv2.fillPoly(confidence_mask, [np.int32(word_bbox)], (confidence))
            # pair each pseudo character box with its character
            for j in range(len(pursedo_bboxes)):
                if j > len(word) - 1:
                    continue
                word_label.append((pursedo_bboxes[j], word[j]))
            lines_label.append(word_label)
            # new_words.append(word)
            # character_bboxes = np.concatenate((character_bboxes, pursedo_bboxes), 0)
            # character_bboxes.append(pursedo_bboxes)
        yield image, lines_label, 1
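# Minimal usage sketch for real_data_generator (hypothetical `labels`; assumes
# each entry is a (filepath, lines) pair where lines is a list of
# (word_box, word_text) tuples, as consumed above).
generator = real_data_generator(labels, width=640, height=640)
image, lines_label, confidence = next(generator)
for word_label in lines_label:
    for char_box, char in word_label:
        # each character is paired with its inferred pseudo character box
        print(char, np.int32(char_box))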