def __sort_options(self, after, options, layer_index):
    '''Sort the options (combinations of befores and marks) by comparing
    their activations with the after result.

    Keyword arguments:
    after -- image used as the basis of comparison for the combinations
    options -- combinations of befores and marks to sort
    layer_index -- layer at which we are judging everything

    Returns:
    Returns a list of the options sorted by how closely their activations
    match the after image.
    '''
    start_time = time.time()

    # get activations for the after image at this layer
    after_act = get_layers_output(self.layers,
                                  [self.layer_names[layer_index]], after)[0]
    after_act = np.einsum('ijkc->c', after_act)
    target = normalize_L2(after_act)

    # get activations for each option at this layer
    result_acts = []
    for option in options:
        result = option[0]
        result_f = get_np_arr(result)
        result_act = get_layers_output(self.layers,
                                       [self.layer_names[layer_index]],
                                       result_f)[0]
        result_act = np.einsum('ijkc->c', result_act)
        result_act = normalize_L2(result_act)
        result_acts.append(result_act)

    # squared Euclidean distance between each option and the after target
    error = []
    for result_act in result_acts:
        error.append(np.sum((result_act - target) ** 2))

    # sort and return options in order of best match to after
    sort_idx = np.argsort(error)
    options_ordered = [options[i] for i in sort_idx]

    print('Found after matches in --- %s seconds ---' % (time.time() - start_time))
    return options_ordered

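# `normalize_L2` is defined elsewhere in this repo. The distance comparison above
# relies on it producing unit-length vectors, so the squared Euclidean distance
# between two activation vectors is monotonic in their cosine distance. The sketch
# below shows the assumed behavior under a hypothetical name so it does not shadow
# the real helper (numpy is already imported as np in this module).
def _normalize_L2_sketch(v):
    norm = np.linalg.norm(v)
    # guard against all-zero activation vectors
    return v / norm if norm > 0 else v
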
def __get_mark_matches(self, mark_to_match, n):
    '''Return the top n matches to the given mark at layers 2 and 4 from
    the corpus. We could do every level, but this is faster and is meant
    to represent low-level (lines) and high-level (closed forms) concepts.

    Keyword arguments:
    mark_to_match -- the mark made, as an np array
    n -- number of matches to return

    Returns:
    Returns two lists of mark matches (from layer 2 and layer 4).
    '''
    start_time = time.time()

    # layer 2: resize the mark to the conv2 receptive-field size
    f_size2 = layers_meta[1][2]
    mark2 = cv2.resize(mark_to_match, (f_size2, f_size2),
                       interpolation=cv2.INTER_AREA)
    mark2_f = get_np_arr(mark2)
    mark2_acts = get_layers_output(self.layers, ['conv2'], mark2_f)[0]
    mark2_acts = mark2_acts[0, 0, 0, :]
    target2 = normalize_L2(mark2_acts)
    indices2 = self.ANN.get_nn(target2, 2, n)

    # get images corresponding to the layer 2 indices
    top_matches2 = []
    for idx in indices2:
        top_match = cv2.normalize(self.imgs2[idx], None, alpha=0, beta=1,
                                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        top_matches2.append(top_match)

    # layer 4: resize the mark to the conv4 receptive-field size
    f_size4 = layers_meta[3][2]
    mark4 = cv2.resize(mark_to_match, (f_size4, f_size4),
                       interpolation=cv2.INTER_AREA)
    mark4_f = get_np_arr(mark4)
    mark4_acts = get_layers_output(self.layers, ['conv4'], mark4_f)[0]
    mark4_acts = mark4_acts[0, 2, 2, :]
    target4 = normalize_L2(mark4_acts)
    indices4 = self.ANN.get_nn(target4, 4, n)

    # get images corresponding to the layer 4 indices
    top_matches4 = []
    for idx in indices4:
        top_match = cv2.normalize(self.imgs4[idx], None, alpha=0, beta=1,
                                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        top_matches4.append(top_match)

    print('Fetched mark matches in --- %s seconds ---' % (time.time() - start_time))
    return top_matches2, top_matches4

def get_before_match_options(img, shape_to_match, pct_to_keep, layers, layer_name):
    '''Return a filtered set of the image split into segments matching the
    given shape.

    Keyword arguments:
    img -- image to split
    shape_to_match -- shape to match when splitting
    pct_to_keep -- percent of the image pieces to keep
    layers -- layers to use for calculating activations
    layer_name -- name of the layer to get activations for

    Returns:
    Returns a list of img pieces, their activations, and their locations.
    '''
    start_time = time.time()

    h, w, c = img.shape
    img_pieces = []
    locations = []
    acts = []

    # slide a window of the given shape over the image one pixel at a time
    ySteps = int(h - shape_to_match[0]) + 1
    xSteps = int(w - shape_to_match[1]) + 1
    for y in range(0, ySteps):
        for x in range(0, xSteps):
            # randomly skip pieces so that only roughly pct_to_keep are kept
            if random.random() > pct_to_keep:
                continue

            # get section of the before image
            # (shape_to_match is (height, width, channels))
            x_start = x
            x_end = x_start + shape_to_match[1]
            y_start = y
            y_end = y_start + shape_to_match[0]
            img_piece = img[y_start:y_end, x_start:x_end]
            img_pieces.append(img_piece)

            # record location of this piece
            location = {'x': x_start, 'y': y_start}
            locations.append(location)

            # get activations for the piece at the specified layer
            act = get_layers_output(layers, [layer_name], img_piece)[0]
            # sum the activations over the spatial dimensions for each channel
            act_avg = np.einsum('ijkc->c', act)
            acts.append(act_avg)

    print('Divided in --- %s seconds ---' % (time.time() - start_time))
    return img_pieces, acts, locations

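# Illustrative call (kept as a comment so nothing runs at import time). The names
# `canvas` and `layers` are hypothetical stand-ins for the current AI image and the
# loaded Sketch-A-Net layers; the shape and layer name come from layers_meta, as
# elsewhere in this module:
#
#   layer_name, stride, f_size, padding = layers_meta[1]
#   pieces, acts, locations = get_before_match_options(
#       canvas, (f_size, f_size, 1), 0.001, layers, layer_name)
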
def __get_before_matches(self, imgs, befores, n):
    '''Get matches from images that are similar to the before images.

    Keyword arguments:
    imgs -- the current state of the AI images
    befores -- the before state where the mark was made on the human canvas,
               at sizes appropriate for each layer
    n -- number of matches to fetch

    Returns:
    Returns a list (by layer) of lists of before matches.
    '''
    start_time = time.time()

    # the percent of randomly selected pieces of the image to search.
    # higher values make it slower; there are many more strides at the
    # lower layers.
    pcts = [0.0001, 0.001, 0.001, 0.001]

    # find the best matches for each layer, limited to the first 3 layers
    # because of an issue finding matches at L4 (see the TODO below)
    before_matches_by_layer = []
    for i in range(0, 3):
        before = befores[i]
        shape_to_match = before.shape
        img_pieces, acts, locations = get_before_match_options(
            imgs[i], shape_to_match, pcts[i], self.layers, self.layer_names[i])

        # get the activations for this before image
        before_act = get_layers_output(self.layers, [self.layer_names[i]],
                                       before)[0]
        before_act = np.einsum('ijkc->c', before_act)
        target = normalize_L2(before_act)

        # find the n closest matches from the chopped-up image using the
        # squared Euclidean distance between L2-normalized activations
        error = []
        for act in acts:
            act = normalize_L2(act)
            error.append(np.sum((act - target) ** 2))
        sort_idx = np.argsort(error)

        # get top matches that hopefully do not overlap
        n_safe = min(n, len(sort_idx))
        top_matches = []
        for j in range(n_safe):
            match_idx = sort_idx[j]
            location = locations[match_idx]
            img_piece = img_pieces[match_idx]
            match = [img_piece, location]
            # ignore matches that overlap matches we already found
            if not overlapsInList(match, top_matches):
                top_matches.append(match)

        # keep the imgs and their locations for this layer
        before_matches_by_layer.append(top_matches)

    # TODO: Fix issue with L4!
    # matching does not work well at L4 for some reason,
    # so for now copy the L3 matches to L4 and resize them
    before_matches_L4 = []
    img_L3 = imgs[2]
    layer_name3, stride3, f_size3, padding3 = layers_meta[2]
    layer_name4, stride4, f_size4, padding4 = layers_meta[3]
    # integer padding so the L3 match sits centered in the larger L4 window
    padding = (f_size4 - f_size3) // 2
    for before_match in before_matches_by_layer[2]:
        before_match_img, location = before_match
        x, y = location['x'] - padding, location['y'] - padding
        end_x, end_y = x + f_size4, y + f_size4
        location = {'x': x, 'y': y}
        # numpy indexing is [row, col], i.e. [y, x]
        before_match_img = img_L3[y:end_y, x:end_x]
        h, w, c = before_match_img.shape
        # pad with black if the window runs past the edge of the image
        before_match_img = cv2.copyMakeBorder(before_match_img, 0,
                                              (f_size4 - h), 0, (f_size4 - w),
                                              cv2.BORDER_CONSTANT,
                                              value=[0., 0., 0.])
        before_matches_L4.append((before_match_img, location))
    before_matches_by_layer.append(before_matches_L4)

    print('Fetched before matches in --- %s seconds ---' % (time.time() - start_time))
    return before_matches_by_layer

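# `overlapsInList` is defined elsewhere in this repo; it is assumed to reject a
# candidate whose window intersects any already-accepted match. The sketch below
# shows that assumed behavior under a hypothetical name, deriving the window size
# from the image piece itself (matches are [img_piece, location] pairs as above).
def _overlaps_in_list_sketch(match, matches):
    piece, loc = match
    h, w = piece.shape[:2]
    for _, other_loc in matches:
        # two windows of size (h, w) intersect when their anchors are closer
        # than the window extent along both axes
        if abs(loc['x'] - other_loc['x']) < w and abs(loc['y'] - other_loc['y']) < h:
            return True
    return False
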
def load_corpus_for_layer_2(sample_rate, pct_to_keep, low_threshold=None,
                            low_threshold_pct=1, top_threshold=None,
                            top_threshold_pct=1):
    '''Load the corpus of image pieces to use for low-level mark matches.

    Keyword arguments:
    sample_rate -- percent of images from the corpus to keep
    pct_to_keep -- percent of image segments to keep
    low_threshold -- threshold used to filter mostly empty image pieces
    low_threshold_pct -- percent of pieces below the low threshold to keep
    top_threshold -- threshold used to filter very busy image pieces
    top_threshold_pct -- percent of pieces above the top threshold to keep

    Returns:
    Returns a list of img pieces and a corresponding list of their
    activations at layer 2.
    '''
    start_time = time.time()

    # load the Sketch-A-Net layers to use for calculating activations
    layers = load_layers('./data/model_without_order_info_224.mat')

    # get images from the TU Berlin corpus
    imgs = load_corpus_imgs_for_TU_Berlin(sample_rate)

    # keep a record of the pieces removed by the bottom and top filters,
    # and of those kept
    b_imgs = []
    t_imgs = []
    kept_imgs = []
    layer_name, stride, f_size, padding = layers_meta[1]
    for img in imgs:
        # split the image into pieces matching the layer 2 receptive field
        img_segs = split_img_by_receptive_field(img, stride, f_size, padding)
        for img_seg in img_segs:
            # randomly skip some percentage of the segments
            if random.random() > pct_to_keep:
                continue

            # calculate how much of the piece is covered by markings
            img_sum = np.sum(img_seg) / (f_size**2)

            # check low threshold: drop most of the nearly empty pieces
            if low_threshold is not None:
                if img_sum < low_threshold and random.random() > low_threshold_pct:
                    b_imgs.append(img_seg)
                    continue

            # check top threshold: drop most of the very busy pieces
            if top_threshold is not None:
                if img_sum > top_threshold and random.random() > top_threshold_pct:
                    t_imgs.append(img_seg)
                    continue

            # keep the piece
            kept_imgs.append(img_seg)

    # save the filtered images to help with tuning the filters
    save_imgs(b_imgs, 'b_2_imgs_debug')
    save_imgs(t_imgs, 't_2_imgs_debug')
    save_imgs(kept_imgs, 'kept_2_imgs_debug')

    # calculate the activations for the kept images at layer 2
    acts = []
    for img in kept_imgs:
        act = get_layers_output(layers, ['conv2'], img)[0]
        # sum the activations over the spatial dimensions for each channel
        act_avg = np.einsum('ijkc->c', act)
        acts.append(act_avg)

    print('Loaded corpus in --- %s seconds ---' % (time.time() - start_time))
    print('Loaded ' + str(len(kept_imgs)) + ' imgs and ' + str(len(acts)) + ' acts')
    return kept_imgs, acts

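# Illustrative call, kept as a comment so nothing runs at import time. The threshold
# values here are hypothetical; in practice they are tuned by inspecting the
# *_2_imgs_debug folders written above. The returned pieces and activations
# presumably feed the nearest-neighbor index consumed by __get_mark_matches.
#
#   imgs2, acts2 = load_corpus_for_layer_2(
#       sample_rate=0.1, pct_to_keep=0.01,
#       low_threshold=0.05, low_threshold_pct=0.1,
#       top_threshold=0.6, top_threshold_pct=0.25)
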
def load_corpus_for_layer_4(sample_rate, low_threshold=None, low_threshold_pct=1,
                            top_threshold=None, top_threshold_pct=1):
    '''Load the corpus of image pieces to use for high-level mark matches.

    Keyword arguments:
    sample_rate -- sampling stride; every Nth image from the corpus is kept
    low_threshold -- threshold used to filter mostly empty image pieces
    low_threshold_pct -- percent of pieces below the low threshold to keep
    top_threshold -- threshold used to filter very busy image pieces
    top_threshold_pct -- percent of pieces above the top threshold to keep

    Returns:
    Returns a list of img pieces and a corresponding list of their
    activations at layer 4.
    '''
    start_time = time.time()

    # load the Sketch-A-Net layers to use for calculating activations
    layers = load_layers('./data/model_without_order_info_224.mat')
    layer4_size = layers_meta[3][2]

    # load the preprocessed sketch corpus (contains images from Google
    # "Quick, Draw!" and TU Berlin)
    with open('./data/sketchrnn_corpus.txt', 'rb') as fp:
        imgs = pickle.load(fp)

    # keep a record of the images removed by the bottom and top filters,
    # and of those kept
    b_imgs = []
    t_imgs = []
    kept_imgs = []
    for i, img in enumerate(imgs):
        # keep every Nth image, where N is the sample_rate
        if i % sample_rate == 0:
            # cut out the center of the image to test against the thresholds;
            # this gives a better filter than using the whole image
            h, w = img.shape
            stt = int(h * 0.25)
            end = int(h * 0.75)
            cutout_size = int(h * 0.5)
            img_seg = img[stt:end, stt:end]

            # calculate how much of the cutout is covered by markings
            img_sum = np.sum(img_seg) / (cutout_size**2)

            # check low threshold: drop most of the nearly empty images
            if low_threshold is not None:
                if img_sum < low_threshold and random.random() > low_threshold_pct:
                    b_imgs.append(img_seg)
                    continue

            # check top threshold: drop most of the very busy images
            if top_threshold is not None:
                if img_sum > top_threshold and random.random() > top_threshold_pct:
                    t_imgs.append(img_seg)
                    continue

            # keep the image
            img = img.reshape(layer4_size, layer4_size, 1)
            kept_imgs.append(img)

    # save the filtered images to help with tuning the filters
    save_imgs(b_imgs, 'b_4_imgs_debug')
    save_imgs(t_imgs, 't_4_imgs_debug')
    save_imgs(kept_imgs, 'kept_4_imgs_debug')

    # calculate the activations for the kept images at layer 4
    acts = []
    for img in kept_imgs:
        act = get_layers_output(layers, ['conv4'], img)[0]
        # sum the activations over the spatial dimensions for each channel
        act_avg = np.einsum('ijkc->c', act)
        acts.append(act_avg)

    print('Loaded corpus in --- %s seconds ---' % (time.time() - start_time))
    print('Loaded ' + str(len(kept_imgs)) + ' imgs and ' + str(len(acts)) + ' acts')
    return kept_imgs, acts