import numpy as np
from skimage.color import rgb2gray  # assumed source of rgb2gray

# imread_resize, grid_patches and Progress are assumed to be helpers
# defined elsewhere in this package.


def training_patches(imnames, npatches, psize, maxdim=None, colour=False,
                     verbose=False):
    """ Extract patches from images for dictionary training

        Arguments:
            imnames: A list of image names from which to extract training
                patches.
            npatches: The number (int) of patches to extract from the images.
            psize: An int giving the side length of the square patches to
                extract.
            maxdim: The maximum dimension of the image in pixels. The image
                is rescaled if it is larger than this. By default there is
                no scaling.
            colour: bool, keep RGB channels rather than converting to grey.
            verbose: bool, print a progress bar.

        Returns:
            An np.array (npatches, psize**2*3) for RGB or
            (npatches, psize**2) for grey of flattened image patches.
            NOTE: the actual number of patches found may be less than that
            requested.
    """

    nimg = len(imnames)
    ppeimg = int(round(float(npatches) / nimg))  # patches per image
    plist = []

    # Set up progress updates
    progbar = Progress(nimg, title='Extracting patches', verbose=verbose)

    for i, ims in enumerate(imnames):
        img = imread_resize(ims, maxdim)  # read in and resize the image

        # Grid spacing chosen to yield roughly ppeimg patches per image
        spacing = max(int(round(img.shape[1] * ppeimg**(-0.5))), 1)

        # Extract patches, converting to greyscale if necessary
        if (not colour) and (img.ndim == 3):
            imgg = rgb2gray(img)
            plist.append(grid_patches(imgg, psize, spacing)[0])
        else:
            plist.append(grid_patches(img, psize, spacing)[0])

        progbar.update(i)

    progbar.finished()

    # Stack all patches and flatten each one into a row vector
    patches = np.concatenate(plist, axis=0)
    return np.reshape(patches, (patches.shape[0], np.prod(patches.shape[1:])))
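# A minimal usage sketch for training_patches, assuming the helpers above
# are importable. The image paths below are hypothetical.
if __name__ == '__main__':
    imnames = ['images/a.jpg', 'images/b.jpg']  # hypothetical file names
    # Request ~1000 8x8 greyscale patches, resizing large images to 512 px
    patches = training_patches(imnames, npatches=1000, psize=8, maxdim=512,
                               colour=False, verbose=True)
    print(patches.shape)  # roughly (1000, 64); may be fewer rows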
import csv
import re
import time

from rdflib import URIRef

# fd, n, g, source, outfile, progress, start and wrongs are assumed to be
# set up earlier in this script.

# Note: csv requires escapechar to be a one-character string or None;
# the empty string '' is invalid and raises a TypeError.
data = csv.DictReader(fd, delimiter="\t", quotechar='"', escapechar=None)

for r in data:
    raw_id = r['raw_id']

    # Check that the raw ID is a valid IMDb identifier with a regex
    match = re.match(r"^(tt)*(?P<id>\d{7,10}).*", raw_id)
    if not match:
        progress.count()
        wrongs.append(raw_id)
        continue

    imdb_id = match.group('id')
    film_node = n['Movie/tt' + imdb_id]

    # Create a node for the DBpedia URI and link it to the film node
    uri = r['uri']
    wiki_node = URIRef(uri)
    g.add((film_node, n['has' + source + 'Node'], wiki_node))

    progress.count()
    if progress.finished():
        break

g.serialize(destination=outfile, format='turtle')
end = time.time()

print('Wrongly formatted IMDb IDs found:', len(wrongs))
print(wrongs)
print('Total items processed:', progress.total)
print('Total time:', end - start)
g.close()
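# A small sketch of what the ID-validation regex above accepts: the 'tt'
# prefix is optional, and only IDs with 7 to 10 digits pass. The sample
# strings below are illustrative only.
import re

id_re = re.compile(r"^(tt)*(?P<id>\d{7,10}).*")

for s in ['tt0111161', '0111161', 'tt123', 'abc']:
    m = id_re.match(s)
    print(s, '->', m.group('id') if m else 'invalid')
# tt0111161 -> 0111161
# 0111161 -> 0111161
# tt123 -> invalid
# abc -> invalid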