files = [value] image_lib = FLAGS.image_lib.lower() if image_lib == 'pil': resize_crop = PILResizeCrop() else: resize_crop = OpenCVResizeCrop() for i, line in enumerate(files): try: line = line.replace(FLAGS.input_folder, '').strip() line = line.split() image_file_name = line[0] input_file = os.path.join(FLAGS.input_folder, image_file_name) output_file = os.path.join(FLAGS.output_folder, image_file_name) output_dir = output_file[:output_file.rfind('/')] if not os.path.exists(output_dir): os.makedirs(output_dir) feat = resize_crop.resize_and_crop_image(input_file, output_file, FLAGS.output_side_length) except Exception, e: # we ignore the exception (maybe the image is corrupted?) print line, Exception, e yield value, FLAGS.output_folder mapreducer.REGISTER_DEFAULT_MAPPER(ResizeCropImagesMapper) mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader) mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.FileWriter) if __name__ == '__main__': launcher.launch()
os.makedirs(FLAGS.feature_dir) except OSError: pass def read_image(self, name): """Reads the image and does the manipulation """ img = datasets.imread_rgb(name) return datasets.manipulate(img, None, None, None, CENTER_CROP) def map(self, key, value): """key will be dummy, and value will be the image filename """ imagename = os.path.basename(value) feature = self._conv.process( \ self.read_image(os.path.join(FLAGS.image_dir, value)),\ convbuffer = self._buffer) np.save(os.path.join(FLAGS.feature_dir, imagename), feature) yield self._hostname, imagename mapreducer.REGISTER_DEFAULT_MAPPER(FeatureExtractionMapper) # for Reduce, we will simply use the identity reducer. mapreducer.REGISTER_DEFAULT_REDUCER(mapreducer.IdentityReducer) mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader) mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.PickleWriter) if __name__ == "__main__": launcher.launch()
# Registration as the *default* reducer is left disabled; the reducer is
# registered by name and selected through the --reducer flag below.
# mapreducer.REGISTER_DEFAULT_REDUCER(PygistReducer)
mapreducer.REGISTER_REDUCER(PygistReducer)
mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader)
mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.FileWriter)

if __name__ == "__main__":
    import time

    # Hard-coded command line for this demo run.
    sys.argv.extend([
        '--input=in.txt',
        '--output=out.txt',
        '--input_folder=./input',
        '--output_folder=./output',
        '--mapper=PygistMapper',
        '--reducer=PygistReducer',
        '--num_clients=3',
    ])

    # First timing: the job run through the launcher.
    start_time = time.time()
    launcher.launch(sys.argv)
    print(time.time() - start_time)

    # Second timing: the same feature computation done by a plain loop in
    # this process, one output file per directory.
    start_time = time.time()
    for ddir in ('./input/cat', './input/dog', './input/human'):
        files = sorted(glob.glob(os.path.join(ddir, '*.jpg')))
        features = np.zeros((len(files), FEA_DIM), dtype=FEA_TYPE)
        for row, image_path in enumerate(files):
            features[row] = process_image(image_path)
        # Each directory's matrix is stored under a random UUID file name.
        outname = str(uuid.uuid4()) + '.npy'
        np.save(os.path.join('./output', outname), features)
    print(time.time() - start_time)