def get_saved_classifier():
    '''Return a ready-to-use classifier, reusing cached artefacts when present.

    Lookup order:

    1. If a saved weights file for ``config.network_name`` exists in the data
       directory, build a classifier from the ``config`` dimensions and load
       those weights into it.
    2. Otherwise obtain training data: load ``X.npy`` / ``y.npy`` when both
       exist, else extract features from the labeled images (generating
       captcha images first when fewer than half of
       ``config.gen_train_size`` are on disk) and cache the result.
    3. Train a new network, save its weights, log a self-check accuracy
       computed on up to 100 generated images, and return it.

    The data and image directories are created when missing.
    '''
    data_dir = util.get_data_dir()
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)

    weights_path = data_dir + "/" + config.network_name + ".npy"

    # Fast path: previously trained coefficients are available.
    if os.path.exists(weights_path):
        n_inputs = config.sample_h * config.sample_w
        n_outputs = config.character_number
        classifier = neural.NeuralClassfier(
            n_inputs,
            config.hidden_layer,
            n_outputs,
            config.reg,
            random_seed=config.seed,
        )
        classifier.weights = numpy.load(weights_path)
        return classifier

    features_path = data_dir + "/X.npy"
    labels_path = data_dir + "/y.npy"

    if not (os.path.exists(features_path) and os.path.exists(labels_path)):
        # No cached training set: build one from the labeled images.
        image_dir = util.get_image_dir()
        if not os.path.exists(image_dir):
            os.mkdir(image_dir)
        available = len(glob.glob(image_dir + "/*.gif"))
        if available < config.gen_train_size / 2:
            debug("Generating recognized captcha images using perl script.")
            generate_captcha(config.gen_train_size)
        debug(
            "Saved training data is not found. Generating new by segmentating images."
        )
        X, y = segment.extract_features()
        numpy.save(features_path, X)
        numpy.save(labels_path, y)
    else:
        # Cached training set found on disk.
        X = numpy.load(features_path)
        y = numpy.load(labels_path)

    debug("Network coefficients are not found. Training new neural network.")
    classifier = neural.train_network(X, y)
    numpy.save(weights_path, classifier.weights)

    debug("Selfchecking full captcha files.")
    accuracy = test.check_labeled_dir(classifier,
                                      util.get_image_dir(),
                                      limit=100)
    debug("Accuracy on generated set: {}".format(accuracy))
    return classifier
def get_buildfile_path(path: str, image_name: str) -> str:
    '''
    Locate the buildfile inside an image directory.

    :param path: Passed to ``util.get_image_dir`` together with the image name
        to resolve the image directory.
    :param image_name: Name of the image whose buildfile is wanted.
    :returns: Path of the buildfile (image directory joined with
        ``default.Config.BUILDFILE_NAME.value``).
    :raises ValueError: If no regular file exists at that path.
    '''
    buildfile = os.path.join(
        util.get_image_dir(path, image_name),
        default.Config.BUILDFILE_NAME.value,
    )
    if os.path.isfile(buildfile):
        return buildfile
    raise ValueError(f'buildfile does not exist: {buildfile}')
def build(
        self,
        namespace: argparse.Namespace,
        image: str,
        build_config: config.ImageBuildConfig):
    '''
    Build an image with ``buildah bud``, then optionally tag and push it.

    The command is assembled and logged first; when ``namespace.dry_run`` is
    set nothing is executed. Otherwise the command runs from inside the image
    directory, using ``.`` as the build context.

    :param namespace: Namespace passed in via CLI. The attributes read here
        are ``path``, ``dry_run``, ``tag_latest`` and ``push``.
    :param image: The image to build; used to resolve the image directory.
    :param build_config: the image build configuration.
    :raises: subprocess.CalledProcessError
    '''
    LOGGER.info('Build image')
    LOGGER.info('Dry Run: %s', namespace.dry_run)

    context_dir = util.get_image_dir(namespace.path, image)
    full_name = build_config.image.full_name
    extra_build_args = builder.get_build_args(build_config)

    # Assemble: buildah bud -t <name> [--build-arg K=V ...] .
    command = util.Command(['buildah', 'bud', '-t'])
    command.add_arg(full_name)
    command.add_args_list('--build-arg', extra_build_args)

    build_version = build_config.image.tag_build.version
    if build_version:
        command.add_args('--build-arg', f'VERSION={build_version}')

    # The build context is the image directory we change into below.
    command.add_arg('.')

    LOGGER.info('Image name: %s', full_name)
    LOGGER.info('Command: %s', ' '.join(command))

    if namespace.dry_run:
        return

    with util.pushd(context_dir):
        subprocess.check_call(command)

    current_tag = build_config.image.tag
    if namespace.tag_latest and current_tag != 'latest':
        self.tag_latest(full_name)

    if namespace.push:
        self.push(namespace=namespace, image=full_name)
def extract_features():
    '''Extract features from all labeled images.

    Every ``*.gif`` file in the image directory is read as a grey image and
    passed to ``image_to_features`` together with its label, which is the
    file name without the ``.gif`` extension. The per-image results are
    concatenated along axis 0.

    Returns:
        A tuple ``(X, y)`` of concatenated per-image features and labels.
        (Exact shapes depend on ``image_to_features`` -- not visible here.)

    Raises:
        ValueError: If the image directory contains no ``*.gif`` files.
    '''
    image_dir = util.get_image_dir()
    images = glob.glob(image_dir + "/*.gif")
    if not images:
        # Without this guard the unpacking below fails with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError(
            "No labeled .gif images found in {}".format(image_dir))

    def extract_single(image_file):
        # The captcha text is encoded in the file name: <captcha>.gif
        captcha = re.match(r"(.*)\.gif",
                           os.path.basename(image_file)).group(1)
        image = util.read_grey_image(image_file)
        return image_to_features(image, captcha)

    # Transpose [(X1, y1), (X2, y2), ...] into (X1, X2, ...), (y1, y2, ...).
    X, y = zip(*map(extract_single, images))
    X = numpy.concatenate(X, axis=0)
    y = numpy.concatenate(y, axis=0)
    return (X, y)
def build(self,
          namespace: argparse.Namespace,
          image: str,
          build_config: config.ImageBuildConfig):
    '''
    Build an image using the command produced by ``self._create_command``.

    Tagging and pushing are expressed through a single ``--output`` option:
    it lists the full image name, plus a ``latest``-tagged name when
    ``namespace.tag_latest`` is set and the configured tag is not already
    ``latest``, and carries ``push=<namespace.push>``. When
    ``namespace.dry_run`` is set the command is only logged.

    :param namespace: Namespace passed in via CLI. The attributes read here
        are ``path``, ``tag_latest``, ``push`` and ``dry_run``.
    :param image: The image to build; used to resolve the image directory.
    :param build_config: the image build configuration.
    :raises: subprocess.CalledProcessError
    '''
    LOGGER.info('Build image')

    context_dir = util.get_image_dir(namespace.path, image)
    full_name = build_config.image.full_name

    build_args = builder.get_build_args(build_config)
    build_version = build_config.image.tag_build.version
    if build_version:
        build_args.append(f'VERSION={build_version}')

    command = self._create_command(
        namespace=namespace,
        action='build',
        build_args=build_args,
    )

    # One "name=..." entry per image name the build should produce.
    name_entries = [f'name={full_name}']
    current_tag = build_config.image.tag
    if namespace.tag_latest and current_tag != 'latest':
        latest_name = util.set_image_tag_latest(image=full_name)
        name_entries.append(f'name={latest_name}')
    names_output = ','.join(name_entries)

    command.add_args(
        name='--output',
        value=f'type=image,{names_output},push={namespace.push}',
    )

    LOGGER.info('Image name: %s', full_name)
    LOGGER.info('Command: %s', ' '.join(command))

    if namespace.dry_run:
        return

    with util.pushd(context_dir):
        subprocess.check_call(command)
def check_labeled_dir(NN, dir, limit=None, shift=0):
    '''Measure recognition accuracy on <captcha>.gif files.

    Each examined file is run through ``wkcaptcha.predict_file`` and the
    prediction is compared with the expected text, taken from the file name
    without its ``.gif`` extension.

    Args:
        NN: Classifier handed to ``wkcaptcha.predict_file``.
        dir: Directory holding the labeled images; created if missing.
        limit: Stop after this many images have been examined. A falsy
            value (``None`` or ``0``) means no limit.
        shift: Number of leading entries of the glob result to skip.

    Returns:
        The fraction of examined images recognized correctly, or ``0.0``
        when no image was examined (empty directory or ``shift`` beyond
        the end of the list).
    '''
    if not os.path.exists(dir):
        os.mkdir(dir)
    images = glob.glob(dir + "/*.gif")

    total = 0
    recognized = 0
    for image_file in images[shift:]:
        total += 1
        captcha_p = wkcaptcha.predict_file(NN, image_file)
        captcha = re.match(r"(.*)\.gif",
                           os.path.basename(image_file)).group(1)
        if captcha == captcha_p:
            recognized += 1
        if limit and total >= limit:
            break

    if total == 0:
        # Nothing was examined; avoid dividing by zero.
        return 0.0
    return recognized / total


if __name__ == '__main__':
    # Optional first CLI argument: directory of labeled captcha images.
    # Without it, the generated image set is checked (first 100 files).
    if len(sys.argv) > 1:
        debug("Accuracy on captcha in {} directory: {}".format(
            sys.argv[1],
            check_labeled_dir(wkcaptcha.get_saved_classifier(), sys.argv[1])))
    else:
        debug("Accuracy on generated set: {}".format(
            check_labeled_dir(wkcaptcha.get_saved_classifier(),
                              util.get_image_dir(),
                              limit=100)))