예제 #1
0
def get_saved_classifier():
    '''Return a classifier, reusing saved state whenever it is available.

    Resolution order:

    1. If ``<data_dir>/<config.network_name>.npy`` exists, construct a
       ``neural.NeuralClassfier`` from the ``config`` settings, assign the
       loaded array to its ``weights`` and return it.
    2. Otherwise obtain training data: load ``X.npy`` / ``y.npy`` from the
       data directory when both exist; else (after generating captcha images
       if fewer than ``config.gen_train_size / 2`` ``*.gif`` files are
       present) extract it with ``segment.extract_features()`` and save it.
    3. Train with ``neural.train_network``, save the resulting weights, log a
       self-check accuracy over at most 100 files and return the classifier.

    :returns: the loaded or newly trained classifier.
    '''
    data_dir = util.get_data_dir()
    # exist_ok avoids the check-then-create race and creates missing parents.
    os.makedirs(data_dir, exist_ok=True)
    weights_path = os.path.join(data_dir, config.network_name + ".npy")

    # Trying to load saved neural network coefficients
    if os.path.exists(weights_path):
        input_layer = config.sample_h * config.sample_w
        outer_layer = config.character_number
        neural_classifier = neural.NeuralClassfier(input_layer,
                                                   config.hidden_layer,
                                                   outer_layer,
                                                   config.reg,
                                                   random_seed=config.seed)
        neural_classifier.weights = numpy.load(weights_path)
        return neural_classifier

    # Trying to load saved training data
    x_path = os.path.join(data_dir, "X.npy")
    y_path = os.path.join(data_dir, "y.npy")
    if os.path.exists(x_path) and os.path.exists(y_path):
        X = numpy.load(x_path)
        y = numpy.load(y_path)
    else:
        image_dir = util.get_image_dir()
        os.makedirs(image_dir, exist_ok=True)
        labeled_images = glob.glob(os.path.join(image_dir, "*.gif"))
        if len(labeled_images) < config.gen_train_size / 2:
            debug("Generating recognized captcha images using perl script.")
            generate_captcha(config.gen_train_size)
        debug(
            "Saved training data is not found. Generating new by segmentating images."
        )
        X, y = segment.extract_features()
        # Cache the extracted data so the next run can skip segmentation.
        numpy.save(x_path, X)
        numpy.save(y_path, y)

    debug("Network coefficients are not found. Training new neural network.")
    neural_classifier = neural.train_network(X, y)
    numpy.save(weights_path, neural_classifier.weights)
    debug("Selfchecking full captcha files.")
    accuracy = test.check_labeled_dir(neural_classifier,
                                      util.get_image_dir(),
                                      limit=100)
    debug("Accuracy on generated set: {}".format(accuracy))
    return neural_classifier
예제 #2
0
def get_buildfile_path(path: str, image_name: str) -> str:
    '''
    Locate the buildfile of an image and return its path.

    The image directory is resolved with ``util.get_image_dir`` and the
    buildfile name is taken from ``default.Config.BUILDFILE_NAME``.

    :param path: The path handed to ``util.get_image_dir``.
    :param image_name: The image name handed to ``util.get_image_dir``.
    :returns: The path of the buildfile inside the image directory.
    :raises ValueError: If no regular file exists at that path.
    '''
    candidate = os.path.join(
        util.get_image_dir(path, image_name),
        default.Config.BUILDFILE_NAME.value)

    if os.path.isfile(candidate):
        return candidate

    raise ValueError(f'buildfile does not exist: {candidate}')
예제 #3
0
    def build(
            self,
            namespace: argparse.Namespace,
            image: str,
            build_config: config.ImageBuildConfig):
        '''
        Assemble and run a ``buildah bud`` command for one image.

        The command is always logged; with ``namespace.dry_run`` set nothing
        is executed. After a real build the image is optionally tagged as
        latest (``namespace.tag_latest``) and pushed (``namespace.push``).

        :param namespace: Namespace passed in via CLI.
        :param image: The image to build.
        :param build_config: the image build configuration.
        :raises: subprocess.CalledProcessError
        '''
        LOGGER.info('Build image')
        LOGGER.info('Dry Run: %s', namespace.dry_run)

        context_dir = util.get_image_dir(namespace.path, image)
        # From here on the image is referred to by its full name.
        full_name = build_config.image.full_name
        extra_build_args = builder.get_build_args(build_config)

        cmd = util.Command(['buildah', 'bud', '-t'])
        cmd.add_arg(full_name)
        cmd.add_args_list('--build-arg', extra_build_args)

        build_version = build_config.image.tag_build.version
        if build_version:
            cmd.add_args('--build-arg', f'VERSION={build_version}')

        # The build context is the directory the command is run from.
        cmd.add_arg('.')

        LOGGER.info('Image name: %s', full_name)
        LOGGER.info('Command: %s', ' '.join(cmd))

        if not namespace.dry_run:
            with util.pushd(context_dir):
                subprocess.check_call(cmd)

            current_tag = build_config.image.tag
            if namespace.tag_latest and current_tag != 'latest':
                self.tag_latest(full_name)

            if namespace.push:
                self.push(namespace=namespace, image=full_name)
예제 #4
0
def extract_features():
    '''Extract features from all labeled images.

    Every ``*.gif`` file in ``util.get_image_dir()`` is read as a grey image
    and passed to ``image_to_features`` together with its label, which is the
    file name without the extension. The per-image results are concatenated
    along axis 0.

    :returns: tuple ``(X, y)`` of the concatenated per-image results.
    :raises ValueError: if the image directory holds no ``*.gif`` files.
    '''
    image_dir = util.get_image_dir()
    images = glob.glob(os.path.join(image_dir, "*.gif"))
    if not images:
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError(
            "No labeled *.gif images found in {}".format(image_dir))

    def extract_single(image_file):
        # splitext cannot fail on an unexpected extension case, unlike a
        # regex match that returns None.
        captcha = os.path.splitext(os.path.basename(image_file))[0]
        image = util.read_grey_image(image_file)
        return image_to_features(image, captcha)

    X, y = zip(*(extract_single(image_file) for image_file in images))
    X = numpy.concatenate(X, axis=0)
    y = numpy.concatenate(y, axis=0)
    return (X, y)
예제 #5
0
파일: buildkit.py 프로젝트: spiarh/jojo
    def build(self, namespace: argparse.Namespace, image: str,
              build_config: config.ImageBuildConfig):
        '''
        Assemble and run the build command for one image.

        The command comes from ``self._create_command`` and receives an
        ``--output`` option of the form ``type=image,name=...,push=...``
        listing the full image name and, when ``namespace.tag_latest`` is set
        and the tag is not already ``latest``, the latest-tagged name as well.
        With ``namespace.dry_run`` set the command is only logged.

        :param namespace: Namespace passed in via CLI.
        :param image: The image to build.
        :param build_config: the image build configuration.
        :raises: subprocess.CalledProcessError
        '''
        LOGGER.info('Build image')

        context_dir = util.get_image_dir(namespace.path, image)
        # From here on the image is referred to by its full name.
        full_name = build_config.image.full_name
        extra_build_args = builder.get_build_args(build_config)

        build_version = build_config.image.tag_build.version
        if build_version:
            extra_build_args.append(f'VERSION={build_version}')

        cmd = self._create_command(namespace=namespace,
                                   action='build',
                                   build_args=extra_build_args)

        output_names = [full_name]

        current_tag = build_config.image.tag
        if namespace.tag_latest and current_tag != 'latest':
            output_names.append(util.set_image_tag_latest(image=full_name))

        name_fields = ','.join(f'name={entry}' for entry in output_names)
        output_spec = f'type=image,{name_fields},push={namespace.push}'
        cmd.add_args(name='--output', value=output_spec)

        LOGGER.info('Image name: %s', full_name)
        LOGGER.info('Command: %s', ' '.join(cmd))

        if not namespace.dry_run:
            with util.pushd(context_dir):
                subprocess.check_call(cmd)
예제 #6
0

def check_labeled_dir(NN, dir, limit=None, shift=0):
    '''Check prediction accuracy on ``<captcha>.gif`` files in a directory.

    The expected answer for each file is its name without the extension; it
    is compared with ``wkcaptcha.predict_file(NN, image_file)``.

    :param NN: classifier handed to ``wkcaptcha.predict_file``.
    :param dir: directory holding the labeled images; created if missing.
    :param limit: stop after this many files; falsy means no limit.
    :param shift: number of leading files (in glob order) to skip.
    :returns: fraction of processed files recognized correctly, or ``0.0``
        when no file was processed.
    '''
    total = 0
    recognized = 0
    # exist_ok avoids the check-then-create race and creates missing parents.
    os.makedirs(dir, exist_ok=True)
    images = glob.glob(os.path.join(dir, "*.gif"))
    for image_file in images[shift:]:
        total += 1
        captcha_p = wkcaptcha.predict_file(NN, image_file)
        # splitext cannot fail on an unexpected extension case, unlike a
        # regex match that returns None.
        captcha = os.path.splitext(os.path.basename(image_file))[0]
        if captcha == captcha_p:
            recognized += 1
        if limit and total >= limit:
            break
    if total == 0:
        # Nothing to check: report zero instead of dividing by zero.
        return 0.0
    return recognized / total


# Script entry point: report accuracy for the directory given as the first
# command-line argument, or for the generated image set (at most 100 files)
# when no argument is given.
if __name__ == '__main__':
    if len(sys.argv) <= 1:
        generated_accuracy = check_labeled_dir(
            wkcaptcha.get_saved_classifier(),
            util.get_image_dir(),
            limit=100)
        debug("Accuracy on generated set: {}".format(generated_accuracy))
    else:
        target_dir = sys.argv[1]
        target_accuracy = check_labeled_dir(
            wkcaptcha.get_saved_classifier(), target_dir)
        debug("Accuracy on captcha in {} directory: {}".format(
            target_dir, target_accuracy))