def saveCharacters(self, characters):
    """Save every character image as ``<md5 of its pixels>.gif``.

    When ``self.options.auto_train`` is set, the expected code is extracted
    from the captcha file name with ``self.getCodeFromString``. If it has as
    many symbols as ``characters``, each image is stored under
    ``<mod_dir>/char/<symbol.lower()>/``. Otherwise (or when auto-training is
    off) every image goes to ``self.options.output_char_dir``.

    Args:
        characters: sequence of image objects. Each one must be convertible
            with ``numpy.array`` into rows of pixel values and must provide
            ``save(path)`` (presumably PIL images; confirm against caller).
    """
    # NOTE(review): the original source was collapsed onto a single line; the
    # nesting below (in particular which ``if`` each ``else`` belongs to) was
    # reconstructed from the logic and should be confirmed.

    # Get characters' hashes
    characters_hashes = []
    for character in characters:
        # Get image as a string of 0 and 1 (for 255) and n (for new line).
        # The separator is the literal letter "n"; it is kept as is so the
        # generated file names stay identical to previously saved ones.
        rows = (
            "".join("0" if pixel == 0 else "1" for pixel in row)
            for row in numpy.array(character)
        )
        ones_string = "n".join(rows)
        # hashlib needs bytes on Python 3; encoding pure-ASCII text yields
        # the same digest the Python 2 code produced.
        digest = hashlib.md5(ones_string.encode("ascii")).hexdigest()
        characters_hashes.append(digest)

    code = None
    saveAsCategorized = False
    if self.options.auto_train:
        # Parse file name and find code
        basename = os.path.basename(self._captcha)
        code = self.getCodeFromString(basename)
        if len(code) != len(characters):
            Logger.error(
                "Error! Training found {0} chars while in file name are specified {1} chars. Real code is: {2}".format(
                    len(characters), len(code), basename
                ),
                1,
            )
        else:
            saveAsCategorized = True

    if self.options.verbose:
        if saveAsCategorized:
            Logger.subheader("Saving characters into categorized folders")
        else:
            Logger.log("Saving characters into output folder")

    for i, (character, character_hash) in enumerate(zip(characters, characters_hashes)):
        if saveAsCategorized:
            character_symbol = code[i]
            dst = os.path.join(
                self.options.mod_dir, "char", character_symbol.lower(), character_hash + ".gif"
            )
        else:
            dst = os.path.join(self.options.output_char_dir, character_hash + ".gif")

        character.save(dst)

        if saveAsCategorized:
            Logger.log("Saving {0} into mod folder".format(character_symbol))
def crack(self):
    """Guess the text of the captcha by matching it against trained characters.

    The captcha image is loaded, cleaned and converted to black and white,
    then split into character images. Each character is compared with every
    trained sample via ``self.computeSimilarity`` and the label of the best
    scoring sample is taken as the guess for that position.

    Returns:
        The guessed string, with ``_`` in place of every character for which
        no trained sample scored above 0, or ``None`` when no mod is
        configured in ``self.options``.
    """
    # NOTE(review): the original source was collapsed onto a single line; the
    # nesting below was reconstructed from the logic and should be confirmed.
    Logger.info("\nTrain on " + self._captcha)

    options = self.options
    if not options.mod:
        Logger.error("Can't crack without a mod")
        return None

    processed = self.getImage()
    processed = self.cleanImage(processed)
    # Ensure that image is in black and white
    processed = self.blackAndWhite(processed)

    # List of images
    characters = self.getCharacters(processed)
    trained_chars = self.loadTrainedChars()

    Logger.subheader("Detected characters")

    # One (best similarity, best label) pair per detected character
    supposed_words = []
    for character in characters:
        # Transform image to list of lists
        a_char = self.imageTo2DBinaryList(character)

        similarity_best = 0.0
        similarity_letter = None
        # items() works on both Python 2 and 3 (iteritems() is Python 2 only)
        for char, chars_list in trained_chars.items():
            for a_trained_char in chars_list:
                similarity = self.computeSimilarity(a_char, a_trained_char)
                if similarity > similarity_best:
                    similarity_best = similarity
                    similarity_letter = char

        supposed_words.append((similarity_best, similarity_letter))
        Logger.log("{} {}%".format(similarity_letter, round(similarity_best, 2)))

    guess_parts = []
    multiple_probability = 1.0
    average_probability = 0.0
    all_guesed = True
    for probability, letter in supposed_words:
        if letter:
            guess_parts.append(str(letter))
        else:
            guess_parts.append("_")
            all_guesed = False
        # Similarities are treated as percentages here. Divide by a float so
        # an integer similarity is not truncated to 0 on Python 2.
        multiple_probability *= probability / 100.0
        average_probability += probability

    guess = "".join(guess_parts)

    # Average over the number of detected characters, not the length of the
    # guess string, which would over-count labels longer than one character.
    if supposed_words:
        average_probability /= len(supposed_words)

    Logger.subheader("Results")
    if all_guesed:
        Logger.success(guess)
    else:
        Logger.info("characters marked with _ are the ones that are not guessed:")
        Logger.error(guess)

    Logger.info("{}% Overall probability".format(round(multiple_probability * 100, 2)))
    Logger.info("{}% Average probability".format(round(average_probability, 2)))

    if not options.verbose:
        Logger.log(guess, True)

    return guess