def __init__(self, alphabet, stat_file, encoding, file_input=None,
             text_input=None, top=15000, benchmark=False):
    """
    Prepare the instance from reference statistics and a cipher text.

    :param alphabet: alphabet forwarded to ``TextInfo`` and to
        ``create_default_substitution``
    :param stat_file: passed to ``load_stat`` together with ``encoding``
    :param encoding: encoding forwarded to ``load_stat`` and ``TextInfo``
    :param file_input: name of the file with the cipher text; used only
        when ``text_input`` is falsy
    :param text_input: cipher text itself; takes precedence over
        ``file_input``
    :param top: limit forwarded to ``find_info`` and ``make_words_list``
    :param benchmark: flag stored as ``self.bench``
    :raises Exception: when neither ``text_input`` nor ``file_input``
        is truthy
    """
    self.alphabet = alphabet
    self.encoding = encoding
    self.file = file_input
    self.bench = benchmark

    # Reference statistics and the word patterns built from their words.
    self.stat = load_stat(stat_file, encoding)
    reference_words = self.stat["words"].keys()
    self.word_patterns = get_wordlists_pattern(reference_words)

    # Pick the cipher text source; literal text wins over a file name.
    if text_input:
        source = {"input_text": text_input}
    elif file_input:
        source = {"input_filename": file_input}
    else:
        raise Exception("Cipher's Text Input Error")
    code_text_info = TextInfo(alphabet, encoding, **source)

    # Count dictionary of the cipher text and the patterns of its words.
    counted = code_text_info.find_info(top)
    self.cipher_words_list = counted.make_count_dict()
    cipher_words = make_words_list(self.cipher_words_list["words"], top)
    self.patterns = get_wordlists_pattern(cipher_words)

    # Both substitutions start from the default one for this alphabet.
    self.temp_subst = create_default_substitution(alphabet)
    self.key = create_default_substitution(alphabet)
    self.quadgrams = self.count_quadrams()
def handle_many_files(args, files):
    """
    Process a list of files one after another.

    The first file is delegated to ``handle_one_object``. Every following
    file is counted on its own, run through ``update_count_info`` against
    ``args.output`` and the resulting dictionary is written back to
    ``args.output``.

    :param args: object providing ``alph``, ``encoding``, ``top`` and
        ``output`` (plus whatever ``handle_one_object`` reads)
    :param files: a list of files' names; an empty list is a no-op
    """
    if not files:
        # Nothing to process; previously this raised IndexError on files[0].
        return

    handle_one_object(args, files[0])

    # NOTE(review): the loop below reads and writes args.output, so it
    # presumably expects args.output to be set when several files are
    # given -- confirm against the caller.
    for file_name in files[1:]:
        text_info = TextInfo(args.alph, args.encoding,
                             input_filename=file_name)
        count_info = text_info.find_info(args.top)
        count_info.update_count_info(args.output, text_info.alph,
                                     args.top, args.encoding)
        updated_dict = count_info.make_count_dict()
        write_json_in_file(args.output, updated_dict, args.encoding)
def learn_single_file(top, update_file=None, file_name=None):
    """
    Collect statistics from a single file and write them to ``result.txt``.

    :param top: limit forwarded to ``find_info`` and ``update_count_info``
    :param update_file: optional file passed to ``update_count_info``
        before the count dictionary is built; skipped when ``None``
    :param file_name: name of the file to learn from; required despite
        the ``None`` default
    :raises ValueError: when ``file_name`` is missing or empty
    """
    if not file_name:
        # Without a file name there is nothing to build TextInfo from;
        # fail with a clear message instead of an UnboundLocalError.
        raise ValueError('file_name is required to learn a single file')

    print('Learning single file')
    text_info = TextInfo('A-Za-z', 'utf-8', input_filename=file_name)
    count_info = text_info.find_info(top)
    if update_file is not None:
        count_info.update_count_info(update_file, text_info.alph, top,
                                     'utf-8')
    updated_dict = count_info.make_count_dict()
    write_json_in_file('result.txt', updated_dict, 'utf-8')
    print('Done')
def learn_multiple_files(top, files, update_file=None):
    """
    Learn from several files in turn while drawing a progress bar.

    Each file is counted separately and its count dictionary is written
    to ``result.txt``, so that file is rewritten on every iteration. The
    module-level ``workdone`` holds the completed fraction and is reset
    to 0 once it reaches 1.

    :param top: limit forwarded to ``find_info`` and ``update_count_info``
    :param files: list of file names to learn from
    :param update_file: optional file passed to ``update_count_info``
        for every processed file; skipped when ``None``
    """
    global workdone
    total = len(files)
    print('Learning multiple files: {}'.format(total))

    for finished, file_name in enumerate(files, start=1):
        text_info = TextInfo('A-Za-z', 'utf-8', input_filename=file_name)
        count_info = text_info.find_info(top)
        if update_file is not None:
            count_info.update_count_info(update_file, text_info.alph, top,
                                         'utf-8')
        updated_dict = count_info.make_count_dict()
        write_json_in_file('result.txt', updated_dict, 'utf-8')

        # Redraw the 50-character bar in place using a carriage return.
        workdone = finished / total
        filled = '#' * int(workdone * 50)
        print("\rProgress: [{0:50s}] {1:.1f}%".format(filled, workdone * 100),
              end="", flush=True)
        if workdone == 1:
            workdone = 0
def __count_score(self, coded_text):
    """
    Score a text against the example quadgram table.

    Iterates the entries stored under key ``'4'`` of the text's n-gram
    dictionary. An entry present in the example table contributes its
    table value; any other entry contributes the floor value.

    :param coded_text: text to evaluate
    :return: accumulated score
    """
    text_info = TextInfo(self.alphabet, self.encoding, input_text=coded_text)
    ngrams = text_info.find_info(15000).make_ngramms_dict()

    total = 0
    for gram in ngrams['4']:
        is_known = gram in self.__example_quadgrams
        total += self.__example_quadgrams[gram] if is_known else self.__floor
    return total
def count_precision_coefficient(self, coded_text):
    """
    Compute the precision coefficient of a text from ``self.quadgrams``.

    Iterates the entries stored under key ``'4'`` of the text's n-gram
    dictionary. An entry found in ``self.quadgrams`` adds its stored
    value; an entry that is not found adds ``self.floor``.

    :param coded_text: text to evaluate
    :return: accumulated coefficient
    """
    text_info = TextInfo(self.alphabet, self.encoding, input_text=coded_text)
    ngrams = text_info.find_info(15000).make_ngramms_dict()

    coefficient = 0
    for gram in ngrams['4']:
        if gram not in self.quadgrams:
            coefficient += self.floor
            continue
        coefficient += self.quadgrams[gram]
    return coefficient
def handle_one_object(args, file_name=None, text=None):
    """
    Process one file at a time, or a given piece of text.

    The count dictionary is written to ``args.output`` when that is set
    and pretty-printed otherwise.

    :param args: object providing ``alph``, ``encoding``, ``top``,
        ``update_fn`` and ``output``
    :param file_name: name of the file to process; takes precedence
        over ``text``
    :param text: text to process when no file name is given
    :raises ValueError: when neither ``file_name`` nor ``text`` is truthy
    """
    if file_name:
        text_info = TextInfo(args.alph, args.encoding,
                             input_filename=file_name)
    elif text:
        text_info = TextInfo(args.alph, args.encoding, input_text=text)
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError('either file_name or text must be provided')

    count_info = text_info.find_info(args.top)
    if args.update_fn:
        count_info.update_count_info(args.update_fn, text_info.alph,
                                     args.top, args.encoding)
    updated_dict = count_info.make_count_dict()

    if args.output:
        write_json_in_file(args.output, updated_dict, args.encoding)
    else:
        pprint(updated_dict)