def find_key(s, key_length):
    """Find the key by scoring several single-byte XOR decryptions using English letter frequency."""
    s = base64.b64decode(s)
    transposed_blocks = utils.transpose_blocks(utils.get_blocks(s, key_length), key_length)
    letter_place = {}
    key = ""
    occurence = dict((letter, 0) for letter in string.printable)
    for i, tblock in enumerate(transposed_blocks):
        for letter in string.printable:
            occurence[letter] = 0
            xored_tblock = [byte ^ ord(letter) for byte in tblock]
            for byte in xored_tblock:
                # the six most frequent letters in English plaintext
                # http://letterfrequency.org/#english-language-letter-frequency
                if chr(byte) in "etaoinETAOIN":
                    occurence[letter] += 1
        letter_place[i] = occurence.copy()
    for i, value in letter_place.items():
        # for each dict in letter_place, keep the candidate key byte(s) with the greatest score
        getkey = lambda i, val: [k for k, v in val.items() if v == max(val.values())]
        key += max(getkey(i, value))
    return key

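# Neither utils.get_blocks nor utils.transpose_blocks is defined in this snippet.
# A minimal sketch of what find_key appears to assume (split the ciphertext into
# key-sized blocks, then regroup the bytes that share the same key position).
# Only the call signatures come from the code above; the bodies are an assumption.
def get_blocks(data, block_size):
    # consecutive block_size-byte slices of the ciphertext
    return [data[i:i + block_size] for i in range(0, len(data), block_size)]


def transpose_blocks(blocks, block_size):
    # column i of every block, i.e. all bytes XORed with key byte i
    return [bytes(block[i] for block in blocks if i < len(block))
            for i in range(block_size)]
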
def construct_blocks_json(self):
    """Build a ProgramBlock from the form entries, save it to blocks.json, and compile it."""
    new_block = ProgramBlock()
    new_block.name = self.builder.get_object('name_ent').get()
    new_block.command = self.builder.get_object('command_ent').get()
    new_block.inputs = [a.get() for a in self.input_entry_list]
    prelim_outputs = [a.get() for a in self.output_entry_list]
    new_block.outputs = []
    prelim_output_types = {}
    for i in prelim_outputs:
        # each output entry may carry an optional type suffix, e.g. "name.type"
        this_output_list = i.split('.')
        new_block.outputs.append(this_output_list[0])
        if len(this_output_list) == 1:
            prelim_output_types[this_output_list[0]] = ""
        else:
            prelim_output_types[this_output_list[0]] = this_output_list[1]
    new_block.output_types = prelim_output_types
    prelim_sbatch_text = self.builder.get_object('sbatch_txtbox').get("1.0", END)
    new_block.sbatch = prelim_sbatch_text.splitlines()
    blocks = utils.get_blocks()
    blocks[new_block.name] = new_block
    json_loader.write_config("./../programs/blocks.json", blocks)
    program_compiler.run(new_block.name)

def cut_base_on_dict(self, sentence):
    word_list, tag_list = [], []
    blocks = utils.get_blocks(sentence, utils.RE_NORMAL_HAN)
    for block in blocks:
        max_prob_route = self.trie_model.get_max_prob_route(block)
        max_prob_word_list = [block[max_prob_route[idx]: max_prob_route[idx + 1]]
                              for idx in range(len(max_prob_route) - 1)]
        continuos_singe_list = self.get_continuos_singe(max_prob_word_list)
        last_end = 0
        for start, end in continuos_singe_list:
            for pre_word in max_prob_word_list[last_end: start]:
                word_list.append(pre_word)
                tag_list.append(self.trie_model.word_value.get(pre_word, {}).get('tag', 'x'))
            last_end = end
            continuos_singe_str = ''.join(max_prob_word_list[start: end])
            for slices in utils.get_splits(continuos_singe_str, utils.RE_NUNMBER_ENG):
                if utils.is_number_or_eng(slices):
                    # numbers and English tokens get the 'm' tag
                    word_list.append(slices)
                    tag_list.append('m')
                else:
                    # fall back to the CRF tokenizer/tagger for the remaining runs
                    mid_word_list = tag.crf_tag.crfToken(slices)
                    mid_tag_list = tag.crf_tag.crfPos(mid_word_list)
                    word_list.extend(mid_word_list)
                    tag_list.extend(mid_tag_list)
        for word in max_prob_word_list[last_end:]:
            word_list.append(word)
            tag_list.append(self.trie_model.word_value.get(word, {}).get('tag', 'x'))
    return SegmentPair(word_list, tag_list)

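# SegmentPair is not defined in this snippet; a minimal stand-in that matches how
# cut_base_on_dict uses it (a plain container for the two parallel lists) could be:
from collections import namedtuple

SegmentPair = namedtuple('SegmentPair', ['word_list', 'tag_list'])
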
def key_length(s):
    with open(s, "rb") as txt:
        s = base64.b64decode(txt.read())
    distances = {}
    for k_length in range(2, 40):
        blocks = utils.get_blocks(s, k_length)
        # normalized Hamming distance of each block against its adjacent block
        distances[k_length] = sum(hamming_dist(a, b) / k_length
                                  for a, b in zip(blocks, blocks[1:]))
        # average of all normalized Hamming distances for the current key length
        distances[k_length] /= len(blocks)
    distances = sorted(distances.items(), key=lambda x: x[1])
    # return the key length with the lowest distance; if the two best candidates
    # share a common divisor, the shorter period (their gcd) is the real key length
    return distances[0][0] if utils.pgcd(distances[0][0], distances[1][0]) == 1 \
        else utils.pgcd(distances[0][0], distances[1][0])

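# hamming_dist and utils.pgcd are referenced above but not shown. Plausible
# definitions, assuming pgcd is the usual gcd ("plus grand commun diviseur"):
def hamming_dist(a, b):
    # number of differing bits between two equal-length byte strings
    return sum(bin(x ^ y).count("1") for x, y in zip(a, b))


def pgcd(a, b):
    # greatest common divisor via the Euclidean algorithm
    while b:
        a, b = b, a % b
    return a
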
if __name__ == '__main__':
    img_dir = "/home/mahad/tmp/01-protected-retirement-plan-customer-key-features/png"
    npy_dir = "/home/mahad/tmp/01-protected-retirement-plan-customer-key-features/npy"
    save_dir = "/tmp"

    img_files = os.listdir(img_dir)
    npy_files = os.listdir(npy_dir)
    for img_file in img_files:
        img_path = os.path.join(img_dir, img_file)
        npy_path = os.path.join(npy_dir, Path(img_file).stem + ".npy")
        img = cv2.imread(img_path)
        all_boxes = np.load(npy_path, allow_pickle=True).tolist()
        # group the boxes into column blocks, merge them, then derive a reading order
        column_blocks = get_blocks((img.shape[0], img.shape[1]), all_boxes)
        column_blocks_merged = merge_blocks(column_blocks, all_boxes)
        ordered_boxes = create_order(column_blocks_merged, all_boxes)
        # display each box in reading order, scaled down for viewing
        for ordered_box in ordered_boxes:
            img_draw = draw_boxes(img, [ordered_box])
            cv2.imshow('', cv2.resize(img_draw, fx=0.25, fy=0.25, dsize=None))
            cv2.waitKey()

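# draw_boxes is not shown in these snippets. A plausible OpenCV-based sketch,
# assuming each box is an (x1, y1, x2, y2) tuple in pixel coordinates:
def draw_boxes(img, boxes, color=(0, 0, 255), thickness=2):
    out = img.copy()
    for x1, y1, x2, y2 in boxes:
        # outline the box on a copy so the source image stays untouched
        cv2.rectangle(out, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness)
    return out
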
def setup_batch(request):
    """Setup method for posting batches and returning the response"""
    data = {}
    signer = get_signer()
    expected_trxn_ids = []
    expected_batch_ids = []
    initial_state_length = len(get_state_list())

    LOGGER.info("Creating intkey transactions with set operations")
    txns = [
        create_intkey_transaction("set", 'a', 0, [], signer),
    ]
    for txn in txns:
        data = MessageToDict(
            txn,
            including_default_value_fields=True,
            preserving_proto_field_name=True)
        trxn_id = data['header_signature']
        expected_trxn_ids.append(trxn_id)
    data['expected_trxn_ids'] = expected_trxn_ids

    LOGGER.info("Creating batches for transactions, 1 txn per batch")
    batches = [create_batch([txn], signer) for txn in txns]
    for batch in batches:
        data = MessageToDict(
            batch,
            including_default_value_fields=True,
            preserving_proto_field_name=True)
        batch_id = data['header_signature']
        expected_batch_ids.append(batch_id)
    data['expected_batch_ids'] = expected_batch_ids
    data['signer_key'] = signer.get_public_key().as_hex()

    post_batch_list = [BatchList(batches=[batch]).SerializeToString()
                       for batch in batches]
    LOGGER.info("Submitting batches to the handlers")
    for batch in post_batch_list:
        try:
            response = post_batch(batch)
        except urllib.error.HTTPError as error:
            LOGGER.info("Rest Api is not reachable")
            data = json.loads(error.fp.read().decode('utf-8'))
            LOGGER.info(data['error']['title'])
            LOGGER.info(data['error']['message'])

    block_list = get_blocks()
    data['block_list'] = block_list
    block_ids = [block['header_signature'] for block in block_list]
    data['block_ids'] = block_ids
    batch_ids = [block['header']['batch_ids'][0] for block in block_list]
    data['batch_ids'] = batch_ids
    expected_head_id = block_ids[0]
    data['expected_head_id'] = expected_head_id
    yield data

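# setup_batch is written as a pytest-style fixture (note the trailing yield).
# An illustrative test consuming it might check that every submitted batch ended
# up in a block; the test name and assertion here are hypothetical:
def test_batches_are_committed(setup_batch):
    data = setup_batch
    for batch_id in data['expected_batch_ids']:
        # each posted batch id should appear among the batch ids read back from the chain
        assert batch_id in data['batch_ids']
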
xml_path = os.path.join(xml_dir, xml_file)
pdf_path = os.path.join(pdf_dir, Path(xml_file).stem + ".pdf")
xml_data = get_raw_data(xml_path)
for page in xml_data:
    para_boxes = page["para_boxes"]
    para_texts = page["para_texts"]
    para_boxes, para_texts = remove_empty(para_boxes, para_texts)
    tables = page["tables"]
    table_boxes = [tt["bbox"] for tt in tables]
    table_texts = [tt["rows"] for tt in tables]
    img = pdf2image.convert_from_path(pdf_path,
                                      size=(page["width"], page["height"]),
                                      first_page=page["page_number"],
                                      last_page=page["page_number"])
    img = np.asarray(img[0])
    all_boxes = para_boxes + table_boxes
    all_texts = para_texts + table_texts
    column_blocks = get_blocks((page["height"], page["width"]), all_boxes)
    column_blocks_merged = merge_blocks(column_blocks, all_boxes)
    ordered_boxes = create_order(column_blocks_merged, all_boxes)
    ordered_texts = []
    for i in range(0, len(ordered_boxes)):
        idx = all_boxes.index(ordered_boxes[i])
        ordered_texts.append(all_texts[idx])
        if idx:
            del idx
    for i in range(0, len(ordered_boxes)):
        if not isinstance(ordered_texts[i], list):
            img_draw = draw_boxes(img, [ordered_boxes[i]])
            cv2.imshow('', cv2.resize(img_draw, fx=0.25, fy=0.25, dsize=None))
            cv2.waitKey()
        else:
            for row in ordered_texts[i]: