def extract(self, derived_key):
    """Extract the data hidden in the TJ operators of the input PDF.

    The input file is uncompressed to ``<input>.qdf`` and every line that
    contains a TJ block is handed to ``self.extract_line``. The collected
    numerals are then searched for the FlagStr (the numerals produced by
    ``encoding.encode_key`` for ``derived_key``). The 20 numerals at the
    start position are taken as CheckStr; the numerals between CheckStr
    and FlagStr are decoded to bytes and written to ``self.output`` once
    ``encoding.digest_to_nums`` of the decoded bytes equals CheckStr.

    Args:
        derived_key: key passed to ``encoding.encode_key`` to build the
            FlagStr numerals; it also seeds ``random.Random`` when
            ``self.improve`` is set.

    Returns:
        0 on success, -1 when FlagStr is not found or when the decoded
        data does not match the embedded CheckStr.
    """
    # NB: CheckStr and FlagStr are both 20 numerals long
    marker_len = 20
    tj_pattern = re.compile(r'\[(.*)\][ ]?TJ')

    # Initialize state
    self.tj_count = 0
    self.tj_count_valid = 0
    tjs = []

    # Get the numerals from the key
    nums = encoding.encode_key(derived_key, self.nbits)

    # Initiate chaotic map
    if self.improve:
        ch_two = random.Random(derived_key)
    else:
        ch_two = chaos.Chaotic(self.mu_two, nums)

    # Determine start position
    #
    # TODO: fix the random start position. It was meant to be, when
    # self.improve is set, int(<number of TJ values in the file> *
    # ch_two.random()); that code path was disabled, so extraction
    # always starts at 0.
    start = 0

    # Open input file
    #
    # NB: Only works for valid PDF files
    qdf_path = self.input + ".qdf"
    self.l.info("Input file: \"" + self.input + "\"")
    driver.uncompress(self.input, qdf_path)
    # Open before entering the try block so that the cleanup below only
    # runs once the temporary file is known to exist.
    embedding_file = open(qdf_path, encoding="iso-8859-1")
    try:
        with embedding_file:
            # Parse file
            self.l.info("Extracting data, please wait...")
            self.print_conf_extract(start, nums)
            for line in embedding_file:
                # Only lines holding a TJ block can carry data
                if tj_pattern.search(line) is not None:
                    tjs += self.extract_line(line, ch_two)
    finally:
        # The file is closed by now; remove the temporary copy even if
        # parsing failed.
        driver.delete(qdf_path)

    # NB: Hack for custom range (do not shift by 1)
    # TODO: do that better and include in docs
    normalrange = 0 if self.customrange else 1

    # Normalize values
    #
    # TODO: check if really necessary
    modulus = 2 ** self.nbits
    tjs = [(x - normalrange) % modulus for x in tjs]

    # Wrap values around in order to move easily inside
    #
    # TODO: use modulo calculation instead
    tjs_ = tjs + tjs

    # Look for FlagStr at every position after CheckStr, trying as many
    # positions as there are numerals.
    checkstr = None
    embedded = None
    first = start + marker_len
    for k in range(first, first + len(tjs)):
        if nums == tjs_[k:k + marker_len]:
            # End position is found, register it
            end = k + marker_len - 1
            self.l.debug("End position found", end)
            # NB: length = end - start + 1
            checkstr = tjs_[start:first]
            embedded = tjs_[first:k]
            break

    if embedded is None:
        # FlagStr not found
        # -> Fail
        self.l.error("Ending code FlagStr not found")
        return -1

    # FlagStr was found
    # -> Decode embedded data
    self.l.info("Done extracting.")
    self.l.info("Decoding data, please wait...")

    # Decode each numeral into a binary string
    last = len(embedded) - 1
    bit_chunks = []
    for idx, num in enumerate(embedded):
        bits = encoding.num_to_binstr(num, self.nbits)
        if idx == last:
            # Processing the last numeral
            # -> Only take the bits needed
            bits = bits[len(bits) - self.nbits:]
        bit_chunks.append(bits)
    bin_str = "".join(bit_chunks)

    # Decode the full binary string into bytes
    emb_str = b"".join(encoding.decode(bin_str))
    self.debug_extract_print_sum(encoding.encode_key(emb_str, self.nbits),
                                 bin_str, checkstr, embedded, emb_str)

    # Check integrity
    if encoding.digest_to_nums(emb_str, self.nbits) != checkstr:
        # Data does not match embedded checksum
        # -> Fail
        self.l.error("CheckStr does not match embedded data")
        return -1

    # Data matches checksum
    self.l.info("Done decoding.")

    # -> Produce output file
    with open(self.output, "wb") as output_file:
        output_file.write(emb_str)

    # All finished
    self.l.info("Output file: \"" + self.output + "\"")
    return 0
def extract(self, derived_key):
    """Extract the data hidden in the TJ operators of the input PDF.

    The input file is uncompressed to ``<input>.qdf`` and every line that
    contains a TJ block is handed to ``self.extract_line``. The collected
    numerals are then searched for the FlagStr (the numerals produced by
    ``encoding.encode_key`` for ``derived_key``). The 20 numerals at the
    start position are taken as CheckStr; the numerals between CheckStr
    and FlagStr are decoded to bytes and written to ``self.output`` once
    ``encoding.digest_to_nums`` of the decoded bytes equals CheckStr.

    Args:
        derived_key: key passed to ``encoding.encode_key`` to build the
            FlagStr numerals; it also seeds ``random.Random`` when
            ``self.improve`` is set.

    Returns:
        0 on success, -1 when FlagStr is not found or when the decoded
        data does not match the embedded CheckStr.
    """
    # NB: CheckStr and FlagStr are both 20 numerals long
    marker_len = 20
    tj_pattern = re.compile(r'\[(.*)\][ ]?TJ')

    # Initialize state
    self.tj_count = 0
    self.tj_count_valid = 0
    tjs = []

    # Get the numerals from the key
    nums = encoding.encode_key(derived_key, self.nbits)

    # Initiate chaotic map
    if self.improve:
        ch_two = random.Random(derived_key)
    else:
        ch_two = chaos.Chaotic(self.mu_two, nums)

    # Determine start position
    #
    # TODO: fix the random start position. It was meant to be, when
    # self.improve is set, int(<number of TJ values in the file> *
    # ch_two.random()); that code path was disabled, so extraction
    # always starts at 0.
    start = 0

    # Open input file
    #
    # NB: Only works for valid PDF files
    qdf_path = self.input + ".qdf"
    self.l.info("Input file: \"" + self.input + "\"")
    driver.uncompress(self.input, qdf_path)
    # Open before entering the try block so that the cleanup below only
    # runs once the temporary file is known to exist.
    embedding_file = open(qdf_path, encoding="iso-8859-1")
    try:
        with embedding_file:
            # Parse file
            self.l.info("Extracting data, please wait...")
            self.print_conf_extract(start, nums)
            for line in embedding_file:
                # Only lines holding a TJ block can carry data
                if tj_pattern.search(line) is not None:
                    tjs += self.extract_line(line, ch_two)
    finally:
        # The file is closed by now; remove the temporary copy even if
        # parsing failed.
        driver.delete(qdf_path)

    # NB: Hack for custom range (do not shift by 1)
    # TODO: do that better and include in docs
    normalrange = 0 if self.customrange else 1

    # Normalize values
    #
    # TODO: check if really necessary
    modulus = 2 ** self.nbits
    tjs = [(x - normalrange) % modulus for x in tjs]

    # Wrap values around in order to move easily inside
    #
    # TODO: use modulo calculation instead
    tjs_ = tjs + tjs

    # Look for FlagStr at every position after CheckStr, trying as many
    # positions as there are numerals.
    checkstr = None
    embedded = None
    first = start + marker_len
    for k in range(first, first + len(tjs)):
        if nums == tjs_[k:k + marker_len]:
            # End position is found, register it
            end = k + marker_len - 1
            self.l.debug("End position found", end)
            # NB: length = end - start + 1
            checkstr = tjs_[start:first]
            embedded = tjs_[first:k]
            break

    if embedded is None:
        # FlagStr not found
        # -> Fail
        self.l.error("Ending code FlagStr not found")
        return -1

    # FlagStr was found
    # -> Decode embedded data
    self.l.info("Done extracting.")
    self.l.info("Decoding data, please wait...")

    # Decode each numeral into a binary string
    last = len(embedded) - 1
    bit_chunks = []
    for idx, num in enumerate(embedded):
        bits = encoding.num_to_binstr(num, self.nbits)
        if idx == last:
            # Processing the last numeral
            # -> Only take the bits needed
            bits = bits[len(bits) - self.nbits:]
        bit_chunks.append(bits)
    bin_str = "".join(bit_chunks)

    # Decode the full binary string into bytes
    emb_str = b"".join(encoding.decode(bin_str))
    self.debug_extract_print_sum(encoding.encode_key(emb_str, self.nbits),
                                 bin_str, checkstr, embedded, emb_str)

    # Check integrity
    if encoding.digest_to_nums(emb_str, self.nbits) != checkstr:
        # Data does not match embedded checksum
        # -> Fail
        self.l.error("CheckStr does not match embedded data")
        return -1

    # Data matches checksum
    self.l.info("Done decoding.")

    # -> Produce output file
    with open(self.output, "wb") as output_file:
        output_file.write(emb_str)

    # All finished
    self.l.info("Output file: \"" + self.output + "\"")
    return 0
def embed(self, data, passkey, norandom=False):
    """Embed ``data`` into the TJ operators of the input PDF.

    The input file is uncompressed to ``<input>.qdf``; every TJ block
    found in it is passed to ``self.embed_line`` and replaced by the
    block that call returns. The rewritten file is saved as
    ``<output>.raw`` and post-processed by ``driver.fcc`` into
    ``self.output``.

    Args:
        data: message to hide, passed to ``encoding.encode_msg``.
        passkey: key passed to ``encoding.encode_msg``; it also seeds
            ``random.Random`` when ``self.improve`` is set.
        norandom: stored as ``self.norandom``; forced to True when
            ``self.customrange`` is set.

    Returns:
        ``len(nums[1])`` on success, where ``nums`` is the result of
        ``encoding.encode_msg`` (presumably the number of numerals that
        encode the message -- confirm against ``encoding``). When there
        is not enough room, the negated total number of numerals that
        had to be embedded.
    """
    tj_pattern = re.compile(r'\[(.*?)\][ ]?TJ')

    # Initialize state
    self.norandom = norandom
    if self.customrange:
        # NOTE(review): the warning only fires when norandom was already
        # requested -- confirm the condition should not be `not norandom`.
        if norandom:
            self.l.warn("Custom range is enabled, so I am forcing the no-random flag")
        self.norandom = True
    self.tj_count = 0
    self.tj_count_valid = 0
    self.tjs = []
    i = 0
    j = 0
    new_lines = []

    # Get the numerals to embed from the key and the message
    nums = encoding.encode_msg(data, passkey, self.nbits)
    ind = nums[0] + nums[1] + nums[2]

    # Initialize chaotic maps
    if self.improve:
        ch_one = random.Random(encoding.digest(data))
        ch_two = random.Random(passkey)
    else:
        ch_one = chaos.Chaotic(self.mu_one, nums[2])
        ch_two = chaos.Chaotic(self.mu_two, nums[2])

    # Determine start position
    #
    # TODO: fix the random start position. It was meant to be, when
    # self.improve is set, int(len(self.tjs) * ch_two.random()); that
    # code path was disabled, so embedding always starts at 0.
    start = 0

    # Open input file
    #
    # NB: Only works for valid PDF files
    qdf_path = self.input + ".qdf"
    self.l.info("Input file: \"" + self.input + "\"")
    driver.uncompress(self.input, qdf_path)
    # Open before entering the try block so that the cleanup below only
    # runs once the temporary file is known to exist.
    cover_file = open(qdf_path, "rb")
    try:
        with cover_file:
            # Parse file
            self.l.info("Embedding data, please wait...")
            self.print_conf_embed(data, nums)
            for raw_line in cover_file:
                line = raw_line.decode("latin-1")
                # Parse line for TJ blocks, left to right
                k = 0
                while True:
                    # Jump to the next TJ block at or after position k
                    m = tj_pattern.search(line, k)
                    if m is None:
                        break
                    # A TJ block is found
                    # -> Try to embed data in TJ block
                    block = self.embed_line(m.group(1), ch_one, ch_two,
                                            ind, i, start,
                                            len(self.tjs), j)
                    # Insert new block in the line
                    line = line[:m.start(1)] + block[0] + line[m.end(1):]
                    # Update state
                    i = block[1]
                    j = block[2]
                    # Resume right after the inserted block
                    k = m.start(1) + len(block[0])
                # Encode new line
                new_lines.append(line.encode("latin-1"))
            self.debug_embed_check_tj(cover_file)
    finally:
        # The file is closed by now; remove the temporary copy even if
        # embedding failed.
        driver.delete(qdf_path)

    # Check if all data was embedded
    if i < len(ind):
        # All data was not embedded
        # -> Fail
        self.l.error("Not enough space available (only " + str(self.tj_count_valid) + " available, " + str(len(ind)) + " needed)")
        return -len(ind)

    # All data was embedded
    self.l.info("Done embedding.")

    # -> Produce output file
    with open(self.output + ".raw", "wb") as output_file:
        output_file.write(b"".join(new_lines))

    # Fix Compress Clean
    driver.fcc(self.output + ".raw", self.output)
    self.debug_embed_print_sum()
    driver.delete(self.output + ".raw.fix")

    # All finished
    self.l.info("Output file: \"" + self.output + "\"")
    return len(nums[1])
def embed(self, data, passkey, norandom=False):
    """Embed ``data`` into the TJ operators of the input PDF.

    The input file is uncompressed to ``<input>.qdf``; every TJ block
    found in it is passed to ``self.embed_line`` and replaced by the
    block that call returns. The rewritten file is saved as
    ``<output>.raw`` and post-processed by ``driver.fcc`` into
    ``self.output``.

    Args:
        data: message to hide, passed to ``encoding.encode_msg``.
        passkey: key passed to ``encoding.encode_msg``; it also seeds
            ``random.Random`` when ``self.improve`` is set.
        norandom: stored as ``self.norandom``; forced to True when
            ``self.customrange`` is set.

    Returns:
        ``len(nums[1])`` on success, where ``nums`` is the result of
        ``encoding.encode_msg`` (presumably the number of numerals that
        encode the message -- confirm against ``encoding``). When there
        is not enough room, the negated total number of numerals that
        had to be embedded.
    """
    tj_pattern = re.compile(r'\[(.*?)\][ ]?TJ')

    # Initialize state
    self.norandom = norandom
    if self.customrange:
        # NOTE(review): the warning only fires when norandom was already
        # requested -- confirm the condition should not be `not norandom`.
        if norandom:
            self.l.warn("Custom range is enabled, so I am forcing the no-random flag")
        self.norandom = True
    self.tj_count = 0
    self.tj_count_valid = 0
    self.tjs = []
    i = 0
    j = 0
    new_lines = []

    # Get the numerals to embed from the key and the message
    nums = encoding.encode_msg(data, passkey, self.nbits)
    ind = nums[0] + nums[1] + nums[2]

    # Initialize chaotic maps
    if self.improve:
        ch_one = random.Random(encoding.digest(data))
        ch_two = random.Random(passkey)
    else:
        ch_one = chaos.Chaotic(self.mu_one, nums[2])
        ch_two = chaos.Chaotic(self.mu_two, nums[2])

    # Determine start position
    #
    # TODO: fix the random start position. It was meant to be, when
    # self.improve is set, int(len(self.tjs) * ch_two.random()); that
    # code path was disabled, so embedding always starts at 0.
    start = 0

    # Open input file
    #
    # NB: Only works for valid PDF files
    qdf_path = self.input + ".qdf"
    self.l.info("Input file: \"" + self.input + "\"")
    driver.uncompress(self.input, qdf_path)
    # Open before entering the try block so that the cleanup below only
    # runs once the temporary file is known to exist.
    cover_file = open(qdf_path, "rb")
    try:
        with cover_file:
            # Parse file
            self.l.info("Embedding data, please wait...")
            self.print_conf_embed(data, nums)
            for raw_line in cover_file:
                line = raw_line.decode("latin-1")
                # Parse line for TJ blocks, left to right
                k = 0
                while True:
                    # Jump to the next TJ block at or after position k
                    m = tj_pattern.search(line, k)
                    if m is None:
                        break
                    # A TJ block is found
                    # -> Try to embed data in TJ block
                    block = self.embed_line(m.group(1), ch_one, ch_two,
                                            ind, i, start,
                                            len(self.tjs), j)
                    # Insert new block in the line
                    line = line[:m.start(1)] + block[0] + line[m.end(1):]
                    # Update state
                    i = block[1]
                    j = block[2]
                    # Resume right after the inserted block
                    k = m.start(1) + len(block[0])
                # Encode new line
                new_lines.append(line.encode("latin-1"))
            self.debug_embed_check_tj(cover_file)
    finally:
        # The file is closed by now; remove the temporary copy even if
        # embedding failed.
        driver.delete(qdf_path)

    # Check if all data was embedded
    if i < len(ind):
        # All data was not embedded
        # -> Fail
        self.l.error("Not enough space available (only " + str(self.tj_count_valid) + " available, " + str(len(ind)) + " needed)")
        return -len(ind)

    # All data was embedded
    self.l.info("Done embedding.")

    # -> Produce output file
    with open(self.output + ".raw", "wb") as output_file:
        output_file.write(b"".join(new_lines))

    # Fix Compress Clean
    driver.fcc(self.output + ".raw", self.output)
    self.debug_embed_print_sum()
    driver.delete(self.output + ".raw.fix")

    # All finished
    self.l.info("Output file: \"" + self.output + "\"")
    return len(nums[1])