def generic_keydep(self, key_symbol_name, key_length, plaintext_symbol_name, plaintext_length, num_instructions, num_bitflips, skip=0):
    key_dependency_graph = DependencyGraph(DependencyGraphKey.KEY, name="Key dependency graph")
    plaintext_dependency_graph = DependencyGraph(DependencyGraphKey.PLAINTEXT, name="Plaintext dependency graph")

    key = b"\x00" * key_length
    plaintext = b"\x00" * plaintext_length

    clean_state = X64EmulationState(self.elf)
    clean_state.write_symbol(key_symbol_name, key)
    clean_state.write_symbol(plaintext_symbol_name, plaintext)
    clean_state.write_register(UC_X86_REG_RSP, self.elf.sp)

    for b in range(0, num_bitflips):
        # Create reference state: this state will contain a key with all bits zero during the execution
        ref_state = clean_state.copy()

        # Create current key state: this state will contain flipped key bits during the execution
        current_key_state = clean_state.copy()
        current_key_state.write_symbol(key_symbol_name, binascii.unhexlify(("%0" + str(key_length * 2) + "x") % (1 << b)))

        # Create current plaintext state: this state will contain flipped plaintext bits during the execution
        current_plaintext_state = clean_state.copy()
        current_plaintext_state.write_symbol(plaintext_symbol_name, binascii.unhexlify(("%0" + str(plaintext_length * 2) + "x") % (1 << b)))

        # Run <skip> steps
        if skip != 0:
            emulate(ref_state, self.elf.get_symbol_address('stop'), skip)
            emulate(current_key_state, self.elf.get_symbol_address('stop'), skip)
            emulate(current_plaintext_state, self.elf.get_symbol_address('stop'), skip)

        # Emulate for num_instructions steps
        for t in range(1, num_instructions + 1 - skip):
            if t % 10 == 0:
                print("\rBitflipped index: %d, t: %d " % (b, t), end='')

            # Progress reference and current states with 1 step
            emulate(ref_state, self.elf.get_symbol_address('stop'), 1)
            emulate(current_key_state, self.elf.get_symbol_address('stop'), 1)
            emulate(current_plaintext_state, self.elf.get_symbol_address('stop'), 1)

            # Diff states and store result in dependency_graph for time t
            current_key_state.diff(ref_state, b, t, key_dependency_graph)
            current_plaintext_state.diff(ref_state, b, t, plaintext_dependency_graph)

    # Print dependencies
    print(key_dependency_graph)
    print(plaintext_dependency_graph)

    both_modified = DependencyGraph.from_intersection(key_dependency_graph, plaintext_dependency_graph)

    pickle.dump(key_dependency_graph, open('/tmp/key_dependency_graph.p', 'wb'))
    pickle.dump(plaintext_dependency_graph, open('/tmp/plaintext_dependency_graph.p', 'wb'))
    pickle.dump(both_modified, open('/tmp/combined_dependency_graph.p', 'wb'))
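# A minimal, self-contained sketch (not part of the original module) of the
# bit-flip encoding used by generic_keydep() above: ("%0<2*len>x" % (1 << b))
# yields a big-endian hex string with only bit b set, which binascii.unhexlify
# turns into the byte string written over the key symbol. key_length = 16 is
# an assumed example value (e.g. an AES-128 key).
import binascii

key_length = 16  # assumed key size in bytes
for b in (0, 7, 8):
    flipped = binascii.unhexlify(("%0" + str(key_length * 2) + "x") % (1 << b))
    print(b, flipped.hex())
# b = 0 sets the least-significant bit of the last byte; b = 8 moves to the
# next byte up, matching the big-endian layout of the hex encoding.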
def demo():
    dg1 = DependencyGraph("""1    John    _    NNP    _    _    2    SUBJ    _    _
2    sees    _    VB     _    _    0    ROOT    _    _
3    a       _    DT     _    _    4    SPEC    _    _
4    dog     _    NN     _    _    2    OBJ     _    _
""")
    dg2 = DependencyGraph("""1    John    _    NNP    _    _    2    SUBJ    _    _
2    walks   _    VB     _    _    0    ROOT    _    _
""")
    verbose = False

    maltParser = MaltParser()
    maltParser.train([dg1, dg2], verbose=verbose)

    print(maltParser.parse('John sees Mary', verbose=verbose).tree().pprint())
    print(maltParser.parse('a man runs', verbose=verbose).tree().pprint())
def nonprojective_conll_parse_demo():
    graphs = [
        DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry
    ]
    npp = ProbabilisticNonprojectiveParser()
    npp.train(graphs, NaiveBayesDependencyScorer())
    parse_graph = npp.parse(['Cathy', 'zag', 'hen', 'zwaaien', '.'],
                            ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'])
    print(parse_graph)
def tagged_parse(self, sentence, verbose=False):
    """
    Use MaltParser to parse a sentence. Takes a sentence as a list of
    (word, tag) tuples; the sentence must have already been tokenized and
    tagged.

    @param sentence: Input sentence to parse
    @type sentence: L{list} of (word, tag) L{tuple}s.
    @return: C{DependencyGraph} the dependency graph representation
             of the sentence
    """
    if not self._malt_bin:
        raise Exception(
            "MaltParser location is not configured. Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained. Call train() first.")

    input_file = os.path.join(tempfile.gettempdir(), 'malt_input.conll')
    output_file = os.path.join(tempfile.gettempdir(), 'malt_output.conll')

    execute_string = 'java -jar %s -w %s -c %s -i %s -o %s -m parse'
    if not verbose:
        execute_string += ' > ' + os.path.join(tempfile.gettempdir(), "malt.out")

    f = None
    try:
        f = open(input_file, 'w')
        for (i, (word, tag)) in enumerate(sentence):
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (i + 1, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
        f.write('\n')
        f.close()

        cmd = ['java', '-jar %s' % self._malt_bin,
               '-w %s' % tempfile.gettempdir(),
               '-c %s' % self.mco,
               '-i %s' % input_file,
               '-o %s' % output_file,
               '-m parse']

        self._execute(cmd, 'parse', verbose)

        return DependencyGraph.load(output_file)
    finally:
        if f:
            f.close()
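# Hypothetical usage sketch for tagged_parse(); the training graphs mirror
# demo() above, and config_malt() is assumed to have been called so that
# self._malt_bin is set. tagged_parse() takes pre-tagged (word, tag) tuples.
mp = MaltParser()
mp.train([dg1, dg2], verbose=False)
graph = mp.tagged_parse([('John', 'NNP'), ('sees', 'VB'), ('Mary', 'NNP')])
print(graph.tree().pprint())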
def parsed_sents(self, fileids=None):
    sents = concat([
        DependencyCorpusView(fileid, False, True, True, encoding=enc)
        for fileid, enc in self.abspaths(fileids, include_encoding=True)
    ])
    return [DependencyGraph(sent) for sent in sents]
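# Usage sketch: parsed_sents() is how NLTK's dependency corpus readers expose
# DependencyGraph objects; nltk.corpus.dependency_treebank is one reader that
# supports it (assumed installed and downloaded here).
from nltk.corpus import dependency_treebank

graphs = dependency_treebank.parsed_sents()
print(graphs[0].tree())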
def parse(self, tokens):
    """
    Parses the input tokens with respect to the parser's grammar.  Parsing
    is accomplished by representing the search-space of possible parses as
    a fully-connected directed graph.  Arcs that would lead to ungrammatical
    parses are removed and a lattice is constructed of length n, where n is
    the number of input tokens, to represent all possible grammatical
    traversals.  All possible paths through the lattice are then enumerated
    to produce the set of non-projective parses.

    :param tokens: A list of tokens to parse.
    :type tokens: list(str)
    :return: A set of non-projective parses.
    :rtype: list(DependencyGraph)
    """
    # Create graph representation of tokens
    self._graph = DependencyGraph()
    self._graph.nodelist = []  # Remove the default root
    for index, token in enumerate(tokens):
        self._graph.nodelist.append({
            'word': token,
            'deps': [],
            'rel': 'NTOP',
            'address': index,
        })
    for head_node in self._graph.nodelist:
        deps = []
        for dep_node in self._graph.nodelist:
            if (self._grammar.contains(head_node['word'], dep_node['word'])
                    and head_node['word'] != dep_node['word']):
                deps.append(dep_node['address'])
        head_node['deps'] = deps

    # Create lattice of possible heads
    roots = []
    possible_heads = []
    for i, word in enumerate(tokens):
        heads = []
        for j, head in enumerate(tokens):
            if (i != j) and self._grammar.contains(head, word):
                heads.append(j)
        if len(heads) == 0:
            roots.append(i)
        possible_heads.append(heads)

    # Set roots to attempt
    if len(roots) > 1:
        print("No parses found.")
        return False
    elif len(roots) == 0:
        for i in range(len(tokens)):
            roots.append(i)

    # Traverse lattice
    analyses = []
    for root in roots:
        stack = []
        analysis = [[] for i in range(len(possible_heads))]
        i = 0
        forward = True
        while i >= 0:
            if forward:
                if len(possible_heads[i]) == 1:
                    analysis[i] = possible_heads[i][0]
                elif len(possible_heads[i]) == 0:
                    analysis[i] = -1
                else:
                    head = possible_heads[i].pop()
                    analysis[i] = head
                    stack.append([i, head])
            if not forward:
                index_on_stack = False
                for stack_item in stack:
                    if stack_item[0] == i:
                        index_on_stack = True
                orig_length = len(possible_heads[i])

                if index_on_stack and orig_length == 0:
                    # Backtrack: restore the heads that were popped at this index
                    for j in range(len(stack) - 1, -1, -1):
                        stack_item = stack[j]
                        if stack_item[0] == i:
                            possible_heads[i].append(stack.pop(j)[1])
                elif index_on_stack and orig_length > 0:
                    head = possible_heads[i].pop()
                    analysis[i] = head
                    stack.append([i, head])
                    forward = True

            if i + 1 == len(possible_heads):
                analyses.append(analysis[:])
                forward = False
            if forward:
                i += 1
            else:
                i -= 1

    # Filter parses: ensure 1 root, everything has 1 head
    graphs = []
    for analysis in analyses:
        root_count = 0
        root = []
        for i, cell in enumerate(analysis):
            if cell == -1:
                root_count += 1
                root = i
        if root_count == 1:
            graph = DependencyGraph()
            graph.nodelist[0]['deps'] = root + 1
            for i in range(len(tokens)):
                node = {'word': tokens[i], 'address': i + 1}
                node['deps'] = [j + 1 for j in range(len(tokens))
                                if analysis[j] == i]
                graph.nodelist.append(node)
            # cycle = graph.contains_cycle()
            # if not cycle:
            graphs.append(graph)

    return graphs
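# Usage sketch, assuming the parse() method above belongs to NLTK's
# NonprojectiveDependencyParser: build a rule-based DependencyGrammar and
# enumerate the candidate parses it returns. DependencyGrammar.fromstring is
# the modern constructor; older releases used parse_dependency_grammar instead.
from nltk.grammar import DependencyGrammar

grammar = DependencyGrammar.fromstring("""
'taught' -> 'play' | 'man'
'man' -> 'the'
'play' -> 'golf' | 'dog' | 'to'
'dog' -> 'his'
""")
ndp = NonprojectiveDependencyParser(grammar)
for g in ndp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
    print(g)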
def parse(self, tokens, tags):
    """
    Parses a list of tokens in accordance to the MST parsing algorithm
    for non-projective dependency parses.  Assumes that the tokens to
    be parsed have already been tagged and those tags are provided.
    Various scoring methods can be used by implementing the
    ``DependencyScorerI`` interface and passing it to the training
    algorithm.

    :type tokens: list(str)
    :param tokens: A list of words or punctuation to be parsed.
    :type tags: list(str)
    :param tags: A list of tags corresponding by index to the words in
        the tokens list.
    """
    self.inner_nodes = {}

    # Initialize g_graph
    g_graph = DependencyGraph()
    for index, token in enumerate(tokens):
        g_graph.nodelist.append({
            'word': token,
            'tag': tags[index],
            'deps': [],
            'rel': 'NTOP',
            'address': index + 1,
        })
    # Fully connect non-root nodes in g_graph
    g_graph.connect_graph()

    original_graph = DependencyGraph()
    for index, token in enumerate(tokens):
        original_graph.nodelist.append({
            'word': token,
            'tag': tags[index],
            'deps': [],
            'rel': 'NTOP',
            'address': index + 1,
        })

    # Initialize b_graph
    b_graph = DependencyGraph()
    b_graph.nodelist = []

    # Initialize c_graph
    c_graph = DependencyGraph()
    c_graph.nodelist = [{
        'word': token,
        'tag': tags[index],
        'deps': [],
        'rel': 'NTOP',
        'address': index + 1,
    } for index, token in enumerate(tokens)]

    # Assign initial scores to g_graph edges
    self.initialize_edge_scores(g_graph)
    print(self.scores)

    # Initialize a list of unvisited vertices (by node address)
    unvisited_vertices = [vertex['address'] for vertex in c_graph.nodelist]

    # Iterate over unvisited vertices
    nr_vertices = len(tokens)
    betas = {}
    while len(unvisited_vertices) > 0:
        # Mark current node as visited
        current_vertex = unvisited_vertices.pop(0)
        print('current_vertex:', current_vertex)

        # Get corresponding node n_i to vertex v_i
        current_node = g_graph.get_by_address(current_vertex)
        print('current_node:', current_node)

        # Get best in-edge node b for current node
        best_in_edge = self.best_incoming_arc(current_vertex)
        betas[current_vertex] = self.original_best_arc(current_vertex)
        print('best in arc: ', best_in_edge, ' --> ', current_vertex)

        # b_graph = Union(b_graph, b)
        for new_vertex in [current_vertex, best_in_edge]:
            b_graph.add_node({
                'word': 'TEMP',
                'deps': [],
                'rel': 'NTOP',
                'address': new_vertex,
            })
        b_graph.add_arc(best_in_edge, current_vertex)

        # Beta(current node) = b - stored for parse recovery
        # If b_graph contains a cycle, collapse it
        cycle_path = b_graph.contains_cycle()
        if cycle_path:
            # Create a new node v_n+1 with address = len(nodes) + 1
            new_node = {
                'word': 'NONE',
                'deps': [],
                'rel': 'NTOP',
                'address': nr_vertices + 1,
            }
            # c_graph = Union(c_graph, v_n+1)
            c_graph.add_node(new_node)
            # Collapse all nodes in cycle C into v_n+1
            self.update_edge_scores(new_node, cycle_path)
            self.collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph)
            for cycle_index in cycle_path:
                c_graph.add_arc(new_node['address'], cycle_index)
                # self.replaced_by[cycle_index] = new_node['address']

            self.inner_nodes[new_node['address']] = cycle_path

            # Add v_n+1 to list of unvisited vertices
            unvisited_vertices.insert(0, nr_vertices + 1)

            # Increment number-of-nodes counter
            nr_vertices += 1

            # Remove cycle nodes from b_graph; B = B - cycle c
            for cycle_node_address in cycle_path:
                b_graph.remove_by_address(cycle_node_address)

        print('g_graph:\n', g_graph)
        print()
        print('b_graph:\n', b_graph)
        print()
        print('c_graph:\n', c_graph)
        print()
        print('Betas:\n', betas)
        print('replaced nodes', self.inner_nodes)
        print()

    # Recover parse tree
    print('Final scores:\n', self.scores)
    print('Recovering parse...')
    for i in range(len(tokens) + 1, nr_vertices + 1):
        betas[betas[i][1]] = betas[i]
    print('Betas: ', betas)
    new_graph = DependencyGraph()
    for node in original_graph.nodelist:
        node['deps'] = []
    for i in range(1, len(tokens) + 1):
        # print(i, betas[i])
        original_graph.add_arc(betas[i][0], betas[i][1])
    # print(original_graph)
    print('Done.')
    return original_graph
def check_whether_random_plaintexts_affect_key_dependencies(self, n=10):
    """
    Test function that checks whether we can determine the key dependencies
    in a pre-processing step when assuming that the plaintext is constant.
    This is checked by observing whether the key dependencies change if the
    plaintext is changed in a random manner.

    :return:
    """
    tested = set()
    static_pt_dependency_graph = DependencyGraph(
        DependencyGraphKey.KEY, name="Static-plaintext key dependency graph")
    dynamic_pt_dependency_graph = DependencyGraph(
        DependencyGraphKey.KEY, name="Dynamic-plaintext key dependency graph")

    zero_key = b"\x00" * self.key_meta.length
    zero_plaintext = b"\x00" * self.plaintext_meta.length

    clean_state = X64EmulationState(self.elf)
    clean_state.write_symbol(self.key_meta.symbol_name, zero_key)
    clean_state.write_symbol(self.plaintext_meta.symbol_name, zero_plaintext)
    clean_state.write_register(UC_X86_REG_RSP, self.elf.sp)

    # Create reference state: this state will contain a key with all bits
    # zero during the execution
    ref_state = clean_state.copy()
    emulate(ref_state, self.elf.get_symbol_address('stop'), self.num_instructions)

    for b in range(0, self.key_meta.length * 8):
        key = binascii.unhexlify(
            ("%0" + str(self.key_meta.length * 2) + "x") % (1 << b))
        dyn_pt = random_uniform(self.plaintext_meta.length)

        # Flip bit of key but keep static plaintext
        static_pt_state = clean_state.copy()
        static_pt_state.write_symbol(self.key_meta.symbol_name, key)

        # Emulate and diff
        emulate(static_pt_state, self.elf.get_symbol_address('stop'), self.num_instructions)
        static_pt_state.diff(ref_state, b, 0, static_pt_dependency_graph)

        # Flip bit of key but also change plaintext
        dyn_pt_state = clean_state.copy()
        dyn_pt_state.write_symbol(self.key_meta.symbol_name, key)
        dyn_pt_state.write_symbol(self.plaintext_meta.symbol_name, dyn_pt)

        # Emulate and diff
        emulate(dyn_pt_state, self.elf.get_symbol_address('stop'), self.num_instructions)
        dyn_pt_state.diff(ref_state, b, 0, dynamic_pt_dependency_graph)

        eq = static_pt_dependency_graph.has_same_deps(dynamic_pt_dependency_graph)
        if eq is False:
            print("-------Found not equal at bit %d:" % b)
            print_numpy_as_hex(np.array(bytearray(key)), "Key")
            print_numpy_as_hex(np.array(bytearray(dyn_pt)), "Plaintext")

        tested.add((key, dyn_pt))

    print("Tested:")
    print(tested)
    print("Done!")
def analyze(self, random_samples_per_bit=32):
    zero_key = b"\x00" * self.key_meta.length
    zero_plaintext = b"\x00" * self.plaintext_meta.length

    clean_state = X64EmulationState(self.elf)
    clean_state.write_symbol(self.key_meta.symbol_name, zero_key)
    clean_state.write_symbol(self.plaintext_meta.symbol_name, zero_plaintext)
    clean_state.write_register(UC_X86_REG_RSP, self.elf.sp)

    # for b in range(0, self.key_meta.length * 8):
    for b in range(0, 1):
        observations = defaultdict(lambda: [])
        for p in range(0, random_samples_per_bit):
            print("\rBit: %d, pt: %d " % (b, p), end='')

            # Get a random key and plaintext, but set a certain bit of the
            # key to zero / one
            one_dependency_graph = DependencyGraph(
                DependencyGraphKey.KEY, name="Key dependency graph (one values)")
            zero_dependency_graph = DependencyGraph(
                DependencyGraphKey.KEY, name="Key dependency graph (zero values)")
            random_pt = random_uniform(self.plaintext_meta.length)
            random_key = random_uniform(self.key_meta.length)
            mask = np.frombuffer(binascii.unhexlify(
                ("%0" + str(self.key_meta.length * 2) + "x") % (1 << b)),
                dtype=np.uint8)
            imask = np.bitwise_not(mask)
            random_key_zero = bytes(
                np.bitwise_and(imask, np.frombuffer(random_key, dtype=np.uint8)).data)
            random_key_one = bytes(
                np.bitwise_or(mask, np.frombuffer(random_key, dtype=np.uint8)).data)
            # print_numpy_as_hex(mask, "Mask")
            # print_numpy_as_hex(np.frombuffer(random_key_zero, dtype=np.uint8), "Zero")
            # print_numpy_as_hex(np.frombuffer(random_key_one, dtype=np.uint8), "One")

            # Create states
            zero_state = clean_state.copy()
            zero_state.write_symbol(self.key_meta.symbol_name, random_key_zero)
            zero_state.write_symbol(self.plaintext_meta.symbol_name, random_pt)
            one_state = clean_state.copy()
            one_state.write_symbol(self.key_meta.symbol_name, random_key_one)
            one_state.write_symbol(self.plaintext_meta.symbol_name, random_pt)

            # Emulate and store dependencies in key_dependency_graph
            if self.skip != 0:
                emulate(zero_state, self.elf.get_symbol_address('stop'), self.skip)
                emulate(one_state, self.elf.get_symbol_address('stop'), self.skip)
            for t in range(0, self.limit):
                emulate(zero_state, self.elf.get_symbol_address('stop'), 1)
                emulate(one_state, self.elf.get_symbol_address('stop'), 1)
                one_state.diff(zero_state, b, self.skip + t, one_dependency_graph, skip_dup=True)
                zero_state.diff(one_state, b, self.skip + t, zero_dependency_graph, skip_dup=True)

            # Store observations
            # Per bit
            for c in zero_dependency_graph:
                for t in zero_dependency_graph[c][b]:
                    if type(c) is X86ConstEnum:
                        max_shift = 64
                    else:
                        max_shift = 8
                    for shift in range(0, max_shift):
                        mask = 0x01 << shift
                        o = Observation(
                            time=t,
                            key_bit=0,
                            value=1 if int(zero_dependency_graph[c][b][t]) & mask else 0,
                            origin="%s_%d" % (str(c), shift))
                        observations[t].append(o)
            for c in one_dependency_graph:
                for t in one_dependency_graph[c][b]:
                    if type(c) is X86ConstEnum:
                        max_shift = 64
                    else:
                        max_shift = 8
                    for shift in range(0, max_shift):
                        mask = 0x01 << shift
                        o = Observation(
                            time=t,
                            key_bit=1,
                            value=1 if int(one_dependency_graph[c][b][t]) & mask else 0,
                            origin="%s_%d" % (str(c), shift))
                        observations[t].append(o)
            """
            # Per byte
            for c in zero_dependency_graph:
                for t in zero_dependency_graph[c][b]:
                    o = Observation(time=t, key_bit=0, value=zero_dependency_graph[c][b][t], origin="%s" % str(c))
                    observations[t].append(o)
            for c in one_dependency_graph:
                for t in one_dependency_graph[c][b]:
                    o = Observation(time=t, key_bit=1, value=one_dependency_graph[c][b][t], origin="%s" % str(c))
                    observations[t].append(o)
            """
        print('')
        self.calc_observations_mi(observations)
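# Standalone sketch of the bit-forcing trick used in analyze(): a one-hot mask
# over the key bytes is ANDed (via its complement) to clear bit b of a random
# key, or ORed to set it. key_length and b are assumed example values; the two
# derived keys differ only in bit b.
import binascii
import os

import numpy as np

key_length = 16  # assumed key size in bytes
b = 3            # bit index to force

random_key = os.urandom(key_length)
mask = np.frombuffer(
    binascii.unhexlify(("%0" + str(key_length * 2) + "x") % (1 << b)),
    dtype=np.uint8)
imask = np.bitwise_not(mask)
key_zero = bytes(np.bitwise_and(imask, np.frombuffer(random_key, dtype=np.uint8)).data)
key_one = bytes(np.bitwise_or(mask, np.frombuffer(random_key, dtype=np.uint8)).data)
print(key_zero.hex())
print(key_one.hex())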
def show(self):
    for t in self.subgraph:
        self.g.subgraph(self.subgraph[t])
    self.g.view()


if __name__ == "__main__":
    with open('/tmp/key_dependency_graph.p', 'rb') as f:
        key_dependency_graph = pickle.load(f)
    with open('/tmp/plaintext_dependency_graph.p', 'rb') as f:
        plaintext_dependency_graph = pickle.load(f)

    print(key_dependency_graph)
    print(plaintext_dependency_graph)

    union_graph = DependencyGraph.from_union(key_dependency_graph, plaintext_dependency_graph)
    print(union_graph)

    intersection_graph = DependencyGraph.from_intersection(
        key_dependency_graph, plaintext_dependency_graph)
    print(intersection_graph)

    # d = DependencyGraphVisualization(union_graph, lines=True, exclude_partial_registers=False)
    # d.show()
    # d = DependencyGraphVisualization(intersection_graph, lines=True, exclude_partial_registers=False)
    # d.show()
    d = DependencyGraphVisualization(key_dependency_graph, lines=True, exclude_partial_registers=False)
    d.show()