def __init__(self):
    """
    Set up the three collaborators stored on the instance: a GurobiILP
    as ``gilp``, a FeatureVector as ``weights`` and a FeatureExtractor
    as ``feat_extr`` (created in that order).
    """
    self.gilp = GurobiILP()
    self.weights = FeatureVector()
    self.feat_extr = FeatureExtractor()
class Decoder(object):
    """
    Decoder for structured prediction.

    Scores every node and edge of an instance under both tags (0 and 1)
    with ``self.weights``, passes the scores to ``self.gilp.decode`` to
    select a subgraph, and returns the features of the selected (or oracle)
    tagging.
    """

    def __init__(self):
        self.gilp = GurobiILP()
        self.weights = FeatureVector()
        self.feat_extr = FeatureExtractor()

    def _score_items(self, items, oracle_keys, get_feats, cost,
                     curr_filename, my_nodes, my_edges):
        """
        Score each item in ``items`` under tag 0 and tag 1.

        items: mapping from key tuple to node/edge object
        oracle_keys: keys contained in the oracle (gold) graph
        get_feats: ``self.feat_extr.getNodeFeats`` or ``getEdgeFeats``
        cost: cost added to a tagging that disagrees with the oracle,
            or None to disable cost augmentation

        Returns (scores, num_positive): ``scores`` maps ``key + (tag,)`` to
        the (possibly cost-augmented) score, ``num_positive`` counts keys
        whose tag-1 score is strictly greater than 0.0.
        """
        scores = {}
        num_positive = 0
        # .items() instead of .iteritems() so this runs on Python 2 and 3
        for key, item in items.items():
            for tag in (0, 1):
                feats = get_feats(key, item, tag, curr_filename,
                                  my_nodes, my_edges)
                score = self.weights.dot(feats)
                # cost-augmented decoding: a tagging is penalised exactly
                # when it disagrees with the oracle (false positive or
                # false negative); agreeing taggings get a cost of 0
                if cost is not None:
                    disagrees = (tag == 1) != (key in oracle_keys)
                    score += cost if disagrees else 0
                scores[key + (tag,)] = score
                if tag == 1 and score > 0.0:
                    num_positive += 1
        return scores, num_positive

    def _tagged_feats(self, curr_filename, my_nodes, my_edges,
                      chosen_nodes, chosen_edges):
        """
        Accumulate the features of a complete tagging: every edge, then
        every node, tagged 1 if its key is in the chosen collection and 0
        otherwise.
        """
        feat_vec = FeatureVector()
        for k_edge, v_edge in my_edges.items():
            tag = 1 if k_edge in chosen_edges else 0
            feat_vec += self.feat_extr.getEdgeFeats(
                k_edge, v_edge, tag, curr_filename, my_nodes, my_edges)
        for k_node, v_node in my_nodes.items():
            tag = 1 if k_node in chosen_nodes else 0
            feat_vec += self.feat_extr.getNodeFeats(
                k_node, v_node, tag, curr_filename, my_nodes, my_edges)
        return feat_vec

    def decode(self, instance, oracle_len='nolen', node_cost=None,
               edge_cost=None):
        """
        Decode a summary graph for ``instance`` with the ILP decoder.

        an instance includes:
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        selected_nodes: (1,), (3,),... nodes contained in summary graph
        selected_edges: (1,2), (3,1),... edges contained in summary graph

        oracle_len: 'nodes' passes the gold node count to the ILP as
            ``num_selected_nodes``, 'edges' passes the gold edge count as
            ``num_selected_edges``; any other value passes 0 for both
            (NOTE(review): presumably 0 means "unconstrained" -- confirm
            against GurobiILP.decode).
        node_cost, edge_cost: when not None, added to the score of every
            node/edge tagging that disagrees with the oracle.

        Returns (feat_vec, selected_nodes, selected_edges, score_pred),
        where feat_vec holds the features of the decoded tagging.
        """
        logger.debug('start feature extraction...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, root_nodes = instance.nodes  # nodes, selected nodes, roots
        my_edges, oracle_edges = instance.edges  # edges and selected edges
        num_gold_nodes, num_gold_edges = instance.gold  # number of gold nodes and edges

        # get edge weights
        # (the logged counter keeps its historical name; it counts tag-1
        # weights that are strictly positive)
        edge_weights, num_nonnegative_edges = self._score_items(
            my_edges, oracle_edges, self.feat_extr.getEdgeFeats, edge_cost,
            curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_edges]: %d', num_nonnegative_edges)

        # get node weights
        node_weights, num_nonnegative_nodes = self._score_items(
            my_nodes, oracle_nodes, self.feat_extr.getNodeFeats, node_cost,
            curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_nodes]: %d', num_nonnegative_nodes)

        # run Gurobi ILP decoder using node and edge weights
        # optionally set the decoded summary length (#nodes and #edges)
        logger.debug('start ILP decoding...')
        num_selected_nodes = num_gold_nodes if oracle_len == 'nodes' else 0
        num_selected_edges = num_gold_edges if oracle_len == 'edges' else 0

        selected_nodes, selected_edges, score_pred = self.gilp.decode(
            node_weights, edge_weights, root_nodes,
            num_selected_nodes=num_selected_nodes,
            num_selected_edges=num_selected_edges)

        logger.debug('[num_gold_nodes]: %d', num_gold_nodes)
        logger.debug('[num_selected_nodes]: %d', len(selected_nodes))
        logger.debug('[num_gold_edges]: %d', num_gold_edges)
        logger.debug('[num_selected_edges]: %d', len(selected_edges))

        # features that are associated with the decoded graph
        feat_vec = self._tagged_feats(curr_filename, my_nodes, my_edges,
                                      selected_nodes, selected_edges)

        return feat_vec, selected_nodes, selected_edges, score_pred

    def oracle(self, instance):
        """
        Build the feature vector and score of the oracle (gold) graph.

        an instance includes:
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        root_nodes: (1,), (3,),... nodes that are root of sentence
        selected_nodes: (1,), (3,),... nodes contained in summary graph
        selected_edges: (1,2), (3,1),... edges contained in summary graph

        Returns (feat_vec, oracle_nodes, oracle_edges, score_true), where
        score_true is ``self.weights.dot(feat_vec)``.
        """
        logger.debug('start oracle decoding...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, _ = instance.nodes  # nodes and selected nodes
        my_edges, oracle_edges = instance.edges  # edges and selected edges

        # features that are associated with oracle graph
        feat_vec = self._tagged_feats(curr_filename, my_nodes, my_edges,
                                      oracle_nodes, oracle_edges)
        score_true = self.weights.dot(feat_vec)

        return feat_vec, oracle_nodes, oracle_edges, score_true
class Decoder(object):
    """
    Decoder for structured prediction.

    Scores every node and edge of an instance under both tags (0 and 1)
    with ``self.weights``, passes the scores to ``self.gilp.decode`` to
    select a subgraph, and returns the features of the selected (or oracle)
    tagging.
    """

    def __init__(self):
        self.gilp = GurobiILP()
        self.weights = FeatureVector()
        self.feat_extr = FeatureExtractor()

    def _score_items(self, items, oracle_keys, get_feats, cost,
                     curr_filename, my_nodes, my_edges):
        """
        Score each item in ``items`` under tag 0 and tag 1.

        items: mapping from key tuple to node/edge object
        oracle_keys: keys contained in the oracle (gold) graph
        get_feats: ``self.feat_extr.getNodeFeats`` or ``getEdgeFeats``
        cost: cost added to a tagging that disagrees with the oracle,
            or None to disable cost augmentation

        Returns (scores, num_positive): ``scores`` maps ``key + (tag,)`` to
        the (possibly cost-augmented) score, ``num_positive`` counts keys
        whose tag-1 score is strictly greater than 0.0.
        """
        scores = {}
        num_positive = 0
        # .items() instead of .iteritems() so this runs on Python 2 and 3
        for key, item in items.items():
            for tag in (0, 1):
                feats = get_feats(key, item, tag, curr_filename,
                                  my_nodes, my_edges)
                score = self.weights.dot(feats)
                # cost-augmented decoding: a tagging is penalised exactly
                # when it disagrees with the oracle (false positive or
                # false negative); agreeing taggings get a cost of 0
                if cost is not None:
                    disagrees = (tag == 1) != (key in oracle_keys)
                    score += cost if disagrees else 0
                scores[key + (tag,)] = score
                if tag == 1 and score > 0.0:
                    num_positive += 1
        return scores, num_positive

    def _tagged_feats(self, curr_filename, my_nodes, my_edges,
                      chosen_nodes, chosen_edges):
        """
        Accumulate the features of a complete tagging: every edge, then
        every node, tagged 1 if its key is in the chosen collection and 0
        otherwise.
        """
        feat_vec = FeatureVector()
        for k_edge, v_edge in my_edges.items():
            tag = 1 if k_edge in chosen_edges else 0
            feat_vec += self.feat_extr.getEdgeFeats(
                k_edge, v_edge, tag, curr_filename, my_nodes, my_edges)
        for k_node, v_node in my_nodes.items():
            tag = 1 if k_node in chosen_nodes else 0
            feat_vec += self.feat_extr.getNodeFeats(
                k_node, v_node, tag, curr_filename, my_nodes, my_edges)
        return feat_vec

    def decode(self, instance, oracle_len='nolen', node_cost=None,
               edge_cost=None):
        """
        Decode a summary graph for ``instance`` with the ILP decoder.

        an instance includes:
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        selected_nodes: (1,), (3,),... nodes contained in summary graph
        selected_edges: (1,2), (3,1),... edges contained in summary graph

        oracle_len: 'nodes' passes the gold node count to the ILP as
            ``num_selected_nodes``, 'edges' passes the gold edge count as
            ``num_selected_edges``; any other value passes 0 for both
            (NOTE(review): presumably 0 means "unconstrained" -- confirm
            against GurobiILP.decode).
        node_cost, edge_cost: when not None, added to the score of every
            node/edge tagging that disagrees with the oracle.

        Returns (feat_vec, selected_nodes, selected_edges, score_pred),
        where feat_vec holds the features of the decoded tagging.
        """
        logger.debug('start feature extraction...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, root_nodes = instance.nodes  # nodes, selected nodes, roots
        my_edges, oracle_edges = instance.edges  # edges and selected edges
        num_gold_nodes, num_gold_edges = instance.gold  # number of gold nodes and edges

        # get edge weights
        # (the logged counter keeps its historical name; it counts tag-1
        # weights that are strictly positive)
        edge_weights, num_nonnegative_edges = self._score_items(
            my_edges, oracle_edges, self.feat_extr.getEdgeFeats, edge_cost,
            curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_edges]: %d', num_nonnegative_edges)

        # get node weights
        node_weights, num_nonnegative_nodes = self._score_items(
            my_nodes, oracle_nodes, self.feat_extr.getNodeFeats, node_cost,
            curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_nodes]: %d', num_nonnegative_nodes)

        # run Gurobi ILP decoder using node and edge weights
        # optionally set the decoded summary length (#nodes and #edges)
        logger.debug('start ILP decoding...')
        num_selected_nodes = num_gold_nodes if oracle_len == 'nodes' else 0
        num_selected_edges = num_gold_edges if oracle_len == 'edges' else 0

        selected_nodes, selected_edges, score_pred = self.gilp.decode(
            node_weights, edge_weights, root_nodes,
            num_selected_nodes=num_selected_nodes,
            num_selected_edges=num_selected_edges)

        logger.debug('[num_gold_nodes]: %d', num_gold_nodes)
        logger.debug('[num_selected_nodes]: %d', len(selected_nodes))
        logger.debug('[num_gold_edges]: %d', num_gold_edges)
        logger.debug('[num_selected_edges]: %d', len(selected_edges))

        # features that are associated with the decoded graph
        feat_vec = self._tagged_feats(curr_filename, my_nodes, my_edges,
                                      selected_nodes, selected_edges)

        return feat_vec, selected_nodes, selected_edges, score_pred

    def oracle(self, instance):
        """
        Build the feature vector and score of the oracle (gold) graph.

        an instance includes:
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        root_nodes: (1,), (3,),... nodes that are root of sentence
        selected_nodes: (1,), (3,),... nodes contained in summary graph
        selected_edges: (1,2), (3,1),... edges contained in summary graph

        Returns (feat_vec, oracle_nodes, oracle_edges, score_true), where
        score_true is ``self.weights.dot(feat_vec)``.
        """
        logger.debug('start oracle decoding...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, _ = instance.nodes  # nodes and selected nodes
        my_edges, oracle_edges = instance.edges  # edges and selected edges

        # features that are associated with oracle graph
        feat_vec = self._tagged_feats(curr_filename, my_nodes, my_edges,
                                      oracle_nodes, oracle_edges)
        score_true = self.weights.dot(feat_vec)

        return feat_vec, oracle_nodes, oracle_edges, score_true