def pattern_generator(length, loops=True, exclude_isomorphic=True):
    """Enumerate candidate graph patterns consisting of `length` triples.

    Every combination of `length` triples over the variable nodes
    ``n0 .. n(length-2)`` plus ``SOURCE_VAR`` / ``TARGET_VAR`` and the
    variable edges ``e0 .. e(length-1)`` is built and then filtered. A
    combination is dropped if ``gp.complete()`` is false, if it skips a node
    or edge variable (e.g. uses ``n2`` without ``n1``), if it contains a loop
    while `loops` is false, if it is not connected, or (optionally) if it is
    isomorphic to a pattern that was already yielded.

    :param length: number of triples in each generated pattern.
    :param loops: if false, patterns containing a triple whose subject
        equals its object are excluded.
    :param exclude_isomorphic: if true, each pattern is canonicalized, only
        the first pattern per canonical form is kept and the canonical form
        is what gets yielded.
    :return: generator of ``(pid, pattern)`` tuples, ``pid`` being the index
        of the combination in enumeration order. Once all combinations have
        been processed a final ``(n_patterns, None)`` tuple is yielded,
        where ``n_patterns`` is the number of candidate combinations.
    """
    # canonical pattern -> (pid, first pattern seen with that canonical form)
    canonicalized_patterns = {}

    possible_var_nodes = [Variable('n%d' % i) for i in range(length - 1)]
    possible_nodes = possible_var_nodes + [SOURCE_VAR, TARGET_VAR]
    possible_edges = [Variable('e%d' % i) for i in range(length)]
    possible_triples = [
        (s, p, o)
        for s in possible_nodes
        for p in possible_edges
        for o in possible_nodes
    ]

    n_patterns = binom(len(possible_triples), length)
    logger.info(
        'generating %d possible patterns of length %d', n_patterns, length)

    endpoints = {SOURCE_VAR, TARGET_VAR}
    n_generated = 0
    pid = 0
    for pid, pattern in enumerate(combinations(possible_triples, length)):
        gp = GraphPattern(pattern)

        # check that source and target are in gp:
        if not gp.complete():
            logger.debug(
                'excluded %d: source or target missing: %s', pid, gp)
            continue

        # check there are no skipped nodes, e.g., link to n2 picked but no n1
        # The used variables are compared to the expected prefix as sets: a
        # sorted list would be in name order (n10 before n2) and could never
        # equal the numerically ordered prefix once 10+ variables are used.
        # NOTE(review): assumes Variable orders by its name string (as rdflib
        # terms do) and that gp.nodes / gp.edges are set-like -- confirm.
        nodes = set(gp.nodes) - endpoints
        if nodes != set(possible_var_nodes[:len(nodes)]):
            logger.debug('excluded %d: skipped node: %s', pid, gp)
            continue
        edges = set(gp.edges)
        if edges != set(possible_edges[:len(edges)]):
            logger.debug('excluded %d: skipped edge: %s', pid, gp)
            continue

        # check for loops if necessary
        if not loops and any(s == o for s, p, o in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # check that the pattern is connected
        if not gp.is_connected():
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # exclude patterns which are isomorphic to already generated ones
        if exclude_isomorphic:
            cgp = canonicalize(gp)
            if cgp in canonicalized_patterns:
                first_pid, first_gp = canonicalized_patterns[cgp]
                logger.debug(
                    'excluded %d: isomorphic to %d:\n%sand\n%s',
                    pid, first_pid, gp, first_gp
                )
                continue
            canonicalized_patterns[cgp] = (pid, gp)
            gp = cgp

        n_generated += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # sanity check: every candidate combination must have been visited
    assert pid + 1 == n_patterns
    logger.info(
        'found %d differing patterns out of %d possible of length %d',
        n_generated, n_patterns, length
    )
    yield (n_patterns, None)
def pattern_generator(
        length,
        loops=True,
        node_edge_joint=True,
        p_only_connected=True,
        source_target_edges=True,
        exclude_isomorphic=True,
        count_candidates_only=False,
):
    """Enumerate candidate graph patterns consisting of `length` triples.

    Every combination of `length` triples over the possible node and edge
    variables is built and then filtered. A combination is dropped if
    ``gp.complete()`` is false, if it skips a variable (e.g. uses ``v2``
    without ``v1``), if nodes and edges share a variable while
    `node_edge_joint` is false, if it contains a loop while `loops` is
    false, if it is not connected, or (optionally) if it is isomorphic to a
    pattern that was already yielded.

    :param length: number of triples in each generated pattern.
    :param loops: if false, patterns containing a triple whose subject
        equals its object are excluded.
    :param node_edge_joint: if true, nodes and edges draw from one shared
        pool of variables ``v0 .. v(2*length-2)``; otherwise separate pools
        ``n%d`` (nodes) and ``e%d`` (edges) are used and patterns in which
        a variable is both node and edge are excluded.
    :param p_only_connected: passed on as ``via_edges`` to
        ``gp.is_connected``.
    :param source_target_edges: if true, ``SOURCE_VAR`` and ``TARGET_VAR``
        may also occur in the edge position. Requires `node_edge_joint`.
    :param exclude_isomorphic: if true, each pattern is canonicalized, only
        the first pattern per canonical form is kept and the canonical form
        is what gets yielded.
    :param count_candidates_only: if true, only ``(n_patterns, None)`` is
        yielded and no pattern is generated.
    :return: generator of ``(pid, pattern)`` tuples, ``pid`` being the index
        of the combination in enumeration order. Once all combinations have
        been processed a final ``(n_patterns, None)`` tuple is yielded,
        where ``n_patterns`` is the number of candidate combinations.
    """
    assert not source_target_edges or node_edge_joint, \
        'source_target_edges cannot be used without node_edge_joint'
    # canonical pattern -> (pid, first pattern seen with that canonical form)
    canonicalized_patterns = {}

    if node_edge_joint:
        # To be connected there are max 3 + 2 + 2 + 2 + ... vars for triples.
        # The first can be 3 different ones (including ?source and ?target),
        # then in each of the following triples at least one var has to be an
        # old one.
        possible_vars = [Variable('v%d' % i) for i in range((2 * length) - 1)]
        possible_nodes = possible_vars + [SOURCE_VAR, TARGET_VAR]
        if source_target_edges:
            possible_edges = possible_nodes
        else:
            possible_edges = possible_vars
    else:
        possible_var_nodes = [Variable('n%d' % i) for i in range(length - 1)]
        possible_nodes = possible_var_nodes + [SOURCE_VAR, TARGET_VAR]
        possible_edges = [Variable('e%d' % i) for i in range(length)]

    possible_triples = [
        (s, p, o)
        for s in possible_nodes
        for p in possible_edges
        for o in possible_nodes
    ]

    n_patterns = binom(len(possible_triples), length)
    logger.info(
        'generating %d possible patterns of length %d', n_patterns, length)
    if count_candidates_only:
        yield (n_patterns, None)
        return

    endpoints = {SOURCE_VAR, TARGET_VAR}
    n_generated = 0
    pid = 0
    for pid, pattern in enumerate(combinations(possible_triples, length)):
        gp = GraphPattern(pattern)

        # check that source and target are in gp:
        if not gp.complete():
            logger.debug(
                'excluded %d: source or target missing: %s', pid, gp)
            continue

        # check there are no skipped variables (nodes or edges)
        # The used variables are compared to the expected prefix as sets: a
        # sorted list would be in name order (v10 before v2) and could never
        # equal the numerically ordered prefix once 10+ variables are used.
        # NOTE(review): assumes Variable orders by its name string (as rdflib
        # terms do) and that gp.nodes / gp.edges / gp.vars_in_graph are
        # set-like -- confirm.
        # noinspection PyUnboundLocalVariable
        if node_edge_joint:
            vars_ = set(gp.vars_in_graph) - endpoints
            skipped_var = vars_ != set(possible_vars[:len(vars_)])
        else:
            nodes = set(gp.nodes) - endpoints
            edges = set(gp.edges) - endpoints
            skipped_var = (
                nodes != set(possible_var_nodes[:len(nodes)]) or
                edges != set(possible_edges[:len(edges)])
            )
        if skipped_var:
            logger.debug('excluded %d: skipped var: %s', pid, gp)
            continue

        # check if nodes and edges are disjoint
        if not node_edge_joint and (gp.nodes & gp.edges):
            logger.debug('excluded %d: node-edge-joined: %s', pid, gp)
            continue

        # check for loops if necessary
        if not loops and any(s == o for s, p, o in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # check that the pattern is connected
        if not gp.is_connected(via_edges=p_only_connected):
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # exclude patterns which are isomorphic to already generated ones
        if exclude_isomorphic:
            cgp = canonicalize(gp)
            if cgp in canonicalized_patterns:
                first_pid, first_gp = canonicalized_patterns[cgp]
                logger.debug(
                    'excluded %d: isomorphic to %d:\n%sand\n%s',
                    pid, first_pid, gp, first_gp
                )
                continue
            canonicalized_patterns[cgp] = (pid, gp)
            gp = cgp

        n_generated += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # sanity check: every candidate combination must have been visited
    assert pid + 1 == n_patterns
    logger.info(
        'found %d differing patterns out of %d possible of length %d',
        n_generated, n_patterns, length
    )
    yield (n_patterns, None)
def pattern_generator(
        length,
        loops=True,
        node_edge_joint=True,
        p_only_connected=True,
        source_target_edges=True,
        exclude_isomorphic=True,
        count_candidates_only=False,
):
    """Generate all distinct graph pattern candidates with `length` triples.

    All combinations of `length` triples over the possible node and edge
    variables are enumerated and filtered. A combination is rejected when
    ``gp.complete()`` is false, when it leaves a gap in the variable
    numbering (e.g. ``v2`` used but ``v1`` not), when a variable is used as
    both node and edge although `node_edge_joint` is false, when it has a
    loop although `loops` is false, when it is not connected, or
    (optionally) when an isomorphic pattern was yielded before.

    :param length: number of triples in each generated pattern.
    :param loops: if false, patterns containing a triple whose subject
        equals its object are rejected.
    :param node_edge_joint: if true, one shared pool of variables
        ``v0 .. v(2*length-2)`` is used for nodes and edges; otherwise nodes
        use ``n%d`` variables, edges use ``e%d`` variables and patterns in
        which a variable is both node and edge are rejected.
    :param p_only_connected: forwarded as ``via_edges`` to
        ``gp.is_connected``.
    :param source_target_edges: if true, ``SOURCE_VAR`` and ``TARGET_VAR``
        are also allowed in the edge position. Requires `node_edge_joint`.
    :param exclude_isomorphic: if true, patterns are canonicalized, only the
        first pattern of each canonical form is kept and the canonical form
        is yielded instead of the raw pattern.
    :param count_candidates_only: if true, the generator yields just
        ``(n_patterns, None)`` without building any pattern.
    :return: generator of ``(pid, pattern)`` tuples with ``pid`` the index
        of the combination in enumeration order, followed by one final
        ``(n_patterns, None)`` tuple where ``n_patterns`` is the number of
        candidate combinations.
    """
    assert not source_target_edges or node_edge_joint, \
        'source_target_edges cannot be used without node_edge_joint'
    # canonical pattern -> (pid, first pattern seen with that canonical form)
    canonicalized_patterns = {}

    if node_edge_joint:
        # To be connected there are max 3 + 2 + 2 + 2 + ... vars for triples.
        # The first can be 3 different ones (including ?source and ?target),
        # then in each of the following triples at least one var has to be an
        # old one.
        possible_vars = [Variable('v%d' % i) for i in range((2 * length) - 1)]
        possible_nodes = possible_vars + [SOURCE_VAR, TARGET_VAR]
        if source_target_edges:
            possible_edges = possible_nodes
        else:
            possible_edges = possible_vars
    else:
        possible_var_nodes = [Variable('n%d' % i) for i in range(length - 1)]
        possible_nodes = possible_var_nodes + [SOURCE_VAR, TARGET_VAR]
        possible_edges = [Variable('e%d' % i) for i in range(length)]

    possible_triples = [
        (s, p, o)
        for s in possible_nodes
        for p in possible_edges
        for o in possible_nodes
    ]

    n_patterns = binom(len(possible_triples), length)
    logger.info(
        'generating %d possible patterns of length %d', n_patterns, length)
    if count_candidates_only:
        yield (n_patterns, None)
        return

    source_and_target = {SOURCE_VAR, TARGET_VAR}
    accepted = 0
    pid = 0
    for pid, pattern in enumerate(combinations(possible_triples, length)):
        gp = GraphPattern(pattern)

        # check that source and target are in gp:
        if not gp.complete():
            logger.debug(
                'excluded %d: source or target missing: %s', pid, gp)
            continue

        # check there are no skipped variables (nodes or edges)
        # Set comparison on purpose: sorting the used variables orders them
        # by name (v10 before v2), which can never match the numerically
        # ordered prefix of the pool once 10+ variables are in use.
        # NOTE(review): assumes Variable orders by its name string (as rdflib
        # terms do) and that gp.nodes / gp.edges / gp.vars_in_graph are
        # set-like -- confirm.
        # noinspection PyUnboundLocalVariable
        if node_edge_joint:
            used_vars = set(gp.vars_in_graph) - source_and_target
            has_gap = used_vars != set(possible_vars[:len(used_vars)])
        else:
            used_nodes = set(gp.nodes) - source_and_target
            used_edges = set(gp.edges) - source_and_target
            has_gap = (
                used_nodes != set(possible_var_nodes[:len(used_nodes)]) or
                used_edges != set(possible_edges[:len(used_edges)])
            )
        if has_gap:
            logger.debug('excluded %d: skipped var: %s', pid, gp)
            continue

        # check if nodes and edges are disjoint
        if not node_edge_joint and (gp.nodes & gp.edges):
            logger.debug('excluded %d: node-edge-joined: %s', pid, gp)
            continue

        # check for loops if necessary
        if not loops and any(s == o for s, p, o in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # check that the pattern is connected
        if not gp.is_connected(via_edges=p_only_connected):
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # exclude patterns which are isomorphic to already generated ones
        if exclude_isomorphic:
            cgp = canonicalize(gp)
            if cgp in canonicalized_patterns:
                earlier_pid, earlier_gp = canonicalized_patterns[cgp]
                logger.debug(
                    'excluded %d: isomorphic to %d:\n%sand\n%s',
                    pid, earlier_pid, gp, earlier_gp
                )
                continue
            canonicalized_patterns[cgp] = (pid, gp)
            gp = cgp

        accepted += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # sanity check: every candidate combination must have been visited
    assert pid + 1 == n_patterns
    logger.info(
        'found %d differing patterns out of %d possible of length %d',
        accepted, n_patterns, length
    )
    yield (n_patterns, None)
def pattern_generator(length, loops=True, exclude_isomorphic=True):
    """Generate all distinct graph pattern candidates with `length` triples.

    All combinations of `length` triples over the variable nodes
    ``n0 .. n(length-2)`` plus ``SOURCE_VAR`` / ``TARGET_VAR`` and the
    variable edges ``e0 .. e(length-1)`` are enumerated and filtered. A
    combination is rejected when ``gp.complete()`` is false, when it leaves
    a gap in the node or edge numbering (e.g. ``n2`` used but ``n1`` not),
    when it has a loop although `loops` is false, when it is not connected,
    or (optionally) when an isomorphic pattern was yielded before.

    :param length: number of triples in each generated pattern.
    :param loops: if false, patterns containing a triple whose subject
        equals its object are rejected.
    :param exclude_isomorphic: if true, patterns are canonicalized, only the
        first pattern of each canonical form is kept and the canonical form
        is yielded instead of the raw pattern.
    :return: generator of ``(pid, pattern)`` tuples with ``pid`` the index
        of the combination in enumeration order, followed by one final
        ``(n_patterns, None)`` tuple where ``n_patterns`` is the number of
        candidate combinations.
    """
    # canonical pattern -> (pid, first pattern seen with that canonical form)
    canonicalized_patterns = {}

    possible_var_nodes = [Variable('n%d' % i) for i in range(length - 1)]
    possible_nodes = possible_var_nodes + [SOURCE_VAR, TARGET_VAR]
    possible_edges = [Variable('e%d' % i) for i in range(length)]
    possible_triples = [
        (s, p, o)
        for s in possible_nodes
        for p in possible_edges
        for o in possible_nodes
    ]

    n_patterns = binom(len(possible_triples), length)
    logger.info(
        'generating %d possible patterns of length %d', n_patterns, length)

    source_and_target = {SOURCE_VAR, TARGET_VAR}
    accepted = 0
    pid = 0
    for pid, pattern in enumerate(combinations(possible_triples, length)):
        gp = GraphPattern(pattern)

        # check that source and target are in gp:
        if not gp.complete():
            logger.debug(
                'excluded %d: source or target missing: %s', pid, gp)
            continue

        # check there are no skipped nodes, e.g., link to n2 picked but no n1
        # Set comparison on purpose: sorting the used variables orders them
        # by name (n10 before n2), which can never match the numerically
        # ordered prefix of the pool once 10+ variables are in use.
        # NOTE(review): assumes Variable orders by its name string (as rdflib
        # terms do) and that gp.nodes / gp.edges are set-like -- confirm.
        used_nodes = set(gp.nodes) - source_and_target
        if used_nodes != set(possible_var_nodes[:len(used_nodes)]):
            logger.debug('excluded %d: skipped node: %s', pid, gp)
            continue
        used_edges = set(gp.edges)
        if used_edges != set(possible_edges[:len(used_edges)]):
            logger.debug('excluded %d: skipped edge: %s', pid, gp)
            continue

        # check for loops if necessary
        if not loops and any(s == o for s, p, o in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # check that the pattern is connected
        if not gp.is_connected():
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # exclude patterns which are isomorphic to already generated ones
        if exclude_isomorphic:
            cgp = canonicalize(gp)
            if cgp in canonicalized_patterns:
                earlier_pid, earlier_gp = canonicalized_patterns[cgp]
                logger.debug(
                    'excluded %d: isomorphic to %d:\n%sand\n%s',
                    pid, earlier_pid, gp, earlier_gp
                )
                continue
            canonicalized_patterns[cgp] = (pid, gp)
            gp = cgp

        accepted += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # sanity check: every candidate combination must have been visited
    assert pid + 1 == n_patterns
    logger.info(
        'found %d differing patterns out of %d possible of length %d',
        accepted, n_patterns, length
    )
    yield (n_patterns, None)