示例#1
0
def pattern_generator(length, loops=True, exclude_isomorphic=True):
    """Enumerate candidate graph patterns consisting of ``length`` triples.

    Every ``length``-sized combination of triples built from the variable
    nodes ``n0 .. n(length-2)`` plus ``SOURCE_VAR`` / ``TARGET_VAR`` and the
    variable edges ``e0 .. e(length-1)`` is wrapped in a ``GraphPattern``
    and filtered. A candidate is dropped when:

    * ``gp.complete()`` is false (logged as "source or target missing"),
    * its variable nodes or its edges are not a gap-free prefix of the
      numbered variables (e.g. ``n2`` is used although ``n1`` is not),
    * ``loops`` is false and some triple has an identical subject and
      object,
    * ``gp.is_connected()`` is false,
    * ``exclude_isomorphic`` is true and an earlier candidate already had
      the same canonical form.

    :param length: number of triples per pattern.
    :param loops: if false, patterns containing a triple whose subject
        equals its object are excluded.
    :param exclude_isomorphic: if true, each surviving pattern is passed
        through ``canonicalize`` and only the first pattern per canonical
        form is yielded (in its canonical form).
    :return: generator of ``(pid, gp)`` tuples, ``pid`` being the index of
        the candidate combination. The very last item is the sentinel
        ``(n_patterns, None)``.
    """
    var_nodes = [Variable('n%d' % k) for k in range(length - 1)]
    all_nodes = var_nodes + [SOURCE_VAR, TARGET_VAR]
    var_edges = [Variable('e%d' % k) for k in range(length)]

    triples = [
        (subj, pred, obj)
        for subj in all_nodes
        for pred in var_edges
        for obj in all_nodes
    ]

    n_patterns = binom(len(triples), length)
    logger.info(
        'generating %d possible patterns of length %d', n_patterns, length)

    # canonical pattern -> (pid, pattern) of the first candidate that had it
    first_seen = {}
    endpoints = {SOURCE_VAR, TARGET_VAR}
    n_found = 0
    pid = 0  # stays 0 if the enumeration below produces nothing
    for pid, candidate in enumerate(combinations(triples, length)):
        gp = GraphPattern(candidate)

        # source and target both have to occur in the pattern
        if not gp.complete():
            logger.debug('excluded %d: source or target missing: %s', pid, gp)
            continue

        # numbered variables must be used without gaps (n2 requires n1, ...)
        used_nodes = sorted(gp.nodes - endpoints)
        used_edges = sorted(gp.edges)
        if used_nodes != var_nodes[:len(used_nodes)]:
            logger.debug('excluded %d: skipped node: %s', pid, gp)
            continue
        if used_edges != var_edges[:len(used_edges)]:
            logger.debug('excluded %d: skipped edge: %s', pid, gp)
            continue

        # optionally reject triples whose subject equals their object
        if not loops and any(subj == obj for subj, pred, obj in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # disconnected patterns are of no interest
        if not gp.is_connected():
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # keep only the first representative of each canonical form
        if exclude_isomorphic:
            canonical = canonicalize(gp)
            earlier = first_seen.get(canonical)
            if earlier is not None:
                earlier_pid, earlier_gp = earlier
                logger.debug('excluded %d: isomorphic to %d:\n%sand\n%s', pid,
                             earlier_pid, gp, earlier_gp)
                continue
            first_seen[canonical] = (pid, gp)
            gp = canonical

        n_found += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # sanity check: every candidate combination was visited
    assert pid + 1 == n_patterns
    logger.info('found %d differing patterns out of %d possible of length %d',
                n_found, n_patterns, length)
    yield (n_patterns, None)
def pattern_generator(
    length,
    loops=True,
    node_edge_joint=True,
    p_only_connected=True,
    source_target_edges=True,
    exclude_isomorphic=True,
    count_candidates_only=False,
):
    """Enumerate candidate graph patterns consisting of ``length`` triples.

    All ``length``-sized combinations of the possible triples are wrapped
    in a ``GraphPattern`` and filtered; survivors are yielded together
    with the index of their combination.

    :param length: number of triples per pattern.
    :param loops: if false, patterns containing a triple whose subject
        equals its object are excluded.
    :param node_edge_joint: if true, nodes and edges share one pool of
        variables ``v0 .. v(2*length-2)``. Otherwise nodes use
        ``n0 .. n(length-2)``, edges use ``e0 .. e(length-1)``, and
        patterns where ``gp.nodes`` and ``gp.edges`` overlap are excluded.
    :param p_only_connected: forwarded as ``via_edges`` to
        ``gp.is_connected`` (exact semantics live in ``GraphPattern``).
    :param source_target_edges: if true, ``SOURCE_VAR`` and ``TARGET_VAR``
        may also appear in predicate position. Only valid together with
        ``node_edge_joint``.
    :param exclude_isomorphic: if true, each surviving pattern is passed
        through ``canonicalize`` and only the first pattern per canonical
        form is yielded (in its canonical form).
    :param count_candidates_only: if true, only ``(n_patterns, None)`` is
        yielded and no candidate is enumerated.
    :return: generator of ``(pid, gp)`` tuples followed by the sentinel
        ``(n_patterns, None)``.
    """
    assert node_edge_joint or not source_target_edges, (
        'source_target_edges cannot be used without node_edge_joint')

    if node_edge_joint:
        # A connected pattern needs at most 3 + 2 + 2 + ... variables: the
        # first triple may introduce three new ones (?source and ?target
        # included), every further triple has to re-use at least one.
        possible_vars = [Variable('v%d' % k) for k in range(2 * length - 1)]
        possible_nodes = possible_vars + [SOURCE_VAR, TARGET_VAR]
        possible_edges = (
            possible_nodes if source_target_edges else possible_vars)
    else:
        possible_var_nodes = [Variable('n%d' % k) for k in range(length - 1)]
        possible_nodes = possible_var_nodes + [SOURCE_VAR, TARGET_VAR]
        possible_edges = [Variable('e%d' % k) for k in range(length)]

    triples = [
        (subj, pred, obj)
        for subj in possible_nodes
        for pred in possible_edges
        for obj in possible_nodes
    ]

    n_patterns = binom(len(triples), length)
    logger.info(
        'generating %d possible patterns of length %d', n_patterns, length)
    if count_candidates_only:
        yield (n_patterns, None)
        return

    # canonical pattern -> (pid, pattern) of the first candidate that had it
    first_seen = {}
    endpoints = {SOURCE_VAR, TARGET_VAR}
    n_found = 0
    pid = 0  # stays 0 if the enumeration below produces nothing
    for pid, candidate in enumerate(combinations(triples, length)):
        gp = GraphPattern(candidate)

        # source and target both have to occur in the pattern
        if not gp.complete():
            logger.debug('excluded %d: source or target missing: %s', pid, gp)
            continue

        nodes = sorted(gp.nodes - endpoints)
        edges = sorted(gp.edges - endpoints)
        vars_ = sorted(gp.vars_in_graph - endpoints)

        # numbered variables must be used without gaps; which pools are
        # checked depends on whether nodes and edges share their variables
        if node_edge_joint:
            has_gap = vars_ != possible_vars[:len(vars_)]
        else:
            has_gap = (
                nodes != possible_var_nodes[:len(nodes)]
                or edges != possible_edges[:len(edges)]
            )
        if has_gap:
            logger.debug('excluded %d: skipped var: %s', pid, gp)
            continue

        # with separate pools a variable may not be both node and edge
        if not node_edge_joint and (gp.nodes & gp.edges):
            logger.debug('excluded %d: node-edge-joined: %s', pid, gp)
            continue

        # optionally reject triples whose subject equals their object
        if not loops and any(subj == obj for subj, pred, obj in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # disconnected patterns are of no interest
        if not gp.is_connected(via_edges=p_only_connected):
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # keep only the first representative of each canonical form
        if exclude_isomorphic:
            canonical = canonicalize(gp)
            earlier = first_seen.get(canonical)
            if earlier is not None:
                earlier_pid, earlier_gp = earlier
                logger.debug('excluded %d: isomorphic to %d:\n%sand\n%s', pid,
                             earlier_pid, gp, earlier_gp)
                continue
            first_seen[canonical] = (pid, gp)
            gp = canonical

        n_found += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # sanity check: every candidate combination was visited
    assert pid + 1 == n_patterns
    logger.info('found %d differing patterns out of %d possible of length %d',
                n_found, n_patterns, length)
    yield (n_patterns, None)
示例#3
0
def pattern_generator(
    length,
    loops=True,
    node_edge_joint=True,
    p_only_connected=True,
    source_target_edges=True,
    exclude_isomorphic=True,
    count_candidates_only=False,
):
    """Generate the graph patterns made of ``length`` triples.

    The candidates are all ``length``-sized combinations of the possible
    triples. Each one is turned into a ``GraphPattern`` and yielded as
    ``(pid, gp)`` unless one of the filters below rejects it.

    :param length: number of triples per pattern.
    :param loops: when false, a pattern containing a triple with equal
        subject and object is rejected.
    :param node_edge_joint: when true, a single variable pool
        ``v0 .. v(2*length-2)`` serves nodes and edges alike. When false,
        nodes come from ``n0 .. n(length-2)`` and edges from
        ``e0 .. e(length-1)``, and a pattern whose ``gp.nodes`` and
        ``gp.edges`` intersect is rejected.
    :param p_only_connected: handed to ``gp.is_connected`` as its
        ``via_edges`` argument (semantics defined by ``GraphPattern``).
    :param source_target_edges: when true, ``SOURCE_VAR`` / ``TARGET_VAR``
        are allowed as predicates too; asserted to be used only together
        with ``node_edge_joint``.
    :param exclude_isomorphic: when true, patterns are canonicalized with
        ``canonicalize`` and a pattern whose canonical form was seen
        before is rejected; accepted patterns are yielded canonicalized.
    :param count_candidates_only: when true, the generator yields just
        ``(n_patterns, None)`` and stops.
    :return: generator of ``(pid, gp)`` tuples, terminated by the
        sentinel ``(n_patterns, None)``.
    """
    assert node_edge_joint or not source_target_edges, (
        'source_target_edges cannot be used without node_edge_joint')
    known = {}  # canonical pattern -> (pid, original pattern)

    if not node_edge_joint:
        possible_var_nodes = [Variable('n%d' % n) for n in range(length - 1)]
        possible_nodes = possible_var_nodes + [SOURCE_VAR, TARGET_VAR]
        possible_edges = [Variable('e%d' % n) for n in range(length)]
    else:
        # Upper bound on variables of a connected pattern: 3 + 2 + 2 + ...
        # The first triple can bring three fresh ones (counting ?source
        # and ?target); each later triple must share at least one old one.
        possible_vars = [Variable('v%d' % n) for n in range(2 * length - 1)]
        possible_nodes = possible_vars + [SOURCE_VAR, TARGET_VAR]
        possible_edges = possible_vars
        if source_target_edges:
            possible_edges = possible_nodes

    possible_triples = []
    for subj in possible_nodes:
        for pred in possible_edges:
            for obj in possible_nodes:
                possible_triples.append((subj, pred, obj))

    n_patterns = binom(len(possible_triples), length)
    logger.info('generating %d possible patterns of length %d', n_patterns,
                length)
    if count_candidates_only:
        yield (n_patterns, None)
        return

    source_and_target = {SOURCE_VAR, TARGET_VAR}
    accepted = 0
    pid = 0  # remains 0 when the loop body never runs
    for pid, pattern in enumerate(combinations(possible_triples, length)):
        gp = GraphPattern(pattern)

        # the pattern has to mention both source and target
        if not gp.complete():
            logger.debug('excluded %d: source or target missing: %s', pid, gp)
            continue

        nodes = sorted(gp.nodes - source_and_target)
        edges = sorted(gp.edges - source_and_target)
        vars_ = sorted(gp.vars_in_graph - source_and_target)

        # the variables in use must form a prefix of their pool, i.e. no
        # variable is used while a lower-numbered one is left out
        if node_edge_joint:
            gap_free = vars_ == possible_vars[:len(vars_)]
        else:
            gap_free = (
                nodes == possible_var_nodes[:len(nodes)]
                and edges == possible_edges[:len(edges)]
            )
        if not gap_free:
            logger.debug('excluded %d: skipped var: %s', pid, gp)
            continue

        # separate pools: nodes and edges must stay disjoint
        if not node_edge_joint and (gp.nodes & gp.edges):
            logger.debug('excluded %d: node-edge-joined: %s', pid, gp)
            continue

        # drop self-loops unless they are allowed
        if not loops and any(subj == obj for subj, pred, obj in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # drop patterns that are not connected
        if not gp.is_connected(via_edges=p_only_connected):
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # drop patterns isomorphic to one that was generated before
        if exclude_isomorphic:
            cgp = canonicalize(gp)
            if cgp not in known:
                known[cgp] = (pid, gp)
                gp = cgp
            else:
                first_pid, first_gp = known[cgp]
                logger.debug('excluded %d: isomorphic to %d:\n%sand\n%s', pid,
                             first_pid, gp, first_gp)
                continue

        accepted += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # every candidate combination must have been looked at
    assert pid + 1 == n_patterns
    logger.info('found %d differing patterns out of %d possible of length %d',
                accepted, n_patterns, length)
    yield (n_patterns, None)
示例#4
0
def pattern_generator(length, loops=True, exclude_isomorphic=True):
    """Generate the graph patterns made of ``length`` triples.

    Candidates are all ``length``-sized combinations of triples whose
    subjects/objects come from ``n0 .. n(length-2)``, ``SOURCE_VAR`` and
    ``TARGET_VAR`` and whose predicates come from ``e0 .. e(length-1)``.
    Each candidate becomes a ``GraphPattern`` and is yielded as
    ``(pid, gp)`` unless it is rejected because

    * ``gp.complete()`` is false (logged as "source or target missing"),
    * a numbered node or edge is used while a lower-numbered one is not,
    * ``loops`` is false and a triple has equal subject and object,
    * ``gp.is_connected()`` is false, or
    * ``exclude_isomorphic`` is true and its canonical form was already
      seen.

    :param length: number of triples per pattern.
    :param loops: when false, patterns with a triple whose subject equals
        its object are rejected.
    :param exclude_isomorphic: when true, patterns are canonicalized with
        ``canonicalize``; repeated canonical forms are rejected and
        accepted patterns are yielded canonicalized.
    :return: generator of ``(pid, gp)`` tuples, terminated by the
        sentinel ``(n_patterns, None)``.
    """
    node_pool = [Variable('n%d' % n) for n in range(length - 1)]
    edge_pool = [Variable('e%d' % n) for n in range(length)]
    node_choices = node_pool + [SOURCE_VAR, TARGET_VAR]

    possible_triples = []
    for subj in node_choices:
        for pred in edge_pool:
            for obj in node_choices:
                possible_triples.append((subj, pred, obj))

    n_patterns = binom(len(possible_triples), length)
    logger.info('generating %d possible patterns of length %d', n_patterns,
                length)

    known = {}  # canonical pattern -> (pid, original pattern)
    source_and_target = {SOURCE_VAR, TARGET_VAR}
    accepted = 0
    pid = 0  # remains 0 when the loop body never runs
    for pid, pattern in enumerate(combinations(possible_triples, length)):
        gp = GraphPattern(pattern)

        # the pattern has to mention both source and target
        if not gp.complete():
            logger.debug('excluded %d: source or target missing: %s', pid, gp)
            continue

        nodes = sorted(gp.nodes - source_and_target)
        edges = sorted(gp.edges)

        # no gaps allowed, e.g. a link to n2 without n1 being present
        if nodes != node_pool[:len(nodes)]:
            logger.debug('excluded %d: skipped node: %s', pid, gp)
            continue
        if edges != edge_pool[:len(edges)]:
            logger.debug('excluded %d: skipped edge: %s', pid, gp)
            continue

        # drop self-loops unless they are allowed
        if not loops and any(subj == obj for subj, pred, obj in gp):
            logger.debug('excluded %d: loop: %s', pid, gp)
            continue

        # drop patterns that are not connected
        if not gp.is_connected():
            logger.debug('excluded %d: not connected:\n%s', pid, gp)
            continue

        # drop patterns isomorphic to one that was generated before
        if exclude_isomorphic:
            cgp = canonicalize(gp)
            if cgp not in known:
                known[cgp] = (pid, gp)
                gp = cgp
            else:
                first_pid, first_gp = known[cgp]
                logger.debug('excluded %d: isomorphic to %d:\n%sand\n%s', pid,
                             first_pid, gp, first_gp)
                continue

        accepted += 1
        logger.debug('generated pattern %d: %s', pid, gp)
        yield pid, gp

    # every candidate combination must have been looked at
    assert pid + 1 == n_patterns
    logger.info('found %d differing patterns out of %d possible of length %d',
                accepted, n_patterns, length)
    yield (n_patterns, None)