def undecided_user_audience_check(g, transient_id, website_url,
                                  thank_you_page_url, since,
                                  min_visited_count):
    """
    Build a yes/no audience-membership traversal for one transient id.

    Starting at the ``transientId`` vertex ``transient_id``, the traversal
    hops in and back out over ``has_identity`` edges, then inspects the
    outgoing ``visited`` edges whose ``ts`` is greater than ``since``.

    The traversal is built to emit ``True`` only when both hold:
        * the number of counted visits to ``website_url`` is strictly greater
          than ``min_visited_count`` (the comparison is ``P.gt``)
        * no visit to ``thank_you_page_url`` was counted
    In every other case it falls back to ``False``.

    The traversal is returned un-iterated; the caller runs it.
    """
    # All "visited" edges newer than `since` reachable from the transient id
    # through its "has_identity" neighbourhood.
    recent_visits = (
        g.V(transient_id)
        .hasLabel("transientId")
        .in_("has_identity")
        .out("has_identity")
        .outE("visited")
        .has("ts", P.gt(since))
    )

    # Tally matching edges into the "visits" side effect under two fixed keys,
    # then emit that side-effect map.
    tally_page_visit = groupCount("visits").by(constant("page_visits"))
    tally_thank_you_visit = groupCount("visits").by(
        constant("thank_you_page_vists"))
    visit_totals = (
        recent_visits
        .choose(has("visited_url", website_url), tally_page_visit)
        .choose(has("visited_url", thank_you_page_url), tally_thank_you_visit)
        .cap("visits")
    )

    # A missing thank-you key is treated as zero visits.
    never_saw_thank_you = coalesce(select("thank_you_page_vists"),
                                   constant(0)).is_(0)
    visited_often_enough = select("page_visits").is_(
        P.gt(min_visited_count))
    member_verdict = and_(never_saw_thank_you, visited_often_enough).choose(
        count().is_(1), constant(True))

    # If the membership branch yields nothing, answer False instead.
    return visit_totals.coalesce(member_verdict, constant(False))
# Example #2
0
    def topology_subgraph(self, topology_id: str,
                          topology_ref: str) -> GraphTraversalSource:
        """ Builds a traversal source restricted to a single topology.

        The returned source is derived from ``self.graph_traversal`` with a
        ``SubgraphStrategy`` whose vertex filter requires both the
        ``topology_ref`` and ``topology_id`` properties to match the supplied
        values.

        Arguments:
            topology_id (str):  The topology identification string.
            topology_ref (str): The reference string for the version of the
                                topology you want to sub-graph.

        Returns:
            A GraphTraversalSource instance linked to the desired sub-graph
        """

        LOG.debug(
            "Creating traversal source for topology %s subgraph with "
            "reference: %s", topology_id, topology_ref)

        # Only vertices carrying both matching properties are kept.
        vertex_filter = has("topology_ref",
                            topology_ref).has("topology_id", topology_id)
        strategy = SubgraphStrategy(vertices=vertex_filter)

        return self.graph_traversal.withStrategies(strategy)
# Example #3
0
def query_users_active_in_n_days(g,
                                 n=30,
                                 today=datetime(2016, 6, 22, 23, 59),
                                 limit=1000):
    """Get users that were active in the ``n`` days before ``today``.

    Builds a single ``ts`` greater-than condition for the cut-off
    ``today - n days`` and delegates to
    ``query_users_active_in_given_date_intervals``.

    Note that ``today`` defaults to the fixed date 2016-06-22 23:59 rather
    than the current time, and ``n`` defaults to 30.
    """
    window_start = today - timedelta(days=n)
    active_since_window_start = has("ts", P.gt(window_start))
    return query_users_active_in_given_date_intervals(
        g, [active_since_window_start], limit)
# Example #4
0
def get_component_paths(graph_client: GremlinClient, topology_id: str,
                        topology_ref: str) -> List[List[str]]:
    """ Gets component level paths through the specified topology.

    For every (source, sink) component pair reported by
    ``get_source_and_sink_comps`` at most one path is looked up (the traversal
    is limited to a single result). Pairs for which no path exists are skipped
    instead of raising ``StopIteration`` out of this function.

    NOTE(review): the component paths are described as fixed for the lifetime
    of a topology, so the result should be cacheable; no caching is applied
    inside this function itself -- confirm whether a caching decorator is
    expected at the definition site.

    Arguments:
        graph_client (GremlinClient):   The graph database client instance.
        topology_id (str):  The topology identification string.
        topology_ref (str): The topology graph identification string.

    Returns:
        List[List[str]]:    A list of component name string path lists. For example
        [["A", "B", "D"], ["A", "C", "D"]]
    """

    sources_sinks: Dict[str, List[str]] = get_source_and_sink_comps(
        graph_client, topology_id, topology_ref)

    sgt: GraphTraversalSource = graph_client.topology_subgraph(
        topology_id, topology_ref)

    output: List[List[str]] = []

    for source in sources_sinks["sources"]:
        # Pick a start vertex for this source
        start: Vertex = sgt.V().has("component", source).next()
        for sink in sources_sinks["sinks"]:
            LOG.debug(
                "Finding paths from source component: %s to sink component: %s",
                source,
                sink,
            )
            # Find at most one path from the source vertex to any vertex of
            # the sink component. toList() is used instead of next() so that an
            # unreachable sink yields an empty list rather than StopIteration.
            # NOTE(review): repeat(out(...)) only adds vertices to the path and
            # path() applies its by() modulators round-robin, so the second,
            # empty by() may leave every other vertex as a non-string element
            # that the filter below drops -- confirm against a topology whose
            # paths have more than two components.
            found_paths: List[List[Union[str, Edge]]] = (sgt.V(start).repeat(
                out("logically_connected").simplePath()).until(
                    has("component",
                        sink)).path().by("component").by().limit(1).toList())

            if not found_paths:
                # This source does not feed this sink; nothing to record.
                LOG.debug(
                    "No path found from source component: %s to sink "
                    "component: %s",
                    source,
                    sink,
                )
                continue

            full_path: List[Union[str, Edge]] = found_paths[0]

            # Keep only the component name strings from the path
            path: List[str] = [
                element for element in full_path if isinstance(element, str)
            ]

            output.append(path)

    return output
def undecided_users_audience(g, website_url, thank_you_page_url, since,
                             min_visited_count):
    """
    Build the traversal selecting the audience of "undecided" users for a site.

    The traversal starts at the ``website`` vertex ``website_url`` and ends by
    emitting ``uid`` values of the vertices reached over ``has_identity`` from
    the selected users. It is returned un-iterated; the caller runs it.

    A user is kept when:
        * their grouped count of ``visited`` edges to the website with ``ts``
          greater than ``since`` is strictly greater than
          ``min_visited_count`` (the comparison is ``P.gt``)
        * their ``pid`` is not among the pids collected from visits to
          ``thank_you_page_url`` with ``ts`` greater than ``since``
    """
    # Users behind recent visits to the website, counted per user and
    # filtered down to the ones above the visit threshold.
    above_threshold = select(Column.values).is_(P.gt(min_visited_count))
    frequent_visitors = (
        g.V(website_url)
        .hasLabel("website")
        .inE("visited")
        .has("ts", P.gt(since))
        .outV()
        .in_("has_identity")
        .groupCount()
        .unfold()
        .dedup()
        .where(above_threshold)
        .select(Column.keys)
        .as_("pids")
    )

    # For each such user: the folded list of pids seen on recent visits to
    # the thank-you page.
    thank_you_page_pids = (
        out("has_identity")
        .outE("visited")
        .has("visited_url", thank_you_page_url)
        .has("ts", P.gt(since))
        .outV()
        .in_("has_identity")
        .dedup()
        .values("pid")
        .fold()
    )

    # Matches users whose pid appears in the collected thank-you list.
    reached_thank_you_page = has("pid", where(P.within("pids_that_visited")))

    return (
        frequent_visitors
        .map(thank_you_page_pids)
        .as_("pids_that_visited")
        .select("pids")
        .not_(reached_thank_you_page)
        .out("has_identity")
        .values("uid")
    )