Exemplo n.º 1
def create(graph, verbose=True):
    """
    Compute the in degree, out degree and total degree of each vertex.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute degree counts.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : DegreeCountingModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.degree_counting.DegreeCountingModel` as follows:

    >>> g = turicreate.load_sgraph('http://snap.stanford.edu/data/web-Google.txt.gz',
    ...                         format='snap')
    >>> m = turicreate.degree_counting.create(g)
    >>> g2 = m['graph']
    >>> g2
    SGraph({'num_edges': 5105039, 'num_vertices': 875713})
    Vertex Fields:['__id', 'in_degree', 'out_degree', 'total_degree']
    Edge Fields:['__src_id', '__dst_id']

    >>> g2.vertices.head(5)
    Columns:
        __id    int
        in_degree    int
        out_degree    int
        total_degree    int
    Rows: 5
    Data:
    +------+-----------+------------+--------------+
    | __id | in_degree | out_degree | total_degree |
    +------+-----------+------------+--------------+
    |  5   |     15    |     7      |      22      |
    |  7   |     3     |     16     |      19      |
    |  8   |     1     |     2      |      3       |
    |  10  |     13    |     11     |      24      |
    |  27  |     19    |     16     |      35      |
    +------+-----------+------------+--------------+

    See Also
    --------
    DegreeCountingModel
    """
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The toolkit receives the graph's underlying proxy, not the wrapper.
    params = _main.run('degree_count', {'graph': graph.__proxy__}, verbose)
    return DegreeCountingModel(params['model'])
Exemplo n.º 2
def create(graph, verbose=True):
    """
    Compute the number of triangles each vertex belongs to, ignoring edge
    directions. A triangle is a complete subgraph with only three vertices.
    Return a model object with total number of triangles as well as the triangle
    counts for each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute triangle counts.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : TriangleCountingModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - T. Schank. (2007) *Algorithmic Aspects of Triangle-Based Network
      Analysis*.

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create a
    :class:`~turicreate.triangle_counting.TriangleCountingModel` as follows:

    >>> g = turicreate.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz',
    ...                           format='snap')
    >>> tc = turicreate.triangle_counting.create(g)

    We can obtain the number of triangles that each vertex in the graph ``g``
    is present in:

    >>> tc_out = tc['triangle_count']  # SFrame

    We can add the new "triangle_count" field to the original graph g using:

    >>> g.vertices['triangle_count'] = tc['graph'].vertices['triangle_count']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    TriangleCountingModel
    """
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The call was cut off after the options dict; `verbose` is forwarded as
    # the third argument, matching the other graph toolkits in this file.
    params = _main.run('triangle_counting', {'graph': graph.__proxy__},
                       verbose)
    return TriangleCountingModel(params['model'])
Exemplo n.º 3
def create(graph, verbose=True):
    """
    Compute the graph coloring. Assign a color to each vertex such that no
    adjacent vertices have the same color. Return a model object with total
    number of colors used as well as the color ID for each vertex in the graph.
    This algorithm is greedy and is not guaranteed to find the **minimum** graph
    coloring. It is also not deterministic, so successive runs may return
    different answers.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the coloring.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : GraphColoringModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - graph coloring <http://en.wikipedia.org/wiki/Graph_coloring>`_

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.graph_coloring.GraphColoringModel` as follows:

    >>> g = turicreate.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> gc = turicreate.graph_coloring.create(g)

    We can obtain the ``color id`` corresponding to each vertex in the graph ``g``
    as follows:

    >>> color_id = gc['color_id']  # SFrame

    We can obtain the total number of colors required to color the graph ``g``
    as follows:

    >>> num_colors = gc['num_colors']

    See Also
    --------
    GraphColoringModel
    """
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The toolkit receives the graph's underlying proxy, not the wrapper.
    params = _main.run('graph_coloring', {'graph': graph.__proxy__}, verbose)
    return GraphColoringModel(params['model'])
Exemplo n.º 4
 def _describe_fields(cls):
     """
     Return a dictionary for the class fields description.

     Fields should NOT be wrapped by _precomputed_field, if necessary.

     The toolkit function to run is looked up by ``cls.__name__`` in a
     dispatch table and invoked through ``_main.run`` with an empty options
     dict.

     Returns
     -------
     out
         Whatever the dispatched toolkit call returns (a dictionary,
         according to the summary above).

     Raises
     ------
     RuntimeError
         If ``cls.__name__`` has no entry in the dispatch table or the
         toolkit call fails.
     """
     dispatch_table = {
         'ShortestPathModel': 'sssp_model_fields',
         'GraphColoringModel': 'graph_coloring_model_fields',
         'PagerankModel': 'pagerank_model_fields',
         'ConnectedComponentsModel': 'connected_components_model_fields',
         'TriangleCountingModel': 'triangle_counting_model_fields',
         'KcoreModel': 'kcore_model_fields',
         'DegreeCountingModel': 'degree_count_model_fields',
         'LabelPropagationModel': 'label_propagation_model_fields',
     }
     # NOTE(review): the try/except scaffolding was missing from this block
     # and has been reconstructed from the indentation of the surviving
     # lines; confirm that every failure should map to RuntimeError.
     try:
         return _main.run(dispatch_table[cls.__name__], {})
     except Exception:
         raise RuntimeError('Model %s does not have fields description' %
                            cls.__name__)
def create(graph, label_field,
           threshold=1e-3,
           weight_field='',
           self_weight=1.0,
           undirected=False,
           max_iterations=None,
           _single_precision=False,
           _distributed='auto',
           verbose=True):
    """
    Given a weighted graph with observed class labels of a subset of vertices,
    infer the label probability for the unobserved vertices using the
    "label propagation" algorithm.

    The algorithm iteratively updates the label probability of current vertex
    as a weighted sum of label probability of self and the neighboring vertices
    until convergence.  See
    :class:`turicreate.label_propagation.LabelPropagationModel` for the details
    of the algorithm.

    Notes: label propagation works well with a small number of labels, i.e.
    binary labels, or less than 1000 classes. The toolkit will throw an error
    if the number of classes exceeds the maximum value (1000).

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the label propagation.

    label_field : str
        Vertex field storing the initial vertex labels. The values must be
        in [0, num_classes). None values indicate unobserved vertex labels.

    threshold : float, optional
        Threshold for convergence, measured in the average L2 norm
        (the sum of squared values) of the delta of each vertex's
        label probability vector.

    weight_field : str, optional
        Field storing the edge weights. If empty, all edges are assumed
        to have unit weight.

    self_weight : float, optional
        The weight for self edge.

    undirected : bool, optional
        If true, treat each edge as undirected, and propagate labels in
        both directions.

    max_iterations : int, optional
        The max number of iterations to run. Default is unlimited.
        If set, the algorithm terminates when either max_iterations
        or convergence threshold is reached.

    _single_precision : bool, optional
        If true, run label propagation in single precision. The resulting
        probability values may be less accurate, but it should run faster
        and use less memory.

    _distributed : distributed environment, internal
        Not read by this function's body.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : LabelPropagationModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph, or if the ``label_field`` vertex
        column is not integer typed.

    References
    ----------
    - Zhu, X., & Ghahramani, Z. (2002). `Learning from labeled and unlabeled data
      with label propagation <http://www.cs.cmu.edu/~zhuxj/pub/CMU-CALD-02-107.pdf>`_.

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.label_propagation.LabelPropagationModel` as follows:

    >>> g = turicreate.load_sgraph('http://snap.stanford.edu/data/email-Enron.txt.gz',
    ...                         format='snap')
    >>> # Initialize random classes for a subset of vertices
    >>> # Leave the unobserved vertices with None label.
    >>> import random
    >>> def init_label(vid):
    ...     x = random.random()
    ...     if x < 0.2:
    ...         return 0
    ...     elif x > 0.9:
    ...         return 1
    ...     else:
    ...         return None
    >>> g.vertices['label'] = g.vertices['__id'].apply(init_label, int)
    >>> m = turicreate.label_propagation.create(g, label_field='label')

    We can obtain for each vertex the predicted label and the probability of
    each label in the graph ``g`` using:

    >>> labels = m['labels']     # SFrame
    >>> labels
    +------+-------+-----------------+-------------------+----------------+
    | __id | label | predicted_label |         P0        |       P1       |
    +------+-------+-----------------+-------------------+----------------+
    |  5   |   1   |        1        |        0.0        |      1.0       |
    |  7   |  None |        0        |    0.8213214997   |  0.1786785003  |
    |  8   |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  10  |  None |        0        |   0.534984718273  | 0.465015281727 |
    |  27  |  None |        0        |   0.752801638549  | 0.247198361451 |
    |  29  |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  33  |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  47  |   0   |        0        |        1.0        |      0.0       |
    |  50  |  None |        0        |   0.788279032657  | 0.211720967343 |
    |  52  |  None |        0        |   0.666666666667  | 0.333333333333 |
    +------+-------+-----------------+-------------------+----------------+
    [36692 rows x 5 columns]

    See Also
    --------
    LabelPropagationModel
    """
    # NOTE(review): the original signature was cut off after `graph,`. The
    # parameter names come from the docstring and the options dict below; the
    # default values (notably threshold, self_weight and _distributed) were
    # restored from the upstream Turi Create API and should be confirmed.
    _raise_error_if_not_of_type(label_field, str)
    _raise_error_if_not_of_type(weight_field, str)

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    if graph.vertices[label_field].dtype != int:
        raise TypeError('label_field %s must be integer typed.' % label_field)

    opts = {
        'label_field': label_field,
        'threshold': threshold,
        'weight_field': weight_field,
        'self_weight': self_weight,
        'undirected': undirected,
        'max_iterations': max_iterations,
        'single_precision': _single_precision,
        'graph': graph.__proxy__,
    }

    params = _main.run('label_propagation', opts, verbose)
    model = params['model']
    return LabelPropagationModel(model)
Exemplo n.º 6
def create(graph, verbose=True):
    """
    Compute the number of weakly connected components in the graph. Return a
    model object with total number of weakly connected components as well as the
    component ID for each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the connected components.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : ConnectedComponentsModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Mathworld Wolfram - Weakly Connected Component
      <http://mathworld.wolfram.com/WeaklyConnectedComponent.html>`_

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.connected_components.ConnectedComponentsModel` as
    follows:

    >>> g = turicreate.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> cc = turicreate.connected_components.create(g)
    >>> cc.summary()

    We can obtain the ``component id`` corresponding to each vertex in the
    graph ``g`` as follows:

    >>> cc_ids = cc['component_id']  # SFrame

    We can obtain a graph with additional information about the ``component
    id`` corresponding to each vertex as follows:

    >>> cc_graph = cc['graph']      # SGraph

    We can add the new component_id field to the original graph g using:

    >>> g.vertices['component_id'] = cc['graph'].vertices['component_id']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    ConnectedComponentsModel
    """
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The call was cut off after the options dict; `verbose` is forwarded as
    # the third argument, matching the other graph toolkits in this file.
    params = _main.run('connected_components', {'graph': graph.__proxy__},
                       verbose)
    return ConnectedComponentsModel(params['model'])
Exemplo n.º 7
def create(graph, kmin=0, kmax=10, verbose=True):
    """
    Compute the K-core decomposition of the graph. Return a model object with
    total number of cores as well as the core id for each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the k-core decomposition.

    kmin : int, optional
        Minimum core id. Vertices having smaller core id than `kmin` will be
        assigned with core_id = `kmin`.

    kmax : int, optional
        Maximum core id. Vertices having larger core id than `kmax` will be
        assigned with core_id = `kmax`.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : KcoreModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - Alvarez-Hamelin, J.I., et al. (2005) `K-Core Decomposition: A Tool for the
      Visualization of Large Networks <http://arxiv.org/abs/cs/0504107>`_.

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.kcore.KcoreModel` as follows:

    >>> g = turicreate.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> kc = turicreate.kcore.create(g)

    We can obtain the ``core id`` corresponding to each vertex in the graph
    ``g`` using:

    >>> kcore_id = kc['core_id']     # SFrame

    We can add the new core id field to the original graph g using:

    >>> g.vertices['core_id'] = kc['graph'].vertices['core_id']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    KcoreModel
    """
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    opts = {'graph': graph.__proxy__, 'kmin': kmin, 'kmax': kmax}
    params = _main.run('kcore', opts, verbose)

    return KcoreModel(params['model'])
Exemplo n.º 8
def create(graph, source_vid, weight_field="", max_distance=1e30, verbose=True):
    """
    Compute the single source shortest path distance from the source vertex to
    all vertices in the graph. Note that because SGraph is directed, shortest
    paths are also directed. To find undirected shortest paths add edges to the
    SGraph in both directions. Return a model object with the distance of each
    vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute shortest paths.

    source_vid : vertex ID
        ID of the source vertex.

    weight_field : string, optional
        The edge field representing the edge weights. If empty, uses unit
        weights.

    max_distance : float, optional
        Forwarded to the toolkit as the ``max_distance`` option.
        NOTE(review): presumably an upper bound on the path distance that is
        considered - confirm against the toolkit.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : ShortestPathModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - ShortestPath <http://en.wikipedia.org/wiki/Shortest_path_problem>`_

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.shortest_path.ShortestPathModel` as follows:

    >>> g = turicreate.load_sgraph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> sp = turicreate.shortest_path.create(g, source_vid=1)

    We can obtain the shortest path distance from the source vertex to each
    vertex in the graph ``g`` as follows:

    >>> sp_sframe = sp['distance']   # SFrame

    We can add the new distance field to the original graph g using:

    >>> g.vertices['distance_to_1'] = sp['graph'].vertices['distance']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    To get the actual path from the source vertex to any destination vertex:

    >>> path = sp.get_path(vid=10)

    We can obtain an auxiliary graph with additional information corresponding
    to the shortest path from the source vertex to each vertex in the graph
    ``g`` as follows:

    >>> sp_graph = sp['graph']       # SGraph

    See Also
    --------
    ShortestPathModel
    """
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    opts = {'source_vid': source_vid, 'weight_field': weight_field,
            'max_distance': max_distance, 'graph': graph.__proxy__}
    # 'sssp' is the toolkit name used for single-source shortest path.
    params = _main.run('sssp', opts, verbose)
    return ShortestPathModel(params['model'])
Exemplo n.º 9
def create(graph, reset_probability=0.15,
           threshold=1e-2,
           max_iterations=20,
           _single_precision=False,
           _distributed='auto',
           verbose=True):
    """
    Compute the PageRank for each vertex in the graph. Return a model object
    with total PageRank as well as the PageRank value for each vertex in the
    graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the pagerank value.

    reset_probability : float, optional
        Probability that a random surfer jumps to an arbitrary page.

    threshold : float, optional
        Threshold for convergence, measured in the L1 norm
        (the sum of absolute value) of the delta of each vertex's
        pagerank value.

    max_iterations : int, optional
        The maximum number of iterations to run.

    _single_precision : bool, optional
        If true, run pagerank in single precision. The resulting
        pagerank values may not be accurate for large graphs, but it
        should run faster and use less memory.

    _distributed : distributed environment, internal
        Not read by this function's body.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : PagerankModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - PageRank <http://en.wikipedia.org/wiki/PageRank>`_
    - Page, L., et al. (1998) `The PageRank Citation Ranking: Bringing Order to
      the Web <http://ilpubs.stanford.edu:8090/422/1/1999-66.pdf>`_.

    Examples
    --------
    If given an :class:`~turicreate.SGraph` ``g``, we can create
    a :class:`~turicreate.pagerank.PagerankModel` as follows:

    >>> g = turicreate.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> pr = turicreate.pagerank.create(g)

    We can obtain the page rank corresponding to each vertex in the graph ``g``
    using:

    >>> pr_out = pr['pagerank']     # SFrame

    We can add the new pagerank field to the original graph g using:

    >>> g.vertices['pagerank'] = pr['graph'].vertices['pagerank']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    PagerankModel
    """
    # NOTE(review): the original signature was cut off after `graph,`. The
    # parameter names come from the docstring and the options dict below; the
    # default values were restored from the upstream Turi Create API and
    # should be confirmed.
    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    opts = {
        'threshold': threshold,
        'reset_probability': reset_probability,
        'max_iterations': max_iterations,
        'single_precision': _single_precision,
        'graph': graph.__proxy__,
    }

    params = _main.run('pagerank', opts, verbose)
    model = params['model']

    return PagerankModel(model)
Exemplo n.º 10
    def extract_features(self, dataset, missing_value_action='auto'):
        """
        For each example in the dataset, extract the leaf indices of
        each tree as features.

        For multiclass classification, each leaf index contains #num_class
        numbers.

        The returned feature vectors can be used as input to train another
        supervised learning model such as an
        :py:class:`~turicreate.svm_classifier.SVMClassifier`.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Choose a model dependent missing value policy.
            - 'impute': Proceed with evaluation by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during evaluation.
            - 'none': Treat missing value as is. Model must be able to handle
                      missing value.
            - 'error' : Do not proceed with prediction and terminate with
                        an error message.

        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Examples
        --------
        >>> data = turicreate.SFrame(path_to_data)  # has the model's feature columns

        >>> # Regression Tree Models
        >>> data['regression_tree_features'] = model.extract_features(data)

        >>> # Classification Tree Models
        >>> data['classification_tree_features'] = model.extract_features(data)
        """
        _raise_error_if_not_sframe(dataset, "dataset")
        # Resolve 'auto' to the model-specific policy before calling the
        # toolkit.
        if missing_value_action == 'auto':
            missing_value_action = select_default_missing_value_policy(
                self, 'extract_features')

        options = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'missing_value_action': missing_value_action,
            'dataset': dataset,
        }
        target = _toolkits_main.run('supervised_learning_feature_extraction',
                                    options)
        return _map_unity_proxy_to_object(target['extracted'])