def _SGraphFromJsonTree(json_str):
    """
    Convert the Json Tree to SGraph
    """
    g = json.loads(json_str)
    vertices = [_Vertex(x['id'],
                dict([(str(k), v) for k, v in x.iteritems() if k != 'id']))
                                                      for x in g['vertices']]
    edges = [_Edge(x['src'], x['dst'],
             dict([(str(k), v) for k, v in x.iteritems() if k != 'src' and k != 'dst']))
                                                      for x in g['edges']]
    sg = _SGraph().add_vertices(vertices)
    if len(edges) > 0:
        sg = sg.add_edges(edges)
    return sg
Пример #2
0
def _SGraphFromJsonTree(json_str):
    """
    Convert the Json Tree to SGraph
    """
    g = json.loads(json_str)
    vertices = [_Vertex(x['id'],
                dict([(str(k), v) for k, v in x.iteritems() if k != 'id']))
                                                      for x in g['vertices']]
    edges = [_Edge(x['src'], x['dst'],
             dict([(str(k), v) for k, v in x.iteritems() if k != 'src' and k != 'dst']))
                                                      for x in g['edges']]
    sg = _SGraph().add_vertices(vertices)
    if len(edges) > 0:
        sg = sg.add_edges(edges)
    return sg
import json
from graphlab.data_structures.sframe import SArray as _SArray
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sgraph import SGraph as _SGraph
from graphlab.data_structures.sgraph import Vertex as _Vertex
from graphlab.data_structures.sgraph import Edge as _Edge
from graphlab.cython.cy_sarray import UnitySArrayProxy
from graphlab.cython.cy_sframe import UnitySFrameProxy
from graphlab.cython.cy_graph import UnityGraphProxy
from graphlab.toolkits._main import ToolkitError
import logging as _logging


_proxy_map = {UnitySFrameProxy: (lambda x: _SFrame(_proxy=x)),
              UnitySArrayProxy: (lambda x: _SArray(_proxy=x)),
              UnityGraphProxy: (lambda x: _SGraph(_proxy=x))}

def _add_docstring(format_dict):
  """
  Format a doc-string on the fly.
  @arg format_dict: A dictionary to format the doc-strings
  Example:

    @add_docstring({'context': __doc_string_context})
    def predict(x):
      '''
      {context}
        >> model.predict(data)
      '''
      return x
  """
    def get_path(self, vid, show=False, highlight=None, **kwargs):
        """
        Get the shortest path.
        Return one of the shortest paths between the source vertex defined
        in the model and the query vertex.
        The source vertex is specified by the original call to shortest path.
        Optionally, plots the path with networkx.

        Parameters
        ----------
        vid : string
            ID of the destination vertex. The source vertex ID is specified
            when the shortest path result is first computed.

        show : boolean
            Indicates whether the path should be plotted. Default is False.

        highlight : list
            If the path is plotted, identifies the vertices (by vertex ID) that
            should be highlighted by plotting in a different color.

        kwargs :
            Additional parameters passed into the :func:`graphlab.SGraph.show`
            when `show` is True.

        Returns
        -------
        path : list
            List of pairs of (vertex_id, distance) in the path.

        Examples
        --------
        >>> m.get_path(vid=0, show=True)

        See Also
        --------
        SGraph.show
        """
        if self._path_query_table is None:
            self._path_query_table = self._generate_path_sframe()

        source_vid = self['source_vid']
        path = []
        path_query_table = self._path_query_table
        if not vid in path_query_table['vid']:
            raise ValueError('Destination vertex id ' + str(vid) +
                             ' not found')

        record = path_query_table[path_query_table['vid'] == vid][0]
        dist = record['distance']
        if dist > 1e5:
            raise ValueError(
                'The distance to {} is too large to show the path.'.format(
                    vid))

        path = [(vid, dist)]
        max_iter = len(path_query_table)
        num_iter = 0
        while record['distance'] != 0 and num_iter < max_iter:
            parent_id = record['parent_row_id']
            assert parent_id < len(path_query_table)
            assert parent_id >= 0
            record = path_query_table[parent_id]
            path.append((record['vid'], record['distance']))
            num_iter += 1
        assert record['vid'] == source_vid
        assert num_iter < max_iter
        path.reverse()

        if show is True and len(path) > 1:
            sub_g = _SGraph()
            for i, j in zip(path, path[1:]):
                sub_g = sub_g.add_edges(self['graph'].get_edges(src_ids=[i[0]],
                                                                dst_ids=[j[0]
                                                                         ]),
                                        src_field='__src_id',
                                        dst_field='__dst_id')

            path_highlight = []

            if highlight is not None:

                if not isinstance(highlight, list):
                    raise TypeError("Input 'highlight' must be a list.")

                path_names = set([x[0] for x in path])
                path_highlight = list(
                    set.intersection(path_names, set(highlight)))

            plot = sub_g.show(vlabel='id', highlight=path_highlight, **kwargs)
            if _HAS_IPYTHON:
                _IPython.display(plot)

        return path
    def get_path(self, vid, show=False, highlight=None, **kwargs):
        """
        Get the shortest path.
        Return one of the shortest paths between the source vertex defined
        in the model and the query vertex.
        The source vertex is specified by the original call to shortest path.
        Optionally, plots the path with networkx.

        Parameters
        ----------
        vid : string
            ID of the destination vertex. The source vertex ID is specified
            when the shortest path result is first computed.

        show : boolean
            Indicates whether the path should be plotted. Default is False.

        highlight : list
            If the path is plotted, identifies the vertices (by vertex ID) that
            should be highlighted by plotting in a different color.

        kwargs :
            Additional parameters passed into the :func:`graphlab.SGraph.show`
            when `show` is True.

        Returns
        -------
        path : list
            List of pairs of (vertex_id, distance) in the path.

        Examples
        --------
        >>> m.get_path(vid=0, show=True)

        See Also
        --------
        SGraph.show
        """
        if self._path_query_table is None:
            self._path_query_table = self._generate_path_sframe()

        source_vid = self['source_vid']
        path = []
        path_query_table = self._path_query_table
        if not vid in path_query_table['vid']:
            raise ValueError('Destination vertex id ' + str(vid) + ' not found')

        record = path_query_table[path_query_table['vid'] == vid][0]
        dist = record['distance']
        if dist > 1e5:
            raise ValueError('The distance to {} is too large to show the path.'.format(vid))

        path = [(vid, dist)]
        max_iter = len(path_query_table)
        num_iter = 0
        while record['distance'] != 0 and num_iter < max_iter:
            parent_id = record['parent_row_id']
            assert parent_id < len(path_query_table)
            assert parent_id >= 0
            record = path_query_table[parent_id]
            path.append((record['vid'], record['distance']))
            num_iter += 1
        assert record['vid'] == source_vid
        assert num_iter < max_iter
        path.reverse()

        if show is True and len(path) > 1:
            sub_g = _SGraph()
            for i, j in zip(path, path[1:]):
                sub_g = sub_g.add_edges(self['graph'].get_edges(src_ids=[i[0]], dst_ids=[j[0]]),
                                        src_field='__src_id',
                                        dst_field='__dst_id')

            path_highlight = []

            if highlight is not None:

                if not isinstance(highlight, list):
                    raise TypeError("Input 'highlight' must be a list.")

                path_names = set([x[0] for x in path])
                path_highlight = list(set.intersection(path_names, set(highlight)))

            plot = sub_g.show(vlabel='id', highlight=path_highlight, **kwargs)
            if _HAS_IPYTHON:
                _IPython.display(plot)

        return path
Пример #6
0
import json
from graphlab.data_structures.sframe import SArray as _SArray
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sgraph import SGraph as _SGraph
from graphlab.data_structures.sgraph import Vertex as _Vertex
from graphlab.data_structures.sgraph import Edge as _Edge
from graphlab.cython.cy_sarray import UnitySArrayProxy
from graphlab.cython.cy_sframe import UnitySFrameProxy
from graphlab.cython.cy_graph import UnityGraphProxy
from graphlab.toolkits._main import ToolkitError
import logging as _logging


_proxy_map = {UnitySFrameProxy: (lambda x: _SFrame(_proxy=x)),
              UnitySArrayProxy: (lambda x: _SArray(_proxy=x)),
              UnityGraphProxy: (lambda x: _SGraph(_proxy=x))}

def _add_docstring(format_dict):
  """
  Format a doc-string on the fly.
  @arg format_dict: A dictionary to format the doc-strings
  Example:

    @add_docstring({'context': __doc_string_context})
    def predict(x):
      '''
      {context}
        >> model.predict(data)
      '''
      return x
  """
    def similarity_graph(self,
                         k=5,
                         radius=None,
                         include_self_edges=False,
                         output_type='SGraph',
                         verbose=True):
        """
        Construct the similarity graph on the reference dataset, which is
        already stored in the model. This is conceptually very similar to
        running `query` with the reference set, but this method is optimized
        for the purpose, syntactically simpler, and automatically removes
        self-edges.

        Parameters
        ----------
        k : int, optional
            Maximum number of neighbors to return for each point in the
            dataset. Setting this to ``None`` deactivates the constraint, so
            that all neighbors are returned within ``radius`` of a given point.

        radius : float, optional
            For a given point, only neighbors within this distance are
            returned. The default is ``None``, in which case the ``k`` nearest
            neighbors are returned for each query point, regardless of
            distance.

        include_self_edges : bool, optional
            For most distance functions, each point in the model's reference
            dataset is its own nearest neighbor. If this parameter is set to
            False, this result is ignored, and the nearest neighbors are
            returned *excluding* the point itself.

        output_type : {'SGraph', 'SFrame'}, optional
            By default, the results are returned in the form of an SGraph,
            where each point in the reference dataset is a vertex and an edge A
            -> B indicates that vertex B is a nearest neighbor of vertex A. If
            'output_type' is set to 'SFrame', the output is in the same form as
            the results of the 'query' method: an SFrame with columns
            indicating the query label (in this case the query data is the same
            as the reference data), reference label, distance between the two
            points, and the rank of the neighbor.

        verbose : bool, optional
            If True, print progress updates and model details.

        Returns
        -------
        out : SFrame or SGraph
            The type of the output object depends on the 'output_type'
            parameter. See the parameter description for more detail.

        Notes
        -----
        - If both ``k`` and ``radius`` are set to ``None``, each data point is
          matched to the entire dataset. If the reference dataset has
          :math:`n` rows, the output is an SFrame with :math:`n^2` rows (or an
          SGraph with :math:`n^2` edges).

        - For models created with the 'lsh' method, the output similarity graph
          may have fewer vertices than there are data points in the original
          reference set. Because LSH is an approximate method, a query point
          may have fewer than 'k' neighbors. If LSH returns no neighbors at all
          for a query and self-edges are excluded, the query point is omitted
          from the results.

        Examples
        --------
        First construct an SFrame and create a nearest neighbors model:

        >>> sf = graphlab.SFrame({'x1': [0.98, 0.62, 0.11],
        ...                       'x2': [0.69, 0.58, 0.36]})
        ...
        >>> model = graphlab.nearest_neighbors.create(sf, distance='euclidean')

        Unlike the ``query`` method, there is no need for a second dataset with
        ``similarity_graph``.

        >>> g = model.similarity_graph(k=1)  # an SGraph
        >>> g.show()
        >>> g.edges
        +----------+----------+----------------+------+
        | __src_id | __dst_id |    distance    | rank |
        +----------+----------+----------------+------+
        |    0     |    1     | 0.376430604494 |  1   |
        |    2     |    1     | 0.55542776308  |  1   |
        |    1     |    0     | 0.376430604494 |  1   |
        +----------+----------+----------------+------+
        """
        _mt._get_metric_tracker().track(
            'toolkit.nearest_neighbors.similarity_graph')

        ## Validate inputs.
        if k is not None:
            if not isinstance(k, int):
                raise ValueError("Input 'k' must be an integer.")

            if k <= 0:
                raise ValueError("Input 'k' must be larger than 0.")

        if radius is not None:
            if not isinstance(radius, (int, float)):
                raise ValueError("Input 'radius' must be an integer or float.")

            if radius < 0:
                raise ValueError("Input 'radius' must be non-negative.")

        ## Set k and radius to special values to indicate 'None'
        if k is None:
            k = -1

        if radius is None:
            radius = -1.0

        opts = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'k': k,
            'radius': radius,
            'include_self_edges': include_self_edges
        }

        result = _graphlab.toolkits._main.run(
            '_nearest_neighbors.similarity_graph', opts, verbose)

        knn = _SFrame(None, _proxy=result['neighbors'])

        if output_type == "SFrame":
            return knn

        else:
            sg = _SGraph(edges=knn,
                         src_field='query_label',
                         dst_field='reference_label')
            return sg
    def similarity_graph(self, k=5, radius=None, include_self_edges=False,
                         output_type='SGraph', verbose=True):
        """
        Construct the similarity graph on the reference dataset, which is
        already stored in the model. This is conceptually very similar to
        running `query` with the reference set, but this method is optimized for
        the purpose, syntactically simpler, and automatically removes
        self-edges.

        Parameters
        ----------
        k : int, optional
            Maximum number of neighbors to return for each point in the dataset.
            Setting this to ``None`` deactivates the constraint, so that all
            neighbors are returned within ``radius`` of a given point.

        radius : float, optional
            For a given point, only neighbors within this distance are returned.
            The default is ``None``, in which case the ``k`` nearest neighbors
            are returned for each query point, regardless of distance.

        include_self_edges : bool, optional
            For most distance functions, each point in the model's reference
            dataset is its own nearest neighbor. If this parameter is set to
            False, this result is ignored, and the nearest neighbors are
            returned *excluding* the point itself.

        output_type : {'SGraph', 'SFrame'}, optional
            By default, the results are returned in the form of an SGraph, where
            each point in the reference dataset is a vertex and an edge A -> B
            indicates that vertex B is a nearest neighbor of vertex A. If
            'output_type' is set to 'SFrame', the output is in the same form as
            the results of the 'query' method: an SFrame with columns indicating
            the query label (in this case the query data is the same as the
            reference data), reference label, distance between the two points,
            and the rank of the neighbor.

        verbose : bool, optional
            If True, print progress updates and model details.

        Returns
        -------
        out : SFrame or SGraph
            The type of the output object depends on the 'output_type'
            parameter. See the parameter description for more detail.

        Notes
        -----
        - If both ``k`` and ``radius`` are set to ``None``, each data point is
          matched to the entire dataset. If the reference dataset has
          :math:`n` rows, the output is an SFrame with :math:`n^2` rows (or an
          SGraph with :math:`n^2` edges).

        - For models created with the 'lsh' method, the output similarity graph
          may have fewer vertices than there are data points in the original
          reference set. Because LSH is an approximate method, a query point may
          have fewer than 'k' neighbors. If LSH returns no neighbors at all for
          a query and self-edges are excluded, the query point is omitted from
          the results.

        Examples
        --------
        First construct an SFrame and create a nearest neighbors model:

        >>> sf = graphlab.SFrame({'x1': [0.98, 0.62, 0.11],
        ...                       'x2': [0.69, 0.58, 0.36]})
        ...
        >>> model = graphlab.nearest_neighbors.create(sf, distance='euclidean')

        Unlike the ``query`` method, there is no need for a second dataset with
        ``similarity_graph``.

        >>> g = model.similarity_graph(k=1)  # an SGraph
        >>> g.show()
        >>> g.edges
        +----------+----------+----------------+------+
        | __src_id | __dst_id |    distance    | rank |
        +----------+----------+----------------+------+
        |    0     |    1     | 0.376430604494 |  1   |
        |    2     |    1     | 0.55542776308  |  1   |
        |    1     |    0     | 0.376430604494 |  1   |
        +----------+----------+----------------+------+
        """
        _mt._get_metric_tracker().track(
            'toolkit.nearest_neighbors.similarity_graph')

        ## Validate inputs.
        if k is not None:
            if not isinstance(k, int):
                raise ValueError("Input 'k' must be an integer.")

            if k <= 0:
                raise ValueError("Input 'k' must be larger than 0.")

        if radius is not None:
            if not isinstance(radius, (int, float)):
                raise ValueError("Input 'radius' must be an integer or float.")

            if radius < 0:
                raise ValueError("Input 'radius' must be non-negative.")


        ## Set k and radius to special values to indicate 'None'
        if k is None:
            k = -1

        if radius is None:
            radius = -1.0

        opts = {'model': self.__proxy__,
                'model_name': self.__name__,
                'k': k,
                'radius': radius,
                'include_self_edges': include_self_edges}

        result = _graphlab.toolkits._main.run('_nearest_neighbors.similarity_graph',
                                              opts, verbose)

        knn = _SFrame(None, _proxy=result['neighbors'])

        if output_type == "SFrame":
            return knn

        else:
            sg = _SGraph(edges=knn, src_field='query_label',
                         dst_field='reference_label')
            return sg