Example No. 1
    def test_parallel_evaluation(self):
        xin = 33
        repeat = 8
        # execute the batch of tasks using one process to get a baseline
        start_time = time.time()
        glconnect.get_unity().eval_lambda(lambda x: [fib(i) for i in x],
                                          [xin] * repeat)
        single_thread_time = time.time() - start_time
        logging.info("Single thread lambda eval takes %s secs" %
                     single_thread_time)

        # execute the task in parallel
        start_time = time.time()
        ans_list = glconnect.get_unity().parallel_eval_lambda(
            lambda x: fib(x), [xin] * repeat)
        multi_thread_time = time.time() - start_time
        logging.info("Multi thread lambda eval takes %s secs" %
                     multi_thread_time)

        # test the speed-up from running in parallel
        nproc = multiprocessing.cpu_count()
        if (nproc > 1 and multi_thread_time > (single_thread_time / 1.5)):
            logging.warning(
                "Slow parallel processing: single thread takes %s secs, multithread on %s procs takes %s secs"
                % (single_thread_time, nproc, multi_thread_time))

        # test accuracy
        ans = fib(xin)
        for a in ans_list:
            self.assertEqual(a, ans)
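
The lambda-evaluation tests above and below call a fib helper that is defined elsewhere in the test module; a minimal sketch of what such a helper could look like (the actual implementation in the test suite may differ):

def fib(n):
    # Naive recursive Fibonacci; deliberately slow so that the parallel
    # speed-up measured above is noticeable. fib(1) == 1, matching the
    # assertion in the simple-evaluation example below.
    if n <= 2:
        return 1
    return fib(n - 1) + fib(n - 2)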
Example No. 2
    def test_exception(self):
        x = 3
        self.assertRaises(RuntimeError,
                          glconnect.get_unity().eval_lambda, lambda y: x / y,
                          0)
        self.assertRaises(RuntimeError,
                          glconnect.get_unity().parallel_eval_lambda,
                          lambda y: x / y, [0 for i in range(10)])
Example No. 3
    def test_simple_evaluation(self):
        x = 3
        self.assertEqual(glconnect.get_unity().eval_lambda(lambda y: y + x, 0),
                         3)
        self.assertEqual(glconnect.get_unity().eval_lambda(lambda y: y + x, 1),
                         4)
        self.assertEqual(
            glconnect.get_unity().eval_lambda(lambda x: x.upper(), 'abc'),
            'ABC')
        self.assertEqual(
            glconnect.get_unity().eval_lambda(lambda x: x.lower(), 'ABC'),
            'abc')
        self.assertEqual(glconnect.get_unity().eval_lambda(fib, 1), 1)
Example No. 4
    def save(self, location):
        """
        Save the model. The model is saved as a directory which can then be
        loaded using the :py:func:`~turicreate.load_model` method.

        Parameters
        ----------
        location : string
            Target destination for the model. Can be a local path or remote URL.

        See Also
        --------
        turicreate.load_model

        Examples
        --------
        >>> model.save('my_model_file')
        >>> loaded_model = tc.load_model('my_model_file')

        """
        import copy
        state = copy.copy(self._get_native_state())
        state['model_version'] = self._get_version()
        return glconnect.get_unity().save_model2(self.__class__._native_name(),
                                                 location, state)
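
The save() call above relies on three hooks on the model class: _native_name(), _get_version(), and _get_native_state(). A minimal sketch of a class providing these hooks, assuming a CustomModel-style subclass; the class itself is hypothetical and the base class and registration machinery are omitted:

class MyCounterModel(object):
    # Hypothetical model used only to illustrate the hooks consumed by save().
    def __init__(self, counts):
        self.counts = counts

    @classmethod
    def _native_name(cls):
        # Name under which the model's state is stored in the archive.
        return "MyCounterModel"

    def _get_version(self):
        # Schema version written into the saved state as 'model_version'.
        return 1

    def _get_native_state(self):
        # Plain dict of state that the unity backend can serialize.
        return {"counts": self.counts}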
Example No. 5
    def test_degree_count(self):
        if "degree_count" in get_unity().list_toolkit_functions():
            m = tc.degree_counting.create(self.graph)
            m.summary()
            self.__test_model_save_load_helper__(m)

            g = m.graph
            expected_out_deg = g.edges.groupby(
                '__src_id', {'expected': tc.aggregate.COUNT})
            expected_out_deg = expected_out_deg.join(g.vertices[['__id']],
                                                     on={
                                                         '__src_id': "__id"
                                                     },
                                                     how="right").fillna(
                                                         "expected", 0)
            expected_out_deg = expected_out_deg.sort("__src_id")['expected']
            expected_in_deg = g.edges.groupby('__dst_id',
                                              {'expected': tc.aggregate.COUNT})
            expected_in_deg = expected_in_deg.join(g.vertices[['__id']],
                                                   on={
                                                       '__dst_id': "__id"
                                                   },
                                                   how="right").fillna(
                                                       "expected", 0)
            expected_in_deg = expected_in_deg.sort("__dst_id")['expected']

            sf = g.vertices.sort('__id')
            actual_out_deg = sf['out_degree']
            actual_in_deg = sf['in_degree']
            actual_all_deg = sf['total_degree']
            self.assertEqual((expected_in_deg - actual_in_deg).sum(), 0)
            self.assertEqual((expected_out_deg - actual_out_deg).sum(), 0)
            self.assertEqual(
                (actual_all_deg - (actual_out_deg + actual_in_deg)).sum(), 0)
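
For context, a short end-to-end sketch of the degree_counting toolkit this test exercises; the toy graph is made up for illustration:

import turicreate as tc

# A tiny directed graph: 0 -> 1, 0 -> 2, 1 -> 2.
g = tc.SGraph().add_edges([tc.Edge(0, 1), tc.Edge(0, 2), tc.Edge(1, 2)])

m = tc.degree_counting.create(g)

# Each vertex of m.graph gains 'in_degree', 'out_degree', and 'total_degree'.
verts = m.graph.vertices.sort('__id')
print(verts[['__id', 'in_degree', 'out_degree', 'total_degree']])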
Example No. 6
    def test_graph_coloring(self):
        if "graph_coloring" in get_unity().list_toolkit_functions():
            m = tc.graph_coloring.create(self.graph)
            print(m)
            m.summary()
            # coloring is non-deterministic, so we cannot verify the result here
            self.__test_model_save_load_helper__(m)
Example No. 7
    def test_triangle_counting(self):
        if "triangle_counting" in get_unity().list_toolkit_functions():
            m = tc.triangle_counting.create(self.graph)
            print(m)
            m.summary()
            self.__test_model_save_load_helper__(m)
            self.assertEqual(m.num_triangles, 934)
Example No. 8
    def test_pagerank(self):
        if "pagerank" in get_unity().list_toolkit_functions():
            m = tc.pagerank.create(self.graph)
            print(m)
            m.summary()
            self.assertEqual((m.pagerank.num_rows(), m.pagerank.num_columns()),
                             (self.graph.summary()['num_vertices'], 3))
            self.assertEqual(int(m.pagerank['pagerank'].sum()), 2727)
            self.__test_model_save_load_helper__(m)

            m2 = tc.pagerank.create(self.graph, reset_probability=0.5)
            print(m2)
            self.assertEqual(
                (m2.pagerank.num_rows(), m2.pagerank.num_columns()),
                (self.graph.summary()['num_vertices'], 3))
            self.assertAlmostEqual(m2.pagerank['pagerank'].sum(),
                                   7087.08,
                                   delta=1e-2)
            with self.assertRaises(Exception):
                assert_frame_equal(m.pagerank.topk('pagerank'),
                                   m2.pagerank.topk('pagerank'))

            pr_out = m2['pagerank']
            with self.assertRaises(Exception):
                assert_frame_equal(m.pagerank.topk('pagerank'),
                                   pr_out.topk('pagerank'))

            self.__test_model_save_load_helper__(m2)
Example No. 9
    def test_shortest_path(self):
        if "sssp" in get_unity().list_toolkit_functions():
            m = tc.shortest_path.create(self.graph, source_vid=0)
            print(m)
            m.summary()
            self.__test_model_save_load_helper__(m)

            m2 = tc.shortest_path.create(self.graph, source_vid=0)
            print(m2)
            self.__test_model_save_load_helper__(m2)

            # Test get_path function on a simple chain graph and star graph
            chain_graph = tc.SGraph().add_edges([tc.Edge(i, i + 1) for i in range(10)])
            m3 = tc.shortest_path.create(chain_graph, source_vid=0)
            for i in range(10):
                self.assertSequenceEqual(m3.get_path(i), [(j, float(j)) for j in range(i + 1)])

            star_graph = tc.SGraph().add_edges([tc.Edge(0, i + 1) for i in range(10)])
            m4 = tc.shortest_path.create(star_graph, source_vid=0)
            for i in range(1, 11):
                self.assertSequenceEqual(m4.get_path(i), [(0, 0.0), (i, 1.0)])

            # Test that get_path with show=True doesn't break.
            #
            # Showing is problematic when a browser is actually available,
            # since it would pause the script, so the call stays commented out.
            # m4.get_path(i, show=True)

            # Test that sssp ignores an existing 'distance' field on the vertices
            star_graph.vertices['distance'] = 0
            m5 = tc.shortest_path.create(star_graph, source_vid=0)
            for i in range(1, 11):
                self.assertSequenceEqual(m5.get_path(i), [(0, 0.0), (i, 1.0)])
Example No. 10
    def test_crash_recovery(self):
        import time, sys
        ls = list(range(1000))

        def good_fun(x):
            return x

        def bad_fun(x):
            if (x + 1) % 251 == 0:
                # this will force the worker process to exit
                cy_test_utils.force_exit_fun()
            return x

        self.assertRaises(
            RuntimeError, lambda: glconnect.get_unity().parallel_eval_lambda(
                lambda x: bad_fun(x), ls))
        glconnect.get_unity().parallel_eval_lambda(lambda x: good_fun(x), ls)
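
The crash-recovery test depends on an internal cy_test_utils.force_exit_fun helper; a rough stand-in with the same effect (a hypothetical substitute, not the library's helper) could be:

import os

def force_exit_fun():
    # Terminate the current (worker) process immediately, skipping any
    # cleanup, to simulate an unexpected worker crash.
    os._exit(1)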
Example No. 11
    def test_connected_component(self):
        if "connected_component" in get_unity().list_toolkit_functions():
            m = tc.connected_components.create(self.graph)
            print(m)
            m.summary()
            print(m.component_id)
            print(m.component_size)
            self.assertEqual(m.component_size.num_rows(), 1)
            self.__test_model_save_load_helper__(m)
Example No. 12
    def test_kcore(self):
        if "kcore" in get_unity().list_toolkit_functions():
            m = tc.kcore.create(self.graph)
            print(m)
            m.summary()
            biggest_core = m.core_id.groupby('core_id', tc.aggregate.COUNT).topk('Count').head(1)
            self.assertEqual(biggest_core['core_id'][0], 6)
            self.assertEqual(biggest_core['Count'][0], 4492)
            self.__test_model_save_load_helper__(m)
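
A brief usage sketch of the kcore toolkit checked above, including the kmin/kmax bounds it accepts; the ring graph is illustrative only:

import turicreate as tc

# A 5-vertex ring graph.
g = tc.SGraph().add_edges([tc.Edge(i, (i + 1) % 5) for i in range(5)])

# Assign every vertex a core id between kmin and kmax.
m = tc.kcore.create(g, kmin=0, kmax=10)
print(m.core_id)  # SFrame with '__id' and 'core_id' columns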
Example No. 13
    def test_label_propagation(self):
        if "label_propagation" in get_unity().list_toolkit_functions():
            g = self.graph.copy()
            num_vertices = len(g.vertices)
            num_classes = 2

            def get_label(vid):
                if vid < 100:
                    return 0
                elif vid > num_vertices - 100:
                    return 1
                else:
                    return None
            g.vertices['label'] = g.vertices['__id'].apply(get_label, int)
            m = tc.label_propagation.create(g, label_field='label')

            m.summary()
            self.__test_model_save_load_helper__(m)

            for row in m.graph.vertices:
                predicted_label = row['predicted_label']
                if predicted_label is None:
                    for k in ['P%d' % i for i in range(num_classes)]:
                        self.assertAlmostEqual(row[k], 1.0 / num_classes)
                else:
                    sum_of_prob = 0.0
                    for k in ['P%d' % i for i in range(num_classes)]:
                        sum_of_prob += row[k]
                        self.assertGreaterEqual(row['P%d' % predicted_label], row[k])
                    self.assertAlmostEqual(sum_of_prob, 1.0)

            # Exercise more options: weighted edges, a non-default self weight, and undirected edges
            def get_edge_weight(vid):
                return float(vid) * 10 / num_vertices
            g.edges['weight'] = g.edges['__src_id'].apply(get_edge_weight, float)
            m = tc.label_propagation.create(g, label_field='label', threshold=1e-2,
                                            weight_field='weight', self_weight=0.5,
                                            undirected=True)

            # Test early termination using max_iterations
            max_iter = 3
            m = tc.label_propagation.create(g, label_field='label', threshold=1e-10, max_iterations=max_iter)
            self.assertEqual(m.num_iterations, max_iter)

            # Test that the predicted class is None when all class probabilities are equal
            g = g.add_vertices(tc.SFrame({'__id': [-1]}))
            m = tc.label_propagation.create(g, label_field='label', threshold=1e-10, max_iterations=max_iter)
            result = m.graph.vertices
            self.assertEqual(result[result['__id'] == -1]['predicted_label'][0], None)
Example No. 14
    def save(self, location):
        """
        Save the model. The model is saved as a directory which can then be
        loaded using the :py:func:`~turicreate.load_model` method.

        Parameters
        ----------
        location : string
            Target destination for the model. Can be a local path or remote URL.

        See Also
        --------
        turicreate.load_model

        Examples
        --------
        >>> model.save('my_model_file')
        >>> loaded_model = turicreate.load_model('my_model_file')
        """
        return glconnect.get_unity().save_model(self, _make_internal_url(location))
Example No. 15
def load_model(location):
    """
    Load any Turi Create model that was previously saved.

    This function assumes the model (of any model type) was previously saved in
    the Turi Create model format with model.save(filename).

    Parameters
    ----------
    location : string
        Location of the model to load. Can be a local path or a remote URL.
        Because models are saved as directories, there is no file extension.

    Examples
    --------
    >>> model.save('my_model_file')
    >>> loaded_model = tc.load_model('my_model_file')
    """

    # Check whether the location is a dir_archive; if not, use GLUnpickler to
    # load it as a pure Python model.
    # If the location is an http location, skip the check and proceed directly
    # to loading the model as a dir_archive. This is because
    # 1) exists() does not work with the http protocol, and
    # 2) GLUnpickler does not support http.
    protocol = file_util.get_protocol(location)
    dir_archive_exists = False
    if protocol == '':
        model_path = file_util.expand_full_path(location)
        dir_archive_exists = file_util.exists(
            os.path.join(model_path, 'dir_archive.ini'))
    else:
        model_path = location
        if protocol in ['http', 'https']:
            dir_archive_exists = True
        else:
            import posixpath
            dir_archive_exists = file_util.exists(
                posixpath.join(model_path, 'dir_archive.ini'))
    if not dir_archive_exists:
        raise IOError("Directory %s does not exist" % location)

    _internal_url = _make_internal_url(location)
    saved_state = glconnect.get_unity().load_model(_internal_url)
    # The archive version key could be either bytes or unicode
    key = u'archive_version'
    archive_version = saved_state[key] if key in saved_state else saved_state[
        key.encode()]
    if archive_version < 0:
        raise ToolkitError("File does not appear to be a Turi Create model.")
    elif archive_version > 1:
        raise ToolkitError(
            "Unable to load model.\n\n"
            "This model looks to have been saved with a future version of Turi Create.\n"
            "Please upgrade Turi Create before attempting to load this model file."
        )
    elif archive_version == 1:
        cls = MODEL_NAME_MAP[saved_state['model_name']]
        if 'model' in saved_state:
            # this is a native model
            return cls(saved_state['model'])
        else:
            # this is a CustomModel
            model_data = saved_state['side_data']
            model_version = model_data['model_version']
            del model_data['model_version']
            return cls._load_version(model_data, model_version)
    else:
        # Very old legacy model format; attempt pickle loading
        import sys
        sys.stderr.write(
            "This model was saved in a legacy model format. Compatibility cannot be guaranteed in future versions.\n"
        )
        if _six.PY3:
            raise ToolkitError(
                "Unable to load legacy model in Python 3.\n\n"
                "To migrate a model, try loading it using Turi Create 4.0 or\n"
                "later in Python 2 and then re-save it. The re-saved model should\n"
                "work in Python 3.")

        if 'graphlab' not in sys.modules:
            sys.modules['graphlab'] = sys.modules['turicreate']
            # backward compatibility. Otherwise old pickles will not load
            sys.modules["turicreate_util"] = sys.modules['turicreate.util']
            sys.modules["graphlab_util"] = sys.modules['turicreate.util']

            # More backwards compatibility with the turicreate namespace code.
            for k, v in list(sys.modules.items()):
                if 'turicreate' in k:
                    sys.modules[k.replace('turicreate', 'graphlab')] = v
        # legacy loader
        import pickle
        model_wrapper = pickle.loads(saved_state[b'model_wrapper'])
        return model_wrapper(saved_state[b'model_base'])
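
To tie the save and load paths together, a hedged round-trip sketch using a built-in toolkit model; the file name is chosen only for illustration:

import turicreate as tc

g = tc.SGraph().add_edges([tc.Edge(i, i + 1) for i in range(5)])
m = tc.pagerank.create(g)

# save() writes a directory (a dir_archive), not a single file.
m.save('pagerank_model')

# load_model() restores it, dispatching on the archive version as shown above.
loaded = tc.load_model('pagerank_model')
loaded.summary()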