def test_gframe(self):
    """Exercise the ``vertices`` / ``edges`` frame views of an SGraph.

    Checks, in order:
      * the column names exposed by an empty graph,
      * that the ``vertices`` / ``edges`` attributes can be neither deleted
        nor reassigned,
      * that adding columns through the frame views gives the same graph as
        building it from separately edited vertex / edge frames,
      * that deleting an unknown column raises ``KeyError`` and the edge
        view is still usable afterwards,
      * that slicing the views matches slicing ``get_edges()`` /
        ``get_vertices()``.
    """
    empty = SGraph()
    self.assertSequenceEqual(empty.vertices.column_names(), ['__id'])
    self.assertSequenceEqual(empty.edges.column_names(),
                             ['__src_id', '__dst_id'])

    # Test vertices and edge attributes cannot be modified
    with self.assertRaises(AttributeError):
        del empty.vertices
    with self.assertRaises(AttributeError):
        del empty.edges
    with self.assertRaises(AttributeError):
        empty.vertices = SFrame()
    with self.assertRaises(AttributeError):
        empty.edges = SFrame()

    def build_graph(vdata, vid, edata, src, dst):
        # Chain add_vertices / add_edges on a fresh graph.
        return SGraph().add_vertices(vdata, vid).add_edges(edata, src, dst)

    def sorted_frame(sf, keys):
        # Convert to pandas, order rows by ``keys`` and drop the old index so
        # two frames can be compared row by row.
        return sf.to_dataframe().sort(keys).reset_index(drop=True)

    # Test gframe operations has the same effect as its sframe+graph equivalent
    g = build_graph(self.vertices, 'vid', self.edges, 'src_id', 'dst_id')
    vertex_view = g.vertices
    vertex_view['id_col'] = vertex_view['__id']
    edge_view = g.edges
    edge_view['src_id_col'] = edge_view['__src_id']
    edge_view['dst_id_col'] = edge_view['__dst_id']

    g2 = build_graph(self.vertices, 'vid', self.edges, 'src_id', 'dst_id')
    new_vdata = g2.get_vertices()
    new_vdata['id_col'] = new_vdata['__id']
    new_edata = g2.get_edges()
    new_edata['src_id_col'] = new_edata['__src_id']
    new_edata['dst_id_col'] = new_edata['__dst_id']
    g2 = build_graph(new_vdata, '__id', new_edata, '__src_id', '__dst_id')

    assert_frame_equal(sorted_frame(g.get_vertices(), '__id'),
                       sorted_frame(g2.get_vertices(), '__id'))
    edge_keys = ['__src_id', '__dst_id']
    assert_frame_equal(sorted_frame(g.get_edges(), edge_keys),
                       sorted_frame(g2.get_edges(), edge_keys))

    # check delete a column with exception, and edges is still in a valid state
    with self.assertRaises(KeyError):
        del g.edges['badcolumn']
    g.edges.head()

    # test slicing
    assert_frame_equal(g.edges[:3].to_dataframe(),
                       g.get_edges()[:3].to_dataframe())
    assert_frame_equal(g.vertices[:3].to_dataframe(),
                       g.get_vertices()[:3].to_dataframe())

    # test add row number
    # NOTE(review): the expected frames below are built but never compared
    # against anything in this method.
    e_expected = g.get_edges().to_dataframe()
    v_expected = g.get_vertices().to_dataframe()
    e_expected['id'] = range(len(e_expected))
    v_expected['id'] = range(len(v_expected))
def test_empty_graph(self):
    """A freshly constructed SGraph is empty in every view.

    Verifies the summary counts, the number of fields, the shape of the
    vertex / edge frames (via ``get_vertices`` / ``get_edges`` and via the
    ``vertices`` / ``edges`` attributes) and the list-format outputs.
    """
    g = SGraph()
    self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
    self.assertEqual(len(g.get_fields()), 3)

    # assertTrue(value, msg) only checks that ``value`` is truthy and uses the
    # second argument as the failure message, so a non-empty shape tuple would
    # always pass.  assertEqual actually compares against the expected shape.
    self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
    self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
    self.assertEqual(g.vertices.shape, (0, 1))
    self.assertEqual(g.edges.shape, (0, 2))

    # assertEqual reports the actual length on failure instead of "False".
    self.assertEqual(len(g.get_vertices(format='list')), 0)
    self.assertEqual(len(g.get_edges(format='list')), 0)
def test_simple_graph(self):
    """Build the fixture graph from each supported input type and inspect it.

    For pandas DataFrame, SFrame and list-of-Vertex/Edge inputs the graph is
    populated from ``self.vertices`` / ``self.edges`` and then queried in the
    'dataframe', 'sframe' and 'list' output formats.
    """
    def check_shapes(graph, fmt):
        # NOTE(review): assertTrue(value, msg) only tests the truthiness of
        # ``value``; the tuple is taken as the failure message, so these four
        # calls never compare shapes.  Kept exactly as in the original; the
        # expected tuples have not been verified.
        self.assertTrue(graph.get_edges(format=fmt).shape, (3, 3))
        self.assertTrue(graph.get_vertices(format=fmt).shape, (4, 3))
        self.assertTrue(
            graph.get_vertices(format=fmt, fields={'color': 'g'}).shape,
            (1, 2))
        self.assertTrue(
            graph.get_edges(format=fmt, fields={'weight': 0.}).shape,
            (1, 3))

    for input_type in [pd.DataFrame, SFrame, list]:
        g = SGraph()
        if input_type is list:
            # Convert every fixture row into an explicit Vertex / Edge object.
            vertices = [
                Vertex(row['vid'], {'color': row['color'], 'vec': row['vec']})
                for _, row in self.vertices.iterrows()
            ]
            edges = [
                Edge(row['src_id'], row['dst_id'], {'weight': row['weight']})
                for _, row in self.edges.iterrows()
            ]
            g = g.add_vertices(vertices)
            g = g.add_edges(edges)
        else:
            g = g.add_vertices(input_type(self.vertices), vid_field='vid')
            g = g.add_edges(input_type(self.edges),
                            src_field='src_id', dst_field='dst_id')

        self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
        self.assertItemsEqual(
            g.get_fields(),
            ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

        # pandas output
        self.assertItemsEqual(
            g.get_vertices(format='dataframe').columns.values,
            ['color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='dataframe').columns.values,
            ['__src_id', '__dst_id', 'weight'])
        check_shapes(g, 'dataframe')

        # SFrame output
        self.assertItemsEqual(
            g.get_vertices(format='sframe').column_names(),
            ['__id', 'color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='sframe').column_names(),
            ['__src_id', '__dst_id', 'weight'])
        check_shapes(g, 'sframe')

        # list output
        vertices = g.get_vertices(format='list')
        edges = g.get_edges(format='list')
        self.assertEqual(len(vertices), 4)
        self.assertEqual(len(edges), 3)

        # get edges is lazy
        edges = g.get_edges()
        self.assertFalse(edges.__is_materialized__())
def test_robust_parse(self):
    """Vertex columns with missing values survive a trip through SGraph.

    A DataFrame mixing dense, partially missing and all-NaN columns is added
    as vertices; the non-missing values of every column except 'nan' must
    match in both the 'dataframe' and 'sframe' vertex outputs.
    """
    raw_columns = {
        'int': [1, 2, 3],
        'float': [1., 2., 3.],
        'str': ['one', 'two', 'three'],
        'nan': [np.nan, np.nan, np.nan],
        'sparse_int': [1, 2, np.nan],
        'sparse_float': [np.nan, 2., 3.],
        'sparse_str': [None, 'two', None],
    }
    df = pd.DataFrame(raw_columns)
    g = SGraph().add_vertices(df)

    expected_fields = df.columns.tolist() + ['__id', '__src_id', '__dst_id']
    self.assertItemsEqual(g.get_fields(), expected_fields)

    df2 = g.get_vertices(format='dataframe')
    sf = g.get_vertices(format='sframe')
    for col in df.columns:
        # potential bug: df2 is missing the 'nan' column.
        if col == 'nan':
            continue
        expected = sorted(list(df[col].dropna()))
        self.assertItemsEqual(sorted(list(df2[col].dropna())), expected)
        self.assertItemsEqual(sorted(list(sf[col].dropna())), expected)
def test_robust_parse(self):
    """Check that missing values in vertex data are parsed robustly.

    Builds a graph from a DataFrame whose columns are dense, sparse or
    entirely NaN, then compares the non-missing values of each column
    (except 'nan') between the input and the vertex data read back as a
    DataFrame and as an SFrame.
    """
    def present_values(column):
        # Sorted list of the entries left after dropping missing values.
        return sorted(list(column.dropna()))

    df = pd.DataFrame({
        'int': [1, 2, 3],
        'float': [1., 2., 3.],
        'str': ['one', 'two', 'three'],
        'nan': [np.nan, np.nan, np.nan],
        'sparse_int': [1, 2, np.nan],
        'sparse_float': [np.nan, 2., 3.],
        'sparse_str': [None, 'two', None]
    })
    g = SGraph().add_vertices(df)
    self.assertItemsEqual(
        g.get_fields(),
        df.columns.tolist() + ['__id', '__src_id', '__dst_id'])

    df2 = g.get_vertices(format='dataframe')
    sf = g.get_vertices(format='sframe')

    # potential bug: df2 is missing the 'nan' column.
    comparable = [name for name in df.columns if name != 'nan']
    for col in comparable:
        self.assertItemsEqual(present_values(df2[col]),
                              present_values(df[col]))
        self.assertItemsEqual(present_values(sf[col]),
                              present_values(df[col]))
def test_simple_graph(self):
    """Populate an SGraph from each input type and query it in each format.

    The fixture data in ``self.vertices`` / ``self.edges`` is fed in as a
    pandas DataFrame, an SFrame, and lists of Vertex / Edge objects; the
    resulting graph is then read back as 'dataframe', 'sframe' and 'list'.
    """
    all_fields = ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight']
    edge_columns = ['__src_id', '__dst_id', 'weight']

    for input_type in [pd.DataFrame, SFrame, list]:
        g = SGraph()
        if input_type is not list:
            vertex_input = input_type(self.vertices)
            g = g.add_vertices(vertex_input, vid_field='vid')
            edge_input = input_type(self.edges)
            g = g.add_edges(edge_input,
                            src_field='src_id',
                            dst_field='dst_id')
        else:
            # Build explicit Vertex / Edge objects from the fixture rows.
            vertices = []
            for _, rec in self.vertices.iterrows():
                attrs = {'color': rec['color'], 'vec': rec['vec']}
                vertices.append(Vertex(rec['vid'], attrs))
            edges = []
            for _, rec in self.edges.iterrows():
                attrs = {'weight': rec['weight']}
                edges.append(Edge(rec['src_id'], rec['dst_id'], attrs))
            g = g.add_vertices(vertices)
            g = g.add_edges(edges)

        self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
        self.assertItemsEqual(g.get_fields(), all_fields)

        # NOTE(review): every assertTrue(<shape>, <tuple>) below passes the
        # tuple as the failure message, so only truthiness of the shape is
        # tested.  Behaviour is kept as in the original; the expected tuples
        # have not been verified.
        self.assertItemsEqual(
            g.get_vertices(format='dataframe').columns.values,
            ['color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='dataframe').columns.values, edge_columns)
        self.assertTrue(g.get_edges(format='dataframe').shape, (3, 3))
        self.assertTrue(g.get_vertices(format='dataframe').shape, (4, 3))
        green_df = g.get_vertices(format='dataframe', fields={'color': 'g'})
        self.assertTrue(green_df.shape, (1, 2))
        zero_df = g.get_edges(format='dataframe', fields={'weight': 0.})
        self.assertTrue(zero_df.shape, (1, 3))

        self.assertItemsEqual(
            g.get_vertices(format='sframe').column_names(),
            ['__id', 'color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='sframe').column_names(), edge_columns)
        self.assertTrue(g.get_edges(format='sframe').shape, (3, 3))
        self.assertTrue(g.get_vertices(format='sframe').shape, (4, 3))
        green_sf = g.get_vertices(format='sframe', fields={'color': 'g'})
        self.assertTrue(green_sf.shape, (1, 2))
        zero_sf = g.get_edges(format='sframe', fields={'weight': 0.})
        self.assertTrue(zero_sf.shape, (1, 3))

        vertices = g.get_vertices(format='list')
        edges = g.get_edges(format='list')
        self.assertEqual(len(vertices), 4)
        self.assertEqual(len(edges), 3)

        # get edges is lazy
        edges = g.get_edges()
        self.assertFalse(edges.__is_materialized__())
def test_gframe(self):
    """Test the frame objects returned by ``SGraph.vertices`` / ``.edges``.

    Covers default column names on an empty graph, protection of the two
    attributes against deletion and reassignment, equivalence between
    editing columns through the views and rebuilding the graph from edited
    frames, error handling when deleting an unknown column, and slicing.
    """
    g = SGraph()
    self.assertSequenceEqual(g.vertices.column_names(), ['__id'])
    self.assertSequenceEqual(g.edges.column_names(),
                             ['__src_id', '__dst_id'])

    # Test vertices and edge attributes cannot be modified
    def drop_attr(graph, attr):
        delattr(graph, attr)

    def clear_attr(graph, attr):
        setattr(graph, attr, SFrame())

    def drop_column(frame, column):
        del frame[column]

    self.assertRaises(AttributeError, drop_attr, g, 'vertices')
    self.assertRaises(AttributeError, drop_attr, g, 'edges')
    self.assertRaises(AttributeError, clear_attr, g, 'vertices')
    self.assertRaises(AttributeError, clear_attr, g, 'edges')

    # Test gframe operations has the same effect as its sframe+graph equivalent
    g = SGraph().add_vertices(self.vertices, 'vid')
    g = g.add_edges(self.edges, 'src_id', 'dst_id')
    v = g.vertices
    v['id_col'] = v['__id']
    e = g.edges
    e['src_id_col'] = e['__src_id']
    e['dst_id_col'] = e['__dst_id']

    g2 = SGraph().add_vertices(self.vertices, 'vid')
    g2 = g2.add_edges(self.edges, 'src_id', 'dst_id')
    new_vdata = g2.get_vertices()
    new_vdata['id_col'] = new_vdata['__id']
    new_edata = g2.get_edges()
    new_edata['src_id_col'] = new_edata['__src_id']
    new_edata['dst_id_col'] = new_edata['__dst_id']
    g2 = SGraph().add_vertices(new_vdata, '__id')
    g2 = g2.add_edges(new_edata, '__src_id', '__dst_id')

    # Compare vertices, then edges, after sorting on the id columns so that
    # row order does not matter.
    actual_v = g.get_vertices().to_dataframe().sort('__id')
    wanted_v = g2.get_vertices().to_dataframe().sort('__id')
    assert_frame_equal(actual_v.reset_index(drop=True),
                       wanted_v.reset_index(drop=True))

    actual_e = g.get_edges().to_dataframe().sort(['__src_id', '__dst_id'])
    wanted_e = g2.get_edges().to_dataframe().sort(['__src_id', '__dst_id'])
    assert_frame_equal(actual_e.reset_index(drop=True),
                       wanted_e.reset_index(drop=True))

    # check delete a column with exception, and edges is still in a valid state
    self.assertRaises(KeyError, drop_column, g.edges, 'badcolumn')
    g.edges.head()

    # test slicing
    first_three = slice(None, 3)
    assert_frame_equal(g.edges[first_three].to_dataframe(),
                       g.get_edges()[first_three].to_dataframe())
    assert_frame_equal(g.vertices[first_three].to_dataframe(),
                       g.get_vertices()[first_three].to_dataframe())

    # test add row number
    # NOTE(review): these expected frames are prepared but no comparison
    # against them follows in this method.
    e_expected = g.get_edges().to_dataframe()
    v_expected = g.get_vertices().to_dataframe()
    e_expected['id'] = range(len(e_expected))
    v_expected['id'] = range(len(v_expected))