def main(): """ main view original schema graph view simulate schema graph specify indirect map, such as dataset.parent view basic cycles in simulate schema graph view core ambiguous graph generate from simulate schema graph specify split file view the splition result based on the split file """ usage = "usage: %prog -u <database link> -m <mapfile> \n" usage += " --view_origin \n" usage += " --view_simulate \n" usage += " --alias_mapfile=<mapfile with alias tables> \n" usage += " --view_basic_cycles \n" usage += " --view_core_graph \n" usage += " --divide_by=<file specify the splition of core graph> \n" usage += " --view_splition \n" parser = OptionParser(usage=usage, version="%prog 1.0") parser.add_option("-u", "--url", action="store", type="string", dest="url", help="input database connect url") parser.add_option("-m", "--mapfile", action="store", type="string", dest="mapfile", help="input registration yaml map file") parser.add_option("-o", "--view_origin", action="store_true", dest="view_origin", help="view origin schema graph") parser.add_option("-s", "--view_simulate", action="store_true", dest="view_simulate", help="view simulate schema graph") parser.add_option("-a", "--alias_mapfile", action="store", type="string", dest="alias_mapfile", help="input mapfile based on aliased table") parser.add_option("-b", "--view_basic_cycles", action="store_true", dest="view_basics", help="view basic cycles on simulate schema graph") parser.add_option("-c", "--view_core_graph", action="store_true", dest="view_core", help="view core ambiguous graph on simulate graph") parser.add_option("-d", "--divide_by", action="store", type="string", dest="split_file", help="input split file and do dividing") parser.add_option("-r", "--view_splition", action="store_true", dest="view_splition", help="view splition graph") url = "" mapper = Mapper() # mapfile = os.path.join(os.path.dirname(sys.argv[0]), 'map.yaml') mapfile = "" splition_file = None (options, _) = parser.parse_args() if not 
options.url: parser.error("database url is needed to do schema review") url = options.url dbmanager = DBManager() dbmanager.connect(url) dbalias = dbmanager.get_alias(url) tables = dbmanager.load_tables(dbalias) originschema = None simschema = None originschema = OriginSchema(dbmanager.db_tables[dbalias]) if not originschema.check_connective(): print "schema graph is not connective" # raise Exception('Schema graph is not connective') if options.view_origin: write_original_schema_graph(originschema, "original_schema") check_call(['dot', '-Tpng', 'original_schema.dot', '-o', \ 'original_schema.png']) if not options.mapfile: print "further review need mapfile" return mapfile = options.mapfile mapper.load_mapfile(mapfile) if dbmanager.db_type[dbalias] == 'mysql':# for mysql case sensitive mapper.set_sens(True) if not mapper.validate_map(tables): raise Exception("conflicts between map file and schema") originschema.recognize_type(mapper) originschema.handle_alias() originschema.gen_attr_links(mapper) load_statistics(dbmanager, dbalias, originschema) if options.alias_mapfile: mapper.load_mapfile(options.alias_mapfile) simschemas = originschema.gen_simschema() for simschema in simschemas: simschema.update_nodelist() originschema.recognize_shortcut() if options.view_simulate: write_original_schema_graph(originschema, "simschema0") check_call(['dot', '-Tpng', 'simschema0.dot', '-o', \ 'simschema.png']) for idx in range(len(simschemas)): simschema = simschemas[idx] fname = 'simschema%d.png' % idx if len(simschema.nodelist) > 1: write_simulate_schema_graph(simschema, 'simschema1') check_call(['dot', '-Tpng', 'simschema1.dot', '-o', fname]) if options.view_basics: for idx in range(len(simschemas)): simschema = simschemas[idx] if len(simschema.nodelist) < 2: continue if not write_basics_cycles(simschema, "basic_cycle%d" % idx): continue fname = 'basic_cycle%d_pack.png' % idx p1 = Popen(["dot"] + glob.glob('basic_cycle%d_*.dot' % idx), \ stdout=PIPE) p2 = Popen(["gvpack"], 
stdin=p1.stdout, stdout=PIPE) p3 = Popen(['dot', '-Tpng', '-o', fname], \ stdin=p2.stdout, stdout=PIPE) p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. p2.stdout.close() output = p3.communicate()[0] if options.view_core: for idx in range(len(simschemas)): simschema = simschemas[idx] if len(simschema.nodelist) < 2: continue write_core_wgraph(simschema) fname = 'coreschema%d.png' % idx check_call(['dot', '-Tpng', 'coreschema.dot', '-o', fname]) if options.split_file: splition_file = options.split_file if options.view_splition: for idx in range(len(simschemas)): simschema = simschemas[idx] if len(simschema.nodelist) < 2: continue write_splition(simschema, splition_file, "Subgraph%d" % idx) fname = 'subgraph%d_pack.png' % idx p1 = Popen(["dot"] + glob.glob('Subgraph%d_*.dot' % idx), \ stdout=PIPE) p2 = Popen(["gvpack"], stdin=p1.stdout, stdout=PIPE) p3 = Popen(['dot', '-Tpng', '-o', fname], \ stdin=p2.stdout, stdout=PIPE) p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. p2.stdout.close() output = p3.communicate()[0]
class TestTSchema(unittest.TestCase):
    """Test TSchema"""

    def setUp(self):
        """Build the simulate schemas from the starting_db.yaml fixture."""
        metadata = load_from_file("starting_db.yaml")
        oschema = OriginSchema(metadata.tables)
        oschema.check_connective()
        self.mapper = Mapper()
        self.mapper.load_mapfile('testmap.yaml')
        self.mapper.validate_map(metadata.tables)
        oschema.recognize_type(self.mapper)
        oschema.handle_alias()
        oschema.gen_attr_links(self.mapper)
        self.simschemas = oschema.gen_simschema()
        for simschema in self.simschemas:
            simschema.update_nodelist()

    def test_get_wgraph_from_schema(self):
        """test get wgraph from schema"""
        graph1 = [[(5, 0), (4, 0)], [(4, 0)], [], [(4, 0)], [(2, 0)],
                  [(4, 0)]]
        graph2 = self.simschemas[0].get_wgraph_from_schema()
        self.assertEqual(len(graph1), len(graph2))
        for idx in range(len(graph1)):
            # BUG FIX: the original compared graph1[idx].sort() with
            # graph2[idx].sort() — list.sort() returns None, so the
            # assertion was always None == None and never failed.
            self.assertEqual(sorted(graph1[idx]), sorted(graph2[idx]))

    def test_get_graph_from_schema(self):
        """test get graph from schema"""
        graph1 = [[5, 4], [4], [], [4], [2], [4]]
        graph2 = self.simschemas[0].get_graph_from_schema()
        self.assertEqual(len(graph1), len(graph2))
        for idx in range(len(graph1)):
            # BUG FIX: same list.sort()-returns-None issue as above.
            self.assertEqual(sorted(graph1[idx]), sorted(graph2[idx]))

    def test_write_graph(self):
        """test write graph"""
        dot = DotGraph(file("z.dot", "w"))
        self.simschemas[0].write_graph(dot)

    def test_get_cycle_basis(self):
        """test get basic cycles"""
        simschema = self.simschemas[0]
        relations = simschema.get_graph_from_schema()
        graph = Graph(relations)
        ugraph = graph.get_undirected()
        cycles2 = simschema.get_cycle_basis(ugraph._graph)
        cycles2 = cycles2[0]
        cycles = ['Files', 'ProcessedDataset', 'Block']
        # map expected node names to their ordered indexes
        cycles1 = [
            simschema.ordered.index(simschema.nodelist[node.lower()])
            for node in cycles]
        cycles1.sort()
        cycles2.sort()
        self.assertEqual(cycles1, cycles2)

    def test_write_cyclic_graph(self):
        """test write cyclic graph"""
        dot = DotGraph(file("z.dot", "w"))
        self.simschemas[0].write_cyclic_graph(dot, "basic_cycles")

    def test_gen_subgraph(self):
        """test generating subgraph"""
        pass
class TestSchemaHandler(unittest.TestCase): """Test class for Querybuilder""" def setUp(self): """default""" metadata = load_from_file("starting_db.yaml") self.handler = SchemaHandler(metadata.tables) self.mapper = Mapper() self.mapper.load_mapfile('testmap.yaml') self.mapper.validate_map(metadata.tables) self.handler._schema.recognize_type(self.mapper) self.handler._schema.handle_alias() self.handler._schema.gen_attr_links(self.mapper) simschemas = self.handler._schema.gen_simschema() for simschema in simschemas: simschema.update_nodelist() graph = simschema.get_wgraph_from_schema() self.handler.subconstructors.append([ConstructQuery(graph, \ weighted=True)]) nodes = set(range(len(simschema.ordered))) self.handler.subnodes.append([nodes]) self.handler._simschemas = simschemas def tearDown(self): """default """ pass def test_gen_clauses(self): """ test clause generation """ keylist = {'keywords':['dataset.createdate', 'dataset.name'], 'constraints':['file.name'], 'keyset':['ProcessedDataset.CreateDate', 'Files.Path', 'Files.Name']} mquery = {'keywords':[['ProcessedDataset.CreateDate'], ['Files.Path']], 'constraints': [{'value': '123', 'keyword': ['Files.Name', 'file.name'], 'sign': '='}]} clause = self.handler.gen_clauses(mquery, keylist) self.assertEqual(str(clause.left).lower(), 'Files.Name'.lower()) def test_gen_clauses_comp(self): """ test complex clause generation ("Files"."Name" = :Name_1) AND (("Files"."Path" = :Path_1) OR ("Files"."Path" = :Path_2)) """ mquery = {'keywords': [['Files.Path']], 'constraints': [{'value': '123', 'keyword': ['Files.Name', 'file.name'], 'sign': '='}, 'and', [{'value': '456', 'keyword': ['Files.Path', 'dataset'], 'sign': '='}, 'or', {'value': '789', 'keyword': ['Files.Path', 'dataset'], 'sign': '='}]]} keylist = {'keywords':['dataset'], 'constraints':['file.name', 'dataset', 'dataset'], 'keyset':['Files.Name','Files.Path','Files.Path']} clause = self.handler.gen_clauses(mquery, keylist) 
self.assertEqual(str(clause.clauses[0].left).lower(), \ 'Files.Name'.lower()) def test_single_query(self): """ test single query without constaints """ keylist = {'keywords':['dataset.createdate'], 'constraints':None, 'keyset':['ProcessedDataset.CreateDate'], 'mkeywords':['ProcessedDataset.CreateDate']} whereclause = None query = self.handler.build_query(whereclause, keylist) print query def test_single_query2(self): """ test single query with constraints """ mquery = {'keywords': [['Files.Path']], 'constraints': [{'value': '123', 'keyword': ['Files.Name', 'file.name'], 'sign': '='}, 'and', [{'value': '456', 'keyword': ['Files.Path', 'dataset'], 'sign': '='}, 'or', {'value': '789', 'keyword': ['Files.Path', 'dataset'], 'sign': '='}]]} keylist = {'keywords':['dataset'], 'constraints':['file.name', 'dataset', 'dataset'], 'keyset':['Files.Path','Files.Name','Files.Path','Files.Path'], 'mkeywords':[['Files.Path']]} clause = self.handler.gen_clauses(mquery,keylist) query = self.handler.build_query(clause, keylist) print query def test_single_query3(self): """test query with multiple tables""" mquery = {'keywords': [['Files.Path'], ['ProcAlgo.Algorithm'], ['PrimaryDataset.Name']]} keylist = {'keywords': ['dataset', 'algo', 'primds'], 'constraints': [], 'mkeywords':[['Files.Path'],['ProcAlgo.Algorithm'],['PrimaryDataset.Name']], 'keyset':['Files.Path','ProcAlgo.Algorithm','PrimaryDataset.Name']} clause = self.handler.gen_clauses(mquery,keylist) query = self.handler.build_query(clause, keylist) print query mquery = {'keywords': [['Files.Path'], ['ProcAlgo.Algorithm'], ['PrimaryDataset.Name']], 'constraints': [{'value': '123', 'keyword': ['Files.Name', 'file.name'], 'sign': '='}, 'and', [{'value': '456', 'keyword': ['Files.Path', 'dataset'], 'sign': '='}, 'or', {'value': '789', 'keyword': ['Files.Path', 'dataset'], 'sign': '='}]]} keylist = {'keywords': ['dataset', 'algo', 'primds'], 'constraints': ['file.name', 'dataset', 'dataset'], 'keyset':['Files.Path', 
'ProcAlgo.Algorithm','PrimaryDataset.Name', 'Files.Name', 'Files.Path', 'Files.Path'], 'mkeywords':[['Files.Path'], ['ProcAlgo.Algorithm'],['PrimaryDataset.Name']]} clause = self.handler.gen_clauses(mquery, keylist) query = self.handler.build_query(clause, keylist) print query def test_single_query4(self): """test query with manually sqlalchemy select""" process_dataset = self.handler.find_table('ProcessedDataset') primary_dataset = self.handler.find_table('PrimaryDataset') # columns, whereclause=None, from_obj=None, distinct=False, having=None, # correlate=True, prefixes=None, **kwargs select_test = select(columns=[process_dataset.c.CreateDate,primary_dataset.c.Name])\ .correlate(primary_dataset.c.ID == process_dataset.c.PrimaryDataset)\ .where(primary_dataset.c.Name == 'test') keylist = {'keywords': ['dataset.createdate','primds'], 'constraints': ['primds.name'], 'keyset':['ProcessedDataset.CreateDate','PrimaryDataset.Name','PrimaryDataset.Name'], 'mkeywords':[['ProcessedDataset.CreateDate'],['PrimaryDataset.Name']]} query = self.handler.build_query(select_test, keylist) print query
class TestOriginSchema(unittest.TestCase):
    """Test OriginSchema"""

    def setUp(self):
        """auto init"""
        metadata = load_from_file("starting_db.yaml")
        self.oschema = OriginSchema(metadata.tables)
        self.mapper = Mapper()
        self.mapper.load_mapfile('testmap.yaml')
        self.mapper.validate_map(metadata.tables)
        self.oschema.check_connective()

    def test_yaml_graph(self):
        """test create schema from yaml
        make a dotgraph output via query_builder"""
        oschema = self.oschema
        dot = DotGraph(file("z.dot", "w"))
        oschema.write_graph(dot)
        # assertEqual instead of assertTrue(12 == ...) so a failure
        # reports the actual node count
        self.assertEqual(12, len(oschema.nodelist))

    def test_recognize_type(self):
        """test recognize three type kinds of nodes"""
        oschema = self.oschema
        oschema.recognize_type(self.mapper)
        self.assertEqual(6, len(oschema.v_ent))
        self.assertEqual(1, len(oschema.v_rel))
        self.assertEqual(3, len(oschema.v_attr))

    def test_handle_alias(self):
        """test handle alias"""
        oschema = self.oschema
        oschema.recognize_type(self.mapper)
        oschema.handle_alias()
        # alias handling promotes one more node into the attribute set
        self.assertEqual(4, len(oschema.v_attr))

    def test_gen_attr_link(self):
        """test attribute link generation"""
        self.oschema.recognize_type(self.mapper)
        attr_path, _ = self.oschema.gen_attr_links(self.mapper)
        self.assertEqual('Files.Dataset_ProcessedDataset.ID'.lower(),
                         attr_path['dataset.name'][0].name.lower())

    def test_gen_simschema(self):
        """test generate simschema"""
        oschema = self.oschema
        oschema.recognize_type(self.mapper)
        oschema.handle_alias()
        oschema.gen_attr_links(self.mapper)
        simschemas = oschema.gen_simschema()
        for simschema in simschemas:
            simschema.update_nodelist()
        self.assertEqual(1, len(simschemas))
class TestLive(unittest.TestCase): """live test """ def setUp(self): """initialize test sqlite db then metadata""" if os.path.exists('unittest.db'): os.unlink('unittest.db') test_db = UnittestDB() self.metadata = test_db.load_with_fake_data('oracle.sql', 'sqlite:///unittest.db') self.handler = SchemaHandler(self.metadata.tables) self.mapper = Mapper() self.mapper.load_mapfile('map.yaml') self.mapper.validate_map(self.metadata.tables) self.handler._schema.recognize_type(self.mapper) self.handler._schema.handle_alias() self.handler._schema.gen_attr_links(self.mapper) simschemas = self.handler._schema.gen_simschema() for simschema in simschemas: simschema.update_nodelist() graph = simschema.get_wgraph_from_schema() self.handler.subconstructors.append([ConstructQuery(graph, \ weighted=True)]) nodes = set(range(len(simschema.ordered))) self.handler.subnodes.append([nodes]) self.handler._simschemas = simschemas def tearDown(self): """release resources""" self.metadata.bind.engine.dispose() os.unlink('unittest.db') def display_rows(self, rows): """display rows on LOGGER""" for row in rows: continue def test_read_query(self): """test read querys""" metadata = self.metadata process_dataset = self.handler.find_table('ProcessedDataset') data_tier = self.handler.find_table('DataTier') primary_dataset = self.handler.find_table('PrimaryDataset') files = self.handler.find_table('Files') # First try a regular select query. select_test = select([process_dataset.c.Name, data_tier.c.Name, primary_dataset.c.CreatedBy]).where(process_dataset.c.ID == 0) print "regular select ", select_test results = select_test.execute() rows = results.fetchall() self.display_rows(rows) # Then use our software to modify one. 
keylist = { 'keywords': ['dataset', 'tier','primds.createby'], 'constraints':[], 'keyset':['ProcessedDataset.Name','DataTier.Name','PrimaryDataset.CreatedBy'], 'mkeywords':[['ProcessedDataset.Name'],['DataTier.Name'],['PrimaryDataset.CreatedBy']] } query = self.handler.build_query(None, keylist) print "modified select", query select_clause = query results = select_clause.execute() rows = results.fetchall() self.display_rows(rows) self.assertEqual(len(rows[0]), 3)