def setUp(self): """auto init""" metadata = load_from_file("starting_db.yaml") self.oschema = OriginSchema(metadata.tables) self.mapper = Mapper() self.mapper.load_mapfile('testmap.yaml') self.mapper.validate_map(metadata.tables) self.oschema.check_connective()
def setUp(self): metadata = load_from_file("starting_db.yaml") oschema = OriginSchema(metadata.tables) oschema.check_connective() self.mapper = Mapper() self.mapper.load_mapfile('testmap.yaml') self.mapper.validate_map(metadata.tables) oschema.recognize_type(self.mapper) oschema.handle_alias() attr_path, _ = oschema.gen_attr_links(self.mapper) self.simschemas = oschema.gen_simschema() for simschema in self.simschemas: simschema.update_nodelist()
def setUp(self): """default""" metadata = load_from_file("starting_db.yaml") self.handler = SchemaHandler(metadata.tables) self.mapper = Mapper() self.mapper.load_mapfile('testmap.yaml') self.mapper.validate_map(metadata.tables) self.handler._schema.recognize_type(self.mapper) self.handler._schema.handle_alias() self.handler._schema.gen_attr_links(self.mapper) simschemas = self.handler._schema.gen_simschema() for simschema in simschemas: simschema.update_nodelist() graph = simschema.get_wgraph_from_schema() self.handler.subconstructors.append([ConstructQuery(graph, \ weighted=True)]) nodes = set(range(len(simschema.ordered))) self.handler.subnodes.append([nodes]) self.handler._simschemas = simschemas
def setUp(self): """initialize test sqlite db then metadata""" if os.path.exists('unittest.db'): os.unlink('unittest.db') test_db = UnittestDB() self.metadata = test_db.load_with_fake_data('oracle.sql', 'sqlite:///unittest.db') self.handler = SchemaHandler(self.metadata.tables) self.mapper = Mapper() self.mapper.load_mapfile('map.yaml') self.mapper.validate_map(self.metadata.tables) self.handler._schema.recognize_type(self.mapper) self.handler._schema.handle_alias() self.handler._schema.gen_attr_links(self.mapper) simschemas = self.handler._schema.gen_simschema() for simschema in simschemas: simschema.update_nodelist() graph = simschema.get_wgraph_from_schema() self.handler.subconstructors.append([ConstructQuery(graph, \ weighted=True)]) nodes = set(range(len(simschema.ordered))) self.handler.subnodes.append([nodes]) self.handler._simschemas = simschemas
def main(): """ main view original schema graph view simulate schema graph specify indirect map, such as dataset.parent view basic cycles in simulate schema graph view core ambiguous graph generate from simulate schema graph specify split file view the splition result based on the split file """ usage = "usage: %prog -u <database link> -m <mapfile> \n" usage += " --view_origin \n" usage += " --view_simulate \n" usage += " --alias_mapfile=<mapfile with alias tables> \n" usage += " --view_basic_cycles \n" usage += " --view_core_graph \n" usage += " --divide_by=<file specify the splition of core graph> \n" usage += " --view_splition \n" parser = OptionParser(usage=usage, version="%prog 1.0") parser.add_option("-u", "--url", action="store", type="string", dest="url", help="input database connect url") parser.add_option("-m", "--mapfile", action="store", type="string", dest="mapfile", help="input registration yaml map file") parser.add_option("-o", "--view_origin", action="store_true", dest="view_origin", help="view origin schema graph") parser.add_option("-s", "--view_simulate", action="store_true", dest="view_simulate", help="view simulate schema graph") parser.add_option("-a", "--alias_mapfile", action="store", type="string", dest="alias_mapfile", help="input mapfile based on aliased table") parser.add_option("-b", "--view_basic_cycles", action="store_true", dest="view_basics", help="view basic cycles on simulate schema graph") parser.add_option("-c", "--view_core_graph", action="store_true", dest="view_core", help="view core ambiguous graph on simulate graph") parser.add_option("-d", "--divide_by", action="store", type="string", dest="split_file", help="input split file and do dividing") parser.add_option("-r", "--view_splition", action="store_true", dest="view_splition", help="view splition graph") url = "" mapper = Mapper() # mapfile = os.path.join(os.path.dirname(sys.argv[0]), 'map.yaml') mapfile = "" splition_file = None (options, _) = parser.parse_args() if not options.url: parser.error("database url is needed to do schema review") url = options.url dbmanager = DBManager() dbmanager.connect(url) dbalias = dbmanager.get_alias(url) tables = dbmanager.load_tables(dbalias) originschema = None simschema = None originschema = OriginSchema(dbmanager.db_tables[dbalias]) if not originschema.check_connective(): print "schema graph is not connective" # raise Exception('Schema graph is not connective') if options.view_origin: write_original_schema_graph(originschema, "original_schema") check_call(['dot', '-Tpng', 'original_schema.dot', '-o', \ 'original_schema.png']) if not options.mapfile: print "further review need mapfile" return mapfile = options.mapfile mapper.load_mapfile(mapfile) if dbmanager.db_type[dbalias] == 'mysql':# for mysql case sensitive mapper.set_sens(True) if not mapper.validate_map(tables): raise Exception("conflicts between map file and schema") originschema.recognize_type(mapper) originschema.handle_alias() originschema.gen_attr_links(mapper) load_statistics(dbmanager, dbalias, originschema) if options.alias_mapfile: mapper.load_mapfile(options.alias_mapfile) simschemas = originschema.gen_simschema() for simschema in simschemas: simschema.update_nodelist() originschema.recognize_shortcut() if options.view_simulate: write_original_schema_graph(originschema, "simschema0") check_call(['dot', '-Tpng', 'simschema0.dot', '-o', \ 'simschema.png']) for idx in range(len(simschemas)): simschema = simschemas[idx] fname = 'simschema%d.png' % idx if len(simschema.nodelist) > 1: 
write_simulate_schema_graph(simschema, 'simschema1') check_call(['dot', '-Tpng', 'simschema1.dot', '-o', fname]) if options.view_basics: for idx in range(len(simschemas)): simschema = simschemas[idx] if len(simschema.nodelist) < 2: continue if not write_basics_cycles(simschema, "basic_cycle%d" % idx): continue fname = 'basic_cycle%d_pack.png' % idx p1 = Popen(["dot"] + glob.glob('basic_cycle%d_*.dot' % idx), \ stdout=PIPE) p2 = Popen(["gvpack"], stdin=p1.stdout, stdout=PIPE) p3 = Popen(['dot', '-Tpng', '-o', fname], \ stdin=p2.stdout, stdout=PIPE) p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. p2.stdout.close() output = p3.communicate()[0] if options.view_core: for idx in range(len(simschemas)): simschema = simschemas[idx] if len(simschema.nodelist) < 2: continue write_core_wgraph(simschema) fname = 'coreschema%d.png' % idx check_call(['dot', '-Tpng', 'coreschema.dot', '-o', fname]) if options.split_file: splition_file = options.split_file if options.view_splition: for idx in range(len(simschemas)): simschema = simschemas[idx] if len(simschema.nodelist) < 2: continue write_splition(simschema, splition_file, "Subgraph%d" % idx) fname = 'subgraph%d_pack.png' % idx p1 = Popen(["dot"] + glob.glob('Subgraph%d_*.dot' % idx), \ stdout=PIPE) p2 = Popen(["gvpack"], stdin=p1.stdout, stdout=PIPE) p3 = Popen(['dot', '-Tpng', '-o', fname], \ stdin=p2.stdout, stdout=PIPE) p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. p2.stdout.close() output = p3.communicate()[0]
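# Illustrative command line for this review tool (the script name and the
# database URL are assumptions, not taken from the source):
#
#   python schema_review.py -u sqlite:///unittest.db -m map.yaml \
#       --view_origin --view_simulate
#
# Each --view_* option writes one or more .dot files and shells out to
# graphviz ('dot', plus 'gvpack' for the packed variants) to render .png files.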
class QueryBuilder(object):
    """Application QueryBuilder"""
    def __init__(self, verbose=0):
        """initialize"""
        class MySchema(object):
            """class encapsulating the tables structure"""
            def __init__(self, tables=None):
                """initialize"""
                self.tables = tables
            def set_tables(self, tables):
                """set tables"""
                self.tables = tables
            def is_loaded(self):
                """return 1 if tables are loaded"""
                if self.tables is not None:
                    return 1
                return 0
        self.schema = MySchema()
        self.querybuilder = None
        self.mapper = None
        self.keywords = []

    def is_ready(self):
        """check that schema and mapfile are loaded"""
        if self.querybuilder is not None:
            if self.schema.is_loaded():
                if self.mapper is not None:
                    if self.mapper.is_ready():
                        return 1
                print "map file is not loaded"
                return 0
        print "schema is not loaded"
        return 0

    def set_mapper(self, mapfile='map.yaml'):
        """set mapper"""
        self.mapper = Mapper()
        self.mapper.load_mapfile(mapfile)

    def get_mapper(self):
        """get mapper"""
        return self.mapper

    def set_from_tables(self, tables):
        """set querybuilder from tables"""
        self.schema.set_tables(tables)
        self.querybuilder = SchemaHandler(tables)

    def set_from_files(self, schema_file):
        """set querybuilder from a schema file"""
        metadata = load_from_file(schema_file)
        tables = metadata.tables
        # self.schema = metadata
        self.schema.set_tables(tables)
        self.querybuilder = SchemaHandler(tables)

    def recognize_schema(self, dbmanager=None, alias=None):
        """recognize schema"""
        self.querybuilder.recognize_schema(self.mapper, dbmanager, alias)

    def parse_input(self, in_puts):
        """parse input"""
        return query_parser(self.mapper, in_puts)

    def generate_sqlalchemy_clauses(self, query, keylist):
        """generate sqlalchemy where clause"""
        if query is None:
            return None
        return self.querybuilder.gen_clauses(query, keylist)

    def get_attr_path(self):
        """get attribute path"""
        return self.querybuilder.get_attr_path()

    def build_query(self, query):
        """build query for dbsh"""
        query, keylist = self.parse_input(query)
        if query is None:
            _LOGGER.debug("query is not valid")
            return None
        whereclause = self.generate_sqlalchemy_clauses(query, keylist)
        _LOGGER.debug("clause is: %s" % str(whereclause))
        query = self.querybuilder.build_query(whereclause, keylist)
        _LOGGER.debug("built query is: %s" % str(query))
        return query
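# Minimal usage sketch for QueryBuilder (an illustration, not taken verbatim
# from this module: the file names are borrowed from the tests below, and the
# input string is a hypothetical keyword query):
#
#   qbuilder = QueryBuilder()
#   qbuilder.set_from_files('starting_db.yaml')  # schema -> SchemaHandler
#   qbuilder.set_mapper('testmap.yaml')          # keyword -> column map
#   qbuilder.recognize_schema()                  # classify schema nodes
#   if qbuilder.is_ready():
#       query = qbuilder.build_query('dataset.name where file.name = abc')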
class TestTSchema(unittest.TestCase):
    """Test TSchema"""
    def setUp(self):
        metadata = load_from_file("starting_db.yaml")
        oschema = OriginSchema(metadata.tables)
        oschema.check_connective()
        self.mapper = Mapper()
        self.mapper.load_mapfile('testmap.yaml')
        self.mapper.validate_map(metadata.tables)
        oschema.recognize_type(self.mapper)
        oschema.handle_alias()
        oschema.gen_attr_links(self.mapper)
        self.simschemas = oschema.gen_simschema()
        for simschema in self.simschemas:
            simschema.update_nodelist()

    def test_get_wgraph_from_schema(self):
        """test get wgraph from schema"""
        graph1 = [[(5, 0), (4, 0)], [(4, 0)], [], [(4, 0)], [(2, 0)], [(4, 0)]]
        graph2 = self.simschemas[0].get_wgraph_from_schema()
        self.assertEqual(len(graph1), len(graph2))
        for idx in range(len(graph1)):
            # compare sorted copies; list.sort() returns None, so comparing
            # the return values of .sort() would always pass
            self.assertEqual(sorted(graph1[idx]), sorted(graph2[idx]))

    def test_get_graph_from_schema(self):
        """test get graph from schema"""
        graph1 = [[5, 4], [4], [], [4], [2], [4]]
        graph2 = self.simschemas[0].get_graph_from_schema()
        self.assertEqual(len(graph1), len(graph2))
        for idx in range(len(graph1)):
            self.assertEqual(sorted(graph1[idx]), sorted(graph2[idx]))

    def test_write_graph(self):
        """test write graph"""
        dot = DotGraph(file("z.dot", "w"))
        self.simschemas[0].write_graph(dot)

    def test_get_cycle_basis(self):
        """test get basic cycles"""
        simschema = self.simschemas[0]
        relations = simschema.get_graph_from_schema()
        graph = Graph(relations)
        ugraph = graph.get_undirected()
        cycles2 = simschema.get_cycle_basis(ugraph._graph)
        cycles2 = cycles2[0]
        cycles = ['Files', 'ProcessedDataset', 'Block']
        cycles1 = [ \
            simschema.ordered.index(simschema.nodelist[node.lower()])
            for node in cycles]
        cycles1.sort()
        cycles2.sort()
        self.assertEqual(cycles1, cycles2)

    def test_write_cyclic_graph(self):
        """test write cyclic graph"""
        dot = DotGraph(file("z.dot", "w"))
        self.simschemas[0].write_cyclic_graph(dot, "basic_cycles")

    def test_gen_subgraph(self):
        """test generating subgraph"""
        pass
class TestOriginSchema(unittest.TestCase):
    """Test OriginSchema"""
    def setUp(self):
        """auto init"""
        metadata = load_from_file("starting_db.yaml")
        self.oschema = OriginSchema(metadata.tables)
        self.mapper = Mapper()
        self.mapper.load_mapfile('testmap.yaml')
        self.mapper.validate_map(metadata.tables)
        self.oschema.check_connective()

    def test_yaml_graph(self):
        """test creating a schema from yaml and writing a dot graph
        via query_builder"""
        oschema = self.oschema
        dot = DotGraph(file("z.dot", "w"))
        oschema.write_graph(dot)
        self.assertEqual(12, len(oschema.nodelist))

#    def test_check_connective(self):
#        """test check connectives"""
#        self.oschema.check_connective()

    def test_recognize_type(self):
        """test recognizing the three kinds of nodes"""
        oschema = self.oschema
        oschema.recognize_type(self.mapper)
        # for node in oschema.nodelist:
        #     print oschema.nodelist[node], oschema.nodelist[node].parent
        # print oschema.v_ent
        # print oschema.v_rel
        # print oschema.v_attr
        self.assertEqual(6, len(oschema.v_ent))
        self.assertEqual(1, len(oschema.v_rel))
        self.assertEqual(3, len(oschema.v_attr))

    def test_handle_alias(self):
        """test handle alias"""
        oschema = self.oschema
        oschema.recognize_type(self.mapper)
        oschema.handle_alias()
        self.assertEqual(4, len(oschema.v_attr))

    def test_gen_attr_link(self):
        """test attribute link generation"""
        self.oschema.recognize_type(self.mapper)
        attr_path, _ = self.oschema.gen_attr_links(self.mapper)
        self.assertEqual('Files.Dataset_ProcessedDataset.ID'.lower(), \
            attr_path['dataset.name'][0].name.lower())

    def test_gen_simschema(self):
        """test generating simschema"""
        oschema = self.oschema
        oschema.recognize_type(self.mapper)
        oschema.handle_alias()
        oschema.gen_attr_links(self.mapper)
        simschemas = oschema.gen_simschema()
        for simschema in simschemas:
            simschema.update_nodelist()
        self.assertEqual(1, len(simschemas))
class TestSchemaHandler(unittest.TestCase):
    """Test class for SchemaHandler"""
    def setUp(self):
        """default"""
        metadata = load_from_file("starting_db.yaml")
        self.handler = SchemaHandler(metadata.tables)
        self.mapper = Mapper()
        self.mapper.load_mapfile('testmap.yaml')
        self.mapper.validate_map(metadata.tables)
        self.handler._schema.recognize_type(self.mapper)
        self.handler._schema.handle_alias()
        self.handler._schema.gen_attr_links(self.mapper)
        simschemas = self.handler._schema.gen_simschema()
        for simschema in simschemas:
            simschema.update_nodelist()
            graph = simschema.get_wgraph_from_schema()
            self.handler.subconstructors.append([ConstructQuery(graph, \
                weighted=True)])
            nodes = set(range(len(simschema.ordered)))
            self.handler.subnodes.append([nodes])
        self.handler._simschemas = simschemas

    def tearDown(self):
        """default"""
        pass

    def test_gen_clauses(self):
        """test clause generation"""
        keylist = {'keywords': ['dataset.createdate', 'dataset.name'],
                   'constraints': ['file.name'],
                   'keyset': ['ProcessedDataset.CreateDate', 'Files.Path',
                              'Files.Name']}
        mquery = {'keywords': [['ProcessedDataset.CreateDate'],
                               ['Files.Path']],
                  'constraints': [{'value': '123',
                                   'keyword': ['Files.Name', 'file.name'],
                                   'sign': '='}]}
        clause = self.handler.gen_clauses(mquery, keylist)
        self.assertEqual(str(clause.left).lower(), 'Files.Name'.lower())

    def test_gen_clauses_comp(self):
        """test complex clause generation:
        ("Files"."Name" = :Name_1) AND
        (("Files"."Path" = :Path_1) OR ("Files"."Path" = :Path_2))
        """
        mquery = {'keywords': [['Files.Path']],
                  'constraints': [{'value': '123',
                                   'keyword': ['Files.Name', 'file.name'],
                                   'sign': '='},
                                  'and',
                                  [{'value': '456',
                                    'keyword': ['Files.Path', 'dataset'],
                                    'sign': '='},
                                   'or',
                                   {'value': '789',
                                    'keyword': ['Files.Path', 'dataset'],
                                    'sign': '='}]]}
        keylist = {'keywords': ['dataset'],
                   'constraints': ['file.name', 'dataset', 'dataset'],
                   'keyset': ['Files.Name', 'Files.Path', 'Files.Path']}
        clause = self.handler.gen_clauses(mquery, keylist)
        self.assertEqual(str(clause.clauses[0].left).lower(), \
            'Files.Name'.lower())

    def test_single_query(self):
        """test single query without constraints"""
        keylist = {'keywords': ['dataset.createdate'],
                   'constraints': None,
                   'keyset': ['ProcessedDataset.CreateDate'],
                   'mkeywords': ['ProcessedDataset.CreateDate']}
        whereclause = None
        query = self.handler.build_query(whereclause, keylist)
        print query

    def test_single_query2(self):
        """test single query with constraints"""
        mquery = {'keywords': [['Files.Path']],
                  'constraints': [{'value': '123',
                                   'keyword': ['Files.Name', 'file.name'],
                                   'sign': '='},
                                  'and',
                                  [{'value': '456',
                                    'keyword': ['Files.Path', 'dataset'],
                                    'sign': '='},
                                   'or',
                                   {'value': '789',
                                    'keyword': ['Files.Path', 'dataset'],
                                    'sign': '='}]]}
        keylist = {'keywords': ['dataset'],
                   'constraints': ['file.name', 'dataset', 'dataset'],
                   'keyset': ['Files.Path', 'Files.Name', 'Files.Path',
                              'Files.Path'],
                   'mkeywords': [['Files.Path']]}
        clause = self.handler.gen_clauses(mquery, keylist)
        query = self.handler.build_query(clause, keylist)
        print query

    def test_single_query3(self):
        """test query with multiple tables"""
        mquery = {'keywords': [['Files.Path'], ['ProcAlgo.Algorithm'],
                               ['PrimaryDataset.Name']]}
        keylist = {'keywords': ['dataset', 'algo', 'primds'],
                   'constraints': [],
                   'mkeywords': [['Files.Path'], ['ProcAlgo.Algorithm'],
                                 ['PrimaryDataset.Name']],
                   'keyset': ['Files.Path', 'ProcAlgo.Algorithm',
                              'PrimaryDataset.Name']}
        clause = self.handler.gen_clauses(mquery, keylist)
        query = self.handler.build_query(clause, keylist)
        print query

        mquery = {'keywords': [['Files.Path'], ['ProcAlgo.Algorithm'],
                               ['PrimaryDataset.Name']],
                  'constraints': [{'value': '123',
                                   'keyword': ['Files.Name', 'file.name'],
                                   'sign': '='},
                                  'and',
                                  [{'value': '456',
                                    'keyword': ['Files.Path', 'dataset'],
                                    'sign': '='},
                                   'or',
                                   {'value': '789',
                                    'keyword': ['Files.Path', 'dataset'],
                                    'sign': '='}]]}
        keylist = {'keywords': ['dataset', 'algo', 'primds'],
                   'constraints': ['file.name', 'dataset', 'dataset'],
                   'keyset': ['Files.Path', 'ProcAlgo.Algorithm',
                              'PrimaryDataset.Name', 'Files.Name',
                              'Files.Path', 'Files.Path'],
                   'mkeywords': [['Files.Path'], ['ProcAlgo.Algorithm'],
                                 ['PrimaryDataset.Name']]}
        clause = self.handler.gen_clauses(mquery, keylist)
        query = self.handler.build_query(clause, keylist)
        print query

    def test_single_query4(self):
        """test query with a manually built sqlalchemy select"""
        process_dataset = self.handler.find_table('ProcessedDataset')
        primary_dataset = self.handler.find_table('PrimaryDataset')
        # select(columns, whereclause=None, from_obj=None, distinct=False,
        #        having=None, correlate=True, prefixes=None, **kwargs)
        select_test = select(columns=[process_dataset.c.CreateDate,
                                      primary_dataset.c.Name])\
            .correlate(primary_dataset.c.ID == process_dataset.c.PrimaryDataset)\
            .where(primary_dataset.c.Name == 'test')
        keylist = {'keywords': ['dataset.createdate', 'primds'],
                   'constraints': ['primds.name'],
                   'keyset': ['ProcessedDataset.CreateDate',
                              'PrimaryDataset.Name', 'PrimaryDataset.Name'],
                   'mkeywords': [['ProcessedDataset.CreateDate'],
                                 ['PrimaryDataset.Name']]}
        query = self.handler.build_query(select_test, keylist)
        print query
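# Note on the dict shapes exercised above (inferred from these tests, not from
# separate documentation): 'mquery' carries mapped column names, where
# 'keywords' is a list of candidate-column lists and 'constraints' is a nested
# infix list of {'value', 'keyword', 'sign'} dicts joined by 'and'/'or';
# 'keylist' carries the raw user keywords plus the flattened
# 'keyset'/'mkeywords' column names that gen_clauses() and build_query() use
# to select and join tables.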
class TestLive(unittest.TestCase):
    """live test"""
    def setUp(self):
        """initialize a test sqlite db, then load its metadata"""
        if os.path.exists('unittest.db'):
            os.unlink('unittest.db')
        test_db = UnittestDB()
        self.metadata = test_db.load_with_fake_data('oracle.sql',
                                                    'sqlite:///unittest.db')
        self.handler = SchemaHandler(self.metadata.tables)
        self.mapper = Mapper()
        self.mapper.load_mapfile('map.yaml')
        self.mapper.validate_map(self.metadata.tables)
        self.handler._schema.recognize_type(self.mapper)
        self.handler._schema.handle_alias()
        self.handler._schema.gen_attr_links(self.mapper)
        simschemas = self.handler._schema.gen_simschema()
        for simschema in simschemas:
            simschema.update_nodelist()
            graph = simschema.get_wgraph_from_schema()
            self.handler.subconstructors.append([ConstructQuery(graph, \
                weighted=True)])
            nodes = set(range(len(simschema.ordered)))
            self.handler.subnodes.append([nodes])
        self.handler._simschemas = simschemas

    def tearDown(self):
        """release resources"""
        self.metadata.bind.engine.dispose()
        os.unlink('unittest.db')

    def display_rows(self, rows):
        """display rows on LOGGER"""
        for row in rows:
            continue

    def test_read_query(self):
        """test read queries"""
        process_dataset = self.handler.find_table('ProcessedDataset')
        data_tier = self.handler.find_table('DataTier')
        primary_dataset = self.handler.find_table('PrimaryDataset')
        files = self.handler.find_table('Files')

        # First try a regular select query.
        select_test = select([process_dataset.c.Name, data_tier.c.Name,
                              primary_dataset.c.CreatedBy])\
            .where(process_dataset.c.ID == 0)
        print "regular select ", select_test
        results = select_test.execute()
        rows = results.fetchall()
        self.display_rows(rows)

        # Then use our software to build one.
        keylist = {'keywords': ['dataset', 'tier', 'primds.createby'],
                   'constraints': [],
                   'keyset': ['ProcessedDataset.Name', 'DataTier.Name',
                              'PrimaryDataset.CreatedBy'],
                   'mkeywords': [['ProcessedDataset.Name'], ['DataTier.Name'],
                                 ['PrimaryDataset.CreatedBy']]}
        query = self.handler.build_query(None, keylist)
        print "modified select", query
        results = query.execute()
        rows = results.fetchall()
        self.display_rows(rows)
        self.assertEqual(len(rows[0]), 3)
def main():
    usage = "usage: %prog -m mapfile \n"
    usage += "       -s databaselink -a alias_mapfile\n"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-m", "--mapfile", action="store", type="string",
        dest="mapfile", help="input registration yaml map file")
    parser.add_option("-s", "--source", action="store", type="string",
        dest="source", help="input validation database source link")
    parser.add_option("-a", "--alias_mapfile", action="store", type="string",
        dest="alias_mapfile", help="input mapfile based on aliased tables")

    mapper = Mapper()
    mapfile = None
    source = None

    (options, _) = parser.parse_args()
    if options.mapfile:
        mapfile = options.mapfile
    if options.source:
        source = options.source
    if mapfile is None or source is None:
        print "both a mapfile and a source are needed"
        return

    mapper.load_mapfile(mapfile)
    dbmanager = DBManager()
    dbmanager.connect(source)
    dbalias = dbmanager.get_alias(source)
    tables = dbmanager.load_tables(dbalias)
    columns = mapper.list_column()
    entities = mapper.list_entity()
    keys = mapper.list_key()

    edict = gendict(mapper)
    # generate single-keyword queries
    gen_single(edict)
    samples = get_sample(mapper, dbmanager, dbalias)

    # if dbmanager.db_type[dbalias] == 'mysql':  # mysql is case sensitive
    #     mapper.set_sens(True)
    originschema = OriginSchema(dbmanager.db_tables[dbalias])
    originschema.check_connective()
    originschema.recognize_type(mapper)
    originschema.handle_alias()
    # print originschema.alias_table
    samples = get_sample(mapper, dbmanager, dbalias, originschema.alias_table)
    # print samples
    originschema.gen_attr_links(mapper)
    load_statistics(dbmanager, dbalias, originschema)

    if options.alias_mapfile:
        mapper.load_mapfile(options.alias_mapfile)
        edict = gendict(mapper)

    single = gen_single(edict)
    writedown(single, 'single.txt')
    singlec = gen_single_constraints(edict, single, mapper, samples)
    writedown(singlec, 'singlec.txt')
    double = gen_double(edict)
    # p.pprint(double)
    doublec = gen_double_constraints(edict, double, mapper, samples)
    writedown(doublec, 'doublec.txt')
    multiple = gen_multiple(edict, 3)
    writedown(multiple, 'multiple.txt')
    multiple = gen_multiple(edict, 4)
    writedown(multiple, 'multiple4.txt')
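# Illustrative command line for this validation tool (the script name is an
# assumption; the options are the ones defined above):
#
#   python validate_map.py -m map.yaml -s sqlite:///unittest.db
#
# A run writes the generated test queries to single.txt, singlec.txt,
# doublec.txt, multiple.txt and multiple4.txt in the working directory.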