def _strip_if_not_exists(statement_rest):
    """Return *statement_rest* without a leading ``IF NOT EXISTS`` clause.

    The clause is removed only when all three keywords are present as whole,
    whitespace-separated words. Table names that merely begin with "IF",
    "NOT" or "EXISTS" (for example "notes") are therefore left untouched.
    """
    words = statement_rest.split(None, 3)
    if len(words) == 4 and [word.upper() for word in words[:3]] == ["IF", "NOT", "EXISTS"]:
        return words[3]
    return statement_rest


def _parse_table_schema_entry(db_schema):
    """Parse a single schema (sqlite_master) entry.

    Returns a list whose first element is the column list produced by
    parse_sql_statement_params() followed by the table rows, or None when the
    entry is skipped (not a table, malformed statement, "CREATE TABLE ... AS",
    unreadable b-tree root page).
    """
    log = _adel_log.log
    ops = _helpersStringOperations
    skip_note = "          continuing with next database element (e.g. next table)"

    if len(db_schema) != 5 + 1:  # +1 due to manually added leading rowID
        log("parse_db: WARNING! invalid length of database schema statement entry detected: ", 2)
        log(str(db_schema), 2)
        return None

    # Only table entries are parsed; index, view and trigger entries are ignored.
    if ops.starts_with_string(str(db_schema[1]), "TABLE") != 0:
        return None

    # db_schema[5] is expected to be the "sql text" as defined in sqlite_master
    sql_statement = db_schema[5]
    if sql_statement is None or sql_statement == "":
        log("parse_db: WARNING! missing database schema statement entry detected, printing schema statement:", 2)
        log(str(db_schema), 3)
        return None

    log("\nparseDB: ----> parsing new database structure with SQL statement:", 3)
    log(str(sql_statement), 3)

    # Extract and check command (expected to be CREATE)
    command_tuple = ops.split_at_first_occurrence(sql_statement, " ")
    if len(command_tuple) == 0:
        log("parse_db: WARNING! invalid sql COMMAND detected, continuing with next database element (e.g. next table)", 2)
        return None
    if ops.starts_with_string(str(command_tuple[0]), "CREATE") != 0:
        log("parse_db: WARNING! invalid sql COMMAND detected, expected \"CREATE\" but found: " + str(command_tuple[0]), 2)
        log(skip_note, 2)
        return None

    # Extract and check first command operand (TEMP, TEMPORARY, VIRTUAL or TABLE)
    type_tuple = ops.split_at_first_occurrence(command_tuple[1], " ")
    if len(type_tuple) == 0:
        log("parse_db: WARNING! invalid sql COMMAND TYPE detected, continuing with next database element (e.g. next table)", 2)
        return None
    first_operand = str(type_tuple[0])
    if any(ops.starts_with_string(first_operand, keyword) == 0
           for keyword in ("TEMP", "TEMPORARY", "VIRTUAL")):
        # Ignore the modifier and proceed with the next fragment (must then be TABLE)
        type_tuple = ops.split_at_first_occurrence(type_tuple[1], " ")
        if len(type_tuple) == 0:
            log("parse_db: WARNING! invalid sql COMMAND TYPE after TEMP(ORARY) detected, continuing with next database element (e.g. next table)", 2)
            return None
    if ops.starts_with_string(str(type_tuple[0]), "TABLE") != 0:
        log("parse_db: WARNING! invalid sql COMMAND TYPE detected, expected \"TABLE\" but found: " + str(type_tuple[0]), 2)
        log(skip_note, 2)
        return None

    # Extract the table name: decide whether a space or an opening parenthesis
    # terminates it by comparing their positions (negative means "not found",
    # as relied upon in parse_sql_statement_params).
    remainder = type_tuple[1]
    next_space = ops.fist_occurrence(remainder, " ")
    next_parenthesis = ops.fist_occurrence(remainder, "(")
    if next_space < next_parenthesis:
        # "IF NOT EXISTS" clause possible in front of the table name
        remainder = ops.crop_whitespace(_strip_if_not_exists(remainder))
        name_tuple = ops.split_at_first_occurrence(remainder, " ")
        if len(name_tuple) == 0:
            name_tuple = ops.split_at_first_occurrence(remainder, "(")
            if len(name_tuple) == 0:
                log("parse_db: WARNING! invalid sql COMMAND TYPE NAME detected, continuing with next database element (e.g. next table)", 2)
                return None
            # Append leading opening parenthesis that we cut off before
            name_tuple[1] = "(" + str(name_tuple[1])
        else:
            # "AS ..." statement possible; such a statement carries no column list
            tmp_string = ops.crop_whitespace(name_tuple[1])
            if tmp_string.startswith("AS"):
                log("parse_db: OK - \"AS\" statement detected: " + str(tmp_string), 3)
                log("parse_db: OK - no data stored, thus continuing with next database element (e.g. next table)", 3)
                return None
    else:
        name_tuple = ops.split_at_first_occurrence(remainder, "(")
        if len(name_tuple) == 0:
            log("parse_db: WARNING! invalid sql COMMAND TYPE NAME detected, continuing with next database element (e.g. next table)", 2)
            return None
        # Append leading opening parenthesis that we cut off before
        name_tuple[1] = "(" + str(name_tuple[1])

    structure_type = str(type_tuple[0])
    structure_name = str(name_tuple[0])
    parsed_message = "parse_db: ----> database structure " + structure_type + " \"" + structure_name + "\" parsed"

    log("parse_db: ----> parsing database structure " + structure_type + " \"" + structure_name + "\"", 3)
    log("parse_db: ----> parsing SQL statement of " + structure_type + "....", 3)
    log("parse_db: OK - SQL statement is of type: " + str(command_tuple[0]) + " " + structure_type, 3)

    # Parse the column definitions; they become the first element of the result
    column_definitions = ops.cut_first_last_exclude(name_tuple[1], "(", ")")
    result_list = [parse_sql_statement_params(column_definitions)]

    # Virtual tables have no b-tree and thus the b-tree root page pointer is 0
    if db_schema[4] == 0:
        log("parse_db: OK - this table holds no content (e.g. virtual table), continuing with next database element (e.g. next table)", 3)
        log(parsed_message, 3)
        return result_list

    # Parse and append table contents
    btree_root_page_string = _sqliteFileHandler.read_page(db_schema[4])
    if btree_root_page_string == "":
        log("parse_db: ERROR - could not refer to b-tree root page: " + str(db_schema[4]), 1)
        log(skip_note, 1)
        return None

    log("parse_db: ----> parsing contents of " + structure_type + "....", 3)
    table_contents = _sqlitePageParser.parse_table_btree_page(btree_root_page_string, 0)

    # ROW_ID_COLUMN was set by parse_sql_statement_params() above
    if ROW_ID_COLUMN == 0:
        # No dedicated row ID column: expose the already extracted row ID as
        # an additional leading "rowID" column.
        result_list[0] = [["rowID", "INTEGER"]] + list(result_list[0])
        result_list.extend(table_contents)
    else:
        # Dedicated row ID column (integer primary key): store the b-tree row
        # ID at the place of that column and drop the leading row ID element.
        for row in table_contents:
            row[ROW_ID_COLUMN] = row[0]
            result_list.append(row[1:])

    log(parsed_message, 3)
    return result_list


def parse_db(file_name):
    """Parse the sqlite3 database file *file_name* and return its table contents.

    The schema stored on the first page (sqlite_master) is read and every
    entry of type "table" is parsed. Index, view and trigger entries are
    ignored.

    Returns a list with one element per parsed table. Each element is a list
    whose first item is the column list ([column name, column type] pairs as
    returned by parse_sql_statement_params()) followed by one list per row.
    An empty list is returned if the file cannot be opened, the first page
    cannot be read, or the header's text encoding value is greater than 1.

    Side effects: sets the module global DB_FILE_SIZE_IN_BYTES. The database
    file is closed again on every path once it has been opened successfully.
    """
    global DB_FILE_SIZE_IN_BYTES

    _adel_log.log("\n############ SQLite PARSER -> " + file_name + " ############ \n", 2)
    _adel_log.log("parse_db: ----> parsing sqlite3 database file....", 3)

    # Open the database
    DB_FILE_SIZE_IN_BYTES = _sqliteFileHandler.open_db(file_name)
    if DB_FILE_SIZE_IN_BYTES == 0:
        # File could not be opened correctly, so there is nothing to close
        return []

    try:
        # Read first page of database file and ensure it exists
        first_page_hex_string = _sqliteFileHandler.read_page(1)
        if first_page_hex_string == "":
            _adel_log.log("parse_db: ERROR - cannot read first page of database", 1)
            return []

        # Parse the database header on the first page (first 100 bytes in the database file)
        parse_db_header(first_page_hex_string)
        if HEADER_DATABASE_TEXT_ENCODING > 1:
            _adel_log.log("parse_db: ERROR - database text encoding " + str(HEADER_DATABASE_TEXT_ENCODING) + " not supported in this version of FSP", 1)
            return []

        # Parse database schema (first page of the database file is root b-tree page for the schema btree)
        # Database schema is stored in a well defined way (sqlite master table)
        # CREATE TABLE sqlite_master(
        #     type text,        # must be one of the following: ['table', 'index', 'view', 'trigger']
        #     name text,
        #     tbl_name text,
        #     rootpage integer,
        #     sql text
        # );
        _adel_log.log("\nparseDB: ----> parsing sqlite3 database SCHEMA....", 3)
        db_schemata = _sqlitePageParser.parse_table_btree_page(first_page_hex_string, 100)  # 100 bytes database file header
        _adel_log.log("parse_db: ----> sqlite3 database SCHEMA parsed", 3)

        # Loop through all schemata of the database and collect the parsed tables
        final_list = []
        for db_schema in db_schemata:
            table_result = _parse_table_schema_entry(db_schema)
            if table_result is not None:
                final_list.append(table_result)

        _adel_log.log("\nparseDB: ----> returning contents of the database file", 3)
    finally:
        # Close the database file, also on early returns and on errors
        _sqliteFileHandler.close_db()

    _adel_log.log("parse_db: ----> sqlite3 database file parsed", 3)
    return final_list
# Keywords that introduce a table constraint inside a CREATE TABLE column list
_TABLE_CONSTRAINT_KEYWORDS = ("CONSTRAINT", "PRIMARY KEY", "UNIQUE", "CHECK", "FOREIGN")
# Keywords that introduce a column constraint after a column name / type name
_COLUMN_CONSTRAINT_KEYWORDS = ("CONSTRAINT", "PRIMARY KEY", "NOT", "UNIQUE", "CHECK",
                               "DEFAULT", "COLLATE", "REFERENCES")


def _starts_with_keyword(text, keywords):
    """Return True if *text* starts with one of *keywords* as a whole word.

    The prefix test is delegated to starts_with_string(). In addition the
    character following the keyword must not continue an identifier, so that
    names such as "checksum" or "unique_id" are not mistaken for the CHECK or
    UNIQUE keywords.
    """
    for keyword in keywords:
        if _helpersStringOperations.starts_with_string(text, keyword) != 0:
            continue
        following = text[len(keyword):len(keyword) + 1]
        if not (following.isalnum() or following == "_"):
            return True
    return False


def _is_rowid_alias(type_name, constraint_string):
    """Return True for a column declared as ``INTEGER PRIMARY KEY``.

    Only a declared type of exactly INTEGER makes a primary key column an
    alias for the row ID; other primary key columns store their own values.
    """
    return (type_name.strip().upper() == "INTEGER"
            and _starts_with_keyword(constraint_string, ("PRIMARY KEY",)))


def parse_sql_statement_params(hex_string):
    """Parse the column definitions of a CREATE TABLE statement.

    *hex_string* is the text between the outer parentheses of the statement.
    It is split at top-level commas; table constraints are skipped and every
    column definition is turned into a [column name, column type] pair (the
    type is "" when none is declared).

    Returns the list of these pairs in the order of the column definitions.

    Side effect: the module global ROW_ID_COLUMN is reset to 0 and, if a
    column is declared INTEGER PRIMARY KEY directly after its type name, set
    to the 1-based position of that definition.
    """
    global ROW_ID_COLUMN

    log = _adel_log.log
    ops = _helpersStringOperations

    # Build params list, initialise result list and reset ROW_ID_COLUMN
    param_list = ops.split_parenthesis_sensitive(hex_string, ",")
    result_list = []
    ROW_ID_COLUMN = 0

    log("parse_sql_statement_params: ----> printing SQL statement parameters in the form [column name, column type]....", 3)
    for index, raw_param in enumerate(param_list):
        position = str(index + 1)
        column_prefix = "parse_sql_statement_params: OK - " + position + ". column is: "
        param = ops.crop_whitespace(raw_param)

        # Ensure that we have a column (starts with column name) and no table constraint
        if _starts_with_keyword(param, _TABLE_CONSTRAINT_KEYWORDS):
            log("parse_sql_statement_params: OK - TABLE constraint detected at positon: " + position + ", constraint is: " + str(param), 3)
            continue

        # Ok, we deal with a column
        column_tuple = ops.split_at_first_occurrence(param, " ")
        if len(column_tuple) == 0:
            # Only a column name is given: append it with an empty type so
            # that every result element has the same [name, type] shape
            param_tuple = [param, ""]
            result_list.append(param_tuple)
            log(column_prefix + str(param_tuple), 3)
            continue

        # At this position we can have a type-name or column constraints
        column_name = ops.crop_whitespace(column_tuple[0])
        column_string = ops.crop_whitespace(column_tuple[1])

        if _starts_with_keyword(column_string, _COLUMN_CONSTRAINT_KEYWORDS):
            # Constraint directly after the name: no declared type, hence the
            # column cannot be an INTEGER PRIMARY KEY row ID alias
            param_tuple = [column_name, ""]
            result_list.append(param_tuple)
            log(column_prefix + str(param_tuple) + ", constraint(s): " + column_string, 3)
            continue

        # There is no column constraint, so there must be a type-name; cut it
        # off at whichever of space / opening parenthesis comes first
        type_tuple = []
        next_space = ops.fist_occurrence(column_string, " ")
        next_parenthesis = ops.fist_occurrence(column_string, "(")
        if (next_space >= 0) and ((next_space < next_parenthesis) or (next_parenthesis < 0)):
            type_tuple = ops.split_at_first_occurrence(column_string, " ")
        elif (next_parenthesis >= 0) and ((next_parenthesis <= next_space) or (next_space < 0)):
            type_tuple = ops.split_at_first_occurrence(column_string, "(")
            # Append the opening parenthesis that was cut off
            type_tuple[1] = "(" + str(type_tuple[1])

        if len(type_tuple) == 0:
            # Nothing follows the type name
            param_tuple = [column_name, column_string]
            result_list.append(param_tuple)
            log(column_prefix + str(param_tuple), 3)
            continue

        # The statement continues, so continue to parse
        type_name = ops.crop_whitespace(type_tuple[0])
        type_string = ops.crop_whitespace(type_tuple[1])

        # The remaining string can contain further type name definitions
        # (e.g. varchar(20)) or column constraint statements
        if _starts_with_keyword(type_string, _COLUMN_CONSTRAINT_KEYWORDS):
            param_tuple = [column_name, type_name]
            result_list.append(param_tuple)
            log(column_prefix + str(param_tuple) + ", constraint(s): " + type_string, 3)
            # Check whether this column functions as row ID (INTEGER PRIMARY KEY)
            if _is_rowid_alias(type_name, type_string):
                ROW_ID_COLUMN = index + 1
            continue

        # Further type name definitions must be enclosed in parenthesis and belong to the type name
        restTuple = ops.cut_first_last_include_into_tuple(type_string, "(", ")")
        if len(restTuple) == 0:
            # This case should not occur
            log("parse_sql_statement_params: WARNING! invalid column TYPE STRING in " + position + ". column: " + str(type_string), 2)
            log("                                     continuing with next column definition", 2)
            continue

        rest_name = ops.crop_whitespace(restTuple[0])
        rest_string = ops.crop_whitespace(restTuple[1])

        # Rest name belongs to the type name, append it
        type_name = type_name + " " + str(rest_name)
        param_tuple = [column_name, type_name]
        result_list.append(param_tuple)

        if len(rest_string) == 0:
            # Log without column constraints
            log(column_prefix + str(param_tuple), 3)
        elif _starts_with_keyword(rest_string, _COLUMN_CONSTRAINT_KEYWORDS):
            log(column_prefix + str(param_tuple) + ", constraint(s): " + rest_string, 3)
            # Check whether this column functions as row ID (INTEGER PRIMARY KEY)
            if _is_rowid_alias(type_name, rest_string):
                ROW_ID_COLUMN = index + 1
        else:
            # This case should not occur
            log("parse_sql_statement_params: WARNING! invalid column REST STRING in " + position + ". column: " + str(rest_string), 2)
            log("                                     continuing with next column definition", 2)

    return result_list