def updateJson(self, file_path, file=None, cur=None): if not file_path.startswith(self.db_dir): return False # Not from the db dir: Skipping relative_path = file_path[len(self.db_dir):] # File path realative to db file # Check if filename matches any of mappings in schema matched_maps = [] for match, map_settings in self.schema["maps"].items(): try: if SafeRe.match(match, relative_path): matched_maps.append(map_settings) except SafeRe.UnsafePatternError as err: self.log.error(err) # No match found for the file if not matched_maps: return False # Load the json file try: if file is None: # Open file is not file object passed file = open(file_path, "rb") if file is False: # File deleted data = {} else: if file_path.endswith("json.gz"): data = json.load(helper.limitedGzipFile(fileobj=file)) else: data = json.load(file) except Exception, err: self.log.debug("Json file %s load error: %s" % (file_path, err)) data = {}
def updateJson(self, file_path, file=None, cur=None): if not file_path.startswith(self.db_dir): return False # Not from the db dir: Skipping relative_path = file_path[len(self.db_dir):] # File path realative to db file # Check if filename matches any of mappings in schema matched_maps = [] for match, map_settings in self.schema["maps"].items(): try: if SafeRe.match(match, relative_path): matched_maps.append(map_settings) except SafeRe.UnsafePatternError as err: self.log.error(err) # No match found for the file if not matched_maps: return False # Load the json file try: if file is None: # Open file is not file object passed file = open(file_path, "rb") if file is False: # File deleted data = {} else: if file_path.endswith("json.gz"): file = helper.limitedGzipFile(fileobj=file) if sys.version_info.major == 3 and sys.version_info.minor < 6: data = json.loads(file.read().decode("utf8")) else: data = json.load(file) except Exception as err: self.log.debug("Json file %s load error: %s" % (file_path, err)) data = {} # No cursor specificed if not cur: cur = self.getSharedCursor() cur.logging = False # Row for current json file if required if not data or [dbmap for dbmap in matched_maps if "to_keyvalue" in dbmap or "to_table" in dbmap]: json_row = cur.getJsonRow(relative_path) # Check matched mappings in schema for dbmap in matched_maps: # Insert non-relational key values if dbmap.get("to_keyvalue"): # Get current values res = cur.execute("SELECT * FROM keyvalue WHERE json_id = ?", (json_row["json_id"],)) current_keyvalue = {} current_keyvalue_id = {} for row in res: current_keyvalue[row["key"]] = row["value"] current_keyvalue_id[row["key"]] = row["keyvalue_id"] for key in dbmap["to_keyvalue"]: if key not in current_keyvalue: # Keyvalue not exist yet in the db cur.execute( "INSERT INTO keyvalue ?", {"key": key, "value": data.get(key), "json_id": json_row["json_id"]} ) elif data.get(key) != current_keyvalue[key]: # Keyvalue different value cur.execute( "UPDATE keyvalue SET value = ? WHERE keyvalue_id = ?", (data.get(key), current_keyvalue_id[key]) ) # Insert data to json table for easier joins if dbmap.get("to_json_table"): directory, file_name = re.match("^(.*?)/*([^/]*)$", relative_path).groups() data_json_row = dict(cur.getJsonRow(directory + "/" + dbmap.get("file_name", file_name))) changed = False for key in dbmap["to_json_table"]: if data.get(key) != data_json_row.get(key): changed = True if changed: # Add the custom col values data_json_row.update({key: val for key, val in data.items() if key in dbmap["to_json_table"]}) cur.execute("INSERT OR REPLACE INTO json ?", data_json_row) # Insert data to tables for table_settings in dbmap.get("to_table", []): if isinstance(table_settings, dict): # Custom settings table_name = table_settings["table"] # Table name to insert datas node = table_settings.get("node", table_name) # Node keyname in data json file key_col = table_settings.get("key_col") # Map dict key as this col val_col = table_settings.get("val_col") # Map dict value as this col import_cols = table_settings.get("import_cols") replaces = table_settings.get("replaces") else: # Simple settings table_name = table_settings node = table_settings key_col = None val_col = None import_cols = None replaces = None # Fill import cols from table cols if not import_cols: import_cols = set([item[0] for item in self.schema["tables"][table_name]["cols"]]) cur.execute("DELETE FROM %s WHERE json_id = ?" % table_name, (json_row["json_id"],)) if node not in data: continue if key_col: # Map as dict for key, val in data[node].items(): if val_col: # Single value cur.execute( "INSERT OR REPLACE INTO %s ?" % table_name, {key_col: key, val_col: val, "json_id": json_row["json_id"]} ) else: # Multi value if type(val) is dict: # Single row row = val if import_cols: row = {key: row[key] for key in row if key in import_cols} # Filter row by import_cols row[key_col] = key # Replace in value if necessary if replaces: for replace_key, replace in replaces.items(): if replace_key in row: for replace_from, replace_to in replace.items(): row[replace_key] = row[replace_key].replace(replace_from, replace_to) row["json_id"] = json_row["json_id"] cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row) elif type(val) is list: # Multi row for row in val: row[key_col] = key row["json_id"] = json_row["json_id"] cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row) else: # Map as list for row in data[node]: row["json_id"] = json_row["json_id"] if import_cols: row = {key: row[key] for key in row if key in import_cols} # Filter row by import_cols cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row) # Cleanup json row if not data: self.log.debug("Cleanup json row for %s" % file_path) cur.execute("DELETE FROM json WHERE json_id = %s" % json_row["json_id"]) return True