def __init__(self, directory):
    self.dir = os.path.abspath(directory)
    if not os.path.exists(self.dir):
        raise InvalidProject('Project directory does not exist')

    self.meta_dir = os.path.join(self.dir, '.mergin')
    if not os.path.exists(self.meta_dir):
        os.mkdir(self.meta_dir)

    self.setup_logging(directory)

    # make sure we can load correct pygeodiff
    try:
        self.geodiff = pygeodiff.GeoDiff()
    except pygeodiff.geodifflib.GeoDiffLibVersionError:
        # this is a fatal error, we can't live without geodiff
        self.log.error("Unable to load geodiff! (lib version error)")
        raise ClientError("Unable to load geodiff library!")

    # redirect any geodiff output to our log file
    def _logger_callback(level, text_bytes):
        text = text_bytes.decode()  # convert bytes to str
        if level == pygeodiff.GeoDiff.LevelError:
            self.log.error("GEODIFF: " + text)
        elif level == pygeodiff.GeoDiff.LevelWarning:
            self.log.warning("GEODIFF: " + text)
        else:
            self.log.info("GEODIFF: " + text)

    self.geodiff.set_logger_callback(_logger_callback)
    self.geodiff.set_maximum_logger_level(pygeodiff.GeoDiff.LevelDebug)
def setUp(self):
    # set env
    os.environ["GEODIFF_LOGGER_LEVEL"] = "4"
    # load lib
    lib = os.environ.get("GEODIFFLIB", None)
    if lib is None:
        raise TestError("missing GEODIFFLIB env variable")
    if not os.path.exists(lib):
        raise TestError("lib {} is missing".format(lib))
    self.geodiff = pygeodiff.GeoDiff(lib)
def setUp(self):
    # load lib
    lib = os.environ.get("GEODIFFLIB", None)
    if lib is None:
        raise TestError("missing GEODIFFLIB env variable")
    if not os.path.exists(lib):
        raise TestError("lib {} is missing".format(lib))
    self.geodiff = pygeodiff.GeoDiff(lib)
    self.geodiff.set_logger_callback(logger)
    self.geodiff.set_maximum_logger_level(pygeodiff.GeoDiff.LevelDebug)
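The logger callback passed to set_logger_callback() above is not shown in this snippet; a minimal sketch, assuming the same (level, text_bytes) signature used by the other callbacks in this file, could look like:

def logger(level, text_bytes):
    # pygeodiff passes messages as bytes; decode before printing
    text = text_bytes.decode()
    print("GEODIFF ({}): {}".format(level, text))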
def __init__(self, directory):
    self.dir = os.path.abspath(directory)
    if not os.path.exists(self.dir):
        raise InvalidProject('Project directory does not exist')

    # make sure we can load correct pygeodiff
    if os.environ.get('GEODIFF_ENABLED', 'True').lower() == 'true':
        try:
            self.geodiff = pygeodiff.GeoDiff()
        except pygeodiff.geodifflib.GeoDiffLibVersionError:
            self.geodiff = None
    else:
        self.geodiff = None

    self.meta_dir = os.path.join(self.dir, '.mergin')
    if not os.path.exists(self.meta_dir):
        os.mkdir(self.meta_dir)

    # setup logging into project directory's .mergin/client-log.txt file
    self.log = logging.getLogger('mergin.' + directory)
    self.log.setLevel(logging.DEBUG)  # log everything (it would otherwise log just warnings+errors)
    if not self.log.handlers:
        # we only need to set the handler once
        # (otherwise we would get things logged multiple times as loggers are cached)
        log_handler = logging.FileHandler(os.path.join(self.meta_dir, "client-log.txt"))
        log_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
        self.log.addHandler(log_handler)

    # redirect any geodiff output to our log file
    if self.geodiff:
        def _logger_callback(level, text_bytes):
            text = text_bytes.decode()  # convert bytes to str
            if level == pygeodiff.GeoDiff.LevelError:
                self.log.error("GEODIFF: " + text)
            elif level == pygeodiff.GeoDiff.LevelWarning:
                self.log.warning("GEODIFF: " + text)
            else:
                self.log.info("GEODIFF: " + text)

        self.geodiff.set_logger_callback(_logger_callback)
        self.geodiff.set_maximum_logger_level(pygeodiff.GeoDiff.LevelDebug)
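A minimal usage sketch of this constructor, assuming it belongs to a project class (called MerginProject here, which is an assumption) and using a hypothetical local directory:

# hypothetical example: open a local project directory; geodiff loading can be
# skipped by setting the optional GEODIFF_ENABLED environment variable to 'false'
mp = MerginProject('/tmp/demo_project')  # path is hypothetical, for illustration only
mp.log.info("project opened")            # goes to <project>/.mergin/client-log.txt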
def make_work_packages(data_dir, wp_config):
    """
    This is the core part of the algorithm for merging and splitting data for work packages.
    It expects a data directory with layout of directories and files as described in the header of this file.

    The first stage collects changes from the master DB and the work package DBs and combines
    them together, resolving any conflicts. At the end of the first stage we have an updated master database.

    The second stage then re-creates individual work package DBs.
    """

    base_dir = os.path.join(data_dir, "base")      # where the non-modified GPKGs from the last run should be
    input_dir = os.path.join(data_dir, "input")    # where the existing GPKG for each existing WP should be
    output_dir = os.path.join(data_dir, "output")  # !!!! we are deleting this directory and recreating it every time!
    tmp_dir = os.path.join(data_dir, "tmp")        # for any temporary stuff (also deleted + recreated)

    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    # names of WPs that have been processed before (and we expect their GPKGs exist and may be modified)
    old_wp_names = []
    if os.path.exists(base_dir):
        for path in Path(base_dir).iterdir():
            filename = path.name
            if filename == "master.gpkg":
                continue  # skip the master file - it's not a work package
            if filename.endswith(".gpkg"):
                wp_name = filename[:-5]  # strip the suffix
                old_wp_names.append(wp_name)
    print("existing WPs: " + str(old_wp_names))

    def _logger_callback(level, text_bytes):
        text = text_bytes.decode()  # convert bytes to str
        print("GEODIFF: ", text)

    geodiff = pygeodiff.GeoDiff()
    geodiff.set_maximum_logger_level(geodiff.LevelDebug)
    geodiff.set_logger_callback(_logger_callback)

    master_gpkg_base = os.path.join(base_dir, "master.gpkg")      # should not have been modified
    master_gpkg_input = os.path.join(input_dir, "master.gpkg")    # this could have been modified by users
    master_gpkg_output = os.path.join(output_dir, "master.gpkg")  # does not exist yet

    if os.path.exists(master_gpkg_base):
        # summarize changes that have happened in master (base master VS input master)
        # (this is not needed anywhere in the code, but may be useful for debugging)
        master_base_to_input = os.path.join(tmp_dir, "master-base-input.diff")
        master_base_to_input_json = os.path.join(tmp_dir, "master-base-input.json")
        geodiff.create_changeset(master_gpkg_base, master_gpkg_input, master_base_to_input)
        geodiff.list_changes(master_base_to_input, master_base_to_input_json)

    # create new master_gpkg in the output directory
    shutil.copy(master_gpkg_input, master_gpkg_output)

    # copy "base" remapping DB to "output" where we may be adding some more entries
    remap_db_base = os.path.join(base_dir, "remap.db")
    remap_db_output = os.path.join(output_dir, "remap.db")
    if old_wp_names and not os.path.exists(remap_db_base):
        raise ValueError("remap.db should exist!")
    if not old_wp_names and os.path.exists(remap_db_base):
        raise ValueError("remap.db should not exist yet!")
    if os.path.exists(remap_db_base):
        shutil.copy(remap_db_base, remap_db_output)

    # STAGE 1: Bring the changes from WPs to master
    # (remap WP database + create changeset + rebase changeset)

    for wp_name in old_wp_names:
        print("WP " + wp_name)

        # get max. fids for tables (so that we know where to start when remapping)
        db = sqlite3.connect(master_gpkg_output)
        c = db.cursor()
        new_master_fids = {}
        for wp_table in wp_config.wp_tables:
            wp_table_name = wp_table.name
            wp_table_name_escaped = escape_double_quotes(wp_table_name)
            c.execute(f"""SELECT max(fid) FROM {wp_table_name_escaped};""")
            new_master_fid = c.fetchone()[0]
            if new_master_fid is None:
                new_master_fid = 1  # empty table so far
            else:
                new_master_fid += 1
            new_master_fids[wp_table_name] = new_master_fid
        c = None
        db = None

        # TODO: check whether the changes in the DB are allowed (matching the deciding column)

        wp_gpkg_base_wp_fids = os.path.join(base_dir, wp_name + ".gpkg")    # should not have been modified by user
        wp_gpkg_input_wp_fids = os.path.join(input_dir, wp_name + ".gpkg")  # may have been modified by user
        wp_gpkg_base = os.path.join(tmp_dir, wp_name + "-base.gpkg")        # should not have been modified by user
        wp_gpkg_input = os.path.join(tmp_dir, wp_name + "-input.gpkg")      # may have been modified by user
        shutil.copy(wp_gpkg_base_wp_fids, wp_gpkg_base)
        shutil.copy(wp_gpkg_input_wp_fids, wp_gpkg_input)

        # re-map local fids of the WP gpkg to master fids (based on previously created mapping DB)
        for x in [wp_gpkg_base, wp_gpkg_input]:
            db = sqlite3.connect(x)
            db.enable_load_extension(True)  # for spatialite
            c = db.cursor()
            c.execute("SELECT load_extension('mod_spatialite');")  # TODO: how to deal with it?
            c.execute("ATTACH ? AS remap", (remap_db_output,))
            c.execute("BEGIN")
            for wp_table in wp_config.wp_tables:
                remap_table_wp_to_master(c, wp_table.name, wp_name, new_master_fids[wp_table.name])
            c.execute("COMMIT")

        wp_changeset_base_input = os.path.join(tmp_dir, wp_name + "-base-input.diff")
        wp_changeset_base_input_json = os.path.join(tmp_dir, wp_name + "-base-input.json")
        wp_changeset_conflicts = os.path.join(tmp_dir, wp_name + "-conflicts.json")

        # create changeset using pygeodiff using wp_gpkg_base + wp_gpkg_input
        # print("--- create changeset")
        geodiff.create_changeset(wp_gpkg_base, wp_gpkg_input, wp_changeset_base_input)

        # summarize changes that have happened in master (base master VS input master)
        # (this is not needed anywhere in the code, but may be useful for debugging)
        geodiff.list_changes(wp_changeset_base_input, wp_changeset_base_input_json)

        # TODO: the following code (copy DB + rebase + copy DB) is a bit stupid...
        # we should use GEODIFF_createRebasedChangesetEx and then just apply rebased changeset
        # but this function is not (yet) available in pygeodiff

        # create tmp_master_with_wp
        # print("--- copy + apply changeset")
        tmp_master_with_wp = os.path.join(tmp_dir, "master-" + wp_name + ".gpkg")
        shutil.copy(master_gpkg_base, tmp_master_with_wp)
        geodiff.apply_changeset(tmp_master_with_wp, wp_changeset_base_input)

        # rebase changeset - to resolve conflicts, for example:
        # - WP1 deleted a row that WP2 also wants to delete
        # - WP1 updated a row that WP2 also updated
        # - WP1 updated a row that WP2 deleted
        # - WP1 deleted a row that WP2 updated
        # - WP1 inserted a row with FID that WP2 also wants to insert -- this should not happen
        #   because remapping should assign unique master FIDs
        # print("--- rebase")
        geodiff.rebase(master_gpkg_base, master_gpkg_output, tmp_master_with_wp, wp_changeset_conflicts)

        # the tmp_master_with_wp now contains stuff from output master and WP changes on top of that
        # let's overwrite the output master with this addition :-O
        # print("--- copy 2")
        shutil.copy(tmp_master_with_wp, master_gpkg_output)

    # summarize changes that have happened in WPs (input master VS output master)
    # (this is not needed anywhere in the code, but may be useful for debugging)
    master_input_to_output = os.path.join(output_dir, "master-input-output.diff")
    master_input_to_output_json = os.path.join(output_dir, "master-input-output.json")
    geodiff.create_changeset(master_gpkg_input, master_gpkg_output, master_input_to_output)
    geodiff.list_changes(master_input_to_output, master_input_to_output_json)

    if os.path.exists(master_gpkg_base):
        # summarize all the changes that have happened since last run (collated master changes + wp changes)
        # (this is not needed anywhere in the code, but may be useful for debugging)
        master_base_to_output = os.path.join(output_dir, "master-base-output.diff")
        master_base_to_output_json = os.path.join(output_dir, "master-base-output.json")
        geodiff.create_changeset(master_gpkg_base, master_gpkg_output, master_base_to_output)
        geodiff.list_changes(master_base_to_output, master_base_to_output_json)

    # STAGE 2: Regenerate WP databases
    # (make "new" WP database + filter database based on WP + remap DB)

    for wp in wp_config.wp_names:
        wp_name, wp_value, wp_mergin_project = wp.name, wp.value, wp.mergin_project

        wp_gpkg_base = os.path.join(base_dir, wp_name + ".gpkg")      # should not have been modified by user
        wp_gpkg_input = os.path.join(input_dir, wp_name + ".gpkg")    # may have been modified by user
        wp_gpkg_output = os.path.join(output_dir, wp_name + ".gpkg")  # does not exist yet
        wp_changeset_input_to_output = os.path.join(output_dir, wp_name + "-input-output.diff")
        wp_changeset_input_to_output_json = os.path.join(output_dir, wp_name + "-input-output.json")

        # start from a copy of the master
        shutil.copy(master_gpkg_output, wp_gpkg_output)

        # filter out data that does not belong to the WP
        # and remap fids in the DB from master to WP-local fids
        db = sqlite3.connect(os.path.join(output_dir, wp_name + ".gpkg"))
        db.enable_load_extension(True)  # for spatialite
        c = db.cursor()
        c.execute("SELECT load_extension('mod_spatialite');")  # TODO: how to deal with it?
        c.execute("ATTACH ? AS remap", (remap_db_output,))
        c.execute("BEGIN")
        for wp_table in wp_config.wp_tables:
            wp_table_name = wp_table.name
            wp_table_name_escaped = escape_double_quotes(wp_table_name)
            wp_filter_column = wp_table.filter_column_name
            wp_filter_column_escaped = escape_double_quotes(wp_filter_column)
            c.execute(f"""delete from {wp_table_name_escaped} where {wp_filter_column_escaped} IS NULL""")
            if isinstance(wp_value, (str, int, float)):
                c.execute(
                    f"""delete from {wp_table_name_escaped} where {wp_filter_column_escaped} != ?""",
                    (wp_value,))
            elif isinstance(wp_value, list):
                values_str = ",".join(["?"] * len(wp_value))
                c.execute(
                    f"""delete from {wp_table_name_escaped} where {wp_filter_column_escaped} not in ({values_str})""",
                    wp_value,
                )
            else:
                # we may want to support some custom SQL at some point too
                raise ValueError("what?")

            remap_table_master_to_wp(c, wp_table.name, wp_name)

        # TODO: drop tables that are not listed at all (?)

        c.execute("COMMIT")

        # run VACUUM to purge anything that does not belong to the WP data
        c.execute("VACUUM")

        # get changeset between the one received from WP and newly created GPKG
        if os.path.exists(wp_gpkg_input):
            geodiff.create_changeset(wp_gpkg_input, wp_gpkg_output, wp_changeset_input_to_output)
            geodiff.list_changes(wp_changeset_input_to_output, wp_changeset_input_to_output_json)
        else:
            # first time this WP is created...
            pass  # TODO: what to do?
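A minimal sketch of how make_work_packages might be driven. The container types, table/column names, work package names and paths below are all hypothetical assumptions for illustration; the only requirement taken from the code above is that wp_config.wp_tables items expose .name and .filter_column_name, and wp_config.wp_names items expose .name, .value and .mergin_project:

from collections import namedtuple

# hypothetical containers matching the attributes used by make_work_packages()
WPTable = namedtuple("WPTable", ["name", "filter_column_name"])
WPName = namedtuple("WPName", ["name", "value", "mergin_project"])
WPConfig = namedtuple("WPConfig", ["wp_tables", "wp_names"])

wp_config = WPConfig(
    wp_tables=[WPTable("farms", "survey_area")],            # tables split by a filter column
    wp_names=[
        WPName("WP1", "area-1", "org/wp1-project"),          # one WP keeps rows with a single value
        WPName("WP2", ["area-2", "area-3"], "org/wp2-project"),  # another WP keeps rows with a list of values
    ],
)

# data_dir is expected to contain base/ and input/ (with master.gpkg and <wp>.gpkg files);
# output/ and tmp/ are deleted and recreated on each run
make_work_packages("/tmp/wp-data", wp_config)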