def test_network_topology_reassignment(
    trace_gdf: gpd.GeoDataFrame,
    area_gdf: gpd.GeoDataFrame,
    tmp_path: Path,
    network_name: str,
    truncate_traces: bool,
    circular_target_area: bool,
    snap_threshold: float,
):
    """
    Test reassignment of Network branch_gdf and node_gdf.

    A first ``Network`` determines branches and nodes and writes them into
    ``tmp_path``. The written files are read back and passed to a second
    ``Network`` that is told not to determine them again. Both networks must
    produce the same numerical description and the second network must hold
    the branch data it was given.
    """
    # Keyword arguments shared by both Network instances
    shared_kwargs: Dict[str, Any] = {
        "trace_gdf": trace_gdf,
        "area_gdf": area_gdf,
        "name": network_name,
        "truncate_traces": truncate_traces,
        "circular_target_area": circular_target_area,
        "snap_threshold": snap_threshold,
    }

    determined = Network(**shared_kwargs, determine_branches_nodes=True)
    expected_description = determined.numerical_network_description()

    # Outputs are named explicitly so their paths are known to the test
    branches_name = f"{network_name}_branches.geojson"
    nodes_name = f"{network_name}_nodes.geojson"
    determined.write_branches_and_nodes(
        output_dir_path=tmp_path,
        branches_name=branches_name,
        nodes_name=nodes_name,
    )

    branches_path, nodes_path = tmp_path / branches_name, tmp_path / nodes_name
    assert branches_path.exists()
    assert nodes_path.exists()

    # Round-trip the written data through disk
    branches_gdf = read_geofile(branches_path)
    nodes_gdf = read_geofile(nodes_path)
    assert isinstance(branches_gdf, gpd.GeoDataFrame)
    assert isinstance(nodes_gdf, gpd.GeoDataFrame)

    reassigned = Network(
        **shared_kwargs,
        determine_branches_nodes=False,
        branch_gdf=branches_gdf,
        node_gdf=nodes_gdf,
    )
    actual_description = reassigned.numerical_network_description()

    assert_frame_equal(
        pd.DataFrame([expected_description]),
        pd.DataFrame([actual_description]),
    )
    assert_frame_equal(reassigned.branch_gdf, branches_gdf)
def perf_profile(
    do_validation: bool = True,
    do_network: bool = True,
    do_grid: bool = True,
    do_subsampling: bool = False,
):
    """
    Profile ``fractopo`` performance.

    Reads the sample trace and area datasets and runs the selected workloads
    on them.

    :param do_validation: Run trace ``Validation`` (with ``allow_fix=True``).
    :param do_network: Create a ``Network`` and compute its numerical
        description.
    :param do_grid: Compute a contour grid from the created ``Network``.
        Only has an effect when ``do_network`` is True.
    :param do_subsampling: Subsample the created ``Network`` through
        ``MultiNetwork``. Only has an effect when ``do_network`` is True.
    :raises FileNotFoundError: If either sample dataset is missing.
    """
    if not all(path.exists() for path in (SAMPLE_TRACES_PATH, SAMPLE_AREA_PATH)):
        # The working directory is logged as an aid for locating the samples
        logging.error(f"Current path: {Path.cwd().resolve()}")
        raise FileNotFoundError(
            "Expected sample to exist at:"
            f" {(SAMPLE_TRACES_PATH, SAMPLE_AREA_PATH)}"
        )

    traces = read_geofile(SAMPLE_TRACES_PATH)
    area = read_geofile(SAMPLE_AREA_PATH)
    name = SAMPLE_AREA_PATH.stem
    snap_threshold = 0.001

    if do_validation:
        validator = Validation(
            traces=traces,
            area=area,
            SNAP_THRESHOLD=snap_threshold,
            name=name,
            allow_fix=True,
        )
        validator.run_validation()

    if not do_network:
        # The grid and subsampling steps operate on the Network created
        # below. Without it there is nothing for them to run on, so return
        # instead of referencing an unbound name.
        return

    network = Network(
        trace_gdf=traces,
        area_gdf=area,
        snap_threshold=snap_threshold,
        name=name,
        circular_target_area=True,
        truncate_traces=True,
        determine_branches_nodes=True,
    )
    network.numerical_network_description()

    if do_grid:
        network.contour_grid(cell_width=2.0)

    if do_subsampling:
        multi_network = MultiNetwork((network,))
        multi_network.subsample(min_radii=5.0, samples=5)
def convert_filetype(original_path: Path, convert_path: Path, driver: str):
    """
    Convert from original_path to convert_path with driver.

    Nothing is written when a dataset already exists at ``convert_path``.
    A failure while saving is logged with its traceback and not re-raised.

    :param original_path: Path to the existing dataset to convert.
    :param convert_path: Path where the converted dataset is saved.
    :param driver: Driver name passed on to ``GeoDataFrame.to_file``.
    :raises FileNotFoundError: If ``original_path`` does not exist.
    """
    if not original_path.exists():
        missing_msg = f"Expected {original_path.name} to exist at {original_path}."
        raise FileNotFoundError(missing_msg)

    # An existing target is left untouched, it is neither removed nor
    # overwritten.
    if convert_path.exists():
        logging.info(f"Dataset already exists at {convert_path}.")
        return

    source_gdf = read_geofile(original_path)

    # Parent directories of the target are created on demand
    target_dir = convert_path.parent
    target_dir.mkdir(exist_ok=True, parents=True)

    logging.info(f"Saving to {convert_path} with driver {driver}.")
    try:
        source_gdf.to_file(convert_path, driver=driver)
    except Exception:
        # Best-effort conversion: report the failure and carry on
        failure_msg = (
            f"Failed to save {original_path} to {convert_path}"
            f" with driver {driver} due to error."
        )
        logging.error(failure_msg, exc_info=True)
def test_crop_to_target_areas(keep_column_data: bool, file_regression):
    """
    Test cropping traces to target area with known right example data results.

    The cropped result, sorted by index, is serialized to JSON and compared
    against the stored regression file.
    """
    traces_path = Path("tests/sample_data/mls_crop_samples/traces.gpkg")
    areas_path = Path("tests/sample_data/mls_crop_samples/mls_area.gpkg")
    trace_data = general.read_geofile(traces_path)
    area_data = general.read_geofile(areas_path)

    cropped_traces = general.crop_to_target_areas(
        traces=trace_data,
        areas=area_data,
        keep_column_data=keep_column_data,
    )
    assert isinstance(cropped_traces, gpd.GeoDataFrame)

    # Sorting makes the serialized output independent of row order
    cropped_json = cropped_traces.sort_index().to_json(indent=1)
    file_regression.check(cropped_json)
def test_dissolve_multi_part_traces(file_regression):
    """
    Test dissolving MultiLineString containing GeoDataFrame.

    Dissolve should copy all attribute data to new dissolved LineStrings.
    The dissolved result, sorted by index, is serialized to JSON and compared
    against the stored regression file.
    """
    sample_path = Path("tests/sample_data/mls_crop_samples/mls_traces.gpkg")
    trace_data = general.read_geofile(sample_path)

    dissolved_traces = general.dissolve_multi_part_traces(trace_data)
    assert isinstance(dissolved_traces, gpd.GeoDataFrame)

    # Sorting makes the serialized output independent of row order
    dissolved_json = dissolved_traces.sort_index().to_json(indent=1)
    file_regression.check(dissolved_json)
def baseanalyze(
    traces_path_str: str,
    area_path_str: str,
    results_path_str: str,
    other_results_path_str: str,
    coverage_path_str: str,
    circle_radius: float,
    overwrite: bool,
) -> bool:
    """
    Run individual network analyses.

    The analysis result is pickled into the results directory as
    ``<area file stem>.pickle``.

    :param traces_path_str: Path to the trace dataset.
    :param area_path_str: Path to the area dataset. Its stem names the result.
    :param results_path_str: Existing directory for the pickled result.
    :param other_results_path_str: Path passed on to ``analyze``.
    :param coverage_path_str: Path to the coverage dataset.
    :param circle_radius: Passed on to ``analyze``.
    :param overwrite: Whether an existing result may be replaced.
    :return: False if a result already existed and ``overwrite`` is False,
        True once a result has been written.
    :raises NotADirectoryError: If the results directory does not exist.
    """
    traces_path = Path(traces_path_str)
    area_path = Path(area_path_str)
    results_path = Path(results_path_str)
    other_results_path = Path(other_results_path_str)
    coverage_path = Path(coverage_path_str)

    if not results_path.exists() or not results_path.is_dir():
        raise NotADirectoryError(f"Expected {results_path} dir to exist.")

    traces = read_geofile(traces_path)
    area = read_geofile(area_path)
    coverage_gdf = read_geofile(coverage_path)

    name = area_path.stem
    result_path = results_path / f"{name}.pickle"
    already_done = result_path.exists()
    if already_done and not overwrite:
        return False

    description_srs = analyze(
        traces,
        area,
        name,
        other_results_path,
        coverage_gdf=coverage_gdf,
        circle_radius=circle_radius,
    )

    # Existence is checked again as the analysis ran in between
    if result_path.exists():
        result_path.unlink()
    description_srs.to_pickle(result_path)
    return True
def test_determine_proximal_traces_regression(file_regression):
    """
    Test determine_proximal_traces with regression.

    The result, sorted by index, is serialized to JSON and compared against
    the stored regression file.
    """
    traces = read_geofile(Helpers.sample_trace_100_data)
    traces.reset_index(drop=True, inplace=True)
    assert isinstance(traces, gpd.GeoDataFrame)
    assert len(traces) > 0

    buffer_value, azimuth_tolerance = 1, 30
    result = proximal_traces.determine_proximal_traces(
        traces, buffer_value, azimuth_tolerance  # type: ignore
    )

    assert proximal_traces.MERGE_COLUMN in result.columns
    assert isinstance(result, gpd.GeoDataFrame)

    result_json = result.sort_index().to_json(indent=1)
    file_regression.check(result_json)
def pandera_reporting(
    update_tuple: UpdateTuple, metadata: rules.Metadata
) -> Tuple[Dict[rules.ColumnNames, str], pd.DataFrame]:
    """
    Check traces GeoDataFrame column data against schema and report if needed.

    :param update_tuple: Validation outcome of one trace dataset, including
        the path to the traces.
    :param metadata: Metadata passed on to ``perform_pandera_check``.
    :return: Tuple of update values and report. The update values are empty
        unless the report is non-empty and the dataset is otherwise valid,
        in which case they mark the dataset as unfit. The report is empty
        when nothing was checked or nothing was found.
    """
    # Datasets already marked empty or critical are not checked further
    current_validity = update_tuple.update_values[rules.ColumnNames.VALIDITY]
    skipped_validities = (
        rules.ValidationResults.EMPTY.value,
        rules.ValidationResults.CRITICAL.value,
    )
    if current_validity in skipped_validities:
        return {}, pd.DataFrame()

    # Traces are read from disk. (Alternative is to keep GeoDataFrame in
    # memory from multiprocessing but that is risky.)
    traces = general.read_geofile(update_tuple.traces_path)
    if traces.empty:
        logging.error(f"Empty traces uncaught by validation for {update_tuple}.")
        return {}, pd.DataFrame()

    try:
        pandera_report = perform_pandera_check(traces, metadata=metadata)
    except Exception as exc:
        logging.error(
            f"GeoDataFrame validation critically failed with {update_tuple} traces.",
            exc_info=True,
        )
        # The failure itself becomes the report
        failure_rows = ["Column validation critically failed...", str(exc)]
        pandera_report = pd.DataFrame({"ERROR": failure_rows})

    if pandera_report.empty:
        return {}, pd.DataFrame()

    # An otherwise valid dataset is marked unfit due to the schema error
    if otherwise_valid(update_tuple=update_tuple):
        update_values = {
            rules.ColumnNames.VALIDITY: rules.ValidationResults.UNFIT.value
        }
    else:
        update_values = {}
    return update_values, pandera_report
def network(
    trace_file: Path = typer.Argument(
        ...,
        exists=True,
        dir_okay=False,
        help=TRACE_FILE_HELP,
    ),
    area_file: Path = typer.Argument(
        ...,
        exists=True,
        dir_okay=False,
        help=AREA_FILE_HELP,
    ),
    snap_threshold: float = typer.Option(0.001, help=SNAP_THRESHOLD_HELP),
    determine_branches_nodes: bool = typer.Option(
        True,
        help="Whether to determine branches and nodes as part of analysis. Recommended.",
    ),
    name: Optional[str] = typer.Option(
        None,
        help="Name for Network. Used when saving outputs and as plot titles.",
    ),
    circular_target_area: bool = typer.Option(
        False,
        help="Is/are target area(s) circles?",
    ),
    truncate_traces: bool = typer.Option(
        True,
        help="Whether to cut traces at target area boundary. Recommended.",
    ),
    censoring_area: Optional[Path] = typer.Option(
        None,
        help="Path to area data that delineates censored areas within target areas.",
    ),
    branches_output: Optional[Path] = typer.Option(
        None,
        help="Where to save branch data.",
    ),
    nodes_output: Optional[Path] = typer.Option(
        None,
        help="Where to save node data.",
    ),
    general_output: Optional[Path] = typer.Option(
        None,
        help="Where to save general network analysis outputs e.g. plots.",
    ),
    parameters_output: Optional[Path] = typer.Option(
        None,
        help="Where to save numerical parameter data from analysis.",
    ),
):
    """
    Analyze the geometry and topology of trace network.
    """
    # The area file stem is the fallback name of the network
    network_name = area_file.stem if name is None else name

    console = Console()
    intro = Text.assemble(
        "Performing network analysis of ", (network_name, "bold green"), "."
    )
    console.print(intro)

    # Inputs are read in the order traces, area, censoring area. An empty
    # GeoDataFrame stands in for a censoring area that was not given.
    trace_gdf = read_geofile(trace_file)
    area_gdf = read_geofile(area_file)
    if censoring_area is None:
        censoring_gdf = gpd.GeoDataFrame()
    else:
        censoring_gdf = read_geofile(censoring_area)

    analyzed = Network(
        trace_gdf=trace_gdf,
        area_gdf=area_gdf,
        snap_threshold=snap_threshold,
        determine_branches_nodes=determine_branches_nodes,
        name=network_name,
        circular_target_area=circular_target_area,
        truncate_traces=truncate_traces,
        censoring_area=censoring_gdf,
    )

    # Output paths that were not given explicitly are resolved to defaults
    resolved_paths = default_network_output_paths(
        network_name=network_name,
        general_output=general_output,
        branches_output=branches_output,
        nodes_output=nodes_output,
        parameters_output=parameters_output,
    )
    (
        general_output_path,
        branches_output_path,
        nodes_output_path,
        parameters_output_path,
    ) = resolved_paths

    # Branches and nodes are saved as GeoPackages
    saving_msg = Text.assemble(
        "Saving branches to ",
        (str(branches_output_path), "bold green"),
        " and nodes to ",
        (str(nodes_output_path), "bold green"),
        ".",
    )
    console.print(saving_msg)
    analyzed.branch_gdf.to_file(branches_output_path, driver="GPKG")
    analyzed.node_gdf.to_file(nodes_output_path, driver="GPKG")

    # Parameters are shown as a table and the full description saved as csv
    console.print(rich_table_from_parameters(analyzed.parameters))
    description_df = pd.DataFrame([analyzed.numerical_network_description()])
    description_df.to_csv(parameters_output_path)
    console.print(
        Text.assemble(
            "Saving extensive network parameter csv to path:\n",
            (str(parameters_output_path), "bold green"),
        )
    )

    # Ternary XYI-node proportion plot
    xyi_fig, _, _ = analyzed.plot_xyi()
    save_fig(fig=xyi_fig, results_dir=general_output_path, name="xyi_ternary_plot")

    # Ternary branch proportion plot
    branch_fig, _, _ = analyzed.plot_branch()
    save_fig(
        fig=branch_fig, results_dir=general_output_path, name="branch_ternary_plot"
    )

    # Trace azimuth rose plot (figure is the second returned item)
    _, azimuth_fig, _ = analyzed.plot_trace_azimuth()
    save_fig(fig=azimuth_fig, results_dir=general_output_path, name="trace_azimuth")

    # Trace length distribution plot (figure is the second returned item)
    _, length_fig, _ = analyzed.plot_trace_lengths()
    save_fig(
        fig=length_fig,
        results_dir=general_output_path,
        name="trace_length_distribution",
    )
def tracevalidate(
    trace_file: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
        help=TRACE_FILE_HELP,
    ),
    area_file: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
        help=AREA_FILE_HELP,
    ),
    allow_fix: bool = typer.Option(
        True,
        "--allow-fix",
        "--fix",
        help="Allow the direct modification of trace file to fix errors.",
    ),
    summary: bool = typer.Option(
        True,
        help="Print summary of validation results.",
    ),
    snap_threshold: float = typer.Option(
        0.001,
        help="Distance threshold used to estimate whether e.g. a trace abuts in another.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        help="Where to save validated output trace data.",
    ),
    only_area_validation: bool = typer.Option(
        False,
        help="Only validate the area boundary snapping.",
    ),
    allow_empty_area: bool = typer.Option(
        True,
        help="Allow empty areas to validation.",
    ),
):
    """
    Validate trace data delineated by target area data.

    If allow_fix is True, some automatic fixing will be done to e.g. convert
    MultiLineStrings to LineStrings.
    """
    console = Console()

    # Both inputs must be readable as GeoDataFrames
    traces: gpd.GeoDataFrame = read_geofile(trace_file)
    areas: gpd.GeoDataFrame = read_geofile(area_file)
    for read_data in (traces, areas):
        if not isinstance(read_data, gpd.GeoDataFrame):
            raise TypeError(
                "Expected trace and area files to be readable as GeoDataFrames."
            )

    logging.info(f"Validating traces: {trace_file} area: {area_file}.")

    # The input crs is remembered so it can be restored on the output
    input_crs = traces.crs

    validation = Validation(
        traces,
        areas,
        trace_file.stem,
        allow_fix,
        SNAP_THRESHOLD=snap_threshold,
    )

    # None lets the validation pick its validators itself
    choose_validators: Optional[Tuple[Type[TargetAreaSnapValidator]]] = None
    if only_area_validation:
        console.print(Text.assemble(("Only performing area validation.", "yellow")))
        choose_validators = (TargetAreaSnapValidator,)

    console.print(
        Text.assemble("Performing validation of ", (trace_file.name, "blue"), ".")
    )
    validated_trace = validation.run_validation(
        choose_validators=choose_validators,
        allow_empty_area=allow_empty_area,
    )

    if input_crs is not None:
        validated_trace.crs = input_crs

    # The driver of the input file is reused for saving
    with fiona.open(trace_file) as open_trace_file:
        assert open_trace_file is not None
        save_driver = open_trace_file.driver

    if output is not None:
        output_path = output
    else:
        # No explicit output: save into a generated directory next to the input
        output_dir = make_output_dir(trace_file)
        validated_name = f"{trace_file.stem}_validated{trace_file.suffix}"
        output_path = trace_file.parent / output_dir / validated_name
        console.print(
            Text.assemble(
                (
                    f"Generated output directory at {output_dir}"
                    f"\nwhere validated output will be saved at {output_path}.",
                    "blue",
                )
            )
        )

    # A previous file at the output path is removed before saving
    if output_path.exists():
        console.print(
            Text.assemble(("Overwriting old file at given output path.", "yellow"))
        )
        output_path.unlink()

    # The error column is converted to str for saving. Its first value is
    # checked not to be a list before the conversion.
    first_error_value = validated_trace[validation.ERROR_COLUMN].iloc[0]
    assert not isinstance(first_error_value, list)
    saveable = validated_trace.astype({validation.ERROR_COLUMN: str})
    saveable.to_file(output_path, driver=save_driver)

    if summary:
        describe_results(validated_trace, validation.ERROR_COLUMN, console=console)
def tracevalidate_click(
    trace_file: str,
    area_file: str,
    allow_fix: bool,
    summary: bool,
    snap_threshold: float,
    output_path: Union[Path, None],
    only_area_validation: bool,
    allow_empty_area: bool,
):
    """
    Validate trace data delineated by target area data.

    If allow_fix is True, some automatic fixing will be done to e.g. convert
    MultiLineStrings to LineStrings.
    """
    # This entrypoint is deprecated, users are pointed to the replacement
    warn(
        """
        'tracevalidate' entrypoint is deprecated.
        Use: 'fractopo tracevalidate' entrypoint instead i.e. if your command
        was:

        tracevalidate traces.gpkg area.gpkg --fix

        the new version will be:

        fractopo tracevalidate traces.gpkg area.gpkg --allow-fix

        Note also that --fix was changed to --allow-fix and it is by default
        True. Run

        fractopo tracevalidate --help

        to make sure your arguments are correct.
        """,
        DeprecationWarning,
    )

    trace_path, area_path = Path(trace_file), Path(area_file)

    # Without an explicit output the result goes to a generated directory
    # next to the input
    if output_path is None:
        output_dir = make_output_dir(trace_path)
        validated_name = f"{trace_path.stem}_validated{trace_path.suffix}"
        output_path = trace_path.parent / output_dir / validated_name

    print(f"Validating with snap threshold of {snap_threshold}.")

    # Both inputs must be readable as GeoDataFrames
    traces: gpd.GeoDataFrame = read_geofile(trace_path)
    areas: gpd.GeoDataFrame = read_geofile(area_path)
    for read_data in (traces, areas):
        if not isinstance(read_data, gpd.GeoDataFrame):
            raise TypeError(
                "Expected trace and area data to be resolvable as GeoDataFrames."
            )

    # The input crs is remembered so it can be restored on the output
    input_crs = traces.crs

    validation = Validation(
        traces,
        areas,
        trace_path.stem,
        allow_fix,
        SNAP_THRESHOLD=snap_threshold,
    )

    # None lets the validation pick its validators itself
    choose_validators: Optional[Tuple[Type[TargetAreaSnapValidator]]] = None
    if only_area_validation:
        choose_validators = (TargetAreaSnapValidator,)

    validated_trace = validation.run_validation(
        choose_validators=choose_validators,
        allow_empty_area=allow_empty_area,
    )

    if input_crs is not None:
        validated_trace.crs = input_crs

    # The driver of the input file is reused for saving
    with fiona.open(trace_path) as open_trace_file:
        assert open_trace_file is not None
        save_driver = open_trace_file.driver

    # A previous file at the output path is removed before saving
    previous_output = Path(output_path)
    if previous_output.exists():
        previous_output.unlink()

    # The error column is converted to str for saving
    saveable = validated_trace.astype({validation.ERROR_COLUMN: str})
    saveable.to_file(output_path, driver=save_driver)

    if summary:
        describe_results(validated_trace, validation.ERROR_COLUMN)
def subsample(
    traces_path_str: str,
    area_path_str: str,
    results_path_str: str,
    other_results_path_str: str,
    coverage_path_str: str,
):
    """
    Conduct single network subsampling within the given sample area.

    A random circular sample is taken from the network, described as a
    dataframe and saved next to ``results_path_str`` with a hash of the
    sample centroid appended to the file stem.

    :param traces_path_str: Path to the trace dataset.
    :param area_path_str: Path to the area dataset. Its stem names the sample.
    :param results_path_str: Template path for the saved description. The
        actual file name is ``<stem>_<hash><suffix>`` in the same directory.
    :param other_results_path_str: Path passed on to ``save_azimuth_bin_data``.
    :param coverage_path_str: Path to the coverage dataset used to assess
        censoring within the sample.
    :raises AssertionError: If the save path is already taken even after
        falling back to the centroid-and-radius hash.
    """
    # Convert to Paths
    traces_path = Path(traces_path_str)
    area_path = Path(area_path_str)
    other_results_path = Path(other_results_path_str)
    results_path = Path(results_path_str)
    coverage_path = Path(coverage_path_str)

    # Read GeoDataFrames
    trace_gdf = read_geofile(traces_path)
    area_gdf = read_geofile(area_path)
    coverage_gdf = read_geofile(coverage_path)

    sampler = NetworkRandomSampler(
        trace_gdf=trace_gdf,
        area_gdf=area_gdf,
        min_radius=5,
        snap_threshold=0.001,
        random_choice="radius",
    )

    # Random sample: the network, the centroid of the sample circle and the
    # radius of the sample circle
    network, target_centroid, radius = sampler.random_network_sample()

    # Assess the amount of censoring within the sample
    amount_of_coverage = assess_coverage(target_centroid, radius, coverage_gdf)

    def hashed_path(hash_value: int) -> Path:
        """Resolve results path with hash_value appended to the stem."""
        return (
            results_path.parent
            / f"{results_path.stem}_{hash_value}{results_path.suffix}"
        )

    # The file name is made unique with a hash of the centroid wkt string.
    # Note that Python salts str hashes per interpreter process, so the
    # same sample gets different names in different runs.
    save_path = hashed_path(abs(hash(target_centroid.wkt)))

    # A conflict means the *hashed* path is taken, so that is the path to
    # check. On conflict the radius is mixed into the hash.
    if save_path.exists():
        more_complex_hash = abs(hash(target_centroid.wkt) + hash(radius))
        save_path = hashed_path(more_complex_hash)

    assert not save_path.exists()

    # Resolve the Network instance to just a dataframe of the parameters
    # we are interested in
    describe_df = describe_random_network(
        network=network,
        target_centroid=target_centroid,
        radius=radius,
        name=area_path.stem,
        amount_of_coverage=amount_of_coverage,
    )

    # Save the dataframe
    save_describe_df(describe_df, results_path=save_path)

    # Save azimuth rose plots when the sample resulted in a network
    if network is not None:
        save_azimuth_bin_data(
            network,
            other_results_path,
            loc_hash=f"{area_path.stem}_{target_centroid.wkt}_{radius}",
        )
def validate_invalid(invalid: utils.TraceTuple) -> utils.UpdateTuple:
    """
    Validate a given trace dataset.

    The traces are validated against their target area and the validated
    traces are written back over the original trace file.

    :param invalid: Tuple with the trace path, the area path and the snap
        threshold of the dataset to validate.
    :return: Tuple with the area name, the trace path and the resulting
        validity. The validity is EMPTY for an empty trace dataset, CRITICAL
        if the validated traces could not be written, and otherwise the
        result reported by the validation.
    :raises ValueError: If validation changed the crs of the traces.
    """
    # invalid is a TraceTuple which has named path attributes
    traces_path = invalid.traces_path
    area_path = invalid.area_path

    # Both should be Paths
    assert isinstance(traces_path, Path)
    assert isinstance(area_path, Path)

    def with_validity(validity) -> utils.UpdateTuple:
        """Create UpdateTuple of this trace-area-combo with given validity."""
        return utils.UpdateTuple(
            area_name=area_path.stem,
            update_values={rules.ColumnNames.VALIDITY: validity},
            traces_path=traces_path,
        )

    traces = read_geofile(traces_path)
    if traces.empty:
        return with_validity(rules.ValidationResults.EMPTY.value)

    # Validate with fractopo trace validation
    validated, validation_results = validate(
        traces=traces,
        area=read_geofile(area_path),
        snap_threshold=invalid.snap_threshold,
        name=area_path.name,
    )

    if validated.crs != traces.crs:
        raise ValueError("Expected crs to match for gdf before and after validation.")

    try:
        # Write the validated traces
        utils.write_geodata(gdf=validated, path=traces_path)
    except Exception:
        logging.error(
            f"Error when writing validated trace GeoDataFrame to {traces_path}.",
            exc_info=True,
        )
        # Validation does minor edits that are required for the dataset to
        # be valid so writing is always required. A failed write is therefore
        # a critical error. The enum is referenced through ``rules`` like
        # everywhere else in this function.
        return with_validity(rules.ValidationResults.CRITICAL.value)

    return with_validity(validation_results.value)
def __post_init__(self):
    """
    Overload ``__post_init__`` to handle caching.

    Handle caching by loading branch and node data from
    ``network_cache_path`` if they exist there. Uses ``sha256`` hexdigest to
    hash the network data. On a cache miss the branches and nodes are
    determined and written into the cache before normal initialization
    continues.

    :raises ValueError: If branch or node data was passed in, or if
        ``determine_branches_nodes`` is False.
    """
    branch_gdf_empty = self.branch_gdf.empty
    node_gdf_empty = self.node_gdf.empty
    if not branch_gdf_empty or not node_gdf_empty:
        error = "Do not pass branch and node GeoDataFrames to CachedNetwork."
        logging.error(
            error,
            extra=dict(
                branch_gdf_empty=branch_gdf_empty,
                node_gdf_empty=node_gdf_empty,
                network_name=self.name,
            ),
        )
        raise ValueError(error)

    if not self.determine_branches_nodes:
        error = (
            "CachedNetwork has no utility if branches and nodes are not determined."
        )
        logging.error(
            error,
            extra=dict(
                determine_branches_nodes=self.determine_branches_nodes,
                network_name=self.name,
            ),
        )
        raise ValueError(error)

    try:
        # The cache key combines the jsons of trace_gdf and area_gdf with
        # other relevant network data.
        # NOTE(review): truncate_traces is not part of the key. Confirm
        # whether networks differing only in it may share cached data.
        network_data_as_string = (
            str(self.trace_gdf.to_json())
            + str(self.area_gdf.to_json())
            + str(self.circular_target_area)
            + str(self.snap_threshold)
        )
        # Encode the string to bytes and create sha256 hexdigest of them
        encoded = network_data_as_string.encode()
        sha256_hexdigest = sha256(encoded).hexdigest()
    except Exception:
        logging.error(
            "Failed to sha256 hash trace and area GeoDataFrames."
            " If this error persists using the regular ``Network``"
            " instance is recommended.",
            exc_info=True,
        )
        # If hashing cannot be done no caching can be done
        raise

    branch_path = self.network_cache_path / f"{sha256_hexdigest}_branches.geojson"
    node_path = self.network_cache_path / f"{sha256_hexdigest}_nodes.geojson"

    if branch_path.exists() and node_path.exists():
        self._cache_hit = True

        # Cache hit -> Load branch and node data
        self.branch_gdf = read_geofile(branch_path)
        self.node_gdf = read_geofile(node_path)
        logging.info(
            "Hit cache for branch and node data. Loading.",
            extra=dict(
                network_name=self.name,
                branch_path=branch_path,
                node_path=node_path,
            ),
        )

        # CRS should be the same. It is set here explicitly to confirm
        # that.
        if (
            self.branch_gdf.crs != self.trace_gdf.crs
            or self.node_gdf.crs != self.trace_gdf.crs
        ):
            logging.info(
                "Cache loaded branches and nodes did not have same crs as traces.",
                extra=dict(
                    branch_gdf_crs=self.branch_gdf.crs,
                    node_gdf_crs=self.node_gdf.crs,
                    trace_gdf_crs=self.trace_gdf.crs,
                    network_name=self.name,
                ),
            )
            self.branch_gdf.crs = self.trace_gdf.crs
            self.node_gdf.crs = self.trace_gdf.crs
        assert self.branch_gdf.crs == self.trace_gdf.crs
        assert self.node_gdf.crs == self.trace_gdf.crs
    else:
        logging.info(
            "No cache hit for branch and node data. Determining.",
            extra=dict(network_name=self.name),
        )
        # No cache hit, determine branches and nodes
        self.assign_branches_nodes()

        # Create cache directory. Missing parent directories are created
        # too so a nested cache path does not fail only after the
        # branches and nodes have already been determined.
        self.network_cache_path.mkdir(exist_ok=True, parents=True)

        # Cache the determined branches and nodes
        write_geodata(
            gdf=self.branch_gdf,
            path=branch_path,
            allow_list_column_transform=False,
        )
        write_geodata(
            gdf=self.node_gdf,
            path=node_path,
            allow_list_column_transform=False,
        )
        logging.info(
            "Caching determined branches and nodes.",
            extra=dict(
                network_name=self.name,
                branch_path=branch_path,
                node_path=node_path,
            ),
        )

    # Continue with normal Network initialization
    super().__post_init__()
try: organizer.check() assert False except FileNotFoundError: pass if organized: organizer.organize(simulate=False) organizer.check() return organizer kb11_traces_path = Path("tests/sample_data/KB11/KB11_traces.geojson") kb11_area_path = Path("tests/sample_data/KB11/KB11_area.geojson") kb11_traces = read_geofile(kb11_traces_path) kb11_area = read_geofile(kb11_area_path) kb11_traces_cut_dislocated_path = Path( "tests/sample_data/tmp/KB11_traces_cut_dislocated.gpkg") kb11_traces_cut_path = Path("tests/sample_data/tmp/KB11_traces_cut.gpkg") kb11_traces_cut = cached_sample( path=kb11_traces_cut_path, create_dataset=lambda: cut(dataset=kb11_traces, start=0, end=50), ) kb11_traces_cut_length = kb11_traces_cut.shape[0] kb11_traces_cut_dislocated = cached_sample( path=kb11_traces_cut_dislocated_path,