Exemplo n.º 1
0
    def select_closest_flowline(self, similarity_cutoff=0.6):
        """Pick the flowline with the smallest snap distance.

        Runs only when ``self.status == 1``; otherwise nothing is changed.
        The table built from ``self.flowlines_data`` has its NHDPlusV2
        columns relabeled, and then the row with the smallest
        'meters from flowline' value is stored on ``self.hydrolink_flowline``
        as a dict. Name similarity does not influence which row is chosen;
        it is only counted so that callers can see whether a name-matched
        flowline was available.

        Side effects:
            self.total_count_flowlines: number of rows in the selection table.
            self.name_match_in_buffer: number of rows whose
                'flowline name similarity' is >= ``similarity_cutoff``.
            self.hydrolink_flowline: the selected row, when there is no tie.
            self.message / self.error_handling(): set / called when several
                rows share the smallest snap distance.

        Args:
            similarity_cutoff: threshold used only for the
                ``name_match_in_buffer`` count.
        """
        if self.status != 1:
            return

        column_labels = {
            "lengthkm": "nhdplusv2 flowline length km",
            "reachcode": "nhdplusv2 flowline reachcode",
            "gnis_name": "nhdplusv2 flowline gnis name",
            "comid": "nhdplusv2 comid",
            "terminalflag": "nhdplusv2 terminal flag",
            "permanent_identifier": "nhdplusv2 flowline permanent identifier",
        }
        flowlines = utils.df_for_selection(self.flowlines_data)
        flowlines = flowlines.rename(columns=column_labels)

        # Bookkeeping about the full candidate set, recorded before narrowing.
        self.total_count_flowlines = flowlines.shape[0]
        meets_cutoff = flowlines['flowline name similarity'] >= similarity_cutoff
        name_matched = flowlines.loc[meets_cutoff]
        self.name_match_in_buffer = name_matched.shape[0]

        # keep='all' retains every row tied for the minimum distance so that
        # an ambiguous selection can be detected below.
        nearest = flowlines.nsmallest(1, 'meters from flowline', keep='all')
        if nearest.shape[0] <= 1:
            records = nearest.to_dict('records')
            self.hydrolink_flowline = records[0]
        else:
            self.message = f'multiple flowlines with same snap distance for id: {self.source_id}. Use name_match method.'
            self.error_handling()
Exemplo n.º 2
0
    def select_closest_flowline_w_name_match(self, similarity_cutoff=0.6):
        """Select closest flowline with matching water name.

        HydroLink data to the closest NHD feature with a name similarity that
        meets the specified similarity_cutoff. If no flowlines meet the
        similarity cutoff, the method HydroLinks data to the closest NHD
        feature. Requires output from hydrolink_flowlines.

        Runs only when ``self.status == 1``; otherwise nothing is changed.

        Candidate rows are chosen by the first rule that yields any rows:
          1. rows whose 'flowline name similarity' equals 1.0 (exact match),
          2. rows whose 'flowline name similarity' is >= similarity_cutoff,
          3. all rows.
        Within the chosen candidates the row with the smallest
        'meters from flowline' is stored in ``self.hydrolink_flowline`` as a
        dict. If several candidates tie for the smallest distance,
        ``self.message`` is set and ``self.error_handling()`` is called
        instead.

        Args:
            similarity_cutoff: minimum 'flowline name similarity' for a
                flowline to count as name matched when no exact match exists.
        """
        if self.status == 1:
            df = utils.df_for_selection(self.flowlines_data)
            df = df.rename(
                columns={
                    "lengthkm": "nhdhr flowline length km",
                    "reachcode": "nhdhr flowline reachcode",
                    "gnis_name": "nhdhr flowline gnis name",
                    "permanent_identifier":
                    "nhdhr flowline permanent identifier"
                })
            self.total_count_flowlines = df.shape[0]
            df_exact = df.loc[df['flowline name similarity'] == 1.0]
            df_similarity = df.loc[
                df['flowline name similarity'] >= similarity_cutoff]

            # Prefer exact name matches, then matches meeting the cutoff,
            # and only fall back to every flowline when no name matches.
            if df_exact.shape[0] > 0:
                candidates = df_exact
            elif df_similarity.shape[0] > 0:
                candidates = df_similarity
            else:
                candidates = df

            # A single candidate is taken as is (from the filtered frame, not
            # from the unfiltered table). Otherwise keep the closest one;
            # keep='all' retains ties so ambiguity can be reported.
            if candidates.shape[0] != 1:
                candidates = candidates.nsmallest(1,
                                                  'meters from flowline',
                                                  keep='all')

            if candidates.shape[0] > 1:
                self.message = f'multiple flowlines with same snap distance for id: {self.source_id}.'
                self.error_handling()
            else:
                self.hydrolink_flowline = ((
                    candidates.to_dict('records'))[0])