# NOTE(review): the statements down to `return pairs` are the tail of a method
# whose `def` and opening `if` are not shown here; the indentation below
# assumes a method body with an if/else -- confirm against the full file.
            pairs = self._link_index(*x)
            # Level names are built from the index names of x[0] and x[1].
            names = self._make_index_names(x[0].index.name, x[1].index.name)

        # deduplication
        else:
            pairs = self._dedup_index(*x)
            # The index name of x[0] is passed for both levels here.
            names = self._make_index_names(x[0].index.name, x[0].index.name)

        # Rename the pairs in place with the names chosen above.
        pairs.rename(names, inplace=True)

        return pairs


# Old public name kept as an alias: DeprecationHelper wraps
# BaseIndexAlgorithm (presumably warning on use -- confirm in its definition).
BaseIndexator = DeprecationHelper(BaseIndexAlgorithm)


class BaseCompareFeature(object):
    """Base abstract class for compare feature engineering.

    Parameters
    ----------
    labels_left : list, str, int
        The labels to use for comparing record pairs in the left
        dataframe.
    labels_right : list, str, int
        The labels to use for comparing record pairs in the right
        dataframe (linking) or left dataframe (deduplication).
    args : tuple
        Additional arguments to pass to the `_compare_vectorized`
# large dataframes if n_max < 1e6: pairs = random_pairs_without_replacement_small_frames( self.n, shape, self.random_state) # small dataframes else: pairs = random_pairs_without_replacement_large_frames( self.n, shape, self.random_state) levels = [df_a.index.values, df_a.index.values] labels = pairs return pandas.MultiIndex(levels=levels, labels=labels, verify_integrity=False) FullIndex = DeprecationHelper( Full, "class recordlinkage.FullIndex is renamed and moved, " "use recordlinkage.index.Full") BlockIndex = DeprecationHelper( Block, "class recordlinkage.BlockIndex is renamed and moved, " "use recordlinkage.index.Block") SortedNeighbourhoodIndex = DeprecationHelper( SortedNeighbourhood, "class recordlinkage.SortedNeighbourhoodIndex " "is renamed and moved, use recordlinkage.index.SortedNeighbourhood") RandomIndex = DeprecationHelper( Random, "class recordlinkage.RandomIndex is renamed and moved, " "use recordlinkage.index.Random")
"n must be a integer satisfying 0<n<=%s" % n_max) # large dataframes if n_max < 1e6: pairs = random_pairs_without_replacement_small_frames( self.n, shape, self.random_state) # small dataframes else: pairs = random_pairs_without_replacement_large_frames( self.n, shape, self.random_state) levels = [df_a.index.values, df_a.index.values] labels = pairs return pandas.MultiIndex( levels=levels, labels=labels, verify_integrity=False ) FullIndex = DeprecationHelper( Full, "This class is moved to recordlinkage.index.Full.") BlockIndex = DeprecationHelper( Block, "This class is moved to recordlinkage.index.Block.") SortedNeighbourhoodIndex = DeprecationHelper( SortedNeighbourhood, "This class is moved to recordlinkage.index.SortedNeighbourhood.") RandomIndex = DeprecationHelper( Random, "This class is moved to recordlinkage.index.Random.")
# NOTE(review): the statements down to `return c` are the tail of a method
# whose `def` is not shown here; the nesting depth below is an assumption --
# confirm against the full file.
        # The string 'default' is replaced by a built-in list of
        # (first month, second month, value) entries: months 6<->7 and
        # 9<->10, each with value 0.5.
        if self.swap_months == 'default':
            self.swap_months = [(6, 7, 0.5), (7, 6, 0.5),
                                (9, 10, 0.5), (10, 9, 0.5)]
        else:
            # Every entry must have exactly three items. Any exception raised
            # while checking (including one from iterating or calling len())
            # is reported as a ValueError.
            try:
                if not all([len(x) == 3 for x in self.swap_months]):
                    raise Exception
            except Exception:
                # NOTE(review): the backslash in this message looks like a
                # line continuation placed inside the string literal, so the
                # text after it may not read as intended -- confirm.
                raise ValueError(
                    'swap_months must be a list of (first month, \ second month, value) tuples or lists. ')

        # For each configured swap, assign `value` where the years and days
        # are equal, s1 has month1, s2 has month2, and the current result is
        # not already 1.
        for month1, month2, value in self.swap_months:
            c[(s1.dt.year == s2.dt.year) &
              (s1.dt.month == month1) &
              (s2.dt.month == month2) &
              (s1.dt.day == s2.dt.day) &
              (c != 1)] = value

        c = pandas.Series(c)
        # Positions where either side is null receive the missing value.
        c[s1.isnull() | s2.isnull()] = self.missing_value
        return c


# Old public names kept as aliases. Each one passes the renamed class and a
# message to DeprecationHelper (presumably shown as a deprecation warning
# when the old name is used -- confirm in its definition).
CompareExact = DeprecationHelper(Exact, "This class is renamed into Exact.")
CompareString = DeprecationHelper(String, "This class is renamed into String.")
CompareNumeric = DeprecationHelper(Numeric,
                                   "This class is renamed into Numeric.")
CompareGeographic = DeprecationHelper(
    Geographic, "This class is renamed into Geographic.")
CompareDate = DeprecationHelper(Date, "This class is renamed into Date.")