def _dedup_index(self, df_a):
    """Build a MultiIndex of sampled record pairs within a single frame.

    The pairs are produced by one of the ``random_pairs_*`` helpers,
    chosen from ``self.replace`` and the size of the full index, and are
    used as positional codes into ``df_a.index.values`` on both levels.

    Parameters
    ----------
    df_a : object with ``__len__`` and an ``index`` attribute
        The frame to sample pairs from (presumably a pandas.DataFrame --
        only ``len(df_a)`` and ``df_a.index.values`` are used here).

    Returns
    -------
    pandas.MultiIndex
        Two-level index whose levels are both ``df_a.index.values``.

    Raises
    ------
    ValueError
        When sampling without replacement and ``self.n`` is not an
        ``int`` satisfying ``0 < n <= n_max``.
    """
    shape = (len(df_a),)

    if self.replace:
        # with replacement
        # NOTE(review): ``self.n`` is not validated in this branch; any
        # checking is left to the helper.
        pairs = random_pairs_with_replacement(
            self.n, shape, self.random_state)
    else:
        # without replacement
        n_max = full_index_size(shape)

        if not isinstance(self.n, int) or self.n <= 0 or self.n > n_max:
            raise ValueError(
                "n must be a integer satisfying 0<n<=%s" % n_max)

        if n_max < 1e6:
            # small dataframes
            pairs = random_pairs_without_replacement_small_frames(
                self.n, shape, self.random_state)
        else:
            # large dataframes
            pairs = random_pairs_without_replacement_large_frames(
                self.n, shape, self.random_state)

    levels = [df_a.index.values, df_a.index.values]

    # The ``labels`` keyword was renamed to ``codes`` in pandas 0.24 and
    # removed in pandas 1.0; ``codes`` is the supported spelling.
    # verify_integrity=False skips pandas' consistency checks on the
    # levels/codes, so the helper output is trusted as-is.
    return pandas.MultiIndex(
        levels=levels,
        codes=pairs,
        verify_integrity=False
    )
def _link_index(self, df_a, df_b):
    """Build a MultiIndex of sampled record pairs between two frames.

    The pairs are produced by one of the ``random_pairs_*`` helpers,
    chosen from ``self.replace`` and the size of the full index, and are
    used as positional codes into ``df_a.index.values`` (first level) and
    ``df_b.index.values`` (second level).

    Parameters
    ----------
    df_a, df_b : objects with ``__len__`` and an ``index`` attribute
        The frames to pair up (presumably pandas.DataFrame objects --
        only ``len(...)``, ``index.values`` and ``index.name`` are used
        here).

    Returns
    -------
    pandas.MultiIndex
        Two-level index named after the two input index names.

    Raises
    ------
    ValueError
        If ``self.n`` is not an ``int``; if sampling with replacement and
        the full index size is 0; or if sampling without replacement and
        ``self.n`` does not satisfy ``0 < n <= n_max``.
    """
    shape = (len(df_a), len(df_b))
    n_max = full_index_size(shape)

    if not isinstance(self.n, int):
        raise ValueError('n must be an integer')

    if self.replace:
        # with replacement
        if n_max == 0:
            raise ValueError(
                "one of the dataframes is empty")

        pairs = random_pairs_with_replacement(
            self.n, shape, self.random_state)
    else:
        # without replacement
        if self.n <= 0 or self.n > n_max:
            raise ValueError(
                "n must be a integer satisfying 0<n<=%s" % n_max)

        if n_max < 1e6:
            # small dataframes
            pairs = random_pairs_without_replacement_small_frames(
                self.n, shape, self.random_state)
        else:
            # large dataframes
            pairs = random_pairs_without_replacement_large_frames(
                self.n, shape, self.random_state)

    levels = [df_a.index.values, df_b.index.values]
    names = [df_a.index.name, df_b.index.name]

    # The ``labels`` keyword was renamed to ``codes`` in pandas 0.24 and
    # removed in pandas 1.0; ``codes`` is the supported spelling.
    # verify_integrity=False skips pandas' consistency checks on the
    # levels/codes, so the helper output is trusted as-is.
    return pandas.MultiIndex(
        levels=levels,
        codes=pairs,
        names=names,
        verify_integrity=False
    )