def setup(self, N):
    """Prepare ``self.df`` with ``id*`` / ``v*`` alias columns.

    Opens whatever ``generate_numerical()`` returns via ``vaex.open``, keeps
    the first ``N`` rows, stores the result on ``self.df`` and then adds nine
    columns that simply point at existing ones.

    :param N: number of leading rows to keep.
    """
    frame = vaex.open(generate_numerical())[:N]
    self.df = frame

    # new column name -> existing column it mirrors (insertion order matters:
    # columns are added in exactly this sequence).
    aliases = {
        'id1': 'i1_100',
        'id2': 'i1_100',
        'id3': 'i4_1M',
        'id4': 'i1_100',
        'id5': 'i1_100',
        'id6': 'i4_1M',
        'v1': 'i1_10',
        'v2': 'i1_10',
        'v3': 'x4',
    }
    for alias, source in aliases.items():
        frame[alias] = frame[source]
def setup(self, N):
    """Prepare ``self.df`` and mark its integer columns as categorical.

    The first ``N`` rows of the generated numerical dataset are stored on
    ``self.df``; each listed column is then categorized in place with
    ``min_value=5`` and a ``max_value`` that depends on the column family.

    :param N: number of leading rows to keep.
    """
    self.df = vaex.open(generate_numerical())[:N]

    # (column names, max_value) per family, in the order they are categorized.
    # NOTE: i1_1K is intentionally absent -- it was disabled in the original
    # (presumably because a 1-byte integer cannot span ~1K values; confirm).
    families = (
        (('i8_10', 'i4_10', 'i2_10', 'i1_10'), 15),
        (('i8_1K', 'i4_1K', 'i2_1K'), 1_000 + 5),
        (('i8_1M', 'i4_1M'), 1_000_000 + 5),
    )
    for column_names, upper in families:
        for column_name in column_names:
            expression = getattr(self.df, column_name)
            self.df.categorize(
                expression, min_value=5, max_value=upper, inplace=True
            )
def time_sort_ints(N, T):
    """Sort the first ``N`` rows of the numerical dataset by ``i8_<T>``.

    :param N: number of leading rows to keep before sorting.
    :param T: suffix selecting the sort column (the column is ``i8_{T}``).
    """
    frame = vaex.open(generate_numerical())[:N]
    sort_key = f'i8_{T}'
    # The sorted result is discarded; only the call itself matters here.
    frame.sort(sort_key)
def setup(self, N):
    """Prepare ``self.df`` plus a filtered view ``self.dff``.

    ``self.df`` is the first ``N`` rows of the generated numerical dataset
    with an extra ``i4_1M_POT`` column (``i4_1M`` multiplied by ``2**8``).
    ``self.dff`` is ``self.df`` restricted to rows where ``x > 0`` and
    ``y < 0``.

    :param N: number of leading rows to keep.
    """
    frame = self.df = vaex.open(generate_numerical())[:N]
    frame['i4_1M_POT'] = frame['i4_1M'] * 2**8

    selection = (frame.x > 0) & (frame.y < 0)
    self.dff = frame[selection]

    # Ask for the filtered length once so the filter is evaluated up front
    # ("fill cache" in the original) rather than during the timed run.
    len(self.dff)
def setup_cache(self):
    """Ensure the numerical dataframe has been generated.

    The return value of ``generate_numerical()`` is ignored; the call is made
    only for its effect of making the dataset available.
    """
    generate_numerical()
def setup(self, N):
    """Prepare ``self.df`` with an additional ``i4_1M_POT`` column.

    ``self.df`` is the first ``N`` rows of the generated numerical dataset;
    ``i4_1M_POT`` is ``i4_1M`` multiplied by ``2**8``.

    :param N: number of leading rows to keep.
    """
    frame = vaex.open(generate_numerical())[:N]
    self.df = frame

    scale = 2**8
    frame['i4_1M_POT'] = frame['i4_1M'] * scale
def setup(self, N):
    """Prepare ``self.df`` and categorize its ``*_10`` integer columns.

    Each of the four columns is categorized with the explicit labels
    ``5..14`` (``list(range(5, 15))``) and ``check=False``.

    :param N: number of leading rows to keep.
    """
    self.df = vaex.open(generate_numerical())[:N]

    for column_name in ('i8_10', 'i4_10', 'i2_10', 'i1_10'):
        # Build a fresh label list per call, exactly as the original did.
        labels = list(range(5, 15))
        self.df.categorize(getattr(self.df, column_name), labels, check=False)
def setup(self, N, M):
    """Prepare a numerical and a string dataframe, each cut to ``N`` rows.

    :param N: number of leading rows to keep in both dataframes.
    :param M: not used by this method (accepted to match the signature
        expected by the caller).
    """
    numerical = vaex.open(generate_numerical())
    self.df_num = numerical[:N]

    strings = vaex.open(generate_strings())
    self.df_str = strings[:N]
def setup_cache(self):
    """Ensure both the numerical and the string datasets are generated.

    Both return values are ignored; the calls are made only for their effect
    of making the datasets available.
    """
    generate_numerical()
    generate_strings()