def main():
    """Benchmark five target-mean implementations and check they agree.

    Builds a 5000-row DataFrame with a random 0/1 column ``y`` and a random
    0-9 column ``x``, times each implementation on it, prints the elapsed
    seconds, and finally prints the norm of each result's difference from
    the ``target_mean_v2`` result (0.0 means the two results are identical).
    """
    y = np.random.randint(2, size=(5000, 1))
    x = np.random.randint(10, size=(5000, 1))
    data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the adjustable wall clock.

    # Label: "the original second method".
    start = time.perf_counter()
    result_1 = target_mean_v2(data, 'y', 'x')
    print('最开始的第二种方法 ', time.perf_counter() - start)

    # Label: "the method written with transform".
    start = time.perf_counter()
    result_2 = target_mean_v3(data, 'y', 'x')
    print('通过transform写的方法 ', time.perf_counter() - start)

    # Label: "teacher Wang Ran's method".
    start = time.perf_counter()
    result_3 = tm.target_mean_v3(data, 'y', 'x')
    print('王然老师的方法 ', time.perf_counter() - start)

    # Optimisation ideas noted by the author: unordered_map; int/float data
    # types; the loop form `for row from 0 <= row < nrow by 1:`.

    # Label: "the method with rewritten data types". The two extra return
    # values are not used here, so they are discarded.
    start = time.perf_counter()
    result_4_type_change, _, _ = tm.target_mean_v4(data, 'y', 'x')
    print('改写数据类型的方法 ', time.perf_counter() - start)

    # Label: "changed to unordered_map".
    start = time.perf_counter()
    result_4_unordered_map, _, _ = tm.target_mean_v4_unordered_map(
        data, 'y', 'x')
    print('修改成unordered_map ', time.perf_counter() - start)

    # Agreement check against the first result.
    print(np.linalg.norm(result_2 - result_1))
    print(np.linalg.norm(result_3 - result_1))
    print(np.linalg.norm(result_4_type_change - result_1))
    print(np.linalg.norm(result_4_unordered_map - result_1))
def main():
    """Time one call to ``tm.target_mean_v3`` on random data.

    Builds a 5000-row DataFrame with a random 0/1 column ``y`` and a random
    0-9 column ``x``, runs ``tm.target_mean_v3`` once, and prints the elapsed
    time in seconds.
    """
    y = np.random.randint(2, size=(5000, 1))
    x = np.random.randint(10, size=(5000, 1))
    data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the adjustable wall clock.
    start = time.perf_counter()
    tm.target_mean_v3(data, 'y', 'x')  # only the timing is of interest
    elapsed = time.perf_counter() - start
    print(elapsed)
def main():
    """Benchmark three target-mean implementations on 100000 random rows.

    Prints a start-up message with the current timestamp, builds a DataFrame
    with a random 0/1 column ``y`` and a random 0-9 column ``x``, then times
    ``target_mean_v2``, ``tm.target_mean_v3`` and ``tm.target_mean_v4`` in
    turn and prints the seconds each one took.
    """
    # Imported locally because this block refers to ``time`` as the function
    # (``time()``), not the module. perf_counter() is a monotonic,
    # high-resolution clock meant for timing short intervals.
    from time import perf_counter

    size = 100000
    # The start-up message reports a wall-clock timestamp, so time() stays.
    print(f'{size} test data start in {time()}, please wait.')

    y = np.random.randint(2, size=(size, 1))
    x = np.random.randint(10, size=(size, 1))
    data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

    start = perf_counter()
    target_mean_v2(data, 'y', 'x')
    elapsed = perf_counter() - start
    print(f'v2 is the python version, use time: {elapsed}')

    start = perf_counter()
    tm.target_mean_v3(data, 'y', 'x')
    elapsed = perf_counter() - start
    print(f'v3 is the version showed by Mr.Wang, use time: {elapsed}')

    start = perf_counter()
    tm.target_mean_v4(data, 'y', 'x')
    elapsed = perf_counter() - start
    print(f'v4 is my job, use time: {elapsed}')
def main():
    """Benchmark three target-mean implementations and check they agree.

    Builds a 5000-row DataFrame with a random 0/1 column ``y`` and a random
    0-9 column ``x``, times ``target_mean_v1``, ``tm.target_mean_v2`` and
    ``tm.target_mean_v3`` on it, and prints the norm of the differences
    between consecutive results (0.0 means the two results are identical).
    """
    y = np.random.randint(2, size=(5000, 1))
    x = np.random.randint(10, size=(5000, 1))
    data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the adjustable wall clock.
    start = time.perf_counter()
    result_1 = target_mean_v1(data, 'y', 'x')
    print("1", time.perf_counter() - start)

    start = time.perf_counter()
    result_2 = tm.target_mean_v2(data, 'y', 'x')
    print("2", time.perf_counter() - start)

    start = time.perf_counter()
    result_3 = tm.target_mean_v3(data, 'y', 'x')
    print("3", time.perf_counter() - start)

    # Agreement check between consecutive implementations.
    print(np.linalg.norm(result_1 - result_2))
    print(np.linalg.norm(result_2 - result_3))
def main():
    """Benchmark five target-mean implementations on random data.

    Builds a 5000-row DataFrame with a random 0/1 column ``y`` and a random
    0-9 column ``x``, then runs v1, v2, v3, v5 and v4 (in that order) and
    prints the seconds each call took.
    """
    # Imported locally because this block refers to ``time`` as the function
    # (``time()``), not the module. perf_counter() is a monotonic,
    # high-resolution clock meant for timing short intervals.
    from time import perf_counter

    y = np.random.randint(2, size=(5000, 1))
    x = np.random.randint(10, size=(5000, 1))
    data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

    # Execution order is kept as v1, v2, v3, v5, v4.
    benchmarks = (
        ("v1", target_mean_v1),
        ("v2", target_mean_v2),
        ("v3", tm.target_mean_v3),
        ("v5", tm.target_mean_v5),
        ("v4", tm.target_mean_v4),
    )
    for label, func in benchmarks:
        # Take a fresh start timestamp for every call so the time spent
        # printing the previous result is not counted in this measurement.
        start = perf_counter()
        func(data, 'y', 'x')
        elapsed = perf_counter() - start
        print("{}: {}".format(label, elapsed))
def call_tm_target_mean_v3(data, y_name, x_name):
    """Time one call to ``tm.target_mean_v3`` and print the duration.

    Args:
        data: Passed through unchanged as the first argument of
            ``tm.target_mean_v3``.
        y_name: Passed through unchanged as the second argument.
        x_name: Passed through unchanged as the third argument.

    Returns:
        None. The computed result is discarded; only the elapsed time in
        seconds is printed.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the adjustable wall clock.
    start_time = time.perf_counter()
    tm.target_mean_v3(data, y_name, x_name)
    elapsed = time.perf_counter() - start_time
    # Label: "execution time of the Cython tm.target_mean_v3 implementation".
    print('cython实现tm.target_mean_v3执行时间: ', elapsed)