def _collect_be_retweet_counts():
    """Scan Redis (client ``r``) and tally the size of every ``be_retweet_*`` hash.

    Returns a ``(dis_dict, save_list)`` pair:

    * ``dis_dict`` maps a hash size to the number of keys having that size.
    * ``save_list`` holds ``(uid, size)`` tuples for keys whose size is at
      least 1000, in the order they were encountered.
    """
    key_prefix = 'be_retweet_'
    scan_batch = 1000        # COUNT hint handed to SCAN on every call
    scan_limit = 1000000     # upper bound on the accumulated COUNT budget
    save_threshold = 1000    # minimum hash size for a uid to be saved

    dis_dict = {}
    save_list = []
    scan_cursor = 0
    scan_count = 0
    while scan_count < scan_limit:
        scan_cursor, keys = r.scan(scan_cursor, count=scan_batch)
        scan_count += scan_batch
        for key in keys:
            if not key.startswith(key_prefix):
                continue
            uid = key[len(key_prefix):]
            # Only the number of hash fields is needed, so ask Redis for the
            # length instead of transferring the whole hash.
            be_retweet_user_count = r.hlen(key)
            dis_dict[be_retweet_user_count] = \
                dis_dict.get(be_retweet_user_count, 0) + 1
            if be_retweet_user_count >= save_threshold:
                save_list.append((uid, be_retweet_user_count))
        # SCAN hands back cursor 0 once the whole keyspace has been visited;
        # continuing past that point would start over and count keys twice.
        if int(scan_cursor) == 0:
            break
    return dis_dict, save_list


def _bucket_stats(dis_dict, range_list):
    """Aggregate the size distribution into cumulative "greater than" buckets.

    For every lower bound in ``range_list`` and every hash size ``rf`` that is
    strictly greater than it, ``n_dict[bound]`` grows by the number of keys
    with that size and ``fw_dict[bound]`` grows by ``rf``.

    Returns ``(n_dict, fw_dict)``; bounds that matched nothing are absent.
    """
    n_dict = {}
    fw_dict = {}
    for rf, dis_count in dis_dict.items():
        for range_up in range_list:
            if rf > range_up:
                # NOTE(review): rf is added once per distinct size, not
                # rf * dis_count. Kept as in the original; confirm whether a
                # weighted total was intended.
                fw_dict[range_up] = fw_dict.get(range_up, 0) + rf
                n_dict[range_up] = n_dict.get(range_up, 0) + dis_count
    return n_dict, fw_dict


def main():
    """Export heavily retweeted uids to CSV and print a size distribution.

    Scans Redis for ``be_retweet_<uid>`` hashes, writes one ``uid, count`` row
    per uid whose hash has at least 1000 fields (largest first) to
    ``core_list.csv``, then prints an ``Rf, N, Fw`` table with one line per
    lower bound in ``[100000, 10000, 1000, 100, 0]``.

    Presumably each hash field is one user who retweeted ``uid``; that schema
    is not visible here, so verify it against the code that writes the hashes.
    """
    output_path = '/home/ubuntu8/huxiaoqian/user_portrait/user_portrait/cron/recommentation_in/core_list.csv'
    range_list = [100000, 10000, 1000, 100, 0]

    # 'wb' is the mode Python 2's csv module expects. The file is opened
    # before the scan (as before) and the with-block guarantees it is closed
    # even when a Redis call raises.
    with open(output_path, 'wb') as f:
        writer = csv.writer(f)
        dis_dict, save_list = _collect_be_retweet_counts()
        sort_save_list = sorted(save_list, key=lambda x: x[1], reverse=True)
        for uid, count in sort_save_list:
            # Write the count itself; the original wrote the whole
            # (uid, count) tuple into the second column.
            writer.writerow([uid, count])

    n_dict, fw_dict = _bucket_stats(dis_dict, range_list)
    print('Rf, N, Fw:')
    for range_up in range_list:
        n = n_dict.get(range_up, 0)
        fw = fw_dict.get(range_up, 0)
        print('%s %s %s' % (range_up, n, fw))
def cal_ave_fans():
    """Print the summed size of all ``be_retweet_*`` hashes divided by the scan budget.

    Walks the keyspace of Redis client ``r`` with SCAN in batches of 1000
    (COUNT hint), adds up the number of fields of every hash whose key starts
    with ``be_retweet_``, and prints ``ave_count: <value>``.
    """
    # TODO(review): the original note here read "test there should use
    # r_dict" - confirm which Redis client this is meant to query.
    key_prefix = 'be_retweet_'
    scan_batch = 1000        # COUNT hint handed to SCAN on every call
    scan_limit = 1000000     # upper bound on the accumulated COUNT budget

    scan_count = 0
    scan_cursor = 0
    all_count = 0
    while scan_count < scan_limit:
        scan_cursor, keys = r.scan(scan_cursor, count=scan_batch)
        scan_count += scan_batch
        for key in keys:
            if key.startswith(key_prefix):
                # Only the field count matters, so avoid fetching the hash.
                all_count += r.hlen(key)
        # Cursor 0 means the keyspace has been fully visited; scanning on
        # would start over and count the same keys again.
        if int(scan_cursor) == 0:
            break

    # NOTE(review): the denominator is the accumulated COUNT budget
    # (1000 per SCAN call), not the number of matching keys or of keys
    # actually returned. Kept as in the original; confirm the intended average.
    ave_count = float(all_count) / scan_count
    print('ave_count: %s' % ave_count)