def csvize_deleted_unique(csv_filename, error_code=-1, exclude_error_code=False):
    """Dump one delimited line per deleted post into a UTF-16 encoded file.

    The post ids come from ``weibo_module.get_deleted_postids``; ``error_code``
    and ``exclude_error_code`` are forwarded to it unchanged.  When that call
    yields no ids the function returns before ``csv_filename`` is opened, so
    the file is neither created nor truncated.

    Despite the function name, fields are joined with ``weibo_settings.delim``
    and are not CSV-quoted.
    """
    # NOTE(review): a second ``csvize_deleted_unique`` is defined further down
    # in this file; if both stay in the same module the later one is the one
    # callers get -- confirm which copy should remain.

    # The timestamp is not used below; the call is retained only so that the
    # function's behavior stays exactly as it was.
    unused_now = weibo_module.get_current_chinatime()

    dead_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
    if len(dead_ids) < 1:
        # Nothing is tracked as deleted: leave the target file untouched.
        return

    with codecs.open(csv_filename, "wb", "utf-16") as out:
        # Header row goes first.
        out.write(weibo_module.get_csv_header() + "\n")

        for post_id in dead_ids:
            merged_post = weibo_module.merge_deleted_from_new_old(post_id)
            fields = weibo_module.make_csvline_from_post(merged_post)
            # Coerce every field to unicode before joining with the
            # project-wide delimiter.
            row = weibo_settings.delim.join([unicode(field) for field in fields])
            out.write(row + "\n")
def csvize_repost_timeline(csv_filename, type="deleted", error_code=-1, exclude_error_code=False): nowdatetime = weibo_module.get_current_chinatime() if type == "deleted": query_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code) else: query_post_ids = weibo_module.get_all_postids() # query_post_ids = query_post_ids[:10] print query_post_ids num_query_posts = len(query_post_ids) #if we're not tracking any posts, get out of there if (num_query_posts <= 0): return ## OPEN A FILE with codecs.open(csv_filename, "wb", "utf-16") as wf: #write csv header csv_header = weibo_module.get_csv_header() wf.write(csv_header + "\n") #iterate through posts for this_post_id in query_post_ids: print "\n==WRITING POST #=========", this_post_id # okay first we get the initial post this_post = weibo_module.merge_deleted_from_new_old(this_post_id) # and then we scan the rest this_post_all_logs = weibo_module.get_all_posts(this_post_id) # and then we amass a logline -- a csv file inside a csv file! this_log_line = [] for this_log in this_post_all_logs: if 'post_repost_count' in this_log and this_log["post_repost_count"] <> None: this_log_line.append(str(this_log["post_repost_count"])) this_log_line.append(str(this_log["checked_at"])) this_log_line = weibo_settings.delim_log.join(this_log_line) #get csvline array csvline = weibo_module.make_csvline_from_post(this_post) #make each element in array unicode csvline = map((lambda x: unicode(x)), csvline) #join with delimiter csvline = weibo_settings.delim.join(csvline) #add logline csvline += weibo_settings.delim csvline += this_log_line #write csvline wf.write(csvline + "\n")
def list_deleted_posts(): deleted_post_ids = weibo_module.get_deleted_postids() print "" print "########## DEAD POSTS ##########" num_dead_posts = len(deleted_post_ids) print "# of dead posts we've tracked: " + str(num_dead_posts) if (num_dead_posts > 0): dead_post_ids = [] for this_post_id in deleted_post_ids: print "Checking post # " + this_post_id , "::::: ", # get the post info from postids_live collection, # since if the post was deleted we wouldn't have any of that info anymore this_post_deleted = weibo_module.merge_deleted_from_new_old(this_post_id) print "ERRORCODE = " , this_post_deleted["error_code"] print "alive: new/old repost count (" , this_post_deleted["post_repost_count"] , " / " , this_post_deleted["post_repost_count_initial"] , ") "
def list_deleted_posts(): deleted_post_ids = weibo_module.get_deleted_postids() print "" print "########## DEAD POSTS ##########" num_dead_posts = len(deleted_post_ids) print "# of dead posts we've tracked: " + str(num_dead_posts) if (num_dead_posts > 0): dead_post_ids = [] for this_post_id in deleted_post_ids: print "Checking post # " + this_post_id, "::::: ", # get the post info from postids_live collection, # since if the post was deleted we wouldn't have any of that info anymore this_post_deleted = weibo_module.merge_deleted_from_new_old( this_post_id) print "ERRORCODE = ", this_post_deleted["error_code"] print "alive: new/old repost count (", this_post_deleted[ "post_repost_count"], " / ", this_post_deleted[ "post_repost_count_initial"], ") "
def csvize_deleted_unique(csv_filename, error_code=-1, exclude_error_code=False):
    """Dump one delimited line per deleted post into a UTF-16 encoded file.

    The post ids come from ``weibo_module.get_deleted_postids``; ``error_code``
    and ``exclude_error_code`` are forwarded to it unchanged.  When that call
    yields no ids the function returns before ``csv_filename`` is opened, so
    the file is neither created nor truncated.

    Despite the function name, fields are joined with ``weibo_settings.delim``
    and are not CSV-quoted.
    """
    # NOTE(review): an earlier ``csvize_deleted_unique`` with the same body
    # also appears in this file; if both stay in the same module this later
    # copy is the one callers get -- confirm which copy should remain.

    # The timestamp is not used below; the call is retained only so that the
    # function's behavior stays exactly as it was.
    unused_now = weibo_module.get_current_chinatime()

    dead_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code)
    if len(dead_ids) < 1:
        # Nothing is tracked as deleted: leave the target file untouched.
        return

    with codecs.open(csv_filename, "wb", "utf-16") as out:
        # Header row goes first.
        out.write(weibo_module.get_csv_header() + "\n")

        for post_id in dead_ids:
            merged_post = weibo_module.merge_deleted_from_new_old(post_id)
            fields = weibo_module.make_csvline_from_post(merged_post)
            # Coerce every field to unicode before joining with the
            # project-wide delimiter.
            row = weibo_settings.delim.join([unicode(field) for field in fields])
            out.write(row + "\n")
def jsonize_repost_timeline(json_filename, type="deleted", error_code=-1, exclude_error_code=False, do_obfuscate=False): nowdatetime = weibo_module.get_current_chinatime() if type == "deleted": query_post_ids = weibo_module.get_deleted_postids(error_code, exclude_error_code) else: query_post_ids = weibo_module.get_all_postids() # query_post_ids = query_post_ids[:10] # limit to the first 10 ids - for debugging print query_post_ids num_query_posts = len(query_post_ids) #if we're not tracking any posts, get out of there if (num_query_posts <= 0): return ## OPEN A FILE with codecs.open(json_filename, "wb") as wf: wf.write("[ " + "\n") #iterate through posts postno = 0 for this_post_id in query_post_ids: postno += 1 print "\n==WRITING (", postno, " / ", num_query_posts, ") POST #=========", this_post_id # okay first we get the initial post this_post = weibo_module.merge_deleted_from_new_old(this_post_id) # and then we scan the rest this_post_all_logs = weibo_module.get_all_posts(this_post_id) # and then we amass a logline this_log_list = [] for this_log in this_post_all_logs: if 'post_repost_count' in this_log and this_log["post_repost_count"] <> None and this_log["checked_at"] <> None: this_pair_dict = {} this_pair_dict["checked_at"] = str(this_log["checked_at"]) this_pair_dict["post_repost_count"] = int(this_log["post_repost_count"]) this_log_list.append(this_pair_dict) #get jsonline array jsonline = weibo_module.make_jsonlist_from_post(this_post) #merge jsonline['post_repost_log'] = this_log_list """ new_jsonline = {} #amass logline new_jsonline['post_repost_log'] = this_log_list #add other items new_jsonline['post_created_at'] = jsonline['post_created_at'] new_jsonline['post_created_at_epoch'] = jsonline['post_created_at_epoch'] new_jsonline['post_lifespan'] = jsonline['post_lifespan'] new_jsonline['last_checked_at'] = jsonline['last_checked_at'] new_jsonline['user_id'] = weibo_module.hashmod(jsonline['user_id'], weibo_settings.salt, weibo_settings.user_id_mod) 
new_jsonline['post_id'] = jsonline['post_id'] new_jsonline['user_name'] = jsonline['user_name'] new_jsonline['user_follower_count'] = jsonline['user_follower_count'] new_jsonline['post_text'] = jsonline['post_text'] new_jsonline['xxx'] = jsonline['xxx'] new_jsonline['xxx'] = jsonline['xxx'] new_jsonline['xxx'] = jsonline['xxx'] #new_jsonline['post_id'] = weibo_module.hashmod(jsonline['post_id'], weibo_settings.salt, weibo_settings.post_id_mod) """ #wf.write(json.dumps(jsonline, ensure_ascii=False)) wf.write(json.dumps(jsonline)) #wf.write(json.dumps(new_jsonline)) if postno != num_query_posts: wf.write(", ") wf.write("\n") wf.write(" ]" + "\n")
def csvize_repost_timeline(csv_filename, type="deleted", error_code=-1, exclude_error_code=False): nowdatetime = weibo_module.get_current_chinatime() if type == "deleted": query_post_ids = weibo_module.get_deleted_postids( error_code, exclude_error_code) else: query_post_ids = weibo_module.get_all_postids() # query_post_ids = query_post_ids[:10] print query_post_ids num_query_posts = len(query_post_ids) #if we're not tracking any posts, get out of there if (num_query_posts <= 0): return ## OPEN A FILE with codecs.open(csv_filename, "wb", "utf-16") as wf: #write csv header csv_header = weibo_module.get_csv_header() wf.write(csv_header + "\n") #iterate through posts for this_post_id in query_post_ids: print "\n==WRITING POST #=========", this_post_id # okay first we get the initial post this_post = weibo_module.merge_deleted_from_new_old(this_post_id) # and then we scan the rest this_post_all_logs = weibo_module.get_all_posts(this_post_id) # and then we amass a logline -- a csv file inside a csv file! this_log_line = [] for this_log in this_post_all_logs: if 'post_repost_count' in this_log and this_log[ "post_repost_count"] <> None: this_log_line.append(str(this_log["post_repost_count"])) this_log_line.append(str(this_log["checked_at"])) this_log_line = weibo_settings.delim_log.join(this_log_line) #get csvline array csvline = weibo_module.make_csvline_from_post(this_post) #make each element in array unicode csvline = map((lambda x: unicode(x)), csvline) #join with delimiter csvline = weibo_settings.delim.join(csvline) #add logline csvline += weibo_settings.delim csvline += this_log_line #write csvline wf.write(csvline + "\n")