def write_to_filesystem(self):
    if self.previous_graph_data == self.graph_data:
        log_line(self.get_debug_statement() + "No update in graph data")
        return

    if self.previous_graph_data is not None:
        # Pickle file already exists for the user
        for site_data in self.graph_data:
            for contest_data in self.graph_data[site_data]:
                try:
                    previous_value = self.contest_mapping[contest_data["title"]]
                except KeyError:
                    continue
                if len(contest_data["data"]) < len(previous_value):
                    # Keep the previously stored (longer) data for this contest;
                    # mutate the dict in place so the change is actually pickled
                    contest_data["data"] = previous_value

    with open(self.pickle_file_path, "wb") as pickle_file:
        pickle.dump(self.graph_data, pickle_file)

    if getuser() == "root":
        # For production machine as nginx runs with www-data
        # and it can't delete these files otherwise
        www_data = getpwnam("www-data")
        os.chown(self.pickle_file_path, www_data.pw_uid, www_data.pw_gid)

    log_line(self.get_debug_statement() + "Writing to filesystem done")
    utilities.clear_profile_page_cache(self.user_record.stopstalk_handle)
def recheck_handle_details():
    if session.user_id not in STOPSTALK_ADMIN_USER_IDS:
        return "Don't be here"

    response = ""
    email = request.vars.email
    site = request.vars.site
    if site is None or email is None:
        return "site and email params are required"

    atable = db.auth_user
    stable = db.submission
    ihtable = db.invalid_handle

    user_record = db(atable.email == email).select().first()
    if user_record is None:
        return "User with that email not found"

    handle = user_record[site.lower() + "_handle"]
    invalid_handles = db((ihtable.handle == handle) & \
                         (ihtable.site == site)).delete()
    response += "Deleted %d handles\n" % invalid_handles

    query = (stable.user_id == user_record.id) & \
            (stable.site == site)
    submission_records = db(query).delete()
    response += "Deleted %d submission records\n" % submission_records

    user_record.update_record(**{site.lower() + "_lr": current.INITIAL_DATE})
    response += "User record updated for last retrieved to INITIAL_DATE\n"

    current.REDIS_CLIENT.rpush("next_retrieve_user", user_record.id)
    response += "User added to next refresh queue\n"

    utilities.clear_profile_page_cache(user_record.stopstalk_handle)
    if site == "CodeChef":
        current.REDIS_CLIENT.delete(
            utilities.get_codechef_last_retrieved_key(user_record.id, False))
    response += "Profile page cache deleted\n"

    return response
def retrieve_submissions(record, custom, all_sites=current.SITES.keys(), codechef_retrieval=False):
    """
        Retrieve submissions that are not already in the database
    """
    global INVALID_HANDLES
    global failed_user_retrievals
    global todays_date
    global metric_handlers

    if concurrent_submission_retrieval_handler("GET", record.id, custom) == "ONGOING":
        print "Already ongoing retrieval for", record.id, custom
        return
    else:
        concurrent_submission_retrieval_handler("SET", record.id, custom)

    stopstalk_retrieval_start_time = time.time()
    sites_retrieval_timings = 0
    list_of_submissions = []
    retrieval_failures = []
    should_clear_cache = False
    nrtable = db.next_retrieval
    user_column_name = "custom_user_id" if custom else "user_id"
    nrtable_record = db(
        nrtable[user_column_name] == record.id).select().first()
    skipped_retrieval = set([])
    is_daily_retrieval = (retrieval_type == "daily_retrieve")
    logger = Logger(record.stopstalk_handle, custom)

    if nrtable_record is None:
        print "Record not found", user_column_name, record.id
        nrtable.insert(**{user_column_name: record.id})
        nrtable_record = db(
            nrtable[user_column_name] == record.id).select().first()

    # Iterate over a copy so that removing a down site does not skip the next one
    for site in list(all_sites):
        Site = getattr(sites, site.lower())
        if Site.Profile.is_website_down():
            all_sites.remove(site)

    common_influx_params = dict(stopstalk_handle=record.stopstalk_handle,
                                retrieval_type=retrieval_type,
                                value=1)

    for site in all_sites:
        common_influx_params["site"] = site
        lower_site = site.lower()
        site_handle = record[lower_site + "_handle"]
        site_lr = lower_site + "_lr"
        site_delay = lower_site + "_delay"
        last_retrieved = record[site_lr]

        # Rocked it totally ! ;)
        if is_daily_retrieval and \
           datetime.timedelta(days=nrtable_record[site_delay] / 3 + 1) + \
           last_retrieved.date() > todays_date:
            utilities.push_influx_data("retrieval_stats",
                                       dict(kind="skipped",
                                            **common_influx_params))
            logger.log(site, "skipped")
            metric_handlers[lower_site]["skipped_retrievals"].increment_count("total", 1)
            skipped_retrieval.add(site)
            continue

        last_retrieved = time.strptime(str(last_retrieved), TIME_CONVERSION)

        if (site_handle, site) in INVALID_HANDLES:
            logger.log(site, "not found:" + site_handle)
            utilities.push_influx_data("retrieval_stats",
                                       dict(kind="not_found",
                                            **common_influx_params))
            metric_handlers[lower_site]["handle_not_found"].increment_count("total", 1)
            record.update({site_lr: datetime.datetime.now()})
            should_clear_cache = True
            continue

        if site_handle:
            Site = getattr(sites, site.lower())
            P = Site.Profile(site_handle)

            # Retrieve submissions from the profile site
            site_method = P.get_submissions
            start_retrieval_time = time.time()
            if site == "UVa":
                submissions = site_method(last_retrieved,
                                          uva_problem_dict,
                                          is_daily_retrieval)
            else:
                submissions = site_method(last_retrieved, is_daily_retrieval)
            total_retrieval_time = time.time() - start_retrieval_time
            sites_retrieval_timings += total_retrieval_time
            metric_handlers[lower_site]["retrieval_times"].add_to_list("list", total_retrieval_time)

            if submissions in (SERVER_FAILURE, OTHER_FAILURE):
                utilities.push_influx_data("retrieval_stats",
                                           dict(kind=submissions.lower(),
                                                **common_influx_params))
                logger.log(site, submissions)
                metric_handlers[lower_site]["retrieval_count"].increment_count("failure", 1)
                # Add the failure sites for inserting data into failed_retrieval
                retrieval_failures.append(site)
                should_clear_cache = True
                current.REDIS_CLIENT.sadd("website_down_" + site.lower(),
                                          record.stopstalk_handle)
            elif submissions == NOT_FOUND:
                utilities.push_influx_data("retrieval_stats",
                                           dict(kind="new_invalid_handle",
                                                **common_influx_params))
                logger.log(site, "new invalid handle:" + site_handle)
                new_handle_not_found(site, site_handle)
                # Update the last retrieved of an invalid handle as we don't
                # want new_user script to pick this user again and again
                record.update({site_lr: datetime.datetime.now()})
                should_clear_cache = True
            else:
                utilities.push_influx_data("retrieval_stats",
                                           dict(kind="success",
                                                **common_influx_params))
                submission_len = len(submissions)
                metric_handlers[lower_site]["retrieval_count"].increment_count("success", 1)
                metric_handlers[lower_site]["submission_count"].increment_count("total", submission_len)
                logger.log(site, submission_len)
                list_of_submissions.append((site, submissions))
                # Immediately update the last_retrieved of the record
                # Note: Only the record object is updated & not reflected in DB
                record.update({site_lr: datetime.datetime.now()})
                should_clear_cache = True
        else:
            # Update this time so that this user is not picked
            # up again and again by new_user cron
            record.update({site_lr: datetime.datetime.now()})
            should_clear_cache = True
            if retrieval_type == "daily_retrieve":
                nrtable_record.update({site_delay: 100000})

    total_submissions_retrieved = 0
    for submissions in list_of_submissions:
        site = submissions[0]
        lower_site = site.lower()
        site_delay = lower_site + "_delay"
        submissions_count = get_submissions(record.id,
                                            record[lower_site + "_handle"],
                                            record.stopstalk_handle,
                                            submissions[1],
                                            site,
                                            custom)
        total_submissions_retrieved += submissions_count
        if retrieval_type == "daily_retrieve" and \
           site not in skipped_retrieval and \
           site not in retrieval_failures:
            if submissions_count == 0:
                nrtable_record.update({site_delay: nrtable_record[site_delay] + 1})
            else:
                nrtable_record.update({site_delay: 0})
        elif retrieval_type == "daily_retrieve" and site in retrieval_failures:
            # If retrieval failed for the user, then reset the delay so that
            # the details can be retrieved the next day
            nrtable_record.update({site_delay: 0})

    # Clear the profile page cache in case there is at least one submission retrieved
    if should_clear_cache:
        utilities.clear_profile_page_cache(record.stopstalk_handle)

    # To reflect all the updates to record into DB
    record.update_record()
    if retrieval_type == "daily_retrieve":
        nrtable_record.update_record()

    if retrieval_type == "refreshed_users" and len(retrieval_failures):
        current.REDIS_CLIENT.rpush("next_retrieve_custom_user" if custom else "next_retrieve_user",
                                   record.id)
    else:
        # @ToDo: Too much main memory usage as strings are stored in a list
        #        Aim to store only the ints and let typecasting and
        #        "NULL" insertions happen just when required
        for site in retrieval_failures:
            if custom:
                failed_user_retrievals.append("(%s,%s,'%s')" % ("NULL", str(record.id), site))
            else:
                failed_user_retrievals.append("(%s,%s,'%s')" % (str(record.id), "NULL", site))

    # Keep committing the updates to the db to avoid lock wait timeouts
    db.commit()

    if total_submissions_retrieved > 0 and not custom:
        log_message = "Rating updated from %f to " % record.stopstalk_rating
        new_rating = update_stopstalk_rating(record.id,
                                             record.stopstalk_handle,
                                             custom)
        log_message += str(new_rating)
        logger.generic_log(log_message)

    concurrent_submission_retrieval_handler("DEL", record.id, custom)
    total_retrieval_time = time.time() - stopstalk_retrieval_start_time
    metric_handlers["overall"]["just_stopstalk_code_time"].add_to_list(
        "list", total_retrieval_time - sites_retrieval_timings)
def update_friend():
    """
        Update custom friend details
    """
    if len(request.args) != 1:
        session.flash = T("Please click one of the buttons")
        redirect(URL("user", "custom_friend"))

    cftable = db.custom_friend
    stable = db.submission
    query = (cftable.user_id == session.user_id) & \
            (cftable.id == request.args[0])
    record = db(query).select().first()
    if record is None:
        session.flash = T("Please click one of the buttons")
        redirect(URL("user", "custom_friend"))

    # Do not allow to modify stopstalk_handle
    cftable.stopstalk_handle.writable = False

    form_fields = ["first_name", "last_name",
                   "institute", "country", "stopstalk_handle"]
    for site in current.SITES:
        form_fields.append(site.lower() + "_handle")

    for field in form_fields:
        if record[field] is None:
            continue
        record[field] = unicode(record[field], "utf-8").encode("utf-8")

    form = SQLFORM(cftable,
                   record,
                   fields=form_fields,
                   deletable=True,
                   showid=False)
    form.vars.stopstalk_handle = record.stopstalk_handle.replace("cus_", "")

    if form.validate(onvalidation=current.sanitize_fields):
        form.vars.stopstalk_handle = record.stopstalk_handle
        pickle_file_path = "./applications/stopstalk/graph_data/" + \
                           str(record.id) + "_custom.pickle"
        import os
        utilities.clear_profile_page_cache(record.stopstalk_handle)
        if form.deleted:
            ## DELETE
            # If delete checkbox is checked => just process it & redirect back
            session.flash = T("Custom User deleted")
            duplicate_cus = db(cftable.duplicate_cu == record.id).select()
            if os.path.exists(pickle_file_path):
                os.remove(pickle_file_path)
            if len(duplicate_cus):
                # The current custom user is a parent of other duplicate custom users
                first_dcu = duplicate_cus.first()
                # Populate stopstalk_handle of first child to submission table
                squery = (stable.stopstalk_handle == record.stopstalk_handle)
                db(squery).update(stopstalk_handle=first_dcu.stopstalk_handle)
                # Pick the first cu child and copy the stopstalk_handle to the parent
                record.update_record(user_id=first_dcu.user_id,
                                     stopstalk_handle=first_dcu.stopstalk_handle,
                                     institute=first_dcu.institute)
                # Now delete the first child as the parent is now modified
                # and the previous siblings remain as child to this parent
                first_dcu.delete_record()
            else:
                record.delete_record()
            redirect(URL("user", "custom_friend"))
        else:
            updated_sites = utilities.handles_updated(record, form)
            ## UPDATE
            if updated_sites != []:
                if os.path.exists(pickle_file_path):
                    os.remove(pickle_file_path)
                submission_query = (stable.custom_user_id == int(request.args[0]))
                reset_sites = current.SITES if record.duplicate_cu else updated_sites

                nrtable = db.next_retrieval
                nrtable_record = db(db.next_retrieval.custom_user_id == int(
                    request.args[0])).select().first()
                if nrtable_record is None:
                    nid = nrtable.insert(custom_user_id=int(request.args[0]))
                    nrtable_record = nrtable(nid)
                for site in reset_sites:
                    form.vars[site.lower() + "_lr"] = current.INITIAL_DATE
                    nrtable_record.update({site.lower() + "_delay": 0})
                nrtable_record.update_record()

                submission_query &= (stable.site.belongs(reset_sites))
                form.vars["duplicate_cu"] = None
                form.vars["stopstalk_rating"] = 0
                form.vars["stopstalk_prev_rating"] = 0
                form.vars["per_day"] = 0.0
                form.vars["per_day_change"] = "0.0"
                form.vars["graph_data_retrieved"] = False
                # Only delete the submissions of those particular sites
                # whose site handles are updated
                db(submission_query).delete()

            record.update_record(**dict(form.vars))
            session.flash = T("User details updated")
            redirect(URL("user", "custom_friend"))
    elif form.errors:
        form.vars.stopstalk_handle = record.stopstalk_handle
        response.flash = T("Form has errors")

    return dict(form=form)
def update_details():
    """
        Update user details
    """
    form_fields = ["first_name", "last_name",
                   "email", "institute",
                   "country", "stopstalk_handle"]
    for site in current.SITES:
        form_fields.append(site.lower() + "_handle")

    atable = db.auth_user
    stable = db.submission
    record = utilities.get_user_records([session.user_id], "id", "id", True)

    for field in form_fields:
        if record[field] is None:
            continue
        record[field] = record[field].encode("utf-8")

    # Do not allow to modify stopstalk_handle and email
    atable.stopstalk_handle.writable = False
    atable.stopstalk_handle.comment = T("StopStalk handle cannot be updated")
    atable.email.readable = True
    atable.email.writable = False
    atable.email.comment = T("Email cannot be updated")

    form = SQLFORM(db.auth_user,
                   record,
                   fields=form_fields,
                   showid=False)

    if form.process(onvalidation=current.sanitize_fields).accepted:
        current.REDIS_CLIENT.delete(
            utilities.get_user_record_cache_key(session.user_id))
        session.flash = T("User details updated")
        updated_sites = utilities.handles_updated(record, form)
        if updated_sites != []:
            utilities.clear_profile_page_cache(record.stopstalk_handle)
            site_lrs = {}
            nrtable = db.next_retrieval
            submission_query = (stable.user_id == session.user_id)
            nrtable_record = db(
                nrtable.user_id == session.user_id).select().first()
            if nrtable_record is None:
                nid = nrtable.insert(user_id=session.user_id)
                nrtable_record = nrtable(nid)
            for site in updated_sites:
                site_lrs[site.lower() + "_lr"] = current.INITIAL_DATE
                nrtable_record.update({site.lower() + "_delay": 0})
            nrtable_record.update_record()

            pickle_file_path = "./applications/stopstalk/graph_data/" + \
                               str(session.user_id) + ".pickle"
            import os
            if os.path.exists(pickle_file_path):
                os.remove(pickle_file_path)

            # Reset the user only if any of the profile site handles is updated
            query = (atable.id == session.user_id)
            db(query).update(stopstalk_rating=0,
                             stopstalk_prev_rating=0,
                             per_day=0.0,
                             per_day_change="0.0",
                             authentic=False,
                             graph_data_retrieved=False,
                             **site_lrs)

            submission_query &= (stable.site.belongs(updated_sites))
            # Only delete the submissions of those particular sites
            # whose site handles are updated
            db(submission_query).delete()

        session.auth.user = db.auth_user(session.user_id)
        current.REDIS_CLIENT.delete(
            CARD_CACHE_REDIS_KEYS["more_accounts_prefix"] + str(session.user_id))
        redirect(URL("default", "index"))
    elif form.errors:
        response.flash = T("Form has errors")

    return dict(form=form)
def retrieve_submissions(record, custom, all_sites=current.SITES.keys(), codechef_retrieval=False):
    """
        Retrieve submissions that are not already in the database
    """
    global INVALID_HANDLES
    global failed_user_retrievals
    global plink_to_id

    time_conversion = "%Y-%m-%d %H:%M:%S"
    list_of_submissions = []
    retrieval_failures = []
    nrtable = db.next_retrieval
    user_column_name = "custom_user_id" if custom else "user_id"
    nrtable_record = db(
        nrtable[user_column_name] == record.id).select().first()
    skipped_retrieval = set([])

    if nrtable_record is None:
        print "Record not found", user_column_name, record.id
        nrtable.insert(**{user_column_name: record.id})
        nrtable_record = db(
            nrtable[user_column_name] == record.id).select().first()

    disabled_sites = current.REDIS_CLIENT.smembers("disabled_retrieval")
    for site in disabled_sites:
        if site in all_sites:
            all_sites.remove(site)

    for site in all_sites:
        site_handle = record[site.lower() + "_handle"]
        site_lr = site.lower() + "_lr"
        site_delay = site.lower() + "_delay"
        last_retrieved = record[site_lr]

        # Rocked it totally ! ;)
        if retrieval_type == "daily_retrieve" and \
           datetime.timedelta(days=nrtable_record[site_delay] / 5 + 1) + \
           last_retrieved.date() > datetime.datetime.today().date():
            print "Skipping " + site + " for " + record.stopstalk_handle
            skipped_retrieval.add(site)
            continue

        last_retrieved = time.strptime(str(last_retrieved), time_conversion)

        if (site_handle, site) in INVALID_HANDLES:
            print "Not found %s %s" % (site_handle, site)
            record.update({site_lr: datetime.datetime.now()})
            continue

        if site_handle:
            Site = getattr(sites, site.lower())
            P = Site.Profile(site_handle)

            # Retrieve submissions from the profile site
            site_method = P.get_submissions
            if site == "CodeForces":
                submissions = site_method(last_retrieved, plink_to_id)
            else:
                submissions = site_method(last_retrieved)

            if submissions in (SERVER_FAILURE, OTHER_FAILURE):
                print "%s %s %s" % (submissions, site, record.stopstalk_handle)
                # Add the failure sites for inserting data into failed_retrieval
                retrieval_failures.append(site)
                current.REDIS_CLIENT.sadd("website_down_" + site.lower(),
                                          record.stopstalk_handle)
            elif submissions == NOT_FOUND:
                print "New invalid handle %s %s" % (site_handle, site)
                handle_not_found(site, site_handle)
                # Update the last retrieved of an invalid handle as we don't
                # want new_user script to pick this user again and again
                record.update({site_lr: datetime.datetime.now()})
            else:
                list_of_submissions.append((site, submissions))
                # Immediately update the last_retrieved of the record
                # Note: Only the record object is updated & not reflected in DB
                record.update({site_lr: datetime.datetime.now()})
        else:
            # Update this time so that this user is not picked
            # up again and again by new_user cron
            record.update({site_lr: datetime.datetime.now()})
            if retrieval_type == "daily_retrieve":
                nrtable_record.update({site_delay: 100000})

    total_submissions_retrieved = 0
    for submissions in list_of_submissions:
        site = submissions[0]
        _debug(record.stopstalk_handle, site, custom)
        site_delay = site.lower() + "_delay"
        submissions_count = get_submissions(record.id,
                                            record[site.lower() + "_handle"],
                                            record.stopstalk_handle,
                                            submissions[1],
                                            site,
                                            custom)
        total_submissions_retrieved += submissions_count
        if retrieval_type == "daily_retrieve" and \
           site not in skipped_retrieval and \
           site not in retrieval_failures:
            if submissions_count == 0:
                nrtable_record.update({site_delay: nrtable_record[site_delay] + 1})
            else:
                nrtable_record.update({site_delay: 0})
        elif retrieval_type == "daily_retrieve" and site in retrieval_failures:
            # If retrieval failed for the user, then reset the delay so that
            # the details can be retrieved the next day
            nrtable_record.update({site_delay: 0})

    # Clear the profile page cache in case there is at least one submission retrieved
    if total_submissions_retrieved != 0:
        utilities.clear_profile_page_cache(record.stopstalk_handle)

    # To reflect all the updates to record into DB
    record.update_record()
    if retrieval_type == "daily_retrieve":
        nrtable_record.update_record()

    if retrieval_type == "refreshed_users" and len(retrieval_failures):
        current.REDIS_CLIENT.rpush("next_retrieve_custom_user" if custom else "next_retrieve_user",
                                   record.id)
    else:
        # @ToDo: Too much main memory usage as strings are stored in a list
        #        Aim to store only the ints and let typecasting and
        #        "NULL" insertions happen just when required
        for site in retrieval_failures:
            if custom:
                failed_user_retrievals.append("(%s,%s,'%s')" % ("NULL", str(record.id), site))
            else:
                failed_user_retrievals.append("(%s,%s,'%s')" % (str(record.id), "NULL", site))

    # Keep committing the updates to the db to avoid lock wait timeouts
    db.commit()